VSI_Analysis/components/VSI.R

# checks the main data, returns FALSE in case something is not OK
# for VSI, we need at least: idno, cntry, VSI1, VSI2, VSI3

check_main_file <- function (dataset) {

  if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3)
      && !is.null(dataset$idno)  && !is.null(dataset$cntry)) {

    return (TRUE)

  } else {
    if (is.null(dataset$VSI1)) {
      cat ("VSI1 variable is missing in the main dataset.  ");
    }
    if (is.null(dataset$VSI2)) {
      cat ("VSI2 variable is missing in the main dataset.  ");
    }
    if (is.null(dataset$VSI3)) {
      cat ("VSI3 variable is missing in the main dataset.  ");
    }
    if (is.null(dataset$idno)) {
      cat ("idno variable (interview ID) is missing in the main dataset.  ");
    }
    if (is.null(dataset$cntry)) {
      cat ("cntry variable (country code) is missing in the main dataset.  ");
    }

    cat ("**Please check your dataset and try again**")
    return (FALSE)
  }
}

# checks the intervier data, returns FALSE in case something is not OK
# here, we need at least: idno, cntry, intnum
check_inwer_file <- function (dataset) {

  if (!is.null(dataset$cntry) && !is.null(dataset$idno) && !is.null(dataset$intnum)) {

    return (TRUE)

  } else {
    if (is.null(dataset$intnum)) {
      cat ("intnum (intervie**wer** ID variable is missing in the interviewer dataset.  ");
    }
    if (is.null(dataset$idno)) {
      cat ("idno variable (interview ID) is missing in the interviewer dataset.  ");
    }
    if (is.null(dataset$cntry)) {
      cat ("cntry variable (country code) is missing in the interviewer dataset.  ");
    }
    cat ("**Please check your dataset and try again**")
    return (FALSE)
  }
}


# Extracts access points fingerprints from the data (5 APs from 1 VSI)
extract_ap <- function (source) {
  return (c(substr(source,0,7),
            substr(source,16,23),
            substr(source,32,39),
            substr(source,48,55),
            substr(source,64,71)));
}

# Checks whether at least 3 of 5 APs are present during an interview
match_within <- function (t1,t2,t3) {

  matched_within = 0

  for (a in seq(1,5)) {

    if (t1[a] %in% t2 && t1[a] %in% t3) {
      matched_within=matched_within + 1
    }
  }
  if (matched_within >= 3) {
    return (TRUE)
  }
  else {
    return (FALSE)

  }

}

# Checks whether any of other cases have 3 or more same APs (i.e. same location)

match_outside <- function (a_t1, a_t2, a_t3, b_t1, b_t2, b_t3) {

  matched = FALSE

  # only take cases that match !WITHIN!
  if (match_within(a_t1, a_t2, a_t3) == FALSE || match_within(b_t1, b_t2, b_t3) == FALSE) {
    return (FALSE) # no match, as we can't even do it within
  }


  matches=0

  # we iterate through all 5 APs. If at least three are repeated across
  # all time points in both surveys (a_t1... b_t3) we have a match

  for (a in seq(1,5)) {

    if (a_t1[a] %in% b_t1 && a_t1[a] %in% b_t2 && a_t1[a] %in% b_t3 &&
        a_t2[a] %in% b_t1 && a_t2[a] %in% b_t2 && a_t2[a] %in% b_t3 &&
        a_t3[a] %in% b_t1 && a_t3[a] %in% b_t2 && a_t3[a] %in% b_t3)
    {
      matches=matches+1
    }
  }

  if (matches >= 3) {
    return (TRUE)
  }
  else {
    return (FALSE)
  }
}


# Checks existing pairings and add it to appropriate position
# example:
# in raw data, matches are found between cases 22<->23; 23<->38; 40<->41

# This can be summarized into:
# Matched cases: 22, 23, 38 ; as well as 40, 41

# This function takes a pair (e.g. c(23,38)) and checks if any of them is
# already in any of the matches.

# if it is, it adds new case to the appropriate list.
# if not, it adds a new offset with the new pair

group_pairs <- function (groups, pair) {

  # if we receive an empty list, just populate it...
  if (length(groups) == 0) {
    groups <- list (c(pair[1], pair[2]))
    return (groups)
  }

  # find both elements in the groupings
  loc_1 <- which(sapply(groups, FUN=function(X) pair[1] %in% X))
  loc_2 <- which(sapply(groups, FUN=function(X) pair[2] %in% X))

  # none in existing list
  if (length(loc_1) == 0 && length(loc_2) == 0) {

    groups[[length(groups)+1]] <- c(pair[1], pair[2])

  } else if (length(loc_1) != 0 && length(loc_2) == 0) {

    # let's add the second (not present) to the first's offset
    groups[[loc_1]] <- c(groups[[loc_1]], pair[2])

  } else if (length(loc_1) == 0 && length(loc_2) != 0) {

    # let's add the first (not present) to the second's offset
    groups[[loc_2]] <- c(groups[[loc_2]], pair[1])

  }
  # yes, we can have both present (else) - no need to do anything then.
  return (groups)
}