# Reports which required variables are absent from a dataset.
#
# dataset:  object whose names() are inspected (data frame or list).
# messages: named character vector; names are the required variable names,
#           values are the texts printed when that variable is absent.
#
# Returns TRUE when every required name is present. Otherwise prints the
# message of each absent variable (in the order given), prints the closing
# hint and returns FALSE.
#
# Names are compared exactly. `dataset$name` is avoided on purpose: `$`
# partial-matches on data frames and lists, so a column such as "cntry_code"
# would wrongly satisfy a check for "cntry".
.report_missing_vars <- function(dataset, messages) {
  absent <- !(names(messages) %in% names(dataset))
  if (!any(absent)) {
    return(TRUE)
  }
  cat(unname(messages[absent]), sep = "")
  cat("**Please check your dataset and try again**")
  FALSE
}

# Checks the main data; returns FALSE in case something is not OK.
# For VSI, we need at least: idno, cntry, VSI1, VSI2, VSI3.
#
# dataset: the main dataset (data frame or list).
# Returns TRUE when all five variables exist, otherwise prints one message
# per missing variable and returns FALSE.
check_main_file <- function(dataset) {
  .report_missing_vars(dataset, c(
    VSI1 = "VSI1 variable is missing in the main dataset. ",
    VSI2 = "VSI2 variable is missing in the main dataset. ",
    VSI3 = "VSI3 variable is missing in the main dataset. ",
    idno = "idno variable (interview ID) is missing in the main dataset. ",
    cntry = "cntry variable (country code) is missing in the main dataset. "
  ))
}

# Checks the interviewer data; returns FALSE in case something is not OK.
# Here, we need at least: idno, cntry, intnum.
#
# dataset: the interviewer dataset (data frame or list).
# Returns TRUE when all three variables exist, otherwise prints one message
# per missing variable and returns FALSE.
check_inwer_file <- function(dataset) {
  .report_missing_vars(dataset, c(
    # The closing parenthesis after "ID" was missing in the original text.
    intnum = "intnum (intervie**wer** ID) variable is missing in the interviewer dataset. ",
    idno = "idno variable (interview ID) is missing in the interviewer dataset. ",
    # The trailing line break is kept from the original message.
    cntry = "cntry variable (country code) is missing in the interviewer dataset. \n"
  ))
}

# Extracts access point fingerprints from the data (5 APs from 1 VSI).
#
# source: a VSI string.
# Returns a character vector with the five substrings of `source` found at
# character positions 1-7, 16-23, 32-39, 48-55 and 64-71.
#
# NOTE(review): the first slice is 7 characters long while the others are 8.
# The original call was substr(source, 0, 7); substr() treats a start below 1
# as 1, so the result is unchanged here. The offsets look as if they were
# written for 0-based indexing - confirm against the VSI format before
# changing them, since that would alter every fingerprint.
extract_ap <- function(source) {
  starts <- c(1, 16, 32, 48, 64)
  stops <- c(7, 23, 39, 55, 71)
  c(
    substr(source, starts[1], stops[1]),
    substr(source, starts[2], stops[2]),
    substr(source, starts[3], stops[3]),
    substr(source, starts[4], stops[4]),
    substr(source, starts[5], stops[5])
  )
}

# Flags fingerprints that carry actual content.
# NA and "" are what the extraction yields for a missing or too short VSI;
# they must never count as a matching access point.
.is_real_ap <- function(ap) {
  !is.na(ap) & nzchar(ap)
}

# Checks whether at least 3 of 5 APs are present during an interview.
#
# t1, t2, t3: fingerprint vectors of one interview at three time points
#             (as produced by extract_ap).
# Returns TRUE when at least three of the first five entries of t1 also occur
# in both t2 and t3, FALSE otherwise. Empty and NA fingerprints are ignored,
# so interviews without AP data no longer "match" on their blanks.
match_within <- function(t1, t2, t3) {
  candidates <- t1[seq_len(5)]
  seen_throughout <- .is_real_ap(candidates) &
    candidates %in% t2 &
    candidates %in% t3
  sum(seen_throughout) >= 3
}

# Checks whether any of other cases have 3 or more same APs
# (i.e. same location).
#
# a_t1, a_t2, a_t3: fingerprint vectors of interview A at three time points.
# b_t1, b_t2, b_t3: fingerprint vectors of interview B at three time points.
# Returns TRUE when both interviews match WITHIN themselves and at least three
# of the five positions of A are found in every time point of B.
match_outside <- function(a_t1, a_t2, a_t3, b_t1, b_t2, b_t3) {
  # Only take cases that match WITHIN; otherwise there is nothing to compare.
  if (!match_within(a_t1, a_t2, a_t3) || !match_within(b_t1, b_t2, b_t3)) {
    return(FALSE)
  }

  # TRUE for each real fingerprint that occurs in all three time points of B.
  in_every_b <- function(ap) {
    .is_real_ap(ap) & ap %in% b_t1 & ap %in% b_t2 & ap %in% b_t3
  }

  # A position counts when the fingerprint stored there at each of A's three
  # time points is repeated across all time points of B.
  slots <- seq_len(5)
  repeated <- in_every_b(a_t1[slots]) &
    in_every_b(a_t2[slots]) &
    in_every_b(a_t3[slots])
  sum(repeated) >= 3
}

# Checks existing pairings and adds the pair at the appropriate position.
# Example:
#   in raw data, matches are found between cases 22<->23; 23<->38; 40<->41
#   This can be summarized into:
#   Matched cases: 22, 23, 38 ; as well as 40, 41
#
# groups: list of vectors, each holding the cases of one matched group.
# pair:   vector of two cases that were found to match, e.g. c(23, 38).
# Returns the updated list of groups:
#   - neither case known: the pair is appended as a new group;
#   - one case known: the other case is added to that group;
#   - both known in the same group: nothing changes;
#   - both known in different groups: the two groups are merged. Previously
#     this case was ignored, which made the result depend on the order in
#     which pairs arrived (22<->23, 40<->41, 23<->40 left two groups).
group_pairs <- function(groups, pair) {
  first <- pair[1]
  second <- pair[2]

  # If we receive an empty list, just populate it.
  if (length(groups) == 0) {
    return(list(c(first, second)))
  }

  # Find both elements in the groupings.
  loc_1 <- which(vapply(groups, function(members) first %in% members,
                        logical(1)))
  loc_2 <- which(vapply(groups, function(members) second %in% members,
                        logical(1)))
  known_1 <- length(loc_1) > 0
  known_2 <- length(loc_2) > 0

  if (!known_1 && !known_2) {
    # None in the existing list.
    groups[[length(groups) + 1]] <- c(first, second)
  } else if (known_1 && !known_2) {
    # Add the second (not present) to the first's group.
    groups[[loc_1[1]]] <- c(groups[[loc_1[1]]], second)
  } else if (!known_1 && known_2) {
    # Add the first (not present) to the second's group.
    groups[[loc_2[1]]] <- c(groups[[loc_2[1]]], first)
  } else if (loc_1[1] != loc_2[1]) {
    # Both present but in different groups: the pair links them, so merge
    # the later group into the earlier one and drop the later one.
    keep <- min(loc_1[1], loc_2[1])
    absorb <- max(loc_1[1], loc_2[1])
    groups[[keep]] <- c(groups[[keep]], groups[[absorb]])
    groups[[absorb]] <- NULL
  }

  groups
}