141 lines
3.7 KiB
R
Raw Normal View History

# checks the data, returns FALSE in case something is not OK
check_data <- function (dataset) {
if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3) && !is.null(dataset$idno)) {
cat ("All the required variables are present, continuing with the analysis. ")
return (TRUE)
} else {
if (is.null(dataset$VSI1)) {
cat ("VSI1 variable is missing in the dataset. ");
}
if (is.null(dataset$VSI2)) {
cat ("VSI2 variable is missing in the dataset. ");
}
if (is.null(dataset$VSI3)) {
cat ("VSI3 variable is missing in the dataset. ");
}
if (is.null(dataset$idno)) {
cat ("idno variable (interview ID) is missing in the dataset. ");
}
cat ("**Please check your dataset and try again**")
return (FALSE)
}
}
# Extracts access points fingerprints from the data (5 APs from 1 VSI)
extract_ap <- function (source) {
return (c(substr(source,0,7),
substr(source,16,23),
substr(source,32,39),
substr(source,48,55),
substr(source,64,71)));
}
# Checks whether at least 3 of 5 APs are present during an interview
match_within <- function (t1,t2,t3) {
matched_within = 0
for (a in seq(1,5)) {
if (t1[a] %in% t2 && t1[a] %in% t3) {
matched_within=matched_within + 1
}
}
if (matched_within >= 3) {
return (TRUE)
}
else {
return (FALSE)
}
}
# Checks whether any of other cases have 3 or more same APs (i.e. same location)
match_outside <- function (a_t1, a_t2, a_t3, b_t1, b_t2, b_t3) {
matched = FALSE
# only take cases that match !WITHIN!
if (match_within(a_t1, a_t2, a_t3) == FALSE || match_within(b_t1, b_t2, b_t3) == FALSE) {
return (FALSE) # no match, as we can't even do it within
}
matches=0
# we iterate through all 5 APs. If at least three are repeated across
# all time points in both surveys (a_t1... b_t3) we have a match
for (a in seq(1,5)) {
if (a_t1[a] %in% b_t1 && a_t1[a] %in% b_t2 && a_t1[a] %in% b_t3 &&
a_t2[a] %in% b_t1 && a_t2[a] %in% b_t2 && a_t2[a] %in% b_t3 &&
a_t3[a] %in% b_t1 && a_t3[a] %in% b_t2 && a_t3[a] %in% b_t3)
{
matches=matches+1
}
}
if (matches >= 3) {
return (TRUE)
}
else {
return (FALSE)
}
}
# Checks existing pairings and add it to appropriate position
# example:
# in raw data, matches are found between cases 22<->23; 23<->38; 40<->41
# This can be summarized into:
# Matched cases: 22, 23, 38 ; as well as 40, 41
# This function takes a pair (e.g. c(23,38)) and checks if any of them is
# already in any of the matches.
# if it is, it adds new case to the appropriate list.
# if not, it adds a new offset with the new pair
group_pairs <- function (groups, pair) {
# if we receive an empty list, just populate it...
if (length(groups) == 0) {
groups <- list (c(pair[1], pair[2]))
return (groups)
}
# find both elements in the groupings
loc_1 <- which(sapply(groups, FUN=function(X) pair[1] %in% X))
loc_2 <- which(sapply(groups, FUN=function(X) pair[2] %in% X))
# none in existing list
if (length(loc_1) == 0 && length(loc_2) == 0) {
groups[[length(groups)+1]] <- c(pair[1], pair[2])
} else if (length(loc_1) != 0 && length(loc_2) == 0) {
# let's add the second (not present) to the first's offset
groups[[loc_1]] <- c(groups[[loc_1]], pair[2])
} else if (length(loc_1) == 0 && length(loc_2) != 0) {
# let's add the first (not present) to the second's offset
groups[[loc_2]] <- c(groups[[loc_2]], pair[1])
}
# yes, we can have both present (else) - no need to do anything then.
return (groups)
}