141 lines
3.7 KiB
R
141 lines
3.7 KiB
R
![]() |
# Checks that the dataset contains every variable required by the analysis
# (VSI1, VSI2, VSI3 and idno) and reports the result on standard output.
#
# Args:
#   dataset: a data frame or list holding the survey data.
# Returns:
#   TRUE when all required variables are present; FALSE otherwise (after
#   printing one message per missing variable).
check_data <- function(dataset) {
  # One message per required variable, keyed by the exact variable name.
  missing_messages <- c(
    VSI1 = "VSI1 variable is missing in the dataset. ",
    VSI2 = "VSI2 variable is missing in the dataset. ",
    VSI3 = "VSI3 variable is missing in the dataset. ",
    idno = "idno variable (interview ID) is missing in the dataset. "
  )

  # `[[` matches names exactly, whereas `$` partially matches on lists and
  # data frames (e.g. `dataset$VSI1` would silently pick up `VSI1_raw`).
  is_absent <- vapply(
    names(missing_messages),
    function(var_name) is.null(dataset[[var_name]]),
    logical(1)
  )

  if (!any(is_absent)) {
    cat("All the required variables are present, continuing with the analysis. ")
    return(TRUE)
  }

  for (msg in missing_messages[is_absent]) {
    cat(msg)
  }
  cat("**Please check your dataset and try again**")
  FALSE
}
|
||
|
|
||
|
|
||
|
|
||
|
# Extracts access point (AP) fingerprints from a VSI value: five
# fixed-position slices are cut out of the raw string.
#
# Args:
#   source: character value holding the raw VSI string.
# Returns:
#   Character vector containing the five slices, in positional order.
#
# NOTE(review): substr() treats a start of 0 like 1, so the first slice is
# 7 characters wide (1-7) while the other four are 8 characters wide.
# Confirm against the VSI format whether this is intended.
extract_ap <- function(source) {
  slice_from <- c(0, 16, 32, 48, 64)
  slice_to <- slice_from + 7

  pieces <- Map(
    function(from, to) substr(source, from, to),
    slice_from,
    slice_to
  )
  do.call(c, pieces)
}
|
||
|
|
||
|
# Checks whether at least 3 of the 5 APs recorded at the first time point are
# also present at both other time points of the same interview.
#
# Args:
#   t1, t2, t3: AP fingerprint vectors for the three time points.
# Returns:
#   TRUE when three or more of the first five entries of t1 occur in both
#   t2 and t3; FALSE otherwise.
match_within <- function(t1, t2, t3) {
  # Only the first five positions of t1 are considered; positions beyond the
  # end of t1 become NA and match only if t2 and t3 contain NA as well.
  candidates <- t1[1:5]
  seen_everywhere <- candidates %in% t2 & candidates %in% t3
  sum(seen_everywhere) >= 3
}
|
||
|
|
||
|
# Checks whether two interviews (a and b) share 3 or more APs, i.e. appear to
# have been taken at the same location.
#
# Args:
#   a_t1, a_t2, a_t3: AP fingerprint vectors of interview a (three time points).
#   b_t1, b_t2, b_t3: AP fingerprint vectors of interview b (three time points).
# Returns:
#   FALSE when either interview fails match_within(); otherwise TRUE when, for
#   at least three of the five positions, the entry of a_t1, a_t2 and a_t3 at
#   that position is found in every one of b_t1, b_t2 and b_t3.
match_outside <- function(a_t1, a_t2, a_t3, b_t1, b_t2, b_t3) {
  # Only interviews that are consistent within themselves can be compared.
  if (!match_within(a_t1, a_t2, a_t3) || !match_within(b_t1, b_t2, b_t3)) {
    return(FALSE)
  }

  # TRUE for every AP that occurs at all three time points of interview b.
  in_all_of_b <- function(aps) {
    aps %in% b_t1 & aps %in% b_t2 & aps %in% b_t3
  }

  positions <- 1:5
  shared <- in_all_of_b(a_t1[positions]) &
    in_all_of_b(a_t2[positions]) &
    in_all_of_b(a_t3[positions])

  sum(shared) >= 3
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
# Adds a matched pair of cases to the list of match groups.
#
# Example: in the raw data, matches are found between cases
# 22<->23; 23<->38; 40<->41. This is summarized into the groups
# (22, 23, 38) and (40, 41).
#
# The function takes a pair (e.g. c(23, 38)) and checks whether either case
# already belongs to an existing group:
#   * neither is present    -> a new group holding the pair is appended;
#   * exactly one is present -> the other case is appended to that group;
#   * both are in one group  -> nothing changes;
#   * both are present but in different groups -> those groups are merged,
#     because the pair links them (e.g. 22<->23, 38<->40, then 23<->38 gives
#     the single group 22, 23, 38, 40).
#
# Args:
#   groups: list of vectors, each vector holding the cases of one group
#           (may be an empty list).
#   pair:   vector whose first two elements are the matched cases.
# Returns:
#   The updated list of groups.
group_pairs <- function(groups, pair) {
  new_group <- c(pair[1], pair[2])

  # If we receive an empty list, just populate it.
  if (length(groups) == 0) {
    return(list(new_group))
  }

  # Locate both elements in the existing groups. vapply() guarantees a
  # logical vector regardless of what the groups contain.
  loc_1 <- which(vapply(groups, function(g) pair[1] %in% g, logical(1)))
  loc_2 <- which(vapply(groups, function(g) pair[2] %in% g, logical(1)))
  touched <- sort(unique(c(loc_1, loc_2)))

  # Neither case is known yet: start a new group.
  if (length(touched) == 0) {
    groups[[length(groups) + 1]] <- new_group
    return(groups)
  }

  # Everything ends up in the first group that contains one of the cases.
  target <- touched[1]
  absorbed <- touched[-1]

  merged <- groups[[target]]
  if (length(absorbed) > 0) {
    # The pair connects several groups: fold them into the target group.
    for (idx in absorbed) {
      merged <- c(merged, groups[[idx]])
    }
    merged <- unique(merged)
  }

  # Append whichever case of the pair is not in the group yet.
  merged <- c(merged, setdiff(new_group, merged))
  groups[[target]] <- merged

  if (length(absorbed) > 0) {
    groups[absorbed] <- NULL
  }
  groups
}
|