MAY fc628cdf28 First release ready version.
Cleaned up the code, provided additional data checks and instructions on
how to fix errors.

Refactored the code so it now uses "common.R" and "VSI.R". By doing
this, all other "tools" by the WP8 team can be streamlined and made more
user friendly.
2023-01-17 10:13:25 +01:00

170 lines
4.6 KiB
R

# checks the main data, returns FALSE in case something is not OK
# for VSI, we need at least: idno, cntry, VSI1, VSI2, VSI3
check_main_file <- function (dataset) {
if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3)
&& !is.null(dataset$idno) && !is.null(dataset$cntry)) {
return (TRUE)
} else {
if (is.null(dataset$VSI1)) {
cat ("VSI1 variable is missing in the main dataset. ");
}
if (is.null(dataset$VSI2)) {
cat ("VSI2 variable is missing in the main dataset. ");
}
if (is.null(dataset$VSI3)) {
cat ("VSI3 variable is missing in the main dataset. ");
}
if (is.null(dataset$idno)) {
cat ("idno variable (interview ID) is missing in the main dataset. ");
}
if (is.null(dataset$cntry)) {
cat ("cntry variable (country code) is missing in the main dataset. ");
}
cat ("**Please check your dataset and try again**")
return (FALSE)
}
}
# checks the intervier data, returns FALSE in case something is not OK
# here, we need at least: idno, cntry, intnum
check_inwer_file <- function (dataset) {
if (!is.null(dataset$cntry) && !is.null(dataset$idno) && !is.null(dataset$intnum)) {
return (TRUE)
} else {
if (is.null(dataset$intnum)) {
cat ("intnum (intervie**wer** ID variable is missing in the interviewer dataset. ");
}
if (is.null(dataset$idno)) {
cat ("idno variable (interview ID) is missing in the interviewer dataset. ");
}
if (is.null(dataset$cntry)) {
cat ("cntry variable (country code) is missing in the interviewer dataset. ");
}
cat ("**Please check your dataset and try again**")
return (FALSE)
}
}
# Extracts access points fingerprints from the data (5 APs from 1 VSI)
extract_ap <- function (source) {
return (c(substr(source,0,7),
substr(source,16,23),
substr(source,32,39),
substr(source,48,55),
substr(source,64,71)));
}
# Checks whether at least 3 of 5 APs are present during an interview
match_within <- function (t1,t2,t3) {
matched_within = 0
for (a in seq(1,5)) {
if (t1[a] %in% t2 && t1[a] %in% t3) {
matched_within=matched_within + 1
}
}
if (matched_within >= 3) {
return (TRUE)
}
else {
return (FALSE)
}
}
# Checks whether any of other cases have 3 or more same APs (i.e. same location)
match_outside <- function (a_t1, a_t2, a_t3, b_t1, b_t2, b_t3) {
matched = FALSE
# only take cases that match !WITHIN!
if (match_within(a_t1, a_t2, a_t3) == FALSE || match_within(b_t1, b_t2, b_t3) == FALSE) {
return (FALSE) # no match, as we can't even do it within
}
matches=0
# we iterate through all 5 APs. If at least three are repeated across
# all time points in both surveys (a_t1... b_t3) we have a match
for (a in seq(1,5)) {
if (a_t1[a] %in% b_t1 && a_t1[a] %in% b_t2 && a_t1[a] %in% b_t3 &&
a_t2[a] %in% b_t1 && a_t2[a] %in% b_t2 && a_t2[a] %in% b_t3 &&
a_t3[a] %in% b_t1 && a_t3[a] %in% b_t2 && a_t3[a] %in% b_t3)
{
matches=matches+1
}
}
if (matches >= 3) {
return (TRUE)
}
else {
return (FALSE)
}
}
# Checks existing pairings and add it to appropriate position
# example:
# in raw data, matches are found between cases 22<->23; 23<->38; 40<->41
# This can be summarized into:
# Matched cases: 22, 23, 38 ; as well as 40, 41
# This function takes a pair (e.g. c(23,38)) and checks if any of them is
# already in any of the matches.
# if it is, it adds new case to the appropriate list.
# if not, it adds a new offset with the new pair
group_pairs <- function (groups, pair) {
# if we receive an empty list, just populate it...
if (length(groups) == 0) {
groups <- list (c(pair[1], pair[2]))
return (groups)
}
# find both elements in the groupings
loc_1 <- which(sapply(groups, FUN=function(X) pair[1] %in% X))
loc_2 <- which(sapply(groups, FUN=function(X) pair[2] %in% X))
# none in existing list
if (length(loc_1) == 0 && length(loc_2) == 0) {
groups[[length(groups)+1]] <- c(pair[1], pair[2])
} else if (length(loc_1) != 0 && length(loc_2) == 0) {
# let's add the second (not present) to the first's offset
groups[[loc_1]] <- c(groups[[loc_1]], pair[2])
} else if (length(loc_1) == 0 && length(loc_2) != 0) {
# let's add the first (not present) to the second's offset
groups[[loc_2]] <- c(groups[[loc_2]], pair[1])
}
# yes, we can have both present (else) - no need to do anything then.
return (groups)
}