2023-01-13 11:53:32 +01:00

92 lines
2.3 KiB
R

library(foreign)
library(dplyr)
# Interviewer questionnaire with interviewer ID (intnum).
# Read as a semicolon-separated file with "." as decimal mark; strings stay
# character (not factor) because the VSI1/VSI2/VSI3 columns are sliced with
# substr() further down.
dataset <- read.csv2("vsi.csv", dec = ".", stringsAsFactors = FALSE)
# Cut a VSI string into five fixed-position fragments.
#
# source: character value (one VSI field) to slice.
# Returns a character vector with the fragments taken at character
# positions 0-7, 16-23, 32-39, 48-55 and 64-71, in that order.
#
# NOTE(review): substr() treats a start of 0 like 1, so the first fragment
# is 7 characters long while the other four are 8. Confirm against the VSI
# field layout whether the offsets were meant to be 1-based.
extract_ap <- function(source) {
  fragment_width <- 7
  fragment_starts <- c(0, 16, 32, 48, 64)
  fragments <- lapply(
    fragment_starts,
    function(from) substr(source, from, from + fragment_width)
  )
  do.call(c, fragments)
}
# Check whether one case looks consistent across its three time points.
#
# t1, t2, t3: vectors of fragments as produced by extract_ap(), one per
#   time point.
# Returns TRUE when at least three of the first five entries of t1 also
# occur (at any position) in both t2 and t3, FALSE otherwise.
#
# NOTE(review): %in% treats NA as equal to NA and "" as equal to "", so
# cases with missing or too-short VSI strings count as matching here.
match_within <- function(t1, t2, t3) {
  required_hits <- 3
  candidates <- t1[seq_len(5)]
  seen_everywhere <- candidates %in% t2 & candidates %in% t3
  sum(seen_everywhere) >= required_hits
}
# Decide whether two cases (a and b) appear to share a location.
#
# a_t1, a_t2, a_t3: fragment vectors of case a at the three time points.
# b_t1, b_t2, b_t3: fragment vectors of case b at the three time points.
# Returns TRUE or FALSE.
#
# Both cases must first pass match_within(); otherwise the answer is FALSE.
# After that, index positions 1..5 are examined one by one: a position
# counts as a hit when the entries of a_t1, a_t2 and a_t3 at that position
# are each found in all of b_t1, b_t2 and b_t3. Three or more hits give TRUE.
#
# NOTE(review): case a is compared by index position, so a fragment that
# sits at different positions in a_t1/a_t2/a_t3 is not treated as one
# fragment here. Confirm this is the intended rule.
match_outside <- function(a_t1, a_t2, a_t3, b_t1, b_t2, b_t3) {
  # Only cases that are consistent within themselves are compared.
  both_consistent <- match_within(a_t1, a_t2, a_t3) &&
    match_within(b_t1, b_t2, b_t3)
  if (!both_consistent) {
    return(FALSE)
  }

  # TRUE for every fragment that occurs at all three time points of case b.
  present_in_all_b <- function(fragments) {
    fragments %in% b_t1 & fragments %in% b_t2 & fragments %in% b_t3
  }

  positions <- seq_len(5)
  position_hits <- present_in_all_b(a_t1[positions]) &
    present_in_all_b(a_t2[positions]) &
    present_in_all_b(a_t3[positions])

  sum(position_hits) >= 3
}
# Non-optimal, case-by-case scan: every case is checked for internal
# consistency and then compared against every later case.
# seq_len() keeps the loop from running when the data set has no rows.
for (current_row in seq_len(nrow(dataset))) {
  vsi_t1_exploded <- extract_ap(dataset[current_row, ]$VSI1)
  vsi_t2_exploded <- extract_ap(dataset[current_row, ]$VSI2)
  vsi_t3_exploded <- extract_ap(dataset[current_row, ]$VSI3)

  # checking within
  if (!match_within(vsi_t1_exploded, vsi_t2_exploded, vsi_t3_exploded)) {
    print(paste("CASE ", current_row, " SEEM NOT TO BE TAKEN AT A SINGLE LOCATION!"))
  }

  # Compare against all later cases only. ":" binds tighter than "+", so
  # the former `current_row+1:nrow(dataset)` ran from current_row + 1 up to
  # current_row + nrow(dataset), i.e. past the last row. The form below
  # yields (current_row + 1)..nrow(dataset) and is empty for the last row.
  later_rows <- current_row + seq_len(nrow(dataset) - current_row)
  for (compare_against in later_rows) {
    t1_oth <- extract_ap(dataset[compare_against, ]$VSI1)
    t2_oth <- extract_ap(dataset[compare_against, ]$VSI2)
    t3_oth <- extract_ap(dataset[compare_against, ]$VSI3)
    if (match_outside(vsi_t1_exploded, vsi_t2_exploded, vsi_t3_exploded,
                      t1_oth, t2_oth, t3_oth)) {
      print(paste("WARNING, CASE ", current_row, " SEEMS TO BE TAKEN AT SAME LOCATION AS ", compare_against))
    }
  }
}