library(foreign)
library(dplyr)

# Interviewer questionnaire with interviewer ID (intnum):
dataset <- read.csv2("vsi.csv", dec = ".", stringsAsFactors = FALSE)

# Split a VSI string into its five AP fields.
#
# The fields are read from fixed character positions 0-7, 16-23, 32-39,
# 48-55 and 64-71 of `source`.
# NOTE(review): the first field starts at position 0 while R strings are
# 1-based, so it probably yields 7 characters where the others yield 8 --
# confirm against the VSI format before changing it.
#
# @param source Character scalar holding one VSI value.
# @return Character vector with the five extracted fields (for a scalar
#   `source`).
extract_ap <- function(source) {
  field_starts <- c(0, 16, 32, 48, 64)
  unlist(lapply(field_starts, function(from) substr(source, from, from + 7)))
}

# Check whether one case shares enough APs across its three time points.
#
# @param t1,t2,t3 AP vectors as returned by extract_ap(), one per time point.
# @return TRUE if at least three entries of `t1` also occur in both `t2` and
#   `t3`, otherwise FALSE.
match_within <- function(t1, t2, t3) {
  # NOTE(review): `%in%` treats NA as matching NA, so cases with missing VSI
  # values count as matching themselves -- confirm this is intended.
  seen_everywhere <- t1 %in% t2 & t1 %in% t3
  sum(seen_everywhere) >= 3
}

# Check whether two cases (a and b) share enough APs at all time points.
#
# Both cases must first be consistent within themselves (match_within());
# otherwise no comparison is attempted and FALSE is returned.
#
# @param a_t1,a_t2,a_t3 AP vectors of the first case, one per time point.
# @param b_t1,b_t2,b_t3 AP vectors of the second case, one per time point.
# @return TRUE if, for at least three AP positions, the entries of case a at
#   that position (at t1, t2 and t3) all occur in every time point of case b;
#   otherwise FALSE.
match_outside <- function(a_t1, a_t2, a_t3, b_t1, b_t2, b_t3) {
  # Only take cases that match WITHIN; otherwise we cannot compare at all.
  if (!match_within(a_t1, a_t2, a_t3) || !match_within(b_t1, b_t2, b_t3)) {
    return(FALSE)
  }

  # TRUE for each entry of `x` that occurs in all three time points of b.
  in_all_of_b <- function(x) {
    x %in% b_t1 & x %in% b_t2 & x %in% b_t3
  }

  # A position counts as a match only if a's entry at that position is
  # repeated across all time points of b for each of a's own time points.
  shared <- in_all_of_b(a_t1) & in_all_of_b(a_t2) & in_all_of_b(a_t3)
  sum(shared) >= 3
}

# Non-optimal, case-by-case comparison of every pair of rows.
n_cases <- nrow(dataset)

# Extract the AP fields once per row instead of once per pair.
exploded <- lapply(seq_len(n_cases), function(row) {
  list(
    t1 = extract_ap(dataset[row, ]$VSI1),
    t2 = extract_ap(dataset[row, ]$VSI2),
    t3 = extract_ap(dataset[row, ]$VSI3)
  )
})

for (current_row in seq_len(n_cases)) {
  current <- exploded[[current_row]]

  # Checking within the case itself.
  if (!match_within(current$t1, current$t2, current$t3)) {
    print(paste("CASE ", current_row,
                " SEEM NOT TO BE TAKEN AT A SINGLE LOCATION!"))
  }

  # Compare against all later cases. `:` binds tighter than `+`, so the
  # range is built with seq_len() to stay within 1..n_cases (and to be empty
  # for the last row).
  for (compare_against in current_row + seq_len(n_cases - current_row)) {
    other <- exploded[[compare_against]]

    if (match_outside(current$t1, current$t2, current$t3,
                      other$t1, other$t2, other$t3)) {
      print(paste("WARNING, CASE ", current_row,
                  " SEEMS TO BE TAKEN AT SAME LOCATION AS ", compare_against))
    }
  }
}