# vsi.R -- flag survey cases whose VSI readings point to a shared location.
#
# Provenance: commit 238531cdae0f23aaa55fe44dd71407344f3d0375 ("Adding V1"),
# author MAY, Fri, 13 Jan 2023 11:53:32 +0100.

# The code below only uses base R. The original script attached `foreign` and
# `dplyr`, so they are still attached when installed, but a missing package no
# longer aborts the run.
for (pkg in c("foreign", "dplyr")) {
  if (requireNamespace(pkg, quietly = TRUE)) {
    library(pkg, character.only = TRUE)
  }
}

# Cut the five AP fragments out of one VSI string.
#
# Args:
#   source: a single VSI string (one cell of VSI1, VSI2 or VSI3).
# Returns:
#   A character vector with the five extracted fragments, in string order.
#
# NOTE(review): substr() is 1-based and treats a start of 0 as 1, so the
# original `substr(source, 0, 7)` returned characters 1-7. The first fragment
# is therefore 7 characters long while the other four are 8. The 16-character
# stride suggests 0-based offsets may have been intended (1-8, 17-24, ...);
# confirm against the VSI field layout before changing these numbers.
extract_ap <- function(source) {
  first <- c(1L, 16L, 32L, 48L, 64L)
  last <- c(7L, 23L, 39L, 55L, 71L)
  unlist(
    lapply(seq_along(first), function(i) substr(source, first[i], last[i])),
    use.names = FALSE
  )
}

# For each fragment in `ap`, report whether it is a usable value that occurs
# in every reference vector passed through `...`.
#
# NA and empty fragments never count: `NA %in% NA` and `"" %in% ""` are both
# TRUE in R, which would otherwise turn missing readings into "matches".
ap_in_all <- function(ap, ...) {
  found <- !is.na(ap) & nzchar(ap)
  for (reference in list(...)) {
    found <- found & ap %in% reference
  }
  found
}

# Check whether one case looks like it was recorded at a single location.
#
# Args:
#   t1, t2, t3: fragment vectors (as returned by extract_ap) for the three
#     time points of the same case.
#   min_matches: how many fragments of t1 must reappear in both t2 and t3.
# Returns:
#   TRUE when at least `min_matches` fragments of t1 occur in t2 and in t3.
match_within <- function(t1, t2, t3, min_matches = 3) {
  sum(ap_in_all(t1, t2, t3)) >= min_matches
}

# Check whether two cases (a and b) look like they share a location.
#
# Args:
#   a_t1, a_t2, a_t3: fragment vectors of case a at the three time points.
#   b_t1, b_t2, b_t3: fragment vectors of case b at the three time points.
#   min_matches: how many fragment positions must be shared.
# Returns:
#   FALSE when either case fails match_within(). Otherwise TRUE when, for at
#   least `min_matches` positions, the fragment of case a at that position is
#   present in all three time points of case b for each of a's time points.
match_outside <- function(a_t1, a_t2, a_t3, b_t1, b_t2, b_t3,
                          min_matches = 3) {
  # Only cases that are consistent within themselves can be compared.
  if (!match_within(a_t1, a_t2, a_t3, min_matches) ||
    !match_within(b_t1, b_t2, b_t3, min_matches)) {
    return(FALSE)
  }

  shared <- ap_in_all(a_t1, b_t1, b_t2, b_t3) &
    ap_in_all(a_t2, b_t1, b_t2, b_t3) &
    ap_in_all(a_t3, b_t1, b_t2, b_t3)

  sum(shared) >= min_matches
}

# Run the within-case and between-case checks on a data set and print one
# line per finding.
#
# Args:
#   dataset: data frame with character columns VSI1, VSI2 and VSI3.
# Returns:
#   NULL, invisibly; the findings are printed.
report_location_matches <- function(dataset) {
  stopifnot(all(c("VSI1", "VSI2", "VSI3") %in% names(dataset)))

  n_cases <- nrow(dataset)

  # Extract every case once instead of once per compared pair.
  ap_t1 <- lapply(dataset$VSI1, extract_ap)
  ap_t2 <- lapply(dataset$VSI2, extract_ap)
  ap_t3 <- lapply(dataset$VSI3, extract_ap)

  for (current_row in seq_len(n_cases)) {
    if (!match_within(ap_t1[[current_row]], ap_t2[[current_row]],
                      ap_t3[[current_row]])) {
      print(paste("CASE ", current_row,
                  " SEEM NOT TO BE TAKEN AT A SINGLE LOCATION!"))
    }

    # Compare against the cases that follow. The original
    # `current_row+1:nrow(dataset)` parsed as `current_row + (1:nrow(dataset))`
    # and walked past the last row; this form is also empty for the last case.
    for (compare_against in current_row + seq_len(n_cases - current_row)) {
      if (match_outside(ap_t1[[current_row]], ap_t2[[current_row]],
                        ap_t3[[current_row]], ap_t1[[compare_against]],
                        ap_t2[[compare_against]], ap_t3[[compare_against]])) {
        print(paste("WARNING, CASE ", current_row,
                    " SEEMS TO BE TAKEN AT SAME LOCATION AS ",
                    compare_against))
      }
    }
  }

  invisible(NULL)
}

# Interviewer questionnaire with interviewer ID (intnum).
input_path <- "vsi.csv"
if (file.exists(input_path)) {
  dataset <- read.csv2(input_path, dec = ".", stringsAsFactors = FALSE)
  report_location_matches(dataset)
} else {
  warning("Input file '", input_path, "' not found; no cases were checked.",
          call. = FALSE)
}