First release ready version.

Cleaned up the code, provided additional data checks and instructions on how to fix errors. Refactored the code so it now uses "common.R" and "VSI.R". By doing this, all other "tools" by the WP8 team can be streamlined and made more user friendly.
2023-01-17 10:13:25 +01:00 · 2023-01-17 10:13:25 +01:00 · fc628cdf28
commit fc628cdf28
parent 7564dfb384
7 changed files with 209 additions and 59 deletions
--- a/README.md
+++ b/README.md
@ -1,9 +1,31 @@
 # Virtual Surrounding Impression analysis

-Version: 0.1b
+The tool for checking Virtual Surrounding Impression.  
+  
+It does the following two detections on the dataset:
+- interviews that were _most likely_ taken at the same location by the same interviewer (read: faking at home)
+- individual interviews that were _most likely_ conducted at multiple locations (multiple sittings or faking at multiple places)  
+  
+Please use the tool as an *additional* and not _sole_ indicator of undesired interviewer behaviour.  
+  
+Also, remember the tool is based on WIFI data.  
+Cases, conducted while the WIFI is turned off (or no wifi access points are visible) *will have the same VSI* and will appear to be taken at the same location.
+
+  
+Support: myESS or [mailto: may.dousak@fdv.uni-lj.si]

 ## Changelog

+*1.0 R 230117 - 17th Jan 2023*
+- cleaned up the code
+- created common files that can be shared with the other tools
+- script now checks the data and reports on the errors before processing
+- user friendly error messages and instructions provided when errors are detected
+
+
+*1.0b - 16th Jan 2023*
+R markdown version, knits PDF report
+
 *0.1b - 13th Jan 2023*  

 First working R version, translated from Python.  
--- a/components/VSI.R
+++ b/components/VSI.R
@ -1,22 +1,28 @@
-# checks the data, returns FALSE in case something is not OK
-check_data <- function (dataset) {
-  if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3) && !is.null(dataset$idno)) {
-    cat ("All the required variables are present, continuing with the analysis.  ")
-    
+# checks the main data, returns FALSE in case something is not OK
+# for VSI, we need at least: idno, cntry, VSI1, VSI2, VSI3
+
+check_main_file <- function (dataset) {
+
+  if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3) 
+      && !is.null(dataset$idno)  && !is.null(dataset$cntry)) {
+
    return (TRUE)
    
  } else {
    if (is.null(dataset$VSI1)) {
-      cat ("VSI1 variable is missing in the dataset.  ");
+      cat ("VSI1 variable is missing in the main dataset.  ");
    }
    if (is.null(dataset$VSI2)) {
-      cat ("VSI2 variable is missing in the dataset.  ");
+      cat ("VSI2 variable is missing in the main dataset.  ");
    }
    if (is.null(dataset$VSI3)) {
-      cat ("VSI3 variable is missing in the dataset.  ");
+      cat ("VSI3 variable is missing in the main dataset.  ");
    }
    if (is.null(dataset$idno)) {
-      cat ("idno variable (interview ID) is missing in the dataset.  ");
+      cat ("idno variable (interview ID) is missing in the main dataset.  ");
+    }
+    if (is.null(dataset$cntry)) {
+      cat ("cntry variable (country code) is missing in the main dataset.  ");
    }
    
    cat ("**Please check your dataset and try again**")
@ -24,6 +30,29 @@ check_data <- function (dataset) {
  }  
 }

+# checks the intervier data, returns FALSE in case something is not OK
+# here, we need at least: idno, cntry, intnum
+check_inwer_file <- function (dataset) {
+  
+  if (!is.null(dataset$cntry) && !is.null(dataset$idno) && !is.null(dataset$intnum)) {
+    
+    return (TRUE)
+    
+  } else {
+    if (is.null(dataset$intnum)) {
+      cat ("intnum (intervie**wer** ID variable is missing in the interviewer dataset.  ");
+    }
+    if (is.null(dataset$idno)) {
+      cat ("idno variable (interview ID) is missing in the interviewer dataset.  ");
+    }
+    if (is.null(dataset$cntry)) {
+      cat ("cntry variable (country code) is missing in the interviewer dataset.  ");
+    }    
+    cat ("**Please check your dataset and try again**")
+    return (FALSE)
+  }  
+}
+


 # Extracts access points fingerprints from the data (5 APs from 1 VSI)
--- a/components/common.R
+++ b/components/common.R
@ -0,0 +1,91 @@
+# call this at the top of the Rmd
+common_defs <- function () {
+  knitr::opts_chunk$set(echo = TRUE, results = "hide", message = TRUE, dev = "cairo_pdf", warning = TRUE)
+  knitr::opts_chunk$set(fig.pos = 'H')
+  
+  options(knitr.table.format = "latex", knitr.kable.NA = "")
+  
+  Sys.setlocale("LC_ALL","English")
+}
+
+# checks if file names are provided and file paths are valid.
+# returns TRUE if everything's OK or FALSE if not.
+
+check_files <- function (main, inwer) {
+  
+  pass = TRUE
+  
+  if (is.null(main) || main == "") {
+    
+    pass = FALSE
+    cat ("**Main data file not provided. Please profile main data file path as 'mainFile' parameter to the script!**  \n")
+  }
+  
+  if (is.null(inwer) || inwer == "") {
+    
+    pass = FALSE
+    cat ("**Interviewer data file not provided. Please profile interviewer data file path as 'intFile' parameter to the script!**  \n")
+  }
+  
+  
+  if (!file.exists(main)) {
+    
+    pass= FALSE
+    cat (paste("**I can't open the main data file '", main, "'.**  \n  
+      Did you provide a valid path to the file?  \n  
+      Please make sure that:  
+       - it is stored on a local (not network) drive
+       - file name or file path does not contain any non-english characters
+       - file name and file path only contains characters (a-z), numbers (0-1), dash (-) or underline (_)"))
+  }
+  
+  if (!file.exists(inwer)) {
+    cat (paste("**I can't open the interviewer data file '", inwer, "'.**  \n  
+      Did you provide a valid path to the file?  \n  
+      Please make sure that:  
+       - it is stored on a local (not network) drive
+       - file name or file path does not contain any non-english characters
+       - file name and file path only contains characters (a-z), numbers (0-1), dash (-) or underline (_)"))
+  }
+  
+  return (pass)
+}
+
+# loads the files and returns merged data
+# also checks if the needed variables are present
+# (check_data must be provided in additional R file, as it differs between tests)
+
+load_files <- function (main, inwer) {
+  
+  pass = TRUE
+  
+  # main file
+  dataset <- read.csv2(main, dec=".", stringsAsFactors=F)
+  # exit if data is not OK
+  
+  if (check_main_file(dataset) == FALSE) {
+    pass = FALSE
+  }  
+  
+  #interviewer file
+  Inwer <- read.csv2(inwer, dec=".", stringsAsFactors=F)
+
+  if (check_inwer_file(Inwer) == FALSE) {
+    pass = FALSE
+  }  
+  
+  # add country and interviewer ID to the main data frame; merge by idno and cntry
+  # only merge when files are OK
+  if (pass == TRUE) {
+    
+    cat ("All the required variables are present, continuing with the analysis.  \n")
+    
+    Inwer_ID <- dplyr::select(Inwer, "intnum", "idno", "cntry")
+    dataset <- dplyr::left_join(dataset, Inwer_ID, by = c("idno", "cntry"))
+    
+    return (dataset)
+  }
+  else {
+    return (FALSE)
+  }
+}
--- a/components/template.R
+++ b/components/template.R
@ -0,0 +1,24 @@
+ESSred <- rgb(.91, .20, .32)
+ESSgreen <- rgb(.14, .62, .51)
+ESSblue <- rgb(0, .25, .48)
+
+# now some adjacent and square colors (colortools has been removed from CRAN)
+ESS_colors_extra <- c(rgb(.44,.2,.91),rgb(.2,.91,.79),rgb(.68,.91,.2),rgb(.91,.2,.68),rgb(.91,.44,.2))
+
+ESSColors <- c(ESSred, ESS_colors_extra, ESSgreen, ESSblue)
+
+themeESS <- theme_tufte(base_size = 9, base_family = "Calibri") +
+  theme(axis.title = element_text(size = 9, face = "plain"),
+        axis.text = element_text(size = 9),
+        axis.line.x = element_line(),
+        plot.title = element_blank(),
+        legend.title = element_blank(),
+        legend.text = element_text(size = 9),
+        strip.text = element_text(size = 9, face = "bold"),
+        legend.position = "none",
+        legend.direction = "horizontal",
+        legend.box = "vertical",
+        legend.spacing = unit(0, "line"),
+        legend.key.size = unit(.75, "line"))
+linebreak <- "\\hspace{\\textwidth}"
+
--- a/demo_data/.~lock.inwer_file.csv#
+++ b/demo_data/.~lock.inwer_file.csv#
@ -1 +0,0 @@
-May Doušak,may,FDV,13.01.2023 14:25,file:///home/may/.config/libreoffice/4;
--- a/demo_data/inwer_file.csv
+++ b/demo_data/inwer_file.csv
@ -26,3 +26,16 @@
 25;"SI";5
 26;"SI";6
 27;"SI";6
+28;"SI";7
+29;"SI";7
+30;"SI";7
+31;"SI";7
+32;"SI";7
+33;"SI";7
+34;"SI";7
+35;"SI";7
+36;"SI";8
+37;"SI";8
+38;"SI";8
+39;"SI";8
+40;"SI";8
--- a/vsi.Rmd
+++ b/vsi.Rmd
@ -21,27 +21,15 @@ graphics: yes
 csl: ./components/apa.csl
 link-citations: yes
 params:
-  mainFile: ""
-  intFile: ""
-  version: "1.0 beta"
-  
+  mainFile: "demo_data/main.csv"
+  intFile: "demo_data/inwer.csv"
+  version: "1.0.230117"

 ---


 ```{r setup, include = FALSE, error=TRUE}
 library(knitr)
-#library(kableExtra)
-
-knitr::opts_chunk$set(echo = TRUE, results = "hide", message = TRUE, dev = "cairo_pdf", warning = TRUE)
-knitr::opts_chunk$set(fig.pos = 'H')
-
-options(knitr.table.format = "latex", knitr.kable.NA = "")
-
-Sys.setlocale("LC_ALL","English")
-```
-
-```{r setup2, include = FALSE,, error=T, warning=T, message=T}
 library(here) 
 library(foreign)
 library(dplyr)
@ -49,34 +37,13 @@ library(ggplot2)
 library(lubridate)
 library(ggthemes) # theme_tufte works

+source('components/VSI.R')
+source('components/common.R')

-```
+common_defs();

+source('components/template.R')

-```{r theme, include=FALSE, error=TRUE}
-ESSred <- rgb(.91, .20, .32)
-ESSgreen <- rgb(.14, .62, .51)
-ESSblue <- rgb(0, .25, .48)
-
-# now some adjacent and square colors (colortools has been removed from CRAN)
-ESS_colors_extra <- c(rgb(.44,.2,.91),rgb(.2,.91,.79),rgb(.68,.91,.2),rgb(.91,.2,.68),rgb(.91,.44,.2))
-
-ESSColors <- c(ESSred, ESS_colors_extra, ESSgreen, ESSblue)
-
-themeESS <- theme_tufte(base_size = 9, base_family = "Calibri") +
-  theme(axis.title = element_text(size = 9, face = "plain"),
-        axis.text = element_text(size = 9),
-        axis.line.x = element_line(),
-        plot.title = element_blank(),
-        legend.title = element_blank(),
-        legend.text = element_text(size = 9),
-        strip.text = element_text(size = 9, face = "bold"),
-        legend.position = "none",
-        legend.direction = "horizontal",
-        legend.box = "vertical",
-        legend.spacing = unit(0, "line"),
-        legend.key.size = unit(.75, "line"))
-linebreak <- "\\hspace{\\textwidth}"
 ```

 \newpage
@ -100,20 +67,25 @@ This tools checks the Virtual Surrounding Impression data and reports on the cas

 The results should always be combined with other methods of detecting undesired interviewer behaviour.

+## Support {-}
+
+Support is available on MyESS forums or via e-mail at may.dousak@fdv.uni-lj.si .
+  
+
 **Data check**

 ```{r datacheck, echo=FALSE, results='asis', error=TRUE}

-dataset <- read.csv2("demo_data/main.csv", dec=".", stringsAsFactors=F)
-# add interviewer file, too!
+# check files
+if (check_files(params$mainFile, params$intFile) == FALSE) {
+  cat ("  \n**Please fix the above errors and re-run the script.**")
+  knitr::knit_exit()
+}


-# include VSI functions
-
-source('components/VSI.R')
-
-# exit if data is not OK
-if (check_data(dataset) == FALSE) {
+dataset = load_files(params$mainFile, params$intFile);
+if (!(length(dataset) > 0 && dataset != FALSE)) {
+  cat ("  \n**Please fix the above errors and re-run the script.**")
  knitr::knit_exit()
 }
				`@ -1 +0,0 @@`
				`May Doušak,may,FDV,13.01.2023 14:25,file:///home/may/.config/libreoffice/4;`