From fc628cdf283fbacfae755959fb958e9fada3dced Mon Sep 17 00:00:00 2001 From: MAY Date: Tue, 17 Jan 2023 10:13:25 +0100 Subject: [PATCH] First release ready version. Cleaned up the code, provided additional data checks and instructions on how to fix errors. Refactored the code so it now uses "common.R" and "VSI.R". By doing this, all other "tools" by the WP8 team can be streamlined and made more user friendly. --- README.md | 24 ++++++- components/VSI.R | 47 ++++++++++--- components/common.R | 91 +++++++++++++++++++++++++ components/template.R | 24 +++++++ demo_data/.~lock.inwer_file.csv# | 1 - demo_data/{inwer_file.csv => inwer.csv} | 13 ++++ vsi.Rmd | 68 ++++++------------ 7 files changed, 209 insertions(+), 59 deletions(-) create mode 100644 components/common.R create mode 100644 components/template.R delete mode 100644 demo_data/.~lock.inwer_file.csv# rename demo_data/{inwer_file.csv => inwer.csv} (68%) diff --git a/README.md b/README.md index 4ef9dd1..857797c 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,31 @@ # Virtual Surrounding Impression analysis -Version: 0.1b +The tool for checking Virtual Surrounding Impression. + +It does the following two detections on the dataset: +- interviews that were _most likely_ taken at the same location by the same interviewer (read: faking at home) +- individual interviews that were _most likely_ conducted at multiple locations (multiple sittings or faking at multiple places) + +Please use the tool as an *additional* and not _sole_ indicator of undesired interviewer behaviour. + +Also, remember the tool is based on WIFI data. +Cases, conducted while the WIFI is turned off (or no wifi access points are visible) *will have the same VSI* and will appear to be taken at the same location. + + +Support: myESS or [mailto: may.dousak@fdv.uni-lj.si] ## Changelog +*1.0 R 230117 - 17th Jan 2023* +- cleaned up the code +- created common files that can be shared with the other tools +- script now checks the data and reports on the errors before processing +- user friendly error messages and instructions provided when errors are detected + + +*1.0b - 16th Jan 2023* +R markdown version, knits PDF report + *0.1b - 13th Jan 2023* First working R version, translated from Python. diff --git a/components/VSI.R b/components/VSI.R index 59fd837..28a4f3e 100644 --- a/components/VSI.R +++ b/components/VSI.R @@ -1,22 +1,28 @@ -# checks the data, returns FALSE in case something is not OK -check_data <- function (dataset) { - if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3) && !is.null(dataset$idno)) { - cat ("All the required variables are present, continuing with the analysis. ") - +# checks the main data, returns FALSE in case something is not OK +# for VSI, we need at least: idno, cntry, VSI1, VSI2, VSI3 + +check_main_file <- function (dataset) { + + if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3) + && !is.null(dataset$idno) && !is.null(dataset$cntry)) { + return (TRUE) } else { if (is.null(dataset$VSI1)) { - cat ("VSI1 variable is missing in the dataset. "); + cat ("VSI1 variable is missing in the main dataset. "); } if (is.null(dataset$VSI2)) { - cat ("VSI2 variable is missing in the dataset. "); + cat ("VSI2 variable is missing in the main dataset. "); } if (is.null(dataset$VSI3)) { - cat ("VSI3 variable is missing in the dataset. "); + cat ("VSI3 variable is missing in the main dataset. "); } if (is.null(dataset$idno)) { - cat ("idno variable (interview ID) is missing in the dataset. "); + cat ("idno variable (interview ID) is missing in the main dataset. "); + } + if (is.null(dataset$cntry)) { + cat ("cntry variable (country code) is missing in the main dataset. "); } cat ("**Please check your dataset and try again**") @@ -24,6 +30,29 @@ check_data <- function (dataset) { } } +# checks the intervier data, returns FALSE in case something is not OK +# here, we need at least: idno, cntry, intnum +check_inwer_file <- function (dataset) { + + if (!is.null(dataset$cntry) && !is.null(dataset$idno) && !is.null(dataset$intnum)) { + + return (TRUE) + + } else { + if (is.null(dataset$intnum)) { + cat ("intnum (intervie**wer** ID variable is missing in the interviewer dataset. "); + } + if (is.null(dataset$idno)) { + cat ("idno variable (interview ID) is missing in the interviewer dataset. "); + } + if (is.null(dataset$cntry)) { + cat ("cntry variable (country code) is missing in the interviewer dataset. "); + } + cat ("**Please check your dataset and try again**") + return (FALSE) + } +} + # Extracts access points fingerprints from the data (5 APs from 1 VSI) diff --git a/components/common.R b/components/common.R new file mode 100644 index 0000000..bc2ddfa --- /dev/null +++ b/components/common.R @@ -0,0 +1,91 @@ +# call this at the top of the Rmd +common_defs <- function () { + knitr::opts_chunk$set(echo = TRUE, results = "hide", message = TRUE, dev = "cairo_pdf", warning = TRUE) + knitr::opts_chunk$set(fig.pos = 'H') + + options(knitr.table.format = "latex", knitr.kable.NA = "") + + Sys.setlocale("LC_ALL","English") +} + +# checks if file names are provided and file paths are valid. +# returns TRUE if everything's OK or FALSE if not. + +check_files <- function (main, inwer) { + + pass = TRUE + + if (is.null(main) || main == "") { + + pass = FALSE + cat ("**Main data file not provided. Please profile main data file path as 'mainFile' parameter to the script!** \n") + } + + if (is.null(inwer) || inwer == "") { + + pass = FALSE + cat ("**Interviewer data file not provided. Please profile interviewer data file path as 'intFile' parameter to the script!** \n") + } + + + if (!file.exists(main)) { + + pass= FALSE + cat (paste("**I can't open the main data file '", main, "'.** \n + Did you provide a valid path to the file? \n + Please make sure that: + - it is stored on a local (not network) drive + - file name or file path does not contain any non-english characters + - file name and file path only contains characters (a-z), numbers (0-1), dash (-) or underline (_)")) + } + + if (!file.exists(inwer)) { + cat (paste("**I can't open the interviewer data file '", inwer, "'.** \n + Did you provide a valid path to the file? \n + Please make sure that: + - it is stored on a local (not network) drive + - file name or file path does not contain any non-english characters + - file name and file path only contains characters (a-z), numbers (0-1), dash (-) or underline (_)")) + } + + return (pass) +} + +# loads the files and returns merged data +# also checks if the needed variables are present +# (check_data must be provided in additional R file, as it differs between tests) + +load_files <- function (main, inwer) { + + pass = TRUE + + # main file + dataset <- read.csv2(main, dec=".", stringsAsFactors=F) + # exit if data is not OK + + if (check_main_file(dataset) == FALSE) { + pass = FALSE + } + + #interviewer file + Inwer <- read.csv2(inwer, dec=".", stringsAsFactors=F) + + if (check_inwer_file(Inwer) == FALSE) { + pass = FALSE + } + + # add country and interviewer ID to the main data frame; merge by idno and cntry + # only merge when files are OK + if (pass == TRUE) { + + cat ("All the required variables are present, continuing with the analysis. \n") + + Inwer_ID <- dplyr::select(Inwer, "intnum", "idno", "cntry") + dataset <- dplyr::left_join(dataset, Inwer_ID, by = c("idno", "cntry")) + + return (dataset) + } + else { + return (FALSE) + } +} \ No newline at end of file diff --git a/components/template.R b/components/template.R new file mode 100644 index 0000000..9403f95 --- /dev/null +++ b/components/template.R @@ -0,0 +1,24 @@ +ESSred <- rgb(.91, .20, .32) +ESSgreen <- rgb(.14, .62, .51) +ESSblue <- rgb(0, .25, .48) + +# now some adjacent and square colors (colortools has been removed from CRAN) +ESS_colors_extra <- c(rgb(.44,.2,.91),rgb(.2,.91,.79),rgb(.68,.91,.2),rgb(.91,.2,.68),rgb(.91,.44,.2)) + +ESSColors <- c(ESSred, ESS_colors_extra, ESSgreen, ESSblue) + +themeESS <- theme_tufte(base_size = 9, base_family = "Calibri") + + theme(axis.title = element_text(size = 9, face = "plain"), + axis.text = element_text(size = 9), + axis.line.x = element_line(), + plot.title = element_blank(), + legend.title = element_blank(), + legend.text = element_text(size = 9), + strip.text = element_text(size = 9, face = "bold"), + legend.position = "none", + legend.direction = "horizontal", + legend.box = "vertical", + legend.spacing = unit(0, "line"), + legend.key.size = unit(.75, "line")) +linebreak <- "\\hspace{\\textwidth}" + diff --git a/demo_data/.~lock.inwer_file.csv# b/demo_data/.~lock.inwer_file.csv# deleted file mode 100644 index 4c3f373..0000000 --- a/demo_data/.~lock.inwer_file.csv# +++ /dev/null @@ -1 +0,0 @@ -May DouĊĦak,may,FDV,13.01.2023 14:25,file:///home/may/.config/libreoffice/4; \ No newline at end of file diff --git a/demo_data/inwer_file.csv b/demo_data/inwer.csv similarity index 68% rename from demo_data/inwer_file.csv rename to demo_data/inwer.csv index d4f4838..a39221b 100644 --- a/demo_data/inwer_file.csv +++ b/demo_data/inwer.csv @@ -26,3 +26,16 @@ 25;"SI";5 26;"SI";6 27;"SI";6 +28;"SI";7 +29;"SI";7 +30;"SI";7 +31;"SI";7 +32;"SI";7 +33;"SI";7 +34;"SI";7 +35;"SI";7 +36;"SI";8 +37;"SI";8 +38;"SI";8 +39;"SI";8 +40;"SI";8 diff --git a/vsi.Rmd b/vsi.Rmd index 39942b9..39e9f1a 100644 --- a/vsi.Rmd +++ b/vsi.Rmd @@ -21,27 +21,15 @@ graphics: yes csl: ./components/apa.csl link-citations: yes params: - mainFile: "" - intFile: "" - version: "1.0 beta" - + mainFile: "demo_data/main.csv" + intFile: "demo_data/inwer.csv" + version: "1.0.230117" --- ```{r setup, include = FALSE, error=TRUE} library(knitr) -#library(kableExtra) - -knitr::opts_chunk$set(echo = TRUE, results = "hide", message = TRUE, dev = "cairo_pdf", warning = TRUE) -knitr::opts_chunk$set(fig.pos = 'H') - -options(knitr.table.format = "latex", knitr.kable.NA = "") - -Sys.setlocale("LC_ALL","English") -``` - -```{r setup2, include = FALSE,, error=T, warning=T, message=T} library(here) library(foreign) library(dplyr) @@ -49,34 +37,13 @@ library(ggplot2) library(lubridate) library(ggthemes) # theme_tufte works +source('components/VSI.R') +source('components/common.R') -``` +common_defs(); +source('components/template.R') -```{r theme, include=FALSE, error=TRUE} -ESSred <- rgb(.91, .20, .32) -ESSgreen <- rgb(.14, .62, .51) -ESSblue <- rgb(0, .25, .48) - -# now some adjacent and square colors (colortools has been removed from CRAN) -ESS_colors_extra <- c(rgb(.44,.2,.91),rgb(.2,.91,.79),rgb(.68,.91,.2),rgb(.91,.2,.68),rgb(.91,.44,.2)) - -ESSColors <- c(ESSred, ESS_colors_extra, ESSgreen, ESSblue) - -themeESS <- theme_tufte(base_size = 9, base_family = "Calibri") + - theme(axis.title = element_text(size = 9, face = "plain"), - axis.text = element_text(size = 9), - axis.line.x = element_line(), - plot.title = element_blank(), - legend.title = element_blank(), - legend.text = element_text(size = 9), - strip.text = element_text(size = 9, face = "bold"), - legend.position = "none", - legend.direction = "horizontal", - legend.box = "vertical", - legend.spacing = unit(0, "line"), - legend.key.size = unit(.75, "line")) -linebreak <- "\\hspace{\\textwidth}" ``` \newpage @@ -100,20 +67,25 @@ This tools checks the Virtual Surrounding Impression data and reports on the cas The results should always be combined with other methods of detecting undesired interviewer behaviour. +## Support {-} + +Support is available on MyESS forums or via e-mail at may.dousak@fdv.uni-lj.si . + + **Data check** ```{r datacheck, echo=FALSE, results='asis', error=TRUE} -dataset <- read.csv2("demo_data/main.csv", dec=".", stringsAsFactors=F) -# add interviewer file, too! +# check files +if (check_files(params$mainFile, params$intFile) == FALSE) { + cat (" \n**Please fix the above errors and re-run the script.**") + knitr::knit_exit() +} -# include VSI functions - -source('components/VSI.R') - -# exit if data is not OK -if (check_data(dataset) == FALSE) { +dataset = load_files(params$mainFile, params$intFile); +if (!(length(dataset) > 0 && dataset != FALSE)) { + cat (" \n**Please fix the above errors and re-run the script.**") knitr::knit_exit() }