diff --git a/README.md b/README.md index 4ef9dd1..857797c 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,31 @@ # Virtual Surrounding Impression analysis -Version: 0.1b +The tool for checking Virtual Surrounding Impression. + +It does the following two detections on the dataset: +- interviews that were _most likely_ taken at the same location by the same interviewer (read: faking at home) +- individual interviews that were _most likely_ conducted at multiple locations (multiple sittings or faking at multiple places) + +Please use the tool as an *additional* and not _sole_ indicator of undesired interviewer behaviour. + +Also, remember the tool is based on WIFI data. +Cases, conducted while the WIFI is turned off (or no wifi access points are visible) *will have the same VSI* and will appear to be taken at the same location. + + +Support: myESS or [mailto: may.dousak@fdv.uni-lj.si] ## Changelog +*1.0 R 230117 - 17th Jan 2023* +- cleaned up the code +- created common files that can be shared with the other tools +- script now checks the data and reports on the errors before processing +- user friendly error messages and instructions provided when errors are detected + + +*1.0b - 16th Jan 2023* +R markdown version, knits PDF report + *0.1b - 13th Jan 2023* First working R version, translated from Python. diff --git a/components/VSI.R b/components/VSI.R index 59fd837..28a4f3e 100644 --- a/components/VSI.R +++ b/components/VSI.R @@ -1,22 +1,28 @@ -# checks the data, returns FALSE in case something is not OK -check_data <- function (dataset) { - if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3) && !is.null(dataset$idno)) { - cat ("All the required variables are present, continuing with the analysis. ") - +# checks the main data, returns FALSE in case something is not OK +# for VSI, we need at least: idno, cntry, VSI1, VSI2, VSI3 + +check_main_file <- function (dataset) { + + if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3) + && !is.null(dataset$idno) && !is.null(dataset$cntry)) { + return (TRUE) } else { if (is.null(dataset$VSI1)) { - cat ("VSI1 variable is missing in the dataset. "); + cat ("VSI1 variable is missing in the main dataset. "); } if (is.null(dataset$VSI2)) { - cat ("VSI2 variable is missing in the dataset. "); + cat ("VSI2 variable is missing in the main dataset. "); } if (is.null(dataset$VSI3)) { - cat ("VSI3 variable is missing in the dataset. "); + cat ("VSI3 variable is missing in the main dataset. "); } if (is.null(dataset$idno)) { - cat ("idno variable (interview ID) is missing in the dataset. "); + cat ("idno variable (interview ID) is missing in the main dataset. "); + } + if (is.null(dataset$cntry)) { + cat ("cntry variable (country code) is missing in the main dataset. "); } cat ("**Please check your dataset and try again**") @@ -24,6 +30,29 @@ check_data <- function (dataset) { } } +# checks the intervier data, returns FALSE in case something is not OK +# here, we need at least: idno, cntry, intnum +check_inwer_file <- function (dataset) { + + if (!is.null(dataset$cntry) && !is.null(dataset$idno) && !is.null(dataset$intnum)) { + + return (TRUE) + + } else { + if (is.null(dataset$intnum)) { + cat ("intnum (intervie**wer** ID variable is missing in the interviewer dataset. "); + } + if (is.null(dataset$idno)) { + cat ("idno variable (interview ID) is missing in the interviewer dataset. "); + } + if (is.null(dataset$cntry)) { + cat ("cntry variable (country code) is missing in the interviewer dataset. "); + } + cat ("**Please check your dataset and try again**") + return (FALSE) + } +} + # Extracts access points fingerprints from the data (5 APs from 1 VSI) diff --git a/components/common.R b/components/common.R new file mode 100644 index 0000000..bc2ddfa --- /dev/null +++ b/components/common.R @@ -0,0 +1,91 @@ +# call this at the top of the Rmd +common_defs <- function () { + knitr::opts_chunk$set(echo = TRUE, results = "hide", message = TRUE, dev = "cairo_pdf", warning = TRUE) + knitr::opts_chunk$set(fig.pos = 'H') + + options(knitr.table.format = "latex", knitr.kable.NA = "") + + Sys.setlocale("LC_ALL","English") +} + +# checks if file names are provided and file paths are valid. +# returns TRUE if everything's OK or FALSE if not. + +check_files <- function (main, inwer) { + + pass = TRUE + + if (is.null(main) || main == "") { + + pass = FALSE + cat ("**Main data file not provided. Please profile main data file path as 'mainFile' parameter to the script!** \n") + } + + if (is.null(inwer) || inwer == "") { + + pass = FALSE + cat ("**Interviewer data file not provided. Please profile interviewer data file path as 'intFile' parameter to the script!** \n") + } + + + if (!file.exists(main)) { + + pass= FALSE + cat (paste("**I can't open the main data file '", main, "'.** \n + Did you provide a valid path to the file? \n + Please make sure that: + - it is stored on a local (not network) drive + - file name or file path does not contain any non-english characters + - file name and file path only contains characters (a-z), numbers (0-1), dash (-) or underline (_)")) + } + + if (!file.exists(inwer)) { + cat (paste("**I can't open the interviewer data file '", inwer, "'.** \n + Did you provide a valid path to the file? \n + Please make sure that: + - it is stored on a local (not network) drive + - file name or file path does not contain any non-english characters + - file name and file path only contains characters (a-z), numbers (0-1), dash (-) or underline (_)")) + } + + return (pass) +} + +# loads the files and returns merged data +# also checks if the needed variables are present +# (check_data must be provided in additional R file, as it differs between tests) + +load_files <- function (main, inwer) { + + pass = TRUE + + # main file + dataset <- read.csv2(main, dec=".", stringsAsFactors=F) + # exit if data is not OK + + if (check_main_file(dataset) == FALSE) { + pass = FALSE + } + + #interviewer file + Inwer <- read.csv2(inwer, dec=".", stringsAsFactors=F) + + if (check_inwer_file(Inwer) == FALSE) { + pass = FALSE + } + + # add country and interviewer ID to the main data frame; merge by idno and cntry + # only merge when files are OK + if (pass == TRUE) { + + cat ("All the required variables are present, continuing with the analysis. \n") + + Inwer_ID <- dplyr::select(Inwer, "intnum", "idno", "cntry") + dataset <- dplyr::left_join(dataset, Inwer_ID, by = c("idno", "cntry")) + + return (dataset) + } + else { + return (FALSE) + } +} \ No newline at end of file diff --git a/components/template.R b/components/template.R new file mode 100644 index 0000000..9403f95 --- /dev/null +++ b/components/template.R @@ -0,0 +1,24 @@ +ESSred <- rgb(.91, .20, .32) +ESSgreen <- rgb(.14, .62, .51) +ESSblue <- rgb(0, .25, .48) + +# now some adjacent and square colors (colortools has been removed from CRAN) +ESS_colors_extra <- c(rgb(.44,.2,.91),rgb(.2,.91,.79),rgb(.68,.91,.2),rgb(.91,.2,.68),rgb(.91,.44,.2)) + +ESSColors <- c(ESSred, ESS_colors_extra, ESSgreen, ESSblue) + +themeESS <- theme_tufte(base_size = 9, base_family = "Calibri") + + theme(axis.title = element_text(size = 9, face = "plain"), + axis.text = element_text(size = 9), + axis.line.x = element_line(), + plot.title = element_blank(), + legend.title = element_blank(), + legend.text = element_text(size = 9), + strip.text = element_text(size = 9, face = "bold"), + legend.position = "none", + legend.direction = "horizontal", + legend.box = "vertical", + legend.spacing = unit(0, "line"), + legend.key.size = unit(.75, "line")) +linebreak <- "\\hspace{\\textwidth}" + diff --git a/demo_data/.~lock.inwer_file.csv# b/demo_data/.~lock.inwer_file.csv# deleted file mode 100644 index 4c3f373..0000000 --- a/demo_data/.~lock.inwer_file.csv# +++ /dev/null @@ -1 +0,0 @@ -May DouĊĦak,may,FDV,13.01.2023 14:25,file:///home/may/.config/libreoffice/4; \ No newline at end of file diff --git a/demo_data/inwer_file.csv b/demo_data/inwer.csv similarity index 68% rename from demo_data/inwer_file.csv rename to demo_data/inwer.csv index d4f4838..a39221b 100644 --- a/demo_data/inwer_file.csv +++ b/demo_data/inwer.csv @@ -26,3 +26,16 @@ 25;"SI";5 26;"SI";6 27;"SI";6 +28;"SI";7 +29;"SI";7 +30;"SI";7 +31;"SI";7 +32;"SI";7 +33;"SI";7 +34;"SI";7 +35;"SI";7 +36;"SI";8 +37;"SI";8 +38;"SI";8 +39;"SI";8 +40;"SI";8 diff --git a/vsi.Rmd b/vsi.Rmd index 39942b9..39e9f1a 100644 --- a/vsi.Rmd +++ b/vsi.Rmd @@ -21,27 +21,15 @@ graphics: yes csl: ./components/apa.csl link-citations: yes params: - mainFile: "" - intFile: "" - version: "1.0 beta" - + mainFile: "demo_data/main.csv" + intFile: "demo_data/inwer.csv" + version: "1.0.230117" --- ```{r setup, include = FALSE, error=TRUE} library(knitr) -#library(kableExtra) - -knitr::opts_chunk$set(echo = TRUE, results = "hide", message = TRUE, dev = "cairo_pdf", warning = TRUE) -knitr::opts_chunk$set(fig.pos = 'H') - -options(knitr.table.format = "latex", knitr.kable.NA = "") - -Sys.setlocale("LC_ALL","English") -``` - -```{r setup2, include = FALSE,, error=T, warning=T, message=T} library(here) library(foreign) library(dplyr) @@ -49,34 +37,13 @@ library(ggplot2) library(lubridate) library(ggthemes) # theme_tufte works +source('components/VSI.R') +source('components/common.R') -``` +common_defs(); +source('components/template.R') -```{r theme, include=FALSE, error=TRUE} -ESSred <- rgb(.91, .20, .32) -ESSgreen <- rgb(.14, .62, .51) -ESSblue <- rgb(0, .25, .48) - -# now some adjacent and square colors (colortools has been removed from CRAN) -ESS_colors_extra <- c(rgb(.44,.2,.91),rgb(.2,.91,.79),rgb(.68,.91,.2),rgb(.91,.2,.68),rgb(.91,.44,.2)) - -ESSColors <- c(ESSred, ESS_colors_extra, ESSgreen, ESSblue) - -themeESS <- theme_tufte(base_size = 9, base_family = "Calibri") + - theme(axis.title = element_text(size = 9, face = "plain"), - axis.text = element_text(size = 9), - axis.line.x = element_line(), - plot.title = element_blank(), - legend.title = element_blank(), - legend.text = element_text(size = 9), - strip.text = element_text(size = 9, face = "bold"), - legend.position = "none", - legend.direction = "horizontal", - legend.box = "vertical", - legend.spacing = unit(0, "line"), - legend.key.size = unit(.75, "line")) -linebreak <- "\\hspace{\\textwidth}" ``` \newpage @@ -100,20 +67,25 @@ This tools checks the Virtual Surrounding Impression data and reports on the cas The results should always be combined with other methods of detecting undesired interviewer behaviour. +## Support {-} + +Support is available on MyESS forums or via e-mail at may.dousak@fdv.uni-lj.si . + + **Data check** ```{r datacheck, echo=FALSE, results='asis', error=TRUE} -dataset <- read.csv2("demo_data/main.csv", dec=".", stringsAsFactors=F) -# add interviewer file, too! +# check files +if (check_files(params$mainFile, params$intFile) == FALSE) { + cat (" \n**Please fix the above errors and re-run the script.**") + knitr::knit_exit() +} -# include VSI functions - -source('components/VSI.R') - -# exit if data is not OK -if (check_data(dataset) == FALSE) { +dataset = load_files(params$mainFile, params$intFile); +if (!(length(dataset) > 0 && dataset != FALSE)) { + cat (" \n**Please fix the above errors and re-run the script.**") knitr::knit_exit() }