First release ready version.

Cleaned up the code, provided additional data checks and instructions on
how to fix errors.

Refactored the code so it now uses "common.R" and "VSI.R". By doing
this, all other "tools" by the WP8 team can be streamlined and made more
user friendly.
This commit is contained in:
May Doušak 2023-01-17 10:13:25 +01:00
parent 7564dfb384
commit fc628cdf28
7 changed files with 209 additions and 59 deletions

View File

@ -1,9 +1,31 @@
# Virtual Surrounding Impression analysis
Version: 0.1b
The tool for checking Virtual Surrounding Impression.
It does the following two detections on the dataset:
- interviews that were _most likely_ taken at the same location by the same interviewer (read: faking at home)
- individual interviews that were _most likely_ conducted at multiple locations (multiple sittings or faking at multiple places)
Please use the tool as an *additional* and not _sole_ indicator of undesired interviewer behaviour.
Also, remember the tool is based on WIFI data.
Cases, conducted while the WIFI is turned off (or no wifi access points are visible) *will have the same VSI* and will appear to be taken at the same location.
Support: myESS or [mailto: may.dousak@fdv.uni-lj.si]
## Changelog
*1.0 R 230117 - 17th Jan 2023*
- cleaned up the code
- created common files that can be shared with the other tools
- script now checks the data and reports on the errors before processing
- user friendly error messages and instructions provided when errors are detected
*1.0b - 16th Jan 2023*
R markdown version, knits PDF report
*0.1b - 13th Jan 2023*
First working R version, translated from Python.

View File

@ -1,22 +1,28 @@
# checks the data, returns FALSE in case something is not OK
check_data <- function (dataset) {
if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3) && !is.null(dataset$idno)) {
cat ("All the required variables are present, continuing with the analysis. ")
# checks the main data, returns FALSE in case something is not OK
# for VSI, we need at least: idno, cntry, VSI1, VSI2, VSI3
check_main_file <- function (dataset) {
if (!is.null(dataset$VSI1) && !is.null(dataset$VSI2) && !is.null(dataset$VSI3)
&& !is.null(dataset$idno) && !is.null(dataset$cntry)) {
return (TRUE)
} else {
if (is.null(dataset$VSI1)) {
cat ("VSI1 variable is missing in the dataset. ");
cat ("VSI1 variable is missing in the main dataset. ");
}
if (is.null(dataset$VSI2)) {
cat ("VSI2 variable is missing in the dataset. ");
cat ("VSI2 variable is missing in the main dataset. ");
}
if (is.null(dataset$VSI3)) {
cat ("VSI3 variable is missing in the dataset. ");
cat ("VSI3 variable is missing in the main dataset. ");
}
if (is.null(dataset$idno)) {
cat ("idno variable (interview ID) is missing in the dataset. ");
cat ("idno variable (interview ID) is missing in the main dataset. ");
}
if (is.null(dataset$cntry)) {
cat ("cntry variable (country code) is missing in the main dataset. ");
}
cat ("**Please check your dataset and try again**")
@ -24,6 +30,29 @@ check_data <- function (dataset) {
}
}
# checks the intervier data, returns FALSE in case something is not OK
# here, we need at least: idno, cntry, intnum
check_inwer_file <- function (dataset) {
if (!is.null(dataset$cntry) && !is.null(dataset$idno) && !is.null(dataset$intnum)) {
return (TRUE)
} else {
if (is.null(dataset$intnum)) {
cat ("intnum (intervie**wer** ID variable is missing in the interviewer dataset. ");
}
if (is.null(dataset$idno)) {
cat ("idno variable (interview ID) is missing in the interviewer dataset. ");
}
if (is.null(dataset$cntry)) {
cat ("cntry variable (country code) is missing in the interviewer dataset. ");
}
cat ("**Please check your dataset and try again**")
return (FALSE)
}
}
# Extracts access points fingerprints from the data (5 APs from 1 VSI)

91
components/common.R Normal file
View File

@ -0,0 +1,91 @@
# call this at the top of the Rmd
common_defs <- function () {
knitr::opts_chunk$set(echo = TRUE, results = "hide", message = TRUE, dev = "cairo_pdf", warning = TRUE)
knitr::opts_chunk$set(fig.pos = 'H')
options(knitr.table.format = "latex", knitr.kable.NA = "")
Sys.setlocale("LC_ALL","English")
}
# checks if file names are provided and file paths are valid.
# returns TRUE if everything's OK or FALSE if not.
check_files <- function (main, inwer) {
pass = TRUE
if (is.null(main) || main == "") {
pass = FALSE
cat ("**Main data file not provided. Please profile main data file path as 'mainFile' parameter to the script!** \n")
}
if (is.null(inwer) || inwer == "") {
pass = FALSE
cat ("**Interviewer data file not provided. Please profile interviewer data file path as 'intFile' parameter to the script!** \n")
}
if (!file.exists(main)) {
pass= FALSE
cat (paste("**I can't open the main data file '", main, "'.** \n
Did you provide a valid path to the file? \n
Please make sure that:
- it is stored on a local (not network) drive
- file name or file path does not contain any non-english characters
- file name and file path only contains characters (a-z), numbers (0-1), dash (-) or underline (_)"))
}
if (!file.exists(inwer)) {
cat (paste("**I can't open the interviewer data file '", inwer, "'.** \n
Did you provide a valid path to the file? \n
Please make sure that:
- it is stored on a local (not network) drive
- file name or file path does not contain any non-english characters
- file name and file path only contains characters (a-z), numbers (0-1), dash (-) or underline (_)"))
}
return (pass)
}
# loads the files and returns merged data
# also checks if the needed variables are present
# (check_data must be provided in additional R file, as it differs between tests)
load_files <- function (main, inwer) {
pass = TRUE
# main file
dataset <- read.csv2(main, dec=".", stringsAsFactors=F)
# exit if data is not OK
if (check_main_file(dataset) == FALSE) {
pass = FALSE
}
#interviewer file
Inwer <- read.csv2(inwer, dec=".", stringsAsFactors=F)
if (check_inwer_file(Inwer) == FALSE) {
pass = FALSE
}
# add country and interviewer ID to the main data frame; merge by idno and cntry
# only merge when files are OK
if (pass == TRUE) {
cat ("All the required variables are present, continuing with the analysis. \n")
Inwer_ID <- dplyr::select(Inwer, "intnum", "idno", "cntry")
dataset <- dplyr::left_join(dataset, Inwer_ID, by = c("idno", "cntry"))
return (dataset)
}
else {
return (FALSE)
}
}

24
components/template.R Normal file
View File

@ -0,0 +1,24 @@
ESSred <- rgb(.91, .20, .32)
ESSgreen <- rgb(.14, .62, .51)
ESSblue <- rgb(0, .25, .48)
# now some adjacent and square colors (colortools has been removed from CRAN)
ESS_colors_extra <- c(rgb(.44,.2,.91),rgb(.2,.91,.79),rgb(.68,.91,.2),rgb(.91,.2,.68),rgb(.91,.44,.2))
ESSColors <- c(ESSred, ESS_colors_extra, ESSgreen, ESSblue)
themeESS <- theme_tufte(base_size = 9, base_family = "Calibri") +
theme(axis.title = element_text(size = 9, face = "plain"),
axis.text = element_text(size = 9),
axis.line.x = element_line(),
plot.title = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 9),
strip.text = element_text(size = 9, face = "bold"),
legend.position = "none",
legend.direction = "horizontal",
legend.box = "vertical",
legend.spacing = unit(0, "line"),
legend.key.size = unit(.75, "line"))
linebreak <- "\\hspace{\\textwidth}"

View File

@ -1 +0,0 @@
May Doušak,may,FDV,13.01.2023 14:25,file:///home/may/.config/libreoffice/4;

View File

@ -26,3 +26,16 @@
25;"SI";5
26;"SI";6
27;"SI";6
28;"SI";7
29;"SI";7
30;"SI";7
31;"SI";7
32;"SI";7
33;"SI";7
34;"SI";7
35;"SI";7
36;"SI";8
37;"SI";8
38;"SI";8
39;"SI";8
40;"SI";8
1 idno cntry intnum
26 25 SI 5
27 26 SI 6
28 27 SI 6
29 28 SI 7
30 29 SI 7
31 30 SI 7
32 31 SI 7
33 32 SI 7
34 33 SI 7
35 34 SI 7
36 35 SI 7
37 36 SI 8
38 37 SI 8
39 38 SI 8
40 39 SI 8
41 40 SI 8

68
vsi.Rmd
View File

@ -21,27 +21,15 @@ graphics: yes
csl: ./components/apa.csl
link-citations: yes
params:
mainFile: ""
intFile: ""
version: "1.0 beta"
mainFile: "demo_data/main.csv"
intFile: "demo_data/inwer.csv"
version: "1.0.230117"
---
```{r setup, include = FALSE, error=TRUE}
library(knitr)
#library(kableExtra)
knitr::opts_chunk$set(echo = TRUE, results = "hide", message = TRUE, dev = "cairo_pdf", warning = TRUE)
knitr::opts_chunk$set(fig.pos = 'H')
options(knitr.table.format = "latex", knitr.kable.NA = "")
Sys.setlocale("LC_ALL","English")
```
```{r setup2, include = FALSE,, error=T, warning=T, message=T}
library(here)
library(foreign)
library(dplyr)
@ -49,34 +37,13 @@ library(ggplot2)
library(lubridate)
library(ggthemes) # theme_tufte works
source('components/VSI.R')
source('components/common.R')
```
common_defs();
source('components/template.R')
```{r theme, include=FALSE, error=TRUE}
ESSred <- rgb(.91, .20, .32)
ESSgreen <- rgb(.14, .62, .51)
ESSblue <- rgb(0, .25, .48)
# now some adjacent and square colors (colortools has been removed from CRAN)
ESS_colors_extra <- c(rgb(.44,.2,.91),rgb(.2,.91,.79),rgb(.68,.91,.2),rgb(.91,.2,.68),rgb(.91,.44,.2))
ESSColors <- c(ESSred, ESS_colors_extra, ESSgreen, ESSblue)
themeESS <- theme_tufte(base_size = 9, base_family = "Calibri") +
theme(axis.title = element_text(size = 9, face = "plain"),
axis.text = element_text(size = 9),
axis.line.x = element_line(),
plot.title = element_blank(),
legend.title = element_blank(),
legend.text = element_text(size = 9),
strip.text = element_text(size = 9, face = "bold"),
legend.position = "none",
legend.direction = "horizontal",
legend.box = "vertical",
legend.spacing = unit(0, "line"),
legend.key.size = unit(.75, "line"))
linebreak <- "\\hspace{\\textwidth}"
```
\newpage
@ -100,20 +67,25 @@ This tools checks the Virtual Surrounding Impression data and reports on the cas
The results should always be combined with other methods of detecting undesired interviewer behaviour.
## Support {-}
Support is available on MyESS forums or via e-mail at may.dousak@fdv.uni-lj.si .
**Data check**
```{r datacheck, echo=FALSE, results='asis', error=TRUE}
dataset <- read.csv2("demo_data/main.csv", dec=".", stringsAsFactors=F)
# add interviewer file, too!
# check files
if (check_files(params$mainFile, params$intFile) == FALSE) {
cat (" \n**Please fix the above errors and re-run the script.**")
knitr::knit_exit()
}
# include VSI functions
source('components/VSI.R')
# exit if data is not OK
if (check_data(dataset) == FALSE) {
dataset = load_files(params$mainFile, params$intFile);
if (!(length(dataset) > 0 && dataset != FALSE)) {
cat (" \n**Please fix the above errors and re-run the script.**")
knitr::knit_exit()
}