mysummary <- function(x){ a<- c(min = min(x), max = max(x), mean = mean(x), sd = sd(x), Q1 = as.numeric(quantile(x, .25)), Q2 = median(x), Q3 = as.numeric(quantile(x, .75)), n = length(x)) a[1:7] <- round(a[1:7], 4) a } catcountries <- function(x, dev = "and"){ l <- length(x) if(l == 1){ x } else if(l == 2){ paste(x, collapse = paste("", dev, "")) } else{ paste(paste0(x[1:(l-1)], collapse = ", "), dev, x[l]) } } catsources <- function(x, dev = "and"){ l <- length(x) if(l == 1){ x } else if(l == 2){ x[2] <- gsub("Based on ", "", x[2]) x <- paste(x, collapse = paste("", dev, "")) } else{ x[1:(l-1)] <- lapply(x[1:(l-1)], FUN = function(x) gsub("[.]$", "", x)) x[2:l] <- gsub("Based on ", "", x[2:l]) x <- paste(paste0(x[1:(l-1)], collapse = ", "), dev, x[l]) } x <- gsub(paste("[.]", dev), paste("", dev), x) x } catitems <- function(x){ l <- nrow(x) x$itemlabel <- with(x, paste(toupper(x$item), paste0("'", x$label, "'"))) if(l == 1){ x$itemlabel } else if(l == 2){ paste(x$itemlabel, collapse = " and ") } else{ paste(paste0(x$itemlabel[1:(l-1)], collapse = ", "), "and", x$itemlabel[l]) } } catnumeric <- function(x, unit = "round"){ l <- length(x) if(l == 1){ paste(firstup(unit), x) } else if(l == 2){ paste(paste0(firstup(unit), "s"), paste(x, collapse = " and ")) } else{ paste(paste0(firstup(unit), "s"), paste(paste0(x[1:(l-1)], collapse = ", "), "and", x[l])) } } previousrounds <- function(roundsset, rounds, qualifier = "only"){ prev <- ifelse(length(roundsset) == 0, "in none of the previous rounds", ifelse(setequal(rounds, roundsset), "in all previous rounds", ifelse(setequal(head(rounds[order(-rounds)], length(roundsset)), roundsset), paste("since Round", min(roundsset)), ifelse(length(roundsset) <= 3, ifelse(!is.null(qualifier), paste(qualifier, "in", catnumeric(roundsset)), paste("in", catnumeric(roundsset))), ifelse(length(roundsset)/length(rounds) <= .5, paste("in", catnumeric(roundsset)), paste("in all previous rounds, except", catnumeric(setdiff(rounds, roundsset)))))))) prev } weekinmonth <- function(date){ timing <- ifelse(lubridate::day(date) <= 7, "the beginning of", ifelse(lubridate::day(date) <= 14, "the second week of", ifelse(lubridate::day(date) <= 21, "the third week of", "the end of"))) timing } datedescr <- function(date){ paste(weekinmonth(date), lubridate::month(date, label = T, abbr = F), lubridate::year(date)) } firstup <- function(x) { substr(x, 1, 1) <- toupper(substr(x, 1, 1)) x } firstdown <- function(x) { substr(x, 1, 1) <- tolower(substr(x, 1, 1)) x } fullstop <- function(x) { x <- paste0(x, ".") x } comma <- function(x) { x <- paste0(x, ",") x } brackets <- function(x) { x <- paste0("(", x, ")") x } durationapprox <- function(x, qualifier = "only"){ a <- ifelse(x == 1, ifelse(!is.null(qualifier), paste(qualifier, "1 day"), "1 day"), ifelse(x < 7, ifelse(!is.null(qualifier), paste(qualifier, x, "days"), paste(x, "days")), ifelse(round(x/7) == 1, "1 week", ifelse(x < 25, paste("about", round(x/7), "weeks"), ifelse(x/30 <= 1.33, "about 1 month", ifelse(x/30 < 1.66, "about 1 and a half month", ifelse((x/30) %% 1 < .33 | (x/30) %% 1 > .66, paste("about", round(x/30), "months"), paste("about", floor(x/30), "and a half months")))))))) a } sumrow <- function(dat){ func <- function(z) if (is.numeric(z)) sum(z) else '' sumrow <- as.data.frame(lapply(dat, func)) sumrow } propapprox <- function(x, onlycutoff = NA){ a <- ifelse(x < .01, "less than 1%", ifelse(is.na(onlycutoff) | (!is.na(onlycutoff) & x > onlycutoff), as.character(formattable::percent(x, digits = 0)), paste("only", as.character(formattable::percent(x, digits = 0))))) a } is_outlier <- function(x) { return(x < quantile(x, 0.25, na.rm = TRUE) - 1.5 * IQR(x, na.rm = TRUE) | x > quantile(x, 0.75, na.rm = TRUE) + 1.5 * IQR(x, na.rm = TRUE)) } percentmatchDF_intwer <- function(df, id, intwer){ # returns a dataframe with the percentage of shared answers for every observation combination in a three variable format # inputs: a dataset, the IDs of the interviews (id) and the interviewers (intwer) either in df$id format or previously extracted # it is useful to subset the data before to include only substantial answers and extract the IDs so they do not count in the share of common answers df <- t(df) cols <- ncol(df) rows <- nrow(df) id1 <- as.integer(id) pmatch <- matrix(nrow = cols, ncol = cols) for (c in 1:cols){ comp <- df == df[,c] pmatch[c,] <- colSums(comp, na.rm=T)/rows } pmatch[upper.tri(pmatch)] <- NA diag(pmatch) <- NA id2 <- as.integer(rep(id[1], cols)) match <- pmatch[,1] pmatchdf <- as.data.frame(cbind(id1, id2, match)) for(c in 2:cols){ id2 <- as.integer(rep(id[c], cols)) match <- pmatch[,c] pmatchdf_temp <- as.data.frame(cbind(id1, id2, match)) pmatchdf <- rbind(pmatchdf, pmatchdf_temp) } pmatchdf$intwer1 <- rep(intwer, each = cols) pmatchdf$intwer2 <- rep(intwer, cols) pmatchdf <- subset(pmatchdf, is.na(pmatchdf$match) == FALSE) return(pmatchdf) } #FUNCTION 4: SINGLE COUNTRY DATASET FUCTION CatPCA_scntry <- function(ess,varnames,keptvars,mnint){ #convert vars so can be used in dplyr dpvarn<-enquo(varnames) dpkept<-enquo(keptvars) #datapreparation, get all counts needed for later subsetting esub<-Main %>% select(!!dpkept, !!dpvarn) esub<-esub %>% group_by(intnum) %>% dplyr::mutate(n_int= n()) esub$nmis<-rowSums(is.na(esub[varnames])) esub[varnames]<-sapply(esub[varnames], function(x) as.numeric(as.character(x))) esub$sd<-rowSds(as.matrix(esub[varnames]), na.rm = TRUE) #calculate scores esub$scores<-NA esub<-as.data.frame(esub) esub$scores[complete.cases(esub[varnames])]<-as.vector(princals(esub[complete.cases(esub[,varnames]),varnames], ndim=1)$objectscores*-1) #calculate table for pcat pcatable<-esub%>%filter(n_int>mnint & !is.na(scores))%>%group_by(intnum) %>% summarise(m_intscore=mean(scores), sd_intscore= sd(scores),min_intscore=min(scores), max_intscore=max(scores)) n_int<-esub %>% group_by(intnum) %>% dplyr::summarise(n_int=n()) completetable<-full_join(pcatable, n_int, by= "intnum") return(as.data.frame(completetable)) }