72 lines
3.0 KiB
R
72 lines
3.0 KiB
R
#' Build the survey structure table for a set of survey data columns
#'
#' Looks up every survey data column (except the "_text" companion fields) in
#' the questions file and, when that is not enough, in the items file. Columns
#' still unmatched are finally matched through the part of their name that
#' precedes the last underscore.
#'
#' Relies on data.table (`fread`, `setkey`, `rbindlist`, `:=`) being attached
#' by the surrounding file.
#'
#' @param colnames.dsa Character vector with the column names of the survey
#'   data file.
#' @param questions.file Path of the questions file. The first line is
#'   skipped and columns 2, 5, 6, 8, 9 and 10 are read as `question.id`,
#'   `variable`, `tip`, `size`, `visible` and `params`.
#' @param items.file Path of the items file. The first line is skipped and
#'   columns 2, 3 and 4 are read as `question.id`, `item.id` and `variable`.
#'   Only read when some data column is missing from the questions file.
#' @return A data.table ordered by `variable` holding the matched question
#'   (and, in the items branch, item) attributes. If some variable has no
#'   `tip` after all matching steps, a character vector with one
#'   "No match found for: " message per such variable is returned instead.
gen.survey.str <- function(colnames.dsa, questions.file, items.file) {
  # Import the questions file (columns are picked by position).
  questions <- fread(questions.file, skip = 1, header = FALSE,
                     select = c(2, 5, 6, 8, 9, 10),
                     col.names = c("question.id", "variable", "tip",
                                   "size", "visible", "params"))

  # Variable list from the survey data file, without the "_text" fields.
  # NOTE(review): the original comment also mentioned removing "recnum", but
  # no code ever did so; behaviour kept as is - confirm whether it is needed.
  var.data <- colnames.dsa[!endsWith(colnames.dsa, "_text")]

  survey.str <- data.table(variable = var.data)
  setkey(survey.str, "variable")

  # Simple case: every data variable is a question variable.
  if (all(var.data %in% questions$variable)) {
    setkey(questions, "variable")
    return(questions[survey.str, ])
  }

  # Otherwise the items file is needed as well.
  items <- fread(items.file, skip = 1, header = FALSE,
                 select = c(2, 3, 4),
                 col.names = c("question.id", "item.id", "variable"))
  setkey(items, "question.id")
  setkey(questions, "question.id")

  # setkey() reorders `questions` by reference, so the row mask has to be
  # derived from the table as it is now, not from a vector captured earlier.
  in.data <- questions$variable %in% var.data

  # Questions present in the data, plus the items of every question that is
  # not (those items inherit the attributes of their question).
  survey.str.qi <- rbindlist(
    list(questions[in.data, ],
         items[questions[!in.data, -"variable", with = FALSE], nomatch = 0L]),
    fill = TRUE
  )

  # Merge questions + items with the survey data variables.
  setkey(survey.str.qi, "variable")
  survey.str <- survey.str.qi[survey.str, ]

  # `tip` is the match indicator: `item.id` is NA by construction for rows
  # that come from the questions file, so it must not be part of this test.
  if (!anyNA(survey.str$tip)) {
    return(survey.str)
  }

  # Fallback: match the remaining variables through the part of their name
  # before the last underscore ("" when the name contains no underscore).
  prefix <- substr(survey.str$variable, 1,
                   regexpr("\\_[^\\_]*$", survey.str$variable) - 1)

  # Only question/item rows that are not data variables themselves may act
  # as a parent. With duplicated parent names the first one is used.
  parents <- survey.str.qi[!(variable %in% survey.str$variable), ]
  hit <- match(prefix, parents$variable)
  index <- is.na(survey.str$tip) & !is.na(hit)

  if (any(index)) {
    cols <- c("question.id", "item.id", "tip", "visible", "size", "params")
    parent.rows <- hit[index]
    # Row-aligned with survey.str[index, ], so every variable receives the
    # values of its own parent.
    parent.vals <- parents[parent.rows, cols, with = FALSE]
    survey.str[index, (cols) := parent.vals]
  }

  # Return the table when everything is matched, else the messages.
  if (!anyNA(survey.str$tip)) {
    return(survey.str)
  }
  paste("No match found for: ", survey.str[is.na(tip), variable])
}
|