759 lines
25 KiB
R
759 lines
25 KiB
R
![]() |
# Created by Miha 20.2.2020
|
||
|
|
||
|
#------------------------------------ CUSTOM FUNCTIONS ----------------------------------#
|
||
|
# Description #
|
||
|
# We create custom functions which will enable us to create
|
||
|
# frequency tables (flextable), ggplots and average tables (flextable)
|
||
|
# And also modify this table (color, width, height)
|
||
|
|
||
|
#-------------------- PREPARE DATA FOR FREQUENCY TABLE ---------------------#
|
||
|
# Function which will create frequency and proportion tables
|
||
|
# for single dimension of quality (dimenzija kakovosti)
|
||
|
# and therefore prepare data for flextable input
|
||
|
# The pripose of frequency tables (generated from Q2 variable)
|
||
|
# is the for comparison of each UE to all UE
|
||
|
# FUnction is intended ONLY for the first chapter
|
||
|
#' of @UE_single_report!!!!!
|
||
|
#' See @chapter @I. @Ključni @podatki
|
||
|
#' @param dfS data frame for single UE (upravna enota)
|
||
|
#' @param dfA data frame for all UE
|
||
|
#' @param item columns for which we wish to prepare
|
||
|
#' @param likert if TRUE, we gave grid questions
|
||
|
#' with likert scale on scale 1-5. Therefore we
|
||
|
#' onnly calculate proportion of values from 3-5
|
||
|
#' (% delež strinjanja)
|
||
|
#' @return A data.frame.
|
||
|
#' @author MM (mat.miha@@gmail.com)
|
||
|
prepFreq <- function (dfS, dfA, item, likert = FALSE) {
|
||
|
if(!missing(dfS)) {
|
||
|
# Our client would like to have tables with
|
||
|
# the proportion of values/asnwers
|
||
|
# larger than 2, i.e.,
|
||
|
# means:
|
||
|
## 3 - se strinja
|
||
|
## 4 - se zelo strinja
|
||
|
## 5 - se popolnoma strinja
|
||
|
# Therefore we will recode
|
||
|
# values 1-2 in into 1 and
|
||
|
# 3-5 into 2, in order to
|
||
|
# get the right proportion
|
||
|
|
||
|
# PREPARE DATA FOR TABLES #
|
||
|
#=========================#
|
||
|
# Check if Frequency table will
|
||
|
# consist of single or
|
||
|
# multiple variable
|
||
|
# like in case of Q2
|
||
|
# i.e. first chapter
|
||
|
# in report
|
||
|
# Rbind data frame columns for
|
||
|
# frequency table purposes
|
||
|
df1 <- subset(dfS, select = item)
|
||
|
if (ncol(df1) > 1 | likert == TRUE) {
|
||
|
# Data for single UE -----------
|
||
|
# concatenates multiple vectors into a single vector
|
||
|
# Or cbinds two columns into single
|
||
|
# for frequency table purposes
|
||
|
df1 <- stack(df1)[1]
|
||
|
# In order to prevent errors we need to check
|
||
|
# if some value on scale from 1-5 is missing
|
||
|
# IF it is we need to generate it and
|
||
|
# label it as 0
|
||
|
l.scale <- as.character(rep(1:5))
|
||
|
# Find possible difference - missing values
|
||
|
dif <- setdiff(l.scale, rownames(df1))
|
||
|
# Check if tehre was any missing
|
||
|
if (length(dif) > 0) {
|
||
|
df1 <- rbind(df1, dif)
|
||
|
# Missing categories should have 0
|
||
|
df1[rownames(df1) %in% dif, "values"] <- NA
|
||
|
}
|
||
|
# Recode data
|
||
|
df1 <- memisc::recode(df1$values,
|
||
|
"1:2=1;2:5=2")
|
||
|
|
||
|
# Data for ALL UE --------------
|
||
|
df2 <- subset(dfA, select = item)
|
||
|
# Rbind data frame columns for
|
||
|
# frequency table purposes
|
||
|
df2 <- stack(df2)[1]
|
||
|
# Recode data
|
||
|
df2 <- memisc::recode(df2$values,
|
||
|
"1:2=1;2:5=2")
|
||
|
|
||
|
} else {
|
||
|
# If we have single variable
|
||
|
# for frequency table then
|
||
|
# Rbind data frame columns for
|
||
|
# frequency table purposes
|
||
|
# Data for single UE -----------
|
||
|
df1 <- subset(dfS, select = item)
|
||
|
df1 <- stack(df1)[, 1]
|
||
|
# Data for ALL UE --------------
|
||
|
df2 <- subset(dfA, select = item)
|
||
|
# Rbind data frame columns for
|
||
|
# frequency table purposes
|
||
|
df2 <- stack(df2)[, 1]
|
||
|
|
||
|
}
|
||
|
|
||
|
# FREQUENCY TABLES #
|
||
|
#==================#
|
||
|
#' Using @janitor and
|
||
|
#' @tabyl
|
||
|
# Freq table for single UE
|
||
|
# It frequency table consist
|
||
|
# of multiple columns the df1
|
||
|
# will be data frame otherwise
|
||
|
# a vector.
|
||
|
if (ncol(subset(dfS, select = item)) > 1 | likert == TRUE) {
|
||
|
#' @IMPORTANT!!! zanima nas samo druga vrstica
|
||
|
#' Torej podatki za delež strinjanja
|
||
|
#' ker smo "2" rekodirali kot strinjanje
|
||
|
#' in 1 kot nestrinjanje
|
||
|
# Pogledao še, če imamo kakšno manjkajolo vrednost oz NA
|
||
|
# V tem primeru moramo vzeti prvo vrstico
|
||
|
df1.freq <- tabyl(df1, show_na = FALSE)
|
||
|
df1.freq <- df1.freq %>% filter(df1 == 2)
|
||
|
# Freq table for ALL UE
|
||
|
df2.freq <- tabyl(df2, show_na = FALSE)[2,]
|
||
|
if(all(is.na(df2.freq))) {
|
||
|
df2.freq <- tabyl(df2, show_na = FALSE)
|
||
|
}
|
||
|
} else {
|
||
|
# Druge vrstice ni, ker imamo samo eno spremenljivko
|
||
|
# oz. postavko
|
||
|
df1.freq <- tabyl(df1, show_na = FALSE)
|
||
|
# Freq table for ALL UE
|
||
|
df2.freq <- tabyl(df2, show_na = FALSE)
|
||
|
}
|
||
|
# Tidy and finalize data
|
||
|
# Merge data by UE type column (single and all)
|
||
|
df.freq <-
|
||
|
merge(df1.freq,
|
||
|
df2.freq,
|
||
|
by.x = "df1",
|
||
|
by.y = "df2",
|
||
|
all = TRUE,
|
||
|
sort = FALSE)
|
||
|
# If there are missing values (NA)
|
||
|
# replace them with 0
|
||
|
# For frequency table
|
||
|
df.freq[is.na(df.freq)] <- 0
|
||
|
|
||
|
# Get % for single UE
|
||
|
df.freq$percent.x <-
|
||
|
formatC(as.numeric(df.freq$percent.x) * 100 ,
|
||
|
format = "f",
|
||
|
digits = 1)
|
||
|
# Get % for ALL UE
|
||
|
df.freq$percent.y <-
|
||
|
formatC(as.numeric(df.freq$percent.y) * 100 ,
|
||
|
format = "f",
|
||
|
digits = 1)
|
||
|
|
||
|
return(df.freq)
|
||
|
} else {
|
||
|
warning("Argument si missing. Please check which arguments
|
||
|
does the function require!")
|
||
|
}
|
||
|
|
||
|
}
|
||
|
#------------------ //PREPARE DATA FOR FREQUENCY TABLE// -------------------#
|
||
|
|
||
|
|
||
|
#---------------------------- WORD FLEXTABLE ------------------------------#
|
||
|
# Function which will create, color and modify
|
||
|
# flextable which will be exported to word document
|
||
|
# In report there are three type of tables.
|
||
|
# Table with two columns, three columns and four columns.
|
||
|
# Therefore function will be created so it will automatically
|
||
|
# detect the number of columns and add name and columns
|
||
|
#' @param df data frame with UE data (seperate and all)
|
||
|
#' @param footnote If flextable sohlda have footnote
|
||
|
#' @param col.width width of table columns (table size)
|
||
|
#' @param all IF TRUE we are doing table for all UE
|
||
|
#' SKUPNO POROČILO, therefore the extracolumn for comparison
|
||
|
#' of single against all UE is not needed
|
||
|
#' @return A flextable.
|
||
|
#' @author MM (mat.miha@@gmail.com)
|
||
|
wordTable <- function(df, footnote = FALSE, col.width, all = FALSE) {
|
||
|
# Create Flextable
|
||
|
tab <- flextable(df)
|
||
|
# Check whixh type of table we are doing -----------
|
||
|
if (ncol(df) == 5 & all == FALSE) {
|
||
|
# Table has five column which means
|
||
|
# we are doing table for "n" and "%"
|
||
|
# Rename table headers
|
||
|
tab <- set_header_labels(
|
||
|
tab,
|
||
|
stolpci = " ",
|
||
|
n.x = "UE",
|
||
|
percent.x = "UE",
|
||
|
n.y = "RS",
|
||
|
percent.y = "RS"
|
||
|
)
|
||
|
} else if (ncol(df) == 3 & all == FALSE) {
|
||
|
# Table has three column which means
|
||
|
# we are doing table mean
|
||
|
tab <- set_header_labels(tab,
|
||
|
stolpci = " ",
|
||
|
m.UE = "UE",
|
||
|
m.UE.all = "RS")
|
||
|
}
|
||
|
#---------------------------------
|
||
|
|
||
|
# MODIFY TABLE -------------------
|
||
|
# Add another row below header
|
||
|
# Again check the type of table
|
||
|
if (ncol(df) == 5 & all == FALSE) {
|
||
|
# with frequency and proportion
|
||
|
tab <- add_header_row(tab,
|
||
|
values = c("", "n", "%", "n", "%"),
|
||
|
top = FALSE)
|
||
|
|
||
|
} else if (ncol(df) == 3 & all == FALSE) {
|
||
|
tab <- add_header_row(tab,
|
||
|
values = c("", "Povp.", "Povp."),
|
||
|
top = FALSE)
|
||
|
|
||
|
}
|
||
|
|
||
|
# Add another row below header
|
||
|
# Again check the type of table
|
||
|
if (ncol(df) == 5 & all == FALSE) {
|
||
|
# Merge table header as Custumer wants
|
||
|
# In first row merge second and third cell
|
||
|
tab <- merge_at(tab,
|
||
|
i = 1,
|
||
|
j = 2:3,
|
||
|
part = "header")
|
||
|
# In first row merge fourth and fifth cell
|
||
|
tab <- merge_at(tab,
|
||
|
i = 1,
|
||
|
j = 4:5,
|
||
|
part = "header")
|
||
|
}
|
||
|
# Apply theme box to a flextable: borders around table
|
||
|
tab <- theme_box(tab)
|
||
|
# Centre Table values
|
||
|
tab <- align_text_col(tab, align = "center")
|
||
|
# And align table header to center
|
||
|
tab <- align_nottext_col(tab, align = "center", header = TRUE)
|
||
|
# But keep row names or row labels justified left
|
||
|
tab <- align(
|
||
|
tab,
|
||
|
i = 1:nrow(df),
|
||
|
j = 1,
|
||
|
align = "left",
|
||
|
part = "body"
|
||
|
)
|
||
|
# Check if table needs additional explanation
|
||
|
if (footnote == TRUE & ncol(df) == 5 & all == FALSE) {
|
||
|
tab <- tab %>% add_footer_lines(
|
||
|
paste0(
|
||
|
"% predstavlja delež strinjanja ",
|
||
|
"(3 - Niti se strinjam niti se ne strinjam,
|
||
|
4 - Se strinjam, 5 - Povsem se strinjam."
|
||
|
)
|
||
|
)
|
||
|
} else if (footnote == TRUE & ncol(df) == 3 & all == FALSE) {
|
||
|
tab <- tab %>% add_footer_lines(
|
||
|
paste0(
|
||
|
"Stolpca 'Povp.' predstavljata poprečno oceno vseh ",
|
||
|
"odgovorov na lestvici od 1- Sploh se ne strinjam ",
|
||
|
"do 5 - Povsem se strinjam."
|
||
|
)
|
||
|
)
|
||
|
|
||
|
} else if (all == TRUE & footnote != FALSE) {
|
||
|
# VELJA LE ZA TABELO V SKUPNEM POROČILU!!!!
|
||
|
# Nadalje preverimo ali je footnote za povprečja ali deleže
|
||
|
if (!any(grepl("%" , colnames(df)))) {
|
||
|
tab <- tab %>% add_footer_lines(
|
||
|
paste0(
|
||
|
"Stolpec 'Povp.' predstavlja poprečno oceno vseh ",
|
||
|
"odgovorov na lestvici od 1 - Sploh se ne strinjam ",
|
||
|
"do 5 - Povsem se strinjam."
|
||
|
)
|
||
|
)
|
||
|
} else {
|
||
|
tab <- tab %>% add_footer_lines(
|
||
|
paste0(
|
||
|
"% predstavlja delež strinjanja ",
|
||
|
"3 - Niti se strinjam niti se ne strinjam,
|
||
|
4 - Se strinjam, ",
|
||
|
"5 - Povsem se strinjam)."
|
||
|
)
|
||
|
)
|
||
|
}
|
||
|
}
|
||
|
# Modify table size, i.e., add width
|
||
|
tab <- width(tab, j = 1, width = col.width)
|
||
|
# Finally add background color for header
|
||
|
tab <- bg(tab, bg = "#81d4FA", part = "header")
|
||
|
# And Also backgorund color for the last row.
|
||
|
# BUT color last row only
|
||
|
# If the last row presents (has row) TOTAL (Skupaj")
|
||
|
if (colSums("Skupaj" == df)[[1]] > 0 ) {
|
||
|
tab <- bg(
|
||
|
tab,
|
||
|
i = nrow(df),
|
||
|
j = 1:ncol(df),
|
||
|
bg = "#81d4FA",
|
||
|
part = "body"
|
||
|
)
|
||
|
}
|
||
|
# Finally change the default fontsize Arial 11 to 10
|
||
|
tab <- fontsize(tab, size = 10, part = "all")
|
||
|
# Return flextable
|
||
|
return(tab)
|
||
|
}
|
||
|
#-------------------------- //WORD FLEXTABLE// ----------------------------#
|
||
|
|
||
|
|
||
|
|
||
|
#------------------------------- GGPLOT: BARPLOT --------------------------------#
|
||
|
# Function which will create vertical barplot for each field (sklop)
|
||
|
# Plot will display proportion or means for each UE compared to ALL UE
|
||
|
# according to values in TABLE 1, 2, ...
|
||
|
#' @param data data frame with UE data (seperate and all)
|
||
|
#' @param field numeric value indicating field (row number) for wich
|
||
|
#' we would like to creat a plot
|
||
|
#' @param xime Label of ggplot x-axis
|
||
|
#' @param yime Label of ggplot y-axis
|
||
|
#' @param col.width Dodging width, when different to the width of
|
||
|
#' the individual elements. This is useful when you want
|
||
|
#' to align narrow geoms with wider geoms
|
||
|
#' @param FieldALL Default is set to FALSE
|
||
|
#' If TRUE, the barplot will be created
|
||
|
#' for all six fields (sklopov) and compared to ALL UE
|
||
|
#' so in total 12 bars will be displayed in plot
|
||
|
#' @param stat Default value is "pct" which means
|
||
|
#' ggplot will plot percentages (0-100). If set to
|
||
|
#' "mena", plto will display mean of likert scale frm
|
||
|
#' 1-5
|
||
|
#' @return ggplot (list).
|
||
|
#' @author MM (mat.miha@@gmail.com)
|
||
|
graf <- function(data,
|
||
|
field,
|
||
|
xime,
|
||
|
yime,
|
||
|
col.width,
|
||
|
FieldALL = FALSE,
|
||
|
stat = "pct") {
|
||
|
# Prepare data frame for ggplot--------------------------------
|
||
|
# Each row represents unique field (sklop)
|
||
|
# End for each field we will create plot
|
||
|
# Check if we are doing for single or all fields
|
||
|
if(FieldALL == FALSE) {
|
||
|
data <- data[field,]
|
||
|
}
|
||
|
# For ploting purposes (plot numeric to x-axis)
|
||
|
# We need to change class from string to numeric
|
||
|
data[2:length(data)] <-
|
||
|
data[2:length(data)] %>% mutate_if(is.character, as.numeric)
|
||
|
# Now we need to reshape data: GGPLOT requirements for ploting
|
||
|
# more than one data for same field (sklop)
|
||
|
# Reshape data according to column stolpci
|
||
|
data <- melt(data, id.vars = 1)
|
||
|
# We need to change factor to character in order
|
||
|
# for us to be able to recode values in that column
|
||
|
data$variable <- as.character(as.factor(data$variable))
|
||
|
|
||
|
# Check if we plot percentages or mean
|
||
|
# as data preparation differs
|
||
|
if (stat == "pct") {
|
||
|
# We only need rows in which percentages are
|
||
|
# Due to the fact we will plot percentages
|
||
|
data <- data[grep("percent", data$variable),]
|
||
|
# Now we recode column "variable" as we want
|
||
|
# so label bars according to UE
|
||
|
data$variable <-
|
||
|
memisc::recode(data$variable, "'percent.x'='UE';'percent.y'='RS'")
|
||
|
} else {
|
||
|
# Now we recode column "variable" as we want
|
||
|
# so label bars according to UE
|
||
|
data$variable <-
|
||
|
memisc::recode(data$variable, "'m.UE'='UE';'m.UE.all'='RS'")
|
||
|
}
|
||
|
# Before starting with the ggplot first check
|
||
|
# if we plot percentages or means
|
||
|
# Plot PErcentages
|
||
|
if (stat == "pct") {
|
||
|
scale <- function(x) {
|
||
|
x <- scale_y_continuous(
|
||
|
labels = function(x)
|
||
|
paste0(x, "%"),
|
||
|
expand = c(0, 0),
|
||
|
limits = c(0, 105),
|
||
|
oob = rescale_none
|
||
|
)
|
||
|
return(x)
|
||
|
}
|
||
|
} else {
|
||
|
# Plot means
|
||
|
scale <- function(x) {
|
||
|
x <-
|
||
|
scale_y_continuous(expand = c(0, 0),
|
||
|
limits = c(1, 5.4),
|
||
|
oob = rescale_none)
|
||
|
return(x)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
# Start with ggplot---------------------------------
|
||
|
p <-
|
||
|
ggplot(data, aes(
|
||
|
# Check if we are doing for single or all fields
|
||
|
if (FieldALL == FALSE) {
|
||
|
x = variable
|
||
|
} else {
|
||
|
x = stolpci
|
||
|
},
|
||
|
y = value,
|
||
|
fill = as.factor(variable)
|
||
|
)) +
|
||
|
# a theme with a white background
|
||
|
theme_bw() +
|
||
|
# Barplot z širino stolpcev 0.4 in position dodge pomeni primerjava (glej graf)
|
||
|
geom_bar(stat = "identity", position = "dodge") +
|
||
|
# Horizontalna postavitev grafa
|
||
|
coord_flip()
|
||
|
# Y-axis should start from 0-105 with % as labels
|
||
|
#' or as 1 - 5 as means. According to @stat parameter
|
||
|
p <- p + scale(x) +
|
||
|
# barva stolpcev (name = "") ker ne želimo še poimenovati "legendo" grafa
|
||
|
# # Define colors for bars:
|
||
|
scale_fill_manual(values = c("#0288D1", "#81d4FA")) +
|
||
|
# Ime x in y osi
|
||
|
labs(x = xime, y = yime, "fill" = "") +
|
||
|
# Legenda naj se prikaže pod grafom
|
||
|
theme(legend.position = "bottom") +
|
||
|
# V posameznih stolpcih bodo prikazana povprečja posameznih kategorij
|
||
|
# Pozicija prikazanih povprečjih pa bo vedno tam, kjer je maksimalna vrednost
|
||
|
geom_text(
|
||
|
aes(
|
||
|
# Check if we are ploting single quality dimension or
|
||
|
# all quality dimensions
|
||
|
if (FieldALL == FALSE) {
|
||
|
x = variable
|
||
|
} else {
|
||
|
x = stolpci
|
||
|
},
|
||
|
# Check if we are adding labels and the top ob each
|
||
|
# bar for percentages or means
|
||
|
if (stat == "pct") {
|
||
|
# When labelingf percentages at the top
|
||
|
# of the each bar we add 1.5 when
|
||
|
# we plot percentages. otherwise
|
||
|
# value/labels is too near the bar border
|
||
|
y = max(value + 1.5, na.rm = TRUE)
|
||
|
} else {
|
||
|
y = max(value + 0.2, na.rm = TRUE)
|
||
|
},
|
||
|
label = ifelse(is.nan(value), "", sprintf("%1.1f", value))
|
||
|
),
|
||
|
angle = 0,
|
||
|
position = position_dodge(width = 0.6),
|
||
|
size = 3.2
|
||
|
)
|
||
|
|
||
|
return(p)
|
||
|
}
|
||
|
#----------------------------- //GGPLOT: BARPLOT// ------------------------------#
|
||
|
|
||
|
|
||
|
|
||
|
#-------------------- CALCULATE MEAN FOR FLEXTABLE ---------------------#
|
||
|
# Function which will create frequency and proportion tables
|
||
|
# for single dimension of quality (dimenzija kakovosti)
|
||
|
# and therefore prepare data for flextable input
|
||
|
# The pripose of frequency tables (generated from Q2 variable)
|
||
|
# is the for comparison of each UE to all UE
|
||
|
# FUnction is intended ONLY for the first chapter
|
||
|
#' of @UE_single_report!!!!!
|
||
|
#' See @chapter @I. @Ključni @podatki
|
||
|
#' @param df1 data frame for selected UE (upravna enota)
|
||
|
#' @param df2 data frame for all UE
|
||
|
#' @param item columns for which we wish to prepare
|
||
|
#' @return A data.frame.
|
||
|
#' @author MM (mat.miha@@gmail.com)
|
||
|
prepMean <- function (df1, df2, item) {
|
||
|
if(!missing(df1)) {
|
||
|
|
||
|
# PREPARE DATA FOR TABLES #
|
||
|
#=========================#
|
||
|
# Data for single UE -----------
|
||
|
df1 <- subset(df1, select = item)
|
||
|
# Rbind data frame columns for
|
||
|
# frequency table purposes
|
||
|
df1 <- stack(df1)[1]
|
||
|
|
||
|
# Data for ALL UE --------------
|
||
|
df2 <- subset(df2, select = item)
|
||
|
# Rbind data frame columns for
|
||
|
# frequency table purposes
|
||
|
df2 <- stack(df2)[1]
|
||
|
|
||
|
# MEANS #
|
||
|
#=======#
|
||
|
# Single UE
|
||
|
m.UE <-
|
||
|
formatC(mean(as.numeric(df1[, 1]), na.rm = TRUE), format = "f", digits = 1)
|
||
|
# All UE
|
||
|
m.UE.all <-
|
||
|
formatC(mean(as.numeric(df2[, 1]), na.rm = TRUE), format = "f", digits = 1)
|
||
|
# Combine to data frame
|
||
|
UE.m <-
|
||
|
as.data.frame(cbind(m.UE, m.UE.all), stringsAsFactors = FALSE)
|
||
|
|
||
|
return(UE.m)
|
||
|
} else {
|
||
|
warning("Argument si missing.
|
||
|
Please check which arguments function require!")
|
||
|
}
|
||
|
|
||
|
}
|
||
|
#------------------ //CALCULATE MEAN FOR FLEXTABLE// -------------------#
|
||
|
|
||
|
|
||
|
#------------------- CONTINGENCY TABLE (CROSSTABS) ---------------------#
|
||
|
#' # Function which will create, contingency flextable (crosstable)
|
||
|
#' For each dimmension items and display proportion for
|
||
|
#' single UE and all UE
|
||
|
#' @param dfS data frame for single UE (upravna enota)
|
||
|
#' @param dfA data frame for all UE
|
||
|
#' @param item columns for which we wish to create crosstabs
|
||
|
#' @param dim.nam Names of flextable rows (dimension item names)
|
||
|
#' @UPDATE MARCH 2020: client would like to have stacked bar plot
|
||
|
#' besides every contingency table. But this true only for
|
||
|
#' single UE report so we add this as @pl argument
|
||
|
#' @param
|
||
|
#' #' @return A flextable or data frame. It dependes on @pl argument
|
||
|
#' @author MM (mat.miha@@gmail.com)
|
||
|
# Contingency flextable
|
||
|
contingTable <- function(dfS, dfA, item, dim.nam, pl = FALSE) {
|
||
|
# GET UE DATA -----------------------------------
|
||
|
# Single UE
|
||
|
df1 <- subset(dfS, select = item)
|
||
|
# All UE
|
||
|
df2 <- subset(dfA, select = item)
|
||
|
|
||
|
# PREPARE DATA FOR SINGLE UE----------------------
|
||
|
# Data for each dimenzion of single UE
|
||
|
df1.dim <- apply(df1, 2, function(x)
|
||
|
tabyl(x, show_missing_levels = TRUE))
|
||
|
# Get %
|
||
|
df1.dim <- lapply(df1.dim, function(x) {
|
||
|
x[3] <- round(x[3] * 100, 1)
|
||
|
x
|
||
|
})
|
||
|
# Loop and prepare data for all items of dimension
|
||
|
df1.tb <- lapply(seq_along(df1.dim), function(x) {
|
||
|
# Transfose data for contigency table purposes
|
||
|
# and select only dirst and third row, as
|
||
|
# we will add totala row with total n
|
||
|
df1.dim[[x]] <-
|
||
|
as.data.frame(t(df1.dim[[x]])[c(1, 3), ]) %>%
|
||
|
# first row as column names ofr table purposes
|
||
|
row_to_names(., row_number = 1) %>%
|
||
|
# Add total N of answers, also for table
|
||
|
# pruposes
|
||
|
cbind("n" = colSums(df1.dim[[x]][2]), .)
|
||
|
})
|
||
|
|
||
|
# Rbind data for flextable
|
||
|
# Rbind data for flextable
|
||
|
# V primeru ko delamo kontingenčno tabelo:
|
||
|
# se lahko zgodi, da se kategorije ne ujemajo in vrne error.
|
||
|
# Torej je potrebno preveriti, ali pri posamezni UE manjka
|
||
|
# kakšna kategorija glede na vse UE, ter to kategorijo
|
||
|
# ustvarimo in dodamo frekvenco 0
|
||
|
df1.tb <- suppressWarnings(do.call(dplyr::bind_rows, df1.tb))
|
||
|
df1.tb <- df1.tb[, !is.na(colnames(df1.tb))]
|
||
|
|
||
|
df1.tb <- df1.tb %>%mutate_if(is.factor, as.character)
|
||
|
# Id there is NA it means that the categori was missing
|
||
|
# So there is 0% of that answers
|
||
|
df1.tb[is.na(df1.tb)] <- 0
|
||
|
|
||
|
# PREPARE DATA FOR ALL UE----------------------
|
||
|
# Data for each dimenzion of single UE
|
||
|
df2.dim <- apply(df2, 2, function(x)
|
||
|
tabyl(x, show_missing_levels = TRUE))
|
||
|
# Get %
|
||
|
df2.dim <- lapply(df2.dim, function(x) {
|
||
|
x[3] <- round(x[3] * 100, 2)
|
||
|
x
|
||
|
})
|
||
|
# Loop and prepare data for all items of dimension
|
||
|
df2.tb <- lapply(seq_along(df2.dim), function(x) {
|
||
|
# Transfose data for contigency table purposes
|
||
|
# and select only dirst and third row, as
|
||
|
# we will add totala row with total n
|
||
|
df2.dim[[x]] <-
|
||
|
as.data.frame(t(df2.dim[[x]])[c(1, 3), ]) %>%
|
||
|
# first row as column names ofr table purposes
|
||
|
row_to_names(., row_number = 1) %>%
|
||
|
# Add total N of answers, also for table
|
||
|
# pruposes
|
||
|
cbind("n" = colSums(df2.dim[[x]][2]), .)
|
||
|
})
|
||
|
|
||
|
# Rbind data for flextable
|
||
|
# V primeru ko delamo kontingenčno tabelo:
|
||
|
# se lahko zgodi, da se kategorije ne ujemajo in vrne error.
|
||
|
# Torej je potrebno preveriti, ali pri posamezni UE manjka
|
||
|
# kakšna kategorija glede na vse UE, ter to kategorijo
|
||
|
# ustvarimo in dodamo frekvenco 0
|
||
|
df2.tb <- suppressWarnings(do.call(dplyr::bind_rows, df2.tb))
|
||
|
df2.tb <- df2.tb[, !is.na(colnames(df2.tb))]
|
||
|
df2.tb <- df2.tb %>%mutate_if(is.factor, as.character)
|
||
|
# Id there is NA it means that the category was missing
|
||
|
# So there is 0% of that answers
|
||
|
df2.tb[is.na(df2.tb)] <- 0
|
||
|
|
||
|
# CBIND SINGLE AND ALL UE DATA AND CREATE FLEXTABLE ----
|
||
|
# Finally add dimension item names
|
||
|
# And label UE
|
||
|
# Single UE
|
||
|
df1.tb <- cbind(UE="UE", dim.nam, df1.tb)
|
||
|
# All UE
|
||
|
df2.tb <- cbind(UE = "RS", dim.nam, df2.tb)
|
||
|
# Rbind data for table priposes
|
||
|
# USE bind_rows if walue on a likert scale is
|
||
|
# missing in single UE and is present in all UE
|
||
|
# If value is missing (e.g. 1) we still want to
|
||
|
# display 0 frequency for that value in table
|
||
|
# Use suppressWarnings to surpress warningas
|
||
|
# from Unequal factor levels: coercing to character
|
||
|
# Different nubmer of columns of two df
|
||
|
df <- suppressWarnings(bind_rows(df1.tb, df2.tb))
|
||
|
# NA to 0
|
||
|
df[is.na(df)] <- 0
|
||
|
# First check is some colnames are missing
|
||
|
# which means there is no frequency or no data
|
||
|
# for that. Therefore we create dummy column
|
||
|
# We want to display all labels in tables
|
||
|
col.nam.likert <- c("1", "2", "3", "4", "5")
|
||
|
# Find names of missing columns
|
||
|
Missing <- setdiff(col.nam.likert, colnames(df[, 4:ncol(df)]))
|
||
|
# Add them, filled with '0's
|
||
|
df[Missing] <- 0
|
||
|
# Reorder for table purposes
|
||
|
df <-
|
||
|
df %>% dplyr::select("UE", "dim.nam", "n", "1", "2", "3", "4", "5")
|
||
|
# Add colnames which will be displayed in
|
||
|
# flextable
|
||
|
colnames(df) <-
|
||
|
c(
|
||
|
"UE",
|
||
|
"Postavke",
|
||
|
"n",
|
||
|
"Sploh se ne strinjam (%)",
|
||
|
"Se ne strinjam (%)",
|
||
|
"Niti se strinjam niti se ne strinjam (%)",
|
||
|
"Se strinjam (%)",
|
||
|
"Povsem se strinjam (%)"
|
||
|
)
|
||
|
# Finally round N for table purposes
|
||
|
df$n <- format(df$n, digits = 0)
|
||
|
|
||
|
# Check if we need flextable or data frame for
|
||
|
# plot purposes
|
||
|
if (pl == FALSE) {
|
||
|
# CREATE FLEXTABLE ------------------------------
|
||
|
df.tab <- flextable(df, cwidth = .5)
|
||
|
# Merge cel in first columns so only name
|
||
|
# of UE is displayed: first si single UE
|
||
|
df.tab <- merge_at(df.tab, i = 1:nrow(df1.tb), j = 1:1)
|
||
|
# Merge cells in first columns so only name
|
||
|
# of RS is displayed: All UE
|
||
|
df.tab <-
|
||
|
merge_at(df.tab, i = (nrow(df1.tb) + 1):nrow(df), j = 1:1)
|
||
|
# Add border to seperate UE from RS
|
||
|
df.tab <- hline(
|
||
|
df.tab,
|
||
|
i = nrow(df1.tb),
|
||
|
border = fp_border(color = "black") ,
|
||
|
part = "body"
|
||
|
)
|
||
|
# Change fontsize so the large table will fit to one page
|
||
|
df.tab <- fontsize(df.tab, size = 8, part = "all")
|
||
|
# Apply theme box to a flextable: borders around table
|
||
|
df.tab <- theme_box(df.tab)
|
||
|
# And align table header to center
|
||
|
df.tab <-
|
||
|
align_nottext_col(df.tab, align = "center", header = TRUE)
|
||
|
# Return Felxtable---------------------
|
||
|
return(df.tab)
|
||
|
} else {
|
||
|
# We will plot data for single UE so we are only
|
||
|
# interested in UE data
|
||
|
df <- df[df == "UE",]
|
||
|
# Prepare data for ggplot
|
||
|
df <-
|
||
|
# Gather multiple columns and collapses into key-value pairs
|
||
|
df %>% gather(UE, Delez, `Sploh se ne strinjam (%)`:`Povsem se strinjam (%)`)
|
||
|
# For Plot purposes percentage needs to be numeric
|
||
|
df$Delez <- as.numeric(df$Delez)
|
||
|
|
||
|
return(df)
|
||
|
}
|
||
|
|
||
|
}
|
||
|
#----------------- //CONTINGENCY TABLE (CROSSTABS)// -------------------#
|
||
|
|
||
|
|
||
|
#--------------------- GGPLOT FOR CONTINGENCY TABLE DATA -----------------------#
|
||
|
#' @UPDATE MARCH 2020: After clients wishes
|
||
|
|
||
|
#' Function which will create,plot for each contingency table
|
||
|
#' for each of the dimension presented in report for seperate UE
|
||
|
#' @param data data frame prepared for ggplot (see function
|
||
|
#' # @contingTable)
|
||
|
#' @IMPORTANT: this function can be be used only after @contg.pl
|
||
|
#' was used. THEY ARE CONNECTED
|
||
|
#' @param x value for x axis
|
||
|
#' @param y Value for y axis
|
||
|
#' @param dFill The colors of filled objects, like bars
|
||
|
#' @param ynam y ayis lable
|
||
|
#' @return ggplot
|
||
|
#' @author MM (mat.miha@@gmail.com)
|
||
|
# Contingency flextable
|
||
|
cont.pl <- function(data, x, y, dFill, ynam) {
|
||
|
p <- ggplot(data, aes(
|
||
|
fill = dFill,
|
||
|
y = y,
|
||
|
# package forcats, fct_rev() is very handy for
|
||
|
# reversing factor levels while plotting
|
||
|
x = fct_rev(x),
|
||
|
group = fct_rev(dFill)
|
||
|
)) +
|
||
|
geom_bar(stat = "identity") +
|
||
|
# Color bars
|
||
|
scale_fill_brewer(guide = guide_legend(reverse = FALSE), palette = "Blues") +
|
||
|
# Add values/percentages in each bar category
|
||
|
geom_text(aes(label = paste0(y, "%")),
|
||
|
position = position_stack(vjust = 0.5),
|
||
|
size = 3) +
|
||
|
# remove space between axis & area-plot in ggplot2
|
||
|
scale_y_continuous(limits = c(0,101), expand = c(0, 0)) +
|
||
|
# Vertical plot
|
||
|
coord_flip() +
|
||
|
# Add new label names
|
||
|
xlab(ynam) + ylab("Delež") +
|
||
|
# Remove legrnd title andd add legend to two rows
|
||
|
guides(fill = guide_legend(nrow = 3, byrow = TRUE, title="")) +
|
||
|
# a theme with a white background
|
||
|
theme_bw() +
|
||
|
# Add legend to bottom
|
||
|
theme(legend.position="bottom")
|
||
|
|
||
|
|
||
|
return(p)
|
||
|
|
||
|
}
|
||
|
#------------------- //GGPLOT FOR CONTINGENCY TABLE DATA// ---------------------#
|