# Created by Miha 20.2.2020 #------------------------------------ CUSTOM FUNCTIONS ----------------------------------# # Description # # We create custom functions which will enable us to create # frequency tables (flextable), ggplots and average tables (flextable) # And also modify this table (color, width, height) #-------------------- PREPARE DATA FOR FREQUENCY TABLE ---------------------# # Function which will create frequency and proportion tables # for single dimension of quality (dimenzija kakovosti) # and therefore prepare data for flextable input # The pripose of frequency tables (generated from Q2 variable) # is the for comparison of each UE to all UE # FUnction is intended ONLY for the first chapter #' of @UE_single_report!!!!! #' See @chapter @I. @Ključni @podatki #' @param dfS data frame for single UE (upravna enota) #' @param dfA data frame for all UE #' @param item columns for which we wish to prepare #' @param likert if TRUE, we gave grid questions #' with likert scale on scale 1-5. Therefore we #' onnly calculate proportion of values from 3-5 #' (% delež strinjanja) #' @return A data.frame. #' @author MM (mat.miha@@gmail.com) prepFreq <- function (dfS, dfA, item, likert = FALSE) { if(!missing(dfS)) { # Our client would like to have tables with # the proportion of values/asnwers # larger than 2, i.e., # means: ## 3 - se strinja ## 4 - se zelo strinja ## 5 - se popolnoma strinja # Therefore we will recode # values 1-2 in into 1 and # 3-5 into 2, in order to # get the right proportion # PREPARE DATA FOR TABLES # #=========================# # Check if Frequency table will # consist of single or # multiple variable # like in case of Q2 # i.e. first chapter # in report # Rbind data frame columns for # frequency table purposes df1 <- subset(dfS, select = item) if (ncol(df1) > 1 | likert == TRUE) { # Data for single UE ----------- # concatenates multiple vectors into a single vector # Or cbinds two columns into single # for frequency table purposes df1 <- stack(df1)[1] # In order to prevent errors we need to check # if some value on scale from 1-5 is missing # IF it is we need to generate it and # label it as 0 l.scale <- as.character(rep(1:5)) # Find possible difference - missing values dif <- setdiff(l.scale, rownames(df1)) # Check if tehre was any missing if (length(dif) > 0) { df1 <- rbind(df1, dif) # Missing categories should have 0 df1[rownames(df1) %in% dif, "values"] <- NA } # Recode data df1 <- memisc::recode(df1$values, "1:2=1;2:5=2") # Data for ALL UE -------------- df2 <- subset(dfA, select = item) # Rbind data frame columns for # frequency table purposes df2 <- stack(df2)[1] # Recode data df2 <- memisc::recode(df2$values, "1:2=1;2:5=2") } else { # If we have single variable # for frequency table then # Rbind data frame columns for # frequency table purposes # Data for single UE ----------- df1 <- subset(dfS, select = item) df1 <- stack(df1)[, 1] # Data for ALL UE -------------- df2 <- subset(dfA, select = item) # Rbind data frame columns for # frequency table purposes df2 <- stack(df2)[, 1] } # FREQUENCY TABLES # #==================# #' Using @janitor and #' @tabyl # Freq table for single UE # It frequency table consist # of multiple columns the df1 # will be data frame otherwise # a vector. if (ncol(subset(dfS, select = item)) > 1 | likert == TRUE) { #' @IMPORTANT!!! zanima nas samo druga vrstica #' Torej podatki za delež strinjanja #' ker smo "2" rekodirali kot strinjanje #' in 1 kot nestrinjanje # Pogledao še, če imamo kakšno manjkajolo vrednost oz NA # V tem primeru moramo vzeti prvo vrstico df1.freq <- tabyl(df1, show_na = FALSE) df1.freq <- df1.freq %>% filter(df1 == 2) # Freq table for ALL UE df2.freq <- tabyl(df2, show_na = FALSE)[2,] if(all(is.na(df2.freq))) { df2.freq <- tabyl(df2, show_na = FALSE) } } else { # Druge vrstice ni, ker imamo samo eno spremenljivko # oz. postavko df1.freq <- tabyl(df1, show_na = FALSE) # Freq table for ALL UE df2.freq <- tabyl(df2, show_na = FALSE) } # Tidy and finalize data # Merge data by UE type column (single and all) df.freq <- merge(df1.freq, df2.freq, by.x = "df1", by.y = "df2", all = TRUE, sort = FALSE) # If there are missing values (NA) # replace them with 0 # For frequency table df.freq[is.na(df.freq)] <- 0 # Get % for single UE df.freq$percent.x <- formatC(as.numeric(df.freq$percent.x) * 100 , format = "f", digits = 1) # Get % for ALL UE df.freq$percent.y <- formatC(as.numeric(df.freq$percent.y) * 100 , format = "f", digits = 1) return(df.freq) } else { warning("Argument si missing. Please check which arguments does the function require!") } } #------------------ //PREPARE DATA FOR FREQUENCY TABLE// -------------------# #---------------------------- WORD FLEXTABLE ------------------------------# # Function which will create, color and modify # flextable which will be exported to word document # In report there are three type of tables. # Table with two columns, three columns and four columns. # Therefore function will be created so it will automatically # detect the number of columns and add name and columns #' @param df data frame with UE data (seperate and all) #' @param footnote If flextable sohlda have footnote #' @param col.width width of table columns (table size) #' @param all IF TRUE we are doing table for all UE #' SKUPNO POROČILO, therefore the extracolumn for comparison #' of single against all UE is not needed #' @return A flextable. #' @author MM (mat.miha@@gmail.com) wordTable <- function(df, footnote = FALSE, col.width, all = FALSE) { # Create Flextable tab <- flextable(df) # Check whixh type of table we are doing ----------- if (ncol(df) == 5 & all == FALSE) { # Table has five column which means # we are doing table for "n" and "%" # Rename table headers tab <- set_header_labels( tab, stolpci = " ", n.x = "UE", percent.x = "UE", n.y = "RS", percent.y = "RS" ) } else if (ncol(df) == 3 & all == FALSE) { # Table has three column which means # we are doing table mean tab <- set_header_labels(tab, stolpci = " ", m.UE = "UE", m.UE.all = "RS") } #--------------------------------- # MODIFY TABLE ------------------- # Add another row below header # Again check the type of table if (ncol(df) == 5 & all == FALSE) { # with frequency and proportion tab <- add_header_row(tab, values = c("", "n", "%", "n", "%"), top = FALSE) } else if (ncol(df) == 3 & all == FALSE) { tab <- add_header_row(tab, values = c("", "Povp.", "Povp."), top = FALSE) } # Add another row below header # Again check the type of table if (ncol(df) == 5 & all == FALSE) { # Merge table header as Custumer wants # In first row merge second and third cell tab <- merge_at(tab, i = 1, j = 2:3, part = "header") # In first row merge fourth and fifth cell tab <- merge_at(tab, i = 1, j = 4:5, part = "header") } # Apply theme box to a flextable: borders around table tab <- theme_box(tab) # Centre Table values tab <- align_text_col(tab, align = "center") # And align table header to center tab <- align_nottext_col(tab, align = "center", header = TRUE) # But keep row names or row labels justified left tab <- align( tab, i = 1:nrow(df), j = 1, align = "left", part = "body" ) # Check if table needs additional explanation if (footnote == TRUE & ncol(df) == 5 & all == FALSE) { tab <- tab %>% add_footer_lines( paste0( "% predstavlja delež strinjanja ", "(3 - Niti se strinjam niti se ne strinjam, 4 - Se strinjam, 5 - Povsem se strinjam." ) ) } else if (footnote == TRUE & ncol(df) == 3 & all == FALSE) { tab <- tab %>% add_footer_lines( paste0( "Stolpca 'Povp.' predstavljata poprečno oceno vseh ", "odgovorov na lestvici od 1- Sploh se ne strinjam ", "do 5 - Povsem se strinjam." ) ) } else if (all == TRUE & footnote != FALSE) { # VELJA LE ZA TABELO V SKUPNEM POROČILU!!!! # Nadalje preverimo ali je footnote za povprečja ali deleže if (!any(grepl("%" , colnames(df)))) { tab <- tab %>% add_footer_lines( paste0( "Stolpec 'Povp.' predstavlja poprečno oceno vseh ", "odgovorov na lestvici od 1 - Sploh se ne strinjam ", "do 5 - Povsem se strinjam." ) ) } else { tab <- tab %>% add_footer_lines( paste0( "% predstavlja delež strinjanja ", "3 - Niti se strinjam niti se ne strinjam, 4 - Se strinjam, ", "5 - Povsem se strinjam)." ) ) } } # Modify table size, i.e., add width tab <- width(tab, j = 1, width = col.width) # Finally add background color for header tab <- bg(tab, bg = "#81d4FA", part = "header") # And Also backgorund color for the last row. # BUT color last row only # If the last row presents (has row) TOTAL (Skupaj") if (colSums("Skupaj" == df)[[1]] > 0 ) { tab <- bg( tab, i = nrow(df), j = 1:ncol(df), bg = "#81d4FA", part = "body" ) } # Finally change the default fontsize Arial 11 to 10 tab <- fontsize(tab, size = 10, part = "all") # Return flextable return(tab) } #-------------------------- //WORD FLEXTABLE// ----------------------------# #------------------------------- GGPLOT: BARPLOT --------------------------------# # Function which will create vertical barplot for each field (sklop) # Plot will display proportion or means for each UE compared to ALL UE # according to values in TABLE 1, 2, ... #' @param data data frame with UE data (seperate and all) #' @param field numeric value indicating field (row number) for wich #' we would like to creat a plot #' @param xime Label of ggplot x-axis #' @param yime Label of ggplot y-axis #' @param col.width Dodging width, when different to the width of #' the individual elements. This is useful when you want #' to align narrow geoms with wider geoms #' @param FieldALL Default is set to FALSE #' If TRUE, the barplot will be created #' for all six fields (sklopov) and compared to ALL UE #' so in total 12 bars will be displayed in plot #' @param stat Default value is "pct" which means #' ggplot will plot percentages (0-100). If set to #' "mena", plto will display mean of likert scale frm #' 1-5 #' @return ggplot (list). #' @author MM (mat.miha@@gmail.com) graf <- function(data, field, xime, yime, col.width, FieldALL = FALSE, stat = "pct") { # Prepare data frame for ggplot-------------------------------- # Each row represents unique field (sklop) # End for each field we will create plot # Check if we are doing for single or all fields if(FieldALL == FALSE) { data <- data[field,] } # For ploting purposes (plot numeric to x-axis) # We need to change class from string to numeric data[2:length(data)] <- data[2:length(data)] %>% mutate_if(is.character, as.numeric) # Now we need to reshape data: GGPLOT requirements for ploting # more than one data for same field (sklop) # Reshape data according to column stolpci data <- melt(data, id.vars = 1) # We need to change factor to character in order # for us to be able to recode values in that column data$variable <- as.character(as.factor(data$variable)) # Check if we plot percentages or mean # as data preparation differs if (stat == "pct") { # We only need rows in which percentages are # Due to the fact we will plot percentages data <- data[grep("percent", data$variable),] # Now we recode column "variable" as we want # so label bars according to UE data$variable <- memisc::recode(data$variable, "'percent.x'='UE';'percent.y'='RS'") } else { # Now we recode column "variable" as we want # so label bars according to UE data$variable <- memisc::recode(data$variable, "'m.UE'='UE';'m.UE.all'='RS'") } # Before starting with the ggplot first check # if we plot percentages or means # Plot PErcentages if (stat == "pct") { scale <- function(x) { x <- scale_y_continuous( labels = function(x) paste0(x, "%"), expand = c(0, 0), limits = c(0, 105), oob = rescale_none ) return(x) } } else { # Plot means scale <- function(x) { x <- scale_y_continuous(expand = c(0, 0), limits = c(1, 5.4), oob = rescale_none) return(x) } } # Start with ggplot--------------------------------- p <- ggplot(data, aes( # Check if we are doing for single or all fields if (FieldALL == FALSE) { x = variable } else { x = stolpci }, y = value, fill = as.factor(variable) )) + # a theme with a white background theme_bw() + # Barplot z širino stolpcev 0.4 in position dodge pomeni primerjava (glej graf) geom_bar(stat = "identity", position = "dodge") + # Horizontalna postavitev grafa coord_flip() # Y-axis should start from 0-105 with % as labels #' or as 1 - 5 as means. According to @stat parameter p <- p + scale(x) + # barva stolpcev (name = "") ker ne želimo še poimenovati "legendo" grafa # # Define colors for bars: scale_fill_manual(values = c("#0288D1", "#81d4FA")) + # Ime x in y osi labs(x = xime, y = yime, "fill" = "") + # Legenda naj se prikaže pod grafom theme(legend.position = "bottom") + # V posameznih stolpcih bodo prikazana povprečja posameznih kategorij # Pozicija prikazanih povprečjih pa bo vedno tam, kjer je maksimalna vrednost geom_text( aes( # Check if we are ploting single quality dimension or # all quality dimensions if (FieldALL == FALSE) { x = variable } else { x = stolpci }, # Check if we are adding labels and the top ob each # bar for percentages or means if (stat == "pct") { # When labelingf percentages at the top # of the each bar we add 1.5 when # we plot percentages. otherwise # value/labels is too near the bar border y = max(value + 1.5, na.rm = TRUE) } else { y = max(value + 0.2, na.rm = TRUE) }, label = ifelse(is.nan(value), "", sprintf("%1.1f", value)) ), angle = 0, position = position_dodge(width = 0.6), size = 3.2 ) return(p) } #----------------------------- //GGPLOT: BARPLOT// ------------------------------# #-------------------- CALCULATE MEAN FOR FLEXTABLE ---------------------# # Function which will create frequency and proportion tables # for single dimension of quality (dimenzija kakovosti) # and therefore prepare data for flextable input # The pripose of frequency tables (generated from Q2 variable) # is the for comparison of each UE to all UE # FUnction is intended ONLY for the first chapter #' of @UE_single_report!!!!! #' See @chapter @I. @Ključni @podatki #' @param df1 data frame for selected UE (upravna enota) #' @param df2 data frame for all UE #' @param item columns for which we wish to prepare #' @return A data.frame. #' @author MM (mat.miha@@gmail.com) prepMean <- function (df1, df2, item) { if(!missing(df1)) { # PREPARE DATA FOR TABLES # #=========================# # Data for single UE ----------- df1 <- subset(df1, select = item) # Rbind data frame columns for # frequency table purposes df1 <- stack(df1)[1] # Data for ALL UE -------------- df2 <- subset(df2, select = item) # Rbind data frame columns for # frequency table purposes df2 <- stack(df2)[1] # MEANS # #=======# # Single UE m.UE <- formatC(mean(as.numeric(df1[, 1]), na.rm = TRUE), format = "f", digits = 1) # All UE m.UE.all <- formatC(mean(as.numeric(df2[, 1]), na.rm = TRUE), format = "f", digits = 1) # Combine to data frame UE.m <- as.data.frame(cbind(m.UE, m.UE.all), stringsAsFactors = FALSE) return(UE.m) } else { warning("Argument si missing. Please check which arguments function require!") } } #------------------ //CALCULATE MEAN FOR FLEXTABLE// -------------------# #------------------- CONTINGENCY TABLE (CROSSTABS) ---------------------# #' # Function which will create, contingency flextable (crosstable) #' For each dimmension items and display proportion for #' single UE and all UE #' @param dfS data frame for single UE (upravna enota) #' @param dfA data frame for all UE #' @param item columns for which we wish to create crosstabs #' @param dim.nam Names of flextable rows (dimension item names) #' @UPDATE MARCH 2020: client would like to have stacked bar plot #' besides every contingency table. But this true only for #' single UE report so we add this as @pl argument #' @param #' #' @return A flextable or data frame. It dependes on @pl argument #' @author MM (mat.miha@@gmail.com) # Contingency flextable contingTable <- function(dfS, dfA, item, dim.nam, pl = FALSE) { # GET UE DATA ----------------------------------- # Single UE df1 <- subset(dfS, select = item) # All UE df2 <- subset(dfA, select = item) # PREPARE DATA FOR SINGLE UE---------------------- # Data for each dimenzion of single UE df1.dim <- apply(df1, 2, function(x) tabyl(x, show_missing_levels = TRUE)) # Get % df1.dim <- lapply(df1.dim, function(x) { x[3] <- round(x[3] * 100, 1) x }) # Loop and prepare data for all items of dimension df1.tb <- lapply(seq_along(df1.dim), function(x) { # Transfose data for contigency table purposes # and select only dirst and third row, as # we will add totala row with total n df1.dim[[x]] <- as.data.frame(t(df1.dim[[x]])[c(1, 3), ]) %>% # first row as column names ofr table purposes row_to_names(., row_number = 1) %>% # Add total N of answers, also for table # pruposes cbind("n" = colSums(df1.dim[[x]][2]), .) }) # Rbind data for flextable # Rbind data for flextable # V primeru ko delamo kontingenčno tabelo: # se lahko zgodi, da se kategorije ne ujemajo in vrne error. # Torej je potrebno preveriti, ali pri posamezni UE manjka # kakšna kategorija glede na vse UE, ter to kategorijo # ustvarimo in dodamo frekvenco 0 df1.tb <- suppressWarnings(do.call(dplyr::bind_rows, df1.tb)) df1.tb <- df1.tb[, !is.na(colnames(df1.tb))] df1.tb <- df1.tb %>%mutate_if(is.factor, as.character) # Id there is NA it means that the categori was missing # So there is 0% of that answers df1.tb[is.na(df1.tb)] <- 0 # PREPARE DATA FOR ALL UE---------------------- # Data for each dimenzion of single UE df2.dim <- apply(df2, 2, function(x) tabyl(x, show_missing_levels = TRUE)) # Get % df2.dim <- lapply(df2.dim, function(x) { x[3] <- round(x[3] * 100, 2) x }) # Loop and prepare data for all items of dimension df2.tb <- lapply(seq_along(df2.dim), function(x) { # Transfose data for contigency table purposes # and select only dirst and third row, as # we will add totala row with total n df2.dim[[x]] <- as.data.frame(t(df2.dim[[x]])[c(1, 3), ]) %>% # first row as column names ofr table purposes row_to_names(., row_number = 1) %>% # Add total N of answers, also for table # pruposes cbind("n" = colSums(df2.dim[[x]][2]), .) }) # Rbind data for flextable # V primeru ko delamo kontingenčno tabelo: # se lahko zgodi, da se kategorije ne ujemajo in vrne error. # Torej je potrebno preveriti, ali pri posamezni UE manjka # kakšna kategorija glede na vse UE, ter to kategorijo # ustvarimo in dodamo frekvenco 0 df2.tb <- suppressWarnings(do.call(dplyr::bind_rows, df2.tb)) df2.tb <- df2.tb[, !is.na(colnames(df2.tb))] df2.tb <- df2.tb %>%mutate_if(is.factor, as.character) # Id there is NA it means that the category was missing # So there is 0% of that answers df2.tb[is.na(df2.tb)] <- 0 # CBIND SINGLE AND ALL UE DATA AND CREATE FLEXTABLE ---- # Finally add dimension item names # And label UE # Single UE df1.tb <- cbind(UE="UE", dim.nam, df1.tb) # All UE df2.tb <- cbind(UE = "RS", dim.nam, df2.tb) # Rbind data for table priposes # USE bind_rows if walue on a likert scale is # missing in single UE and is present in all UE # If value is missing (e.g. 1) we still want to # display 0 frequency for that value in table # Use suppressWarnings to surpress warningas # from Unequal factor levels: coercing to character # Different nubmer of columns of two df df <- suppressWarnings(bind_rows(df1.tb, df2.tb)) # NA to 0 df[is.na(df)] <- 0 # First check is some colnames are missing # which means there is no frequency or no data # for that. Therefore we create dummy column # We want to display all labels in tables col.nam.likert <- c("1", "2", "3", "4", "5") # Find names of missing columns Missing <- setdiff(col.nam.likert, colnames(df[, 4:ncol(df)])) # Add them, filled with '0's df[Missing] <- 0 # Reorder for table purposes df <- df %>% dplyr::select("UE", "dim.nam", "n", "1", "2", "3", "4", "5") # Add colnames which will be displayed in # flextable colnames(df) <- c( "UE", "Postavke", "n", "Sploh se ne strinjam (%)", "Se ne strinjam (%)", "Niti se strinjam niti se ne strinjam (%)", "Se strinjam (%)", "Povsem se strinjam (%)" ) # Finally round N for table purposes df$n <- format(df$n, digits = 0) # Check if we need flextable or data frame for # plot purposes if (pl == FALSE) { # CREATE FLEXTABLE ------------------------------ df.tab <- flextable(df, cwidth = .5) # Merge cel in first columns so only name # of UE is displayed: first si single UE df.tab <- merge_at(df.tab, i = 1:nrow(df1.tb), j = 1:1) # Merge cells in first columns so only name # of RS is displayed: All UE df.tab <- merge_at(df.tab, i = (nrow(df1.tb) + 1):nrow(df), j = 1:1) # Add border to seperate UE from RS df.tab <- hline( df.tab, i = nrow(df1.tb), border = fp_border(color = "black") , part = "body" ) # Change fontsize so the large table will fit to one page df.tab <- fontsize(df.tab, size = 8, part = "all") # Apply theme box to a flextable: borders around table df.tab <- theme_box(df.tab) # And align table header to center df.tab <- align_nottext_col(df.tab, align = "center", header = TRUE) # Return Felxtable--------------------- return(df.tab) } else { # We will plot data for single UE so we are only # interested in UE data df <- df[df == "UE",] # Prepare data for ggplot df <- # Gather multiple columns and collapses into key-value pairs df %>% gather(UE, Delez, `Sploh se ne strinjam (%)`:`Povsem se strinjam (%)`) # For Plot purposes percentage needs to be numeric df$Delez <- as.numeric(df$Delez) return(df) } } #----------------- //CONTINGENCY TABLE (CROSSTABS)// -------------------# #--------------------- GGPLOT FOR CONTINGENCY TABLE DATA -----------------------# #' @UPDATE MARCH 2020: After clients wishes #' Function which will create,plot for each contingency table #' for each of the dimension presented in report for seperate UE #' @param data data frame prepared for ggplot (see function #' # @contingTable) #' @IMPORTANT: this function can be be used only after @contg.pl #' was used. THEY ARE CONNECTED #' @param x value for x axis #' @param y Value for y axis #' @param dFill The colors of filled objects, like bars #' @param ynam y ayis lable #' @return ggplot #' @author MM (mat.miha@@gmail.com) # Contingency flextable cont.pl <- function(data, x, y, dFill, ynam) { p <- ggplot(data, aes( fill = dFill, y = y, # package forcats, fct_rev() is very handy for # reversing factor levels while plotting x = fct_rev(x), group = fct_rev(dFill) )) + geom_bar(stat = "identity") + # Color bars scale_fill_brewer(guide = guide_legend(reverse = FALSE), palette = "Blues") + # Add values/percentages in each bar category geom_text(aes(label = paste0(y, "%")), position = position_stack(vjust = 0.5), size = 3) + # remove space between axis & area-plot in ggplot2 scale_y_continuous(limits = c(0,101), expand = c(0, 0)) + # Vertical plot coord_flip() + # Add new label names xlab(ynam) + ylab("Delež") + # Remove legrnd title andd add legend to two rows guides(fill = guide_legend(nrow = 3, byrow = TRUE, title="")) + # a theme with a white background theme_bw() + # Add legend to bottom theme(legend.position="bottom") return(p) } #------------------- //GGPLOT FOR CONTINGENCY TABLE DATA// ---------------------#