PREPARATION

Load packages

library(readxl)
library(ggplot2)
library(DT) # shiny app style table
library(formattable) # shiny app style table
library(leaflet) # gps map
library(shiny)
# install.packages("KoboconnectR") # https://cran.r-project.org/web/packages/KoboconnectR/KoboconnectR.pdf
library(KoboconnectR)
library(tidyr)
library(dplyr)
library(plotly)
packageVersion("KoboconnectR")
## [1] '1.2.2'

Load data directly via accessing the REST API of KoboToolbox

Functions, order and prepare dataframes

### Create custom made functions ----
# Create a function to activate the URL links
link_activator <- function(x) {
  sprintf('<a href="%s" target="_blank">%s</a>', x, x)
}
# Create a function to mark the duplicates in red, bold
dupl_checker <- formatter("span",
                          style = function(x) style(
                            color = ifelse(duplicated(x) == TRUE, "white", "black"),
                            background.color = ifelse(duplicated(x) == TRUE, "red", "white"),
                            font.weight = ifelse(duplicated(x) == TRUE, "bold", "normal")
                          ))
# Create a function to mark a duplicate in red, bold - WITHIN fullname
dupl_checker_within_fullname <- function(fullname_col, source_col) {
  formatter("span",
            style = function(x) style(
              color = ifelse(duplicated(paste(fullname_col, x)) & x %in% unique(x), "white", "black"),
              background.color = ifelse(duplicated(paste(fullname_col, x)) & x %in% unique(x), "red", "white"),
              font.weight = ifelse(duplicated(paste(fullname_col, x)) & x %in% unique(x), "bold", "normal")
            ))
}
# Create a function to mark the duplicates in green, bold
dupl_checker_masterlist <- formatter("span",
                          style = function(x) style(
                            color = ifelse(duplicated(x) == TRUE, "white", "black"),
                            background.color = ifelse(duplicated(x) == TRUE, "green", "white"),
                            font.weight = ifelse(duplicated(x) == TRUE, "bold", "normal")
                          ))
# Create a function to mark the missing in red, bold
missing_checker <- formatter("span",
                          style = function(x) style(
                            color = ifelse(is.na(x) == TRUE, "white", "black"),
                            background.color = ifelse(is.na(x) == TRUE, "red", "white"),
                            font.weight = ifelse(is.na(x) == TRUE, "bold", "normal")
                          ))
# Create a function to mark the ages outside 15-35 in orange, bold
age_checker <- formatter("span",
                         style = function(x) style(
                           color = ifelse(is.na(x) | x < 15 | x > 35, "white", "black"),
                           background.color = ifelse(is.na(x) | x < 15 | x > 35, "orange", "white"),
                           font.weight = ifelse(is.na(x) | x < 15 | x > 35, "bold", "normal")
                           ))
# Create a function to mark the age below 18 in orange, bold
age_18_checker <- formatter("span",
                         style = function(x) style(
                           color = ifelse(is.na(x) | x < 18, "white", "black"),
                           background.color = ifelse(is.na(x) | x < 18, "orange", "white"),
                           font.weight = ifelse(is.na(x) | x < 18, "bold", "normal")
                           ))
# Create a function to marks the gender men in orange, bold
gender_checker <- formatter("span",
                            style = function(x) style(
                              color = ifelse(grepl("_man", x, ignore.case = TRUE), "white", "black"),
                              background.color = ifelse(grepl("_man", x, ignore.case = TRUE), "orange", "white"),
                              font.weight = ifelse(grepl("_man", x, ignore.case = TRUE), "bold", "normal")
                            ))


### Reformat all 4 questionnaires ----
# stylist registration
df_sr <- purrr::map_df(stylist_reg_list, ~setNames(.x, paste0(names(.x), seq_along(.x)))) 
names(df_sr) <- gsub("\\d+$", "", names(df_sr)) # Remove digits from variable names
df_sr <- df_sr %>% mutate(fullname_s = paste(firstname_s, lastname_s, sep = " ")) # create fullname
df_sr <- df_sr %>% rename(submission_time_s = `_submission_time`,
                          id = `_id`)
df_sr$source <- "registration"
df_sr$salon_pic_inside_s_URL <- sapply(df_sr$salon_pic_inside_s_URL, link_activator) # Activate URL
df_sr$salon_pic_outside_s_URL <- sapply(df_sr$salon_pic_outside_s_URL, link_activator) # Activate URL
df_sr$salon_gps_s_latitude <- as.numeric(df_sr$`_salon_gps_s_latitude`)
df_sr$salon_gps_s_longitude <- as.numeric(df_sr$`_salon_gps_s_longitude`)
df_sr <- df_sr[order(df_sr$submission_time_s, decreasing = TRUE), ] # sort by latest submission date/time on top

# stylist survey
df_ss <- purrr::map_df(stylist_surv_list, ~setNames(.x, paste0(names(.x), seq_along(.x)))) 
names(df_ss) <- gsub("\\d+$", "", names(df_ss)) 
df_ss <- df_ss %>% mutate(fullname_s = paste(firstname_s, lastname_s, sep = " ")) 
df_ss <- df_ss %>% rename(submission_time_s = `_submission_time`,
                          id = `_id`)
df_ss$source <- "survey"
df_ss$sign_s_URL <- sapply(df_ss$sign_s_001_URL, link_activator) 
df_ss <- df_ss[order(df_ss$submission_time_s, decreasing = TRUE), ] 

# client registration
df_cr <- purrr::map_df(client_reg_list, ~setNames(.x, paste0(names(.x), seq_along(.x)))) 
names(df_cr) <- gsub("\\d+$", "", names(df_cr)) 
df_cr <- df_cr %>% mutate(fullname_c = paste(firstname_c, lastname_c, sep = " ")) 
df_cr <- df_cr %>% mutate(fullname_s = paste(stylist_firstname_c, stylist_lastname_c, sep = " ")) 
df_cr <- df_cr %>% rename(submission_time_c = `_submission_time`,
                          id = `_id`)
df_cr$source <- "registration"
df_cr$hair_pic_c_URL <- sapply(df_cr$hair_pic_c_URL, link_activator) 
df_cr$salon_gps_c_latitude <- as.numeric(df_cr$`_salon_gps_c_latitude`)
df_cr$salon_gps_c_longitude <- as.numeric(df_cr$`_salon_gps_c_longitude`)
df_cr <- df_cr[order(df_cr$submission_time_c, decreasing = TRUE), ] 

# client survey
df_cs <- purrr::map_df(client_surv_list, ~setNames(.x, paste0(names(.x), seq_along(.x)))) 
names(df_cs) <- gsub("\\d+$", "", names(df_cs)) 
df_cs <- df_cs %>% mutate(fullname_c = paste(firstname_c, lastname_c, sep = " ")) 
df_cs <- df_cs %>% mutate(fullname_s = paste(stylist_firstname_c, stylist_lastname_c, sep = " "))
df_cs <- df_cs %>% rename(submission_time_c = `_submission_time`,
                          id = `_id`)
df_cs$source <- "survey"
df_cs$sign_c_URL <- sapply(df_cs$sign_c_URL, link_activator) 
df_cs <- df_cs[order(df_cs$submission_time_c, decreasing = TRUE), ]

# Set scipen parameter to a high value to prevent scientific notation
options(scipen = 999)

Data cleaning

HAIR SALON GPS

leaflet(data = df_sr) %>%
  addTiles() %>%
  addMarkers(~salon_gps_s_longitude, ~salon_gps_s_latitude)

RECRUITMENT STYLISTS

df_sr$submission_date_only_s <- as.Date(df_sr$submission_time_s) # Extract Dates only from Date/Time variable
df_sr <- df_sr %>% arrange(submission_date_only_s) # Sort

# Calculate cumulative counts over time
total_counts_sr <- df_sr %>%
  mutate(Cumulative_Count = cumsum(rep(1, nrow(.)))) %>%
  select(submission_date_only_s, Cumulative_Count)

# Interactive Plot
earlier_date <- min(df_sr$submission_date_only_s) - 1  # start plot 1 day earlier
hover_text <- paste("Fantastic - another Stylist recruited!")
plot_ly() %>%
  add_trace(x = ~total_counts_sr$submission_date_only_s, y = ~total_counts_sr$Cumulative_Count, type = "scatter", mode = "lines+markers", line = list(color = "darkred"), marker = list(color = "darkred", size = 5), text = hover_text, name = "1 Stylist") %>%
  add_trace(x = c(min(total_counts_sr$submission_date_only_s), total_counts_sr$submission_date_only_s, max(total_counts_sr$submission_date_only_s)), y = c(0, total_counts_sr$Cumulative_Count, tail(total_counts_sr$Cumulative_Count, 1)), type = "scatter", mode = "lines", fill = "tozeroy", line = list(color = "transparent"), name = "Shaded Area") %>%
  layout(
    # title = list(text = "Cumulative Recruitment of Stylists", font = list(size = 20, color = "black", family = "Arial", weight = "bold")),
         xaxis = list(title = "Date", range = c(earlier_date, max(total_counts_sr$submission_date_only_s) + 1), visible = TRUE),
         yaxis = list(title = "Number of Stylists", tickmode = "linear", dtick = 1),
         showlegend = F)

RECRUITMENT CLIENTS

df_cr$submission_date_only_c <- as.Date(df_cr$submission_time_c) # Extract Dates only from Date/Time variable
df_cr <- df_cr %>% arrange(submission_date_only_c) # Sort
# Calculate cumulative counts over time
total_counts_cr <- df_cr %>%
  mutate(Cumulative_Count = cumsum(rep(1, nrow(.)))) %>%
  select(submission_date_only_c, Cumulative_Count)

# Interactive Plot
earlier_date <- min(df_sr$submission_date_only_c) - 1  # start plot 1 day earlier
hover_text <- paste("Fantastic - another Client recruited!")
plot_ly() %>%
  add_trace(x = ~total_counts_cr$submission_date_only_c, y = ~total_counts_cr$Cumulative_Count, type = "scatter", mode = "lines+markers", line = list(color = "darkred"), marker = list(color = "darkred", size = 5), text = hover_text, name = "1 Client") %>%
  add_trace(x = c(min(total_counts_cr$submission_date_only_c), total_counts_cr$submission_date_only_c, max(total_counts_cr$submission_date_only_c)), y = c(0, total_counts_cr$Cumulative_Count, tail(total_counts_cr$Cumulative_Count, 1)), type = "scatter", mode = "lines", fill = "tozeroy", line = list(color = "transparent"), name = "Shaded Area") %>%
  layout(
    # title = list(text = "Cumulative Recruitment of Stylists", font = list(size = 20, color = "black", family = "Arial", weight = "bold")),
         xaxis = list(title = "Date", range = c(earlier_date, max(total_counts_cr$submission_date_only_c) + 1), visible = TRUE),
         yaxis = list(title = "Number of Clients", tickmode = "linear", dtick = 1),
         showlegend = F)