Data Cleaning

df = 
  df %>%
  mutate(
    vax_status_retarget = case_when(
      num_dose == "0" ~ 0,
      num_dose %in% c("1", "2", "3") ~ 1
    ),
    vax_status_retarget_4 = case_when(
      num_dose == "0" ~ 0,
      num_dose == "1" ~ 1,
      num_dose == "2" ~ 2,
      num_dose == "3" ~ 3
    )
  )

# df$vax_status_retarget = factor(df$vax_status_retarget, levels = c(0, 1))
# df$vax_status_retarget_4 = factor(df$vax_status_retarget_4, levels = c(0,1,2,3))

df$country_answer[!df$country_answer %in% c("South Africa","Kenya","Nigeria","Ghana")] = NA
df$country_answer = factor(df$country_answer, levels = c("South Africa","Kenya","Nigeria","Ghana"))
df = df %>% filter(!is.na(country_answer))

df$doctor[!df$doctor %in% c("1","2","3","4","5")] = NA
df$doctor = strtoi(df$doctor)
df$family[!df$family %in% c("1","2","3","4","5")] = NA
df$family = strtoi(df$family)
df$friends[!df$friends %in% c("1","2","3","4","5")] = NA
df$friends = strtoi(df$friends)
df$minister_religious_leader[!df$minister_religious_leader %in% c("1","2","3","4","5")] = NA
df$minister_religious_leader = strtoi(df$minister_religious_leader)
df$community_leader[!df$community_leader %in% c("1","2","3","4","5")] = NA
df$community_leader = strtoi(df$community_leader)

df$clinic_worker[!df$clinic_worker %in% c("1","2","3","4","5")] = NA
df$clinic_worker = strtoi(df$clinic_worker)
df$community_based_org[!df$community_based_org %in% c("1","2","3","4","5")] = NA
df$community_based_org = strtoi(df$community_based_org)
df$health_department[!df$health_department %in% c("1","2","3","4","5")] = NA
df$health_department = strtoi(df$health_department)
df$who[!df$who %in% c("1","2","3","4","5")] = NA
df$who = strtoi(df$who)
df$african_scientist[!df$african_scientist %in% c("1","2","3","4","5")] = NA
df$african_scientist = strtoi(df$african_scientist)
df$international_scientist[!df$international_scientist %in% c("1","2","3","4","5")] = NA
df$international_scientist = strtoi(df$international_scientist)
df$international_ngo[!df$international_ngo %in% c("1","2","3","4","5")] = NA
df$international_ngo = strtoi(df$international_ngo)
df$local_news_media[!df$local_news_media %in% c("1","2","3","4","5")] = NA
df$local_news_media = strtoi(df$local_news_media)
df$international_news_media[!df$international_news_media %in% c("1","2","3","4","5")] = NA
df$international_news_media = strtoi(df$international_news_media)

df$covid_is_a_problem[!df$covid_is_a_problem %in% c("1","2","3","4","5")] = NA
df$covid_is_a_problem = strtoi(df$covid_is_a_problem)
df$covid_is_problem_my_country[!df$covid_is_problem_my_country %in% c("1","2","3","4","5")] = NA
df$covid_is_problem_my_country = strtoi(df$covid_is_problem_my_country)
df$vax_is_safe[!df$vax_is_safe %in% c("1","2","3","4","5")] = NA
df$vax_is_safe = strtoi(df$vax_is_safe)
df$vax_prevent_sick_death[!df$vax_prevent_sick_death %in% c("1","2","3","4","5")] = NA
df$vax_prevent_sick_death = strtoi(df$vax_prevent_sick_death)
df$need_vax_for_protection[!df$need_vax_for_protection %in% c("1","2","3","4","5")] = NA
df$need_vax_for_protection = strtoi(df$need_vax_for_protection)
df$vax_safer_covid[!df$vax_safer_covid %in% c("1","2","3","4","5")] = NA
df$vax_safer_covid = strtoi(df$vax_safer_covid)
df$developer_want_to_help[!df$developer_want_to_help %in% c("1","2","3","4","5")] = NA
df$developer_want_to_help = strtoi(df$developer_want_to_help)
df$health_worker_want_to_help[!df$health_worker_want_to_help %in% c("1","2","3","4","5")] = NA
df$health_worker_want_to_help = strtoi(df$health_worker_want_to_help)
df$gov_want_to_help[!df$gov_want_to_help %in% c("1","2","3","4","5")] = NA
df$gov_want_to_help = strtoi(df$gov_want_to_help)
df$important_to_protect_myself[!df$important_to_protect_myself %in% c("1","2","3","4","5")] = NA
df$important_to_protect_myself = strtoi(df$important_to_protect_myself)
df$important_to_protect_other[!df$important_to_protect_other %in% c("1","2","3","4","5")] = NA
df$important_to_protect_other = strtoi(df$important_to_protect_other)
df$vax_moral[!df$vax_moral %in% c("1","2","3","4","5")] = NA
df$vax_moral = strtoi(df$vax_moral)
df$worry_short_term_side_effect[!df$worry_short_term_side_effect %in% c("1","2","3","4","5")] = NA
df$worry_short_term_side_effect = strtoi(df$worry_short_term_side_effect)
df$worry_long_term_side_effect[!df$worry_long_term_side_effect %in% c("1","2","3","4","5")] = NA
df$worry_long_term_side_effect = strtoi(df$worry_long_term_side_effect)
df$covid_is_real[!df$covid_is_real %in% c("1","2","3","4","5")] = NA
df$covid_is_real = strtoi(df$covid_is_real)
df$probably_exposed[!df$probably_exposed %in% c("1","2","3","4","5")] = NA
df$probably_exposed = strtoi(df$probably_exposed)
df$afraid_needle[!df$afraid_needle %in% c("1","2","3","4","5")] = NA
df$afraid_needle = strtoi(df$afraid_needle)
df$deeply_religious[!df$deeply_religious %in% c("1","2","3","4","5")] = NA
df$deeply_religious = strtoi(df$deeply_religious)
df$important_moral[!df$important_moral %in% c("1","2","3","4","5")] = NA
df$important_moral = strtoi(df$important_moral)
df$fit_in_group_importance[!df$fit_in_group_importance %in% c("1","2","3","4","5")] = NA
df$fit_in_group_importance = strtoi(df$fit_in_group_importance)
df$responsible_importance[!df$responsible_importance %in% c("1","2","3","4","5")] = NA
df$responsible_importance = strtoi(df$responsible_importance)

df$num_dose[!df$num_dose %in% c("0", "1","2","3")] = NA
df$num_dose = strtoi(df$num_dose)
df$num_dose = as.factor(df$num_dose)

df$vax_next_year[!df$vax_next_year %in% c("1","2","3","4","5")] = NA
df$vax_next_year = strtoi(df$vax_next_year)
df$vax_next_year = as.factor(df$vax_next_year)

df$receive_other_vax[!df$receive_other_vax %in% c("yes","no")] = NA
df$receive_other_vax = as.factor(df$receive_other_vax)

df$time_to_get_vax[!df$time_to_get_vax %in% c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs")] = NA
df$time_to_get_vax = factor(df$time_to_get_vax, levels=c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs"))

df$wait_time[!df$wait_time %in% c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs")] = NA
df$wait_time = factor(df$wait_time, levels=c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs"))

df$transportation_vax[!df$transportation_vax %in% c("walking","bicyle", "bus", "car", "train", "other", "Don't know")] = NA
df$transportation_vax = factor(df$transportation_vax, levels=c("walking","bicyle", "bus", "car", "train", "other", "Don't know"))

df$need_appointment[!df$need_appointment %in% c("yes","no")] = NA
df$need_appointment = factor(df$need_appointment, levels=c("yes","no"))

df$vax_cost[!df$vax_cost %in% c("yes","no")] = NA
df$vax_cost = factor(df$vax_cost, levels=c("yes","no"))

df$go_back_another_dose[!df$go_back_another_dose %in% c("yes","no")] = NA
df$go_back_another_dose = factor(df$go_back_another_dose, levels=c("yes","no"))

df$prefer_day[!df$prefer_day %in% c("Su","M", "Tu", "W", "Th", "F", "S")] = NA
df$prefer_day = factor(df$prefer_day, levels=c("Su","M", "Tu", "W", "Th", "F", "S"))

numeric_cols <- function(df){
  out <- df %>%
    mutate(
      age = as.numeric(ifelse(demog_age %in% paste(18:99), demog_age, "")),
      gender_num = case_when(
        gender == "female" ~ 1,
        gender == "male" ~ 0
        ),
      education_num = case_when(
        demog_education == "< High school" ~ 1,
        demog_education == "High school" ~ 2,
        demog_education == "Some college" ~ 3,
        demog_education == "2-year degree" ~ 4,
        demog_education == "4-year degree" ~ 5,
        demog_education == "Graduate degree" ~ 6,
      ),
      religiosity_num = case_when(
        religiosity == "Not very religious" ~ 1,
        religiosity == "Somewhat religious" ~ 2,
        religiosity == "Very religious" ~ 3,
      ),
      location_num = case_when(
        location == "Rural" ~ 1,
        location == "Suburban" ~ 2,
        location == "Urban" ~ 3,
      ),
      location =factor(location, levels = c("Urban", "Suburban", "Rural", "Prefer not to say")),
      black = ifelse(ethnicity == "Black or African", 1, 0),
      ethnicity = factor(ethnicity, levels=c("Asian or Indian", 
                                                "Black or African", 
                                                "coloured", 
                                                "White or Caucasian", 
                                                "Other",
                                                "Prefer not to say")),
      christian = ifelse(demog_religion == "Christianity", 1, 0)                  
    )
    return(out)
}

df = numeric_cols(df)
df$vax_status_retarget = as.numeric(df$vax_status_retarget)
df$vax_status_retarget_4 = as.numeric(df$vax_status_retarget_4)

df$ethnicity[!df$ethnicity %in% c("Black or African","White or Caucasian", "Asian or Indian", "Other", "Prefer not to say")] = NA
df$ethnicity = factor(df$ethnicity, levels=c("Black or African","White or Caucasian", "Asian or Indian", "Other", "Prefer not to say"))

df$demog_religion[!df$demog_religion %in% c("Christianity","Islam", "African traditional", "Hinduism", "Judaism", "No religion", "Other", "Prefer not to say")] = NA
df$demog_religion = factor(df$demog_religion, levels=c("Christianity","Islam", "African traditional", "Hinduism", "Judaism", "No religion", "Other", "Prefer not to say"))

# did not include income

df$herd_immunity[!df$herd_immunity %in% c("yes","no")] = NA
df$herd_immunity = factor(df$herd_immunity, levels=c("yes","no"))

1 Trust Factors

We obtain five trust factors from the factor analysis (see screenshot below) based on how much the participants trust the respective categories on giving good information on vaccine. We roughly define the factors as the following categories.

  1. Trust Factor 1 (Practitoners): health department, WHO, clinic worker, doctor
  2. Trust Factor 2 (Scientists): international scientist, international NGO, African scientist
  3. Trust Factor 3 (Local Figures): community leader, minister religious leader, community based organization
  4. Trust Factor 4 (Media): local news media, international news media
  5. Trust Factor 5 (Family & Friends): family and friends

For each participant, we calculate a trust factor score by averaging the responses they give to the individual attributes within each trust factor. Note that we asked the trust question on a 1 - 5 scale, which allows us to take the average without having to do any other transformation. 1 being trust very little, and 5 being trust a lot.

We then determine which factor participants trust the most and which factor participants trust the least. If a participant has more than two factors s/he trust the most or least, we discard that participants as it is likely to be noise. Otherwise, we record the max/min factor(s) [i.e. up to 2 for max/min] for each participant.

Create Trust Factors Score

trust_factor_1 <- c("health_department", "who", "clinic_worker", "doctor")
trust_factor_2 <- c("international_scientist", "international_ngo", "african_scientist")
trust_factor_3 <- c("community_leader", "minister_religious_leader", "community_based_org")
trust_factor_4 <- c("local_news_media", "international_news_media")
trust_factor_5 <- c("family", "friends")

df$tf1_score <- rowMeans(df[trust_factor_1], na.rm = TRUE)
df$tf2_score <- rowMeans(df[trust_factor_2], na.rm = TRUE)
df$tf3_score <- rowMeans(df[trust_factor_3], na.rm = TRUE)
df$tf4_score <- rowMeans(df[trust_factor_4], na.rm = TRUE)
df$tf5_score <- rowMeans(df[trust_factor_5], na.rm = TRUE)

tf_scores <- paste0("tf", 1:5, "_score")

Calculate Min and Max Trust Factors

paste0("Original number of participants: ", nrow(df))
## [1] "Original number of participants: 1081"
max_factor <- sapply(1:nrow(df), function(x) which(df[x, tf_scores] == max(df[x, tf_scores])))
max_elig <- sapply(1:nrow(df), function(x) length(max_factor[[x]]) <= 2)
df_new <- df[max_elig,]
df_new$max_trust_factor <- unlist(lapply(max_factor[max_elig], function(x) paste(x, collapse = ", ")))
paste0("Dropped ", nrow(df) - nrow(df_new), " participants due to more than two MAX trust factors.")
## [1] "Dropped 116 participants due to more than two MAX trust factors."
min_factor <- sapply(1:nrow(df_new), function(x) which(df_new[x, tf_scores] == min(df_new[x, tf_scores])))
min_elig <- sapply(1:nrow(df_new), function(x) length(min_factor[[x]]) <= 2)
df_elig <- df_new[min_elig,]
df_elig$min_trust_factor <- unlist(lapply(min_factor[min_elig], function(x) paste(x, collapse = ", ")))
paste0("Dropped ", nrow(df_new) - nrow(df_elig), " participants due to more than two MIN trust factors.")
## [1] "Dropped 68 participants due to more than two MIN trust factors."
paste0("Remaining number of participants: ", nrow(df_elig))
## [1] "Remaining number of participants: 897"
resp.split <- strsplit(df_elig$max_trust_factor, ", ")
resp.dummy <- lapply(resp.split, function(x) table(factor(x, levels=c("1", "2", "3", "4", "5"))))
max_factors <- data.frame(do.call(rbind, resp.dummy))
colnames(max_factors) <- paste0("max_trust_factor_", 1:5)

resp.split <- strsplit(df_elig$min_trust_factor, ", ")
resp.dummy <- lapply(resp.split, function(x) table(factor(x, levels=c("1", "2", "3", "4", "5"))))
min_factors <- data.frame(do.call(rbind, resp.dummy))
colnames(min_factors) <- paste0("min_trust_factor_", 1:5)

df_elig <- cbind(df_elig, max_factors, min_factors)

2 Covariate Heatmaps

We construct two covariate heatmaps. One based on the participants’ max trust factor and one based on the participants’ min trust factor.

Here are the covariates we choose as of now. We also include min trust factor when we look at max trust factor heatmap and vice versa to get a sense of whether a trust in certain factor could lead to low trust in other factor.

The first set of questions are statements where we ask the participants to rate from 1 to 5. 1 = I Disagree Strongly, 2 = I Disagree, 3 = I’m Unsure, 4 = I Agree 5 = I Agree Strongly

Question Text Variable Name
People are still dying from COVID. covid_is_a_problem
COVID is a problem in [my country] covid_is_problem_my_country
I think the COVID vaccines are safe vax_is_safe
COVID vaccines help prevent serious sickness and death vax_prevent_sick_death
You need a vaccine for protection from sickness (not because you are sick) need_vax_for_protection
Getting the vaccine is much safer than getting COVID vax_safer_covid
I think the people who developed the vaccine wanted to help people developer_want_to_help
I think my local healthcare workers want me to be healthy and well health_worker_want_to_help
I think my government’s department of health workers want me to be healthy and well gov_want_to_help
It is important to me that I protect myself from the effects of COVID important_to_protect_myself
It is important to me that I protect others from the effects of COVID important_to_protect_other
Getting vaccinated is a moral issue vax_moral
I worry about short-term side effects of the COVID vaccine worry_short_term_side_effect
I worry about long-term side effects of the COVID vaccine worry_long_term_side_effect
COVID has killed millions of people worldwide covid_is_real
I will probably be exposed to someone with COVID over the next year probably_exposed
I am very afraid of needles afraid_needle
I am a deeply religious person deeply_religious
It is important to me that I be a moral person important_moral
It is important to me to feel like I “fit in” in with my group fit_in_group_importance
It is important to me to feel like I am a responsible member of my community responsible_importance

The second set of questions are demographic variables & vaccine related questions with encoding below

Covariates Variable Name (Encoding if Any)
Vaccine Status vax_status_retarget (1 if vaccinated with at least one dose 0 otherwise)
Likelihood to get vaccinated next year vax_next_year (1 = very unlikely, 2 = somewhat unlikely, 3 = unsure, 4 = probably will, 5 = definitely will)
Whether they received other vaccines before receive_other_vax (1 if yes 0 otherwise)
Age age
Gender gender_num (1 if female 0 otherwise)
Education education_num (1 if < high school, 2 if high school, 3 if some college, 4 if 2-year degree, 5 if 4-year degree, 6 if graduate degree)
Religiosity religiosity_num (1 if not very religious, 2 if somewhat religious, 3 if very religious)
Location location_num (1 if rural, 2 if suburban, 3 if urban)
Ethnicity black (1 if black or african 0 otherwise)
Religion christian (1 if christian 0 otherwise)
Ever heard of herd immunity herd_immunity (1 if yes 0 otherwise)

The numbers in the heatmap shown below are mean and standard error for each covariate filtered by max or min trust factors, respectively. The colors are standard deviation on a normalized distribution across all responses.

covariates <- c("covid_is_a_problem", "covid_is_problem_my_country", "vax_is_safe", "vax_prevent_sick_death", "need_vax_for_protection", "vax_safer_covid", "developer_want_to_help", "health_worker_want_to_help", "gov_want_to_help", "important_to_protect_myself", "important_to_protect_other", "vax_moral", "worry_short_term_side_effect", "worry_long_term_side_effect", "covid_is_real", "probably_exposed", "afraid_needle", "deeply_religious", "important_moral", "fit_in_group_importance", "responsible_importance", "vax_status_retarget", "vax_next_year", "receive_other_vax", "age", "gender_num", "education_num", "religiosity_num", "location_num", "black", "christian", "herd_immunity")
# max_covariates <- c(covariates, "min_trust_factor")
# min_covariates <- c(covariates, "max_trust_factor")
covariates_full <- c(covariates, paste0("max_trust_factor_", 1:5), paste0("min_trust_factor_", 1:5))
df_features <- df_elig[, covariates_full]
df_features$herd_immunity <- ifelse(df_features$herd_immunity == "yes", 1, 0)
df_features$receive_other_vax <- ifelse(df_features$receive_other_vax == "yes", 1, 0)
df_features <- df_features %>% mutate_if(is.factor, as.numeric)
feature_stats <- df_features %>%
  # filter(grepl("1", max_trust_factor)) %>%
  select(!ends_with("_trust_factor"))  %>%
  
  summarize_all(list(mean = function(x)mean(x, na.rm = T), 
                     sd = function(x)sd(x, na.rm = T))) %>%
  pivot_longer(cols = everything(),
               names_to = "feature",
               values_to = "value") %>%
  mutate(measure = paste("total", str_extract(feature, "mean|sd"), sep = "_"), 
         feature = str_remove(feature, "_mean|_sd")) %>%
  pivot_wider(names_from = "measure", 
              values_from = "value")
n_by_group_tbl <- df_features %>%
  select(starts_with("max_trust_factor") | starts_with("min_trust_factor")) %>%
  dplyr::summarize(across(everything(), sum))

2.1 Max Trust Factors

trust_factor_stats_max <- data.frame()
for(column in paste0("max_trust_factor_", 1:5)){
  summary <- df_features %>% 
    filter(df_features[[column]] == 1) %>%
    select(!starts_with("max_trust_factor"))  %>%
    summarise_all(list(sep_mean = ~ mean(., na.rm = T), 
                       sep_sd = ~ sd(., na.rm =T),
                       sep_n = ~ sum(., na.rm = T))) %>%
    pivot_longer(cols = everything(), names_to = c("name"))  %>%
    separate(name, into = c("feature", "value_type"), sep = "_sep_")%>%
    mutate(segment = column) %>%
    pivot_wider(names_from = "value_type", values_from = "value") %>%
    mutate(se = sd/sqrt(n)) %>%
    select(!sd & !n) 
    
  
 trust_factor_stats_max<- rbind(trust_factor_stats_max, summary)
}

trust_factor_stats_max <- trust_factor_stats_max %>%
  mutate_if(is.numeric, function(x)round(x, 3))  
tbl_max <- left_join(trust_factor_stats_max, feature_stats ,by = "feature") %>%
  mutate(n = unlist(n_by_group_tbl[match(segment, names(n_by_group_tbl))])) %>%
  mutate(label = paste(mean, paste("(", se, ")", sep = ""), sep = "\n"), 
         color = (mean - total_mean)/total_sd, 
         color = case_when(color > 2 ~ 2, 
                           color < -2 ~ -2, 
                           is.nan(se) ~ NA_real_,
                           TRUE ~ color),
         group = case_when(segment == "max_trust_factor_1" ~ "Practitioners",
                           segment == "max_trust_factor_2" ~ "Scientists",
                           segment == "max_trust_factor_3" ~ "Local Figures",
                           segment == "max_trust_factor_4" ~ "Media",
                           segment == "max_trust_factor_5" ~ "Friends & Family",
                           TRUE ~ NA_character_
                           ), 
         segment = case_when(segment == "max_trust_factor_1" ~ "Practitioners",
                           segment == "max_trust_factor_2" ~ "Scientists",
                           segment == "max_trust_factor_3" ~ "Local Figures",
                           segment == "max_trust_factor_4" ~ "Media",
                           segment == "max_trust_factor_5" ~ "Friends & Family",
                           TRUE ~ NA_character_
                           ),
         feature = case_when(feature == "min_trust_factor_1" ~ "min_trust_factor_practitioners",
                           feature == "min_trust_factor_2" ~ "min_trust_factor_scientists",
                           feature == "min_trust_factor_3" ~ "min_trust_factor_local_figures",
                           feature == "min_trust_factor_4" ~ "min_trust_factor_media",
                           feature == "min_trust_factor_5" ~ "min_trust_factor_friends_family",
                           TRUE ~ feature
                           ),
         # feature_group = case_when(grepl("impediments_hr_", feature) ~ "Impediments", 
         #                           grepl("best_treat_", feature) ~ "Best Treatment", 
         #                           grepl("opinion_hr", feature) ~ "Opinion", 
         #                           TRUE ~ "Demo"), 
         # segment = str_to_title(str_replace_all(str_remove(segment, "_seg"), "_", "\n")), 
         # feature = str_to_title(str_replace_all(str_remove(feature, "impediments_hr_|opinion_hr_|best_treat_"), "_", " ")), 
         prop = round(n/nrow(df_features), 2)) %>%
  mutate(segment_label = paste(segment, paste("N: ", n, sep = ""), paste("Prop: ", prop, sep = ""), sep = "\n"))
ggplot(tbl_max) +
  geom_tile(aes(segment_label, feature, fill = color), color = "white", lwd = .5) +
  geom_text(aes(segment_label, feature, label = label), size = 4) +
  facet_grid(~ group,  scales = "free_x", space = "free_x", labeller = labeller(group = str_to_title)) +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "green",
    midpoint = 0,
      limits = c(-2,2), 
      labels = c( "<-2", "-1", "0", "1", ">2")) +
  theme_minimal() +
  theme(legend.position = "bottom", 
        axis.text= element_text(size=12),
        axis.title.y = element_text(angle = 0)) +
  labs(title = "Max Trust Factors", 
       x = "Trust Factor", 
       y = "Feature",
       fill = "Std Deviation on Normalized Distribution")   +
  guides(fill = guide_colourbar(barwidth = 10, title.position = "top"))

2.2 Min Trust Factors

trust_factor_stats_min <- data.frame()
for(column in paste0("min_trust_factor_", 1:5)){
  summary <- df_features %>% 
    filter(df_features[[column]] == 1) %>%
    select(!starts_with("min_trust_factor"))  %>%
    summarise_all(list(sep_mean = ~ mean(., na.rm = T), 
                       sep_sd = ~ sd(., na.rm =T),
                       sep_n = ~ sum(., na.rm = T))) %>%
    pivot_longer(cols = everything(), names_to = c("name"))  %>%
    separate(name, into = c("feature", "value_type"), sep = "_sep_")%>%
    mutate(segment = column) %>%
    pivot_wider(names_from = "value_type", values_from = "value") %>%
    mutate(se = sd/sqrt(n)) %>%
    select(!sd & !n) 
    
  
 trust_factor_stats_min<- rbind(trust_factor_stats_min, summary)
}

trust_factor_stats_min <- trust_factor_stats_min %>%
  mutate_if(is.numeric, function(x)round(x, 3))  
tbl_min <- left_join(trust_factor_stats_min, feature_stats ,by = "feature") %>%
  mutate(n = unlist(n_by_group_tbl[match(segment, names(n_by_group_tbl))])) %>%
  mutate(label = paste(mean, paste("(", se, ")", sep = ""), sep = "\n"), 
         color = (mean - total_mean)/total_sd, 
         color = case_when(color > 2 ~ 2, 
                           color < -2 ~ -2, 
                           is.nan(se) ~ NA_real_,
                           TRUE ~ color),
         group = case_when(segment == "min_trust_factor_1" ~ "Practitioners",
                           segment == "min_trust_factor_2" ~ "Scientists",
                           segment == "min_trust_factor_3" ~ "Local Figures",
                           segment == "min_trust_factor_4" ~ "Media",
                           segment == "min_trust_factor_5" ~ "Friends & Family",
                           TRUE ~ NA_character_
                           ), 
         segment = case_when(segment == "min_trust_factor_1" ~ "Practitioners",
                           segment == "min_trust_factor_2" ~ "Scientists",
                           segment == "min_trust_factor_3" ~ "Local Figures",
                           segment == "min_trust_factor_4" ~ "Media",
                           segment == "min_trust_factor_5" ~ "Friends & Family",
                           TRUE ~ NA_character_
                           ), 
         feature = case_when(feature == "max_trust_factor_1" ~ "max_trust_factor_practitioners",
                   feature == "max_trust_factor_2" ~ "max_trust_factor_scientists",
                   feature == "max_trust_factor_3" ~ "max_trust_factor_local_figures",
                   feature == "max_trust_factor_4" ~ "max_trust_factor_media",
                   feature == "max_trust_factor_5" ~ "max_trust_factor_friends_family",
                   TRUE ~ feature
                   ),
         # feature_group = case_when(grepl("impediments_hr_", feature) ~ "Impediments", 
         #                           grepl("best_treat_", feature) ~ "Best Treatment", 
         #                           grepl("opinion_hr", feature) ~ "Opinion", 
         #                           TRUE ~ "Demo"), 
         # segment = str_to_title(str_replace_all(str_remove(segment, "_seg"), "_", "\n")), 
         # feature = str_to_title(str_replace_all(str_remove(feature, "impediments_hr_|opinion_hr_|best_treat_"), "_", " ")), 
         prop = round(n/nrow(df_features), 2)) %>%
  mutate(segment_label = paste(segment, paste("N: ", n, sep = ""), paste("Prop: ", prop, sep = ""), sep = "\n"))
ggplot(tbl_min) +
  geom_tile(aes(segment_label, feature, fill = color), color = "white", lwd = .5) +
  geom_text(aes(segment_label, feature, label = label), size = 4) +
  facet_grid(~ group,  scales = "free_x", space = "free_x", labeller = labeller(group = str_to_title)) +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "green",
    midpoint = 0,
      limits = c(-2,2), 
      labels = c( "<-2", "-1", "0", "1", ">2")) +
  theme_minimal() +
  theme(legend.position = "bottom", 
        axis.text= element_text(size=12),
        axis.title.y = element_text(angle = 0)) +
  labs(title = "Min Trust Factors", 
       x = "Trust Factor", 
       y = "Feature",
       fill = "Std Deviation on Normalized Distribution")   +
  guides(fill = guide_colourbar(barwidth = 10, title.position = "top"))