Data Cleaning

df = 
  df %>%
  mutate(
    vax_status_retarget = case_when(
      num_dose == "0" ~ 0,
      num_dose %in% c("1", "2", "3") ~ 1
    ),
    vax_status_retarget_4 = case_when(
      num_dose == "0" ~ 0,
      num_dose == "1" ~ 1,
      num_dose == "2" ~ 2,
      num_dose == "3" ~ 3
    )
  )

# df$vax_status_retarget = factor(df$vax_status_retarget, levels = c(0, 1))
# df$vax_status_retarget_4 = factor(df$vax_status_retarget_4, levels = c(0,1,2,3))

df$country_answer[!df$country_answer %in% c("South Africa","Kenya","Nigeria","Ghana")] = NA
df$country_answer = factor(df$country_answer, levels = c("South Africa","Kenya","Nigeria","Ghana"))
df = df %>% filter(!is.na(country_answer))

df$doctor[!df$doctor %in% c("1","2","3","4","5")] = NA
df$doctor = strtoi(df$doctor)
df$family[!df$family %in% c("1","2","3","4","5")] = NA
df$family = strtoi(df$family)
df$friends[!df$friends %in% c("1","2","3","4","5")] = NA
df$friends = strtoi(df$friends)
df$minister_religious_leader[!df$minister_religious_leader %in% c("1","2","3","4","5")] = NA
df$minister_religious_leader = strtoi(df$minister_religious_leader)
df$community_leader[!df$community_leader %in% c("1","2","3","4","5")] = NA
df$community_leader = strtoi(df$community_leader)

df$clinic_worker[!df$clinic_worker %in% c("1","2","3","4","5")] = NA
df$clinic_worker = strtoi(df$clinic_worker)
df$community_based_org[!df$community_based_org %in% c("1","2","3","4","5")] = NA
df$community_based_org = strtoi(df$community_based_org)
df$health_department[!df$health_department %in% c("1","2","3","4","5")] = NA
df$health_department = strtoi(df$health_department)
df$who[!df$who %in% c("1","2","3","4","5")] = NA
df$who = strtoi(df$who)
df$african_scientist[!df$african_scientist %in% c("1","2","3","4","5")] = NA
df$african_scientist = strtoi(df$african_scientist)
df$international_scientist[!df$international_scientist %in% c("1","2","3","4","5")] = NA
df$international_scientist = strtoi(df$international_scientist)
df$international_ngo[!df$international_ngo %in% c("1","2","3","4","5")] = NA
df$international_ngo = strtoi(df$international_ngo)
df$local_news_media[!df$local_news_media %in% c("1","2","3","4","5")] = NA
df$local_news_media = strtoi(df$local_news_media)
df$international_news_media[!df$international_news_media %in% c("1","2","3","4","5")] = NA
df$international_news_media = strtoi(df$international_news_media)

df$covid_is_a_problem[!df$covid_is_a_problem %in% c("1","2","3","4","5")] = NA
df$covid_is_a_problem = strtoi(df$covid_is_a_problem)
df$covid_is_problem_my_country[!df$covid_is_problem_my_country %in% c("1","2","3","4","5")] = NA
df$covid_is_problem_my_country = strtoi(df$covid_is_problem_my_country)
df$vax_is_safe[!df$vax_is_safe %in% c("1","2","3","4","5")] = NA
df$vax_is_safe = strtoi(df$vax_is_safe)
df$vax_prevent_sick_death[!df$vax_prevent_sick_death %in% c("1","2","3","4","5")] = NA
df$vax_prevent_sick_death = strtoi(df$vax_prevent_sick_death)
df$need_vax_for_protection[!df$need_vax_for_protection %in% c("1","2","3","4","5")] = NA
df$need_vax_for_protection = strtoi(df$need_vax_for_protection)
df$vax_safer_covid[!df$vax_safer_covid %in% c("1","2","3","4","5")] = NA
df$vax_safer_covid = strtoi(df$vax_safer_covid)
df$developer_want_to_help[!df$developer_want_to_help %in% c("1","2","3","4","5")] = NA
df$developer_want_to_help = strtoi(df$developer_want_to_help)
df$health_worker_want_to_help[!df$health_worker_want_to_help %in% c("1","2","3","4","5")] = NA
df$health_worker_want_to_help = strtoi(df$health_worker_want_to_help)
df$gov_want_to_help[!df$gov_want_to_help %in% c("1","2","3","4","5")] = NA
df$gov_want_to_help = strtoi(df$gov_want_to_help)
df$important_to_protect_myself[!df$important_to_protect_myself %in% c("1","2","3","4","5")] = NA
df$important_to_protect_myself = strtoi(df$important_to_protect_myself)
df$important_to_protect_other[!df$important_to_protect_other %in% c("1","2","3","4","5")] = NA
df$important_to_protect_other = strtoi(df$important_to_protect_other)
df$vax_moral[!df$vax_moral %in% c("1","2","3","4","5")] = NA
df$vax_moral = strtoi(df$vax_moral)
df$worry_short_term_side_effect[!df$worry_short_term_side_effect %in% c("1","2","3","4","5")] = NA
df$worry_short_term_side_effect = strtoi(df$worry_short_term_side_effect)
df$worry_long_term_side_effect[!df$worry_long_term_side_effect %in% c("1","2","3","4","5")] = NA
df$worry_long_term_side_effect = strtoi(df$worry_long_term_side_effect)
df$covid_is_real[!df$covid_is_real %in% c("1","2","3","4","5")] = NA
df$covid_is_real = strtoi(df$covid_is_real)
df$probably_exposed[!df$probably_exposed %in% c("1","2","3","4","5")] = NA
df$probably_exposed = strtoi(df$probably_exposed)
df$afraid_needle[!df$afraid_needle %in% c("1","2","3","4","5")] = NA
df$afraid_needle = strtoi(df$afraid_needle)
df$deeply_religious[!df$deeply_religious %in% c("1","2","3","4","5")] = NA
df$deeply_religious = strtoi(df$deeply_religious)
df$important_moral[!df$important_moral %in% c("1","2","3","4","5")] = NA
df$important_moral = strtoi(df$important_moral)
df$fit_in_group_importance[!df$fit_in_group_importance %in% c("1","2","3","4","5")] = NA
df$fit_in_group_importance = strtoi(df$fit_in_group_importance)
df$responsible_importance[!df$responsible_importance %in% c("1","2","3","4","5")] = NA
df$responsible_importance = strtoi(df$responsible_importance)

df$num_dose[!df$num_dose %in% c("0", "1","2","3")] = NA
df$num_dose = strtoi(df$num_dose)
df$num_dose = as.factor(df$num_dose)

df$vax_next_year[!df$vax_next_year %in% c("1","2","3","4","5")] = NA
df$vax_next_year = strtoi(df$vax_next_year)
df$vax_next_year = as.factor(df$vax_next_year)

df$receive_other_vax[!df$receive_other_vax %in% c("yes","no")] = NA
df$receive_other_vax = as.factor(df$receive_other_vax)

df$time_to_get_vax[!df$time_to_get_vax %in% c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs")] = NA
df$time_to_get_vax = factor(df$time_to_get_vax, levels=c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs"))

df$wait_time[!df$wait_time %in% c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs")] = NA
df$wait_time = factor(df$wait_time, levels=c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs"))

df$transportation_vax[!df$transportation_vax %in% c("walking","bicyle", "bus", "car", "train", "other", "Don't know")] = NA
df$transportation_vax = factor(df$transportation_vax, levels=c("walking","bicyle", "bus", "car", "train", "other", "Don't know"))

df$need_appointment[!df$need_appointment %in% c("yes","no")] = NA
df$need_appointment = factor(df$need_appointment, levels=c("yes","no"))

df$vax_cost[!df$vax_cost %in% c("yes","no")] = NA
df$vax_cost = factor(df$vax_cost, levels=c("yes","no"))

df$go_back_another_dose[!df$go_back_another_dose %in% c("yes","no")] = NA
df$go_back_another_dose = factor(df$go_back_another_dose, levels=c("yes","no"))

df$prefer_day[!df$prefer_day %in% c("Su","M", "Tu", "W", "Th", "F", "S")] = NA
df$prefer_day = factor(df$prefer_day, levels=c("Su","M", "Tu", "W", "Th", "F", "S"))

numeric_cols <- function(df){
  out <- df %>%
    mutate(
      age = as.numeric(ifelse(demog_age %in% paste(18:99), demog_age, "")),
      gender_num = case_when(
        gender == "female" ~ 1,
        gender == "male" ~ 0
        ),
      education_num = case_when(
        demog_education == "< High school" ~ 1,
        demog_education == "High school" ~ 2,
        demog_education == "Some college" ~ 3,
        demog_education == "2-year degree" ~ 4,
        demog_education == "4-year degree" ~ 5,
        demog_education == "Graduate degree" ~ 6,
      ),
      religiosity_num = case_when(
        religiosity == "Not very religious" ~ 1,
        religiosity == "Somewhat religious" ~ 2,
        religiosity == "Very religious" ~ 3,
      ),
      location_num = case_when(
        location == "Rural" ~ 1,
        location == "Suburban" ~ 2,
        location == "Urban" ~ 3,
      ),
      location =factor(location, levels = c("Urban", "Suburban", "Rural", "Prefer not to say")),
      black = ifelse(ethnicity == "Black or African", 1, 0),
      ethnicity = factor(ethnicity, levels=c("Asian or Indian", 
                                                "Black or African", 
                                                "coloured", 
                                                "White or Caucasian", 
                                                "Other",
                                                "Prefer not to say")),
      christian = ifelse(demog_religion == "Christianity", 1, 0)                  
    )
    return(out)
}

df = numeric_cols(df)
df$vax_status_retarget = as.numeric(df$vax_status_retarget)
df$vax_status_retarget_4 = as.numeric(df$vax_status_retarget_4)

df$ethnicity[!df$ethnicity %in% c("Black or African","White or Caucasian", "Asian or Indian", "Other", "Prefer not to say")] = NA
df$ethnicity = factor(df$ethnicity, levels=c("Black or African","White or Caucasian", "Asian or Indian", "Other", "Prefer not to say"))

df$demog_religion[!df$demog_religion %in% c("Christianity","Islam", "African traditional", "Hinduism", "Judaism", "No religion", "Other", "Prefer not to say")] = NA
df$demog_religion = factor(df$demog_religion, levels=c("Christianity","Islam", "African traditional", "Hinduism", "Judaism", "No religion", "Other", "Prefer not to say"))

# did not include income

df$herd_immunity[!df$herd_immunity %in% c("yes","no")] = NA
df$herd_immunity = factor(df$herd_immunity, levels=c("yes","no"))

1 Trust Factors

We obtain five trust factors from the factor analysis (see screenshot below) based on how much the participants trust the respective categories on giving good information on vaccine. We roughly define the factors as the following categories.

Trust Factor 1 (Practitoners): health department, WHO, clinic worker, doctor
Trust Factor 2 (Scientists): international scientist, international NGO, African scientist
Trust Factor 3 (Local Figures): community leader, minister religious leader, community based organization
Trust Factor 4 (Media): local news media, international news media
Trust Factor 5 (Family & Friends): family and friends

For each participant, we calculate a trust factor score by averaging the responses they give to the individual attributes within each trust factor. Note that we asked the trust question on a 1 - 5 scale, which allows us to take the average without having to do any other transformation. 1 being trust very little, and 5 being trust a lot.

We then determine which factor participants trust the most and which factor participants trust the least. If a participant has more than two factors s/he trust the most or least, we discard that participants as it is likely to be noise. Otherwise, we record the max/min factor(s) [i.e. up to 2 for max/min] for each participant.

Create Trust Factors Score

trust_factor_1 <- c("health_department", "who", "clinic_worker", "doctor")
trust_factor_2 <- c("international_scientist", "international_ngo", "african_scientist")
trust_factor_3 <- c("community_leader", "minister_religious_leader", "community_based_org")
trust_factor_4 <- c("local_news_media", "international_news_media")
trust_factor_5 <- c("family", "friends")

df$tf1_score <- rowMeans(df[trust_factor_1], na.rm = TRUE)
df$tf2_score <- rowMeans(df[trust_factor_2], na.rm = TRUE)
df$tf3_score <- rowMeans(df[trust_factor_3], na.rm = TRUE)
df$tf4_score <- rowMeans(df[trust_factor_4], na.rm = TRUE)
df$tf5_score <- rowMeans(df[trust_factor_5], na.rm = TRUE)

tf_scores <- paste0("tf", 1:5, "_score")

Calculate Min and Max Trust Factors

paste0("Original number of participants: ", nrow(df))

## [1] "Original number of participants: 1081"

max_factor <- sapply(1:nrow(df), function(x) which(df[x, tf_scores] == max(df[x, tf_scores])))
max_elig <- sapply(1:nrow(df), function(x) length(max_factor[[x]]) <= 2)
df_new <- df[max_elig,]
df_new$max_trust_factor <- unlist(lapply(max_factor[max_elig], function(x) paste(x, collapse = ", ")))
paste0("Dropped ", nrow(df) - nrow(df_new), " participants due to more than two MAX trust factors.")

## [1] "Dropped 116 participants due to more than two MAX trust factors."

min_factor <- sapply(1:nrow(df_new), function(x) which(df_new[x, tf_scores] == min(df_new[x, tf_scores])))
min_elig <- sapply(1:nrow(df_new), function(x) length(min_factor[[x]]) <= 2)
df_elig <- df_new[min_elig,]
df_elig$min_trust_factor <- unlist(lapply(min_factor[min_elig], function(x) paste(x, collapse = ", ")))
paste0("Dropped ", nrow(df_new) - nrow(df_elig), " participants due to more than two MIN trust factors.")

## [1] "Dropped 68 participants due to more than two MIN trust factors."

paste0("Remaining number of participants: ", nrow(df_elig))

## [1] "Remaining number of participants: 897"

resp.split <- strsplit(df_elig$max_trust_factor, ", ")
resp.dummy <- lapply(resp.split, function(x) table(factor(x, levels=c("1", "2", "3", "4", "5"))))
max_factors <- data.frame(do.call(rbind, resp.dummy))
colnames(max_factors) <- paste0("max_trust_factor_", 1:5)

resp.split <- strsplit(df_elig$min_trust_factor, ", ")
resp.dummy <- lapply(resp.split, function(x) table(factor(x, levels=c("1", "2", "3", "4", "5"))))
min_factors <- data.frame(do.call(rbind, resp.dummy))
colnames(min_factors) <- paste0("min_trust_factor_", 1:5)

df_elig <- cbind(df_elig, max_factors, min_factors)

2 Covariate Heatmaps

We construct two covariate heatmaps. One based on the participants’ max trust factor and one based on the participants’ min trust factor.

Here are the covariates we choose as of now. We also include min trust factor when we look at max trust factor heatmap and vice versa to get a sense of whether a trust in certain factor could lead to low trust in other factor.

The first set of questions are statements where we ask the participants to rate from 1 to 5. 1 = I Disagree Strongly, 2 = I Disagree, 3 = I’m Unsure, 4 = I Agree 5 = I Agree Strongly

Question Text	Variable Name
People are still dying from COVID.	covid_is_a_problem
COVID is a problem in [my country]	covid_is_problem_my_country
I think the COVID vaccines are safe	vax_is_safe
COVID vaccines help prevent serious sickness and death	vax_prevent_sick_death
You need a vaccine for protection from sickness (not because you are sick)	need_vax_for_protection
Getting the vaccine is much safer than getting COVID	vax_safer_covid
I think the people who developed the vaccine wanted to help people	developer_want_to_help
I think my local healthcare workers want me to be healthy and well	health_worker_want_to_help
I think my government’s department of health workers want me to be healthy and well	gov_want_to_help
It is important to me that I protect myself from the effects of COVID	important_to_protect_myself
It is important to me that I protect others from the effects of COVID	important_to_protect_other
Getting vaccinated is a moral issue	vax_moral
I worry about short-term side effects of the COVID vaccine	worry_short_term_side_effect
I worry about long-term side effects of the COVID vaccine	worry_long_term_side_effect
COVID has killed millions of people worldwide	covid_is_real
I will probably be exposed to someone with COVID over the next year	probably_exposed
I am very afraid of needles	afraid_needle
I am a deeply religious person	deeply_religious
It is important to me that I be a moral person	important_moral
It is important to me to feel like I “fit in” in with my group	fit_in_group_importance
It is important to me to feel like I am a responsible member of my community	responsible_importance

The second set of questions are demographic variables & vaccine related questions with encoding below

Covariates	Variable Name (Encoding if Any)
Vaccine Status	vax_status_retarget (1 if vaccinated with at least one dose 0 otherwise)
Likelihood to get vaccinated next year	vax_next_year (1 = very unlikely, 2 = somewhat unlikely, 3 = unsure, 4 = probably will, 5 = definitely will)
Whether they received other vaccines before	receive_other_vax (1 if yes 0 otherwise)
Age	age
Gender	gender_num (1 if female 0 otherwise)
Education	education_num (1 if < high school, 2 if high school, 3 if some college, 4 if 2-year degree, 5 if 4-year degree, 6 if graduate degree)
Religiosity	religiosity_num (1 if not very religious, 2 if somewhat religious, 3 if very religious)
Location	location_num (1 if rural, 2 if suburban, 3 if urban)
Ethnicity	black (1 if black or african 0 otherwise)
Religion	christian (1 if christian 0 otherwise)
Ever heard of herd immunity	herd_immunity (1 if yes 0 otherwise)

The numbers in the heatmap shown below are mean and standard error for each covariate filtered by max or min trust factors, respectively. The colors are standard deviation on a normalized distribution across all responses.

covariates <- c("covid_is_a_problem", "covid_is_problem_my_country", "vax_is_safe", "vax_prevent_sick_death", "need_vax_for_protection", "vax_safer_covid", "developer_want_to_help", "health_worker_want_to_help", "gov_want_to_help", "important_to_protect_myself", "important_to_protect_other", "vax_moral", "worry_short_term_side_effect", "worry_long_term_side_effect", "covid_is_real", "probably_exposed", "afraid_needle", "deeply_religious", "important_moral", "fit_in_group_importance", "responsible_importance", "vax_status_retarget", "vax_next_year", "receive_other_vax", "age", "gender_num", "education_num", "religiosity_num", "location_num", "black", "christian", "herd_immunity")
# max_covariates <- c(covariates, "min_trust_factor")
# min_covariates <- c(covariates, "max_trust_factor")
covariates_full <- c(covariates, paste0("max_trust_factor_", 1:5), paste0("min_trust_factor_", 1:5))

df_features <- df_elig[, covariates_full]
df_features$herd_immunity <- ifelse(df_features$herd_immunity == "yes", 1, 0)
df_features$receive_other_vax <- ifelse(df_features$receive_other_vax == "yes", 1, 0)
df_features <- df_features %>% mutate_if(is.factor, as.numeric)

feature_stats <- df_features %>%
  # filter(grepl("1", max_trust_factor)) %>%
  select(!ends_with("_trust_factor"))  %>%
  
  summarize_all(list(mean = function(x)mean(x, na.rm = T), 
                     sd = function(x)sd(x, na.rm = T))) %>%
  pivot_longer(cols = everything(),
               names_to = "feature",
               values_to = "value") %>%
  mutate(measure = paste("total", str_extract(feature, "mean|sd"), sep = "_"), 
         feature = str_remove(feature, "_mean|_sd")) %>%
  pivot_wider(names_from = "measure", 
              values_from = "value")

n_by_group_tbl <- df_features %>%
  select(starts_with("max_trust_factor") | starts_with("min_trust_factor")) %>%
  dplyr::summarize(across(everything(), sum))

2.1 Max Trust Factors

trust_factor_stats_max <- data.frame()
for(column in paste0("max_trust_factor_", 1:5)){
  summary <- df_features %>% 
    filter(df_features[[column]] == 1) %>%
    select(!starts_with("max_trust_factor"))  %>%
    summarise_all(list(sep_mean = ~ mean(., na.rm = T), 
                       sep_sd = ~ sd(., na.rm =T),
                       sep_n = ~ sum(., na.rm = T))) %>%
    pivot_longer(cols = everything(), names_to = c("name"))  %>%
    separate(name, into = c("feature", "value_type"), sep = "_sep_")%>%
    mutate(segment = column) %>%
    pivot_wider(names_from = "value_type", values_from = "value") %>%
    mutate(se = sd/sqrt(n)) %>%
    select(!sd & !n) 
    
  
 trust_factor_stats_max<- rbind(trust_factor_stats_max, summary)
}

trust_factor_stats_max <- trust_factor_stats_max %>%
  mutate_if(is.numeric, function(x)round(x, 3))

tbl_max <- left_join(trust_factor_stats_max, feature_stats ,by = "feature") %>%
  mutate(n = unlist(n_by_group_tbl[match(segment, names(n_by_group_tbl))])) %>%
  mutate(label = paste(mean, paste("(", se, ")", sep = ""), sep = "\n"), 
         color = (mean - total_mean)/total_sd, 
         color = case_when(color > 2 ~ 2, 
                           color < -2 ~ -2, 
                           is.nan(se) ~ NA_real_,
                           TRUE ~ color),
         group = case_when(segment == "max_trust_factor_1" ~ "Practitioners",
                           segment == "max_trust_factor_2" ~ "Scientists",
                           segment == "max_trust_factor_3" ~ "Local Figures",
                           segment == "max_trust_factor_4" ~ "Media",
                           segment == "max_trust_factor_5" ~ "Friends & Family",
                           TRUE ~ NA_character_
                           ), 
         segment = case_when(segment == "max_trust_factor_1" ~ "Practitioners",
                           segment == "max_trust_factor_2" ~ "Scientists",
                           segment == "max_trust_factor_3" ~ "Local Figures",
                           segment == "max_trust_factor_4" ~ "Media",
                           segment == "max_trust_factor_5" ~ "Friends & Family",
                           TRUE ~ NA_character_
                           ),
         feature = case_when(feature == "min_trust_factor_1" ~ "min_trust_factor_practitioners",
                           feature == "min_trust_factor_2" ~ "min_trust_factor_scientists",
                           feature == "min_trust_factor_3" ~ "min_trust_factor_local_figures",
                           feature == "min_trust_factor_4" ~ "min_trust_factor_media",
                           feature == "min_trust_factor_5" ~ "min_trust_factor_friends_family",
                           TRUE ~ feature
                           ),
         # feature_group = case_when(grepl("impediments_hr_", feature) ~ "Impediments", 
         #                           grepl("best_treat_", feature) ~ "Best Treatment", 
         #                           grepl("opinion_hr", feature) ~ "Opinion", 
         #                           TRUE ~ "Demo"), 
         # segment = str_to_title(str_replace_all(str_remove(segment, "_seg"), "_", "\n")), 
         # feature = str_to_title(str_replace_all(str_remove(feature, "impediments_hr_|opinion_hr_|best_treat_"), "_", " ")), 
         prop = round(n/nrow(df_features), 2)) %>%
  mutate(segment_label = paste(segment, paste("N: ", n, sep = ""), paste("Prop: ", prop, sep = ""), sep = "\n"))

ggplot(tbl_max) +
  geom_tile(aes(segment_label, feature, fill = color), color = "white", lwd = .5) +
  geom_text(aes(segment_label, feature, label = label), size = 4) +
  facet_grid(~ group,  scales = "free_x", space = "free_x", labeller = labeller(group = str_to_title)) +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "green",
    midpoint = 0,
      limits = c(-2,2), 
      labels = c( "<-2", "-1", "0", "1", ">2")) +
  theme_minimal() +
  theme(legend.position = "bottom", 
        axis.text= element_text(size=12),
        axis.title.y = element_text(angle = 0)) +
  labs(title = "Max Trust Factors", 
       x = "Trust Factor", 
       y = "Feature",
       fill = "Std Deviation on Normalized Distribution")   +
  guides(fill = guide_colourbar(barwidth = 10, title.position = "top"))

2.2 Min Trust Factors

trust_factor_stats_min <- data.frame()
for(column in paste0("min_trust_factor_", 1:5)){
  summary <- df_features %>% 
    filter(df_features[[column]] == 1) %>%
    select(!starts_with("min_trust_factor"))  %>%
    summarise_all(list(sep_mean = ~ mean(., na.rm = T), 
                       sep_sd = ~ sd(., na.rm =T),
                       sep_n = ~ sum(., na.rm = T))) %>%
    pivot_longer(cols = everything(), names_to = c("name"))  %>%
    separate(name, into = c("feature", "value_type"), sep = "_sep_")%>%
    mutate(segment = column) %>%
    pivot_wider(names_from = "value_type", values_from = "value") %>%
    mutate(se = sd/sqrt(n)) %>%
    select(!sd & !n) 
    
  
 trust_factor_stats_min<- rbind(trust_factor_stats_min, summary)
}

trust_factor_stats_min <- trust_factor_stats_min %>%
  mutate_if(is.numeric, function(x)round(x, 3))

tbl_min <- left_join(trust_factor_stats_min, feature_stats ,by = "feature") %>%
  mutate(n = unlist(n_by_group_tbl[match(segment, names(n_by_group_tbl))])) %>%
  mutate(label = paste(mean, paste("(", se, ")", sep = ""), sep = "\n"), 
         color = (mean - total_mean)/total_sd, 
         color = case_when(color > 2 ~ 2, 
                           color < -2 ~ -2, 
                           is.nan(se) ~ NA_real_,
                           TRUE ~ color),
         group = case_when(segment == "min_trust_factor_1" ~ "Practitioners",
                           segment == "min_trust_factor_2" ~ "Scientists",
                           segment == "min_trust_factor_3" ~ "Local Figures",
                           segment == "min_trust_factor_4" ~ "Media",
                           segment == "min_trust_factor_5" ~ "Friends & Family",
                           TRUE ~ NA_character_
                           ), 
         segment = case_when(segment == "min_trust_factor_1" ~ "Practitioners",
                           segment == "min_trust_factor_2" ~ "Scientists",
                           segment == "min_trust_factor_3" ~ "Local Figures",
                           segment == "min_trust_factor_4" ~ "Media",
                           segment == "min_trust_factor_5" ~ "Friends & Family",
                           TRUE ~ NA_character_
                           ), 
         feature = case_when(feature == "max_trust_factor_1" ~ "max_trust_factor_practitioners",
                   feature == "max_trust_factor_2" ~ "max_trust_factor_scientists",
                   feature == "max_trust_factor_3" ~ "max_trust_factor_local_figures",
                   feature == "max_trust_factor_4" ~ "max_trust_factor_media",
                   feature == "max_trust_factor_5" ~ "max_trust_factor_friends_family",
                   TRUE ~ feature
                   ),
         # feature_group = case_when(grepl("impediments_hr_", feature) ~ "Impediments", 
         #                           grepl("best_treat_", feature) ~ "Best Treatment", 
         #                           grepl("opinion_hr", feature) ~ "Opinion", 
         #                           TRUE ~ "Demo"), 
         # segment = str_to_title(str_replace_all(str_remove(segment, "_seg"), "_", "\n")), 
         # feature = str_to_title(str_replace_all(str_remove(feature, "impediments_hr_|opinion_hr_|best_treat_"), "_", " ")), 
         prop = round(n/nrow(df_features), 2)) %>%
  mutate(segment_label = paste(segment, paste("N: ", n, sep = ""), paste("Prop: ", prop, sep = ""), sep = "\n"))

ggplot(tbl_min) +
  geom_tile(aes(segment_label, feature, fill = color), color = "white", lwd = .5) +
  geom_text(aes(segment_label, feature, label = label), size = 4) +
  facet_grid(~ group,  scales = "free_x", space = "free_x", labeller = labeller(group = str_to_title)) +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "green",
    midpoint = 0,
      limits = c(-2,2), 
      labels = c( "<-2", "-1", "0", "1", ">2")) +
  theme_minimal() +
  theme(legend.position = "bottom", 
        axis.text= element_text(size=12),
        axis.title.y = element_text(angle = 0)) +
  labs(title = "Min Trust Factors", 
       x = "Trust Factor", 
       y = "Feature",
       fill = "Std Deviation on Normalized Distribution")   +
  guides(fill = guide_colourbar(barwidth = 10, title.position = "top"))

Trust Factor Analysis

Updated: January 25, 2023

1 Trust Factors

2 Covariate Heatmaps

2.1 Max Trust Factors

2.2 Min Trust Factors