Data Cleaning
df =
df %>%
mutate(
vax_status_retarget = case_when(
num_dose == "0" ~ 0,
num_dose %in% c("1", "2", "3") ~ 1
),
vax_status_retarget_4 = case_when(
num_dose == "0" ~ 0,
num_dose == "1" ~ 1,
num_dose == "2" ~ 2,
num_dose == "3" ~ 3
)
)
# df$vax_status_retarget = factor(df$vax_status_retarget, levels = c(0, 1))
# df$vax_status_retarget_4 = factor(df$vax_status_retarget_4, levels = c(0,1,2,3))
df$country_answer[!df$country_answer %in% c("South Africa","Kenya","Nigeria","Ghana")] = NA
df$country_answer = factor(df$country_answer, levels = c("South Africa","Kenya","Nigeria","Ghana"))
df = df %>% filter(!is.na(country_answer))
df$doctor[!df$doctor %in% c("1","2","3","4","5")] = NA
df$doctor = strtoi(df$doctor)
df$family[!df$family %in% c("1","2","3","4","5")] = NA
df$family = strtoi(df$family)
df$friends[!df$friends %in% c("1","2","3","4","5")] = NA
df$friends = strtoi(df$friends)
df$minister_religious_leader[!df$minister_religious_leader %in% c("1","2","3","4","5")] = NA
df$minister_religious_leader = strtoi(df$minister_religious_leader)
df$community_leader[!df$community_leader %in% c("1","2","3","4","5")] = NA
df$community_leader = strtoi(df$community_leader)
df$clinic_worker[!df$clinic_worker %in% c("1","2","3","4","5")] = NA
df$clinic_worker = strtoi(df$clinic_worker)
df$community_based_org[!df$community_based_org %in% c("1","2","3","4","5")] = NA
df$community_based_org = strtoi(df$community_based_org)
df$health_department[!df$health_department %in% c("1","2","3","4","5")] = NA
df$health_department = strtoi(df$health_department)
df$who[!df$who %in% c("1","2","3","4","5")] = NA
df$who = strtoi(df$who)
df$african_scientist[!df$african_scientist %in% c("1","2","3","4","5")] = NA
df$african_scientist = strtoi(df$african_scientist)
df$international_scientist[!df$international_scientist %in% c("1","2","3","4","5")] = NA
df$international_scientist = strtoi(df$international_scientist)
df$international_ngo[!df$international_ngo %in% c("1","2","3","4","5")] = NA
df$international_ngo = strtoi(df$international_ngo)
df$local_news_media[!df$local_news_media %in% c("1","2","3","4","5")] = NA
df$local_news_media = strtoi(df$local_news_media)
df$international_news_media[!df$international_news_media %in% c("1","2","3","4","5")] = NA
df$international_news_media = strtoi(df$international_news_media)
df$covid_is_a_problem[!df$covid_is_a_problem %in% c("1","2","3","4","5")] = NA
df$covid_is_a_problem = strtoi(df$covid_is_a_problem)
df$covid_is_problem_my_country[!df$covid_is_problem_my_country %in% c("1","2","3","4","5")] = NA
df$covid_is_problem_my_country = strtoi(df$covid_is_problem_my_country)
df$vax_is_safe[!df$vax_is_safe %in% c("1","2","3","4","5")] = NA
df$vax_is_safe = strtoi(df$vax_is_safe)
df$vax_prevent_sick_death[!df$vax_prevent_sick_death %in% c("1","2","3","4","5")] = NA
df$vax_prevent_sick_death = strtoi(df$vax_prevent_sick_death)
df$need_vax_for_protection[!df$need_vax_for_protection %in% c("1","2","3","4","5")] = NA
df$need_vax_for_protection = strtoi(df$need_vax_for_protection)
df$vax_safer_covid[!df$vax_safer_covid %in% c("1","2","3","4","5")] = NA
df$vax_safer_covid = strtoi(df$vax_safer_covid)
df$developer_want_to_help[!df$developer_want_to_help %in% c("1","2","3","4","5")] = NA
df$developer_want_to_help = strtoi(df$developer_want_to_help)
df$health_worker_want_to_help[!df$health_worker_want_to_help %in% c("1","2","3","4","5")] = NA
df$health_worker_want_to_help = strtoi(df$health_worker_want_to_help)
df$gov_want_to_help[!df$gov_want_to_help %in% c("1","2","3","4","5")] = NA
df$gov_want_to_help = strtoi(df$gov_want_to_help)
df$important_to_protect_myself[!df$important_to_protect_myself %in% c("1","2","3","4","5")] = NA
df$important_to_protect_myself = strtoi(df$important_to_protect_myself)
df$important_to_protect_other[!df$important_to_protect_other %in% c("1","2","3","4","5")] = NA
df$important_to_protect_other = strtoi(df$important_to_protect_other)
df$vax_moral[!df$vax_moral %in% c("1","2","3","4","5")] = NA
df$vax_moral = strtoi(df$vax_moral)
df$worry_short_term_side_effect[!df$worry_short_term_side_effect %in% c("1","2","3","4","5")] = NA
df$worry_short_term_side_effect = strtoi(df$worry_short_term_side_effect)
df$worry_long_term_side_effect[!df$worry_long_term_side_effect %in% c("1","2","3","4","5")] = NA
df$worry_long_term_side_effect = strtoi(df$worry_long_term_side_effect)
df$covid_is_real[!df$covid_is_real %in% c("1","2","3","4","5")] = NA
df$covid_is_real = strtoi(df$covid_is_real)
df$probably_exposed[!df$probably_exposed %in% c("1","2","3","4","5")] = NA
df$probably_exposed = strtoi(df$probably_exposed)
df$afraid_needle[!df$afraid_needle %in% c("1","2","3","4","5")] = NA
df$afraid_needle = strtoi(df$afraid_needle)
df$deeply_religious[!df$deeply_religious %in% c("1","2","3","4","5")] = NA
df$deeply_religious = strtoi(df$deeply_religious)
df$important_moral[!df$important_moral %in% c("1","2","3","4","5")] = NA
df$important_moral = strtoi(df$important_moral)
df$fit_in_group_importance[!df$fit_in_group_importance %in% c("1","2","3","4","5")] = NA
df$fit_in_group_importance = strtoi(df$fit_in_group_importance)
df$responsible_importance[!df$responsible_importance %in% c("1","2","3","4","5")] = NA
df$responsible_importance = strtoi(df$responsible_importance)
df$num_dose[!df$num_dose %in% c("0", "1","2","3")] = NA
df$num_dose = strtoi(df$num_dose)
df$num_dose = as.factor(df$num_dose)
df$vax_next_year[!df$vax_next_year %in% c("1","2","3","4","5")] = NA
df$vax_next_year = strtoi(df$vax_next_year)
df$vax_next_year = as.factor(df$vax_next_year)
df$receive_other_vax[!df$receive_other_vax %in% c("yes","no")] = NA
df$receive_other_vax = as.factor(df$receive_other_vax)
df$time_to_get_vax[!df$time_to_get_vax %in% c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs")] = NA
df$time_to_get_vax = factor(df$time_to_get_vax, levels=c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs"))
df$wait_time[!df$wait_time %in% c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs")] = NA
df$wait_time = factor(df$wait_time, levels=c("Less than 20 min","20-60 min", "1-2 hrs", "2+ hrs"))
df$transportation_vax[!df$transportation_vax %in% c("walking","bicyle", "bus", "car", "train", "other", "Don't know")] = NA
df$transportation_vax = factor(df$transportation_vax, levels=c("walking","bicyle", "bus", "car", "train", "other", "Don't know"))
df$need_appointment[!df$need_appointment %in% c("yes","no")] = NA
df$need_appointment = factor(df$need_appointment, levels=c("yes","no"))
df$vax_cost[!df$vax_cost %in% c("yes","no")] = NA
df$vax_cost = factor(df$vax_cost, levels=c("yes","no"))
df$go_back_another_dose[!df$go_back_another_dose %in% c("yes","no")] = NA
df$go_back_another_dose = factor(df$go_back_another_dose, levels=c("yes","no"))
df$prefer_day[!df$prefer_day %in% c("Su","M", "Tu", "W", "Th", "F", "S")] = NA
df$prefer_day = factor(df$prefer_day, levels=c("Su","M", "Tu", "W", "Th", "F", "S"))
numeric_cols <- function(df){
out <- df %>%
mutate(
age = as.numeric(ifelse(demog_age %in% paste(18:99), demog_age, "")),
gender_num = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0
),
education_num = case_when(
demog_education == "< High school" ~ 1,
demog_education == "High school" ~ 2,
demog_education == "Some college" ~ 3,
demog_education == "2-year degree" ~ 4,
demog_education == "4-year degree" ~ 5,
demog_education == "Graduate degree" ~ 6,
),
religiosity_num = case_when(
religiosity == "Not very religious" ~ 1,
religiosity == "Somewhat religious" ~ 2,
religiosity == "Very religious" ~ 3,
),
location_num = case_when(
location == "Rural" ~ 1,
location == "Suburban" ~ 2,
location == "Urban" ~ 3,
),
location =factor(location, levels = c("Urban", "Suburban", "Rural", "Prefer not to say")),
black = ifelse(ethnicity == "Black or African", 1, 0),
ethnicity = factor(ethnicity, levels=c("Asian or Indian",
"Black or African",
"coloured",
"White or Caucasian",
"Other",
"Prefer not to say")),
christian = ifelse(demog_religion == "Christianity", 1, 0)
)
return(out)
}
df = numeric_cols(df)
df$vax_status_retarget = as.numeric(df$vax_status_retarget)
df$vax_status_retarget_4 = as.numeric(df$vax_status_retarget_4)
df$ethnicity[!df$ethnicity %in% c("Black or African","White or Caucasian", "Asian or Indian", "Other", "Prefer not to say")] = NA
df$ethnicity = factor(df$ethnicity, levels=c("Black or African","White or Caucasian", "Asian or Indian", "Other", "Prefer not to say"))
df$demog_religion[!df$demog_religion %in% c("Christianity","Islam", "African traditional", "Hinduism", "Judaism", "No religion", "Other", "Prefer not to say")] = NA
df$demog_religion = factor(df$demog_religion, levels=c("Christianity","Islam", "African traditional", "Hinduism", "Judaism", "No religion", "Other", "Prefer not to say"))
# did not include income
df$herd_immunity[!df$herd_immunity %in% c("yes","no")] = NA
df$herd_immunity = factor(df$herd_immunity, levels=c("yes","no"))We obtain five trust factors from the factor analysis (see screenshot below) based on how much the participants trust the respective categories on giving good information on vaccine. We roughly define the factors as the following categories.
For each participant, we calculate a trust factor score by averaging the responses they give to the individual attributes within each trust factor. Note that we asked the trust question on a 1 - 5 scale, which allows us to take the average without having to do any other transformation. 1 being trust very little, and 5 being trust a lot.
We then determine which factor participants trust the most and which factor participants trust the least. If a participant has more than two factors s/he trust the most or least, we discard that participants as it is likely to be noise. Otherwise, we record the max/min factor(s) [i.e. up to 2 for max/min] for each participant.
Create Trust Factors Score
trust_factor_1 <- c("health_department", "who", "clinic_worker", "doctor")
trust_factor_2 <- c("international_scientist", "international_ngo", "african_scientist")
trust_factor_3 <- c("community_leader", "minister_religious_leader", "community_based_org")
trust_factor_4 <- c("local_news_media", "international_news_media")
trust_factor_5 <- c("family", "friends")
df$tf1_score <- rowMeans(df[trust_factor_1], na.rm = TRUE)
df$tf2_score <- rowMeans(df[trust_factor_2], na.rm = TRUE)
df$tf3_score <- rowMeans(df[trust_factor_3], na.rm = TRUE)
df$tf4_score <- rowMeans(df[trust_factor_4], na.rm = TRUE)
df$tf5_score <- rowMeans(df[trust_factor_5], na.rm = TRUE)
tf_scores <- paste0("tf", 1:5, "_score")Calculate Min and Max Trust Factors
paste0("Original number of participants: ", nrow(df))## [1] "Original number of participants: 1081"
max_factor <- sapply(1:nrow(df), function(x) which(df[x, tf_scores] == max(df[x, tf_scores])))
max_elig <- sapply(1:nrow(df), function(x) length(max_factor[[x]]) <= 2)
df_new <- df[max_elig,]
df_new$max_trust_factor <- unlist(lapply(max_factor[max_elig], function(x) paste(x, collapse = ", ")))
paste0("Dropped ", nrow(df) - nrow(df_new), " participants due to more than two MAX trust factors.")## [1] "Dropped 116 participants due to more than two MAX trust factors."
min_factor <- sapply(1:nrow(df_new), function(x) which(df_new[x, tf_scores] == min(df_new[x, tf_scores])))
min_elig <- sapply(1:nrow(df_new), function(x) length(min_factor[[x]]) <= 2)
df_elig <- df_new[min_elig,]
df_elig$min_trust_factor <- unlist(lapply(min_factor[min_elig], function(x) paste(x, collapse = ", ")))
paste0("Dropped ", nrow(df_new) - nrow(df_elig), " participants due to more than two MIN trust factors.")## [1] "Dropped 68 participants due to more than two MIN trust factors."
paste0("Remaining number of participants: ", nrow(df_elig))## [1] "Remaining number of participants: 897"
resp.split <- strsplit(df_elig$max_trust_factor, ", ")
resp.dummy <- lapply(resp.split, function(x) table(factor(x, levels=c("1", "2", "3", "4", "5"))))
max_factors <- data.frame(do.call(rbind, resp.dummy))
colnames(max_factors) <- paste0("max_trust_factor_", 1:5)
resp.split <- strsplit(df_elig$min_trust_factor, ", ")
resp.dummy <- lapply(resp.split, function(x) table(factor(x, levels=c("1", "2", "3", "4", "5"))))
min_factors <- data.frame(do.call(rbind, resp.dummy))
colnames(min_factors) <- paste0("min_trust_factor_", 1:5)
df_elig <- cbind(df_elig, max_factors, min_factors)We construct two covariate heatmaps. One based on the participants’ max trust factor and one based on the participants’ min trust factor.
Here are the covariates we choose as of now. We also include min trust factor when we look at max trust factor heatmap and vice versa to get a sense of whether a trust in certain factor could lead to low trust in other factor.
The first set of questions are statements where we ask the participants to rate from 1 to 5. 1 = I Disagree Strongly, 2 = I Disagree, 3 = I’m Unsure, 4 = I Agree 5 = I Agree Strongly
| Question Text | Variable Name |
|---|---|
| People are still dying from COVID. | covid_is_a_problem |
| COVID is a problem in [my country] | covid_is_problem_my_country |
| I think the COVID vaccines are safe | vax_is_safe |
| COVID vaccines help prevent serious sickness and death | vax_prevent_sick_death |
| You need a vaccine for protection from sickness (not because you are sick) | need_vax_for_protection |
| Getting the vaccine is much safer than getting COVID | vax_safer_covid |
| I think the people who developed the vaccine wanted to help people | developer_want_to_help |
| I think my local healthcare workers want me to be healthy and well | health_worker_want_to_help |
| I think my government’s department of health workers want me to be healthy and well | gov_want_to_help |
| It is important to me that I protect myself from the effects of COVID | important_to_protect_myself |
| It is important to me that I protect others from the effects of COVID | important_to_protect_other |
| Getting vaccinated is a moral issue | vax_moral |
| I worry about short-term side effects of the COVID vaccine | worry_short_term_side_effect |
| I worry about long-term side effects of the COVID vaccine | worry_long_term_side_effect |
| COVID has killed millions of people worldwide | covid_is_real |
| I will probably be exposed to someone with COVID over the next year | probably_exposed |
| I am very afraid of needles | afraid_needle |
| I am a deeply religious person | deeply_religious |
| It is important to me that I be a moral person | important_moral |
| It is important to me to feel like I “fit in” in with my group | fit_in_group_importance |
| It is important to me to feel like I am a responsible member of my community | responsible_importance |
The second set of questions are demographic variables & vaccine related questions with encoding below
| Covariates | Variable Name (Encoding if Any) |
|---|---|
| Vaccine Status | vax_status_retarget (1 if vaccinated with at least one dose 0 otherwise) |
| Likelihood to get vaccinated next year | vax_next_year (1 = very unlikely, 2 = somewhat unlikely, 3 = unsure, 4 = probably will, 5 = definitely will) |
| Whether they received other vaccines before | receive_other_vax (1 if yes 0 otherwise) |
| Age | age |
| Gender | gender_num (1 if female 0 otherwise) |
| Education | education_num (1 if < high school, 2 if high school, 3 if some college, 4 if 2-year degree, 5 if 4-year degree, 6 if graduate degree) |
| Religiosity | religiosity_num (1 if not very religious, 2 if somewhat religious, 3 if very religious) |
| Location | location_num (1 if rural, 2 if suburban, 3 if urban) |
| Ethnicity | black (1 if black or african 0 otherwise) |
| Religion | christian (1 if christian 0 otherwise) |
| Ever heard of herd immunity | herd_immunity (1 if yes 0 otherwise) |
The numbers in the heatmap shown below are mean and standard error for each covariate filtered by max or min trust factors, respectively. The colors are standard deviation on a normalized distribution across all responses.
covariates <- c("covid_is_a_problem", "covid_is_problem_my_country", "vax_is_safe", "vax_prevent_sick_death", "need_vax_for_protection", "vax_safer_covid", "developer_want_to_help", "health_worker_want_to_help", "gov_want_to_help", "important_to_protect_myself", "important_to_protect_other", "vax_moral", "worry_short_term_side_effect", "worry_long_term_side_effect", "covid_is_real", "probably_exposed", "afraid_needle", "deeply_religious", "important_moral", "fit_in_group_importance", "responsible_importance", "vax_status_retarget", "vax_next_year", "receive_other_vax", "age", "gender_num", "education_num", "religiosity_num", "location_num", "black", "christian", "herd_immunity")
# max_covariates <- c(covariates, "min_trust_factor")
# min_covariates <- c(covariates, "max_trust_factor")
covariates_full <- c(covariates, paste0("max_trust_factor_", 1:5), paste0("min_trust_factor_", 1:5))df_features <- df_elig[, covariates_full]
df_features$herd_immunity <- ifelse(df_features$herd_immunity == "yes", 1, 0)
df_features$receive_other_vax <- ifelse(df_features$receive_other_vax == "yes", 1, 0)
df_features <- df_features %>% mutate_if(is.factor, as.numeric)feature_stats <- df_features %>%
# filter(grepl("1", max_trust_factor)) %>%
select(!ends_with("_trust_factor")) %>%
summarize_all(list(mean = function(x)mean(x, na.rm = T),
sd = function(x)sd(x, na.rm = T))) %>%
pivot_longer(cols = everything(),
names_to = "feature",
values_to = "value") %>%
mutate(measure = paste("total", str_extract(feature, "mean|sd"), sep = "_"),
feature = str_remove(feature, "_mean|_sd")) %>%
pivot_wider(names_from = "measure",
values_from = "value")n_by_group_tbl <- df_features %>%
select(starts_with("max_trust_factor") | starts_with("min_trust_factor")) %>%
dplyr::summarize(across(everything(), sum))trust_factor_stats_max <- data.frame()
for(column in paste0("max_trust_factor_", 1:5)){
summary <- df_features %>%
filter(df_features[[column]] == 1) %>%
select(!starts_with("max_trust_factor")) %>%
summarise_all(list(sep_mean = ~ mean(., na.rm = T),
sep_sd = ~ sd(., na.rm =T),
sep_n = ~ sum(., na.rm = T))) %>%
pivot_longer(cols = everything(), names_to = c("name")) %>%
separate(name, into = c("feature", "value_type"), sep = "_sep_")%>%
mutate(segment = column) %>%
pivot_wider(names_from = "value_type", values_from = "value") %>%
mutate(se = sd/sqrt(n)) %>%
select(!sd & !n)
trust_factor_stats_max<- rbind(trust_factor_stats_max, summary)
}
trust_factor_stats_max <- trust_factor_stats_max %>%
mutate_if(is.numeric, function(x)round(x, 3)) tbl_max <- left_join(trust_factor_stats_max, feature_stats ,by = "feature") %>%
mutate(n = unlist(n_by_group_tbl[match(segment, names(n_by_group_tbl))])) %>%
mutate(label = paste(mean, paste("(", se, ")", sep = ""), sep = "\n"),
color = (mean - total_mean)/total_sd,
color = case_when(color > 2 ~ 2,
color < -2 ~ -2,
is.nan(se) ~ NA_real_,
TRUE ~ color),
group = case_when(segment == "max_trust_factor_1" ~ "Practitioners",
segment == "max_trust_factor_2" ~ "Scientists",
segment == "max_trust_factor_3" ~ "Local Figures",
segment == "max_trust_factor_4" ~ "Media",
segment == "max_trust_factor_5" ~ "Friends & Family",
TRUE ~ NA_character_
),
segment = case_when(segment == "max_trust_factor_1" ~ "Practitioners",
segment == "max_trust_factor_2" ~ "Scientists",
segment == "max_trust_factor_3" ~ "Local Figures",
segment == "max_trust_factor_4" ~ "Media",
segment == "max_trust_factor_5" ~ "Friends & Family",
TRUE ~ NA_character_
),
feature = case_when(feature == "min_trust_factor_1" ~ "min_trust_factor_practitioners",
feature == "min_trust_factor_2" ~ "min_trust_factor_scientists",
feature == "min_trust_factor_3" ~ "min_trust_factor_local_figures",
feature == "min_trust_factor_4" ~ "min_trust_factor_media",
feature == "min_trust_factor_5" ~ "min_trust_factor_friends_family",
TRUE ~ feature
),
# feature_group = case_when(grepl("impediments_hr_", feature) ~ "Impediments",
# grepl("best_treat_", feature) ~ "Best Treatment",
# grepl("opinion_hr", feature) ~ "Opinion",
# TRUE ~ "Demo"),
# segment = str_to_title(str_replace_all(str_remove(segment, "_seg"), "_", "\n")),
# feature = str_to_title(str_replace_all(str_remove(feature, "impediments_hr_|opinion_hr_|best_treat_"), "_", " ")),
prop = round(n/nrow(df_features), 2)) %>%
mutate(segment_label = paste(segment, paste("N: ", n, sep = ""), paste("Prop: ", prop, sep = ""), sep = "\n"))ggplot(tbl_max) +
geom_tile(aes(segment_label, feature, fill = color), color = "white", lwd = .5) +
geom_text(aes(segment_label, feature, label = label), size = 4) +
facet_grid(~ group, scales = "free_x", space = "free_x", labeller = labeller(group = str_to_title)) +
scale_fill_gradient2(
low = "red",
mid = "white",
high = "green",
midpoint = 0,
limits = c(-2,2),
labels = c( "<-2", "-1", "0", "1", ">2")) +
theme_minimal() +
theme(legend.position = "bottom",
axis.text= element_text(size=12),
axis.title.y = element_text(angle = 0)) +
labs(title = "Max Trust Factors",
x = "Trust Factor",
y = "Feature",
fill = "Std Deviation on Normalized Distribution") +
guides(fill = guide_colourbar(barwidth = 10, title.position = "top"))trust_factor_stats_min <- data.frame()
for(column in paste0("min_trust_factor_", 1:5)){
summary <- df_features %>%
filter(df_features[[column]] == 1) %>%
select(!starts_with("min_trust_factor")) %>%
summarise_all(list(sep_mean = ~ mean(., na.rm = T),
sep_sd = ~ sd(., na.rm =T),
sep_n = ~ sum(., na.rm = T))) %>%
pivot_longer(cols = everything(), names_to = c("name")) %>%
separate(name, into = c("feature", "value_type"), sep = "_sep_")%>%
mutate(segment = column) %>%
pivot_wider(names_from = "value_type", values_from = "value") %>%
mutate(se = sd/sqrt(n)) %>%
select(!sd & !n)
trust_factor_stats_min<- rbind(trust_factor_stats_min, summary)
}
trust_factor_stats_min <- trust_factor_stats_min %>%
mutate_if(is.numeric, function(x)round(x, 3)) tbl_min <- left_join(trust_factor_stats_min, feature_stats ,by = "feature") %>%
mutate(n = unlist(n_by_group_tbl[match(segment, names(n_by_group_tbl))])) %>%
mutate(label = paste(mean, paste("(", se, ")", sep = ""), sep = "\n"),
color = (mean - total_mean)/total_sd,
color = case_when(color > 2 ~ 2,
color < -2 ~ -2,
is.nan(se) ~ NA_real_,
TRUE ~ color),
group = case_when(segment == "min_trust_factor_1" ~ "Practitioners",
segment == "min_trust_factor_2" ~ "Scientists",
segment == "min_trust_factor_3" ~ "Local Figures",
segment == "min_trust_factor_4" ~ "Media",
segment == "min_trust_factor_5" ~ "Friends & Family",
TRUE ~ NA_character_
),
segment = case_when(segment == "min_trust_factor_1" ~ "Practitioners",
segment == "min_trust_factor_2" ~ "Scientists",
segment == "min_trust_factor_3" ~ "Local Figures",
segment == "min_trust_factor_4" ~ "Media",
segment == "min_trust_factor_5" ~ "Friends & Family",
TRUE ~ NA_character_
),
feature = case_when(feature == "max_trust_factor_1" ~ "max_trust_factor_practitioners",
feature == "max_trust_factor_2" ~ "max_trust_factor_scientists",
feature == "max_trust_factor_3" ~ "max_trust_factor_local_figures",
feature == "max_trust_factor_4" ~ "max_trust_factor_media",
feature == "max_trust_factor_5" ~ "max_trust_factor_friends_family",
TRUE ~ feature
),
# feature_group = case_when(grepl("impediments_hr_", feature) ~ "Impediments",
# grepl("best_treat_", feature) ~ "Best Treatment",
# grepl("opinion_hr", feature) ~ "Opinion",
# TRUE ~ "Demo"),
# segment = str_to_title(str_replace_all(str_remove(segment, "_seg"), "_", "\n")),
# feature = str_to_title(str_replace_all(str_remove(feature, "impediments_hr_|opinion_hr_|best_treat_"), "_", " ")),
prop = round(n/nrow(df_features), 2)) %>%
mutate(segment_label = paste(segment, paste("N: ", n, sep = ""), paste("Prop: ", prop, sep = ""), sep = "\n"))ggplot(tbl_min) +
geom_tile(aes(segment_label, feature, fill = color), color = "white", lwd = .5) +
geom_text(aes(segment_label, feature, label = label), size = 4) +
facet_grid(~ group, scales = "free_x", space = "free_x", labeller = labeller(group = str_to_title)) +
scale_fill_gradient2(
low = "red",
mid = "white",
high = "green",
midpoint = 0,
limits = c(-2,2),
labels = c( "<-2", "-1", "0", "1", ">2")) +
theme_minimal() +
theme(legend.position = "bottom",
axis.text= element_text(size=12),
axis.title.y = element_text(angle = 0)) +
labs(title = "Min Trust Factors",
x = "Trust Factor",
y = "Feature",
fill = "Std Deviation on Normalized Distribution") +
guides(fill = guide_colourbar(barwidth = 10, title.position = "top"))