If you’d like the source data, reach out to the BHAR office or DM me on Twitter
HRTC %>%
mutate(Replied = as.character(Replied)) %>%
mutate(
Replied = recode(Replied, "0"="No"),
Replied = recode(Replied, "1"="Yes")
) %>%
count(campg.day, Type, Replied) %>%
spread(Replied, n) %>%
group_by(Type) %>%
summarise(Yes = sum(Yes, na.rm=T), No=sum(No, na.rm=T)) %>%
mutate(
`Reply %` = Yes/(Yes+No),
n = Yes + No
) %>%
rename(Style=Type) %>%
ggplot(aes(x=Style, y=`Reply %`, fill=Style)) +
geom_col(color="black") +
geom_label(aes(y=0, label=percent(`Reply %`), color=Style), fill="white", vjust=0) +
scale_fill_manual(values=color_guide, guide=F) +
scale_color_manual(values=color_guide, guide=F) +
theme_pubclean() + theme(
plot.subtitle = element_text(face="italic", color="#A1A1A1", size=rel(0.9)),
plot.caption = element_text(face="bold", color="#2d6987", size=rel(1))
) +
labs(x="", y="Responded (%)", caption = "Fig xyz",
title="Percent of participants who responded by\nmessage style") +
scale_y_continuous(labels=percent)
df <- HRTC %>%
filter(userResponses>0) %>%
# filter(Type!="Fact", Type!="Hotline") %>%
mutate(Category = as.factor(Category)) %>%
mutate(Replied = ifelse(Replied==1, "Yes", "No")) %>%
mutate(Replied = factor(Replied, levels=c("No","Yes")))
mod <- glm(Replied ~ Type + grade + race + gender + userResponses + campg.num,
data=df, family=binomial(link = "logit"))
# HRTC %>% group_by(Type, BaseMsg, ReplyMsg) %>%
# summarise(
# Received = n(),
# Replied = sum(Replied, na.rm = T),
# Replied_percent = round(100*sum(Replied, na.rm = T)/n())
# ) %>%
# arrange(Type, Received) %>%
# readr::write_excel_csv("~/Downloads/Message Types.csv")
# Save df with B (estimate) & CIs (and their exp values)
x <- cbind(broom::tidy(mod), broom::confint_tidy(mod)) %>%
rename(B=estimate, LL=conf.low, UL=conf.high) %>%
mutate(expB=exp(B), expLL=exp(LL), expUL=exp(UL)) %>%
mutate(
VarCat = "",
VarCat = ifelse(str_detect(term, "Type"), "Style", VarCat),
term = str_replace(term, "Type", ""),
VarCat = ifelse(str_detect(term, "gender"), "Gender", VarCat),
term = str_replace(term, "gender", ""),
VarCat = ifelse(str_detect(term, "race"), "Race", VarCat),
term = str_replace(term, "race", ""),
VarCat = ifelse(str_detect(term, "grade"), "Grade", VarCat),
term = str_replace(term, "grade", ""),
term = ifelse(VarCat=="Grade", str_c(term, "th"), term),
term = ifelse(term=="Other", str_c(term, VarCat, sep=" "), term)
) %>%
dplyr::select(term, VarCat, B, LL, UL, expB, expLL, expUL, everything())
x %>% filter(VarCat!="") %>%
ggplot(aes(x=term, y=B, color=VarCat)) +
geom_hline(yintercept = 0, linetype = 2) +
geom_point() +
geom_errorbar(aes(ymin=LL, ymax=UL), width=.2) +
coord_flip() + guides(color=F) + theme_bw() +
labs(x="", y="Log-Odds Ratio of responding") +
facet_wrap("VarCat", scales="free")
x %>% filter(VarCat!="") %>%
filter(VarCat=="Style") %>%
ggplot(aes(x=term, y=expB, color=term)) +
geom_hline(yintercept = 1, linetype = 1, alpha=1, size=0.75) +
geom_point(size=2) +
geom_errorbar(aes(ymin=expLL, ymax=expUL), width=.2, size=1) +
geom_label_repel(aes(label=round(expB,2)), nudge_x=-0.75, alpha=0.75,
arrow = arrow(length = unit(0.02, "npc")), color="black") +
scale_color_manual(values=color_guide) +
scale_y_continuous(breaks=c(1,50,100, 150), minor_breaks = c(0,25,75,125)) +
coord_flip() + guides(color=F) + theme_pubclean(flip=T) +
theme(panel.grid.minor.x = element_line(linetype = "dotted",
color = "grey"),
plot.caption = element_text(face="bold", color="#2d6987", size=rel(1)),
plot.subtitle = element_text(face="italic", color="#A1A1A1", size=rel(0.9))) +
labs(x="", y="Odds ratio of responding", title="Odds of responding to message by message style",
subtitle='Reference = "Fact" message style', caption = 'Fig xyz')
x %>% filter(VarCat!="") %>%
filter(VarCat=="Style") %>%
filter(term!="Hotline") %>% # only look at bidirectional messages
ggplot(aes(x=term, y=expB, color=term)) +
geom_hline(yintercept = 1, linetype = 1, alpha=1, size=0.75) +
geom_point(size=2) +
geom_errorbar(aes(ymin=expLL, ymax=expUL), width=.2, size=1) +
geom_label_repel(aes(label=round(expB,2)), nudge_x=-0.4, alpha=0.75,
arrow = arrow(length = unit(0.02, "npc")), color="black") +
scale_color_manual(values=color_guide) +
scale_y_continuous(breaks=c(1,50,100, 150), minor_breaks = c(25,75,125)) +
coord_flip() + guides(color=F) + theme_pubclean(flip=T) +
theme(panel.grid.minor.x = element_line(linetype = "dotted",
color = "grey"),
plot.caption = element_text(face="bold", color="#2d6987", size=rel(1)),
plot.subtitle = element_text(face="italic", color="#A1A1A1", size=rel(0.9))) +
labs(x="", y="Odds ratio of responding", title="Odds of responding to bidirectional message styles",
subtitle='Reference = unilateral styles (Fact, Hotline)', caption = 'Fig xyz')
df <- HRTC %>%
filter(userResponses>0) %>%
# mutate(Type = ifelse(campg.day==26, "Hotline", Type)) %>% # Change the one hotline question from MORE to hotline
mutate(Category = as.factor(Category)) %>%
mutate(Replied = ifelse(Replied==1, "Yes", "No")) %>%
mutate(Replied = factor(Replied, levels=c("No","Yes")))
# Use same model as above, but use `campg.day` instead of `campg.num`
mod <- glm(Replied ~ Type + grade + race + gender + userResponses + campg.day,
data=df, family=binomial(link = "logit"))
## make comparison data....
jtoolResults <- jtools::make_predictions(mod, pred = "campg.day", return.orig.data=T)
predictions <- jtoolResults$predictions
actualData <- jtoolResults$data %>%
count(campg.day, Type, Replied) %>%
spread(Replied, n) %>%
mutate(
n = Yes + No,
PercentRespond = Yes/n
) %>% left_join(select(count(HRTC, Type, Message, campg.day, campg.num, mid), campg.day, Message))
rm(jtoolResults, predictions)
bind_rows(
make_predictions(mod, pred = "campg.day", at=list("Type"="MORE")),
make_predictions(mod, pred = "campg.day", at=list("Type"="Quiz")),
make_predictions(mod, pred = "campg.day", at=list("Type"="Hotline")),
make_predictions(mod, pred = "campg.day", at=list("Type"="T/F")),
make_predictions(mod, pred = "campg.day", at=list("Type"="Fact"))
) %>%
ggplot(aes(x=campg.day, y=Replied)) +
# geom_ribbon(aes(ymin=ymin, ymax=ymax, fill=Type), alpha=0.15) +
geom_line(aes(color=Type)) +
scale_fill_manual(values=color_guide, guide=F) +
geom_point(aes(x=campg.day, y=PercentRespond, color=Type, shape=Type),
size=2, data=actualData, alpha=0.4) +
scale_color_manual(values=color_guide, name="Style") +
scale_shape_discrete(name="Style") +
guides(
colour = guide_legend(override.aes = list(alpha = 1), reverse=T),
shape = guide_legend(reverse=T)) +
theme_pubclean() +
theme(
legend.position= "right",
plot.subtitle = element_text(face="italic", color="#A1A1A1", size=rel(0.9)),
plot.caption = element_text(face="bold", color="#2d6987", size=rel(1))
) +
labs(x="Day of Campaign", y="Pr(respond)", caption = "Fig xyz",
title="Predicted chance of responding\nby campaign day") +
scale_y_continuous(labels=percent)
rm(actualData)
survObj <- Surv(time=users$SurvEnd2, event=users$Censor)
SurvPlot <- survfit(survObj ~ 1, data = users) %>%
ggsurv(size.est=1, size.ci=0.5,cens.size=3,
lty.ci = 1 # linetype: was 5
)
# SurvPlot ## Plot the GGally plot
# Make your own plot
dat <- SurvPlot$data
dat.cens <- subset(dat, cens != 0)
dat %>%
ggplot(aes(x=time, y=surv)) +
geom_ribbon(aes(ymin=low, ymax=up), alpha=0.25) +
geom_step() +
geom_point(data=dat.cens, aes(y = surv, color="= Censored"),
shape=3, size=1.5) +
# geom_rug(data=count(HRTC, campg.day, Type), sides="b",
# aes(x=campg.day, y=0, color=Type), size=1) +
theme_pubclean() +
# theme(legend.position=c(0.5,0.1), legend.direction="horizontal") +
theme(
legend.position= c(0.2,0.2),
plot.subtitle = element_text(face="italic", color="#A1A1A1", size=rel(0.9)),
plot.caption = element_text(face="bold", color="#2d6987", size=rel(1))
) +
scale_y_continuous(labels = scales::percent, limits=c(0.6,1)) +
scale_color_manual(values=c("#df382c"), name="") +
labs(x="Day of Campaign", y="Subscribed to campaign (%)", caption = "Fig xyz",
title = "Retention during the campaign")
# # Autoplot
# survfit(survObj ~ 1, data = users) %>%
# ggsurvplot(data = users)
rm(SurvPlot, dat, dat.cens, survObj)
Demographic <- "race" # gender, grade, race
df <- semi_join(users, select(HRTC, userID)) %>%
count(!!sym(Demographic)) %>%
filter(!is.na(!!sym(Demographic)))
numFactors <- dim(count(df, !!sym(Demographic)))[1]
if(Demographic=="gender") pal <- suppressWarnings(RColorBrewer::brewer.pal(numFactors, "Set1"))
if(Demographic=="grade") pal <- suppressWarnings(RColorBrewer::brewer.pal(numFactors, "Spectral"))
if(Demographic=="race") pal <- suppressWarnings(RColorBrewer::brewer.pal(numFactors, "Dark2"))
df %>% waffle(legend_pos="bottom", flip=F, colors=pal)
rm(Demographic, numFactors, pal)
actual <- HRTC %>%
mutate(
Type = recode(Type, "Fact"="Fact/Hotline"),
Type = recode(Type, "Hotline"="Fact/Hotline")
) %>%
mutate(Replied = as.character(Replied)) %>%
mutate(Type = ifelse(campg.day==26, "MORE", Type)) %>%
mutate(
Replied = recode(Replied, "0"="No"),
Replied = recode(Replied, "1"="Yes")
) %>%
count(campg.day, Type, Replied, mid) %>%
spread(Replied, n) %>%
group_by(Type, campg.day, mid) %>%
summarise(Yes = sum(Yes, na.rm=T), No=sum(No, na.rm=T)) %>%
mutate(
`Reply %` = Yes/(Yes+No),
n = Yes + No
)
actual %>%
ggplot(aes(x=campg.day, y=`Reply %`, color=Type)) +
# geom_col(aes(fill=Type)) +
# geom_step() +
geom_point() + geom_smooth(aes(fill=Type)) +
facet_wrap("Type") +
theme_pubclean() +
theme(legend.position="bottom") +
scale_y_continuous(labels=percent, limits = c(0,0.5), oob=scales::squish) +
scale_color_manual(values=color_guide, name="Style") +
scale_fill_manual(values=color_guide, name="Style") +
labs(x="Days into campaign", y="Responded (%)")
dfModel <- HRTC %>%
filter(userResponses>0) %>%
mutate(Dummy = as.factor(zipcode)) %>%
mutate(userID = as.factor(userID)) %>%
mutate(Category = as.factor(Category)) %>%
mutate(Replied = ifelse(Replied==1, "Yes", "No")) %>%
mutate(Replied = factor(Replied, levels=c("No","Yes")))
mod <- glm(Replied ~ Type + userResponses + campg.num,
data=dfModel, family=binomial(link = "logit"))
newDf <- data.frame(
userResponses = rep(mean(dfModel$userResponses),5),
campg.num = rep(mean(dfModel$campg.num),5),
Type = unique(dfModel$Type)
)
cbind(newDf, predict(mod, newdata=newDf, type="response", se.fit=TRUE)) %>%
rename(prob=fit, se.prob=se.fit) %>%
mutate(
ll = prob - 1.96*se.prob,
ul = prob + 1.96*se.prob,
) %>%
ggplot(aes(x=Type, y = prob)) +
geom_col(aes(fill=Type)) +
geom_errorbar(aes(ymin = ll, ymax = ul), width = 0.2, lty=1, lwd=1) +
# geom_point(shape=18, size=3, fill="black") +
geom_point(aes(x=Type, y=`Reply %`), data=actual) +
scale_color_discrete(name="Message Style") +
scale_fill_discrete(name="Message Style") +
scale_y_continuous(labels=percent) +
labs(title= " Predicted probabilities", x="Question Type", y="Pr(respond to message)")
rm(dfModel, mod, newDf)
rm(actual)
pander(sessionInfo())
R version 3.3.2 (2016-10-31)
**Platform:** x86_64-apple-darwin13.4.0 (64-bit)
locale: en_US.UTF-8||en_US.UTF-8||en_US.UTF-8||C||en_US.UTF-8||en_US.UTF-8
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: bindrcpp(v.0.2.2), jtools(v.2.0.0), GGally(v.1.4.0), waffle(v.0.9.1), ggrepel(v.0.6.6), pander(v.0.6.3), survMisc(v.0.5.5), survminer(v.0.4.3), ggpubr(v.0.2), magrittr(v.1.5), survival(v.2.40-1), lme4(v.1.1-19), Matrix(v.1.2-7.1), stargazer(v.5.2.2), scales(v.0.5.0), knitr(v.1.18), lubridate(v.1.7.1), forcats(v.0.2.0), stringr(v.1.2.0), dplyr(v.0.7.8), purrr(v.0.2.4), readr(v.1.1.1), tidyr(v.0.7.2), tibble(v.1.4.2), ggplot2(v.3.1.0.9000), tidyverse(v.1.2.1) and MASS(v.7.3-45)
loaded via a namespace (and not attached): httr(v.1.3.1), jsonlite(v.1.6), splines(v.3.3.2), modelr(v.0.1.1), assertthat(v.0.2.0), cellranger(v.1.1.0), yaml(v.2.2.0), Rttf2pt1(v.1.3.7), pillar(v.1.3.1), backports(v.1.1.2), lattice(v.0.20-34), glue(v.1.2.0), extrafontdb(v.1.0), digest(v.0.6.13), RColorBrewer(v.1.1-2), rvest(v.0.3.2), minqa(v.1.2.4), colorspace(v.1.3-2), cmprsk(v.2.2-7), htmltools(v.0.3.6), plyr(v.1.8.4), pkgconfig(v.2.0.2), broom(v.0.5.1.9000), haven(v.1.1.0), xtable(v.1.8-2), km.ci(v.0.5-2), KMsurv(v.0.1-5), generics(v.0.0.2), withr(v.2.1.2), lazyeval(v.0.2.1), cli(v.1.0.1), crayon(v.1.3.4), readxl(v.1.0.0), evaluate(v.0.10), nlme(v.3.1-128), xml2(v.1.1.1), tools(v.3.3.2), data.table(v.1.10.4-3), hms(v.0.4.0), munsell(v.0.5.0), rlang(v.0.3.0.1), grid(v.3.3.2), nloptr(v.1.2.1), rstudioapi(v.0.9.0), labeling(v.0.3), rmarkdown(v.1.8), gtable(v.0.2.0), reshape(v.0.8.8), curl(v.3.1), R6(v.2.2.2), gridExtra(v.2.2.1), zoo(v.1.8-4), extrafont(v.0.17), bindr(v.0.1.1), rprojroot(v.1.3-2), stringi(v.1.1.6), Rcpp(v.1.0.0) and tidyselect(v.0.2.3)