## ── Attaching packages ─────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: viridisLite
setwd("/Users/adkinsty/Box/side_projects/covid/visualization/")
my_theme = theme_wsj(base_size = 10,color = "grey") +
theme(
axis.line.y = element_line(size=.5),
legend.title = element_text(size=12,family="sans",face="bold"),
axis.text = element_text(face = "plain"),
axis.title = element_text(size = 12,family="sans",face="bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = .5,size=13,family="sans", face="bold"))
theme_set(my_theme)data_input = read.csv("../data/prepped/data_long_study2.csv")
# define and remove 'extreme' values (for better illustration)
extremes = data_input %>%
group_by(delay,outcome) %>%
summarise(upper = quantile(est, c(0.05,0.98)[2],na.rm=T)) %>%
mutate(lower = ifelse(outcome == "deaths",12692,393782))
data = merge(data_input,extremes,
intersect(names(data_input),names(extremes))) %>%
filter(est < upper & est > lower)
wide_data = read.csv("../data/prepped/data_wide_study2.csv") %>% filter(new==1)library(usmap)
loc_data = read.csv("../data/extra/loc/uszips.csv")
all_loc_data = merge(wide_data,loc_data,by="zip") %>%
mutate(state = state_id) %>%
group_by(state) %>%
summarise(n = n())# Population by state, 2015 Census
plot_usmap("states",data = statepop,values="pop_2015") +
scale_fill_viridis("2015 Population") +
theme(legend.position = "right")# Study 1 sample size by state
plot_usmap("states",data = all_loc_data,values="n") +
scale_fill_viridis("Sample size") +
theme(legend.position = "right")oldw <- getOption("warn")
options(warn = -1)
# Age
wide_data %>% ggplot(aes(x = age)) + geom_histogram(stat="count")# Edu
wide_data %>% ggplot(aes(x = edu)) + geom_histogram(stat="count") +
scale_x_discrete(limits=c("Some high school","High school","Some college",
"2-year degree","4-year degree","Advanced Degree"),
labels=c("sHS","HS","sC","2C","4C","A"))data %>%
ggplot(aes(x = est, group = interaction(outcome,delay))) +
geom_histogram(aes(y = ..density.., fill = outcome)) +
scale_fill_viridis_d(begin = .5) +
geom_density(adjust=2,size=1) +
facet_wrap(outcome~delay,nrow = 3,ncol=3,scales="free")## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
data %>%
ggplot(aes(x = conf, group = interaction(outcome,delay))) +
geom_histogram(aes(y = ..density.., fill = outcome)) +
scale_fill_viridis_d(begin = .5) +
geom_density(adjust=2,size=1) +
facet_wrap(outcome~delay,nrow = 3,ncol=3,scales="free")## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Estimates are of confirmed cases (‘cCases’), actual cases (‘aCases’), and deaths (‘deaths’). Estimates from Mar. 11 to Apr. 7 (‘truth’) are from worldometers.info. Estimates for Apr. 10, April 13, and April 16 are from MTurk workers who were administered the earlier worldometers.info data in one of text (‘t’), table (‘ta’) or graph (‘g’) form.
p = all_data %>%
ggplot(aes(x = factor(date), y = est,
colour = group, group = group)) +
stat_summary(geom="point") +
stat_summary(geom="line") +
scale_x_discrete("Date",
labels = c("M11","M18",
"M25","A1","A7",
"A10","A13","A16")) +
scale_color_colorblind() +
facet_wrap(.~outcome,ncol = 3,scales="free_y")
suppressMessages(print(p)) p = all_data %>%
ggplot(aes(x = factor(date), y = est,
colour = age_bin, group = age_bin)) +
stat_summary(geom="point") +
stat_summary(geom="line") +
scale_x_discrete("Date",
labels = c("M11","M18",
"M25","A1","A7",
"A10","A13","A16")) +
scale_color_colorblind() +
facet_wrap(.~outcome,ncol = 3,scales="free_y")
suppressMessages(print(p))## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_path).
Hypotheses adapted from the Study 1 Protocol Google doc.
p = data %>%
ggplot(aes(x=group,y=est)) +
stat_summary() +
facet_wrap(new~outcome,ncol = 3,scales="free")
suppressMessages(print(p)) p = data %>%
ggplot(aes(x=group,y=conf)) +
stat_summary() +
facet_wrap(new~outcome,ncol = 3,scales="free")
suppressMessages(print(p)) p = data %>%
ggplot(aes(x=risk_mu,y=est)) +
geom_smooth(method="auto") +
stat_summary_bin(bins = 10) +
facet_wrap(.~outcome,ncol = 3,
scales="free")
suppressMessages(print(p)) p = data %>%
ggplot(aes(x=know_someone,y=est)) +
stat_summary() +
facet_wrap(.~outcome,ncol = 3,scales="free")
suppressMessages(print(p)) p = wide_data %>% filter(!is.na(age)) %>%
ggplot(aes(x=age,y=prob_of_case,
colour=know_someone,group=know_someone)) +
geom_smooth(method="auto") +
stat_summary_bin(bins = 10)
suppressMessages(print(p)) ## Warning: Removed 2 rows containing missing values (geom_pointrange).
p = wide_data %>% filter(!is.na(age)) %>%
ggplot(aes(x=age,y=prob_of_hosp,
colour=know_someone,group=know_someone)) +
geom_smooth(method="auto") +
stat_summary_bin(bins = 10)
suppressMessages(print(p)) ## Warning: Removed 2 rows containing missing values (geom_pointrange).
p = wide_data %>% filter(!is.na(age)) %>%
ggplot(aes(x=age,y=prob_of_death,
colour=know_someone,group=know_someone)) +
geom_smooth(method="auto") +
stat_summary_bin(bins = 10)
suppressMessages(print(p)) ## Warning: Removed 2 rows containing missing values (geom_pointrange).
Patterns: Participants with conservatism scores near zero estimated the smallest numbers of cases and deaths. But these participants also saw themselves more likely to get sick with the disease.
p = wide_data %>%
ggplot(aes(x=conserv_mu,y=prob_of_case)) +
geom_smooth(method="auto") +
stat_summary_bin(bins = 10)
suppressMessages(print(p)) p = wide_data %>%
ggplot(aes(x=conserv_mu,y=prob_of_hosp)) +
geom_smooth(method="auto") +
stat_summary_bin(bins = 10)
suppressMessages(print(p)) p = wide_data %>%
ggplot(aes(x=conserv_mu,y=prob_of_death)) +
geom_smooth(method="auto") +
stat_summary_bin(bins = 10)
suppressMessages(print(p)) Below are plots corresponding to predictors included in Tyler’s regression model of case estimates.
ad = data_input %>% filter(outcome == "aCases")
cd = data_input %>% filter(outcome == "cCases")
d = cd %>% mutate(c_est = est, a_est = ad$est) %>%
merge(extremes,intersect(names(data_input),names(extremes))) %>%
filter(c_est < upper & c_est > lower &
a_est < upper & a_est > lower) %>%
mutate(bin_cons = conserv_mu > median(conserv_mu,na.rm=T))
library(viridis)
d %>% ggplot(aes(x=c_est/100000,y=a_est/100000,
colour=bin_cons,group=bin_cons)) +
geom_smooth(method="auto") +
geom_point(size=.5,alpha=.5) +
geom_abline(slope=1,linetype="dashed") +
scale_x_continuous("Estimates of confirmed cases (100k)") +
scale_y_continuous("Estimates of actual cases (100k)") +
scale_colour_manual("Conservatism > median",values=c("blue","red")) +
coord_equal(xlim = c(4,22),ylim=c(4,22))## `geom_smooth()` using method = 'loess' and formula 'y ~ x'