## Loading required package: Rcpp
## Loading 'brms' package (version 2.10.0). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## This is bayesplot version 1.7.0
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
## rstanarm (Version 2.19.2, packaged: 2019-10-01 20:20:33 UTC)
## - Do not expect the default priors to remain the same in future rstanarm versions.
## Thus, R scripts should specify priors explicitly, even if they are just the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
##
## Attaching package: 'rstanarm'
## The following objects are masked from 'package:brms':
##
## dirichlet, exponential, get_y, lasso, loo_R2, ngrps
## Loading required package: StanHeaders
## Loading required package: ggplot2
## rstan (Version 2.19.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'sjstats'
## The following object is masked from 'package:rstanarm':
##
## se
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:rstan':
##
## extract
library(ggthemes)
library(ggplot2)
library(emmeans)
library(xtable)
run_model <- function(expr, path, reuse = TRUE) {
path <- paste0(path, ".Rds")
if (reuse) {
fit <- suppressWarnings(try(readRDS(path), silent = TRUE))
} else {
fit <- eval(expr)
saveRDS(fit, file = path)
}
fit
}
rescale.center = function(x,m2,s2) {
# rescales x to have mean of m2 and sd of s2
# http://www.stat.columbia.edu/~gelman/research/unpublished/standardizing.pdf
# http://www.stat.columbia.edu/~gelman/research/published/priors11.pdf
m1 = mean(x,na.rm=T)
s1 = sd(x,na.rm=T)
y = m2 + (x - m1) * (s2/s1)
return(y)
}
my_theme = theme_tufte(base_size = 10,base_family = "sans") +
theme(
axis.line.y = element_line(size=.5),
axis.line.x = element_line(size=.5),
legend.title = element_text(size=12,family="sans",face="bold"),
axis.text = element_text(face = "plain"),
axis.title = element_text(size = 12,family="sans",face="bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = .5,size=13,family="sans", face="bold"))
options(mc.cores = parallel::detectCores())
mcmc_controls = list(adapt_delta = 0.9, max_treedepth=15, stepsize=0.2)
rstan_options(auto_write = TRUE)
options(contrasts = c("contr.sdif","contr.sdif"))
color_scheme_set("mix-blue-red")
setwd("/Users/adkinsty/Box/side_projects/covid/modeling/")loc_data = read.csv("../data/extra_data/loc/uszips.csv")
data = read.csv("../data/mar28_covid_data_long.csv") %>%
merge(loc_data,by="zip",all.x = TRUE) %>%
mutate(state = state_id)
# model data
m_data = data %>%
filter(outcome != "deaths") %>%
filter(est < 1e6 & est > 25000) %>% # remove extreme values
mutate(
lag = factor(delay,levels = c(1,2,3)),
cond = factor(group, levels=c("g","t","ta")),
y_est = rescale.center(est,0,1),
pop = rescale.center(population,0,1),
gen_anx = rescale.center(gen_anxiety,0,1),
risk = rescale.center(risk_mu,0,1),
cons = rescale.center(conserv_mu,0,1),
age = rescale.center(age,0,1),
covid_anx = rescale.center(corona_anxious_1,0,1),
covid_news = rescale.center(corona_news_1,0,1),
health = rescale.center(gen_health,0,1)) %>%
select(y_est,est,pop,delay,lag,cond,personal,state_id,id,outcome,
age,est,risk,covid_anx,covid_news,health,cons,gen_anx) %>%
tidyr::drop_na()priors = c(set_prior("normal(0,.5)",class="b"),
set_prior("normal(0,1)",class="sd"),
set_prior("normal(0,1)",class="b",dpar = "sigma"),
set_prior("normal(0,5)",class="b",dpar = "alpha"))
m_formula = bf(
y_est ~ 0 + + (0 + intercept | state_id) + intercept + lag + outcome + cond + personal + cons +
risk + pop + age + health + gen_anx + covid_anx + covid_news,
sigma ~ 0 + intercept + outcome + lag,
alpha ~ 0 + intercept + outcome + lag)
# get_prior(m_formula,m_data,family=skew_normal())This model ignores the data likelihood and merely samples from the priors.
prior_m = run_model(
path = "fitted_models/cases_model_prior_Apr3", reuse = TRUE,
expr= brm(
sample_prior = "only",
formula = m_formula,
data = m_data,
family = skew_normal(),
prior = priors,
control = mcmc_controls,
warmup = 5000, iter = 10000, chains = 4,seed = 420))
prior_m## Family: skew_normal
## Links: mu = identity; sigma = log; alpha = identity
## Formula: y_est ~ 0 + intercept + lag + outcome + cond + personal + cons + risk + pop + age + health + gen_anx + covid_anx + covid_news + (0 + intercept | state_id)
## sigma ~ 0 + intercept + outcome + lag
## alpha ~ 0 + intercept + outcome + lag
## Data: m_data (Number of observations: 4764)
## Samples: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
## total post-warmup samples = 20000
##
## Group-Level Effects:
## ~state_id (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(intercept) 0.80 0.61 0.03 2.26 1.00 18622 10005
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## intercept -0.00 0.50 -0.99 0.98 1.00
## lag2M1 0.00 0.50 -0.99 0.99 1.00
## lag3M2 0.00 0.50 -0.97 0.97 1.00
## outcomecCasesMaCases 0.00 0.50 -0.97 0.97 1.00
## condtMg -0.00 0.50 -0.97 0.97 1.00
## condtaMt -0.00 0.50 -0.99 0.98 1.00
## personal 0.00 0.50 -0.97 0.98 1.00
## cons 0.00 0.49 -0.96 0.96 1.00
## risk -0.00 0.50 -0.97 0.99 1.00
## pop -0.00 0.50 -0.97 0.96 1.00
## age -0.00 0.49 -0.97 0.96 1.00
## health 0.00 0.50 -0.97 0.99 1.00
## gen_anx -0.00 0.50 -0.98 0.98 1.00
## covid_anx -0.00 0.49 -0.97 0.97 1.00
## covid_news -0.00 0.50 -1.00 1.01 1.00
## sigma_intercept 0.00 1.01 -1.98 1.98 1.00
## sigma_outcomecCasesMaCases -0.00 0.99 -1.96 1.96 1.00
## sigma_lag2M1 -0.00 0.99 -1.95 1.95 1.00
## sigma_lag3M2 -0.00 0.99 -1.93 1.94 1.00
## alpha_intercept -0.01 5.02 -9.83 9.76 1.00
## alpha_outcomecCasesMaCases -0.04 5.03 -9.96 9.86 1.00
## alpha_lag2M1 0.02 4.93 -9.70 9.62 1.00
## alpha_lag3M2 -0.03 4.99 -9.84 9.68 1.00
## Bulk_ESS Tail_ESS
## intercept 46311 14507
## lag2M1 47445 13804
## lag3M2 50269 14352
## outcomecCasesMaCases 47606 13689
## condtMg 50632 14098
## condtaMt 48681 13787
## personal 47287 13804
## cons 47996 13501
## risk 49838 14482
## pop 50112 12843
## age 46694 13628
## health 44571 14785
## gen_anx 46953 12615
## covid_anx 46409 14499
## covid_news 43265 13040
## sigma_intercept 46908 13647
## sigma_outcomecCasesMaCases 49963 13858
## sigma_lag2M1 45910 14749
## sigma_lag3M2 47287 14360
## alpha_intercept 49534 14236
## alpha_outcomecCasesMaCases 47839 13699
## alpha_lag2M1 51446 13665
## alpha_lag3M2 48804 13768
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# prior predictive check
prior_y = prior_m$data$y_est
prior_yrep = posterior_predict(prior_m,nsamples=4000)
prior_dens = ppc_dens_overlay(prior_y,prior_yrep[sample(1:4000,200),],
size = .1,alpha=.5) + xlim(-5,10)
ggsave(filename = "../visualization/figures/bayesian/prior_pc_case_model_Apr3.pdf",
plot = prior_dens,units="in",height=4,width=4)## Warning: Removed 53839 rows containing non-finite values (stat_density).
## Warning: Removed 53839 rows containing non-finite values (stat_density).
m = run_model(
path = "fitted_models/cases_model_Apr3", reuse = TRUE,
expr= update(prior_m,sample_prior="no"))
# launch_shinystan(m) # for diagnostics
m## Family: skew_normal
## Links: mu = identity; sigma = log; alpha = identity
## Formula: y_est ~ 0 + intercept + lag + outcome + cond + personal + cons + risk + pop + age + health + gen_anx + covid_anx + covid_news + (0 + intercept | state_id)
## sigma ~ 0 + intercept + outcome + lag
## alpha ~ 0 + intercept + outcome + lag
## Data: m_data (Number of observations: 4764)
## Samples: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
## total post-warmup samples = 20000
##
## Group-Level Effects:
## ~state_id (Number of levels: 50)
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(intercept) 0.07 0.01 0.04 0.09 1.00 5971 10592
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## intercept 0.10 0.02 0.07 0.13 1.00
## lag2M1 0.43 0.02 0.39 0.47 1.00
## lag3M2 0.26 0.03 0.21 0.32 1.00
## outcomecCasesMaCases -0.33 0.02 -0.36 -0.29 1.00
## condtMg 0.02 0.02 -0.01 0.05 1.00
## condtaMt 0.05 0.01 0.02 0.08 1.00
## personal -0.01 0.02 -0.05 0.03 1.00
## cons -0.01 0.01 -0.02 0.00 1.00
## risk -0.01 0.01 -0.02 -0.00 1.00
## pop 0.01 0.01 -0.00 0.02 1.00
## age -0.01 0.01 -0.02 0.00 1.00
## health -0.02 0.01 -0.03 -0.01 1.00
## gen_anx -0.01 0.01 -0.03 0.00 1.00
## covid_anx -0.01 0.01 -0.02 0.01 1.00
## covid_news 0.04 0.01 0.03 0.06 1.00
## sigma_intercept -0.31 0.01 -0.33 -0.29 1.00
## sigma_outcomecCasesMaCases -0.40 0.02 -0.44 -0.36 1.00
## sigma_lag2M1 0.53 0.03 0.48 0.58 1.00
## sigma_lag3M2 0.25 0.03 0.20 0.30 1.00
## alpha_intercept 10.17 0.54 9.17 11.28 1.00
## alpha_outcomecCasesMaCases -2.61 0.90 -4.42 -0.87 1.00
## alpha_lag2M1 2.39 0.94 0.55 4.22 1.00
## alpha_lag3M2 1.27 1.20 -0.98 3.73 1.00
## Bulk_ESS Tail_ESS
## intercept 7425 10876
## lag2M1 10210 13843
## lag3M2 11573 13855
## outcomecCasesMaCases 15127 16092
## condtMg 16260 15669
## condtaMt 15996 16116
## personal 21013 15465
## cons 23096 15974
## risk 21111 15709
## pop 19425 13938
## age 21362 14798
## health 20197 16205
## gen_anx 16222 15323
## covid_anx 15285 14563
## covid_news 18394 15992
## sigma_intercept 16148 15727
## sigma_outcomecCasesMaCases 15528 15515
## sigma_lag2M1 11736 14118
## sigma_lag3M2 11163 13325
## alpha_intercept 15449 14034
## alpha_outcomecCasesMaCases 18791 15695
## alpha_lag2M1 15988 14543
## alpha_lag3M2 17370 14968
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# posterior predictive check
y = m$data$y_est
yrep = posterior_predict(m,nsamples=4000)
dens = ppc_dens_overlay(y,yrep[sample(1:4000,200),],
size = .1,alpha=.5) + xlim(-5,10)
ggsave(filename = "../visualization/figures/bayesian/post_pc_case_model_Apr3.pdf",
plot = dens,units="in",height=4,width=4)
densparams = brms::parnames(m)
color_scheme_set("teal")
areas = bayesplot::mcmc_areas_ridges(x = m,pars = params[5:15],prob = .89) +
vline_0(colour="orange") +
theme_tufte(base_size = 11,base_family = "sans") +
theme(
axis.line.y = element_line(size=.5),
axis.line.x = element_line(size=.5),
legend.title = element_text(size=12,family="sans",face="bold"),
axis.text = element_text(face = "plain"),
axis.title = element_text(size = 12,family="sans",face="bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = .5,size=13,family="sans", face="bold"))
ggsave("../visualization/figures/bayesian/areas_ridges_model_Apr3.pdf",
units="in",height=6,width=6)
areas# Estimates for 3,6, or 9 days future
m_data %>% ggplot(aes(x=lag,y=est)) + stat_summary() + my_theme## No summary function supplied, defaulting to `mean_se()
# outcome estimated: confirmed vs actual cases
m_data %>% ggplot(aes(x=outcome,y=est)) + stat_summary() + my_theme## No summary function supplied, defaulting to `mean_se()
# data format group: graph, table, text
m_data %>% ggplot(aes(x=cond,y=est)) + stat_summary() + my_theme## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
# anxiety about coronavirus
m_data %>% ggplot(aes(x=covid_anx,y=est)) + geom_smooth(method="lm") + my_theme# watch news about coronavirus
m_data %>% ggplot(aes(x=covid_news,y=est)) + geom_smooth(method="lm") + my_theme