TJ Mahr
Sept. 21, 2016
Madison R Users Group
A programming language for probabilistic stats.
# R version
lm(log(earnings) ~ height + male)
data {
int<lower=0> N;
vector[N] earn;
vector[N] height;
vector[N] male;
}
transformed data {
// log transformation
vector[N] log_earn;
log_earn = log(earn);
}
parameters {
vector[3] beta;
real<lower=0> sigma;
}
model {
log_earn ~ normal(beta[1] + beta[2] * height + beta[3] * male, sigma);
}
generated quantities {
// optional
}
~ is a sampling statement.
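For reference, here is a minimal sketch (not from the talk) of how this Stan program could be compiled and sampled with the rstan package. The file name earnings.stan and the data frame earnings are hypothetical stand-ins.

library(rstan)

# Assemble a data list whose names match the `data` block above
stan_data <- list(
  N      = nrow(earnings),
  earn   = earnings$earn,
  height = earnings$height,
  male   = earnings$male
)

# Compile the program and draw posterior samples
fit <- stan(file = "earnings.stan", data = stan_data,
            chains = 4, iter = 2000)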
RStanARM: RStan Applied Regression Modeling. glm -> stan_glm, glmer -> stan_glmer: add stan_ to the front, and add a prior (like the arm package).

# Some toy data
davis <- car::Davis %>% filter(100 < height) %>% as_data_frame
davis
#> # A tibble: 199 × 5
#> sex weight height repwt repht
#> <fctr> <int> <int> <int> <int>
#> 1 M 77 182 77 180
#> 2 F 58 161 51 159
#> 3 F 53 161 54 158
#> 4 M 68 177 70 175
#> 5 F 59 157 59 155
#> 6 M 76 170 76 165
#> 7 M 76 167 77 165
#> 8 M 69 186 73 180
#> 9 M 71 178 71 175
#> 10 M 65 171 64 170
#> # ... with 189 more rows
model <- lm(weight ~ height * sex, davis)
summary(model) %>% print(digits = 2)
#>
#> Call:
#> lm(formula = weight ~ height * sex, data = davis)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -20.9 -4.8 -0.9 4.5 41.1
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -45.71 22.19 -2.1 0.04 *
#> height 0.62 0.13 4.6 7e-06 ***
#> sexM -55.62 32.54 -1.7 0.09 .
#> height:sexM 0.37 0.19 2.0 0.05 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 8 on 195 degrees of freedom
#> Multiple R-squared: 0.64, Adjusted R-squared: 0.64
#> F-statistic: 1.2e+02 on 3 and 195 DF, p-value: <2e-16
library(rstanarm)
#> Loading required package: Rcpp
#> rstanarm (Version 2.12.1, packaged: 2016-09-12 13:08:24 UTC)
#> - Do not expect the default priors to remain the same in future rstanarm versions.
#> Thus, R scripts should specify priors explicitly, even if they are just the defaults.
#> - For execution on a local, multicore CPU with excess RAM we recommend calling
#> options(mc.cores = parallel::detectCores())
I set the mc.cores option when I have a big model that is going to take more than a minute to fit.

Fit the model with stan_glm(). stan_lm() uses a different specification of the prior.

stan_model <- stan_glm(
weight ~ height * sex,
data = davis,
family = gaussian,
prior = normal(location = 0, scale = 5),
prior_intercept = normal(0, 10)
)
#>
#> SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).
#>
#> Chain 1, Iteration: 1 / 2000 [ 0%] (Warmup)
#> Chain 1, Iteration: 200 / 2000 [ 10%] (Warmup)
#> Chain 1, Iteration: 400 / 2000 [ 20%] (Warmup)
#> Chain 1, Iteration: 600 / 2000 [ 30%] (Warmup)
#> Chain 1, Iteration: 800 / 2000 [ 40%] (Warmup)
#> Chain 1, Iteration: 1000 / 2000 [ 50%] (Warmup)
#> Chain 1, Iteration: 1001 / 2000 [ 50%] (Sampling)
#> Chain 1, Iteration: 1200 / 2000 [ 60%] (Sampling)
#> Chain 1, Iteration: 1400 / 2000 [ 70%] (Sampling)
#> Chain 1, Iteration: 1600 / 2000 [ 80%] (Sampling)
#> Chain 1, Iteration: 1800 / 2000 [ 90%] (Sampling)
#> Chain 1, Iteration: 2000 / 2000 [100%] (Sampling)
#> Elapsed Time: 1.718 seconds (Warm-up)
#> 1.881 seconds (Sampling)
#> 3.599 seconds (Total)
#>
#>
#> SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).
#>
#> Chain 2, Iteration: 1 / 2000 [ 0%] (Warmup)
#> Chain 2, Iteration: 200 / 2000 [ 10%] (Warmup)
#> Chain 2, Iteration: 400 / 2000 [ 20%] (Warmup)
#> Chain 2, Iteration: 600 / 2000 [ 30%] (Warmup)
#> Chain 2, Iteration: 800 / 2000 [ 40%] (Warmup)
#> Chain 2, Iteration: 1000 / 2000 [ 50%] (Warmup)
#> Chain 2, Iteration: 1001 / 2000 [ 50%] (Sampling)
#> Chain 2, Iteration: 1200 / 2000 [ 60%] (Sampling)
#> Chain 2, Iteration: 1400 / 2000 [ 70%] (Sampling)
#> Chain 2, Iteration: 1600 / 2000 [ 80%] (Sampling)
#> Chain 2, Iteration: 1800 / 2000 [ 90%] (Sampling)
#> Chain 2, Iteration: 2000 / 2000 [100%] (Sampling)
#> Elapsed Time: 1.924 seconds (Warm-up)
#> 2.052 seconds (Sampling)
#> 3.976 seconds (Total)
#>
#>
#> SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).
#>
#> Chain 3, Iteration: 1 / 2000 [ 0%] (Warmup)
#> Chain 3, Iteration: 200 / 2000 [ 10%] (Warmup)
#> Chain 3, Iteration: 400 / 2000 [ 20%] (Warmup)
#> Chain 3, Iteration: 600 / 2000 [ 30%] (Warmup)
#> Chain 3, Iteration: 800 / 2000 [ 40%] (Warmup)
#> Chain 3, Iteration: 1000 / 2000 [ 50%] (Warmup)
#> Chain 3, Iteration: 1001 / 2000 [ 50%] (Sampling)
#> Chain 3, Iteration: 1200 / 2000 [ 60%] (Sampling)
#> Chain 3, Iteration: 1400 / 2000 [ 70%] (Sampling)
#> Chain 3, Iteration: 1600 / 2000 [ 80%] (Sampling)
#> Chain 3, Iteration: 1800 / 2000 [ 90%] (Sampling)
#> Chain 3, Iteration: 2000 / 2000 [100%] (Sampling)
#> Elapsed Time: 1.75 seconds (Warm-up)
#> 1.788 seconds (Sampling)
#> 3.538 seconds (Total)
#>
#>
#> SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).
#>
#> Chain 4, Iteration: 1 / 2000 [ 0%] (Warmup)
#> Chain 4, Iteration: 200 / 2000 [ 10%] (Warmup)
#> Chain 4, Iteration: 400 / 2000 [ 20%] (Warmup)
#> Chain 4, Iteration: 600 / 2000 [ 30%] (Warmup)
#> Chain 4, Iteration: 800 / 2000 [ 40%] (Warmup)
#> Chain 4, Iteration: 1000 / 2000 [ 50%] (Warmup)
#> Chain 4, Iteration: 1001 / 2000 [ 50%] (Sampling)
#> Chain 4, Iteration: 1200 / 2000 [ 60%] (Sampling)
#> Chain 4, Iteration: 1400 / 2000 [ 70%] (Sampling)
#> Chain 4, Iteration: 1600 / 2000 [ 80%] (Sampling)
#> Chain 4, Iteration: 1800 / 2000 [ 90%] (Sampling)
#> Chain 4, Iteration: 2000 / 2000 [100%] (Sampling)
#> Elapsed Time: 1.672 seconds (Warm-up)
#> 2.082 seconds (Sampling)
#> 3.754 seconds (Total)
# comment to make the bottom of the output visible
stan_model
#> stan_glm(formula = weight ~ height * sex, family = gaussian,
#> data = davis, prior = normal(location = 0, scale = 5), prior_intercept = normal(0,
#> 10))
#>
#> Estimates:
#> Median MAD_SD
#> (Intercept) -48.1 21.5
#> height 0.6 0.1
#> sexM -49.9 29.8
#> height:sexM 0.3 0.2
#> sigma 8.0 0.4
#>
#> Sample avg. posterior predictive
#> distribution of y (X = xbar):
#> Median MAD_SD
#> mean_PPD 65.3 0.8
#>
#> Observations: 199 Number of unconstrained parameters: 5
summary(stan_model)
#> stan_glm(formula = weight ~ height * sex, family = gaussian,
#> data = davis, prior = normal(location = 0, scale = 5), prior_intercept = normal(0,
#> 10))
#>
#> Family: gaussian (identity)
#> Algorithm: sampling
#> Posterior sample size: 4000
#> Observations: 199
#>
#> Estimates:
#> mean sd 2.5% 25% 50% 75% 97.5%
#> (Intercept) -48.2 21.4 -90.7 -62.3 -48.1 -33.2 -8.1
#> height 0.6 0.1 0.4 0.5 0.6 0.7 0.9
#> sexM -50.0 30.7 -110.2 -70.5 -49.9 -30.6 12.3
#> height:sexM 0.3 0.2 0.0 0.2 0.3 0.5 0.7
#> sigma 8.0 0.4 7.3 7.7 8.0 8.3 8.9
#> mean_PPD 65.3 0.8 63.7 64.8 65.3 65.9 66.9
#> log-posterior -708.8 1.6 -712.7 -709.6 -708.5 -707.6 -706.7
#>
#> Diagnostics:
#> mcse Rhat n_eff
#> (Intercept) 0.6 1.0 1254
#> height 0.0 1.0 1258
#> sexM 0.9 1.0 1086
#> height:sexM 0.0 1.0 1070
#> sigma 0.0 1.0 2092
#> mean_PPD 0.0 1.0 2562
#> log-posterior 0.0 1.0 1441
#>
#> For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# comment to make the bottom of the output visible
mean_PPD is the predicted value for a completely average observation.

Coerce to a data-frame. Columns are parameters. One row per posterior sample.
samples <- stan_model %>% as.data.frame %>% tbl_df
samples
#> # A tibble: 4,000 × 5
#> `(Intercept)` height sexM `height:sexM` sigma
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 -56.71142 0.6895427 -68.550433 0.44384885 7.537152
#> 2 -56.70521 0.6909141 -69.411803 0.44392578 7.784114
#> 3 -56.34580 0.6865143 -69.497981 0.44949406 7.795977
#> 4 -30.71059 0.5202990 -58.429948 0.40582612 7.991750
#> 5 -34.68467 0.5452743 -64.090664 0.42866842 7.784008
#> 6 -87.38731 0.8865953 2.711235 0.02298263 8.324237
#> 7 -98.27211 0.9459078 15.229156 -0.05056481 8.665427
#> 8 -58.71536 0.6977810 -13.289139 0.13100168 7.894166
#> 9 -63.19315 0.7205729 -22.734855 0.17883580 8.501026
#> 10 -71.94144 0.7791263 -52.550236 0.33906170 8.051117
#> # ... with 3,990 more rows
ggplot(samples) + aes(x = height) + geom_histogram()
posterior_interval(stan_model)
#> 5% 95%
#> (Intercept) -84.07171214 -13.2795824
#> height 0.42566160 0.8566897
#> sexM -100.94954649 0.7700961
#> height:sexM 0.04578185 0.6331447
#> sigma 7.41023334 8.7265783
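These intervals are just quantiles of the posterior draws. As a quick check (a sketch, not part of the original slides), the same numbers can be recovered by hand from the samples data frame built above:

# Central 90% interval for the height coefficient, computed directly
# from the posterior draws
quantile(samples$height, probs = c(0.05, 0.95))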
Basic plot() method shows 80% and 95% intervals.
plot(stan_model)
# Match a subset of the parameters
plot(stan_model, regex_pars = "height")
This thing is pretty awesome.
launch_shinystan(stan_model)
[Demo outside of slides.]
Get a data-frame with the parameters from each sample. We now have 4000 plausible regression lines.
df_model <- stan_model %>% as_data_frame()
df_model
#> # A tibble: 4,000 × 5
#> `(Intercept)` height sexM `height:sexM` sigma
#> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 -56.71142 0.6895427 -68.550433 0.44384885 7.537152
#> 2 -56.70521 0.6909141 -69.411803 0.44392578 7.784114
#> 3 -56.34580 0.6865143 -69.497981 0.44949406 7.795977
#> 4 -30.71059 0.5202990 -58.429948 0.40582612 7.991750
#> 5 -34.68467 0.5452743 -64.090664 0.42866842 7.784008
#> 6 -87.38731 0.8865953 2.711235 0.02298263 8.324237
#> 7 -98.27211 0.9459078 15.229156 -0.05056481 8.665427
#> 8 -58.71536 0.6977810 -13.289139 0.13100168 7.894166
#> 9 -63.19315 0.7205729 -22.734855 0.17883580 8.501026
#> 10 -71.94144 0.7791263 -52.550236 0.33906170 8.051117
#> # ... with 3,990 more rows
Apply the group effects to get regression lines for each sex.
df_model2 <- df_model %>%
mutate(F_Intercept = `(Intercept)`, F_Slope = height,
M_Intercept = `(Intercept)` + sexM,
M_Slope = height + `height:sexM`) %>%
select(F_Intercept:M_Slope)
df_model2
#> # A tibble: 4,000 × 4
#> F_Intercept F_Slope M_Intercept M_Slope
#> <dbl> <dbl> <dbl> <dbl>
#> 1 -56.71142 0.6895427 -125.26185 1.1333915
#> 2 -56.70521 0.6909141 -126.11701 1.1348399
#> 3 -56.34580 0.6865143 -125.84378 1.1360084
#> 4 -30.71059 0.5202990 -89.14054 0.9261251
#> 5 -34.68467 0.5452743 -98.77534 0.9739427
#> 6 -87.38731 0.8865953 -84.67607 0.9095779
#> 7 -98.27211 0.9459078 -83.04295 0.8953430
#> 8 -58.71536 0.6977810 -72.00450 0.8287827
#> 9 -63.19315 0.7205729 -85.92801 0.8994087
#> 10 -71.94144 0.7791263 -124.49167 1.1181880
#> # ... with 3,990 more rows
Plot lines with the median parameter values and a random sample of lines.
fits <- sample_n(df_model2, 200)
medians <- df_model2 %>% summarise_each(funs = funs(median))
p2 <- ggplot(davis) +
aes(x = height, y = weight, color = sex) +
geom_abline(aes(color = "F", intercept = F_Intercept,
slope = F_Slope), data = fits, alpha = .075) +
geom_abline(aes(color = "M", intercept = M_Intercept,
slope = M_Slope), data = fits, alpha = .075) +
geom_abline(aes(color = "F", intercept = F_Intercept,
slope = F_Slope), data = medians, size = 1.25) +
geom_abline(aes(color = "M", intercept = M_Intercept,
slope = M_Slope), data = medians, size = 1.25) +
geom_point() +
theme(legend.position = c(0, 1), legend.justification = c(0, 1))
Get a sample of height values within each group's range.
# do some stuff
# ...
new_data
#> # A tibble: 160 × 3
#> Observation sex height
#> <chr> <chr> <dbl>
#> 1 1 F 148.0000
#> 2 2 F 148.3797
#> 3 3 F 148.7595
#> 4 4 F 149.1392
#> 5 5 F 149.5190
#> 6 6 F 149.8987
#> 7 7 F 150.2785
#> 8 8 F 150.6582
#> 9 9 F 151.0380
#> 10 10 F 151.4177
#> # ... with 150 more rows
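The code that builds new_data is omitted above. One way to construct it (a hypothetical sketch; the talk's actual code may differ) is to lay out 80 evenly spaced heights within each sex's observed range:

# Sketch: 80 evenly spaced heights per sex, spanning each group's
# observed range in the davis data (160 rows total)
new_data <- davis %>%
  group_by(sex) %>%
  do(data_frame(height = seq(min(.$height), max(.$height), length.out = 80))) %>%
  ungroup() %>%
  mutate(Observation = as.character(row_number()),
         sex = as.character(sex)) %>%
  select(Observation, sex, height)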
With the normal predict() function on a classical regression model, we use the
model parameters to get the expected mean (\( \mu_i \)) for each row in newdata.
With posterior_linpred(), we do the same thing, but for each of the 4000
posterior samples.
linpreds <- posterior_linpred(stan_model, newdata = new_data)
dim(linpreds)
#> [1] 4000 160
# In long format... 4000 samples x 160 data points
tidy_linpreds <- linpreds %>%
as_data_frame %>%
tibble::rownames_to_column("Draw") %>%
tidyr::gather(Observation, Value, -Draw)
tidy_linpreds
#> # A tibble: 640,000 × 3
#> Draw Observation Value
#> <chr> <chr> <dbl>
#> 1 1 1 45.34089
#> 2 2 1 45.55008
#> 3 3 1 45.25832
#> 4 4 1 46.29367
#> 5 5 1 46.01592
#> 6 6 1 43.82879
#> 7 7 1 41.72225
#> 8 8 1 44.55623
#> 9 9 1 43.45163
#> 10 10 1 43.36925
#> # ... with 639,990 more rows
Summarize the posterior predictions for each new data point. Go from 4000 rows per new point to just one row per new data point.
df_predictions <- tidy_linpreds %>%
group_by(Observation) %>%
summarise(median = median(Value),
ymin = quantile(Value, .025),
ymax = quantile(Value, .975)) %>%
left_join(new_data)
df_predictions
#> # A tibble: 160 × 6
#> Observation median ymin ymax sex height
#> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 1 46.26271 41.56672 50.55823 F 148.0000
#> 2 10 48.44859 44.61635 52.04461 F 151.4177
#> 3 100 69.23704 66.83380 71.73073 M 171.1772
#> 4 101 69.65821 67.32576 72.05880 M 171.6076
#> 5 102 70.09021 67.80754 72.40900 M 172.0380
#> 6 103 70.51185 68.29674 72.76241 M 172.4684
#> 7 104 70.93182 68.79742 73.11356 M 172.8987
#> 8 105 71.35920 69.29262 73.47099 M 173.3291
#> 9 106 71.78164 69.78014 73.83019 M 173.7595
#> 10 107 72.20361 70.23412 74.17940 M 174.1899
#> # ... with 150 more rows
p3 <- ggplot(davis) +
aes(x = height, y = weight, color = sex, group = sex) +
geom_point() +
geom_ribbon(aes(ymin = ymin, ymax = ymax, y = NULL, color = NULL),
data = df_predictions, fill = "grey60", alpha = .4) +
geom_line(aes(y = median), data = df_predictions, size = 1.25) +
theme(legend.position = c(0, 1), legend.justification = c(0, 1))
RStanARM does not like posterior_linpred(). The documentation says:

See also: posterior_predict to draw from the posterior predictive distribution of the outcome, which is almost always preferable.
Why? Because we have samples of the error term sigma, and we're not using
them!
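To make that concrete, here is a hand-rolled sketch (not how the slides do it) of what posterior_predict() adds on top of posterior_linpred(): each predicted outcome is drawn from Normal(mu, sigma), using each posterior sample's own sigma.

# mu: one expected mean per posterior draw per new observation
mu <- posterior_linpred(stan_model, newdata = new_data)

# sigma: one residual-sd value per posterior draw
sigma <- as.data.frame(stan_model)$sigma

# Simulate outcomes by adding normal noise with each draw's sigma.
# (Matrices are column-major, so rep(sigma, ...) lines up each draw's
# sigma with its row in every column.)
y_rep <- mu + rnorm(length(mu), mean = 0, sd = rep(sigma, times = ncol(mu)))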
Same plan as before, but use posterior_predict() to get predictions for
observations (\( y_i \)), instead of estimates of the mean (\( \mu_i \)).
# ?posterior_predict
preds <- posterior_predict(stan_model, newdata = new_data)
dim(preds)
#> [1] 4000 160
tidy_preds <- preds %>%
# Rename columns from V1, V2, ... to 1, 2, ...
as_data_frame %>% setNames(seq_len(ncol(.))) %>%
tibble::rownames_to_column("Draw") %>%
tidyr::gather(Observation, Value, -Draw)
tidy_preds
#> # A tibble: 640,000 × 3
#> Draw Observation Value
#> <chr> <chr> <dbl>
#> 1 1 1 48.09020
#> 2 2 1 56.69339
#> 3 3 1 43.43079
#> 4 4 1 45.32523
#> 5 5 1 52.31126
#> 6 6 1 49.02409
#> 7 7 1 45.03329
#> 8 8 1 36.15589
#> 9 9 1 51.22175
#> 10 10 1 67.86039
#> # ... with 639,990 more rows
df_predictions <- tidy_preds %>%
group_by(Observation) %>%
summarise(median = median(Value),
ymin = quantile(Value, .025),
ymax = quantile(Value, .975)) %>%
left_join(new_data)
df_predictions
#> # A tibble: 160 × 6
#> Observation median ymin ymax sex height
#> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
#> 1 1 46.20671 29.78864 63.33542 F 148.0000
#> 2 10 48.40533 32.28516 64.73203 F 151.4177
#> 3 100 69.18329 53.50192 84.44524 M 171.1772
#> 4 101 69.75366 53.64065 85.40446 M 171.6076
#> 5 102 69.82163 54.19379 85.86467 M 172.0380
#> 6 103 70.37902 54.75848 86.75807 M 172.4684
#> 7 104 71.01157 54.98491 86.40210 M 172.8987
#> 8 105 71.60191 55.75243 87.27941 M 173.3291
#> 9 106 71.55881 56.19622 87.82290 M 173.7595
#> 10 107 72.33715 55.81252 88.64755 M 174.1899
#> # ... with 150 more rows
p4 <- ggplot(davis) +
aes(x = height, y = weight, color = sex, group = sex) +
geom_point() +
geom_ribbon(aes(ymin = ymin, ymax = ymax, y = NULL, color = NULL),
data = df_predictions, fill = "grey60", alpha = .2) +
geom_line(aes(y = median),
data = df_predictions, size = 1.25) +
theme(legend.position = c(0, 1), legend.justification = c(0, 1))
Do we need the sex-height interaction?
Do we need the sex predictor at all?
Let's fit alternative models.
stan_model_no_inter <- update(stan_model, weight ~ sex + height)
stan_model_no_group <- update(stan_model, weight ~ height)
For classical regression models, we could compare models with AIC and BIC.
model_no_inter <- update(model, weight ~ sex + height)
model_no_group <- update(model, weight ~ height)
model_list <- list(
no_group = model_no_group,
no_inter = model_no_inter,
inter = model)
model_list %>% lapply(AIC) %>% unlist
#> no_group no_inter inter
#> 1421.572 1401.590 1399.692
model_list %>% lapply(BIC) %>% unlist
#> no_group no_inter inter
#> 1431.452 1414.764 1416.158
Or with anova().
anova(model_no_group, model_no_inter, model)
#> Analysis of Variance Table
#>
#> Model 1: weight ~ height
#> Model 2: weight ~ sex + height
#> Model 3: weight ~ height * sex
#> Res.Df RSS Df Sum of Sq F Pr(>F)
#> 1 197 14312
#> 2 196 12815 1 1496.72 23.2250 2.892e-06 ***
#> 3 195 12567 1 248.64 3.8582 0.05092 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
For RStanARM, we compare the models using approximate leave-one-out cross-validation. The details are outside the scope of this talk.
loo1 <- loo(stan_model_no_group)
loo2 <- loo(stan_model_no_inter)
loo3 <- loo(stan_model)
compare(loo1, loo2, loo3)
#> looic se_looic elpd_loo se_elpd_loo p_loo se_p_loo
#> loo3 1401.7 33.9 -700.8 16.9 6.7 2.1
#> loo2 1402.9 33.3 -701.5 16.6 5.3 1.9
#> loo1 1422.7 31.9 -711.3 15.9 4.2 1.7
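The loo package can also compare two models head to head (a sketch; output not shown here). With exactly two loo objects, compare() reports the difference in expected log predictive density (elpd_diff) along with its standard error.

# Head to head: no-interaction model (loo2) vs. interaction model (loo3)
compare(loo2, loo3)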
Based on the LOO information criterion (looic, lower is better), the model with the interaction is expected to perform the best on out-of-sample data, but the se_looic indicates considerable overlap with the model with no interaction term.

What about the priors? By default, the prior normal(0, 5) gets rescaled to match each predictor variable, as if we had used scale() on our measurements so that they are scale free (with mean = 0 and SD = 1). With posterior_vs_prior(), RStanARM will compare samples from the posterior and the prior.
posterior_vs_prior(stan_model)
# Select just parameters with "height" in the name
posterior_vs_prior(stan_model, regex_pars = "height")
# use the faceting options
comparison <- posterior_vs_prior(
stan_model,
group_by_parameter = TRUE,
facet_args = list(scales = "free_y"),
prob = .95)
comparison + theme_grey() + guides(color = FALSE)
We can also sample from the prior directly.
stan_model_prior <- stan_glm(
weight ~ height * sex,
data = davis,
family = gaussian,
prior = normal(0, 5),
prior_intercept = normal(0, 10),
# this line is new:
prior_PD = TRUE
)
Our initial prior says that a 1-cm increase in height may predict a 0 +/- 7 kg change in weight in women. This is a very generous interval for this effect!
summary(stan_model_prior, probs = c(.1, .5, .9))
#> stan_glm(formula = weight ~ height * sex, family = gaussian,
#> data = davis, prior = normal(0, 5), prior_intercept = normal(0,
#> 10), prior_PD = TRUE)
#>
#> Family: gaussian (identity)
#> Algorithm: sampling
#> Posterior sample size: 4000
#> Observations: 199
#>
#> Estimates:
#> mean sd 10% 50% 90%
#> (Intercept) 5.8 1208.5 -1531.0 10.7 1553.6
#> height -0.1 6.9 -8.9 0.0 8.8
#> sexM 1.9 126.0 -164.8 4.0 161.9
#> height:sexM 0.0 0.7 -0.9 0.0 0.9
#> sigma 22.5 335.0 0.9 5.2 31.3
#> mean_PPD -3.8 258.4 -335.8 -1.1 326.4
#> log-posterior -13.6 1.5 -15.6 -13.2 -11.9
#>
#> Diagnostics:
#> mcse Rhat n_eff
#> (Intercept) 19.1 1.0 4000
#> height 0.1 1.0 4000
#> sexM 2.0 1.0 4000
#> height:sexM 0.0 1.0 4000
#> sigma 5.3 1.0 3941
#> mean_PPD 4.1 1.0 4000
#> log-posterior 0.0 1.0 1843
#>
#> For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
# comment
We didn't use stan_lm() because those models have just one prior for all the parameters: a prior on R2.
lm_model <- stan_lm(
weight ~ height * sex,
data = davis,
# Prior: I asked Wolfram Alpha for height and weight
# correlation in adults and squared it
prior = R2(0.63))
summary(lm_model)
#> stan_lm(formula = weight ~ height * sex, data = davis, prior = R2(0.63))
#>
#> Family: gaussian (identity)
#> Algorithm: sampling
#> Posterior sample size: 4000
#> Observations: 199
#>
#> Estimates:
#> mean sd 2.5% 25% 50% 75% 97.5%
#> (Intercept) -44.5 21.9 -87.8 -59.3 -44.4 -30.1 -2.1
#> height 0.6 0.1 0.4 0.5 0.6 0.7 0.9
#> sexM -55.4 32.2 -118.6 -76.7 -54.7 -33.6 6.2
#> height:sexM 0.4 0.2 0.0 0.2 0.4 0.5 0.7
#> sigma 8.1 0.4 7.3 7.8 8.0 8.3 8.9
#> log-fit_ratio 0.0 0.0 -0.1 0.0 0.0 0.0 0.1
#> R2 0.6 0.0 0.6 0.6 0.6 0.7 0.7
#> mean_PPD 65.3 0.8 63.7 64.7 65.3 65.8 66.8
#> log-posterior -700.1 2.0 -704.9 -701.3 -699.8 -698.6 -697.2
#>
#> Diagnostics:
#> mcse Rhat n_eff
#> (Intercept) 0.5 1.0 1899
#> height 0.0 1.0 1903
#> sexM 0.7 1.0 2355
#> height:sexM 0.0 1.0 2298
#> sigma 0.0 1.0 2621
#> log-fit_ratio 0.0 1.0 2163
#> R2 0.0 1.0 2440
#> mean_PPD 0.0 1.0 4000
#> log-posterior 0.1 1.0 1108
#>
#> For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).