References
- Ch4. Matsuura. Bayesian Statistical Modeling with Stan and R.
Load packages
library(tidyverse)
library(rstan)
library(ggmcmc)
Create data
data1 <- tribble(
~X,~Y,
24,472,
24,403,
26,454,
32,575,
33,546,
35,781,
38,750,
40,601,
40,814,
43,792,
43,745,
44,837,
48,868,
52,988,
56,1092,
56,1007,
57,1233,
58,1202,
59,1123,
59,1314)
data1
## # A tibble: 20 x 2
## X Y
## <dbl> <dbl>
## 1 24 472
## 2 24 403
## 3 26 454
## 4 32 575
## 5 33 546
## 6 35 781
## 7 38 750
## 8 40 601
## 9 40 814
## 10 43 792
## 11 43 745
## 12 44 837
## 13 48 868
## 14 52 988
## 15 56 1092
## 16 56 1007
## 17 57 1233
## 18 58 1202
## 19 59 1123
## 20 59 1314
Check data
ggplot(data = data1, mapping = aes(x = X, y = Y)) +
geom_point() +
geom_smooth(method = "lm") +
theme_bw() + theme(legend.key = element_blank())

lm fit for sanity check
lm1 <- lm(formula = Y ~ X,
data = data1)
tableone::ShowRegTable(lm1, exp = FALSE)
## coef [confint] p
## (Intercept) -119.70 [-262.87, 23.48] 0.096
## X 21.90 [18.71, 25.09] <0.001
Stan fit
## Need to create a list to pass to Stan
data_list <- list(N = nrow(data1),
X = data1$X,
Y = data1$Y)
stan1 <- stan(file = "./salary.stan", data = data_list)
## In file included from file3d83bca6b7b.cpp:8:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/src/stan/model/model_header.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/mat.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/core.hpp:12:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/core/gevv_vvv_vari.hpp:5:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/core/var.hpp:7:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/math/tools/config.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/config.hpp:39:
## /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/config/compiler/clang.hpp:196:11: warning: 'BOOST_NO_CXX11_RVALUE_REFERENCES' macro redefined [-Wmacro-redefined]
## # define BOOST_NO_CXX11_RVALUE_REFERENCES
## ^
## <command line>:6:9: note: previous definition is here
## #define BOOST_NO_CXX11_RVALUE_REFERENCES 1
## ^
## In file included from file3d83bca6b7b.cpp:8:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/src/stan/model/model_header.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/mat.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/core.hpp:42:
## /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/core/set_zero_all_adjoints.hpp:14:17: warning: unused function 'set_zero_all_adjoints' [-Wunused-function]
## static void set_zero_all_adjoints() {
## ^
## In file included from file3d83bca6b7b.cpp:8:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/src/stan/model/model_header.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/mat.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/core.hpp:43:
## /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/core/set_zero_all_adjoints_nested.hpp:17:17: warning: 'static' function 'set_zero_all_adjoints_nested' declared in header file should be declared 'static inline' [-Wunneeded-internal-declaration]
## static void set_zero_all_adjoints_nested() {
## ^
## In file included from file3d83bca6b7b.cpp:8:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/src/stan/model/model_header.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/mat.hpp:12:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/prim/mat.hpp:58:
## /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/autocorrelation.hpp:17:14: warning: function 'fft_next_good_size' is not needed and will not be emitted [-Wunneeded-internal-declaration]
## size_t fft_next_good_size(size_t N) {
## ^
## In file included from file3d83bca6b7b.cpp:8:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/src/stan/model/model_header.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math.hpp:4:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/rev/mat.hpp:12:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/prim/mat.hpp:298:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/prim/arr.hpp:38:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/StanHeaders/include/stan/math/prim/arr/functor/integrate_ode_rk45.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/numeric/odeint.hpp:61:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/numeric/odeint/util/multi_array_adaption.hpp:29:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/multi_array.hpp:21:
## In file included from /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/multi_array/base.hpp:28:
## /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/multi_array/concept_checks.hpp:42:43: warning: unused typedef 'index_range' [-Wunused-local-typedef]
## typedef typename Array::index_range index_range;
## ^
## /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/multi_array/concept_checks.hpp:43:37: warning: unused typedef 'index' [-Wunused-local-typedef]
## typedef typename Array::index index;
## ^
## /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/multi_array/concept_checks.hpp:53:43: warning: unused typedef 'index_range' [-Wunused-local-typedef]
## typedef typename Array::index_range index_range;
## ^
## /Library/Frameworks/R.framework/Versions/3.4/Resources/library/BH/include/boost/multi_array/concept_checks.hpp:54:37: warning: unused typedef 'index' [-Wunused-local-typedef]
## typedef typename Array::index index;
## ^
## 8 warnings generated.
##
## SAMPLING FOR MODEL 'salary' NOW (CHAIN 1).
##
## Gradient evaluation took 4.4e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.44 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.515523 seconds (Warm-up)
## 0.321766 seconds (Sampling)
## 0.837289 seconds (Total)
##
##
## SAMPLING FOR MODEL 'salary' NOW (CHAIN 2).
##
## Gradient evaluation took 1.9e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.19 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.490234 seconds (Warm-up)
## 0.212108 seconds (Sampling)
## 0.702342 seconds (Total)
##
##
## SAMPLING FOR MODEL 'salary' NOW (CHAIN 3).
##
## Gradient evaluation took 1.3e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.454587 seconds (Warm-up)
## 0.341104 seconds (Sampling)
## 0.795691 seconds (Total)
##
##
## SAMPLING FOR MODEL 'salary' NOW (CHAIN 4).
##
## Gradient evaluation took 2.7e-05 seconds
## 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
## Adjust your expectations accordingly!
##
##
## Iteration: 1 / 2000 [ 0%] (Warmup)
## Iteration: 200 / 2000 [ 10%] (Warmup)
## Iteration: 400 / 2000 [ 20%] (Warmup)
## Iteration: 600 / 2000 [ 30%] (Warmup)
## Iteration: 800 / 2000 [ 40%] (Warmup)
## Iteration: 1000 / 2000 [ 50%] (Warmup)
## Iteration: 1001 / 2000 [ 50%] (Sampling)
## Iteration: 1200 / 2000 [ 60%] (Sampling)
## Iteration: 1400 / 2000 [ 70%] (Sampling)
## Iteration: 1600 / 2000 [ 80%] (Sampling)
## Iteration: 1800 / 2000 [ 90%] (Sampling)
## Iteration: 2000 / 2000 [100%] (Sampling)
##
## Elapsed Time: 0.486255 seconds (Warm-up)
## 0.201203 seconds (Sampling)
## 0.687458 seconds (Total)
Model examination
## Model file
get_stanmodel(stan1)
## S4 class stanmodel 'salary' coded as follows:
## data {
## int N;
## real X[N];
## real Y[N];
## }
##
##
## parameters {
##
## /* Parameters of interests */
## real beta0;
## real beta1;
## /* Nuisance parameter */
## real<lower=0> sigma;
##
## }
##
##
## model {
##
## /* Likelihood */
## for (i in 1:N) {
## Y[i] ~ normal(beta0 + beta1 * X[i], sigma);
## }
##
## /* Non-informative prior implied */
## }
## Computation time
get_elapsed_time(stan1)
## warmup sample
## chain:1 0.515523 0.321766
## chain:2 0.490234 0.212108
## chain:3 0.454587 0.341104
## chain:4 0.486255 0.201203
## Initial values for each chain
get_inits(stan1)
## [[1]]
## [[1]]$beta0
## [1] 1.230078
##
## [[1]]$beta1
## [1] -1.826941
##
## [[1]]$sigma
## [1] 1.172365
##
##
## [[2]]
## [[2]]$beta0
## [1] -1.925283
##
## [[2]]$beta1
## [1] -0.4918617
##
## [[2]]$sigma
## [1] 3.384842
##
##
## [[3]]
## [[3]]$beta0
## [1] -1.638182
##
## [[3]]$beta1
## [1] -1.064468
##
## [[3]]$sigma
## [1] 0.8219448
##
##
## [[4]]
## [[4]]$beta0
## [1] 1.331988
##
## [[4]]$beta1
## [1] 1.829562
##
## [[4]]$sigma
## [1] 2.862171
## Seed for each chain
get_seed(stan1)
## [1] 1970907798
## Results
stan1
## Inference for Stan model: salary.
## 4 chains, each with iter=2000; warmup=1000; thin=1;
## post-warmup draws per chain=1000, total post-warmup draws=4000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta0 -118.04 1.97 72.75 -258.25 -165.23 -118.36 -72.16 26.49 1363 1
## beta1 21.86 0.04 1.62 18.59 20.84 21.87 22.89 24.99 1346 1
## sigma 84.53 0.39 15.88 60.32 73.48 82.33 92.68 121.54 1653 1
## lp__ -93.64 0.04 1.33 -97.10 -94.23 -93.28 -92.68 -92.14 1202 1
##
## Samples were drawn using NUTS(diag_e) at Tue Jul 4 00:37:25 2017.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## Summary
summary(stan1)
## $summary
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## beta0 -118.03557 1.97028572 72.748035 -258.25213 -165.23260 -118.35904 -72.15879 26.49050 1363.277 1.000887
## beta1 21.85854 0.04411657 1.618466 18.58960 20.83599 21.86978 22.89211 24.99071 1345.872 1.000892
## sigma 84.52520 0.39050671 15.875316 60.32051 73.48387 82.32750 92.68133 121.54393 1652.676 1.002584
## lp__ -93.64000 0.03847493 1.333885 -97.09937 -94.22646 -93.28080 -92.67910 -92.13741 1201.935 1.001915
##
## $c_summary
## , , chains = chain:1
##
## stats
## parameter mean sd 2.5% 25% 50% 75% 97.5%
## beta0 -117.89551 71.565957 -251.71470 -165.08179 -119.44672 -70.82849 18.38938
## beta1 21.83986 1.594733 18.70088 20.77708 21.86299 22.91317 24.99195
## sigma 84.60532 15.794519 60.18423 73.72317 82.29529 92.75919 121.23665
## lp__ -93.62147 1.345125 -97.01983 -94.20297 -93.26760 -92.65686 -92.12740
##
## , , chains = chain:2
##
## stats
## parameter mean sd 2.5% 25% 50% 75% 97.5%
## beta0 -118.04036 73.516152 -263.51088 -165.96921 -115.03016 -68.40623 19.50686
## beta1 21.86239 1.642312 18.62426 20.83497 21.81397 22.89021 25.08177
## sigma 86.22674 17.474431 60.57548 73.73674 83.48702 96.27961 125.36751
## lp__ -93.75089 1.404648 -97.33940 -94.43465 -93.40912 -92.68607 -92.14880
##
## , , chains = chain:3
##
## stats
## parameter mean sd 2.5% 25% 50% 75% 97.5%
## beta0 -123.11789 70.417550 -265.14200 -165.85381 -125.70109 -77.55371 20.14472
## beta1 21.97516 1.559524 18.73129 21.03883 22.06604 22.91334 25.05479
## sigma 84.02194 14.550677 61.47518 74.07790 82.61394 92.16271 117.89610
## lp__ -93.50731 1.225361 -96.76198 -93.94542 -93.21363 -92.64446 -92.12535
##
## , , chains = chain:4
##
## stats
## parameter mean sd 2.5% 25% 50% 75% 97.5%
## beta0 -113.08853 75.164377 -250.61689 -164.79349 -114.99048 -70.88472 53.12659
## beta1 21.75676 1.669962 18.26603 20.75479 21.76324 22.84880 24.84600
## sigma 83.24679 15.411707 59.16397 72.52632 81.18587 90.84440 119.26291
## lp__ -93.68032 1.344070 -97.08792 -94.24617 -93.30376 -92.71408 -92.17411
## Plot mean and variability of posterior
plot(stan1)

## Trace
traceplot(stan1)

ggmcmc
## ggmcmc do them all
ggmcmc(ggs(stan1, inc_warmup = TRUE, stan_include_auxiliar = TRUE), file = NULL)
## Plotting histograms

## Plotting density plots

## Plotting traceplots

## Plotting running means

## Plotting comparison of partial and full chain

## Plotting autocorrelation plots

## Plotting crosscorrelation plot

## Plotting Potential Scale Reduction Factors

## Plotting Geweke Diagnostic

## Plotting caterpillar plot
## Time taken to generate the report: 40 seconds.