References

Assumptions

Load packages

library(magrittr)
library(XLConnect)
library(rstan)

Load data

datFile <- "nhefs_book.xls"
if(!file.exists(datFile)) {
    download.file(url = "http://cdn1.sph.harvard.edu/wp-content/uploads/sites/1268/2013/01/nhefs_book.xls",
                  destfile = datFile)
}
nhefs <- readWorksheetFromFile(datFile, sheet = 1)
## Complete cases only
nhefs <- nhefs[!is.na(nhefs$wt82_71),]
dim(nhefs)
## [1] 1679   61

Regular frequentist multiple linear regression

outcomeModel <- lm(wt82_71 ~ qsmk + sex + age + race + smokeyrs, data = nhefs)
summary(outcomeModel)
## 
## Call:
## lm(formula = wt82_71 ~ qsmk + sex + age + race + smokeyrs, data = nhefs)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.031  -3.957  -0.039   4.134  45.825 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.94751    0.81219  12.248  < 2e-16 ***
## qsmk         2.95305    0.42228   6.993 3.87e-12 ***
## sex         -0.09579    0.38366  -0.250   0.8029    
## age         -0.21764    0.03214  -6.772 1.75e-11 ***
## race         0.06203    0.53826   0.115   0.9083    
## smokeyrs     0.06257    0.03252   1.924   0.0545 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.476 on 1673 degrees of freedom
## Multiple R-squared:  0.0814, Adjusted R-squared:  0.07865 
## F-statistic: 29.65 on 5 and 1673 DF,  p-value: < 2.2e-16

Bayesian approach using Stan

## Create Stan data
dat <- list(N        = nrow(nhefs),
            p        = 6,
            wt82_71  = nhefs$wt82_71,
            qsmk     = nhefs$qsmk,
            sex      = nhefs$sex,
            age      = nhefs$age,
            race     = nhefs$race,
            smokeyrs = nhefs$smokeyrs)

## Load Stan file
fileName <- "./hernan_book_regression.stan"
stan_code <- readChar(fileName, file.info(fileName)$size)
cat(stan_code)
## data {
##   // Define variables in data
##   // Number of observations (an integer)
##   int<lower=0> N;
##   // Number of parameters
##   int<lower=0> p;
##   // Variables
##   real wt82_71[N];
##   int<lower=0>  qsmk[N];
##   int<lower=0>  sex[N];
##   real<lower=0> age[N];
##   int<lower=0>  race[N];
##   real<lower=0> smokeyrs[N];
## }
## 
## parameters {
##   // Define parameters to estimate
##   real beta[p];
##   
##   // standard deviation (a positive real number)
##   real<lower=0> sigma;
## }
## 
## transformed parameters  {
##   // Mean
##   real mu[N];
##   for (i in 1:N) {
##     mu[i] <- beta[1] + beta[2]*qsmk[i] + beta[3]*sex[i] + beta[4]*age[i] + beta[5]*race[i] + beta[6]*smokeyrs[i]; 
##   }
## }
## 
## model {
##   // Prior part of Bayesian inference (flat if unspecified)
## 
##   // Likelihood part of Bayesian inference
##     wt82_71 ~ normal(mu, sigma);  
## }
## Run Stan
resStan <- stan(model_code = stan_code, data = dat,
                chains = 3, iter = 3000, warmup = 500, thin = 10)

## Show traceplot
traceplot(resStan, pars = c("beta","sigma"), inc_warmup = TRUE)

Comparison

## Bayesian
print(resStan, pars = c("beta","sigma"))
## Inference for Stan model: stan_code.
## 3 chains, each with iter=3000; warmup=500; thin=10; 
## post-warmup draws per chain=250, total post-warmup draws=750.
## 
##          mean se_mean   sd  2.5%   25%   50%   75% 97.5% n_eff Rhat
## beta[1]  9.93    0.03 0.81  8.35  9.37  9.91 10.52 11.46   709    1
## beta[2]  2.95    0.02 0.42  2.12  2.67  2.94  3.24  3.76   750    1
## beta[3] -0.11    0.01 0.38 -0.84 -0.37 -0.11  0.13  0.65   742    1
## beta[4] -0.22    0.00 0.03 -0.28 -0.24 -0.22 -0.19 -0.16   625    1
## beta[5]  0.07    0.02 0.55 -1.03 -0.25  0.05  0.40  1.27   732    1
## beta[6]  0.06    0.00 0.03  0.00  0.04  0.06  0.08  0.13   750    1
## sigma    7.48    0.00 0.13  7.24  7.39  7.48  7.57  7.76   750    1
## 
## Samples were drawn using NUTS(diag_e) at Mon Feb 23 17:50:03 2015.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
## Frequentist
tableone::ShowRegTable(outcomeModel, exp = FALSE)
##             beta [confint]       p     
## (Intercept)  9.95 [8.35, 11.54]  <0.001
## qsmk         2.95 [2.12, 3.78]   <0.001
## sex         -0.10 [-0.85, 0.66]   0.803
## age         -0.22 [-0.28, -0.15] <0.001
## race         0.06 [-0.99, 1.12]   0.908
## smokeyrs     0.06 [-0.00, 0.13]   0.055
summary(outcomeModel)$sigma
## [1] 7.475789