Variance components (Bayesian & frequentist)

Introduction

Return to this later

        rm(list=ls())
        set.seed(874)
        startTime<-proc.time()
        library(knitr)
        options(width=120)
        opts_chunk$set(comment = "", warning = FALSE, message = FALSE,
                       echo = TRUE, tidy = FALSE, size="tiny",  cache=FALSE,
                       progress=TRUE,
                       cache.path = 'program_Cache/',
                       fig.path='figure/')
         
        knitr::knit_hooks$set(inline = function(x) {
          knitr:::format_sci(x, 'md')
        })

        where<-"home" #this is used in the sourced program 
 
        path <- ""  
     
        work<-    paste("X:/", path, sep = "")
        nonwork<- paste("~/X/", path, sep = "")
        if (where=="home") {wd<- nonwork} else {wd<-work}
        
        path2 <- ""
        
        work2<-    paste("X:/", path2, sep = "")
        nonwork2<- paste("~/X/", path2, sep = "")
        
        if (where=="home") {wd2<- nonwork2} else {wd2<-work2}
        
        work3<-    paste("X:/FUNCTIONS/R", sep = "")
        nonwork3<- paste("~/X/FUNCTIONS/R", sep = "")
        
        if (where=="home") {wd3<- nonwork3} else {wd3<-work3}
        setwd(wd)
        opts_knit$set(root.dir = wd)  ##THIS SETS YOUR WORKING DIRECTORY

        list.of.packages <- c("lme4","rethinking", "rstanarm")
        
        new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
        if(length(new.packages)) install.packages(new.packages)
        
        sapply(X = list.of.packages, require, character.only = TRUE)

        p3 <- function(x) {formatC(x, format="f", digits=3)}
        p4 <- function(x) {formatC(x, format="f", digits=4)}
        p2 <- function(x) {formatC(x, format="f", digits=2)}
         p1 <- function(x) {formatC(x, format="f", digits=0)}
        # p1 <- function(x) {print(formatC(x, format="f", digits=1),quote=FALSE)}
        # p2 <- function(x) {print(formatC(x, format="f", digits=2),quote=FALSE)}
        # p3 <- function(x) {print(formatC(x, format="f", digits=3),quote=FALSE)}
        # p4 <- function(x) {print(formatC(x, format="f", digits=4),quote=FALSE)}
        #perhaps help colour plot text based on loop count
        is.even <- function(x){ x %% 2 == 0 }

Modelling

    data(Glucose, package="VCA") # be specific, nlme also has a Glucose dataset, see 'data()'
    head(Glucose)
  
    dd <- Glucose
  
    dd$run <- rep(1:(length(dd$run)/2),each=2)

    # frequentist
    f <- lme4::lmer(result ~ (1 | day/run), data = dd, 
                  REML = TRUE, na.action = "na.omit")

    # Bayesian rethinking
    # bundle data and some indtructions  
    require(rethinking)
    
    data_list <- list( 
      y = dd$result, 
      day = dd$day, 
      run = rep(1:(length(dd$run)/2),each=2),
      start=list(sigma=sd(dd$result), 
                 sigma_day=sd(dd$result), 
                 sigma_run=sd(dd$result), 
                 a =mean(dd$result) ))
    
    
   # mux <- mean(dd$result)
    # model
    f2 <- alist(
      ( y ~ dnorm(mu,sigma)),
    
      mu <- a + bN[day] + bR[run] ,
      
      a ~ dnorm(244 ,10),
      bN[day] ~ dnorm(0, sigma_day ),
      bR[run] ~ dnorm(0, sigma_run ),
      sigma_run ~ dcauchy(0,1), 
      sigma_day ~ dcauchy(0,1), 
      sigma ~ dcauchy(0,1)
    )
 
    # execute
    rethink1 <- map2stan( f2 , 
                   data=data_list , 
                   iter=100000, warmup=2000, chains=8  )

 

    # Bayesian Rstan 
    require(rstanarm)
    ITER <- 500L
    CHAINS <- 2L
    CORES <- 1L
    SEED <- 12345

    rstan1 <- stan_lmer(result ~ (1 | day/run), data = dd, 
                        prior=cauchy(0,1), #default 2.5
                        prior_intercept = normal( 0,NULL),# default 10
                        prior_covariance = decov(shape =1, scale = 1),
                        adapt_delta = 0.999, chains = CHAINS, cores = 1,
                        seed = SEED)

Model estimation

    print(f, digits=4)

Linear mixed model fit by REML ['lmerMod']
Formula: result ~ (1 | day/run)
   Data: dd
REML criterion at convergence: 422.7308
Random effects:
 Groups   Name        Std.Dev.
 run:day  (Intercept) 1.754   
 day      (Intercept) 1.399   
 Residual             2.811   
Number of obs: 80, groups:  run:day, 40; day, 20
Fixed Effects:
(Intercept)  
      244.2

    precis(rethink1, depth = 1, prob = 0.95, digits = 3)

             Mean StdDev lower 0.95 upper 0.95 n_eff  Rhat
a         244.202  0.515    243.180    245.210 30831 1.000
sigma_run   1.581  0.680      0.226      2.778  5532 1.006
sigma_day   1.137  0.635      0.099      2.290 13249 1.001
sigma       2.931  0.333      2.306      3.589 49520 1.002

    print(rstan1, digits=3)

stan_lmer(formula = result ~ (1 | day/run), data = dd, chains = CHAINS, 
    cores = 1, seed = SEED, prior = cauchy(0, 1), prior_intercept = normal(0, 
        NULL), prior_covariance = decov(shape = 1, scale = 1), 
    adapt_delta = 0.999)

Estimates:
            Median  MAD_SD 
(Intercept) 244.176   0.541
sigma         2.890   0.328

Error terms:
 Groups   Name        Std.Dev.
 run:day  (Intercept) 1.634   
 day      (Intercept) 1.226   
 Residual             2.890   
Num. levels: run:day 40, day 20 

Sample avg. posterior predictive 
distribution of y (X = xbar):
         Median  MAD_SD 
mean_PPD 244.190   0.443

#     posterior_interval(test, pars = c("(Intercept)", "sigma" ))
# 
# 
#     stan_trac(test)
#     stan_hist(test)

    #launch_shinystan(post2)

#     y_rep <- posterior_predict(post2)
#     dim(y_rep)

      

# mat = as.matrix(test)
# 
# # get the random effect columns
# group_cols <- grep(" day:", colnames(mat), value = T)
# group_posteriors <- mat[, group_cols]
# sqrt(median(apply(group_posteriors,2, var)))
# 
# group_cols <- grep(":day:", colnames(mat), value = T)
# group_posteriors <- mat[, group_cols]
# sqrt(mean(apply(group_posteriors,1, var)))
# 
# group_cols <- grep("sigma", colnames(mat), value = T)
# group_posteriors <- mat[, group_cols]
# (median(group_posteriors))
# 
# 
# 
# 
# 
# ##########
# # extract posterior samples
# mat = as.matrix(test)
# 
# # get the random effect columns
# group_cols <- grep(":day", colnames(mat), value = T) #run
# group_posteriors <- mat[, group_cols]
# 
# # example variable slope term: b[var1_scaled donor:donor1]
# variable_names <- unique(gsub("b\\[(.*)\\s.*", "\\1", colnames(group_posteriors)))
# 
# # for each variable, compute its SD at each MCMC iteration
# var_group_summary <- sapply(variable_names, function(vname){
#   var_group_posteriors <- group_posteriors[, grep(vname, colnames(group_posteriors), value = T)]
#   group_sd <- sapply(1:nrow(var_group_posteriors), function(crow){
#     out <- sd(var_group_posteriors[crow, ])
#     return(out)
#   })
#   group_summary <- c(mean = mean(group_sd), quantile(x = group_sd, probs = c(0.5, 0.025, 0.975)))
#   return(group_summary)
# })
# return(var_group_summary)
# }
# ##########







# coef(test)

# cv <- loo(post2)
# 
# par(mfrow = 1:2, mar = c(5,3.8,1,0) + 0.1, las = 3)
# plot(cv, label_points = TRUE)




  
# set.seed(666)
# Omega <- rbind(
#   c(1, 0.3, 0.2),
#   c(0.3, 1, 0.1),
#   c(0.2, 0.1, 1)
# )
# sigma <- c(1, 2, 3)
# Sigma <- diag(sigma) %*% Omega %*% diag(sigma)
# N <- 100 
# y <- mvtnorm::rmvnorm(N, c(0,0,0), Sigma)
# 
# apply(y, 2, mean)
# apply(y, 2, sd)

References

https://groups.google.com/forum/#!topic/stan-users/F23kBsgqGcY
https://github.com/stan-dev/stan/wiki/Prior-Choice-Recommendations
http://stla.github.io/stlapblog/posts/StanLKJprior.html
http://www.psychstatistics.com/2014/12/27/d-lkj-priors/
stan reference manual
https://cran.r-project.org/web/packages/rstanarm/vignettes/rstanarm.html # note-on-prior-beliefs-and-default-priors: The default priors in rstanarm are designed to be weakly informative, by which we mean that they avoid placing unwarranted prior weight on nonsensical parameter values and provide some regularization to avoid overfitting, but also do allow for extreme values if warranted by the data.
http://rstudio-pubs-static.s3.amazonaws.com/170731_a498f35a82f74e009b6d8def040e422f.html#extracting-posterior-samples
https://github.com/stan-dev/rstanarm/wiki/Prior-distributions

Computing Environment

R version 3.2.2 (2015-08-14)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 8 x64 (build 9200)

locale:
[1] LC_COLLATE=English_United Kingdom.1252  LC_CTYPE=English_United Kingdom.1252   
[3] LC_MONETARY=English_United Kingdom.1252 LC_NUMERIC=C                           
[5] LC_TIME=English_United Kingdom.1252    

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] reshape_0.8.5        rms_4.5-0            SparseM_1.7          Hmisc_3.17-4         Formula_1.2-1       
 [6] survival_2.39-3      lattice_0.20-33      rstanarm_2.10.1      Rcpp_0.12.4          rethinking_1.58     
[11] rstan_2.10.1         StanHeaders_2.10.0-2 ggplot2_2.1.0        lme4_1.1-12          Matrix_1.2-2        
[16] knitr_1.12.3        

loaded via a namespace (and not attached):
 [1] jsonlite_0.9.19     splines_3.2.2       gtools_3.5.0        threejs_0.2.2       shiny_0.13.2       
 [6] stats4_3.2.2        latticeExtra_0.6-28 yaml_2.1.13         quantreg_5.21       chron_2.3-47       
[11] digest_0.6.9        RColorBrewer_1.1-2  minqa_1.2.4         colorspace_1.2-6    sandwich_2.3-4     
[16] htmltools_0.3.5     httpuv_1.3.3        plyr_1.8.3          dygraphs_0.9        xtable_1.8-2       
[21] mvtnorm_1.0-5       scales_0.4.0        MatrixModels_0.4-1  DT_0.1              TH.data_1.0-7      
[26] shinyjs_0.6         nnet_7.3-12         magrittr_1.5        mime_0.4            polspline_1.1.12   
[31] evaluate_0.9        nlme_3.1-127        MASS_7.3-45         xts_0.9-7           foreign_0.8-66     
[36] rsconnect_0.4.3     tools_3.2.2         loo_0.1.6           data.table_1.9.6    formatR_1.3        
[41] matrixStats_0.50.2  multcomp_1.4-5      stringr_1.0.0       munsell_0.4.3       cluster_2.0.3      
[46] grid_3.2.2          nloptr_1.0.4        htmlwidgets_0.6     miniUI_0.1.1        base64enc_0.1-3    
[51] rmarkdown_0.9.6     gtable_0.2.0        codetools_0.2-14    inline_0.3.14       markdown_0.7.7     
[56] reshape2_1.4.1      R6_2.1.2            gridExtra_2.2.1     zoo_1.7-13          shinystan_2.2.0    
[61] shinythemes_1.0.1   stringi_1.0-1       rpart_4.1-10        acepack_1.3-3.3     coda_0.18-1

[1] "~/X/"

This took 786.2 seconds to execute.

Variance components (Bayesian & frequentist)

Eamonn O’Brien

21 July, 2016

Introduction

Modelling

Model estimation

References

Computing Environment