Nested Variance Components Analysis (Bayesian)

Introduction

Return to this later

        rm(list=ls())
        set.seed(874)
        startTime<-proc.time()
        library(knitr)
        options(width=120)
        opts_chunk$set(comment = "", warning = FALSE, message = FALSE,
                       echo = TRUE, tidy = FALSE, size="tiny",  cache=FALSE,
                       progress=TRUE,
                       cache.path = 'program_Cache/',
                       fig.path='figure/')
         
        knitr::knit_hooks$set(inline = function(x) {
          knitr:::format_sci(x, 'md')
        })

        where<-"home" #this is used in the sourced program 
 
        path <- ""  
     
        work<-    paste("X:/", path, sep = "")
        nonwork<- paste("~/X/", path, sep = "")
        if (where=="home") {wd<- nonwork} else {wd<-work}
        
        path2 <- ""
        
        work2<-    paste("X:/", path2, sep = "")
        nonwork2<- paste("~/X/", path2, sep = "")
        
        if (where=="home") {wd2<- nonwork2} else {wd2<-work2}
        
        work3<-    paste("X:/FUNCTIONS/R", sep = "")
        nonwork3<- paste("~/X/FUNCTIONS/R", sep = "")
        
        if (where=="home") {wd3<- nonwork3} else {wd3<-work3}
        setwd(wd)
        opts_knit$set(root.dir = wd)  ##THIS SETS YOUR WORKING DIRECTORY

        list.of.packages <- c("lme4","rethinking", "rstanarm")
        
        new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
        if(length(new.packages)) install.packages(new.packages)
        
        sapply(X = list.of.packages, require, character.only = TRUE)

        p3 <- function(x) {formatC(x, format="f", digits=3)}
        p4 <- function(x) {formatC(x, format="f", digits=4)}
        p2 <- function(x) {formatC(x, format="f", digits=2)}
         p1 <- function(x) {formatC(x, format="f", digits=0)}
        # p1 <- function(x) {print(formatC(x, format="f", digits=1),quote=FALSE)}
        # p2 <- function(x) {print(formatC(x, format="f", digits=2),quote=FALSE)}
        # p3 <- function(x) {print(formatC(x, format="f", digits=3),quote=FALSE)}
        # p4 <- function(x) {print(formatC(x, format="f", digits=4),quote=FALSE)}
        #perhaps help colour plot text based on loop count
        is.even <- function(x){ x %% 2 == 0 }

Modelling

   data(Glucose, package="VCA") # be specific, nlme also has a Glucose dataset, see 'data()'
    head(Glucose)

  day run result
1   1   1    242
2   1   1    246
3   1   2    245
4   1   2    246
5   2   1    243
6   2   1    242

    dd <- Glucose
  
    dd$run <- rep(1:(length(dd$run)/2),each=2)

    # frequentist
    f <- lme4::lmer(result ~ (1 | day/run), data = dd, 
                  REML = TRUE, na.action = "na.omit")      
        
        
        
   #data(eggs, package="faraway")

     # eggs$labtech <- factor(paste0(eggs$Lab,eggs$Technician))
    #  eggs$labtechsamp <- factor(paste0(eggs$Lab,eggs$Technician,eggs$Sample))
      
      levind1 <- as.numeric(dd$day)
      levind2 <- as.numeric(dd$run)

      sdscal <- sd(dd$result)
      
      eggdat <- list(Nobs=nrow(dd),
                     Nlev1=max(levind1),
                     Nlev2=max(levind2),
                     
                     y=dd$result,
                     levind1=levind1,
                     levind2=levind2,
                    
                     sdscal=sdscal)
      
      nested.stan <-"
      data {
        int<lower=0> Nobs;
        int<lower=0> Nlev1;
        int<lower=0> Nlev2;

        vector[Nobs] y;
        int<lower=1,upper=Nlev1> levind1[Nobs];
        int<lower=1,upper=Nlev2> levind2[Nobs];
      
        real<lower=0> sdscal;
      }
      parameters {
        real mu;
        real<lower=0> sigmalev1;
        real<lower=0> sigmalev2;

        real<lower=0> sigmaeps;
        
        vector[Nlev1] eta1;
        vector[Nlev2] eta2;
 
      }
      transformed parameters {
        vector[Nlev1] ran1;
        vector[Nlev2] ran2;
   
        vector[Nobs] yhat;
        
        ran1  = sigmalev1 * eta1;
        ran2  = sigmalev2 * eta2;
        
        
        for (i in 1:Nobs)
          yhat[i] <- mu+ran1[levind1[i]]+ran2[levind2[i]] ;
        
      }
      model {
        eta1 ~ normal(0, 1);
        eta2 ~ normal(0, 1);
      
        sigmalev1 ~ cauchy(0, 2.5*sdscal);
        sigmalev2 ~ cauchy(0, 2.5*sdscal);
         
        sigmaeps ~ cauchy(0, 2.5*sdscal);
        y ~ normal(yhat, sigmaeps);
      }"
      
      rt <- stanc( model_code="nested.stan", model_name="Nested")
      sm <- stan_model(stanc_ret = rt, verbose=FALSE)

C:/Users/User/Documents/R/win-library/3.2/StanHeaders/include/stan/math/rev/core/set_zero_all_adjoints.hpp:14:17: warning: 'void stan::math::set_zero_all_adjoints()' defined but not used [-Wunused-function]

      system.time(fit <- sampling(sm, data=eggdat))


SAMPLING FOR MODEL 'Nested' NOW (CHAIN 1).

Chain 1, Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 1, Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 1, Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 1, Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 1, Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 1, Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 1, Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 1, Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 1, Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 1, Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 1, Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 1, Iteration: 2000 / 2000 [100%]  (Sampling)
 Elapsed Time: 2.835 seconds (Warm-up)
               0.356 seconds (Sampling)
               3.191 seconds (Total)


SAMPLING FOR MODEL 'Nested' NOW (CHAIN 2).

Chain 2, Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 2, Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 2, Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 2, Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 2, Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 2, Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 2, Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 2, Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 2, Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 2, Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 2, Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 2, Iteration: 2000 / 2000 [100%]  (Sampling)
 Elapsed Time: 2.516 seconds (Warm-up)
               0.425 seconds (Sampling)
               2.941 seconds (Total)

[1] "The following numerical problems occured the indicated number of times after warmup on chain 2"
                                                                                              count
Exception thrown at line 46: normal_log: Location parameter[1] is 1.#INF, but must be finite!     9
[1] "When a numerical problem occurs, the Metropolis proposal gets rejected."
[1] "However, by design Metropolis proposals sometimes get rejected  even when there are no numerical problems."
[1] "Thus, if the number in the 'count' column is small,  do not ask about this message on stan-users."

SAMPLING FOR MODEL 'Nested' NOW (CHAIN 3).

Chain 3, Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 3, Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 3, Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 3, Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 3, Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 3, Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 3, Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 3, Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 3, Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 3, Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 3, Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 3, Iteration: 2000 / 2000 [100%]  (Sampling)
 Elapsed Time: 3.846 seconds (Warm-up)
               0.459 seconds (Sampling)
               4.305 seconds (Total)


SAMPLING FOR MODEL 'Nested' NOW (CHAIN 4).

Chain 4, Iteration:    1 / 2000 [  0%]  (Warmup)
Chain 4, Iteration:  200 / 2000 [ 10%]  (Warmup)
Chain 4, Iteration:  400 / 2000 [ 20%]  (Warmup)
Chain 4, Iteration:  600 / 2000 [ 30%]  (Warmup)
Chain 4, Iteration:  800 / 2000 [ 40%]  (Warmup)
Chain 4, Iteration: 1000 / 2000 [ 50%]  (Warmup)
Chain 4, Iteration: 1001 / 2000 [ 50%]  (Sampling)
Chain 4, Iteration: 1200 / 2000 [ 60%]  (Sampling)
Chain 4, Iteration: 1400 / 2000 [ 70%]  (Sampling)
Chain 4, Iteration: 1600 / 2000 [ 80%]  (Sampling)
Chain 4, Iteration: 1800 / 2000 [ 90%]  (Sampling)
Chain 4, Iteration: 2000 / 2000 [100%]  (Sampling)
 Elapsed Time: 2.269 seconds (Warm-up)
               0.401 seconds (Sampling)
               2.67 seconds (Total)

[1] "The following numerical problems occured the indicated number of times after warmup on chain 4"
                                                                                              count
Exception thrown at line 46: normal_log: Location parameter[1] is 1.#INF, but must be finite!    11
[1] "When a numerical problem occurs, the Metropolis proposal gets rejected."
[1] "However, by design Metropolis proposals sometimes get rejected  even when there are no numerical problems."
[1] "Thus, if the number in the 'count' column is small,  do not ask about this message on stan-users."

   user  system elapsed 
  13.40    0.00   13.43

      print(fit,pars=c("mu","sigmalev1","sigmalev2", "sigmaeps"))

Inference for Stan model: Nested.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

            mean se_mean   sd   2.5%    25%    50%    75%  97.5% n_eff Rhat
mu        244.18    0.01 0.55 243.05 243.82 244.19 244.54 245.24  1975    1
sigmalev1   1.38    0.03 0.74   0.09   0.82   1.38   1.87   2.90   623    1
sigmalev2   1.58    0.03 0.75   0.12   1.05   1.62   2.12   2.95   476    1
sigmaeps    2.97    0.01 0.34   2.38   2.73   2.95   3.19   3.70  1212    1

Samples were drawn using NUTS(diag_e) at Thu Jul 21 22:57:22 2016.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
 The estimated Bayesian Fraction of Missing Information is a measure of
 the efficiency of the sampler with values close to 1 being ideal.
 For each chain, these estimates are
 0.6 0.6 0.6 0.6

References

Computing Environment

R version 3.2.2 (2015-08-14)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 8 x64 (build 9200)

locale:
[1] LC_COLLATE=English_United Kingdom.1252  LC_CTYPE=English_United Kingdom.1252   
[3] LC_MONETARY=English_United Kingdom.1252 LC_NUMERIC=C                           
[5] LC_TIME=English_United Kingdom.1252    

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] reshape_0.8.5        rms_4.5-0            SparseM_1.7          Hmisc_3.17-4         Formula_1.2-1       
 [6] survival_2.39-3      lattice_0.20-33      rstanarm_2.10.1      Rcpp_0.12.4          rethinking_1.58     
[11] rstan_2.10.1         StanHeaders_2.10.0-2 ggplot2_2.1.0        lme4_1.1-12          Matrix_1.2-2        
[16] knitr_1.12.3        

loaded via a namespace (and not attached):
 [1] jsonlite_0.9.19     splines_3.2.2       gtools_3.5.0        threejs_0.2.2       shiny_0.13.2       
 [6] stats4_3.2.2        latticeExtra_0.6-28 yaml_2.1.13         quantreg_5.21       chron_2.3-47       
[11] digest_0.6.9        RColorBrewer_1.1-2  minqa_1.2.4         colorspace_1.2-6    sandwich_2.3-4     
[16] htmltools_0.3.5     httpuv_1.3.3        plyr_1.8.3          dygraphs_0.9        xtable_1.8-2       
[21] mvtnorm_1.0-5       scales_0.4.0        MatrixModels_0.4-1  DT_0.1              TH.data_1.0-7      
[26] shinyjs_0.6         nnet_7.3-12         magrittr_1.5        mime_0.4            polspline_1.1.12   
[31] evaluate_0.9        nlme_3.1-127        MASS_7.3-45         xts_0.9-7           foreign_0.8-66     
[36] rsconnect_0.4.3     tools_3.2.2         loo_0.1.6           data.table_1.9.6    formatR_1.3        
[41] matrixStats_0.50.2  multcomp_1.4-5      stringr_1.0.0       munsell_0.4.3       cluster_2.0.3      
[46] grid_3.2.2          nloptr_1.0.4        htmlwidgets_0.6     miniUI_0.1.1        base64enc_0.1-3    
[51] rmarkdown_0.9.6     gtable_0.2.0        codetools_0.2-14    inline_0.3.14       markdown_0.7.7     
[56] reshape2_1.4.1      R6_2.1.2            gridExtra_2.2.1     zoo_1.7-13          shinystan_2.2.0    
[61] shinythemes_1.0.1   stringi_1.0-1       rpart_4.1-10        acepack_1.3-3.3     coda_0.18-1

[1] "~/X/"

This took 21.53 seconds to execute.

Nested Variance Components Analysis (Bayesian)

Eamonn O’Brien

21 July, 2016

Introduction

Modelling

References

Computing Environment