Bayesian Analysis on visit length from trial 1

knitr::opts_chunk$set(echo = TRUE)
setwd("C:/Users/marti/OneDrive/Documents/job/bayesian inference/Minicourse/")
rm(list = ls())

library(tidyverse)

## -- Attaching packages --------------------------------------------------------------------------- tidyverse 1.2.1 --

## v ggplot2 3.1.0       v purrr   0.3.2  
## v tibble  2.1.1       v dplyr   0.8.0.1
## v tidyr   0.8.3       v stringr 1.4.0  
## v readr   1.3.1       v forcats 0.4.0

## -- Conflicts ------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(coda)
library(mcmcplots)
library(rstan)

## Loading required package: StanHeaders

## rstan (Version 2.19.2, GitRev: 2e1f913d3ca3)

## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)

## For improved execution time, we recommend calling
## Sys.setenv(LOCAL_CPPFLAGS = '-march=native')
## although this causes Stan to throw an error on a few processors.

## 
## Attaching package: 'rstan'

## The following object is masked from 'package:coda':
## 
##     traceplot

## The following object is masked from 'package:tidyr':
## 
##     extract

library(ggmcmc)

options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)

Estimating variance components on visit length at the Feeder using 3273 records from trial 1

1. Model Eartag

1.1. Check autorrelation, effective sample size, traceplot

ggs_autocorrelation(ggs(outp1)%>%filter(Parameter%in%c("Loc1_t","Loc2_t",
 "Median Weight")))

ggs_autocorrelation(ggs(outp1)%>%filter(Parameter%in%c("var_eartag","var_error","prp_var_eartag","prp_var_error")))

autocorr.diag(outp1)

##              Loc1_t      Loc2_t Median Weight var_eartag     var_error
## Lag 0   1.000000000  1.00000000   1.000000000 1.00000000  1.0000000000
## Lag 1   0.366273241  0.34114005   0.199055425 0.21037059  0.1861834339
## Lag 5   0.132184780  0.10568983   0.001137396 0.06191106 -0.0106205242
## Lag 10  0.082412253  0.05061742   0.008546769 0.03278978 -0.0101749500
## Lag 50 -0.008976241 -0.01528574  -0.003060866 0.02134821  0.0001855811
##        prp_var_eartag prp_var_error       lp__
## Lag 0      1.00000000    1.00000000 1.00000000
## Lag 1      0.19194577    0.19194577 0.47492939
## Lag 5      0.06261349    0.06261349 0.05775022
## Lag 10     0.03359399    0.03359399 0.02487429
## Lag 50     0.01798682    0.01798682 0.02927602

effectiveSize(outp1)

##         Loc1_t         Loc2_t  Median Weight     var_eartag      var_error 
##       1676.642       2202.544       5343.169       3360.375       5487.953 
## prp_var_eartag  prp_var_error           lp__ 
##       3415.842       3415.842       2348.186

geweke.diag(outp1)

## [[1]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##         Loc1_t         Loc2_t  Median Weight     var_eartag      var_error 
##        0.66932        0.63736       -0.07481        1.08593        1.53640 
## prp_var_eartag  prp_var_error           lp__ 
##        1.19137       -1.19137       -1.30622

#traplot(outp1, col =c("red1","blue4","purple3"))
#denplot(outp1, col =c("red1","blue4","purple3"))
traplot(outp1, col =c("blue4"))

denplot(outp1, col =c("purple3"))

1.2. Summary Posterior Distribution

summary(outp1)

## 
## Iterations = 2001:10000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 8000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                      Mean      SD  Naive SE Time-series SE
## Loc1_t            30.9922 2.28958 0.0255983      0.0559160
## Loc2_t            27.9963 2.24944 0.0251495      0.0479305
## Median Weight     -0.1820 0.02173 0.0002430      0.0002973
## var_eartag        14.9460 5.30517 0.0593136      0.0915178
## var_error         62.8460 1.55793 0.0174181      0.0210301
## prp_var_eartag     0.1887 0.05147 0.0005755      0.0008807
## prp_var_error      0.8113 0.05147 0.0005755      0.0008807
## lp__           -8451.3930 3.83088 0.0428306      0.0790556
## 
## 2. Quantiles for each variable:
## 
##                      2.5%        25%        50%        75%      97.5%
## Loc1_t            26.5321    29.4786    30.9702    32.5331    35.4469
## Loc2_t            23.5807    26.4688    27.9744    29.5372    32.3226
## Median Weight     -0.2240    -0.1967    -0.1821    -0.1673    -0.1396
## var_eartag         7.8178    11.2888    13.8402    17.4943    28.4888
## var_error         59.8915    61.7764    62.8380    63.8891    65.9433
## prp_var_eartag     0.1105     0.1524     0.1807     0.2179     0.3114
## prp_var_error      0.6886     0.7821     0.8193     0.8476     0.8895
## lp__           -8459.8401 -8453.7395 -8451.0913 -8448.6816 -8444.7414

print(m1t1)

## Inference for Stan model: M1_trial1_chol.
## 1 chains, each with iter=10000; warmup=2000; thin=1; 
## post-warmup draws per chain=8000, total post-warmup draws=8000.
## 
##                    mean se_mean   sd     2.5%      25%      50%      75%
## beta[1]           30.99    0.06 2.29    26.53    29.48    30.97    32.53
## beta[2]           28.00    0.05 2.25    23.58    26.47    27.97    29.54
## beta[3]           -0.18    0.00 0.02    -0.22    -0.20    -0.18    -0.17
## var_eartag        14.95    0.10 5.31     7.82    11.29    13.84    17.49
## var_error         62.85    0.02 1.56    59.89    61.78    62.84    63.89
## prp_var_eartag     0.19    0.00 0.05     0.11     0.15     0.18     0.22
## prp_var_error      0.81    0.00 0.05     0.69     0.78     0.82     0.85
## lp__           -8451.39    0.08 3.83 -8459.84 -8453.74 -8451.09 -8448.68
##                   97.5% n_eff Rhat
## beta[1]           35.45  1528    1
## beta[2]           32.32  2068    1
## beta[3]           -0.14  5310    1
## var_eartag        28.49  3006    1
## var_error         65.94  5484    1
## prp_var_eartag     0.31  3045    1
## prp_var_error      0.89  3045    1
## lp__           -8444.74  2310    1
## 
## Samples were drawn using NUTS(diag_e) at Fri Oct 04 14:31:39 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

1.3. Posterior Correlation of model parameters

parcorplot(outp1,col = terrain.colors(15,0.5), cex.axis=0.75)

2. Model Eartag + Follower

2.1. Check autorrelation, effective sample size, traceplot

ggs_autocorrelation(ggs(outp2)%>%filter(Parameter%in%c("Loc1_t","Loc2_t",
 "Median Weight")))

ggs_autocorrelation(ggs(outp2)%>%filter(Parameter%in%c("var_eartag","var_follower",
  "var_error","prp_var_eartag","prp_var_follower","prp_var_error")))

autocorr.diag(outp2)

##              Loc1_t      Loc2_t Median Weight  var_eartag  var_follower
## Lag 0   1.000000000  1.00000000   1.000000000 1.000000000  1.0000000000
## Lag 1  -0.008698397 -0.01595702  -0.239243962 0.005879020  0.0530184946
## Lag 5   0.037364939  0.03364811  -0.003442937 0.021175218  0.0177121109
## Lag 10  0.004118998 -0.01507738  -0.005733075 0.010917084  0.0006166859
## Lag 50  0.003986789  0.01099953   0.001998016 0.003869074 -0.0176051977
##          var_error prp_var_eartag prp_var_follower prp_var_error
## Lag 0   1.00000000    1.000000000      1.000000000   1.000000000
## Lag 1  -0.28517021   -0.038797350      0.047086925  -0.035433837
## Lag 5  -0.00962033    0.017857817      0.016174215   0.017587146
## Lag 10 -0.01132947    0.007371976     -0.003895872   0.012624820
## Lag 50  0.01913637    0.003617656     -0.015054811   0.002528805
##               lp__
## Lag 0   1.00000000
## Lag 1   0.47937713
## Lag 5   0.03177195
## Lag 10 -0.00626006
## Lag 50  0.03333844

effectiveSize(outp2)

##           Loc1_t           Loc2_t    Median Weight       var_eartag 
##         4192.665         4682.911        12556.346         4630.570 
##     var_follower        var_error   prp_var_eartag prp_var_follower 
##         5531.188        14381.153         5044.397         5625.635 
##    prp_var_error             lp__ 
##         4952.449         2815.011

geweke.diag(outp2)

## [[1]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##           Loc1_t           Loc2_t    Median Weight       var_eartag 
##          -0.8801          -1.0044          -0.3588           0.7573 
##     var_follower        var_error   prp_var_eartag prp_var_follower 
##           0.6541          -0.7687           0.8675           0.4980 
##    prp_var_error             lp__ 
##          -0.9486           1.5052

traplot(outp2, col = c("red1"))

denplot(outp2,col = c("red1"))

### 2.2. Summary Posterior Distribution

summary(outp2)

## 
## Iterations = 2001:10000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 8000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                        Mean      SD  Naive SE Time-series SE
## Loc1_t            3.105e+01 2.31743 2.591e-02      0.0357900
## Loc2_t            2.853e+01 2.29409 2.565e-02      0.0335237
## Median Weight    -1.828e-01 0.02181 2.438e-04      0.0001946
## var_eartag        1.500e+01 5.30358 5.930e-02      0.0779384
##  var_follower     1.578e+00 0.65285 7.299e-03      0.0087782
## var_error         6.123e+01 1.52239 1.702e-02      0.0126949
## prp_var_eartag    1.893e-01 0.05159 5.768e-04      0.0007264
## prp_var_follower  2.031e-02 0.00827 9.247e-05      0.0001103
## prp_var_error     7.904e-01 0.05089 5.689e-04      0.0007231
## lp__             -8.425e+03 5.47375 6.120e-02      0.1031680
## 
## 2. Quantiles for each variable:
## 
##                        2.5%        25%        50%        75%      97.5%
## Loc1_t            2.655e+01  2.945e+01  3.100e+01  3.261e+01  3.563e+01
## Loc2_t            2.414e+01  2.698e+01  2.850e+01  3.006e+01  3.305e+01
## Median Weight    -2.260e-01 -1.974e-01 -1.827e-01 -1.683e-01 -1.405e-01
## var_eartag        7.776e+00  1.127e+01  1.400e+01  1.764e+01  2.821e+01
##  var_follower     6.907e-01  1.123e+00  1.454e+00  1.905e+00  3.165e+00
## var_error         5.834e+01  6.016e+01  6.119e+01  6.224e+01  6.434e+01
## prp_var_eartag    1.095e-01  1.520e-01  1.819e-01  2.190e-01  3.098e-01
## prp_var_follower  8.868e-03  1.444e-02  1.874e-02  2.453e-02  4.049e-02
## prp_var_error     6.722e-01  7.606e-01  7.975e-01  8.266e-01  8.700e-01
## lp__             -8.436e+03 -8.428e+03 -8.425e+03 -8.421e+03 -8.415e+03

print(m2t1)

## Inference for Stan model: M2_trial1_chol.
## 1 chains, each with iter=10000; warmup=2000; thin=1; 
## post-warmup draws per chain=8000, total post-warmup draws=8000.
## 
##                      mean se_mean   sd     2.5%      25%      50%      75%
## beta[1]             31.05    0.03 2.32    26.55    29.45    31.00    32.61
## beta[2]             28.53    0.03 2.29    24.14    26.98    28.50    30.06
## beta[3]             -0.18    0.00 0.02    -0.23    -0.20    -0.18    -0.17
## var_eartag          15.00    0.08 5.30     7.78    11.27    14.00    17.64
## var_follower         1.58    0.01 0.65     0.69     1.12     1.45     1.91
## var_error           61.23    0.01 1.52    58.34    60.16    61.19    62.24
## prp_var_eartag       0.19    0.00 0.05     0.11     0.15     0.18     0.22
## prp_var_follower     0.02    0.00 0.01     0.01     0.01     0.02     0.02
## prp_var_error        0.79    0.00 0.05     0.67     0.76     0.80     0.83
## lp__             -8424.84    0.10 5.47 -8436.42 -8428.43 -8424.51 -8420.92
##                     97.5% n_eff Rhat
## beta[1]             35.63  4392    1
## beta[2]             33.05  4601    1
## beta[3]             -0.14 12554    1
## var_eartag          28.21  4320    1
## var_follower         3.16  5362    1
## var_error           64.34 14053    1
## prp_var_eartag       0.31  4785    1
## prp_var_follower     0.04  5467    1
## prp_var_error        0.87  4704    1
## lp__             -8415.11  2804    1
## 
## Samples were drawn using NUTS(diag_e) at Fri Oct 04 14:35:04 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

2.3. Posterior Correlation of model parameters

parcorplot(outp2,col = terrain.colors(15,0.5), cex.axis=0.75)

3. Model Eartag + Follower + cov(E,F)

3.1. Check autocorrelation, sample size, traceplot

ggs_autocorrelation(ggs(outp3)%>%filter(Parameter%in%c("Loc1_t","Loc2_t",
 "Median Weight")))

ggs_autocorrelation(ggs(outp3)%>%filter(Parameter%in%c("rho","var_eartag",
"var_follower","var_error","prp_var_eartag","prp_var_follower","prp_var_error")))

autocorr.diag(outp3)

##            Loc1_t     Loc2_t Median Weight          rho   var_eartag
## Lag 0  1.00000000 1.00000000   1.000000000  1.000000000  1.000000000
## Lag 1  0.12834537 0.11110802  -0.194933305  0.097622347  0.082818476
## Lag 5  0.08335296 0.07684287   0.005371631  0.018854238  0.026348178
## Lag 10 0.04402041 0.02832810   0.012007579 -0.013410975  0.018527300
## Lag 50 0.02387449 0.02297548   0.018626619 -0.004072156 -0.003749654
##         var_follower    var_error prp_var_eartag prp_var_follower
## Lag 0   1.0000000000  1.000000000    1.000000000     1.000000e+00
## Lag 1   0.0869614525 -0.182130864    0.018026126     5.616980e-02
## Lag 5   0.0134036024 -0.004900273    0.023441133     1.049945e-02
## Lag 10  0.0349101347  0.011314735    0.016501594     3.385703e-02
## Lag 50 -0.0009954365 -0.015390217   -0.003767423     7.415580e-06
##        prp_var_error         lp__
## Lag 0    1.000000000  1.000000000
## Lag 1    0.040669592  0.492168852
## Lag 5    0.024993849  0.038574453
## Lag 10   0.018153811 -0.001944221
## Lag 50  -0.005880573  0.023948319

effectiveSize(outp3)

##           Loc1_t           Loc2_t    Median Weight              rho 
##         2673.717         2966.033        11872.645         5834.872 
##       var_eartag     var_follower        var_error   prp_var_eartag 
##         4160.817         4545.487        11561.586         4654.739 
## prp_var_follower    prp_var_error             lp__ 
##         4585.298         4447.037         2722.307

geweke.diag(outp3)

## [[1]]
## 
## Fraction in 1st window = 0.1
## Fraction in 2nd window = 0.5 
## 
##           Loc1_t           Loc2_t    Median Weight              rho 
##         -0.99529         -1.22156          0.86641         -0.18930 
##       var_eartag     var_follower        var_error   prp_var_eartag 
##          0.26861         -1.54794         -1.44902          0.19954 
## prp_var_follower    prp_var_error             lp__ 
##         -1.82782          0.06633          1.27820

traplot(outp3,col =c("purple3"))

denplot(outp3,col = c("purple3"))

### 3.2. Summary Posterior Distribution

summary(outp3)

## 
## Iterations = 2001:10000
## Thinning interval = 1 
## Number of chains = 1 
## Sample size per chain = 8000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                        Mean       SD  Naive SE Time-series SE
## Loc1_t            3.111e+01 2.410386 2.695e-02      0.0466153
## Loc2_t            2.857e+01 2.402704 2.686e-02      0.0441176
## Median Weight    -1.835e-01 0.021635 2.419e-04      0.0001986
## rho               5.025e-01 0.198198 2.216e-03      0.0025947
## var_eartag        1.540e+01 5.590413 6.250e-02      0.0866672
##  var_follower     1.652e+00 0.701571 7.844e-03      0.0104059
## var_error         6.121e+01 1.530507 1.711e-02      0.0142340
## prp_var_eartag    1.930e-01 0.052678 5.890e-04      0.0007721
## prp_var_follower  2.102e-02 0.008417 9.411e-05      0.0001243
## prp_var_error     7.860e-01 0.053441 5.975e-04      0.0008014
## lp__             -8.422e+03 5.585947 6.245e-02      0.1070603
## 
## 2. Quantiles for each variable:
## 
##                        2.5%        25%        50%        75%      97.5%
## Loc1_t            2.640e+01  2.943e+01  3.113e+01  3.274e+01    35.7865
## Loc2_t            2.384e+01  2.694e+01  2.857e+01  3.021e+01    33.2358
## Median Weight    -2.254e-01 -1.979e-01 -1.833e-01 -1.688e-01    -0.1417
## rho               4.921e-02  3.807e-01  5.268e-01  6.500e-01     0.8188
## var_eartag        7.948e+00  1.154e+01  1.432e+01  1.800e+01    29.1364
##  var_follower     7.116e-01  1.167e+00  1.515e+00  1.977e+00     3.4075
## var_error         5.833e+01  6.014e+01  6.118e+01  6.225e+01    64.2471
## prp_var_eartag    1.124e-01  1.554e-01  1.856e-01  2.223e-01     0.3164
## prp_var_follower  9.352e-03  1.517e-02  1.949e-02  2.518e-02     0.0422
## prp_var_error     6.610e-01  7.564e-01  7.932e-01  8.240e-01     0.8687
## lp__             -8.434e+03 -8.426e+03 -8.422e+03 -8.418e+03 -8412.3528

print(m3t1)

## Inference for Stan model: M3_trial1_chol.
## 1 chains, each with iter=10000; warmup=2000; thin=1; 
## post-warmup draws per chain=8000, total post-warmup draws=8000.
## 
##                      mean se_mean   sd     2.5%      25%      50%      75%
## beta[1]             31.11    0.05 2.41    26.40    29.43    31.13    32.74
## beta[2]             28.57    0.04 2.40    23.84    26.94    28.57    30.21
## beta[3]             -0.18    0.00 0.02    -0.23    -0.20    -0.18    -0.17
## rho                  0.50    0.00 0.20     0.05     0.38     0.53     0.65
## var_eartag          15.40    0.09 5.59     7.95    11.54    14.32    18.00
## var_follower         1.65    0.01 0.70     0.71     1.17     1.52     1.98
## var_error           61.21    0.01 1.53    58.33    60.14    61.18    62.25
## prp_var_eartag       0.19    0.00 0.05     0.11     0.16     0.19     0.22
## prp_var_follower     0.02    0.00 0.01     0.01     0.02     0.02     0.03
## prp_var_error        0.79    0.00 0.05     0.66     0.76     0.79     0.82
## lp__             -8422.42    0.11 5.59 -8434.37 -8425.98 -8422.14 -8418.48
##                     97.5% n_eff Rhat
## beta[1]             35.79  2647    1
## beta[2]             33.24  2963    1
## beta[3]             -0.14 12337    1
## rho                  0.82  5601    1
## var_eartag          29.14  3987    1
## var_follower         3.41  4684    1
## var_error           64.25 10892    1
## prp_var_eartag       0.32  4421    1
## prp_var_follower     0.04  4930    1
## prp_var_error        0.87  4180    1
## lp__             -8412.35  2683    1
## 
## Samples were drawn using NUTS(diag_e) at Fri Oct 04 14:39:04 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

3.3. Posterior Correlation of model parameters

parcorplot(outp3,col = terrain.colors(15,0.5), cex.axis=0.75)