Pengantar

vignette menjelaskan cara menggunakan paket tidybayes untuk mengekstraksi tidy data frame dari model Bbrms, dan penarikan dari distribusi posterior variabel model, kecocokan, dan prediksi dari brms :: brm. Untuk pengenalan yang lebih umum tentang tidybayes, lihat vignette(“tidybayes”).

Untuk menjalankan vignette membutuhkan package sebagai berikut :

library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(purrr)
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:magrittr':
## 
##     set_names
library(forcats)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
library(modelr)
library(ggdist)
library(tidybayes)
library(ggplot2)
library(cowplot)
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
## The following object is masked from 'package:magrittr':
## 
##     extract
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.14.4). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following object is masked from 'package:rstan':
## 
##     loo
## The following objects are masked from 'package:tidybayes':
## 
##     dstudent_t, pstudent_t, qstudent_t, rstudent_t
## The following objects are masked from 'package:ggdist':
## 
##     dstudent_t, pstudent_t, qstudent_t, rstudent_t
## The following object is masked from 'package:stats':
## 
##     ar
library(ggrepel)
library(RColorBrewer)
library(gganimate)
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
theme_set(theme_tidybayes() + panel_border())

Opsi ini membantu Stan bekerja lebih cepat:

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

Kita mengambil dataset mtcars :

head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Untuk mendemonstrasikan drawing fit curves dengan ketidakpastian, mari sesuaikan model yang sedikit naif ke bagian dari set data mtcars:

m_mpg = brm(
  mpg ~ hp * cyl, 
  data = mtcars, 
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /usr/lib/R/bin/R CMD SHLIB foo.c
## gcc -std=gnu99 -I"/usr/share/R/include" -DNDEBUG   -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/Rcpp/include/"  -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/"  -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/unsupported"  -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/BH/include" -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/StanHeaders/include/src/"  -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/StanHeaders/include/"  -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppParallel/include/"  -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1     -fpic  -g -O2 -fdebug-prefix-map=/build/r-base-V28x5H/r-base-3.6.3=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g  -c foo.c -o foo.o
## In file included from /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/Core:88:0,
##                  from /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/Dense:1,
##                  from /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13,
##                  from <command-line>:0:
## /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name ‘namespace’
##  namespace Eigen {
##  ^~~~~~~~~
## /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:17: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token
##  namespace Eigen {
##                  ^
## In file included from /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/Dense:1:0,
##                  from /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13,
##                  from <command-line>:0:
## /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/Core:96:10: fatal error: complex: No such file or directory
##  #include <complex>
##           ^~~~~~~~~
## compilation terminated.
## /usr/lib/R/etc/Makeconf:168: recipe for target 'foo.o' failed
## make: *** [foo.o] Error 1
## Start sampling

Data dari m_mpg adalah dapat diliharsebagai berikut :

head(m_mpg)
## $formula
## mpg ~ hp * cyl 
## 
## $data
##                      mpg  hp cyl
## Mazda RX4           21.0 110   6
## Mazda RX4 Wag       21.0 110   6
## Datsun 710          22.8  93   4
## Hornet 4 Drive      21.4 110   6
## Hornet Sportabout   18.7 175   8
## Valiant             18.1 105   6
## Duster 360          14.3 245   8
## Merc 240D           24.4  62   4
## Merc 230            22.8  95   4
## Merc 280            19.2 123   6
## Merc 280C           17.8 123   6
## Merc 450SE          16.4 180   8
## Merc 450SL          17.3 180   8
## Merc 450SLC         15.2 180   8
## Cadillac Fleetwood  10.4 205   8
## Lincoln Continental 10.4 215   8
## Chrysler Imperial   14.7 230   8
## Fiat 128            32.4  66   4
## Honda Civic         30.4  52   4
## Toyota Corolla      33.9  65   4
## Toyota Corona       21.5  97   4
## Dodge Challenger    15.5 150   8
## AMC Javelin         15.2 150   8
## Camaro Z28          13.3 245   8
## Pontiac Firebird    19.2 175   8
## Fiat X1-9           27.3  66   4
## Porsche 914-2       26.0  91   4
## Lotus Europa        30.4 113   4
## Ford Pantera L      15.8 264   8
## Ferrari Dino        19.7 175   6
## Maserati Bora       15.0 335   8
## Volvo 142E          21.4 109   4
## 
## $prior
##                    prior     class   coef group resp dpar nlpar bound
##                   (flat)         b                                   
##                   (flat)         b    cyl                            
##                   (flat)         b     hp                            
##                   (flat)         b hp:cyl                            
##  student_t(3, 19.2, 5.4) Intercept                                   
##     student_t(3, 0, 5.4)     sigma                                   
##        source
##       default
##  (vectorized)
##  (vectorized)
##  (vectorized)
##       default
##       default
## 
## $data2
## list()
## 
## $stanvars
## list()
## attr(,"class")
## [1] "stanvars"
## 
## $model
## // generated with brms 2.14.4
## functions {
## }
## data {
##   int<lower=1> N;  // total number of observations
##   vector[N] Y;  // response variable
##   int<lower=1> K;  // number of population-level effects
##   matrix[N, K] X;  // population-level design matrix
##   int prior_only;  // should the likelihood be ignored?
## }
## transformed data {
##   int Kc = K - 1;
##   matrix[N, Kc] Xc;  // centered version of X without an intercept
##   vector[Kc] means_X;  // column means of X before centering
##   for (i in 2:K) {
##     means_X[i - 1] = mean(X[, i]);
##     Xc[, i - 1] = X[, i] - means_X[i - 1];
##   }
## }
## parameters {
##   vector[Kc] b;  // population-level effects
##   real Intercept;  // temporary intercept for centered predictors
##   real<lower=0> sigma;  // residual SD
## }
## transformed parameters {
## }
## model {
##   // likelihood including all constants
##   if (!prior_only) {
##     target += normal_id_glm_lpdf(Y | Xc, Intercept, b, sigma);
##   }
##   // priors including all constants
##   target += student_t_lpdf(Intercept | 3, 19.2, 5.4);
##   target += student_t_lpdf(sigma | 3, 0, 5.4)
##     - 1 * student_t_lccdf(0 | 3, 0, 5.4);
## }
## generated quantities {
##   // actual population-level intercept
##   real b_Intercept = Intercept - dot_product(means_X, b);
## }

Kita bisa menggambar kurva yang pas dengan pita probabilitas:

mtcars %>%
  group_by(cyl) %>%
  data_grid(hp = seq_range(hp, n = 51)) %>%
  add_fitted_draws(m_mpg) %>%
  ggplot(aes(x = hp, y = mpg, color = ordered(cyl))) +
  stat_lineribbon(aes(y = .value)) +
  geom_point(data = mtcars) +
  scale_fill_brewer(palette = "Greys") +
  scale_color_brewer(palette = "Set2")

Atau kita dapat mengambil sampel garis fit dalam jumlah yang masuk akal (katakanlah 100) dan overplot:

mtcars %>%
  group_by(cyl) %>%
  data_grid(hp = seq_range(hp, n = 101)) %>%
  add_fitted_draws(m_mpg, n = 100) %>%
  ggplot(aes(x = hp, y = mpg, color = ordered(cyl))) +
  geom_line(aes(y = .value, group = paste(cyl, .draw)), alpha = .1) +
  geom_point(data = mtcars) +
  scale_color_brewer(palette = "Dark2")

Atau kita dapat membuat plot hasil hipotetis (HOP) animasi dari garis yang sesuai:

set.seed(123456)
ndraws = 50

p = mtcars %>%
  group_by(cyl) %>%
  data_grid(hp = seq_range(hp, n = 101)) %>%
  add_fitted_draws(m_mpg, n = ndraws) %>%
  ggplot(aes(x = hp, y = mpg, color = ordered(cyl))) +
  geom_line(aes(y = .value, group = paste(cyl, .draw))) +
  geom_point(data = mtcars) +
  scale_color_brewer(palette = "Dark2") +
  transition_states(.draw, 0, 1) +
  shadow_mark(future = TRUE, color = "gray50", alpha = 1/20)

animate(p, nframes = ndraws, fps = 2.5, width = 432, height = 288, res = 96, dev = "png", type = "cairo")
## Warning: No renderer available. Please install the gifski, av, or magick package
## to create animated output

Atau, untuk prediksi posterior (alih-alih cocok), kita dapat kembali ke pita probabilitas:

mtcars %>%
  group_by(cyl) %>%
  data_grid(hp = seq_range(hp, n = 101)) %>%
  add_predicted_draws(m_mpg) %>%
  ggplot(aes(x = hp, y = mpg, color = ordered(cyl), fill = ordered(cyl))) +
  stat_lineribbon(aes(y = .prediction), .width = c(.95, .80, .50), alpha = 1/4) +
  geom_point(data = mtcars) +
  scale_fill_brewer(palette = "Set2") +
  scale_color_brewer(palette = "Dark2")

Ini menjadi sulit untuk dinilai berdasarkan kelompok, jadi mungkin lebih baik untuk membagi menjadi beberapa plot. Untungnya, karena kami menggunakan ggplot, fungsionalitas itu sudah ada di dalamnya:

mtcars %>%
  group_by(cyl) %>%
  data_grid(hp = seq_range(hp, n = 101)) %>%
  add_predicted_draws(m_mpg) %>%
  ggplot(aes(x = hp, y = mpg)) +
  stat_lineribbon(aes(y = .prediction), .width = c(.99, .95, .8, .5), color = brewer.pal(5, "Blues")[[5]]) +
  geom_point(data = mtcars) +
  scale_fill_brewer() +
  facet_grid(. ~ cyl, space = "free_x", scales = "free_x")

Daftar pustaka

1.http://mjskay.github.io/tidybayes/articles/tidy-brms.html