Pengantar
vignette menjelaskan cara menggunakan paket tidybayes untuk mengekstraksi tidy data frame dari model Bbrms, dan penarikan dari distribusi posterior variabel model, kecocokan, dan prediksi dari brms :: brm. Untuk pengenalan yang lebih umum tentang tidybayes, lihat vignette(“tidybayes”).
Untuk menjalankan vignette membutuhkan package sebagai berikut :
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(purrr)
##
## Attaching package: 'purrr'
## The following object is masked from 'package:magrittr':
##
## set_names
library(forcats)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
##
## extract
library(modelr)
library(ggdist)
library(tidybayes)
library(ggplot2)
library(cowplot)
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
##
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:magrittr':
##
## extract
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.14.4). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
##
## Attaching package: 'brms'
## The following object is masked from 'package:rstan':
##
## loo
## The following objects are masked from 'package:tidybayes':
##
## dstudent_t, pstudent_t, qstudent_t, rstudent_t
## The following objects are masked from 'package:ggdist':
##
## dstudent_t, pstudent_t, qstudent_t, rstudent_t
## The following object is masked from 'package:stats':
##
## ar
library(ggrepel)
library(RColorBrewer)
library(gganimate)
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
theme_set(theme_tidybayes() + panel_border())
Opsi ini membantu Stan bekerja lebih cepat:
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
Kita mengambil dataset mtcars :
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
Untuk mendemonstrasikan drawing fit curves dengan ketidakpastian, mari sesuaikan model yang sedikit naif ke bagian dari set data mtcars:
m_mpg = brm(
mpg ~ hp * cyl,
data = mtcars,
)
## Compiling Stan program...
## Trying to compile a simple C file
## Running /usr/lib/R/bin/R CMD SHLIB foo.c
## gcc -std=gnu99 -I"/usr/share/R/include" -DNDEBUG -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/Rcpp/include/" -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/" -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/unsupported" -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/BH/include" -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/StanHeaders/include/src/" -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/StanHeaders/include/" -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppParallel/include/" -I"/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DBOOST_NO_AUTO_PTR -include '/home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -fpic -g -O2 -fdebug-prefix-map=/build/r-base-V28x5H/r-base-3.6.3=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g -c foo.c -o foo.o
## In file included from /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/Core:88:0,
## from /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/Dense:1,
## from /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13,
## from <command-line>:0:
## /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name ‘namespace’
## namespace Eigen {
## ^~~~~~~~~
## /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:17: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘{’ token
## namespace Eigen {
## ^
## In file included from /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/Dense:1:0,
## from /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13,
## from <command-line>:0:
## /home/suhartono/R/x86_64-pc-linux-gnu-library/3.6/RcppEigen/include/Eigen/Core:96:10: fatal error: complex: No such file or directory
## #include <complex>
## ^~~~~~~~~
## compilation terminated.
## /usr/lib/R/etc/Makeconf:168: recipe for target 'foo.o' failed
## make: *** [foo.o] Error 1
## Start sampling
Data dari m_mpg adalah dapat diliharsebagai berikut :
head(m_mpg)
## $formula
## mpg ~ hp * cyl
##
## $data
## mpg hp cyl
## Mazda RX4 21.0 110 6
## Mazda RX4 Wag 21.0 110 6
## Datsun 710 22.8 93 4
## Hornet 4 Drive 21.4 110 6
## Hornet Sportabout 18.7 175 8
## Valiant 18.1 105 6
## Duster 360 14.3 245 8
## Merc 240D 24.4 62 4
## Merc 230 22.8 95 4
## Merc 280 19.2 123 6
## Merc 280C 17.8 123 6
## Merc 450SE 16.4 180 8
## Merc 450SL 17.3 180 8
## Merc 450SLC 15.2 180 8
## Cadillac Fleetwood 10.4 205 8
## Lincoln Continental 10.4 215 8
## Chrysler Imperial 14.7 230 8
## Fiat 128 32.4 66 4
## Honda Civic 30.4 52 4
## Toyota Corolla 33.9 65 4
## Toyota Corona 21.5 97 4
## Dodge Challenger 15.5 150 8
## AMC Javelin 15.2 150 8
## Camaro Z28 13.3 245 8
## Pontiac Firebird 19.2 175 8
## Fiat X1-9 27.3 66 4
## Porsche 914-2 26.0 91 4
## Lotus Europa 30.4 113 4
## Ford Pantera L 15.8 264 8
## Ferrari Dino 19.7 175 6
## Maserati Bora 15.0 335 8
## Volvo 142E 21.4 109 4
##
## $prior
## prior class coef group resp dpar nlpar bound
## (flat) b
## (flat) b cyl
## (flat) b hp
## (flat) b hp:cyl
## student_t(3, 19.2, 5.4) Intercept
## student_t(3, 0, 5.4) sigma
## source
## default
## (vectorized)
## (vectorized)
## (vectorized)
## default
## default
##
## $data2
## list()
##
## $stanvars
## list()
## attr(,"class")
## [1] "stanvars"
##
## $model
## // generated with brms 2.14.4
## functions {
## }
## data {
## int<lower=1> N; // total number of observations
## vector[N] Y; // response variable
## int<lower=1> K; // number of population-level effects
## matrix[N, K] X; // population-level design matrix
## int prior_only; // should the likelihood be ignored?
## }
## transformed data {
## int Kc = K - 1;
## matrix[N, Kc] Xc; // centered version of X without an intercept
## vector[Kc] means_X; // column means of X before centering
## for (i in 2:K) {
## means_X[i - 1] = mean(X[, i]);
## Xc[, i - 1] = X[, i] - means_X[i - 1];
## }
## }
## parameters {
## vector[Kc] b; // population-level effects
## real Intercept; // temporary intercept for centered predictors
## real<lower=0> sigma; // residual SD
## }
## transformed parameters {
## }
## model {
## // likelihood including all constants
## if (!prior_only) {
## target += normal_id_glm_lpdf(Y | Xc, Intercept, b, sigma);
## }
## // priors including all constants
## target += student_t_lpdf(Intercept | 3, 19.2, 5.4);
## target += student_t_lpdf(sigma | 3, 0, 5.4)
## - 1 * student_t_lccdf(0 | 3, 0, 5.4);
## }
## generated quantities {
## // actual population-level intercept
## real b_Intercept = Intercept - dot_product(means_X, b);
## }
Kita bisa menggambar kurva yang pas dengan pita probabilitas:
mtcars %>%
group_by(cyl) %>%
data_grid(hp = seq_range(hp, n = 51)) %>%
add_fitted_draws(m_mpg) %>%
ggplot(aes(x = hp, y = mpg, color = ordered(cyl))) +
stat_lineribbon(aes(y = .value)) +
geom_point(data = mtcars) +
scale_fill_brewer(palette = "Greys") +
scale_color_brewer(palette = "Set2")
Atau kita dapat mengambil sampel garis fit dalam jumlah yang masuk akal (katakanlah 100) dan overplot:
mtcars %>%
group_by(cyl) %>%
data_grid(hp = seq_range(hp, n = 101)) %>%
add_fitted_draws(m_mpg, n = 100) %>%
ggplot(aes(x = hp, y = mpg, color = ordered(cyl))) +
geom_line(aes(y = .value, group = paste(cyl, .draw)), alpha = .1) +
geom_point(data = mtcars) +
scale_color_brewer(palette = "Dark2")
Atau kita dapat membuat plot hasil hipotetis (HOP) animasi dari garis yang sesuai:
set.seed(123456)
ndraws = 50
p = mtcars %>%
group_by(cyl) %>%
data_grid(hp = seq_range(hp, n = 101)) %>%
add_fitted_draws(m_mpg, n = ndraws) %>%
ggplot(aes(x = hp, y = mpg, color = ordered(cyl))) +
geom_line(aes(y = .value, group = paste(cyl, .draw))) +
geom_point(data = mtcars) +
scale_color_brewer(palette = "Dark2") +
transition_states(.draw, 0, 1) +
shadow_mark(future = TRUE, color = "gray50", alpha = 1/20)
animate(p, nframes = ndraws, fps = 2.5, width = 432, height = 288, res = 96, dev = "png", type = "cairo")
## Warning: No renderer available. Please install the gifski, av, or magick package
## to create animated output
Atau, untuk prediksi posterior (alih-alih cocok), kita dapat kembali ke pita probabilitas:
mtcars %>%
group_by(cyl) %>%
data_grid(hp = seq_range(hp, n = 101)) %>%
add_predicted_draws(m_mpg) %>%
ggplot(aes(x = hp, y = mpg, color = ordered(cyl), fill = ordered(cyl))) +
stat_lineribbon(aes(y = .prediction), .width = c(.95, .80, .50), alpha = 1/4) +
geom_point(data = mtcars) +
scale_fill_brewer(palette = "Set2") +
scale_color_brewer(palette = "Dark2")
Ini menjadi sulit untuk dinilai berdasarkan kelompok, jadi mungkin lebih baik untuk membagi menjadi beberapa plot. Untungnya, karena kami menggunakan ggplot, fungsionalitas itu sudah ada di dalamnya:
mtcars %>%
group_by(cyl) %>%
data_grid(hp = seq_range(hp, n = 101)) %>%
add_predicted_draws(m_mpg) %>%
ggplot(aes(x = hp, y = mpg)) +
stat_lineribbon(aes(y = .prediction), .width = c(.99, .95, .8, .5), color = brewer.pal(5, "Blues")[[5]]) +
geom_point(data = mtcars) +
scale_fill_brewer() +
facet_grid(. ~ cyl, space = "free_x", scales = "free_x")
Daftar pustaka
1.http://mjskay.github.io/tidybayes/articles/tidy-brms.html