Model ordinal dengan prediktor kategoris

library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(purrr)
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:magrittr':
## 
##     set_names
library(forcats)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
library(modelr)
library(ggdist)
library(tidybayes)
library(ggplot2)
library(cowplot)
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
## The following object is masked from 'package:magrittr':
## 
##     extract
library(brms)
## Loading required package: Rcpp
## Loading 'brms' package (version 2.14.4). Useful instructions
## can be found by typing help('brms'). A more detailed introduction
## to the package is available through vignette('brms_overview').
## 
## Attaching package: 'brms'
## The following object is masked from 'package:rstan':
## 
##     loo
## The following objects are masked from 'package:tidybayes':
## 
##     dstudent_t, pstudent_t, qstudent_t, rstudent_t
## The following objects are masked from 'package:ggdist':
## 
##     dstudent_t, pstudent_t, qstudent_t, rstudent_t
## The following object is masked from 'package:stats':
## 
##     ar
library(ggrepel)
library(RColorBrewer)
library(gganimate)
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
theme_set(theme_tidybayes() + panel_border())
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
data(esoph)
head(esoph)
##   agegp     alcgp    tobgp ncases ncontrols
## 1 25-34 0-39g/day 0-9g/day      0        40
## 2 25-34 0-39g/day    10-19      0        10
## 3 25-34 0-39g/day    20-29      0         6
## 4 25-34 0-39g/day      30+      0         5
## 5 25-34     40-79 0-9g/day      0        27
## 6 25-34     40-79    10-19      0         7
m_esoph_brm = brm(
  tobgp ~ agegp, 
  data = esoph, 
  family = cumulative(),
)
## Compiling Stan program...
## Start sampling

Berikut adalah model ordinal dengan prediktor kategorikal:

Kemudian kita dapat memplot probabilitas yang diprediksi untuk setiap kategori hasil dalam setiap tingkat prediktor:

esoph %>%
  data_grid(agegp) %>%
  add_fitted_draws(m_esoph_brm, dpar = TRUE) %>%
  mutate(tobgp = levels(esoph$tobgp)[.category]) %>%
  ggplot(aes(x = agegp, y = .value, color = tobgp)) +
  stat_pointinterval(position = position_dodge(width = .4)) +
  scale_size_continuous(guide = FALSE) +
  scale_color_manual(values = brewer.pal(6, "Blues")[-c(1,2)])

Sulit untuk melihat perubahan kategori pada plot di atas; mari kita coba sesuatu yang memberikan gambaran yang lebih baik tentang distribusi setiap tahun:

esoph_plot = esoph %>%
  data_grid(agegp) %>%
  add_fitted_draws(m_esoph_brm) %>%
  mutate(tobgp = levels(esoph$tobgp)[.category]) %>%
  ggplot(aes(x = .value, y = tobgp)) +
  coord_cartesian(expand = FALSE) +
  facet_grid(. ~ agegp, switch = "x") +
  theme_classic() +
  theme(strip.background = element_blank(), strip.placement = "outside") +
  ggtitle("P(tobacco consumption category | age group)") +
  xlab("age group")

esoph_plot +
  stat_summary(fun = median, geom = "bar", fill = "gray75", width = 1, color = "white") +
  stat_pointinterval()

Batang dalam kasus ini mungkin memberikan kesan presisi yang salah, jadi kami juga dapat mencoba barplot CCDF sebagai gantinya:

esoph_plot +
  stat_ccdfinterval() +
  expand_limits(x = 0) #ensure bars go to 0

Keluaran ini harus sangat mirip dengan keluaran dari model m_esoph_rs yang sesuai dalam vignette (“tidy-rstanarm”) (modulo prior berbeda), meskipun brms melakukan lebih banyak pekerjaan bagi kita untuk memproduksinya daripada rstanarm.

Daftar pustaka :

  1. http://mjskay.github.io/tidybayes/articles/tidy-brms.html