A model for multiple responses using the categorical distribution

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.5     ✓ dplyr   1.0.3
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(rstan)

## Loading required package: StanHeaders

## rstan (Version 2.21.2, GitRev: 2e1f913d3ca3)

## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)

## 
## Attaching package: 'rstan'

## The following object is masked from 'package:tidyr':
## 
##     extract

library(bayesplot)

## This is bayesplot version 1.8.0

## - Online documentation and vignettes at mc-stan.org/bayesplot

## - bayesplot theme set to bayesplot::theme_default()

##    * Does _not_ affect other ggplot2 plots

##    * See ?bayesplot_theme_set for details on theme setting

(true_theta <- tibble(P01 = .2, 
                      P02 = .2,
                      P03 = .2,
                      P04 = .2,
                      P05 =  1 -
                        (P01 + P02 + P03 + P04)))

## # A tibble: 1 x 5
##     P01   P02   P03   P04   P05
##   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1   0.2   0.2   0.2   0.2 0.200

sum(true_theta)

## [1] 1

N_obs <- 100 
ans_cat <- extraDistr::rcat(N_obs, prob = as.matrix(true_theta))
ans_cat

##   [1] 5 2 5 1 3 3 5 3 2 3 1 1 5 5 5 1 1 5 3 5 1 1 4 2 1 3 4 2 2 4 3 3 2 2 3 4 2
##  [38] 4 4 1 1 2 3 3 1 3 3 3 3 4 2 5 4 2 1 3 2 1 3 5 2 1 2 3 5 5 1 2 2 5 2 2 1 3
##  [75] 1 3 1 5 4 4 4 5 2 4 3 2 2 4 4 4 1 2 4 3 1 2 2 1 5 3

data_cat <-  list(N_obs = N_obs,w_ans = ans_cat)
str(data_cat)

## List of 2
##  $ N_obs: num 100
##  $ w_ans: num [1:100] 5 2 5 1 3 3 5 3 2 3 ...

fit_cat <- stan(file = 'categorical.stan', data = data_cat)

## 
## SAMPLING FOR MODEL 'categorical' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 2.7e-05 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 0.083508 seconds (Warm-up)
## Chain 1:                0.09474 seconds (Sampling)
## Chain 1:                0.178248 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'categorical' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 1.8e-05 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.18 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 0.084546 seconds (Warm-up)
## Chain 2:                0.092167 seconds (Sampling)
## Chain 2:                0.176713 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'categorical' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 1.8e-05 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.18 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 0.084136 seconds (Warm-up)
## Chain 3:                0.091769 seconds (Sampling)
## Chain 3:                0.175905 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'categorical' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 1.7e-05 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.17 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 0.084694 seconds (Warm-up)
## Chain 4:                0.086337 seconds (Sampling)
## Chain 4:                0.171031 seconds (Total)
## Chain 4:

print(fit_cat, pars = c("theta"))

## Inference for Stan model: categorical.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##          mean se_mean   sd 2.5%  25%  50%  75% 97.5% n_eff Rhat
## theta[1] 0.21       0 0.04 0.14 0.18 0.21 0.23  0.29  4418    1
## theta[2] 0.24       0 0.04 0.16 0.21 0.23 0.26  0.32  4188    1
## theta[3] 0.23       0 0.04 0.15 0.20 0.22 0.25  0.31  4200    1
## theta[4] 0.16       0 0.03 0.10 0.14 0.16 0.19  0.24  4162    1
## theta[5] 0.16       0 0.04 0.10 0.14 0.16 0.19  0.24  3997    1
## 
## Samples were drawn using NUTS(diag_e) at Tue Jan 26 13:00:50 2021.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

as.data.frame(fit_cat) %>%
  select(starts_with("theta")) %>%
  mcmc_recover_hist(true = unlist(true_theta)) +
  coord_cartesian(xlim = c(0, 1))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

dafta pustaka :

https://vasishth.github.io/bayescogsci/book/modeling-multiple-categorical-responses.html#sec:cat

A model for multiple responses using the categorical distribution

Copy Right @ Prof Dr Suhartono M.Kom @ Magister Informatika UIN Maulana Malik @ (http://suhartono.lecturer.uin-malang.ac.id/riwayat-hidup/)

26 Desember 2020