library(tidyverse) # ggplot, dplyr, and friends
library(haven) # Read Stata files
library(cmdstanr)
library(survey) # Panel-ish regression models
library(brms) # The best formula-based interface to Stan
library(broom) # Convert model objects to tidy data frames
library(marginaleffects)
library(scales) # Nicer labeling functions
library(broom.helpers) # Add empty reference categories to tidy model data frames
library(ggforce) # For facet_col()
library(tidybayes) # Manipulate Stan results in tidy ways


candidate <- read_stata("candidate.dta") %>%
  as_factor()

variable_lookup <- tibble::tribble(
  ~variable,    ~variable_nice,
  "atmilitary", "Military",
  "atreligion", "Religion",
  "ated",       "Education",
  "atprof",     "Profession",
  "atmale",     "Gender",
  "atinc",      "Income",
  "atrace",     "Race",
  "atage",      "Age"
) %>%
  mutate(variable_nice = fct_inorder(variable_nice))


candidate_svy_design <- survey::svydesign(
  ids = ~resID,
  weights = ~1,
  data = candidate
)

model_svy <- survey::svyglm(
  selected ~ atmilitary + atreligion + ated +
    atprof + atinc + atrace + atage + atmale,
  design = candidate_svy_design
)

# Extract all the right-hand variables
rhs_vars <- all.vars(stats::update(formula(model_svy), 0 ~ .))

# Make a data frame of all the levels of all the non-numeric things
model_variable_levels <- tibble(
  variable = rhs_vars
) %>%
  mutate(levels = map(variable, ~ {
    x <- candidate[[.x]]
    if (is.numeric(x)) {
      ""
    } else if (is.factor(x)) {
      levels(x)
    } else {
      sort(unique(x))
    }
  })) %>%
  unnest(levels) %>%
  mutate(term = paste0(variable, levels))


# BAYESIAN MODELING!
priors <- c(
  prior(normal(0, 1), class = Intercept),
  prior(normal(0, 1), class = b),
  prior(exponential(1), class = sd)
)

model_brms <- brm(
  bf(selected ~ atmilitary + atreligion + ated +
    atprof + atinc + atrace + atage + atmale +
    (1 | resID)),
  data = candidate,
  family = bernoulli(link = "logit"),
  prior = priors,
  chains = 4,
  cores = detectCores() - 1,
  iter = 2000,
  seed = 1234,
  backend = "cmdstanr", threads = threading(2), refresh = 0,
  file = "candidate_model_brms"
)

# Define the horseshoe prior for the fixed effects
hs_prior <- c(
  prior(horseshoe(df = 1), class = b), # horseshoe prior for regression coefficients
  prior(normal(0, 1), class = Intercept), # prior for the intercept
  prior(exponential(1), class = sd) # prior for random effects standard deviation
)

# Update the brm model with the horseshoe prior
model_brms_hs <- brm(
  bf(selected ~ atmilitary + atreligion + ated +
    atprof + atinc + atrace + atage + atmale +
    (1 | resID)),
  data = candidate,
  family = bernoulli(link = "logit"),
  prior = hs_prior,
  chains = 4,
  cores = detectCores() - 1,
  iter = 3000,
  seed = 1234,
  backend = "cmdstanr",
  threads = threading(2),
  refresh = 0,
  file = "candidate_model_brms_hs",
  control = list(adapt_delta = 0.99, max_treedepth = 15)
)

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(broom)

# Extract coefficients
coef_brms <- as.data.frame(fixef(model_brms)) %>%
  rownames_to_column("variable") %>%
  mutate(model = "Standard")

coef_brms_hs <- as.data.frame(fixef(model_brms_hs)) %>%
  rownames_to_column("variable") %>%
  mutate(model = "Horseshoe")

# Combine coefficients into a single data frame
coef_compare <- bind_rows(coef_brms, coef_brms_hs) %>%
  select(variable, Estimate, Est.Error, model)

# Plot the comparison
ggplot(coef_compare, aes(x = variable, y = Estimate, color = model)) +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_errorbar(aes(ymin = Estimate - Est.Error, ymax = Estimate + Est.Error),
    position = position_dodge(width = 0.5), width = 0.2
  ) +
  labs(
    title = "Coefficient Comparison: Standard vs. Horseshoe Priors",
    x = "Variables", y = "Estimate"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_blank()) # Remove x-axis text labels

sessionInfo()
#;-) R version 4.4.1 (2024-06-14)
#;-) Platform: aarch64-apple-darwin20
#;-) Running under: macOS Sonoma 14.1.1
#;-) 
#;-) Matrix products: default
#;-) BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
#;-) LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
#;-) 
#;-) locale:
#;-) [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#;-) 
#;-) time zone: Europe/Berlin
#;-) tzcode source: internal
#;-) 
#;-) attached base packages:
#;-) [1] grid      stats     graphics  grDevices utils     datasets  methods  
#;-) [8] base     
#;-) 
#;-) other attached packages:
#;-)  [1] tidybayes_3.0.7        ggforce_0.4.2          broom.helpers_1.17.0  
#;-)  [4] scales_1.3.0           marginaleffects_0.23.0 broom_1.0.6           
#;-)  [7] brms_2.21.0            Rcpp_1.0.13            survey_4.4-2          
#;-) [10] survival_3.6-4         Matrix_1.7-0           cmdstanr_0.8.0        
#;-) [13] haven_2.5.4            lubridate_1.9.3        forcats_1.0.0         
#;-) [16] stringr_1.5.1          dplyr_1.1.4            purrr_1.0.2           
#;-) [19] readr_2.1.5            tidyr_1.3.1            tibble_3.2.1          
#;-) [22] ggplot2_3.5.1          tidyverse_2.0.0       
#;-) 
#;-) loaded via a namespace (and not attached):
#;-)  [1] DBI_1.2.3            gridExtra_2.3        inline_0.3.19       
#;-)  [4] rlang_1.1.4          magrittr_2.0.3       matrixStats_1.4.1   
#;-)  [7] compiler_4.4.1       loo_2.8.0            vctrs_0.6.5         
#;-) [10] pkgconfig_2.0.3      arrayhelpers_1.1-0   fastmap_1.2.0       
#;-) [13] backports_1.5.0      labeling_0.4.3       utf8_1.2.4          
#;-) [16] rmarkdown_2.28       tzdb_0.4.0           ps_1.8.0            
#;-) [19] xfun_0.47            reprex_2.1.1         jsonlite_1.8.9      
#;-) [22] highr_0.11           styler_1.10.3        tweenr_2.0.3        
#;-) [25] parallel_4.4.1       R6_2.5.1             stringi_1.8.4       
#;-) [28] StanHeaders_2.32.10  rstan_2.32.6         knitr_1.48          
#;-) [31] R.utils_2.12.3       bayesplot_1.11.1     splines_4.4.1       
#;-) [34] R.cache_0.16.0       timechange_0.3.0     tidyselect_1.2.1    
#;-) [37] rstudioapi_0.16.0    abind_1.4-8          yaml_2.3.10         
#;-) [40] codetools_0.2-20     curl_5.2.2           processx_3.8.4      
#;-) [43] pkgbuild_1.4.4       lattice_0.22-6       withr_3.0.1         
#;-) [46] bridgesampling_1.1-2 posterior_1.6.0      coda_0.19-4.1       
#;-) [49] evaluate_0.24.0      RcppParallel_5.1.9   polyclip_1.10-7     
#;-) [52] xml2_1.3.6           ggdist_3.3.2         pillar_1.9.0        
#;-) [55] tensorA_0.36.2.1     checkmate_2.3.2      stats4_4.4.1        
#;-) [58] distributional_0.4.0 generics_0.1.3       hms_1.1.3           
#;-) [61] rstantools_2.4.0     munsell_0.5.1        glue_1.7.0          
#;-) [64] tools_4.4.1          data.table_1.16.0    fs_1.6.4            
#;-) [67] mvtnorm_1.3-1        mitools_2.4          QuickJSR_1.3.1      
#;-) [70] colorspace_2.1-1     nlme_3.1-164         cli_3.6.3           
#;-) [73] fansi_1.0.6          svUnit_1.0.6         Brobdingnag_1.2-9   
#;-) [76] V8_5.0.0             gtable_0.3.5         R.methodsS3_1.8.2   
#;-) [79] digest_0.6.37        farver_2.1.2         htmltools_0.5.8.1   
#;-) [82] R.oo_1.26.0          lifecycle_1.0.4      MASS_7.3-61

Assignment: Conjoint Analysis

Question 1: Alternative Contexts for Conjoint Analysis

Question 2: Random Effects Syntax in `brm()`

Question 3: Horseshoe Prior

3.1. How did our results change when we used a horseshoe prior?

3.2. In what scenarios might a Horseshoe prior be preferable? Consider specific aspects of the data that make this prior suitable.

Session info

Assignment: Conjoint Analysis

Question 1: Alternative Contexts for Conjoint Analysis

Question 2: Random Effects Syntax in brm()

Question 3: Horseshoe Prior

3.1. How did our results change when we used a horseshoe prior?

3.2. In what scenarios might a Horseshoe prior be preferable? Consider specific aspects of the data that make this prior suitable.

Session info

Question 2: Random Effects Syntax in `brm()`