library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loaded glmnet 4.1-7

Load and process data

# Read the merged evaluation results and drop the unnamed index column (`...1`)
data_url <- c(
  "../results/merged_results_gpteval_all.csv"
)
df <- map_df(data_url, read_csv)[, -1]
## New names:
## Rows: 40286 Columns: 11
## ── Column specification ────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): filename, model_engine, graph, condition, domain, rubric, collection
## dbl (4): ...1, temperature, score, max_score
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
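
As a quick sanity check (a sketch, not part of the original pipeline), we can confirm that the expected columns arrived and that scores stay within their maxima; the column names are taken from the specification printed above.

glimpse(df)
df %>%
  summarise(
    n_rows = n(),
    n_models = n_distinct(model_engine),
    scores_in_range = all(score >= 0 & score <= max_score)
  )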

This creates the data frame used for fitting: interaction columns are added, and the factors are releveled so that the intended reference levels come first.

# Base factors and the interaction terms we will consider in the model
factors <- c("model_engine", "graph", "domain", "temperature", "collection")
interactions <- c("`model and temp`", "`graph and domain`", "`graph and collection`", "`domain and collection`", "`graph, domain, collection`")

# Regex used later to tell interaction columns apart from base factors
interaction_pattern <- "model and temp|graph and domain|graph and collection|domain and collection|graph, domain, collection"

df_fit <- df %>%
  mutate(
    temperature = factor(temperature, levels = c(0, .5, 1)),
    `model and temp` = interaction(model_engine, temperature),
    `graph and domain` = interaction(graph, domain),
    `graph and collection` = interaction(graph, collection),
    `domain and collection` = interaction(domain, collection),
    `graph, domain, collection` = interaction(graph, domain, collection)
  ) %>%
  mutate(
    # Set the reference levels used by the dummy coding
    collection = fct_relevel(collection, "Traversal"),
    graph = fct_relevel(graph, c("n7line", "n13line", "n7tree", "n15star", "n21star", "n16cluster")),
    domain = fct_relevel(domain, "ordRooms"),
    model_engine = fct_relevel(model_engine, "replicate-alpaca-7b")
  ) %>%
  select(-filename, -rubric, -condition)
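
To verify the releveling worked as intended, the following sketch (not part of the original analysis) prints the reference level of each releveled factor and a few interaction levels.

# The first level of each factor is the reference level used by the dummy coding
levels(df_fit$collection)[1]   # expected: "Traversal"
levels(df_fit$graph)[1]        # expected: "n7line"
levels(df_fit$domain)[1]       # expected: "ordRooms"

# interaction() joins the parent factors' levels with a "."
head(levels(df_fit$`graph and domain`))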

Model selection with elastic net

We use elastic net for model selection. The goal is to obtain a stable set of predictors, which we then refit with a GLM so that we can get p-values and run an analysis of deviance.

# Extract the predictor matrix (dummy-coded factors and interaction terms)
X <- model.matrix(~ . , data = select(df_fit, -score, -max_score))

# Response: proportion of the maximum rubric score
Y <- df_fit$score / df_fit$max_score

# Find the best lambda via cross-validation (alpha = 0.5 mixes the lasso and ridge penalties)
cv_fit <- cv.glmnet(X, Y, alpha = .5, family = "binomial")
best_lambda <- cv_fit$lambda.min

# Refit the elastic net at the selected lambda
regularized_model <- glmnet(X, Y, alpha = .5, lambda = best_lambda, family = "binomial")
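
Before moving on, it can help to look at the cross-validation curve and count how many coefficients survived the penalty; the sketch below is an optional inspection step, not part of the original pipeline.

plot(cv_fit)                      # binomial deviance vs. log(lambda)
best_lambda                       # lambda minimising the cross-validated deviance

enet_coefs <- coef(regularized_model)
sum(as.numeric(enet_coefs) != 0)  # number of terms retained (including the intercept)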

Fitting a logistic regression model

Now that we have the regularized model, we take the coefficients with non-zero estimates. However, we also keep the basic (non-interaction) terms in the new model, because we need those for the analysis of deviance.

# Map a model term back to the factor or interaction it belongs to
parse_variable <- Vectorize(function(term){
  groups <- c(interactions, factors)
  for(group in groups){
    if(str_detect(term, group)){
      return(group)
    }
  }
  return(term)
})

# Strip the factor/interaction name from a term, leaving only the level
delete_variable <- Vectorize(function(term){
  groups <- c(interactions, factors)
  for(group in groups){
    if(str_detect(term, group)){
      return(str_replace(term, group, ""))
    }
  }
  return(term)
})

# Keep every base (non-interaction) term...
factor_terms <- colnames(X)[map_lgl(colnames(X), ~ !str_detect(.x, interaction_pattern))]
# ...plus any term the elastic net kept with a non-zero coefficient
remaining_terms <- colnames(X)[as.numeric(regularized_model$beta) != 0]
selected_vars <- unique(c(factor_terms, remaining_terms))
selected_model_df <- as.data.frame(X[, selected_vars])
# Refit the selected terms with an unpenalized logistic regression
# (X already contains the intercept column, hence the 0 + .)
final_model <- glm(Y ~ 0 + ., data = selected_model_df, family = binomial(link = "logit"))
fit_summary <- final_model %>%
  tidy() %>%
  mutate(
    term = map_chr(term, ~ str_replace_all(.x, "\\\\", "")),
    factor = parse_variable(term),
    level = delete_variable(term),
    # interaction() joins levels with "."; display them as "a & b" instead
    level = map_chr(level, ~ str_replace_all(.x, "(?<=[a-zA-Z])\\.(?=[a-zA-Z])", " & ")),
    `odds multiple` = round(exp(estimate), 4),
    estimate = round(estimate, 4),
    `p value` = round(p.value, 4)
  ) %>%
  select(factor, level, estimate, `odds multiple`, `p value`)
  

write_csv(fit_summary, "fit_summary_threeway and temp+model_collection-not-condition_enet.csv")
fit_summary
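
For reporting, it may also be useful to attach Wald confidence intervals on the odds scale. The sketch below is an assumed extra step (it uses confint.default(), i.e. Wald rather than profile-likelihood intervals) and is not part of the original pipeline.

# Wald 95% confidence intervals for the odds multiples
ci <- exp(confint.default(final_model))   # matrix with columns "2.5 %" and "97.5 %"
odds_ci <- tibble(
  term = rownames(ci),
  odds_low = ci[, 1],
  odds_high = ci[, 2]
)
head(odds_ci)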

Generate analysis of deviance table

Finally, generate the analysis of deviance table.

anova_factors <- setdiff(fit_summary$factor, "`(Intercept)`")
formula_string <- paste(anova_factors, collapse = " + ")
# Binomial response expressed as (successes, failures)
formula_string <- paste0(c("cbind(score, max_score - score)", formula_string), collapse = " ~ ")
broad_fit <- glm(formula(formula_string), data = df_fit, family = binomial(link = "logit"))

# Sequential (Type I) analysis of deviance
fit_anova <- suppressWarnings(anova(broad_fit))
anova_summary <- fit_anova %>%
  tidy() %>%
  select(term, deviance, df) %>%
  mutate(`p value` = round(map2_dbl(deviance, df, pchisq, lower.tail = FALSE), 4)) %>%
  rename(`Chi-squared Stat (Deviance)` = deviance) %>%
  {.[-1, ]}   # drop the NULL (baseline) row
  
write_csv(anova_summary, "anova_summary_rebuttle.csv")
anova_summary
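
As an optional diagnostic (not part of the original analysis), comparing the residual deviance with its degrees of freedom gives a quick check for overdispersion; a ratio well above 1 would suggest the binomial variance assumption is too tight.

# Sketch: rough overdispersion check for the binomial GLM
broad_fit$deviance / broad_fit$df.residual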