0. Objective

Find parsimonious gradient boosting model for PF5.

1. Predict F

1.0 Evaluate Accuracy with Study 5 Data

We use the cleaned data (n=1460) for analysis in this notebook.
In this notebook, F ~= \(\text{ATTN1} + \text{CRT} + \text{Matrices} + \text{3D}\), and C~= \(\text{age} + \text{synonym} + \text{antonym}\).

Correlation between F Score and F Compiste Measured, Compared with C

ggpairs(MP5_data, c("F","ATTN1","crt2_score", "rotsum", "MXsum","Antonym","Synonym","C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

1.0.1 IVs

Complete list of IVs used can be found in this google spreadsheet.
Refer to ML_FACE_MP5.html notebook for more details about IVs, missing data, and imputation.

# Revert scaled variables back to their original scale
MP5_data<-MP5_data%>%
  mutate(crt2_score=crt2_score.OrgScale,
         rotsum=rotsum.OrgScale,
         MXsum=MXsum.OrgScale,
         AGE=AGE.OrgScale,
         numEDU=numEDU.OrgScale,
         numINCOME=numINCOME.OrgScale,
         Antonym=Antonym.OrgScale,
         Synonym=Synonym.OrgScale,
         DOSE_SunkCost=DOSE_SunkCost.OrgScale,
         DOSE_LessMore=DOSE_LessMore.OrgScale,
         DOSE_UnusualDis=DOSE_UnusualDis.OrgScale,
         DOSE_Default=DOSE_Default.OrgScale)



FAM = grep("FAM", names(MP5_data), value = TRUE)
DOSE_SOURCE = grep("DOSE|SOURCE", names(MP5_data), value = TRUE)
TIME.lg = grep("lg", names(MP5_data), value = TRUE)
F_C_TIPI_data = grep("crt|rot|MX|Matrix|3D|Antonym|Synonym|ANT|SYN|TIPI|Extra|Agree|Consc|EmoS|Openn", names(MP5_data), value = TRUE)
IV_DV_data <- grep("TIME_SUNK_|TIME_DISEASE_|TIME_DEFAULT_|TIME_LESSMORE_", names(MP5_data), value = TRUE)

MP5_data<-MP5_data%>% # ensure that factors are entered as factors,
  mutate(
        # ZIP_LW_IV2.recoded = as.factor(ZIP_LW_IV2.recoded),
        day_of_week = as.factor(day_of_week),
         numGENDER = case_when(numGENDER==1 ~ "Female",
                               TRUE ~ "Male"),
         numGENDER = as.factor(numGENDER),
         META_Browser = as.factor(META_Browser),
         META_Browser = relevel(META_Browser, ref = "Chrome"),
         ATTN1 = as.factor(case_when(ATTN1==1~"_Correct",
                                     TRUE ~ "_InCorrect")),
         ATTN2 = as.factor(case_when(ATTN2==1 ~"_Correct",
                                     TRUE ~ "_InCorrect")), # variable hygiene
         CONFUSE = as.factor(CONFUSE),
         TEDIOUS = as.factor(TEDIOUS),
         mobile = as.numeric(mobile),
         mobile=as.factor(case_when(
          is.na(mobile) == T ~ "_No",
          mobile == 0 ~ "_No",
          TRUE ~ "_YES")))%>%
  rename(GENDER = numGENDER) %>%
  separate(META_Resolution, into = c("Width", "Height"), sep = "x") %>%
  mutate(
    Width = as.numeric(Width),
    Height = as.numeric(Height),
    Resolution_Product = Width * Height
  )

columns_to_remove <- c(FAM, TIME.lg,F_C_TIPI_data,IV_DV_data,DOSE_SOURCE)

MP5_data<-MP5_data%>%
  select(-all_of(columns_to_remove))#remove all the familiarity and time.lg data

### Variable Selection for M1 ###

TIME = grep("\\.Submit", names(MP5_data), value = TRUE)
First_click = grep("\\_First.Click", names(MP5_data), value = TRUE)
Last_click = grep("\\_Last.Click", names(MP5_data), value = TRUE)
Total_click = grep("\\_Click.Count", names(MP5_data), value = TRUE)


### Calculate the differences between the first and the lack click times ###

for (i in 1:length(First_click)) {
  first_col <- First_click[i]
  last_col <- Last_click[i]
  diff_col <- paste0(sub("\\_First.Click", "", first_col), "_Click.Time.Diff")

  MP5_data <- MP5_data %>%
    mutate(!!diff_col := !!sym(last_col) - !!sym(first_col))
}

Click_time_diff = grep("\\_Click.Time.Diff", names(MP5_data), value = TRUE)

### DONE ###
na_columns <- sapply(MP5_data, function(x) any(is.na(x)))

# Extract the names of columns that contain NA values
# names(na_columns[na_columns])

# as expected, append next
#  [1] "TIME_DEMCHAR_Page.Submit"     "TIME_DEMCHAR_First.Click"
#  [3] "TIME_DEMCHAR_Last.Click"      "TIME_DEMCHAR_Click.Count"
#  [5] "Q_RelevantIDFraudScore"       "LocationLatitude"
#  [7] "LocationLongitude"            "DEMCHAR_FleschKincaid"
#  [9] "CONFUSEOPEN_FleschKincaid"    "TEDIOUSOPEN_FleschKincaid"
# [11] "TIME_DEMCHAR_Click.Time.Diff"

# MP5_data%>%
#   filter(LocationLongitude ==0 | LocationLatitude ==0) # none

### Additional wrangling to append missing values ###

MP5_data<-MP5_data %>%
  # filter(!is.na(day_tempF) & !is.na(hist_tempF) & !is.na(LW_tempdiff)) %>% # unclear why these are NA values but removed for accurate prediction
  # as discussed in 07/19 meeting, we retain these observations and impute them by 0.
  group_by(Panel) %>%
  mutate(
    Avg_RecaptchaScore = mean(Q_RecaptchaScore, na.rm = TRUE),
    Avg_RelevantIDFraudScore = mean(Q_RelevantIDFraudScore, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(
    Q_RecaptchaScore = ifelse(is.na(Q_RecaptchaScore), Avg_RecaptchaScore, Q_RecaptchaScore),
    Q_RelevantIDFraudScore = ifelse(is.na(Q_RelevantIDFraudScore), Avg_RelevantIDFraudScore, Q_RelevantIDFraudScore) # append missing Q_RecaptchaScore and Q_RelevantIDFraudScore with Panel average
  ) %>%
  select(-Avg_RecaptchaScore, -Avg_RelevantIDFraudScore)

# NA Check

### DONE ###
### Select Variables of Interest ###
M5_variable = c("Duration..in.seconds.","Q_RecaptchaScore","Q_RelevantIDFraudScore","CONFUSE","TEDIOUS","day_of_week","clock_time","ATTN1","ATTN2","numINCOME","numEDU","GENDER","AGE","mobile","Resolution_Product","LocationLatitude","LocationLongitude","META_Browser","Panel")

FACE=c("F","A","C","E")
IV_DV = c("numSunkCost","numLessMore","numDefault","numDisease","SunkCondition","LessMoreCondition","DiseaseCondition","DefaultCondition")

Text_analysis = grep("(_WordCount|_SentenceCount|_CharacterCount|_TypoCount|_avg.WordRarity|_AvgWordLength|PURPOSEOPEN_keyword_count)", names(MP5_data), value = TRUE)

# is the IV for LW correct?
Data_M5 <- MP5_data %>%
  select(all_of(c(FACE, IV_DV, TIME, First_click, Last_click, Total_click, M5_variable, Click_time_diff,Text_analysis)))
# it is okay...., Text_analysis var are not selected anyways.

na_columns <- sapply(Data_M5, function(x) any(is.na(x)))

# Extract the names of columns that contain NA values
# names(na_columns[na_columns])

# all as expected and documented
# [1] "TIME_DEMCHAR_Page.Submit"     "TIME_DEMCHAR_First.Click"
# [3] "TIME_DEMCHAR_Last.Click"      "TIME_DEMCHAR_Click.Count"
# [5] "TIME_DEMCHAR_Click.Time.Diff" "DEMCHAR_FleschKincaid"
# [7] "CONFUSEOPEN_FleschKincaid"    "TEDIOUSOPEN_FleschKincaid"

Data_M5<-Data_M5%>%
  mutate(across(everything(), ~ replace_na(., 0)))

Data_M5a<-Data_M5%>%
  rowwise() %>%
  mutate(
    avg_first_click = mean(c_across(all_of(grep("First.Click", names(Data_M5), value = TRUE))), na.rm = TRUE),
    avg_last_click = mean(c_across(all_of(grep("Last.Click", names(Data_M5), value = TRUE))), na.rm = TRUE),
    avg_click_count = mean(c_across(all_of(grep("Click.Count", names(Data_M5), value = TRUE))), na.rm = TRUE),
    avg_click_time_diff = mean(c_across(all_of(grep("Click.Time.Diff", names(Data_M5), value = TRUE))), na.rm = TRUE),
    avg_page_submit = mean(c_across(all_of(grep(".Submit", names(Data_M5), value = TRUE))), na.rm = TRUE)
  ) %>%
  ungroup()

### DONE ###

# Data_M5[duplicated(Data_M5), ] #; none, good

### Perform Log Transformation and scaling

# Identify numeric columns and their indices
numeric_indices <- which(sapply(Data_M5a, is.numeric))
log_columns <- names(Data_M5a)[numeric_indices[numeric_indices >= 13]]
scale_columns <- names(Data_M5a)[numeric_indices[numeric_indices >= 13]]

# perform log transformation among only click time, click count, duration data, and resolution product (also right skewed) - need to exclude the scaled items, such as text analysis
log.exclude_columns <- c("LocationLatitude", "LocationLongitude","Q_RecaptchaScore")
log_columns <- setdiff(log_columns, c(log.exclude_columns, Text_analysis))

Data_M5a.log <- Data_M5a %>%
  mutate(
    across(.cols = all_of(log_columns), .fns = ~ log(. + 1)))


Data_M5a.log_z<-Data_M5a.log%>%
  mutate(# Apply log to selected columns, excluding some
    across(.cols = all_of(scale_columns), .fns = scale)  # Apply scale to all numeric columns starting from the 18th
  )
###

### One-hot coding ###
# factor_columns <- sapply(Data_M5a, is.factor)
# factors_in_Data_M5a <- Data_M5a[, factor_columns]
# names(factors_in_Data_M5a)

# install.packages("fastDummies")
library(fastDummies)


process_dataset <- function(data) {
  data <- data %>%
    mutate(
      CONFUSE = relevel(CONFUSE, ref = "No"),
      TEDIOUS = relevel(TEDIOUS, ref = "No"),
      day_of_week = relevel(day_of_week, ref = "Tuesday"),
      ATTN1 = relevel(ATTN1, ref = "_InCorrect"),
      ATTN2 = relevel(ATTN2, ref = "_InCorrect"),
      mobile = relevel(mobile, ref = "_No"),
      GENDER = relevel(GENDER, ref = "Male")
    )
    
  day_mapping <- c("Sunday" = 0, "Monday" = 1, "Tuesday" = 2, "Wednesday" = 3,
                   "Thursday" = 4, "Friday" = 5, "Saturday" = 6)
  data$day_of_week_num.from_Sunday <- day_mapping[data$day_of_week]
  data <- data %>%
    select(-day_of_week)%>%
    select(Panel, everything())

  data_part1 <- as.data.frame(data[, 1:14])
  data_part2 <- as.data.frame(data[, 15:ncol(data)])

  data_part2_dummy_coded <- dummy_cols(data_part2,
                                       remove_first_dummy = TRUE,
                                       remove_selected_columns = TRUE)

  data_dummy_coded <- cbind(data_part1, data_part2_dummy_coded)
  colnames(data_dummy_coded) <- gsub(" ", "_", colnames(data_dummy_coded))

  return(data_dummy_coded)
}

Data_M5a.log_z_dummy_coded <- process_dataset(Data_M5a.log_z)%>%
  mutate(TIME_INTRO_Page.Submit=as.numeric(TIME_INTRO_Page.Submit)) # need to look into this further....
Data_M5a.log_dummy_coded <- process_dataset(Data_M5a.log)

na_columns <- sapply(Data_M5a.log_z_dummy_coded, function(x) any(is.na(x)))

# # Extract the names of columns that contain NA values
# names(na_columns[na_columns]) # NONE
write.csv(Data_M5a.log_dummy_coded,"Data_M5a.log_dummy_coded_20240921.csv")

# class(Data_M5a.log_dummy_coded$TIME_INTRO_Page.Submit)
# class(Data_M5a.log_z_dummy_coded$TIME_INTRO_Page.Submit)
F_measures<-read.csv("./Data Prep for ML/MP5_Full_data_ML_20240908.csv")%>%select(crt2_score,rotsum,MXsum)
Data_M5a.log_z_dummy_coded$crt2_score<-F_measures$crt2_score
Data_M5a.log_z_dummy_coded$rotsum<-F_measures$rotsum
Data_M5a.log_z_dummy_coded$MXsum<-F_measures$MXsum

# Set seed for reproducibility
set.seed(123)

# Split the data into training and test sets
train_M5a <- createDataPartition(Data_M5a.log_z_dummy_coded$F, p = 0.8, list = FALSE)

variables_to_remove.F <- c("A","C","E","numSunkCost","numLessMore","numDefault","numDisease","SunkCondition","LessMoreCondition","DiseaseCondition","DefaultCondition","Panel",Text_analysis,"crt2_score","rotsum","MXsum","Antonym","Synonym")
# We need to remove all the above variables other than F for training and testing; but we'd like to see their relationship with the predicted F

# Create training and testing sets
data.trn_M5a <- as.data.frame(Data_M5a.log_z_dummy_coded[train_M5a, ])
data.tst_M5a <- as.data.frame(Data_M5a.log_z_dummy_coded[-train_M5a, ]) 

# data.tst_M5a[duplicated(rbind(data.trn_M5a, data.tst_M5a))[-(1:nrow(data.trn_M5a))], ] # none, good, no duplicates, so we won't need to worry aboyt data leakage

data.trn_M5a.F <- as.data.frame(data.trn_M5a[, !colnames(data.trn_M5a) %in% variables_to_remove.F])
actual_M5a.F <- data.tst_M5a$F

ctrl <- trainControl(method = "cv", number = 10, summaryFunction = defaultSummary)
tuneGrid <- expand.grid(
  mtry = c(64),
  min.node.size = c(5),
  splitrule = c("variance")
)

# Model_M5a_rf <- train(F ~ ., data = data.trn_M5a.F, method = "ranger",
#                   tuneGrid = tuneGrid, importance = 'impurity', num.trees = 750,
#                   trControl = ctrl)
# # # Save the Random Forest model
# save(Model_M5a_rf, file = "./Saved ML Model and Data/Model_M5a_rf.RData")
load("./Saved ML Model and Data/Model_M5a_rf.RData")

pred_M5a_rf<- predict(Model_M5a_rf,data.tst_M5a)
test_perf.M5a_rf <- postResample(pred_M5a_rf, actual_M5a.F)
data.tst_M5a.rf <- cbind(data.tst_M5a, Predicted_F = pred_M5a_rf)
pred_full_M5a_rf <- predict(Model_M5a_rf, Data_M5a.log_z_dummy_coded)
data.full_M5a.rf<- cbind(Data_M5a.log_z_dummy_coded, Predicted_F = pred_full_M5a_rf)



### DONE with RF



Model_M5a_reg <- train(F ~ ., data = data.trn_M5a.F, method = "lm")
pred_M5a_reg<- predict(Model_M5a_reg,data.tst_M5a)
test_perf.M5a_reg <- postResample(pred_M5a_reg, actual_M5a.F)
data.tst_M5a.reg <- cbind(data.tst_M5a, Predicted_F = pred_M5a_reg)
pred_full_M5a_reg <- predict(Model_M5a_reg, Data_M5a.log_z_dummy_coded)
data.full_M5a.reg<- cbind(Data_M5a.log_z_dummy_coded, Predicted_F = pred_full_M5a_reg)


### DONE training reg Model ###

### Train Elastic Net ###

Model_M5a_net <- train(F ~ ., data = data.trn_M5a.F, method = "glmnet", trControl = ctrl, tuneLength = 40) # 10 fold cv to select the best model, set tune length to 40
pred_M5a_net<- predict(Model_M5a_net,data.tst_M5a)
test_perf.M5a_net <- postResample(pred_M5a_net, actual_M5a.F)
data.tst_M5a.net <- cbind(data.tst_M5a, Predicted_F = pred_M5a_net)
pred_full_M5a_net <- predict(Model_M5a_net, Data_M5a.log_z_dummy_coded)
data.full_M5a.net<- cbind(Data_M5a.log_z_dummy_coded, Predicted_F = pred_full_M5a_net)


tuneGrid.4 <- expand.grid(
  n.trees = c(800),   # Number of trees (boosting iterations)
  interaction.depth = c(4), # Tree depth
  shrinkage = c(0.01), #   shrinkage = c(0.01,0,03,0.05,0.08,0.1); 0,01 best
  n.minobsinnode = c(50)     # Minimum number of observations in terminal nodes
)


# Model_M5a_gbm_4 <- train(F ~ ., data = data.trn_M5a.F, method = "gbm",
#                     tuneGrid = tuneGrid.4, trControl = ctrl,
#                     verbose = FALSE)
# save(Model_M5a_gbm_4, file = "./Saved ML Model and Data/Model_M5a_gbm.RData")
load("./Saved ML Model and Data/Model_M5a_gbm_4_50.RData")
Model_M5a_gbm<-Model_M5a_gbm_4
pred_M5a_gbm <- predict(Model_M5a_gbm, data.tst_M5a)
test_perf.M5a_gbm <- postResample(pred_M5a_gbm, actual_M5a.F)
data.tst_M5a.gbm <- cbind(data.tst_M5a, Predicted_F = pred_M5a_gbm)
pred_full_M5a_gbm <- predict(Model_M5a_gbm, Data_M5a.log_z_dummy_coded)
data.full_M5a.gbm <- cbind(Data_M5a.log_z_dummy_coded, Predicted_F = pred_full_M5a_gbm)
actual_M5a.F_full<-Data_M5a.log_z_dummy_coded$F
test_perf.full_M5a_gbm<- postResample(pred_full_M5a_gbm, actual_M5a.F_full)
test_perf.full_M5a_rf<- postResample(pred_full_M5a_rf, actual_M5a.F_full)
test_perf.full_M5a_reg<- postResample(pred_full_M5a_reg, actual_M5a.F_full)
test_perf.full_M5a_net<- postResample(pred_full_M5a_net, actual_M5a.F_full)
# test_perf.M5a_gbm
# test_perf.full_M5a_rf
# test_perf.full_M5a_reg
# test_perf.full_M5a_net

1.0.2 Model Fit

The following stats come from cross-validation

# for Elastic Net, this section needs a bit more work but the results are only off by slightly. Needs to come back to this

train_perf_M5a_rf <- Model_M5a_rf$results[which.min(Model_M5a_rf$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_reg <- Model_M5a_reg$results[which.min(Model_M5a_reg$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_net <- Model_M5a_net$results[which.min(Model_M5a_net$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_gbm <- Model_M5a_gbm$results[which.min(Model_M5a_gbm$results$RMSE), c("RMSE", "Rsquared", "MAE")]

# Combine training performance metrics into a data frame
performance_metrics_trn <- data.frame(
  Model = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(as.numeric(train_perf_M5a_reg["MAE"]), as.numeric(train_perf_M5a_net["MAE"]), as.numeric(train_perf_M5a_rf["MAE"]), as.numeric(train_perf_M5a_gbm["MAE"])),
  RMSE = c(as.numeric(train_perf_M5a_reg["RMSE"]), as.numeric(train_perf_M5a_net["RMSE"]), as.numeric(train_perf_M5a_rf["RMSE"]), as.numeric(train_perf_M5a_gbm["RMSE"])),
  Rsquared = c(as.numeric(train_perf_M5a_reg["Rsquared"]), as.numeric(train_perf_M5a_net["Rsquared"]), as.numeric(train_perf_M5a_rf["Rsquared"]), as.numeric(train_perf_M5a_gbm["Rsquared"])))


performance_table_trn <- performance_metrics_trn %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table_trn, caption = "Model Fit Statistics") %>%
  kable_styling(full_width = F, position = "center")

Model Fit Statistics
Model	MAE	RMSE	Rsquared
Linear Regression	0.5825	0.7245	0.5824
Elastic Net	0.5576	0.6913	0.6154
Random Forest	0.5250	0.6535	0.6602
Gradient Boosting	0.5141	0.6374	0.6750

###Plot ###

performance_metrics_long_trn <- performance_metrics_trn %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")

trn_M5a.sd_F <- sd(data.trn_M5a$F)

ggplot(performance_metrics_long_trn, aes(x = Metric, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = trn_M5a.sd_F, yend = trn_M5a.sd_F),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
            aes(x = 2, y = trn_M5a.sd_F, label = paste0("Std.dev of F in Training Data: ", round(trn_M5a.sd_F, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Model Fit Statistics",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

1.0.3 Model Interpretation I

Refer to Section 1.1.1.1 for variable importance when trained with full MP5 data.

1.0.4 Model Interpretation II (SHAP Value for tree-based models)

Refer to Section 1.1.1.2 for variable importance when trained with full MP5 data.

1.0.5 Model Accuracy on Held-out Test Data

Correlation between predicted F using different methods

predicted_f_reg <- data.tst_M5a.reg$Predicted_F
predicted_f_rf <- data.tst_M5a.rf$Predicted_F
predicted_f_gbm <- data.tst_M5a.gbm$Predicted_F 
predicted_f_net <- data.tst_M5a.net$Predicted_F
actual_F<-data.tst_M5a.net$F

# Combine into a data frame
predicted_f <- data.frame(
  Pred_F.Reg = predicted_f_reg,
  Pred_F.RandomForest = predicted_f_rf,
  Pred_F.GradientBoosting = predicted_f_gbm,
  Pred_F.ElasticNet = predicted_f_net,
  F = actual_F
)

ggpairs(predicted_f, c("Pred_F.Reg","Pred_F.ElasticNet","Pred_F.RandomForest","Pred_F.GradientBoosting","F"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

The following metrics pertains to how the model did in predicting held-out data (un-used during model training). Signallings how generalizable the models are.

performance_metrics <- data.frame(
Method = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(test_perf.M5a_reg["MAE"], test_perf.M5a_net["MAE"], test_perf.M5a_rf["MAE"], test_perf.M5a_gbm["MAE"]),
  RMSE = c(test_perf.M5a_reg["RMSE"], test_perf.M5a_net["RMSE"], test_perf.M5a_rf["RMSE"], test_perf.M5a_gbm["RMSE"]),
  Rsquared = c(test_perf.M5a_reg["Rsquared"], test_perf.M5a_net["Rsquared"], test_perf.M5a_rf["Rsquared"], test_perf.M5a_gbm["Rsquared"])
)



performance_metrics_long <- performance_metrics %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")


# Create the performance metrics table
performance_table <- performance_metrics %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table, caption = "Comparison of Model Performance in Held-Out Test Data") %>%
  kable_styling(full_width = F, position = "center")

Comparison of Model Performance in Held-Out Test Data
Method	MAE	RMSE	Rsquared
Linear Regression	0.5910	0.7424	0.5438
Elastic Net	0.5913	0.7324	0.5501
Random Forest	0.5525	0.6922	0.5994
Gradient Boosting	0.5051	0.6262	0.6760

tst_M5a.sd_F <- sd(data.tst_M5a$F)

ggplot(performance_metrics_long, aes(x = Metric, y = Value, fill = Method)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = tst_M5a.sd_F, yend = tst_M5a.sd_F),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long %>% filter(Metric == "RMSE"),
            aes(x = 2, y = tst_M5a.sd_F, label = paste0("Std.dev of F in Held-Out Test Data: ", round(tst_M5a.sd_F, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Comparison of Model Performance in Held-Out Testing Data",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

1.0.6 Density and Residual Plots for Predicting F in Held-Out Test Data

Is there heterogeniety in prediction accuracy, as a function of 1] the actual F score or 2] free data?
- 1]: Visual inspection of density and residual plots below to suggest that lower values of F tend to be overestimated and higher values of F tend to be underestimated
- 2] we can build another model to predict the residuals using the same explanatory variables that were used for predicting F.

Histogram of F in Held-Out Test Data

hist(data.tst_M5a$F)

library(gridExtra)
pred_M5a_rf <- predict(Model_M5a_rf, data.tst_M5a)
pred_M5a_reg <- predict(Model_M5a_reg, data.tst_M5a)
pred_M5a_net <- predict(Model_M5a_net, data.tst_M5a)
pred_M5a_gbm <- predict(Model_M5a_gbm, data.tst_M5a)

# Calculate residuals
residuals_rf <- actual_M5a.F - pred_M5a_rf
residuals_reg <- actual_M5a.F - pred_M5a_reg
residuals_net <- actual_M5a.F - pred_M5a_net
residuals_gbm <- actual_M5a.F - pred_M5a_gbm

# Create a data frame with residuals and actual values
residuals_data <- data.frame(
  Actual = actual_M5a.F,
  Pred_RF = pred_M5a_rf,
  Pred_Reg = pred_M5a_reg,
  Pred_NET = pred_M5a_net,
  Pred_GBM = pred_M5a_gbm,
  Residual_RF = residuals_rf,
  Residual_Reg = residuals_reg,
  Residual_NET = residuals_net,
  Residual_GBM = residuals_gbm
)

create_density_plot <- function(predicted, actual, title) {
  ggplot() +
    geom_density(aes(x = predicted, color = "Predicted F"), alpha = 0.4, size = 1) +
    geom_density(aes(x = actual, color = "Actual F"), alpha = 0.4, size = 1) +
    scale_y_continuous(limits = c(0, 1)) +
    scale_color_manual(values = c("Actual F" = "firebrick", "Predicted F" = "darkorange")) +
    labs(title = title, x = "F Score", y = "Density") +
    theme_minimal() +
    theme(legend.position = "bottom", legend.title = element_blank())
}

# Create density plots for each model
density_rf <- create_density_plot(residuals_data$Pred_RF, residuals_data$Actual, "Density Plot for Random Forest")
density_reg <- create_density_plot(residuals_data$Pred_Reg, residuals_data$Actual, "Density Plot for Regression")
density_net <- create_density_plot(residuals_data$Pred_NET, residuals_data$Actual, "Density Plot for Elastic Net")
density_gbm <- create_density_plot(residuals_data$Pred_GBM, residuals_data$Actual, "Density Plot for Gradient Boosting")

# Arrange all density plots in a grid
grid.arrange(density_reg, density_net, density_rf, density_gbm, ncol = 2, nrow = 2)

create_residual_plot <- function(actual, residuals, title) {
  ggplot(residuals_data, aes(x = actual, y = residuals)) +
    geom_point(alpha = 0.4) +
    geom_rug(sides = "b", alpha = 0.2) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
    labs(title = title, x = "Actual F Score", y = "Residuals") +
    theme_minimal() +
    ylim(-2, 2)
}

# Create residual plots for each model
residual_rf <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_RF, "Residuals Plot for Random Forest")
residual_reg <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_Reg, "Residuals Plot for Regression")
residual_net <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_NET, "Residuals Plot for Elastic Net")
residual_gbm <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_GBM, "Residuals Plot for Gradient Boosting")

# Arrange all residual plots in a grid
grid.arrange( residual_reg, residual_net,residual_rf, residual_gbm, ncol = 2, nrow = 2)

1.0.7 Correlation Btw ML Predicted F and other FACE factors

We focus on the test dataset (n=292)
Significance Notation: *** if the p-value is < 0.001, ** if the p-value is < 0.01, * if the p-value is < 0.05, . if the p-value is < 0.10

Linear Regression

ggpairs(data.tst_M5a.reg, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net

ggpairs(data.tst_M5a.net, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest

ggpairs(data.tst_M5a.rf, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.tst_M5a.gbm, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PF and F measures and ATTN1

ggpairs(data.tst_M5a.gbm, c("Predicted_F","ATTN1__Correct","crt2_score","MXsum", "rotsum","C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

1.0.8 Explain Heterogenity with Predicted F in Test Sample (20% study 5 Data)

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_M5a, family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_M5a, family = binomial)


Default.PAN_Pred_F.reg_ACE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.reg , family = binomial)

Default.PAN_Pred_F.net_ACE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.net, family = binomial)

Default.PAN_Pred_F.rf_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.rf, family = binomial)

Default.PAN_Pred_F.gbm_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.gbm, family = binomial)

Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_F.reg_ACE" = Default.PAN_Pred_F.reg_ACE,
  "Default.PAN_Pred_F.net_ACE" = Default.PAN_Pred_F.net_ACE,
  "Default.PAN_Pred_F.rf_ACE" = Default.PAN_Pred_F.rf_ACE,
  "Default.PAN_Pred_F.gbm_ACE" = Default.PAN_Pred_F.gbm_ACE
)

Model Results

term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_F.reg_ACE,Default.PAN_Pred_F.net_ACE,Default.PAN_Pred_F.rf_ACE,Default.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result

Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.F(reg)_ACE				PAN_Pred.F(net)_ACE				PAN_Pred.F(rf)_ACE				PAN_Pred.F(gbm)_ACE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.96	0.28	-0.15	0.884	0.94	0.28	-0.21	0.837	0.81	0.26	-0.66	0.510	0.82	0.27	-0.62	0.534	0.91	0.29	-0.30	0.762	0.90	0.28	-0.34	0.737
DefaultCondition [OPTOUT]	1.41	0.58	0.83	0.409	1.44	0.61	0.86	0.391	1.71	0.80	1.16	0.248	1.74	0.83	1.15	0.248	1.57	0.74	0.96	0.335	1.49	0.68	0.87	0.382
F					0.87	0.35	-0.35	0.725
A					0.85	0.17	-0.79	0.431	0.75	0.16	-1.36	0.173	0.77	0.16	-1.25	0.210	0.82	0.17	-0.96	0.338	0.82	0.16	-1.01	0.314
C					1.69	0.58	1.53	0.127	1.40	0.33	1.41	0.158	1.42	0.34	1.46	0.145	1.52	0.37	1.69	0.091	1.48	0.38	1.55	0.121
E					0.88	0.16	-0.72	0.474	0.94	0.15	-0.40	0.687	0.93	0.15	-0.44	0.661	0.91	0.15	-0.59	0.558	0.92	0.15	-0.54	0.590
DefaultCondition [OPTOUT] × F					1.35	0.84	0.49	0.627
DefaultCondition [OPTOUT] × A					1.07	0.31	0.24	0.814	1.23	0.37	0.70	0.481	1.21	0.36	0.63	0.526	1.13	0.33	0.43	0.669	1.13	0.33	0.42	0.674
DefaultCondition [OPTOUT] × C					0.68	0.34	-0.77	0.443	0.91	0.31	-0.26	0.792	0.91	0.31	-0.28	0.779	0.86	0.30	-0.43	0.668	0.85	0.31	-0.44	0.658
DefaultCondition [OPTOUT] × E					1.68	0.50	1.74	0.082	1.48	0.38	1.54	0.123	1.48	0.38	1.54	0.123	1.52	0.39	1.64	0.101	1.54	0.40	1.67	0.094
Predicted F									1.42	0.37	1.34	0.179	1.35	0.37	1.10	0.271	1.06	0.32	0.20	0.839	1.12	0.34	0.36	0.718
DefaultCondition [OPTOUT] × Predicted F									0.67	0.26	-1.01	0.311	0.68	0.29	-0.92	0.355	0.84	0.38	-0.39	0.700	0.90	0.42	-0.22	0.827
Observations	292				292				292				292				292				292
AIC	389.079				395.399				393.762				394.331				395.482				395.506
log-Likelihood	-188.540				-183.699				-182.881				-183.165				-183.741				-183.753

Anova and Eta Square

As in the origianl paper, type I anova is used. We first attribute variance to condition, FACE, then Panel main effect, followed by the conditionFACE interaction, and the Panelcondition interaction last.
As in the original paper, Etasq of a variable in linear regression models (LessMore and Sunk) is calculated as the sum of squares explained by the variables divided by the total sum of squares. Etasq of a variable in logistic regression models (Default and Unusual Disease) is calculated as the deviance explained by the model relative to the null model (deviance from the null model divided by the model’s residual deviance).
I validated this pipeline by reproducing the stats reported in the SI for Study 1.
Woudl we like to see the full Anova stats?

calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")

Etasq Table: Default Paradigm
	Condition	Panel	Default Condition:Panel
Default.PAN	0.02968***	0.0095	0.02393**
Default.PAN_FACE	0.02968***	0.00305	0.00314
Default.PAN_Pred_F.reg_ACE	0.02968***	0.00282	0.00619
Default.PAN_Pred_F.net_ACE	0.02968***	0.00285	0.00605
Default.PAN_Pred_F.rf_ACE	0.02968***	0.00321	0.00513
Default.PAN_Pred_F.gbm_ACE	0.02968***	0.00287	0.00488

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_M5a.reg, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_M5a.reg, family = binomial)

Disease.PAN_Pred_F.reg_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.reg, family = binomial)

Disease.PAN_Pred_F.net_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.net, family = binomial)

Disease.PAN_Pred_F.rf_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.rf, family = binomial)

Disease.PAN_Pred_F.gbm_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_M5a.gbm, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_F.reg_ACE" = Disease.PAN_Pred_F.reg_ACE,
  "Disease.PAN_Pred_F.net_ACE" = Disease.PAN_Pred_F.net_ACE,
  "Disease.PAN_Pred_F.rf_ACE" = Disease.PAN_Pred_F.rf_ACE, 
  "Disease.PAN_Pred_F.gbm_ACE" = Disease.PAN_Pred_F.gbm_ACE
)


# extract_aic(Disease_model)

Model Results

term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_F.reg_ACE,Disease.PAN_Pred_F.net_ACE,Disease.PAN_Pred_F.rf_ACE,Disease.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result

Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.F(reg)_ACE				PAN_Pred.F(net)_ACE				PAN_Pred.F(rf)_ACE				PAN_Pred.F(gbm)_ACE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.48	0.13	-2.67	0.008	0.44	0.13	-2.80	0.005	0.38	0.12	-2.96	0.003	0.38	0.13	-2.91	0.004	0.40	0.13	-2.81	0.005	0.40	0.13	-2.86	0.004
DiseaseCondition [LOSS]	4.04	1.82	3.09	0.002	4.19	1.97	3.04	0.002	4.37	2.24	2.87	0.004	4.34	2.28	2.79	0.005	3.47	1.81	2.38	0.017	3.78	1.91	2.62	0.009
F					2.42	1.16	1.84	0.065
A					1.19	0.23	0.90	0.369	1.14	0.23	0.65	0.512	1.16	0.23	0.74	0.462	1.18	0.23	0.84	0.399	1.17	0.23	0.80	0.426
C					0.41	0.17	-2.20	0.028	0.63	0.16	-1.78	0.075	0.63	0.17	-1.74	0.082	0.64	0.17	-1.68	0.093	0.61	0.17	-1.78	0.074
E					1.07	0.21	0.33	0.745	0.94	0.16	-0.37	0.713	0.93	0.16	-0.43	0.670	0.91	0.16	-0.55	0.582	0.93	0.16	-0.41	0.680
DiseaseCondition [LOSS] × F					0.92	0.64	-0.12	0.902
DiseaseCondition [LOSS] × A					1.49	0.50	1.17	0.243	1.73	0.58	1.63	0.103	1.71	0.57	1.60	0.109	1.62	0.54	1.44	0.150	1.63	0.55	1.47	0.141
DiseaseCondition [LOSS] × C					1.17	0.65	0.28	0.780	1.16	0.42	0.41	0.683	1.15	0.42	0.37	0.709	0.98	0.37	-0.04	0.966	1.07	0.42	0.18	0.859
DiseaseCondition [LOSS] × E					0.77	0.24	-0.86	0.392	0.72	0.19	-1.23	0.219	0.73	0.19	-1.19	0.234	0.78	0.21	-0.94	0.347	0.75	0.20	-1.04	0.297
Predicted F									1.71	0.54	1.71	0.088	1.66	0.54	1.55	0.121	1.59	0.55	1.33	0.184	1.69	0.62	1.43	0.153
DiseaseCondition [LOSS] × Predicted F									0.65	0.28	-1.00	0.317	0.68	0.31	-0.85	0.395	1.08	0.54	0.16	0.873	0.89	0.46	-0.23	0.815
Observations	292				292				292				292				292				292
AIC	363.491				356.242				359.179				359.740				358.168				358.950
log-Likelihood	-175.745				-164.121				-165.589				-165.870				-165.084				-165.475

Anova and Eta Square

calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")

Etasq Table: Disease Paradigm
	Condition	Panel	DiseaseCondition:Panel
Disease.PAN	0.03974***	0.03523**	0.01646*
Disease.PAN_FACE	0.03974***	0.0196*	0.00098
Disease.PAN_Pred_F.reg_ACE	0.03974***	0.00683	0.00154
Disease.PAN_Pred_F.net_ACE	0.03974***	0.00643	0.00141
Disease.PAN_Pred_F.rf_ACE	0.03974***	0.0091	8e-04
Disease.PAN_Pred_F.gbm_ACE	0.03974***	0.00913	9e-04

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_M5a.reg)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.reg)
LessMore.PAN_Pred_F.reg_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.reg)
LessMore.PAN_Pred_F.net_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.net)
LessMore.PAN_Pred_F.rf_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.rf)
LessMore.PAN_Pred_F.gbm_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.gbm)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_F.reg_ACE" = LessMore.PAN_Pred_F.reg_ACE,
  "LessMore.PAN_Pred_F.net_ACE" = LessMore.PAN_Pred_F.net_ACE,
  "LessMore.PAN_Pred_F.rf_ACE" = LessMore.PAN_Pred_F.rf_ACE, 
  "LessMore.PAN_Pred_F.gbm_ACE" = LessMore.PAN_Pred_F.gbm_ACE
)

Model Results

term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_F.reg_ACE,LessMore.PAN_Pred_F.net_ACE,LessMore.PAN_Pred_F.rf_ACE,LessMore.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result

LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.F(reg)_ACE				PAN_Pred.F(net)_ACE				PAN_Pred.F(rf)_ACE				PAN_Pred.F(gbm)_ACE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.51	0.21	25.98	<0.001	5.52	0.21	26.19	<0.001	5.51	0.23	23.51	<0.001	5.52	0.24	23.08	<0.001	5.53	0.23	23.85	<0.001	5.59	0.23	24.74	<0.001
LessMoreCondition [SCARF]	0.76	0.29	2.65	0.009	0.83	0.29	2.89	0.004	0.80	0.31	2.58	0.010	0.74	0.32	2.35	0.019	0.66	0.31	2.11	0.035	0.66	0.30	2.17	0.031
F					-0.03	0.25	-0.11	0.913
A					-0.26	0.14	-1.88	0.062	-0.27	0.14	-1.93	0.055	-0.27	0.14	-1.91	0.057	-0.26	0.14	-1.93	0.054	-0.24	0.14	-1.71	0.088
C					0.16	0.21	0.77	0.442	0.14	0.15	0.94	0.348	0.14	0.15	0.98	0.330	0.15	0.15	0.98	0.326	0.20	0.16	1.27	0.207
E					-0.02	0.12	-0.17	0.863	-0.01	0.10	-0.08	0.940	-0.01	0.10	-0.12	0.901	-0.02	0.10	-0.15	0.880	-0.04	0.10	-0.43	0.669
LessMoreCondition [SCARF] × F					0.38	0.40	0.95	0.345
LessMoreCondition [SCARF] × A					0.51	0.18	2.78	0.006	0.52	0.19	2.79	0.006	0.50	0.19	2.73	0.007	0.47	0.18	2.58	0.010	0.46	0.18	2.51	0.013
LessMoreCondition [SCARF] × C					-0.24	0.32	-0.75	0.452	-0.05	0.22	-0.23	0.819	-0.08	0.22	-0.36	0.717	-0.17	0.23	-0.75	0.453	-0.19	0.24	-0.82	0.414
LessMoreCondition [SCARF] × E					0.05	0.18	0.28	0.780	-0.04	0.15	-0.24	0.807	-0.03	0.15	-0.17	0.866	-0.02	0.15	-0.13	0.898	0.02	0.15	0.11	0.911
Predicted F									0.03	0.19	0.14	0.887	0.00	0.20	0.00	0.998	-0.01	0.21	-0.07	0.943	-0.18	0.22	-0.82	0.412
LessMoreCondition [SCARF] × Predicted F									0.12	0.25	0.48	0.635	0.23	0.27	0.87	0.386	0.47	0.29	1.64	0.102	0.51	0.29	1.73	0.084
Observations	292				292				292				292				292				292
R² / R² adjusted	0.208 / 0.194				0.249 / 0.214				0.247 / 0.212				0.250 / 0.215				0.259 / 0.225				0.254 / 0.220

Anova and Eta Square

calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")

Etasq Table: LessMore Paradigm
	Condition	Panel	LessMoreCondition:Panel
LessMore.PAN	0.08314***	0.12202***	0.00272
LessMore.PAN_FACE	0.08314***	0.04611***	0.00099
LessMore.PAN_Pred_F.reg_ACE	0.08314***	0.05089***	0.00111
LessMore.PAN_Pred_F.net_ACE	0.08314***	0.05157***	0.00089
LessMore.PAN_Pred_F.rf_ACE	0.08314***	0.05388***	0.00094
LessMore.PAN_Pred_F.gbm_ACE	0.08314***	0.05087***	0.0011

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_M5a)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.reg)
Sunk.PAN_Pred_F.reg_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_M5a.reg)
Sunk.PAN_Pred_F.net_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.net)
Sunk.PAN_Pred_F.rf_ACE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_M5a.rf)
Sunk.PAN_Pred_F.gbm_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.gbm)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_F.reg_ACE" = Sunk.PAN_Pred_F.reg_ACE,
  "Sunk.PAN_Pred_F.net_ACE" = Sunk.PAN_Pred_F.net_ACE,
  "Sunk.PAN_Pred_F.rf_ACE" = Sunk.PAN_Pred_F.rf_ACE, #0.302 with unscaled
  "Sunk.PAN_Pred_F.gbm_ACE" = Sunk.PAN_Pred_F.gbm_ACE
)

# extract_adjusted_r_squared(Sunk_model)

Full Model Results

term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_F.reg_ACE,Sunk.PAN_Pred_F.net_ACE,Sunk.PAN_Pred_F.rf_ACE,Sunk.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_F(reg)_ACE", 
                        "PAN_Pred_F(net)_ACE", 
                        "PAN_Pred_F(rf)_ACE", 
                        "PAN_Pred_F(gbm)_ACE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result

Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred_F(reg)_ACE				PAN_Pred_F(net)_ACE				PAN_Pred_F(rf)_ACE				PAN_Pred_F(gbm)_ACE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.51	0.36	15.18	<0.001	5.51	0.36	15.31	<0.001	5.40	0.41	13.25	<0.001	5.40	0.42	12.84	<0.001	5.54	0.41	13.60	<0.001	5.41	0.40	13.50	<0.001
SunkCondition [PAID]	0.14	0.54	0.26	0.792	0.06	0.53	0.12	0.906	0.18	0.58	0.31	0.760	0.18	0.59	0.30	0.764	0.04	0.58	0.06	0.949	0.17	0.57	0.30	0.765
F					0.60	0.49	1.21	0.226
A					0.22	0.24	0.93	0.355	0.23	0.25	0.94	0.348	0.24	0.24	0.98	0.327	0.29	0.24	1.20	0.231	0.24	0.24	0.99	0.323
C					-1.07	0.42	-2.52	0.012	-0.75	0.31	-2.42	0.016	-0.74	0.31	-2.40	0.017	-0.70	0.31	-2.22	0.027	-0.78	0.32	-2.42	0.016
E					0.07	0.24	0.29	0.771	-0.07	0.20	-0.34	0.736	-0.07	0.20	-0.34	0.736	-0.11	0.20	-0.54	0.592	-0.06	0.20	-0.30	0.767
SunkCondition [PAID] × F					0.53	0.73	0.72	0.469
SunkCondition [PAID] × A					-0.50	0.34	-1.46	0.144	-0.40	0.35	-1.15	0.250	-0.40	0.34	-1.16	0.248	-0.45	0.34	-1.31	0.191	-0.41	0.34	-1.20	0.233
SunkCondition [PAID] × C					0.05	0.60	0.08	0.937	0.35	0.42	0.84	0.403	0.36	0.42	0.84	0.399	0.29	0.44	0.67	0.503	0.35	0.45	0.78	0.436
SunkCondition [PAID] × E					0.05	0.33	0.15	0.883	-0.04	0.28	-0.16	0.877	-0.05	0.28	-0.18	0.859	-0.01	0.28	-0.05	0.964	-0.05	0.28	-0.17	0.863
Predicted F									0.21	0.33	0.64	0.522	0.20	0.36	0.57	0.570	-0.02	0.36	-0.07	0.947	0.25	0.37	0.68	0.500
SunkCondition [PAID] × Predicted F									-0.03	0.47	-0.06	0.950	-0.04	0.50	-0.08	0.932	0.22	0.55	0.40	0.690	0.00	0.57	0.00	0.998
Observations	292				292				292				292				292				292
R² / R² adjusted	0.040 / 0.023				0.086 / 0.043				0.070 / 0.026				0.069 / 0.026				0.068 / 0.024				0.070 / 0.027

Anova and Eta Square

calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 6), sig_condition)
  res_panel <- paste0(round(perc_panel, 6), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 6), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")

Etasq Table: SunkCost Paradigm
	Condition	Panel	LessMoreCondition:Panel
Sunk.PAN	0.004668	0.025882*	0.009219
Sunk.PAN_FACE	0.004668	0.01253	0.003898
Sunk.PAN_Pred_F.reg_ACE	0.004668	0.026614*	0.006125
Sunk.PAN_Pred_F.net_ACE	0.004668	0.027143*	0.006041
Sunk.PAN_Pred_F.rf_ACE	0.004668	0.027873*	0.005487
Sunk.PAN_Pred_F.gbm_ACE	0.004668	0.022653*	0.00569

Summary

Model Fit

\(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)
note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.tst_M5a,family=binomial)


R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_F.reg_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.net_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.rf_ACE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_F.gbm_ACE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Default"))



Disease.null<-glm(numDisease~1,data.tst_M5a,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_F.reg_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.net_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.rf_ACE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_F.gbm_ACE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(LessMore.PAN_Pred_F.rf_ACE)$r.squared,
                     summary(LessMore.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(Sunk.PAN_Pred_F.rf_ACE )$r.squared,
                     summary(Sunk.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "SunkCost"))




R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.F(reg)+ACE",  
                                   "Pan+Pred.F(net)+ACE", 
                                   "Pan+Pred.F(rf)+ACE",
                                   "Pan+Pred.F(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_F.reg_ACE), 
                    AIC(Default.PAN_Pred_F.net_ACE), 
                    AIC(Default.PAN_Pred_F.rf_ACE),
                    AIC(Default.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_F.reg_ACE), 
                    AIC(Disease.PAN_Pred_F.net_ACE), 
                    AIC(Disease.PAN_Pred_F.rf_ACE),
                    AIC(Disease.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Disease"))


AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PF(reg)+ACE",  
                                   "Pan+PF(net)+ACE", 
                                   "Pan+PF(rf)+ACE",
                                   "Pan+PF(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


### Avg. improvement in R2 for each model across all four paradigms, in FULL study 5
Default.null.Study5<-glm(numDefault~1,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.null.Study5<-glm(numDisease~1,Data_M5a.log_z_dummy_coded,family=binomial)
Default.PAN.Study5<-glm(numDefault~DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN.Study5<-glm(numDisease~DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN.Study5<-lm(numLessMore~LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN.Study5<-lm(numSunkCost~SunkCondition*Panel,Data_M5a.log_z_dummy_coded)


Default.PAN_FACE.Study5<-glm(numDefault~DefaultCondition*F+DefaultCondition*A+DefaultCondition*C+DefaultCondition*E+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~DiseaseCondition*F+DiseaseCondition*A+DiseaseCondition*C+DiseaseCondition*E+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~LessMoreCondition*F+LessMoreCondition*A+LessMoreCondition*C+LessMoreCondition*E+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~SunkCondition*F+SunkCondition*A+SunkCondition*C+SunkCondition*E+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

R2.Default = as.data.frame(cbind( 
  "R2" = c((1-logLik(Default.PAN.Study5)/logLik(Default.null.Study5)),
           (1-logLik(Default.PAN_FACE.Study5)/logLik(Default.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Default"
))


R2.Disease = as.data.frame(cbind( 
  "R2" = c((1-logLik(Disease.PAN.Study5)/logLik(Disease.null.Study5)),
           (1-logLik(Disease.PAN_FACE.Study5)/logLik(Disease.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Disease"
))

# For LessMore paradigm (linear models)
R2.LessMore = as.data.frame(cbind( 
  "R2" = c(summary(LessMore.PAN.Study5)$r.squared,
           summary(LessMore.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "LessMore"
))

# For SunkCost paradigm (linear models)
R2.Sunk = as.data.frame(cbind( 
  "R2" = c(summary(Sunk.PAN.Study5)$r.squared,
           summary(Sunk.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "SunkCost"
))

R2.FullStudy5 <- rbind(R2.Default, R2.Disease, R2.LessMore, R2.Sunk)
R2.FullStudy5$R2 <- round(as.numeric(R2.FullStudy5$R2), 3)

R2.FullStudy5 <- R2.FullStudy5 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement_full_study_5 <- R2.FullStudy5 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))%>%
  slice(-1) 

avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample", digits = 2)

Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample
Model	Avg_Improvement
Pan	0.00
Pan+FACE	59.96
Pan+Pred.F(reg)+ACE	49.11
Pan+Pred.F(net)+ACE	47.90
Pan+Pred.F(rf)+ACE	48.54
Pan+Pred.F(gbm)+ACE	48.64
Pan+FACE (Full Study 5)	43.20

F*Cond Model Coefficients

DVs for Default & Framing are 1/0
DVs for LessMore & SunkCost are on the original scale

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_F.reg_ACE),confint(Default.PAN_Pred_F.reg_ACE)),
cbind(coef(Default.PAN_Pred_F.net_ACE),confint(Default.PAN_Pred_F.net_ACE)),
cbind(coef(Default.PAN_Pred_F.rf_ACE),confint(Default.PAN_Pred_F.rf_ACE)),
cbind(coef(Default.PAN_Pred_F.gbm_ACE),confint(Default.PAN_Pred_F.gbm_ACE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_F.reg_ACE),confint(Disease.PAN_Pred_F.reg_ACE)),
cbind(coef(Disease.PAN_Pred_F.net_ACE),confint(Disease.PAN_Pred_F.net_ACE)),
cbind(coef(Disease.PAN_Pred_F.rf_ACE),confint(Disease.PAN_Pred_F.rf_ACE)),
cbind(coef(Disease.PAN_Pred_F.gbm_ACE),confint(Disease.PAN_Pred_F.gbm_ACE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_F.reg_ACE),confint(LessMore.PAN_Pred_F.reg_ACE)),
cbind(coef(LessMore.PAN_Pred_F.net_ACE),confint(LessMore.PAN_Pred_F.net_ACE)),
cbind(coef(LessMore.PAN_Pred_F.rf_ACE),confint(LessMore.PAN_Pred_F.rf_ACE)),
cbind(coef(LessMore.PAN_Pred_F.gbm_ACE),confint(LessMore.PAN_Pred_F.gbm_ACE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_F.reg_ACE),confint(Sunk.PAN_Pred_F.reg_ACE)),
cbind(coef(Sunk.PAN_Pred_F.net_ACE),confint(Sunk.PAN_Pred_F.net_ACE)),
cbind(coef(Sunk.PAN_Pred_F.rf_ACE),confint(Sunk.PAN_Pred_F.rf_ACE)),
cbind(coef(Sunk.PAN_Pred_F.gbm_ACE),confint(Sunk.PAN_Pred_F.gbm_ACE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]


AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq

### Study 5 Results ###
Default_model.Study5<-list(
  "Default.PAN.Study5" = Default.PAN.Study5,
  "Default.PAN_FACE.Study5" = Default.PAN_FACE.Study5)
Disease_model.Study5<-list(
  "Disease.PAN.Study5" = Disease.PAN.Study5,
  "Disease.PAN_FACE.Study5" = Disease.PAN_FACE.Study5)

LessMore_model.Study5<-list(
  "LessMore.PAN.Study5" = LessMore.PAN.Study5,
  "LessMore.PAN_FACE.Study5" = LessMore.PAN_FACE.Study5)


Sunk_model.Study5<-list(
  "Sunk.PAN.Study5" = Sunk.PAN.Study5,
  "Sunk.PAN_FACE.Study5" = Sunk.PAN_FACE.Study5)

deviance_explained <- sapply(Default_model.Study5, calculate_deviance_explained.Default)
deviance_explained_df.Default.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Default.Study5) <- c("Condition","Panel", "Default Condition:Panel")

deviance_explained <- sapply(Disease_model.Study5, calculate_deviance_explained.Disease)
deviance_explained_df.Disease.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Disease.Study5) <- c("Condition","Panel", "Disease Condition:Panel")

variance_explained_lm <- sapply(LessMore_model.Study5, calculate_variance_explained_LessMore)
variance_explained_lm_df.LessMore.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

variance_explained_lm <- sapply(Sunk_model.Study5, calculate_variance_explained_Sunk)
variance_explained_lm_df.SunkCost.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.SunkCost.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.F(reg)+ACE", "Pan+Pred.F(net)+ACE", "Pan+Pred.F(rf)+ACE", "Pan+Pred.F(gbm)+ACE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)


etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.F(reg)+ACE",  
                                   "Pan+Pred.F(net)+ACE", 
                                   "Pan+Pred.F(rf)+ACE",
                                   "Pan+Pred.F(gbm)+ACE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in the test sample", digits = 2)

Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in the test sample
Model	Avg_Reduction
Pan	0.00
Pan+FACE	75.56
Pan+Pred.F(reg)+ACE	64.38
Pan+Pred.F(net)+ACE	66.98
Pan+Pred.F(rf)+ACE	69.91
Pan+Pred.F(gbm)+ACE	67.99
Pan (Full Study5)	0.00
Pan+FACE (Full Study5)	74.19

1.0.9 Explain Heterogenity with Predicted F in Full MP5 data

Random forest performs best when predicting the full Study 5 data, outperforming GBM. This could be because while we had made GBM more parsimounious, we haven’t done so with random forest.

1.0.9.1 Accuracy in full MP5 data

performance_metrics <- data.frame(
Method = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(test_perf.full_M5a_reg["MAE"], test_perf.full_M5a_net["MAE"], test_perf.full_M5a_rf["MAE"], test_perf.M5a_gbm["MAE"]),
  RMSE = c(test_perf.full_M5a_reg["RMSE"], test_perf.full_M5a_net["RMSE"], test_perf.full_M5a_rf["RMSE"], test_perf.M5a_gbm["RMSE"]),
  Rsquared = c(test_perf.full_M5a_reg["Rsquared"], test_perf.full_M5a_net["Rsquared"], test_perf.full_M5a_rf["Rsquared"], test_perf.full_M5a_gbm["Rsquared"])
)



performance_metrics_long <- performance_metrics %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")


# Create the performance metrics table
performance_table <- performance_metrics %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table, caption = "Comparison of Model Performance in Held-Out Test Data") %>%
  kable_styling(full_width = F, position = "center")

Comparison of Model Performance in Held-Out Test Data
Method	MAE	RMSE	Rsquared
Linear Regression	0.5392	0.6736	0.6300
Elastic Net	0.5499	0.6822	0.6211
Random Forest	0.2748	0.3866	0.8885
Gradient Boosting	0.5051	0.6262	0.7305

tst_M5a.sd_F <- sd(data.tst_M5a$F)

ggplot(performance_metrics_long, aes(x = Metric, y = Value, fill = Method)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = tst_M5a.sd_F, yend = tst_M5a.sd_F),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long %>% filter(Metric == "RMSE"),
            aes(x = 2, y = tst_M5a.sd_F, label = paste0("Std.dev of F in Held-Out Test Data: ", round(tst_M5a.sd_F, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Comparison of Model Performance in Full Study 5 Data",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

# do this by panel?

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.full_M5a.gbm, family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.full_M5a.gbm, family = binomial)


Default.PAN_Pred_F.reg_ACE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.reg , family = binomial)

Default.PAN_Pred_F.net_ACE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.net, family = binomial)

Default.PAN_Pred_F.rf_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.rf, family = binomial)

Default.PAN_Pred_F.gbm_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.gbm, family = binomial)

Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_F.reg_ACE" = Default.PAN_Pred_F.reg_ACE,
  "Default.PAN_Pred_F.net_ACE" = Default.PAN_Pred_F.net_ACE,
  "Default.PAN_Pred_F.rf_ACE" = Default.PAN_Pred_F.rf_ACE,
  "Default.PAN_Pred_F.gbm_ACE" = Default.PAN_Pred_F.gbm_ACE
)

Model Results

term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_F.reg_ACE,Default.PAN_Pred_F.net_ACE,Default.PAN_Pred_F.rf_ACE,Default.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result

Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.F(reg)_ACE				PAN_Pred.F(net)_ACE				PAN_Pred.F(rf)_ACE				PAN_Pred.F(gbm)_ACE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	1.05	0.13	0.38	0.703	1.05	0.14	0.38	0.702	0.96	0.13	-0.29	0.768	0.96	0.13	-0.31	0.754	1.03	0.14	0.24	0.809	1.00	0.14	0.00	0.997
DefaultCondition [OPTOUT]	1.74	0.32	3.01	0.003	1.76	0.33	3.02	0.003	2.04	0.41	3.53	<0.001	2.09	0.43	3.57	<0.001	1.78	0.34	2.97	0.003	1.89	0.37	3.24	0.001
F					1.02	0.19	0.13	0.894
A					0.95	0.08	-0.54	0.590	0.90	0.08	-1.20	0.229	0.91	0.08	-1.11	0.268	0.94	0.08	-0.70	0.482	0.92	0.08	-0.97	0.332
C					1.72	0.25	3.78	<0.001	1.62	0.17	4.49	<0.001	1.64	0.18	4.57	<0.001	1.68	0.20	4.26	<0.001	1.63	0.18	4.36	<0.001
E					1.04	0.09	0.40	0.686	1.08	0.08	1.02	0.306	1.07	0.08	0.95	0.342	1.05	0.08	0.62	0.534	1.07	0.08	0.86	0.389
DefaultCondition [OPTOUT] × F					1.25	0.34	0.83	0.408
DefaultCondition [OPTOUT] × A					0.84	0.11	-1.32	0.188	0.94	0.12	-0.45	0.653	0.94	0.12	-0.47	0.637	0.88	0.12	-1.00	0.317	0.91	0.12	-0.70	0.481
DefaultCondition [OPTOUT] × C					0.62	0.13	-2.29	0.022	0.78	0.12	-1.60	0.109	0.78	0.12	-1.62	0.105	0.72	0.13	-1.84	0.066	0.77	0.12	-1.63	0.104
DefaultCondition [OPTOUT] × E					1.09	0.14	0.68	0.499	0.95	0.11	-0.43	0.667	0.95	0.11	-0.43	0.668	1.01	0.12	0.10	0.918	0.98	0.11	-0.20	0.838
Predicted F									1.32	0.17	2.18	0.029	1.30	0.17	1.94	0.052	1.11	0.19	0.61	0.540	1.24	0.18	1.51	0.130
DefaultCondition [OPTOUT] × Predicted F									0.66	0.12	-2.21	0.027	0.65	0.13	-2.18	0.029	0.92	0.22	-0.34	0.735	0.74	0.16	-1.41	0.158
Observations	1460				1460				1460				1460				1460				1460
AIC	1884.213				1858.463				1854.228				1854.838				1859.655				1857.472
log-Likelihood	-936.107				-915.232				-913.114				-913.419				-915.828				-914.736

Anova and Eta Square

As in the origianl paper, type I anova is used. We first attribute variance to condition, FACE, then Panel main effect, followed by the conditionFACE interaction, and the Panelcondition interaction last.
As in the original paper, Etasq of a variable in linear regression models (LessMore and Sunk) is calculated as the sum of squares explained by the variables divided by the total sum of squares. Etasq of a variable in logistic regression models (Default and Unusual Disease) is calculated as the deviance explained by the model relative to the null model (deviance from the null model divided by the model’s residual deviance).
I validated this pipeline by reproducing the stats reported in the SI for Study 1.
Woudl we like to see the full Anova stats?

calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")

Etasq Table: Default Paradigm
	Condition	Panel	Default Condition:Panel
Default.PAN	0.03801***	0.00298.	0.01188***
Default.PAN_FACE	0.03801***	0.00051	0.00159
Default.PAN_Pred_F.reg_ACE	0.03801***	0.00021	0.00551**
Default.PAN_Pred_F.net_ACE	0.03801***	0.00013	0.00546**
Default.PAN_Pred_F.rf_ACE	0.03801***	0.00013	0.00308*
Default.PAN_Pred_F.gbm_ACE	0.03801***	0.00019	0.00462*

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.full_M5a.reg, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.full_M5a.reg, family = binomial)

Disease.PAN_Pred_F.reg_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.reg, family = binomial)

Disease.PAN_Pred_F.net_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.net, family = binomial)

Disease.PAN_Pred_F.rf_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.rf, family = binomial)

Disease.PAN_Pred_F.gbm_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.full_M5a.gbm, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_F.reg_ACE" = Disease.PAN_Pred_F.reg_ACE,
  "Disease.PAN_Pred_F.net_ACE" = Disease.PAN_Pred_F.net_ACE,
  "Disease.PAN_Pred_F.rf_ACE" = Disease.PAN_Pred_F.rf_ACE, 
  "Disease.PAN_Pred_F.gbm_ACE" = Disease.PAN_Pred_F.gbm_ACE
)


# extract_aic(Disease_model)

Model Results

term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_F.reg_ACE,Disease.PAN_Pred_F.net_ACE,Disease.PAN_Pred_F.rf_ACE,Disease.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result

Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.F(reg)_ACE				PAN_Pred.F(net)_ACE				PAN_Pred.F(rf)_ACE				PAN_Pred.F(gbm)_ACE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.45	0.06	-5.84	<0.001	0.43	0.06	-6.03	<0.001	0.46	0.07	-5.22	<0.001	0.46	0.07	-5.03	<0.001	0.43	0.06	-5.73	<0.001	0.44	0.07	-5.53	<0.001
DiseaseCondition [LOSS]	3.40	0.64	6.46	<0.001	3.69	0.72	6.73	<0.001	3.18	0.66	5.59	<0.001	3.07	0.65	5.33	<0.001	3.45	0.69	6.17	<0.001	3.28	0.67	5.84	<0.001
F					1.07	0.23	0.31	0.755
A					1.40	0.14	3.34	0.001	1.45	0.15	3.59	<0.001	1.45	0.15	3.62	<0.001	1.42	0.15	3.40	0.001	1.43	0.15	3.47	0.001
C					0.73	0.12	-1.83	0.067	0.79	0.09	-1.97	0.048	0.80	0.09	-1.93	0.054	0.77	0.11	-1.88	0.060	0.78	0.10	-1.99	0.047
E					0.89	0.09	-1.14	0.256	0.85	0.08	-1.77	0.077	0.85	0.08	-1.81	0.071	0.87	0.08	-1.46	0.144	0.87	0.08	-1.61	0.108
DiseaseCondition [LOSS] × F					1.21	0.35	0.65	0.513
DiseaseCondition [LOSS] × A					1.03	0.14	0.23	0.820	0.98	0.14	-0.11	0.913	0.98	0.14	-0.15	0.883	1.00	0.14	-0.01	0.995	0.98	0.14	-0.14	0.889
DiseaseCondition [LOSS] × C					1.26	0.28	1.06	0.290	1.25	0.20	1.37	0.169	1.24	0.20	1.34	0.180	1.19	0.22	0.94	0.345	1.19	0.20	1.06	0.288
DiseaseCondition [LOSS] × E					1.04	0.14	0.27	0.791	1.06	0.13	0.50	0.616	1.07	0.13	0.56	0.577	1.07	0.13	0.52	0.604	1.07	0.13	0.58	0.563
Predicted F									0.87	0.12	-1.00	0.318	0.85	0.12	-1.13	0.260	0.98	0.19	-0.13	0.898	0.93	0.15	-0.46	0.648
DiseaseCondition [LOSS] × Predicted F									1.51	0.30	2.08	0.037	1.59	0.33	2.24	0.025	1.48	0.39	1.51	0.132	1.63	0.36	2.19	0.029
Observations	1460				1460				1460				1460				1460				1460
AIC	1762.505				1721.769				1718.733				1718.139				1719.185				1716.082
log-Likelihood	-875.252				-846.884				-845.367				-845.070				-845.592				-844.041

Anova and Eta Square

calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")

Etasq Table: Disease Paradigm
	Condition	Panel	DiseaseCondition:Panel
Disease.PAN	0.05317***	0.03246***	0.01394***
Disease.PAN_FACE	0.05317***	0.00514**	0.00395*
Disease.PAN_Pred_F.reg_ACE	0.05317***	0.00525**	0.00424*
Disease.PAN_Pred_F.net_ACE	0.05317***	0.00516**	0.00436*
Disease.PAN_Pred_F.rf_ACE	0.05317***	0.00601**	0.00443*
Disease.PAN_Pred_F.gbm_ACE	0.05317***	0.00635**	0.0045*

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.full_M5a.reg)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.reg)
LessMore.PAN_Pred_F.reg_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.reg)
LessMore.PAN_Pred_F.net_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.net)
LessMore.PAN_Pred_F.rf_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.rf)
LessMore.PAN_Pred_F.gbm_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.gbm)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_F.reg_ACE" = LessMore.PAN_Pred_F.reg_ACE,
  "LessMore.PAN_Pred_F.net_ACE" = LessMore.PAN_Pred_F.net_ACE,
  "LessMore.PAN_Pred_F.rf_ACE" = LessMore.PAN_Pred_F.rf_ACE, 
  "LessMore.PAN_Pred_F.gbm_ACE" = LessMore.PAN_Pred_F.gbm_ACE
)

Model Results

term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_F.reg_ACE,LessMore.PAN_Pred_F.net_ACE,LessMore.PAN_Pred_F.rf_ACE,LessMore.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result

LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.F(reg)_ACE				PAN_Pred.F(net)_ACE				PAN_Pred.F(rf)_ACE				PAN_Pred.F(gbm)_ACE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.20	0.09	59.66	<0.001	5.22	0.09	61.17	<0.001	5.20	0.09	57.18	<0.001	5.20	0.09	56.01	<0.001	5.20	0.09	59.30	<0.001	5.20	0.09	58.64	<0.001
LessMoreCondition [SCARF]	1.11	0.12	9.00	<0.001	1.10	0.12	9.13	<0.001	1.09	0.13	8.50	<0.001	1.08	0.13	8.31	<0.001	1.07	0.12	8.66	<0.001	1.07	0.13	8.53	<0.001
F					0.13	0.12	1.12	0.265
A					-0.13	0.06	-2.23	0.026	-0.12	0.06	-2.06	0.040	-0.12	0.06	-2.08	0.038	-0.13	0.06	-2.20	0.028	-0.12	0.06	-2.08	0.037
C					0.01	0.09	0.08	0.934	0.07	0.07	1.03	0.301	0.07	0.07	1.01	0.314	0.04	0.08	0.49	0.624	0.06	0.07	0.90	0.368
E					-0.01	0.06	-0.10	0.919	-0.03	0.05	-0.68	0.498	-0.03	0.05	-0.65	0.518	-0.02	0.05	-0.36	0.722	-0.03	0.05	-0.65	0.517
LessMoreCondition [SCARF] × F					-0.02	0.17	-0.13	0.899
LessMoreCondition [SCARF] × A					0.39	0.08	4.82	<0.001	0.38	0.08	4.72	<0.001	0.38	0.08	4.70	<0.001	0.36	0.08	4.51	<0.001	0.37	0.08	4.57	<0.001
LessMoreCondition [SCARF] × C					0.23	0.13	1.74	0.082	0.21	0.10	2.14	0.032	0.20	0.10	2.11	0.035	0.16	0.11	1.49	0.137	0.18	0.10	1.82	0.069
LessMoreCondition [SCARF] × E					0.01	0.08	0.08	0.940	0.02	0.07	0.23	0.816	0.02	0.07	0.26	0.794	0.04	0.07	0.48	0.631	0.03	0.07	0.42	0.672
Predicted F									0.04	0.08	0.44	0.658	0.05	0.09	0.54	0.592	0.11	0.11	0.98	0.329	0.05	0.09	0.54	0.588
LessMoreCondition [SCARF] × Predicted F									0.02	0.12	0.20	0.838	0.04	0.12	0.30	0.764	0.12	0.15	0.80	0.423	0.10	0.13	0.73	0.466
Observations	1460				1460				1460				1460				1460				1460
R² / R² adjusted	0.204 / 0.201				0.248 / 0.241				0.247 / 0.240				0.247 / 0.241				0.250 / 0.243				0.248 / 0.242

Anova and Eta Square

calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")

Etasq Table: LessMore Paradigm
	Condition	Panel	LessMoreCondition:Panel
LessMore.PAN	0.09206***	0.10649***	0.00506*
LessMore.PAN_FACE	0.09206***	0.02908***	0.00051
LessMore.PAN_Pred_F.reg_ACE	0.09206***	0.02842***	0.00122
LessMore.PAN_Pred_F.net_ACE	0.09206***	0.0286***	0.00129
LessMore.PAN_Pred_F.rf_ACE	0.09206***	0.02984***	0.00154
LessMore.PAN_Pred_F.gbm_ACE	0.09206***	0.02872***	0.00157

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.full_M5a.reg)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.reg)
Sunk.PAN_Pred_F.reg_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E+SunkCondition * Panel, data = data.full_M5a.reg)
Sunk.PAN_Pred_F.net_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.net)
Sunk.PAN_Pred_F.rf_ACE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E  + SunkCondition * Panel, data = data.full_M5a.rf)
Sunk.PAN_Pred_F.gbm_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.gbm)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_F.reg_ACE" = Sunk.PAN_Pred_F.reg_ACE,
  "Sunk.PAN_Pred_F.net_ACE" = Sunk.PAN_Pred_F.net_ACE,
  "Sunk.PAN_Pred_F.rf_ACE" = Sunk.PAN_Pred_F.rf_ACE, #0.302 with unscaled
  "Sunk.PAN_Pred_F.gbm_ACE" = Sunk.PAN_Pred_F.gbm_ACE
)

# extract_adjusted_r_squared(Sunk_model)

Full Model Results

term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_F.reg_ACE,Sunk.PAN_Pred_F.net_ACE,Sunk.PAN_Pred_F.rf_ACE,Sunk.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_F(reg)_ACE", 
                        "PAN_Pred_F(net)_ACE", 
                        "PAN_Pred_F(rf)_ACE", 
                        "PAN_Pred_F(gbm)_ACE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result

Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred_F(reg)_ACE				PAN_Pred_F(net)_ACE				PAN_Pred_F(rf)_ACE				PAN_Pred_F(gbm)_ACE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.78	0.17	34.79	<0.001	5.77	0.17	34.93	<0.001	5.67	0.18	31.92	<0.001	5.68	0.18	31.32	<0.001	5.71	0.17	33.18	<0.001	5.69	0.17	32.63	<0.001
SunkCondition [PAID]	0.43	0.23	1.85	0.065	0.46	0.23	1.97	0.049	0.41	0.25	1.66	0.097	0.39	0.25	1.55	0.120	0.44	0.24	1.81	0.070	0.45	0.24	1.83	0.067
F					0.39	0.22	1.77	0.077
A					0.07	0.11	0.61	0.539	0.07	0.11	0.65	0.519	0.08	0.11	0.72	0.472	0.07	0.11	0.68	0.500	0.07	0.11	0.62	0.535
C					-0.26	0.18	-1.50	0.133	-0.11	0.13	-0.85	0.394	-0.10	0.13	-0.75	0.452	-0.17	0.15	-1.12	0.265	-0.14	0.14	-1.03	0.303
E					-0.06	0.11	-0.59	0.558	-0.12	0.10	-1.25	0.211	-0.13	0.10	-1.34	0.182	-0.11	0.10	-1.08	0.281	-0.12	0.10	-1.20	0.229
SunkCondition [PAID] × F					0.38	0.32	1.18	0.239
SunkCondition [PAID] × A					0.02	0.16	0.13	0.895	0.04	0.16	0.23	0.817	0.04	0.16	0.23	0.817	0.04	0.16	0.26	0.798	0.06	0.16	0.36	0.715
SunkCondition [PAID] × C					-0.20	0.25	-0.79	0.428	-0.04	0.19	-0.19	0.851	-0.04	0.19	-0.24	0.810	-0.08	0.21	-0.36	0.718	-0.02	0.19	-0.10	0.917
SunkCondition [PAID] × E					0.11	0.15	0.72	0.469	0.05	0.14	0.33	0.739	0.05	0.14	0.36	0.717	0.06	0.14	0.41	0.680	0.03	0.14	0.24	0.811
Predicted F									0.25	0.16	1.62	0.105	0.22	0.17	1.34	0.181	0.30	0.20	1.48	0.138	0.30	0.18	1.69	0.092
SunkCondition [PAID] × Predicted F									0.16	0.22	0.69	0.490	0.19	0.24	0.80	0.426	0.22	0.30	0.74	0.462	0.09	0.25	0.35	0.724
Observations	1460				1460				1460				1460				1460				1460
R² / R² adjusted	0.023 / 0.020				0.042 / 0.033				0.039 / 0.030				0.038 / 0.029				0.038 / 0.029				0.038 / 0.029

Anova and Eta Square

calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 6), sig_condition)
  res_panel <- paste0(round(perc_panel, 6), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 6), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")

Etasq Table: SunkCost Paradigm
	Condition	Panel	LessMoreCondition:Panel
Sunk.PAN	0.007943***	0.011323***	0.003981.
Sunk.PAN_FACE	0.007943***	0.000985	0.002048
Sunk.PAN_Pred_F.reg_ACE	0.007943***	0.001641	0.002109
Sunk.PAN_Pred_F.net_ACE	0.007943***	0.002035	0.002104
Sunk.PAN_Pred_F.rf_ACE	0.007943***	0.000325	0.001955
Sunk.PAN_Pred_F.gbm_ACE	0.007943***	0.001118	0.002033

Summary

Model Fit

\(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)
note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.full_M5a.gbm,family=binomial)


R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_F.reg_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.net_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.rf_ACE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_F.gbm_ACE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Default"))



Disease.null<-glm(numDisease~1,data.full_M5a.gbm,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_F.reg_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.net_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.rf_ACE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_F.gbm_ACE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(LessMore.PAN_Pred_F.rf_ACE)$r.squared,
                     summary(LessMore.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(Sunk.PAN_Pred_F.rf_ACE )$r.squared,
                     summary(Sunk.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "SunkCost"))




R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.F(reg)+ACE",  
                                   "Pan+Pred.F(net)+ACE", 
                                   "Pan+Pred.F(rf)+ACE",
                                   "Pan+Pred.F(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_F.reg_ACE), 
                    AIC(Default.PAN_Pred_F.net_ACE), 
                    AIC(Default.PAN_Pred_F.rf_ACE),
                    AIC(Default.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_F.reg_ACE), 
                    AIC(Disease.PAN_Pred_F.net_ACE), 
                    AIC(Disease.PAN_Pred_F.rf_ACE),
                    AIC(Disease.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Disease"))


AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PF(reg)+ACE",  
                                   "Pan+PF(net)+ACE", 
                                   "Pan+PF(rf)+ACE",
                                   "Pan+PF(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


### Avg. improvement in R2 for each model across all four paradigms, in FULL study 5
Default.null.Study5<-glm(numDefault~1,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.null.Study5<-glm(numDisease~1,Data_M5a.log_z_dummy_coded,family=binomial)
Default.PAN.Study5<-glm(numDefault~DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN.Study5<-glm(numDisease~DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN.Study5<-lm(numLessMore~LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN.Study5<-lm(numSunkCost~SunkCondition*Panel,Data_M5a.log_z_dummy_coded)


Default.PAN_FACE.Study5<-glm(numDefault~DefaultCondition*F+DefaultCondition*A+DefaultCondition*C+DefaultCondition*E+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~DiseaseCondition*F+DiseaseCondition*A+DiseaseCondition*C+DiseaseCondition*E+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~LessMoreCondition*F+LessMoreCondition*A+LessMoreCondition*C+LessMoreCondition*E+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~SunkCondition*F+SunkCondition*A+SunkCondition*C+SunkCondition*E+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

R2.Default = as.data.frame(cbind( 
  "R2" = c((1-logLik(Default.PAN.Study5)/logLik(Default.null.Study5)),
           (1-logLik(Default.PAN_FACE.Study5)/logLik(Default.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Default"
))


R2.Disease = as.data.frame(cbind( 
  "R2" = c((1-logLik(Disease.PAN.Study5)/logLik(Disease.null.Study5)),
           (1-logLik(Disease.PAN_FACE.Study5)/logLik(Disease.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Disease"
))

# For LessMore paradigm (linear models)
R2.LessMore = as.data.frame(cbind( 
  "R2" = c(summary(LessMore.PAN.Study5)$r.squared,
           summary(LessMore.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "LessMore"
))

# For SunkCost paradigm (linear models)
R2.Sunk = as.data.frame(cbind( 
  "R2" = c(summary(Sunk.PAN.Study5)$r.squared,
           summary(Sunk.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "SunkCost"
))

R2.FullStudy5 <- rbind(R2.Default, R2.Disease, R2.LessMore, R2.Sunk)
R2.FullStudy5$R2 <- round(as.numeric(R2.FullStudy5$R2), 3)

R2.FullStudy5 <- R2.FullStudy5 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement_full_study_5 <- R2.FullStudy5 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))%>%
  slice(-1) 

avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample", digits = 2)

Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample
Model	Avg_Improvement
Pan	0.00
Pan+FACE	43.20
Pan+Pred.F(reg)+ACE	41.01
Pan+Pred.F(net)+ACE	40.17
Pan+Pred.F(rf)+ACE	38.88
Pan+Pred.F(gbm)+ACE	39.60
Pan+FACE (Full Study 5)	43.20

F*Cond Model Coefficients

DVs for Default & Framing are 1/0
DVs for LessMore & SunkCost are on the original scale

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_F.reg_ACE),confint(Default.PAN_Pred_F.reg_ACE)),
cbind(coef(Default.PAN_Pred_F.net_ACE),confint(Default.PAN_Pred_F.net_ACE)),
cbind(coef(Default.PAN_Pred_F.rf_ACE),confint(Default.PAN_Pred_F.rf_ACE)),
cbind(coef(Default.PAN_Pred_F.gbm_ACE),confint(Default.PAN_Pred_F.gbm_ACE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_F.reg_ACE),confint(Disease.PAN_Pred_F.reg_ACE)),
cbind(coef(Disease.PAN_Pred_F.net_ACE),confint(Disease.PAN_Pred_F.net_ACE)),
cbind(coef(Disease.PAN_Pred_F.rf_ACE),confint(Disease.PAN_Pred_F.rf_ACE)),
cbind(coef(Disease.PAN_Pred_F.gbm_ACE),confint(Disease.PAN_Pred_F.gbm_ACE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_F.reg_ACE),confint(LessMore.PAN_Pred_F.reg_ACE)),
cbind(coef(LessMore.PAN_Pred_F.net_ACE),confint(LessMore.PAN_Pred_F.net_ACE)),
cbind(coef(LessMore.PAN_Pred_F.rf_ACE),confint(LessMore.PAN_Pred_F.rf_ACE)),
cbind(coef(LessMore.PAN_Pred_F.gbm_ACE),confint(LessMore.PAN_Pred_F.gbm_ACE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_F.reg_ACE),confint(Sunk.PAN_Pred_F.reg_ACE)),
cbind(coef(Sunk.PAN_Pred_F.net_ACE),confint(Sunk.PAN_Pred_F.net_ACE)),
cbind(coef(Sunk.PAN_Pred_F.rf_ACE),confint(Sunk.PAN_Pred_F.rf_ACE)),
cbind(coef(Sunk.PAN_Pred_F.gbm_ACE),confint(Sunk.PAN_Pred_F.gbm_ACE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]


AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq

### Study 5 Results ###
Default_model.Study5<-list(
  "Default.PAN.Study5" = Default.PAN.Study5,
  "Default.PAN_FACE.Study5" = Default.PAN_FACE.Study5)
Disease_model.Study5<-list(
  "Disease.PAN.Study5" = Disease.PAN.Study5,
  "Disease.PAN_FACE.Study5" = Disease.PAN_FACE.Study5)

LessMore_model.Study5<-list(
  "LessMore.PAN.Study5" = LessMore.PAN.Study5,
  "LessMore.PAN_FACE.Study5" = LessMore.PAN_FACE.Study5)


Sunk_model.Study5<-list(
  "Sunk.PAN.Study5" = Sunk.PAN.Study5,
  "Sunk.PAN_FACE.Study5" = Sunk.PAN_FACE.Study5)

deviance_explained <- sapply(Default_model.Study5, calculate_deviance_explained.Default)
deviance_explained_df.Default.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Default.Study5) <- c("Condition","Panel", "Default Condition:Panel")

deviance_explained <- sapply(Disease_model.Study5, calculate_deviance_explained.Disease)
deviance_explained_df.Disease.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Disease.Study5) <- c("Condition","Panel", "Disease Condition:Panel")

variance_explained_lm <- sapply(LessMore_model.Study5, calculate_variance_explained_LessMore)
variance_explained_lm_df.LessMore.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

variance_explained_lm <- sapply(Sunk_model.Study5, calculate_variance_explained_Sunk)
variance_explained_lm_df.SunkCost.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.SunkCost.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.F(reg)+ACE", "Pan+Pred.F(net)+ACE", "Pan+Pred.F(rf)+ACE", "Pan+Pred.F(gbm)+ACE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)


etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.F(reg)+ACE",  
                                   "Pan+Pred.F(net)+ACE", 
                                   "Pan+Pred.F(rf)+ACE",
                                   "Pan+Pred.F(gbm)+ACE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data", digits = 2)

Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data
Model	Avg_Reduction
Pan	0.00
Pan+FACE	74.19
Pan+Pred.F(reg)+ACE	61.53
Pan+Pred.F(net)+ACE	61.10
Pan+Pred.F(rf)+ACE	65.69
Pan+Pred.F(gbm)+ACE	61.68
Pan (Full Study5)	0.00
Pan+FACE (Full Study5)	74.19

1.1 Evaluate Portability with Study 1 Data

# Data_M1a_dummy_coded<-read.csv("../MP Study 1/Data_M1a_dummy_coded_20240808.csv") # seperately .log_z data
Data_M1a.log_dummy_coded<-read.csv("../MP Study 1/Data_M1a.log_dummy_coded_20241003.csv")%>%
  rename_with(~ gsub("DEMCHAR", "PurposeOpenEnd", .x))

F_measure.Study1<-read.csv("../MP Study 1/Data Prep for ML/MP1_Full_data_F_ML_20240916.csv")
META_Browser_columns <- names(Data_M1a.log_dummy_coded)[grepl("META_Browser", names(Data_M1a.log_dummy_coded))] # tentatively retain browser Browsers to make predictions
Data_M1a.log_dummy_coded<-Data_M1a.log_dummy_coded%>% 
  select(-c(X,LW_DV,numLW_IV)) 
names(Data_M1a.log_dummy_coded) <- gsub("iNTRO", "INTRO", names(Data_M1a.log_dummy_coded))
# make variable names concistent


# This reread the data using read.csv function to ensures data format consistency, need to understand more about this function here....
# Data_M5a_dummy_coded<-read.csv("Data_M5a_dummy_coded_20240808.csv") # seperately .log_Z data

Data_M5a.log_dummy_coded<-read.csv("Data_M5a.log_dummy_coded_20240921.csv")

META_Browser_columns <- names(Data_M5a.log_dummy_coded)[grepl("META_Browser", names(Data_M5a.log_dummy_coded))]
Data_M5a.log_dummy_coded<-Data_M5a.log_dummy_coded%>%
  select(-c(X))


# Calculate mean and standard deviation for each column in Data_M5a.log_dummy_coded
means <- sapply(Data_M5a.log_dummy_coded[, scale_columns], mean, na.rm = TRUE)
sds <- sapply(Data_M5a.log_dummy_coded[, scale_columns], sd, na.rm = TRUE)

scale_with_reference_M5a <- function(x, mean_ref, sd_ref) {
  (x - mean_ref) / sd_ref
}

Data_M5a.log_z_dummy_coded <- Data_M5a.log_dummy_coded %>%
  mutate(across(.cols = all_of(scale_columns), 
                .fns = ~ scale_with_reference_M5a(., means[cur_column()], sds[cur_column()])))

# Scale Data_M1a.log_dummy_coded using the mean and sd from Data_M5a.log_dummy_coded
Data_M1a.log_z_dummy_coded <- Data_M1a.log_dummy_coded %>%
  mutate(across(.cols = all_of(scale_columns), 
                .fns = ~ scale_with_reference_M5a(., means[cur_column()], sds[cur_column()])))

Data_M1a.log_z_dummy_coded <- Data_M1a.log_z_dummy_coded 
Data_M1a.log_z_dummy_coded$BNT<-F_measure.Study1$BNT
Data_M1a.log_z_dummy_coded$CRTScore<-F_measure.Study1$CRTScore

# Set seed for reproducibility
set.seed(123)

variables_to_remove <- c("A","C","E","numSunkCost","numLessMore","numDefault","numDisease","SunkCondition","LessMoreCondition","DiseaseCondition","DefaultCondition","Panel",Text_analysis,"Antonym","Synonym","crt2_score","rotsum","MXsum")

# Create training and testing sets

data.trn_M5a_FULL <- Data_M5a.log_z_dummy_coded 
data.tst_MP1.US <- Data_M1a.log_z_dummy_coded%>%filter(Country_US==1) #  MP1 US data
data.tst_MP1.UK <- Data_M1a.log_z_dummy_coded%>%filter(Country_UK==1) #  MP1 UK data
data.tst_MP1.Neth <- Data_M1a.log_z_dummy_coded%>%filter(Country_US==0&Country_UK==0) # #  MP1 Netherlands student data
# There are columns in study 1 dataset not in study 5, but this is okay. 

data.trn_M5a_FULL <- as.data.frame(data.trn_M5a_FULL[, !colnames(data.trn_M5a_FULL) %in% variables_to_remove])

actual_MP1.US <- data.tst_MP1.US$F
actual_MP1.UK <- data.tst_MP1.UK$F
actual_MP1.Neth <- data.tst_MP1.Neth$F


tuneGrid <- expand.grid(
  mtry = c(64),
  min.node.size = c(5),
  splitrule = c("variance")
)

# Model_M5a_rf.FULL <- caret::train(F ~ ., data = data.trn_M5a_FULL, method = "ranger",
#                   tuneGrid = tuneGrid, importance = 'impurity', num.trees = 750,
#                   trControl = ctrl)
# save(Model_M5a_rf.FULL, file = "./Saved ML Model and Data/Model_M5a_rf_FULL.RData")
load("./Saved ML Model and Data/Model_M5a_rf_FULL.RData")
M5a_rf_pred_MP1.US<- predict(Model_M5a_rf.FULL,data.tst_MP1.US)
test_perf.M5a_rf.MP1.US <- postResample(M5a_rf_pred_MP1.US, actual_MP1.US)
data.tst_MP1.US.rf <- cbind(data.tst_MP1.US, Predicted_F = M5a_rf_pred_MP1.US)


Model_M5a_reg.FULL <- caret::train(F ~ ., data = data.trn_M5a_FULL, method = "lm")
M5a_reg_pred_MP1.US<- predict(Model_M5a_reg.FULL,data.tst_MP1.US)
test_perf.M5a_reg.MP1.US <- postResample(M5a_reg_pred_MP1.US, actual_MP1.US)
data.tst_MP1.US.reg <- cbind(data.tst_MP1.US, Predicted_F = M5a_reg_pred_MP1.US)

Model_M5a_net.FULL <- train(F ~ ., data = data.trn_M5a_FULL, method = "glmnet", trControl = ctrl, tuneLength=40) 

M5a_net_pred_MP1.US<- predict(Model_M5a_net.FULL,data.tst_MP1.US)
test_perf.M5a_net.MP1.US <- postResample(M5a_net_pred_MP1.US, actual_MP1.US)
data.tst_MP1.US.net <- cbind(data.tst_MP1.US, Predicted_F = M5a_net_pred_MP1.US)



# Model_M5a_gbm4.FULL <- caret::train(F ~ ., data = data.trn_M5a_FULL, method = "gbm",
#                     tuneGrid = tuneGrid.4, trControl = ctrl,
#                     verbose = FALSE)
# 
# save(Model_M5a_gbm4.FULL, file = "./Saved ML Model and Data/Model_M5a_gbm_FULL.RData")
load("./Saved ML Model and Data/Model_M5a_gbm_FULL_4_50.RData")
Model_M5a_gbm.FULL<-Model_M5a_gbm4.FULL
M5a_gbm_pred_MP1.US<- predict(Model_M5a_gbm.FULL,data.tst_MP1.US)
test_perf.M5a_gbm.MP1.US <- postResample(M5a_gbm_pred_MP1.US, actual_MP1.US)
data.tst_MP1.US.gbm<- cbind(data.tst_MP1.US, Predicted_F = M5a_gbm_pred_MP1.US)

1.1.1 Study 1 US Data

n = 5336

ggpairs(data.tst_MP1.US, c("F","CRTScore","BNT","ATTN1__Correct"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

1.1.1.0 Model Fit

We trained the model with full Study 5 data
The following stats come from cross-validation during model training

train_perf_M5a_rf.FULL <- Model_M5a_rf.FULL$results[which.min(Model_M5a_rf.FULL$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_reg.FULL <- Model_M5a_reg.FULL$results[which.min(Model_M5a_reg.FULL$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_net.FULL <- Model_M5a_net.FULL$results[which.min(Model_M5a_net.FULL$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_gbm.FULL <- Model_M5a_gbm.FULL$results[which.min(Model_M5a_gbm.FULL$results$RMSE), c("RMSE", "Rsquared", "MAE")]

# Combine training performance metrics into a data frame
performance_metrics_trn_M1 <- data.frame(
  Model = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(as.numeric(train_perf_M5a_reg.FULL["MAE"]), as.numeric(train_perf_M5a_net.FULL["MAE"]), as.numeric(train_perf_M5a_rf.FULL["MAE"]), as.numeric(train_perf_M5a_gbm.FULL["MAE"])),
  RMSE = c(as.numeric(train_perf_M5a_reg.FULL["RMSE"]), as.numeric(train_perf_M5a_net.FULL["RMSE"]), as.numeric(train_perf_M5a_rf.FULL["RMSE"]), as.numeric(train_perf_M5a_gbm.FULL["RMSE"])),
  Rsquared = c(as.numeric(train_perf_M5a_reg.FULL["Rsquared"]), as.numeric(train_perf_M5a_net.FULL["Rsquared"]), as.numeric(train_perf_M5a_rf.FULL["Rsquared"]), as.numeric(train_perf_M5a_gbm.FULL["Rsquared"])))


performance_table_trn <- performance_metrics_trn_M1 %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table_trn, caption = "Comparison of Model Performance in Training Data (Full Study 5 Data) using CV") %>%
  kable_styling(full_width = F, position = "center")

Comparison of Model Performance in Training Data (Full Study 5 Data) using CV
Model	MAE	RMSE	Rsquared
Linear Regression	0.5782	0.7185	0.5833
Elastic Net	0.5660	0.6995	0.6016
Random Forest	0.5301	0.6595	0.6532
Gradient Boosting	0.5021	0.6295	0.6756

###Plot ###

performance_metrics_long_trn <- performance_metrics_trn_M1 %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")

trn_M5a_FULL.sd_F <- sd(data.trn_M5a_FULL$F)

ggplot(performance_metrics_long_trn, aes(x = Metric, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = trn_M5a_FULL.sd_F, yend = trn_M5a_FULL.sd_F),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
            aes(x = 2, y = trn_M5a_FULL.sd_F, label = paste0("Std.dev of F in Study 5 Full Data: ", round(trn_M5a_FULL.sd_F, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Comparison of Model Performance in Training Data (Full Study 5 Data) using CV",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

1.1.1.4 Correlation Btw ML Predicted F and other FACE factors

Significance Notation: *** if the p-value is < 0.001, ** if the p-value is < 0.01, * if the p-value is < 0.05, . if the p-value is < 0.10

Random Forest

ggpairs(data.tst_MP1.US.rf, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.tst_MP1.US.gbm, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PF5 and individual F measures in Study 1 and ATTN1

ggpairs(data.tst_MP1.US.gbm, c("Predicted_F","CRTScore","BNT","ATTN1__Correct"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

1.1.1.5 Explain Heterogenity with Predicted F

1.1.1.5.1 Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_MP1.US.gbm%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_MP1.US.gbm%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_F.reg_ACE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.reg%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_F.net_ACE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.net%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_F.rf_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.rf%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_F.gbm_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.gbm%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_F.reg_ACE" = Default.PAN_Pred_F.reg_ACE,
  "Default.PAN_Pred_F.net_ACE" = Default.PAN_Pred_F.net_ACE,
  "Default.PAN_Pred_F.rf_ACE" = Default.PAN_Pred_F.rf_ACE,
  "Default.PAN_Pred_F.gbm_ACE" = Default.PAN_Pred_F.gbm_ACE
)

Model Results

term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_F.reg_ACE,Default.PAN_Pred_F.net_ACE,Default.PAN_Pred_F.rf_ACE,Default.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result

Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.F(reg)_ACE				PAN_Pred.F(net)_ACE				PAN_Pred.F(rf)_ACE				PAN_Pred.F(gbm)_ACE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.88	0.11	-1.06	0.291	0.87	0.11	-1.14	0.253	0.81	0.10	-1.64	0.100	0.79	0.10	-1.86	0.062	0.77	0.10	-2.00	0.046	0.81	0.10	-1.60	0.110
DefaultCondition [OPTOUT]	3.10	0.56	6.31	<0.001	3.22	0.59	6.42	<0.001	3.31	0.61	6.51	<0.001	3.38	0.63	6.60	<0.001	3.37	0.63	6.55	<0.001	3.28	0.60	6.47	<0.001
F					1.57	0.16	4.48	<0.001
A					1.41	0.17	2.87	0.004	1.18	0.15	1.35	0.178	1.18	0.15	1.29	0.196	1.17	0.15	1.27	0.206	1.20	0.15	1.45	0.148
C					1.02	0.05	0.39	0.698	0.99	0.05	-0.21	0.836	0.99	0.05	-0.14	0.885	1.03	0.05	0.53	0.599	1.01	0.05	0.10	0.922
E					0.86	0.07	-1.93	0.054	0.88	0.07	-1.70	0.089	0.88	0.07	-1.68	0.092	0.88	0.07	-1.68	0.093	0.88	0.07	-1.67	0.094
DefaultCondition [OPTOUT] × F					1.03	0.16	0.17	0.867
DefaultCondition [OPTOUT] × A					0.44	0.08	-4.53	<0.001	0.51	0.10	-3.60	<0.001	0.51	0.10	-3.56	<0.001	0.50	0.09	-3.69	<0.001	0.49	0.09	-3.82	<0.001
DefaultCondition [OPTOUT] × C					1.18	0.10	1.98	0.048	1.24	0.10	2.61	0.009	1.24	0.10	2.60	0.009	1.20	0.10	2.29	0.022	1.21	0.10	2.32	0.020
DefaultCondition [OPTOUT] × E					1.23	0.14	1.79	0.073	1.16	0.14	1.31	0.190	1.16	0.14	1.31	0.191	1.19	0.14	1.48	0.140	1.20	0.14	1.58	0.115
Predicted F									1.53	0.12	5.50	<0.001	1.56	0.13	5.47	<0.001	1.52	0.12	5.11	<0.001	1.54	0.12	5.40	<0.001
DefaultCondition [OPTOUT] × Predicted F									0.76	0.09	-2.35	0.019	0.75	0.09	-2.37	0.018	0.82	0.10	-1.61	0.108	0.84	0.10	-1.46	0.145
Observations	3541				3541				3541				3541				3541				3541
AIC	4466.919				4382.086				4385.418				4386.017				4387.018				4381.467
log-Likelihood	-2215.460				-2165.043				-2166.709				-2167.008				-2167.509				-2164.734

Anova and Eta Square

calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")

Etasq Table: Default Paradigm
	Condition	Panel	Default Condition:Panel
Default.PAN	0.05063***	0.00612***	0.00751***
Default.PAN_FACE	0.05063***	0.00456**	0.0031.
Default.PAN_Pred_F.reg_ACE	0.05063***	0.00387*	0.00229
Default.PAN_Pred_F.net_ACE	0.05063***	0.00391*	0.00226
Default.PAN_Pred_F.rf_ACE	0.05063***	0.00369*	0.00279
Default.PAN_Pred_F.gbm_ACE	0.05063***	0.00368*	0.0027

1.1.1.5.2 Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_MP1.US.gbm, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_MP1.US.gbm, family = binomial)

Disease.PAN_Pred_F.reg_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.reg, family = binomial)

Disease.PAN_Pred_F.net_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.net, family = binomial)

Disease.PAN_Pred_F.rf_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.rf, family = binomial)

Disease.PAN_Pred_F.gbm_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_MP1.US.gbm, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_F.reg_ACE" = Disease.PAN_Pred_F.reg_ACE,
  "Disease.PAN_Pred_F.net_ACE" = Disease.PAN_Pred_F.net_ACE,
  "Disease.PAN_Pred_F.rf_ACE" = Disease.PAN_Pred_F.rf_ACE, 
  "Disease.PAN_Pred_F.gbm_ACE" = Disease.PAN_Pred_F.gbm_ACE
)
# extract_aic(Disease_model)

Model Results

term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_F.reg_ACE,Disease.PAN_Pred_F.net_ACE,Disease.PAN_Pred_F.rf_ACE,Disease.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result

Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.F(reg)_ACE				PAN_Pred.F(net)_ACE				PAN_Pred.F(rf)_ACE				PAN_Pred.F(gbm)_ACE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.49	0.05	-6.86	<0.001	0.47	0.05	-7.15	<0.001	0.47	0.05	-6.92	<0.001	0.48	0.05	-6.80	<0.001	0.48	0.05	-6.69	<0.001	0.47	0.05	-6.99	<0.001
DiseaseCondition [LOSS]	2.72	0.39	6.97	<0.001	2.91	0.43	7.29	<0.001	2.74	0.41	6.83	<0.001	2.67	0.40	6.61	<0.001	2.63	0.39	6.46	<0.001	2.74	0.40	6.84	<0.001
F					1.30	0.12	2.82	0.005
A					1.66	0.18	4.58	<0.001	1.76	0.20	4.96	<0.001	1.77	0.20	5.00	<0.001	1.79	0.21	5.06	<0.001	1.74	0.20	4.89	<0.001
C					0.79	0.04	-4.89	<0.001	0.83	0.04	-3.82	<0.001	0.83	0.04	-3.81	<0.001	0.82	0.04	-4.03	<0.001	0.82	0.04	-3.97	<0.001
E					0.96	0.06	-0.67	0.501	0.90	0.06	-1.56	0.118	0.90	0.06	-1.61	0.107	0.89	0.06	-1.66	0.097	0.90	0.06	-1.53	0.126
DiseaseCondition [LOSS] × F					0.87	0.11	-1.14	0.254
DiseaseCondition [LOSS] × A					1.01	0.15	0.07	0.947	0.89	0.14	-0.77	0.439	0.87	0.13	-0.91	0.363	0.86	0.13	-1.00	0.318	0.89	0.14	-0.79	0.430
DiseaseCondition [LOSS] × C					1.17	0.08	2.39	0.017	1.09	0.07	1.30	0.192	1.08	0.07	1.21	0.227	1.11	0.07	1.60	0.110	1.09	0.07	1.38	0.169
DiseaseCondition [LOSS] × E					0.79	0.07	-2.58	0.010	0.86	0.08	-1.67	0.094	0.87	0.08	-1.54	0.125	0.87	0.08	-1.47	0.141	0.87	0.08	-1.52	0.129
Predicted F									0.90	0.06	-1.50	0.134	0.89	0.06	-1.66	0.097	0.88	0.06	-1.81	0.070	0.92	0.06	-1.24	0.214
DiseaseCondition [LOSS] × Predicted F									1.31	0.12	2.92	0.003	1.39	0.13	3.36	0.001	1.39	0.14	3.38	0.001	1.36	0.13	3.22	0.001
Observations	5336				5336				5336				5336				5336				5336
AIC	6723.912				6573.688				6574.232				6571.002				6571.291				6570.558
log-Likelihood	-3343.956				-3260.844				-3261.116				-3259.501				-3259.645				-3259.279

Anova and Eta Square

calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")

Etasq Table: Disease Paradigm
	Condition	Panel	DiseaseCondition:Panel
Disease.PAN	0.05826***	0.01261***	0.0093***
Disease.PAN_FACE	0.05826***	0.00366***	0.00557***
Disease.PAN_Pred_F.reg_ACE	0.05826***	0.00276*	0.00383***
Disease.PAN_Pred_F.net_ACE	0.05826***	0.00279**	0.00376***
Disease.PAN_Pred_F.rf_ACE	0.05826***	0.00274*	0.00389***
Disease.PAN_Pred_F.gbm_ACE	0.05826***	0.00294**	0.00381***

1.1.1.5.3 Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_MP1.US.gbm)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.gbm)
LessMore.PAN_Pred_F.reg_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.reg)
LessMore.PAN_Pred_F.net_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.net)
LessMore.PAN_Pred_F.rf_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.rf)
LessMore.PAN_Pred_F.gbm_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.gbm)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_F.reg_ACE" = LessMore.PAN_Pred_F.reg_ACE,
  "LessMore.PAN_Pred_F.net_ACE" = LessMore.PAN_Pred_F.net_ACE,
  "LessMore.PAN_Pred_F.rf_ACE" = LessMore.PAN_Pred_F.rf_ACE, 
  "LessMore.PAN_Pred_F.gbm_ACE" = LessMore.PAN_Pred_F.gbm_ACE
)

Model Results

term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_F.reg_ACE,LessMore.PAN_Pred_F.net_ACE,LessMore.PAN_Pred_F.rf_ACE,LessMore.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result

LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.F(reg)_ACE				PAN_Pred.F(net)_ACE				PAN_Pred.F(rf)_ACE				PAN_Pred.F(gbm)_ACE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.37	0.07	79.64	<0.001	5.36	0.07	81.93	<0.001	5.34	0.07	80.99	<0.001	5.33	0.07	80.47	<0.001	5.33	0.07	79.95	<0.001	5.34	0.07	81.30	<0.001
LessMoreCondition [SCARF]	0.95	0.09	10.04	<0.001	0.99	0.09	10.82	<0.001	0.97	0.09	10.46	<0.001	0.96	0.09	10.33	<0.001	0.95	0.09	10.12	<0.001	0.97	0.09	10.48	<0.001
F					-0.11	0.05	-1.99	0.046
A					0.38	0.06	6.14	<0.001	0.33	0.06	5.16	<0.001	0.32	0.06	4.98	<0.001	0.33	0.06	5.09	<0.001	0.33	0.06	5.17	<0.001
C					0.02	0.03	0.58	0.562	-0.01	0.03	-0.47	0.637	-0.02	0.03	-0.59	0.553	-0.00	0.03	-0.18	0.858	-0.01	0.03	-0.43	0.667
E					-0.00	0.04	-0.09	0.930	0.03	0.04	0.87	0.385	0.04	0.04	1.01	0.310	0.03	0.04	0.87	0.384	0.04	0.04	0.99	0.324
LessMoreCondition [SCARF] × F					0.28	0.08	3.67	<0.001
LessMoreCondition [SCARF] × A					0.45	0.09	5.04	<0.001	0.40	0.09	4.34	<0.001	0.40	0.09	4.33	<0.001	0.37	0.09	4.03	<0.001	0.39	0.09	4.32	<0.001
LessMoreCondition [SCARF] × C					-0.01	0.04	-0.14	0.886	0.00	0.04	0.05	0.958	0.00	0.04	0.10	0.922	0.01	0.04	0.23	0.818	0.00	0.04	0.12	0.903
LessMoreCondition [SCARF] × E					-0.12	0.06	-2.24	0.025	-0.14	0.06	-2.57	0.010	-0.14	0.06	-2.56	0.010	-0.13	0.06	-2.39	0.017	-0.14	0.06	-2.51	0.012
Predicted F									0.09	0.04	2.31	0.021	0.12	0.04	2.75	0.006	0.09	0.04	2.05	0.040	0.11	0.04	2.48	0.013
LessMoreCondition [SCARF] × Predicted F									0.12	0.06	2.12	0.034	0.13	0.06	2.16	0.031	0.17	0.06	2.82	0.005	0.14	0.06	2.46	0.014
Observations	5336				5336				5336				5336				5336				5336
R² / R² adjusted	0.173 / 0.170				0.224 / 0.220				0.227 / 0.223				0.228 / 0.225				0.228 / 0.225				0.228 / 0.225

Anova and Eta Square

calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")

Etasq Table: LessMore Paradigm
	Condition	Panel	LessMoreCondition:Panel
LessMore.PAN	0.12958***	0.03699***	0.00642***
LessMore.PAN_FACE	0.12958***	0.01349***	0.00148
LessMore.PAN_Pred_F.reg_ACE	0.12958***	0.01294***	0.00163
LessMore.PAN_Pred_F.net_ACE	0.12958***	0.01315***	0.0016
LessMore.PAN_Pred_F.rf_ACE	0.12958***	0.01248***	0.00167
LessMore.PAN_Pred_F.gbm_ACE	0.12958***	0.01279***	0.00155

1.1.1.5.4 Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_MP1.US.gbm)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm)
Sunk.PAN_Pred_F.reg_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_MP1.US.reg)
Sunk.PAN_Pred_F.net_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.net)
Sunk.PAN_Pred_F.rf_ACE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_MP1.US.rf)
Sunk.PAN_Pred_F.gbm_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_F.reg_ACE" = Sunk.PAN_Pred_F.reg_ACE,
  "Sunk.PAN_Pred_F.net_ACE" = Sunk.PAN_Pred_F.net_ACE,
  "Sunk.PAN_Pred_F.rf_ACE" = Sunk.PAN_Pred_F.rf_ACE, #0.302 with unscaled
  "Sunk.PAN_Pred_F.gbm_ACE" = Sunk.PAN_Pred_F.gbm_ACE
)

# extract_adjusted_r_squared(Sunk_model)

Full Model Results

term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_F.reg_ACE,Sunk.PAN_Pred_F.net_ACE,Sunk.PAN_Pred_F.rf_ACE,Sunk.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_F(reg)_ACE", 
                        "PAN_Pred_F(net)_ACE", 
                        "PAN_Pred_F(rf)_ACE", 
                        "PAN_Pred_F(gbm)_ACE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result

Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred_F(reg)_ACE				PAN_Pred_F(net)_ACE				PAN_Pred_F(rf)_ACE				PAN_Pred_F(gbm)_ACE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.46	0.14	39.29	<0.001	5.46	0.14	39.54	<0.001	5.43	0.14	38.85	<0.001	5.41	0.14	38.53	<0.001	5.43	0.14	38.46	<0.001	5.43	0.14	39.00	<0.001
SunkCondition [PAID]	0.70	0.20	3.48	0.001	0.70	0.20	3.52	<0.001	0.65	0.20	3.23	0.001	0.63	0.20	3.14	0.002	0.59	0.20	2.92	0.004	0.65	0.20	3.26	0.001
F					0.32	0.11	2.82	0.005
A					0.59	0.14	4.33	<0.001	0.54	0.14	3.81	<0.001	0.53	0.14	3.72	<0.001	0.56	0.14	3.91	<0.001	0.53	0.14	3.79	<0.001
C					0.10	0.06	1.71	0.087	0.11	0.06	1.81	0.071	0.11	0.06	1.75	0.080	0.13	0.06	2.16	0.031	0.11	0.06	1.82	0.069
E					0.20	0.08	2.32	0.021	0.18	0.09	2.13	0.033	0.19	0.09	2.21	0.027	0.17	0.09	2.02	0.043	0.19	0.09	2.26	0.024
SunkCondition [PAID] × F					0.18	0.16	1.10	0.270
SunkCondition [PAID] × A					0.02	0.19	0.11	0.914	-0.12	0.20	-0.58	0.559	-0.11	0.20	-0.55	0.584	-0.16	0.20	-0.80	0.422	-0.11	0.20	-0.55	0.579
SunkCondition [PAID] × C					-0.02	0.08	-0.18	0.854	-0.05	0.09	-0.60	0.550	-0.04	0.09	-0.52	0.605	-0.03	0.08	-0.40	0.692	-0.04	0.08	-0.46	0.645
SunkCondition [PAID] × E					-0.04	0.12	-0.31	0.753	-0.00	0.12	-0.04	0.972	-0.01	0.12	-0.06	0.950	0.01	0.12	0.05	0.964	-0.01	0.12	-0.05	0.957
Predicted F									0.15	0.09	1.73	0.083	0.18	0.09	1.94	0.052	0.11	0.09	1.16	0.248	0.19	0.09	2.11	0.035
SunkCondition [PAID] × Predicted F									0.31	0.12	2.49	0.013	0.30	0.13	2.33	0.020	0.37	0.13	2.84	0.004	0.31	0.13	2.43	0.015
Observations	5336				5336				5336				5336				5336				5336
R² / R² adjusted	0.025 / 0.022				0.040 / 0.035				0.040 / 0.036				0.040 / 0.036				0.040 / 0.035				0.041 / 0.036

Anova and Eta Square

calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")

Etasq Table: SunkCost Paradigm
	Condition	Panel	LessMoreCondition:Panel
Sunk.PAN	0.01083***	0.00971***	0.00447**
Sunk.PAN_FACE	0.01083***	0.00563***	0.00226
Sunk.PAN_Pred_F.reg_ACE	0.01083***	0.00788***	0.0018
Sunk.PAN_Pred_F.net_ACE	0.01083***	0.00792***	0.00188
Sunk.PAN_Pred_F.rf_ACE	0.01083***	0.00776***	0.0018
Sunk.PAN_Pred_F.gbm_ACE	0.01083***	0.00687***	0.00192

1.1.1.5.5 Summary

Model Fit

\(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)
note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.tst_MP1.US,family=binomial)

R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_F.reg_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.net_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.rf_ACE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_F.gbm_ACE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.tst_MP1.US.gbm,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_F.reg_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.net_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.rf_ACE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_F.gbm_ACE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(LessMore.PAN_Pred_F.rf_ACE )$r.squared,
                     summary(LessMore.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(Sunk.PAN_Pred_F.rf_ACE )$r.squared,
                     summary(Sunk.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "SunkCost"))


R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_F.reg_ACE), 
                    AIC(Default.PAN_Pred_F.net_ACE), 
                    AIC(Default.PAN_Pred_F.rf_ACE),
                    AIC(Default.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_F.reg_ACE), 
                    AIC(Disease.PAN_Pred_F.net_ACE), 
                    AIC(Disease.PAN_Pred_F.rf_ACE),
                    AIC(Disease.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Disease"))



AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PF(reg)+ACE",  
                                   "Pan+PF(net)+ACE", 
                                   "Pan+PF(rf)+ACE",
                                   "Pan+PF(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(4200, 7000))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data", digits = 2)

Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data
Model	Avg_Improvement
Pan	0.00
Pan+FACE	30.51
Pan+Pred.F(reg)+ACE	30.94
Pan+Pred.F(net)+ACE	31.08
Pan+Pred.F(rf)+ACE	31.08
Pan+Pred.F(gbm)+ACE	32.08
Pan+FACE (Full Study 5)	43.20

F*Cond Model Coefficients

DVs for Default & Framing are 1/0
DVs for LessMore & SunkCost are on the original scale

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_F.reg_ACE),confint(Default.PAN_Pred_F.reg_ACE)),
cbind(coef(Default.PAN_Pred_F.net_ACE),confint(Default.PAN_Pred_F.net_ACE)),
cbind(coef(Default.PAN_Pred_F.rf_ACE),confint(Default.PAN_Pred_F.rf_ACE)),
cbind(coef(Default.PAN_Pred_F.gbm_ACE),confint(Default.PAN_Pred_F.gbm_ACE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_F.reg_ACE),confint(Disease.PAN_Pred_F.reg_ACE)),
cbind(coef(Disease.PAN_Pred_F.net_ACE),confint(Disease.PAN_Pred_F.net_ACE)),
cbind(coef(Disease.PAN_Pred_F.rf_ACE),confint(Disease.PAN_Pred_F.rf_ACE)),
cbind(coef(Disease.PAN_Pred_F.gbm_ACE),confint(Disease.PAN_Pred_F.gbm_ACE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_F.reg_ACE),confint(LessMore.PAN_Pred_F.reg_ACE)),
cbind(coef(LessMore.PAN_Pred_F.net_ACE),confint(LessMore.PAN_Pred_F.net_ACE)),
cbind(coef(LessMore.PAN_Pred_F.rf_ACE),confint(LessMore.PAN_Pred_F.rf_ACE)),
cbind(coef(LessMore.PAN_Pred_F.gbm_ACE),confint(LessMore.PAN_Pred_F.gbm_ACE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_F.reg_ACE),confint(Sunk.PAN_Pred_F.reg_ACE)),
cbind(coef(Sunk.PAN_Pred_F.net_ACE),confint(Sunk.PAN_Pred_F.net_ACE)),
cbind(coef(Sunk.PAN_Pred_F.rf_ACE),confint(Sunk.PAN_Pred_F.rf_ACE)),
cbind(coef(Sunk.PAN_Pred_F.gbm_ACE),confint(Sunk.PAN_Pred_F.gbm_ACE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]



AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq

note that the y-axes for different paradigms are not aligned..
We have observed a slight improvement in explaining heterogeneity across all methods, compared to what we initially had. I think this is due to our predictors being more refined: For example, we used longitude and latitude info to swap “minutes since midnight (EST)” to “clock time.

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Disease$Paradigm <- "Disease"
variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"

Model <- c("Pan", "Pan+FACE", "Pan+Pred.F(reg)+ACE", "Pan+Pred.F(net)+ACE", "Pan+Pred.F(rf)+ACE", "Pan+Pred.F(gbm)+ACE")

deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.F(reg)+ACE",  
                                   "Pan+Pred.F(net)+ACE", 
                                   "Pan+Pred.F(rf)+ACE",
                                   "Pan+Pred.F(gbm)+ACE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study1 <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study1 <- etasq_study1 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study1)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1", digits = 2)

Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1
Model	Avg_Reduction
Pan	0.00
Pan+FACE	56.30
Pan+Pred.F(reg)+ACE	65.67
Pan+Pred.F(net)+ACE	65.62
Pan+Pred.F(rf)+ACE	63.69
Pan+Pred.F(gbm)+ACE	64.00
Pan (Full Study5)	0.00
Pan+FACE (Full Study5)	74.19

Results without Study 5

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Disease$Paradigm <- "Disease"
variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"

Moderators <- c("Panel", "Panel + Original FACE", "Panel + Pred.F(reg)+ACE", "Panel + Pred.F(net)+ACE", "Panel + Pred.F(rf)+ACE", "Panel + Pred.F(gbm)+ACE")
deviance_explained_df.Default$Moderators<-Moderators
deviance_explained_df.Disease$Moderators<-Moderators
variance_explained_lm_df.LessMore$Moderators<-Moderators
variance_explained_lm_df.SunkCost$Moderators<-Moderators

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Disease, 
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.SunkCost)

etasq$Moderators <- factor(etasq$Moderators, 
                        levels = c("Panel", 
                                   "Panel + Original FACE",  
                                   "Panel + Pred.F(reg)+ACE",  
                                   "Panel + Pred.F(net)+ACE", 
                                   "Panel + Pred.F(rf)+ACE",
                                   "Panel + Pred.F(gbm)+ACE"),
                        ordered = TRUE)


split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

(etasq.pan_cond.plot<-ggplot(etasq.pan_cond, aes(x = Moderators, y = Condition_Panel, fill = Moderators)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
 ylab(expression(italic(eta)^2 ~ "(Panel: Treatment)")) + 
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2"))

ggsave("etasq.pan_cond.svg", plot = etasq.pan_cond.plot, device = "svg",  width = 20, height = 12, units = "cm")

2. Predict C

2.0 Evaluation Accuracy with MP5 data

We use the cleaned data (n=1460) for analysis in this notebook.
In this notebook, F ~= \(\text{ATTN1} + \text{CRT} + \text{Matrices} + \text{3D}\), and C~= \(\text{age} + \text{synonym} + \text{antonym}\).

mean_C <- mean(MP5_data$C, na.rm = TRUE)
sd_C <- sd(MP5_data$C, na.rm = TRUE)

# Define thresholds for outliers
threshold_3sd_upper <- mean_C + 3 * sd_C
threshold_3sd_lower <- mean_C - 3 * sd_C

threshold_2.5sd_upper <- mean_C + 2.5 * sd_C
threshold_2.5sd_lower <- mean_C - 2.5 * sd_C

# Identify outliers using ±3 SD
outliers_3sd <- MP5_data %>%
  filter(C > threshold_3sd_upper | C < threshold_3sd_lower)
# outliers_3sd  # no outlier identified
# Identify outliers using ±2.5 SD
outliers_2.5sd <- MP5_data %>%
  filter(C > threshold_2.5sd_upper | C < threshold_2.5sd_lower)
# outliers_2.5sd # no outlier identified

ANT_SYN<-read.csv("./Data Prep for ML/MP5_Full_data_ML_20240908.csv")

Data_M5a.log_z_dummy_coded$Antonym<-ANT_SYN$Antonym
Data_M5a.log_z_dummy_coded$Synonym<-ANT_SYN$Synonym

library(GGally)
ggpairs(Data_M5a.log_z_dummy_coded, c("C","Antonym","Synonym","numEDU","AGE","ATTN1__Correct"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

2.0.1 IVs

Complete list of IVs used can be found in this google spreadsheet.
- In addition to IVs in this list, in the current notebook, we have a set of text analysis variables based on respondents’ answers to the OpenEnd questions.

Text Analysis Variables

See “./Data Prep for ML/Study_5_OSF_adapted_ML_xw.Rmd” line 1106-1233 for data wrangling
In addition to the predicting variables used for F, I included 6 “text analysis” variables for predicting C (see below). These 6 variables were calculated for each of the three open-ended questions
- Tedious OpenEnd: “What did you find tedious about this survey?”
- Confuse OpenEnd: “What did you find confusing about this survey?”
- Purpose OpenEnd: “In a few sentences, please explain what you think the purpose of this study is. If you are not sure, please give your best guess.”?
Character Count
- The total number of characters in the response, excluding spaces and punctuation.
- Implementation: This was calculated using the nchar() function after removing spaces and punctuation marks via the gsub() function in base R.
Word Count
- total number of words in the response, counted as the number of word boundaries (spaces between words)
- Implementation: Calculated using the str_count() function from the stringr package with boundary(“word”).
Average Word Length
- Character Count divided by Word Count
Sentence Count
- Number of sentences in a response
- Implementation: Sentences were separated by punctuation marks such as periods (.), exclamation marks (!), question marks (?), and semicolons (;), and counted using str_split() from the stringr package.
Typo Count
- The number of misspelled words in the response, as identified by comparing each word to a dictionary.
- Implementation Misspelled words were identified using the hunspell() function from the hunspell package. Each word in the response was compared to a US English dictionary, and any non-matching words were flagged as typos.
Average Word Rarity
- the average rarity of words in the response, where rarity is defined as 1/frequencyCount based on the SUBTLEX_US word frequency database (which uses word frequencies from film subtitles as compiled by (Brysbaert & New, 2009; frequencyCount = the total number of times this word appeared in the sample used in the study). I chose this as a convenient and accessible source for word frequency data… Higher rarity values correspond to less common (rarer) words.
- Implementation: Words from the response were extracted and matched against the SUBTLEX_US word frequency database. For each identified word, the rarity score was calculated as 1/frequency of occurrence in the database. The average of these scores was then computed across all words in the response. If no identifiable words were found, the score was set to 0.
In addition, for Purpose OpenEnd, Antonia suggested that we draft a sample response and calculate the cosine similarity between participants’ responses and this sample as a predictor C. I did a sillier implements…: I summarized key words related to the tasks (also reviewed participants’ responses). For each key word that appeared in a response, I created a variable whose score was incremented by 1. See “./Data Prep for ML/Study_5_OSF_adapted_ML_xw.Rmd” line 1241-1282 for words used

Text_analysis_var_study5<-read.csv("./Data Prep for ML/MP5_Full_data_ML_20240908.csv")
Text_analysis_var_study1<-read.csv("../MP Study 1/Data_M1a.log_dummy_coded_20241003.csv")%>%
  filter(!Panel%in%c("Prolific_UK_Rep","Students"))

library(reshape2)

TEDIOUS OpenEnd

TEDIOUSEOPEN_text_analysis_var.Study5 <- Text_analysis_var_study5[, grepl("TEDIOUSOPEN_", colnames(Text_analysis_var_study5))]
tediousopen_column_names <- colnames(Text_analysis_var_study5)[grepl("TEDIOUSOPEN_", colnames(Text_analysis_var_study5))]
TEDIOUSEOPEN_text_analysis_var.Study1 <- Text_analysis_var_study1[, grepl("TEDIOUSOPEN_", colnames(Text_analysis_var_study1))]


TEDIOUSEOPEN_text_analysis_var.Study5$Study <- "Study 5"
TEDIOUSEOPEN_text_analysis_var.Study1$Study <- "Study 1"


combined_data <- rbind(TEDIOUSEOPEN_text_analysis_var.Study5, TEDIOUSEOPEN_text_analysis_var.Study1)

data_long_combined <- melt(combined_data, id.vars = "Study")
mean_data <- aggregate(value ~ variable + Study, data_long_combined, mean)
median_data <- aggregate(value ~ variable + Study, data_long_combined, median)


ggplot(data_long_combined, aes(x = value, fill = Study)) +
  geom_histogram(binwidth = 0.5, position = "identity", alpha = 0.5, color = "black") +
  geom_text(data = mean_data, aes(label = paste("Mean:", round(value, 2)), 
                                  x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 1.5, size = 3) +
  geom_text(data = median_data, aes(label = paste("Median:", round(value, 2)), 
                                    x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 3, size = 3) +
  
  facet_wrap(~ variable*Study, scales = "free")+
  labs(title = "Distribution of TEDIOUSOPEN Variables (Study 1 vs Study 5)", x = "Values", y = "Frequency") +
  theme_minimal() +
  scale_fill_manual(values = c("Study 1" = "lightblue", "Study 5" = "red")) +
  scale_color_manual(values = c("Study 1" = "blue", "Study 5" = "darkred")) +
  theme(legend.position = "top")

CONFUSE OpenEnd

CONFUSEOPEN_text_analysis_var.Study5 <- Text_analysis_var_study5[, grepl("CONFUSEOPEN_", colnames(Text_analysis_var_study5))]

confuseopen_column_names <- colnames(Text_analysis_var_study5)[grepl("CONFUSEOPEN_", colnames(Text_analysis_var_study5))]
CONFUSEOPEN_text_analysis_var.Study1 <- Text_analysis_var_study1[, grepl("CONFUSEOPEN_", colnames(Text_analysis_var_study1))]


CONFUSEOPEN_text_analysis_var.Study5$Study <- "Study 5"
CONFUSEOPEN_text_analysis_var.Study1$Study <- "Study 1"


combined_data <- rbind(CONFUSEOPEN_text_analysis_var.Study5, CONFUSEOPEN_text_analysis_var.Study1)

data_long_combined <- melt(combined_data, id.vars = "Study")
mean_data <- aggregate(value ~ variable + Study, data_long_combined, mean)
median_data <- aggregate(value ~ variable + Study, data_long_combined, median)


ggplot(data_long_combined, aes(x = value, fill = Study)) +
  geom_histogram(binwidth = 0.5, position = "identity", alpha = 0.5, color = "black") +
  geom_text(data = mean_data, aes(label = paste("Mean:", round(value, 2)), 
                                  x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 1.5, size = 3) +
  geom_text(data = median_data, aes(label = paste("Median:", round(value, 2)), 
                                    x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 3, size = 3) +
  
  facet_wrap(~ variable*Study, scales = "free")+
  labs(title = "Distribution of CONFUSEOPEN Variables (Study 1 vs Study 5)", x = "Values", y = "Frequency") +
  theme_minimal() +
  scale_fill_manual(values = c("Study 1" = "lightblue", "Study 5" = "red")) +
  scale_color_manual(values = c("Study 1" = "blue", "Study 5" = "darkred")) +
  theme(legend.position = "top")

Study Purpose OpenEnd

PURPOSE_text_analysis_var.Study5 <- Text_analysis_var_study5[, grepl("PURPOSEOPEN_", colnames(Text_analysis_var_study5)) & !grepl("\\.gpt$", colnames(Text_analysis_var_study5))]
purposeopen_column_names <- colnames(Text_analysis_var_study5)[grepl("PURPOSEOPEN_", colnames(Text_analysis_var_study5)) & !grepl("\\.gpt$", colnames(Text_analysis_var_study5))]

PURPOSE_text_analysis_var.Study1 <- Text_analysis_var_study1[, grepl("PURPOSEOPEN_", colnames(Text_analysis_var_study1))]


PURPOSE_text_analysis_var.Study5$Study <- "Study 5"
PURPOSE_text_analysis_var.Study1$Study <- "Study 1"


combined_data <- rbind(PURPOSE_text_analysis_var.Study5, PURPOSE_text_analysis_var.Study1)

data_long_combined <- melt(combined_data, id.vars = "Study")
mean_data <- aggregate(value ~ variable + Study, data_long_combined, mean)
median_data <- aggregate(value ~ variable + Study, data_long_combined, median)


ggplot(data_long_combined, aes(x = value, fill = Study)) +
  geom_histogram(binwidth = 0.5, position = "identity", alpha = 0.5, color = "black") +
  geom_text(data = mean_data, aes(label = paste("Mean:", round(value, 2)), 
                                  x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 1.5, size = 3) +
  geom_text(data = median_data, aes(label = paste("Median:", round(value, 2)), 
                                    x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 3, size = 3) +
  
  facet_wrap(~ variable*Study, scales = "free")+
  labs(title = "Distribution of PURPOSEOPEN Variables (Study 1 vs Study 5)", x = "Values", y = "Frequency") +
  theme_minimal() +
  scale_fill_manual(values = c("Study 1" = "lightblue", "Study 5" = "red")) +
  scale_color_manual(values = c("Study 1" = "blue", "Study 5" = "darkred")) +
  theme(legend.position = "top")

lm(C~Text Analysis Variables)

Text_analysis_var_study5 <- read.csv("./Data Prep for ML/MP5_Full_data_ML_20240908.csv")

all_text_analysis_iv <- c(tediousopen_column_names, confuseopen_column_names, purposeopen_column_names)
Text_analysis_var_study5 <- Text_analysis_var_study5[, c("C", all_text_analysis_iv)]
Text_analysis_var_study5[all_text_analysis_iv] <- lapply(Text_analysis_var_study5[all_text_analysis_iv], function(x) log(x + 1))
model <- lm(C ~ ., data = Text_analysis_var_study5)

summary(model)

## 
## Call:
## lm(formula = C ~ ., data = Text_analysis_var_study5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1291 -0.6205 -0.1084  0.5354  2.6394 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -0.3952     0.2713  -1.457 0.145386    
## TEDIOUSOPEN_WordCount       -0.3906     0.3582  -1.090 0.275701    
## TEDIOUSOPEN_CharacterCount   0.4636     0.3177   1.459 0.144808    
## TEDIOUSOPEN_SentenceCount    0.0723     0.2030   0.356 0.721773    
## TEDIOUSOPEN_TypoCount       -0.1125     0.1716  -0.656 0.511981    
## TEDIOUSOPEN_avg.WordRarity  -0.4888     1.2159  -0.402 0.687762    
## TEDIOUSOPEN_AvgWordLength   -0.4429     0.2197  -2.016 0.044005 *  
## CONFUSEOPEN_WordCount       -0.6800     0.3558  -1.911 0.056156 .  
## CONFUSEOPEN_CharacterCount   0.6716     0.3163   2.124 0.033881 *  
## CONFUSEOPEN_SentenceCount    0.1121     0.2306   0.486 0.626877    
## CONFUSEOPEN_TypoCount       -0.1574     0.1838  -0.856 0.391983    
## CONFUSEOPEN_avg.WordRarity  -1.1021     1.9431  -0.567 0.570683    
## CONFUSEOPEN_AvgWordLength   -0.6577     0.2188  -3.007 0.002687 ** 
## PURPOSEOPEN_WordCount       -0.8875     0.2983  -2.976 0.002973 ** 
## PURPOSEOPEN_CharacterCount   0.9386     0.2579   3.640 0.000282 ***
## PURPOSEOPEN_SentenceCount    0.6009     0.1073   5.601 2.54e-08 ***
## PURPOSEOPEN_TypoCount       -0.2570     0.0953  -2.697 0.007089 ** 
## PURPOSEOPEN_avg.WordRarity   0.9469     0.9871   0.959 0.337608    
## PURPOSEOPEN_AvgWordLength   -0.8496     0.2677  -3.174 0.001535 ** 
## PURPOSEOPEN_keyword_count    0.6312     0.1054   5.989 2.67e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8398 on 1440 degrees of freedom
## Multiple R-squared:  0.247,  Adjusted R-squared:  0.2371 
## F-statistic: 24.86 on 19 and 1440 DF,  p-value: < 2.2e-16

set.seed(123)
train_M5a <- createDataPartition(Data_M5a.log_z_dummy_coded$F, p = 0.8, list = FALSE)
data.trn_M5a <- as.data.frame(Data_M5a.log_z_dummy_coded[train_M5a, ])
data.tst_M5a <- as.data.frame(Data_M5a.log_z_dummy_coded[-train_M5a, ])


variables_to_remove.C <- c("F","A","E","numSunkCost","numLessMore","numDefault","numDisease","SunkCondition","LessMoreCondition","DiseaseCondition","DefaultCondition","Panel","Antonym","Synonym","crt2_score","rotsum","MXsum")
# We need to remove all the above variables other than C for training and testing; but we'd like to see their relationship with the predicted F


data.trn_M5a.C <- as.data.frame(data.trn_M5a[, colnames(data.trn_M5a) %notin% variables_to_remove.C])

actual_M5a.C <- data.tst_M5a$C


ctrl <- trainControl(method = "cv", number = 10, summaryFunction = defaultSummary)
tuneGrid <- expand.grid(
  mtry = c(33),
  min.node.size = c(5),
  splitrule = c("variance")
)

# Model_M5a_rf.C <- caret::train(C ~ ., data = data.trn_M5a.C, method = "ranger",
#                   tuneGrid = tuneGrid, importance = 'impurity', num.trees = 750,
#                   trControl = ctrl)
# save(Model_M5a_rf.C, file = "./Saved ML Model and Data/Model_M5a_rf.C.RData")
load("./Saved ML Model and Data/Model_M5a_rf.C.RData")

pred_M5a_rf.C<- predict(Model_M5a_rf.C,data.tst_M5a)
test_perf.M5a_rf.C <- postResample(pred_M5a_rf.C, actual_M5a.C)
data.tst_M5a.rf.C <- cbind(data.tst_M5a, Predicted_C = pred_M5a_rf.C)
pred_full_M5a_rf.C<- predict(Model_M5a_rf.C, Data_M5a.log_z_dummy_coded)
data.full_M5a.rf.C <- cbind(Data_M5a.log_z_dummy_coded, Predicted_C = pred_full_M5a_rf.C)



Model_M5a_reg.C <- train(C ~ ., data = data.trn_M5a.C, method = "lm")
pred_M5a_reg.C<- predict(Model_M5a_reg.C,data.tst_M5a)
test_perf.M5a_reg.C <- postResample(pred_M5a_reg.C, actual_M5a.C)
data.tst_M5a.reg.C <- cbind(data.tst_M5a, Predicted_C = pred_M5a_reg.C)
pred_full_M5a_reg.C<- predict(Model_M5a_reg.C, Data_M5a.log_z_dummy_coded)
data.full_M5a.reg.C <- cbind(Data_M5a.log_z_dummy_coded, Predicted_C = pred_full_M5a_reg.C)


Model_M5a_net.C <- train(C ~ ., data = data.trn_M5a.C, method = "glmnet", trControl = ctrl, tuneLength = 40) # 10 fold cv to select the best model, set tune length to 40
pred_M5a_net.C<- predict(Model_M5a_net.C,data.tst_M5a)
test_perf.M5a_net.C <- postResample(pred_M5a_net.C, actual_M5a.C)
data.tst_M5a.net.C <- cbind(data.tst_M5a, Predicted_C = pred_M5a_net.C)
pred_full_M5a_net.C<- predict(Model_M5a_net.C, Data_M5a.log_z_dummy_coded)
data.full_M5a.net.C <- cbind(Data_M5a.log_z_dummy_coded, Predicted_C = pred_full_M5a_net.C)



tuneGrid.4 <- expand.grid(
  n.trees = c(800),   # Number of trees (boosting iterations)
  interaction.depth = c(4), # Tree depth
  shrinkage = c(0.01), #   shrinkage = c(0.01,0,03,0.05,0.08,0.1); 0,01 best
  n.minobsinnode = c(50)     # Minimum number of observations in terminal nodes
)

# Model_M5a_gbm.C <- train(C ~ ., data = data.trn_M5a.C, method = "gbm",
#                     tuneGrid = tuneGrid.4, trControl = ctrl,
#                     verbose = FALSE)
# 
# # how to check the number of depths in the original tree?????
# # Save the Gradient Boosting model
# save(Model_M5a_gbm.C, file = "./Saved ML Model and Data/Model_M5a_gbm.C.RData")
load("./Saved ML Model and Data/Model_M5a_gbm.C.RData")

pred_M5a_gbm.C <- predict(Model_M5a_gbm.C, data.tst_M5a)
test_perf.M5a_gbm.C <- postResample(pred_M5a_gbm.C, actual_M5a.C)
data.tst_M5a.gbm.C <- cbind(data.tst_M5a, Predicted_C = pred_M5a_gbm.C)
pred_full_M5a_gbm.C <- predict(Model_M5a_gbm.C, Data_M5a.log_z_dummy_coded)
data.full_M5a.gbm.C <- cbind(Data_M5a.log_z_dummy_coded, Predicted_C = pred_full_M5a_gbm.C)

actual_M5a.F_full.C<-Data_M5a.log_z_dummy_coded$C
test_perf.full_M5a_gbm.C<- postResample(pred_full_M5a_gbm.C, actual_M5a.F_full.C)
test_perf.full_M5a_rf.C<- postResample(pred_full_M5a_rf.C, actual_M5a.F_full.C)
test_perf.full_M5a_reg.C<- postResample(pred_full_M5a_reg.C, actual_M5a.F_full.C)
test_perf.full_M5a_net.C<- postResample(pred_full_M5a_net.C, actual_M5a.F_full.C)

2.0.2 Model Fit

The following stats come from cross-validation

train_perf_M5a_rf <- Model_M5a_rf.C$results[which.min(Model_M5a_rf.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_reg <- Model_M5a_reg.C$results[which.min(Model_M5a_reg.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_net <- Model_M5a_net.C$results[which.min(Model_M5a_net.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_gbm <- Model_M5a_gbm.C$results[which.min(Model_M5a_gbm.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]

# Combine training performance metrics into a data frame
performance_metrics_trn <- data.frame(
  Model = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(as.numeric(train_perf_M5a_reg["MAE"]), as.numeric(train_perf_M5a_net["MAE"]), as.numeric(train_perf_M5a_rf["MAE"]), as.numeric(train_perf_M5a_gbm["MAE"])),
  RMSE = c(as.numeric(train_perf_M5a_reg["RMSE"]), as.numeric(train_perf_M5a_net["RMSE"]), as.numeric(train_perf_M5a_rf["RMSE"]), as.numeric(train_perf_M5a_gbm["RMSE"])),
  Rsquared = c(as.numeric(train_perf_M5a_reg["Rsquared"]), as.numeric(train_perf_M5a_net["Rsquared"]), as.numeric(train_perf_M5a_rf["Rsquared"]), as.numeric(train_perf_M5a_gbm["Rsquared"])))


performance_table_trn <- performance_metrics_trn %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table_trn, caption = "Model Fit Statistics") %>%
  kable_styling(full_width = F, position = "center")

Model Fit Statistics
Model	MAE	RMSE	Rsquared
Linear Regression	0.6067	0.7554	0.4062
Elastic Net	0.5745	0.7124	0.4572
Random Forest	0.5623	0.6965	0.4995
Gradient Boosting	0.5339	0.6726	0.5199

###Plot ###

performance_metrics_long_trn <- performance_metrics_trn %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")

trn_M5a.sd_C <- sd(data.trn_M5a$C)

ggplot(performance_metrics_long_trn, aes(x = Metric, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = trn_M5a.sd_C, yend = trn_M5a.sd_C),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
            aes(x = 2, y = trn_M5a.sd_C, label = paste0("Std.dev of C in Training Data: ", round(trn_M5a.sd_C, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Model Fit Statistics",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

2.0.3 Model Interpretation I

Refer to Section 2.1.1.1 for variable importance when trained with full MP5 data.

2.0.4 Model Interpretation II (SHAP Value for tree-based models)

Refer to Section 2.1.1.2 for variable importance when trained with full MP5 data.

2.0.5 Model Accuracy on Held-out Test Data

Correlation between predicted F using different methods

predicted_f_reg <- data.tst_M5a.reg.C$Predicted_C
predicted_f_rf <- data.tst_M5a.rf.C$Predicted_C
predicted_f_gbm <- data.tst_M5a.gbm.C$Predicted_C
predicted_f_net <- data.tst_M5a.net.C$Predicted_C
actual_C<-data.tst_M5a.gbm.C$C

# Combine into a data frame
predicted_C <- data.frame(
  Pred_C.Reg = predicted_f_reg,
  Pred_C.RandomForest = predicted_f_rf,
  Pred_C.GradientBoosting = predicted_f_gbm,
  Pred_C.ElasticNet = predicted_f_net,
  C = actual_C
)

ggpairs(predicted_C, c("Pred_C.Reg","Pred_C.ElasticNet","Pred_C.RandomForest","Pred_C.GradientBoosting","C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

The following metrics pertains to how the model did in predicting held-out data (un-used during model training). Signallings how generalizable the models are.

# Bar plot indicating overall model performance, plotting RMSE, R-squared, and MEA

# Combine performance metrics into a data frame
performance_metrics <- data.frame(
Model = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(test_perf.M5a_reg.C["MAE"], test_perf.M5a_net.C["MAE"], test_perf.M5a_rf.C["MAE"], test_perf.M5a_gbm.C["MAE"]),
  RMSE = c(test_perf.M5a_reg.C["RMSE"], test_perf.M5a_net.C["RMSE"], test_perf.M5a_rf.C["RMSE"], test_perf.M5a_gbm.C["RMSE"]),
  Rsquared = c(test_perf.M5a_reg.C["Rsquared"], test_perf.M5a_net.C["Rsquared"], test_perf.M5a_rf.C["Rsquared"], test_perf.M5a_gbm.C["Rsquared"])
)



performance_metrics_long <- performance_metrics %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")


# Create the performance metrics table
performance_table <- performance_metrics %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table, caption = "Comparison of Model Performance in Held-Out Test Data") %>%
  kable_styling(full_width = F, position = "center")

Comparison of Model Performance in Held-Out Test Data
Model	MAE	RMSE	Rsquared
Linear Regression	0.6134	0.7409	0.3925
Elastic Net	0.6034	0.7239	0.4125
Random Forest	0.5749	0.6966	0.4644
Gradient Boosting	0.5548	0.6804	0.4821

tst_M5a.sd_C <- sd(data.tst_M5a$C)

ggplot(performance_metrics_long, aes(x = Metric, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = tst_M5a.sd_C, yend = tst_M5a.sd_C),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long %>% filter(Metric == "RMSE"),
            aes(x = 2, y = tst_M5a.sd_C, label = paste0("Std.dev of C in Held-Out Test Data: ", round(tst_M5a.sd_C, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Comparison of Model Performance in Held-Out Testing Data",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

2.0.6 Density and Residual Plots for the Predicted C in Held-Out Test Data

Histogram of C in Held-Out Test Data

hist(data.tst_M5a$C)

library(gridExtra)

# Calculate residuals
residuals_rf <- actual_M5a.C - pred_M5a_rf.C
residuals_reg <- actual_M5a.C - pred_M5a_reg.C
residuals_net <- actual_M5a.C - pred_M5a_net.C
residuals_gbm <- actual_M5a.C - pred_M5a_gbm.C

# Create a data frame with residuals and actual values
residuals_data <- data.frame(
  Actual = actual_M5a.C,
  Pred_RF = pred_M5a_rf.C,
  Pred_Reg = pred_M5a_reg.C,
  Pred_NET = pred_M5a_net.C,
  Pred_GBM = pred_M5a_gbm.C,
  Residual_RF = residuals_rf,
  Residual_Reg = residuals_reg,
  Residual_NET = residuals_net,
  Residual_GBM = residuals_gbm
)

create_density_plot <- function(predicted, actual, title) {
  ggplot() +
    geom_density(aes(x = predicted, color = "Predicted C"), alpha = 0.4, size = 1) +
    geom_density(aes(x = actual, color = "Actual C"), alpha = 0.4, size = 1) +
    scale_y_continuous(limits = c(0, 1)) +
    scale_color_manual(values = c("Actual C" = "firebrick", "Predicted C" = "darkorange")) +
    labs(title = title, x = "C Score", y = "Density") +
    theme_minimal() +
    theme(legend.position = "bottom", legend.title = element_blank())
}

# Create density plots for each model
density_rf <- create_density_plot(residuals_data$Pred_RF, residuals_data$Actual, "Density Plot for PF5 - Random Forest")
density_reg <- create_density_plot(residuals_data$Pred_Reg, residuals_data$Actual, "Density Plot for PF5 - Regression")
density_net <- create_density_plot(residuals_data$Pred_NET, residuals_data$Actual, "Density Plot for PF5 - Elastic Net")
density_gbm <- create_density_plot(residuals_data$Pred_GBM, residuals_data$Actual, "Density Plot for PF5 - Gradient Boosting")

# Arrange all density plots in a grid
grid.arrange(density_reg, density_net, density_rf, density_gbm, ncol = 2, nrow = 2)

create_residual_plot <- function(actual, residuals, title) {
  ggplot(residuals_data, aes(x = actual, y = residuals)) +
    geom_point(alpha = 0.4) +
    geom_rug(sides = "b", alpha = 0.2) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
    labs(title = title, x = "Actual C Score", y = "Residuals") +
    theme_minimal() +
    ylim(-2, 2)
}

# Create residual plots for each model
residual_rf <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_RF, "Residuals Plot for Random Forest")
residual_reg <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_Reg, "Residuals Plot for Regression")
residual_net <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_NET, "Residuals Plot for Elastic Net")
residual_gbm <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_GBM, "Residuals Plot for Gradient Boosting")

# Arrange all residual plots in a grid
grid.arrange( residual_reg, residual_net,residual_rf, residual_gbm, ncol = 2, nrow = 2)

2.0.7 Correlation Btw ML Predicted F and other FACE factors

We focus on the test dataset (n=292)
Significance Notation: *** if the p-value is < 0.001, ** if the p-value is < 0.01, * if the p-value is < 0.05, . if the p-value is < 0.10

Linear Regression

ggpairs(data.tst_M5a.reg.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net

ggpairs(data.tst_M5a.net.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest

ggpairs(data.tst_M5a.rf.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.tst_M5a.gbm.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PC, numEDU, AGE, and ATTN1

ggpairs(data.tst_M5a.gbm.C, c("Predicted_C","Antonym","Synonym","numEDU","AGE","ATTN1__Correct","C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

2.0.8 Explain Heterogenity with Predicted C in Test Sample (20% study 5 Data)

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_M5a.reg, family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_M5a.gbm.C, family = binomial)

Default.PAN_Pred_C.reg_FAE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.reg.C , family = binomial)

Default.PAN_Pred_C.net_FAE <- glm(numDefault ~  DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C + DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.net.C, family = binomial)

Default.PAN_Pred_C.rf_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.rf.C, family = binomial)

Default.PAN_Pred_C.gbm_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.gbm.C, family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_C.reg_FAE" = Default.PAN_Pred_C.reg_FAE,
  "Default.PAN_Pred_C.net_FAE" = Default.PAN_Pred_C.net_FAE,
  "Default.PAN_Pred_C.rf_FAE" = Default.PAN_Pred_C.rf_FAE,
  "Default.PAN_Pred_C.gbm_FAE" = Default.PAN_Pred_C.gbm_FAE
)

Model Results

term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_C.reg_FAE,Default.PAN_Pred_C.net_FAE,Default.PAN_Pred_C.rf_FAE,Default.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result

Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.C(reg)_FAE				PAN_Pred.C(net)_FAE				PAN_Pred.C(rf)_FAE				PAN_Pred.C(gbm)_FAE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.96	0.28	-0.15	0.884	0.94	0.28	-0.21	0.837	0.89	0.27	-0.40	0.690	0.87	0.26	-0.46	0.644	0.84	0.26	-0.56	0.578	0.88	0.27	-0.44	0.663
DefaultCondition [OPTOUT]	1.41	0.58	0.83	0.409	1.44	0.61	0.86	0.391	1.51	0.66	0.95	0.341	1.57	0.69	1.03	0.305	1.49	0.65	0.90	0.367	1.54	0.67	0.99	0.322
F					0.87	0.35	-0.35	0.725	1.02	0.30	0.06	0.949	1.06	0.32	0.20	0.838	1.11	0.33	0.36	0.719	1.05	0.32	0.17	0.865
A					0.85	0.17	-0.79	0.431	0.71	0.16	-1.54	0.124	0.76	0.16	-1.31	0.190	0.76	0.16	-1.28	0.202	0.77	0.16	-1.25	0.210
C					1.69	0.58	1.53	0.127
E					0.88	0.16	-0.72	0.474	0.92	0.16	-0.49	0.625	0.92	0.16	-0.46	0.648	0.92	0.16	-0.48	0.635	0.91	0.16	-0.52	0.603
DefaultCondition [OPTOUT] × F					1.35	0.84	0.49	0.627	1.31	0.60	0.59	0.556	1.29	0.60	0.55	0.579	1.10	0.50	0.21	0.837	1.28	0.59	0.54	0.589
DefaultCondition [OPTOUT] × A					1.07	0.31	0.24	0.814	1.27	0.40	0.78	0.435	1.22	0.37	0.64	0.524	1.14	0.35	0.42	0.677	1.19	0.36	0.57	0.572
DefaultCondition [OPTOUT] × C					0.68	0.34	-0.77	0.443
DefaultCondition [OPTOUT] × E					1.68	0.50	1.74	0.082	1.65	0.47	1.77	0.077	1.65	0.47	1.75	0.080	1.63	0.46	1.72	0.085	1.67	0.48	1.78	0.075
Predicted C									2.50	0.96	2.36	0.018	2.17	0.93	1.79	0.073	2.20	1.06	1.64	0.101	2.20	0.86	2.01	0.044
DefaultCondition [OPTOUT] × Predicted C									0.42	0.23	-1.57	0.116	0.44	0.28	-1.28	0.199	0.65	0.47	-0.59	0.553	0.46	0.26	-1.35	0.177
Observations	292				292				292				292				292				292
AIC	389.079				395.399				392.063				394.646				394.754				393.782
log-Likelihood	-188.540				-183.699				-182.031				-183.323				-183.377				-182.891

Anova and Eta Square

As in the origianl paper, type I anova is used. We first attribute variance to condition, FACE, then Panel main effect, followed by the conditionFACE interaction, and the Panelcondition interaction last.
As in the original paper, Etasq of a variable in linear regression models (LessMore and Sunk) is calculated as the sum of squares explained by the variables divided by the total sum of squares. Etasq of a variable in logistic regression models (Default and Unusual Disease) is calculated as the deviance explained by the model relative to the null model (deviance from the null model divided by the model’s residual deviance).
I validated this pipeline by reproducing the stats reported in the SI for Study 1.
Woudl we like to see the full Anova stats?

calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")

Etasq Table: Default Paradigm
	Condition	Panel	Default Condition:Panel
Default.PAN	0.02968***	0.0095	0.02393**
Default.PAN_FACE	0.02968***	0.00305	0.00314
Default.PAN_Pred_C.reg_FAE	0.02968***	0.00284	0.00209
Default.PAN_Pred_C.net_FAE	0.02968***	0.00314	0.00215
Default.PAN_Pred_C.rf_FAE	0.02968***	0.00165	0.00271
Default.PAN_Pred_C.gbm_FAE	0.02968***	0.00228	0.00155

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_M5a.reg, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_M5a.reg.C, family = binomial)

Disease.PAN_Pred_C.reg_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.reg.C, family = binomial)

Disease.PAN_Pred_C.net_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.net.C, family = binomial)

Disease.PAN_Pred_C.rf_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.rf.C, family = binomial)

Disease.PAN_Pred_C.gbm_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_M5a.gbm.C, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_C.reg_FAE" = Disease.PAN_Pred_C.reg_FAE,
  "Disease.PAN_Pred_C.net_FAE" = Disease.PAN_Pred_C.net_FAE,
  "Disease.PAN_Pred_C.rf_FAE" = Disease.PAN_Pred_C.rf_FAE, 
  "Disease.PAN_Pred_C.gbm_FAE" = Disease.PAN_Pred_C.gbm_FAE
)

# extract_aic(Disease_model)

Model Results

term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_C.reg_FAE,Disease.PAN_Pred_C.net_FAE,Disease.PAN_Pred_C.rf_FAE,Disease.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result

Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.C(reg)_FAE				PAN_Pred.C(net)_FAE				PAN_Pred.C(rf)_FAE				PAN_Pred.C(gbm)_FAE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.48	0.13	-2.67	0.008	0.44	0.13	-2.80	0.005	0.50	0.14	-2.43	0.015	0.51	0.15	-2.32	0.020	0.53	0.15	-2.19	0.028	0.52	0.15	-2.31	0.021
DiseaseCondition [LOSS]	4.04	1.82	3.09	0.002	4.19	1.97	3.04	0.002	3.84	1.82	2.83	0.005	3.83	1.84	2.80	0.005	3.40	1.63	2.56	0.010	3.54	1.67	2.68	0.007
F					2.42	1.16	1.84	0.065	1.17	0.36	0.51	0.612	1.20	0.38	0.58	0.564	1.25	0.40	0.69	0.491	1.30	0.42	0.81	0.419
A					1.19	0.23	0.90	0.369	1.26	0.25	1.15	0.251	1.27	0.26	1.18	0.237	1.30	0.26	1.29	0.197	1.28	0.25	1.26	0.208
C					0.41	0.17	-2.20	0.028
E					1.07	0.21	0.33	0.745	0.92	0.17	-0.46	0.642	0.92	0.17	-0.46	0.649	0.94	0.17	-0.33	0.743	0.93	0.17	-0.38	0.707
DiseaseCondition [LOSS] × F					0.92	0.64	-0.12	0.902	1.14	0.56	0.26	0.793	1.11	0.55	0.20	0.838	0.92	0.45	-0.18	0.859	0.93	0.46	-0.14	0.888
DiseaseCondition [LOSS] × A					1.49	0.50	1.17	0.243	1.52	0.53	1.20	0.231	1.50	0.53	1.16	0.245	1.43	0.50	1.02	0.310	1.45	0.50	1.08	0.280
DiseaseCondition [LOSS] × C					1.17	0.65	0.28	0.780
DiseaseCondition [LOSS] × E					0.77	0.24	-0.86	0.392	0.80	0.24	-0.75	0.452	0.80	0.24	-0.76	0.444	0.76	0.22	-0.92	0.356	0.78	0.23	-0.86	0.392
Predicted C									0.71	0.28	-0.86	0.390	0.64	0.30	-0.94	0.346	0.53	0.28	-1.18	0.237	0.54	0.24	-1.40	0.162
DiseaseCondition [LOSS] × Predicted C									0.84	0.50	-0.29	0.772	0.87	0.59	-0.20	0.842	1.48	1.13	0.51	0.607	1.33	0.80	0.47	0.639
Observations	292				292				292				292				292				292
AIC	363.491				356.242				363.239				363.114				363.792				362.802
log-Likelihood	-175.745				-164.121				-167.619				-167.557				-167.896				-167.401

Anova and Eta Square

calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")

Etasq Table: Disease Paradigm
	Condition	Panel	DiseaseCondition:Panel
Disease.PAN	0.03974***	0.03523**	0.01646*
Disease.PAN_FACE	0.03974***	0.0196*	0.00098
Disease.PAN_Pred_C.reg_FAE	0.03974***	0.0073	0.00126
Disease.PAN_Pred_C.net_FAE	0.03974***	0.00794	0.00116
Disease.PAN_Pred_C.rf_FAE	0.03974***	0.00665	0.00024
Disease.PAN_Pred_C.gbm_FAE	0.03974***	0.00796	0.00041

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_M5a.reg)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.reg.C)
LessMore.PAN_Pred_C.reg_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.reg.C)
LessMore.PAN_Pred_C.net_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.net.C)
LessMore.PAN_Pred_C.rf_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.rf.C)
LessMore.PAN_Pred_C.gbm_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.gbm.C)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_C.reg_FAE" = LessMore.PAN_Pred_C.reg_FAE,
  "LessMore.PAN_Pred_C.net_FAE" = LessMore.PAN_Pred_C.net_FAE,
  "LessMore.PAN_Pred_C.rf_FAE" = LessMore.PAN_Pred_C.rf_FAE, 
  "LessMore.PAN_Pred_C.gbm_FAE" = LessMore.PAN_Pred_C.gbm_FAE
)

Model Results

term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_C.reg_FAE,LessMore.PAN_Pred_C.net_FAE,LessMore.PAN_Pred_C.rf_FAE,LessMore.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result

LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.C(reg)_FAE				PAN_Pred.C(net)_FAE				PAN_Pred.C(rf)_FAE				PAN_Pred.C(gbm)_FAE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.51	0.21	25.98	<0.001	5.52	0.21	26.19	<0.001	5.50	0.21	25.75	<0.001	5.53	0.21	25.75	<0.001	5.52	0.21	25.78	<0.001	5.52	0.21	26.02	<0.001
LessMoreCondition [SCARF]	0.76	0.29	2.65	0.009	0.83	0.29	2.89	0.004	0.86	0.29	2.97	0.003	0.80	0.29	2.74	0.007	0.77	0.29	2.62	0.009	0.82	0.29	2.83	0.005
F					-0.03	0.25	-0.11	0.913	0.08	0.19	0.45	0.656	0.14	0.19	0.75	0.455	0.13	0.19	0.71	0.480	0.12	0.19	0.67	0.504
A					-0.26	0.14	-1.88	0.062	-0.28	0.15	-1.94	0.053	-0.25	0.15	-1.75	0.082	-0.26	0.15	-1.77	0.078	-0.26	0.14	-1.84	0.067
C					0.16	0.21	0.77	0.442
E					-0.02	0.12	-0.17	0.863	0.01	0.11	0.05	0.963	0.00	0.11	0.04	0.968	0.00	0.11	0.04	0.964	0.00	0.11	0.04	0.965
LessMoreCondition [SCARF] × F					0.38	0.40	0.95	0.345	0.23	0.30	0.76	0.450	0.05	0.30	0.18	0.858	0.02	0.30	0.08	0.936	0.11	0.30	0.36	0.716
LessMoreCondition [SCARF] × A					0.51	0.18	2.78	0.006	0.54	0.19	2.83	0.005	0.48	0.19	2.52	0.012	0.46	0.19	2.36	0.019	0.50	0.19	2.65	0.008
LessMoreCondition [SCARF] × C					-0.24	0.32	-0.75	0.452
LessMoreCondition [SCARF] × E					0.05	0.18	0.28	0.780	0.01	0.17	0.08	0.938	-0.00	0.17	-0.03	0.978	-0.02	0.17	-0.13	0.898	-0.00	0.17	-0.00	0.997
Predicted C									0.10	0.24	0.41	0.679	-0.09	0.28	-0.33	0.739	-0.07	0.31	-0.23	0.819	-0.03	0.24	-0.13	0.895
LessMoreCondition [SCARF] × Predicted C									-0.18	0.35	-0.52	0.601	0.28	0.41	0.69	0.492	0.43	0.47	0.92	0.359	0.12	0.37	0.32	0.749
Observations	292				292				292				292				292				292
R² / R² adjusted	0.208 / 0.194				0.249 / 0.214				0.248 / 0.212				0.248 / 0.213				0.250 / 0.215				0.247 / 0.212

Anova and Eta Square

calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")

Etasq Table: LessMore Paradigm
	Condition	Panel	LessMoreCondition:Panel
LessMore.PAN	0.08314***	0.12202***	0.00272
LessMore.PAN_FACE	0.08314***	0.04611***	0.00099
LessMore.PAN_Pred_C.reg_FAE	0.08314***	0.05544***	7e-04
LessMore.PAN_Pred_C.net_FAE	0.08314***	0.0524***	0.00161
LessMore.PAN_Pred_C.rf_FAE	0.08314***	0.04324***	0.0025
LessMore.PAN_Pred_C.gbm_FAE	0.08314***	0.04916***	0.00122

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_M5a)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a)
Sunk.PAN_Pred_C.reg_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_M5a.reg.C)
Sunk.PAN_Pred_C.net_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.net.C)
Sunk.PAN_Pred_C.rf_FAE <- lm(numSunkCost ~ SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_M5a.rf.C)
Sunk.PAN_Pred_C.gbm_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.gbm.C)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_C.reg_FAE" = Sunk.PAN_Pred_C.reg_FAE,
  "Sunk.PAN_Pred_C.net_FAE" = Sunk.PAN_Pred_C.net_FAE,
  "Sunk.PAN_Pred_C.rf_FAE" = Sunk.PAN_Pred_C.rf_FAE, #0.302 with unscaled
  "Sunk.PAN_Pred_C.gbm_FAE" = Sunk.PAN_Pred_C.gbm_FAE
)

# extract_adjusted_r_squared(Sunk_model)

Full Model Results

term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_C.reg_FAE,Sunk.PAN_Pred_C.net_FAE,Sunk.PAN_Pred_C.rf_FAE,Sunk.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_C(reg)_FAE", 
                        "PAN_Pred_C(net)_FAE", 
                        "PAN_Pred_C(rf)_FAE", 
                        "PAN_Pred_C(gbm)_FAE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result

Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred_C(reg)_FAE				PAN_Pred_C(net)_FAE				PAN_Pred_C(rf)_FAE				PAN_Pred_C(gbm)_FAE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.51	0.36	15.18	<0.001	5.51	0.36	15.31	<0.001	5.46	0.38	14.33	<0.001	5.49	0.39	14.14	<0.001	5.47	0.39	13.97	<0.001	5.53	0.38	14.44	<0.001
SunkCondition [PAID]	0.14	0.54	0.26	0.792	0.06	0.53	0.12	0.906	0.12	0.55	0.23	0.822	0.15	0.56	0.26	0.794	0.16	0.56	0.28	0.779	0.08	0.55	0.14	0.887
F					0.60	0.49	1.21	0.226	-0.36	0.37	-0.95	0.344	-0.32	0.38	-0.85	0.396	-0.33	0.38	-0.88	0.381	-0.28	0.38	-0.74	0.459
A					0.22	0.24	0.93	0.355	0.13	0.26	0.50	0.618	0.16	0.26	0.61	0.540	0.15	0.26	0.56	0.575	0.19	0.25	0.74	0.459
C					-1.07	0.42	-2.52	0.012
E					0.07	0.24	0.29	0.771	-0.13	0.23	-0.57	0.568	-0.13	0.23	-0.55	0.581	-0.14	0.23	-0.59	0.555	-0.13	0.23	-0.56	0.577
SunkCondition [PAID] × F					0.53	0.73	0.72	0.469	0.87	0.56	1.57	0.118	0.88	0.57	1.56	0.119	0.48	0.56	0.85	0.395	0.63	0.56	1.13	0.261
SunkCondition [PAID] × A					-0.50	0.34	-1.46	0.144	-0.29	0.36	-0.79	0.429	-0.32	0.36	-0.89	0.374	-0.36	0.37	-0.99	0.322	-0.39	0.36	-1.09	0.275
SunkCondition [PAID] × C					0.05	0.60	0.08	0.937
SunkCondition [PAID] × E					0.05	0.33	0.15	0.883	0.11	0.32	0.34	0.736	0.13	0.32	0.42	0.675	0.09	0.32	0.27	0.787	0.11	0.32	0.33	0.740
Predicted C									0.28	0.48	0.58	0.559	0.16	0.55	0.29	0.773	0.22	0.58	0.38	0.704	-0.00	0.46	-0.01	0.994
SunkCondition [PAID] × Predicted C									-1.18	0.68	-1.74	0.083	-1.24	0.78	-1.57	0.116	-0.33	0.92	-0.36	0.723	-0.61	0.73	-0.84	0.403
Observations	292				292				292				292				292				292
R² / R² adjusted	0.040 / 0.023				0.086 / 0.043				0.059 / 0.015				0.059 / 0.015				0.047 / 0.002				0.050 / 0.006

Anova and Eta Square

calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 6), sig_condition)
  res_panel <- paste0(round(perc_panel, 6), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 6), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")

Etasq Table: SunkCost Paradigm
	Condition	Panel	LessMoreCondition:Panel
Sunk.PAN	0.004668	0.025882*	0.009219
Sunk.PAN_FACE	0.004668	0.01253	0.003898
Sunk.PAN_Pred_C.reg_FAE	0.004668	0.012942	0.010976
Sunk.PAN_Pred_C.net_FAE	0.004668	0.012947	0.011858
Sunk.PAN_Pred_C.rf_FAE	0.004668	0.013695	0.006579
Sunk.PAN_Pred_C.gbm_FAE	0.004668	0.013776	0.009041

Summary

Model Fit

\(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)
note that the y-axes for different paradigms are not aligned.

Default.null<-glm(numDefault~1,data.tst_M5a.reg.C,family=binomial)


R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_C.reg_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.net_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.rf_FAE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_C.gbm_FAE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.tst_M5a.reg.C,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_C.reg_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.net_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.rf_FAE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_C.gbm_FAE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(LessMore.PAN_Pred_C.rf_FAE)$r.squared,
                     summary(LessMore.PAN_Pred_C.gbm_FAE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(Sunk.PAN_Pred_C.rf_FAE )$r.squared,
                     summary(Sunk.PAN_Pred_C.gbm_FAE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "SunkCost"))


R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_C.reg_FAE), 
                    AIC(Default.PAN_Pred_C.net_FAE), 
                    AIC(Default.PAN_Pred_C.rf_FAE),
                    AIC(Default.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_C.reg_FAE), 
                    AIC(Disease.PAN_Pred_C.net_FAE), 
                    AIC(Disease.PAN_Pred_C.rf_FAE),
                    AIC(Disease.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Disease"))


AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PC(reg)+FAE",  
                                   "Pan+PC(net)+FAE", 
                                   "Pan+PC(rf)+FAE",
                                   "Pan+PC(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


### Avg. improvement in R2 for each model across all four paradigms, in FULL study 5
Default.null.Study5<-glm(numDefault~1,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.null.Study5<-glm(numDisease~1,Data_M5a.log_z_dummy_coded,family=binomial)
Default.PAN.Study5<-glm(numDefault~DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN.Study5<-glm(numDisease~DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN.Study5<-lm(numLessMore~LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN.Study5<-lm(numSunkCost~SunkCondition*Panel,Data_M5a.log_z_dummy_coded)


Default.PAN_FACE.Study5<-glm(numDefault~DefaultCondition*F+DefaultCondition*A+DefaultCondition*C+DefaultCondition*E+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~DiseaseCondition*F+DiseaseCondition*A+DiseaseCondition*C+DiseaseCondition*E+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~LessMoreCondition*F+LessMoreCondition*A+LessMoreCondition*C+LessMoreCondition*E+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~SunkCondition*F+SunkCondition*A+SunkCondition*C+SunkCondition*E+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

R2.Default = as.data.frame(cbind( 
  "R2" = c((1-logLik(Default.PAN.Study5)/logLik(Default.null.Study5)),
           (1-logLik(Default.PAN_FACE.Study5)/logLik(Default.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Default"
))


R2.Disease = as.data.frame(cbind( 
  "R2" = c((1-logLik(Disease.PAN.Study5)/logLik(Disease.null.Study5)),
           (1-logLik(Disease.PAN_FACE.Study5)/logLik(Disease.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Disease"
))

# For LessMore paradigm (linear models)
R2.LessMore = as.data.frame(cbind( 
  "R2" = c(summary(LessMore.PAN.Study5)$r.squared,
           summary(LessMore.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "LessMore"
))

# For SunkCost paradigm (linear models)
R2.Sunk = as.data.frame(cbind( 
  "R2" = c(summary(Sunk.PAN.Study5)$r.squared,
           summary(Sunk.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "SunkCost"
))

R2.FullStudy5 <- rbind(R2.Default, R2.Disease, R2.LessMore, R2.Sunk)
R2.FullStudy5$R2 <- round(as.numeric(R2.FullStudy5$R2), 3)

R2.FullStudy5 <- R2.FullStudy5 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement_full_study_5 <- R2.FullStudy5 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))%>%
  slice(-1) 

avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample", digits = 2)

Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample
Model	Avg_Improvement
Pan	0.00
Pan+FACE	59.96
Pan+Pred.C(reg)+FAE	40.92
Pan+Pred.C(net)+FAE	38.81
Pan+Pred.C(rf)+FAE	31.00
Pan+Pred.C(gbm)+FAE	34.14
Pan+FACE (Full Study 5)	43.20

C*Cond Model Coefficients

DVs for Default & Framing are 1/0
DVs for LessMore & SunkCost are on the original scale

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_C.reg_FAE),confint(Default.PAN_Pred_C.reg_FAE)),
cbind(coef(Default.PAN_Pred_C.net_FAE),confint(Default.PAN_Pred_C.net_FAE)),
cbind(coef(Default.PAN_Pred_C.rf_FAE),confint(Default.PAN_Pred_C.rf_FAE)),
cbind(coef(Default.PAN_Pred_C.gbm_FAE),confint(Default.PAN_Pred_C.gbm_FAE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_C.reg_FAE),confint(Disease.PAN_Pred_C.reg_FAE)),
cbind(coef(Disease.PAN_Pred_C.net_FAE),confint(Disease.PAN_Pred_C.net_FAE)),
cbind(coef(Disease.PAN_Pred_C.rf_FAE),confint(Disease.PAN_Pred_C.rf_FAE)),
cbind(coef(Disease.PAN_Pred_C.gbm_FAE),confint(Disease.PAN_Pred_C.gbm_FAE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_C.reg_FAE),confint(LessMore.PAN_Pred_C.reg_FAE)),
cbind(coef(LessMore.PAN_Pred_C.net_FAE),confint(LessMore.PAN_Pred_C.net_FAE)),
cbind(coef(LessMore.PAN_Pred_C.rf_FAE),confint(LessMore.PAN_Pred_C.rf_FAE)),
cbind(coef(LessMore.PAN_Pred_C.gbm_FAE),confint(LessMore.PAN_Pred_C.gbm_FAE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_C.reg_FAE),confint(Sunk.PAN_Pred_C.reg_FAE)),
cbind(coef(Sunk.PAN_Pred_C.net_FAE),confint(Sunk.PAN_Pred_C.net_FAE)),
cbind(coef(Sunk.PAN_Pred_C.rf_FAE),confint(Sunk.PAN_Pred_C.rf_FAE)),
cbind(coef(Sunk.PAN_Pred_C.gbm_FAE),confint(Sunk.PAN_Pred_C.gbm_FAE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef

AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq

### Study 5 Results ###
Default_model.Study5<-list(
  "Default.PAN.Study5" = Default.PAN.Study5,
  "Default.PAN_FACE.Study5" = Default.PAN_FACE.Study5)
Disease_model.Study5<-list(
  "Disease.PAN.Study5" = Disease.PAN.Study5,
  "Disease.PAN_FACE.Study5" = Disease.PAN_FACE.Study5)

LessMore_model.Study5<-list(
  "LessMore.PAN.Study5" = LessMore.PAN.Study5,
  "LessMore.PAN_FACE.Study5" = LessMore.PAN_FACE.Study5)


Sunk_model.Study5<-list(
  "Sunk.PAN.Study5" = Sunk.PAN.Study5,
  "Sunk.PAN_FACE.Study5" = Sunk.PAN_FACE.Study5)

deviance_explained <- sapply(Default_model.Study5, calculate_deviance_explained.Default)
deviance_explained_df.Default.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Default.Study5) <- c("Condition","Panel", "Default Condition:Panel")

deviance_explained <- sapply(Disease_model.Study5, calculate_deviance_explained.Disease)
deviance_explained_df.Disease.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Disease.Study5) <- c("Condition","Panel", "Disease Condition:Panel")

variance_explained_lm <- sapply(LessMore_model.Study5, calculate_variance_explained_LessMore)
variance_explained_lm_df.LessMore.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

variance_explained_lm <- sapply(Sunk_model.Study5, calculate_variance_explained_Sunk)
variance_explained_lm_df.SunkCost.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.SunkCost.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.C(reg)+FAE", "Pan+Pred.C(net)+FAE", "Pan+Pred.C(rf)+FAE", "Pan+Pred.C(gbm)+FAE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in the test sample", digits = 2)

Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in the test sample
Model	Avg_Reduction
Pan	0.00
Pan+FACE	75.56
Pan+Pred.C(reg)+FAE	59.70
Pan+Pred.C(net)+FAE	49.04
Pan+Pred.C(rf)+FAE	55.99
Pan+Pred.C(gbm)+FAE	62.03
Pan (Full Study5)	0.00
Pan+FACE (Full Study5)	74.19

2.0.9 Explain Heterogenity with Predicted C in Full study 5 data

2.0.9.1 Accuracy in full MP5 data

Random forest performs best when predicting the full Study 5 data, outperforming GBM. This could be because while we had made GBM more parsimounious, we haven’t done so with random forest.

performance_metrics <- data.frame(
Method = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(test_perf.full_M5a_reg.C["MAE"], test_perf.full_M5a_net.C["MAE"], test_perf.full_M5a_rf.C["MAE"], test_perf.full_M5a_gbm.C["MAE"]),
  RMSE = c(test_perf.full_M5a_reg.C["RMSE"], test_perf.full_M5a_net.C["RMSE"], test_perf.full_M5a_rf.C["RMSE"], test_perf.full_M5a_gbm.C["RMSE"]),
  Rsquared = c(test_perf.full_M5a_reg.C["Rsquared"], test_perf.full_M5a_net.C["Rsquared"], test_perf.full_M5a_rf.C["Rsquared"], test_perf.full_M5a_gbm.C["Rsquared"])
)



performance_metrics_long <- performance_metrics %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")


# Create the performance metrics table
performance_table <- performance_metrics %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table, caption = "Comparison of Model Performance in Held-Out Test Data") %>%
  kable_styling(full_width = F, position = "center")

Comparison of Model Performance in Held-Out Test Data
Method	MAE	RMSE	Rsquared
Linear Regression	0.5493	0.6827	0.4957
Elastic Net	0.5636	0.6959	0.4775
Random Forest	0.2913	0.3982	0.8588
Gradient Boosting	0.4593	0.5788	0.6455

tst_M5a.sd_F <- sd(data.tst_M5a$F)

ggplot(performance_metrics_long, aes(x = Metric, y = Value, fill = Method)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = tst_M5a.sd_F, yend = tst_M5a.sd_F),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long %>% filter(Metric == "RMSE"),
            aes(x = 2, y = tst_M5a.sd_F, label = paste0("Std.dev of F in Held-Out Test Data: ", round(tst_M5a.sd_F, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Comparison of Model Performance in Full Study 5 Data",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

# do this by panel?

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.full_M5a.reg, family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.full_M5a.gbm.C, family = binomial)

Default.PAN_Pred_C.reg_FAE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.reg.C , family = binomial)

Default.PAN_Pred_C.net_FAE <- glm(numDefault ~  DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C + DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.net.C, family = binomial)

Default.PAN_Pred_C.rf_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.rf.C, family = binomial)

Default.PAN_Pred_C.gbm_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.gbm.C, family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_C.reg_FAE" = Default.PAN_Pred_C.reg_FAE,
  "Default.PAN_Pred_C.net_FAE" = Default.PAN_Pred_C.net_FAE,
  "Default.PAN_Pred_C.rf_FAE" = Default.PAN_Pred_C.rf_FAE,
  "Default.PAN_Pred_C.gbm_FAE" = Default.PAN_Pred_C.gbm_FAE
)

Model Results

term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_C.reg_FAE,Default.PAN_Pred_C.net_FAE,Default.PAN_Pred_C.rf_FAE,Default.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result

Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.C(reg)_FAE				PAN_Pred.C(net)_FAE				PAN_Pred.C(rf)_FAE				PAN_Pred.C(gbm)_FAE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	1.05	0.13	0.38	0.703	1.05	0.14	0.38	0.702	1.00	0.13	-0.01	0.990	0.98	0.13	-0.15	0.882	1.01	0.13	0.05	0.956	1.00	0.13	0.01	0.991
DefaultCondition [OPTOUT]	1.74	0.32	3.01	0.003	1.76	0.33	3.02	0.003	1.94	0.37	3.49	<0.001	2.03	0.39	3.68	<0.001	1.84	0.35	3.23	0.001	1.90	0.36	3.38	0.001
F					1.02	0.19	0.13	0.894	1.32	0.19	1.87	0.061	1.36	0.20	2.07	0.038	1.05	0.18	0.27	0.784	1.19	0.19	1.11	0.267
A					0.95	0.08	-0.54	0.590	0.88	0.08	-1.48	0.139	0.88	0.08	-1.39	0.165	0.91	0.08	-1.13	0.257	0.89	0.08	-1.32	0.186
C					1.72	0.25	3.78	<0.001
E					1.04	0.09	0.40	0.686	1.14	0.10	1.52	0.130	1.13	0.10	1.47	0.140	1.06	0.09	0.71	0.476	1.11	0.09	1.21	0.228
DefaultCondition [OPTOUT] × F					1.25	0.34	0.83	0.408	1.18	0.26	0.75	0.455	1.19	0.26	0.79	0.427	1.29	0.33	1.00	0.316	1.27	0.29	1.05	0.295
DefaultCondition [OPTOUT] × A					0.84	0.11	-1.32	0.188	0.94	0.13	-0.43	0.670	0.96	0.13	-0.34	0.736	0.89	0.12	-0.92	0.360	0.92	0.12	-0.61	0.541
DefaultCondition [OPTOUT] × C					0.62	0.13	-2.29	0.022
DefaultCondition [OPTOUT] × E					1.09	0.14	0.68	0.499	1.00	0.13	0.00	1.000	1.00	0.13	-0.00	0.996	1.07	0.14	0.57	0.572	1.04	0.13	0.32	0.751
Predicted C									1.83	0.32	3.45	0.001	1.78	0.35	2.90	0.004	2.26	0.45	4.13	<0.001	2.13	0.41	3.89	<0.001
DefaultCondition [OPTOUT] × Predicted C									0.41	0.11	-3.48	0.001	0.36	0.10	-3.51	<0.001	0.45	0.13	-2.81	0.005	0.37	0.11	-3.46	0.001
Observations	1460				1460				1460				1460				1460				1460
AIC	1884.213				1858.463				1858.556				1860.214				1855.651				1856.522
log-Likelihood	-936.107				-915.232				-915.278				-916.107				-913.826				-914.261

Anova and Eta Square

As in the origianl paper, type I anova is used. We first attribute variance to condition, FACE, then Panel main effect, followed by the conditionFACE interaction, and the Panelcondition interaction last.
As in the original paper, Etasq of a variable in linear regression models (LessMore and Sunk) is calculated as the sum of squares explained by the variables divided by the total sum of squares. Etasq of a variable in logistic regression models (Default and Unusual Disease) is calculated as the deviance explained by the model relative to the null model (deviance from the null model divided by the model’s residual deviance).
I validated this pipeline by reproducing the stats reported in the SI for Study 1.
Woudl we like to see the full Anova stats?

calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")

Etasq Table: Default Paradigm
	Condition	Panel	Default Condition:Panel
Default.PAN	0.03801***	0.00298.	0.01188***
Default.PAN_FACE	0.03801***	0.00051	0.00159
Default.PAN_Pred_C.reg_FAE	0.03801***	0.00263.	0.00173
Default.PAN_Pred_C.net_FAE	0.03801***	0.00312*	0.00156
Default.PAN_Pred_C.rf_FAE	0.03801***	0.00085	0.00111
Default.PAN_Pred_C.gbm_FAE	0.03801***	0.00197	0.00093

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.full_M5a.reg, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.full_M5a.reg.C, family = binomial)

Disease.PAN_Pred_C.reg_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.reg.C, family = binomial)

Disease.PAN_Pred_C.net_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.net.C, family = binomial)

Disease.PAN_Pred_C.rf_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.rf.C, family = binomial)

Disease.PAN_Pred_C.gbm_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.full_M5a.gbm.C, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_C.reg_FAE" = Disease.PAN_Pred_C.reg_FAE,
  "Disease.PAN_Pred_C.net_FAE" = Disease.PAN_Pred_C.net_FAE,
  "Disease.PAN_Pred_C.rf_FAE" = Disease.PAN_Pred_C.rf_FAE, 
  "Disease.PAN_Pred_C.gbm_FAE" = Disease.PAN_Pred_C.gbm_FAE
)

# extract_aic(Disease_model)

Model Results

term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_C.reg_FAE,Disease.PAN_Pred_C.net_FAE,Disease.PAN_Pred_C.rf_FAE,Disease.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result

Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.C(reg)_FAE				PAN_Pred.C(net)_FAE				PAN_Pred.C(rf)_FAE				PAN_Pred.C(gbm)_FAE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.45	0.06	-5.84	<0.001	0.43	0.06	-6.03	<0.001	0.46	0.06	-5.58	<0.001	0.47	0.07	-5.36	<0.001	0.45	0.06	-5.73	<0.001	0.46	0.06	-5.53	<0.001
DiseaseCondition [LOSS]	3.40	0.64	6.46	<0.001	3.69	0.72	6.73	<0.001	3.46	0.68	6.33	<0.001	3.33	0.66	6.09	<0.001	3.53	0.69	6.47	<0.001	3.45	0.68	6.33	<0.001
F					1.07	0.23	0.31	0.755	0.92	0.15	-0.49	0.622	0.97	0.16	-0.19	0.850	1.03	0.20	0.14	0.892	1.00	0.17	0.02	0.988
A					1.40	0.14	3.34	0.001	1.46	0.15	3.63	<0.001	1.49	0.16	3.79	<0.001	1.44	0.15	3.55	<0.001	1.46	0.15	3.67	<0.001
C					0.73	0.12	-1.83	0.067
E					0.89	0.09	-1.14	0.256	0.84	0.08	-1.81	0.070	0.84	0.08	-1.82	0.069	0.88	0.09	-1.34	0.182	0.86	0.08	-1.59	0.113
DiseaseCondition [LOSS] × F					1.21	0.35	0.65	0.513	1.20	0.27	0.82	0.415	1.13	0.26	0.54	0.586	1.07	0.29	0.25	0.803	1.07	0.26	0.28	0.777
DiseaseCondition [LOSS] × A					1.03	0.14	0.23	0.820	0.98	0.14	-0.18	0.861	0.95	0.14	-0.35	0.723	1.00	0.14	0.01	0.990	0.98	0.14	-0.17	0.868
DiseaseCondition [LOSS] × C					1.26	0.28	1.06	0.290
DiseaseCondition [LOSS] × E					1.04	0.14	0.27	0.791	1.09	0.14	0.66	0.511	1.09	0.14	0.67	0.501	1.03	0.14	0.22	0.827	1.06	0.14	0.42	0.674
Predicted C									0.70	0.14	-1.80	0.073	0.59	0.13	-2.33	0.020	0.66	0.15	-1.85	0.065	0.62	0.14	-2.18	0.029
DiseaseCondition [LOSS] × Predicted C									1.72	0.47	2.01	0.045	2.16	0.66	2.52	0.012	1.76	0.53	1.88	0.059	2.06	0.61	2.43	0.015
Observations	1460				1460				1460				1460				1460				1460
AIC	1762.505				1721.769				1721.143				1718.492				1721.411				1719.140
log-Likelihood	-875.252				-846.884				-846.571				-845.246				-846.705				-845.570

Anova and Eta Square

calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")

Etasq Table: Disease Paradigm
	Condition	Panel	DiseaseCondition:Panel
Disease.PAN	0.05317***	0.03246***	0.01394***
Disease.PAN_FACE	0.05317***	0.00514**	0.00395*
Disease.PAN_Pred_C.reg_FAE	0.05317***	0.00384*	0.00378*
Disease.PAN_Pred_C.net_FAE	0.05317***	0.00404*	0.00332*
Disease.PAN_Pred_C.rf_FAE	0.05317***	0.0039*	0.00291.
Disease.PAN_Pred_C.gbm_FAE	0.05317***	0.00374*	0.00256.

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.full_M5a.reg)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.reg.C)
LessMore.PAN_Pred_C.reg_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.reg.C)
LessMore.PAN_Pred_C.net_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.net.C)
LessMore.PAN_Pred_C.rf_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.rf.C)
LessMore.PAN_Pred_C.gbm_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.gbm.C)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_C.reg_FAE" = LessMore.PAN_Pred_C.reg_FAE,
  "LessMore.PAN_Pred_C.net_FAE" = LessMore.PAN_Pred_C.net_FAE,
  "LessMore.PAN_Pred_C.rf_FAE" = LessMore.PAN_Pred_C.rf_FAE, 
  "LessMore.PAN_Pred_C.gbm_FAE" = LessMore.PAN_Pred_C.gbm_FAE
)

Model Results

term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_C.reg_FAE,LessMore.PAN_Pred_C.net_FAE,LessMore.PAN_Pred_C.rf_FAE,LessMore.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result

LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.C(reg)_FAE				PAN_Pred.C(net)_FAE				PAN_Pred.C(rf)_FAE				PAN_Pred.C(gbm)_FAE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.20	0.09	59.66	<0.001	5.22	0.09	61.17	<0.001	5.19	0.09	60.63	<0.001	5.19	0.09	60.11	<0.001	5.21	0.08	61.35	<0.001	5.21	0.08	61.29	<0.001
LessMoreCondition [SCARF]	1.11	0.12	9.00	<0.001	1.10	0.12	9.13	<0.001	1.10	0.12	9.10	<0.001	1.09	0.12	8.95	<0.001	1.06	0.12	8.78	<0.001	1.07	0.12	8.84	<0.001
F					0.13	0.12	1.12	0.265	0.03	0.09	0.37	0.712	0.06	0.09	0.67	0.504	0.10	0.11	0.91	0.362	0.01	0.10	0.14	0.892
A					-0.13	0.06	-2.23	0.026	-0.16	0.06	-2.67	0.008	-0.15	0.06	-2.55	0.011	-0.14	0.06	-2.29	0.022	-0.16	0.06	-2.63	0.009
C					0.01	0.09	0.08	0.934
E					-0.01	0.06	-0.10	0.919	-0.00	0.05	-0.04	0.971	-0.00	0.05	-0.05	0.957	-0.01	0.05	-0.18	0.856	-0.01	0.05	-0.25	0.806
LessMoreCondition [SCARF] × F					-0.02	0.17	-0.13	0.899	0.20	0.13	1.49	0.136	0.16	0.13	1.18	0.237	-0.05	0.16	-0.30	0.764	0.11	0.14	0.77	0.441
LessMoreCondition [SCARF] × A					0.39	0.08	4.82	<0.001	0.39	0.08	4.70	<0.001	0.37	0.08	4.52	<0.001	0.37	0.08	4.52	<0.001	0.37	0.08	4.58	<0.001
LessMoreCondition [SCARF] × C					0.23	0.13	1.74	0.082
LessMoreCondition [SCARF] × E					0.01	0.08	0.08	0.940	0.05	0.08	0.61	0.539	0.05	0.08	0.63	0.528	0.01	0.08	0.15	0.878	0.04	0.08	0.52	0.601
Predicted C									0.25	0.11	2.22	0.026	0.20	0.13	1.57	0.118	0.07	0.13	0.55	0.586	0.28	0.12	2.29	0.022
LessMoreCondition [SCARF] × Predicted C									-0.03	0.16	-0.18	0.859	0.08	0.18	0.46	0.643	0.37	0.18	2.11	0.035	0.15	0.18	0.83	0.406
Observations	1460				1460				1460				1460				1460				1460
R² / R² adjusted	0.204 / 0.201				0.248 / 0.241				0.249 / 0.242				0.248 / 0.242				0.251 / 0.244				0.253 / 0.247

Anova and Eta Square

calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")

Etasq Table: LessMore Paradigm
	Condition	Panel	LessMoreCondition:Panel
LessMore.PAN	0.09206***	0.10649***	0.00506*
LessMore.PAN_FACE	0.09206***	0.02908***	0.00051
LessMore.PAN_Pred_C.reg_FAE	0.09206***	0.03071***	0.00174
LessMore.PAN_Pred_C.net_FAE	0.09206***	0.03068***	0.00144
LessMore.PAN_Pred_C.rf_FAE	0.09206***	0.0248***	0.00054
LessMore.PAN_Pred_C.gbm_FAE	0.09206***	0.02263***	0.00104

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.full_M5a.reg.C)

Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.reg.C)
Sunk.PAN_Pred_C.reg_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.full_M5a.reg.C)
Sunk.PAN_Pred_C.net_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.net.C)
Sunk.PAN_Pred_C.rf_FAE <- lm(numSunkCost ~ SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.full_M5a.rf.C)
Sunk.PAN_Pred_C.gbm_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.gbm.C)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_C.reg_FAE" = Sunk.PAN_Pred_C.reg_FAE,
  "Sunk.PAN_Pred_C.net_FAE" = Sunk.PAN_Pred_C.net_FAE,
  "Sunk.PAN_Pred_C.rf_FAE" = Sunk.PAN_Pred_C.rf_FAE, #0.302 with unscaled
  "Sunk.PAN_Pred_C.gbm_FAE" = Sunk.PAN_Pred_C.gbm_FAE
)

# extract_adjusted_r_squared(Sunk_model)

Full Model Results

term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_C.reg_FAE,Sunk.PAN_Pred_C.net_FAE,Sunk.PAN_Pred_C.rf_FAE,Sunk.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_C(reg)_FAE", 
                        "PAN_Pred_C(net)_FAE", 
                        "PAN_Pred_C(rf)_FAE", 
                        "PAN_Pred_C(gbm)_FAE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result

Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred_C(reg)_FAE				PAN_Pred_C(net)_FAE				PAN_Pred_C(rf)_FAE				PAN_Pred_C(gbm)_FAE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.78	0.17	34.79	<0.001	5.77	0.17	34.93	<0.001	5.77	0.17	34.24	<0.001	5.80	0.17	34.09	<0.001	5.77	0.17	34.46	<0.001	5.78	0.17	34.39	<0.001
SunkCondition [PAID]	0.43	0.23	1.85	0.065	0.46	0.23	1.97	0.049	0.47	0.24	2.00	0.045	0.44	0.24	1.86	0.063	0.48	0.24	2.04	0.042	0.46	0.24	1.96	0.050
F					0.39	0.22	1.77	0.077	0.16	0.18	0.91	0.365	0.22	0.18	1.21	0.226	0.15	0.21	0.72	0.470	0.21	0.19	1.08	0.280
A					0.07	0.11	0.61	0.539	0.07	0.11	0.62	0.533	0.09	0.11	0.79	0.430	0.07	0.11	0.62	0.537	0.08	0.11	0.70	0.485
C					-0.26	0.18	-1.50	0.133
E					-0.06	0.11	-0.59	0.558	-0.11	0.11	-1.02	0.308	-0.11	0.11	-1.06	0.288	-0.11	0.11	-1.01	0.315	-0.10	0.11	-0.98	0.326
SunkCondition [PAID] × F					0.38	0.32	1.18	0.239	0.19	0.26	0.73	0.463	0.11	0.26	0.42	0.671	0.31	0.31	1.02	0.308	0.16	0.28	0.59	0.554
SunkCondition [PAID] × A					0.02	0.16	0.13	0.895	0.04	0.16	0.23	0.815	0.01	0.16	0.07	0.942	0.05	0.16	0.29	0.770	0.03	0.16	0.20	0.843
SunkCondition [PAID] × C					-0.20	0.25	-0.79	0.428
SunkCondition [PAID] × E					0.11	0.15	0.72	0.469	0.07	0.15	0.50	0.617	0.08	0.15	0.53	0.596	0.09	0.15	0.62	0.537	0.07	0.15	0.49	0.624
Predicted C									-0.01	0.22	-0.06	0.954	-0.17	0.25	-0.70	0.483	0.01	0.24	0.03	0.979	-0.11	0.24	-0.46	0.644
SunkCondition [PAID] × Predicted C									-0.03	0.31	-0.11	0.914	0.19	0.35	0.54	0.592	-0.23	0.35	-0.67	0.504	0.03	0.34	0.08	0.939
Observations	1460				1460				1460				1460				1460				1460
R² / R² adjusted	0.023 / 0.020				0.042 / 0.033				0.036 / 0.027				0.036 / 0.028				0.036 / 0.028				0.036 / 0.028

Anova and Eta Square

calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 6), sig_condition)
  res_panel <- paste0(round(perc_panel, 6), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 6), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")

Etasq Table: SunkCost Paradigm
	Condition	Panel	LessMoreCondition:Panel
Sunk.PAN	0.007943***	0.011323***	0.003981.
Sunk.PAN_FACE	0.007943***	0.000985	0.002048
Sunk.PAN_Pred_C.reg_FAE	0.007943***	9.8e-05	0.001764
Sunk.PAN_Pred_C.net_FAE	0.007943***	8.1e-05	0.00151
Sunk.PAN_Pred_C.rf_FAE	0.007943***	8.2e-05	0.002066
Sunk.PAN_Pred_C.gbm_FAE	0.007943***	9.8e-05	0.001547

Summary

Model Fit

\(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)
note that the y-axes for different paradigms are not aligned.

Default.null<-glm(numDefault~1,data.full_M5a.reg.C,family=binomial)


R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_C.reg_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.net_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.rf_FAE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_C.gbm_FAE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.full_M5a.reg.C,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_C.reg_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.net_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.rf_FAE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_C.gbm_FAE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(LessMore.PAN_Pred_C.rf_FAE)$r.squared,
                     summary(LessMore.PAN_Pred_C.gbm_FAE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(Sunk.PAN_Pred_C.rf_FAE )$r.squared,
                     summary(Sunk.PAN_Pred_C.gbm_FAE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "SunkCost"))


R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_C.reg_FAE), 
                    AIC(Default.PAN_Pred_C.net_FAE), 
                    AIC(Default.PAN_Pred_C.rf_FAE),
                    AIC(Default.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_C.reg_FAE), 
                    AIC(Disease.PAN_Pred_C.net_FAE), 
                    AIC(Disease.PAN_Pred_C.rf_FAE),
                    AIC(Disease.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Disease"))


AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PC(reg)+FAE",  
                                   "Pan+PC(net)+FAE", 
                                   "Pan+PC(rf)+FAE",
                                   "Pan+PC(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


### Avg. improvement in R2 for each model across all four paradigms, in FULL study 5
Default.null.Study5<-glm(numDefault~1,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.null.Study5<-glm(numDisease~1,Data_M5a.log_z_dummy_coded,family=binomial)
Default.PAN.Study5<-glm(numDefault~DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN.Study5<-glm(numDisease~DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN.Study5<-lm(numLessMore~LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN.Study5<-lm(numSunkCost~SunkCondition*Panel,Data_M5a.log_z_dummy_coded)


Default.PAN_FACE.Study5<-glm(numDefault~DefaultCondition*F+DefaultCondition*A+DefaultCondition*C+DefaultCondition*E+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~DiseaseCondition*F+DiseaseCondition*A+DiseaseCondition*C+DiseaseCondition*E+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~LessMoreCondition*F+LessMoreCondition*A+LessMoreCondition*C+LessMoreCondition*E+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~SunkCondition*F+SunkCondition*A+SunkCondition*C+SunkCondition*E+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

R2.Default = as.data.frame(cbind( 
  "R2" = c((1-logLik(Default.PAN.Study5)/logLik(Default.null.Study5)),
           (1-logLik(Default.PAN_FACE.Study5)/logLik(Default.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Default"
))


R2.Disease = as.data.frame(cbind( 
  "R2" = c((1-logLik(Disease.PAN.Study5)/logLik(Disease.null.Study5)),
           (1-logLik(Disease.PAN_FACE.Study5)/logLik(Disease.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Disease"
))

# For LessMore paradigm (linear models)
R2.LessMore = as.data.frame(cbind( 
  "R2" = c(summary(LessMore.PAN.Study5)$r.squared,
           summary(LessMore.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "LessMore"
))

# For SunkCost paradigm (linear models)
R2.Sunk = as.data.frame(cbind( 
  "R2" = c(summary(Sunk.PAN.Study5)$r.squared,
           summary(Sunk.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "SunkCost"
))

R2.FullStudy5 <- rbind(R2.Default, R2.Disease, R2.LessMore, R2.Sunk)
R2.FullStudy5$R2 <- round(as.numeric(R2.FullStudy5$R2), 3)

R2.FullStudy5 <- R2.FullStudy5 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement_full_study_5 <- R2.FullStudy5 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))%>%
  slice(-1) 

avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample", digits = 2)

Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample
Model	Avg_Improvement
Pan	0.00
Pan+FACE	43.20
Pan+Pred.C(reg)+FAE	36.80
Pan+Pred.C(net)+FAE	36.46
Pan+Pred.C(rf)+FAE	37.52
Pan+Pred.C(gbm)+FAE	38.01
Pan+FACE (Full Study 5)	43.20

C*Cond Model Coefficients

DVs for Default & Framing are 1/0
DVs for LessMore & SunkCost are on the original scale

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_C.reg_FAE),confint(Default.PAN_Pred_C.reg_FAE)),
cbind(coef(Default.PAN_Pred_C.net_FAE),confint(Default.PAN_Pred_C.net_FAE)),
cbind(coef(Default.PAN_Pred_C.rf_FAE),confint(Default.PAN_Pred_C.rf_FAE)),
cbind(coef(Default.PAN_Pred_C.gbm_FAE),confint(Default.PAN_Pred_C.gbm_FAE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_C.reg_FAE),confint(Disease.PAN_Pred_C.reg_FAE)),
cbind(coef(Disease.PAN_Pred_C.net_FAE),confint(Disease.PAN_Pred_C.net_FAE)),
cbind(coef(Disease.PAN_Pred_C.rf_FAE),confint(Disease.PAN_Pred_C.rf_FAE)),
cbind(coef(Disease.PAN_Pred_C.gbm_FAE),confint(Disease.PAN_Pred_C.gbm_FAE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_C.reg_FAE),confint(LessMore.PAN_Pred_C.reg_FAE)),
cbind(coef(LessMore.PAN_Pred_C.net_FAE),confint(LessMore.PAN_Pred_C.net_FAE)),
cbind(coef(LessMore.PAN_Pred_C.rf_FAE),confint(LessMore.PAN_Pred_C.rf_FAE)),
cbind(coef(LessMore.PAN_Pred_C.gbm_FAE),confint(LessMore.PAN_Pred_C.gbm_FAE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_C.reg_FAE),confint(Sunk.PAN_Pred_C.reg_FAE)),
cbind(coef(Sunk.PAN_Pred_C.net_FAE),confint(Sunk.PAN_Pred_C.net_FAE)),
cbind(coef(Sunk.PAN_Pred_C.rf_FAE),confint(Sunk.PAN_Pred_C.rf_FAE)),
cbind(coef(Sunk.PAN_Pred_C.gbm_FAE),confint(Sunk.PAN_Pred_C.gbm_FAE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef

AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq

### Study 5 Results ###
Default_model.Study5<-list(
  "Default.PAN.Study5" = Default.PAN.Study5,
  "Default.PAN_FACE.Study5" = Default.PAN_FACE.Study5)
Disease_model.Study5<-list(
  "Disease.PAN.Study5" = Disease.PAN.Study5,
  "Disease.PAN_FACE.Study5" = Disease.PAN_FACE.Study5)

LessMore_model.Study5<-list(
  "LessMore.PAN.Study5" = LessMore.PAN.Study5,
  "LessMore.PAN_FACE.Study5" = LessMore.PAN_FACE.Study5)


Sunk_model.Study5<-list(
  "Sunk.PAN.Study5" = Sunk.PAN.Study5,
  "Sunk.PAN_FACE.Study5" = Sunk.PAN_FACE.Study5)

deviance_explained <- sapply(Default_model.Study5, calculate_deviance_explained.Default)
deviance_explained_df.Default.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Default.Study5) <- c("Condition","Panel", "Default Condition:Panel")

deviance_explained <- sapply(Disease_model.Study5, calculate_deviance_explained.Disease)
deviance_explained_df.Disease.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Disease.Study5) <- c("Condition","Panel", "Disease Condition:Panel")

variance_explained_lm <- sapply(LessMore_model.Study5, calculate_variance_explained_LessMore)
variance_explained_lm_df.LessMore.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

variance_explained_lm <- sapply(Sunk_model.Study5, calculate_variance_explained_Sunk)
variance_explained_lm_df.SunkCost.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.SunkCost.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.C(reg)+FAE", "Pan+Pred.C(net)+FAE", "Pan+Pred.C(rf)+FAE", "Pan+Pred.C(gbm)+FAE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.full_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data", digits = 2)

Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data
Model	Avg_Reduction
Pan	0.00
Pan+FACE	74.19
Pan+Pred.C(reg)+FAE	69.91
Pan+Pred.C(net)+FAE	74.17
Pan+Pred.C(rf)+FAE	76.80
Pan+Pred.C(gbm)+FAE	78.60
Pan (Full Study5)	0.00
Pan+FACE (Full Study5)	74.19

2.1 Evaluate Portability with Study 1 Data

2.1.1 Study 1 US Data

# Data_M1a_dummy_coded<-read.csv("../MP Study 1/Data_M1a_dummy_coded_20240808.csv") # seperately .log_z data

Data_M1a.log_dummy_coded<-read.csv("../MP Study 1/Data_M1a.log_dummy_coded_20241003.csv")%>%
  rename_with(~ gsub("DEMCHAR", "PurposeOpenEnd", .x))
  
META_Browser_columns <- names(Data_M1a.log_dummy_coded)[grepl("META_Browser", names(Data_M1a.log_dummy_coded))] # tentatively retain browser Browsers to make predictions
Data_M1a.log_dummy_coded<-Data_M1a.log_dummy_coded%>% 
  select(-c(X,LW_DV,numLW_IV)) 
names(Data_M1a.log_dummy_coded) <- gsub("iNTRO", "INTRO", names(Data_M1a.log_dummy_coded))


# This reread the data using read.csv function to ensures data format consistency, need to understand more about this function here....

Data_M5a.log_dummy_coded<-read.csv("Data_M5a.log_dummy_coded_20240921.csv")

META_Browser_columns <- names(Data_M5a.log_dummy_coded)[grepl("META_Browser", names(Data_M5a.log_dummy_coded))]
Data_M5a.log_dummy_coded<-Data_M5a.log_dummy_coded%>%
  select(-c(X))


# Calculate mean and standard deviation for each column in Data_M5a.log_dummy_coded
means <- sapply(Data_M5a.log_dummy_coded[, scale_columns], mean, na.rm = TRUE)
sds <- sapply(Data_M5a.log_dummy_coded[, scale_columns], sd, na.rm = TRUE)

scale_with_reference_M5a <- function(x, mean_ref, sd_ref) {
  (x - mean_ref) / sd_ref
}

Data_M5a.log_z_dummy_coded <- Data_M5a.log_dummy_coded %>%
  mutate(across(.cols = all_of(scale_columns), 
                .fns = ~ scale_with_reference_M5a(., means[cur_column()], sds[cur_column()])))

# Scale Data_M1a.log_dummy_coded using the mean and sd from Data_M5a.log_dummy_coded
Data_M1a.log_z_dummy_coded <- Data_M1a.log_dummy_coded %>%
  mutate(across(.cols = all_of(scale_columns), 
                .fns = ~ scale_with_reference_M5a(., means[cur_column()], sds[cur_column()])))

Data_M1a.log_z_dummy_coded <- Data_M1a.log_z_dummy_coded

# same hyperparameters from section 2.0
# Set seed for reproducibility
set.seed(123)


variables_to_remove.C <- c("F","A","E","numSunkCost","numLessMore","numDefault","numDisease","SunkCondition","LessMoreCondition","DiseaseCondition","DefaultCondition","Panel","Antonym","Synonym","crt2_score","rotsum","MXsum")

# Create training and testing sets

data.trn_M5a_FULL <- Data_M5a.log_z_dummy_coded 
data.tst_MP1.US <- Data_M1a.log_z_dummy_coded%>%filter(Country_US==1) #  MP1 US data
data.tst_MP1.UK <- Data_M1a.log_z_dummy_coded%>%filter(Country_UK==1) #  MP1 UK data
data.tst_MP1.Neth <- Data_M1a.log_z_dummy_coded%>%filter(Country_US==0&Country_UK==0) # #  MP1 Netherlands student data
# There are columns in study 1 dataset not in study 5, but this is okay. 

data.trn_M5a_FULL.C <- as.data.frame(data.trn_M5a_FULL[, !colnames(data.trn_M5a_FULL) %in% variables_to_remove.C])

actual_MP1.US.C <- data.tst_MP1.US$C

ctrl <- trainControl(method = "cv", number = 10, summaryFunction = defaultSummary)
tuneGrid <- expand.grid(
  mtry = c(33),
  min.node.size = c(200),
  splitrule = c("variance")
)

# Model_M5a_rf.FULL.C <- caret::train(C ~ ., data = data.trn_M5a_FULL.C, method = "ranger",
#                   tuneGrid = tuneGrid, importance = 'impurity', num.trees = 750,
#                   trControl = ctrl)
# save(Model_M5a_rf.FULL.C, file = "./Saved ML Model and Data/Model_M5a_rf_FULL.C.RData")
load("./Saved ML Model and Data/Model_M5a_rf_FULL.C.RData")
M5a_rf_pred_MP1.US.C<- predict(Model_M5a_rf.FULL.C,data.tst_MP1.US)
test_perf.M5a_rf.MP1.US.C <- postResample(M5a_rf_pred_MP1.US.C, actual_MP1.US.C)
data.tst_MP1.US.rf.C <- cbind(data.tst_MP1.US, Predicted_C = M5a_rf_pred_MP1.US.C)



Model_M5a_reg.FULL.C <- train(C ~ ., data = data.trn_M5a_FULL.C, method = "lm")
M5a_reg_pred_MP1.US.C<- predict(Model_M5a_reg.FULL.C,data.tst_MP1.US)
test_perf.M5a_reg.MP1.US.C <- postResample(M5a_reg_pred_MP1.US.C, actual_MP1.US.C)
data.tst_MP1.US.reg.C <- cbind(data.tst_MP1.US, Predicted_C = M5a_reg_pred_MP1.US.C)

Model_M5a_net.FULL.C <- train(C ~ ., data = data.trn_M5a_FULL.C, method = "glmnet", trControl = ctrl, tuneLength=40) # 10 fold cv to select the best model, random search 

M5a_net_pred_MP1.US.C<- predict(Model_M5a_net.FULL.C,data.tst_MP1.US)
test_perf.M5a_net.MP1.US.C <- postResample(M5a_net_pred_MP1.US.C, actual_MP1.US.C)
data.tst_MP1.US.net.C <- cbind(data.tst_MP1.US, Predicted_C = M5a_net_pred_MP1.US.C)


# Model_M5a_gbm.FULL.C <- caret::train(C ~ ., data = data.trn_M5a_FULL.C, method = "gbm",
#                     tuneGrid = tuneGrid.4, trControl = ctrl,
#                     verbose = FALSE)
# 
# save(Model_M5a_gbm.FULL.C, file = "./Saved ML Model and Data/Model_M5a_gbm_FULL.C.RData")
load("./Saved ML Model and Data/Model_M5a_gbm_FULL.C.RData")

M5a_gbm_pred_MP1.US.C<- predict(Model_M5a_gbm.FULL.C,data.tst_MP1.US)
test_perf.M5a_gbm.MP1.US.C <- postResample(M5a_gbm_pred_MP1.US.C, actual_MP1.US.C)
data.tst_MP1.US.gbm.C <- cbind(data.tst_MP1.US, Predicted_C = M5a_gbm_pred_MP1.US.C)

2.1.1.0 Model Fit

We trained the model with full Study 5 data
The following stats come from cross-validation during model training

train_perf_M5a_rf.FULL <- Model_M5a_rf.FULL.C$results[which.min(Model_M5a_rf.FULL.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_reg.FULL <- Model_M5a_reg.FULL.C$results[which.min(Model_M5a_reg.FULL.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_net.FULL <- Model_M5a_net.FULL.C$results[which.min(Model_M5a_net.FULL.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_gbm.FULL <- Model_M5a_gbm.FULL.C$results[which.min(Model_M5a_gbm.FULL.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]

# Combine training performance metrics into a data frame
performance_metrics_trn <- data.frame(
  Model = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(as.numeric(train_perf_M5a_reg.FULL["MAE"]), as.numeric(train_perf_M5a_net.FULL["MAE"]), as.numeric(train_perf_M5a_rf.FULL["MAE"]), as.numeric(train_perf_M5a_gbm.FULL["MAE"])),
  RMSE = c(as.numeric(train_perf_M5a_reg.FULL["RMSE"]), as.numeric(train_perf_M5a_net.FULL["RMSE"]), as.numeric(train_perf_M5a_rf.FULL["RMSE"]), as.numeric(train_perf_M5a_gbm.FULL["RMSE"])),
  Rsquared = c(as.numeric(train_perf_M5a_reg.FULL["Rsquared"]), as.numeric(train_perf_M5a_net.FULL["Rsquared"]), as.numeric(train_perf_M5a_rf.FULL["Rsquared"]), as.numeric(train_perf_M5a_gbm.FULL["Rsquared"])))


performance_table_trn <- performance_metrics_trn %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table_trn, caption = "Comparison of Model Performance in Training Data (Full Study 5 Data) using CV") %>%
  kable_styling(full_width = F, position = "center")

Comparison of Model Performance in Training Data (Full Study 5 Data) using CV
Model	MAE	RMSE	Rsquared
Linear Regression	0.5997	0.7432	0.4113
Elastic Net	0.5785	0.7130	0.4515
Random Forest	0.5950	0.7296	0.4507
Gradient Boosting	0.5305	0.6689	0.5199

###Plot ###

performance_metrics_long_trn <- performance_metrics_trn %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")

trn_M5a_FULL.sd_C <- sd(data.trn_M5a_FULL.C$C)

ggplot(performance_metrics_long_trn, aes(x = Metric, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = trn_M5a_FULL.sd_C, yend = trn_M5a_FULL.sd_C),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
            aes(x = 2, y = trn_M5a_FULL.sd_C, label = paste0("Std.dev of C in Study 5 Full Data: ", round(trn_M5a_FULL.sd_C, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Model Fit with FUll MP5 data",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

2.1.1.4 Correlation Btw ML Predicted C and other FACE factors

Significance Notation: *** if the p-value is < 0.001, ** if the p-value is < 0.01, * if the p-value is < 0.05, . if the p-value is < 0.10

Regression

ggpairs(data.tst_MP1.US.reg.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net

ggpairs(data.tst_MP1.US.net.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest

ggpairs(data.tst_MP1.US.rf.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.tst_MP1.US.gbm.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PC5 and numEDU and Age in Study 1

ggpairs(data.tst_MP1.US.gbm.C, c("Predicted_C","numEDU", "AGE","ATTN1__Correct"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

2.1.1.5 Explain Heterogenity with Predicted C

2.1.1.5.1 Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_MP1.US.gbm.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_MP1.US.gbm.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_C.reg_FAE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.reg.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_C.net_FAE <- glm(numDefault ~  DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.net.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_C.rf_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.rf.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_C.gbm_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.gbm.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_C.reg_FAE" = Default.PAN_Pred_C.reg_FAE,
  "Default.PAN_Pred_C.net_FAE" = Default.PAN_Pred_C.net_FAE,
  "Default.PAN_Pred_C.rf_FAE" = Default.PAN_Pred_C.rf_FAE,
  "Default.PAN_Pred_C.gbm_FAE" = Default.PAN_Pred_C.gbm_FAE
)

Model Results

term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_C.reg_FAE,Default.PAN_Pred_C.net_FAE,Default.PAN_Pred_C.rf_FAE,Default.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result

Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.C(reg)_FAE				PAN_Pred.C(net)_FAE				PAN_Pred.C(rf)_FAE				PAN_Pred.C(gbm)_FAE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.88	0.11	-1.06	0.291	0.87	0.11	-1.14	0.253	0.90	0.12	-0.78	0.433	0.91	0.12	-0.76	0.449	0.85	0.11	-1.29	0.196	0.92	0.12	-0.67	0.500
DefaultCondition [OPTOUT]	3.10	0.56	6.31	<0.001	3.22	0.59	6.42	<0.001	3.09	0.57	6.18	<0.001	3.09	0.57	6.16	<0.001	3.30	0.60	6.53	<0.001	3.10	0.57	6.16	<0.001
F					1.57	0.16	4.48	<0.001	1.42	0.14	3.43	0.001	1.40	0.14	3.34	0.001	1.44	0.14	3.65	<0.001	1.40	0.14	3.30	0.001
A					1.41	0.17	2.87	0.004	1.15	0.15	1.05	0.293	1.12	0.15	0.83	0.405	1.03	0.14	0.24	0.807	1.16	0.15	1.15	0.250
C					1.02	0.05	0.39	0.698
E					0.86	0.07	-1.93	0.054	0.89	0.07	-1.43	0.153	0.90	0.07	-1.39	0.164	0.91	0.07	-1.22	0.222	0.90	0.07	-1.38	0.169
DefaultCondition [OPTOUT] × F					1.03	0.16	0.17	0.867	1.23	0.19	1.32	0.188	1.23	0.19	1.30	0.194	1.14	0.18	0.85	0.395	1.19	0.19	1.09	0.275
DefaultCondition [OPTOUT] × A					0.44	0.08	-4.53	<0.001	0.53	0.10	-3.28	0.001	0.54	0.10	-3.17	0.002	0.53	0.10	-3.24	0.001	0.51	0.09	-3.64	<0.001
DefaultCondition [OPTOUT] × C					1.18	0.10	1.98	0.048
DefaultCondition [OPTOUT] × E					1.23	0.14	1.79	0.073	1.20	0.14	1.55	0.122	1.20	0.14	1.56	0.119	1.22	0.14	1.72	0.085	1.22	0.14	1.70	0.090
Predicted C									1.65	0.19	4.41	<0.001	1.77	0.23	4.47	<0.001	2.39	0.40	5.26	<0.001	1.90	0.23	5.32	<0.001
DefaultCondition [OPTOUT] × Predicted C									0.59	0.10	-3.08	0.002	0.58	0.11	-2.87	0.004	0.60	0.15	-2.10	0.036	0.62	0.11	-2.59	0.010
Observations	3541				3541				3541				3541				3541				3541
AIC	4466.919				4382.086				4370.959				4370.480				4358.654				4360.471
log-Likelihood	-2215.460				-2165.043				-2159.480				-2159.240				-2153.327				-2154.235

Anova and Eta Square

calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")

Etasq Table: Default Paradigm
	Condition	Panel	Default Condition:Panel
Default.PAN	0.05063***	0.00612***	0.00751***
Default.PAN_FACE	0.05063***	0.00456**	0.0031.
Default.PAN_Pred_C.reg_FAE	0.05063***	0.00508**	0.00201
Default.PAN_Pred_C.net_FAE	0.05063***	0.00523**	0.00204
Default.PAN_Pred_C.rf_FAE	0.05063***	0.00501**	0.00271
Default.PAN_Pred_C.gbm_FAE	0.05063***	0.00497**	0.0026

2.1.1.5.2 Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_MP1.US.gbm.C, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_MP1.US.gbm.C, family = binomial)

Disease.PAN_Pred_C.reg_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.reg.C, family = binomial)

Disease.PAN_Pred_C.net_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.net.C, family = binomial)

Disease.PAN_Pred_C.rf_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.rf.C, family = binomial)

Disease.PAN_Pred_C.gbm_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_MP1.US.gbm.C, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_C.reg_FAE" = Disease.PAN_Pred_C.reg_FAE,
  "Disease.PAN_Pred_C.net_FAE" = Disease.PAN_Pred_C.net_FAE,
  "Disease.PAN_Pred_C.rf_FAE" = Disease.PAN_Pred_C.rf_FAE, 
  "Disease.PAN_Pred_C.gbm_FAE" = Disease.PAN_Pred_C.gbm_FAE
)
# extract_aic(Disease_model)

Model Results

term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_C.reg_FAE,Disease.PAN_Pred_C.net_FAE,Disease.PAN_Pred_C.rf_FAE,Disease.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result

Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.C(reg)_FAE				PAN_Pred.C(net)_FAE				PAN_Pred.C(rf)_FAE				PAN_Pred.C(gbm)_FAE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.49	0.05	-6.86	<0.001	0.47	0.05	-7.15	<0.001	0.47	0.05	-7.15	<0.001	0.46	0.05	-7.23	<0.001	0.48	0.05	-6.92	<0.001	0.46	0.05	-7.24	<0.001
DiseaseCondition [LOSS]	2.72	0.39	6.97	<0.001	2.91	0.43	7.29	<0.001	2.92	0.43	7.31	<0.001	2.97	0.44	7.42	<0.001	2.77	0.41	6.97	<0.001	2.99	0.44	7.45	<0.001
F					1.30	0.12	2.82	0.005	1.30	0.12	2.81	0.005	1.32	0.12	2.92	0.003	1.22	0.11	2.21	0.027	1.29	0.12	2.68	0.007
A					1.66	0.18	4.58	<0.001	2.01	0.23	6.01	<0.001	2.07	0.24	6.20	<0.001	1.94	0.23	5.51	<0.001	1.92	0.22	5.74	<0.001
C					0.79	0.04	-4.89	<0.001
E					0.96	0.06	-0.67	0.501	0.91	0.06	-1.39	0.166	0.90	0.06	-1.48	0.138	0.92	0.06	-1.23	0.217	0.92	0.06	-1.29	0.196
DiseaseCondition [LOSS] × F					0.87	0.11	-1.14	0.254	0.81	0.10	-1.70	0.089	0.78	0.10	-1.93	0.054	0.85	0.11	-1.33	0.185	0.79	0.10	-1.87	0.062
DiseaseCondition [LOSS] × A					1.01	0.15	0.07	0.947	0.79	0.12	-1.54	0.124	0.73	0.12	-1.95	0.051	0.76	0.12	-1.70	0.089	0.80	0.12	-1.45	0.148
DiseaseCondition [LOSS] × C					1.17	0.08	2.39	0.017
DiseaseCondition [LOSS] × E					0.79	0.07	-2.58	0.010	0.84	0.08	-1.91	0.056	0.85	0.08	-1.73	0.083	0.84	0.08	-1.86	0.063	0.84	0.08	-1.83	0.067
Predicted C									0.66	0.07	-4.04	<0.001	0.61	0.07	-4.31	<0.001	0.71	0.10	-2.36	0.018	0.68	0.07	-3.52	<0.001
DiseaseCondition [LOSS] × Predicted C									1.77	0.24	4.18	<0.001	2.13	0.33	4.88	<0.001	2.06	0.40	3.69	<0.001	2.02	0.30	4.75	<0.001
Observations	5336				5336				5336				5336				5336				5336
AIC	6723.912				6573.688				6582.429				6576.921				6588.102				6579.211
log-Likelihood	-3343.956				-3260.844				-3265.214				-3262.460				-3268.051				-3263.605

Anova and Eta Square

calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")

Etasq Table: Disease Paradigm
	Condition	Panel	DiseaseCondition:Panel
Disease.PAN	0.05826***	0.01261***	0.0093***
Disease.PAN_FACE	0.05826***	0.00366***	0.00557***
Disease.PAN_Pred_C.reg_FAE	0.05826***	0.00402***	0.00415***
Disease.PAN_Pred_C.net_FAE	0.05826***	0.00397***	0.00388***
Disease.PAN_Pred_C.rf_FAE	0.05826***	0.00381***	0.00395***
Disease.PAN_Pred_C.gbm_FAE	0.05826***	0.00384***	0.00385***

2.1.1.5.3 Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_MP1.US.gbm.C)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.gbm.C)
LessMore.PAN_Pred_C.reg_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.reg.C)
LessMore.PAN_Pred_C.net_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.net.C)
LessMore.PAN_Pred_C.rf_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.rf.C)
LessMore.PAN_Pred_C.gbm_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.gbm.C)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_C.reg_FAE" = LessMore.PAN_Pred_C.reg_FAE,
  "LessMore.PAN_Pred_C.net_FAE" = LessMore.PAN_Pred_C.net_FAE,
  "LessMore.PAN_Pred_C.rf_FAE" = LessMore.PAN_Pred_C.rf_FAE, 
  "LessMore.PAN_Pred_C.gbm_FAE" = LessMore.PAN_Pred_C.gbm_FAE
)

Model Results

term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_C.reg_FAE,LessMore.PAN_Pred_C.net_FAE,LessMore.PAN_Pred_C.rf_FAE,LessMore.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result

LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.C(reg)_FAE				PAN_Pred.C(net)_FAE				PAN_Pred.C(rf)_FAE				PAN_Pred.C(gbm)_FAE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.37	0.07	79.64	<0.001	5.36	0.07	81.93	<0.001	5.37	0.07	82.30	<0.001	5.38	0.07	82.39	<0.001	5.34	0.06	82.15	<0.001	5.38	0.07	82.29	<0.001
LessMoreCondition [SCARF]	0.95	0.09	10.04	<0.001	0.99	0.09	10.82	<0.001	1.00	0.09	10.87	<0.001	0.99	0.09	10.86	<0.001	1.01	0.09	11.05	<0.001	1.00	0.09	10.90	<0.001
F					-0.11	0.05	-1.99	0.046	-0.16	0.05	-2.87	0.004	-0.16	0.05	-2.98	0.003	-0.17	0.05	-3.22	0.001	-0.16	0.05	-2.88	0.004
A					0.38	0.06	6.14	<0.001	0.29	0.07	4.40	<0.001	0.27	0.07	4.07	<0.001	0.18	0.07	2.64	0.008	0.31	0.06	4.89	<0.001
C					0.02	0.03	0.58	0.562
E					-0.00	0.04	-0.09	0.930	0.02	0.04	0.51	0.608	0.03	0.04	0.63	0.529	0.05	0.04	1.19	0.235	0.02	0.04	0.52	0.606
LessMoreCondition [SCARF] × F					0.28	0.08	3.67	<0.001	0.27	0.08	3.53	<0.001	0.26	0.08	3.39	0.001	0.27	0.07	3.57	<0.001	0.26	0.08	3.43	0.001
LessMoreCondition [SCARF] × A					0.45	0.09	5.04	<0.001	0.43	0.09	4.58	<0.001	0.41	0.09	4.37	<0.001	0.43	0.10	4.48	<0.001	0.42	0.09	4.61	<0.001
LessMoreCondition [SCARF] × C					-0.01	0.04	-0.14	0.886
LessMoreCondition [SCARF] × E					-0.12	0.06	-2.24	0.025	-0.13	0.06	-2.28	0.023	-0.13	0.06	-2.24	0.025	-0.14	0.06	-2.51	0.012	-0.13	0.06	-2.28	0.023
Predicted C									0.24	0.06	3.99	<0.001	0.29	0.07	4.28	<0.001	0.58	0.08	6.94	<0.001	0.24	0.06	3.79	<0.001
LessMoreCondition [SCARF] × Predicted C									0.02	0.08	0.25	0.799	0.06	0.09	0.64	0.524	-0.02	0.12	-0.15	0.884	0.06	0.09	0.64	0.520
Observations	5336				5336				5336				5336				5336				5336
R² / R² adjusted	0.173 / 0.170				0.224 / 0.220				0.229 / 0.226				0.231 / 0.227				0.238 / 0.234				0.229 / 0.226

Anova and Eta Square

calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")

Etasq Table: LessMore Paradigm
	Condition	Panel	LessMoreCondition:Panel
LessMore.PAN	0.12958***	0.03699***	0.00642***
LessMore.PAN_FACE	0.12958***	0.01349***	0.00148
LessMore.PAN_Pred_C.reg_FAE	0.12958***	0.01202***	0.00137
LessMore.PAN_Pred_C.net_FAE	0.12958***	0.01191***	0.00137
LessMore.PAN_Pred_C.rf_FAE	0.12958***	0.00983***	0.00136
LessMore.PAN_Pred_C.gbm_FAE	0.12958***	0.01089***	0.00142

2.1.1.5.4 Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_MP1.US.gbm.C)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm.C)
Sunk.PAN_Pred_C.reg_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_MP1.US.reg.C)
Sunk.PAN_Pred_C.net_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.net.C)
Sunk.PAN_Pred_C.rf_FAE <- lm(numSunkCost ~ SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_MP1.US.rf.C)
Sunk.PAN_Pred_C.gbm_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm.C)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_C.reg_FAE" = Sunk.PAN_Pred_C.reg_FAE,
  "Sunk.PAN_Pred_C.net_FAE" = Sunk.PAN_Pred_C.net_FAE,
  "Sunk.PAN_Pred_C.rf_FAE" = Sunk.PAN_Pred_C.rf_FAE, #0.302 with unscaled
  "Sunk.PAN_Pred_C.gbm_FAE" = Sunk.PAN_Pred_C.gbm_FAE
)

# extract_adjusted_r_squared(Sunk_model)

Full Model Results

term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_C.reg_FAE,Sunk.PAN_Pred_C.net_FAE,Sunk.PAN_Pred_C.rf_FAE,Sunk.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_C(reg)_FAE", 
                        "PAN_Pred_C(net)_FAE", 
                        "PAN_Pred_C(rf)_FAE", 
                        "PAN_Pred_C(gbm)_FAE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result

Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred_C(reg)_FAE				PAN_Pred_C(net)_FAE				PAN_Pred_C(rf)_FAE				PAN_Pred_C(gbm)_FAE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.46	0.14	39.29	<0.001	5.46	0.14	39.54	<0.001	5.47	0.14	39.55	<0.001	5.47	0.14	39.56	<0.001	5.46	0.14	39.57	<0.001	5.46	0.14	39.44	<0.001
SunkCondition [PAID]	0.70	0.20	3.48	0.001	0.70	0.20	3.52	<0.001	0.71	0.20	3.57	<0.001	0.72	0.20	3.61	<0.001	0.67	0.20	3.38	0.001	0.74	0.20	3.70	<0.001
F					0.32	0.11	2.82	0.005	0.34	0.12	2.95	0.003	0.33	0.12	2.87	0.004	0.37	0.11	3.23	0.001	0.39	0.12	3.37	0.001
A					0.59	0.14	4.33	<0.001	0.53	0.14	3.65	<0.001	0.51	0.15	3.48	0.001	0.58	0.15	3.90	<0.001	0.60	0.14	4.31	<0.001
C					0.10	0.06	1.71	0.087
E					0.20	0.08	2.32	0.021	0.21	0.09	2.50	0.013	0.22	0.09	2.54	0.011	0.20	0.09	2.35	0.019	0.19	0.09	2.27	0.023
SunkCondition [PAID] × F					0.18	0.16	1.10	0.270	0.12	0.16	0.76	0.450	0.12	0.16	0.71	0.479	0.05	0.16	0.34	0.737	0.04	0.16	0.27	0.790
SunkCondition [PAID] × A					0.02	0.19	0.11	0.914	-0.05	0.20	-0.23	0.814	-0.06	0.21	-0.30	0.766	-0.30	0.21	-1.42	0.156	-0.13	0.20	-0.67	0.501
SunkCondition [PAID] × C					-0.02	0.08	-0.18	0.854
SunkCondition [PAID] × E					-0.04	0.12	-0.31	0.753	-0.02	0.12	-0.18	0.860	-0.02	0.12	-0.15	0.883	0.03	0.12	0.21	0.837	0.01	0.12	0.04	0.965
Predicted C									0.11	0.13	0.90	0.371	0.16	0.14	1.11	0.266	-0.03	0.18	-0.14	0.888	-0.12	0.14	-0.85	0.397
SunkCondition [PAID] × Predicted C									0.18	0.18	1.03	0.304	0.23	0.20	1.14	0.256	0.89	0.26	3.45	0.001	0.54	0.19	2.79	0.005
Observations	5336				5336				5336				5336				5336				5336
R² / R² adjusted	0.025 / 0.022				0.040 / 0.035				0.040 / 0.035				0.040 / 0.036				0.043 / 0.038				0.040 / 0.036

Anova and Eta Square

calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")

Etasq Table: SunkCost Paradigm
	Condition	Panel	LessMoreCondition:Panel
Sunk.PAN	0.01083***	0.00971***	0.00447**
Sunk.PAN_FACE	0.01083***	0.00563***	0.00226
Sunk.PAN_Pred_C.reg_FAE	0.01083***	0.00644***	0.00215
Sunk.PAN_Pred_C.net_FAE	0.01083***	0.00659***	0.00215
Sunk.PAN_Pred_C.rf_FAE	0.01083***	0.00637***	0.00163
Sunk.PAN_Pred_C.gbm_FAE	0.01083***	0.00605***	0.0018

2.1.1.5.5 Summary

Model Fit

\(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)
note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.tst_MP1.US.reg.C,family=binomial)


R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_C.reg_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.net_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.rf_FAE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_C.gbm_FAE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.tst_MP1.US.reg.C,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_C.reg_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.net_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.rf_FAE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_C.gbm_FAE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(LessMore.PAN_Pred_C.rf_FAE)$r.squared,
                     summary(LessMore.PAN_Pred_C.gbm_FAE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(Sunk.PAN_Pred_C.rf_FAE)$r.squared,
                     summary(Sunk.PAN_Pred_C.gbm_FAE)$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "SunkCost"))



R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_C.reg_FAE), 
                    AIC(Default.PAN_Pred_C.net_FAE), 
                    AIC(Default.PAN_Pred_C.rf_FAE),
                    AIC(Default.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_C.reg_FAE), 
                    AIC(Disease.PAN_Pred_C.net_FAE), 
                    AIC(Disease.PAN_Pred_C.rf_FAE),
                    AIC(Disease.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Disease"))


AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PC(reg)+FAE",  
                                   "Pan+PC(net)+FAE", 
                                   "Pan+PC(rf)+FAE",
                                   "Pan+PC(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))

avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample", digits = 2)

Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample
Model	Avg_Improvement
Pan	0.00
Pan+FACE	30.51
Pan+Pred.C(reg)+FAE	31.05
Pan+Pred.C(net)+FAE	31.65
Pan+Pred.C(rf)+FAE	35.18
Pan+Pred.C(gbm)+FAE	31.12
Pan+FACE (Full Study 5)	43.20

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data", digits = 2)

Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data
Model	Avg_Improvement
Pan	0.00
Pan+FACE	30.51
Pan+Pred.C(reg)+FAE	31.05
Pan+Pred.C(net)+FAE	31.65
Pan+Pred.C(rf)+FAE	35.18
Pan+Pred.C(gbm)+FAE	31.12
Pan+FACE (Full Study 5)	43.20

C*Cond Model Coefficients

DVs for Default & Framing are 1/0
DVs for LessMore & SunkCost are on the original scale

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_C.reg_FAE),confint(Default.PAN_Pred_C.reg_FAE)),
cbind(coef(Default.PAN_Pred_C.net_FAE),confint(Default.PAN_Pred_C.net_FAE)),
cbind(coef(Default.PAN_Pred_C.rf_FAE),confint(Default.PAN_Pred_C.rf_FAE)),
cbind(coef(Default.PAN_Pred_C.gbm_FAE),confint(Default.PAN_Pred_C.gbm_FAE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_C.reg_FAE),confint(Disease.PAN_Pred_C.reg_FAE)),
cbind(coef(Disease.PAN_Pred_C.net_FAE),confint(Disease.PAN_Pred_C.net_FAE)),
cbind(coef(Disease.PAN_Pred_C.rf_FAE),confint(Disease.PAN_Pred_C.rf_FAE)),
cbind(coef(Disease.PAN_Pred_C.gbm_FAE),confint(Disease.PAN_Pred_C.gbm_FAE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_C.reg_FAE),confint(LessMore.PAN_Pred_C.reg_FAE)),
cbind(coef(LessMore.PAN_Pred_C.net_FAE),confint(LessMore.PAN_Pred_C.net_FAE)),
cbind(coef(LessMore.PAN_Pred_C.rf_FAE),confint(LessMore.PAN_Pred_C.rf_FAE)),
cbind(coef(LessMore.PAN_Pred_C.gbm_FAE),confint(LessMore.PAN_Pred_C.gbm_FAE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_C.reg_FAE),confint(Sunk.PAN_Pred_C.reg_FAE)),
cbind(coef(Sunk.PAN_Pred_C.net_FAE),confint(Sunk.PAN_Pred_C.net_FAE)),
cbind(coef(Sunk.PAN_Pred_C.rf_FAE),confint(Sunk.PAN_Pred_C.rf_FAE)),
cbind(coef(Sunk.PAN_Pred_C.gbm_FAE),confint(Sunk.PAN_Pred_C.gbm_FAE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))


colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq

note that the y-axes for different paradigms are not aligned..
We have observed a slight improvement in explaining heterogeneity across all methods, compared to what we initially had. I think this is due to our predictors being more refined: For example, we used longitude and latitude info to swap “minutes since midnight (EST)” to “clock time.

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Disease$Paradigm <- "Disease"
variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"

Model <- c("Pan", "Pan+FACE", "Pan+Pred.C(reg)+FAE", "Pan+Pred.C(net)+FAE", "Pan+Pred.C(rf)+FAE", "Pan+Pred.C(gbm)+FAE")

deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)


split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study1 <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study1 <- etasq_study1 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study1)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1", digits = 2)

Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1
Model	Avg_Reduction
Pan	0.00
Pan+FACE	56.30
Pan+Pred.C(reg)+FAE	64.79
Pan+Pred.C(net)+FAE	65.42
Pan+Pred.C(rf)+FAE	65.95
Pan+Pred.C(gbm)+FAE	65.40
Pan (Full Study5)	0.00
Pan+FACE (Full Study5)	74.19

3. Predict both F and C

3.0 Evaluate Accuracy with Study 5 held-out Data

data.tst_M5a.reg.FC<-cbind(data.tst_M5a.reg.C, Predicted_F = pred_M5a_reg)
data.tst_M5a.net.FC<-cbind(data.tst_M5a.net.C, Predicted_F = pred_M5a_net)
data.tst_M5a.rf.FC<-cbind(data.tst_M5a.rf.C, Predicted_F = pred_M5a_rf)
data.tst_M5a.gbm.FC<-cbind(data.tst_M5a.gbm.C, Predicted_F = pred_M5a_gbm)
data.tst_M5a.gbm.FC$rotsum<-data.tst_M5a.gbm$rotsum
data.tst_M5a.gbm.FC$MXsum<-data.tst_M5a.gbm$MXsum
data.tst_M5a.gbm.FC$crt2_score<-data.tst_M5a.gbm$crt2_score

Correlations Btw PF5, PC5, and Study 5 FACE

Linear Regression

ggpairs(data.tst_M5a.reg.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net

ggpairs(data.tst_M5a.net.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest

ggpairs(data.tst_M5a.rf.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.tst_M5a.gbm.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PF, PC, and F and C’s composite measures

ggpairs(data.tst_M5a.gbm.FC, c("Predicted_F","ATTN1__Correct","crt2_score", "rotsum", "MXsum","Antonym","Synonym","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Explain Heterogeniety with PF5 and PC5

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.reg_AE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.reg.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.net_AE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.net.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.rf_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.rf.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.gbm_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_FC.reg_AE" = Default.PAN_Pred_FC.reg_AE,
  "Default.PAN_Pred_FC.net_AE" = Default.PAN_Pred_FC.net_AE,
  "Default.PAN_Pred_FC.rf_AE" = Default.PAN_Pred_FC.rf_AE,
  "Default.PAN_Pred_FC.gbm_AE" = Default.PAN_Pred_FC.gbm_AE
)

Model Results

term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_FC.reg_AE,Default.PAN_Pred_FC.net_AE,Default.PAN_Pred_FC.rf_AE,Default.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result

Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.FC(reg)_AE				PAN_Pred.FC(net)_AE				PAN_Pred.FC(rf)_AE				PAN_Pred.FC(gbm)_AE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.96	0.28	-0.15	0.884	0.94	0.28	-0.21	0.837	0.90	0.29	-0.33	0.739	0.87	0.29	-0.43	0.666	0.88	0.28	-0.39	0.696	0.92	0.29	-0.25	0.803
DefaultCondition [OPTOUT]	1.41	0.58	0.83	0.409	1.44	0.61	0.86	0.391	1.55	0.73	0.93	0.352	1.62	0.78	0.99	0.320	1.59	0.74	1.00	0.317	1.38	0.63	0.71	0.477
F					0.87	0.35	-0.35	0.725
A					0.85	0.17	-0.79	0.431	0.72	0.15	-1.54	0.124	0.76	0.16	-1.29	0.196	0.79	0.16	-1.16	0.248	0.80	0.16	-1.10	0.271
C					1.69	0.58	1.53	0.127
E					0.88	0.16	-0.72	0.474	0.91	0.15	-0.58	0.560	0.91	0.15	-0.59	0.553	0.88	0.14	-0.79	0.427	0.87	0.15	-0.81	0.419
DefaultCondition [OPTOUT] × F					1.35	0.84	0.49	0.627
DefaultCondition [OPTOUT] × A					1.07	0.31	0.24	0.814	1.28	0.39	0.81	0.418	1.21	0.37	0.63	0.527	1.07	0.33	0.23	0.817	1.17	0.35	0.51	0.610
DefaultCondition [OPTOUT] × C					0.68	0.34	-0.77	0.443
DefaultCondition [OPTOUT] × E					1.68	0.50	1.74	0.082	1.48	0.38	1.54	0.122	1.47	0.37	1.52	0.129	1.51	0.38	1.63	0.102	1.58	0.41	1.76	0.078
Predicted F									0.97	0.34	-0.09	0.927	1.02	0.40	0.04	0.968	0.83	0.31	-0.49	0.622	0.78	0.30	-0.64	0.522
Predicted C									2.60	1.30	1.92	0.055	2.22	1.28	1.38	0.167	2.86	1.63	1.84	0.066	2.81	1.37	2.12	0.034
DefaultCondition [OPTOUT] × Predicted F									0.82	0.47	-0.34	0.732	0.77	0.50	-0.41	0.682	0.72	0.43	-0.56	0.578	1.33	0.83	0.45	0.654
DefaultCondition [OPTOUT] × Predicted C									0.57	0.43	-0.74	0.457	0.68	0.61	-0.43	0.670	1.02	0.93	0.02	0.983	0.41	0.31	-1.18	0.239
Observations	292				292				292				292				292				292
AIC	389.079				395.399				392.482				395.287				393.751				394.135
log-Likelihood	-188.540				-183.699				-182.241				-183.644				-182.876				-183.067

Anova and Eta Square

calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")

Etasq Table: Default Paradigm
	Condition	Panel	Default Condition:Panel
Default.PAN	0.02968***	0.0095	0.02393**
Default.PAN_FACE	0.02968***	0.00305	0.00314
Default.PAN_Pred_FC.reg_AE	0.02968***	0.00372	0.00401
Default.PAN_Pred_FC.net_AE	0.02968***	0.0039	0.00409
Default.PAN_Pred_FC.rf_AE	0.02968***	0.0028	0.00517
Default.PAN_Pred_FC.gbm_AE	0.02968***	0.00252	0.00168

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_M5a.gbm.FC, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_M5a.gbm.FC, family = binomial)

Disease.PAN_Pred_FC.reg_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.reg.FC, family = binomial)

Disease.PAN_Pred_FC.net_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.net.FC, family = binomial)

Disease.PAN_Pred_FC.rf_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.rf.FC, family = binomial)

Disease.PAN_Pred_FC.gbm_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_M5a.gbm.FC, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_FC.reg_AE" = Disease.PAN_Pred_FC.reg_AE,
  "Disease.PAN_Pred_FC.net_AE" = Disease.PAN_Pred_FC.net_AE,
  "Disease.PAN_Pred_FC.rf_AE" = Disease.PAN_Pred_FC.rf_AE, 
  "Disease.PAN_Pred_FC.gbm_AE" = Disease.PAN_Pred_FC.gbm_AE
)

# extract_aic(Disease_model)

Model Results

term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_FC.reg_AE,Disease.PAN_Pred_FC.net_AE,Disease.PAN_Pred_FC.rf_AE,Disease.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result

Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.FC(reg)_AE				PAN_Pred.FC(net)_AE				PAN_Pred.FC(rf)_AE				PAN_Pred.FC(gbm)_AE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.48	0.13	-2.67	0.008	0.44	0.13	-2.80	0.005	0.36	0.12	-3.08	0.002	0.35	0.12	-3.05	0.002	0.43	0.14	-2.66	0.008	0.41	0.13	-2.80	0.005
DiseaseCondition [LOSS]	4.04	1.82	3.09	0.002	4.19	1.97	3.04	0.002	4.29	2.25	2.79	0.005	4.31	2.32	2.71	0.007	3.24	1.66	2.30	0.022	3.52	1.78	2.49	0.013
F					2.42	1.16	1.84	0.065
A					1.19	0.23	0.90	0.369	1.20	0.24	0.90	0.370	1.25	0.25	1.09	0.275	1.29	0.26	1.25	0.212	1.24	0.24	1.07	0.287
C					0.41	0.17	-2.20	0.028
E					1.07	0.21	0.33	0.745	0.95	0.16	-0.33	0.745	0.93	0.16	-0.40	0.690	0.94	0.16	-0.34	0.733	0.95	0.16	-0.28	0.780
DiseaseCondition [LOSS] × F					0.92	0.64	-0.12	0.902
DiseaseCondition [LOSS] × A					1.49	0.50	1.17	0.243	1.69	0.59	1.53	0.127	1.63	0.56	1.42	0.154	1.53	0.53	1.24	0.216	1.55	0.52	1.29	0.196
DiseaseCondition [LOSS] × C					1.17	0.65	0.28	0.780
DiseaseCondition [LOSS] × E					0.77	0.24	-0.86	0.392	0.74	0.20	-1.11	0.265	0.75	0.20	-1.06	0.289	0.77	0.20	-1.00	0.315	0.78	0.21	-0.93	0.353
Predicted F									2.62	1.12	2.26	0.024	3.16	1.56	2.33	0.020	2.21	0.97	1.80	0.072	2.54	1.19	1.99	0.047
Predicted C									0.32	0.17	-2.14	0.032	0.21	0.14	-2.29	0.022	0.29	0.19	-1.93	0.054	0.29	0.16	-2.22	0.027
DiseaseCondition [LOSS] × Predicted F									0.69	0.44	-0.59	0.558	0.62	0.44	-0.67	0.503	1.05	0.68	0.07	0.945	0.93	0.66	-0.10	0.920
DiseaseCondition [LOSS] × Predicted C									1.18	0.95	0.21	0.836	1.57	1.54	0.46	0.644	1.13	1.12	0.13	0.899	1.22	1.01	0.24	0.812
Observations	292				292				292				292				292				292
AIC	363.491				356.242				356.789				356.320				357.827				356.696
log-Likelihood	-175.745				-164.121				-164.395				-164.160				-164.914				-164.348

Anova and Eta Square

calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")

Etasq Table: Disease Paradigm
	Condition	Panel	DiseaseCondition:Panel
Disease.PAN	0.03974***	0.03523**	0.01646*
Disease.PAN_FACE	0.03974***	0.0196*	0.00098
Disease.PAN_Pred_FC.reg_AE	0.03974***	0.01486.	0.00148
Disease.PAN_Pred_FC.net_AE	0.03974***	0.01624*	0.00126
Disease.PAN_Pred_FC.rf_AE	0.03974***	0.01595*	0.00046
Disease.PAN_Pred_FC.gbm_AE	0.03974***	0.01836*	0.00056

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_M5a.gbm.FC)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.gbm.FC)
LessMore.PAN_Pred_FC.reg_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.reg.FC)
LessMore.PAN_Pred_FC.net_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.net.FC)
LessMore.PAN_Pred_FC.rf_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.rf.FC)
LessMore.PAN_Pred_FC.gbm_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.gbm.FC)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_FC.reg_AE" = LessMore.PAN_Pred_FC.reg_AE,
  "LessMore.PAN_Pred_FC.net_AE" = LessMore.PAN_Pred_FC.net_AE,
  "LessMore.PAN_Pred_FC.rf_AE" = LessMore.PAN_Pred_FC.rf_AE, 
  "LessMore.PAN_Pred_FC.gbm_AE" = LessMore.PAN_Pred_FC.gbm_AE
)

Model Results

term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_FC.reg_AE,LessMore.PAN_Pred_FC.net_AE,LessMore.PAN_Pred_FC.rf_AE,LessMore.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result

LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.FC(reg)_AE				PAN_Pred.FC(net)_AE				PAN_Pred.FC(rf)_AE				PAN_Pred.FC(gbm)_AE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.51	0.21	25.98	<0.001	5.52	0.21	26.19	<0.001	5.51	0.24	23.16	<0.001	5.48	0.24	22.38	<0.001	5.50	0.23	23.69	<0.001	5.57	0.23	24.49	<0.001
LessMoreCondition [SCARF]	0.76	0.29	2.65	0.009	0.83	0.29	2.89	0.004	0.75	0.31	2.39	0.017	0.78	0.32	2.41	0.017	0.69	0.31	2.24	0.026	0.67	0.30	2.19	0.029
F					-0.03	0.25	-0.11	0.913
A					-0.26	0.14	-1.88	0.062	-0.27	0.14	-1.88	0.062	-0.23	0.14	-1.64	0.102	-0.24	0.14	-1.67	0.096	-0.23	0.14	-1.63	0.105
C					0.16	0.21	0.77	0.442
E					-0.02	0.12	-0.17	0.863	-0.02	0.10	-0.21	0.832	-0.02	0.10	-0.22	0.826	-0.02	0.10	-0.23	0.818	-0.06	0.10	-0.54	0.591
LessMoreCondition [SCARF] × F					0.38	0.40	0.95	0.345
LessMoreCondition [SCARF] × A					0.51	0.18	2.78	0.006	0.55	0.19	2.91	0.004	0.48	0.19	2.54	0.012	0.45	0.19	2.36	0.019	0.47	0.19	2.52	0.012
LessMoreCondition [SCARF] × C					-0.24	0.32	-0.75	0.452
LessMoreCondition [SCARF] × E					0.05	0.18	0.28	0.780	-0.03	0.15	-0.17	0.867	-0.02	0.15	-0.12	0.906	-0.01	0.15	-0.06	0.953	0.04	0.15	0.25	0.803
Predicted F									-0.02	0.25	-0.07	0.945	0.12	0.28	0.43	0.668	0.09	0.26	0.33	0.739	-0.15	0.27	-0.57	0.569
Predicted C									0.16	0.31	0.51	0.611	-0.12	0.38	-0.32	0.746	-0.07	0.37	-0.18	0.858	0.15	0.30	0.48	0.629
LessMoreCondition [SCARF] × Predicted F									0.33	0.36	0.92	0.360	0.12	0.41	0.29	0.776	0.40	0.36	1.10	0.274	0.64	0.39	1.64	0.103
LessMoreCondition [SCARF] × Predicted C									-0.42	0.47	-0.88	0.381	0.15	0.58	0.26	0.798	-0.03	0.57	-0.05	0.960	-0.41	0.48	-0.84	0.400
Observations	292				292				292				292				292				292
R² / R² adjusted	0.208 / 0.194				0.249 / 0.214				0.246 / 0.211				0.247 / 0.212				0.257 / 0.222				0.252 / 0.217

Anova and Eta Square

calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")

Etasq Table: LessMore Paradigm
	Condition	Panel	LessMoreCondition:Panel
LessMore.PAN	0.08314***	0.12202***	0.00272
LessMore.PAN_FACE	0.08314***	0.04611***	0.00099
LessMore.PAN_Pred_FC.reg_AE	0.08314***	0.05379***	0.00016
LessMore.PAN_Pred_FC.net_AE	0.08314***	0.05187***	0.00105
LessMore.PAN_Pred_FC.rf_AE	0.08314***	0.04951***	0.00117
LessMore.PAN_Pred_FC.gbm_AE	0.08314***	0.04768***	0.00108

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_M5a.gbm.FC)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.gbm.FC)
Sunk.PAN_Pred_FC.reg_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_M5a.reg.FC)
Sunk.PAN_Pred_FC.net_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.net.FC)
Sunk.PAN_Pred_FC.rf_AE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_M5a.rf.FC)
Sunk.PAN_Pred_FC.gbm_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.gbm.FC)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_FC.reg_ACE" = Sunk.PAN_Pred_FC.reg_AE,
  "Sunk.PAN_Pred_FC.net_ACE" = Sunk.PAN_Pred_FC.net_AE,
  "Sunk.PAN_Pred_FC.rf_ACE" = Sunk.PAN_Pred_FC.rf_AE, #0.302 with unscaled
  "Sunk.PAN_Pred_FC.gbm_ACE" = Sunk.PAN_Pred_FC.gbm_AE
)

# extract_adjusted_r_squared(Sunk_model)

Full Model Results

term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_FC.reg_AE,Sunk.PAN_Pred_FC.net_AE,Sunk.PAN_Pred_FC.rf_AE,Sunk.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_FC(reg)_AE", 
                        "PAN_Pred_FC(net)_AE", 
                        "PAN_Pred_FC(rf)_AE", 
                        "PAN_Pred_FC(gbm)_AE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result

Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred_FC(reg)_AE				PAN_Pred_FC(net)_AE				PAN_Pred_FC(rf)_AE				PAN_Pred_FC(gbm)_AE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.51	0.36	15.18	<0.001	5.51	0.36	15.31	<0.001	5.50	0.41	13.46	<0.001	5.51	0.42	13.07	<0.001	5.64	0.41	13.80	<0.001	5.54	0.40	13.80	<0.001
SunkCondition [PAID]	0.14	0.54	0.26	0.792	0.06	0.53	0.12	0.906	-0.17	0.59	-0.29	0.771	-0.17	0.60	-0.28	0.783	0.02	0.59	0.04	0.969	0.01	0.58	0.02	0.985
F					0.60	0.49	1.21	0.226
A					0.22	0.24	0.93	0.355	0.08	0.25	0.32	0.749	0.12	0.25	0.45	0.650	0.10	0.26	0.39	0.695	0.15	0.25	0.59	0.557
C					-1.07	0.42	-2.52	0.012
E					0.07	0.24	0.29	0.771	-0.02	0.20	-0.11	0.913	-0.03	0.20	-0.13	0.894	-0.09	0.20	-0.45	0.656	-0.04	0.21	-0.17	0.863
SunkCondition [PAID] × F					0.53	0.73	0.72	0.469
SunkCondition [PAID] × A					-0.50	0.34	-1.46	0.144	-0.22	0.35	-0.63	0.528	-0.24	0.35	-0.68	0.499	-0.30	0.36	-0.83	0.409	-0.33	0.35	-0.93	0.351
SunkCondition [PAID] × C					0.05	0.60	0.08	0.937
SunkCondition [PAID] × E					0.05	0.33	0.15	0.883	-0.07	0.28	-0.24	0.811	-0.04	0.28	-0.16	0.874	-0.00	0.28	-0.01	0.992	-0.05	0.29	-0.16	0.870
Predicted F									-0.04	0.45	-0.09	0.925	0.02	0.52	0.03	0.976	-0.51	0.48	-1.06	0.289	0.01	0.51	0.02	0.986
Predicted C									0.17	0.62	0.27	0.787	-0.02	0.76	-0.02	0.981	0.58	0.74	0.78	0.435	-0.13	0.62	-0.21	0.832
SunkCondition [PAID] × Predicted F									0.98	0.66	1.47	0.143	1.00	0.76	1.33	0.186	0.41	0.69	0.60	0.550	0.31	0.73	0.43	0.669
SunkCondition [PAID] × Predicted C									-1.68	0.89	-1.90	0.059	-1.77	1.08	-1.64	0.102	-0.46	1.09	-0.42	0.676	-0.51	0.91	-0.56	0.575
Observations	292				292				292				292				292				292
R² / R² adjusted	0.040 / 0.023				0.086 / 0.043				0.063 / 0.019				0.062 / 0.018				0.048 / 0.003				0.047 / 0.003

Anova and Eta Square

calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")

Etasq Table: SunkCost Paradigm
	Condition	Panel	LessMoreCondition:Panel
Sunk.PAN	0.00467	0.02588*	0.00922
Sunk.PAN_FACE	0.00467	0.01253	0.0039
Sunk.PAN_Pred_FC.reg_ACE	0.00467	0.01676.	0.01312
Sunk.PAN_Pred_FC.net_ACE	0.00467	0.01768.	0.01463
Sunk.PAN_Pred_FC.rf_ACE	0.00467	0.0229*	0.00722
Sunk.PAN_Pred_FC.gbm_ACE	0.00467	0.01779.	0.00956

Summary

Model Fit

\(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)
Model order: consistently (1) PAN only; (2) PAN + FACE; (3) PAN + Pred_C(reg)ACE; (4) PAN + Pred_C(net)ACE; (5) PAN + Pred_C(rf)ACE; (6) PAN + Pred_C(gbm)ACE
note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.tst_M5a.gbm.FC,family=binomial)

R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_FC.reg_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.net_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.rf_AE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_FC.gbm_AE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.tst_M5a.gbm.FC,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_FC.reg_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.net_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.rf_AE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_FC.gbm_AE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(LessMore.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(LessMore.PAN_Pred_FC.gbm_AE)$r.squared),
                      "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(Sunk.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(Sunk.PAN_Pred_FC.gbm_AE)$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "SunkCost"))

R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),5))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_FC.reg_AE), 
                    AIC(Default.PAN_Pred_FC.net_AE), 
                    AIC(Default.PAN_Pred_FC.rf_AE),
                    AIC(Default.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_FC.reg_AE), 
                    AIC(Disease.PAN_Pred_FC.net_AE), 
                    AIC(Disease.PAN_Pred_FC.rf_AE),
                    AIC(Disease.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  geom_text(aes(label = round(AIC,2)), vjust = -0.5, size = 3, fontface = "bold") + 
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms with study 5 test data", digits = 2)

Percentage Improvement in R2 (compared to PAN) Across Paradigms with study 5 test data
Model	Avg_Improvement
Pan	0.00
Pan+FACE	59.96
Pan+PFC(reg)+AE	47.45
Pan+PFC(net)+AE	44.44
Pan+PFC(rf)+AE	37.39
Pan+PFC(gbm)+AE	36.59
Pan+FACE (Full Study 5)	43.20

Etasq

note that the y-axes for different paradigms are not aligned..

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.FC(reg)+AE", "Pan+Pred.FC(net)+AE", "Pan+Pred.FC(rf)+AE", "Pan+Pred.FC(gbm)+AE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.FC(reg)+AE",  
                                   "Pan+Pred.FC(net)+AE", 
                                   "Pan+Pred.FC(rf)+AE",
                                   "Pan+Pred.FC(gbm)+AE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 5 test sample", digits = 2)

Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 5 test sample
Model	Avg_Reduction
Pan	0.00
Pan+FACE	75.56
Pan+Pred.FC(reg)+AE	56.52
Pan+Pred.FC(net)+AE	44.49
Pan+Pred.FC(rf)+AE	63.57
Pan+Pred.FC(gbm)+AE	61.55
Pan (Full Study5)	0.00
Pan+FACE (Full Study5)	74.19

F/C*Cond Model Coefficients

DVs for Default & Framing were 1/0
DVs for LessMore & SunkCost are on original scale

Default.PAN_FACE.Study5<-glm(numDefault~F*DefaultCondition+A*DefaultCondition+C*DefaultCondition+E*DefaultCondition+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~F*DiseaseCondition+A*DiseaseCondition+C*DiseaseCondition+E*DiseaseCondition+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~F*LessMoreCondition+A*LessMoreCondition+C*LessMoreCondition+E*LessMoreCondition+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~F*SunkCondition+A*SunkCondition+C*SunkCondition+E*SunkCondition+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_FC.reg_AE),confint(Default.PAN_Pred_FC.reg_AE)),
cbind(coef(Default.PAN_Pred_FC.net_AE),confint(Default.PAN_Pred_FC.net_AE)),
cbind(coef(Default.PAN_Pred_FC.rf_AE),confint(Default.PAN_Pred_FC.rf_AE)),
cbind(coef(Default.PAN_Pred_FC.gbm_AE),confint(Default.PAN_Pred_FC.gbm_AE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_FC.reg_AE),confint(Disease.PAN_Pred_FC.reg_AE)),
cbind(coef(Disease.PAN_Pred_FC.net_AE),confint(Disease.PAN_Pred_FC.net_AE)),
cbind(coef(Disease.PAN_Pred_FC.rf_AE),confint(Disease.PAN_Pred_FC.rf_AE)),
cbind(coef(Disease.PAN_Pred_FC.gbm_AE),confint(Disease.PAN_Pred_FC.gbm_AE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_FC.reg_AE),confint(LessMore.PAN_Pred_FC.reg_AE)),
cbind(coef(LessMore.PAN_Pred_FC.net_AE),confint(LessMore.PAN_Pred_FC.net_AE)),
cbind(coef(LessMore.PAN_Pred_FC.rf_AE),confint(LessMore.PAN_Pred_FC.rf_AE)),
cbind(coef(LessMore.PAN_Pred_FC.gbm_AE),confint(LessMore.PAN_Pred_FC.gbm_AE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_FC.reg_AE),confint(Sunk.PAN_Pred_FC.reg_AE)),
cbind(coef(Sunk.PAN_Pred_FC.net_AE),confint(Sunk.PAN_Pred_FC.net_AE)),
cbind(coef(Sunk.PAN_Pred_FC.rf_AE),confint(Sunk.PAN_Pred_FC.rf_AE)),
cbind(coef(Sunk.PAN_Pred_FC.gbm_AE),confint(Sunk.PAN_Pred_FC.gbm_AE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24),levels = c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)","C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

3.1 Evaluate Accuracy with Study 5 FULL Data

data.full_M5a.reg.FC<-cbind(data.full_M5a.reg.C, Predicted_F = pred_full_M5a_reg)
data.full_M5a.net.FC<-cbind(data.full_M5a.net.C, Predicted_F = pred_full_M5a_net)
data.full_M5a.rf.FC<-cbind(data.full_M5a.rf.C, Predicted_F = pred_full_M5a_rf)
data.full_M5a.gbm.FC<-cbind(data.full_M5a.gbm.C, Predicted_F = pred_full_M5a_gbm)
data.full_M5a.gbm.FC$rotsum<-data.full_M5a.gbm$rotsum
data.full_M5a.gbm.FC$MXsum<-data.full_M5a.gbm$MXsum
data.full_M5a.gbm.FC$crt2_score<-data.full_M5a.gbm$crt2_score

Correlations Btw PF5, PC5, and Study 5 FACE

Linear Regression

ggpairs(data.full_M5a.reg.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net

ggpairs(data.full_M5a.net.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest

ggpairs(data.full_M5a.rf.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.full_M5a.gbm.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PF, PC, and F and C’s composite measures

ggpairs(data.full_M5a.gbm.FC, c("Predicted_F","ATTN1__Correct","crt2_score", "rotsum", "MXsum","Antonym","Synonym","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Explain Heterogeniety with PF5 and PC5

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.full_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.full_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.reg_AE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.reg.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.net_AE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.net.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.rf_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.rf.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.gbm_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_FC.reg_AE" = Default.PAN_Pred_FC.reg_AE,
  "Default.PAN_Pred_FC.net_AE" = Default.PAN_Pred_FC.net_AE,
  "Default.PAN_Pred_FC.rf_AE" = Default.PAN_Pred_FC.rf_AE,
  "Default.PAN_Pred_FC.gbm_AE" = Default.PAN_Pred_FC.gbm_AE
)

Model Results

term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_FC.reg_AE,Default.PAN_Pred_FC.net_AE,Default.PAN_Pred_FC.rf_AE,Default.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result

Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.FC(reg)_AE				PAN_Pred.FC(net)_AE				PAN_Pred.FC(rf)_AE				PAN_Pred.FC(gbm)_AE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	1.05	0.13	0.38	0.703	1.05	0.14	0.38	0.702	0.97	0.13	-0.23	0.818	0.94	0.13	-0.42	0.678	1.02	0.14	0.11	0.909	1.00	0.14	0.02	0.985
DefaultCondition [OPTOUT]	1.74	0.32	3.01	0.003	1.76	0.33	3.02	0.003	1.94	0.39	3.27	0.001	1.99	0.41	3.33	0.001	1.77	0.34	2.98	0.003	1.82	0.36	3.04	0.002
F					1.02	0.19	0.13	0.894
A					0.95	0.08	-0.54	0.590	0.91	0.08	-1.13	0.259	0.92	0.08	-0.99	0.323	0.92	0.08	-0.96	0.335	0.92	0.08	-1.02	0.308
C					1.72	0.25	3.78	<0.001
E					1.04	0.09	0.40	0.686	1.06	0.08	0.79	0.428	1.05	0.08	0.62	0.533	1.04	0.08	0.46	0.648	1.05	0.08	0.68	0.496
DefaultCondition [OPTOUT] × F					1.25	0.34	0.83	0.408
DefaultCondition [OPTOUT] × A					0.84	0.11	-1.32	0.188	0.96	0.13	-0.27	0.785	0.97	0.13	-0.25	0.806	0.89	0.12	-0.87	0.385	0.93	0.12	-0.54	0.586
DefaultCondition [OPTOUT] × C					0.62	0.13	-2.29	0.022
DefaultCondition [OPTOUT] × E					1.09	0.14	0.68	0.499	0.96	0.11	-0.40	0.689	0.96	0.11	-0.37	0.711	1.02	0.12	0.20	0.841	0.98	0.11	-0.15	0.884
Predicted F									1.08	0.18	0.44	0.657	1.07	0.20	0.38	0.700	0.93	0.17	-0.37	0.714	0.96	0.17	-0.22	0.822
Predicted C									2.00	0.43	3.19	0.001	2.02	0.52	2.75	0.006	2.46	0.49	4.54	<0.001	2.49	0.56	4.08	<0.001
DefaultCondition [OPTOUT] × Predicted F									0.87	0.22	-0.55	0.580	0.92	0.26	-0.31	0.758	1.10	0.30	0.33	0.739	1.04	0.28	0.16	0.873
DefaultCondition [OPTOUT] × Predicted C									0.52	0.16	-2.08	0.037	0.44	0.17	-2.17	0.030	0.51	0.15	-2.33	0.020	0.43	0.14	-2.60	0.009
Observations	1460				1460				1460				1460				1460				1460
AIC	1884.213				1858.463				1869.271				1873.485				1858.182				1863.784
log-Likelihood	-936.107				-915.232				-920.635				-922.742				-915.091				-917.892

Anova and Eta Square

calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")

Etasq Table: Default Paradigm
	Condition	Panel	Default Condition:Panel
Default.PAN	0.03801***	0.00298.	0.01188***
Default.PAN_FACE	0.03801***	0.00051	0.00159
Default.PAN_Pred_FC.reg_AE	0.03801***	3e-04	0.00357*
Default.PAN_Pred_FC.net_AE	0.03801***	0.00043	0.00299.
Default.PAN_Pred_FC.rf_AE	0.03801***	1e-05	0.0019
Default.PAN_Pred_FC.gbm_AE	0.03801***	0	0.0024.

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.full_M5a.gbm.FC, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.full_M5a.gbm.FC, family = binomial)

Disease.PAN_Pred_FC.reg_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.reg.FC, family = binomial)

Disease.PAN_Pred_FC.net_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.net.FC, family = binomial)

Disease.PAN_Pred_FC.rf_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.rf.FC, family = binomial)

Disease.PAN_Pred_FC.gbm_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.full_M5a.gbm.FC, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_FC.reg_AE" = Disease.PAN_Pred_FC.reg_AE,
  "Disease.PAN_Pred_FC.net_AE" = Disease.PAN_Pred_FC.net_AE,
  "Disease.PAN_Pred_FC.rf_AE" = Disease.PAN_Pred_FC.rf_AE, 
  "Disease.PAN_Pred_FC.gbm_AE" = Disease.PAN_Pred_FC.gbm_AE
)

# extract_aic(Disease_model)

Model Results

term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_FC.reg_AE,Disease.PAN_Pred_FC.net_AE,Disease.PAN_Pred_FC.rf_AE,Disease.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result

Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.FC(reg)_AE				PAN_Pred.FC(net)_AE				PAN_Pred.FC(rf)_AE				PAN_Pred.FC(gbm)_AE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.45	0.06	-5.84	<0.001	0.43	0.06	-6.03	<0.001	0.46	0.07	-5.18	<0.001	0.46	0.07	-5.10	<0.001	0.44	0.06	-5.66	<0.001	0.45	0.07	-5.51	<0.001
DiseaseCondition [LOSS]	3.40	0.64	6.46	<0.001	3.69	0.72	6.73	<0.001	3.18	0.66	5.55	<0.001	3.14	0.67	5.38	<0.001	3.42	0.68	6.17	<0.001	3.26	0.66	5.81	<0.001
F					1.07	0.23	0.31	0.755
A					1.40	0.14	3.34	0.001	1.45	0.15	3.59	<0.001	1.48	0.15	3.77	<0.001	1.44	0.15	3.52	<0.001	1.45	0.15	3.61	<0.001
C					0.73	0.12	-1.83	0.067
E					0.89	0.09	-1.14	0.256	0.86	0.08	-1.75	0.080	0.85	0.08	-1.81	0.070	0.88	0.08	-1.37	0.170	0.87	0.08	-1.56	0.118
DiseaseCondition [LOSS] × F					1.21	0.35	0.65	0.513
DiseaseCondition [LOSS] × A					1.03	0.14	0.23	0.820	0.98	0.14	-0.14	0.887	0.96	0.14	-0.32	0.750	0.98	0.14	-0.13	0.897	0.96	0.13	-0.28	0.782
DiseaseCondition [LOSS] × C					1.26	0.28	1.06	0.290
DiseaseCondition [LOSS] × E					1.04	0.14	0.27	0.791	1.06	0.13	0.50	0.616	1.07	0.13	0.58	0.563	1.06	0.13	0.42	0.671	1.07	0.13	0.56	0.572
Predicted F									0.99	0.19	-0.08	0.939	1.09	0.23	0.41	0.684	1.06	0.23	0.25	0.803	1.14	0.23	0.64	0.524
Predicted C									0.68	0.16	-1.62	0.104	0.53	0.15	-2.18	0.029	0.65	0.15	-1.93	0.054	0.56	0.14	-2.37	0.018
DiseaseCondition [LOSS] × Predicted F									1.30	0.35	0.99	0.322	1.19	0.35	0.60	0.546	1.27	0.38	0.80	0.422	1.31	0.37	0.95	0.340
DiseaseCondition [LOSS] × Predicted C									1.52	0.51	1.25	0.213	1.98	0.79	1.71	0.087	1.57	0.48	1.48	0.138	1.73	0.59	1.61	0.108
Observations	1460				1460				1460				1460				1460				1460
AIC	1762.505				1721.769				1720.009				1717.034				1719.467				1714.675
log-Likelihood	-875.252				-846.884				-846.005				-844.517				-845.734				-843.338

Anova and Eta Square

calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")

Etasq Table: Disease Paradigm
	Condition	Panel	DiseaseCondition:Panel
Disease.PAN	0.05317***	0.03246***	0.01394***
Disease.PAN_FACE	0.05317***	0.00514**	0.00395*
Disease.PAN_Pred_FC.reg_AE	0.05317***	0.00541**	0.0039*
Disease.PAN_Pred_FC.net_AE	0.05317***	0.00569**	0.00341*
Disease.PAN_Pred_FC.rf_AE	0.05317***	0.00557**	0.00329*
Disease.PAN_Pred_FC.gbm_AE	0.05317***	0.00697**	0.00304.

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.full_M5a.gbm.FC)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.gbm.FC)
LessMore.PAN_Pred_FC.reg_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.reg.FC)
LessMore.PAN_Pred_FC.net_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.net.FC)
LessMore.PAN_Pred_FC.rf_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.rf.FC)
LessMore.PAN_Pred_FC.gbm_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.gbm.FC)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_FC.reg_AE" = LessMore.PAN_Pred_FC.reg_AE,
  "LessMore.PAN_Pred_FC.net_AE" = LessMore.PAN_Pred_FC.net_AE,
  "LessMore.PAN_Pred_FC.rf_AE" = LessMore.PAN_Pred_FC.rf_AE, 
  "LessMore.PAN_Pred_FC.gbm_AE" = LessMore.PAN_Pred_FC.gbm_AE
)

Model Results

term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_FC.reg_AE,LessMore.PAN_Pred_FC.net_AE,LessMore.PAN_Pred_FC.rf_AE,LessMore.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result

LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.FC(reg)_AE				PAN_Pred.FC(net)_AE				PAN_Pred.FC(rf)_AE				PAN_Pred.FC(gbm)_AE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.20	0.09	59.66	<0.001	5.22	0.09	61.17	<0.001	5.24	0.09	57.25	<0.001	5.22	0.09	55.93	<0.001	5.20	0.09	59.60	<0.001	5.24	0.09	59.05	<0.001
LessMoreCondition [SCARF]	1.11	0.12	9.00	<0.001	1.10	0.12	9.13	<0.001	1.06	0.13	8.26	<0.001	1.08	0.13	8.20	<0.001	1.06	0.12	8.57	<0.001	1.03	0.13	8.26	<0.001
F					0.13	0.12	1.12	0.265
A					-0.13	0.06	-2.23	0.026	-0.15	0.06	-2.47	0.014	-0.14	0.06	-2.39	0.017	-0.13	0.06	-2.24	0.025	-0.14	0.06	-2.45	0.014
C					0.01	0.09	0.08	0.934
E					-0.01	0.06	-0.10	0.919	-0.03	0.05	-0.66	0.508	-0.03	0.05	-0.68	0.499	-0.02	0.05	-0.41	0.685	-0.04	0.05	-0.74	0.459
LessMoreCondition [SCARF] × F					-0.02	0.17	-0.13	0.899
LessMoreCondition [SCARF] × A					0.39	0.08	4.82	<0.001	0.41	0.08	5.08	<0.001	0.40	0.08	4.89	<0.001	0.35	0.08	4.36	<0.001	0.38	0.08	4.69	<0.001
LessMoreCondition [SCARF] × C					0.23	0.13	1.74	0.082
LessMoreCondition [SCARF] × E					0.01	0.08	0.08	0.940	0.02	0.07	0.22	0.823	0.02	0.07	0.26	0.792	0.03	0.07	0.44	0.659	0.03	0.07	0.42	0.675
Predicted F									-0.17	0.11	-1.55	0.122	-0.12	0.12	-1.01	0.313	0.08	0.12	0.66	0.511	-0.14	0.12	-1.24	0.216
Predicted C									0.42	0.14	3.05	0.002	0.36	0.16	2.23	0.026	0.08	0.13	0.65	0.514	0.41	0.14	2.92	0.004
LessMoreCondition [SCARF] × Predicted F									0.09	0.16	0.60	0.550	0.01	0.17	0.09	0.931	0.05	0.17	0.29	0.773	0.11	0.16	0.66	0.509
LessMoreCondition [SCARF] × Predicted C									0.02	0.20	0.08	0.938	0.18	0.23	0.79	0.431	0.31	0.18	1.71	0.087	0.14	0.20	0.69	0.488
Observations	1460				1460				1460				1460				1460				1460
R² / R² adjusted	0.204 / 0.201				0.248 / 0.241				0.247 / 0.241				0.246 / 0.239				0.251 / 0.245				0.253 / 0.247

Anova and Eta Square

calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")

Etasq Table: LessMore Paradigm
	Condition	Panel	LessMoreCondition:Panel
LessMore.PAN	0.09206***	0.10649***	0.00506*
LessMore.PAN_FACE	0.09206***	0.02908***	0.00051
LessMore.PAN_Pred_FC.reg_AE	0.09206***	0.0287***	0.00079
LessMore.PAN_Pred_FC.net_AE	0.09206***	0.02819***	0.00059
LessMore.PAN_Pred_FC.rf_AE	0.09206***	0.02531***	0.00112
LessMore.PAN_Pred_FC.gbm_AE	0.09206***	0.02066***	0.00102

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.full_M5a.gbm.FC)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.gbm.FC)
Sunk.PAN_Pred_FC.reg_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.full_M5a.reg.FC)
Sunk.PAN_Pred_FC.net_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.net.FC)
Sunk.PAN_Pred_FC.rf_AE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.full_M5a.rf.FC)
Sunk.PAN_Pred_FC.gbm_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.gbm.FC)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_FC.reg_ACE" = Sunk.PAN_Pred_FC.reg_AE,
  "Sunk.PAN_Pred_FC.net_ACE" = Sunk.PAN_Pred_FC.net_AE,
  "Sunk.PAN_Pred_FC.rf_ACE" = Sunk.PAN_Pred_FC.rf_AE, #0.302 with unscaled
  "Sunk.PAN_Pred_FC.gbm_ACE" = Sunk.PAN_Pred_FC.gbm_AE
)

# extract_adjusted_r_squared(Sunk_model)

Full Model Results

term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_FC.reg_AE,Sunk.PAN_Pred_FC.net_AE,Sunk.PAN_Pred_FC.rf_AE,Sunk.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_FC(reg)_AE", 
                        "PAN_Pred_FC(net)_AE", 
                        "PAN_Pred_FC(rf)_AE", 
                        "PAN_Pred_FC(gbm)_AE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result

Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred_FC(reg)_AE				PAN_Pred_FC(net)_AE				PAN_Pred_FC(rf)_AE				PAN_Pred_FC(gbm)_AE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.78	0.17	34.79	<0.001	5.77	0.17	34.93	<0.001	5.67	0.18	31.76	<0.001	5.65	0.18	30.98	<0.001	5.73	0.17	33.45	<0.001	5.69	0.17	32.70	<0.001
SunkCondition [PAID]	0.43	0.23	1.85	0.065	0.46	0.23	1.97	0.049	0.40	0.25	1.61	0.107	0.42	0.26	1.64	0.101	0.44	0.24	1.84	0.066	0.46	0.24	1.87	0.062
F					0.39	0.22	1.77	0.077
A					0.07	0.11	0.61	0.539	0.07	0.11	0.65	0.519	0.10	0.11	0.92	0.359	0.07	0.11	0.60	0.546	0.07	0.11	0.64	0.520
C					-0.26	0.18	-1.50	0.133
E					-0.06	0.11	-0.59	0.558	-0.12	0.10	-1.25	0.212	-0.13	0.10	-1.39	0.165	-0.11	0.10	-1.14	0.254	-0.11	0.10	-1.15	0.250
SunkCondition [PAID] × F					0.38	0.32	1.18	0.239
SunkCondition [PAID] × A					0.02	0.16	0.13	0.895	0.04	0.16	0.26	0.793	0.01	0.16	0.09	0.930	0.05	0.16	0.33	0.741	0.05	0.16	0.33	0.745
SunkCondition [PAID] × C					-0.20	0.25	-0.79	0.428
SunkCondition [PAID] × E					0.11	0.15	0.72	0.469	0.05	0.14	0.34	0.730	0.06	0.14	0.43	0.666	0.06	0.14	0.41	0.682	0.03	0.14	0.23	0.817
Predicted F									0.34	0.21	1.60	0.110	0.45	0.24	1.90	0.058	0.18	0.23	0.78	0.435	0.39	0.22	1.74	0.083
Predicted C									-0.23	0.27	-0.84	0.403	-0.49	0.33	-1.50	0.133	-0.01	0.24	-0.04	0.967	-0.29	0.27	-1.07	0.286
SunkCondition [PAID] × Predicted F									0.20	0.30	0.64	0.519	0.05	0.34	0.15	0.880	0.24	0.33	0.72	0.469	-0.00	0.32	-0.01	0.995
SunkCondition [PAID] × Predicted C									-0.08	0.38	-0.21	0.834	0.23	0.45	0.51	0.611	-0.15	0.35	-0.44	0.663	0.14	0.39	0.36	0.720
Observations	1460				1460				1460				1460				1460				1460
R² / R² adjusted	0.023 / 0.020				0.042 / 0.033				0.039 / 0.030				0.039 / 0.030				0.036 / 0.027				0.037 / 0.028

Anova and Eta Square

calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")

Etasq Table: SunkCost Paradigm
	Condition	Panel	LessMoreCondition:Panel
Sunk.PAN	0.00794***	0.01132***	0.00398.
Sunk.PAN_FACE	0.00794***	0.00099	0.00205
Sunk.PAN_Pred_FC.reg_ACE	0.00794***	0.00093	0.0021
Sunk.PAN_Pred_FC.net_ACE	0.00794***	0.00116	0.00181
Sunk.PAN_Pred_FC.rf_ACE	0.00794***	0.00038	0.00197
Sunk.PAN_Pred_FC.gbm_ACE	0.00794***	0.00081	0.00168

Summary

Model Fit

\(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)
Model order: consistently (1) PAN only; (2) PAN + FACE; (3) PAN + Pred_C(reg)ACE; (4) PAN + Pred_C(net)ACE; (5) PAN + Pred_C(rf)ACE; (6) PAN + Pred_C(gbm)ACE
note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.full_M5a.gbm.FC,family=binomial)

R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_FC.reg_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.net_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.rf_AE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_FC.gbm_AE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.full_M5a.gbm.FC,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_FC.reg_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.net_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.rf_AE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_FC.gbm_AE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(LessMore.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(LessMore.PAN_Pred_FC.gbm_AE)$r.squared),
                      "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(Sunk.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(Sunk.PAN_Pred_FC.gbm_AE)$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "SunkCost"))

R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),5))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_FC.reg_AE), 
                    AIC(Default.PAN_Pred_FC.net_AE), 
                    AIC(Default.PAN_Pred_FC.rf_AE),
                    AIC(Default.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_FC.reg_AE), 
                    AIC(Disease.PAN_Pred_FC.net_AE), 
                    AIC(Disease.PAN_Pred_FC.rf_AE),
                    AIC(Disease.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  geom_text(aes(label = round(AIC,2)), vjust = -0.5, size = 3, fontface = "bold") + 
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms with study 5 test data", digits = 2)

Percentage Improvement in R2 (compared to PAN) Across Paradigms with study 5 test data
Model	Avg_Improvement
Pan	0.00
Pan+FACE	43.20
Pan+PFC(reg)+AE	37.71
Pan+PFC(net)+AE	36.42
Pan+PFC(rf)+AE	37.30
Pan+PFC(gbm)+AE	37.71
Pan+FACE (Full Study 5)	43.20

Etasq

note that the y-axes for different paradigms are not aligned..

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.FC(reg)+AE", "Pan+Pred.FC(net)+AE", "Pan+Pred.FC(rf)+AE", "Pan+Pred.FC(gbm)+AE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.FC(reg)+AE",  
                                   "Pan+Pred.FC(net)+AE", 
                                   "Pan+Pred.FC(rf)+AE",
                                   "Pan+Pred.FC(gbm)+AE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.full_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data", digits = 2)

Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data
Model	Avg_Reduction
Pan	0.00
Pan+FACE	74.17
Pan+Pred.FC(reg)+AE	68.40
Pan+Pred.FC(net)+AE	73.31
Pan+Pred.FC(rf)+AE	72.19
Pan+Pred.FC(gbm)+AE	73.91
Pan (Full Study5)	0.00
Pan+FACE (Full Study5)	74.19

F/C*Cond Model Coefficients

DVs for Default & Framing were 1/0
DVs for LessMore & SunkCost are on original scale

Default.PAN_FACE.Study5<-glm(numDefault~F*DefaultCondition+A*DefaultCondition+C*DefaultCondition+E*DefaultCondition+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~F*DiseaseCondition+A*DiseaseCondition+C*DiseaseCondition+E*DiseaseCondition+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~F*LessMoreCondition+A*LessMoreCondition+C*LessMoreCondition+E*LessMoreCondition+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~F*SunkCondition+A*SunkCondition+C*SunkCondition+E*SunkCondition+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_FC.reg_AE),confint(Default.PAN_Pred_FC.reg_AE)),
cbind(coef(Default.PAN_Pred_FC.net_AE),confint(Default.PAN_Pred_FC.net_AE)),
cbind(coef(Default.PAN_Pred_FC.rf_AE),confint(Default.PAN_Pred_FC.rf_AE)),
cbind(coef(Default.PAN_Pred_FC.gbm_AE),confint(Default.PAN_Pred_FC.gbm_AE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_FC.reg_AE),confint(Disease.PAN_Pred_FC.reg_AE)),
cbind(coef(Disease.PAN_Pred_FC.net_AE),confint(Disease.PAN_Pred_FC.net_AE)),
cbind(coef(Disease.PAN_Pred_FC.rf_AE),confint(Disease.PAN_Pred_FC.rf_AE)),
cbind(coef(Disease.PAN_Pred_FC.gbm_AE),confint(Disease.PAN_Pred_FC.gbm_AE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_FC.reg_AE),confint(LessMore.PAN_Pred_FC.reg_AE)),
cbind(coef(LessMore.PAN_Pred_FC.net_AE),confint(LessMore.PAN_Pred_FC.net_AE)),
cbind(coef(LessMore.PAN_Pred_FC.rf_AE),confint(LessMore.PAN_Pred_FC.rf_AE)),
cbind(coef(LessMore.PAN_Pred_FC.gbm_AE),confint(LessMore.PAN_Pred_FC.gbm_AE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_FC.reg_AE),confint(Sunk.PAN_Pred_FC.reg_AE)),
cbind(coef(Sunk.PAN_Pred_FC.net_AE),confint(Sunk.PAN_Pred_FC.net_AE)),
cbind(coef(Sunk.PAN_Pred_FC.rf_AE),confint(Sunk.PAN_Pred_FC.rf_AE)),
cbind(coef(Sunk.PAN_Pred_FC.gbm_AE),confint(Sunk.PAN_Pred_FC.gbm_AE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24),levels = c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)","C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

3.2 Evaluate Portability with Study 1 Data

3.2.1 Study 1 US Data

data.tst_MP1.US.reg.FC<-cbind(data.tst_MP1.US.reg.C, Predicted_F = M5a_reg_pred_MP1.US)

data.tst_MP1.US.net.FC<-cbind(data.tst_MP1.US.net.C, Predicted_F = M5a_net_pred_MP1.US)
data.tst_MP1.US.rf.FC<-cbind(data.tst_MP1.US.rf.C, Predicted_F = M5a_rf_pred_MP1.US)
data.tst_MP1.US.gbm.FC<-cbind(data.tst_MP1.US.gbm.C, Predicted_F = M5a_gbm_pred_MP1.US)

Correlations Btw PF1, PC1, and Study 1 FACE

Linear Regression

Predicted F1 and Predicted C1 highly correlated

ggpairs(data.tst_MP1.US.reg.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net

ggpairs(data.tst_MP1.US.net.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest

ggpairs(data.tst_MP1.US.rf.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.tst_MP1.US.gbm.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PF and PC and Study 1 F composite measures

F_measure.Study1<-F_measure.Study1%>%
  filter(!Panel%in%c("Students","Prolific_UK_Rep"))
data.tst_MP1.US.gbm.FC$BNT<-F_measure.Study1$BNT
data.tst_MP1.US.gbm.FC$CRTScore<-F_measure.Study1$CRTScore

ggpairs(data.tst_MP1.US.gbm.FC, c("Predicted_F","BNT","CRTScore","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Explain Heterogeniety with PF1 and PC1

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_MP1.US.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_MP1.US.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.reg_AE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.reg.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.net_AE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.net.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.rf_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.rf.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.gbm_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_FC.reg_AE" = Default.PAN_Pred_FC.reg_AE,
  "Default.PAN_Pred_FC.net_AE" = Default.PAN_Pred_FC.net_AE,
  "Default.PAN_Pred_FC.rf_AE" = Default.PAN_Pred_FC.rf_AE,
  "Default.PAN_Pred_FC.gbm_AE" = Default.PAN_Pred_FC.gbm_AE
)

Model Results

term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_FC.reg_AE,Default.PAN_Pred_FC.net_AE,Default.PAN_Pred_FC.rf_AE,Default.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result

Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.FC(reg)_AE				PAN_Pred.FC(net)_AE				PAN_Pred.FC(rf)_AE				PAN_Pred.FC(gbm)_AE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.88	0.11	-1.06	0.291	0.87	0.11	-1.14	0.253	0.85	0.11	-1.21	0.226	0.84	0.11	-1.26	0.208	0.81	0.11	-1.57	0.117	0.89	0.12	-0.85	0.397
DefaultCondition [OPTOUT]	3.10	0.56	6.31	<0.001	3.22	0.59	6.42	<0.001	3.01	0.56	5.88	<0.001	3.01	0.58	5.74	<0.001	3.27	0.61	6.32	<0.001	2.96	0.56	5.77	<0.001
F					1.57	0.16	4.48	<0.001
A					1.41	0.17	2.87	0.004	1.12	0.15	0.91	0.365	1.10	0.14	0.73	0.464	1.01	0.14	0.09	0.932	1.13	0.14	0.95	0.341
C					1.02	0.05	0.39	0.698
E					0.86	0.07	-1.93	0.054	0.88	0.07	-1.71	0.088	0.88	0.07	-1.68	0.092	0.88	0.07	-1.61	0.107	0.87	0.07	-1.73	0.084
DefaultCondition [OPTOUT] × F					1.03	0.16	0.17	0.867
DefaultCondition [OPTOUT] × A					0.44	0.08	-4.53	<0.001	0.53	0.10	-3.29	0.001	0.54	0.10	-3.17	0.002	0.53	0.10	-3.26	0.001	0.50	0.09	-3.68	<0.001
DefaultCondition [OPTOUT] × C					1.18	0.10	1.98	0.048
DefaultCondition [OPTOUT] × E					1.23	0.14	1.79	0.073	1.18	0.14	1.44	0.151	1.18	0.14	1.45	0.148	1.21	0.14	1.62	0.106	1.23	0.14	1.77	0.076
Predicted F									1.31	0.15	2.34	0.019	1.30	0.16	2.05	0.040	1.18	0.13	1.48	0.138	1.16	0.14	1.26	0.208
Predicted C									1.34	0.23	1.72	0.086	1.43	0.29	1.78	0.076	2.11	0.48	3.30	0.001	1.76	0.31	3.17	0.002
DefaultCondition [OPTOUT] × Predicted F									1.09	0.18	0.49	0.624	1.07	0.20	0.35	0.727	0.99	0.16	-0.08	0.938	1.20	0.21	1.07	0.286
DefaultCondition [OPTOUT] × Predicted C									0.58	0.14	-2.18	0.029	0.57	0.17	-1.88	0.060	0.65	0.21	-1.30	0.193	0.55	0.14	-2.29	0.022
Observations	3541				3541				3541				3541				3541				3541
AIC	4466.919				4382.086				4391.381				4393.132				4386.772				4381.301
log-Likelihood	-2215.460				-2165.043				-2169.691				-2170.566				-2167.386				-2164.650

Anova and Eta Square

calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")

Etasq Table: Default Paradigm
	Condition	Panel	Default Condition:Panel
Default.PAN	0.05063***	0.00612***	0.00751***
Default.PAN_FACE	0.05063***	0.00456**	0.0031.
Default.PAN_Pred_FC.reg_AE	0.05063***	0.00378*	0.00195
Default.PAN_Pred_FC.net_AE	0.05063***	0.00388*	0.00196
Default.PAN_Pred_FC.rf_AE	0.05063***	0.00351*	0.00263
Default.PAN_Pred_FC.gbm_AE	0.05063***	0.00353*	0.00277

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_MP1.US.gbm.FC, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_MP1.US.gbm.FC, family = binomial)

Disease.PAN_Pred_FC.reg_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.reg.FC, family = binomial)

Disease.PAN_Pred_FC.net_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.net.FC, family = binomial)

Disease.PAN_Pred_FC.rf_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.rf.FC, family = binomial)

Disease.PAN_Pred_FC.gbm_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_MP1.US.gbm.FC, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_FC.reg_AE" = Disease.PAN_Pred_FC.reg_AE,
  "Disease.PAN_Pred_FC.net_AE" = Disease.PAN_Pred_FC.net_AE,
  "Disease.PAN_Pred_FC.rf_AE" = Disease.PAN_Pred_FC.rf_AE, 
  "Disease.PAN_Pred_FC.gbm_AE" = Disease.PAN_Pred_FC.gbm_AE
)

# extract_aic(Disease_model)

Model Results

term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_FC.reg_AE,Disease.PAN_Pred_FC.net_AE,Disease.PAN_Pred_FC.rf_AE,Disease.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result

Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.FC(reg)_AE				PAN_Pred.FC(net)_AE				PAN_Pred.FC(rf)_AE				PAN_Pred.FC(gbm)_AE
Predictors	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p	Odds Ratios	std. Error	Statistic	p
(Intercept)	0.49	0.05	-6.86	<0.001	0.47	0.05	-7.15	<0.001	0.47	0.05	-6.95	<0.001	0.46	0.05	-6.91	<0.001	0.50	0.05	-6.41	<0.001	0.46	0.05	-7.00	<0.001
DiseaseCondition [LOSS]	2.72	0.39	6.97	<0.001	2.91	0.43	7.29	<0.001	2.83	0.43	6.88	<0.001	2.89	0.45	6.85	<0.001	2.62	0.39	6.39	<0.001	2.92	0.44	7.08	<0.001
F					1.30	0.12	2.82	0.005
A					1.66	0.18	4.58	<0.001	1.96	0.23	5.85	<0.001	2.02	0.24	5.99	<0.001	1.90	0.23	5.34	<0.001	1.89	0.21	5.62	<0.001
C					0.79	0.04	-4.89	<0.001
E					0.96	0.06	-0.67	0.501	0.88	0.06	-1.86	0.062	0.88	0.06	-1.93	0.053	0.88	0.06	-1.83	0.068	0.89	0.06	-1.75	0.080
DiseaseCondition [LOSS] × F					0.87	0.11	-1.14	0.254
DiseaseCondition [LOSS] × A					1.01	0.15	0.07	0.947	0.80	0.13	-1.41	0.159	0.76	0.12	-1.76	0.078	0.78	0.13	-1.56	0.118	0.81	0.12	-1.37	0.170
DiseaseCondition [LOSS] × C					1.17	0.08	2.39	0.017
DiseaseCondition [LOSS] × E					0.79	0.07	-2.58	0.010	0.87	0.08	-1.55	0.120	0.88	0.08	-1.40	0.163	0.88	0.08	-1.38	0.166	0.87	0.08	-1.46	0.145
Predicted F									0.99	0.10	-0.14	0.890	1.01	0.11	0.10	0.916	0.85	0.08	-1.64	0.101	0.99	0.10	-0.09	0.930
Predicted C									0.73	0.11	-2.12	0.034	0.66	0.12	-2.35	0.019	0.96	0.19	-0.19	0.849	0.75	0.12	-1.79	0.073
DiseaseCondition [LOSS] × Predicted F									1.11	0.15	0.79	0.432	1.07	0.16	0.44	0.659	1.27	0.17	1.77	0.077	1.07	0.15	0.48	0.633
DiseaseCondition [LOSS] × Predicted C									1.47	0.29	1.90	0.057	1.79	0.43	2.44	0.015	1.38	0.37	1.20	0.229	1.72	0.38	2.50	0.012
Observations	5336				5336				5336				5336				5336				5336
AIC	6723.912				6573.688				6589.687				6585.017				6589.809				6586.068
log-Likelihood	-3343.956				-3260.844				-3268.843				-3266.509				-3268.905				-3267.034

Anova and Eta Square

calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")

Etasq Table: Disease Paradigm
	Condition	Panel	DiseaseCondition:Panel
Disease.PAN	0.05826***	0.01261***	0.0093***
Disease.PAN_FACE	0.05826***	0.00366***	0.00557***
Disease.PAN_Pred_FC.reg_AE	0.05826***	0.0033**	0.0036***
Disease.PAN_Pred_FC.net_AE	0.05826***	0.00329**	0.00343**
Disease.PAN_Pred_FC.rf_AE	0.05826***	0.00304**	0.00349**
Disease.PAN_Pred_FC.gbm_AE	0.05826***	0.00316**	0.00334**

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_MP1.US.gbm.FC)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.gbm.FC)
LessMore.PAN_Pred_FC.reg_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.reg.FC)
LessMore.PAN_Pred_FC.net_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.net.FC)
LessMore.PAN_Pred_FC.rf_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.rf.FC)
LessMore.PAN_Pred_FC.gbm_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.gbm.FC)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_FC.reg_AE" = LessMore.PAN_Pred_FC.reg_AE,
  "LessMore.PAN_Pred_FC.net_AE" = LessMore.PAN_Pred_FC.net_AE,
  "LessMore.PAN_Pred_FC.rf_AE" = LessMore.PAN_Pred_FC.rf_AE, 
  "LessMore.PAN_Pred_FC.gbm_AE" = LessMore.PAN_Pred_FC.gbm_AE
)

Model Results

term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_FC.reg_AE,LessMore.PAN_Pred_FC.net_AE,LessMore.PAN_Pred_FC.rf_AE,LessMore.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_ACE", 
                        "PAN_Pred.FC(net)_ACE", 
                        "PAN_Pred.FC(rf)_ACE", 
                        "PAN_Pred.FC(gbm)_ACE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result

LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred.FC(reg)_ACE				PAN_Pred.FC(net)_ACE				PAN_Pred.FC(rf)_ACE				PAN_Pred.FC(gbm)_ACE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.37	0.07	79.64	<0.001	5.36	0.07	81.93	<0.001	5.38	0.07	80.03	<0.001	5.38	0.07	78.08	<0.001	5.39	0.07	80.82	<0.001	5.37	0.07	80.00	<0.001
LessMoreCondition [SCARF]	0.95	0.09	10.04	<0.001	0.99	0.09	10.82	<0.001	0.95	0.09	10.08	<0.001	0.95	0.10	9.80	<0.001	0.93	0.09	9.93	<0.001	0.95	0.09	10.11	<0.001
F					-0.11	0.05	-1.99	0.046
A					0.38	0.06	6.14	<0.001	0.29	0.07	4.51	<0.001	0.28	0.07	4.21	<0.001	0.18	0.07	2.73	0.006	0.32	0.06	4.93	<0.001
C					0.02	0.03	0.58	0.562
E					-0.00	0.04	-0.09	0.930	0.04	0.04	0.90	0.368	0.04	0.04	1.03	0.301	0.05	0.04	1.16	0.247	0.04	0.04	0.96	0.340
LessMoreCondition [SCARF] × F					0.28	0.08	3.67	<0.001
LessMoreCondition [SCARF] × A					0.45	0.09	5.04	<0.001	0.41	0.09	4.43	<0.001	0.40	0.09	4.27	<0.001	0.42	0.10	4.35	<0.001	0.40	0.09	4.36	<0.001
LessMoreCondition [SCARF] × C					-0.01	0.04	-0.14	0.886
LessMoreCondition [SCARF] × E					-0.12	0.06	-2.24	0.025	-0.14	0.06	-2.59	0.010	-0.14	0.06	-2.56	0.010	-0.14	0.06	-2.55	0.011	-0.14	0.06	-2.50	0.012
Predicted F									-0.02	0.06	-0.38	0.706	-0.01	0.07	-0.21	0.836	-0.19	0.06	-3.26	0.001	0.01	0.06	0.16	0.875
Predicted C									0.22	0.09	2.51	0.012	0.25	0.10	2.43	0.015	0.79	0.11	6.92	<0.001	0.18	0.09	1.95	0.052
LessMoreCondition [SCARF] × Predicted F									0.17	0.08	2.04	0.041	0.16	0.09	1.68	0.092	0.28	0.08	3.45	0.001	0.19	0.09	2.14	0.033
LessMoreCondition [SCARF] × Predicted C									-0.09	0.12	-0.78	0.438	-0.05	0.15	-0.34	0.733	-0.32	0.16	-1.96	0.050	-0.08	0.13	-0.58	0.560
Observations	5336				5336				5336				5336				5336				5336
R² / R² adjusted	0.173 / 0.170				0.224 / 0.220				0.228 / 0.225				0.230 / 0.226				0.238 / 0.234				0.229 / 0.226

Anova and Eta Square

calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")

Etasq Table: LessMore Paradigm
	Condition	Panel	LessMoreCondition:Panel
LessMore.PAN	0.12958***	0.03699***	0.00642***
LessMore.PAN_FACE	0.12958***	0.01349***	0.00148
LessMore.PAN_Pred_FC.reg_AE	0.12958***	0.01225***	0.0016
LessMore.PAN_Pred_FC.net_AE	0.12958***	0.01226***	0.0016
LessMore.PAN_Pred_FC.rf_AE	0.12958***	0.00993***	0.00168
LessMore.PAN_Pred_FC.gbm_AE	0.12958***	0.01152***	0.00155

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_MP1.US.gbm.FC)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm.FC)
Sunk.PAN_Pred_FC.reg_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_MP1.US.reg.FC)
Sunk.PAN_Pred_FC.net_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.net.FC)
Sunk.PAN_Pred_FC.rf_AE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_MP1.US.rf.FC)
Sunk.PAN_Pred_FC.gbm_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm.FC)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_FC.reg_ACE" = Sunk.PAN_Pred_FC.reg_AE,
  "Sunk.PAN_Pred_FC.net_ACE" = Sunk.PAN_Pred_FC.net_AE,
  "Sunk.PAN_Pred_FC.rf_ACE" = Sunk.PAN_Pred_FC.rf_AE, #0.302 with unscaled
  "Sunk.PAN_Pred_FC.gbm_ACE" = Sunk.PAN_Pred_FC.gbm_AE
)

# extract_adjusted_r_squared(Sunk_model)

Full Model Results

term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_FC.reg_AE,Sunk.PAN_Pred_FC.net_AE,Sunk.PAN_Pred_FC.rf_AE,Sunk.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_FC(reg)_AE", 
                        "PAN_Pred_FC(net)_AE", 
                        "PAN_Pred_FC(rf)_AE", 
                        "PAN_Pred_FC(gbm)_AE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result

Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
	PAN				PAN_FACE				PAN_Pred_FC(reg)_AE				PAN_Pred_FC(net)_AE				PAN_Pred_FC(rf)_AE				PAN_Pred_FC(gbm)_AE
Predictors	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p	Estimates	std. Error	Statistic	p
(Intercept)	5.46	0.14	39.29	<0.001	5.46	0.14	39.54	<0.001	5.41	0.14	37.90	<0.001	5.40	0.15	36.89	<0.001	5.40	0.14	37.96	<0.001	5.32	0.14	37.35	<0.001
SunkCondition [PAID]	0.70	0.20	3.48	0.001	0.70	0.20	3.52	<0.001	0.62	0.20	3.01	0.003	0.60	0.21	2.87	0.004	0.66	0.20	3.21	0.001	0.74	0.20	3.62	<0.001
F					0.32	0.11	2.82	0.005
A					0.59	0.14	4.33	<0.001	0.51	0.14	3.54	<0.001	0.50	0.15	3.41	0.001	0.56	0.15	3.79	<0.001	0.56	0.14	3.99	<0.001
C					0.10	0.06	1.71	0.087
E					0.20	0.08	2.32	0.021	0.19	0.08	2.26	0.024	0.20	0.09	2.34	0.020	0.18	0.09	2.12	0.034	0.21	0.09	2.46	0.014
SunkCondition [PAID] × F					0.18	0.16	1.10	0.270
SunkCondition [PAID] × A					0.02	0.19	0.11	0.914	-0.07	0.20	-0.32	0.745	-0.06	0.21	-0.31	0.755	-0.31	0.21	-1.49	0.137	-0.15	0.20	-0.78	0.436
SunkCondition [PAID] × C					-0.02	0.08	-0.18	0.854
SunkCondition [PAID] × E					-0.04	0.12	-0.31	0.753	-0.01	0.12	-0.12	0.907	-0.02	0.12	-0.14	0.890	0.01	0.12	0.12	0.902	-0.02	0.12	-0.15	0.881
Predicted F									0.21	0.13	1.62	0.105	0.23	0.14	1.65	0.100	0.21	0.13	1.63	0.103	0.53	0.13	3.97	<0.001
Predicted C									-0.02	0.19	-0.12	0.901	-0.02	0.22	-0.10	0.924	-0.20	0.25	-0.78	0.433	-0.61	0.20	-3.00	0.003
SunkCondition [PAID] × Predicted F									0.40	0.18	2.19	0.029	0.38	0.20	1.87	0.062	0.07	0.18	0.41	0.679	0.01	0.19	0.07	0.944
SunkCondition [PAID] × Predicted C									-0.21	0.27	-0.80	0.422	-0.19	0.32	-0.59	0.553	0.82	0.35	2.32	0.021	0.57	0.29	1.98	0.048
Observations	5336				5336				5336				5336				5336				5336
R² / R² adjusted	0.025 / 0.022				0.040 / 0.035				0.040 / 0.035				0.040 / 0.035				0.040 / 0.035				0.042 / 0.037

Anova and Eta Square

calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")

Etasq Table: SunkCost Paradigm
	Condition	Panel	LessMoreCondition:Panel
Sunk.PAN	0.01083***	0.00971***	0.00447**
Sunk.PAN_FACE	0.01083***	0.00563***	0.00226
Sunk.PAN_Pred_FC.reg_ACE	0.01083***	0.0074***	0.00205
Sunk.PAN_Pred_FC.net_ACE	0.01083***	0.00753***	0.00209
Sunk.PAN_Pred_FC.rf_ACE	0.01083***	0.0085***	0.00166
Sunk.PAN_Pred_FC.gbm_ACE	0.01083***	0.00604***	0.00178

Summary

Model Fit

\(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)
Model order: consistently (1) PAN only; (2) PAN + FACE; (3) PAN + Pred_C(reg)ACE; (4) PAN + Pred_C(net)ACE; (5) PAN + Pred_C(rf)ACE; (6) PAN + Pred_C(gbm)ACE
note that the y-axes for different paradigms are not aligned.

# Need a plot here for the AIC of the different models
#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.tst_MP1.US.gbm.FC,family=binomial)
R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_FC.reg_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.net_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.rf_AE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_FC.gbm_AE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.tst_MP1.US.gbm.FC,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_FC.reg_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.net_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.rf_AE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_FC.gbm_AE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(LessMore.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(LessMore.PAN_Pred_FC.gbm_AE)$r.squared),
                      "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(Sunk.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(Sunk.PAN_Pred_FC.gbm_AE)$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "SunkCost "))

R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),5))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_FC.reg_AE), 
                    AIC(Default.PAN_Pred_FC.net_AE), 
                    AIC(Default.PAN_Pred_FC.rf_AE),
                    AIC(Default.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_FC.reg_AE), 
                    AIC(Disease.PAN_Pred_FC.net_AE), 
                    AIC(Disease.PAN_Pred_FC.rf_AE),
                    AIC(Disease.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  geom_text(aes(label = round(AIC,2)), vjust = -0.5, size = 3, fontface = "bold") + 
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(4000, 7000))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data", digits = 2)

Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data
Model	Avg_Improvement
Pan	0.00
Pan+FACE	30.51
Pan+PFC(reg)+AE	30.39
Pan+PFC(net)+AE	30.68
Pan+PFC(rf)+AE	31.90
Pan+PFC(gbm)+AE	32.67
Pan+FACE (Full Study 5)	43.20

Etasq

note that the y-axes for different paradigms are not aligned..
We have observed a slight improvement in explaining heterogeneity across all methods, compared to what we initially had. I think this is due to our predictors being more refined: For example, we used longitude and latitude info to swap “minutes since midnight (EST)” to “clock time.

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.FC(reg)+AE", "Pan+Pred.FC(net)+AE", "Pan+Pred.FC(rf)+AE", "Pan+Pred.FC(gbm)+AE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.FC(reg)+AE",  
                                   "Pan+Pred.FC(net)+AE", 
                                   "Pan+Pred.FC(rf)+AE",
                                   "Pan+Pred.FC(gbm)+AE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1", digits = 2)

Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1
Model	Avg_Reduction
Pan	0.00
Pan+FACE	56.30
Pan+Pred.FC(reg)+AE	66.14
Pan+Pred.FC(net)+AE	66.34
Pan+Pred.FC(rf)+AE	66.04
Pan+Pred.FC(gbm)+AE	65.81
Pan (Full Study5)	0.00
Pan+FACE (Full Study5)	74.19

# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Disease$Paradigm <- "Disease"
variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"

Model <- c("Pan", "Pan+FACE", "Pan+Pred.FC(reg)+AE", "Pan+Pred.FC(net)+AE", "Pan+Pred.FC(rf)+AE", "Pan+Pred.FC(gbm)+AE")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

etasq <- rbind(deviance_explained_df.Default, 
                     deviance_explained_df.Disease, 
                     variance_explained_lm_df.LessMore, 
                     variance_explained_lm_df.SunkCost)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.FC(reg)+AE",  
                                   "Pan+Pred.FC(net)+AE", 
                                   "Pan+Pred.FC(rf)+AE",
                                   "Pan+Pred.FC(gbm)+AE"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

F/C*Cond Model Coefficients

DVs for Default & Framing are 1/0
DVs for LessMore & SunkCost are on the original scale

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_FC.reg_AE),confint(Default.PAN_Pred_FC.reg_AE)),
cbind(coef(Default.PAN_Pred_FC.net_AE),confint(Default.PAN_Pred_FC.net_AE)),
cbind(coef(Default.PAN_Pred_FC.rf_AE),confint(Default.PAN_Pred_FC.rf_AE)),
cbind(coef(Default.PAN_Pred_FC.gbm_AE),confint(Default.PAN_Pred_FC.gbm_AE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_FC.reg_AE),confint(Disease.PAN_Pred_FC.reg_AE)),
cbind(coef(Disease.PAN_Pred_FC.net_AE),confint(Disease.PAN_Pred_FC.net_AE)),
cbind(coef(Disease.PAN_Pred_FC.rf_AE),confint(Disease.PAN_Pred_FC.rf_AE)),
cbind(coef(Disease.PAN_Pred_FC.gbm_AE),confint(Disease.PAN_Pred_FC.gbm_AE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_FC.reg_AE),confint(LessMore.PAN_Pred_FC.reg_AE)),
cbind(coef(LessMore.PAN_Pred_FC.net_AE),confint(LessMore.PAN_Pred_FC.net_AE)),
cbind(coef(LessMore.PAN_Pred_FC.rf_AE),confint(LessMore.PAN_Pred_FC.rf_AE)),
cbind(coef(LessMore.PAN_Pred_FC.gbm_AE),confint(LessMore.PAN_Pred_FC.gbm_AE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_FC.reg_AE),confint(Sunk.PAN_Pred_FC.reg_AE)),
cbind(coef(Sunk.PAN_Pred_FC.net_AE),confint(Sunk.PAN_Pred_FC.net_AE)),
cbind(coef(Sunk.PAN_Pred_FC.rf_AE),confint(Sunk.PAN_Pred_FC.rf_AE)),
cbind(coef(Sunk.PAN_Pred_FC.gbm_AE),confint(Sunk.PAN_Pred_FC.gbm_AE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24),levels = c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Study 5)"), each = 10),4), levels = rev(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)","C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Parsimonious PF5 (GBM)

17 November, 2024

0. Objective

1. Predict F

1.0 Evaluate Accuracy with Study 5 Data

Correlation between F Score and F Compiste Measured, Compared with C

1.0.1 IVs

1.0.2 Model Fit

1.0.3 Model Interpretation I

1.0.4 Model Interpretation II (SHAP Value for tree-based models)

1.0.5 Model Accuracy on Held-out Test Data

1.0.6 Density and Residual Plots for Predicting F in Held-Out Test Data

Histogram of F in Held-Out Test Data

1.0.7 Correlation Btw ML Predicted F and other FACE factors

Linear Regression

Elastic Net

Random Forest

Gradient Boosting

GBM: Correlation between PF and F measures and ATTN1

1.0.8 Explain Heterogenity with Predicted F in Test Sample (20% study 5 Data)

Default Paradigm

Model Results

Anova and Eta Square

Framing (Unusual Disease) Paradigm

Model Results

Anova and Eta Square

Less is More Paradigm

Model Results

Anova and Eta Square

Sunk Cost Paradigm

Full Model Results

Anova and Eta Square

Summary

Model Fit

F*Cond Model Coefficients

Etasq

1.0.9 Explain Heterogenity with Predicted F in Full MP5 data

1.0.9.1 Accuracy in full MP5 data

Default Paradigm

Model Results

Anova and Eta Square

Framing (Unusual Disease) Paradigm

Model Results

Anova and Eta Square

Less is More Paradigm

Model Results

Anova and Eta Square

Sunk Cost Paradigm

Full Model Results

Anova and Eta Square

Summary

Model Fit

F*Cond Model Coefficients

Etasq

1.1 Evaluate Portability with Study 1 Data

1.1.1 Study 1 US Data

1.1.1.0 Model Fit

1.1.1.4 Correlation Btw ML Predicted F and other FACE factors

Random Forest

Gradient Boosting

GBM: Correlation between PF5 and individual F measures in Study 1 and ATTN1

1.1.1.5 Explain Heterogenity with Predicted F

1.1.1.5.1 Default Paradigm

Model Results

Anova and Eta Square

1.1.1.5.2 Framing (Unusual Disease) Paradigm

Model Results

Anova and Eta Square

1.1.1.5.3 Less is More Paradigm

Model Results

Anova and Eta Square

1.1.1.5.4 Sunk Cost Paradigm

Full Model Results

Anova and Eta Square

1.1.1.5.5 Summary

Model Fit

F*Cond Model Coefficients

Etasq

2. Predict C

2.0 Evaluation Accuracy with MP5 data