0. Objective

Find parsimonious gradient boosting model for PF5.

1. Predict F

1.0 Evaluate Accuracy with Study 5 Data

  • We use the cleaned data (n=1460) for analysis in this notebook.
  • In this notebook, F ~= \(\text{ATTN1} + \text{CRT} + \text{Matrices} + \text{3D}\), and C~= \(\text{age} + \text{synonym} + \text{antonym}\).

Correlation between F Score and F Compiste Measured, Compared with C

ggpairs(MP5_data, c("F","ATTN1","crt2_score", "rotsum", "MXsum","Antonym","Synonym","C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

1.0.1 IVs

  • Complete list of IVs used can be found in this google spreadsheet.
  • Refer to ML_FACE_MP5.html notebook for more details about IVs, missing data, and imputation.
# Revert scaled variables back to their original scale
MP5_data<-MP5_data%>%
  mutate(crt2_score=crt2_score.OrgScale,
         rotsum=rotsum.OrgScale,
         MXsum=MXsum.OrgScale,
         AGE=AGE.OrgScale,
         numEDU=numEDU.OrgScale,
         numINCOME=numINCOME.OrgScale,
         Antonym=Antonym.OrgScale,
         Synonym=Synonym.OrgScale,
         DOSE_SunkCost=DOSE_SunkCost.OrgScale,
         DOSE_LessMore=DOSE_LessMore.OrgScale,
         DOSE_UnusualDis=DOSE_UnusualDis.OrgScale,
         DOSE_Default=DOSE_Default.OrgScale)



FAM = grep("FAM", names(MP5_data), value = TRUE)
DOSE_SOURCE = grep("DOSE|SOURCE", names(MP5_data), value = TRUE)
TIME.lg = grep("lg", names(MP5_data), value = TRUE)
F_C_TIPI_data = grep("crt|rot|MX|Matrix|3D|Antonym|Synonym|ANT|SYN|TIPI|Extra|Agree|Consc|EmoS|Openn", names(MP5_data), value = TRUE)
IV_DV_data <- grep("TIME_SUNK_|TIME_DISEASE_|TIME_DEFAULT_|TIME_LESSMORE_", names(MP5_data), value = TRUE)

MP5_data<-MP5_data%>% # ensure that factors are entered as factors,
  mutate(
        # ZIP_LW_IV2.recoded = as.factor(ZIP_LW_IV2.recoded),
        day_of_week = as.factor(day_of_week),
         numGENDER = case_when(numGENDER==1 ~ "Female",
                               TRUE ~ "Male"),
         numGENDER = as.factor(numGENDER),
         META_Browser = as.factor(META_Browser),
         META_Browser = relevel(META_Browser, ref = "Chrome"),
         ATTN1 = as.factor(case_when(ATTN1==1~"_Correct",
                                     TRUE ~ "_InCorrect")),
         ATTN2 = as.factor(case_when(ATTN2==1 ~"_Correct",
                                     TRUE ~ "_InCorrect")), # variable hygiene
         CONFUSE = as.factor(CONFUSE),
         TEDIOUS = as.factor(TEDIOUS),
         mobile = as.numeric(mobile),
         mobile=as.factor(case_when(
          is.na(mobile) == T ~ "_No",
          mobile == 0 ~ "_No",
          TRUE ~ "_YES")))%>%
  rename(GENDER = numGENDER) %>%
  separate(META_Resolution, into = c("Width", "Height"), sep = "x") %>%
  mutate(
    Width = as.numeric(Width),
    Height = as.numeric(Height),
    Resolution_Product = Width * Height
  )

columns_to_remove <- c(FAM, TIME.lg,F_C_TIPI_data,IV_DV_data,DOSE_SOURCE)

MP5_data<-MP5_data%>%
  select(-all_of(columns_to_remove))#remove all the familiarity and time.lg data

### Variable Selection for M1 ###

TIME = grep("\\.Submit", names(MP5_data), value = TRUE)
First_click = grep("\\_First.Click", names(MP5_data), value = TRUE)
Last_click = grep("\\_Last.Click", names(MP5_data), value = TRUE)
Total_click = grep("\\_Click.Count", names(MP5_data), value = TRUE)


### Calculate the differences between the first and the lack click times ###

for (i in 1:length(First_click)) {
  first_col <- First_click[i]
  last_col <- Last_click[i]
  diff_col <- paste0(sub("\\_First.Click", "", first_col), "_Click.Time.Diff")

  MP5_data <- MP5_data %>%
    mutate(!!diff_col := !!sym(last_col) - !!sym(first_col))
}

Click_time_diff = grep("\\_Click.Time.Diff", names(MP5_data), value = TRUE)

### DONE ###
na_columns <- sapply(MP5_data, function(x) any(is.na(x)))

# Extract the names of columns that contain NA values
# names(na_columns[na_columns])

# as expected, append next
#  [1] "TIME_DEMCHAR_Page.Submit"     "TIME_DEMCHAR_First.Click"
#  [3] "TIME_DEMCHAR_Last.Click"      "TIME_DEMCHAR_Click.Count"
#  [5] "Q_RelevantIDFraudScore"       "LocationLatitude"
#  [7] "LocationLongitude"            "DEMCHAR_FleschKincaid"
#  [9] "CONFUSEOPEN_FleschKincaid"    "TEDIOUSOPEN_FleschKincaid"
# [11] "TIME_DEMCHAR_Click.Time.Diff"

# MP5_data%>%
#   filter(LocationLongitude ==0 | LocationLatitude ==0) # none
### Additional wrangling to append missing values ###

MP5_data<-MP5_data %>%
  # filter(!is.na(day_tempF) & !is.na(hist_tempF) & !is.na(LW_tempdiff)) %>% # unclear why these are NA values but removed for accurate prediction
  # as discussed in 07/19 meeting, we retain these observations and impute them by 0.
  group_by(Panel) %>%
  mutate(
    Avg_RecaptchaScore = mean(Q_RecaptchaScore, na.rm = TRUE),
    Avg_RelevantIDFraudScore = mean(Q_RelevantIDFraudScore, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(
    Q_RecaptchaScore = ifelse(is.na(Q_RecaptchaScore), Avg_RecaptchaScore, Q_RecaptchaScore),
    Q_RelevantIDFraudScore = ifelse(is.na(Q_RelevantIDFraudScore), Avg_RelevantIDFraudScore, Q_RelevantIDFraudScore) # append missing Q_RecaptchaScore and Q_RelevantIDFraudScore with Panel average
  ) %>%
  select(-Avg_RecaptchaScore, -Avg_RelevantIDFraudScore)

# NA Check

### DONE ###
### Select Variables of Interest ###
M5_variable = c("Duration..in.seconds.","Q_RecaptchaScore","Q_RelevantIDFraudScore","CONFUSE","TEDIOUS","day_of_week","clock_time","ATTN1","ATTN2","numINCOME","numEDU","GENDER","AGE","mobile","Resolution_Product","LocationLatitude","LocationLongitude","META_Browser","Panel")

FACE=c("F","A","C","E")
IV_DV = c("numSunkCost","numLessMore","numDefault","numDisease","SunkCondition","LessMoreCondition","DiseaseCondition","DefaultCondition")

Text_analysis = grep("(_WordCount|_SentenceCount|_CharacterCount|_TypoCount|_avg.WordRarity|_AvgWordLength|PURPOSEOPEN_keyword_count)", names(MP5_data), value = TRUE)

# is the IV for LW correct?
Data_M5 <- MP5_data %>%
  select(all_of(c(FACE, IV_DV, TIME, First_click, Last_click, Total_click, M5_variable, Click_time_diff,Text_analysis)))
# it is okay...., Text_analysis var are not selected anyways.

na_columns <- sapply(Data_M5, function(x) any(is.na(x)))

# Extract the names of columns that contain NA values
# names(na_columns[na_columns])

# all as expected and documented
# [1] "TIME_DEMCHAR_Page.Submit"     "TIME_DEMCHAR_First.Click"
# [3] "TIME_DEMCHAR_Last.Click"      "TIME_DEMCHAR_Click.Count"
# [5] "TIME_DEMCHAR_Click.Time.Diff" "DEMCHAR_FleschKincaid"
# [7] "CONFUSEOPEN_FleschKincaid"    "TEDIOUSOPEN_FleschKincaid"
Data_M5<-Data_M5%>%
  mutate(across(everything(), ~ replace_na(., 0)))

Data_M5a<-Data_M5%>%
  rowwise() %>%
  mutate(
    avg_first_click = mean(c_across(all_of(grep("First.Click", names(Data_M5), value = TRUE))), na.rm = TRUE),
    avg_last_click = mean(c_across(all_of(grep("Last.Click", names(Data_M5), value = TRUE))), na.rm = TRUE),
    avg_click_count = mean(c_across(all_of(grep("Click.Count", names(Data_M5), value = TRUE))), na.rm = TRUE),
    avg_click_time_diff = mean(c_across(all_of(grep("Click.Time.Diff", names(Data_M5), value = TRUE))), na.rm = TRUE),
    avg_page_submit = mean(c_across(all_of(grep(".Submit", names(Data_M5), value = TRUE))), na.rm = TRUE)
  ) %>%
  ungroup()

### DONE ###

# Data_M5[duplicated(Data_M5), ] #; none, good
### Perform Log Transformation and scaling

# Identify numeric columns and their indices
numeric_indices <- which(sapply(Data_M5a, is.numeric))
log_columns <- names(Data_M5a)[numeric_indices[numeric_indices >= 13]]
scale_columns <- names(Data_M5a)[numeric_indices[numeric_indices >= 13]]

# perform log transformation among only click time, click count, duration data, and resolution product (also right skewed) - need to exclude the scaled items, such as text analysis
log.exclude_columns <- c("LocationLatitude", "LocationLongitude","Q_RecaptchaScore")
log_columns <- setdiff(log_columns, c(log.exclude_columns, Text_analysis))

Data_M5a.log <- Data_M5a %>%
  mutate(
    across(.cols = all_of(log_columns), .fns = ~ log(. + 1)))


Data_M5a.log_z<-Data_M5a.log%>%
  mutate(# Apply log to selected columns, excluding some
    across(.cols = all_of(scale_columns), .fns = scale)  # Apply scale to all numeric columns starting from the 18th
  )
###

### One-hot coding ###
# factor_columns <- sapply(Data_M5a, is.factor)
# factors_in_Data_M5a <- Data_M5a[, factor_columns]
# names(factors_in_Data_M5a)

# install.packages("fastDummies")
library(fastDummies)


process_dataset <- function(data) {
  data <- data %>%
    mutate(
      CONFUSE = relevel(CONFUSE, ref = "No"),
      TEDIOUS = relevel(TEDIOUS, ref = "No"),
      day_of_week = relevel(day_of_week, ref = "Tuesday"),
      ATTN1 = relevel(ATTN1, ref = "_InCorrect"),
      ATTN2 = relevel(ATTN2, ref = "_InCorrect"),
      mobile = relevel(mobile, ref = "_No"),
      GENDER = relevel(GENDER, ref = "Male")
    )
    
  day_mapping <- c("Sunday" = 0, "Monday" = 1, "Tuesday" = 2, "Wednesday" = 3,
                   "Thursday" = 4, "Friday" = 5, "Saturday" = 6)
  data$day_of_week_num.from_Sunday <- day_mapping[data$day_of_week]
  data <- data %>%
    select(-day_of_week)%>%
    select(Panel, everything())

  data_part1 <- as.data.frame(data[, 1:14])
  data_part2 <- as.data.frame(data[, 15:ncol(data)])

  data_part2_dummy_coded <- dummy_cols(data_part2,
                                       remove_first_dummy = TRUE,
                                       remove_selected_columns = TRUE)

  data_dummy_coded <- cbind(data_part1, data_part2_dummy_coded)
  colnames(data_dummy_coded) <- gsub(" ", "_", colnames(data_dummy_coded))

  return(data_dummy_coded)
}

Data_M5a.log_z_dummy_coded <- process_dataset(Data_M5a.log_z)%>%
  mutate(TIME_INTRO_Page.Submit=as.numeric(TIME_INTRO_Page.Submit)) # need to look into this further....
Data_M5a.log_dummy_coded <- process_dataset(Data_M5a.log)

na_columns <- sapply(Data_M5a.log_z_dummy_coded, function(x) any(is.na(x)))

# # Extract the names of columns that contain NA values
# names(na_columns[na_columns]) # NONE
write.csv(Data_M5a.log_dummy_coded,"Data_M5a.log_dummy_coded_20240921.csv")

# class(Data_M5a.log_dummy_coded$TIME_INTRO_Page.Submit)
# class(Data_M5a.log_z_dummy_coded$TIME_INTRO_Page.Submit)
F_measures<-read.csv("./Data Prep for ML/MP5_Full_data_ML_20240908.csv")%>%select(crt2_score,rotsum,MXsum)
Data_M5a.log_z_dummy_coded$crt2_score<-F_measures$crt2_score
Data_M5a.log_z_dummy_coded$rotsum<-F_measures$rotsum
Data_M5a.log_z_dummy_coded$MXsum<-F_measures$MXsum
# Set seed for reproducibility
set.seed(123)

# Split the data into training and test sets
train_M5a <- createDataPartition(Data_M5a.log_z_dummy_coded$F, p = 0.8, list = FALSE)

variables_to_remove.F <- c("A","C","E","numSunkCost","numLessMore","numDefault","numDisease","SunkCondition","LessMoreCondition","DiseaseCondition","DefaultCondition","Panel",Text_analysis,"crt2_score","rotsum","MXsum","Antonym","Synonym")
# We need to remove all the above variables other than F for training and testing; but we'd like to see their relationship with the predicted F

# Create training and testing sets
data.trn_M5a <- as.data.frame(Data_M5a.log_z_dummy_coded[train_M5a, ])
data.tst_M5a <- as.data.frame(Data_M5a.log_z_dummy_coded[-train_M5a, ]) 

# data.tst_M5a[duplicated(rbind(data.trn_M5a, data.tst_M5a))[-(1:nrow(data.trn_M5a))], ] # none, good, no duplicates, so we won't need to worry aboyt data leakage

data.trn_M5a.F <- as.data.frame(data.trn_M5a[, !colnames(data.trn_M5a) %in% variables_to_remove.F])
actual_M5a.F <- data.tst_M5a$F

ctrl <- trainControl(method = "cv", number = 10, summaryFunction = defaultSummary)
tuneGrid <- expand.grid(
  mtry = c(64),
  min.node.size = c(5),
  splitrule = c("variance")
)

# Model_M5a_rf <- train(F ~ ., data = data.trn_M5a.F, method = "ranger",
#                   tuneGrid = tuneGrid, importance = 'impurity', num.trees = 750,
#                   trControl = ctrl)
# # # Save the Random Forest model
# save(Model_M5a_rf, file = "./Saved ML Model and Data/Model_M5a_rf.RData")
load("./Saved ML Model and Data/Model_M5a_rf.RData")

pred_M5a_rf<- predict(Model_M5a_rf,data.tst_M5a)
test_perf.M5a_rf <- postResample(pred_M5a_rf, actual_M5a.F)
data.tst_M5a.rf <- cbind(data.tst_M5a, Predicted_F = pred_M5a_rf)
pred_full_M5a_rf <- predict(Model_M5a_rf, Data_M5a.log_z_dummy_coded)
data.full_M5a.rf<- cbind(Data_M5a.log_z_dummy_coded, Predicted_F = pred_full_M5a_rf)



### DONE with RF



Model_M5a_reg <- train(F ~ ., data = data.trn_M5a.F, method = "lm")
pred_M5a_reg<- predict(Model_M5a_reg,data.tst_M5a)
test_perf.M5a_reg <- postResample(pred_M5a_reg, actual_M5a.F)
data.tst_M5a.reg <- cbind(data.tst_M5a, Predicted_F = pred_M5a_reg)
pred_full_M5a_reg <- predict(Model_M5a_reg, Data_M5a.log_z_dummy_coded)
data.full_M5a.reg<- cbind(Data_M5a.log_z_dummy_coded, Predicted_F = pred_full_M5a_reg)


### DONE training reg Model ###

### Train Elastic Net ###

Model_M5a_net <- train(F ~ ., data = data.trn_M5a.F, method = "glmnet", trControl = ctrl, tuneLength = 40) # 10 fold cv to select the best model, set tune length to 40
pred_M5a_net<- predict(Model_M5a_net,data.tst_M5a)
test_perf.M5a_net <- postResample(pred_M5a_net, actual_M5a.F)
data.tst_M5a.net <- cbind(data.tst_M5a, Predicted_F = pred_M5a_net)
pred_full_M5a_net <- predict(Model_M5a_net, Data_M5a.log_z_dummy_coded)
data.full_M5a.net<- cbind(Data_M5a.log_z_dummy_coded, Predicted_F = pred_full_M5a_net)


tuneGrid.4 <- expand.grid(
  n.trees = c(800),   # Number of trees (boosting iterations)
  interaction.depth = c(4), # Tree depth
  shrinkage = c(0.01), #   shrinkage = c(0.01,0,03,0.05,0.08,0.1); 0,01 best
  n.minobsinnode = c(50)     # Minimum number of observations in terminal nodes
)


# Model_M5a_gbm_4 <- train(F ~ ., data = data.trn_M5a.F, method = "gbm",
#                     tuneGrid = tuneGrid.4, trControl = ctrl,
#                     verbose = FALSE)
# save(Model_M5a_gbm_4, file = "./Saved ML Model and Data/Model_M5a_gbm.RData")
load("./Saved ML Model and Data/Model_M5a_gbm_4_50.RData")
Model_M5a_gbm<-Model_M5a_gbm_4
pred_M5a_gbm <- predict(Model_M5a_gbm, data.tst_M5a)
test_perf.M5a_gbm <- postResample(pred_M5a_gbm, actual_M5a.F)
data.tst_M5a.gbm <- cbind(data.tst_M5a, Predicted_F = pred_M5a_gbm)
pred_full_M5a_gbm <- predict(Model_M5a_gbm, Data_M5a.log_z_dummy_coded)
data.full_M5a.gbm <- cbind(Data_M5a.log_z_dummy_coded, Predicted_F = pred_full_M5a_gbm)
actual_M5a.F_full<-Data_M5a.log_z_dummy_coded$F
test_perf.full_M5a_gbm<- postResample(pred_full_M5a_gbm, actual_M5a.F_full)
test_perf.full_M5a_rf<- postResample(pred_full_M5a_rf, actual_M5a.F_full)
test_perf.full_M5a_reg<- postResample(pred_full_M5a_reg, actual_M5a.F_full)
test_perf.full_M5a_net<- postResample(pred_full_M5a_net, actual_M5a.F_full)
# test_perf.M5a_gbm
# test_perf.full_M5a_rf
# test_perf.full_M5a_reg
# test_perf.full_M5a_net

1.0.2 Model Fit

  • The following stats come from cross-validation
# for Elastic Net, this section needs a bit more work but the results are only off by slightly. Needs to come back to this

train_perf_M5a_rf <- Model_M5a_rf$results[which.min(Model_M5a_rf$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_reg <- Model_M5a_reg$results[which.min(Model_M5a_reg$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_net <- Model_M5a_net$results[which.min(Model_M5a_net$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_gbm <- Model_M5a_gbm$results[which.min(Model_M5a_gbm$results$RMSE), c("RMSE", "Rsquared", "MAE")]

# Combine training performance metrics into a data frame
performance_metrics_trn <- data.frame(
  Model = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(as.numeric(train_perf_M5a_reg["MAE"]), as.numeric(train_perf_M5a_net["MAE"]), as.numeric(train_perf_M5a_rf["MAE"]), as.numeric(train_perf_M5a_gbm["MAE"])),
  RMSE = c(as.numeric(train_perf_M5a_reg["RMSE"]), as.numeric(train_perf_M5a_net["RMSE"]), as.numeric(train_perf_M5a_rf["RMSE"]), as.numeric(train_perf_M5a_gbm["RMSE"])),
  Rsquared = c(as.numeric(train_perf_M5a_reg["Rsquared"]), as.numeric(train_perf_M5a_net["Rsquared"]), as.numeric(train_perf_M5a_rf["Rsquared"]), as.numeric(train_perf_M5a_gbm["Rsquared"])))


performance_table_trn <- performance_metrics_trn %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table_trn, caption = "Model Fit Statistics") %>%
  kable_styling(full_width = F, position = "center")
Model Fit Statistics
Model MAE RMSE Rsquared
Linear Regression 0.5825 0.7245 0.5824
Elastic Net 0.5576 0.6913 0.6154
Random Forest 0.5250 0.6535 0.6602
Gradient Boosting 0.5141 0.6374 0.6750
###Plot ###

performance_metrics_long_trn <- performance_metrics_trn %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")

trn_M5a.sd_F <- sd(data.trn_M5a$F)

ggplot(performance_metrics_long_trn, aes(x = Metric, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = trn_M5a.sd_F, yend = trn_M5a.sd_F),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
            aes(x = 2, y = trn_M5a.sd_F, label = paste0("Std.dev of F in Training Data: ", round(trn_M5a.sd_F, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Model Fit Statistics",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

1.0.3 Model Interpretation I

  • Refer to Section 1.1.1.1 for variable importance when trained with full MP5 data.

1.0.4 Model Interpretation II (SHAP Value for tree-based models)

  • Refer to Section 1.1.1.2 for variable importance when trained with full MP5 data.

1.0.5 Model Accuracy on Held-out Test Data

  • Correlation between predicted F using different methods
predicted_f_reg <- data.tst_M5a.reg$Predicted_F
predicted_f_rf <- data.tst_M5a.rf$Predicted_F
predicted_f_gbm <- data.tst_M5a.gbm$Predicted_F 
predicted_f_net <- data.tst_M5a.net$Predicted_F
actual_F<-data.tst_M5a.net$F

# Combine into a data frame
predicted_f <- data.frame(
  Pred_F.Reg = predicted_f_reg,
  Pred_F.RandomForest = predicted_f_rf,
  Pred_F.GradientBoosting = predicted_f_gbm,
  Pred_F.ElasticNet = predicted_f_net,
  F = actual_F
)

ggpairs(predicted_f, c("Pred_F.Reg","Pred_F.ElasticNet","Pred_F.RandomForest","Pred_F.GradientBoosting","F"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

  • The following metrics pertains to how the model did in predicting held-out data (un-used during model training). Signallings how generalizable the models are.
performance_metrics <- data.frame(
Method = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(test_perf.M5a_reg["MAE"], test_perf.M5a_net["MAE"], test_perf.M5a_rf["MAE"], test_perf.M5a_gbm["MAE"]),
  RMSE = c(test_perf.M5a_reg["RMSE"], test_perf.M5a_net["RMSE"], test_perf.M5a_rf["RMSE"], test_perf.M5a_gbm["RMSE"]),
  Rsquared = c(test_perf.M5a_reg["Rsquared"], test_perf.M5a_net["Rsquared"], test_perf.M5a_rf["Rsquared"], test_perf.M5a_gbm["Rsquared"])
)



performance_metrics_long <- performance_metrics %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")


# Create the performance metrics table
performance_table <- performance_metrics %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table, caption = "Comparison of Model Performance in Held-Out Test Data") %>%
  kable_styling(full_width = F, position = "center")
Comparison of Model Performance in Held-Out Test Data
Method MAE RMSE Rsquared
Linear Regression 0.5910 0.7424 0.5438
Elastic Net 0.5913 0.7324 0.5501
Random Forest 0.5525 0.6922 0.5994
Gradient Boosting 0.5051 0.6262 0.6760
tst_M5a.sd_F <- sd(data.tst_M5a$F)

ggplot(performance_metrics_long, aes(x = Metric, y = Value, fill = Method)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = tst_M5a.sd_F, yend = tst_M5a.sd_F),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long %>% filter(Metric == "RMSE"),
            aes(x = 2, y = tst_M5a.sd_F, label = paste0("Std.dev of F in Held-Out Test Data: ", round(tst_M5a.sd_F, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Comparison of Model Performance in Held-Out Testing Data",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

1.0.6 Density and Residual Plots for Predicting F in Held-Out Test Data

  • Is there heterogeniety in prediction accuracy, as a function of 1] the actual F score or 2] free data?
    • 1]: Visual inspection of density and residual plots below to suggest that lower values of F tend to be overestimated and higher values of F tend to be underestimated
    • 2] we can build another model to predict the residuals using the same explanatory variables that were used for predicting F.

Histogram of F in Held-Out Test Data

hist(data.tst_M5a$F)

library(gridExtra)
pred_M5a_rf <- predict(Model_M5a_rf, data.tst_M5a)
pred_M5a_reg <- predict(Model_M5a_reg, data.tst_M5a)
pred_M5a_net <- predict(Model_M5a_net, data.tst_M5a)
pred_M5a_gbm <- predict(Model_M5a_gbm, data.tst_M5a)

# Calculate residuals
residuals_rf <- actual_M5a.F - pred_M5a_rf
residuals_reg <- actual_M5a.F - pred_M5a_reg
residuals_net <- actual_M5a.F - pred_M5a_net
residuals_gbm <- actual_M5a.F - pred_M5a_gbm

# Create a data frame with residuals and actual values
residuals_data <- data.frame(
  Actual = actual_M5a.F,
  Pred_RF = pred_M5a_rf,
  Pred_Reg = pred_M5a_reg,
  Pred_NET = pred_M5a_net,
  Pred_GBM = pred_M5a_gbm,
  Residual_RF = residuals_rf,
  Residual_Reg = residuals_reg,
  Residual_NET = residuals_net,
  Residual_GBM = residuals_gbm
)

create_density_plot <- function(predicted, actual, title) {
  ggplot() +
    geom_density(aes(x = predicted, color = "Predicted F"), alpha = 0.4, size = 1) +
    geom_density(aes(x = actual, color = "Actual F"), alpha = 0.4, size = 1) +
    scale_y_continuous(limits = c(0, 1)) +
    scale_color_manual(values = c("Actual F" = "firebrick", "Predicted F" = "darkorange")) +
    labs(title = title, x = "F Score", y = "Density") +
    theme_minimal() +
    theme(legend.position = "bottom", legend.title = element_blank())
}

# Create density plots for each model
density_rf <- create_density_plot(residuals_data$Pred_RF, residuals_data$Actual, "Density Plot for Random Forest")
density_reg <- create_density_plot(residuals_data$Pred_Reg, residuals_data$Actual, "Density Plot for Regression")
density_net <- create_density_plot(residuals_data$Pred_NET, residuals_data$Actual, "Density Plot for Elastic Net")
density_gbm <- create_density_plot(residuals_data$Pred_GBM, residuals_data$Actual, "Density Plot for Gradient Boosting")

# Arrange all density plots in a grid
grid.arrange(density_reg, density_net, density_rf, density_gbm, ncol = 2, nrow = 2)

create_residual_plot <- function(actual, residuals, title) {
  ggplot(residuals_data, aes(x = actual, y = residuals)) +
    geom_point(alpha = 0.4) +
    geom_rug(sides = "b", alpha = 0.2) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
    labs(title = title, x = "Actual F Score", y = "Residuals") +
    theme_minimal() +
    ylim(-2, 2)
}

# Create residual plots for each model
residual_rf <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_RF, "Residuals Plot for Random Forest")
residual_reg <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_Reg, "Residuals Plot for Regression")
residual_net <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_NET, "Residuals Plot for Elastic Net")
residual_gbm <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_GBM, "Residuals Plot for Gradient Boosting")

# Arrange all residual plots in a grid
grid.arrange( residual_reg, residual_net,residual_rf, residual_gbm, ncol = 2, nrow = 2)

1.0.7 Correlation Btw ML Predicted F and other FACE factors

  • We focus on the test dataset (n=292)
  • Significance Notation: *** if the p-value is < 0.001, ** if the p-value is < 0.01, * if the p-value is < 0.05, . if the p-value is < 0.10

Linear Regression

ggpairs(data.tst_M5a.reg, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net

ggpairs(data.tst_M5a.net, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest

ggpairs(data.tst_M5a.rf, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.tst_M5a.gbm, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PF and F measures and ATTN1

ggpairs(data.tst_M5a.gbm, c("Predicted_F","ATTN1__Correct","crt2_score","MXsum", "rotsum","C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

1.0.8 Explain Heterogenity with Predicted F in Test Sample (20% study 5 Data)

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_M5a, family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_M5a, family = binomial)


Default.PAN_Pred_F.reg_ACE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.reg , family = binomial)

Default.PAN_Pred_F.net_ACE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.net, family = binomial)

Default.PAN_Pred_F.rf_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.rf, family = binomial)

Default.PAN_Pred_F.gbm_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.gbm, family = binomial)

Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_F.reg_ACE" = Default.PAN_Pred_F.reg_ACE,
  "Default.PAN_Pred_F.net_ACE" = Default.PAN_Pred_F.net_ACE,
  "Default.PAN_Pred_F.rf_ACE" = Default.PAN_Pred_F.rf_ACE,
  "Default.PAN_Pred_F.gbm_ACE" = Default.PAN_Pred_F.gbm_ACE
)
Model Results
term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_F.reg_ACE,Default.PAN_Pred_F.net_ACE,Default.PAN_Pred_F.rf_ACE,Default.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result
Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.F(reg)_ACE PAN_Pred.F(net)_ACE PAN_Pred.F(rf)_ACE PAN_Pred.F(gbm)_ACE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.96 0.28 -0.15 0.884 0.94 0.28 -0.21 0.837 0.81 0.26 -0.66 0.510 0.82 0.27 -0.62 0.534 0.91 0.29 -0.30 0.762 0.90 0.28 -0.34 0.737
DefaultCondition [OPTOUT] 1.41 0.58 0.83 0.409 1.44 0.61 0.86 0.391 1.71 0.80 1.16 0.248 1.74 0.83 1.15 0.248 1.57 0.74 0.96 0.335 1.49 0.68 0.87 0.382
F 0.87 0.35 -0.35 0.725
A 0.85 0.17 -0.79 0.431 0.75 0.16 -1.36 0.173 0.77 0.16 -1.25 0.210 0.82 0.17 -0.96 0.338 0.82 0.16 -1.01 0.314
C 1.69 0.58 1.53 0.127 1.40 0.33 1.41 0.158 1.42 0.34 1.46 0.145 1.52 0.37 1.69 0.091 1.48 0.38 1.55 0.121
E 0.88 0.16 -0.72 0.474 0.94 0.15 -0.40 0.687 0.93 0.15 -0.44 0.661 0.91 0.15 -0.59 0.558 0.92 0.15 -0.54 0.590
DefaultCondition [OPTOUT]
× F
1.35 0.84 0.49 0.627
DefaultCondition [OPTOUT]
× A
1.07 0.31 0.24 0.814 1.23 0.37 0.70 0.481 1.21 0.36 0.63 0.526 1.13 0.33 0.43 0.669 1.13 0.33 0.42 0.674
DefaultCondition [OPTOUT]
× C
0.68 0.34 -0.77 0.443 0.91 0.31 -0.26 0.792 0.91 0.31 -0.28 0.779 0.86 0.30 -0.43 0.668 0.85 0.31 -0.44 0.658
DefaultCondition [OPTOUT]
× E
1.68 0.50 1.74 0.082 1.48 0.38 1.54 0.123 1.48 0.38 1.54 0.123 1.52 0.39 1.64 0.101 1.54 0.40 1.67 0.094
Predicted F 1.42 0.37 1.34 0.179 1.35 0.37 1.10 0.271 1.06 0.32 0.20 0.839 1.12 0.34 0.36 0.718
DefaultCondition [OPTOUT]
× Predicted F
0.67 0.26 -1.01 0.311 0.68 0.29 -0.92 0.355 0.84 0.38 -0.39 0.700 0.90 0.42 -0.22 0.827
Observations 292 292 292 292 292 292
AIC 389.079 395.399 393.762 394.331 395.482 395.506
log-Likelihood -188.540 -183.699 -182.881 -183.165 -183.741 -183.753
Anova and Eta Square
  • As in the origianl paper, type I anova is used. We first attribute variance to condition, FACE, then Panel main effect, followed by the conditionFACE interaction, and the Panelcondition interaction last.
  • As in the original paper, Etasq of a variable in linear regression models (LessMore and Sunk) is calculated as the sum of squares explained by the variables divided by the total sum of squares. Etasq of a variable in logistic regression models (Default and Unusual Disease) is calculated as the deviance explained by the model relative to the null model (deviance from the null model divided by the model’s residual deviance).
  • I validated this pipeline by reproducing the stats reported in the SI for Study 1.
  • Woudl we like to see the full Anova stats?
calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")
Etasq Table: Default Paradigm
Condition Panel Default Condition:Panel
Default.PAN 0.02968*** 0.0095 0.02393**
Default.PAN_FACE 0.02968*** 0.00305 0.00314
Default.PAN_Pred_F.reg_ACE 0.02968*** 0.00282 0.00619
Default.PAN_Pred_F.net_ACE 0.02968*** 0.00285 0.00605
Default.PAN_Pred_F.rf_ACE 0.02968*** 0.00321 0.00513
Default.PAN_Pred_F.gbm_ACE 0.02968*** 0.00287 0.00488

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_M5a.reg, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_M5a.reg, family = binomial)

Disease.PAN_Pred_F.reg_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.reg, family = binomial)

Disease.PAN_Pred_F.net_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.net, family = binomial)

Disease.PAN_Pred_F.rf_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.rf, family = binomial)

Disease.PAN_Pred_F.gbm_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_M5a.gbm, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_F.reg_ACE" = Disease.PAN_Pred_F.reg_ACE,
  "Disease.PAN_Pred_F.net_ACE" = Disease.PAN_Pred_F.net_ACE,
  "Disease.PAN_Pred_F.rf_ACE" = Disease.PAN_Pred_F.rf_ACE, 
  "Disease.PAN_Pred_F.gbm_ACE" = Disease.PAN_Pred_F.gbm_ACE
)


# extract_aic(Disease_model)
Model Results
term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_F.reg_ACE,Disease.PAN_Pred_F.net_ACE,Disease.PAN_Pred_F.rf_ACE,Disease.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result
Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.F(reg)_ACE PAN_Pred.F(net)_ACE PAN_Pred.F(rf)_ACE PAN_Pred.F(gbm)_ACE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.48 0.13 -2.67 0.008 0.44 0.13 -2.80 0.005 0.38 0.12 -2.96 0.003 0.38 0.13 -2.91 0.004 0.40 0.13 -2.81 0.005 0.40 0.13 -2.86 0.004
DiseaseCondition [LOSS] 4.04 1.82 3.09 0.002 4.19 1.97 3.04 0.002 4.37 2.24 2.87 0.004 4.34 2.28 2.79 0.005 3.47 1.81 2.38 0.017 3.78 1.91 2.62 0.009
F 2.42 1.16 1.84 0.065
A 1.19 0.23 0.90 0.369 1.14 0.23 0.65 0.512 1.16 0.23 0.74 0.462 1.18 0.23 0.84 0.399 1.17 0.23 0.80 0.426
C 0.41 0.17 -2.20 0.028 0.63 0.16 -1.78 0.075 0.63 0.17 -1.74 0.082 0.64 0.17 -1.68 0.093 0.61 0.17 -1.78 0.074
E 1.07 0.21 0.33 0.745 0.94 0.16 -0.37 0.713 0.93 0.16 -0.43 0.670 0.91 0.16 -0.55 0.582 0.93 0.16 -0.41 0.680
DiseaseCondition [LOSS] ×
F
0.92 0.64 -0.12 0.902
DiseaseCondition [LOSS] ×
A
1.49 0.50 1.17 0.243 1.73 0.58 1.63 0.103 1.71 0.57 1.60 0.109 1.62 0.54 1.44 0.150 1.63 0.55 1.47 0.141
DiseaseCondition [LOSS] ×
C
1.17 0.65 0.28 0.780 1.16 0.42 0.41 0.683 1.15 0.42 0.37 0.709 0.98 0.37 -0.04 0.966 1.07 0.42 0.18 0.859
DiseaseCondition [LOSS] ×
E
0.77 0.24 -0.86 0.392 0.72 0.19 -1.23 0.219 0.73 0.19 -1.19 0.234 0.78 0.21 -0.94 0.347 0.75 0.20 -1.04 0.297
Predicted F 1.71 0.54 1.71 0.088 1.66 0.54 1.55 0.121 1.59 0.55 1.33 0.184 1.69 0.62 1.43 0.153
DiseaseCondition [LOSS] ×
Predicted F
0.65 0.28 -1.00 0.317 0.68 0.31 -0.85 0.395 1.08 0.54 0.16 0.873 0.89 0.46 -0.23 0.815
Observations 292 292 292 292 292 292
AIC 363.491 356.242 359.179 359.740 358.168 358.950
log-Likelihood -175.745 -164.121 -165.589 -165.870 -165.084 -165.475
Anova and Eta Square
calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")
Etasq Table: Disease Paradigm
Condition Panel DiseaseCondition:Panel
Disease.PAN 0.03974*** 0.03523** 0.01646*
Disease.PAN_FACE 0.03974*** 0.0196* 0.00098
Disease.PAN_Pred_F.reg_ACE 0.03974*** 0.00683 0.00154
Disease.PAN_Pred_F.net_ACE 0.03974*** 0.00643 0.00141
Disease.PAN_Pred_F.rf_ACE 0.03974*** 0.0091 8e-04
Disease.PAN_Pred_F.gbm_ACE 0.03974*** 0.00913 9e-04

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_M5a.reg)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.reg)
LessMore.PAN_Pred_F.reg_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.reg)
LessMore.PAN_Pred_F.net_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.net)
LessMore.PAN_Pred_F.rf_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.rf)
LessMore.PAN_Pred_F.gbm_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.gbm)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_F.reg_ACE" = LessMore.PAN_Pred_F.reg_ACE,
  "LessMore.PAN_Pred_F.net_ACE" = LessMore.PAN_Pred_F.net_ACE,
  "LessMore.PAN_Pred_F.rf_ACE" = LessMore.PAN_Pred_F.rf_ACE, 
  "LessMore.PAN_Pred_F.gbm_ACE" = LessMore.PAN_Pred_F.gbm_ACE
)
Model Results
term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_F.reg_ACE,LessMore.PAN_Pred_F.net_ACE,LessMore.PAN_Pred_F.rf_ACE,LessMore.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result
LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.F(reg)_ACE PAN_Pred.F(net)_ACE PAN_Pred.F(rf)_ACE PAN_Pred.F(gbm)_ACE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.51 0.21 25.98 <0.001 5.52 0.21 26.19 <0.001 5.51 0.23 23.51 <0.001 5.52 0.24 23.08 <0.001 5.53 0.23 23.85 <0.001 5.59 0.23 24.74 <0.001
LessMoreCondition [SCARF] 0.76 0.29 2.65 0.009 0.83 0.29 2.89 0.004 0.80 0.31 2.58 0.010 0.74 0.32 2.35 0.019 0.66 0.31 2.11 0.035 0.66 0.30 2.17 0.031
F -0.03 0.25 -0.11 0.913
A -0.26 0.14 -1.88 0.062 -0.27 0.14 -1.93 0.055 -0.27 0.14 -1.91 0.057 -0.26 0.14 -1.93 0.054 -0.24 0.14 -1.71 0.088
C 0.16 0.21 0.77 0.442 0.14 0.15 0.94 0.348 0.14 0.15 0.98 0.330 0.15 0.15 0.98 0.326 0.20 0.16 1.27 0.207
E -0.02 0.12 -0.17 0.863 -0.01 0.10 -0.08 0.940 -0.01 0.10 -0.12 0.901 -0.02 0.10 -0.15 0.880 -0.04 0.10 -0.43 0.669
LessMoreCondition [SCARF]
× F
0.38 0.40 0.95 0.345
LessMoreCondition [SCARF]
× A
0.51 0.18 2.78 0.006 0.52 0.19 2.79 0.006 0.50 0.19 2.73 0.007 0.47 0.18 2.58 0.010 0.46 0.18 2.51 0.013
LessMoreCondition [SCARF]
× C
-0.24 0.32 -0.75 0.452 -0.05 0.22 -0.23 0.819 -0.08 0.22 -0.36 0.717 -0.17 0.23 -0.75 0.453 -0.19 0.24 -0.82 0.414
LessMoreCondition [SCARF]
× E
0.05 0.18 0.28 0.780 -0.04 0.15 -0.24 0.807 -0.03 0.15 -0.17 0.866 -0.02 0.15 -0.13 0.898 0.02 0.15 0.11 0.911
Predicted F 0.03 0.19 0.14 0.887 0.00 0.20 0.00 0.998 -0.01 0.21 -0.07 0.943 -0.18 0.22 -0.82 0.412
LessMoreCondition [SCARF]
× Predicted F
0.12 0.25 0.48 0.635 0.23 0.27 0.87 0.386 0.47 0.29 1.64 0.102 0.51 0.29 1.73 0.084
Observations 292 292 292 292 292 292
R2 / R2 adjusted 0.208 / 0.194 0.249 / 0.214 0.247 / 0.212 0.250 / 0.215 0.259 / 0.225 0.254 / 0.220
Anova and Eta Square
calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")
Etasq Table: LessMore Paradigm
Condition Panel LessMoreCondition:Panel
LessMore.PAN 0.08314*** 0.12202*** 0.00272
LessMore.PAN_FACE 0.08314*** 0.04611*** 0.00099
LessMore.PAN_Pred_F.reg_ACE 0.08314*** 0.05089*** 0.00111
LessMore.PAN_Pred_F.net_ACE 0.08314*** 0.05157*** 0.00089
LessMore.PAN_Pred_F.rf_ACE 0.08314*** 0.05388*** 0.00094
LessMore.PAN_Pred_F.gbm_ACE 0.08314*** 0.05087*** 0.0011

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_M5a)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.reg)
Sunk.PAN_Pred_F.reg_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_M5a.reg)
Sunk.PAN_Pred_F.net_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.net)
Sunk.PAN_Pred_F.rf_ACE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_M5a.rf)
Sunk.PAN_Pred_F.gbm_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.gbm)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_F.reg_ACE" = Sunk.PAN_Pred_F.reg_ACE,
  "Sunk.PAN_Pred_F.net_ACE" = Sunk.PAN_Pred_F.net_ACE,
  "Sunk.PAN_Pred_F.rf_ACE" = Sunk.PAN_Pred_F.rf_ACE, #0.302 with unscaled
  "Sunk.PAN_Pred_F.gbm_ACE" = Sunk.PAN_Pred_F.gbm_ACE
)

# extract_adjusted_r_squared(Sunk_model)
Full Model Results
term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_F.reg_ACE,Sunk.PAN_Pred_F.net_ACE,Sunk.PAN_Pred_F.rf_ACE,Sunk.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_F(reg)_ACE", 
                        "PAN_Pred_F(net)_ACE", 
                        "PAN_Pred_F(rf)_ACE", 
                        "PAN_Pred_F(gbm)_ACE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result
Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred_F(reg)_ACE PAN_Pred_F(net)_ACE PAN_Pred_F(rf)_ACE PAN_Pred_F(gbm)_ACE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.51 0.36 15.18 <0.001 5.51 0.36 15.31 <0.001 5.40 0.41 13.25 <0.001 5.40 0.42 12.84 <0.001 5.54 0.41 13.60 <0.001 5.41 0.40 13.50 <0.001
SunkCondition [PAID] 0.14 0.54 0.26 0.792 0.06 0.53 0.12 0.906 0.18 0.58 0.31 0.760 0.18 0.59 0.30 0.764 0.04 0.58 0.06 0.949 0.17 0.57 0.30 0.765
F 0.60 0.49 1.21 0.226
A 0.22 0.24 0.93 0.355 0.23 0.25 0.94 0.348 0.24 0.24 0.98 0.327 0.29 0.24 1.20 0.231 0.24 0.24 0.99 0.323
C -1.07 0.42 -2.52 0.012 -0.75 0.31 -2.42 0.016 -0.74 0.31 -2.40 0.017 -0.70 0.31 -2.22 0.027 -0.78 0.32 -2.42 0.016
E 0.07 0.24 0.29 0.771 -0.07 0.20 -0.34 0.736 -0.07 0.20 -0.34 0.736 -0.11 0.20 -0.54 0.592 -0.06 0.20 -0.30 0.767
SunkCondition [PAID] × F 0.53 0.73 0.72 0.469
SunkCondition [PAID] × A -0.50 0.34 -1.46 0.144 -0.40 0.35 -1.15 0.250 -0.40 0.34 -1.16 0.248 -0.45 0.34 -1.31 0.191 -0.41 0.34 -1.20 0.233
SunkCondition [PAID] × C 0.05 0.60 0.08 0.937 0.35 0.42 0.84 0.403 0.36 0.42 0.84 0.399 0.29 0.44 0.67 0.503 0.35 0.45 0.78 0.436
SunkCondition [PAID] × E 0.05 0.33 0.15 0.883 -0.04 0.28 -0.16 0.877 -0.05 0.28 -0.18 0.859 -0.01 0.28 -0.05 0.964 -0.05 0.28 -0.17 0.863
Predicted F 0.21 0.33 0.64 0.522 0.20 0.36 0.57 0.570 -0.02 0.36 -0.07 0.947 0.25 0.37 0.68 0.500
SunkCondition [PAID] ×
Predicted F
-0.03 0.47 -0.06 0.950 -0.04 0.50 -0.08 0.932 0.22 0.55 0.40 0.690 0.00 0.57 0.00 0.998
Observations 292 292 292 292 292 292
R2 / R2 adjusted 0.040 / 0.023 0.086 / 0.043 0.070 / 0.026 0.069 / 0.026 0.068 / 0.024 0.070 / 0.027
Anova and Eta Square
calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 6), sig_condition)
  res_panel <- paste0(round(perc_panel, 6), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 6), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")
Etasq Table: SunkCost Paradigm
Condition Panel LessMoreCondition:Panel
Sunk.PAN 0.004668 0.025882* 0.009219
Sunk.PAN_FACE 0.004668 0.01253 0.003898
Sunk.PAN_Pred_F.reg_ACE 0.004668 0.026614* 0.006125
Sunk.PAN_Pred_F.net_ACE 0.004668 0.027143* 0.006041
Sunk.PAN_Pred_F.rf_ACE 0.004668 0.027873* 0.005487
Sunk.PAN_Pred_F.gbm_ACE 0.004668 0.022653* 0.00569

Summary

Model Fit
  • \(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)

  • note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.tst_M5a,family=binomial)


R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_F.reg_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.net_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.rf_ACE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_F.gbm_ACE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Default"))



Disease.null<-glm(numDisease~1,data.tst_M5a,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_F.reg_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.net_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.rf_ACE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_F.gbm_ACE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(LessMore.PAN_Pred_F.rf_ACE)$r.squared,
                     summary(LessMore.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(Sunk.PAN_Pred_F.rf_ACE )$r.squared,
                     summary(Sunk.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "SunkCost"))




R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.F(reg)+ACE",  
                                   "Pan+Pred.F(net)+ACE", 
                                   "Pan+Pred.F(rf)+ACE",
                                   "Pan+Pred.F(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_F.reg_ACE), 
                    AIC(Default.PAN_Pred_F.net_ACE), 
                    AIC(Default.PAN_Pred_F.rf_ACE),
                    AIC(Default.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_F.reg_ACE), 
                    AIC(Disease.PAN_Pred_F.net_ACE), 
                    AIC(Disease.PAN_Pred_F.rf_ACE),
                    AIC(Disease.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Disease"))


AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PF(reg)+ACE",  
                                   "Pan+PF(net)+ACE", 
                                   "Pan+PF(rf)+ACE",
                                   "Pan+PF(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


### Avg. improvement in R2 for each model across all four paradigms, in FULL study 5
Default.null.Study5<-glm(numDefault~1,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.null.Study5<-glm(numDisease~1,Data_M5a.log_z_dummy_coded,family=binomial)
Default.PAN.Study5<-glm(numDefault~DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN.Study5<-glm(numDisease~DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN.Study5<-lm(numLessMore~LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN.Study5<-lm(numSunkCost~SunkCondition*Panel,Data_M5a.log_z_dummy_coded)


Default.PAN_FACE.Study5<-glm(numDefault~DefaultCondition*F+DefaultCondition*A+DefaultCondition*C+DefaultCondition*E+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~DiseaseCondition*F+DiseaseCondition*A+DiseaseCondition*C+DiseaseCondition*E+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~LessMoreCondition*F+LessMoreCondition*A+LessMoreCondition*C+LessMoreCondition*E+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~SunkCondition*F+SunkCondition*A+SunkCondition*C+SunkCondition*E+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

R2.Default = as.data.frame(cbind( 
  "R2" = c((1-logLik(Default.PAN.Study5)/logLik(Default.null.Study5)),
           (1-logLik(Default.PAN_FACE.Study5)/logLik(Default.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Default"
))


R2.Disease = as.data.frame(cbind( 
  "R2" = c((1-logLik(Disease.PAN.Study5)/logLik(Disease.null.Study5)),
           (1-logLik(Disease.PAN_FACE.Study5)/logLik(Disease.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Disease"
))

# For LessMore paradigm (linear models)
R2.LessMore = as.data.frame(cbind( 
  "R2" = c(summary(LessMore.PAN.Study5)$r.squared,
           summary(LessMore.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "LessMore"
))

# For SunkCost paradigm (linear models)
R2.Sunk = as.data.frame(cbind( 
  "R2" = c(summary(Sunk.PAN.Study5)$r.squared,
           summary(Sunk.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "SunkCost"
))

R2.FullStudy5 <- rbind(R2.Default, R2.Disease, R2.LessMore, R2.Sunk)
R2.FullStudy5$R2 <- round(as.numeric(R2.FullStudy5$R2), 3)

R2.FullStudy5 <- R2.FullStudy5 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement_full_study_5 <- R2.FullStudy5 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))%>%
  slice(-1) 

avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample", digits = 2)
Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample
Model Avg_Improvement
Pan 0.00
Pan+FACE 59.96
Pan+Pred.F(reg)+ACE 49.11
Pan+Pred.F(net)+ACE 47.90
Pan+Pred.F(rf)+ACE 48.54
Pan+Pred.F(gbm)+ACE 48.64
Pan+FACE (Full Study 5) 43.20
F*Cond Model Coefficients
  • DVs for Default & Framing are 1/0
  • DVs for LessMore & SunkCost are on the original scale
AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_F.reg_ACE),confint(Default.PAN_Pred_F.reg_ACE)),
cbind(coef(Default.PAN_Pred_F.net_ACE),confint(Default.PAN_Pred_F.net_ACE)),
cbind(coef(Default.PAN_Pred_F.rf_ACE),confint(Default.PAN_Pred_F.rf_ACE)),
cbind(coef(Default.PAN_Pred_F.gbm_ACE),confint(Default.PAN_Pred_F.gbm_ACE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_F.reg_ACE),confint(Disease.PAN_Pred_F.reg_ACE)),
cbind(coef(Disease.PAN_Pred_F.net_ACE),confint(Disease.PAN_Pred_F.net_ACE)),
cbind(coef(Disease.PAN_Pred_F.rf_ACE),confint(Disease.PAN_Pred_F.rf_ACE)),
cbind(coef(Disease.PAN_Pred_F.gbm_ACE),confint(Disease.PAN_Pred_F.gbm_ACE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_F.reg_ACE),confint(LessMore.PAN_Pred_F.reg_ACE)),
cbind(coef(LessMore.PAN_Pred_F.net_ACE),confint(LessMore.PAN_Pred_F.net_ACE)),
cbind(coef(LessMore.PAN_Pred_F.rf_ACE),confint(LessMore.PAN_Pred_F.rf_ACE)),
cbind(coef(LessMore.PAN_Pred_F.gbm_ACE),confint(LessMore.PAN_Pred_F.gbm_ACE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_F.reg_ACE),confint(Sunk.PAN_Pred_F.reg_ACE)),
cbind(coef(Sunk.PAN_Pred_F.net_ACE),confint(Sunk.PAN_Pred_F.net_ACE)),
cbind(coef(Sunk.PAN_Pred_F.rf_ACE),confint(Sunk.PAN_Pred_F.rf_ACE)),
cbind(coef(Sunk.PAN_Pred_F.gbm_ACE),confint(Sunk.PAN_Pred_F.gbm_ACE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]


AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq
### Study 5 Results ###
Default_model.Study5<-list(
  "Default.PAN.Study5" = Default.PAN.Study5,
  "Default.PAN_FACE.Study5" = Default.PAN_FACE.Study5)
Disease_model.Study5<-list(
  "Disease.PAN.Study5" = Disease.PAN.Study5,
  "Disease.PAN_FACE.Study5" = Disease.PAN_FACE.Study5)

LessMore_model.Study5<-list(
  "LessMore.PAN.Study5" = LessMore.PAN.Study5,
  "LessMore.PAN_FACE.Study5" = LessMore.PAN_FACE.Study5)


Sunk_model.Study5<-list(
  "Sunk.PAN.Study5" = Sunk.PAN.Study5,
  "Sunk.PAN_FACE.Study5" = Sunk.PAN_FACE.Study5)

deviance_explained <- sapply(Default_model.Study5, calculate_deviance_explained.Default)
deviance_explained_df.Default.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Default.Study5) <- c("Condition","Panel", "Default Condition:Panel")

deviance_explained <- sapply(Disease_model.Study5, calculate_deviance_explained.Disease)
deviance_explained_df.Disease.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Disease.Study5) <- c("Condition","Panel", "Disease Condition:Panel")

variance_explained_lm <- sapply(LessMore_model.Study5, calculate_variance_explained_LessMore)
variance_explained_lm_df.LessMore.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

variance_explained_lm <- sapply(Sunk_model.Study5, calculate_variance_explained_Sunk)
variance_explained_lm_df.SunkCost.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.SunkCost.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.F(reg)+ACE", "Pan+Pred.F(net)+ACE", "Pan+Pred.F(rf)+ACE", "Pan+Pred.F(gbm)+ACE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)


etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.F(reg)+ACE",  
                                   "Pan+Pred.F(net)+ACE", 
                                   "Pan+Pred.F(rf)+ACE",
                                   "Pan+Pred.F(gbm)+ACE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in the test sample", digits = 2)
Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in the test sample
Model Avg_Reduction
Pan 0.00
Pan+FACE 75.56
Pan+Pred.F(reg)+ACE 64.38
Pan+Pred.F(net)+ACE 66.98
Pan+Pred.F(rf)+ACE 69.91
Pan+Pred.F(gbm)+ACE 67.99
Pan (Full Study5) 0.00
Pan+FACE (Full Study5) 74.19

1.0.9 Explain Heterogenity with Predicted F in Full MP5 data

  • Random forest performs best when predicting the full Study 5 data, outperforming GBM. This could be because while we had made GBM more parsimounious, we haven’t done so with random forest.

1.0.9.1 Accuracy in full MP5 data

performance_metrics <- data.frame(
Method = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(test_perf.full_M5a_reg["MAE"], test_perf.full_M5a_net["MAE"], test_perf.full_M5a_rf["MAE"], test_perf.M5a_gbm["MAE"]),
  RMSE = c(test_perf.full_M5a_reg["RMSE"], test_perf.full_M5a_net["RMSE"], test_perf.full_M5a_rf["RMSE"], test_perf.M5a_gbm["RMSE"]),
  Rsquared = c(test_perf.full_M5a_reg["Rsquared"], test_perf.full_M5a_net["Rsquared"], test_perf.full_M5a_rf["Rsquared"], test_perf.full_M5a_gbm["Rsquared"])
)



performance_metrics_long <- performance_metrics %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")


# Create the performance metrics table
performance_table <- performance_metrics %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table, caption = "Comparison of Model Performance in Held-Out Test Data") %>%
  kable_styling(full_width = F, position = "center")
Comparison of Model Performance in Held-Out Test Data
Method MAE RMSE Rsquared
Linear Regression 0.5392 0.6736 0.6300
Elastic Net 0.5499 0.6822 0.6211
Random Forest 0.2748 0.3866 0.8885
Gradient Boosting 0.5051 0.6262 0.7305
tst_M5a.sd_F <- sd(data.tst_M5a$F)

ggplot(performance_metrics_long, aes(x = Metric, y = Value, fill = Method)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = tst_M5a.sd_F, yend = tst_M5a.sd_F),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long %>% filter(Metric == "RMSE"),
            aes(x = 2, y = tst_M5a.sd_F, label = paste0("Std.dev of F in Held-Out Test Data: ", round(tst_M5a.sd_F, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Comparison of Model Performance in Full Study 5 Data",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

# do this by panel?

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.full_M5a.gbm, family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.full_M5a.gbm, family = binomial)


Default.PAN_Pred_F.reg_ACE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.reg , family = binomial)

Default.PAN_Pred_F.net_ACE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.net, family = binomial)

Default.PAN_Pred_F.rf_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.rf, family = binomial)

Default.PAN_Pred_F.gbm_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.gbm, family = binomial)

Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_F.reg_ACE" = Default.PAN_Pred_F.reg_ACE,
  "Default.PAN_Pred_F.net_ACE" = Default.PAN_Pred_F.net_ACE,
  "Default.PAN_Pred_F.rf_ACE" = Default.PAN_Pred_F.rf_ACE,
  "Default.PAN_Pred_F.gbm_ACE" = Default.PAN_Pred_F.gbm_ACE
)
Model Results
term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_F.reg_ACE,Default.PAN_Pred_F.net_ACE,Default.PAN_Pred_F.rf_ACE,Default.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result
Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.F(reg)_ACE PAN_Pred.F(net)_ACE PAN_Pred.F(rf)_ACE PAN_Pred.F(gbm)_ACE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 1.05 0.13 0.38 0.703 1.05 0.14 0.38 0.702 0.96 0.13 -0.29 0.768 0.96 0.13 -0.31 0.754 1.03 0.14 0.24 0.809 1.00 0.14 0.00 0.997
DefaultCondition [OPTOUT] 1.74 0.32 3.01 0.003 1.76 0.33 3.02 0.003 2.04 0.41 3.53 <0.001 2.09 0.43 3.57 <0.001 1.78 0.34 2.97 0.003 1.89 0.37 3.24 0.001
F 1.02 0.19 0.13 0.894
A 0.95 0.08 -0.54 0.590 0.90 0.08 -1.20 0.229 0.91 0.08 -1.11 0.268 0.94 0.08 -0.70 0.482 0.92 0.08 -0.97 0.332
C 1.72 0.25 3.78 <0.001 1.62 0.17 4.49 <0.001 1.64 0.18 4.57 <0.001 1.68 0.20 4.26 <0.001 1.63 0.18 4.36 <0.001
E 1.04 0.09 0.40 0.686 1.08 0.08 1.02 0.306 1.07 0.08 0.95 0.342 1.05 0.08 0.62 0.534 1.07 0.08 0.86 0.389
DefaultCondition [OPTOUT]
× F
1.25 0.34 0.83 0.408
DefaultCondition [OPTOUT]
× A
0.84 0.11 -1.32 0.188 0.94 0.12 -0.45 0.653 0.94 0.12 -0.47 0.637 0.88 0.12 -1.00 0.317 0.91 0.12 -0.70 0.481
DefaultCondition [OPTOUT]
× C
0.62 0.13 -2.29 0.022 0.78 0.12 -1.60 0.109 0.78 0.12 -1.62 0.105 0.72 0.13 -1.84 0.066 0.77 0.12 -1.63 0.104
DefaultCondition [OPTOUT]
× E
1.09 0.14 0.68 0.499 0.95 0.11 -0.43 0.667 0.95 0.11 -0.43 0.668 1.01 0.12 0.10 0.918 0.98 0.11 -0.20 0.838
Predicted F 1.32 0.17 2.18 0.029 1.30 0.17 1.94 0.052 1.11 0.19 0.61 0.540 1.24 0.18 1.51 0.130
DefaultCondition [OPTOUT]
× Predicted F
0.66 0.12 -2.21 0.027 0.65 0.13 -2.18 0.029 0.92 0.22 -0.34 0.735 0.74 0.16 -1.41 0.158
Observations 1460 1460 1460 1460 1460 1460
AIC 1884.213 1858.463 1854.228 1854.838 1859.655 1857.472
log-Likelihood -936.107 -915.232 -913.114 -913.419 -915.828 -914.736
Anova and Eta Square
  • As in the origianl paper, type I anova is used. We first attribute variance to condition, FACE, then Panel main effect, followed by the conditionFACE interaction, and the Panelcondition interaction last.
  • As in the original paper, Etasq of a variable in linear regression models (LessMore and Sunk) is calculated as the sum of squares explained by the variables divided by the total sum of squares. Etasq of a variable in logistic regression models (Default and Unusual Disease) is calculated as the deviance explained by the model relative to the null model (deviance from the null model divided by the model’s residual deviance).
  • I validated this pipeline by reproducing the stats reported in the SI for Study 1.
  • Woudl we like to see the full Anova stats?
calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")
Etasq Table: Default Paradigm
Condition Panel Default Condition:Panel
Default.PAN 0.03801*** 0.00298. 0.01188***
Default.PAN_FACE 0.03801*** 0.00051 0.00159
Default.PAN_Pred_F.reg_ACE 0.03801*** 0.00021 0.00551**
Default.PAN_Pred_F.net_ACE 0.03801*** 0.00013 0.00546**
Default.PAN_Pred_F.rf_ACE 0.03801*** 0.00013 0.00308*
Default.PAN_Pred_F.gbm_ACE 0.03801*** 0.00019 0.00462*

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.full_M5a.reg, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.full_M5a.reg, family = binomial)

Disease.PAN_Pred_F.reg_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.reg, family = binomial)

Disease.PAN_Pred_F.net_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.net, family = binomial)

Disease.PAN_Pred_F.rf_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.rf, family = binomial)

Disease.PAN_Pred_F.gbm_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.full_M5a.gbm, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_F.reg_ACE" = Disease.PAN_Pred_F.reg_ACE,
  "Disease.PAN_Pred_F.net_ACE" = Disease.PAN_Pred_F.net_ACE,
  "Disease.PAN_Pred_F.rf_ACE" = Disease.PAN_Pred_F.rf_ACE, 
  "Disease.PAN_Pred_F.gbm_ACE" = Disease.PAN_Pred_F.gbm_ACE
)


# extract_aic(Disease_model)
Model Results
term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_F.reg_ACE,Disease.PAN_Pred_F.net_ACE,Disease.PAN_Pred_F.rf_ACE,Disease.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result
Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.F(reg)_ACE PAN_Pred.F(net)_ACE PAN_Pred.F(rf)_ACE PAN_Pred.F(gbm)_ACE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.45 0.06 -5.84 <0.001 0.43 0.06 -6.03 <0.001 0.46 0.07 -5.22 <0.001 0.46 0.07 -5.03 <0.001 0.43 0.06 -5.73 <0.001 0.44 0.07 -5.53 <0.001
DiseaseCondition [LOSS] 3.40 0.64 6.46 <0.001 3.69 0.72 6.73 <0.001 3.18 0.66 5.59 <0.001 3.07 0.65 5.33 <0.001 3.45 0.69 6.17 <0.001 3.28 0.67 5.84 <0.001
F 1.07 0.23 0.31 0.755
A 1.40 0.14 3.34 0.001 1.45 0.15 3.59 <0.001 1.45 0.15 3.62 <0.001 1.42 0.15 3.40 0.001 1.43 0.15 3.47 0.001
C 0.73 0.12 -1.83 0.067 0.79 0.09 -1.97 0.048 0.80 0.09 -1.93 0.054 0.77 0.11 -1.88 0.060 0.78 0.10 -1.99 0.047
E 0.89 0.09 -1.14 0.256 0.85 0.08 -1.77 0.077 0.85 0.08 -1.81 0.071 0.87 0.08 -1.46 0.144 0.87 0.08 -1.61 0.108
DiseaseCondition [LOSS] ×
F
1.21 0.35 0.65 0.513
DiseaseCondition [LOSS] ×
A
1.03 0.14 0.23 0.820 0.98 0.14 -0.11 0.913 0.98 0.14 -0.15 0.883 1.00 0.14 -0.01 0.995 0.98 0.14 -0.14 0.889
DiseaseCondition [LOSS] ×
C
1.26 0.28 1.06 0.290 1.25 0.20 1.37 0.169 1.24 0.20 1.34 0.180 1.19 0.22 0.94 0.345 1.19 0.20 1.06 0.288
DiseaseCondition [LOSS] ×
E
1.04 0.14 0.27 0.791 1.06 0.13 0.50 0.616 1.07 0.13 0.56 0.577 1.07 0.13 0.52 0.604 1.07 0.13 0.58 0.563
Predicted F 0.87 0.12 -1.00 0.318 0.85 0.12 -1.13 0.260 0.98 0.19 -0.13 0.898 0.93 0.15 -0.46 0.648
DiseaseCondition [LOSS] ×
Predicted F
1.51 0.30 2.08 0.037 1.59 0.33 2.24 0.025 1.48 0.39 1.51 0.132 1.63 0.36 2.19 0.029
Observations 1460 1460 1460 1460 1460 1460
AIC 1762.505 1721.769 1718.733 1718.139 1719.185 1716.082
log-Likelihood -875.252 -846.884 -845.367 -845.070 -845.592 -844.041
Anova and Eta Square
calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")
Etasq Table: Disease Paradigm
Condition Panel DiseaseCondition:Panel
Disease.PAN 0.05317*** 0.03246*** 0.01394***
Disease.PAN_FACE 0.05317*** 0.00514** 0.00395*
Disease.PAN_Pred_F.reg_ACE 0.05317*** 0.00525** 0.00424*
Disease.PAN_Pred_F.net_ACE 0.05317*** 0.00516** 0.00436*
Disease.PAN_Pred_F.rf_ACE 0.05317*** 0.00601** 0.00443*
Disease.PAN_Pred_F.gbm_ACE 0.05317*** 0.00635** 0.0045*

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.full_M5a.reg)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.reg)
LessMore.PAN_Pred_F.reg_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.reg)
LessMore.PAN_Pred_F.net_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.net)
LessMore.PAN_Pred_F.rf_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.rf)
LessMore.PAN_Pred_F.gbm_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.gbm)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_F.reg_ACE" = LessMore.PAN_Pred_F.reg_ACE,
  "LessMore.PAN_Pred_F.net_ACE" = LessMore.PAN_Pred_F.net_ACE,
  "LessMore.PAN_Pred_F.rf_ACE" = LessMore.PAN_Pred_F.rf_ACE, 
  "LessMore.PAN_Pred_F.gbm_ACE" = LessMore.PAN_Pred_F.gbm_ACE
)
Model Results
term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_F.reg_ACE,LessMore.PAN_Pred_F.net_ACE,LessMore.PAN_Pred_F.rf_ACE,LessMore.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result
LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.F(reg)_ACE PAN_Pred.F(net)_ACE PAN_Pred.F(rf)_ACE PAN_Pred.F(gbm)_ACE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.20 0.09 59.66 <0.001 5.22 0.09 61.17 <0.001 5.20 0.09 57.18 <0.001 5.20 0.09 56.01 <0.001 5.20 0.09 59.30 <0.001 5.20 0.09 58.64 <0.001
LessMoreCondition [SCARF] 1.11 0.12 9.00 <0.001 1.10 0.12 9.13 <0.001 1.09 0.13 8.50 <0.001 1.08 0.13 8.31 <0.001 1.07 0.12 8.66 <0.001 1.07 0.13 8.53 <0.001
F 0.13 0.12 1.12 0.265
A -0.13 0.06 -2.23 0.026 -0.12 0.06 -2.06 0.040 -0.12 0.06 -2.08 0.038 -0.13 0.06 -2.20 0.028 -0.12 0.06 -2.08 0.037
C 0.01 0.09 0.08 0.934 0.07 0.07 1.03 0.301 0.07 0.07 1.01 0.314 0.04 0.08 0.49 0.624 0.06 0.07 0.90 0.368
E -0.01 0.06 -0.10 0.919 -0.03 0.05 -0.68 0.498 -0.03 0.05 -0.65 0.518 -0.02 0.05 -0.36 0.722 -0.03 0.05 -0.65 0.517
LessMoreCondition [SCARF]
× F
-0.02 0.17 -0.13 0.899
LessMoreCondition [SCARF]
× A
0.39 0.08 4.82 <0.001 0.38 0.08 4.72 <0.001 0.38 0.08 4.70 <0.001 0.36 0.08 4.51 <0.001 0.37 0.08 4.57 <0.001
LessMoreCondition [SCARF]
× C
0.23 0.13 1.74 0.082 0.21 0.10 2.14 0.032 0.20 0.10 2.11 0.035 0.16 0.11 1.49 0.137 0.18 0.10 1.82 0.069
LessMoreCondition [SCARF]
× E
0.01 0.08 0.08 0.940 0.02 0.07 0.23 0.816 0.02 0.07 0.26 0.794 0.04 0.07 0.48 0.631 0.03 0.07 0.42 0.672
Predicted F 0.04 0.08 0.44 0.658 0.05 0.09 0.54 0.592 0.11 0.11 0.98 0.329 0.05 0.09 0.54 0.588
LessMoreCondition [SCARF]
× Predicted F
0.02 0.12 0.20 0.838 0.04 0.12 0.30 0.764 0.12 0.15 0.80 0.423 0.10 0.13 0.73 0.466
Observations 1460 1460 1460 1460 1460 1460
R2 / R2 adjusted 0.204 / 0.201 0.248 / 0.241 0.247 / 0.240 0.247 / 0.241 0.250 / 0.243 0.248 / 0.242
Anova and Eta Square
calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")
Etasq Table: LessMore Paradigm
Condition Panel LessMoreCondition:Panel
LessMore.PAN 0.09206*** 0.10649*** 0.00506*
LessMore.PAN_FACE 0.09206*** 0.02908*** 0.00051
LessMore.PAN_Pred_F.reg_ACE 0.09206*** 0.02842*** 0.00122
LessMore.PAN_Pred_F.net_ACE 0.09206*** 0.0286*** 0.00129
LessMore.PAN_Pred_F.rf_ACE 0.09206*** 0.02984*** 0.00154
LessMore.PAN_Pred_F.gbm_ACE 0.09206*** 0.02872*** 0.00157

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.full_M5a.reg)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.reg)
Sunk.PAN_Pred_F.reg_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E+SunkCondition * Panel, data = data.full_M5a.reg)
Sunk.PAN_Pred_F.net_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.net)
Sunk.PAN_Pred_F.rf_ACE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E  + SunkCondition * Panel, data = data.full_M5a.rf)
Sunk.PAN_Pred_F.gbm_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.gbm)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_F.reg_ACE" = Sunk.PAN_Pred_F.reg_ACE,
  "Sunk.PAN_Pred_F.net_ACE" = Sunk.PAN_Pred_F.net_ACE,
  "Sunk.PAN_Pred_F.rf_ACE" = Sunk.PAN_Pred_F.rf_ACE, #0.302 with unscaled
  "Sunk.PAN_Pred_F.gbm_ACE" = Sunk.PAN_Pred_F.gbm_ACE
)

# extract_adjusted_r_squared(Sunk_model)
Full Model Results
term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_F.reg_ACE,Sunk.PAN_Pred_F.net_ACE,Sunk.PAN_Pred_F.rf_ACE,Sunk.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_F(reg)_ACE", 
                        "PAN_Pred_F(net)_ACE", 
                        "PAN_Pred_F(rf)_ACE", 
                        "PAN_Pred_F(gbm)_ACE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result
Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred_F(reg)_ACE PAN_Pred_F(net)_ACE PAN_Pred_F(rf)_ACE PAN_Pred_F(gbm)_ACE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.78 0.17 34.79 <0.001 5.77 0.17 34.93 <0.001 5.67 0.18 31.92 <0.001 5.68 0.18 31.32 <0.001 5.71 0.17 33.18 <0.001 5.69 0.17 32.63 <0.001
SunkCondition [PAID] 0.43 0.23 1.85 0.065 0.46 0.23 1.97 0.049 0.41 0.25 1.66 0.097 0.39 0.25 1.55 0.120 0.44 0.24 1.81 0.070 0.45 0.24 1.83 0.067
F 0.39 0.22 1.77 0.077
A 0.07 0.11 0.61 0.539 0.07 0.11 0.65 0.519 0.08 0.11 0.72 0.472 0.07 0.11 0.68 0.500 0.07 0.11 0.62 0.535
C -0.26 0.18 -1.50 0.133 -0.11 0.13 -0.85 0.394 -0.10 0.13 -0.75 0.452 -0.17 0.15 -1.12 0.265 -0.14 0.14 -1.03 0.303
E -0.06 0.11 -0.59 0.558 -0.12 0.10 -1.25 0.211 -0.13 0.10 -1.34 0.182 -0.11 0.10 -1.08 0.281 -0.12 0.10 -1.20 0.229
SunkCondition [PAID] × F 0.38 0.32 1.18 0.239
SunkCondition [PAID] × A 0.02 0.16 0.13 0.895 0.04 0.16 0.23 0.817 0.04 0.16 0.23 0.817 0.04 0.16 0.26 0.798 0.06 0.16 0.36 0.715
SunkCondition [PAID] × C -0.20 0.25 -0.79 0.428 -0.04 0.19 -0.19 0.851 -0.04 0.19 -0.24 0.810 -0.08 0.21 -0.36 0.718 -0.02 0.19 -0.10 0.917
SunkCondition [PAID] × E 0.11 0.15 0.72 0.469 0.05 0.14 0.33 0.739 0.05 0.14 0.36 0.717 0.06 0.14 0.41 0.680 0.03 0.14 0.24 0.811
Predicted F 0.25 0.16 1.62 0.105 0.22 0.17 1.34 0.181 0.30 0.20 1.48 0.138 0.30 0.18 1.69 0.092
SunkCondition [PAID] ×
Predicted F
0.16 0.22 0.69 0.490 0.19 0.24 0.80 0.426 0.22 0.30 0.74 0.462 0.09 0.25 0.35 0.724
Observations 1460 1460 1460 1460 1460 1460
R2 / R2 adjusted 0.023 / 0.020 0.042 / 0.033 0.039 / 0.030 0.038 / 0.029 0.038 / 0.029 0.038 / 0.029
Anova and Eta Square
calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 6), sig_condition)
  res_panel <- paste0(round(perc_panel, 6), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 6), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")
Etasq Table: SunkCost Paradigm
Condition Panel LessMoreCondition:Panel
Sunk.PAN 0.007943*** 0.011323*** 0.003981.
Sunk.PAN_FACE 0.007943*** 0.000985 0.002048
Sunk.PAN_Pred_F.reg_ACE 0.007943*** 0.001641 0.002109
Sunk.PAN_Pred_F.net_ACE 0.007943*** 0.002035 0.002104
Sunk.PAN_Pred_F.rf_ACE 0.007943*** 0.000325 0.001955
Sunk.PAN_Pred_F.gbm_ACE 0.007943*** 0.001118 0.002033

Summary

Model Fit
  • \(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)

  • note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.full_M5a.gbm,family=binomial)


R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_F.reg_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.net_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.rf_ACE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_F.gbm_ACE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Default"))



Disease.null<-glm(numDisease~1,data.full_M5a.gbm,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_F.reg_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.net_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.rf_ACE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_F.gbm_ACE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(LessMore.PAN_Pred_F.rf_ACE)$r.squared,
                     summary(LessMore.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(Sunk.PAN_Pred_F.rf_ACE )$r.squared,
                     summary(Sunk.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "SunkCost"))




R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.F(reg)+ACE",  
                                   "Pan+Pred.F(net)+ACE", 
                                   "Pan+Pred.F(rf)+ACE",
                                   "Pan+Pred.F(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_F.reg_ACE), 
                    AIC(Default.PAN_Pred_F.net_ACE), 
                    AIC(Default.PAN_Pred_F.rf_ACE),
                    AIC(Default.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_F.reg_ACE), 
                    AIC(Disease.PAN_Pred_F.net_ACE), 
                    AIC(Disease.PAN_Pred_F.rf_ACE),
                    AIC(Disease.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Disease"))


AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PF(reg)+ACE",  
                                   "Pan+PF(net)+ACE", 
                                   "Pan+PF(rf)+ACE",
                                   "Pan+PF(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


### Avg. improvement in R2 for each model across all four paradigms, in FULL study 5
Default.null.Study5<-glm(numDefault~1,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.null.Study5<-glm(numDisease~1,Data_M5a.log_z_dummy_coded,family=binomial)
Default.PAN.Study5<-glm(numDefault~DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN.Study5<-glm(numDisease~DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN.Study5<-lm(numLessMore~LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN.Study5<-lm(numSunkCost~SunkCondition*Panel,Data_M5a.log_z_dummy_coded)


Default.PAN_FACE.Study5<-glm(numDefault~DefaultCondition*F+DefaultCondition*A+DefaultCondition*C+DefaultCondition*E+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~DiseaseCondition*F+DiseaseCondition*A+DiseaseCondition*C+DiseaseCondition*E+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~LessMoreCondition*F+LessMoreCondition*A+LessMoreCondition*C+LessMoreCondition*E+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~SunkCondition*F+SunkCondition*A+SunkCondition*C+SunkCondition*E+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

R2.Default = as.data.frame(cbind( 
  "R2" = c((1-logLik(Default.PAN.Study5)/logLik(Default.null.Study5)),
           (1-logLik(Default.PAN_FACE.Study5)/logLik(Default.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Default"
))


R2.Disease = as.data.frame(cbind( 
  "R2" = c((1-logLik(Disease.PAN.Study5)/logLik(Disease.null.Study5)),
           (1-logLik(Disease.PAN_FACE.Study5)/logLik(Disease.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Disease"
))

# For LessMore paradigm (linear models)
R2.LessMore = as.data.frame(cbind( 
  "R2" = c(summary(LessMore.PAN.Study5)$r.squared,
           summary(LessMore.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "LessMore"
))

# For SunkCost paradigm (linear models)
R2.Sunk = as.data.frame(cbind( 
  "R2" = c(summary(Sunk.PAN.Study5)$r.squared,
           summary(Sunk.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "SunkCost"
))

R2.FullStudy5 <- rbind(R2.Default, R2.Disease, R2.LessMore, R2.Sunk)
R2.FullStudy5$R2 <- round(as.numeric(R2.FullStudy5$R2), 3)

R2.FullStudy5 <- R2.FullStudy5 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement_full_study_5 <- R2.FullStudy5 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))%>%
  slice(-1) 

avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample", digits = 2)
Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample
Model Avg_Improvement
Pan 0.00
Pan+FACE 43.20
Pan+Pred.F(reg)+ACE 41.01
Pan+Pred.F(net)+ACE 40.17
Pan+Pred.F(rf)+ACE 38.88
Pan+Pred.F(gbm)+ACE 39.60
Pan+FACE (Full Study 5) 43.20
F*Cond Model Coefficients
  • DVs for Default & Framing are 1/0
  • DVs for LessMore & SunkCost are on the original scale
AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_F.reg_ACE),confint(Default.PAN_Pred_F.reg_ACE)),
cbind(coef(Default.PAN_Pred_F.net_ACE),confint(Default.PAN_Pred_F.net_ACE)),
cbind(coef(Default.PAN_Pred_F.rf_ACE),confint(Default.PAN_Pred_F.rf_ACE)),
cbind(coef(Default.PAN_Pred_F.gbm_ACE),confint(Default.PAN_Pred_F.gbm_ACE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_F.reg_ACE),confint(Disease.PAN_Pred_F.reg_ACE)),
cbind(coef(Disease.PAN_Pred_F.net_ACE),confint(Disease.PAN_Pred_F.net_ACE)),
cbind(coef(Disease.PAN_Pred_F.rf_ACE),confint(Disease.PAN_Pred_F.rf_ACE)),
cbind(coef(Disease.PAN_Pred_F.gbm_ACE),confint(Disease.PAN_Pred_F.gbm_ACE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_F.reg_ACE),confint(LessMore.PAN_Pred_F.reg_ACE)),
cbind(coef(LessMore.PAN_Pred_F.net_ACE),confint(LessMore.PAN_Pred_F.net_ACE)),
cbind(coef(LessMore.PAN_Pred_F.rf_ACE),confint(LessMore.PAN_Pred_F.rf_ACE)),
cbind(coef(LessMore.PAN_Pred_F.gbm_ACE),confint(LessMore.PAN_Pred_F.gbm_ACE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_F.reg_ACE),confint(Sunk.PAN_Pred_F.reg_ACE)),
cbind(coef(Sunk.PAN_Pred_F.net_ACE),confint(Sunk.PAN_Pred_F.net_ACE)),
cbind(coef(Sunk.PAN_Pred_F.rf_ACE),confint(Sunk.PAN_Pred_F.rf_ACE)),
cbind(coef(Sunk.PAN_Pred_F.gbm_ACE),confint(Sunk.PAN_Pred_F.gbm_ACE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]


AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq
### Study 5 Results ###
Default_model.Study5<-list(
  "Default.PAN.Study5" = Default.PAN.Study5,
  "Default.PAN_FACE.Study5" = Default.PAN_FACE.Study5)
Disease_model.Study5<-list(
  "Disease.PAN.Study5" = Disease.PAN.Study5,
  "Disease.PAN_FACE.Study5" = Disease.PAN_FACE.Study5)

LessMore_model.Study5<-list(
  "LessMore.PAN.Study5" = LessMore.PAN.Study5,
  "LessMore.PAN_FACE.Study5" = LessMore.PAN_FACE.Study5)


Sunk_model.Study5<-list(
  "Sunk.PAN.Study5" = Sunk.PAN.Study5,
  "Sunk.PAN_FACE.Study5" = Sunk.PAN_FACE.Study5)

deviance_explained <- sapply(Default_model.Study5, calculate_deviance_explained.Default)
deviance_explained_df.Default.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Default.Study5) <- c("Condition","Panel", "Default Condition:Panel")

deviance_explained <- sapply(Disease_model.Study5, calculate_deviance_explained.Disease)
deviance_explained_df.Disease.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Disease.Study5) <- c("Condition","Panel", "Disease Condition:Panel")

variance_explained_lm <- sapply(LessMore_model.Study5, calculate_variance_explained_LessMore)
variance_explained_lm_df.LessMore.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

variance_explained_lm <- sapply(Sunk_model.Study5, calculate_variance_explained_Sunk)
variance_explained_lm_df.SunkCost.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.SunkCost.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.F(reg)+ACE", "Pan+Pred.F(net)+ACE", "Pan+Pred.F(rf)+ACE", "Pan+Pred.F(gbm)+ACE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)


etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.F(reg)+ACE",  
                                   "Pan+Pred.F(net)+ACE", 
                                   "Pan+Pred.F(rf)+ACE",
                                   "Pan+Pred.F(gbm)+ACE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data", digits = 2)
Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data
Model Avg_Reduction
Pan 0.00
Pan+FACE 74.19
Pan+Pred.F(reg)+ACE 61.53
Pan+Pred.F(net)+ACE 61.10
Pan+Pred.F(rf)+ACE 65.69
Pan+Pred.F(gbm)+ACE 61.68
Pan (Full Study5) 0.00
Pan+FACE (Full Study5) 74.19

1.1 Evaluate Portability with Study 1 Data

# Data_M1a_dummy_coded<-read.csv("../MP Study 1/Data_M1a_dummy_coded_20240808.csv") # seperately .log_z data
Data_M1a.log_dummy_coded<-read.csv("../MP Study 1/Data_M1a.log_dummy_coded_20241003.csv")%>%
  rename_with(~ gsub("DEMCHAR", "PurposeOpenEnd", .x))

F_measure.Study1<-read.csv("../MP Study 1/Data Prep for ML/MP1_Full_data_F_ML_20240916.csv")
META_Browser_columns <- names(Data_M1a.log_dummy_coded)[grepl("META_Browser", names(Data_M1a.log_dummy_coded))] # tentatively retain browser Browsers to make predictions
Data_M1a.log_dummy_coded<-Data_M1a.log_dummy_coded%>% 
  select(-c(X,LW_DV,numLW_IV)) 
names(Data_M1a.log_dummy_coded) <- gsub("iNTRO", "INTRO", names(Data_M1a.log_dummy_coded))
# make variable names concistent


# This reread the data using read.csv function to ensures data format consistency, need to understand more about this function here....
# Data_M5a_dummy_coded<-read.csv("Data_M5a_dummy_coded_20240808.csv") # seperately .log_Z data

Data_M5a.log_dummy_coded<-read.csv("Data_M5a.log_dummy_coded_20240921.csv")

META_Browser_columns <- names(Data_M5a.log_dummy_coded)[grepl("META_Browser", names(Data_M5a.log_dummy_coded))]
Data_M5a.log_dummy_coded<-Data_M5a.log_dummy_coded%>%
  select(-c(X))


# Calculate mean and standard deviation for each column in Data_M5a.log_dummy_coded
means <- sapply(Data_M5a.log_dummy_coded[, scale_columns], mean, na.rm = TRUE)
sds <- sapply(Data_M5a.log_dummy_coded[, scale_columns], sd, na.rm = TRUE)

scale_with_reference_M5a <- function(x, mean_ref, sd_ref) {
  (x - mean_ref) / sd_ref
}

Data_M5a.log_z_dummy_coded <- Data_M5a.log_dummy_coded %>%
  mutate(across(.cols = all_of(scale_columns), 
                .fns = ~ scale_with_reference_M5a(., means[cur_column()], sds[cur_column()])))

# Scale Data_M1a.log_dummy_coded using the mean and sd from Data_M5a.log_dummy_coded
Data_M1a.log_z_dummy_coded <- Data_M1a.log_dummy_coded %>%
  mutate(across(.cols = all_of(scale_columns), 
                .fns = ~ scale_with_reference_M5a(., means[cur_column()], sds[cur_column()])))

Data_M1a.log_z_dummy_coded <- Data_M1a.log_z_dummy_coded 
Data_M1a.log_z_dummy_coded$BNT<-F_measure.Study1$BNT
Data_M1a.log_z_dummy_coded$CRTScore<-F_measure.Study1$CRTScore
# Set seed for reproducibility
set.seed(123)

variables_to_remove <- c("A","C","E","numSunkCost","numLessMore","numDefault","numDisease","SunkCondition","LessMoreCondition","DiseaseCondition","DefaultCondition","Panel",Text_analysis,"Antonym","Synonym","crt2_score","rotsum","MXsum")

# Create training and testing sets

data.trn_M5a_FULL <- Data_M5a.log_z_dummy_coded 
data.tst_MP1.US <- Data_M1a.log_z_dummy_coded%>%filter(Country_US==1) #  MP1 US data
data.tst_MP1.UK <- Data_M1a.log_z_dummy_coded%>%filter(Country_UK==1) #  MP1 UK data
data.tst_MP1.Neth <- Data_M1a.log_z_dummy_coded%>%filter(Country_US==0&Country_UK==0) # #  MP1 Netherlands student data
# There are columns in study 1 dataset not in study 5, but this is okay. 

data.trn_M5a_FULL <- as.data.frame(data.trn_M5a_FULL[, !colnames(data.trn_M5a_FULL) %in% variables_to_remove])

actual_MP1.US <- data.tst_MP1.US$F
actual_MP1.UK <- data.tst_MP1.UK$F
actual_MP1.Neth <- data.tst_MP1.Neth$F


tuneGrid <- expand.grid(
  mtry = c(64),
  min.node.size = c(5),
  splitrule = c("variance")
)

# Model_M5a_rf.FULL <- caret::train(F ~ ., data = data.trn_M5a_FULL, method = "ranger",
#                   tuneGrid = tuneGrid, importance = 'impurity', num.trees = 750,
#                   trControl = ctrl)
# save(Model_M5a_rf.FULL, file = "./Saved ML Model and Data/Model_M5a_rf_FULL.RData")
load("./Saved ML Model and Data/Model_M5a_rf_FULL.RData")
M5a_rf_pred_MP1.US<- predict(Model_M5a_rf.FULL,data.tst_MP1.US)
test_perf.M5a_rf.MP1.US <- postResample(M5a_rf_pred_MP1.US, actual_MP1.US)
data.tst_MP1.US.rf <- cbind(data.tst_MP1.US, Predicted_F = M5a_rf_pred_MP1.US)


Model_M5a_reg.FULL <- caret::train(F ~ ., data = data.trn_M5a_FULL, method = "lm")
M5a_reg_pred_MP1.US<- predict(Model_M5a_reg.FULL,data.tst_MP1.US)
test_perf.M5a_reg.MP1.US <- postResample(M5a_reg_pred_MP1.US, actual_MP1.US)
data.tst_MP1.US.reg <- cbind(data.tst_MP1.US, Predicted_F = M5a_reg_pred_MP1.US)

Model_M5a_net.FULL <- train(F ~ ., data = data.trn_M5a_FULL, method = "glmnet", trControl = ctrl, tuneLength=40) 

M5a_net_pred_MP1.US<- predict(Model_M5a_net.FULL,data.tst_MP1.US)
test_perf.M5a_net.MP1.US <- postResample(M5a_net_pred_MP1.US, actual_MP1.US)
data.tst_MP1.US.net <- cbind(data.tst_MP1.US, Predicted_F = M5a_net_pred_MP1.US)



# Model_M5a_gbm4.FULL <- caret::train(F ~ ., data = data.trn_M5a_FULL, method = "gbm",
#                     tuneGrid = tuneGrid.4, trControl = ctrl,
#                     verbose = FALSE)
# 
# save(Model_M5a_gbm4.FULL, file = "./Saved ML Model and Data/Model_M5a_gbm_FULL.RData")
load("./Saved ML Model and Data/Model_M5a_gbm_FULL_4_50.RData")
Model_M5a_gbm.FULL<-Model_M5a_gbm4.FULL
M5a_gbm_pred_MP1.US<- predict(Model_M5a_gbm.FULL,data.tst_MP1.US)
test_perf.M5a_gbm.MP1.US <- postResample(M5a_gbm_pred_MP1.US, actual_MP1.US)
data.tst_MP1.US.gbm<- cbind(data.tst_MP1.US, Predicted_F = M5a_gbm_pred_MP1.US)

1.1.1 Study 1 US Data

  • n = 5336
ggpairs(data.tst_MP1.US, c("F","CRTScore","BNT","ATTN1__Correct"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

1.1.1.0 Model Fit

  • We trained the model with full Study 5 data
  • The following stats come from cross-validation during model training
train_perf_M5a_rf.FULL <- Model_M5a_rf.FULL$results[which.min(Model_M5a_rf.FULL$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_reg.FULL <- Model_M5a_reg.FULL$results[which.min(Model_M5a_reg.FULL$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_net.FULL <- Model_M5a_net.FULL$results[which.min(Model_M5a_net.FULL$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_gbm.FULL <- Model_M5a_gbm.FULL$results[which.min(Model_M5a_gbm.FULL$results$RMSE), c("RMSE", "Rsquared", "MAE")]

# Combine training performance metrics into a data frame
performance_metrics_trn_M1 <- data.frame(
  Model = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(as.numeric(train_perf_M5a_reg.FULL["MAE"]), as.numeric(train_perf_M5a_net.FULL["MAE"]), as.numeric(train_perf_M5a_rf.FULL["MAE"]), as.numeric(train_perf_M5a_gbm.FULL["MAE"])),
  RMSE = c(as.numeric(train_perf_M5a_reg.FULL["RMSE"]), as.numeric(train_perf_M5a_net.FULL["RMSE"]), as.numeric(train_perf_M5a_rf.FULL["RMSE"]), as.numeric(train_perf_M5a_gbm.FULL["RMSE"])),
  Rsquared = c(as.numeric(train_perf_M5a_reg.FULL["Rsquared"]), as.numeric(train_perf_M5a_net.FULL["Rsquared"]), as.numeric(train_perf_M5a_rf.FULL["Rsquared"]), as.numeric(train_perf_M5a_gbm.FULL["Rsquared"])))


performance_table_trn <- performance_metrics_trn_M1 %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table_trn, caption = "Comparison of Model Performance in Training Data (Full Study 5 Data) using CV") %>%
  kable_styling(full_width = F, position = "center")
Comparison of Model Performance in Training Data (Full Study 5 Data) using CV
Model MAE RMSE Rsquared
Linear Regression 0.5782 0.7185 0.5833
Elastic Net 0.5660 0.6995 0.6016
Random Forest 0.5301 0.6595 0.6532
Gradient Boosting 0.5021 0.6295 0.6756
###Plot ###

performance_metrics_long_trn <- performance_metrics_trn_M1 %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")

trn_M5a_FULL.sd_F <- sd(data.trn_M5a_FULL$F)

ggplot(performance_metrics_long_trn, aes(x = Metric, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = trn_M5a_FULL.sd_F, yend = trn_M5a_FULL.sd_F),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
            aes(x = 2, y = trn_M5a_FULL.sd_F, label = paste0("Std.dev of F in Study 5 Full Data: ", round(trn_M5a_FULL.sd_F, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Comparison of Model Performance in Training Data (Full Study 5 Data) using CV",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

1.1.1.4 Correlation Btw ML Predicted F and other FACE factors

  • Significance Notation: *** if the p-value is < 0.001, ** if the p-value is < 0.01, * if the p-value is < 0.05, . if the p-value is < 0.10
Random Forest
ggpairs(data.tst_MP1.US.rf, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting
ggpairs(data.tst_MP1.US.gbm, c("Predicted_F","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PF5 and individual F measures in Study 1 and ATTN1

ggpairs(data.tst_MP1.US.gbm, c("Predicted_F","CRTScore","BNT","ATTN1__Correct"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

1.1.1.5 Explain Heterogenity with Predicted F

1.1.1.5.1 Default Paradigm
Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_MP1.US.gbm%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_MP1.US.gbm%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_F.reg_ACE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.reg%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_F.net_ACE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.net%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_F.rf_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.rf%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_F.gbm_ACE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.gbm%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_F.reg_ACE" = Default.PAN_Pred_F.reg_ACE,
  "Default.PAN_Pred_F.net_ACE" = Default.PAN_Pred_F.net_ACE,
  "Default.PAN_Pred_F.rf_ACE" = Default.PAN_Pred_F.rf_ACE,
  "Default.PAN_Pred_F.gbm_ACE" = Default.PAN_Pred_F.gbm_ACE
)
Model Results
term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_F.reg_ACE,Default.PAN_Pred_F.net_ACE,Default.PAN_Pred_F.rf_ACE,Default.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result
Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.F(reg)_ACE PAN_Pred.F(net)_ACE PAN_Pred.F(rf)_ACE PAN_Pred.F(gbm)_ACE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.88 0.11 -1.06 0.291 0.87 0.11 -1.14 0.253 0.81 0.10 -1.64 0.100 0.79 0.10 -1.86 0.062 0.77 0.10 -2.00 0.046 0.81 0.10 -1.60 0.110
DefaultCondition [OPTOUT] 3.10 0.56 6.31 <0.001 3.22 0.59 6.42 <0.001 3.31 0.61 6.51 <0.001 3.38 0.63 6.60 <0.001 3.37 0.63 6.55 <0.001 3.28 0.60 6.47 <0.001
F 1.57 0.16 4.48 <0.001
A 1.41 0.17 2.87 0.004 1.18 0.15 1.35 0.178 1.18 0.15 1.29 0.196 1.17 0.15 1.27 0.206 1.20 0.15 1.45 0.148
C 1.02 0.05 0.39 0.698 0.99 0.05 -0.21 0.836 0.99 0.05 -0.14 0.885 1.03 0.05 0.53 0.599 1.01 0.05 0.10 0.922
E 0.86 0.07 -1.93 0.054 0.88 0.07 -1.70 0.089 0.88 0.07 -1.68 0.092 0.88 0.07 -1.68 0.093 0.88 0.07 -1.67 0.094
DefaultCondition [OPTOUT]
× F
1.03 0.16 0.17 0.867
DefaultCondition [OPTOUT]
× A
0.44 0.08 -4.53 <0.001 0.51 0.10 -3.60 <0.001 0.51 0.10 -3.56 <0.001 0.50 0.09 -3.69 <0.001 0.49 0.09 -3.82 <0.001
DefaultCondition [OPTOUT]
× C
1.18 0.10 1.98 0.048 1.24 0.10 2.61 0.009 1.24 0.10 2.60 0.009 1.20 0.10 2.29 0.022 1.21 0.10 2.32 0.020
DefaultCondition [OPTOUT]
× E
1.23 0.14 1.79 0.073 1.16 0.14 1.31 0.190 1.16 0.14 1.31 0.191 1.19 0.14 1.48 0.140 1.20 0.14 1.58 0.115
Predicted F 1.53 0.12 5.50 <0.001 1.56 0.13 5.47 <0.001 1.52 0.12 5.11 <0.001 1.54 0.12 5.40 <0.001
DefaultCondition [OPTOUT]
× Predicted F
0.76 0.09 -2.35 0.019 0.75 0.09 -2.37 0.018 0.82 0.10 -1.61 0.108 0.84 0.10 -1.46 0.145
Observations 3541 3541 3541 3541 3541 3541
AIC 4466.919 4382.086 4385.418 4386.017 4387.018 4381.467
log-Likelihood -2215.460 -2165.043 -2166.709 -2167.008 -2167.509 -2164.734
Anova and Eta Square
calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")
Etasq Table: Default Paradigm
Condition Panel Default Condition:Panel
Default.PAN 0.05063*** 0.00612*** 0.00751***
Default.PAN_FACE 0.05063*** 0.00456** 0.0031.
Default.PAN_Pred_F.reg_ACE 0.05063*** 0.00387* 0.00229
Default.PAN_Pred_F.net_ACE 0.05063*** 0.00391* 0.00226
Default.PAN_Pred_F.rf_ACE 0.05063*** 0.00369* 0.00279
Default.PAN_Pred_F.gbm_ACE 0.05063*** 0.00368* 0.0027
1.1.1.5.2 Framing (Unusual Disease) Paradigm
Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_MP1.US.gbm, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_MP1.US.gbm, family = binomial)

Disease.PAN_Pred_F.reg_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.reg, family = binomial)

Disease.PAN_Pred_F.net_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.net, family = binomial)

Disease.PAN_Pred_F.rf_ACE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.rf, family = binomial)

Disease.PAN_Pred_F.gbm_ACE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_MP1.US.gbm, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_F.reg_ACE" = Disease.PAN_Pred_F.reg_ACE,
  "Disease.PAN_Pred_F.net_ACE" = Disease.PAN_Pred_F.net_ACE,
  "Disease.PAN_Pred_F.rf_ACE" = Disease.PAN_Pred_F.rf_ACE, 
  "Disease.PAN_Pred_F.gbm_ACE" = Disease.PAN_Pred_F.gbm_ACE
)
# extract_aic(Disease_model)
Model Results
term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_F.reg_ACE,Disease.PAN_Pred_F.net_ACE,Disease.PAN_Pred_F.rf_ACE,Disease.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result
Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.F(reg)_ACE PAN_Pred.F(net)_ACE PAN_Pred.F(rf)_ACE PAN_Pred.F(gbm)_ACE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.49 0.05 -6.86 <0.001 0.47 0.05 -7.15 <0.001 0.47 0.05 -6.92 <0.001 0.48 0.05 -6.80 <0.001 0.48 0.05 -6.69 <0.001 0.47 0.05 -6.99 <0.001
DiseaseCondition [LOSS] 2.72 0.39 6.97 <0.001 2.91 0.43 7.29 <0.001 2.74 0.41 6.83 <0.001 2.67 0.40 6.61 <0.001 2.63 0.39 6.46 <0.001 2.74 0.40 6.84 <0.001
F 1.30 0.12 2.82 0.005
A 1.66 0.18 4.58 <0.001 1.76 0.20 4.96 <0.001 1.77 0.20 5.00 <0.001 1.79 0.21 5.06 <0.001 1.74 0.20 4.89 <0.001
C 0.79 0.04 -4.89 <0.001 0.83 0.04 -3.82 <0.001 0.83 0.04 -3.81 <0.001 0.82 0.04 -4.03 <0.001 0.82 0.04 -3.97 <0.001
E 0.96 0.06 -0.67 0.501 0.90 0.06 -1.56 0.118 0.90 0.06 -1.61 0.107 0.89 0.06 -1.66 0.097 0.90 0.06 -1.53 0.126
DiseaseCondition [LOSS] ×
F
0.87 0.11 -1.14 0.254
DiseaseCondition [LOSS] ×
A
1.01 0.15 0.07 0.947 0.89 0.14 -0.77 0.439 0.87 0.13 -0.91 0.363 0.86 0.13 -1.00 0.318 0.89 0.14 -0.79 0.430
DiseaseCondition [LOSS] ×
C
1.17 0.08 2.39 0.017 1.09 0.07 1.30 0.192 1.08 0.07 1.21 0.227 1.11 0.07 1.60 0.110 1.09 0.07 1.38 0.169
DiseaseCondition [LOSS] ×
E
0.79 0.07 -2.58 0.010 0.86 0.08 -1.67 0.094 0.87 0.08 -1.54 0.125 0.87 0.08 -1.47 0.141 0.87 0.08 -1.52 0.129
Predicted F 0.90 0.06 -1.50 0.134 0.89 0.06 -1.66 0.097 0.88 0.06 -1.81 0.070 0.92 0.06 -1.24 0.214
DiseaseCondition [LOSS] ×
Predicted F
1.31 0.12 2.92 0.003 1.39 0.13 3.36 0.001 1.39 0.14 3.38 0.001 1.36 0.13 3.22 0.001
Observations 5336 5336 5336 5336 5336 5336
AIC 6723.912 6573.688 6574.232 6571.002 6571.291 6570.558
log-Likelihood -3343.956 -3260.844 -3261.116 -3259.501 -3259.645 -3259.279
Anova and Eta Square
calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")
Etasq Table: Disease Paradigm
Condition Panel DiseaseCondition:Panel
Disease.PAN 0.05826*** 0.01261*** 0.0093***
Disease.PAN_FACE 0.05826*** 0.00366*** 0.00557***
Disease.PAN_Pred_F.reg_ACE 0.05826*** 0.00276* 0.00383***
Disease.PAN_Pred_F.net_ACE 0.05826*** 0.00279** 0.00376***
Disease.PAN_Pred_F.rf_ACE 0.05826*** 0.00274* 0.00389***
Disease.PAN_Pred_F.gbm_ACE 0.05826*** 0.00294** 0.00381***
1.1.1.5.3 Less is More Paradigm
LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_MP1.US.gbm)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.gbm)
LessMore.PAN_Pred_F.reg_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.reg)
LessMore.PAN_Pred_F.net_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.net)
LessMore.PAN_Pred_F.rf_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.rf)
LessMore.PAN_Pred_F.gbm_ACE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.gbm)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_F.reg_ACE" = LessMore.PAN_Pred_F.reg_ACE,
  "LessMore.PAN_Pred_F.net_ACE" = LessMore.PAN_Pred_F.net_ACE,
  "LessMore.PAN_Pred_F.rf_ACE" = LessMore.PAN_Pred_F.rf_ACE, 
  "LessMore.PAN_Pred_F.gbm_ACE" = LessMore.PAN_Pred_F.gbm_ACE
)
Model Results
term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_F.reg_ACE,LessMore.PAN_Pred_F.net_ACE,LessMore.PAN_Pred_F.rf_ACE,LessMore.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.F(reg)_ACE", 
                        "PAN_Pred.F(net)_ACE", 
                        "PAN_Pred.F(rf)_ACE", 
                        "PAN_Pred.F(gbm)_ACE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result
LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.F(reg)_ACE PAN_Pred.F(net)_ACE PAN_Pred.F(rf)_ACE PAN_Pred.F(gbm)_ACE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.37 0.07 79.64 <0.001 5.36 0.07 81.93 <0.001 5.34 0.07 80.99 <0.001 5.33 0.07 80.47 <0.001 5.33 0.07 79.95 <0.001 5.34 0.07 81.30 <0.001
LessMoreCondition [SCARF] 0.95 0.09 10.04 <0.001 0.99 0.09 10.82 <0.001 0.97 0.09 10.46 <0.001 0.96 0.09 10.33 <0.001 0.95 0.09 10.12 <0.001 0.97 0.09 10.48 <0.001
F -0.11 0.05 -1.99 0.046
A 0.38 0.06 6.14 <0.001 0.33 0.06 5.16 <0.001 0.32 0.06 4.98 <0.001 0.33 0.06 5.09 <0.001 0.33 0.06 5.17 <0.001
C 0.02 0.03 0.58 0.562 -0.01 0.03 -0.47 0.637 -0.02 0.03 -0.59 0.553 -0.00 0.03 -0.18 0.858 -0.01 0.03 -0.43 0.667
E -0.00 0.04 -0.09 0.930 0.03 0.04 0.87 0.385 0.04 0.04 1.01 0.310 0.03 0.04 0.87 0.384 0.04 0.04 0.99 0.324
LessMoreCondition [SCARF]
× F
0.28 0.08 3.67 <0.001
LessMoreCondition [SCARF]
× A
0.45 0.09 5.04 <0.001 0.40 0.09 4.34 <0.001 0.40 0.09 4.33 <0.001 0.37 0.09 4.03 <0.001 0.39 0.09 4.32 <0.001
LessMoreCondition [SCARF]
× C
-0.01 0.04 -0.14 0.886 0.00 0.04 0.05 0.958 0.00 0.04 0.10 0.922 0.01 0.04 0.23 0.818 0.00 0.04 0.12 0.903
LessMoreCondition [SCARF]
× E
-0.12 0.06 -2.24 0.025 -0.14 0.06 -2.57 0.010 -0.14 0.06 -2.56 0.010 -0.13 0.06 -2.39 0.017 -0.14 0.06 -2.51 0.012
Predicted F 0.09 0.04 2.31 0.021 0.12 0.04 2.75 0.006 0.09 0.04 2.05 0.040 0.11 0.04 2.48 0.013
LessMoreCondition [SCARF]
× Predicted F
0.12 0.06 2.12 0.034 0.13 0.06 2.16 0.031 0.17 0.06 2.82 0.005 0.14 0.06 2.46 0.014
Observations 5336 5336 5336 5336 5336 5336
R2 / R2 adjusted 0.173 / 0.170 0.224 / 0.220 0.227 / 0.223 0.228 / 0.225 0.228 / 0.225 0.228 / 0.225
Anova and Eta Square
calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")
Etasq Table: LessMore Paradigm
Condition Panel LessMoreCondition:Panel
LessMore.PAN 0.12958*** 0.03699*** 0.00642***
LessMore.PAN_FACE 0.12958*** 0.01349*** 0.00148
LessMore.PAN_Pred_F.reg_ACE 0.12958*** 0.01294*** 0.00163
LessMore.PAN_Pred_F.net_ACE 0.12958*** 0.01315*** 0.0016
LessMore.PAN_Pred_F.rf_ACE 0.12958*** 0.01248*** 0.00167
LessMore.PAN_Pred_F.gbm_ACE 0.12958*** 0.01279*** 0.00155
1.1.1.5.4 Sunk Cost Paradigm
Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_MP1.US.gbm)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm)
Sunk.PAN_Pred_F.reg_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_MP1.US.reg)
Sunk.PAN_Pred_F.net_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.net)
Sunk.PAN_Pred_F.rf_ACE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_MP1.US.rf)
Sunk.PAN_Pred_F.gbm_ACE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_F.reg_ACE" = Sunk.PAN_Pred_F.reg_ACE,
  "Sunk.PAN_Pred_F.net_ACE" = Sunk.PAN_Pred_F.net_ACE,
  "Sunk.PAN_Pred_F.rf_ACE" = Sunk.PAN_Pred_F.rf_ACE, #0.302 with unscaled
  "Sunk.PAN_Pred_F.gbm_ACE" = Sunk.PAN_Pred_F.gbm_ACE
)

# extract_adjusted_r_squared(Sunk_model)
Full Model Results
term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_F.reg_ACE,Sunk.PAN_Pred_F.net_ACE,Sunk.PAN_Pred_F.rf_ACE,Sunk.PAN_Pred_F.gbm_ACE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_F(reg)_ACE", 
                        "PAN_Pred_F(net)_ACE", 
                        "PAN_Pred_F(rf)_ACE", 
                        "PAN_Pred_F(gbm)_ACE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result
Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred_F(reg)_ACE PAN_Pred_F(net)_ACE PAN_Pred_F(rf)_ACE PAN_Pred_F(gbm)_ACE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.46 0.14 39.29 <0.001 5.46 0.14 39.54 <0.001 5.43 0.14 38.85 <0.001 5.41 0.14 38.53 <0.001 5.43 0.14 38.46 <0.001 5.43 0.14 39.00 <0.001
SunkCondition [PAID] 0.70 0.20 3.48 0.001 0.70 0.20 3.52 <0.001 0.65 0.20 3.23 0.001 0.63 0.20 3.14 0.002 0.59 0.20 2.92 0.004 0.65 0.20 3.26 0.001
F 0.32 0.11 2.82 0.005
A 0.59 0.14 4.33 <0.001 0.54 0.14 3.81 <0.001 0.53 0.14 3.72 <0.001 0.56 0.14 3.91 <0.001 0.53 0.14 3.79 <0.001
C 0.10 0.06 1.71 0.087 0.11 0.06 1.81 0.071 0.11 0.06 1.75 0.080 0.13 0.06 2.16 0.031 0.11 0.06 1.82 0.069
E 0.20 0.08 2.32 0.021 0.18 0.09 2.13 0.033 0.19 0.09 2.21 0.027 0.17 0.09 2.02 0.043 0.19 0.09 2.26 0.024
SunkCondition [PAID] × F 0.18 0.16 1.10 0.270
SunkCondition [PAID] × A 0.02 0.19 0.11 0.914 -0.12 0.20 -0.58 0.559 -0.11 0.20 -0.55 0.584 -0.16 0.20 -0.80 0.422 -0.11 0.20 -0.55 0.579
SunkCondition [PAID] × C -0.02 0.08 -0.18 0.854 -0.05 0.09 -0.60 0.550 -0.04 0.09 -0.52 0.605 -0.03 0.08 -0.40 0.692 -0.04 0.08 -0.46 0.645
SunkCondition [PAID] × E -0.04 0.12 -0.31 0.753 -0.00 0.12 -0.04 0.972 -0.01 0.12 -0.06 0.950 0.01 0.12 0.05 0.964 -0.01 0.12 -0.05 0.957
Predicted F 0.15 0.09 1.73 0.083 0.18 0.09 1.94 0.052 0.11 0.09 1.16 0.248 0.19 0.09 2.11 0.035
SunkCondition [PAID] ×
Predicted F
0.31 0.12 2.49 0.013 0.30 0.13 2.33 0.020 0.37 0.13 2.84 0.004 0.31 0.13 2.43 0.015
Observations 5336 5336 5336 5336 5336 5336
R2 / R2 adjusted 0.025 / 0.022 0.040 / 0.035 0.040 / 0.036 0.040 / 0.036 0.040 / 0.035 0.041 / 0.036
Anova and Eta Square
calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")
Etasq Table: SunkCost Paradigm
Condition Panel LessMoreCondition:Panel
Sunk.PAN 0.01083*** 0.00971*** 0.00447**
Sunk.PAN_FACE 0.01083*** 0.00563*** 0.00226
Sunk.PAN_Pred_F.reg_ACE 0.01083*** 0.00788*** 0.0018
Sunk.PAN_Pred_F.net_ACE 0.01083*** 0.00792*** 0.00188
Sunk.PAN_Pred_F.rf_ACE 0.01083*** 0.00776*** 0.0018
Sunk.PAN_Pred_F.gbm_ACE 0.01083*** 0.00687*** 0.00192
1.1.1.5.5 Summary
Model Fit
  • \(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)

  • note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.tst_MP1.US,family=binomial)

R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_F.reg_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.net_ACE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_F.rf_ACE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_F.gbm_ACE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.tst_MP1.US.gbm,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_F.reg_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.net_ACE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_F.rf_ACE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_F.gbm_ACE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(LessMore.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(LessMore.PAN_Pred_F.rf_ACE )$r.squared,
                     summary(LessMore.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.reg_ACE)$r.squared,  
                     summary(Sunk.PAN_Pred_F.net_ACE)$r.squared, 
                     summary(Sunk.PAN_Pred_F.rf_ACE )$r.squared,
                     summary(Sunk.PAN_Pred_F.gbm_ACE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"), 
            "Paradigm" = "SunkCost"))


R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", "Pan+FACE",  "Pan+Pred.F(reg)+ACE",  "Pan+Pred.F(net)+ACE" , "Pan+Pred.F(rf)+ACE","Pan+Pred.F(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_F.reg_ACE), 
                    AIC(Default.PAN_Pred_F.net_ACE), 
                    AIC(Default.PAN_Pred_F.rf_ACE),
                    AIC(Default.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_F.reg_ACE), 
                    AIC(Disease.PAN_Pred_F.net_ACE), 
                    AIC(Disease.PAN_Pred_F.rf_ACE),
                    AIC(Disease.PAN_Pred_F.gbm_ACE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PF(reg)+ACE",  "Pan+PF(net)+ACE" , "Pan+PF(rf)+ACE","Pan+PF(gbm)+ACE"), 
            "Paradigm" = "Disease"))



AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PF(reg)+ACE",  
                                   "Pan+PF(net)+ACE", 
                                   "Pan+PF(rf)+ACE",
                                   "Pan+PF(gbm)+ACE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(4200, 7000))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data", digits = 2)
Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data
Model Avg_Improvement
Pan 0.00
Pan+FACE 30.51
Pan+Pred.F(reg)+ACE 30.94
Pan+Pred.F(net)+ACE 31.08
Pan+Pred.F(rf)+ACE 31.08
Pan+Pred.F(gbm)+ACE 32.08
Pan+FACE (Full Study 5) 43.20
F*Cond Model Coefficients
  • DVs for Default & Framing are 1/0
  • DVs for LessMore & SunkCost are on the original scale
AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_F.reg_ACE),confint(Default.PAN_Pred_F.reg_ACE)),
cbind(coef(Default.PAN_Pred_F.net_ACE),confint(Default.PAN_Pred_F.net_ACE)),
cbind(coef(Default.PAN_Pred_F.rf_ACE),confint(Default.PAN_Pred_F.rf_ACE)),
cbind(coef(Default.PAN_Pred_F.gbm_ACE),confint(Default.PAN_Pred_F.gbm_ACE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_F.reg_ACE),confint(Disease.PAN_Pred_F.reg_ACE)),
cbind(coef(Disease.PAN_Pred_F.net_ACE),confint(Disease.PAN_Pred_F.net_ACE)),
cbind(coef(Disease.PAN_Pred_F.rf_ACE),confint(Disease.PAN_Pred_F.rf_ACE)),
cbind(coef(Disease.PAN_Pred_F.gbm_ACE),confint(Disease.PAN_Pred_F.gbm_ACE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_F.reg_ACE),confint(LessMore.PAN_Pred_F.reg_ACE)),
cbind(coef(LessMore.PAN_Pred_F.net_ACE),confint(LessMore.PAN_Pred_F.net_ACE)),
cbind(coef(LessMore.PAN_Pred_F.rf_ACE),confint(LessMore.PAN_Pred_F.rf_ACE)),
cbind(coef(LessMore.PAN_Pred_F.gbm_ACE),confint(LessMore.PAN_Pred_F.gbm_ACE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_F.reg_ACE),confint(Sunk.PAN_Pred_F.reg_ACE)),
cbind(coef(Sunk.PAN_Pred_F.net_ACE),confint(Sunk.PAN_Pred_F.net_ACE)),
cbind(coef(Sunk.PAN_Pred_F.rf_ACE),confint(Sunk.PAN_Pred_F.rf_ACE)),
cbind(coef(Sunk.PAN_Pred_F.gbm_ACE),confint(Sunk.PAN_Pred_F.gbm_ACE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]



AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq
  • note that the y-axes for different paradigms are not aligned..
  • We have observed a slight improvement in explaining heterogeneity across all methods, compared to what we initially had. I think this is due to our predictors being more refined: For example, we used longitude and latitude info to swap “minutes since midnight (EST)” to “clock time.
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Disease$Paradigm <- "Disease"
variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"

Model <- c("Pan", "Pan+FACE", "Pan+Pred.F(reg)+ACE", "Pan+Pred.F(net)+ACE", "Pan+Pred.F(rf)+ACE", "Pan+Pred.F(gbm)+ACE")

deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.F(reg)+ACE",  
                                   "Pan+Pred.F(net)+ACE", 
                                   "Pan+Pred.F(rf)+ACE",
                                   "Pan+Pred.F(gbm)+ACE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study1 <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study1 <- etasq_study1 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study1)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1", digits = 2)
Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1
Model Avg_Reduction
Pan 0.00
Pan+FACE 56.30
Pan+Pred.F(reg)+ACE 65.67
Pan+Pred.F(net)+ACE 65.62
Pan+Pred.F(rf)+ACE 63.69
Pan+Pred.F(gbm)+ACE 64.00
Pan (Full Study5) 0.00
Pan+FACE (Full Study5) 74.19
  • Results without Study 5
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Disease$Paradigm <- "Disease"
variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"

Moderators <- c("Panel", "Panel + Original FACE", "Panel + Pred.F(reg)+ACE", "Panel + Pred.F(net)+ACE", "Panel + Pred.F(rf)+ACE", "Panel + Pred.F(gbm)+ACE")
deviance_explained_df.Default$Moderators<-Moderators
deviance_explained_df.Disease$Moderators<-Moderators
variance_explained_lm_df.LessMore$Moderators<-Moderators
variance_explained_lm_df.SunkCost$Moderators<-Moderators

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Disease, 
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.SunkCost)

etasq$Moderators <- factor(etasq$Moderators, 
                        levels = c("Panel", 
                                   "Panel + Original FACE",  
                                   "Panel + Pred.F(reg)+ACE",  
                                   "Panel + Pred.F(net)+ACE", 
                                   "Panel + Pred.F(rf)+ACE",
                                   "Panel + Pred.F(gbm)+ACE"),
                        ordered = TRUE)


split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

(etasq.pan_cond.plot<-ggplot(etasq.pan_cond, aes(x = Moderators, y = Condition_Panel, fill = Moderators)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
 ylab(expression(italic(eta)^2 ~ "(Panel: Treatment)")) + 
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2"))

ggsave("etasq.pan_cond.svg", plot = etasq.pan_cond.plot, device = "svg",  width = 20, height = 12, units = "cm")

2. Predict C

2.0 Evaluation Accuracy with MP5 data

  • We use the cleaned data (n=1460) for analysis in this notebook.
  • In this notebook, F ~= \(\text{ATTN1} + \text{CRT} + \text{Matrices} + \text{3D}\), and C~= \(\text{age} + \text{synonym} + \text{antonym}\).
mean_C <- mean(MP5_data$C, na.rm = TRUE)
sd_C <- sd(MP5_data$C, na.rm = TRUE)

# Define thresholds for outliers
threshold_3sd_upper <- mean_C + 3 * sd_C
threshold_3sd_lower <- mean_C - 3 * sd_C

threshold_2.5sd_upper <- mean_C + 2.5 * sd_C
threshold_2.5sd_lower <- mean_C - 2.5 * sd_C

# Identify outliers using ±3 SD
outliers_3sd <- MP5_data %>%
  filter(C > threshold_3sd_upper | C < threshold_3sd_lower)
# outliers_3sd  # no outlier identified
# Identify outliers using ±2.5 SD
outliers_2.5sd <- MP5_data %>%
  filter(C > threshold_2.5sd_upper | C < threshold_2.5sd_lower)
# outliers_2.5sd # no outlier identified
ANT_SYN<-read.csv("./Data Prep for ML/MP5_Full_data_ML_20240908.csv")

Data_M5a.log_z_dummy_coded$Antonym<-ANT_SYN$Antonym
Data_M5a.log_z_dummy_coded$Synonym<-ANT_SYN$Synonym

library(GGally)
ggpairs(Data_M5a.log_z_dummy_coded, c("C","Antonym","Synonym","numEDU","AGE","ATTN1__Correct"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

2.0.1 IVs

  • Complete list of IVs used can be found in this google spreadsheet.
    • In addition to IVs in this list, in the current notebook, we have a set of text analysis variables based on respondents’ answers to the OpenEnd questions.

Text Analysis Variables

  • See “./Data Prep for ML/Study_5_OSF_adapted_ML_xw.Rmd” line 1106-1233 for data wrangling
  • In addition to the predicting variables used for F, I included 6 “text analysis” variables for predicting C (see below). These 6 variables were calculated for each of the three open-ended questions
    • Tedious OpenEnd: “What did you find tedious about this survey?”
    • Confuse OpenEnd: “What did you find confusing about this survey?”
    • Purpose OpenEnd: “In a few sentences, please explain what you think the purpose of this study is. If you are not sure, please give your best guess.”?
  • Character Count
    • The total number of characters in the response, excluding spaces and punctuation.
    • Implementation: This was calculated using the nchar() function after removing spaces and punctuation marks via the gsub() function in base R.
  • Word Count
    • total number of words in the response, counted as the number of word boundaries (spaces between words)
    • Implementation: Calculated using the str_count() function from the stringr package with boundary(“word”).
  • Average Word Length
    • Character Count divided by Word Count
  • Sentence Count
    • Number of sentences in a response
    • Implementation: Sentences were separated by punctuation marks such as periods (.), exclamation marks (!), question marks (?), and semicolons (;), and counted using str_split() from the stringr package.
  • Typo Count
    • The number of misspelled words in the response, as identified by comparing each word to a dictionary.
    • Implementation Misspelled words were identified using the hunspell() function from the hunspell package. Each word in the response was compared to a US English dictionary, and any non-matching words were flagged as typos.
  • Average Word Rarity
    • the average rarity of words in the response, where rarity is defined as 1/frequencyCount based on the SUBTLEX_US word frequency database (which uses word frequencies from film subtitles as compiled by (Brysbaert & New, 2009; frequencyCount = the total number of times this word appeared in the sample used in the study). I chose this as a convenient and accessible source for word frequency data… Higher rarity values correspond to less common (rarer) words.
    • Implementation: Words from the response were extracted and matched against the SUBTLEX_US word frequency database. For each identified word, the rarity score was calculated as 1/frequency of occurrence in the database. The average of these scores was then computed across all words in the response. If no identifiable words were found, the score was set to 0.
  • In addition, for Purpose OpenEnd, Antonia suggested that we draft a sample response and calculate the cosine similarity between participants’ responses and this sample as a predictor C. I did a sillier implements…: I summarized key words related to the tasks (also reviewed participants’ responses). For each key word that appeared in a response, I created a variable whose score was incremented by 1. See “./Data Prep for ML/Study_5_OSF_adapted_ML_xw.Rmd” line 1241-1282 for words used
Text_analysis_var_study5<-read.csv("./Data Prep for ML/MP5_Full_data_ML_20240908.csv")
Text_analysis_var_study1<-read.csv("../MP Study 1/Data_M1a.log_dummy_coded_20241003.csv")%>%
  filter(!Panel%in%c("Prolific_UK_Rep","Students"))

library(reshape2)
TEDIOUS OpenEnd
TEDIOUSEOPEN_text_analysis_var.Study5 <- Text_analysis_var_study5[, grepl("TEDIOUSOPEN_", colnames(Text_analysis_var_study5))]
tediousopen_column_names <- colnames(Text_analysis_var_study5)[grepl("TEDIOUSOPEN_", colnames(Text_analysis_var_study5))]
TEDIOUSEOPEN_text_analysis_var.Study1 <- Text_analysis_var_study1[, grepl("TEDIOUSOPEN_", colnames(Text_analysis_var_study1))]


TEDIOUSEOPEN_text_analysis_var.Study5$Study <- "Study 5"
TEDIOUSEOPEN_text_analysis_var.Study1$Study <- "Study 1"


combined_data <- rbind(TEDIOUSEOPEN_text_analysis_var.Study5, TEDIOUSEOPEN_text_analysis_var.Study1)

data_long_combined <- melt(combined_data, id.vars = "Study")
mean_data <- aggregate(value ~ variable + Study, data_long_combined, mean)
median_data <- aggregate(value ~ variable + Study, data_long_combined, median)


ggplot(data_long_combined, aes(x = value, fill = Study)) +
  geom_histogram(binwidth = 0.5, position = "identity", alpha = 0.5, color = "black") +
  geom_text(data = mean_data, aes(label = paste("Mean:", round(value, 2)), 
                                  x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 1.5, size = 3) +
  geom_text(data = median_data, aes(label = paste("Median:", round(value, 2)), 
                                    x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 3, size = 3) +
  
  facet_wrap(~ variable*Study, scales = "free")+
  labs(title = "Distribution of TEDIOUSOPEN Variables (Study 1 vs Study 5)", x = "Values", y = "Frequency") +
  theme_minimal() +
  scale_fill_manual(values = c("Study 1" = "lightblue", "Study 5" = "red")) +
  scale_color_manual(values = c("Study 1" = "blue", "Study 5" = "darkred")) +
  theme(legend.position = "top")

CONFUSE OpenEnd
CONFUSEOPEN_text_analysis_var.Study5 <- Text_analysis_var_study5[, grepl("CONFUSEOPEN_", colnames(Text_analysis_var_study5))]

confuseopen_column_names <- colnames(Text_analysis_var_study5)[grepl("CONFUSEOPEN_", colnames(Text_analysis_var_study5))]
CONFUSEOPEN_text_analysis_var.Study1 <- Text_analysis_var_study1[, grepl("CONFUSEOPEN_", colnames(Text_analysis_var_study1))]


CONFUSEOPEN_text_analysis_var.Study5$Study <- "Study 5"
CONFUSEOPEN_text_analysis_var.Study1$Study <- "Study 1"


combined_data <- rbind(CONFUSEOPEN_text_analysis_var.Study5, CONFUSEOPEN_text_analysis_var.Study1)

data_long_combined <- melt(combined_data, id.vars = "Study")
mean_data <- aggregate(value ~ variable + Study, data_long_combined, mean)
median_data <- aggregate(value ~ variable + Study, data_long_combined, median)


ggplot(data_long_combined, aes(x = value, fill = Study)) +
  geom_histogram(binwidth = 0.5, position = "identity", alpha = 0.5, color = "black") +
  geom_text(data = mean_data, aes(label = paste("Mean:", round(value, 2)), 
                                  x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 1.5, size = 3) +
  geom_text(data = median_data, aes(label = paste("Median:", round(value, 2)), 
                                    x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 3, size = 3) +
  
  facet_wrap(~ variable*Study, scales = "free")+
  labs(title = "Distribution of CONFUSEOPEN Variables (Study 1 vs Study 5)", x = "Values", y = "Frequency") +
  theme_minimal() +
  scale_fill_manual(values = c("Study 1" = "lightblue", "Study 5" = "red")) +
  scale_color_manual(values = c("Study 1" = "blue", "Study 5" = "darkred")) +
  theme(legend.position = "top")

Study Purpose OpenEnd
PURPOSE_text_analysis_var.Study5 <- Text_analysis_var_study5[, grepl("PURPOSEOPEN_", colnames(Text_analysis_var_study5)) & !grepl("\\.gpt$", colnames(Text_analysis_var_study5))]
purposeopen_column_names <- colnames(Text_analysis_var_study5)[grepl("PURPOSEOPEN_", colnames(Text_analysis_var_study5)) & !grepl("\\.gpt$", colnames(Text_analysis_var_study5))]

PURPOSE_text_analysis_var.Study1 <- Text_analysis_var_study1[, grepl("PURPOSEOPEN_", colnames(Text_analysis_var_study1))]


PURPOSE_text_analysis_var.Study5$Study <- "Study 5"
PURPOSE_text_analysis_var.Study1$Study <- "Study 1"


combined_data <- rbind(PURPOSE_text_analysis_var.Study5, PURPOSE_text_analysis_var.Study1)

data_long_combined <- melt(combined_data, id.vars = "Study")
mean_data <- aggregate(value ~ variable + Study, data_long_combined, mean)
median_data <- aggregate(value ~ variable + Study, data_long_combined, median)


ggplot(data_long_combined, aes(x = value, fill = Study)) +
  geom_histogram(binwidth = 0.5, position = "identity", alpha = 0.5, color = "black") +
  geom_text(data = mean_data, aes(label = paste("Mean:", round(value, 2)), 
                                  x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 1.5, size = 3) +
  geom_text(data = median_data, aes(label = paste("Median:", round(value, 2)), 
                                    x = Inf, y = Inf, color = Study), 
            hjust = 1.5, vjust = 3, size = 3) +
  
  facet_wrap(~ variable*Study, scales = "free")+
  labs(title = "Distribution of PURPOSEOPEN Variables (Study 1 vs Study 5)", x = "Values", y = "Frequency") +
  theme_minimal() +
  scale_fill_manual(values = c("Study 1" = "lightblue", "Study 5" = "red")) +
  scale_color_manual(values = c("Study 1" = "blue", "Study 5" = "darkred")) +
  theme(legend.position = "top")

lm(C~Text Analysis Variables)
Text_analysis_var_study5 <- read.csv("./Data Prep for ML/MP5_Full_data_ML_20240908.csv")

all_text_analysis_iv <- c(tediousopen_column_names, confuseopen_column_names, purposeopen_column_names)
Text_analysis_var_study5 <- Text_analysis_var_study5[, c("C", all_text_analysis_iv)]
Text_analysis_var_study5[all_text_analysis_iv] <- lapply(Text_analysis_var_study5[all_text_analysis_iv], function(x) log(x + 1))
model <- lm(C ~ ., data = Text_analysis_var_study5)

summary(model)
## 
## Call:
## lm(formula = C ~ ., data = Text_analysis_var_study5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1291 -0.6205 -0.1084  0.5354  2.6394 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 -0.3952     0.2713  -1.457 0.145386    
## TEDIOUSOPEN_WordCount       -0.3906     0.3582  -1.090 0.275701    
## TEDIOUSOPEN_CharacterCount   0.4636     0.3177   1.459 0.144808    
## TEDIOUSOPEN_SentenceCount    0.0723     0.2030   0.356 0.721773    
## TEDIOUSOPEN_TypoCount       -0.1125     0.1716  -0.656 0.511981    
## TEDIOUSOPEN_avg.WordRarity  -0.4888     1.2159  -0.402 0.687762    
## TEDIOUSOPEN_AvgWordLength   -0.4429     0.2197  -2.016 0.044005 *  
## CONFUSEOPEN_WordCount       -0.6800     0.3558  -1.911 0.056156 .  
## CONFUSEOPEN_CharacterCount   0.6716     0.3163   2.124 0.033881 *  
## CONFUSEOPEN_SentenceCount    0.1121     0.2306   0.486 0.626877    
## CONFUSEOPEN_TypoCount       -0.1574     0.1838  -0.856 0.391983    
## CONFUSEOPEN_avg.WordRarity  -1.1021     1.9431  -0.567 0.570683    
## CONFUSEOPEN_AvgWordLength   -0.6577     0.2188  -3.007 0.002687 ** 
## PURPOSEOPEN_WordCount       -0.8875     0.2983  -2.976 0.002973 ** 
## PURPOSEOPEN_CharacterCount   0.9386     0.2579   3.640 0.000282 ***
## PURPOSEOPEN_SentenceCount    0.6009     0.1073   5.601 2.54e-08 ***
## PURPOSEOPEN_TypoCount       -0.2570     0.0953  -2.697 0.007089 ** 
## PURPOSEOPEN_avg.WordRarity   0.9469     0.9871   0.959 0.337608    
## PURPOSEOPEN_AvgWordLength   -0.8496     0.2677  -3.174 0.001535 ** 
## PURPOSEOPEN_keyword_count    0.6312     0.1054   5.989 2.67e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8398 on 1440 degrees of freedom
## Multiple R-squared:  0.247,  Adjusted R-squared:  0.2371 
## F-statistic: 24.86 on 19 and 1440 DF,  p-value: < 2.2e-16
set.seed(123)
train_M5a <- createDataPartition(Data_M5a.log_z_dummy_coded$F, p = 0.8, list = FALSE)
data.trn_M5a <- as.data.frame(Data_M5a.log_z_dummy_coded[train_M5a, ])
data.tst_M5a <- as.data.frame(Data_M5a.log_z_dummy_coded[-train_M5a, ])


variables_to_remove.C <- c("F","A","E","numSunkCost","numLessMore","numDefault","numDisease","SunkCondition","LessMoreCondition","DiseaseCondition","DefaultCondition","Panel","Antonym","Synonym","crt2_score","rotsum","MXsum")
# We need to remove all the above variables other than C for training and testing; but we'd like to see their relationship with the predicted F


data.trn_M5a.C <- as.data.frame(data.trn_M5a[, colnames(data.trn_M5a) %notin% variables_to_remove.C])

actual_M5a.C <- data.tst_M5a$C


ctrl <- trainControl(method = "cv", number = 10, summaryFunction = defaultSummary)
tuneGrid <- expand.grid(
  mtry = c(33),
  min.node.size = c(5),
  splitrule = c("variance")
)

# Model_M5a_rf.C <- caret::train(C ~ ., data = data.trn_M5a.C, method = "ranger",
#                   tuneGrid = tuneGrid, importance = 'impurity', num.trees = 750,
#                   trControl = ctrl)
# save(Model_M5a_rf.C, file = "./Saved ML Model and Data/Model_M5a_rf.C.RData")
load("./Saved ML Model and Data/Model_M5a_rf.C.RData")

pred_M5a_rf.C<- predict(Model_M5a_rf.C,data.tst_M5a)
test_perf.M5a_rf.C <- postResample(pred_M5a_rf.C, actual_M5a.C)
data.tst_M5a.rf.C <- cbind(data.tst_M5a, Predicted_C = pred_M5a_rf.C)
pred_full_M5a_rf.C<- predict(Model_M5a_rf.C, Data_M5a.log_z_dummy_coded)
data.full_M5a.rf.C <- cbind(Data_M5a.log_z_dummy_coded, Predicted_C = pred_full_M5a_rf.C)



Model_M5a_reg.C <- train(C ~ ., data = data.trn_M5a.C, method = "lm")
pred_M5a_reg.C<- predict(Model_M5a_reg.C,data.tst_M5a)
test_perf.M5a_reg.C <- postResample(pred_M5a_reg.C, actual_M5a.C)
data.tst_M5a.reg.C <- cbind(data.tst_M5a, Predicted_C = pred_M5a_reg.C)
pred_full_M5a_reg.C<- predict(Model_M5a_reg.C, Data_M5a.log_z_dummy_coded)
data.full_M5a.reg.C <- cbind(Data_M5a.log_z_dummy_coded, Predicted_C = pred_full_M5a_reg.C)


Model_M5a_net.C <- train(C ~ ., data = data.trn_M5a.C, method = "glmnet", trControl = ctrl, tuneLength = 40) # 10 fold cv to select the best model, set tune length to 40
pred_M5a_net.C<- predict(Model_M5a_net.C,data.tst_M5a)
test_perf.M5a_net.C <- postResample(pred_M5a_net.C, actual_M5a.C)
data.tst_M5a.net.C <- cbind(data.tst_M5a, Predicted_C = pred_M5a_net.C)
pred_full_M5a_net.C<- predict(Model_M5a_net.C, Data_M5a.log_z_dummy_coded)
data.full_M5a.net.C <- cbind(Data_M5a.log_z_dummy_coded, Predicted_C = pred_full_M5a_net.C)



tuneGrid.4 <- expand.grid(
  n.trees = c(800),   # Number of trees (boosting iterations)
  interaction.depth = c(4), # Tree depth
  shrinkage = c(0.01), #   shrinkage = c(0.01,0,03,0.05,0.08,0.1); 0,01 best
  n.minobsinnode = c(50)     # Minimum number of observations in terminal nodes
)

# Model_M5a_gbm.C <- train(C ~ ., data = data.trn_M5a.C, method = "gbm",
#                     tuneGrid = tuneGrid.4, trControl = ctrl,
#                     verbose = FALSE)
# 
# # how to check the number of depths in the original tree?????
# # Save the Gradient Boosting model
# save(Model_M5a_gbm.C, file = "./Saved ML Model and Data/Model_M5a_gbm.C.RData")
load("./Saved ML Model and Data/Model_M5a_gbm.C.RData")

pred_M5a_gbm.C <- predict(Model_M5a_gbm.C, data.tst_M5a)
test_perf.M5a_gbm.C <- postResample(pred_M5a_gbm.C, actual_M5a.C)
data.tst_M5a.gbm.C <- cbind(data.tst_M5a, Predicted_C = pred_M5a_gbm.C)
pred_full_M5a_gbm.C <- predict(Model_M5a_gbm.C, Data_M5a.log_z_dummy_coded)
data.full_M5a.gbm.C <- cbind(Data_M5a.log_z_dummy_coded, Predicted_C = pred_full_M5a_gbm.C)

actual_M5a.F_full.C<-Data_M5a.log_z_dummy_coded$C
test_perf.full_M5a_gbm.C<- postResample(pred_full_M5a_gbm.C, actual_M5a.F_full.C)
test_perf.full_M5a_rf.C<- postResample(pred_full_M5a_rf.C, actual_M5a.F_full.C)
test_perf.full_M5a_reg.C<- postResample(pred_full_M5a_reg.C, actual_M5a.F_full.C)
test_perf.full_M5a_net.C<- postResample(pred_full_M5a_net.C, actual_M5a.F_full.C)

2.0.2 Model Fit

  • The following stats come from cross-validation
train_perf_M5a_rf <- Model_M5a_rf.C$results[which.min(Model_M5a_rf.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_reg <- Model_M5a_reg.C$results[which.min(Model_M5a_reg.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_net <- Model_M5a_net.C$results[which.min(Model_M5a_net.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_gbm <- Model_M5a_gbm.C$results[which.min(Model_M5a_gbm.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]

# Combine training performance metrics into a data frame
performance_metrics_trn <- data.frame(
  Model = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(as.numeric(train_perf_M5a_reg["MAE"]), as.numeric(train_perf_M5a_net["MAE"]), as.numeric(train_perf_M5a_rf["MAE"]), as.numeric(train_perf_M5a_gbm["MAE"])),
  RMSE = c(as.numeric(train_perf_M5a_reg["RMSE"]), as.numeric(train_perf_M5a_net["RMSE"]), as.numeric(train_perf_M5a_rf["RMSE"]), as.numeric(train_perf_M5a_gbm["RMSE"])),
  Rsquared = c(as.numeric(train_perf_M5a_reg["Rsquared"]), as.numeric(train_perf_M5a_net["Rsquared"]), as.numeric(train_perf_M5a_rf["Rsquared"]), as.numeric(train_perf_M5a_gbm["Rsquared"])))


performance_table_trn <- performance_metrics_trn %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table_trn, caption = "Model Fit Statistics") %>%
  kable_styling(full_width = F, position = "center")
Model Fit Statistics
Model MAE RMSE Rsquared
Linear Regression 0.6067 0.7554 0.4062
Elastic Net 0.5745 0.7124 0.4572
Random Forest 0.5623 0.6965 0.4995
Gradient Boosting 0.5339 0.6726 0.5199
###Plot ###

performance_metrics_long_trn <- performance_metrics_trn %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")

trn_M5a.sd_C <- sd(data.trn_M5a$C)

ggplot(performance_metrics_long_trn, aes(x = Metric, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = trn_M5a.sd_C, yend = trn_M5a.sd_C),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
            aes(x = 2, y = trn_M5a.sd_C, label = paste0("Std.dev of C in Training Data: ", round(trn_M5a.sd_C, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Model Fit Statistics",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

2.0.3 Model Interpretation I

  • Refer to Section 2.1.1.1 for variable importance when trained with full MP5 data.

2.0.4 Model Interpretation II (SHAP Value for tree-based models)

  • Refer to Section 2.1.1.2 for variable importance when trained with full MP5 data.

2.0.5 Model Accuracy on Held-out Test Data

  • Correlation between predicted F using different methods
predicted_f_reg <- data.tst_M5a.reg.C$Predicted_C
predicted_f_rf <- data.tst_M5a.rf.C$Predicted_C
predicted_f_gbm <- data.tst_M5a.gbm.C$Predicted_C
predicted_f_net <- data.tst_M5a.net.C$Predicted_C
actual_C<-data.tst_M5a.gbm.C$C

# Combine into a data frame
predicted_C <- data.frame(
  Pred_C.Reg = predicted_f_reg,
  Pred_C.RandomForest = predicted_f_rf,
  Pred_C.GradientBoosting = predicted_f_gbm,
  Pred_C.ElasticNet = predicted_f_net,
  C = actual_C
)

ggpairs(predicted_C, c("Pred_C.Reg","Pred_C.ElasticNet","Pred_C.RandomForest","Pred_C.GradientBoosting","C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

  • The following metrics pertains to how the model did in predicting held-out data (un-used during model training). Signallings how generalizable the models are.
# Bar plot indicating overall model performance, plotting RMSE, R-squared, and MEA

# Combine performance metrics into a data frame
performance_metrics <- data.frame(
Model = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(test_perf.M5a_reg.C["MAE"], test_perf.M5a_net.C["MAE"], test_perf.M5a_rf.C["MAE"], test_perf.M5a_gbm.C["MAE"]),
  RMSE = c(test_perf.M5a_reg.C["RMSE"], test_perf.M5a_net.C["RMSE"], test_perf.M5a_rf.C["RMSE"], test_perf.M5a_gbm.C["RMSE"]),
  Rsquared = c(test_perf.M5a_reg.C["Rsquared"], test_perf.M5a_net.C["Rsquared"], test_perf.M5a_rf.C["Rsquared"], test_perf.M5a_gbm.C["Rsquared"])
)



performance_metrics_long <- performance_metrics %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")


# Create the performance metrics table
performance_table <- performance_metrics %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table, caption = "Comparison of Model Performance in Held-Out Test Data") %>%
  kable_styling(full_width = F, position = "center")
Comparison of Model Performance in Held-Out Test Data
Model MAE RMSE Rsquared
Linear Regression 0.6134 0.7409 0.3925
Elastic Net 0.6034 0.7239 0.4125
Random Forest 0.5749 0.6966 0.4644
Gradient Boosting 0.5548 0.6804 0.4821
tst_M5a.sd_C <- sd(data.tst_M5a$C)

ggplot(performance_metrics_long, aes(x = Metric, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = tst_M5a.sd_C, yend = tst_M5a.sd_C),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long %>% filter(Metric == "RMSE"),
            aes(x = 2, y = tst_M5a.sd_C, label = paste0("Std.dev of C in Held-Out Test Data: ", round(tst_M5a.sd_C, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Comparison of Model Performance in Held-Out Testing Data",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

2.0.6 Density and Residual Plots for the Predicted C in Held-Out Test Data

Histogram of C in Held-Out Test Data

hist(data.tst_M5a$C)

library(gridExtra)

# Calculate residuals
residuals_rf <- actual_M5a.C - pred_M5a_rf.C
residuals_reg <- actual_M5a.C - pred_M5a_reg.C
residuals_net <- actual_M5a.C - pred_M5a_net.C
residuals_gbm <- actual_M5a.C - pred_M5a_gbm.C

# Create a data frame with residuals and actual values
residuals_data <- data.frame(
  Actual = actual_M5a.C,
  Pred_RF = pred_M5a_rf.C,
  Pred_Reg = pred_M5a_reg.C,
  Pred_NET = pred_M5a_net.C,
  Pred_GBM = pred_M5a_gbm.C,
  Residual_RF = residuals_rf,
  Residual_Reg = residuals_reg,
  Residual_NET = residuals_net,
  Residual_GBM = residuals_gbm
)

create_density_plot <- function(predicted, actual, title) {
  ggplot() +
    geom_density(aes(x = predicted, color = "Predicted C"), alpha = 0.4, size = 1) +
    geom_density(aes(x = actual, color = "Actual C"), alpha = 0.4, size = 1) +
    scale_y_continuous(limits = c(0, 1)) +
    scale_color_manual(values = c("Actual C" = "firebrick", "Predicted C" = "darkorange")) +
    labs(title = title, x = "C Score", y = "Density") +
    theme_minimal() +
    theme(legend.position = "bottom", legend.title = element_blank())
}

# Create density plots for each model
density_rf <- create_density_plot(residuals_data$Pred_RF, residuals_data$Actual, "Density Plot for PF5 - Random Forest")
density_reg <- create_density_plot(residuals_data$Pred_Reg, residuals_data$Actual, "Density Plot for PF5 - Regression")
density_net <- create_density_plot(residuals_data$Pred_NET, residuals_data$Actual, "Density Plot for PF5 - Elastic Net")
density_gbm <- create_density_plot(residuals_data$Pred_GBM, residuals_data$Actual, "Density Plot for PF5 - Gradient Boosting")

# Arrange all density plots in a grid
grid.arrange(density_reg, density_net, density_rf, density_gbm, ncol = 2, nrow = 2)

create_residual_plot <- function(actual, residuals, title) {
  ggplot(residuals_data, aes(x = actual, y = residuals)) +
    geom_point(alpha = 0.4) +
    geom_rug(sides = "b", alpha = 0.2) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
    labs(title = title, x = "Actual C Score", y = "Residuals") +
    theme_minimal() +
    ylim(-2, 2)
}

# Create residual plots for each model
residual_rf <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_RF, "Residuals Plot for Random Forest")
residual_reg <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_Reg, "Residuals Plot for Regression")
residual_net <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_NET, "Residuals Plot for Elastic Net")
residual_gbm <- create_residual_plot(residuals_data$Actual, residuals_data$Residual_GBM, "Residuals Plot for Gradient Boosting")

# Arrange all residual plots in a grid
grid.arrange( residual_reg, residual_net,residual_rf, residual_gbm, ncol = 2, nrow = 2)

2.0.7 Correlation Btw ML Predicted F and other FACE factors

  • We focus on the test dataset (n=292)
  • Significance Notation: *** if the p-value is < 0.001, ** if the p-value is < 0.01, * if the p-value is < 0.05, . if the p-value is < 0.10

Linear Regression

ggpairs(data.tst_M5a.reg.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net

ggpairs(data.tst_M5a.net.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest

ggpairs(data.tst_M5a.rf.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.tst_M5a.gbm.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PC, numEDU, AGE, and ATTN1

ggpairs(data.tst_M5a.gbm.C, c("Predicted_C","Antonym","Synonym","numEDU","AGE","ATTN1__Correct","C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

2.0.8 Explain Heterogenity with Predicted C in Test Sample (20% study 5 Data)

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_M5a.reg, family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_M5a.gbm.C, family = binomial)

Default.PAN_Pred_C.reg_FAE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.reg.C , family = binomial)

Default.PAN_Pred_C.net_FAE <- glm(numDefault ~  DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C + DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.net.C, family = binomial)

Default.PAN_Pred_C.rf_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.rf.C, family = binomial)

Default.PAN_Pred_C.gbm_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.gbm.C, family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_C.reg_FAE" = Default.PAN_Pred_C.reg_FAE,
  "Default.PAN_Pred_C.net_FAE" = Default.PAN_Pred_C.net_FAE,
  "Default.PAN_Pred_C.rf_FAE" = Default.PAN_Pred_C.rf_FAE,
  "Default.PAN_Pred_C.gbm_FAE" = Default.PAN_Pred_C.gbm_FAE
)
Model Results
term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_C.reg_FAE,Default.PAN_Pred_C.net_FAE,Default.PAN_Pred_C.rf_FAE,Default.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result
Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.C(reg)_FAE PAN_Pred.C(net)_FAE PAN_Pred.C(rf)_FAE PAN_Pred.C(gbm)_FAE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.96 0.28 -0.15 0.884 0.94 0.28 -0.21 0.837 0.89 0.27 -0.40 0.690 0.87 0.26 -0.46 0.644 0.84 0.26 -0.56 0.578 0.88 0.27 -0.44 0.663
DefaultCondition [OPTOUT] 1.41 0.58 0.83 0.409 1.44 0.61 0.86 0.391 1.51 0.66 0.95 0.341 1.57 0.69 1.03 0.305 1.49 0.65 0.90 0.367 1.54 0.67 0.99 0.322
F 0.87 0.35 -0.35 0.725 1.02 0.30 0.06 0.949 1.06 0.32 0.20 0.838 1.11 0.33 0.36 0.719 1.05 0.32 0.17 0.865
A 0.85 0.17 -0.79 0.431 0.71 0.16 -1.54 0.124 0.76 0.16 -1.31 0.190 0.76 0.16 -1.28 0.202 0.77 0.16 -1.25 0.210
C 1.69 0.58 1.53 0.127
E 0.88 0.16 -0.72 0.474 0.92 0.16 -0.49 0.625 0.92 0.16 -0.46 0.648 0.92 0.16 -0.48 0.635 0.91 0.16 -0.52 0.603
DefaultCondition [OPTOUT]
× F
1.35 0.84 0.49 0.627 1.31 0.60 0.59 0.556 1.29 0.60 0.55 0.579 1.10 0.50 0.21 0.837 1.28 0.59 0.54 0.589
DefaultCondition [OPTOUT]
× A
1.07 0.31 0.24 0.814 1.27 0.40 0.78 0.435 1.22 0.37 0.64 0.524 1.14 0.35 0.42 0.677 1.19 0.36 0.57 0.572
DefaultCondition [OPTOUT]
× C
0.68 0.34 -0.77 0.443
DefaultCondition [OPTOUT]
× E
1.68 0.50 1.74 0.082 1.65 0.47 1.77 0.077 1.65 0.47 1.75 0.080 1.63 0.46 1.72 0.085 1.67 0.48 1.78 0.075
Predicted C 2.50 0.96 2.36 0.018 2.17 0.93 1.79 0.073 2.20 1.06 1.64 0.101 2.20 0.86 2.01 0.044
DefaultCondition [OPTOUT]
× Predicted C
0.42 0.23 -1.57 0.116 0.44 0.28 -1.28 0.199 0.65 0.47 -0.59 0.553 0.46 0.26 -1.35 0.177
Observations 292 292 292 292 292 292
AIC 389.079 395.399 392.063 394.646 394.754 393.782
log-Likelihood -188.540 -183.699 -182.031 -183.323 -183.377 -182.891
Anova and Eta Square
  • As in the origianl paper, type I anova is used. We first attribute variance to condition, FACE, then Panel main effect, followed by the conditionFACE interaction, and the Panelcondition interaction last.
  • As in the original paper, Etasq of a variable in linear regression models (LessMore and Sunk) is calculated as the sum of squares explained by the variables divided by the total sum of squares. Etasq of a variable in logistic regression models (Default and Unusual Disease) is calculated as the deviance explained by the model relative to the null model (deviance from the null model divided by the model’s residual deviance).
  • I validated this pipeline by reproducing the stats reported in the SI for Study 1.
  • Woudl we like to see the full Anova stats?
calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")
Etasq Table: Default Paradigm
Condition Panel Default Condition:Panel
Default.PAN 0.02968*** 0.0095 0.02393**
Default.PAN_FACE 0.02968*** 0.00305 0.00314
Default.PAN_Pred_C.reg_FAE 0.02968*** 0.00284 0.00209
Default.PAN_Pred_C.net_FAE 0.02968*** 0.00314 0.00215
Default.PAN_Pred_C.rf_FAE 0.02968*** 0.00165 0.00271
Default.PAN_Pred_C.gbm_FAE 0.02968*** 0.00228 0.00155

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_M5a.reg, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_M5a.reg.C, family = binomial)

Disease.PAN_Pred_C.reg_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.reg.C, family = binomial)

Disease.PAN_Pred_C.net_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.net.C, family = binomial)

Disease.PAN_Pred_C.rf_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.rf.C, family = binomial)

Disease.PAN_Pred_C.gbm_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_M5a.gbm.C, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_C.reg_FAE" = Disease.PAN_Pred_C.reg_FAE,
  "Disease.PAN_Pred_C.net_FAE" = Disease.PAN_Pred_C.net_FAE,
  "Disease.PAN_Pred_C.rf_FAE" = Disease.PAN_Pred_C.rf_FAE, 
  "Disease.PAN_Pred_C.gbm_FAE" = Disease.PAN_Pred_C.gbm_FAE
)

# extract_aic(Disease_model)
Model Results
term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_C.reg_FAE,Disease.PAN_Pred_C.net_FAE,Disease.PAN_Pred_C.rf_FAE,Disease.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result
Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.C(reg)_FAE PAN_Pred.C(net)_FAE PAN_Pred.C(rf)_FAE PAN_Pred.C(gbm)_FAE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.48 0.13 -2.67 0.008 0.44 0.13 -2.80 0.005 0.50 0.14 -2.43 0.015 0.51 0.15 -2.32 0.020 0.53 0.15 -2.19 0.028 0.52 0.15 -2.31 0.021
DiseaseCondition [LOSS] 4.04 1.82 3.09 0.002 4.19 1.97 3.04 0.002 3.84 1.82 2.83 0.005 3.83 1.84 2.80 0.005 3.40 1.63 2.56 0.010 3.54 1.67 2.68 0.007
F 2.42 1.16 1.84 0.065 1.17 0.36 0.51 0.612 1.20 0.38 0.58 0.564 1.25 0.40 0.69 0.491 1.30 0.42 0.81 0.419
A 1.19 0.23 0.90 0.369 1.26 0.25 1.15 0.251 1.27 0.26 1.18 0.237 1.30 0.26 1.29 0.197 1.28 0.25 1.26 0.208
C 0.41 0.17 -2.20 0.028
E 1.07 0.21 0.33 0.745 0.92 0.17 -0.46 0.642 0.92 0.17 -0.46 0.649 0.94 0.17 -0.33 0.743 0.93 0.17 -0.38 0.707
DiseaseCondition [LOSS] ×
F
0.92 0.64 -0.12 0.902 1.14 0.56 0.26 0.793 1.11 0.55 0.20 0.838 0.92 0.45 -0.18 0.859 0.93 0.46 -0.14 0.888
DiseaseCondition [LOSS] ×
A
1.49 0.50 1.17 0.243 1.52 0.53 1.20 0.231 1.50 0.53 1.16 0.245 1.43 0.50 1.02 0.310 1.45 0.50 1.08 0.280
DiseaseCondition [LOSS] ×
C
1.17 0.65 0.28 0.780
DiseaseCondition [LOSS] ×
E
0.77 0.24 -0.86 0.392 0.80 0.24 -0.75 0.452 0.80 0.24 -0.76 0.444 0.76 0.22 -0.92 0.356 0.78 0.23 -0.86 0.392
Predicted C 0.71 0.28 -0.86 0.390 0.64 0.30 -0.94 0.346 0.53 0.28 -1.18 0.237 0.54 0.24 -1.40 0.162
DiseaseCondition [LOSS] ×
Predicted C
0.84 0.50 -0.29 0.772 0.87 0.59 -0.20 0.842 1.48 1.13 0.51 0.607 1.33 0.80 0.47 0.639
Observations 292 292 292 292 292 292
AIC 363.491 356.242 363.239 363.114 363.792 362.802
log-Likelihood -175.745 -164.121 -167.619 -167.557 -167.896 -167.401
Anova and Eta Square
calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")
Etasq Table: Disease Paradigm
Condition Panel DiseaseCondition:Panel
Disease.PAN 0.03974*** 0.03523** 0.01646*
Disease.PAN_FACE 0.03974*** 0.0196* 0.00098
Disease.PAN_Pred_C.reg_FAE 0.03974*** 0.0073 0.00126
Disease.PAN_Pred_C.net_FAE 0.03974*** 0.00794 0.00116
Disease.PAN_Pred_C.rf_FAE 0.03974*** 0.00665 0.00024
Disease.PAN_Pred_C.gbm_FAE 0.03974*** 0.00796 0.00041

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_M5a.reg)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.reg.C)
LessMore.PAN_Pred_C.reg_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.reg.C)
LessMore.PAN_Pred_C.net_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.net.C)
LessMore.PAN_Pred_C.rf_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.rf.C)
LessMore.PAN_Pred_C.gbm_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.gbm.C)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_C.reg_FAE" = LessMore.PAN_Pred_C.reg_FAE,
  "LessMore.PAN_Pred_C.net_FAE" = LessMore.PAN_Pred_C.net_FAE,
  "LessMore.PAN_Pred_C.rf_FAE" = LessMore.PAN_Pred_C.rf_FAE, 
  "LessMore.PAN_Pred_C.gbm_FAE" = LessMore.PAN_Pred_C.gbm_FAE
)
Model Results
term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_C.reg_FAE,LessMore.PAN_Pred_C.net_FAE,LessMore.PAN_Pred_C.rf_FAE,LessMore.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result
LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.C(reg)_FAE PAN_Pred.C(net)_FAE PAN_Pred.C(rf)_FAE PAN_Pred.C(gbm)_FAE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.51 0.21 25.98 <0.001 5.52 0.21 26.19 <0.001 5.50 0.21 25.75 <0.001 5.53 0.21 25.75 <0.001 5.52 0.21 25.78 <0.001 5.52 0.21 26.02 <0.001
LessMoreCondition [SCARF] 0.76 0.29 2.65 0.009 0.83 0.29 2.89 0.004 0.86 0.29 2.97 0.003 0.80 0.29 2.74 0.007 0.77 0.29 2.62 0.009 0.82 0.29 2.83 0.005
F -0.03 0.25 -0.11 0.913 0.08 0.19 0.45 0.656 0.14 0.19 0.75 0.455 0.13 0.19 0.71 0.480 0.12 0.19 0.67 0.504
A -0.26 0.14 -1.88 0.062 -0.28 0.15 -1.94 0.053 -0.25 0.15 -1.75 0.082 -0.26 0.15 -1.77 0.078 -0.26 0.14 -1.84 0.067
C 0.16 0.21 0.77 0.442
E -0.02 0.12 -0.17 0.863 0.01 0.11 0.05 0.963 0.00 0.11 0.04 0.968 0.00 0.11 0.04 0.964 0.00 0.11 0.04 0.965
LessMoreCondition [SCARF]
× F
0.38 0.40 0.95 0.345 0.23 0.30 0.76 0.450 0.05 0.30 0.18 0.858 0.02 0.30 0.08 0.936 0.11 0.30 0.36 0.716
LessMoreCondition [SCARF]
× A
0.51 0.18 2.78 0.006 0.54 0.19 2.83 0.005 0.48 0.19 2.52 0.012 0.46 0.19 2.36 0.019 0.50 0.19 2.65 0.008
LessMoreCondition [SCARF]
× C
-0.24 0.32 -0.75 0.452
LessMoreCondition [SCARF]
× E
0.05 0.18 0.28 0.780 0.01 0.17 0.08 0.938 -0.00 0.17 -0.03 0.978 -0.02 0.17 -0.13 0.898 -0.00 0.17 -0.00 0.997
Predicted C 0.10 0.24 0.41 0.679 -0.09 0.28 -0.33 0.739 -0.07 0.31 -0.23 0.819 -0.03 0.24 -0.13 0.895
LessMoreCondition [SCARF]
× Predicted C
-0.18 0.35 -0.52 0.601 0.28 0.41 0.69 0.492 0.43 0.47 0.92 0.359 0.12 0.37 0.32 0.749
Observations 292 292 292 292 292 292
R2 / R2 adjusted 0.208 / 0.194 0.249 / 0.214 0.248 / 0.212 0.248 / 0.213 0.250 / 0.215 0.247 / 0.212
Anova and Eta Square
calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")
Etasq Table: LessMore Paradigm
Condition Panel LessMoreCondition:Panel
LessMore.PAN 0.08314*** 0.12202*** 0.00272
LessMore.PAN_FACE 0.08314*** 0.04611*** 0.00099
LessMore.PAN_Pred_C.reg_FAE 0.08314*** 0.05544*** 7e-04
LessMore.PAN_Pred_C.net_FAE 0.08314*** 0.0524*** 0.00161
LessMore.PAN_Pred_C.rf_FAE 0.08314*** 0.04324*** 0.0025
LessMore.PAN_Pred_C.gbm_FAE 0.08314*** 0.04916*** 0.00122

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_M5a)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a)
Sunk.PAN_Pred_C.reg_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_M5a.reg.C)
Sunk.PAN_Pred_C.net_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.net.C)
Sunk.PAN_Pred_C.rf_FAE <- lm(numSunkCost ~ SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_M5a.rf.C)
Sunk.PAN_Pred_C.gbm_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.gbm.C)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_C.reg_FAE" = Sunk.PAN_Pred_C.reg_FAE,
  "Sunk.PAN_Pred_C.net_FAE" = Sunk.PAN_Pred_C.net_FAE,
  "Sunk.PAN_Pred_C.rf_FAE" = Sunk.PAN_Pred_C.rf_FAE, #0.302 with unscaled
  "Sunk.PAN_Pred_C.gbm_FAE" = Sunk.PAN_Pred_C.gbm_FAE
)

# extract_adjusted_r_squared(Sunk_model)
Full Model Results
term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_C.reg_FAE,Sunk.PAN_Pred_C.net_FAE,Sunk.PAN_Pred_C.rf_FAE,Sunk.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_C(reg)_FAE", 
                        "PAN_Pred_C(net)_FAE", 
                        "PAN_Pred_C(rf)_FAE", 
                        "PAN_Pred_C(gbm)_FAE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result
Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred_C(reg)_FAE PAN_Pred_C(net)_FAE PAN_Pred_C(rf)_FAE PAN_Pred_C(gbm)_FAE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.51 0.36 15.18 <0.001 5.51 0.36 15.31 <0.001 5.46 0.38 14.33 <0.001 5.49 0.39 14.14 <0.001 5.47 0.39 13.97 <0.001 5.53 0.38 14.44 <0.001
SunkCondition [PAID] 0.14 0.54 0.26 0.792 0.06 0.53 0.12 0.906 0.12 0.55 0.23 0.822 0.15 0.56 0.26 0.794 0.16 0.56 0.28 0.779 0.08 0.55 0.14 0.887
F 0.60 0.49 1.21 0.226 -0.36 0.37 -0.95 0.344 -0.32 0.38 -0.85 0.396 -0.33 0.38 -0.88 0.381 -0.28 0.38 -0.74 0.459
A 0.22 0.24 0.93 0.355 0.13 0.26 0.50 0.618 0.16 0.26 0.61 0.540 0.15 0.26 0.56 0.575 0.19 0.25 0.74 0.459
C -1.07 0.42 -2.52 0.012
E 0.07 0.24 0.29 0.771 -0.13 0.23 -0.57 0.568 -0.13 0.23 -0.55 0.581 -0.14 0.23 -0.59 0.555 -0.13 0.23 -0.56 0.577
SunkCondition [PAID] × F 0.53 0.73 0.72 0.469 0.87 0.56 1.57 0.118 0.88 0.57 1.56 0.119 0.48 0.56 0.85 0.395 0.63 0.56 1.13 0.261
SunkCondition [PAID] × A -0.50 0.34 -1.46 0.144 -0.29 0.36 -0.79 0.429 -0.32 0.36 -0.89 0.374 -0.36 0.37 -0.99 0.322 -0.39 0.36 -1.09 0.275
SunkCondition [PAID] × C 0.05 0.60 0.08 0.937
SunkCondition [PAID] × E 0.05 0.33 0.15 0.883 0.11 0.32 0.34 0.736 0.13 0.32 0.42 0.675 0.09 0.32 0.27 0.787 0.11 0.32 0.33 0.740
Predicted C 0.28 0.48 0.58 0.559 0.16 0.55 0.29 0.773 0.22 0.58 0.38 0.704 -0.00 0.46 -0.01 0.994
SunkCondition [PAID] ×
Predicted C
-1.18 0.68 -1.74 0.083 -1.24 0.78 -1.57 0.116 -0.33 0.92 -0.36 0.723 -0.61 0.73 -0.84 0.403
Observations 292 292 292 292 292 292
R2 / R2 adjusted 0.040 / 0.023 0.086 / 0.043 0.059 / 0.015 0.059 / 0.015 0.047 / 0.002 0.050 / 0.006
Anova and Eta Square
calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 6), sig_condition)
  res_panel <- paste0(round(perc_panel, 6), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 6), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")
Etasq Table: SunkCost Paradigm
Condition Panel LessMoreCondition:Panel
Sunk.PAN 0.004668 0.025882* 0.009219
Sunk.PAN_FACE 0.004668 0.01253 0.003898
Sunk.PAN_Pred_C.reg_FAE 0.004668 0.012942 0.010976
Sunk.PAN_Pred_C.net_FAE 0.004668 0.012947 0.011858
Sunk.PAN_Pred_C.rf_FAE 0.004668 0.013695 0.006579
Sunk.PAN_Pred_C.gbm_FAE 0.004668 0.013776 0.009041

Summary

Model Fit
  • \(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)

  • note that the y-axes for different paradigms are not aligned.

Default.null<-glm(numDefault~1,data.tst_M5a.reg.C,family=binomial)


R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_C.reg_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.net_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.rf_FAE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_C.gbm_FAE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.tst_M5a.reg.C,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_C.reg_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.net_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.rf_FAE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_C.gbm_FAE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(LessMore.PAN_Pred_C.rf_FAE)$r.squared,
                     summary(LessMore.PAN_Pred_C.gbm_FAE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(Sunk.PAN_Pred_C.rf_FAE )$r.squared,
                     summary(Sunk.PAN_Pred_C.gbm_FAE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "SunkCost"))


R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_C.reg_FAE), 
                    AIC(Default.PAN_Pred_C.net_FAE), 
                    AIC(Default.PAN_Pred_C.rf_FAE),
                    AIC(Default.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_C.reg_FAE), 
                    AIC(Disease.PAN_Pred_C.net_FAE), 
                    AIC(Disease.PAN_Pred_C.rf_FAE),
                    AIC(Disease.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Disease"))


AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PC(reg)+FAE",  
                                   "Pan+PC(net)+FAE", 
                                   "Pan+PC(rf)+FAE",
                                   "Pan+PC(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


### Avg. improvement in R2 for each model across all four paradigms, in FULL study 5
Default.null.Study5<-glm(numDefault~1,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.null.Study5<-glm(numDisease~1,Data_M5a.log_z_dummy_coded,family=binomial)
Default.PAN.Study5<-glm(numDefault~DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN.Study5<-glm(numDisease~DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN.Study5<-lm(numLessMore~LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN.Study5<-lm(numSunkCost~SunkCondition*Panel,Data_M5a.log_z_dummy_coded)


Default.PAN_FACE.Study5<-glm(numDefault~DefaultCondition*F+DefaultCondition*A+DefaultCondition*C+DefaultCondition*E+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~DiseaseCondition*F+DiseaseCondition*A+DiseaseCondition*C+DiseaseCondition*E+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~LessMoreCondition*F+LessMoreCondition*A+LessMoreCondition*C+LessMoreCondition*E+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~SunkCondition*F+SunkCondition*A+SunkCondition*C+SunkCondition*E+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

R2.Default = as.data.frame(cbind( 
  "R2" = c((1-logLik(Default.PAN.Study5)/logLik(Default.null.Study5)),
           (1-logLik(Default.PAN_FACE.Study5)/logLik(Default.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Default"
))


R2.Disease = as.data.frame(cbind( 
  "R2" = c((1-logLik(Disease.PAN.Study5)/logLik(Disease.null.Study5)),
           (1-logLik(Disease.PAN_FACE.Study5)/logLik(Disease.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Disease"
))

# For LessMore paradigm (linear models)
R2.LessMore = as.data.frame(cbind( 
  "R2" = c(summary(LessMore.PAN.Study5)$r.squared,
           summary(LessMore.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "LessMore"
))

# For SunkCost paradigm (linear models)
R2.Sunk = as.data.frame(cbind( 
  "R2" = c(summary(Sunk.PAN.Study5)$r.squared,
           summary(Sunk.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "SunkCost"
))

R2.FullStudy5 <- rbind(R2.Default, R2.Disease, R2.LessMore, R2.Sunk)
R2.FullStudy5$R2 <- round(as.numeric(R2.FullStudy5$R2), 3)

R2.FullStudy5 <- R2.FullStudy5 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement_full_study_5 <- R2.FullStudy5 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))%>%
  slice(-1) 

avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample", digits = 2)
Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample
Model Avg_Improvement
Pan 0.00
Pan+FACE 59.96
Pan+Pred.C(reg)+FAE 40.92
Pan+Pred.C(net)+FAE 38.81
Pan+Pred.C(rf)+FAE 31.00
Pan+Pred.C(gbm)+FAE 34.14
Pan+FACE (Full Study 5) 43.20
C*Cond Model Coefficients
  • DVs for Default & Framing are 1/0
  • DVs for LessMore & SunkCost are on the original scale
AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_C.reg_FAE),confint(Default.PAN_Pred_C.reg_FAE)),
cbind(coef(Default.PAN_Pred_C.net_FAE),confint(Default.PAN_Pred_C.net_FAE)),
cbind(coef(Default.PAN_Pred_C.rf_FAE),confint(Default.PAN_Pred_C.rf_FAE)),
cbind(coef(Default.PAN_Pred_C.gbm_FAE),confint(Default.PAN_Pred_C.gbm_FAE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_C.reg_FAE),confint(Disease.PAN_Pred_C.reg_FAE)),
cbind(coef(Disease.PAN_Pred_C.net_FAE),confint(Disease.PAN_Pred_C.net_FAE)),
cbind(coef(Disease.PAN_Pred_C.rf_FAE),confint(Disease.PAN_Pred_C.rf_FAE)),
cbind(coef(Disease.PAN_Pred_C.gbm_FAE),confint(Disease.PAN_Pred_C.gbm_FAE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_C.reg_FAE),confint(LessMore.PAN_Pred_C.reg_FAE)),
cbind(coef(LessMore.PAN_Pred_C.net_FAE),confint(LessMore.PAN_Pred_C.net_FAE)),
cbind(coef(LessMore.PAN_Pred_C.rf_FAE),confint(LessMore.PAN_Pred_C.rf_FAE)),
cbind(coef(LessMore.PAN_Pred_C.gbm_FAE),confint(LessMore.PAN_Pred_C.gbm_FAE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_C.reg_FAE),confint(Sunk.PAN_Pred_C.reg_FAE)),
cbind(coef(Sunk.PAN_Pred_C.net_FAE),confint(Sunk.PAN_Pred_C.net_FAE)),
cbind(coef(Sunk.PAN_Pred_C.rf_FAE),confint(Sunk.PAN_Pred_C.rf_FAE)),
cbind(coef(Sunk.PAN_Pred_C.gbm_FAE),confint(Sunk.PAN_Pred_C.gbm_FAE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef
AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq
### Study 5 Results ###
Default_model.Study5<-list(
  "Default.PAN.Study5" = Default.PAN.Study5,
  "Default.PAN_FACE.Study5" = Default.PAN_FACE.Study5)
Disease_model.Study5<-list(
  "Disease.PAN.Study5" = Disease.PAN.Study5,
  "Disease.PAN_FACE.Study5" = Disease.PAN_FACE.Study5)

LessMore_model.Study5<-list(
  "LessMore.PAN.Study5" = LessMore.PAN.Study5,
  "LessMore.PAN_FACE.Study5" = LessMore.PAN_FACE.Study5)


Sunk_model.Study5<-list(
  "Sunk.PAN.Study5" = Sunk.PAN.Study5,
  "Sunk.PAN_FACE.Study5" = Sunk.PAN_FACE.Study5)

deviance_explained <- sapply(Default_model.Study5, calculate_deviance_explained.Default)
deviance_explained_df.Default.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Default.Study5) <- c("Condition","Panel", "Default Condition:Panel")

deviance_explained <- sapply(Disease_model.Study5, calculate_deviance_explained.Disease)
deviance_explained_df.Disease.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Disease.Study5) <- c("Condition","Panel", "Disease Condition:Panel")

variance_explained_lm <- sapply(LessMore_model.Study5, calculate_variance_explained_LessMore)
variance_explained_lm_df.LessMore.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

variance_explained_lm <- sapply(Sunk_model.Study5, calculate_variance_explained_Sunk)
variance_explained_lm_df.SunkCost.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.SunkCost.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.C(reg)+FAE", "Pan+Pred.C(net)+FAE", "Pan+Pred.C(rf)+FAE", "Pan+Pred.C(gbm)+FAE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in the test sample", digits = 2)
Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in the test sample
Model Avg_Reduction
Pan 0.00
Pan+FACE 75.56
Pan+Pred.C(reg)+FAE 59.70
Pan+Pred.C(net)+FAE 49.04
Pan+Pred.C(rf)+FAE 55.99
Pan+Pred.C(gbm)+FAE 62.03
Pan (Full Study5) 0.00
Pan+FACE (Full Study5) 74.19

2.0.9 Explain Heterogenity with Predicted C in Full study 5 data

2.0.9.1 Accuracy in full MP5 data

  • Random forest performs best when predicting the full Study 5 data, outperforming GBM. This could be because while we had made GBM more parsimounious, we haven’t done so with random forest.
performance_metrics <- data.frame(
Method = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(test_perf.full_M5a_reg.C["MAE"], test_perf.full_M5a_net.C["MAE"], test_perf.full_M5a_rf.C["MAE"], test_perf.full_M5a_gbm.C["MAE"]),
  RMSE = c(test_perf.full_M5a_reg.C["RMSE"], test_perf.full_M5a_net.C["RMSE"], test_perf.full_M5a_rf.C["RMSE"], test_perf.full_M5a_gbm.C["RMSE"]),
  Rsquared = c(test_perf.full_M5a_reg.C["Rsquared"], test_perf.full_M5a_net.C["Rsquared"], test_perf.full_M5a_rf.C["Rsquared"], test_perf.full_M5a_gbm.C["Rsquared"])
)



performance_metrics_long <- performance_metrics %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")


# Create the performance metrics table
performance_table <- performance_metrics %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table, caption = "Comparison of Model Performance in Held-Out Test Data") %>%
  kable_styling(full_width = F, position = "center")
Comparison of Model Performance in Held-Out Test Data
Method MAE RMSE Rsquared
Linear Regression 0.5493 0.6827 0.4957
Elastic Net 0.5636 0.6959 0.4775
Random Forest 0.2913 0.3982 0.8588
Gradient Boosting 0.4593 0.5788 0.6455
tst_M5a.sd_F <- sd(data.tst_M5a$F)

ggplot(performance_metrics_long, aes(x = Metric, y = Value, fill = Method)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = tst_M5a.sd_F, yend = tst_M5a.sd_F),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long %>% filter(Metric == "RMSE"),
            aes(x = 2, y = tst_M5a.sd_F, label = paste0("Std.dev of F in Held-Out Test Data: ", round(tst_M5a.sd_F, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Comparison of Model Performance in Full Study 5 Data",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

# do this by panel?

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.full_M5a.reg, family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.full_M5a.gbm.C, family = binomial)

Default.PAN_Pred_C.reg_FAE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.reg.C , family = binomial)

Default.PAN_Pred_C.net_FAE <- glm(numDefault ~  DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C + DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.net.C, family = binomial)

Default.PAN_Pred_C.rf_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.rf.C, family = binomial)

Default.PAN_Pred_C.gbm_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.gbm.C, family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_C.reg_FAE" = Default.PAN_Pred_C.reg_FAE,
  "Default.PAN_Pred_C.net_FAE" = Default.PAN_Pred_C.net_FAE,
  "Default.PAN_Pred_C.rf_FAE" = Default.PAN_Pred_C.rf_FAE,
  "Default.PAN_Pred_C.gbm_FAE" = Default.PAN_Pred_C.gbm_FAE
)
Model Results
term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_C.reg_FAE,Default.PAN_Pred_C.net_FAE,Default.PAN_Pred_C.rf_FAE,Default.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result
Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.C(reg)_FAE PAN_Pred.C(net)_FAE PAN_Pred.C(rf)_FAE PAN_Pred.C(gbm)_FAE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 1.05 0.13 0.38 0.703 1.05 0.14 0.38 0.702 1.00 0.13 -0.01 0.990 0.98 0.13 -0.15 0.882 1.01 0.13 0.05 0.956 1.00 0.13 0.01 0.991
DefaultCondition [OPTOUT] 1.74 0.32 3.01 0.003 1.76 0.33 3.02 0.003 1.94 0.37 3.49 <0.001 2.03 0.39 3.68 <0.001 1.84 0.35 3.23 0.001 1.90 0.36 3.38 0.001
F 1.02 0.19 0.13 0.894 1.32 0.19 1.87 0.061 1.36 0.20 2.07 0.038 1.05 0.18 0.27 0.784 1.19 0.19 1.11 0.267
A 0.95 0.08 -0.54 0.590 0.88 0.08 -1.48 0.139 0.88 0.08 -1.39 0.165 0.91 0.08 -1.13 0.257 0.89 0.08 -1.32 0.186
C 1.72 0.25 3.78 <0.001
E 1.04 0.09 0.40 0.686 1.14 0.10 1.52 0.130 1.13 0.10 1.47 0.140 1.06 0.09 0.71 0.476 1.11 0.09 1.21 0.228
DefaultCondition [OPTOUT]
× F
1.25 0.34 0.83 0.408 1.18 0.26 0.75 0.455 1.19 0.26 0.79 0.427 1.29 0.33 1.00 0.316 1.27 0.29 1.05 0.295
DefaultCondition [OPTOUT]
× A
0.84 0.11 -1.32 0.188 0.94 0.13 -0.43 0.670 0.96 0.13 -0.34 0.736 0.89 0.12 -0.92 0.360 0.92 0.12 -0.61 0.541
DefaultCondition [OPTOUT]
× C
0.62 0.13 -2.29 0.022
DefaultCondition [OPTOUT]
× E
1.09 0.14 0.68 0.499 1.00 0.13 0.00 1.000 1.00 0.13 -0.00 0.996 1.07 0.14 0.57 0.572 1.04 0.13 0.32 0.751
Predicted C 1.83 0.32 3.45 0.001 1.78 0.35 2.90 0.004 2.26 0.45 4.13 <0.001 2.13 0.41 3.89 <0.001
DefaultCondition [OPTOUT]
× Predicted C
0.41 0.11 -3.48 0.001 0.36 0.10 -3.51 <0.001 0.45 0.13 -2.81 0.005 0.37 0.11 -3.46 0.001
Observations 1460 1460 1460 1460 1460 1460
AIC 1884.213 1858.463 1858.556 1860.214 1855.651 1856.522
log-Likelihood -936.107 -915.232 -915.278 -916.107 -913.826 -914.261
Anova and Eta Square
  • As in the origianl paper, type I anova is used. We first attribute variance to condition, FACE, then Panel main effect, followed by the conditionFACE interaction, and the Panelcondition interaction last.
  • As in the original paper, Etasq of a variable in linear regression models (LessMore and Sunk) is calculated as the sum of squares explained by the variables divided by the total sum of squares. Etasq of a variable in logistic regression models (Default and Unusual Disease) is calculated as the deviance explained by the model relative to the null model (deviance from the null model divided by the model’s residual deviance).
  • I validated this pipeline by reproducing the stats reported in the SI for Study 1.
  • Woudl we like to see the full Anova stats?
calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")
Etasq Table: Default Paradigm
Condition Panel Default Condition:Panel
Default.PAN 0.03801*** 0.00298. 0.01188***
Default.PAN_FACE 0.03801*** 0.00051 0.00159
Default.PAN_Pred_C.reg_FAE 0.03801*** 0.00263. 0.00173
Default.PAN_Pred_C.net_FAE 0.03801*** 0.00312* 0.00156
Default.PAN_Pred_C.rf_FAE 0.03801*** 0.00085 0.00111
Default.PAN_Pred_C.gbm_FAE 0.03801*** 0.00197 0.00093

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.full_M5a.reg, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.full_M5a.reg.C, family = binomial)

Disease.PAN_Pred_C.reg_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.reg.C, family = binomial)

Disease.PAN_Pred_C.net_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.net.C, family = binomial)

Disease.PAN_Pred_C.rf_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.rf.C, family = binomial)

Disease.PAN_Pred_C.gbm_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.full_M5a.gbm.C, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_C.reg_FAE" = Disease.PAN_Pred_C.reg_FAE,
  "Disease.PAN_Pred_C.net_FAE" = Disease.PAN_Pred_C.net_FAE,
  "Disease.PAN_Pred_C.rf_FAE" = Disease.PAN_Pred_C.rf_FAE, 
  "Disease.PAN_Pred_C.gbm_FAE" = Disease.PAN_Pred_C.gbm_FAE
)

# extract_aic(Disease_model)
Model Results
term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_C.reg_FAE,Disease.PAN_Pred_C.net_FAE,Disease.PAN_Pred_C.rf_FAE,Disease.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result
Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.C(reg)_FAE PAN_Pred.C(net)_FAE PAN_Pred.C(rf)_FAE PAN_Pred.C(gbm)_FAE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.45 0.06 -5.84 <0.001 0.43 0.06 -6.03 <0.001 0.46 0.06 -5.58 <0.001 0.47 0.07 -5.36 <0.001 0.45 0.06 -5.73 <0.001 0.46 0.06 -5.53 <0.001
DiseaseCondition [LOSS] 3.40 0.64 6.46 <0.001 3.69 0.72 6.73 <0.001 3.46 0.68 6.33 <0.001 3.33 0.66 6.09 <0.001 3.53 0.69 6.47 <0.001 3.45 0.68 6.33 <0.001
F 1.07 0.23 0.31 0.755 0.92 0.15 -0.49 0.622 0.97 0.16 -0.19 0.850 1.03 0.20 0.14 0.892 1.00 0.17 0.02 0.988
A 1.40 0.14 3.34 0.001 1.46 0.15 3.63 <0.001 1.49 0.16 3.79 <0.001 1.44 0.15 3.55 <0.001 1.46 0.15 3.67 <0.001
C 0.73 0.12 -1.83 0.067
E 0.89 0.09 -1.14 0.256 0.84 0.08 -1.81 0.070 0.84 0.08 -1.82 0.069 0.88 0.09 -1.34 0.182 0.86 0.08 -1.59 0.113
DiseaseCondition [LOSS] ×
F
1.21 0.35 0.65 0.513 1.20 0.27 0.82 0.415 1.13 0.26 0.54 0.586 1.07 0.29 0.25 0.803 1.07 0.26 0.28 0.777
DiseaseCondition [LOSS] ×
A
1.03 0.14 0.23 0.820 0.98 0.14 -0.18 0.861 0.95 0.14 -0.35 0.723 1.00 0.14 0.01 0.990 0.98 0.14 -0.17 0.868
DiseaseCondition [LOSS] ×
C
1.26 0.28 1.06 0.290
DiseaseCondition [LOSS] ×
E
1.04 0.14 0.27 0.791 1.09 0.14 0.66 0.511 1.09 0.14 0.67 0.501 1.03 0.14 0.22 0.827 1.06 0.14 0.42 0.674
Predicted C 0.70 0.14 -1.80 0.073 0.59 0.13 -2.33 0.020 0.66 0.15 -1.85 0.065 0.62 0.14 -2.18 0.029
DiseaseCondition [LOSS] ×
Predicted C
1.72 0.47 2.01 0.045 2.16 0.66 2.52 0.012 1.76 0.53 1.88 0.059 2.06 0.61 2.43 0.015
Observations 1460 1460 1460 1460 1460 1460
AIC 1762.505 1721.769 1721.143 1718.492 1721.411 1719.140
log-Likelihood -875.252 -846.884 -846.571 -845.246 -846.705 -845.570
Anova and Eta Square
calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")
Etasq Table: Disease Paradigm
Condition Panel DiseaseCondition:Panel
Disease.PAN 0.05317*** 0.03246*** 0.01394***
Disease.PAN_FACE 0.05317*** 0.00514** 0.00395*
Disease.PAN_Pred_C.reg_FAE 0.05317*** 0.00384* 0.00378*
Disease.PAN_Pred_C.net_FAE 0.05317*** 0.00404* 0.00332*
Disease.PAN_Pred_C.rf_FAE 0.05317*** 0.0039* 0.00291.
Disease.PAN_Pred_C.gbm_FAE 0.05317*** 0.00374* 0.00256.

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.full_M5a.reg)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.reg.C)
LessMore.PAN_Pred_C.reg_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.reg.C)
LessMore.PAN_Pred_C.net_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.net.C)
LessMore.PAN_Pred_C.rf_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.rf.C)
LessMore.PAN_Pred_C.gbm_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.gbm.C)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_C.reg_FAE" = LessMore.PAN_Pred_C.reg_FAE,
  "LessMore.PAN_Pred_C.net_FAE" = LessMore.PAN_Pred_C.net_FAE,
  "LessMore.PAN_Pred_C.rf_FAE" = LessMore.PAN_Pred_C.rf_FAE, 
  "LessMore.PAN_Pred_C.gbm_FAE" = LessMore.PAN_Pred_C.gbm_FAE
)
Model Results
term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_C.reg_FAE,LessMore.PAN_Pred_C.net_FAE,LessMore.PAN_Pred_C.rf_FAE,LessMore.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result
LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.C(reg)_FAE PAN_Pred.C(net)_FAE PAN_Pred.C(rf)_FAE PAN_Pred.C(gbm)_FAE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.20 0.09 59.66 <0.001 5.22 0.09 61.17 <0.001 5.19 0.09 60.63 <0.001 5.19 0.09 60.11 <0.001 5.21 0.08 61.35 <0.001 5.21 0.08 61.29 <0.001
LessMoreCondition [SCARF] 1.11 0.12 9.00 <0.001 1.10 0.12 9.13 <0.001 1.10 0.12 9.10 <0.001 1.09 0.12 8.95 <0.001 1.06 0.12 8.78 <0.001 1.07 0.12 8.84 <0.001
F 0.13 0.12 1.12 0.265 0.03 0.09 0.37 0.712 0.06 0.09 0.67 0.504 0.10 0.11 0.91 0.362 0.01 0.10 0.14 0.892
A -0.13 0.06 -2.23 0.026 -0.16 0.06 -2.67 0.008 -0.15 0.06 -2.55 0.011 -0.14 0.06 -2.29 0.022 -0.16 0.06 -2.63 0.009
C 0.01 0.09 0.08 0.934
E -0.01 0.06 -0.10 0.919 -0.00 0.05 -0.04 0.971 -0.00 0.05 -0.05 0.957 -0.01 0.05 -0.18 0.856 -0.01 0.05 -0.25 0.806
LessMoreCondition [SCARF]
× F
-0.02 0.17 -0.13 0.899 0.20 0.13 1.49 0.136 0.16 0.13 1.18 0.237 -0.05 0.16 -0.30 0.764 0.11 0.14 0.77 0.441
LessMoreCondition [SCARF]
× A
0.39 0.08 4.82 <0.001 0.39 0.08 4.70 <0.001 0.37 0.08 4.52 <0.001 0.37 0.08 4.52 <0.001 0.37 0.08 4.58 <0.001
LessMoreCondition [SCARF]
× C
0.23 0.13 1.74 0.082
LessMoreCondition [SCARF]
× E
0.01 0.08 0.08 0.940 0.05 0.08 0.61 0.539 0.05 0.08 0.63 0.528 0.01 0.08 0.15 0.878 0.04 0.08 0.52 0.601
Predicted C 0.25 0.11 2.22 0.026 0.20 0.13 1.57 0.118 0.07 0.13 0.55 0.586 0.28 0.12 2.29 0.022
LessMoreCondition [SCARF]
× Predicted C
-0.03 0.16 -0.18 0.859 0.08 0.18 0.46 0.643 0.37 0.18 2.11 0.035 0.15 0.18 0.83 0.406
Observations 1460 1460 1460 1460 1460 1460
R2 / R2 adjusted 0.204 / 0.201 0.248 / 0.241 0.249 / 0.242 0.248 / 0.242 0.251 / 0.244 0.253 / 0.247
Anova and Eta Square
calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")
Etasq Table: LessMore Paradigm
Condition Panel LessMoreCondition:Panel
LessMore.PAN 0.09206*** 0.10649*** 0.00506*
LessMore.PAN_FACE 0.09206*** 0.02908*** 0.00051
LessMore.PAN_Pred_C.reg_FAE 0.09206*** 0.03071*** 0.00174
LessMore.PAN_Pred_C.net_FAE 0.09206*** 0.03068*** 0.00144
LessMore.PAN_Pred_C.rf_FAE 0.09206*** 0.0248*** 0.00054
LessMore.PAN_Pred_C.gbm_FAE 0.09206*** 0.02263*** 0.00104

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.full_M5a.reg.C)

Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.reg.C)
Sunk.PAN_Pred_C.reg_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.full_M5a.reg.C)
Sunk.PAN_Pred_C.net_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.net.C)
Sunk.PAN_Pred_C.rf_FAE <- lm(numSunkCost ~ SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.full_M5a.rf.C)
Sunk.PAN_Pred_C.gbm_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.gbm.C)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_C.reg_FAE" = Sunk.PAN_Pred_C.reg_FAE,
  "Sunk.PAN_Pred_C.net_FAE" = Sunk.PAN_Pred_C.net_FAE,
  "Sunk.PAN_Pred_C.rf_FAE" = Sunk.PAN_Pred_C.rf_FAE, #0.302 with unscaled
  "Sunk.PAN_Pred_C.gbm_FAE" = Sunk.PAN_Pred_C.gbm_FAE
)

# extract_adjusted_r_squared(Sunk_model)
Full Model Results
term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_C.reg_FAE,Sunk.PAN_Pred_C.net_FAE,Sunk.PAN_Pred_C.rf_FAE,Sunk.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_C(reg)_FAE", 
                        "PAN_Pred_C(net)_FAE", 
                        "PAN_Pred_C(rf)_FAE", 
                        "PAN_Pred_C(gbm)_FAE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result
Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred_C(reg)_FAE PAN_Pred_C(net)_FAE PAN_Pred_C(rf)_FAE PAN_Pred_C(gbm)_FAE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.78 0.17 34.79 <0.001 5.77 0.17 34.93 <0.001 5.77 0.17 34.24 <0.001 5.80 0.17 34.09 <0.001 5.77 0.17 34.46 <0.001 5.78 0.17 34.39 <0.001
SunkCondition [PAID] 0.43 0.23 1.85 0.065 0.46 0.23 1.97 0.049 0.47 0.24 2.00 0.045 0.44 0.24 1.86 0.063 0.48 0.24 2.04 0.042 0.46 0.24 1.96 0.050
F 0.39 0.22 1.77 0.077 0.16 0.18 0.91 0.365 0.22 0.18 1.21 0.226 0.15 0.21 0.72 0.470 0.21 0.19 1.08 0.280
A 0.07 0.11 0.61 0.539 0.07 0.11 0.62 0.533 0.09 0.11 0.79 0.430 0.07 0.11 0.62 0.537 0.08 0.11 0.70 0.485
C -0.26 0.18 -1.50 0.133
E -0.06 0.11 -0.59 0.558 -0.11 0.11 -1.02 0.308 -0.11 0.11 -1.06 0.288 -0.11 0.11 -1.01 0.315 -0.10 0.11 -0.98 0.326
SunkCondition [PAID] × F 0.38 0.32 1.18 0.239 0.19 0.26 0.73 0.463 0.11 0.26 0.42 0.671 0.31 0.31 1.02 0.308 0.16 0.28 0.59 0.554
SunkCondition [PAID] × A 0.02 0.16 0.13 0.895 0.04 0.16 0.23 0.815 0.01 0.16 0.07 0.942 0.05 0.16 0.29 0.770 0.03 0.16 0.20 0.843
SunkCondition [PAID] × C -0.20 0.25 -0.79 0.428
SunkCondition [PAID] × E 0.11 0.15 0.72 0.469 0.07 0.15 0.50 0.617 0.08 0.15 0.53 0.596 0.09 0.15 0.62 0.537 0.07 0.15 0.49 0.624
Predicted C -0.01 0.22 -0.06 0.954 -0.17 0.25 -0.70 0.483 0.01 0.24 0.03 0.979 -0.11 0.24 -0.46 0.644
SunkCondition [PAID] ×
Predicted C
-0.03 0.31 -0.11 0.914 0.19 0.35 0.54 0.592 -0.23 0.35 -0.67 0.504 0.03 0.34 0.08 0.939
Observations 1460 1460 1460 1460 1460 1460
R2 / R2 adjusted 0.023 / 0.020 0.042 / 0.033 0.036 / 0.027 0.036 / 0.028 0.036 / 0.028 0.036 / 0.028
Anova and Eta Square
calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 6), sig_condition)
  res_panel <- paste0(round(perc_panel, 6), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 6), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")
Etasq Table: SunkCost Paradigm
Condition Panel LessMoreCondition:Panel
Sunk.PAN 0.007943*** 0.011323*** 0.003981.
Sunk.PAN_FACE 0.007943*** 0.000985 0.002048
Sunk.PAN_Pred_C.reg_FAE 0.007943*** 9.8e-05 0.001764
Sunk.PAN_Pred_C.net_FAE 0.007943*** 8.1e-05 0.00151
Sunk.PAN_Pred_C.rf_FAE 0.007943*** 8.2e-05 0.002066
Sunk.PAN_Pred_C.gbm_FAE 0.007943*** 9.8e-05 0.001547

Summary

Model Fit
  • \(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)

  • note that the y-axes for different paradigms are not aligned.

Default.null<-glm(numDefault~1,data.full_M5a.reg.C,family=binomial)


R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_C.reg_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.net_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.rf_FAE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_C.gbm_FAE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.full_M5a.reg.C,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_C.reg_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.net_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.rf_FAE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_C.gbm_FAE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(LessMore.PAN_Pred_C.rf_FAE)$r.squared,
                     summary(LessMore.PAN_Pred_C.gbm_FAE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(Sunk.PAN_Pred_C.rf_FAE )$r.squared,
                     summary(Sunk.PAN_Pred_C.gbm_FAE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "SunkCost"))


R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_C.reg_FAE), 
                    AIC(Default.PAN_Pred_C.net_FAE), 
                    AIC(Default.PAN_Pred_C.rf_FAE),
                    AIC(Default.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_C.reg_FAE), 
                    AIC(Disease.PAN_Pred_C.net_FAE), 
                    AIC(Disease.PAN_Pred_C.rf_FAE),
                    AIC(Disease.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Disease"))


AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PC(reg)+FAE",  
                                   "Pan+PC(net)+FAE", 
                                   "Pan+PC(rf)+FAE",
                                   "Pan+PC(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


### Avg. improvement in R2 for each model across all four paradigms, in FULL study 5
Default.null.Study5<-glm(numDefault~1,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.null.Study5<-glm(numDisease~1,Data_M5a.log_z_dummy_coded,family=binomial)
Default.PAN.Study5<-glm(numDefault~DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN.Study5<-glm(numDisease~DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN.Study5<-lm(numLessMore~LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN.Study5<-lm(numSunkCost~SunkCondition*Panel,Data_M5a.log_z_dummy_coded)


Default.PAN_FACE.Study5<-glm(numDefault~DefaultCondition*F+DefaultCondition*A+DefaultCondition*C+DefaultCondition*E+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~DiseaseCondition*F+DiseaseCondition*A+DiseaseCondition*C+DiseaseCondition*E+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~LessMoreCondition*F+LessMoreCondition*A+LessMoreCondition*C+LessMoreCondition*E+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~SunkCondition*F+SunkCondition*A+SunkCondition*C+SunkCondition*E+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

R2.Default = as.data.frame(cbind( 
  "R2" = c((1-logLik(Default.PAN.Study5)/logLik(Default.null.Study5)),
           (1-logLik(Default.PAN_FACE.Study5)/logLik(Default.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Default"
))


R2.Disease = as.data.frame(cbind( 
  "R2" = c((1-logLik(Disease.PAN.Study5)/logLik(Disease.null.Study5)),
           (1-logLik(Disease.PAN_FACE.Study5)/logLik(Disease.null.Study5))),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "Disease"
))

# For LessMore paradigm (linear models)
R2.LessMore = as.data.frame(cbind( 
  "R2" = c(summary(LessMore.PAN.Study5)$r.squared,
           summary(LessMore.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "LessMore"
))

# For SunkCost paradigm (linear models)
R2.Sunk = as.data.frame(cbind( 
  "R2" = c(summary(Sunk.PAN.Study5)$r.squared,
           summary(Sunk.PAN_FACE.Study5)$r.squared),  
  "Model" = c("Pan", "Pan+FACE (Full Study 5)"), 
  "Paradigm" = "SunkCost"
))

R2.FullStudy5 <- rbind(R2.Default, R2.Disease, R2.LessMore, R2.Sunk)
R2.FullStudy5$R2 <- round(as.numeric(R2.FullStudy5$R2), 3)

R2.FullStudy5 <- R2.FullStudy5 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement_full_study_5 <- R2.FullStudy5 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))%>%
  slice(-1) 

avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample", digits = 2)
Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample
Model Avg_Improvement
Pan 0.00
Pan+FACE 43.20
Pan+Pred.C(reg)+FAE 36.80
Pan+Pred.C(net)+FAE 36.46
Pan+Pred.C(rf)+FAE 37.52
Pan+Pred.C(gbm)+FAE 38.01
Pan+FACE (Full Study 5) 43.20
C*Cond Model Coefficients
  • DVs for Default & Framing are 1/0
  • DVs for LessMore & SunkCost are on the original scale
AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_C.reg_FAE),confint(Default.PAN_Pred_C.reg_FAE)),
cbind(coef(Default.PAN_Pred_C.net_FAE),confint(Default.PAN_Pred_C.net_FAE)),
cbind(coef(Default.PAN_Pred_C.rf_FAE),confint(Default.PAN_Pred_C.rf_FAE)),
cbind(coef(Default.PAN_Pred_C.gbm_FAE),confint(Default.PAN_Pred_C.gbm_FAE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_C.reg_FAE),confint(Disease.PAN_Pred_C.reg_FAE)),
cbind(coef(Disease.PAN_Pred_C.net_FAE),confint(Disease.PAN_Pred_C.net_FAE)),
cbind(coef(Disease.PAN_Pred_C.rf_FAE),confint(Disease.PAN_Pred_C.rf_FAE)),
cbind(coef(Disease.PAN_Pred_C.gbm_FAE),confint(Disease.PAN_Pred_C.gbm_FAE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_C.reg_FAE),confint(LessMore.PAN_Pred_C.reg_FAE)),
cbind(coef(LessMore.PAN_Pred_C.net_FAE),confint(LessMore.PAN_Pred_C.net_FAE)),
cbind(coef(LessMore.PAN_Pred_C.rf_FAE),confint(LessMore.PAN_Pred_C.rf_FAE)),
cbind(coef(LessMore.PAN_Pred_C.gbm_FAE),confint(LessMore.PAN_Pred_C.gbm_FAE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_C.reg_FAE),confint(Sunk.PAN_Pred_C.reg_FAE)),
cbind(coef(Sunk.PAN_Pred_C.net_FAE),confint(Sunk.PAN_Pred_C.net_FAE)),
cbind(coef(Sunk.PAN_Pred_C.rf_FAE),confint(Sunk.PAN_Pred_C.rf_FAE)),
cbind(coef(Sunk.PAN_Pred_C.gbm_FAE),confint(Sunk.PAN_Pred_C.gbm_FAE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef
AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq
### Study 5 Results ###
Default_model.Study5<-list(
  "Default.PAN.Study5" = Default.PAN.Study5,
  "Default.PAN_FACE.Study5" = Default.PAN_FACE.Study5)
Disease_model.Study5<-list(
  "Disease.PAN.Study5" = Disease.PAN.Study5,
  "Disease.PAN_FACE.Study5" = Disease.PAN_FACE.Study5)

LessMore_model.Study5<-list(
  "LessMore.PAN.Study5" = LessMore.PAN.Study5,
  "LessMore.PAN_FACE.Study5" = LessMore.PAN_FACE.Study5)


Sunk_model.Study5<-list(
  "Sunk.PAN.Study5" = Sunk.PAN.Study5,
  "Sunk.PAN_FACE.Study5" = Sunk.PAN_FACE.Study5)

deviance_explained <- sapply(Default_model.Study5, calculate_deviance_explained.Default)
deviance_explained_df.Default.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Default.Study5) <- c("Condition","Panel", "Default Condition:Panel")

deviance_explained <- sapply(Disease_model.Study5, calculate_deviance_explained.Disease)
deviance_explained_df.Disease.Study5 <- as.data.frame(t(deviance_explained)) 
colnames(deviance_explained_df.Disease.Study5) <- c("Condition","Panel", "Disease Condition:Panel")

variance_explained_lm <- sapply(LessMore_model.Study5, calculate_variance_explained_LessMore)
variance_explained_lm_df.LessMore.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")

variance_explained_lm <- sapply(Sunk_model.Study5, calculate_variance_explained_Sunk)
variance_explained_lm_df.SunkCost.Study5 <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.SunkCost.Study5) <- c("Condition","Panel", "LessMoreCondition:Panel")
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.C(reg)+FAE", "Pan+Pred.C(net)+FAE", "Pan+Pred.C(rf)+FAE", "Pan+Pred.C(gbm)+FAE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.full_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data", digits = 2)
Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data
Model Avg_Reduction
Pan 0.00
Pan+FACE 74.19
Pan+Pred.C(reg)+FAE 69.91
Pan+Pred.C(net)+FAE 74.17
Pan+Pred.C(rf)+FAE 76.80
Pan+Pred.C(gbm)+FAE 78.60
Pan (Full Study5) 0.00
Pan+FACE (Full Study5) 74.19

2.1 Evaluate Portability with Study 1 Data

2.1.1 Study 1 US Data

# Data_M1a_dummy_coded<-read.csv("../MP Study 1/Data_M1a_dummy_coded_20240808.csv") # seperately .log_z data

Data_M1a.log_dummy_coded<-read.csv("../MP Study 1/Data_M1a.log_dummy_coded_20241003.csv")%>%
  rename_with(~ gsub("DEMCHAR", "PurposeOpenEnd", .x))
  
META_Browser_columns <- names(Data_M1a.log_dummy_coded)[grepl("META_Browser", names(Data_M1a.log_dummy_coded))] # tentatively retain browser Browsers to make predictions
Data_M1a.log_dummy_coded<-Data_M1a.log_dummy_coded%>% 
  select(-c(X,LW_DV,numLW_IV)) 
names(Data_M1a.log_dummy_coded) <- gsub("iNTRO", "INTRO", names(Data_M1a.log_dummy_coded))


# This reread the data using read.csv function to ensures data format consistency, need to understand more about this function here....

Data_M5a.log_dummy_coded<-read.csv("Data_M5a.log_dummy_coded_20240921.csv")

META_Browser_columns <- names(Data_M5a.log_dummy_coded)[grepl("META_Browser", names(Data_M5a.log_dummy_coded))]
Data_M5a.log_dummy_coded<-Data_M5a.log_dummy_coded%>%
  select(-c(X))


# Calculate mean and standard deviation for each column in Data_M5a.log_dummy_coded
means <- sapply(Data_M5a.log_dummy_coded[, scale_columns], mean, na.rm = TRUE)
sds <- sapply(Data_M5a.log_dummy_coded[, scale_columns], sd, na.rm = TRUE)

scale_with_reference_M5a <- function(x, mean_ref, sd_ref) {
  (x - mean_ref) / sd_ref
}

Data_M5a.log_z_dummy_coded <- Data_M5a.log_dummy_coded %>%
  mutate(across(.cols = all_of(scale_columns), 
                .fns = ~ scale_with_reference_M5a(., means[cur_column()], sds[cur_column()])))

# Scale Data_M1a.log_dummy_coded using the mean and sd from Data_M5a.log_dummy_coded
Data_M1a.log_z_dummy_coded <- Data_M1a.log_dummy_coded %>%
  mutate(across(.cols = all_of(scale_columns), 
                .fns = ~ scale_with_reference_M5a(., means[cur_column()], sds[cur_column()])))

Data_M1a.log_z_dummy_coded <- Data_M1a.log_z_dummy_coded 
# same hyperparameters from section 2.0
# Set seed for reproducibility
set.seed(123)


variables_to_remove.C <- c("F","A","E","numSunkCost","numLessMore","numDefault","numDisease","SunkCondition","LessMoreCondition","DiseaseCondition","DefaultCondition","Panel","Antonym","Synonym","crt2_score","rotsum","MXsum")

# Create training and testing sets

data.trn_M5a_FULL <- Data_M5a.log_z_dummy_coded 
data.tst_MP1.US <- Data_M1a.log_z_dummy_coded%>%filter(Country_US==1) #  MP1 US data
data.tst_MP1.UK <- Data_M1a.log_z_dummy_coded%>%filter(Country_UK==1) #  MP1 UK data
data.tst_MP1.Neth <- Data_M1a.log_z_dummy_coded%>%filter(Country_US==0&Country_UK==0) # #  MP1 Netherlands student data
# There are columns in study 1 dataset not in study 5, but this is okay. 

data.trn_M5a_FULL.C <- as.data.frame(data.trn_M5a_FULL[, !colnames(data.trn_M5a_FULL) %in% variables_to_remove.C])

actual_MP1.US.C <- data.tst_MP1.US$C

ctrl <- trainControl(method = "cv", number = 10, summaryFunction = defaultSummary)
tuneGrid <- expand.grid(
  mtry = c(33),
  min.node.size = c(200),
  splitrule = c("variance")
)

# Model_M5a_rf.FULL.C <- caret::train(C ~ ., data = data.trn_M5a_FULL.C, method = "ranger",
#                   tuneGrid = tuneGrid, importance = 'impurity', num.trees = 750,
#                   trControl = ctrl)
# save(Model_M5a_rf.FULL.C, file = "./Saved ML Model and Data/Model_M5a_rf_FULL.C.RData")
load("./Saved ML Model and Data/Model_M5a_rf_FULL.C.RData")
M5a_rf_pred_MP1.US.C<- predict(Model_M5a_rf.FULL.C,data.tst_MP1.US)
test_perf.M5a_rf.MP1.US.C <- postResample(M5a_rf_pred_MP1.US.C, actual_MP1.US.C)
data.tst_MP1.US.rf.C <- cbind(data.tst_MP1.US, Predicted_C = M5a_rf_pred_MP1.US.C)



Model_M5a_reg.FULL.C <- train(C ~ ., data = data.trn_M5a_FULL.C, method = "lm")
M5a_reg_pred_MP1.US.C<- predict(Model_M5a_reg.FULL.C,data.tst_MP1.US)
test_perf.M5a_reg.MP1.US.C <- postResample(M5a_reg_pred_MP1.US.C, actual_MP1.US.C)
data.tst_MP1.US.reg.C <- cbind(data.tst_MP1.US, Predicted_C = M5a_reg_pred_MP1.US.C)

Model_M5a_net.FULL.C <- train(C ~ ., data = data.trn_M5a_FULL.C, method = "glmnet", trControl = ctrl, tuneLength=40) # 10 fold cv to select the best model, random search 

M5a_net_pred_MP1.US.C<- predict(Model_M5a_net.FULL.C,data.tst_MP1.US)
test_perf.M5a_net.MP1.US.C <- postResample(M5a_net_pred_MP1.US.C, actual_MP1.US.C)
data.tst_MP1.US.net.C <- cbind(data.tst_MP1.US, Predicted_C = M5a_net_pred_MP1.US.C)


# Model_M5a_gbm.FULL.C <- caret::train(C ~ ., data = data.trn_M5a_FULL.C, method = "gbm",
#                     tuneGrid = tuneGrid.4, trControl = ctrl,
#                     verbose = FALSE)
# 
# save(Model_M5a_gbm.FULL.C, file = "./Saved ML Model and Data/Model_M5a_gbm_FULL.C.RData")
load("./Saved ML Model and Data/Model_M5a_gbm_FULL.C.RData")

M5a_gbm_pred_MP1.US.C<- predict(Model_M5a_gbm.FULL.C,data.tst_MP1.US)
test_perf.M5a_gbm.MP1.US.C <- postResample(M5a_gbm_pred_MP1.US.C, actual_MP1.US.C)
data.tst_MP1.US.gbm.C <- cbind(data.tst_MP1.US, Predicted_C = M5a_gbm_pred_MP1.US.C)

2.1.1.0 Model Fit

  • We trained the model with full Study 5 data
  • The following stats come from cross-validation during model training
train_perf_M5a_rf.FULL <- Model_M5a_rf.FULL.C$results[which.min(Model_M5a_rf.FULL.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_reg.FULL <- Model_M5a_reg.FULL.C$results[which.min(Model_M5a_reg.FULL.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_net.FULL <- Model_M5a_net.FULL.C$results[which.min(Model_M5a_net.FULL.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]
train_perf_M5a_gbm.FULL <- Model_M5a_gbm.FULL.C$results[which.min(Model_M5a_gbm.FULL.C$results$RMSE), c("RMSE", "Rsquared", "MAE")]

# Combine training performance metrics into a data frame
performance_metrics_trn <- data.frame(
  Model = factor(c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting"),
                 levels = c("Linear Regression", "Elastic Net", "Random Forest", "Gradient Boosting")),
  MAE = c(as.numeric(train_perf_M5a_reg.FULL["MAE"]), as.numeric(train_perf_M5a_net.FULL["MAE"]), as.numeric(train_perf_M5a_rf.FULL["MAE"]), as.numeric(train_perf_M5a_gbm.FULL["MAE"])),
  RMSE = c(as.numeric(train_perf_M5a_reg.FULL["RMSE"]), as.numeric(train_perf_M5a_net.FULL["RMSE"]), as.numeric(train_perf_M5a_rf.FULL["RMSE"]), as.numeric(train_perf_M5a_gbm.FULL["RMSE"])),
  Rsquared = c(as.numeric(train_perf_M5a_reg.FULL["Rsquared"]), as.numeric(train_perf_M5a_net.FULL["Rsquared"]), as.numeric(train_perf_M5a_rf.FULL["Rsquared"]), as.numeric(train_perf_M5a_gbm.FULL["Rsquared"])))


performance_table_trn <- performance_metrics_trn %>%
  mutate(across(where(is.numeric), round, 4))

# Display the table
kable(performance_table_trn, caption = "Comparison of Model Performance in Training Data (Full Study 5 Data) using CV") %>%
  kable_styling(full_width = F, position = "center")
Comparison of Model Performance in Training Data (Full Study 5 Data) using CV
Model MAE RMSE Rsquared
Linear Regression 0.5997 0.7432 0.4113
Elastic Net 0.5785 0.7130 0.4515
Random Forest 0.5950 0.7296 0.4507
Gradient Boosting 0.5305 0.6689 0.5199
###Plot ###

performance_metrics_long_trn <- performance_metrics_trn %>%
  pivot_longer(cols = c(RMSE, Rsquared, MAE), names_to = "Metric", values_to = "Value")

trn_M5a_FULL.sd_C <- sd(data.trn_M5a_FULL.C$C)

ggplot(performance_metrics_long_trn, aes(x = Metric, y = Value, fill = Model)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_segment(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
               aes(x = 1.6, xend = 2.4, y = trn_M5a_FULL.sd_C, yend = trn_M5a_FULL.sd_C),
               linetype = "dashed", color = "#FF9999") +
  geom_text(data = performance_metrics_long_trn %>% filter(Metric == "RMSE"),
            aes(x = 2, y = trn_M5a_FULL.sd_C, label = paste0("Std.dev of C in Study 5 Full Data: ", round(trn_M5a_FULL.sd_C, 2))),
            vjust = -0.5, color = "#FF9999") +
  geom_text(aes(label = round(Value, 2)), position = position_dodge(width = 0.9), vjust = -0.5) +
  scale_fill_manual(values = c("gray80", "gray50", "gray30", "gray20")) +  # Less colorful palette
  labs(title = "Model Fit with FUll MP5 data",
       x = "Metric",
       y = "Value") +
  theme_minimal() +
  theme(legend.position = "bottom")

2.1.1.4 Correlation Btw ML Predicted C and other FACE factors

  • Significance Notation: *** if the p-value is < 0.001, ** if the p-value is < 0.01, * if the p-value is < 0.05, . if the p-value is < 0.10
Regression
ggpairs(data.tst_MP1.US.reg.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net
ggpairs(data.tst_MP1.US.net.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest
ggpairs(data.tst_MP1.US.rf.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting
ggpairs(data.tst_MP1.US.gbm.C, c("Predicted_C","F","A","C", "E"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PC5 and numEDU and Age in Study 1
ggpairs(data.tst_MP1.US.gbm.C, c("Predicted_C","numEDU", "AGE","ATTN1__Correct"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

2.1.1.5 Explain Heterogenity with Predicted C

2.1.1.5.1 Default Paradigm
Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_MP1.US.gbm.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_MP1.US.gbm.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_C.reg_FAE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.reg.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_C.net_FAE <- glm(numDefault ~  DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.net.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_C.rf_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.rf.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_C.gbm_FAE <- glm(numDefault ~   DefaultCondition * F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.gbm.C%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_C.reg_FAE" = Default.PAN_Pred_C.reg_FAE,
  "Default.PAN_Pred_C.net_FAE" = Default.PAN_Pred_C.net_FAE,
  "Default.PAN_Pred_C.rf_FAE" = Default.PAN_Pred_C.rf_FAE,
  "Default.PAN_Pred_C.gbm_FAE" = Default.PAN_Pred_C.gbm_FAE
)
Model Results
term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_C.reg_FAE,Default.PAN_Pred_C.net_FAE,Default.PAN_Pred_C.rf_FAE,Default.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result
Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.C(reg)_FAE PAN_Pred.C(net)_FAE PAN_Pred.C(rf)_FAE PAN_Pred.C(gbm)_FAE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.88 0.11 -1.06 0.291 0.87 0.11 -1.14 0.253 0.90 0.12 -0.78 0.433 0.91 0.12 -0.76 0.449 0.85 0.11 -1.29 0.196 0.92 0.12 -0.67 0.500
DefaultCondition [OPTOUT] 3.10 0.56 6.31 <0.001 3.22 0.59 6.42 <0.001 3.09 0.57 6.18 <0.001 3.09 0.57 6.16 <0.001 3.30 0.60 6.53 <0.001 3.10 0.57 6.16 <0.001
F 1.57 0.16 4.48 <0.001 1.42 0.14 3.43 0.001 1.40 0.14 3.34 0.001 1.44 0.14 3.65 <0.001 1.40 0.14 3.30 0.001
A 1.41 0.17 2.87 0.004 1.15 0.15 1.05 0.293 1.12 0.15 0.83 0.405 1.03 0.14 0.24 0.807 1.16 0.15 1.15 0.250
C 1.02 0.05 0.39 0.698
E 0.86 0.07 -1.93 0.054 0.89 0.07 -1.43 0.153 0.90 0.07 -1.39 0.164 0.91 0.07 -1.22 0.222 0.90 0.07 -1.38 0.169
DefaultCondition [OPTOUT]
× F
1.03 0.16 0.17 0.867 1.23 0.19 1.32 0.188 1.23 0.19 1.30 0.194 1.14 0.18 0.85 0.395 1.19 0.19 1.09 0.275
DefaultCondition [OPTOUT]
× A
0.44 0.08 -4.53 <0.001 0.53 0.10 -3.28 0.001 0.54 0.10 -3.17 0.002 0.53 0.10 -3.24 0.001 0.51 0.09 -3.64 <0.001
DefaultCondition [OPTOUT]
× C
1.18 0.10 1.98 0.048
DefaultCondition [OPTOUT]
× E
1.23 0.14 1.79 0.073 1.20 0.14 1.55 0.122 1.20 0.14 1.56 0.119 1.22 0.14 1.72 0.085 1.22 0.14 1.70 0.090
Predicted C 1.65 0.19 4.41 <0.001 1.77 0.23 4.47 <0.001 2.39 0.40 5.26 <0.001 1.90 0.23 5.32 <0.001
DefaultCondition [OPTOUT]
× Predicted C
0.59 0.10 -3.08 0.002 0.58 0.11 -2.87 0.004 0.60 0.15 -2.10 0.036 0.62 0.11 -2.59 0.010
Observations 3541 3541 3541 3541 3541 3541
AIC 4466.919 4382.086 4370.959 4370.480 4358.654 4360.471
log-Likelihood -2215.460 -2165.043 -2159.480 -2159.240 -2153.327 -2154.235
Anova and Eta Square
calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")
Etasq Table: Default Paradigm
Condition Panel Default Condition:Panel
Default.PAN 0.05063*** 0.00612*** 0.00751***
Default.PAN_FACE 0.05063*** 0.00456** 0.0031.
Default.PAN_Pred_C.reg_FAE 0.05063*** 0.00508** 0.00201
Default.PAN_Pred_C.net_FAE 0.05063*** 0.00523** 0.00204
Default.PAN_Pred_C.rf_FAE 0.05063*** 0.00501** 0.00271
Default.PAN_Pred_C.gbm_FAE 0.05063*** 0.00497** 0.0026
2.1.1.5.2 Framing (Unusual Disease) Paradigm
Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_MP1.US.gbm.C, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_MP1.US.gbm.C, family = binomial)

Disease.PAN_Pred_C.reg_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.reg.C, family = binomial)

Disease.PAN_Pred_C.net_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.net.C, family = binomial)

Disease.PAN_Pred_C.rf_FAE <- glm(numDisease ~   DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.rf.C, family = binomial)

Disease.PAN_Pred_C.gbm_FAE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_MP1.US.gbm.C, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_C.reg_FAE" = Disease.PAN_Pred_C.reg_FAE,
  "Disease.PAN_Pred_C.net_FAE" = Disease.PAN_Pred_C.net_FAE,
  "Disease.PAN_Pred_C.rf_FAE" = Disease.PAN_Pred_C.rf_FAE, 
  "Disease.PAN_Pred_C.gbm_FAE" = Disease.PAN_Pred_C.gbm_FAE
)
# extract_aic(Disease_model)
Model Results
term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_C.reg_FAE,Disease.PAN_Pred_C.net_FAE,Disease.PAN_Pred_C.rf_FAE,Disease.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result
Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.C(reg)_FAE PAN_Pred.C(net)_FAE PAN_Pred.C(rf)_FAE PAN_Pred.C(gbm)_FAE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.49 0.05 -6.86 <0.001 0.47 0.05 -7.15 <0.001 0.47 0.05 -7.15 <0.001 0.46 0.05 -7.23 <0.001 0.48 0.05 -6.92 <0.001 0.46 0.05 -7.24 <0.001
DiseaseCondition [LOSS] 2.72 0.39 6.97 <0.001 2.91 0.43 7.29 <0.001 2.92 0.43 7.31 <0.001 2.97 0.44 7.42 <0.001 2.77 0.41 6.97 <0.001 2.99 0.44 7.45 <0.001
F 1.30 0.12 2.82 0.005 1.30 0.12 2.81 0.005 1.32 0.12 2.92 0.003 1.22 0.11 2.21 0.027 1.29 0.12 2.68 0.007
A 1.66 0.18 4.58 <0.001 2.01 0.23 6.01 <0.001 2.07 0.24 6.20 <0.001 1.94 0.23 5.51 <0.001 1.92 0.22 5.74 <0.001
C 0.79 0.04 -4.89 <0.001
E 0.96 0.06 -0.67 0.501 0.91 0.06 -1.39 0.166 0.90 0.06 -1.48 0.138 0.92 0.06 -1.23 0.217 0.92 0.06 -1.29 0.196
DiseaseCondition [LOSS] ×
F
0.87 0.11 -1.14 0.254 0.81 0.10 -1.70 0.089 0.78 0.10 -1.93 0.054 0.85 0.11 -1.33 0.185 0.79 0.10 -1.87 0.062
DiseaseCondition [LOSS] ×
A
1.01 0.15 0.07 0.947 0.79 0.12 -1.54 0.124 0.73 0.12 -1.95 0.051 0.76 0.12 -1.70 0.089 0.80 0.12 -1.45 0.148
DiseaseCondition [LOSS] ×
C
1.17 0.08 2.39 0.017
DiseaseCondition [LOSS] ×
E
0.79 0.07 -2.58 0.010 0.84 0.08 -1.91 0.056 0.85 0.08 -1.73 0.083 0.84 0.08 -1.86 0.063 0.84 0.08 -1.83 0.067
Predicted C 0.66 0.07 -4.04 <0.001 0.61 0.07 -4.31 <0.001 0.71 0.10 -2.36 0.018 0.68 0.07 -3.52 <0.001
DiseaseCondition [LOSS] ×
Predicted C
1.77 0.24 4.18 <0.001 2.13 0.33 4.88 <0.001 2.06 0.40 3.69 <0.001 2.02 0.30 4.75 <0.001
Observations 5336 5336 5336 5336 5336 5336
AIC 6723.912 6573.688 6582.429 6576.921 6588.102 6579.211
log-Likelihood -3343.956 -3260.844 -3265.214 -3262.460 -3268.051 -3263.605
Anova and Eta Square
calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")
Etasq Table: Disease Paradigm
Condition Panel DiseaseCondition:Panel
Disease.PAN 0.05826*** 0.01261*** 0.0093***
Disease.PAN_FACE 0.05826*** 0.00366*** 0.00557***
Disease.PAN_Pred_C.reg_FAE 0.05826*** 0.00402*** 0.00415***
Disease.PAN_Pred_C.net_FAE 0.05826*** 0.00397*** 0.00388***
Disease.PAN_Pred_C.rf_FAE 0.05826*** 0.00381*** 0.00395***
Disease.PAN_Pred_C.gbm_FAE 0.05826*** 0.00384*** 0.00385***
2.1.1.5.3 Less is More Paradigm
LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_MP1.US.gbm.C)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.gbm.C)
LessMore.PAN_Pred_C.reg_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.reg.C)
LessMore.PAN_Pred_C.net_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.net.C)
LessMore.PAN_Pred_C.rf_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.rf.C)
LessMore.PAN_Pred_C.gbm_FAE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.gbm.C)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_C.reg_FAE" = LessMore.PAN_Pred_C.reg_FAE,
  "LessMore.PAN_Pred_C.net_FAE" = LessMore.PAN_Pred_C.net_FAE,
  "LessMore.PAN_Pred_C.rf_FAE" = LessMore.PAN_Pred_C.rf_FAE, 
  "LessMore.PAN_Pred_C.gbm_FAE" = LessMore.PAN_Pred_C.gbm_FAE
)
Model Results
term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_C.reg_FAE,LessMore.PAN_Pred_C.net_FAE,LessMore.PAN_Pred_C.rf_FAE,LessMore.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.C(reg)_FAE", 
                        "PAN_Pred.C(net)_FAE", 
                        "PAN_Pred.C(rf)_FAE", 
                        "PAN_Pred.C(gbm)_FAE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result
LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.C(reg)_FAE PAN_Pred.C(net)_FAE PAN_Pred.C(rf)_FAE PAN_Pred.C(gbm)_FAE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.37 0.07 79.64 <0.001 5.36 0.07 81.93 <0.001 5.37 0.07 82.30 <0.001 5.38 0.07 82.39 <0.001 5.34 0.06 82.15 <0.001 5.38 0.07 82.29 <0.001
LessMoreCondition [SCARF] 0.95 0.09 10.04 <0.001 0.99 0.09 10.82 <0.001 1.00 0.09 10.87 <0.001 0.99 0.09 10.86 <0.001 1.01 0.09 11.05 <0.001 1.00 0.09 10.90 <0.001
F -0.11 0.05 -1.99 0.046 -0.16 0.05 -2.87 0.004 -0.16 0.05 -2.98 0.003 -0.17 0.05 -3.22 0.001 -0.16 0.05 -2.88 0.004
A 0.38 0.06 6.14 <0.001 0.29 0.07 4.40 <0.001 0.27 0.07 4.07 <0.001 0.18 0.07 2.64 0.008 0.31 0.06 4.89 <0.001
C 0.02 0.03 0.58 0.562
E -0.00 0.04 -0.09 0.930 0.02 0.04 0.51 0.608 0.03 0.04 0.63 0.529 0.05 0.04 1.19 0.235 0.02 0.04 0.52 0.606
LessMoreCondition [SCARF]
× F
0.28 0.08 3.67 <0.001 0.27 0.08 3.53 <0.001 0.26 0.08 3.39 0.001 0.27 0.07 3.57 <0.001 0.26 0.08 3.43 0.001
LessMoreCondition [SCARF]
× A
0.45 0.09 5.04 <0.001 0.43 0.09 4.58 <0.001 0.41 0.09 4.37 <0.001 0.43 0.10 4.48 <0.001 0.42 0.09 4.61 <0.001
LessMoreCondition [SCARF]
× C
-0.01 0.04 -0.14 0.886
LessMoreCondition [SCARF]
× E
-0.12 0.06 -2.24 0.025 -0.13 0.06 -2.28 0.023 -0.13 0.06 -2.24 0.025 -0.14 0.06 -2.51 0.012 -0.13 0.06 -2.28 0.023
Predicted C 0.24 0.06 3.99 <0.001 0.29 0.07 4.28 <0.001 0.58 0.08 6.94 <0.001 0.24 0.06 3.79 <0.001
LessMoreCondition [SCARF]
× Predicted C
0.02 0.08 0.25 0.799 0.06 0.09 0.64 0.524 -0.02 0.12 -0.15 0.884 0.06 0.09 0.64 0.520
Observations 5336 5336 5336 5336 5336 5336
R2 / R2 adjusted 0.173 / 0.170 0.224 / 0.220 0.229 / 0.226 0.231 / 0.227 0.238 / 0.234 0.229 / 0.226
Anova and Eta Square
calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")
Etasq Table: LessMore Paradigm
Condition Panel LessMoreCondition:Panel
LessMore.PAN 0.12958*** 0.03699*** 0.00642***
LessMore.PAN_FACE 0.12958*** 0.01349*** 0.00148
LessMore.PAN_Pred_C.reg_FAE 0.12958*** 0.01202*** 0.00137
LessMore.PAN_Pred_C.net_FAE 0.12958*** 0.01191*** 0.00137
LessMore.PAN_Pred_C.rf_FAE 0.12958*** 0.00983*** 0.00136
LessMore.PAN_Pred_C.gbm_FAE 0.12958*** 0.01089*** 0.00142
2.1.1.5.4 Sunk Cost Paradigm
Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_MP1.US.gbm.C)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm.C)
Sunk.PAN_Pred_C.reg_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_MP1.US.reg.C)
Sunk.PAN_Pred_C.net_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.net.C)
Sunk.PAN_Pred_C.rf_FAE <- lm(numSunkCost ~ SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_MP1.US.rf.C)
Sunk.PAN_Pred_C.gbm_FAE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm.C)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_C.reg_FAE" = Sunk.PAN_Pred_C.reg_FAE,
  "Sunk.PAN_Pred_C.net_FAE" = Sunk.PAN_Pred_C.net_FAE,
  "Sunk.PAN_Pred_C.rf_FAE" = Sunk.PAN_Pred_C.rf_FAE, #0.302 with unscaled
  "Sunk.PAN_Pred_C.gbm_FAE" = Sunk.PAN_Pred_C.gbm_FAE
)

# extract_adjusted_r_squared(Sunk_model)
Full Model Results
term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_C.reg_FAE,Sunk.PAN_Pred_C.net_FAE,Sunk.PAN_Pred_C.rf_FAE,Sunk.PAN_Pred_C.gbm_FAE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_C(reg)_FAE", 
                        "PAN_Pred_C(net)_FAE", 
                        "PAN_Pred_C(rf)_FAE", 
                        "PAN_Pred_C(gbm)_FAE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result
Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred_C(reg)_FAE PAN_Pred_C(net)_FAE PAN_Pred_C(rf)_FAE PAN_Pred_C(gbm)_FAE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.46 0.14 39.29 <0.001 5.46 0.14 39.54 <0.001 5.47 0.14 39.55 <0.001 5.47 0.14 39.56 <0.001 5.46 0.14 39.57 <0.001 5.46 0.14 39.44 <0.001
SunkCondition [PAID] 0.70 0.20 3.48 0.001 0.70 0.20 3.52 <0.001 0.71 0.20 3.57 <0.001 0.72 0.20 3.61 <0.001 0.67 0.20 3.38 0.001 0.74 0.20 3.70 <0.001
F 0.32 0.11 2.82 0.005 0.34 0.12 2.95 0.003 0.33 0.12 2.87 0.004 0.37 0.11 3.23 0.001 0.39 0.12 3.37 0.001
A 0.59 0.14 4.33 <0.001 0.53 0.14 3.65 <0.001 0.51 0.15 3.48 0.001 0.58 0.15 3.90 <0.001 0.60 0.14 4.31 <0.001
C 0.10 0.06 1.71 0.087
E 0.20 0.08 2.32 0.021 0.21 0.09 2.50 0.013 0.22 0.09 2.54 0.011 0.20 0.09 2.35 0.019 0.19 0.09 2.27 0.023
SunkCondition [PAID] × F 0.18 0.16 1.10 0.270 0.12 0.16 0.76 0.450 0.12 0.16 0.71 0.479 0.05 0.16 0.34 0.737 0.04 0.16 0.27 0.790
SunkCondition [PAID] × A 0.02 0.19 0.11 0.914 -0.05 0.20 -0.23 0.814 -0.06 0.21 -0.30 0.766 -0.30 0.21 -1.42 0.156 -0.13 0.20 -0.67 0.501
SunkCondition [PAID] × C -0.02 0.08 -0.18 0.854
SunkCondition [PAID] × E -0.04 0.12 -0.31 0.753 -0.02 0.12 -0.18 0.860 -0.02 0.12 -0.15 0.883 0.03 0.12 0.21 0.837 0.01 0.12 0.04 0.965
Predicted C 0.11 0.13 0.90 0.371 0.16 0.14 1.11 0.266 -0.03 0.18 -0.14 0.888 -0.12 0.14 -0.85 0.397
SunkCondition [PAID] ×
Predicted C
0.18 0.18 1.03 0.304 0.23 0.20 1.14 0.256 0.89 0.26 3.45 0.001 0.54 0.19 2.79 0.005
Observations 5336 5336 5336 5336 5336 5336
R2 / R2 adjusted 0.025 / 0.022 0.040 / 0.035 0.040 / 0.035 0.040 / 0.036 0.043 / 0.038 0.040 / 0.036
Anova and Eta Square
calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")
Etasq Table: SunkCost Paradigm
Condition Panel LessMoreCondition:Panel
Sunk.PAN 0.01083*** 0.00971*** 0.00447**
Sunk.PAN_FACE 0.01083*** 0.00563*** 0.00226
Sunk.PAN_Pred_C.reg_FAE 0.01083*** 0.00644*** 0.00215
Sunk.PAN_Pred_C.net_FAE 0.01083*** 0.00659*** 0.00215
Sunk.PAN_Pred_C.rf_FAE 0.01083*** 0.00637*** 0.00163
Sunk.PAN_Pred_C.gbm_FAE 0.01083*** 0.00605*** 0.0018
2.1.1.5.5 Summary
Model Fit
  • \(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)

  • note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.tst_MP1.US.reg.C,family=binomial)


R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_C.reg_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.net_FAE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_C.rf_FAE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_C.gbm_FAE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.tst_MP1.US.reg.C,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_C.reg_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.net_FAE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_C.rf_FAE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_C.gbm_FAE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(LessMore.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(LessMore.PAN_Pred_C.rf_FAE)$r.squared,
                     summary(LessMore.PAN_Pred_C.gbm_FAE )$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.reg_FAE)$r.squared,  
                     summary(Sunk.PAN_Pred_C.net_FAE)$r.squared, 
                     summary(Sunk.PAN_Pred_C.rf_FAE)$r.squared,
                     summary(Sunk.PAN_Pred_C.gbm_FAE)$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+Pred.C(reg)+FAE",  "Pan+Pred.C(net)+FAE" , "Pan+Pred.C(rf)+FAE","Pan+Pred.C(gbm)+FAE"), 
            "Paradigm" = "SunkCost"))



R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),3))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_C.reg_FAE), 
                    AIC(Default.PAN_Pred_C.net_FAE), 
                    AIC(Default.PAN_Pred_C.rf_FAE),
                    AIC(Default.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_C.reg_FAE), 
                    AIC(Disease.PAN_Pred_C.net_FAE), 
                    AIC(Disease.PAN_Pred_C.rf_FAE),
                    AIC(Disease.PAN_Pred_C.gbm_FAE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PC(reg)+FAE",  "Pan+PC(net)+FAE" , "Pan+PC(rf)+FAE","Pan+PC(gbm)+FAE"), 
            "Paradigm" = "Disease"))


AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PC(reg)+FAE",  
                                   "Pan+PC(net)+FAE", 
                                   "Pan+PC(rf)+FAE",
                                   "Pan+PC(gbm)+FAE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))

avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample", digits = 2)
Percentage Improvement in R2 (compared to PAN) Across Paradigms in the test sample
Model Avg_Improvement
Pan 0.00
Pan+FACE 30.51
Pan+Pred.C(reg)+FAE 31.05
Pan+Pred.C(net)+FAE 31.65
Pan+Pred.C(rf)+FAE 35.18
Pan+Pred.C(gbm)+FAE 31.12
Pan+FACE (Full Study 5) 43.20
R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data", digits = 2)
Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data
Model Avg_Improvement
Pan 0.00
Pan+FACE 30.51
Pan+Pred.C(reg)+FAE 31.05
Pan+Pred.C(net)+FAE 31.65
Pan+Pred.C(rf)+FAE 35.18
Pan+Pred.C(gbm)+FAE 31.12
Pan+FACE (Full Study 5) 43.20
C*Cond Model Coefficients
  • DVs for Default & Framing are 1/0
  • DVs for LessMore & SunkCost are on the original scale
AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_C.reg_FAE),confint(Default.PAN_Pred_C.reg_FAE)),
cbind(coef(Default.PAN_Pred_C.net_FAE),confint(Default.PAN_Pred_C.net_FAE)),
cbind(coef(Default.PAN_Pred_C.rf_FAE),confint(Default.PAN_Pred_C.rf_FAE)),
cbind(coef(Default.PAN_Pred_C.gbm_FAE),confint(Default.PAN_Pred_C.gbm_FAE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_C.reg_FAE),confint(Disease.PAN_Pred_C.reg_FAE)),
cbind(coef(Disease.PAN_Pred_C.net_FAE),confint(Disease.PAN_Pred_C.net_FAE)),
cbind(coef(Disease.PAN_Pred_C.rf_FAE),confint(Disease.PAN_Pred_C.rf_FAE)),
cbind(coef(Disease.PAN_Pred_C.gbm_FAE),confint(Disease.PAN_Pred_C.gbm_FAE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_C.reg_FAE),confint(LessMore.PAN_Pred_C.reg_FAE)),
cbind(coef(LessMore.PAN_Pred_C.net_FAE),confint(LessMore.PAN_Pred_C.net_FAE)),
cbind(coef(LessMore.PAN_Pred_C.rf_FAE),confint(LessMore.PAN_Pred_C.rf_FAE)),
cbind(coef(LessMore.PAN_Pred_C.gbm_FAE),confint(LessMore.PAN_Pred_C.gbm_FAE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_C.reg_FAE),confint(Sunk.PAN_Pred_C.reg_FAE)),
cbind(coef(Sunk.PAN_Pred_C.net_FAE),confint(Sunk.PAN_Pred_C.net_FAE)),
cbind(coef(Sunk.PAN_Pred_C.rf_FAE),confint(Sunk.PAN_Pred_C.rf_FAE)),
cbind(coef(Sunk.PAN_Pred_C.gbm_FAE),confint(Sunk.PAN_Pred_C.gbm_FAE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PF(reg) + ACE","PF(net) + ACE","PF(rf) + ACE","PF(gbm) + ACE","FACE (Full Study 5)")))


colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

Etasq
  • note that the y-axes for different paradigms are not aligned..
  • We have observed a slight improvement in explaining heterogeneity across all methods, compared to what we initially had. I think this is due to our predictors being more refined: For example, we used longitude and latitude info to swap “minutes since midnight (EST)” to “clock time.
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Disease$Paradigm <- "Disease"
variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"

Model <- c("Pan", "Pan+FACE", "Pan+Pred.C(reg)+FAE", "Pan+Pred.C(net)+FAE", "Pan+Pred.C(rf)+FAE", "Pan+Pred.C(gbm)+FAE")

deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.C(reg)+FAE",  
                                   "Pan+Pred.C(net)+FAE", 
                                   "Pan+Pred.C(rf)+FAE",
                                   "Pan+Pred.C(gbm)+FAE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)


split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study1 <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study1 <- etasq_study1 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study1)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1", digits = 2)
Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1
Model Avg_Reduction
Pan 0.00
Pan+FACE 56.30
Pan+Pred.C(reg)+FAE 64.79
Pan+Pred.C(net)+FAE 65.42
Pan+Pred.C(rf)+FAE 65.95
Pan+Pred.C(gbm)+FAE 65.40
Pan (Full Study5) 0.00
Pan+FACE (Full Study5) 74.19

3. Predict both F and C

3.0 Evaluate Accuracy with Study 5 held-out Data

data.tst_M5a.reg.FC<-cbind(data.tst_M5a.reg.C, Predicted_F = pred_M5a_reg)
data.tst_M5a.net.FC<-cbind(data.tst_M5a.net.C, Predicted_F = pred_M5a_net)
data.tst_M5a.rf.FC<-cbind(data.tst_M5a.rf.C, Predicted_F = pred_M5a_rf)
data.tst_M5a.gbm.FC<-cbind(data.tst_M5a.gbm.C, Predicted_F = pred_M5a_gbm)
data.tst_M5a.gbm.FC$rotsum<-data.tst_M5a.gbm$rotsum
data.tst_M5a.gbm.FC$MXsum<-data.tst_M5a.gbm$MXsum
data.tst_M5a.gbm.FC$crt2_score<-data.tst_M5a.gbm$crt2_score

Correlations Btw PF5, PC5, and Study 5 FACE

Linear Regression

ggpairs(data.tst_M5a.reg.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net

ggpairs(data.tst_M5a.net.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest

ggpairs(data.tst_M5a.rf.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.tst_M5a.gbm.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PF, PC, and F and C’s composite measures

ggpairs(data.tst_M5a.gbm.FC, c("Predicted_F","ATTN1__Correct","crt2_score", "rotsum", "MXsum","Antonym","Synonym","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Explain Heterogeniety with PF5 and PC5

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.reg_AE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.reg.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.net_AE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.net.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.rf_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.rf.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.gbm_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_FC.reg_AE" = Default.PAN_Pred_FC.reg_AE,
  "Default.PAN_Pred_FC.net_AE" = Default.PAN_Pred_FC.net_AE,
  "Default.PAN_Pred_FC.rf_AE" = Default.PAN_Pred_FC.rf_AE,
  "Default.PAN_Pred_FC.gbm_AE" = Default.PAN_Pred_FC.gbm_AE
)
Model Results
term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_FC.reg_AE,Default.PAN_Pred_FC.net_AE,Default.PAN_Pred_FC.rf_AE,Default.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result
Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.FC(reg)_AE PAN_Pred.FC(net)_AE PAN_Pred.FC(rf)_AE PAN_Pred.FC(gbm)_AE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.96 0.28 -0.15 0.884 0.94 0.28 -0.21 0.837 0.90 0.29 -0.33 0.739 0.87 0.29 -0.43 0.666 0.88 0.28 -0.39 0.696 0.92 0.29 -0.25 0.803
DefaultCondition [OPTOUT] 1.41 0.58 0.83 0.409 1.44 0.61 0.86 0.391 1.55 0.73 0.93 0.352 1.62 0.78 0.99 0.320 1.59 0.74 1.00 0.317 1.38 0.63 0.71 0.477
F 0.87 0.35 -0.35 0.725
A 0.85 0.17 -0.79 0.431 0.72 0.15 -1.54 0.124 0.76 0.16 -1.29 0.196 0.79 0.16 -1.16 0.248 0.80 0.16 -1.10 0.271
C 1.69 0.58 1.53 0.127
E 0.88 0.16 -0.72 0.474 0.91 0.15 -0.58 0.560 0.91 0.15 -0.59 0.553 0.88 0.14 -0.79 0.427 0.87 0.15 -0.81 0.419
DefaultCondition [OPTOUT]
× F
1.35 0.84 0.49 0.627
DefaultCondition [OPTOUT]
× A
1.07 0.31 0.24 0.814 1.28 0.39 0.81 0.418 1.21 0.37 0.63 0.527 1.07 0.33 0.23 0.817 1.17 0.35 0.51 0.610
DefaultCondition [OPTOUT]
× C
0.68 0.34 -0.77 0.443
DefaultCondition [OPTOUT]
× E
1.68 0.50 1.74 0.082 1.48 0.38 1.54 0.122 1.47 0.37 1.52 0.129 1.51 0.38 1.63 0.102 1.58 0.41 1.76 0.078
Predicted F 0.97 0.34 -0.09 0.927 1.02 0.40 0.04 0.968 0.83 0.31 -0.49 0.622 0.78 0.30 -0.64 0.522
Predicted C 2.60 1.30 1.92 0.055 2.22 1.28 1.38 0.167 2.86 1.63 1.84 0.066 2.81 1.37 2.12 0.034
DefaultCondition [OPTOUT]
× Predicted F
0.82 0.47 -0.34 0.732 0.77 0.50 -0.41 0.682 0.72 0.43 -0.56 0.578 1.33 0.83 0.45 0.654
DefaultCondition [OPTOUT]
× Predicted C
0.57 0.43 -0.74 0.457 0.68 0.61 -0.43 0.670 1.02 0.93 0.02 0.983 0.41 0.31 -1.18 0.239
Observations 292 292 292 292 292 292
AIC 389.079 395.399 392.482 395.287 393.751 394.135
log-Likelihood -188.540 -183.699 -182.241 -183.644 -182.876 -183.067
Anova and Eta Square
calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")
Etasq Table: Default Paradigm
Condition Panel Default Condition:Panel
Default.PAN 0.02968*** 0.0095 0.02393**
Default.PAN_FACE 0.02968*** 0.00305 0.00314
Default.PAN_Pred_FC.reg_AE 0.02968*** 0.00372 0.00401
Default.PAN_Pred_FC.net_AE 0.02968*** 0.0039 0.00409
Default.PAN_Pred_FC.rf_AE 0.02968*** 0.0028 0.00517
Default.PAN_Pred_FC.gbm_AE 0.02968*** 0.00252 0.00168

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_M5a.gbm.FC, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_M5a.gbm.FC, family = binomial)

Disease.PAN_Pred_FC.reg_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.reg.FC, family = binomial)

Disease.PAN_Pred_FC.net_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.net.FC, family = binomial)

Disease.PAN_Pred_FC.rf_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_M5a.rf.FC, family = binomial)

Disease.PAN_Pred_FC.gbm_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_M5a.gbm.FC, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_FC.reg_AE" = Disease.PAN_Pred_FC.reg_AE,
  "Disease.PAN_Pred_FC.net_AE" = Disease.PAN_Pred_FC.net_AE,
  "Disease.PAN_Pred_FC.rf_AE" = Disease.PAN_Pred_FC.rf_AE, 
  "Disease.PAN_Pred_FC.gbm_AE" = Disease.PAN_Pred_FC.gbm_AE
)

# extract_aic(Disease_model)
Model Results
term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_FC.reg_AE,Disease.PAN_Pred_FC.net_AE,Disease.PAN_Pred_FC.rf_AE,Disease.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result
Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.FC(reg)_AE PAN_Pred.FC(net)_AE PAN_Pred.FC(rf)_AE PAN_Pred.FC(gbm)_AE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.48 0.13 -2.67 0.008 0.44 0.13 -2.80 0.005 0.36 0.12 -3.08 0.002 0.35 0.12 -3.05 0.002 0.43 0.14 -2.66 0.008 0.41 0.13 -2.80 0.005
DiseaseCondition [LOSS] 4.04 1.82 3.09 0.002 4.19 1.97 3.04 0.002 4.29 2.25 2.79 0.005 4.31 2.32 2.71 0.007 3.24 1.66 2.30 0.022 3.52 1.78 2.49 0.013
F 2.42 1.16 1.84 0.065
A 1.19 0.23 0.90 0.369 1.20 0.24 0.90 0.370 1.25 0.25 1.09 0.275 1.29 0.26 1.25 0.212 1.24 0.24 1.07 0.287
C 0.41 0.17 -2.20 0.028
E 1.07 0.21 0.33 0.745 0.95 0.16 -0.33 0.745 0.93 0.16 -0.40 0.690 0.94 0.16 -0.34 0.733 0.95 0.16 -0.28 0.780
DiseaseCondition [LOSS] ×
F
0.92 0.64 -0.12 0.902
DiseaseCondition [LOSS] ×
A
1.49 0.50 1.17 0.243 1.69 0.59 1.53 0.127 1.63 0.56 1.42 0.154 1.53 0.53 1.24 0.216 1.55 0.52 1.29 0.196
DiseaseCondition [LOSS] ×
C
1.17 0.65 0.28 0.780
DiseaseCondition [LOSS] ×
E
0.77 0.24 -0.86 0.392 0.74 0.20 -1.11 0.265 0.75 0.20 -1.06 0.289 0.77 0.20 -1.00 0.315 0.78 0.21 -0.93 0.353
Predicted F 2.62 1.12 2.26 0.024 3.16 1.56 2.33 0.020 2.21 0.97 1.80 0.072 2.54 1.19 1.99 0.047
Predicted C 0.32 0.17 -2.14 0.032 0.21 0.14 -2.29 0.022 0.29 0.19 -1.93 0.054 0.29 0.16 -2.22 0.027
DiseaseCondition [LOSS] ×
Predicted F
0.69 0.44 -0.59 0.558 0.62 0.44 -0.67 0.503 1.05 0.68 0.07 0.945 0.93 0.66 -0.10 0.920
DiseaseCondition [LOSS] ×
Predicted C
1.18 0.95 0.21 0.836 1.57 1.54 0.46 0.644 1.13 1.12 0.13 0.899 1.22 1.01 0.24 0.812
Observations 292 292 292 292 292 292
AIC 363.491 356.242 356.789 356.320 357.827 356.696
log-Likelihood -175.745 -164.121 -164.395 -164.160 -164.914 -164.348
Anova and Eta Square
calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")
Etasq Table: Disease Paradigm
Condition Panel DiseaseCondition:Panel
Disease.PAN 0.03974*** 0.03523** 0.01646*
Disease.PAN_FACE 0.03974*** 0.0196* 0.00098
Disease.PAN_Pred_FC.reg_AE 0.03974*** 0.01486. 0.00148
Disease.PAN_Pred_FC.net_AE 0.03974*** 0.01624* 0.00126
Disease.PAN_Pred_FC.rf_AE 0.03974*** 0.01595* 0.00046
Disease.PAN_Pred_FC.gbm_AE 0.03974*** 0.01836* 0.00056

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_M5a.gbm.FC)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.gbm.FC)
LessMore.PAN_Pred_FC.reg_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.reg.FC)
LessMore.PAN_Pred_FC.net_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.net.FC)
LessMore.PAN_Pred_FC.rf_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_M5a.rf.FC)
LessMore.PAN_Pred_FC.gbm_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_M5a.gbm.FC)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_FC.reg_AE" = LessMore.PAN_Pred_FC.reg_AE,
  "LessMore.PAN_Pred_FC.net_AE" = LessMore.PAN_Pred_FC.net_AE,
  "LessMore.PAN_Pred_FC.rf_AE" = LessMore.PAN_Pred_FC.rf_AE, 
  "LessMore.PAN_Pred_FC.gbm_AE" = LessMore.PAN_Pred_FC.gbm_AE
)
Model Results
term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_FC.reg_AE,LessMore.PAN_Pred_FC.net_AE,LessMore.PAN_Pred_FC.rf_AE,LessMore.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result
LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.FC(reg)_AE PAN_Pred.FC(net)_AE PAN_Pred.FC(rf)_AE PAN_Pred.FC(gbm)_AE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.51 0.21 25.98 <0.001 5.52 0.21 26.19 <0.001 5.51 0.24 23.16 <0.001 5.48 0.24 22.38 <0.001 5.50 0.23 23.69 <0.001 5.57 0.23 24.49 <0.001
LessMoreCondition [SCARF] 0.76 0.29 2.65 0.009 0.83 0.29 2.89 0.004 0.75 0.31 2.39 0.017 0.78 0.32 2.41 0.017 0.69 0.31 2.24 0.026 0.67 0.30 2.19 0.029
F -0.03 0.25 -0.11 0.913
A -0.26 0.14 -1.88 0.062 -0.27 0.14 -1.88 0.062 -0.23 0.14 -1.64 0.102 -0.24 0.14 -1.67 0.096 -0.23 0.14 -1.63 0.105
C 0.16 0.21 0.77 0.442
E -0.02 0.12 -0.17 0.863 -0.02 0.10 -0.21 0.832 -0.02 0.10 -0.22 0.826 -0.02 0.10 -0.23 0.818 -0.06 0.10 -0.54 0.591
LessMoreCondition [SCARF]
× F
0.38 0.40 0.95 0.345
LessMoreCondition [SCARF]
× A
0.51 0.18 2.78 0.006 0.55 0.19 2.91 0.004 0.48 0.19 2.54 0.012 0.45 0.19 2.36 0.019 0.47 0.19 2.52 0.012
LessMoreCondition [SCARF]
× C
-0.24 0.32 -0.75 0.452
LessMoreCondition [SCARF]
× E
0.05 0.18 0.28 0.780 -0.03 0.15 -0.17 0.867 -0.02 0.15 -0.12 0.906 -0.01 0.15 -0.06 0.953 0.04 0.15 0.25 0.803
Predicted F -0.02 0.25 -0.07 0.945 0.12 0.28 0.43 0.668 0.09 0.26 0.33 0.739 -0.15 0.27 -0.57 0.569
Predicted C 0.16 0.31 0.51 0.611 -0.12 0.38 -0.32 0.746 -0.07 0.37 -0.18 0.858 0.15 0.30 0.48 0.629
LessMoreCondition [SCARF]
× Predicted F
0.33 0.36 0.92 0.360 0.12 0.41 0.29 0.776 0.40 0.36 1.10 0.274 0.64 0.39 1.64 0.103
LessMoreCondition [SCARF]
× Predicted C
-0.42 0.47 -0.88 0.381 0.15 0.58 0.26 0.798 -0.03 0.57 -0.05 0.960 -0.41 0.48 -0.84 0.400
Observations 292 292 292 292 292 292
R2 / R2 adjusted 0.208 / 0.194 0.249 / 0.214 0.246 / 0.211 0.247 / 0.212 0.257 / 0.222 0.252 / 0.217
Anova and Eta Square
calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")
Etasq Table: LessMore Paradigm
Condition Panel LessMoreCondition:Panel
LessMore.PAN 0.08314*** 0.12202*** 0.00272
LessMore.PAN_FACE 0.08314*** 0.04611*** 0.00099
LessMore.PAN_Pred_FC.reg_AE 0.08314*** 0.05379*** 0.00016
LessMore.PAN_Pred_FC.net_AE 0.08314*** 0.05187*** 0.00105
LessMore.PAN_Pred_FC.rf_AE 0.08314*** 0.04951*** 0.00117
LessMore.PAN_Pred_FC.gbm_AE 0.08314*** 0.04768*** 0.00108

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_M5a.gbm.FC)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.gbm.FC)
Sunk.PAN_Pred_FC.reg_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_M5a.reg.FC)
Sunk.PAN_Pred_FC.net_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.net.FC)
Sunk.PAN_Pred_FC.rf_AE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_M5a.rf.FC)
Sunk.PAN_Pred_FC.gbm_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_M5a.gbm.FC)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_FC.reg_ACE" = Sunk.PAN_Pred_FC.reg_AE,
  "Sunk.PAN_Pred_FC.net_ACE" = Sunk.PAN_Pred_FC.net_AE,
  "Sunk.PAN_Pred_FC.rf_ACE" = Sunk.PAN_Pred_FC.rf_AE, #0.302 with unscaled
  "Sunk.PAN_Pred_FC.gbm_ACE" = Sunk.PAN_Pred_FC.gbm_AE
)

# extract_adjusted_r_squared(Sunk_model)
Full Model Results
term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_FC.reg_AE,Sunk.PAN_Pred_FC.net_AE,Sunk.PAN_Pred_FC.rf_AE,Sunk.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_FC(reg)_AE", 
                        "PAN_Pred_FC(net)_AE", 
                        "PAN_Pred_FC(rf)_AE", 
                        "PAN_Pred_FC(gbm)_AE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result
Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred_FC(reg)_AE PAN_Pred_FC(net)_AE PAN_Pred_FC(rf)_AE PAN_Pred_FC(gbm)_AE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.51 0.36 15.18 <0.001 5.51 0.36 15.31 <0.001 5.50 0.41 13.46 <0.001 5.51 0.42 13.07 <0.001 5.64 0.41 13.80 <0.001 5.54 0.40 13.80 <0.001
SunkCondition [PAID] 0.14 0.54 0.26 0.792 0.06 0.53 0.12 0.906 -0.17 0.59 -0.29 0.771 -0.17 0.60 -0.28 0.783 0.02 0.59 0.04 0.969 0.01 0.58 0.02 0.985
F 0.60 0.49 1.21 0.226
A 0.22 0.24 0.93 0.355 0.08 0.25 0.32 0.749 0.12 0.25 0.45 0.650 0.10 0.26 0.39 0.695 0.15 0.25 0.59 0.557
C -1.07 0.42 -2.52 0.012
E 0.07 0.24 0.29 0.771 -0.02 0.20 -0.11 0.913 -0.03 0.20 -0.13 0.894 -0.09 0.20 -0.45 0.656 -0.04 0.21 -0.17 0.863
SunkCondition [PAID] × F 0.53 0.73 0.72 0.469
SunkCondition [PAID] × A -0.50 0.34 -1.46 0.144 -0.22 0.35 -0.63 0.528 -0.24 0.35 -0.68 0.499 -0.30 0.36 -0.83 0.409 -0.33 0.35 -0.93 0.351
SunkCondition [PAID] × C 0.05 0.60 0.08 0.937
SunkCondition [PAID] × E 0.05 0.33 0.15 0.883 -0.07 0.28 -0.24 0.811 -0.04 0.28 -0.16 0.874 -0.00 0.28 -0.01 0.992 -0.05 0.29 -0.16 0.870
Predicted F -0.04 0.45 -0.09 0.925 0.02 0.52 0.03 0.976 -0.51 0.48 -1.06 0.289 0.01 0.51 0.02 0.986
Predicted C 0.17 0.62 0.27 0.787 -0.02 0.76 -0.02 0.981 0.58 0.74 0.78 0.435 -0.13 0.62 -0.21 0.832
SunkCondition [PAID] ×
Predicted F
0.98 0.66 1.47 0.143 1.00 0.76 1.33 0.186 0.41 0.69 0.60 0.550 0.31 0.73 0.43 0.669
SunkCondition [PAID] ×
Predicted C
-1.68 0.89 -1.90 0.059 -1.77 1.08 -1.64 0.102 -0.46 1.09 -0.42 0.676 -0.51 0.91 -0.56 0.575
Observations 292 292 292 292 292 292
R2 / R2 adjusted 0.040 / 0.023 0.086 / 0.043 0.063 / 0.019 0.062 / 0.018 0.048 / 0.003 0.047 / 0.003
Anova and Eta Square
calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")
Etasq Table: SunkCost Paradigm
Condition Panel LessMoreCondition:Panel
Sunk.PAN 0.00467 0.02588* 0.00922
Sunk.PAN_FACE 0.00467 0.01253 0.0039
Sunk.PAN_Pred_FC.reg_ACE 0.00467 0.01676. 0.01312
Sunk.PAN_Pred_FC.net_ACE 0.00467 0.01768. 0.01463
Sunk.PAN_Pred_FC.rf_ACE 0.00467 0.0229* 0.00722
Sunk.PAN_Pred_FC.gbm_ACE 0.00467 0.01779. 0.00956

Summary

Model Fit
  • \(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)

  • Model order: consistently (1) PAN only; (2) PAN + FACE; (3) PAN + Pred_C(reg)ACE; (4) PAN + Pred_C(net)ACE; (5) PAN + Pred_C(rf)ACE; (6) PAN + Pred_C(gbm)ACE

  • note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.tst_M5a.gbm.FC,family=binomial)

R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_FC.reg_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.net_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.rf_AE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_FC.gbm_AE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.tst_M5a.gbm.FC,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_FC.reg_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.net_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.rf_AE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_FC.gbm_AE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(LessMore.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(LessMore.PAN_Pred_FC.gbm_AE)$r.squared),
                      "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(Sunk.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(Sunk.PAN_Pred_FC.gbm_AE)$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "SunkCost"))

R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),5))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_FC.reg_AE), 
                    AIC(Default.PAN_Pred_FC.net_AE), 
                    AIC(Default.PAN_Pred_FC.rf_AE),
                    AIC(Default.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_FC.reg_AE), 
                    AIC(Disease.PAN_Pred_FC.net_AE), 
                    AIC(Disease.PAN_Pred_FC.rf_AE),
                    AIC(Disease.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  geom_text(aes(label = round(AIC,2)), vjust = -0.5, size = 3, fontface = "bold") + 
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms with study 5 test data", digits = 2)
Percentage Improvement in R2 (compared to PAN) Across Paradigms with study 5 test data
Model Avg_Improvement
Pan 0.00
Pan+FACE 59.96
Pan+PFC(reg)+AE 47.45
Pan+PFC(net)+AE 44.44
Pan+PFC(rf)+AE 37.39
Pan+PFC(gbm)+AE 36.59
Pan+FACE (Full Study 5) 43.20
Etasq
  • note that the y-axes for different paradigms are not aligned..
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.FC(reg)+AE", "Pan+Pred.FC(net)+AE", "Pan+Pred.FC(rf)+AE", "Pan+Pred.FC(gbm)+AE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.FC(reg)+AE",  
                                   "Pan+Pred.FC(net)+AE", 
                                   "Pan+Pred.FC(rf)+AE",
                                   "Pan+Pred.FC(gbm)+AE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 5 test sample", digits = 2)
Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 5 test sample
Model Avg_Reduction
Pan 0.00
Pan+FACE 75.56
Pan+Pred.FC(reg)+AE 56.52
Pan+Pred.FC(net)+AE 44.49
Pan+Pred.FC(rf)+AE 63.57
Pan+Pred.FC(gbm)+AE 61.55
Pan (Full Study5) 0.00
Pan+FACE (Full Study5) 74.19
F/C*Cond Model Coefficients
  • DVs for Default & Framing were 1/0
  • DVs for LessMore & SunkCost are on original scale
Default.PAN_FACE.Study5<-glm(numDefault~F*DefaultCondition+A*DefaultCondition+C*DefaultCondition+E*DefaultCondition+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~F*DiseaseCondition+A*DiseaseCondition+C*DiseaseCondition+E*DiseaseCondition+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~F*LessMoreCondition+A*LessMoreCondition+C*LessMoreCondition+E*LessMoreCondition+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~F*SunkCondition+A*SunkCondition+C*SunkCondition+E*SunkCondition+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_FC.reg_AE),confint(Default.PAN_Pred_FC.reg_AE)),
cbind(coef(Default.PAN_Pred_FC.net_AE),confint(Default.PAN_Pred_FC.net_AE)),
cbind(coef(Default.PAN_Pred_FC.rf_AE),confint(Default.PAN_Pred_FC.rf_AE)),
cbind(coef(Default.PAN_Pred_FC.gbm_AE),confint(Default.PAN_Pred_FC.gbm_AE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_FC.reg_AE),confint(Disease.PAN_Pred_FC.reg_AE)),
cbind(coef(Disease.PAN_Pred_FC.net_AE),confint(Disease.PAN_Pred_FC.net_AE)),
cbind(coef(Disease.PAN_Pred_FC.rf_AE),confint(Disease.PAN_Pred_FC.rf_AE)),
cbind(coef(Disease.PAN_Pred_FC.gbm_AE),confint(Disease.PAN_Pred_FC.gbm_AE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_FC.reg_AE),confint(LessMore.PAN_Pred_FC.reg_AE)),
cbind(coef(LessMore.PAN_Pred_FC.net_AE),confint(LessMore.PAN_Pred_FC.net_AE)),
cbind(coef(LessMore.PAN_Pred_FC.rf_AE),confint(LessMore.PAN_Pred_FC.rf_AE)),
cbind(coef(LessMore.PAN_Pred_FC.gbm_AE),confint(LessMore.PAN_Pred_FC.gbm_AE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_FC.reg_AE),confint(Sunk.PAN_Pred_FC.reg_AE)),
cbind(coef(Sunk.PAN_Pred_FC.net_AE),confint(Sunk.PAN_Pred_FC.net_AE)),
cbind(coef(Sunk.PAN_Pred_FC.rf_AE),confint(Sunk.PAN_Pred_FC.rf_AE)),
cbind(coef(Sunk.PAN_Pred_FC.gbm_AE),confint(Sunk.PAN_Pred_FC.gbm_AE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24),levels = c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)","C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

3.1 Evaluate Accuracy with Study 5 FULL Data

data.full_M5a.reg.FC<-cbind(data.full_M5a.reg.C, Predicted_F = pred_full_M5a_reg)
data.full_M5a.net.FC<-cbind(data.full_M5a.net.C, Predicted_F = pred_full_M5a_net)
data.full_M5a.rf.FC<-cbind(data.full_M5a.rf.C, Predicted_F = pred_full_M5a_rf)
data.full_M5a.gbm.FC<-cbind(data.full_M5a.gbm.C, Predicted_F = pred_full_M5a_gbm)
data.full_M5a.gbm.FC$rotsum<-data.full_M5a.gbm$rotsum
data.full_M5a.gbm.FC$MXsum<-data.full_M5a.gbm$MXsum
data.full_M5a.gbm.FC$crt2_score<-data.full_M5a.gbm$crt2_score

Correlations Btw PF5, PC5, and Study 5 FACE

Linear Regression

ggpairs(data.full_M5a.reg.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net

ggpairs(data.full_M5a.net.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest

ggpairs(data.full_M5a.rf.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting

ggpairs(data.full_M5a.gbm.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PF, PC, and F and C’s composite measures

ggpairs(data.full_M5a.gbm.FC, c("Predicted_F","ATTN1__Correct","crt2_score", "rotsum", "MXsum","Antonym","Synonym","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Explain Heterogeniety with PF5 and PC5

Default Paradigm

Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.full_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.full_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.reg_AE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.reg.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.net_AE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.net.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.rf_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.rf.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.gbm_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.full_M5a.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_FC.reg_AE" = Default.PAN_Pred_FC.reg_AE,
  "Default.PAN_Pred_FC.net_AE" = Default.PAN_Pred_FC.net_AE,
  "Default.PAN_Pred_FC.rf_AE" = Default.PAN_Pred_FC.rf_AE,
  "Default.PAN_Pred_FC.gbm_AE" = Default.PAN_Pred_FC.gbm_AE
)
Model Results
term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_FC.reg_AE,Default.PAN_Pred_FC.net_AE,Default.PAN_Pred_FC.rf_AE,Default.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result
Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.FC(reg)_AE PAN_Pred.FC(net)_AE PAN_Pred.FC(rf)_AE PAN_Pred.FC(gbm)_AE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 1.05 0.13 0.38 0.703 1.05 0.14 0.38 0.702 0.97 0.13 -0.23 0.818 0.94 0.13 -0.42 0.678 1.02 0.14 0.11 0.909 1.00 0.14 0.02 0.985
DefaultCondition [OPTOUT] 1.74 0.32 3.01 0.003 1.76 0.33 3.02 0.003 1.94 0.39 3.27 0.001 1.99 0.41 3.33 0.001 1.77 0.34 2.98 0.003 1.82 0.36 3.04 0.002
F 1.02 0.19 0.13 0.894
A 0.95 0.08 -0.54 0.590 0.91 0.08 -1.13 0.259 0.92 0.08 -0.99 0.323 0.92 0.08 -0.96 0.335 0.92 0.08 -1.02 0.308
C 1.72 0.25 3.78 <0.001
E 1.04 0.09 0.40 0.686 1.06 0.08 0.79 0.428 1.05 0.08 0.62 0.533 1.04 0.08 0.46 0.648 1.05 0.08 0.68 0.496
DefaultCondition [OPTOUT]
× F
1.25 0.34 0.83 0.408
DefaultCondition [OPTOUT]
× A
0.84 0.11 -1.32 0.188 0.96 0.13 -0.27 0.785 0.97 0.13 -0.25 0.806 0.89 0.12 -0.87 0.385 0.93 0.12 -0.54 0.586
DefaultCondition [OPTOUT]
× C
0.62 0.13 -2.29 0.022
DefaultCondition [OPTOUT]
× E
1.09 0.14 0.68 0.499 0.96 0.11 -0.40 0.689 0.96 0.11 -0.37 0.711 1.02 0.12 0.20 0.841 0.98 0.11 -0.15 0.884
Predicted F 1.08 0.18 0.44 0.657 1.07 0.20 0.38 0.700 0.93 0.17 -0.37 0.714 0.96 0.17 -0.22 0.822
Predicted C 2.00 0.43 3.19 0.001 2.02 0.52 2.75 0.006 2.46 0.49 4.54 <0.001 2.49 0.56 4.08 <0.001
DefaultCondition [OPTOUT]
× Predicted F
0.87 0.22 -0.55 0.580 0.92 0.26 -0.31 0.758 1.10 0.30 0.33 0.739 1.04 0.28 0.16 0.873
DefaultCondition [OPTOUT]
× Predicted C
0.52 0.16 -2.08 0.037 0.44 0.17 -2.17 0.030 0.51 0.15 -2.33 0.020 0.43 0.14 -2.60 0.009
Observations 1460 1460 1460 1460 1460 1460
AIC 1884.213 1858.463 1869.271 1873.485 1858.182 1863.784
log-Likelihood -936.107 -915.232 -920.635 -922.742 -915.091 -917.892
Anova and Eta Square
calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")
Etasq Table: Default Paradigm
Condition Panel Default Condition:Panel
Default.PAN 0.03801*** 0.00298. 0.01188***
Default.PAN_FACE 0.03801*** 0.00051 0.00159
Default.PAN_Pred_FC.reg_AE 0.03801*** 3e-04 0.00357*
Default.PAN_Pred_FC.net_AE 0.03801*** 0.00043 0.00299.
Default.PAN_Pred_FC.rf_AE 0.03801*** 1e-05 0.0019
Default.PAN_Pred_FC.gbm_AE 0.03801*** 0 0.0024.

Framing (Unusual Disease) Paradigm

Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.full_M5a.gbm.FC, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.full_M5a.gbm.FC, family = binomial)

Disease.PAN_Pred_FC.reg_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.reg.FC, family = binomial)

Disease.PAN_Pred_FC.net_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.net.FC, family = binomial)

Disease.PAN_Pred_FC.rf_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.full_M5a.rf.FC, family = binomial)

Disease.PAN_Pred_FC.gbm_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.full_M5a.gbm.FC, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_FC.reg_AE" = Disease.PAN_Pred_FC.reg_AE,
  "Disease.PAN_Pred_FC.net_AE" = Disease.PAN_Pred_FC.net_AE,
  "Disease.PAN_Pred_FC.rf_AE" = Disease.PAN_Pred_FC.rf_AE, 
  "Disease.PAN_Pred_FC.gbm_AE" = Disease.PAN_Pred_FC.gbm_AE
)

# extract_aic(Disease_model)
Model Results
term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_FC.reg_AE,Disease.PAN_Pred_FC.net_AE,Disease.PAN_Pred_FC.rf_AE,Disease.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result
Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.FC(reg)_AE PAN_Pred.FC(net)_AE PAN_Pred.FC(rf)_AE PAN_Pred.FC(gbm)_AE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.45 0.06 -5.84 <0.001 0.43 0.06 -6.03 <0.001 0.46 0.07 -5.18 <0.001 0.46 0.07 -5.10 <0.001 0.44 0.06 -5.66 <0.001 0.45 0.07 -5.51 <0.001
DiseaseCondition [LOSS] 3.40 0.64 6.46 <0.001 3.69 0.72 6.73 <0.001 3.18 0.66 5.55 <0.001 3.14 0.67 5.38 <0.001 3.42 0.68 6.17 <0.001 3.26 0.66 5.81 <0.001
F 1.07 0.23 0.31 0.755
A 1.40 0.14 3.34 0.001 1.45 0.15 3.59 <0.001 1.48 0.15 3.77 <0.001 1.44 0.15 3.52 <0.001 1.45 0.15 3.61 <0.001
C 0.73 0.12 -1.83 0.067
E 0.89 0.09 -1.14 0.256 0.86 0.08 -1.75 0.080 0.85 0.08 -1.81 0.070 0.88 0.08 -1.37 0.170 0.87 0.08 -1.56 0.118
DiseaseCondition [LOSS] ×
F
1.21 0.35 0.65 0.513
DiseaseCondition [LOSS] ×
A
1.03 0.14 0.23 0.820 0.98 0.14 -0.14 0.887 0.96 0.14 -0.32 0.750 0.98 0.14 -0.13 0.897 0.96 0.13 -0.28 0.782
DiseaseCondition [LOSS] ×
C
1.26 0.28 1.06 0.290
DiseaseCondition [LOSS] ×
E
1.04 0.14 0.27 0.791 1.06 0.13 0.50 0.616 1.07 0.13 0.58 0.563 1.06 0.13 0.42 0.671 1.07 0.13 0.56 0.572
Predicted F 0.99 0.19 -0.08 0.939 1.09 0.23 0.41 0.684 1.06 0.23 0.25 0.803 1.14 0.23 0.64 0.524
Predicted C 0.68 0.16 -1.62 0.104 0.53 0.15 -2.18 0.029 0.65 0.15 -1.93 0.054 0.56 0.14 -2.37 0.018
DiseaseCondition [LOSS] ×
Predicted F
1.30 0.35 0.99 0.322 1.19 0.35 0.60 0.546 1.27 0.38 0.80 0.422 1.31 0.37 0.95 0.340
DiseaseCondition [LOSS] ×
Predicted C
1.52 0.51 1.25 0.213 1.98 0.79 1.71 0.087 1.57 0.48 1.48 0.138 1.73 0.59 1.61 0.108
Observations 1460 1460 1460 1460 1460 1460
AIC 1762.505 1721.769 1720.009 1717.034 1719.467 1714.675
log-Likelihood -875.252 -846.884 -846.005 -844.517 -845.734 -843.338
Anova and Eta Square
calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")
Etasq Table: Disease Paradigm
Condition Panel DiseaseCondition:Panel
Disease.PAN 0.05317*** 0.03246*** 0.01394***
Disease.PAN_FACE 0.05317*** 0.00514** 0.00395*
Disease.PAN_Pred_FC.reg_AE 0.05317*** 0.00541** 0.0039*
Disease.PAN_Pred_FC.net_AE 0.05317*** 0.00569** 0.00341*
Disease.PAN_Pred_FC.rf_AE 0.05317*** 0.00557** 0.00329*
Disease.PAN_Pred_FC.gbm_AE 0.05317*** 0.00697** 0.00304.

Less is More Paradigm

LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.full_M5a.gbm.FC)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.gbm.FC)
LessMore.PAN_Pred_FC.reg_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.reg.FC)
LessMore.PAN_Pred_FC.net_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.net.FC)
LessMore.PAN_Pred_FC.rf_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.full_M5a.rf.FC)
LessMore.PAN_Pred_FC.gbm_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.full_M5a.gbm.FC)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_FC.reg_AE" = LessMore.PAN_Pred_FC.reg_AE,
  "LessMore.PAN_Pred_FC.net_AE" = LessMore.PAN_Pred_FC.net_AE,
  "LessMore.PAN_Pred_FC.rf_AE" = LessMore.PAN_Pred_FC.rf_AE, 
  "LessMore.PAN_Pred_FC.gbm_AE" = LessMore.PAN_Pred_FC.gbm_AE
)
Model Results
term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_FC.reg_AE,LessMore.PAN_Pred_FC.net_AE,LessMore.PAN_Pred_FC.rf_AE,LessMore.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result
LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.FC(reg)_AE PAN_Pred.FC(net)_AE PAN_Pred.FC(rf)_AE PAN_Pred.FC(gbm)_AE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.20 0.09 59.66 <0.001 5.22 0.09 61.17 <0.001 5.24 0.09 57.25 <0.001 5.22 0.09 55.93 <0.001 5.20 0.09 59.60 <0.001 5.24 0.09 59.05 <0.001
LessMoreCondition [SCARF] 1.11 0.12 9.00 <0.001 1.10 0.12 9.13 <0.001 1.06 0.13 8.26 <0.001 1.08 0.13 8.20 <0.001 1.06 0.12 8.57 <0.001 1.03 0.13 8.26 <0.001
F 0.13 0.12 1.12 0.265
A -0.13 0.06 -2.23 0.026 -0.15 0.06 -2.47 0.014 -0.14 0.06 -2.39 0.017 -0.13 0.06 -2.24 0.025 -0.14 0.06 -2.45 0.014
C 0.01 0.09 0.08 0.934
E -0.01 0.06 -0.10 0.919 -0.03 0.05 -0.66 0.508 -0.03 0.05 -0.68 0.499 -0.02 0.05 -0.41 0.685 -0.04 0.05 -0.74 0.459
LessMoreCondition [SCARF]
× F
-0.02 0.17 -0.13 0.899
LessMoreCondition [SCARF]
× A
0.39 0.08 4.82 <0.001 0.41 0.08 5.08 <0.001 0.40 0.08 4.89 <0.001 0.35 0.08 4.36 <0.001 0.38 0.08 4.69 <0.001
LessMoreCondition [SCARF]
× C
0.23 0.13 1.74 0.082
LessMoreCondition [SCARF]
× E
0.01 0.08 0.08 0.940 0.02 0.07 0.22 0.823 0.02 0.07 0.26 0.792 0.03 0.07 0.44 0.659 0.03 0.07 0.42 0.675
Predicted F -0.17 0.11 -1.55 0.122 -0.12 0.12 -1.01 0.313 0.08 0.12 0.66 0.511 -0.14 0.12 -1.24 0.216
Predicted C 0.42 0.14 3.05 0.002 0.36 0.16 2.23 0.026 0.08 0.13 0.65 0.514 0.41 0.14 2.92 0.004
LessMoreCondition [SCARF]
× Predicted F
0.09 0.16 0.60 0.550 0.01 0.17 0.09 0.931 0.05 0.17 0.29 0.773 0.11 0.16 0.66 0.509
LessMoreCondition [SCARF]
× Predicted C
0.02 0.20 0.08 0.938 0.18 0.23 0.79 0.431 0.31 0.18 1.71 0.087 0.14 0.20 0.69 0.488
Observations 1460 1460 1460 1460 1460 1460
R2 / R2 adjusted 0.204 / 0.201 0.248 / 0.241 0.247 / 0.241 0.246 / 0.239 0.251 / 0.245 0.253 / 0.247
Anova and Eta Square
calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")
Etasq Table: LessMore Paradigm
Condition Panel LessMoreCondition:Panel
LessMore.PAN 0.09206*** 0.10649*** 0.00506*
LessMore.PAN_FACE 0.09206*** 0.02908*** 0.00051
LessMore.PAN_Pred_FC.reg_AE 0.09206*** 0.0287*** 0.00079
LessMore.PAN_Pred_FC.net_AE 0.09206*** 0.02819*** 0.00059
LessMore.PAN_Pred_FC.rf_AE 0.09206*** 0.02531*** 0.00112
LessMore.PAN_Pred_FC.gbm_AE 0.09206*** 0.02066*** 0.00102

Sunk Cost Paradigm

Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.full_M5a.gbm.FC)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.gbm.FC)
Sunk.PAN_Pred_FC.reg_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.full_M5a.reg.FC)
Sunk.PAN_Pred_FC.net_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.net.FC)
Sunk.PAN_Pred_FC.rf_AE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.full_M5a.rf.FC)
Sunk.PAN_Pred_FC.gbm_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.full_M5a.gbm.FC)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_FC.reg_ACE" = Sunk.PAN_Pred_FC.reg_AE,
  "Sunk.PAN_Pred_FC.net_ACE" = Sunk.PAN_Pred_FC.net_AE,
  "Sunk.PAN_Pred_FC.rf_ACE" = Sunk.PAN_Pred_FC.rf_AE, #0.302 with unscaled
  "Sunk.PAN_Pred_FC.gbm_ACE" = Sunk.PAN_Pred_FC.gbm_AE
)

# extract_adjusted_r_squared(Sunk_model)
Full Model Results
term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_FC.reg_AE,Sunk.PAN_Pred_FC.net_AE,Sunk.PAN_Pred_FC.rf_AE,Sunk.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_FC(reg)_AE", 
                        "PAN_Pred_FC(net)_AE", 
                        "PAN_Pred_FC(rf)_AE", 
                        "PAN_Pred_FC(gbm)_AE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result
Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred_FC(reg)_AE PAN_Pred_FC(net)_AE PAN_Pred_FC(rf)_AE PAN_Pred_FC(gbm)_AE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.78 0.17 34.79 <0.001 5.77 0.17 34.93 <0.001 5.67 0.18 31.76 <0.001 5.65 0.18 30.98 <0.001 5.73 0.17 33.45 <0.001 5.69 0.17 32.70 <0.001
SunkCondition [PAID] 0.43 0.23 1.85 0.065 0.46 0.23 1.97 0.049 0.40 0.25 1.61 0.107 0.42 0.26 1.64 0.101 0.44 0.24 1.84 0.066 0.46 0.24 1.87 0.062
F 0.39 0.22 1.77 0.077
A 0.07 0.11 0.61 0.539 0.07 0.11 0.65 0.519 0.10 0.11 0.92 0.359 0.07 0.11 0.60 0.546 0.07 0.11 0.64 0.520
C -0.26 0.18 -1.50 0.133
E -0.06 0.11 -0.59 0.558 -0.12 0.10 -1.25 0.212 -0.13 0.10 -1.39 0.165 -0.11 0.10 -1.14 0.254 -0.11 0.10 -1.15 0.250
SunkCondition [PAID] × F 0.38 0.32 1.18 0.239
SunkCondition [PAID] × A 0.02 0.16 0.13 0.895 0.04 0.16 0.26 0.793 0.01 0.16 0.09 0.930 0.05 0.16 0.33 0.741 0.05 0.16 0.33 0.745
SunkCondition [PAID] × C -0.20 0.25 -0.79 0.428
SunkCondition [PAID] × E 0.11 0.15 0.72 0.469 0.05 0.14 0.34 0.730 0.06 0.14 0.43 0.666 0.06 0.14 0.41 0.682 0.03 0.14 0.23 0.817
Predicted F 0.34 0.21 1.60 0.110 0.45 0.24 1.90 0.058 0.18 0.23 0.78 0.435 0.39 0.22 1.74 0.083
Predicted C -0.23 0.27 -0.84 0.403 -0.49 0.33 -1.50 0.133 -0.01 0.24 -0.04 0.967 -0.29 0.27 -1.07 0.286
SunkCondition [PAID] ×
Predicted F
0.20 0.30 0.64 0.519 0.05 0.34 0.15 0.880 0.24 0.33 0.72 0.469 -0.00 0.32 -0.01 0.995
SunkCondition [PAID] ×
Predicted C
-0.08 0.38 -0.21 0.834 0.23 0.45 0.51 0.611 -0.15 0.35 -0.44 0.663 0.14 0.39 0.36 0.720
Observations 1460 1460 1460 1460 1460 1460
R2 / R2 adjusted 0.023 / 0.020 0.042 / 0.033 0.039 / 0.030 0.039 / 0.030 0.036 / 0.027 0.037 / 0.028
Anova and Eta Square
calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")
Etasq Table: SunkCost Paradigm
Condition Panel LessMoreCondition:Panel
Sunk.PAN 0.00794*** 0.01132*** 0.00398.
Sunk.PAN_FACE 0.00794*** 0.00099 0.00205
Sunk.PAN_Pred_FC.reg_ACE 0.00794*** 0.00093 0.0021
Sunk.PAN_Pred_FC.net_ACE 0.00794*** 0.00116 0.00181
Sunk.PAN_Pred_FC.rf_ACE 0.00794*** 0.00038 0.00197
Sunk.PAN_Pred_FC.gbm_ACE 0.00794*** 0.00081 0.00168

Summary

Model Fit
  • \(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)

  • Model order: consistently (1) PAN only; (2) PAN + FACE; (3) PAN + Pred_C(reg)ACE; (4) PAN + Pred_C(net)ACE; (5) PAN + Pred_C(rf)ACE; (6) PAN + Pred_C(gbm)ACE

  • note that the y-axes for different paradigms are not aligned.

#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.full_M5a.gbm.FC,family=binomial)

R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_FC.reg_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.net_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.rf_AE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_FC.gbm_AE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.full_M5a.gbm.FC,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_FC.reg_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.net_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.rf_AE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_FC.gbm_AE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(LessMore.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(LessMore.PAN_Pred_FC.gbm_AE)$r.squared),
                      "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(Sunk.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(Sunk.PAN_Pred_FC.gbm_AE)$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "SunkCost"))

R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),5))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_FC.reg_AE), 
                    AIC(Default.PAN_Pred_FC.net_AE), 
                    AIC(Default.PAN_Pred_FC.rf_AE),
                    AIC(Default.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_FC.reg_AE), 
                    AIC(Disease.PAN_Pred_FC.net_AE), 
                    AIC(Disease.PAN_Pred_FC.rf_AE),
                    AIC(Disease.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  geom_text(aes(label = round(AIC,2)), vjust = -0.5, size = 3, fontface = "bold") + 
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(300, 400))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms with study 5 test data", digits = 2)
Percentage Improvement in R2 (compared to PAN) Across Paradigms with study 5 test data
Model Avg_Improvement
Pan 0.00
Pan+FACE 43.20
Pan+PFC(reg)+AE 37.71
Pan+PFC(net)+AE 36.42
Pan+PFC(rf)+AE 37.30
Pan+PFC(gbm)+AE 37.71
Pan+FACE (Full Study 5) 43.20
Etasq
  • note that the y-axes for different paradigms are not aligned..
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.FC(reg)+AE", "Pan+Pred.FC(net)+AE", "Pan+Pred.FC(rf)+AE", "Pan+Pred.FC(gbm)+AE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.FC(reg)+AE",  
                                   "Pan+Pred.FC(net)+AE", 
                                   "Pan+Pred.FC(rf)+AE",
                                   "Pan+Pred.FC(gbm)+AE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.full_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data", digits = 2)
Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in full study 5 data
Model Avg_Reduction
Pan 0.00
Pan+FACE 74.17
Pan+Pred.FC(reg)+AE 68.40
Pan+Pred.FC(net)+AE 73.31
Pan+Pred.FC(rf)+AE 72.19
Pan+Pred.FC(gbm)+AE 73.91
Pan (Full Study5) 0.00
Pan+FACE (Full Study5) 74.19
F/C*Cond Model Coefficients
  • DVs for Default & Framing were 1/0
  • DVs for LessMore & SunkCost are on original scale
Default.PAN_FACE.Study5<-glm(numDefault~F*DefaultCondition+A*DefaultCondition+C*DefaultCondition+E*DefaultCondition+DefaultCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
Disease.PAN_FACE.Study5<-glm(numDisease~F*DiseaseCondition+A*DiseaseCondition+C*DiseaseCondition+E*DiseaseCondition+DiseaseCondition*Panel,Data_M5a.log_z_dummy_coded,family=binomial)
LessMore.PAN_FACE.Study5<-lm(numLessMore~F*LessMoreCondition+A*LessMoreCondition+C*LessMoreCondition+E*LessMoreCondition+LessMoreCondition*Panel,Data_M5a.log_z_dummy_coded)
Sunk.PAN_FACE.Study5<-lm(numSunkCost~F*SunkCondition+A*SunkCondition+C*SunkCondition+E*SunkCondition+SunkCondition*Panel,Data_M5a.log_z_dummy_coded)

AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_FC.reg_AE),confint(Default.PAN_Pred_FC.reg_AE)),
cbind(coef(Default.PAN_Pred_FC.net_AE),confint(Default.PAN_Pred_FC.net_AE)),
cbind(coef(Default.PAN_Pred_FC.rf_AE),confint(Default.PAN_Pred_FC.rf_AE)),
cbind(coef(Default.PAN_Pred_FC.gbm_AE),confint(Default.PAN_Pred_FC.gbm_AE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_FC.reg_AE),confint(Disease.PAN_Pred_FC.reg_AE)),
cbind(coef(Disease.PAN_Pred_FC.net_AE),confint(Disease.PAN_Pred_FC.net_AE)),
cbind(coef(Disease.PAN_Pred_FC.rf_AE),confint(Disease.PAN_Pred_FC.rf_AE)),
cbind(coef(Disease.PAN_Pred_FC.gbm_AE),confint(Disease.PAN_Pred_FC.gbm_AE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_FC.reg_AE),confint(LessMore.PAN_Pred_FC.reg_AE)),
cbind(coef(LessMore.PAN_Pred_FC.net_AE),confint(LessMore.PAN_Pred_FC.net_AE)),
cbind(coef(LessMore.PAN_Pred_FC.rf_AE),confint(LessMore.PAN_Pred_FC.rf_AE)),
cbind(coef(LessMore.PAN_Pred_FC.gbm_AE),confint(LessMore.PAN_Pred_FC.gbm_AE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_FC.reg_AE),confint(Sunk.PAN_Pred_FC.reg_AE)),
cbind(coef(Sunk.PAN_Pred_FC.net_AE),confint(Sunk.PAN_Pred_FC.net_AE)),
cbind(coef(Sunk.PAN_Pred_FC.rf_AE),confint(Sunk.PAN_Pred_FC.rf_AE)),
cbind(coef(Sunk.PAN_Pred_FC.gbm_AE),confint(Sunk.PAN_Pred_FC.gbm_AE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24),levels = c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Full Study 5)"), each = 10),4), levels = rev(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Full Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)","C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot

3.2 Evaluate Portability with Study 1 Data

3.2.1 Study 1 US Data

data.tst_MP1.US.reg.FC<-cbind(data.tst_MP1.US.reg.C, Predicted_F = M5a_reg_pred_MP1.US)

data.tst_MP1.US.net.FC<-cbind(data.tst_MP1.US.net.C, Predicted_F = M5a_net_pred_MP1.US)
data.tst_MP1.US.rf.FC<-cbind(data.tst_MP1.US.rf.C, Predicted_F = M5a_rf_pred_MP1.US)
data.tst_MP1.US.gbm.FC<-cbind(data.tst_MP1.US.gbm.C, Predicted_F = M5a_gbm_pred_MP1.US)

Correlations Btw PF1, PC1, and Study 1 FACE

Linear Regression
  • Predicted F1 and Predicted C1 highly correlated
ggpairs(data.tst_MP1.US.reg.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Elastic Net
ggpairs(data.tst_MP1.US.net.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Random Forest
ggpairs(data.tst_MP1.US.rf.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Gradient Boosting
ggpairs(data.tst_MP1.US.gbm.FC, c("Predicted_F","F","A","C", "E","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

GBM: Correlation between PF and PC and Study 1 F composite measures
F_measure.Study1<-F_measure.Study1%>%
  filter(!Panel%in%c("Students","Prolific_UK_Rep"))
data.tst_MP1.US.gbm.FC$BNT<-F_measure.Study1$BNT
data.tst_MP1.US.gbm.FC$CRTScore<-F_measure.Study1$CRTScore

ggpairs(data.tst_MP1.US.gbm.FC, c("Predicted_F","BNT","CRTScore","Predicted_C"),
        lower = list(continuous = wrap("points",
                                       position = position_jitter(height = .02, width = .02))),diag = list(continuous = "density"))

Explain Heterogeniety with PF1 and PC1

Default Paradigm
Default.PAN <- glm(numDefault ~ DefaultCondition * Panel, data = data.tst_MP1.US.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_FACE <- glm(numDefault ~ DefaultCondition * F + DefaultCondition * A+ DefaultCondition * C+ DefaultCondition * E + DefaultCondition * Panel , data = data.tst_MP1.US.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.reg_AE <- glm(numDefault ~ DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.reg.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.net_AE <- glm(numDefault ~  DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.net.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.rf_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.rf.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)

Default.PAN_Pred_FC.gbm_AE <- glm(numDefault ~   DefaultCondition * Predicted_F + DefaultCondition * A+ DefaultCondition * Predicted_C+ DefaultCondition * E + DefaultCondition * Panel, data = data.tst_MP1.US.gbm.FC%>%filter(DefaultCondition!="OPTNEUTRAL"), family = binomial)


Default_model <- list(
  "Default.PAN" = Default.PAN,
  "Default.PAN_FACE" = Default.PAN_FACE,
  "Default.PAN_Pred_FC.reg_AE" = Default.PAN_Pred_FC.reg_AE,
  "Default.PAN_Pred_FC.net_AE" = Default.PAN_Pred_FC.net_AE,
  "Default.PAN_Pred_FC.rf_AE" = Default.PAN_Pred_FC.rf_AE,
  "Default.PAN_Pred_FC.gbm_AE" = Default.PAN_Pred_FC.gbm_AE
)
Model Results
term_names <- names(coef(Default.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Default_result<-tab_model(Default.PAN, Default.PAN_FACE, Default.PAN_Pred_FC.reg_AE,Default.PAN_Pred_FC.net_AE,Default.PAN_Pred_FC.rf_AE,Default.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Default_result
Default Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.FC(reg)_AE PAN_Pred.FC(net)_AE PAN_Pred.FC(rf)_AE PAN_Pred.FC(gbm)_AE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.88 0.11 -1.06 0.291 0.87 0.11 -1.14 0.253 0.85 0.11 -1.21 0.226 0.84 0.11 -1.26 0.208 0.81 0.11 -1.57 0.117 0.89 0.12 -0.85 0.397
DefaultCondition [OPTOUT] 3.10 0.56 6.31 <0.001 3.22 0.59 6.42 <0.001 3.01 0.56 5.88 <0.001 3.01 0.58 5.74 <0.001 3.27 0.61 6.32 <0.001 2.96 0.56 5.77 <0.001
F 1.57 0.16 4.48 <0.001
A 1.41 0.17 2.87 0.004 1.12 0.15 0.91 0.365 1.10 0.14 0.73 0.464 1.01 0.14 0.09 0.932 1.13 0.14 0.95 0.341
C 1.02 0.05 0.39 0.698
E 0.86 0.07 -1.93 0.054 0.88 0.07 -1.71 0.088 0.88 0.07 -1.68 0.092 0.88 0.07 -1.61 0.107 0.87 0.07 -1.73 0.084
DefaultCondition [OPTOUT]
× F
1.03 0.16 0.17 0.867
DefaultCondition [OPTOUT]
× A
0.44 0.08 -4.53 <0.001 0.53 0.10 -3.29 0.001 0.54 0.10 -3.17 0.002 0.53 0.10 -3.26 0.001 0.50 0.09 -3.68 <0.001
DefaultCondition [OPTOUT]
× C
1.18 0.10 1.98 0.048
DefaultCondition [OPTOUT]
× E
1.23 0.14 1.79 0.073 1.18 0.14 1.44 0.151 1.18 0.14 1.45 0.148 1.21 0.14 1.62 0.106 1.23 0.14 1.77 0.076
Predicted F 1.31 0.15 2.34 0.019 1.30 0.16 2.05 0.040 1.18 0.13 1.48 0.138 1.16 0.14 1.26 0.208
Predicted C 1.34 0.23 1.72 0.086 1.43 0.29 1.78 0.076 2.11 0.48 3.30 0.001 1.76 0.31 3.17 0.002
DefaultCondition [OPTOUT]
× Predicted F
1.09 0.18 0.49 0.624 1.07 0.20 0.35 0.727 0.99 0.16 -0.08 0.938 1.20 0.21 1.07 0.286
DefaultCondition [OPTOUT]
× Predicted C
0.58 0.14 -2.18 0.029 0.57 0.17 -1.88 0.060 0.65 0.21 -1.30 0.193 0.55 0.14 -2.29 0.022
Observations 3541 3541 3541 3541 3541 3541
AIC 4466.919 4382.086 4391.381 4393.132 4386.772 4381.301
log-Likelihood -2215.460 -2165.043 -2169.691 -2170.566 -2167.386 -2164.650
Anova and Eta Square
calculate_deviance_explained.Default <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for Panel and DefaultCondition:Panel interaction
  dev_cond <- anova_res["DefaultCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DefaultCondition:Panel", "Deviance"]
  
  p_cond <- anova_res["DefaultCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DefaultCondition:Panel", "Pr(>Chi)"]
  
sig_cond <- ifelse(p_cond < 0.001, "***", 
             ifelse(p_cond < 0.01, "**", 
             ifelse(p_cond < 0.05, "*", 
             ifelse(p_cond < 0.1, ".", ""))))

sig_panel <- ifelse(p_panel < 0.001, "***", 
              ifelse(p_panel < 0.01, "**", 
              ifelse(p_panel < 0.05, "*", 
              ifelse(p_panel < 0.1, ".", ""))))

sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                   ifelse(p_interaction < 0.01, "**", 
                   ifelse(p_interaction < 0.05, "*", 
                   ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total) 
  perc_panel <- (dev_panel / dev_total) 
  perc_interaction <- (dev_interaction / dev_total) 
  
  res_cond <- paste0(round(perc_cond , 5),  sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}

# apply the fucntion to all the models in default paradigm
deviance_explained <- sapply(Default_model, calculate_deviance_explained.Default)

# Convert to a data frame for easy viewing
deviance_explained_df.Default <- as.data.frame(t(deviance_explained)) # view how this looks like
colnames(deviance_explained_df.Default) <- c("Condition","Panel", "Default Condition:Panel")

# display
kable(deviance_explained_df.Default,caption = "Etasq Table: Default Paradigm")
Etasq Table: Default Paradigm
Condition Panel Default Condition:Panel
Default.PAN 0.05063*** 0.00612*** 0.00751***
Default.PAN_FACE 0.05063*** 0.00456** 0.0031.
Default.PAN_Pred_FC.reg_AE 0.05063*** 0.00378* 0.00195
Default.PAN_Pred_FC.net_AE 0.05063*** 0.00388* 0.00196
Default.PAN_Pred_FC.rf_AE 0.05063*** 0.00351* 0.00263
Default.PAN_Pred_FC.gbm_AE 0.05063*** 0.00353* 0.00277
Framing (Unusual Disease) Paradigm
Disease.PAN <- glm(numDisease ~ DiseaseCondition * Panel, data = data.tst_MP1.US.gbm.FC, family = binomial)
Disease.PAN_FACE <- glm(numDisease ~  DiseaseCondition * F + DiseaseCondition * A+ DiseaseCondition * C+ DiseaseCondition * E+DiseaseCondition * Panel , data = data.tst_MP1.US.gbm.FC, family = binomial)

Disease.PAN_Pred_FC.reg_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.reg.FC, family = binomial)

Disease.PAN_Pred_FC.net_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.net.FC, family = binomial)

Disease.PAN_Pred_FC.rf_AE <- glm(numDisease ~   DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel, data = data.tst_MP1.US.rf.FC, family = binomial)

Disease.PAN_Pred_FC.gbm_AE <- glm(numDisease ~  DiseaseCondition * Predicted_F + DiseaseCondition * A+ DiseaseCondition * Predicted_C+ DiseaseCondition * E + DiseaseCondition * Panel , data = data.tst_MP1.US.gbm.FC, family = binomial)


Disease_model <- list(
  "Disease.PAN" = Disease.PAN,
  "Disease.PAN_FACE" = Disease.PAN_FACE,
  "Disease.PAN_Pred_FC.reg_AE" = Disease.PAN_Pred_FC.reg_AE,
  "Disease.PAN_Pred_FC.net_AE" = Disease.PAN_Pred_FC.net_AE,
  "Disease.PAN_Pred_FC.rf_AE" = Disease.PAN_Pred_FC.rf_AE, 
  "Disease.PAN_Pred_FC.gbm_AE" = Disease.PAN_Pred_FC.gbm_AE
)

# extract_aic(Disease_model)
Model Results
term_names <- names(coef(Disease.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Disease_result<-tab_model(Disease.PAN, Disease.PAN_FACE, Disease.PAN_Pred_FC.reg_AE,Disease.PAN_Pred_FC.net_AE,Disease.PAN_Pred_FC.rf_AE,Disease.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=0,show.aic = 1,
          show.loglik = 1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_AE", 
                        "PAN_Pred.FC(net)_AE", 
                        "PAN_Pred.FC(rf)_AE", 
                        "PAN_Pred.FC(gbm)_AE"), title = "Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Disease_result
Unusual Disease Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.FC(reg)_AE PAN_Pred.FC(net)_AE PAN_Pred.FC(rf)_AE PAN_Pred.FC(gbm)_AE
Predictors Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p Odds Ratios std. Error Statistic p
(Intercept) 0.49 0.05 -6.86 <0.001 0.47 0.05 -7.15 <0.001 0.47 0.05 -6.95 <0.001 0.46 0.05 -6.91 <0.001 0.50 0.05 -6.41 <0.001 0.46 0.05 -7.00 <0.001
DiseaseCondition [LOSS] 2.72 0.39 6.97 <0.001 2.91 0.43 7.29 <0.001 2.83 0.43 6.88 <0.001 2.89 0.45 6.85 <0.001 2.62 0.39 6.39 <0.001 2.92 0.44 7.08 <0.001
F 1.30 0.12 2.82 0.005
A 1.66 0.18 4.58 <0.001 1.96 0.23 5.85 <0.001 2.02 0.24 5.99 <0.001 1.90 0.23 5.34 <0.001 1.89 0.21 5.62 <0.001
C 0.79 0.04 -4.89 <0.001
E 0.96 0.06 -0.67 0.501 0.88 0.06 -1.86 0.062 0.88 0.06 -1.93 0.053 0.88 0.06 -1.83 0.068 0.89 0.06 -1.75 0.080
DiseaseCondition [LOSS] ×
F
0.87 0.11 -1.14 0.254
DiseaseCondition [LOSS] ×
A
1.01 0.15 0.07 0.947 0.80 0.13 -1.41 0.159 0.76 0.12 -1.76 0.078 0.78 0.13 -1.56 0.118 0.81 0.12 -1.37 0.170
DiseaseCondition [LOSS] ×
C
1.17 0.08 2.39 0.017
DiseaseCondition [LOSS] ×
E
0.79 0.07 -2.58 0.010 0.87 0.08 -1.55 0.120 0.88 0.08 -1.40 0.163 0.88 0.08 -1.38 0.166 0.87 0.08 -1.46 0.145
Predicted F 0.99 0.10 -0.14 0.890 1.01 0.11 0.10 0.916 0.85 0.08 -1.64 0.101 0.99 0.10 -0.09 0.930
Predicted C 0.73 0.11 -2.12 0.034 0.66 0.12 -2.35 0.019 0.96 0.19 -0.19 0.849 0.75 0.12 -1.79 0.073
DiseaseCondition [LOSS] ×
Predicted F
1.11 0.15 0.79 0.432 1.07 0.16 0.44 0.659 1.27 0.17 1.77 0.077 1.07 0.15 0.48 0.633
DiseaseCondition [LOSS] ×
Predicted C
1.47 0.29 1.90 0.057 1.79 0.43 2.44 0.015 1.38 0.37 1.20 0.229 1.72 0.38 2.50 0.012
Observations 5336 5336 5336 5336 5336 5336
AIC 6723.912 6573.688 6589.687 6585.017 6589.809 6586.068
log-Likelihood -3343.956 -3260.844 -3268.843 -3266.509 -3268.905 -3267.034
Anova and Eta Square
calculate_deviance_explained.Disease <- function(model) {
  anova_res <- anova(model)
  
  # Extract deviance for DiseaseCondition, Panel, and DiseaseCondition:Panel interaction
  dev_cond <- anova_res["DiseaseCondition", "Deviance"]
  dev_panel <- anova_res["Panel", "Deviance"]
  dev_interaction <- anova_res["DiseaseCondition:Panel", "Deviance"]
  
  # Extract p-values
  p_cond <- anova_res["DiseaseCondition", "Pr(>Chi)"]
  p_panel <- anova_res["Panel", "Pr(>Chi)"]
  p_interaction <- anova_res["DiseaseCondition:Panel", "Pr(>Chi)"]
  
  # Determine significance levels
  sig_cond <- ifelse(p_cond < 0.001, "***", 
               ifelse(p_cond < 0.01, "**", 
               ifelse(p_cond < 0.05, "*", 
               ifelse(p_cond < 0.1, ".", ""))))

  sig_panel <- ifelse(p_panel < 0.001, "***", 
                ifelse(p_panel < 0.01, "**", 
                ifelse(p_panel < 0.05, "*", 
                ifelse(p_panel < 0.1, ".", ""))))

  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total deviance
  dev_total <- sum(anova_res[1, "Resid. Dev"])
  
  # Calculate percentage of deviance explained
  perc_cond <- (dev_cond / dev_total)
  perc_panel <- (dev_panel / dev_total)
  perc_interaction <- (dev_interaction / dev_total)
  
  # Combine percentage and significance
  res_cond <- paste0(round(perc_cond, 5), sig_cond)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_cond, res_panel, res_interaction))
}


# Apply the function to each model
deviance_explained <- sapply(Disease_model, calculate_deviance_explained.Disease)

# Convert to a data frame for easy viewing
deviance_explained_df.Disease <- as.data.frame(t(deviance_explained))
colnames(deviance_explained_df.Disease) <- c("Condition","Panel", "DiseaseCondition:Panel")

# Display the results
kable(deviance_explained_df.Disease,caption = "Etasq Table: Disease Paradigm")
Etasq Table: Disease Paradigm
Condition Panel DiseaseCondition:Panel
Disease.PAN 0.05826*** 0.01261*** 0.0093***
Disease.PAN_FACE 0.05826*** 0.00366*** 0.00557***
Disease.PAN_Pred_FC.reg_AE 0.05826*** 0.0033** 0.0036***
Disease.PAN_Pred_FC.net_AE 0.05826*** 0.00329** 0.00343**
Disease.PAN_Pred_FC.rf_AE 0.05826*** 0.00304** 0.00349**
Disease.PAN_Pred_FC.gbm_AE 0.05826*** 0.00316** 0.00334**
Less is More Paradigm
LessMore.PAN <- lm(numLessMore ~ LessMoreCondition * Panel, data = data.tst_MP1.US.gbm.FC)
LessMore.PAN_FACE <- lm(numLessMore ~  LessMoreCondition * F + LessMoreCondition * A+ LessMoreCondition * C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.gbm.FC)
LessMore.PAN_Pred_FC.reg_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.reg.FC)
LessMore.PAN_Pred_FC.net_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.net.FC)
LessMore.PAN_Pred_FC.rf_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel, data = data.tst_MP1.US.rf.FC)
LessMore.PAN_Pred_FC.gbm_AE <- lm(numLessMore ~  LessMoreCondition * Predicted_F + LessMoreCondition * A+ LessMoreCondition * Predicted_C+ LessMoreCondition * E + LessMoreCondition * Panel , data = data.tst_MP1.US.gbm.FC)


LessMore_model <- list(
  "LessMore.PAN" = LessMore.PAN,
  "LessMore.PAN_FACE" = LessMore.PAN_FACE,
  "LessMore.PAN_Pred_FC.reg_AE" = LessMore.PAN_Pred_FC.reg_AE,
  "LessMore.PAN_Pred_FC.net_AE" = LessMore.PAN_Pred_FC.net_AE,
  "LessMore.PAN_Pred_FC.rf_AE" = LessMore.PAN_Pred_FC.rf_AE, 
  "LessMore.PAN_Pred_FC.gbm_AE" = LessMore.PAN_Pred_FC.gbm_AE
)
Model Results
term_names <- names(coef(LessMore.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

LessMore_result<-tab_model(LessMore.PAN, LessMore.PAN_FACE, LessMore.PAN_Pred_FC.reg_AE,LessMore.PAN_Pred_FC.net_AE,LessMore.PAN_Pred_FC.rf_AE,LessMore.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred.FC(reg)_ACE", 
                        "PAN_Pred.FC(net)_ACE", 
                        "PAN_Pred.FC(rf)_ACE", 
                        "PAN_Pred.FC(gbm)_ACE"), title = "LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
LessMore_result
LessMore Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred.FC(reg)_ACE PAN_Pred.FC(net)_ACE PAN_Pred.FC(rf)_ACE PAN_Pred.FC(gbm)_ACE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.37 0.07 79.64 <0.001 5.36 0.07 81.93 <0.001 5.38 0.07 80.03 <0.001 5.38 0.07 78.08 <0.001 5.39 0.07 80.82 <0.001 5.37 0.07 80.00 <0.001
LessMoreCondition [SCARF] 0.95 0.09 10.04 <0.001 0.99 0.09 10.82 <0.001 0.95 0.09 10.08 <0.001 0.95 0.10 9.80 <0.001 0.93 0.09 9.93 <0.001 0.95 0.09 10.11 <0.001
F -0.11 0.05 -1.99 0.046
A 0.38 0.06 6.14 <0.001 0.29 0.07 4.51 <0.001 0.28 0.07 4.21 <0.001 0.18 0.07 2.73 0.006 0.32 0.06 4.93 <0.001
C 0.02 0.03 0.58 0.562
E -0.00 0.04 -0.09 0.930 0.04 0.04 0.90 0.368 0.04 0.04 1.03 0.301 0.05 0.04 1.16 0.247 0.04 0.04 0.96 0.340
LessMoreCondition [SCARF]
× F
0.28 0.08 3.67 <0.001
LessMoreCondition [SCARF]
× A
0.45 0.09 5.04 <0.001 0.41 0.09 4.43 <0.001 0.40 0.09 4.27 <0.001 0.42 0.10 4.35 <0.001 0.40 0.09 4.36 <0.001
LessMoreCondition [SCARF]
× C
-0.01 0.04 -0.14 0.886
LessMoreCondition [SCARF]
× E
-0.12 0.06 -2.24 0.025 -0.14 0.06 -2.59 0.010 -0.14 0.06 -2.56 0.010 -0.14 0.06 -2.55 0.011 -0.14 0.06 -2.50 0.012
Predicted F -0.02 0.06 -0.38 0.706 -0.01 0.07 -0.21 0.836 -0.19 0.06 -3.26 0.001 0.01 0.06 0.16 0.875
Predicted C 0.22 0.09 2.51 0.012 0.25 0.10 2.43 0.015 0.79 0.11 6.92 <0.001 0.18 0.09 1.95 0.052
LessMoreCondition [SCARF]
× Predicted F
0.17 0.08 2.04 0.041 0.16 0.09 1.68 0.092 0.28 0.08 3.45 0.001 0.19 0.09 2.14 0.033
LessMoreCondition [SCARF]
× Predicted C
-0.09 0.12 -0.78 0.438 -0.05 0.15 -0.34 0.733 -0.32 0.16 -1.96 0.050 -0.08 0.13 -0.58 0.560
Observations 5336 5336 5336 5336 5336 5336
R2 / R2 adjusted 0.173 / 0.170 0.224 / 0.220 0.228 / 0.225 0.230 / 0.226 0.238 / 0.234 0.229 / 0.226
Anova and Eta Square
calculate_variance_explained_LessMore <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for LessMoreCondition, Panel, and LessMoreCondition:Panel interaction
  ss_condition <- anova_res["LessMoreCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["LessMoreCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["LessMoreCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["LessMoreCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- ss_condition / ss_total
  perc_panel <- ss_panel / ss_total
  perc_interaction <- ss_interaction / ss_total
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}

variance_explained_lm <- sapply(LessMore_model, calculate_variance_explained_LessMore)

# Convert to a data frame for easy viewing
variance_explained_lm_df.LessMore <- as.data.frame(t(variance_explained_lm))
colnames(variance_explained_lm_df.LessMore) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.LessMore,caption = "Etasq Table: LessMore Paradigm")
Etasq Table: LessMore Paradigm
Condition Panel LessMoreCondition:Panel
LessMore.PAN 0.12958*** 0.03699*** 0.00642***
LessMore.PAN_FACE 0.12958*** 0.01349*** 0.00148
LessMore.PAN_Pred_FC.reg_AE 0.12958*** 0.01225*** 0.0016
LessMore.PAN_Pred_FC.net_AE 0.12958*** 0.01226*** 0.0016
LessMore.PAN_Pred_FC.rf_AE 0.12958*** 0.00993*** 0.00168
LessMore.PAN_Pred_FC.gbm_AE 0.12958*** 0.01152*** 0.00155
Sunk Cost Paradigm
Sunk.PAN <- lm(numSunkCost~ SunkCondition * Panel, data = data.tst_MP1.US.gbm.FC)
Sunk.PAN_FACE <- lm(numSunkCost ~  SunkCondition * F + SunkCondition * A+ SunkCondition * C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm.FC)
Sunk.PAN_Pred_FC.reg_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E+SunkCondition * Panel, data = data.tst_MP1.US.reg.FC)
Sunk.PAN_Pred_FC.net_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.net.FC)
Sunk.PAN_Pred_FC.rf_AE <- lm(numSunkCost ~ SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E  + SunkCondition * Panel, data = data.tst_MP1.US.rf.FC)
Sunk.PAN_Pred_FC.gbm_AE <- lm(numSunkCost ~  SunkCondition * Predicted_F + SunkCondition * A+ SunkCondition * Predicted_C+ SunkCondition * E + SunkCondition * Panel, data = data.tst_MP1.US.gbm.FC)


Sunk_model <- list(
  "Sunk.PAN" = Sunk.PAN,
  "Sunk.PAN_FACE" = Sunk.PAN_FACE,
  "Sunk.PAN_Pred_FC.reg_ACE" = Sunk.PAN_Pred_FC.reg_AE,
  "Sunk.PAN_Pred_FC.net_ACE" = Sunk.PAN_Pred_FC.net_AE,
  "Sunk.PAN_Pred_FC.rf_ACE" = Sunk.PAN_Pred_FC.rf_AE, #0.302 with unscaled
  "Sunk.PAN_Pred_FC.gbm_ACE" = Sunk.PAN_Pred_FC.gbm_AE
)

# extract_adjusted_r_squared(Sunk_model)
Full Model Results
term_names <- names(coef(Sunk.PAN))
panel_terms <- term_names[grepl("Panel", term_names)]

Sunk_result<-tab_model(Sunk.PAN, Sunk.PAN_FACE, Sunk.PAN_Pred_FC.reg_AE,Sunk.PAN_Pred_FC.net_AE,Sunk.PAN_Pred_FC.rf_AE,Sunk.PAN_Pred_FC.gbm_AE,
          show.ci = FALSE, show.stat = TRUE, show.se = TRUE, show.re.var = TRUE, 
          show.icc = FALSE, show.r2=1,
          show.fstat = 0,
          rm.terms = panel_terms,
          dv.labels = c("PAN", 
                        "PAN_FACE", 
                        "PAN_Pred_FC(reg)_AE", 
                        "PAN_Pred_FC(net)_AE", 
                        "PAN_Pred_FC(rf)_AE", 
                        "PAN_Pred_FC(gbm)_AE"), title = "Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability")
Sunk_result
Sunk Cost Paradigm; ✔ Panel and interaction terms are included in the model but omitted for readability
  PAN PAN_FACE PAN_Pred_FC(reg)_AE PAN_Pred_FC(net)_AE PAN_Pred_FC(rf)_AE PAN_Pred_FC(gbm)_AE
Predictors Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p Estimates std. Error Statistic p
(Intercept) 5.46 0.14 39.29 <0.001 5.46 0.14 39.54 <0.001 5.41 0.14 37.90 <0.001 5.40 0.15 36.89 <0.001 5.40 0.14 37.96 <0.001 5.32 0.14 37.35 <0.001
SunkCondition [PAID] 0.70 0.20 3.48 0.001 0.70 0.20 3.52 <0.001 0.62 0.20 3.01 0.003 0.60 0.21 2.87 0.004 0.66 0.20 3.21 0.001 0.74 0.20 3.62 <0.001
F 0.32 0.11 2.82 0.005
A 0.59 0.14 4.33 <0.001 0.51 0.14 3.54 <0.001 0.50 0.15 3.41 0.001 0.56 0.15 3.79 <0.001 0.56 0.14 3.99 <0.001
C 0.10 0.06 1.71 0.087
E 0.20 0.08 2.32 0.021 0.19 0.08 2.26 0.024 0.20 0.09 2.34 0.020 0.18 0.09 2.12 0.034 0.21 0.09 2.46 0.014
SunkCondition [PAID] × F 0.18 0.16 1.10 0.270
SunkCondition [PAID] × A 0.02 0.19 0.11 0.914 -0.07 0.20 -0.32 0.745 -0.06 0.21 -0.31 0.755 -0.31 0.21 -1.49 0.137 -0.15 0.20 -0.78 0.436
SunkCondition [PAID] × C -0.02 0.08 -0.18 0.854
SunkCondition [PAID] × E -0.04 0.12 -0.31 0.753 -0.01 0.12 -0.12 0.907 -0.02 0.12 -0.14 0.890 0.01 0.12 0.12 0.902 -0.02 0.12 -0.15 0.881
Predicted F 0.21 0.13 1.62 0.105 0.23 0.14 1.65 0.100 0.21 0.13 1.63 0.103 0.53 0.13 3.97 <0.001
Predicted C -0.02 0.19 -0.12 0.901 -0.02 0.22 -0.10 0.924 -0.20 0.25 -0.78 0.433 -0.61 0.20 -3.00 0.003
SunkCondition [PAID] ×
Predicted F
0.40 0.18 2.19 0.029 0.38 0.20 1.87 0.062 0.07 0.18 0.41 0.679 0.01 0.19 0.07 0.944
SunkCondition [PAID] ×
Predicted C
-0.21 0.27 -0.80 0.422 -0.19 0.32 -0.59 0.553 0.82 0.35 2.32 0.021 0.57 0.29 1.98 0.048
Observations 5336 5336 5336 5336 5336 5336
R2 / R2 adjusted 0.025 / 0.022 0.040 / 0.035 0.040 / 0.035 0.040 / 0.035 0.040 / 0.035 0.042 / 0.037
Anova and Eta Square
calculate_variance_explained_Sunk <- function(model) {
  anova_res <- anova(model)
  
  # Extract sum of squares for SunkCondition, Panel, and SunkCondition:Panel interaction
  ss_condition <- anova_res["SunkCondition", "Sum Sq"]
  ss_panel <- anova_res["Panel", "Sum Sq"]
  ss_interaction <- anova_res["SunkCondition:Panel", "Sum Sq"]
  
  # Extract p-values
  p_condition <- anova_res["SunkCondition", "Pr(>F)"]
  p_panel <- anova_res["Panel", "Pr(>F)"]
  p_interaction <- anova_res["SunkCondition:Panel", "Pr(>F)"]
  
  # Determine significance levels
  sig_condition <- ifelse(p_condition < 0.001, "***", 
                   ifelse(p_condition < 0.01, "**", 
                   ifelse(p_condition < 0.05, "*", 
                   ifelse(p_condition < 0.1, ".", ""))))
  
  sig_panel <- ifelse(p_panel < 0.001, "***", 
               ifelse(p_panel < 0.01, "**", 
               ifelse(p_panel < 0.05, "*", 
               ifelse(p_panel < 0.1, ".", ""))))
  
  sig_interaction <- ifelse(p_interaction < 0.001, "***", 
                     ifelse(p_interaction < 0.01, "**", 
                     ifelse(p_interaction < 0.05, "*", 
                     ifelse(p_interaction < 0.1, ".", ""))))
  
  # Calculate total sum of squares
  ss_total <- sum(anova_res[, "Sum Sq"])
  
  # Calculate percentage of variance explained
  perc_condition <- (ss_condition / ss_total)
  perc_panel <- (ss_panel / ss_total)
  perc_interaction <- (ss_interaction / ss_total)
  
  # Combine percentage and significance
  res_condition <- paste0(round(perc_condition, 5), sig_condition)
  res_panel <- paste0(round(perc_panel, 5), sig_panel)
  res_interaction <- paste0(round(perc_interaction, 5), sig_interaction)
  
  return(c(res_condition, res_panel, res_interaction))
}


variance_explained_lm <- sapply(Sunk_model, calculate_variance_explained_Sunk)

# Convert to a data frame for easy viewing
variance_explained_lm_df.SunkCost <- as.data.frame(t(variance_explained_lm))

colnames(variance_explained_lm_df.SunkCost) <- c("Condition","Panel", "LessMoreCondition:Panel")

# Display the results
kable(variance_explained_lm_df.SunkCost,caption = "Etasq Table: SunkCost Paradigm")
Etasq Table: SunkCost Paradigm
Condition Panel LessMoreCondition:Panel
Sunk.PAN 0.01083*** 0.00971*** 0.00447**
Sunk.PAN_FACE 0.01083*** 0.00563*** 0.00226
Sunk.PAN_Pred_FC.reg_ACE 0.01083*** 0.0074*** 0.00205
Sunk.PAN_Pred_FC.net_ACE 0.01083*** 0.00753*** 0.00209
Sunk.PAN_Pred_FC.rf_ACE 0.01083*** 0.0085*** 0.00166
Sunk.PAN_Pred_FC.gbm_ACE 0.01083*** 0.00604*** 0.00178
Summary
Model Fit
  • \(R^2\): \(R^2\) for linear regressions, and \(1 − \frac{\log L(\text{model})}{\log L(\text{null model})}\) for logistic regressions (as in the paper)

  • Model order: consistently (1) PAN only; (2) PAN + FACE; (3) PAN + Pred_C(reg)ACE; (4) PAN + Pred_C(net)ACE; (5) PAN + Pred_C(rf)ACE; (6) PAN + Pred_C(gbm)ACE

  • note that the y-axes for different paradigms are not aligned.

# Need a plot here for the AIC of the different models
#following antonia's analysis here...
Default.null<-glm(numDefault~1,data.tst_MP1.US.gbm.FC,family=binomial)
R2.Default = as.data.frame(cbind( "R2" = c((1-logLik(Default.PAN)/logLik(Default.null)),
                    (1-logLik(Default.PAN_FACE)/logLik(Default.null)),  
                    (1-logLik(Default.PAN_Pred_FC.reg_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.net_AE)/logLik(Default.null)), 
                    (1-logLik(Default.PAN_Pred_FC.rf_AE)/logLik(Default.null)),
                    (1-logLik(Default.PAN_Pred_FC.gbm_AE)/logLik(Default.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))


Disease.null<-glm(numDisease~1,data.tst_MP1.US.gbm.FC,family=binomial)
R2.Disease = as.data.frame(cbind( "R2" = c((1-logLik(Disease.PAN)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_FACE)/logLik(Disease.null)),  
                    (1-logLik(Disease.PAN_Pred_FC.reg_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.net_AE)/logLik(Disease.null)), 
                    (1-logLik(Disease.PAN_Pred_FC.rf_AE)/logLik(Disease.null)),
                    (1-logLik(Disease.PAN_Pred_FC.gbm_AE)/logLik(Disease.null))),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

R2.LessMore = as.data.frame(cbind( "R2" = c(summary(LessMore.PAN)$r.squared,
                     summary(LessMore.PAN_FACE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(LessMore.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(LessMore.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(LessMore.PAN_Pred_FC.gbm_AE)$r.squared),
                      "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "LessMore"))

R2.Sunk = as.data.frame(cbind( "R2" = c(summary(Sunk.PAN)$r.squared,
                     summary(Sunk.PAN_FACE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.reg_AE)$r.squared,  
                     summary(Sunk.PAN_Pred_FC.net_AE)$r.squared, 
                     summary(Sunk.PAN_Pred_FC.rf_AE)$r.squared,
                     summary(Sunk.PAN_Pred_FC.gbm_AE)$r.squared),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "SunkCost "))

R2<-rbind(R2.Default,R2.Disease,R2.LessMore,R2.Sunk)%>%
  mutate(R2=round(as.numeric(R2),5))

R2$Model <- factor(R2$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(R2, aes(x = Model, y = R2, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab(bquote(R^2)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

AIC.Default = as.data.frame(cbind( "AIC" = c(AIC(Default.PAN),
                    AIC(Default.PAN_FACE),  
                    AIC(Default.PAN_Pred_FC.reg_AE), 
                    AIC(Default.PAN_Pred_FC.net_AE), 
                    AIC(Default.PAN_Pred_FC.rf_AE),
                    AIC(Default.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Default"))



AIC.Disease = as.data.frame(cbind( "AIC" = c(AIC(Disease.PAN),
                    AIC(Disease.PAN_FACE),  
                    AIC(Disease.PAN_Pred_FC.reg_AE), 
                    AIC(Disease.PAN_Pred_FC.net_AE), 
                    AIC(Disease.PAN_Pred_FC.rf_AE),
                    AIC(Disease.PAN_Pred_FC.gbm_AE)),
                     "Model" = c("Pan", "Pan+FACE",  "Pan+PFC(reg)+AE",  "Pan+PFC(net)+AE" , "Pan+PFC(rf)+AE","Pan+PFC(gbm)+AE"), 
            "Paradigm" = "Disease"))

AIC<-rbind(AIC.Default,AIC.Disease)%>%
  mutate(AIC=round(as.numeric(AIC),5))

AIC$Model <- factor(AIC$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+PFC(reg)+AE",  
                                   "Pan+PFC(net)+AE", 
                                   "Pan+PFC(rf)+AE",
                                   "Pan+PFC(gbm)+AE"),
                        ordered = TRUE)

library(RColorBrewer)

ggplot(AIC, aes(x = Model, y = AIC, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  geom_text(aes(label = round(AIC,2)), vjust = -0.5, size = 3, fontface = "bold") + 
  theme_bw() +
  ylab(bquote(AIC)) +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")+
  coord_cartesian(ylim = c(4000, 7000))

R2$R2 <- round(as.numeric(R2$R2), 3)

R2 <- R2 %>%
  group_by(Paradigm) %>%
  mutate(Improvement = (R2 - R2[Model == "Pan"]) / R2[Model == "Pan"] * 100)

avg_improvement <- R2 %>%
  group_by(Model) %>%
  summarize(Avg_Improvement = mean(Improvement, na.rm = TRUE))


avg_improvemnt_combined <- rbind(avg_improvement, avg_improvement_full_study_5)

kable(avg_improvemnt_combined, caption = "Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data", digits = 2)
Percentage Improvement in R2 (compared to PAN) Across Paradigms with Study 1 data
Model Avg_Improvement
Pan 0.00
Pan+FACE 30.51
Pan+PFC(reg)+AE 30.39
Pan+PFC(net)+AE 30.68
Pan+PFC(rf)+AE 31.90
Pan+PFC(gbm)+AE 32.67
Pan+FACE (Full Study 5) 43.20
Etasq
  • note that the y-axes for different paradigms are not aligned..
  • We have observed a slight improvement in explaining heterogeneity across all methods, compared to what we initially had. I think this is due to our predictors being more refined: For example, we used longitude and latitude info to swap “minutes since midnight (EST)” to “clock time.
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Default.Study5$Paradigm <- "Default"

deviance_explained_df.Disease$Paradigm <- "Disease"
deviance_explained_df.Disease.Study5$Paradigm <- "Disease"

variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.LessMore.Study5$Paradigm <- "LessMore"

variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"
variance_explained_lm_df.SunkCost.Study5$Paradigm <- "SunkCost"


Model <- c("Pan", "Pan+FACE", "Pan+Pred.FC(reg)+AE", "Pan+Pred.FC(net)+AE", "Pan+Pred.FC(rf)+AE", "Pan+Pred.FC(gbm)+AE")
Model.Study5 <- c("Pan (Full Study5)", "Pan+FACE (Full Study5)")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

deviance_explained_df.Default.Study5$Model<-Model.Study5
deviance_explained_df.Disease.Study5$Model<-Model.Study5
variance_explained_lm_df.LessMore.Study5$Model<-Model.Study5
variance_explained_lm_df.SunkCost.Study5$Model<-Model.Study5

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

colnames(deviance_explained_df.Default.Study5)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore.Study5)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost.Study5)[3] <- "Condition:Panel"


etasq <- rbind(deviance_explained_df.Default, 
               deviance_explained_df.Default.Study5,
               deviance_explained_df.Disease, 
               deviance_explained_df.Disease.Study5,
               variance_explained_lm_df.LessMore, 
               variance_explained_lm_df.LessMore.Study5,
               variance_explained_lm_df.SunkCost,
               variance_explained_lm_df.SunkCost.Study5)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.FC(reg)+AE",  
                                   "Pan+Pred.FC(net)+AE", 
                                   "Pan+Pred.FC(rf)+AE",
                                   "Pan+Pred.FC(gbm)+AE",
                                   "Pan (Full Study5)", 
                                   "Pan+FACE (Full Study5)"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

# junk data??? really unstable;; perhaps to look at this meaningfully, we need 20% of the data to equally come from each of the 3 panels.
 # table(data.tst_M5a$Panel)
 #  it looks fine though
 #   Lucid    MTurk Prolific 
 #     100      101       91 


etasq_study5 <- etasq.pan_cond %>%
  filter(Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))

etasq_study5 <- etasq_study5 %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan (Full Study5)"] - Condition_Panel) / Condition_Panel[Model == "Pan (Full Study5)"] * 100)


etasq_study5.test <- etasq.pan_cond %>%
  filter(!Model %in% c("Pan (Full Study5)", "Pan+FACE (Full Study5)"))
etasq_study5.test <- etasq_study5.test %>%
  group_by(Paradigm) %>%
  mutate(Reduction = (Condition_Panel[Model == "Pan"] - Condition_Panel) / Condition_Panel[Model == "Pan"] * 100)

etasq.pan_cond<-rbind(etasq_study5,etasq_study5.test)

avg_reduction <- etasq.pan_cond %>%
  group_by(Model) %>%
  summarize(Avg_Reduction = mean(Reduction, na.rm = TRUE))

  
kable(avg_reduction, caption = "Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1", digits = 2)
Average Reduction (%) in Pan_Cond Eta Square Across Paradigms in study 1
Model Avg_Reduction
Pan 0.00
Pan+FACE 56.30
Pan+Pred.FC(reg)+AE 66.14
Pan+Pred.FC(net)+AE 66.34
Pan+Pred.FC(rf)+AE 66.04
Pan+Pred.FC(gbm)+AE 65.81
Pan (Full Study5) 0.00
Pan+FACE (Full Study5) 74.19
# label paradigm
deviance_explained_df.Default$Paradigm <- "Default"
deviance_explained_df.Disease$Paradigm <- "Disease"
variance_explained_lm_df.LessMore$Paradigm <- "LessMore"
variance_explained_lm_df.SunkCost$Paradigm <- "SunkCost"

Model <- c("Pan", "Pan+FACE", "Pan+Pred.FC(reg)+AE", "Pan+Pred.FC(net)+AE", "Pan+Pred.FC(rf)+AE", "Pan+Pred.FC(gbm)+AE")
deviance_explained_df.Default$Model<-Model
deviance_explained_df.Disease$Model<-Model
variance_explained_lm_df.LessMore$Model<-Model
variance_explained_lm_df.SunkCost$Model<-Model

colnames(deviance_explained_df.Default)[3] <- "Condition:Panel"
colnames(deviance_explained_df.Disease)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.LessMore)[3] <- "Condition:Panel"
colnames(variance_explained_lm_df.SunkCost)[3] <- "Condition:Panel"

etasq <- rbind(deviance_explained_df.Default, 
                     deviance_explained_df.Disease, 
                     variance_explained_lm_df.LessMore, 
                     variance_explained_lm_df.SunkCost)

etasq$Model <- factor(etasq$Model, 
                        levels = c("Pan", 
                                   "Pan+FACE",  
                                   "Pan+Pred.FC(reg)+AE",  
                                   "Pan+Pred.FC(net)+AE", 
                                   "Pan+Pred.FC(rf)+AE",
                                   "Pan+Pred.FC(gbm)+AE"),
                        ordered = TRUE)

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$Panel)  # Extract the significance symbols
  df$Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$Panel))
  return(df)
}

etasq.pan<-split_value_and_sig(etasq)

ggplot(etasq.pan, aes(x = Model, y = Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

split_value_and_sig <- function(df) {
  df$Significance <- gsub("[0-9.]", "", df$`Condition:Panel`)  # Extract the significance symbols
  df$Condition_Panel <- as.numeric(sub("^([0-9]+\\.[0-9]+).*", "\\1", df$`Condition:Panel`))
  return(df)
}

etasq.pan_cond<-split_value_and_sig(etasq)

ggplot(etasq.pan_cond, aes(x = Model, y = Condition_Panel, fill = Model)) +
  geom_bar(stat = "identity") +
  facet_wrap(.~Paradigm, ncol = 4, scales = "free") +
  theme_bw() +
  ylab("Etasq_panel*cond") +
  theme(
    legend.position = "none", 
    strip.background = element_rect(fill = "white"), 
    strip.text = element_text(size = 18),  
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, face = "bold")
  ) +
  scale_fill_brewer(palette = "Set2")

F/C*Cond Model Coefficients
  • DVs for Default & Framing are 1/0
  • DVs for LessMore & SunkCost are on the original scale
AlllCoef = as.data.frame(rbind(
cbind(coef(Default.PAN_FACE),confint(Default.PAN_FACE)),
cbind(coef(Default.PAN_Pred_FC.reg_AE),confint(Default.PAN_Pred_FC.reg_AE)),
cbind(coef(Default.PAN_Pred_FC.net_AE),confint(Default.PAN_Pred_FC.net_AE)),
cbind(coef(Default.PAN_Pred_FC.rf_AE),confint(Default.PAN_Pred_FC.rf_AE)),
cbind(coef(Default.PAN_Pred_FC.gbm_AE),confint(Default.PAN_Pred_FC.gbm_AE)),
cbind(coef(Default.PAN_FACE.Study5),confint(Default.PAN_FACE.Study5)),

cbind(coef(Disease.PAN_FACE),confint(Disease.PAN_FACE)),
cbind(coef(Disease.PAN_Pred_FC.reg_AE),confint(Disease.PAN_Pred_FC.reg_AE)),
cbind(coef(Disease.PAN_Pred_FC.net_AE),confint(Disease.PAN_Pred_FC.net_AE)),
cbind(coef(Disease.PAN_Pred_FC.rf_AE),confint(Disease.PAN_Pred_FC.rf_AE)),
cbind(coef(Disease.PAN_Pred_FC.gbm_AE),confint(Disease.PAN_Pred_FC.gbm_AE)),
cbind(coef(Disease.PAN_FACE.Study5),confint(Disease.PAN_FACE.Study5)),

cbind(coef(LessMore.PAN_FACE),confint(LessMore.PAN_FACE)),
cbind(coef(LessMore.PAN_Pred_FC.reg_AE),confint(LessMore.PAN_Pred_FC.reg_AE)),
cbind(coef(LessMore.PAN_Pred_FC.net_AE),confint(LessMore.PAN_Pred_FC.net_AE)),
cbind(coef(LessMore.PAN_Pred_FC.rf_AE),confint(LessMore.PAN_Pred_FC.rf_AE)),
cbind(coef(LessMore.PAN_Pred_FC.gbm_AE),confint(LessMore.PAN_Pred_FC.gbm_AE)),
cbind(coef(LessMore.PAN_FACE.Study5),confint(LessMore.PAN_FACE.Study5)),

cbind(coef(Sunk.PAN_FACE),confint(Sunk.PAN_FACE)),
cbind(coef(Sunk.PAN_Pred_FC.reg_AE),confint(Sunk.PAN_Pred_FC.reg_AE)),
cbind(coef(Sunk.PAN_Pred_FC.net_AE),confint(Sunk.PAN_Pred_FC.net_AE)),
cbind(coef(Sunk.PAN_Pred_FC.rf_AE),confint(Sunk.PAN_Pred_FC.rf_AE)),
cbind(coef(Sunk.PAN_Pred_FC.gbm_AE),confint(Sunk.PAN_Pred_FC.gbm_AE)),
cbind(coef(Sunk.PAN_FACE.Study5),confint(Sunk.PAN_FACE.Study5))))

AlllCoef <- AlllCoef[!grepl("Panel", rownames(AlllCoef)), ]

AlllCoef$type = factor(rep(c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"), 24),levels = c("Intercept", " Condition(Treat)","F", "A", "C", "E","F: Condition(Treat)", "A: Condition(Treat)", "C: Condition(Treat)", "E: Condition(Treat)"))

AlllCoef$Paradigm = factor((rep(c("Default","Framing","Less is Better","Sunk Cost"), each=60)),levels = rev(c("Default","Framing","Less is Better","Sunk Cost")))

AlllCoef$Model = factor(rep(rep(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Study 5)"), each = 10),4), levels = rev(c("FACE","PC(reg) + ACE","PC(net) + ACE","PC(rf) + ACE","PC(gbm) + ACE","FACE (Study 5)")))

colnames(AlllCoef)[1:3] = c("Beta","low", "high")

allcoefplot = ggplot(AlllCoef[(AlllCoef$type %in% c("F: Condition(Treat)","C: Condition(Treat)")),], aes( y = Beta, ymin= low, ymax= high, x = Paradigm, fill = Model, color = Model))+
  geom_bar(stat = "identity", position = "dodge",alpha= .25)+
  geom_pointrange(position = position_dodge(width = .8))+
  coord_flip()+
  xlab("")+
  theme_bw()+
  theme(strip.background = element_blank(), text = element_text(size = 12)) +
  facet_grid(~type)

allcoefplot