setwd("/Users/anjalisingh/Downloads/")


# ============================================================
# Pilot Analysis: 2 (Personalised vs Not) x 2 (Drug vs Cos) x 2 (Prev vs Cure)
# DVs: risk, time, price
# ============================================================

# --- Packages ---
# install.packages(c("psych", "dplyr"))   # run once if not installed
library(psych)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# --- 1. Load data ---
# Qualtrics CSVs have 2 extra header rows (description + JSON). Skip them.
raw <- read.csv(
  "S1+Personalisation+x+Schema+x+Goal_May+16,+2026_11.09.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  na.strings = c("", "NA")
)
raw <- raw[-c(1, 2), ]                 # drop the 2 Qualtrics meta rows

# --- 2. Clean: keep valid participants, attention-check passers, debriefed ---
# Coerce the filter columns to numeric (they come in as character after the skip)
raw$R_AC <- as.numeric(raw$R_AC)
raw$Deb  <- as.numeric(raw$Deb)

dat <- raw %>%
  filter(!is.na(participantId) & participantId != "",
         R_AC == 3,
         Deb  == 1)

cat("N after cleaning:", nrow(dat), "\n")
## N after cleaning: 292
# Coerce the variables we'll analyse to numeric
num_vars <- c("R1", "R2", "R3",
              "MC_1", "MC_2", "MC_3", "MC_4", "MC_5")
dat[num_vars] <- lapply(dat[num_vars], as.numeric)

# --- 3. Cronbach's alpha for the risk scale (R1, R2, R3) ---
alpha_risk <- psych::alpha(dat[, c("R1", "R2", "R3")], check.keys = FALSE)
cat("\n===== Cronbach's alpha: R1, R2, R3 =====\n")
## 
## ===== Cronbach's alpha: R1, R2, R3 =====
print(alpha_risk$total)               # raw_alpha is the key number
##  raw_alpha std.alpha   G6(smc) average_r      S/N         ase       mean
##  0.9214579 0.9218414 0.8903227 0.7972218 11.79449 0.008024167 -0.6381279
##        sd  median_r
##  1.402945 0.8132692
# If alpha is low, inspect alpha_risk$alpha.drop to see which item hurts it

# --- 4. Manipulation checks ---
# Cond1: P vs NP  | Cond2: Drug vs Cos  | Cond3: Prev vs Cure
mc_vars   <- c("MC_1", "MC_2", "MC_3", "MC_4", "MC_5")
cond_vars <- c("Cond1", "Cond2", "Cond3")

# 4a. Cell means (and SDs, n) for each MC item within each condition
cat("\n===== Means by condition cell =====\n")
## 
## ===== Means by condition cell =====
for (cond in cond_vars) {
  cat("\n--- Grouped by", cond, "---\n")
  summary_tbl <- dat %>%
    group_by(.data[[cond]]) %>%
    summarise(across(all_of(mc_vars),
                     list(M  = ~mean(.x, na.rm = TRUE),
                          SD = ~sd(.x,   na.rm = TRUE),
                          n  = ~sum(!is.na(.x))),
                     .names = "{.col}_{.fn}"),
              .groups = "drop")
  print(as.data.frame(summary_tbl))
}
## 
## --- Grouped by Cond1 ---
##   Cond1    MC_1_M  MC_1_SD MC_1_n    MC_2_M  MC_2_SD MC_2_n     MC_3_M  MC_3_SD
## 1    NP -1.061224 1.676691    147 1.2448980 1.478577    147 -1.5306122 1.597541
## 2     P -1.089655 1.763508    145 0.8758621 1.747549    145  0.1172414 1.987822
##   MC_3_n     MC_4_M  MC_4_SD MC_4_n     MC_5_M  MC_5_SD MC_5_n
## 1    147 -0.8571429 1.926740    147 -0.6734694 1.902281    147
## 2    145 -0.5862069 1.959968    145 -0.8551724 1.783331    145
## 
## --- Grouped by Cond2 ---
##   Cond2     MC_1_M  MC_1_SD MC_1_n    MC_2_M  MC_2_SD MC_2_n     MC_3_M
## 1   Cos -1.2262774 1.581105    137 1.1824818 1.471389    137 -0.7445255
## 2  Drug -0.9419355 1.824219    155 0.9548387 1.748253    155 -0.6838710
##    MC_3_SD MC_3_n     MC_4_M  MC_4_SD MC_4_n     MC_5_M  MC_5_SD MC_5_n
## 1 1.959251    137 -0.6131387 1.914527    137 -0.8175182 1.795497    137
## 2 2.002449    155 -0.8193548 1.972114    155 -0.7161290 1.888980    155
## 
## --- Grouped by Cond3 ---
##   Cond3     MC_1_M  MC_1_SD MC_1_n    MC_2_M  MC_2_SD MC_2_n     MC_3_M
## 1  Cure -0.9928058 1.767240    139 1.1654676 1.608839    139 -0.7625899
## 2  Prev -1.1503268 1.673207    153 0.9673203 1.640035    153 -0.6666667
##    MC_3_SD MC_3_n     MC_4_M  MC_4_SD MC_4_n     MC_5_M  MC_5_SD MC_5_n
## 1 1.991223    139 -1.2517986 1.702911    139 -0.6618705 1.843728    139
## 2 1.973509    153 -0.2418301 2.029474    153 -0.8562092 1.843974    153
# 4b. Independent-samples t-tests: each MC item across the 2 cells of each condition
cat("\n===== T-tests: MC items across cells within each condition =====\n")
## 
## ===== T-tests: MC items across cells within each condition =====
ttest_results <- data.frame()

for (cond in cond_vars) {
  for (mc in mc_vars) {
    # Drop missing for this pair
    sub <- dat[!is.na(dat[[mc]]) & !is.na(dat[[cond]]) & dat[[cond]] != "", ]
    lvls <- unique(sub[[cond]])
    if (length(lvls) != 2) next        # skip if not exactly 2 cells
    
    t_out <- t.test(sub[[mc]] ~ sub[[cond]])
    
    m1 <- mean(sub[[mc]][sub[[cond]] == lvls[1]], na.rm = TRUE)
    m2 <- mean(sub[[mc]][sub[[cond]] == lvls[2]], na.rm = TRUE)
    
    ttest_results <- rbind(ttest_results, data.frame(
      Condition = cond,
      MC_item   = mc,
      Level_1   = lvls[1],
      Mean_1    = round(m1, 2),
      Level_2   = lvls[2],
      Mean_2    = round(m2, 2),
      t         = round(unname(t_out$statistic), 3),
      df        = round(unname(t_out$parameter), 2),
      p_value   = round(t_out$p.value, 4),
      sig       = ifelse(t_out$p.value < .05, "*", "")
    ))
  }
}

print(ttest_results, row.names = FALSE)
##  Condition MC_item Level_1 Mean_1 Level_2 Mean_2      t     df p_value sig
##      Cond1    MC_1       P  -1.09      NP  -1.06  0.141 288.81  0.8879    
##      Cond1    MC_2       P   0.88      NP   1.24  1.947 280.98  0.0526    
##      Cond1    MC_3       P   0.12      NP  -1.53 -7.802 275.60  0.0000   *
##      Cond1    MC_4       P  -0.59      NP  -0.86 -1.191 289.72  0.2346    
##      Cond1    MC_5       P  -0.86      NP  -0.67  0.842 289.25  0.4004    
##      Cond2    MC_1    Drug  -0.94     Cos  -1.23 -1.427 289.89  0.1547    
##      Cond2    MC_2    Drug   0.95     Cos   1.18  1.208 289.33  0.2281    
##      Cond2    MC_3    Drug  -0.68     Cos  -0.74 -0.261 287.00  0.7941    
##      Cond2    MC_4    Drug  -0.82     Cos  -0.61  0.906 287.44  0.3659    
##      Cond2    MC_5    Drug  -0.72     Cos  -0.82 -0.470 288.46  0.6388    
##      Cond3    MC_1    Prev  -1.15    Cure  -0.99  0.780 283.56  0.4359    
##      Cond3    MC_2    Prev   0.97    Cure   1.17  1.041 288.28  0.2986    
##      Cond3    MC_3    Prev  -0.67    Cure  -0.76 -0.413 286.82  0.6800    
##      Cond3    MC_4    Prev  -0.24    Cure  -1.25 -4.620 288.22  0.0000   *
##      Cond3    MC_5    Prev  -0.86    Cure  -0.66  0.899 287.34  0.3691
# Optional: save the t-test table
# write.csv


###MC


# ============================================================
# Clean presentation of MC t-test results
# ============================================================
# install.packages(c("dplyr", "tidyr", "gt"))   # run once if needed
library(dplyr)
library(tidyr)
# Recompute SDs alongside means so the table is self-contained
mc_vars   <- c("MC_1", "MC_2", "MC_3", "MC_4", "MC_5")
cond_vars <- c("Cond1", "Cond2", "Cond3")

# Human-readable labels for each condition
cond_labels <- c(
  Cond1 = "Personalisation (P vs NP)",
  Cond2 = "Schema (Drug vs Cos)",
  Cond3 = "Goal (Prev vs Cure)"
)

results <- data.frame()

for (cond in cond_vars) {
  for (mc in mc_vars) {
    sub <- dat[!is.na(dat[[mc]]) & !is.na(dat[[cond]]) & dat[[cond]] != "", ]
    lvls <- unique(sub[[cond]])
    if (length(lvls) != 2) next
    
    g1 <- sub[[mc]][sub[[cond]] == lvls[1]]
    g2 <- sub[[mc]][sub[[cond]] == lvls[2]]
    t_out <- t.test(sub[[mc]] ~ sub[[cond]])
    
    results <- rbind(results, data.frame(
      Condition = cond_labels[[cond]],
      MC_item   = mc,
      Cell_1    = sprintf("%s: %.2f (%.2f), n=%d",
                          lvls[1], mean(g1, na.rm = TRUE),
                          sd(g1, na.rm = TRUE), sum(!is.na(g1))),
      Cell_2    = sprintf("%s: %.2f (%.2f), n=%d",
                          lvls[2], mean(g2, na.rm = TRUE),
                          sd(g2, na.rm = TRUE), sum(!is.na(g2))),
      t         = sprintf("%.2f", unname(t_out$statistic)),
      df        = sprintf("%.1f", unname(t_out$parameter)),
      p         = ifelse(t_out$p.value < .001, "<.001",
                         sprintf("%.3f", t_out$p.value)),
      Sig       = ifelse(t_out$p.value < .001, "***",
                         ifelse(t_out$p.value < .01,  "**",
                                ifelse(t_out$p.value < .05,  "*",
                                       ifelse(t_out$p.value < .10,  ".", ""))))
    ))
  }
}

# Plain console view
print(results, row.names = FALSE)
##                  Condition MC_item                    Cell_1
##  Personalisation (P vs NP)    MC_1    P: -1.09 (1.76), n=145
##  Personalisation (P vs NP)    MC_2     P: 0.88 (1.75), n=145
##  Personalisation (P vs NP)    MC_3     P: 0.12 (1.99), n=145
##  Personalisation (P vs NP)    MC_4    P: -0.59 (1.96), n=145
##  Personalisation (P vs NP)    MC_5    P: -0.86 (1.78), n=145
##       Schema (Drug vs Cos)    MC_1 Drug: -0.94 (1.82), n=155
##       Schema (Drug vs Cos)    MC_2  Drug: 0.95 (1.75), n=155
##       Schema (Drug vs Cos)    MC_3 Drug: -0.68 (2.00), n=155
##       Schema (Drug vs Cos)    MC_4 Drug: -0.82 (1.97), n=155
##       Schema (Drug vs Cos)    MC_5 Drug: -0.72 (1.89), n=155
##        Goal (Prev vs Cure)    MC_1 Prev: -1.15 (1.67), n=153
##        Goal (Prev vs Cure)    MC_2  Prev: 0.97 (1.64), n=153
##        Goal (Prev vs Cure)    MC_3 Prev: -0.67 (1.97), n=153
##        Goal (Prev vs Cure)    MC_4 Prev: -0.24 (2.03), n=153
##        Goal (Prev vs Cure)    MC_5 Prev: -0.86 (1.84), n=153
##                     Cell_2     t    df     p Sig
##    NP: -1.06 (1.68), n=147  0.14 288.8 0.888    
##     NP: 1.24 (1.48), n=147  1.95 281.0 0.053   .
##    NP: -1.53 (1.60), n=147 -7.80 275.6 <.001 ***
##    NP: -0.86 (1.93), n=147 -1.19 289.7 0.235    
##    NP: -0.67 (1.90), n=147  0.84 289.3 0.400    
##   Cos: -1.23 (1.58), n=137 -1.43 289.9 0.155    
##    Cos: 1.18 (1.47), n=137  1.21 289.3 0.228    
##   Cos: -0.74 (1.96), n=137 -0.26 287.0 0.794    
##   Cos: -0.61 (1.91), n=137  0.91 287.4 0.366    
##   Cos: -0.82 (1.80), n=137 -0.47 288.5 0.639    
##  Cure: -0.99 (1.77), n=139  0.78 283.6 0.436    
##   Cure: 1.17 (1.61), n=139  1.04 288.3 0.299    
##  Cure: -0.76 (1.99), n=139 -0.41 286.8 0.680    
##  Cure: -1.25 (1.70), n=139 -4.62 288.2 <.001 ***
##  Cure: -0.66 (1.84), n=139  0.90 287.3 0.369
library(gt)

results %>%
  gt(groupname_col = "Condition") %>%
  tab_header(
    title    = "Manipulation Check Results",
    subtitle = "Means (SD) and independent-samples t-tests within each condition"
  ) %>%
  cols_label(
    MC_item = "MC Item",
    Cell_1  = "Cell 1: M (SD), n",
    Cell_2  = "Cell 2: M (SD), n",
    t       = "t",
    df      = "df",
    p       = "p",
    Sig     = ""
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(columns = Sig, rows = Sig %in% c("*", "**", "***"))
  ) %>%
  tab_source_note(
    md("*Significance:* `*` p < .05, `**` p < .01, `***` p < .001, `.` p < .10")
  ) %>%
  tab_options(
    row_group.background.color = "#f2f2f2",
    heading.background.color   = "#e8e8e8"
  )
Manipulation Check Results
Means (SD) and independent-samples t-tests within each condition
MC Item Cell 1: M (SD), n Cell 2: M (SD), n t df p
Personalisation (P vs NP)
MC_1 P: -1.09 (1.76), n=145 NP: -1.06 (1.68), n=147 0.14 288.8 0.888
MC_2 P: 0.88 (1.75), n=145 NP: 1.24 (1.48), n=147 1.95 281.0 0.053 .
MC_3 P: 0.12 (1.99), n=145 NP: -1.53 (1.60), n=147 -7.80 275.6 <.001 ***
MC_4 P: -0.59 (1.96), n=145 NP: -0.86 (1.93), n=147 -1.19 289.7 0.235
MC_5 P: -0.86 (1.78), n=145 NP: -0.67 (1.90), n=147 0.84 289.3 0.400
Schema (Drug vs Cos)
MC_1 Drug: -0.94 (1.82), n=155 Cos: -1.23 (1.58), n=137 -1.43 289.9 0.155
MC_2 Drug: 0.95 (1.75), n=155 Cos: 1.18 (1.47), n=137 1.21 289.3 0.228
MC_3 Drug: -0.68 (2.00), n=155 Cos: -0.74 (1.96), n=137 -0.26 287.0 0.794
MC_4 Drug: -0.82 (1.97), n=155 Cos: -0.61 (1.91), n=137 0.91 287.4 0.366
MC_5 Drug: -0.72 (1.89), n=155 Cos: -0.82 (1.80), n=137 -0.47 288.5 0.639
Goal (Prev vs Cure)
MC_1 Prev: -1.15 (1.67), n=153 Cure: -0.99 (1.77), n=139 0.78 283.6 0.436
MC_2 Prev: 0.97 (1.64), n=153 Cure: 1.17 (1.61), n=139 1.04 288.3 0.299
MC_3 Prev: -0.67 (1.97), n=153 Cure: -0.76 (1.99), n=139 -0.41 286.8 0.680
MC_4 Prev: -0.24 (2.03), n=153 Cure: -1.25 (1.70), n=139 -4.62 288.2 <.001 ***
MC_5 Prev: -0.86 (1.84), n=153 Cure: -0.66 (1.84), n=139 0.90 287.3 0.369
Significance: * p < .05, ** p < .01, *** p < .001, . p < .10
# To save: gtsave(., "MC_results_table.html")  or  gtsave(., "MC_results_table.png")
# Useful if you want a compact "at a glance" view
wide <- results %>%
  select(Condition, MC_item, t, p, Sig) %>%
  mutate(t_p = paste0("t=", t, ", p=", p, Sig)) %>%
  select(MC_item, Condition, t_p) %>%
  pivot_wider(names_from = Condition, values_from = t_p)

print(wide)
## # A tibble: 5 × 4
##   MC_item Personalisation (P vs N…¹ `Schema (Drug vs Cos)` `Goal (Prev vs Cure)`
##   <chr>   <chr>                     <chr>                  <chr>                
## 1 MC_1    t=0.14, p=0.888           t=-1.43, p=0.155       t=0.78, p=0.436      
## 2 MC_2    t=1.95, p=0.053.          t=1.21, p=0.228        t=1.04, p=0.299      
## 3 MC_3    t=-7.80, p=<.001***       t=-0.26, p=0.794       t=-0.41, p=0.680     
## 4 MC_4    t=-1.19, p=0.235          t=0.91, p=0.366        t=-4.62, p=<.001***  
## 5 MC_5    t=0.84, p=0.400           t=-0.47, p=0.639       t=0.90, p=0.369      
## # ℹ abbreviated name: ¹​`Personalisation (P vs NP)`
##DV

# ============================================================
# Effects of each Condition on each DV — one at a time
# ============================================================
library(dplyr)
library(effsize)
## 
## Attaching package: 'effsize'
## The following object is masked from 'package:psych':
## 
##     cohen.d
# --- Prep ---
dv_raw <- c("FP_1", "T1", "T2_1", "R1", "R2", "R3")
dat[dv_raw] <- lapply(dat[dv_raw], as.numeric)
dat$R_avg <- rowMeans(dat[, c("R1", "R2", "R3")], na.rm = TRUE)

dvs       <- c("FP_1", "T1", "T2_1", "R_avg")
dv_labels <- c(FP_1 = "Price (FP_1)",
               T1   = "Time (T1)",
               T2_1 = "Time (T2_1)",
               R_avg= "Risk (avg R1-R3)")

# --- Function ---
run_condition <- function(cond_var, cond_label) {
  out <- data.frame()
  for (dv in dvs) {
    sub <- dat[!is.na(dat[[dv]]) & !is.na(dat[[cond_var]]) & dat[[cond_var]] != "", ]
    lvls <- unique(sub[[cond_var]])
    if (length(lvls) != 2) next
    
    g1 <- sub[[dv]][sub[[cond_var]] == lvls[1]]
    g2 <- sub[[dv]][sub[[cond_var]] == lvls[2]]
    t_out <- t.test(sub[[dv]] ~ sub[[cond_var]])
    d_out <- effsize::cohen.d(sub[[dv]] ~ sub[[cond_var]], na.rm = TRUE)
    
    out <- rbind(out, data.frame(
      DV       = dv_labels[[dv]],
      Cell_1   = sprintf("%s: %.2f (%.2f), n=%d",
                         lvls[1], mean(g1, na.rm=TRUE), sd(g1, na.rm=TRUE), sum(!is.na(g1))),
      Cell_2   = sprintf("%s: %.2f (%.2f), n=%d",
                         lvls[2], mean(g2, na.rm=TRUE), sd(g2, na.rm=TRUE), sum(!is.na(g2))),
      t        = sprintf("%.2f", unname(t_out$statistic)),
      df       = sprintf("%.1f", unname(t_out$parameter)),
      p        = ifelse(t_out$p.value < .001, "<.001", sprintf("%.3f", t_out$p.value)),
      Cohens_d = sprintf("%.2f", unname(d_out$estimate)),
      Sig      = ifelse(t_out$p.value < .001, "***",
                        ifelse(t_out$p.value < .01,  "**",
                               ifelse(t_out$p.value < .05,  "*",
                                      ifelse(t_out$p.value < .10,  ".", ""))))
    ))
  }
  cat("\n=========================================================\n")
  cat("CONDITION:", cond_label, "\n")
  cat("=========================================================\n")
  print(out, row.names = FALSE)
  return(out)
}

# --- Run each condition explicitly (no invisible return) ---
res_cond1 <- run_condition("Cond1", "Personalisation (NP vs P)")
## 
## =========================================================
## CONDITION: Personalisation (NP vs P) 
## =========================================================
##                DV                  Cell_1                   Cell_2     t    df
##      Price (FP_1) P: 23.75 (16.19), n=145 NP: 25.78 (15.61), n=147  1.09 289.3
##         Time (T1)  P: -0.67 (1.30), n=145  NP: -0.59 (1.29), n=147  0.51 289.9
##       Time (T2_1)   P: 3.16 (1.64), n=145   NP: 2.75 (1.45), n=147 -2.26 284.6
##  Risk (avg R1-R3)  P: -0.72 (1.38), n=145  NP: -0.56 (1.42), n=147  0.96 290.0
##      p Cohens_d Sig
##  0.278     0.13    
##  0.612     0.06    
##  0.025    -0.26   *
##  0.339     0.11
res_cond2 <- run_condition("Cond2", "Schema (Cos vs Drug)")
## 
## =========================================================
## CONDITION: Schema (Cos vs Drug) 
## =========================================================
##                DV                     Cell_1                    Cell_2     t
##      Price (FP_1) Drug: 25.59 (15.76), n=155 Cos: 23.84 (16.08), n=137 -0.94
##         Time (T1)  Drug: -0.70 (1.32), n=155  Cos: -0.55 (1.27), n=137  0.94
##       Time (T2_1)   Drug: 3.04 (1.57), n=155   Cos: 2.85 (1.55), n=137 -1.01
##  Risk (avg R1-R3)  Drug: -0.54 (1.46), n=155  Cos: -0.75 (1.33), n=137 -1.34
##     df     p Cohens_d Sig
##  284.1 0.348    -0.11    
##  287.7 0.350     0.11    
##  286.4 0.314    -0.12    
##  289.7 0.182    -0.16
res_cond3 <- run_condition("Cond3", "Goal (Prev vs Cure)")
## 
## =========================================================
## CONDITION: Goal (Prev vs Cure) 
## =========================================================
##                DV                     Cell_1                     Cell_2     t
##      Price (FP_1) Prev: 24.05 (17.41), n=153 Cure: 25.57 (14.08), n=139  0.82
##         Time (T1)  Prev: -0.75 (1.34), n=153  Cure: -0.50 (1.24), n=139  1.69
##       Time (T2_1)   Prev: 3.09 (1.62), n=153   Cure: 2.80 (1.49), n=139 -1.61
##  Risk (avg R1-R3)  Prev: -0.70 (1.43), n=153  Cure: -0.57 (1.38), n=139  0.81
##     df     p Cohens_d Sig
##  286.3 0.410     0.10    
##  289.9 0.091     0.20   .
##  289.9 0.108    -0.19    
##  288.9 0.418     0.09
##3 WAY INTERACTION


# ============================================================
# 3-way ANOVA: Cond1 x Cond2 x Cond3 on each DV
# DVs: FP_1, T1, T2_1, R_avg
# ============================================================
# install.packages("car")   # run once if needed
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:psych':
## 
##     logit
library(dplyr)

# --- Prep (skip if already done) ---
dv_raw <- c("FP_1", "T1", "T2_1", "R1", "R2", "R3")
dat[dv_raw] <- lapply(dat[dv_raw], as.numeric)
dat$R_avg <- rowMeans(dat[, c("R1", "R2", "R3")], na.rm = TRUE)

# Factorise conditions
dat$Cond1 <- factor(dat$Cond1, levels = c("NP", "P"))
dat$Cond2 <- factor(dat$Cond2, levels = c("Cos", "Drug"))
dat$Cond3 <- factor(dat$Cond3, levels = c("Prev", "Cure"))

dvs       <- c("FP_1", "T1", "T2_1", "R_avg")
dv_labels <- c(FP_1 = "Price (FP_1)",
               T1   = "Time (T1)",
               T2_1 = "Time (T2_1)",
               R_avg= "Risk (avg R1-R3)")

# Type III SS requires sum-to-zero contrasts for meaningful results
options(contrasts = c("contr.sum", "contr.poly"))

# --- Function: run 3-way ANOVA for one DV and print nicely ---
run_anova_3way <- function(dv, dv_label) {
  
  cat("\n=========================================================\n")
  cat("DV:", dv_label, "\n")
  cat("Model:", dv, "~ Cond1 * Cond2 * Cond3\n")
  cat("=========================================================\n")
  
  fml <- as.formula(paste(dv, "~ Cond1 * Cond2 * Cond3"))
  mod <- lm(fml, data = dat)
  aov_tab <- car::Anova(mod, type = "III")
  
  # Add partial eta-squared
  ss <- aov_tab[["Sum Sq"]]
  ss_resid <- ss[rownames(aov_tab) == "Residuals"]
  aov_tab$partial_eta_sq <- round(ss / (ss + ss_resid), 3)
  
  # Add significance stars
  p <- aov_tab[["Pr(>F)"]]
  aov_tab$Sig <- ifelse(is.na(p), "",
                        ifelse(p < .001, "***",
                               ifelse(p < .01,  "**",
                                      ifelse(p < .05,  "*",
                                             ifelse(p < .10,  ".", "")))))
  
  print(aov_tab)
  
  # Pull out the 3-way interaction specifically
  three_way <- aov_tab["Cond1:Cond2:Cond3", ]
  cat("\n--- 3-way interaction (Cond1 x Cond2 x Cond3) ---\n")
  cat(sprintf("F(%d, %d) = %.2f, p = %.3f, partial eta-sq = %.3f %s\n",
              three_way$Df,
              aov_tab["Residuals", "Df"],
              three_way$`F value`,
              three_way$`Pr(>F)`,
              three_way$partial_eta_sq,
              three_way$Sig))
  
  return(aov_tab)
}

# --- Run for each DV ---
anova_FP   <- run_anova_3way("FP_1",  dv_labels[["FP_1"]])
## 
## =========================================================
## DV: Price (FP_1) 
## Model: FP_1 ~ Cond1 * Cond2 * Cond3
## =========================================================
## Anova Table (Type III tests)
## 
## Response: FP_1
##                   Sum Sq  Df  F value  Pr(>F) partial_eta_sq Sig
## (Intercept)       174992   1 686.4423 0.00000          0.707   2
## Cond1                289   1   1.1351 0.28760          0.004   1
## Cond2                232   1   0.9097 0.34101          0.003   1
## Cond3                117   1   0.4608 0.49782          0.002   1
## Cond1:Cond2            0   1   0.0000 0.99525          0.000   1
## Cond1:Cond3          232   1   0.9103 0.34084          0.003   1
## Cond2:Cond3            0   1   0.0003 0.98714          0.000   1
## Cond1:Cond2:Cond3    357   1   1.4019 0.23740          0.005   1
## Residuals          72399 284                           0.500   1
## 
## --- 3-way interaction (Cond1 x Cond2 x Cond3) ---
## F(1, 284) = 1.40, p = 0.237, partial eta-sq = 0.005
anova_T1   <- run_anova_3way("T1",    dv_labels[["T1"]])
## 
## =========================================================
## DV: Time (T1) 
## Model: T1 ~ Cond1 * Cond2 * Cond3
## =========================================================
## Anova Table (Type III tests)
## 
## Response: T1
##                   Sum Sq  Df F value  Pr(>F) partial_eta_sq Sig
## (Intercept)       112.11   1 67.0701 0.00000          0.191   2
## Cond1               0.03   1  0.0205 0.88619          0.000   1
## Cond2               1.49   1  0.8930 0.34548          0.003   1
## Cond3               3.83   1  2.2914 0.13121          0.008   1
## Cond1:Cond2         0.66   1  0.3952 0.53006          0.001   1
## Cond1:Cond3         0.10   1  0.0608 0.80543          0.000   1
## Cond2:Cond3         3.64   1  2.1758 0.14130          0.008   1
## Cond1:Cond2:Cond3   2.28   1  1.3618 0.24421          0.005   1
## Residuals         474.73 284                          0.500   1
## 
## --- 3-way interaction (Cond1 x Cond2 x Cond3) ---
## F(1, 284) = 1.36, p = 0.244, partial eta-sq = 0.005
anova_T2   <- run_anova_3way("T2_1",  dv_labels[["T2_1"]])
## 
## =========================================================
## DV: Time (T2_1) 
## Model: T2_1 ~ Cond1 * Cond2 * Cond3
## =========================================================
## Anova Table (Type III tests)
## 
## Response: T2_1
##                    Sum Sq  Df   F value  Pr(>F) partial_eta_sq Sig
## (Intercept)       2477.62   1 1026.6542 0.00000          0.783   3
## Cond1                9.29   1    3.8501 0.05072          0.013   2
## Cond2                2.45   1    1.0161 0.31430          0.004   1
## Cond3                4.04   1    1.6720 0.19704          0.006   1
## Cond1:Cond2          1.46   1    0.6030 0.43809          0.002   1
## Cond1:Cond3          0.10   1    0.0427 0.83637          0.000   1
## Cond2:Cond3          1.20   1    0.4961 0.48182          0.002   1
## Cond1:Cond2:Cond3    1.72   1    0.7141 0.39878          0.003   1
## Residuals          685.37 284                            0.500   1
## 
## --- 3-way interaction (Cond1 x Cond2 x Cond3) ---
## F(1, 284) = 0.71, p = 0.399, partial eta-sq = 0.003
anova_Ravg <- run_anova_3way("R_avg", dv_labels[["R_avg"]])
## 
## =========================================================
## DV: Risk (avg R1-R3) 
## Model: R_avg ~ Cond1 * Cond2 * Cond3
## =========================================================
## Anova Table (Type III tests)
## 
## Response: R_avg
##                   Sum Sq  Df F value  Pr(>F) partial_eta_sq Sig
## (Intercept)       108.32   1 55.5527 0.00000          0.164   3
## Cond1               1.43   1  0.7325 0.39279          0.003   1
## Cond2               5.36   1  2.7510 0.09830          0.010   2
## Cond3               1.17   1  0.6002 0.43914          0.002   1
## Cond1:Cond2         0.58   1  0.2959 0.58691          0.001   1
## Cond1:Cond3         4.71   1  2.4144 0.12134          0.008   1
## Cond2:Cond3         0.00   1  0.0000 0.99881          0.000   1
## Cond1:Cond2:Cond3   7.00   1  3.5913 0.05910          0.012   2
## Residuals         553.74 284                          0.500   1
## 
## --- 3-way interaction (Cond1 x Cond2 x Cond3) ---
## F(1, 284) = 3.59, p = 0.059, partial eta-sq = 0.012 .
# --- Cell means across all 8 cells (useful if any interaction is significant) ---
cat("\n===== Cell means across all 8 cells of the 2x2x2 design =====\n")
## 
## ===== Cell means across all 8 cells of the 2x2x2 design =====
cell_means <- dat %>%
  group_by(Cond1, Cond2, Cond3) %>%
  summarise(
    n     = n(),
    FP_M  = round(mean(FP_1,  na.rm = TRUE), 2),
    FP_SD = round(sd(FP_1,    na.rm = TRUE), 2),
    T1_M  = round(mean(T1,    na.rm = TRUE), 2),
    T2_M  = round(mean(T2_1,  na.rm = TRUE), 2),
    R_M   = round(mean(R_avg, na.rm = TRUE), 2),
    .groups = "drop"
  )
print(as.data.frame(cell_means))
##   Cond1 Cond2 Cond3  n  FP_M FP_SD  T1_M T2_M   R_M
## 1    NP   Cos  Prev 37 22.19 18.31 -0.49 2.70 -0.73
## 2    NP   Cos  Cure 33 27.55 12.77 -0.70 2.79 -0.55
## 3    NP  Drug  Prev 33 26.27 16.09 -0.94 3.03 -0.23
## 4    NP  Drug  Cure 44 27.09 14.76 -0.34 2.55 -0.67
## 5     P   Cos  Prev 35 24.23 18.26 -0.63 3.11 -0.90
## 6     P   Cos  Cure 32 21.50 13.68 -0.41 2.81 -0.83
## 7     P  Drug  Prev 48 23.81 17.32 -0.92 3.42 -0.85
## 8     P  Drug  Cure 30 25.50 14.66 -0.60 3.17 -0.16
# Restore default contrasts
options(contrasts = c("contr.treatment", "contr.poly"))


# ============================================================
# Plots for 2 x 2 x 2 design: Cond1 x Cond2 x Cond3 on each DV
# ============================================================
# install.packages(c("ggplot2", "dplyr"))   # run once if needed
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(dplyr)

# --- Prep (skip if already done) ---
dv_raw <- c("FP_1", "T1", "T2_1", "R1", "R2", "R3")
dat[dv_raw] <- lapply(dat[dv_raw], as.numeric)
dat$R_avg <- rowMeans(dat[, c("R1", "R2", "R3")], na.rm = TRUE)

dat$Cond1 <- factor(dat$Cond1, levels = c("NP", "P"),
                    labels = c("Not Personalised", "Personalised"))
dat$Cond2 <- factor(dat$Cond2, levels = c("Cos", "Drug"),
                    labels = c("Cosmetic", "Drug"))
dat$Cond3 <- factor(dat$Cond3, levels = c("Prev", "Cure"),
                    labels = c("Prevention", "Cure"))

dvs       <- c("FP_1", "T1", "T2_1", "R_avg")
dv_labels <- c(FP_1 = "Price (FP_1)",
               T1   = "Time (T1)",
               T2_1 = "Time (T2_1)",
               R_avg= "Risk (avg R1-R3)")

# --- Function: 3-way interaction plot for one DV ---
# x-axis: Cond1 (Personalisation)
# colour: Cond2 (Schema: Drug vs Cos)
# facet:  Cond3 (Goal: Prev vs Cure)
plot_3way <- function(dv, dv_label) {
  
  summ <- dat %>%
    filter(!is.na(.data[[dv]])) %>%
    group_by(Cond1, Cond2, Cond3) %>%
    summarise(
      M    = mean(.data[[dv]], na.rm = TRUE),
      SE   = sd(.data[[dv]],   na.rm = TRUE) / sqrt(n()),
      n    = n(),
      .groups = "drop"
    )
  
  ggplot(summ, aes(x = Cond1, y = M, colour = Cond2, group = Cond2)) +
    geom_line(linewidth = 1) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = M - SE, ymax = M + SE), width = 0.1, linewidth = 0.7) +
    facet_wrap(~ Cond3) +
    scale_colour_manual(values = c("Cosmetic" = "#E69F00", "Drug" = "#0072B2")) +
    labs(
      title    = paste("3-way interaction:", dv_label),
      subtitle = "Error bars = ±1 SE",
      x        = "Personalisation",
      y        = dv_label,
      colour   = "Schema"
    ) +
    theme_minimal(base_size = 13) +
    theme(
      panel.grid.minor  = element_blank(),
      strip.background  = element_rect(fill = "#f2f2f2", colour = NA),
      strip.text        = element_text(face = "bold"),
      plot.title        = element_text(face = "bold"),
      legend.position   = "bottom"
    )
}

# --- Generate plots for each DV ---
p_FP   <- plot_3way("FP_1",  dv_labels[["FP_1"]])
p_T1   <- plot_3way("T1",    dv_labels[["T1"]])
p_T2   <- plot_3way("T2_1",  dv_labels[["T2_1"]])
p_Ravg <- plot_3way("R_avg", dv_labels[["R_avg"]])

# Display them
print(p_FP)

print(p_T1)

print(p_T2)

print(p_Ravg)