CP analysis

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(bruceR)

## 
## bruceR (v2025.8)
## Broadly Useful Convenient and Efficient R functions
## 
## Packages also loaded:
## ✔ dplyr      ✔ data.table
## ✔ tidyr      ✔ emmeans
## ✔ stringr    ✔ lmerTest
## ✔ forcats    ✔ effectsize
## ✔ ggplot2    ✔ performance
## ✔ cowplot    ✔ interactions
## 
## Main functions of `bruceR`:
## cc()             Describe()  TTEST()
## add()            Freq()      MANOVA()
## .mean()          Corr()      EMMEANS()
## set.wd()         Alpha()     PROCESS()
## import()         EFA()       model_summary()
## print_table()    CFA()       lavaan_summary()
## 
## For full functionality, please install all dependencies:
## install.packages("bruceR", dep=TRUE)
## 
## Online documentation:
## https://psychbruce.github.io/bruceR
## 
## To use this package in publications, please cite:
## Bao, H. W. S. (2021). bruceR: Broadly useful convenient and efficient R functions (Version 2025.8) [Computer software]. https://doi.org/10.32614/CRAN.package.bruceR
## 
## 
## These packages are dependencies but not yet installed:
## - pacman, openxlsx, ggtext, see, lmtest, vars, phia, MuMIn, GGally
## 
## ***** Install All Dependencies *****
## install.packages("bruceR", dep=TRUE)

#清空环境变量
# rm(list = ls())
set.wd()

## ✔ Set working directory to "C:/Users/quent/Desktop/Desktop/MDD 文章文件/CP conference"

source('summarySE.R')

library(afex)

## ************
## Welcome to afex. For support visit: http://afex.singmann.science/
## - Functions for ANOVAs: aov_car(), aov_ez(), and aov_4()
## - Methods for calculating p-values with mixed(): 'S', 'KR', 'LRT', and 'PB'
## - 'afex_aov' and 'mixed' objects can be passed to emmeans() for follow-up tests
## - Get and set global package options with: afex_options()
## - Set sum-to-zero contrasts globally: set_sum_contrasts()
## - For example analyses see: browseVignettes("afex")
## ************
## 
## Attaching package: 'afex'
## 
## The following object is masked from 'package:lme4':
## 
##     lmer

library(emmeans) # for post hoc test
library(ggthemes)

## 
## Attaching package: 'ggthemes'
## 
## The following object is masked from 'package:cowplot':
## 
##     theme_map

library(cowplot)
library(ggpubr)

## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:cowplot':
## 
##     get_legend

library(rstatix)

## 
## Attaching package: 'rstatix'
## 
## The following objects are masked from 'package:effectsize':
## 
##     cohens_d, eta_squared
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(effsize)
library(data.table)  
library(BayesFactor)

## Loading required package: coda
## ************
## Welcome to BayesFactor 0.9.12-4.7. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************

#===========筛选被试===========
# MD 005, MD 069， 是双相，排除
# MD 030, MD 019，MD 083 大脑信号异常，排除
# HC 109是废问卷
# HC 087，HC 072，HC 065大脑异常，排除
# HC 057，HC 002，缓解期抑郁，排除
# HC 024 可疑焦虑，排除
# HC 093，可疑PTSD，排除
# HC 116，高危排除，HC 115，有亲属是精神疾病
# MD 079 HAMD评分较低排除
# MD069,MD070,MD071,MD072,MD075,MD077,MD082,MD086随访转燥，排除

# my_data <- read_csv('edfData_DotFace_Allsubject_in_trial_fix.csv') %>% filter(subnum != 'HC029_')
excluded_subnums <- c('MD005_', 'MD069_', 'MD030_', 'MD019_', 'MD083_', 'HC109_', 
                      'HC087_', 'HC072_', 'HC065_', 'HC057_', 'HC002_', 'HC024_', 
                      'HC029_', 'HC093_', 'HC116_', 'HC115_', 'MD079_', 'MD070_',
                      'MD071_', 'MD072_', 'MD075_', 'MD077_', 'MD082_', 'MD086_')

my_data <- read_csv('edfData_DotFace_Allsubject_in_trial_fix.csv') %>% 
  filter(!subnum %in% excluded_subnums)

## Rows: 136014 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): subnum, sub_type, expression, L_stim, R_stim, emo_stim
## dbl (23): index, startT, endT, duration, avgX, avgY, avgPupil, AOI, AOI_gaze...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

my_data <- my_data %>% mutate(trial_type=case_when(
  trial_type == "1" ~ "guided_emo",
  trial_type == "2" ~ "guided_neu",
  trial_type == "3" ~ "centered",
  TRUE ~ NA_character_)) %>% 
  mutate(dwell_emo=case_when( 
    dwell_emo == "1" ~ "guided_emo",
    dwell_emo == "2" ~ "guided_neu",
    dwell_emo == "31" ~ "centered_emo",
    dwell_emo == "32" ~ "centered_neu")) %>% 
  filter(duration >= 60) %>% 
  group_by(subnum,Ntrial) %>% 
  mutate(continue_gaze=rleid(AOI_emo)) %>% 
  ungroup()
  

# 看看有效引导的百分比
my_data_1ratio <- my_data %>% group_by(subnum, sub_type, trial_type) %>%
  select(c(subnum, sub_type, trial_type, guided_trial)) %>%
  replace_na(list(guided_trial = 0)) %>%
  summarise(across(everything(), ~mean(. == 1))) %>% ungroup()

## `summarise()` has grouped output by 'subnum', 'sub_type'. You can override
## using the `.groups` argument.

my_data <- my_data %>% filter(subnum != 'MD034_') # filtered because the guided trial rate is lower than 70%

subj_info <- read.csv('./AQ_SPIN/final_data.csv')

# add a '_' suffix to every value in the sub column in subj_info
subj_info$sub <- paste0(subj_info$sub, "_")

raw_data <- read_csv('edfData_DotFace_Allsubject_in_trial_fix.csv')

## Rows: 136014 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): subnum, sub_type, expression, L_stim, R_stim, emo_stim
## dbl (23): index, startT, endT, duration, avgX, avgY, avgPupil, AOI, AOI_gaze...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

sub_info_summary_more <- raw_data |> 
  left_join(subj_info, by = c("subnum" = "sub")) |> 
  # keep unique subnum values
  distinct(subnum, .keep_all = TRUE) |> 
  # count the number of values by each group by 'sub_type' column
  group_by(sub_type) |> 
  summarise(count = n(), 
  mean_age = mean(age, na.rm = TRUE),
  gender_count = list(table(gender))
  ) |> 
  ungroup()

knitr::kable(sub_info_summary_more)

sub_type	count	mean_age	gender_count
HC	51	26.08511	15, 32
MDD	76	25.97222	18, 54

sub_info_summary <- my_data |> 
  left_join(subj_info, by = c("subnum" = "sub")) |> 
  # keep unique subnum values
  distinct(subnum, .keep_all = TRUE) |> 
  # count the number of values by each group by 'sub_type' column
  group_by(sub_type) |> 
  summarise(count = n(), 
  mean_age = mean(age, na.rm = TRUE),
  sd = sd(age, na.rm = TRUE),
  gender_count = list(table(gender))
  ) |> 
  ungroup()
  
knitr::kable(sub_info_summary)

sub_type	count	mean_age	sd	gender_count
HC	47	26.08511	4.127478	15, 32
MDD	63	25.96825	6.433201	15, 48

# 首注视偏好
# 每个被试每种表情的注视偏好比例
data4 <- my_data %>% 
  filter(trial_type == 'centered', AOI_emo==1|AOI_emo==2) %>%
  group_by(subnum, Ntrial, sub_type, expression, AOI_emo) %>% 
  mutate(order=1:n()) %>% 
  ungroup() %>% 
  filter(order==1) %>% 
  group_by(subnum, Ntrial, sub_type, expression) %>% 
  mutate(order=1:n(),AOI_emo=factor(AOI_emo,levels=c(1,2),
                                    labels=c('emo','neu'))) %>% 
  ungroup() %>% 
  filter(order==1) %>% 
  group_by(subnum, sub_type, expression, AOI_emo) %>% 
  dplyr::summarize(num=n()) %>% 
  ungroup() %>% 
  spread(key = AOI_emo, value = num) %>% 
  mutate(tot=emo+neu,rate=emo/tot)

## `summarise()` has grouped output by 'subnum', 'sub_type', 'expression'. You can
## override using the `.groups` argument.

# Test if the first gaze rate within each bs and ws factor is significantly higher than 0.5
t_test_results <- data4 %>%
  group_by(sub_type, expression) %>%
  summarise(t_test = list(t.test(rate, mu = 0.5, alternative = "greater")),
    mean_rate = mean(rate, na.rm = TRUE),# Mean of 'rate'
    sd_rate = sd(rate, na.rm = TRUE),   # Standard deviation
    n = n(),                            # Sample size
    .groups = "drop"
            
  ) %>%
  
  mutate(p_value = map_dbl(t_test, "p.value"),
         statistic = map_dbl(t_test, "statistic"),
         df = map_dbl(t_test, "parameter"),  # Extract degrees of freedom
         cohens_d = (mean_rate - 0.5) / sd_rate,# Manual Cohen's d
         conf_int = map(t_test, "conf.int"))
         # conf_int_lower = map_dbl(t_test, ~ .x$conf.int[1]),  # Lower bound of CI
         # conf_int_upper = map_dbl(t_test, ~ .x$conf.int[2]))   # Upper bound of CI

# Apply Bonferroni correction
num_tests <- nrow(t_test_results)
t_test_results <- t_test_results %>%
  mutate(p_value_bonferroni = p.adjust(p_value, method = "bonferroni", n = num_tests))
  
# Print the t-test results
print(t_test_results)

## # A tibble: 6 × 12
##   sub_type expression t_test  mean_rate sd_rate     n    p_value statistic    df
##   <chr>    <chr>      <list>      <dbl>   <dbl> <int>      <dbl>     <dbl> <dbl>
## 1 HC       F          <htest>     0.610   0.123    47    1.07e-7      6.09    46
## 2 HC       H          <htest>     0.518   0.110    47    1.28e-1      1.15    46
## 3 HC       S          <htest>     0.548   0.132    47    8.25e-3      2.49    46
## 4 MDD      F          <htest>     0.594   0.114    63    6.60e-9      6.54    62
## 5 MDD      H          <htest>     0.543   0.125    63    4.35e-3      2.71    62
## 6 MDD      S          <htest>     0.581   0.109    63    8.95e-8      5.88    62
## # ℹ 3 more variables: cohens_d <dbl>, conf_int <list>, p_value_bonferroni <dbl>

# Load necessary libraries
library(ggplot2)
library(dplyr)

# Prepare the data for plotting
plot_data <- t_test_results %>%
  mutate(
    # Calculate standard error
    se = sd_rate / sqrt(n)  # Standard error = standard deviation / sqrt(sample size)
  ) %>%
  rowwise() %>%
  mutate(
    sig_annot = case_when(
      p_value_bonferroni < 0.001 ~ "***",
      p_value_bonferroni < 0.01 ~ "**",
      p_value_bonferroni < 0.05 ~ "*",
      TRUE ~ ""
    )
  ) %>%
  mutate(expression_num = case_when(
    expression == "F" ~ 1,
    expression == "H" ~ 2,
    expression == "S" ~ 3,
    TRUE ~ NA_real_
  )) %>%
  ungroup()

plot_data <- plot_data %>%
  mutate(sub_type = factor(sub_type, levels = c("HC", "MDD")))

# Create the plot
p1 <- ggplot(plot_data, aes(x = expression_num, y = mean_rate, group = sub_type, color = sub_type)) +
  geom_point(position = position_dodge(width = 0.2), size = 3) +
  geom_errorbar(aes(ymin = mean_rate - se, ymax = mean_rate + se), 
                position = position_dodge(width = 0.2), width = 0.2) +
  scale_color_manual(values = c("HC" = "#f9d580", "MDD" = "#99b9e9")) +
  geom_text(aes(label = sig_annot, y = mean_rate + se),  # Position text above error bars
            color = 'black', vjust = 0,  # Adjust vjust for fine-tuning
            position = position_dodge(width = 0.2)) +
  scale_x_continuous(breaks = c(1, 2, 3), labels = c("Fear", "Happy", "Sad")) +  # Set custom x-axis labels
  labs(
    x = "Emotion",
    y = "Mean First Gaze Preference",
    title = "First Gaze Preference by Emotion and Group",
    color = "Group"  # Add legend title for sub_type
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 22),                # all text
    axis.title = element_text(face = "bold", size = 22),
    axis.text = element_text(size = 22),
    strip.text = element_text(face = "bold", size = 22),
    legend.text = element_text(size = 22),
    legend.title = element_text(size = 22),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 23),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white"),
    strip.background = element_rect(fill = "lightgray", color = "black"),
    legend.position = "bottom"
  ) + 
  scale_y_continuous(limits = c(min(plot_data$mean_rate) - 0.03, max(plot_data$mean_rate) + 0.1))

# p1

# ggsave("./result pics/first_gaze_rate.png", plot = p1, width = 8, height = 5, dpi = 300)

# Perform ANOVA for first fixation preference
anova_first_prefer <- aov_ez(
  data = data4,
  id = 'subnum',
  dv = 'rate',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type',
  effect_size = "pes" # partial eta-squared
)

## Converting to factor: sub_type

## Contrasts set to contr.sum for the following variables: sub_type

anova_first_prefer

## Anova Table (Type 3 tests)
## 
## Response: rate
##                Effect           df  MSE         F  ges p.value
## 1            sub_type       1, 108 0.02      1.01 .003    .318
## 2          expression 1.98, 214.18 0.01 10.19 *** .056   <.001
## 3 sub_type:expression 1.98, 214.18 0.01      1.33 .008    .267
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
## 
## Sphericity correction method: GG

# Get ANOVA summary
anova_summary <- summary(anova_first_prefer)

## Warning in summary.Anova.mlm(object$Anova, multivariate = FALSE): HF eps > 1
## treated as 1

print(anova_summary)

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                      Sum Sq num Df Error SS den Df   F value    Pr(>F)    
## (Intercept)         103.309      1   1.6557    108 6738.6135 < 2.2e-16 ***
## sub_type              0.015      1   1.6557    108    1.0056    0.3182    
## expression            0.274      2   2.9083    216   10.1886  5.91e-05 ***
## sub_type:expression   0.036      2   2.9083    216    1.3301    0.2666    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.99148 0.63283
## sub_type:expression        0.99148 0.63283
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])    
## expression          0.99156  6.262e-05 ***
## sub_type:expression 0.99156     0.2666    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       HF eps   Pr(>F[HF])
## expression          1.010024 5.910466e-05
## sub_type:expression 1.010024 2.666188e-01

# Get estimated marginal means
emm_first_prefer <- emmeans(anova_first_prefer, ~ expression | sub_type)

# Perform pairwise comparisons with adjustment for multiple comparisons
posthoc_results <- pairs(emm_first_prefer, adjust = "tukey")

# Convert to dataframe for plotting
posthoc_df <- as.data.frame(posthoc_results)

# Create significance labels (you can adjust the p-value thresholds as needed)
posthoc_df$sig_label <- ifelse(posthoc_df$p.value < 0.001, "***",
                              ifelse(posthoc_df$p.value < 0.01, "**",
                                     ifelse(posthoc_df$p.value < 0.05, "*", "")))

library(ggplot2)
library(ggsignif)

p1

posthoc_df

## sub_type = HC:
##  contrast    estimate         SE  df t.ratio p.value sig_label
##  F - H     0.09120861 0.02280521 108   3.999  0.0003 ***      
##  F - S     0.06184413 0.02447170 108   2.527  0.0343 *        
##  H - S    -0.02936449 0.02449287 108  -1.199  0.4564          
## 
## sub_type = MDD:
##  contrast    estimate         SE  df t.ratio p.value sig_label
##  F - H     0.05149366 0.01969758 108   2.614  0.0274 *        
##  F - S     0.01347530 0.02113697 108   0.638  0.7998          
##  H - S    -0.03801835 0.02115526 108  -1.797  0.1754          
## 
## P value adjustment: tukey method for comparing a family of 3 estimates

# Create bracket coordinates data frame
bracket_data <- data.frame(
  start = c(0.95, 0.95, 1.05),  # x positions for HC comparisons (dodged left)
  end = c(1.95, 2.95, 2.05),    # x positions for MDD comparisons (dodged right)
  y = c(0.65, 0.67, 0.69),  # y positions for brackets (adjust based on your data)
  label = c("***", " * ", "*") # significance labels
)

# Add significance labels using geom_signif
p3 <- p1 +
  geom_signif(
    data = bracket_data,
    aes(xmin = start, xmax = end, 
        annotations = label, y_position = y),
    manual = TRUE,
    tip_length = 0.01,
    textsize = 4,
    vjust = -0.2,
    inherit.aes = FALSE
  )

## Warning in geom_signif(data = bracket_data, aes(xmin = start, xmax = end, :
## Ignoring unknown aesthetics: xmin, xmax, annotations, and y_position

# Display the plot
print(p3)

ggsave("./result pics/first_gaze_rate_v2.png", plot = p3, width = 8, height = 5, dpi = 300)

# Calculate means and standard errors
anova_plot_data <- data4 %>%
  group_by(sub_type, expression) %>%
  summarise(
    mean_rate = mean(rate, na.rm = TRUE),
    se = sd(rate, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  ) %>%
  mutate(
    expression_num = case_when(
      expression == "F" ~ 1,
      expression == "H" ~ 2,
      expression == "S" ~ 3,
      TRUE ~ NA_real_
    )
  )

# Load required packages
library(ggplot2)
library(emmeans)

# Calculate estimated marginal means (swap expression and sub_type)
emm <- emmeans(anova_first_prefer, ~ sub_type * expression)

# Convert to dataframe and plot
interaction_plot <- ggplot(as.data.frame(emm), 
                          aes(x = sub_type, y = emmean, 
                              color = expression, group = expression)) +
  geom_line(size = 1) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = emmean - SE, ymax = emmean + SE), 
                width = 0.1, size = 1) +
  labs(x = "Subject Type (Group)", 
       y = "Estimated Marginal Mean of Rate", 
       color = "Expression") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Display the plot
print(interaction_plot)

# Create the plot
p2 <- ggplot(anova_plot_data, aes(x = expression_num, y = mean_rate, group = sub_type, color = sub_type)) +
  geom_point(position = position_dodge(width = 0.2), size = 3) +
  geom_errorbar(aes(ymin = mean_rate - se, ymax = mean_rate + se), 
                position = position_dodge(width = 0.2), width = 0.2) +
  # geom_text(aes(label = sig_annot), color = 'black', vjust = -1, position = position_dodge(width = 0.2)) + 
  scale_x_continuous(breaks = c(1, 2, 3), labels = c("F", "H", "S")) +  # Set custom x-axis labels
  labs(
    x = "Expression",
    y = "Mean Rate",
    title = "ANOVA Results: First Gaze Rate by Expression and Group",
    color = "Group"  # Add legend title for sub_type
  ) +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white"),
    axis.line = element_blank(),
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)  # Center-align the title
  ) +
  scale_color_manual(values = c("HC" = "#f9d580", "MDD" = "#99b9e9"))  # Set custom colors for groups

# Print the plot
print(p2)

# Load required libraries
library(emmeans)
library(ggplot2)

# Calculate estimated marginal means (EMMs) with 95% CIs
emm <- emmeans(anova_first_prefer, ~ expression * sub_type)
emm_df <- as.data.frame(emm)

# Create interaction plot
ggplot(emm_df, aes(x = expression, y = emmean, fill = sub_type)) +
  geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
  geom_errorbar(aes(ymin = emmean - SE, ymax = emmean + SE),
                position = position_dodge(.7), width = 0.2) +
  labs(
    title = "ANOVA Results: First Fixation Preference",
    x = "Expression",
    y = "Mean Fixation Rate",
    fill = "Subject Type"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14, face = "bold")
  )

# Perform post hoc test on 'expression' variable
emmeans_result <- emmeans(anova_first_prefer, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_result)

## $emmeans
##  expression emmean     SE  df lower.CL upper.CL
##  F           0.602 0.0114 108    0.579    0.624
##  H           0.531 0.0114 108    0.508    0.553
##  S           0.564 0.0115 108    0.541    0.587
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate     SE  df t.ratio p.value
##  F - H      0.0714 0.0151 108   4.736  <.0001
##  F - S      0.0377 0.0162 108   2.329  0.0651
##  H - S     -0.0337 0.0162 108  -2.082  0.1191
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

# Prepare the data
first_prefer_data <- data4

# Ensure subnum is a factor
first_prefer_data$subnum <- as.factor(first_prefer_data$subnum)
first_prefer_data$sub_type <- as.factor(first_prefer_data$sub_type)
first_prefer_data$expression <- as.factor(first_prefer_data$expression)

# Perform Bayesian ANOVA
bf_anova <- anovaBF(rate ~ sub_type * expression + subnum, data = first_prefer_data, whichRandom = "subnum")

## Warning: data coerced from tibble to data frame

# Print the results
print(bf_anova)

## Bayes factor analysis
## --------------
## [1] sub_type + subnum                                    : 0.2080014 ±0.96%
## [2] expression + subnum                                  : 357.8966  ±1.02%
## [3] sub_type + expression + subnum                       : 186.4455  ±58.04%
## [4] sub_type + expression + sub_type:expression + subnum : 15.73633  ±2.49%
## 
## Against denominator:
##   rate ~ subnum 
## ---
## Bayes factor type: BFlinearModel, JZS

# Extract posterior samples from the full model (index 4)
# The full model includes sub_type, expression, their interaction, and subnum.
posterior_samples <- posterior(bf_anova, iterations = 1000, index = 4)

# # Inspect the column names in the posterior_samples object
# print(colnames(posterior_samples))

# Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# Calculate the difference between HC and MDD in sub_type
diff_HC_MDD <- posterior_samples[, "sub_type-HC"] - posterior_samples[, "sub_type-MDD"]

# Calculate the Bayes factor for the hypothesis
bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)

# Print the hypothesis test results
print(bf_hypothesis)

## [1] 0.2360939

# 引导中间 潜伏期
outlier.IQR <- function(x, multiple = 1.5) {
  q <- quantile(x, na.rm = TRUE) #四分位间距3倍间距以外的认为是离群值
  IQR <- q[4] - q[2]
  x1 <- q[2] - multiple * IQR 
  x2 <- q[4] + multiple * IQR
  return(c(x1, x2))
}


data3 <- my_data %>% 
  filter(trial_type == 'centered', AOI_emo==1|AOI_emo==2) %>%
  group_by(subnum, Ntrial, sub_type, expression, AOI_emo) %>% 
  mutate(order=1:n()) %>% 
  ungroup() %>% 
  filter(order==1) %>% 
  mutate(latency=startT-Tstart,AOI_emo=factor(AOI_emo)) %>% 
  # filter(latency<=2500, latency>=120) %>%  # need papers to support
  group_by(subnum, Ntrial, sub_type, trial_type, expression) %>%
  summarise(
    emo_latency = latency[AOI_emo == 1],
    neu_latency = latency[AOI_emo == 2],
    emo_neu_latency = emo_latency - neu_latency
  ) %>%
  ungroup() %>% 
  # kick outlier within subject
  group_by(subnum) %>%
  mutate(
    IQR_L = outlier.IQR(emo_neu_latency)[1],
    IQR_H = outlier.IQR(emo_neu_latency)[2],
    valid1 = if_else(!is.na(IQR_L) & !is.na(IQR_H) & emo_neu_latency >= IQR_L & emo_neu_latency <= IQR_H, 1, 0)
  ) %>%
  filter(valid1 == 1) %>%
  ungroup()

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `summarise()` has grouped output by 'subnum', 'Ntrial', 'sub_type',
## 'trial_type', 'expression'. You can override using the `.groups` argument.

sumrepdat3E_sub <- summarySE(data3, measurevar = "emo_neu_latency", 
                   groupvars=c('subnum',"sub_type","trial_type","expression"))

## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced

# Test if the emo - neu first fixation latency within each bs and ws factor is significantly higher than 0.5
t_test_results <- data3 %>%
  group_by(sub_type, expression) %>%
  summarise(t_test = list(t.test(emo_neu_latency, mu = 0, alternative = "two.sided"))) %>%
  mutate(p_value = map_dbl(t_test, "p.value"),
         statistic = map_dbl(t_test, "statistic"),
         conf_int = map(t_test, "conf.int"))

## `summarise()` has grouped output by 'sub_type'. You can override using the
## `.groups` argument.

# Apply Bonferroni correction
num_tests <- nrow(t_test_results)
t_test_results <- t_test_results %>%
  mutate(p_value_bonferroni = p.adjust(p_value, method = "bonferroni", n = num_tests))

# Print the t-test results
print(t_test_results)

## # A tibble: 6 × 7
## # Groups:   sub_type [2]
##   sub_type expression t_test   p_value statistic conf_int  p_value_bonferroni
##   <chr>    <chr>      <list>     <dbl>     <dbl> <list>                 <dbl>
## 1 HC       F          <htest> 1.39e-12     -7.23 <dbl [2]>           8.35e-12
## 2 HC       H          <htest> 3.81e- 2     -2.08 <dbl [2]>           2.28e- 1
## 3 HC       S          <htest> 5.02e- 4     -3.50 <dbl [2]>           3.01e- 3
## 4 MDD      F          <htest> 5.22e-12     -7.02 <dbl [2]>           3.13e-11
## 5 MDD      H          <htest> 1.61e- 3     -3.17 <dbl [2]>           9.68e- 3
## 6 MDD      S          <htest> 2.60e- 4     -3.67 <dbl [2]>           1.56e- 3

# Perform ANOVA for first fixation latency
anova_fix_latency <- aov_ez(
  data = sumrepdat3E_sub,
  id = 'subnum',
  dv = 'emo_neu_latency_mean',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type'
)

## Converting to factor: sub_type

## Warning: Missing values for 2 ID(s), which were removed before analysis:
## MD029_, MD039_
## Below the first few rows (in wide format) of the removed cases with missing data.
##      subnum sub_type   F         H    S
## # 55 MD029_      MDD 220        NA   NA
## # 58 MD039_      MDD  NA -370.6667 -492

## Contrasts set to contr.sum for the following variables: sub_type

# Summarize the ANOVA results
summary(anova_fix_latency)

## Warning in summary.Anova.mlm(object$Anova, multivariate = FALSE): HF eps > 1
## treated as 1

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                      Sum Sq num Df Error SS den Df F value    Pr(>F)    
## (Intercept)         4710491      1  5387676    106 92.6767  3.88e-16 ***
## sub_type                128      1  5387676    106  0.0025 0.9600225    
## expression           593069      2  8806917    212  7.1382 0.0009997 ***
## sub_type:expression   16555      2  8806917    212  0.1993 0.8194925    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.98694 0.50141
## sub_type:expression        0.98694 0.50141
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])   
## expression          0.98711   0.001057 **
## sub_type:expression 0.98711   0.816722   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       HF eps   Pr(>F[HF])
## expression          1.005715 0.0009996549
## sub_type:expression 1.005715 0.8194924540

# Perform post hoc test on 'sub_type' variable
emmeans_latency_result <- emmeans(anova_fix_latency, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_latency_result)

## $emmeans
##  expression emmean   SE  df lower.CL upper.CL
##  F          -180.2 22.9 106     -226   -134.8
##  H           -77.6 18.9 106     -115    -40.0
##  S          -107.0 19.4 106     -146    -68.5
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df t.ratio p.value
##  F - H      -102.6 27.1 106  -3.791  0.0007
##  F - S       -73.2 29.5 106  -2.480  0.0441
##  H - S        29.4 27.3 106   1.078  0.8501
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

#####
# Perform Bayesian ANOVA for emo_neu_latency
latency_data <- data3

# Ensure subnum is a factor
latency_data$subnum <- as.factor(latency_data$subnum)
latency_data$sub_type <- as.factor(latency_data$sub_type)
latency_data$expression <- as.factor(latency_data$expression)

# Perform Bayesian ANOVA
bf_anova_latency <- anovaBF(emo_neu_latency ~ sub_type * expression + subnum, data = latency_data, whichRandom = "subnum")

## Warning: data coerced from tibble to data frame

# Print the results
print(bf_anova_latency)

## Bayes factor analysis
## --------------
## [1] sub_type + subnum                                    : 0.05421859 ±1.15%
## [2] expression + subnum                                  : 60.00924   ±1.11%
## [3] sub_type + expression + subnum                       : 3.326937   ±2.03%
## [4] sub_type + expression + sub_type:expression + subnum : 0.02162887 ±1.44%
## 
## Against denominator:
##   emo_neu_latency ~ subnum 
## ---
## Bayes factor type: BFlinearModel, JZS

# Extract posterior samples from the full model (index 4)
# The full model includes sub_type, expression, their interaction, and subnum.
posterior_samples_latency <- posterior(bf_anova_latency, iterations = 1000, index = 4)

# Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# Calculate the difference between HC and MDD in sub_type
diff_HC_MDD <- posterior_samples_latency[, "sub_type-HC"] - posterior_samples_latency[, "sub_type-MDD"]

# Calculate the Bayes factor for the hypothesis
bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)

# Print the hypothesis test results
print(bf_hypothesis) # 3.016, which suppport the hypothesis that HC in sub_type is different from MDD in sub_type

## [1] 1.544529

# 引导条件下首滞留
outlier.IQR <- function(x, multiple = 1.5) {
  q <- quantile(x, na.rm = TRUE) #四分位间距3倍间距以外的认为是离群值
  IQR <- q[4] - q[2]
  x1 <- q[2] - multiple * IQR 
  x2 <- q[4] + multiple * IQR
  return(c(x1, x2))
}

# Function to check continuity of index numbers within each group
check_continuity <- function(df) {
  smallest_index <- min(df$index)
  df <- df %>% arrange(index)
  for (i in seq_along(df$index)) {
    if (i == 1) next
    if (df$index[i] != df$index[i - 1] + 1) {
      return(df[1:(i - 1), ])
    }
  }
  return(df)
}

# Function to process data for a given trial type and AOI_emo
process_data <- function(data, Trial_type, aoi_emo) {
  data <- data %>%
    filter(trial_type == Trial_type) %>% 
    filter(AOI_emo == aoi_emo) %>%
    group_by(subnum, Ntrial) %>%
    group_modify(~ check_continuity(.x)) %>%
    mutate(dwell_time = sum(duration, na.rm = TRUE)) %>%
    ungroup()
  return(data)
}

# Process data for data2_emo
data2_emo <- process_data(my_data, 'guided_emo', 1)

# Create new variable data2_emo_dwell
data2_emo_dwell <- data2_emo %>%
  select(subnum, sub_type, trial_type, Ntrial, expression, duration) %>%
  group_by(subnum, Ntrial) %>%
  mutate(dwell_emo = sum(duration, na.rm = TRUE)) %>%
  ungroup() %>% 
  select(-duration) %>%  # Drop the duration column
  distinct(subnum, Ntrial, .keep_all = TRUE)  # Drop duplicate rows based on subnum and Ntrial

# The first dwell time for guided_emo stimulus (emo dwell)
data2_emo_dwell_subed <- data2_emo_dwell %>%
  # kick outlier within subject
  group_by(subnum) %>%
  mutate(
    IQR_L = outlier.IQR(dwell_emo)[1],
    IQR_H = outlier.IQR(dwell_emo)[2],
    valid1 = if_else(!is.na(IQR_L) & !is.na(IQR_H) & dwell_emo >= IQR_L & dwell_emo <= IQR_H, 1, 0)
  ) %>%
  filter(valid1 == 1) %>%
  ungroup()

# Process data for data2_neu
data2_neu <- process_data(my_data, 'guided_neu', 2)

# Create new variable data2_neu_dwell
data2_neu_dwell <- data2_neu %>%
  select(subnum, sub_type, trial_type, Ntrial, expression, duration) %>%
  group_by(subnum, Ntrial) %>%
  mutate(dwell_neu = sum(duration, na.rm = TRUE)) %>%
  ungroup() %>% 
  select(-duration) %>%  # Drop the duration column
  distinct(subnum, Ntrial, .keep_all = TRUE)  # Drop duplicate rows based on subnum and Ntrial


# The first dwell time for guided_neu stimulus (neu dwell)
data2_neu_dwell_subed <- data2_neu_dwell %>%
  # kick outlier within subject
  group_by(subnum) %>%
  mutate(
    IQR_L = outlier.IQR(dwell_neu)[1],
    IQR_H = outlier.IQR(dwell_neu)[2],
    valid1 = if_else(!is.na(IQR_L) & !is.na(IQR_H) & dwell_neu >= IQR_L & dwell_neu <= IQR_H, 1, 0)
  ) %>%
  filter(valid1 == 1) %>%
  ungroup()

# Summary statistics for emo_neu_dwell
sumrepdat2E_sub <- summarySE(data2_emo_dwell_subed, measurevar = "dwell_emo",
                             groupvars=c('subnum',"sub_type", "trial_type", "expression"))

sumrepdat2N_sub <- summarySE(data2_neu_dwell_subed, measurevar = "dwell_neu",
                             groupvars=c('subnum',"sub_type", "trial_type", "expression"))

# Perform ANOVA for first dwell on emo 
anova_emo_dwell <- aov_ez(
  data = sumrepdat2E_sub,
  id = 'subnum',
  dv = 'dwell_emo_mean',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type',
  effect_size = "pes" # partial eta-squared
)

## Converting to factor: sub_type

## Contrasts set to contr.sum for the following variables: sub_type

anova_emo_dwell

## Anova Table (Type 3 tests)
## 
## Response: dwell_emo_mean
##                Effect           df       MSE    F   ges p.value
## 1            sub_type       1, 108 173830.56 0.04 <.001    .840
## 2          expression 1.95, 210.62  10918.44 1.93  .002    .148
## 3 sub_type:expression 1.95, 210.62  10918.44 0.36 <.001    .694
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
## 
## Sphericity correction method: GG

# Summarize the ANOVA results
summary(anova_emo_dwell)

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                       Sum Sq num Df Error SS den Df  F value Pr(>F)    
## (Intercept)         94100379      1 18773700    108 541.3339 <2e-16 ***
## sub_type                7127      1 18773700    108   0.0410 0.8399    
## expression             41191      2  2299690    216   1.9344 0.1470    
## sub_type:expression     7619      2  2299690    216   0.3578 0.6996    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.97448 0.25079
## sub_type:expression        0.97448 0.25079
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])
## expression          0.97511     0.1482
## sub_type:expression 0.97511     0.6943
## 
##                        HF eps Pr(>F[HF])
## expression          0.9928109  0.1473436
## sub_type:expression 0.9928109  0.6980929

# # Perform post hoc test on 'expression' variable
# emmeans_result <- emmeans(anova_emo_dwell, pairwise ~ expression, adjust = "bonferroni")
# 
# # Print the post hoc test results
# print(emmeans_result)

plot_emo_dwell_data <- data2_emo_dwell %>%
  group_by(sub_type, expression) %>%
  summarise(
    dwell_emo_mean = mean(dwell_emo, na.rm = TRUE),
    sd = sd(dwell_emo, na.rm = TRUE),   # Standard deviation
    n = n(),
    .groups = "drop"
  ) %>%
  mutate(
    se = sd / sqrt(n),  # Standard error = standard deviation / sqrt(sample size)
    expression_num = case_when(
      expression == "F" ~ 1,
      expression == "H" ~ 2,
      expression == "S" ~ 3,
      TRUE ~ NA_real_
    ),
    sub_type = factor(sub_type, levels = c("HC", "MDD"))
  ) |> 
  ungroup()

# Create the plot
p4 <- ggplot(plot_emo_dwell_data, aes(x = expression_num, y = dwell_emo_mean, group = sub_type, color = sub_type)) +
  geom_point(position = position_dodge(width = 0.2), size = 3) +
  geom_errorbar(aes(ymin = dwell_emo_mean - se, ymax = dwell_emo_mean + se), 
                position = position_dodge(width = 0.2), width = 0.2) +
  scale_color_manual(values = c("HC" = "#f9d580", "MDD" = "#99b9e9")) +
  scale_x_continuous(breaks = c(1, 2, 3), labels = c("Fear", "Happy", "Sad")) +
  labs(
    x = "Emotion",
    y = "Mean First Dwell Time (ms)",
    title = "First Dwell Time on Emotional Faces by Group",
    color = "Group"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 22),                # all text
    axis.title = element_text(face = "bold", size = 22),
    axis.text.x = element_text(size = 22),
    axis.text.y = element_text(size = 22),
    strip.text = element_text(face = "bold", size = 22),
    legend.text = element_text(size = 22),
    legend.title = element_text(size = 22),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 23),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white"),
    strip.background = element_rect(fill = "lightgray", color = "black"),
    legend.position = "bottom"
  )

p4

ggsave("./result pics/first_dwell_time_emo_v2.png", plot = p4, width = 8, height = 5, dpi = 300)

# Perform ANOVA for first dwell on neu
anova_neu_dwell <- aov_ez(
  data = sumrepdat2N_sub,
  id = 'subnum',
  dv = 'dwell_neu_mean',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type'
)

## Converting to factor: sub_type

## Contrasts set to contr.sum for the following variables: sub_type

anova_neu_dwell

## Anova Table (Type 3 tests)
## 
## Response: dwell_neu_mean
##                Effect           df       MSE    F   ges p.value
## 1            sub_type       1, 108 127213.98 0.25  .002    .622
## 2          expression 1.86, 200.72   4071.53 1.99  .001    .143
## 3 sub_type:expression 1.86, 200.72   4071.53 0.47 <.001    .611
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
## 
## Sphericity correction method: GG

# Summarize the ANOVA results
summary(anova_neu_dwell)

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                       Sum Sq num Df Error SS den Df  F value Pr(>F)    
## (Intercept)         85998470      1 13739110    108 676.0143 <2e-16 ***
## sub_type               31186      1 13739110    108   0.2451 0.6215    
## expression             15026      2   817252    216   1.9857 0.1398    
## sub_type:expression     3569      2   817252    216   0.4716 0.6246    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.92389 0.01448
## sub_type:expression        0.92389 0.01448
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])
## expression          0.92928     0.1433
## sub_type:expression 0.92928     0.6106
## 
##                        HF eps Pr(>F[HF])
## expression          0.9448817  0.1425377
## sub_type:expression 0.9448817  0.6137841

# Perform post hoc test on 'expression' variable
emmeans_result <- emmeans(anova_neu_dwell, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_result)

## $emmeans
##  expression emmean   SE  df lower.CL upper.CL
##  F             526 21.8 108      482      569
##  H             512 20.5 108      472      553
##  S             510 18.9 108      473      548
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df t.ratio p.value
##  F - H        13.4 7.88 108   1.697  0.2780
##  F - S        15.4 9.47 108   1.623  0.3225
##  H - S         2.0 7.69 108   0.260  1.0000
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

plot_neu_dwell_data <- data2_neu_dwell %>%
  group_by(sub_type, expression) %>%
  summarise(
    dwell_neu_mean = mean(dwell_neu, na.rm = TRUE),
    sd = sd(dwell_neu, na.rm = TRUE),   # Standard deviation
    n = n(),
    .groups = "drop"
  ) %>%
  mutate(
    se = sd / sqrt(n),  # Standard error = standard deviation / sqrt(sample size)
    expression_num = case_when(
      expression == "F" ~ 1,
      expression == "H" ~ 2,
      expression == "S" ~ 3,
      TRUE ~ NA_real_
    ),
    sub_type = factor(sub_type, levels = c("HC", "MDD"))
  ) |> 
  ungroup()

# Create the plot
p5 <- ggplot(plot_neu_dwell_data, aes(x = expression_num, y = dwell_neu_mean, group = sub_type, color = sub_type)) +
  geom_point(position = position_dodge(width = 0.2), size = 3) +
  geom_errorbar(aes(ymin = dwell_neu_mean - se, ymax = dwell_neu_mean + se), 
                position = position_dodge(width = 0.2), width = 0.2) +
  scale_color_manual(values = c("HC" = "#f9d580", "MDD" = "#99b9e9")) +
  scale_x_continuous(breaks = c(1, 2, 3), labels = c("Fear", "Happy", "Sad")) +
  labs(
    x = "Emotion",
    y = "Mean First Dwell Time (ms)",
    title = "First Dwell Time on Neutral Faces by Group",
    color = "Group"
  ) +
  theme_bw() +
  theme(
    text = element_text(size = 22),                # all text
    axis.title = element_text(face = "bold", size = 22),
    axis.text.x = element_text(size = 22),
    axis.text.y = element_text(size = 22),
    strip.text = element_text(face = "bold", size = 22),
    legend.text = element_text(size = 22),
    legend.title = element_text(size = 22),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 23),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white"),
    strip.background = element_rect(fill = "lightgray", color = "black"),
    legend.position = "bottom"
  )

p5

ggsave("./result pics/first_dwell_time_neu_v2.png", plot = p5, width = 8, height = 5, dpi = 300)

# Perform Bayesian ANOVA for emo_dwell
dwell_emo_data <- data2_emo_dwell_subed

# Ensure subnum is a factor
dwell_emo_data$subnum <- as.factor(dwell_emo_data$subnum)
dwell_emo_data$sub_type <- as.factor(dwell_emo_data$sub_type)
dwell_emo_data$expression <- as.factor(dwell_emo_data$expression)

# Perform Bayesian ANOVA
bf_anova_dwell_emo <- anovaBF(dwell_emo ~ sub_type * expression + subnum, data = dwell_emo_data, whichRandom = "subnum")

## Warning: data coerced from tibble to data frame

# Print the results
print(bf_anova_dwell_emo)

## Bayes factor analysis
## --------------
## [1] sub_type + subnum                                    : 0.1743482    ±9.87%
## [2] expression + subnum                                  : 0.04495238   ±0.89%
## [3] sub_type + expression + subnum                       : 0.007047586  ±1.5%
## [4] sub_type + expression + sub_type:expression + subnum : 7.647644e-05 ±12.76%
## 
## Against denominator:
##   dwell_emo ~ subnum 
## ---
## Bayes factor type: BFlinearModel, JZS

# Extract posterior samples from the full model (index 4)
# The full model includes sub_type, expression, their interaction, and subnum.
posterior_samples_dwell_emo <- posterior(bf_anova_dwell_emo, iterations = 1000, index = 4)

# summary(posterior_samples_dwell_emo)

# Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# Calculate the difference between HC and MDD in sub_type
diff_HC_MDD <- posterior_samples_dwell_emo[, "sub_type-HC"] - posterior_samples_dwell_emo[, "sub_type-MDD"]

# Calculate the Bayes factor for the hypothesis
bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)

# Print the hypothesis test results
print(bf_hypothesis) # 0.1641, which suppport the hypothesis that HC in sub_type is not different from MDD in sub_type

## [1] 0.7699115

# Perform Bayesian ANOVA for neu_dwell
dwell_neu_data <- data2_neu_dwell_subed

# Ensure subnum is a factor
dwell_neu_data$subnum <- as.factor(dwell_neu_data$subnum)
dwell_neu_data$sub_type <- as.factor(dwell_neu_data$sub_type)
dwell_neu_data$expression <- as.factor(dwell_neu_data$expression)

# Perform Bayesian ANOVA
bf_anova_dwell_neu <- anovaBF(dwell_neu ~ sub_type * expression + subnum, data = dwell_neu_data, whichRandom = "subnum")

## Warning: data coerced from tibble to data frame

# Print the results
print(bf_anova_dwell_neu)

## Bayes factor analysis
## --------------
## [1] sub_type + subnum                                    : 0.1731454    ±3.64%
## [2] expression + subnum                                  : 0.01012408   ±1.06%
## [3] sub_type + expression + subnum                       : 0.001594814  ±1%
## [4] sub_type + expression + sub_type:expression + subnum : 1.310725e-05 ±7.96%
## 
## Against denominator:
##   dwell_neu ~ subnum 
## ---
## Bayes factor type: BFlinearModel, JZS

# Extract posterior samples from the full model (index 4)
# The full model includes sub_type, expression, their interaction, and subnum.
posterior_samples_dwell_neu <- posterior(bf_anova_dwell_neu, iterations = 1000, index = 4)

# summary(posterior_samples_dwell_neu)

# Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# Calculate the difference between HC and MDD in sub_type
diff_HC_MDD <- posterior_samples_dwell_neu[, "sub_type-HC"] - posterior_samples_dwell_neu[, "sub_type-MDD"]

# Calculate the Bayes factor for the hypothesis
bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)

# Print the hypothesis test results
print(bf_hypothesis) # 0.0976, which suppport the hypothesis that HC in sub_type is not different from MDD in sub_type

## [1] 0.4224751

# library(BayesFactor)
# 
# # Prepare the data
# first_prefer_data <- data4
# 
# # Ensure subnum is a factor
# first_prefer_data$subnum <- as.factor(first_prefer_data$subnum)
# first_prefer_data$sub_type <- as.factor(first_prefer_data$sub_type)
# first_prefer_data$expression <- as.factor(first_prefer_data$expression)
# 
# # Perform Bayesian ANOVA
# bf_anova <- anovaBF(rate ~ sub_type * expression + subnum, data = first_prefer_data, whichRandom = "subnum")
# 
# # Print the results
# print(bf_anova)
# 
# # Extract posterior samples from the full model (index 4)
# # The full model includes sub_type, expression, their interaction, and subnum.
# posterior_samples <- posterior(bf_anova, iterations = 1000, index = 4)
# 
# # # Inspect the column names in the posterior_samples object
# # print(colnames(posterior_samples))
# 
# # Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# # Calculate the difference between HC and MDD in sub_type
# diff_HC_MDD <- posterior_samples[, "sub_type-HC"] - posterior_samples[, "sub_type-MDD"]
# 
# # Calculate the Bayes factor for the hypothesis
# bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)
# 
# # Print the hypothesis test results
# print(bf_hypothesis) # 0.1429, which suppport the hypothesis that HC in sub_type is different from MDD in sub_type
# 
# #####
# # Perform Bayesian ANOVA for emo_neu_latency
# latency_data <- data3
# 
# # Ensure subnum is a factor
# latency_data$subnum <- as.factor(latency_data$subnum)
# latency_data$sub_type <- as.factor(latency_data$sub_type)
# latency_data$expression <- as.factor(latency_data$expression)
# 
# # Perform Bayesian ANOVA
# bf_anova_latency <- anovaBF(emo_neu_latency ~ sub_type * expression + subnum, data = latency_data, whichRandom = "subnum")
# 
# # Print the results
# print(bf_anova_latency)
# 
# # Extract posterior samples from the full model (index 4)
# # The full model includes sub_type, expression, their interaction, and subnum.
# posterior_samples_latency <- posterior(bf_anova_latency, iterations = 1000, index = 4)
# 
# # Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# # Calculate the difference between HC and MDD in sub_type
# diff_HC_MDD <- posterior_samples_latency[, "sub_type-HC"] - posterior_samples_latency[, "sub_type-MDD"]
# 
# # Calculate the Bayes factor for the hypothesis
# bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)
# 
# # Print the hypothesis test results
# print(bf_hypothesis) # 3.016, which suppport the hypothesis that HC in sub_type is different from MDD in sub_type
# 
# # Perform Bayesian ANOVA for emo_dwell
# dwell_emo_data <- data2_emo_dwell_subed
# 
# # Ensure subnum is a factor
# dwell_emo_data$subnum <- as.factor(dwell_emo_data$subnum)
# dwell_emo_data$sub_type <- as.factor(dwell_emo_data$sub_type)
# dwell_emo_data$expression <- as.factor(dwell_emo_data$expression)
# 
# # Perform Bayesian ANOVA
# bf_anova_dwell_emo <- anovaBF(dwell_emo ~ sub_type * expression + subnum, data = dwell_emo_data, whichRandom = "subnum")
# 
# # Print the results
# print(bf_anova_dwell_emo)
# 
# # Extract posterior samples from the full model (index 4)
# # The full model includes sub_type, expression, their interaction, and subnum.
# posterior_samples_dwell_emo <- posterior(bf_anova_dwell_emo, iterations = 1000, index = 4)
# 
# # summary(posterior_samples_dwell_emo)
# 
# # Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# # Calculate the difference between HC and MDD in sub_type
# diff_HC_MDD <- posterior_samples_dwell_emo[, "sub_type-HC"] - posterior_samples_dwell_emo[, "sub_type-MDD"]
# 
# # Calculate the Bayes factor for the hypothesis
# bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)
# 
# # Print the hypothesis test results
# print(bf_hypothesis) # 0.1641, which suppport the hypothesis that HC in sub_type is not different from MDD in sub_type
# 
# # Perform Bayesian ANOVA for neu_dwell
# dwell_neu_data <- data2_neu_dwell_subed
# 
# # Ensure subnum is a factor
# dwell_neu_data$subnum <- as.factor(dwell_neu_data$subnum)
# dwell_neu_data$sub_type <- as.factor(dwell_neu_data$sub_type)
# dwell_neu_data$expression <- as.factor(dwell_neu_data$expression)
# 
# # Perform Bayesian ANOVA
# bf_anova_dwell_neu <- anovaBF(dwell_neu ~ sub_type * expression + subnum, data = dwell_neu_data, whichRandom = "subnum")
# 
# # Print the results
# print(bf_anova_dwell_neu)
# 
# # Extract posterior samples from the full model (index 4)
# # The full model includes sub_type, expression, their interaction, and subnum.
# posterior_samples_dwell_neu <- posterior(bf_anova_dwell_neu, iterations = 1000, index = 4)
# 
# # summary(posterior_samples_dwell_neu)
# 
# # Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# # Calculate the difference between HC and MDD in sub_type
# diff_HC_MDD <- posterior_samples_dwell_neu[, "sub_type-HC"] - posterior_samples_dwell_neu[, "sub_type-MDD"]
# 
# # Calculate the Bayes factor for the hypothesis
# bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)
# 
# # Print the hypothesis test results
# print(bf_hypothesis) # 0.0976, which suppport the hypothesis that HC in sub_type is not different from MDD in sub_type

# 引导条件下首滞留
outlier.IQR <- function(x, multiple = 1.5) {
  q <- quantile(x, na.rm = TRUE) #四分位间距3倍间距以外的认为是离群值
  IQR <- q[4] - q[2]
  x1 <- q[2] - multiple * IQR 
  x2 <- q[4] + multiple * IQR
  return(c(x1, x2))
}

# Function to check continuity of index numbers within each group
check_continuity <- function(df) {
  smallest_index <- min(df$index)
  df <- df %>% arrange(index)
  for (i in seq_along(df$index)) {
    if (i == 1) next
    if (df$index[i] != df$index[i - 1] + 1) {
      return(df[1:(i - 1), ])
    }
  }
  return(df)
}

# Function to process data for a given trial type and AOI_emo
process_data <- function(data, Trial_type, aoi_emo) {
  data <- data %>%
    filter(trial_type == Trial_type) %>% 
    filter(AOI_emo == aoi_emo) %>%
    group_by(subnum, Ntrial) %>%
    group_modify(~ check_continuity(.x)) %>%
    mutate(dwell_time = sum(duration, na.rm = TRUE)) %>%
    ungroup()
  return(data)
}

# Process data for data5_emo
data5_emo <- process_data(my_data, 'guided_emo', 1)

# Create new variable data5_emo_dwell
data5_emo_dwell <- data2_emo %>%
  select(subnum, sub_type, trial_type, Ntrial, expression, duration) %>%
  group_by(subnum, Ntrial) %>%
  mutate(dwell_emo = sum(duration, na.rm = TRUE)) %>%
  ungroup() %>% 
  select(-duration) %>%  # Drop the duration column
  distinct(subnum, Ntrial, .keep_all = TRUE)  # Drop duplicate rows based on subnum and Ntrial

# The first dwell time for guided_emo stimulus (emo dwell)
data5_emo_dwell_subed <- data5_emo_dwell %>%
  # kick outlier within subject
  group_by(subnum) %>%
  mutate(
    IQR_L = outlier.IQR(dwell_emo)[1],
    IQR_H = outlier.IQR(dwell_emo)[2],
    valid1 = if_else(!is.na(IQR_L) & !is.na(IQR_H) & dwell_emo >= IQR_L & dwell_emo <= IQR_H, 1, 0)
  ) %>%
  filter(valid1 == 1) %>%
  ungroup()

# Process data for data5_neu
data5_neu <- process_data(my_data, 'guided_neu', 2)

# Create new variable data5_neu_dwell
data5_neu_dwell <- data5_neu %>%
  select(subnum, sub_type, trial_type, Ntrial, expression, duration) %>%
  group_by(subnum, Ntrial) %>%
  mutate(dwell_neu = sum(duration, na.rm = TRUE)) %>%
  ungroup() %>% 
  select(-duration) %>%  # Drop the duration column
  distinct(subnum, Ntrial, .keep_all = TRUE)  # Drop duplicate rows based on subnum and Ntrial


# The first dwell time for guided_neu stimulus (neu dwell)
data5_neu_dwell_subed <- data5_neu_dwell %>%
  # kick outlier within subject
  group_by(subnum) %>%
  mutate(
    IQR_L = outlier.IQR(dwell_neu)[1],
    IQR_H = outlier.IQR(dwell_neu)[2],
    valid1 = if_else(!is.na(IQR_L) & !is.na(IQR_H) & dwell_neu >= IQR_L & dwell_neu <= IQR_H, 1, 0)
  ) %>%
  filter(valid1 == 1) %>%
  ungroup()

# Summary statistics for emo_neu_dwell
sumrepdat5E_sub <- summarySE(data5_emo_dwell_subed, measurevar = "dwell_emo", 
                             groupvars=c('subnum',"sub_type", "trial_type", "expression")) 

sumrepdat5N_sub <- summarySE(data5_neu_dwell_subed, measurevar = "dwell_neu", 
                             groupvars=c('subnum',"sub_type", "trial_type", "expression")) 

# Perform ANOVA for first dwell on emo 
anova_emo_dwell <- aov_ez(
  data = sumrepdat5E_sub,
  id = 'subnum',
  dv = 'dwell_emo_mean',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type'
)

## Converting to factor: sub_type

## Contrasts set to contr.sum for the following variables: sub_type

# Summarize the ANOVA results
summary(anova_emo_dwell)

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                       Sum Sq num Df Error SS den Df  F value Pr(>F)    
## (Intercept)         94100379      1 18773700    108 541.3339 <2e-16 ***
## sub_type                7127      1 18773700    108   0.0410 0.8399    
## expression             41191      2  2299690    216   1.9344 0.1470    
## sub_type:expression     7619      2  2299690    216   0.3578 0.6996    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.97448 0.25079
## sub_type:expression        0.97448 0.25079
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])
## expression          0.97511     0.1482
## sub_type:expression 0.97511     0.6943
## 
##                        HF eps Pr(>F[HF])
## expression          0.9928109  0.1473436
## sub_type:expression 0.9928109  0.6980929

# Perform post hoc test on 'expression' variable
emmeans_result <- emmeans(anova_emo_dwell, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_result)

## $emmeans
##  expression emmean   SE  df lower.CL upper.CL
##  F             552 26.1 108      501      604
##  H             542 25.3 108      492      592
##  S             525 22.1 108      481      569
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df t.ratio p.value
##  F - H        10.7 15.0 108   0.715  1.0000
##  F - S        27.4 13.0 108   2.108  0.1120
##  H - S        16.7 14.1 108   1.184  0.7165
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

# Perform ANOVA for first dwell on neu
anova_neu_dwell <- aov_ez(
  data = sumrepdat5N_sub,
  id = 'subnum',
  dv = 'dwell_neu_mean',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type'
)

## Converting to factor: sub_type
## Contrasts set to contr.sum for the following variables: sub_type

# Summarize the ANOVA results
summary(anova_neu_dwell)

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                       Sum Sq num Df Error SS den Df  F value Pr(>F)    
## (Intercept)         85998470      1 13739110    108 676.0143 <2e-16 ***
## sub_type               31186      1 13739110    108   0.2451 0.6215    
## expression             15026      2   817252    216   1.9857 0.1398    
## sub_type:expression     3569      2   817252    216   0.4716 0.6246    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.92389 0.01448
## sub_type:expression        0.92389 0.01448
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])
## expression          0.92928     0.1433
## sub_type:expression 0.92928     0.6106
## 
##                        HF eps Pr(>F[HF])
## expression          0.9448817  0.1425377
## sub_type:expression 0.9448817  0.6137841

# Perform post hoc test on 'expression' variable
emmeans_result <- emmeans(anova_neu_dwell, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_result)

## $emmeans
##  expression emmean   SE  df lower.CL upper.CL
##  F             526 21.8 108      482      569
##  H             512 20.5 108      472      553
##  S             510 18.9 108      473      548
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df t.ratio p.value
##  F - H        13.4 7.88 108   1.697  0.2780
##  F - S        15.4 9.47 108   1.623  0.3225
##  H - S         2.0 7.69 108   0.260  1.0000
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

CP analysis

Quentin Lam

2024-11-26