CP analysis

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(bruceR)

## 
## bruceR (v0.8.10)
## BRoadly Useful Convenient and Efficient R functions
## 
## Packages also loaded:
## ✔ data.table ✔ emmeans
## ✔ dplyr      ✔ lmerTest
## ✔ tidyr      ✔ effectsize
## ✔ stringr    ✔ performance
## ✔ ggplot2    ✔ interactions
## 
## Main functions of `bruceR`:
## cc()             Describe()  TTEST()
## add()            Freq()      MANOVA()
## .mean()          Corr()      EMMEANS()
## set.wd()         Alpha()     PROCESS()
## import()         EFA()       model_summary()
## print_table()    CFA()       lavaan_summary()
## 
## For full functionality, please install all dependencies:
## install.packages("bruceR", dep=TRUE)
## 
## Online documentation:
## https://psychbruce.github.io/bruceR
## 
## 
## NEWS: A new version of bruceR (2024.6) is available (2024-06-13)!
## 
## ************** Update **************
## install.packages("bruceR", dep=TRUE)
## ************************************
## 
## 
## These packages are dependencies of `bruceR` but not installed:
## - pacman, ggtext, see, vars, phia, GGally, GPArotation
## 
## ***** Install all dependencies *****
## install.packages("bruceR", dep=TRUE)
## ************************************

#清空环境变量
rm(list = ls())
set.wd()

## ✔ Set working directory to "C:/Users/20191/Desktop/CP conference"

source('summarySE.R')

library(afex)

## ************
## Welcome to afex. For support visit: http://afex.singmann.science/
## - Functions for ANOVAs: aov_car(), aov_ez(), and aov_4()
## - Methods for calculating p-values with mixed(): 'S', 'KR', 'LRT', and 'PB'
## - 'afex_aov' and 'mixed' objects can be passed to emmeans() for follow-up tests
## - Get and set global package options with: afex_options()
## - Set sum-to-zero contrasts globally: set_sum_contrasts()
## - For example analyses see: browseVignettes("afex")
## ************
## 
## Attaching package: 'afex'
## 
## The following object is masked from 'package:lme4':
## 
##     lmer

library(emmeans) # for post hoc test
library(ggthemes)

## Warning: package 'ggthemes' was built under R version 4.3.1

library(cowplot)

## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:ggthemes':
## 
##     theme_map
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.3.1

## Registered S3 methods overwritten by 'broom':
##   method            from  
##   tidy.glht         jtools
##   tidy.summary.glht jtools
## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:cowplot':
## 
##     get_legend

library(rstatix)

## Warning: package 'rstatix' was built under R version 4.3.1

## 
## Attaching package: 'rstatix'
## 
## The following objects are masked from 'package:effectsize':
## 
##     cohens_d, eta_squared
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(effsize)

## Warning: package 'effsize' was built under R version 4.3.3

library(data.table)  

#===========筛选被试===========
# MD 005, MD 069， 是双相，排除
# MD 030, MD 019，MD 083 大脑信号异常，排除
# HC 109是废问卷
# HC 087，HC 072，HC 065大脑异常，排除
# HC 057，HC 002，缓解期抑郁，排除
# HC 024 可疑焦虑，排除
# HC 093，可疑PTSD，排除
# HC 116，高危排除，HC 115，有亲属是精神疾病
# MD 079 HAMD评分较低排除
# MD069,MD070,MD071,MD072,MD075,MD077,MD082,MD086随访转燥，排除

# my_data <- read_csv('edfData_DotFace_Allsubject_in_trial_fix.csv') %>% filter(subnum != 'HC029_')
excluded_subnums <- c('MD005_', 'MD069_', 'MD030_', 'MD019_', 'MD083_', 'HC109_', 
                      'HC087_', 'HC072_', 'HC065_', 'HC057_', 'HC002_', 'HC024_', 
                      'HC029_', 'HC093_', 'HC116_', 'HC115_', 'MD079_', 'MD070_',
                      'MD071_', 'MD072_', 'MD075_', 'MD077_', 'MD082_', 'MD086_')

my_data <- read_csv('edfData_DotFace_Allsubject_in_trial_fix.csv') %>% 
  filter(!subnum %in% excluded_subnums)

## Rows: 136014 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): subnum, sub_type, expression, L_stim, R_stim, emo_stim
## dbl (23): index, startT, endT, duration, avgX, avgY, avgPupil, AOI, AOI_gaze...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

my_data <- my_data %>% mutate(trial_type=case_when(
  trial_type == "1" ~ "guided_emo",
  trial_type == "2" ~ "guided_neu",
  trial_type == "3" ~ "centered",
  TRUE ~ NA_character_)) %>% 
  mutate(dwell_emo=case_when( 
    dwell_emo == "1" ~ "guided_emo",
    dwell_emo == "2" ~ "guided_neu",
    dwell_emo == "31" ~ "centered_emo",
    dwell_emo == "32" ~ "centered_neu")) %>% 
  filter(duration >= 60) %>% 
  group_by(subnum,Ntrial) %>% 
  mutate(continue_gaze=rleid(AOI_emo)) %>% 
  ungroup()
  

# 看看有效引导的百分比
my_data_1ratio <- my_data %>% group_by(subnum, sub_type, trial_type) %>%  
  select(c(subnum, sub_type, trial_type, guided_trial)) %>% 
  replace_na(list(guided_trial = 0)) %>% 
  summarise(across(everything(), ~mean(. == 1))) %>% ungroup()

## `summarise()` has grouped output by 'subnum', 'sub_type'. You can override
## using the `.groups` argument.

my_data <- my_data %>% filter(subnum != 'MD034_') # filtered because the guided trial rate is lower than 70%

subj_info <- read.csv('./AQ_SPIN/final_data.csv')

# add a '_' suffix to every value in the sub column in subj_info
subj_info$sub <- paste0(subj_info$sub, "_")

sub_info_summary <- my_data |> 
  left_join(subj_info, by = c("subnum" = "sub")) |> 
  # keep unique subnum values
  distinct(subnum, .keep_all = TRUE) |> 
  # count the number of values by each group by 'sub_type' column
  group_by(sub_type) |> 
  summarise(count = n(), 
  mean_age = mean(age, na.rm = TRUE),
  gender_count = list(table(gender))
  ) |> 
  ungroup()
  
knitr::kable(sub_info_summary)

sub_type	count	mean_age	gender_count
HC	47	26.08511	15, 32
MDD	63	25.96825	15, 48

# 首注视偏好
# 每个被试每种表情的注视偏好比例
data4 <- my_data %>% 
  filter(trial_type == 'centered', AOI_emo==1|AOI_emo==2) %>%
  group_by(subnum, Ntrial, sub_type, expression, AOI_emo) %>% 
  mutate(order=1:n()) %>% 
  ungroup() %>% 
  filter(order==1) %>% 
  group_by(subnum, Ntrial, sub_type, expression) %>% 
  mutate(order=1:n(),AOI_emo=factor(AOI_emo,levels=c(1,2),
                                    labels=c('emo','neu'))) %>% 
  ungroup() %>% 
  filter(order==1) %>% 
  group_by(subnum, sub_type, expression, AOI_emo) %>% 
  dplyr::summarize(num=n()) %>% 
  ungroup() %>% 
  spread(key = AOI_emo, value = num) %>% 
  mutate(tot=emo+neu,rate=emo/tot)

## `summarise()` has grouped output by 'subnum', 'sub_type', 'expression'. You can
## override using the `.groups` argument.

# Test if the first gaze rate within each bs and ws factor is significantly higher than 0.5
t_test_results <- data4 %>%
  group_by(sub_type, expression) %>%
  summarise(t_test = list(t.test(rate, mu = 0.5, alternative = "greater"))) %>%
  mutate(p_value = map_dbl(t_test, "p.value"),
         statistic = map_dbl(t_test, "statistic"),
         conf_int = map(t_test, "conf.int"))

## `summarise()` has grouped output by 'sub_type'. You can override using the
## `.groups` argument.

# Apply Bonferroni correction
num_tests <- nrow(t_test_results)
t_test_results <- t_test_results %>%
  mutate(p_value_bonferroni = p.adjust(p_value, method = "bonferroni", n = num_tests))
  
# Print the t-test results
print(t_test_results)

## # A tibble: 6 × 7
## # Groups:   sub_type [2]
##   sub_type expression t_test       p_value statistic conf_int p_value_bonferroni
##   <chr>    <chr>      <list>         <dbl>     <dbl> <list>                <dbl>
## 1 HC       F          <htest>      1.07e-7      6.09 <dbl>          0.000000644 
## 2 HC       H          <htest>      1.28e-1      1.15 <dbl>          0.771       
## 3 HC       S          <htest>      8.25e-3      2.49 <dbl>          0.0495      
## 4 MDD      F          <htest>      6.60e-9      6.54 <dbl>          0.0000000396
## 5 MDD      H          <htest>      4.35e-3      2.71 <dbl>          0.0261      
## 6 MDD      S          <htest>      8.95e-8      5.88 <dbl>          0.000000537

# Perform ANOVA for first fixation preference
anova_first_prefer <- aov_ez(
  data = data4,
  id = 'subnum',
  dv = 'rate',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type'
)

## Converting to factor: sub_type
## Contrasts set to contr.sum for the following variables: sub_type

# Summarize the ANOVA results
summary(anova_first_prefer)

## Warning in summary.Anova.mlm(object$Anova, multivariate = FALSE): HF eps > 1
## treated as 1

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                      Sum Sq num Df Error SS den Df   F value    Pr(>F)    
## (Intercept)         103.309      1   1.6557    108 6738.6135 < 2.2e-16 ***
## sub_type              0.015      1   1.6557    108    1.0056    0.3182    
## expression            0.274      2   2.9083    216   10.1886  5.91e-05 ***
## sub_type:expression   0.036      2   2.9083    216    1.3301    0.2666    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.99148 0.63283
## sub_type:expression        0.99148 0.63283
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])    
## expression          0.99156  6.262e-05 ***
## sub_type:expression 0.99156     0.2666    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       HF eps   Pr(>F[HF])
## expression          1.010024 5.910466e-05
## sub_type:expression 1.010024 2.666188e-01

# Perform post hoc test on 'expression' variable
emmeans_result <- emmeans(anova_first_prefer, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_result)

## $emmeans
##  expression emmean     SE  df lower.CL upper.CL
##  F           0.602 0.0114 108    0.579    0.624
##  H           0.531 0.0114 108    0.508    0.553
##  S           0.564 0.0115 108    0.541    0.587
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate     SE  df t.ratio p.value
##  F - H      0.0714 0.0151 108   4.736  <.0001
##  F - S      0.0377 0.0162 108   2.329  0.0651
##  H - S     -0.0337 0.0162 108  -2.082  0.1191
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

# 引导中间 潜伏期
outlier.IQR <- function(x, multiple = 1.5) {
  q <- quantile(x, na.rm = TRUE) #四分位间距3倍间距以外的认为是离群值
  IQR <- q[4] - q[2]
  x1 <- q[2] - multiple * IQR 
  x2 <- q[4] + multiple * IQR
  return(c(x1, x2))
}


data3 <- my_data %>% 
  filter(trial_type == 'centered', AOI_emo==1|AOI_emo==2) %>%
  group_by(subnum, Ntrial, sub_type, expression, AOI_emo) %>% 
  mutate(order=1:n()) %>% 
  ungroup() %>% 
  filter(order==1) %>% 
  mutate(latency=startT-Tstart,AOI_emo=factor(AOI_emo)) %>% 
  # filter(latency<=2500, latency>=120) %>%  # need papers to support
  group_by(subnum, Ntrial, sub_type, trial_type, expression) %>%
  summarise(
    emo_latency = latency[AOI_emo == 1],
    neu_latency = latency[AOI_emo == 2],
    emo_neu_latency = emo_latency - neu_latency
  ) %>%
  ungroup() %>% 
  # kick outlier within subject
  group_by(subnum) %>%
  mutate(
    IQR_L = outlier.IQR(emo_neu_latency)[1],
    IQR_H = outlier.IQR(emo_neu_latency)[2],
    valid1 = if_else(!is.na(IQR_L) & !is.na(IQR_H) & emo_neu_latency >= IQR_L & emo_neu_latency <= IQR_H, 1, 0)
  ) %>%
  filter(valid1 == 1) %>%
  ungroup()

## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `summarise()` has grouped output by 'subnum', 'Ntrial', 'sub_type',
## 'trial_type', 'expression'. You can override using the `.groups` argument.

sumrepdat3E_sub <- summarySE(data3, measurevar = "emo_neu_latency", 
                   groupvars=c('subnum',"sub_type","trial_type","expression"))

## Warning in qt(conf.interval/2 + 0.5, datac$N - 1): NaNs produced

# Test if the emo - neu first fixation latency within each bs and ws factor is significantly higher than 0.5
t_test_results <- data3 %>%
  group_by(sub_type, expression) %>%
  summarise(t_test = list(t.test(emo_neu_latency, mu = 0, alternative = "two.sided"))) %>%
  mutate(p_value = map_dbl(t_test, "p.value"),
         statistic = map_dbl(t_test, "statistic"),
         conf_int = map(t_test, "conf.int"))

## `summarise()` has grouped output by 'sub_type'. You can override using the
## `.groups` argument.

# Apply Bonferroni correction
num_tests <- nrow(t_test_results)
t_test_results <- t_test_results %>%
  mutate(p_value_bonferroni = p.adjust(p_value, method = "bonferroni", n = num_tests))

# Print the t-test results
print(t_test_results)

## # A tibble: 6 × 7
## # Groups:   sub_type [2]
##   sub_type expression t_test   p_value statistic conf_int  p_value_bonferroni
##   <chr>    <chr>      <list>     <dbl>     <dbl> <list>                 <dbl>
## 1 HC       F          <htest> 1.39e-12     -7.23 <dbl [2]>           8.35e-12
## 2 HC       H          <htest> 3.81e- 2     -2.08 <dbl [2]>           2.28e- 1
## 3 HC       S          <htest> 5.02e- 4     -3.50 <dbl [2]>           3.01e- 3
## 4 MDD      F          <htest> 5.22e-12     -7.02 <dbl [2]>           3.13e-11
## 5 MDD      H          <htest> 1.61e- 3     -3.17 <dbl [2]>           9.68e- 3
## 6 MDD      S          <htest> 2.60e- 4     -3.67 <dbl [2]>           1.56e- 3

# Perform ANOVA for first fixation latency
anova_fix_latency <- aov_ez(
  data = sumrepdat3E_sub,
  id = 'subnum',
  dv = 'emo_neu_latency_mean',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type'
)

## Converting to factor: sub_type

## Warning: Missing values for 2 ID(s), which were removed before analysis:
## MD029_, MD039_
## Below the first few rows (in wide format) of the removed cases with missing data.
##      subnum sub_type   F         H    S
## # 55 MD029_      MDD 220        NA   NA
## # 58 MD039_      MDD  NA -370.6667 -492

## Contrasts set to contr.sum for the following variables: sub_type

# Summarize the ANOVA results
summary(anova_fix_latency)

## Warning in summary.Anova.mlm(object$Anova, multivariate = FALSE): HF eps > 1
## treated as 1

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                      Sum Sq num Df Error SS den Df F value    Pr(>F)    
## (Intercept)         4710491      1  5387676    106 92.6767  3.88e-16 ***
## sub_type                128      1  5387676    106  0.0025 0.9600225    
## expression           593069      2  8806917    212  7.1382 0.0009997 ***
## sub_type:expression   16555      2  8806917    212  0.1993 0.8194925    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.98694 0.50141
## sub_type:expression        0.98694 0.50141
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])   
## expression          0.98711   0.001057 **
## sub_type:expression 0.98711   0.816722   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       HF eps   Pr(>F[HF])
## expression          1.005715 0.0009996549
## sub_type:expression 1.005715 0.8194924540

# Perform post hoc test on 'sub_type' variable
emmeans_latency_result <- emmeans(anova_fix_latency, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_latency_result)

## $emmeans
##  expression emmean   SE  df lower.CL upper.CL
##  F          -180.2 22.9 106     -226   -134.8
##  H           -77.6 18.9 106     -115    -40.0
##  S          -107.0 19.4 106     -146    -68.5
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df t.ratio p.value
##  F - H      -102.6 27.1 106  -3.791  0.0007
##  F - S       -73.2 29.5 106  -2.480  0.0441
##  H - S        29.4 27.3 106   1.078  0.8501
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

# 引导条件下首滞留
outlier.IQR <- function(x, multiple = 1.5) {
  q <- quantile(x, na.rm = TRUE) #四分位间距3倍间距以外的认为是离群值
  IQR <- q[4] - q[2]
  x1 <- q[2] - multiple * IQR 
  x2 <- q[4] + multiple * IQR
  return(c(x1, x2))
}

# Function to check continuity of index numbers within each group
check_continuity <- function(df) {
  smallest_index <- min(df$index)
  df <- df %>% arrange(index)
  for (i in seq_along(df$index)) {
    if (i == 1) next
    if (df$index[i] != df$index[i - 1] + 1) {
      return(df[1:(i - 1), ])
    }
  }
  return(df)
}

# Function to process data for a given trial type and AOI_emo
process_data <- function(data, Trial_type, aoi_emo) {
  data <- data %>%
    filter(trial_type == Trial_type) %>% 
    filter(AOI_emo == aoi_emo) %>%
    group_by(subnum, Ntrial) %>%
    group_modify(~ check_continuity(.x)) %>%
    mutate(dwell_time = sum(duration, na.rm = TRUE)) %>%
    ungroup()
  return(data)
}

# Process data for data2_emo
data2_emo <- process_data(my_data, 'guided_emo', 1)

# Create new variable data2_emo_dwell
data2_emo_dwell <- data2_emo %>%
  select(subnum, sub_type, trial_type, Ntrial, expression, duration) %>%
  group_by(subnum, Ntrial) %>%
  mutate(dwell_emo = sum(duration, na.rm = TRUE)) %>%
  ungroup() %>% 
  select(-duration) %>%  # Drop the duration column
  distinct(subnum, Ntrial, .keep_all = TRUE)  # Drop duplicate rows based on subnum and Ntrial

# The first dwell time for guided_emo stimulus (emo dwell)
data2_emo_dwell_subed <- data2_emo_dwell %>%
  # kick outlier within subject
  group_by(subnum) %>%
  mutate(
    IQR_L = outlier.IQR(dwell_emo)[1],
    IQR_H = outlier.IQR(dwell_emo)[2],
    valid1 = if_else(!is.na(IQR_L) & !is.na(IQR_H) & dwell_emo >= IQR_L & dwell_emo <= IQR_H, 1, 0)
  ) %>%
  filter(valid1 == 1) %>%
  ungroup()

# Process data for data2_neu
data2_neu <- process_data(my_data, 'guided_neu', 2)

# Create new variable data2_neu_dwell
data2_neu_dwell <- data2_neu %>%
  select(subnum, sub_type, trial_type, Ntrial, expression, duration) %>%
  group_by(subnum, Ntrial) %>%
  mutate(dwell_neu = sum(duration, na.rm = TRUE)) %>%
  ungroup() %>% 
  select(-duration) %>%  # Drop the duration column
  distinct(subnum, Ntrial, .keep_all = TRUE)  # Drop duplicate rows based on subnum and Ntrial


# The first dwell time for guided_neu stimulus (neu dwell)
data2_neu_dwell_subed <- data2_neu_dwell %>%
  # kick outlier within subject
  group_by(subnum) %>%
  mutate(
    IQR_L = outlier.IQR(dwell_neu)[1],
    IQR_H = outlier.IQR(dwell_neu)[2],
    valid1 = if_else(!is.na(IQR_L) & !is.na(IQR_H) & dwell_neu >= IQR_L & dwell_neu <= IQR_H, 1, 0)
  ) %>%
  filter(valid1 == 1) %>%
  ungroup()

# Summary statistics for emo_neu_dwell
sumrepdat2E_sub <- summarySE(data2_emo_dwell_subed, measurevar = "dwell_emo", 
                             groupvars=c('subnum',"sub_type", "trial_type", "expression")) 

sumrepdat2N_sub <- summarySE(data2_neu_dwell_subed, measurevar = "dwell_neu", 
                             groupvars=c('subnum',"sub_type", "trial_type", "expression")) 

# Perform ANOVA for first dwell on emo 
anova_emo_dwell <- aov_ez(
  data = sumrepdat2E_sub,
  id = 'subnum',
  dv = 'dwell_emo_mean',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type'
)

## Converting to factor: sub_type

## Contrasts set to contr.sum for the following variables: sub_type

# Summarize the ANOVA results
summary(anova_emo_dwell)

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                       Sum Sq num Df Error SS den Df  F value Pr(>F)    
## (Intercept)         94100379      1 18773700    108 541.3339 <2e-16 ***
## sub_type                7127      1 18773700    108   0.0410 0.8399    
## expression             41191      2  2299690    216   1.9344 0.1470    
## sub_type:expression     7619      2  2299690    216   0.3578 0.6996    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.97448 0.25079
## sub_type:expression        0.97448 0.25079
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])
## expression          0.97511     0.1482
## sub_type:expression 0.97511     0.6943
## 
##                        HF eps Pr(>F[HF])
## expression          0.9928109  0.1473436
## sub_type:expression 0.9928109  0.6980929

# Perform post hoc test on 'expression' variable
emmeans_result <- emmeans(anova_emo_dwell, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_result)

## $emmeans
##  expression emmean   SE  df lower.CL upper.CL
##  F             552 26.1 108      501      604
##  H             542 25.3 108      492      592
##  S             525 22.1 108      481      569
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df t.ratio p.value
##  F - H        10.7 15.0 108   0.715  1.0000
##  F - S        27.4 13.0 108   2.108  0.1120
##  H - S        16.7 14.1 108   1.184  0.7165
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

# Perform ANOVA for first dwell on neu
anova_neu_dwell <- aov_ez(
  data = sumrepdat2N_sub,
  id = 'subnum',
  dv = 'dwell_neu_mean',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type'
)

## Converting to factor: sub_type
## Contrasts set to contr.sum for the following variables: sub_type

# Summarize the ANOVA results
summary(anova_neu_dwell)

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                       Sum Sq num Df Error SS den Df  F value Pr(>F)    
## (Intercept)         85998470      1 13739110    108 676.0143 <2e-16 ***
## sub_type               31186      1 13739110    108   0.2451 0.6215    
## expression             15026      2   817252    216   1.9857 0.1398    
## sub_type:expression     3569      2   817252    216   0.4716 0.6246    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.92389 0.01448
## sub_type:expression        0.92389 0.01448
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])
## expression          0.92928     0.1433
## sub_type:expression 0.92928     0.6106
## 
##                        HF eps Pr(>F[HF])
## expression          0.9448817  0.1425377
## sub_type:expression 0.9448817  0.6137841

# Perform post hoc test on 'expression' variable
emmeans_result <- emmeans(anova_neu_dwell, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_result)

## $emmeans
##  expression emmean   SE  df lower.CL upper.CL
##  F             526 21.8 108      482      569
##  H             512 20.5 108      472      553
##  S             510 18.9 108      473      548
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df t.ratio p.value
##  F - H        13.4 7.88 108   1.697  0.2780
##  F - S        15.4 9.47 108   1.623  0.3225
##  H - S         2.0 7.69 108   0.260  1.0000
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

library(BayesFactor)

## Warning: package 'BayesFactor' was built under R version 4.3.3

## Loading required package: coda

## Warning: package 'coda' was built under R version 4.3.3

## ************
## Welcome to BayesFactor 0.9.12-4.7. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************

# Prepare the data
first_prefer_data <- data4

# Ensure subnum is a factor
first_prefer_data$subnum <- as.factor(first_prefer_data$subnum)
first_prefer_data$sub_type <- as.factor(first_prefer_data$sub_type)
first_prefer_data$expression <- as.factor(first_prefer_data$expression)

# Perform Bayesian ANOVA
bf_anova <- anovaBF(rate ~ sub_type * expression + subnum, data = first_prefer_data, whichRandom = "subnum")

## Warning: data coerced from tibble to data frame

# Print the results
print(bf_anova)

## Bayes factor analysis
## --------------
## [1] sub_type + subnum                                    : 0.2063041 ±0.79%
## [2] expression + subnum                                  : 352.9047  ±0.62%
## [3] sub_type + expression + subnum                       : 75.90506  ±1.6%
## [4] sub_type + expression + sub_type:expression + subnum : 14.93258  ±1.75%
## 
## Against denominator:
##   rate ~ subnum 
## ---
## Bayes factor type: BFlinearModel, JZS

# Extract posterior samples from the full model (index 4)
# The full model includes sub_type, expression, their interaction, and subnum.
posterior_samples <- posterior(bf_anova, iterations = 1000, index = 4)

# # Inspect the column names in the posterior_samples object
# print(colnames(posterior_samples))

# Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# Calculate the difference between HC and MDD in sub_type
diff_HC_MDD <- posterior_samples[, "sub_type-HC"] - posterior_samples[, "sub_type-MDD"]

# Calculate the Bayes factor for the hypothesis
bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)

# Print the hypothesis test results
print(bf_hypothesis) # 0.1429, which suppport the hypothesis that HC in sub_type is different from MDD in sub_type

## [1] 0.2437811

#####
# Perform Bayesian ANOVA for emo_neu_latency
latency_data <- data3

# Ensure subnum is a factor
latency_data$subnum <- as.factor(latency_data$subnum)
latency_data$sub_type <- as.factor(latency_data$sub_type)
latency_data$expression <- as.factor(latency_data$expression)

# Perform Bayesian ANOVA
bf_anova_latency <- anovaBF(emo_neu_latency ~ sub_type * expression + subnum, data = latency_data, whichRandom = "subnum")

## Warning: data coerced from tibble to data frame

# Print the results
print(bf_anova_latency)

## Bayes factor analysis
## --------------
## [1] sub_type + subnum                                    : 0.05393521 ±1.27%
## [2] expression + subnum                                  : 59.36817   ±0.91%
## [3] sub_type + expression + subnum                       : 3.338669   ±2%
## [4] sub_type + expression + sub_type:expression + subnum : 0.02258741 ±1.68%
## 
## Against denominator:
##   emo_neu_latency ~ subnum 
## ---
## Bayes factor type: BFlinearModel, JZS

# Extract posterior samples from the full model (index 4)
# The full model includes sub_type, expression, their interaction, and subnum.
posterior_samples_latency <- posterior(bf_anova_latency, iterations = 1000, index = 4)

# summary(posterior_samples_latency)
# sub_type-HC and sub_type-MDD:

# These terms represent the estimated effects (coefficients) for the levels HC and MDD of the sub_type factor, respectively.
# In a Bayesian ANOVA, each level of a factor (except the reference level) has its own coefficient that represents the effect of that level compared to the reference level.
# For example, if HC is the reference level, sub_type-MDD would represent the effect of being in the MDD group compared to the HC group.

# # Inspect the column names in the posterior_samples object
# print(colnames(posterior_samples_latency))

# Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# Calculate the difference between HC and MDD in sub_type
diff_HC_MDD <- posterior_samples_latency[, "sub_type-HC"] - posterior_samples_latency[, "sub_type-MDD"]

# Calculate the Bayes factor for the hypothesis
bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)

# Print the hypothesis test results
print(bf_hypothesis) # 3.016, which suppport the hypothesis that HC in sub_type is different from MDD in sub_type

## [1] 1.695418

# Perform Bayesian ANOVA for emo_dwell
dwell_emo_data <- data2_emo_dwell_subed

# Ensure subnum is a factor
dwell_emo_data$subnum <- as.factor(dwell_emo_data$subnum)
dwell_emo_data$sub_type <- as.factor(dwell_emo_data$sub_type)
dwell_emo_data$expression <- as.factor(dwell_emo_data$expression)

# Perform Bayesian ANOVA
bf_anova_dwell_emo <- anovaBF(dwell_emo ~ sub_type * expression + subnum, data = dwell_emo_data, whichRandom = "subnum")

## Warning: data coerced from tibble to data frame

# Print the results
print(bf_anova_dwell_emo)

## Bayes factor analysis
## --------------
## [1] sub_type + subnum                                    : 0.1573874    ±0.97%
## [2] expression + subnum                                  : 0.046158     ±2.68%
## [3] sub_type + expression + subnum                       : 0.007199467  ±3.21%
## [4] sub_type + expression + sub_type:expression + subnum : 6.783955e-05 ±2.1%
## 
## Against denominator:
##   dwell_emo ~ subnum 
## ---
## Bayes factor type: BFlinearModel, JZS

# Extract posterior samples from the full model (index 4)
# The full model includes sub_type, expression, their interaction, and subnum.
posterior_samples_dwell_emo <- posterior(bf_anova_dwell_emo, iterations = 1000, index = 4)

# summary(posterior_samples_dwell_emo)

# Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# Calculate the difference between HC and MDD in sub_type
diff_HC_MDD <- posterior_samples_dwell_emo[, "sub_type-HC"] - posterior_samples_dwell_emo[, "sub_type-MDD"]

# Calculate the Bayes factor for the hypothesis
bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)

# Print the hypothesis test results
print(bf_hypothesis) # 0.1641, which suppport the hypothesis that HC in sub_type is not different from MDD in sub_type

## [1] 0.7636684

# Perform Bayesian ANOVA for neu_dwell
dwell_neu_data <- data2_neu_dwell_subed

# Ensure subnum is a factor
dwell_neu_data$subnum <- as.factor(dwell_neu_data$subnum)
dwell_neu_data$sub_type <- as.factor(dwell_neu_data$sub_type)
dwell_neu_data$expression <- as.factor(dwell_neu_data$expression)

# Perform Bayesian ANOVA
bf_anova_dwell_neu <- anovaBF(dwell_neu ~ sub_type * expression + subnum, data = dwell_neu_data, whichRandom = "subnum")

## Warning: data coerced from tibble to data frame

# Print the results
print(bf_anova_dwell_neu)

## Bayes factor analysis
## --------------
## [1] sub_type + subnum                                    : 0.1687375    ±1.59%
## [2] expression + subnum                                  : 0.01045106   ±2.9%
## [3] sub_type + expression + subnum                       : 0.001673543  ±1.68%
## [4] sub_type + expression + sub_type:expression + subnum : 1.197504e-05 ±2.86%
## 
## Against denominator:
##   dwell_neu ~ subnum 
## ---
## Bayes factor type: BFlinearModel, JZS

# Extract posterior samples from the full model (index 4)
# The full model includes sub_type, expression, their interaction, and subnum.
posterior_samples_dwell_neu <- posterior(bf_anova_dwell_neu, iterations = 1000, index = 4)

# summary(posterior_samples_dwell_neu)

# Test specific hypothesis: HC in sub_type is different from MDD in sub_type
# Calculate the difference between HC and MDD in sub_type
diff_HC_MDD <- posterior_samples_dwell_neu[, "sub_type-HC"] - posterior_samples_dwell_neu[, "sub_type-MDD"]

# Calculate the Bayes factor for the hypothesis
bf_hypothesis <- mean(diff_HC_MDD > 0) / mean(diff_HC_MDD < 0)

# Print the hypothesis test results
print(bf_hypothesis) # 0.0976, which suppport the hypothesis that HC in sub_type is not different from MDD in sub_type

## [1] 0.4771049

# 引导条件下首滞留
outlier.IQR <- function(x, multiple = 1.5) {
  q <- quantile(x, na.rm = TRUE) #四分位间距3倍间距以外的认为是离群值
  IQR <- q[4] - q[2]
  x1 <- q[2] - multiple * IQR 
  x2 <- q[4] + multiple * IQR
  return(c(x1, x2))
}

# Function to check continuity of index numbers within each group
check_continuity <- function(df) {
  smallest_index <- min(df$index)
  df <- df %>% arrange(index)
  for (i in seq_along(df$index)) {
    if (i == 1) next
    if (df$index[i] != df$index[i - 1] + 1) {
      return(df[1:(i - 1), ])
    }
  }
  return(df)
}

# Function to process data for a given trial type and AOI_emo
process_data <- function(data, Trial_type, aoi_emo) {
  data <- data %>%
    filter(trial_type == Trial_type) %>% 
    filter(AOI_emo == aoi_emo) %>%
    group_by(subnum, Ntrial) %>%
    group_modify(~ check_continuity(.x)) %>%
    mutate(dwell_time = sum(duration, na.rm = TRUE)) %>%
    ungroup()
  return(data)
}

# Process data for data5_emo
data5_emo <- process_data(my_data, 'guided_emo', 1)

# Create new variable data5_emo_dwell
data5_emo_dwell <- data2_emo %>%
  select(subnum, sub_type, trial_type, Ntrial, expression, duration) %>%
  group_by(subnum, Ntrial) %>%
  mutate(dwell_emo = sum(duration, na.rm = TRUE)) %>%
  ungroup() %>% 
  select(-duration) %>%  # Drop the duration column
  distinct(subnum, Ntrial, .keep_all = TRUE)  # Drop duplicate rows based on subnum and Ntrial

# The first dwell time for guided_emo stimulus (emo dwell)
data5_emo_dwell_subed <- data5_emo_dwell %>%
  # kick outlier within subject
  group_by(subnum) %>%
  mutate(
    IQR_L = outlier.IQR(dwell_emo)[1],
    IQR_H = outlier.IQR(dwell_emo)[2],
    valid1 = if_else(!is.na(IQR_L) & !is.na(IQR_H) & dwell_emo >= IQR_L & dwell_emo <= IQR_H, 1, 0)
  ) %>%
  filter(valid1 == 1) %>%
  ungroup()

# Process data for data5_neu
data5_neu <- process_data(my_data, 'guided_neu', 2)

# Create new variable data5_neu_dwell
data5_neu_dwell <- data5_neu %>%
  select(subnum, sub_type, trial_type, Ntrial, expression, duration) %>%
  group_by(subnum, Ntrial) %>%
  mutate(dwell_neu = sum(duration, na.rm = TRUE)) %>%
  ungroup() %>% 
  select(-duration) %>%  # Drop the duration column
  distinct(subnum, Ntrial, .keep_all = TRUE)  # Drop duplicate rows based on subnum and Ntrial


# The first dwell time for guided_neu stimulus (neu dwell)
data5_neu_dwell_subed <- data5_neu_dwell %>%
  # kick outlier within subject
  group_by(subnum) %>%
  mutate(
    IQR_L = outlier.IQR(dwell_neu)[1],
    IQR_H = outlier.IQR(dwell_neu)[2],
    valid1 = if_else(!is.na(IQR_L) & !is.na(IQR_H) & dwell_neu >= IQR_L & dwell_neu <= IQR_H, 1, 0)
  ) %>%
  filter(valid1 == 1) %>%
  ungroup()

# Summary statistics for emo_neu_dwell
sumrepdat5E_sub <- summarySE(data5_emo_dwell_subed, measurevar = "dwell_emo", 
                             groupvars=c('subnum',"sub_type", "trial_type", "expression")) 

sumrepdat5N_sub <- summarySE(data5_neu_dwell_subed, measurevar = "dwell_neu", 
                             groupvars=c('subnum',"sub_type", "trial_type", "expression")) 

# Perform ANOVA for first dwell on emo 
anova_emo_dwell <- aov_ez(
  data = sumrepdat5E_sub,
  id = 'subnum',
  dv = 'dwell_emo_mean',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type'
)

## Converting to factor: sub_type

## Contrasts set to contr.sum for the following variables: sub_type

# Summarize the ANOVA results
summary(anova_emo_dwell)

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                       Sum Sq num Df Error SS den Df  F value Pr(>F)    
## (Intercept)         94100379      1 18773700    108 541.3339 <2e-16 ***
## sub_type                7127      1 18773700    108   0.0410 0.8399    
## expression             41191      2  2299690    216   1.9344 0.1470    
## sub_type:expression     7619      2  2299690    216   0.3578 0.6996    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.97448 0.25079
## sub_type:expression        0.97448 0.25079
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])
## expression          0.97511     0.1482
## sub_type:expression 0.97511     0.6943
## 
##                        HF eps Pr(>F[HF])
## expression          0.9928109  0.1473436
## sub_type:expression 0.9928109  0.6980929

# Perform post hoc test on 'expression' variable
emmeans_result <- emmeans(anova_emo_dwell, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_result)

## $emmeans
##  expression emmean   SE  df lower.CL upper.CL
##  F             552 26.1 108      501      604
##  H             542 25.3 108      492      592
##  S             525 22.1 108      481      569
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df t.ratio p.value
##  F - H        10.7 15.0 108   0.715  1.0000
##  F - S        27.4 13.0 108   2.108  0.1120
##  H - S        16.7 14.1 108   1.184  0.7165
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

# Perform ANOVA for first dwell on neu
anova_neu_dwell <- aov_ez(
  data = sumrepdat5N_sub,
  id = 'subnum',
  dv = 'dwell_neu_mean',
  between = 'sub_type',
  within = 'expression',
  observed = 'sub_type'
)

## Converting to factor: sub_type
## Contrasts set to contr.sum for the following variables: sub_type

# Summarize the ANOVA results
summary(anova_neu_dwell)

## 
## Univariate Type III Repeated-Measures ANOVA Assuming Sphericity
## 
##                       Sum Sq num Df Error SS den Df  F value Pr(>F)    
## (Intercept)         85998470      1 13739110    108 676.0143 <2e-16 ***
## sub_type               31186      1 13739110    108   0.2451 0.6215    
## expression             15026      2   817252    216   1.9857 0.1398    
## sub_type:expression     3569      2   817252    216   0.4716 0.6246    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Mauchly Tests for Sphericity
## 
##                     Test statistic p-value
## expression                 0.92389 0.01448
## sub_type:expression        0.92389 0.01448
## 
## 
## Greenhouse-Geisser and Huynh-Feldt Corrections
##  for Departure from Sphericity
## 
##                      GG eps Pr(>F[GG])
## expression          0.92928     0.1433
## sub_type:expression 0.92928     0.6106
## 
##                        HF eps Pr(>F[HF])
## expression          0.9448817  0.1425377
## sub_type:expression 0.9448817  0.6137841

# Perform post hoc test on 'expression' variable
emmeans_result <- emmeans(anova_neu_dwell, pairwise ~ expression, adjust = "bonferroni")

# Print the post hoc test results
print(emmeans_result)

## $emmeans
##  expression emmean   SE  df lower.CL upper.CL
##  F             526 21.8 108      482      569
##  H             512 20.5 108      472      553
##  S             510 18.9 108      473      548
## 
## Results are averaged over the levels of: sub_type 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast estimate   SE  df t.ratio p.value
##  F - H        13.4 7.88 108   1.697  0.2780
##  F - S        15.4 9.47 108   1.623  0.3225
##  H - S         2.0 7.69 108   0.260  1.0000
## 
## Results are averaged over the levels of: sub_type 
## P value adjustment: bonferroni method for 3 tests

CP analysis

Quentin Lam

2024-11-26