Setup

Load libraries

library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.1

Load data

MergeFlankerDec8 <- read_excel("C:/Users/John Majoubi/Downloads/MergeFlankerDec8.xlsx")
All_Jan2_April4 <- read_csv("~/All_Jan2_April4.csv")
## New names:
## Rows: 354 Columns: 236
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (16): StartDate, EndDate, IPAddress, SONA/SFSU ID...11, SONA/SFSU ID...... dbl
## (220): Index, Progress, Duration__in_seconds, Finished, LocationLatitude...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `SONA/SFSU ID` -> `SONA/SFSU ID...11`
## • `SONA/SFSU ID` -> `SONA/SFSU ID...12`
MergeSimonDec8th <- read_excel("C:/Users/John Majoubi/Downloads/MergeSimonDec8th.xlsx")
MergeStroopDec8Full <- read_excel("C:/Users/John Majoubi/Downloads/MergeStroopDec8Full.xlsx")

Data Processing

MergeFlankerDec8_c <- MergeFlankerDec8 %>% 
  dplyr::select(Subject, Session, Block, 9:13, 29:32, 80:92) 
MergeSimonDec8th_c <- MergeSimonDec8th %>% 
  dplyr::select(Subject, Session, Block, 9:13, 29:32, 80:92)
MergeStroopDec8Full_c <- MergeStroopDec8Full %>% 
  dplyr::select(Subject, Session, Block, 9:13, 29:32, 80:92)
All_Jan2_April4_c <- All_Jan2_April4 %>% 
  dplyr::select(`SONA/SFSU ID...11`, `SONA/SFSU ID...12`, SFSUID, 58:233, GPA) %>% 
  mutate(`SONA/SFSU ID...11` = as.numeric(`SONA/SFSU ID...11`),
         `SONA/SFSU ID...12` = as.numeric(`SONA/SFSU ID...12`),
         SFSUID = as.numeric(SFSUID)) %>% 
  mutate(`SONA/SFSU ID...12` = ifelse(is.na(`SONA/SFSU ID...12`), SFSUID, `SONA/SFSU ID...12`))
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `SONA/SFSU ID...11 = as.numeric(`SONA/SFSU ID...11`)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 2 remaining warnings.
library(readr)
library(tidyverse)
library(readxl)
library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
impute_missing_values <- function(df) {
  numeric_cols <- sapply(df, is.numeric)
  na_columns <- colnames(df)[numeric_cols & colSums(is.na(df)) > 0]
  df[na_columns] <- lapply(df[na_columns], function(x) replace(x, is.na(x), mean(x, na.rm = TRUE)))
  return(df)
}

# Apply the function to your datasets
MergeFlankerDec8_c <- impute_missing_values(MergeFlankerDec8_c)
MergeSimonDec8th_c <- impute_missing_values(MergeSimonDec8th_c)
MergeStroopDec8Full_c <- impute_missing_values(MergeStroopDec8Full_c)
All_Jan2_April4_c <- impute_missing_values(All_Jan2_April4_c)

Flanker

Data Joining

  • only 198 IDs matched
joined_data_F <- MergeFlankerDec8_c %>%
  inner_join(
    All_Jan2_April4_c,
    by = c("Subject" = "SONA/SFSU ID...12")
  )
## Warning in inner_join(., All_Jan2_April4_c, by = c(Subject = "SONA/SFSU ID...12")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 2775 of `x` matches multiple rows in `y`.
## ℹ Row 254 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
joined_data_F %>% 
  dim()
## [1] 8351  204

Simon

Data Joining

  • 200 IDs matched
joined_data_S <- MergeSimonDec8th_c %>% 
  inner_join(
    All_Jan2_April4_c,
    by = c("Subject" = "SONA/SFSU ID...12")
  )
## Warning in inner_join(., All_Jan2_April4_c, by = c(Subject = "SONA/SFSU ID...12")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 13657 of `x` matches multiple rows in `y`.
## ℹ Row 28 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
joined_data_S %>% 
  dim()
## [1] 17550   204

Stroop

Data Joining

  • 199 IDs matched
joined_data_St <- MergeStroopDec8Full_c %>% 
  inner_join(
    All_Jan2_April4_c,
    by = c("Subject" = "SONA/SFSU ID...12")
  )
## Warning in inner_join(., All_Jan2_April4_c, by = c(Subject = "SONA/SFSU ID...12")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 8581 of `x` matches multiple rows in `y`.
## ℹ Row 28 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
joined_data_St %>% 
  dim()
## [1] 11253   204
# Load necessary libraries
library(dplyr)
library(tidyr)
library(readr)

# Convert Task.RT to numeric and remove zero values
joined_data_F <- joined_data_F %>%
  mutate(Task.RT = as.numeric(as.character(Task.RT))) %>%  # Ensure Task.RT is numeric
  dplyr::filter(Task.RT > 00, FinalScore > 0)  # Remove rows where Task.RT is zero
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Task.RT = as.numeric(as.character(Task.RT))`.
## Caused by warning:
## ! NAs introduced by coercion
# Proceed with previous steps if necessary to recheck data types or handle NAs
sum(is.na(joined_data_S$Task.RT))  # Check for NAs introduced by conversion
## [1] 0
# Convert Task.RT to numeric and remove zero values
joined_data_S <- joined_data_S %>%
  mutate(Task.RT = as.numeric(as.character(Task.RT))) %>%  # Ensure Task.RT is numeric
  dplyr::filter(Task.RT > 0, FinalScore > 0)  # Remove rows where Task.RT is zero
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Task.RT = as.numeric(as.character(Task.RT))`.
## Caused by warning:
## ! NAs introduced by coercion
# Proceed with previous steps if necessary to recheck data types or handle NAs
sum(is.na(joined_data_S$Task.RT))  # Check for NAs introduced by conversio
## [1] 0
joined_data_F %>% distinct(Subject)
## # A tibble: 177 × 1
##    Subject
##      <dbl>
##  1   94558
##  2   94789
##  3  104065
##  4  102454
##  5  104068
##  6  104161
##  7  103141
##  8  104323
##  9  103669
## 10  104320
## # ℹ 167 more rows
joined_data_S %>% distinct(Subject)
## # A tibble: 200 × 1
##    Subject
##      <dbl>
##  1   89223
##  2  984514
##  3  659709
##  4  955615
##  5  793920
##  6   83029
##  7  765061
##  8  715947
##  9  486783
## 10  675062
## # ℹ 190 more rows

Quick EDA

joined_data_F %>% 
  select(Subject, Task.RT, FinalScore) %>% 
  mutate(Task.RT = as.numeric(Task.RT)) %>% 
  group_by(Subject) %>% 
  summarise(Task.RT_Mean = mean(Task.RT, na.rm = TRUE),
            FinalScore_Mean = mean(FinalScore, na.rm = TRUE)) %>% 
  ggplot(aes(x = Task.RT_Mean, y = FinalScore_Mean)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

library(DataExplorer)

joined_data_F %>% 
  select(Subject, Task.RT, FinalScore) %>% 
  mutate(Task.RT = as.numeric(Task.RT)) %>% 
  group_by(Subject) %>% 
  summarise(Task.RT_Mean = mean(Task.RT, na.rm = TRUE),
            FinalScore_Mean = mean(FinalScore, na.rm = TRUE)) %>% 
  plot_correlation()

Mean RT Scores

RT_Scores <- joined_data_F %>% 
  select(Subject, Task.RT, FinalScore, TrialType) %>% 
  mutate(Task.RT = as.numeric(Task.RT)) %>% 
  group_by(Subject) %>% 
  summarise(Task.RT_Mean = mean(Task.RT, na.rm = TRUE),
            Task.RT_FI = mean(Task.RT[TrialType == "FullyIncongruent"], na.rm = TRUE),
            Task.RT_SFI = mean(Task.RT[TrialType %in% c("FullyIncongruent", "StimCongruentRespIncongruent")], na.rm = TRUE),
            Task.RT_SI = mean(Task.RT[TrialType %in% c("StimCongruentRespIncongruent")], na.rm = TRUE),
            Task.RT_FC = mean(Task.RT[TrialType %in% c("FullyCongruent")], na.rm = TRUE),
            FinalScore_Mean = mean(FinalScore, na.rm = TRUE))  %>% 
  ungroup()
AllJan_scores <- joined_data_F %>% 
  dplyr::select(Subject, `Simon SQ`, `Flanker SQ`, `Stroop SQ`, Anti1MeanRT, Anti2MeanRT, SwitchMean, SwitchCostRT, MixingCostRT, MixingCostPC, NeutralPC, SwitchCostPC, Anti1PC, Anti2PC, RavenSum, RepeatPC, BSCmean, PerseveranceMean, ACSmean, PremedMean, UrgencyMeanR, RavenSum, MentalHealthMean, HappinessMean, SelfEsteemMean, GPA) %>% 
  pivot_longer(!Subject) %>% 
  group_by(Subject, name) %>% 
  summarise(Mean = mean(value, na.rm = TRUE)) %>% 
  ungroup() %>% 
  pivot_wider(names_from = name, values_from = Mean)
## `summarise()` has grouped output by 'Subject'. You can override using the
## `.groups` argument.

Merge the mean Global RT with self-ratings on Subject to align them for correlation calculation

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Assuming joined_data_F is already loaded and cleaned

# Compute mean Task.RT for each individual
flanker_global_rt <- joined_data_F %>%
  group_by(Subject) %>%
  summarise(`Flanker Global RT` = mean(Task.RT, na.rm = TRUE))

# Select the relevant columns for self-ratings
self_ratings <- joined_data_F %>%
  select(Subject, ACSmean, BSCmean, PerseveranceMean, UrgencyMeanR, PremedMean) %>%
  distinct()

# Merge the mean Task.RT with self-ratings on Subject to align them for correlation calculation
merged_data <- inner_join(flanker_global_rt, self_ratings, by = "Subject")

# Calculate the correlation matrix
cor_matrix <- merged_data %>%
  select(`Flanker Global RT`, ACSmean, BSCmean, PerseveranceMean, UrgencyMeanR, PremedMean) %>%
  cor(use = "complete.obs")

# Print the correlation matrix
print(cor_matrix)
##                   Flanker Global RT    ACSmean    BSCmean PerseveranceMean
## Flanker Global RT       1.000000000 -0.1130421 0.02773726       0.08874055
## ACSmean                -0.113042068  1.0000000 0.50617815       0.39630922
## BSCmean                 0.027737259  0.5061782 1.00000000       0.55956668
## PerseveranceMean        0.088740550  0.3963092 0.55956668       1.00000000
## UrgencyMeanR           -0.092515650  0.4357928 0.53545847       0.25062457
## PremedMean              0.006658044  0.1256466 0.28230265       0.26520768
##                   UrgencyMeanR  PremedMean
## Flanker Global RT  -0.09251565 0.006658044
## ACSmean             0.43579280 0.125646583
## BSCmean             0.53545847 0.282302646
## PerseveranceMean    0.25062457 0.265207678
## UrgencyMeanR        1.00000000 0.205222592
## PremedMean          0.20522259 1.000000000
# Melt the correlation matrix for visualization
cor_matrix_melt <- melt(cor_matrix)

# Create the heatmap
ggplot(cor_matrix_melt, aes(Var1, Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1, 1), space = "Lab", name = "Correlation") +
  geom_text(aes(Var1, Var2, label = round(value, 2)), color = "black", size = 3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1)) +
  coord_fixed() +
  labs(title = "Correlation Matrix Heatmap", x = "", y = "")

Merge the mean Global RT with Simon SQ, Flanker SQ, Stroop SQ, Anti1MeanRT, SwitchCostRT on Subject to align them for correlation calculation

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(reshape2)

# Compute mean Task.RT for each individual as Flanker Global RT
Flanker_global_rt <- joined_data_F %>%
  group_by(Subject) %>%
  summarise(Flanker_global_rt = mean(Task.RT, na.rm = TRUE))

# Select the relevant columns for reaction times and other measures
self_ratings <- joined_data_F %>%
  select(Subject, `Simon SQ`, `Flanker SQ`, `Stroop SQ`, Anti1MeanRT,  SwitchCostRT) %>%
  distinct()

# Merge the mean Task.RT with the other measures on Subject to align them for correlation calculation
merged_data <- inner_join(Flanker_global_rt, self_ratings, by = "Subject")

# Calculate the correlation matrix
cor_matrix <- merged_data %>%
  select(Flanker_global_rt,  Anti1MeanRT, SwitchCostRT,  `Simon SQ`, `Stroop SQ`, `Flanker SQ`) %>%
  cor(use = "complete.obs")

# Print the correlation matrix
print(cor_matrix)
##                   Flanker_global_rt Anti1MeanRT SwitchCostRT   Simon SQ
## Flanker_global_rt         1.0000000   0.2635080    0.3842765 -0.5258351
## Anti1MeanRT               0.2635080   1.0000000    0.3006497 -0.4848588
## SwitchCostRT              0.3842765   0.3006497    1.0000000 -0.3002098
## Simon SQ                 -0.5258351  -0.4848588   -0.3002098  1.0000000
## Stroop SQ                -0.4053664  -0.2598792   -0.3023199  0.4120765
## Flanker SQ               -0.7940021  -0.2820028   -0.2908478  0.5420837
##                    Stroop SQ Flanker SQ
## Flanker_global_rt -0.4053664 -0.7940021
## Anti1MeanRT       -0.2598792 -0.2820028
## SwitchCostRT      -0.3023199 -0.2908478
## Simon SQ           0.4120765  0.5420837
## Stroop SQ          1.0000000  0.4836205
## Flanker SQ         0.4836205  1.0000000
# Melt the correlation matrix for visualization
cor_matrix_melt <- melt(cor_matrix)

# Create the heatmap
p <- ggplot(cor_matrix_melt, aes(Var1, Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1, 1), space = "Lab", name = "Correlation") +
  geom_text(aes(Var1, Var2, label = round(value, 2)), color = "black", size = 3) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1)) +
  coord_fixed() +
  labs(title = "Correlation Matrix Heatmap", x = "", y = "")

# Display the plot
print(p)

# Save the plot with 300 DPI resolution
ggsave("GlobalRT_heatmap_plot.png", plot = p, dpi = 300, width = 8, height = 6, units = "in")

Outlier Cleaning

# Convert Task.RT to numeric
# This step also checks for non-numeric characters which will be turned into NA
joined_data_F$Task.RT <- as.numeric(joined_data_F$Task.RT)

# Check if there were any non-numeric values that got converted to NA
sum(is.na(joined_data_F$Task.RT))
## [1] 0
remove_outliers_iqr <- function(data, cols, multiplier = 1.5) {
  for (col in cols) {
    if (col %in% names(data) && is.numeric(data[[col]])) {
      Q1 <- quantile(data[[col]], 0.25, na.rm = TRUE)
      Q3 <- quantile(data[[col]], 0.75, na.rm = TRUE)
      IQR <- Q3 - Q1
      lower_bound <- Q1 - multiplier * IQR
      upper_bound <- Q3 + multiplier * IQR
      data <- data[data[[col]] >= lower_bound & data[[col]] <= upper_bound, ]
    }
  }
  return(data)
}
joined_data_F %>% distinct(Subject)
## # A tibble: 177 × 1
##    Subject
##      <dbl>
##  1   94558
##  2   94789
##  3  104065
##  4  102454
##  5  104068
##  6  104161
##  7  103141
##  8  104323
##  9  103669
## 10  104320
## # ℹ 167 more rows
#Assuming remove_outliers_iqr function is defined as above and handles only numeric columns
numeric_columns <- c("Task.RT", "FinalScore", "Anti1MeanRT", "Anti2MeanRT", "SwitchCostRT", "MixingCostRT")
joined_data_F <- remove_outliers_iqr(joined_data_F, numeric_columns)
library(ggplot2)
ggplot(joined_data_F, aes(x = Task.RT)) +
geom_histogram(bins = 30, fill = 'blue') +
ggtitle("Distribution of Task.RT After Cleaning and Outlier Removal")

Convert TrialType to a factor

joined_data_F$TrialType <- as.factor(joined_data_F$TrialType)

# Print out the levels to confirm they are correct
print(levels(joined_data_F$TrialType))
## [1] "FullyCongruent"               "FullyIncongruent"            
## [3] "StimCongruentRespIncongruent" "StimIncongruentRespCongruent"

DIFFERENCE SCORES (TRADITIONAL)

Calculate the mean for combined trial types and for FullyCongruent separately

# Calculate the mean for combined trial types and for FullyCongruent separately
combined_scores <- joined_data_F %>%
  group_by(Subject) %>%
  summarise(
    FullyIncongruent = mean(Task.RT[TrialType == "FullyIncongruent"], na.rm = TRUE),
    StimCongruentRespIncongruent = mean(Task.RT[TrialType == "StimCongruentRespIncongruent"], na.rm = TRUE),
    StimIncongruentRespCongruent = mean(Task.RT[TrialType == "StimIncongruentRespCongruent"], na.rm = TRUE),
    FullyCongruent = mean(Task.RT[TrialType == "FullyCongruent"], na.rm = TRUE)
  ) %>%
  mutate(
    Combined_Mean = (FullyIncongruent + StimCongruentRespIncongruent + StimIncongruentRespCongruent) / 3,
    Difference = Combined_Mean - FullyCongruent
  ) %>%
  left_join(select(joined_data_F, Subject, `Simon SQ`, ACSmean, Anti1MeanRT, BSCmean, UrgencyMeanR, PerseveranceMean, MentalHealthMean, PremedMean), by = "Subject") %>%
  distinct()



# Calculate correlations
correlation_SimonSQ <- cor(combined_scores$Difference, combined_scores$`Simon SQ`, use = "complete.obs")
correlation_ACSmean <- cor(combined_scores$Difference, combined_scores$ACSmean, use = "complete.obs")
correlation_Anti1MeanRT <- cor(combined_scores$Difference, combined_scores$Anti1MeanRT, use = "complete.obs")
correlation_BSCmean <- cor(combined_scores$Difference, combined_scores$BSCmean, use = "complete.obs")
correlation_UrgencyMeanR <- cor(combined_scores$Difference, combined_scores$UrgencyMeanR, use = "complete.obs")
correlation_PerseveranceMean <- cor(combined_scores$Difference, combined_scores$PerseveranceMean, use = "complete.obs")
correlation_MentalHealthMean <- cor(combined_scores$Difference, combined_scores$MentalHealthMean, use = "complete.obs")
correlation_PremedMean <- cor(combined_scores$Difference, combined_scores$PremedMean, use = "complete.obs")

# Print correlations
print(correlation_SimonSQ)
## [1] -0.1669946
print(correlation_ACSmean)
## [1] -0.2530343
print(correlation_BSCmean)
## [1] -0.1578991
print(correlation_UrgencyMeanR)
## [1] -0.1403306
print(correlation_PerseveranceMean)
## [1] -0.02455753
print(correlation_PremedMean)
## [1] -0.144658
# Load necessary libraries
library(dplyr)
library(tidyr)
library(ggplot2)

# Calculate the mean for combined trial types and for FullyCongruent separately
combined_scores <- joined_data_F %>%
  group_by(Subject) %>%
  summarise(
    FullyIncongruent = mean(Task.RT[TrialType == "FullyIncongruent"], na.rm = TRUE),
    StimCongruentRespIncongruent = mean(Task.RT[TrialType == "StimCongruentRespIncongruent"], na.rm = TRUE),
    StimIncongruentRespCongruent = mean(Task.RT[TrialType == "StimIncongruentRespCongruent"], na.rm = TRUE),
    FullyCongruent = mean(Task.RT[TrialType == "FullyCongruent"], na.rm = TRUE)
  ) %>%
  mutate(
    Combined_Mean = (FullyIncongruent + StimCongruentRespIncongruent + StimIncongruentRespCongruent) / 3,
    Difference = Combined_Mean - FullyCongruent
  ) %>%
  left_join(select(joined_data_F, Subject, `Simon SQ`, ACSmean, Anti1MeanRT, BSCmean, UrgencyMeanR, PerseveranceMean, MentalHealthMean, PremedMean), by = "Subject") %>%
  distinct()



# Create a correlation matrix
cor_matrix <- combined_scores %>%
  select(Difference, ACSmean, BSCmean, UrgencyMeanR, PerseveranceMean, PremedMean) %>%
  cor(use = "complete.obs")

# Melt the correlation matrix
cor_matrix_melt <- as.data.frame(as.table(cor_matrix))

# Plot the correlation matrix heatmap
Diff_Score_Heatmap <- ggplot(cor_matrix_melt, aes(Var1, Var2, fill = Freq)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                       midpoint = 0, limit = c(-1, 1), space = "Lab", 
                       name = "Correlation") +
  geom_text(aes(Var1, Var2, label = round(Freq, 2)), color = "black", size = 2) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 8, hjust = 1)) +
  coord_fixed() +
  labs(title = "Diff_Score_Heatmap", x = "", y = "")

# Save the ggplot2 heatmap
ggsave("Diff_Score_Heatmap.png", plot = Diff_Score_Heatmap, dpi = 300, width = 6, height = 4, units = "in")

# Print the plot
print(Diff_Score_Heatmap)

Calculate the difference for each N-1 FullyCongruent trial

# Load necessary libraries
library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
library(reshape2)
library(ggpubr)





# Ensure Task.RT and other necessary variables are numeric
joined_data_F <- joined_data_F %>%
  mutate(Task.RT = as.numeric(as.character(Task.RT)),
         ACSmean = as.numeric(as.character(ACSmean)),
         BSCmean = as.numeric(as.character(BSCmean)),
         PerseveranceMean = as.numeric(as.character(PerseveranceMean)),
         PremedMean = as.numeric(as.character(PremedMean)),
         UrgencyMeanR = as.numeric(as.character(UrgencyMeanR)),
         `Flanker SQ` = as.numeric(as.character(`Flanker SQ`)),
         `Simon SQ` = as.numeric(as.character(`Simon SQ`)),
         `Stroop SQ` = as.numeric(as.character(`Stroop SQ`)),
         Anti1MeanRT = as.numeric(as.character(Anti1MeanRT)),
         SwitchCostRT = as.numeric(as.character(SwitchCostRT)))

# Calculate the difference score correctly
N_1FullyCongruent <- joined_data_F %>%
  mutate(Next_Task_RT = dplyr::lead(Task.RT),  # Create a lead column for Task.RT
         Next_TrialType = dplyr::lead(TrialType)) %>%  # Create a lead column for TrialType as well
  dplyr::filter(TrialType == "FullyCongruent",  # Keep only FullyCongruent trials
                Next_TrialType != "FullyCongruent") %>%  # Exclude cases where the next trial is also FullyCongruent
  mutate(Difference_Score = Next_Task_RT - Task.RT)  # Calculate the difference as (next - current)

# View the result
print(N_1FullyCongruent)
## # A tibble: 1,118 × 207
##    Subject Session Block FinalScore FullyCongruentFinalScore
##      <dbl>   <dbl> <dbl>      <dbl>                    <dbl>
##  1   94558       1    14         33                        9
##  2   94558       1    17         33                        9
##  3   94558       1    24         33                        9
##  4   94558       1    27         33                        9
##  5   94558       1    30         33                        9
##  6   94558       1    34         33                        9
##  7   94558       1    38         33                        9
##  8   94558       1    42         33                        9
##  9   94558       1    45         33                        9
## 10   94789       1    10         28                        8
## # ℹ 1,108 more rows
## # ℹ 202 more variables: FullyCongruentTrialsCount <dbl>,
## #   FullyIncongruentFinalScore <dbl>, FullyIncongruentTrialsCount <dbl>,
## #   StimCongruentRespIncongruentFinalScore <dbl>,
## #   StimCongruentRespIncongruentTrialsCount <dbl>,
## #   StimIncongruentRespCongruentFinalScore <dbl>,
## #   StimIncongruentRespCongruentTrialsCount <dbl>, Task.DurationError <chr>, …
# Optionally, if you need to merge this back to the original data or perform further analysis:
joined_data_F <- joined_data_F %>%
  left_join(N_1FullyCongruent %>% select(Subject, Difference_Score), by = "Subject")
## Warning in left_join(., N_1FullyCongruent %>% select(Subject, Difference_Score), : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.
# Check the updated dataset
print(head(joined_data_F))
## # A tibble: 6 × 205
##   Subject Session Block FinalScore FullyCongruentFinalS…¹ FullyCongruentTrials…²
##     <dbl>   <dbl> <dbl>      <dbl>                  <dbl>                  <dbl>
## 1   94558       1    13         33                      9                      9
## 2   94558       1    13         33                      9                      9
## 3   94558       1    13         33                      9                      9
## 4   94558       1    13         33                      9                      9
## 5   94558       1    13         33                      9                      9
## 6   94558       1    13         33                      9                      9
## # ℹ abbreviated names: ¹​FullyCongruentFinalScore, ²​FullyCongruentTrialsCount
## # ℹ 199 more variables: FullyIncongruentFinalScore <dbl>,
## #   FullyIncongruentTrialsCount <dbl>,
## #   StimCongruentRespIncongruentFinalScore <dbl>,
## #   StimCongruentRespIncongruentTrialsCount <dbl>,
## #   StimIncongruentRespCongruentFinalScore <dbl>,
## #   StimIncongruentRespCongruentTrialsCount <dbl>, Task.DurationError <chr>, …
# Load necessary libraries
library(dplyr)
library(tidyr)
library(ggplot2)

# Calculate the average Difference_Score for each subject
N_1FullyCongruent_avg_Score <- N_1FullyCongruent %>%
  group_by(Subject) %>%
  summarise(N_1FullyCongruent_avg_Score = mean(Difference_Score, na.rm = TRUE))

# Merge the average difference scores with ACSmean, BSCmean, PerseveranceMean, PremedMean, UrgencyMeanR, and Flanker SQ
merged_data <- joined_data_F %>%
  select(Subject, ACSmean, BSCmean, PerseveranceMean, PremedMean, UrgencyMeanR, `Flanker SQ`) %>%
  distinct() %>%
  left_join(N_1FullyCongruent_avg_Score, by = "Subject")

# Create a correlation matrix
cor_matrix <- merged_data %>%
  select(N_1FullyCongruent_avg_Score, ACSmean, BSCmean, PerseveranceMean, PremedMean, UrgencyMeanR, `Flanker SQ`) %>%
  cor(use = "complete.obs")

# Melt the correlation matrix
melted_cor_matrix <- melt(cor_matrix)

# Plot the correlation matrix heatmap
N_1_Correlation_Matrix_Heatmap <- ggplot(data = melted_cor_matrix, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                       midpoint = 0, limit = c(-1, 1), space = "Lab", 
                       name = "Correlation") +
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, 
                                   size = 8, hjust = 1)) +
  coord_fixed() +
  geom_text(aes(label = round(value, 2)), color = "black", size = 2) +
  labs(title = "N-1 Correlation Matrix Heatmap")

# Save the ggplot2 heatmap
ggsave("N_1_Correlation_Matrix_Heatmap.png", plot = N_1_Correlation_Matrix_Heatmap, dpi = 300, width = 6, height = 4, units = "in")

# Print the plot
print(N_1_Correlation_Matrix_Heatmap)

# Merge the average difference scores with the selected variables
merged_data <- joined_data_F %>%
  select(Subject, `Simon SQ`, `Flanker SQ`, `Stroop SQ`, Anti1MeanRT, SwitchCostRT) %>%
  distinct() %>%
  left_join(N_1FullyCongruent_avg_Score, by = "Subject")
# Create a correlation matrix
cor_matrix <- merged_data %>%
  select(N_1FullyCongruent_avg_Score, `Simon SQ`, `Flanker SQ`, `Stroop SQ`, Anti1MeanRT, SwitchCostRT) %>%
  cor(use = "complete.obs")

# Melt the correlation matrix
library(reshape2)
melted_cor_matrix <- melt(cor_matrix)

# Filter out negative scores from Simon SQ, Flanker SQ, and Stroop SQ
joined_data_F <- joined_data_F %>%
  dplyr::filter(`Simon SQ` >= 0, `Flanker SQ` >= 0, `Stroop SQ` >= 0)

# Print dimensions to see how many rows remain after filtering
#print(dim(joined_data_F))
joined_data_F %>% distinct(Subject)
## # A tibble: 130 × 1
##    Subject
##      <dbl>
##  1   94558
##  2   94789
##  3  104065
##  4  102454
##  5  104068
##  6  104161
##  7  103141
##  8  104323
##  9  103669
## 10   99631
## # ℹ 120 more rows
# Plot the correlation matrix heatmap
library(ggplot2)
p <- ggplot(data = melted_cor_matrix, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                       midpoint = 0, limit = c(-1, 1), space = "Lab", 
                       name = "Correlation") +
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, 
                                   size = 8, hjust = 1)) +
  coord_fixed() +
  geom_text(aes(label = round(value, 2)), color = "black", size = 2) +
  labs(title = "Correlation Matrix Heatmap")
print(p)

# Save the plot with 300 DPI resolution
ggsave("heatmap_plot.png", plot = p, dpi = 300, width = 6, height = 4, units = "in")