knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
This script merges the preprocessed individual data sets into group-level data sets. Outputs are: mergedC (all controls), mergedMA (all MA), mergedMO (all MO), mergedM (MA and MO combined), and allGroups (all groups combined). Outputs with the _OL suffix have outliers removed per group.
# Input directory: the per-participant CSV files (e.g. "C_4.csv") are read
# from here. The trailing slash is required because file names are appended
# to this string with paste0().
# NOTE(review): absolute, machine-specific path - adjust before running on
# another machine.
filepath_in <- "C:/R/exp_1/results/BEH/indv_merge_dataset/"
# Output directory: the merged group-level CSV files are written here.
# The trailing slash is required for the same reason as above.
filepath_out <- "C:/R/exp_1/results/BEH/merged_group_dataset/"
# Load the control (C) participants from filepath_in, one CSV per participant.
# Participants 16, 18, 26, 28 and 38 are not loaded; their reads are kept as
# comments (the reason for leaving them out is not recorded in this script).
C_4 <- paste0(filepath_in, "C_4.csv") |> read_csv()
C_5 <- paste0(filepath_in, "C_5.csv") |> read_csv()
C_6 <- paste0(filepath_in, "C_6.csv") |> read_csv()
C_8 <- paste0(filepath_in, "C_8.csv") |> read_csv()
C_12 <- paste0(filepath_in, "C_12.csv") |> read_csv()
C_13 <- paste0(filepath_in, "C_13.csv") |> read_csv()
C_14 <- paste0(filepath_in, "C_14.csv") |> read_csv()
C_15 <- paste0(filepath_in, "C_15.csv") |> read_csv()
# C_16 <- paste0(filepath_in, "C_16.csv") |> read_csv()
# C_18 <- paste0(filepath_in, "C_18.csv") |> read_csv()
C_19 <- paste0(filepath_in, "C_19.csv") |> read_csv()
C_22 <- paste0(filepath_in, "C_22.csv") |> read_csv()
C_25 <- paste0(filepath_in, "C_25.csv") |> read_csv()
# C_26 <- paste0(filepath_in, "C_26.csv") |> read_csv()
# C_28 <- paste0(filepath_in, "C_28.csv") |> read_csv()
C_29 <- paste0(filepath_in, "C_29.csv") |> read_csv()
C_31 <- paste0(filepath_in, "C_31.csv") |> read_csv()
C_36 <- paste0(filepath_in, "C_36.csv") |> read_csv()
C_37 <- paste0(filepath_in, "C_37.csv") |> read_csv() # 15 controls loaded up to here
# C_38 <- paste0(filepath_in, "C_38.csv") |> read_csv()
# Combine the loaded controls into one table by folding full_join() over them
# from left to right. No `by` is given, so each join uses every column the two
# tables have in common. C_16, C_18, C_26 and C_28 stay excluded.
mergedC <- reduce(
  list(
    C_4, C_5, C_6, C_8, C_12, C_13, C_14, C_15,
    # C_16, C_18,
    C_19, C_22, C_25,
    # C_26, C_28,
    C_29, C_31, C_36, C_37
  ),
  full_join
)
# Write the merged control table to the output directory
write_csv(mergedC, paste0(filepath_out, "mergedC.csv"))
# Outlier removal for controls, based on rtDet (IQR rule via rstatix).
# Collect the rtDet value of every row that identify_outliers() marks with
# is.outlier (all outliers, not only those also marked is.extreme).
outlierList <- rstatix::identify_outliers(mergedC, rtDet) |>
  filter(is.outlier == TRUE) |>
  pull(rtDet)
# Keep the control rows whose rtDet is not one of the collected values
mergedC_OL <- filter(mergedC, !(rtDet %in% outlierList))
# Save the outlier-free control table
write_csv(mergedC_OL, paste0(filepath_out, "mergedC_OL.csv"))
# Load the MA participants from filepath_in, one CSV per participant
MA_7 <- paste0(filepath_in, "MA_7.csv") |> read_csv()
MA_9 <- paste0(filepath_in, "MA_9.csv") |> read_csv()
MA_11 <- paste0(filepath_in, "MA_11.csv") |> read_csv()
MA_20 <- paste0(filepath_in, "MA_20.csv") |> read_csv()
MA_21 <- paste0(filepath_in, "MA_21.csv") |> read_csv()
MA_24 <- paste0(filepath_in, "MA_24.csv") |> read_csv()
MA_27 <- paste0(filepath_in, "MA_27.csv") |> read_csv()
MA_30 <- paste0(filepath_in, "MA_30.csv") |> read_csv()
MA_33 <- paste0(filepath_in, "MA_33.csv") |> read_csv()
MA_34 <- paste0(filepath_in, "MA_34.csv") |> read_csv()
MA_35 <- paste0(filepath_in, "MA_35.csv") |> read_csv()
# Combine all MA participants into one table by folding full_join() over them
# from left to right. No `by` is given, so each join uses every column the two
# tables have in common.
mergedMA <- reduce(
  list(
    MA_7, MA_9, MA_11, MA_20, MA_21, MA_24,
    MA_27, MA_30, MA_33, MA_34, MA_35
  ),
  full_join
)
# Write the merged MA table to the output directory
write_csv(mergedMA, paste0(filepath_out, "mergedMA.csv"))
# Outlier removal for the MA group, based on rtDet (IQR rule via rstatix).
# Collect the rtDet value of every row that identify_outliers() marks with
# is.outlier (all outliers, not only those also marked is.extreme).
outlierList <- rstatix::identify_outliers(mergedMA, rtDet) |>
  filter(is.outlier == TRUE) |>
  pull(rtDet)
# Keep the MA rows whose rtDet is not one of the collected values
mergedMA_OL <- filter(mergedMA, !(rtDet %in% outlierList))
# Save the outlier-free MA table
write_csv(mergedMA_OL, paste0(filepath_out, "mergedMA_OL.csv"))
# Load the MO participants from filepath_in, one CSV per participant
MO_2 <- paste0(filepath_in, "MO_2.csv") |> read_csv()
MO_3 <- paste0(filepath_in, "MO_3.csv") |> read_csv()
MO_10 <- paste0(filepath_in, "MO_10.csv") |> read_csv()
MO_17 <- paste0(filepath_in, "MO_17.csv") |> read_csv()
MO_32 <- paste0(filepath_in, "MO_32.csv") |> read_csv()
# Combine all MO participants into one table by folding full_join() over them
# from left to right. No `by` is given, so each join uses every column the two
# tables have in common.
mergedMO <- reduce(list(MO_2, MO_3, MO_10, MO_17, MO_32), full_join)
# Write the merged MO table to the output directory
write_csv(mergedMO, paste0(filepath_out, "mergedMO.csv"))
# Outlier removal for the MO group, based on rtDet (IQR rule via rstatix).
# Collect the rtDet value of every row that identify_outliers() marks with
# is.outlier (all outliers, not only those also marked is.extreme).
outlierList <- rstatix::identify_outliers(mergedMO, rtDet) |>
  filter(is.outlier == TRUE) |>
  pull(rtDet)
# Keep the MO rows whose rtDet is not one of the collected values
mergedMO_OL <- filter(mergedMO, !(rtDet %in% outlierList))
# Save the outlier-free MO table
write_csv(mergedMO_OL, paste0(filepath_out, "mergedMO_OL.csv"))
# Pool the MO and MA tables into a single table and relabel both subgroup
# codes in `group` as "M". full_join() is called without `by`, so it joins on
# every column the two tables have in common.
mergedM <- full_join(mergedMO, mergedMA) |>
  mutate(group = recode(group, "MO" = "M", "MA" = "M"))
# Saved with readr::write_csv(), like the per-group outputs of this script.
# Base write.csv() would prepend a row-name column by default, giving this
# file a different layout from mergedC/mergedMA/mergedMO.
write_csv(mergedM, paste0(filepath_out, "mergedM.csv"))

# All groups: the merged controls joined with the pooled M table
allGroups <- full_join(mergedC, mergedM)
# write_csv() for the same reason as above (no extra row-name column)
write_csv(allGroups, paste0(filepath_out, "allGroups.csv"))
# Pool the outlier-free MO and MA tables (outliers were removed within each
# subgroup separately) and relabel both subgroup codes in `group` as "M".
# full_join() is called without `by`, so it joins on every shared column.
mergedM_OL <- full_join(mergedMO_OL, mergedMA_OL) |>
  mutate(group = recode(group, "MO" = "M", "MA" = "M"))
# Saved with readr::write_csv(), like the per-group outputs of this script.
# Base write.csv() would prepend a row-name column by default, giving this
# file a different layout from the per-group _OL files.
write_csv(mergedM_OL, paste0(filepath_out, "mergedM_OL.csv"))

# All groups, outlier-free: outlier-free controls joined with the pooled M table
allGroups_OL <- full_join(mergedC_OL, mergedM_OL)
# write_csv() for the same reason as above (no extra row-name column)
write_csv(allGroups_OL, paste0(filepath_out, "allGroups_OL.csv"))