knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(warn = FALSE)
knitr::opts_chunk$set(include = TRUE)
knitr::opts_knit$set(root.dir = "/Users/sarenseeley/Dropbox/Postdoc/nwtc_study/")
options(scipen=999)
rm(list = ls())
# load packages
library(emmeans)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(ggthemes)
library(afex)
filter <- dplyr::filter
select <- dplyr::select
rename <- dplyr::rename
library(dplyr)
library(readr)
library(purrr)
library(readxl)
library(stringr)
# Define the path to the parent folder containing the subfolders
parent_folder_path <-
"/Users/sarenseeley/Dropbox/Postdoc/nwtc_study/data/volBrain/HIPS"
# List all CSV files in the parent folder and its subfolders
csv_files <- NULL
csv_files <-
list.files(
path = parent_folder_path,
pattern = glob2rx("report*.csv"),
recursive = TRUE,
full.names = TRUE
)
# Read each CSV file and store its data as a separate data frame in a list
data_list <-
map(csv_files,
~ read.csv(
.x,
sep = ";",
stringsAsFactors = FALSE,
check.names = FALSE
))
# Combine all the data frames into a single data frame, stacking them vertically
volBrain_Combined_df <- bind_rows(data_list)
all_subject_ids <-
read.csv(
"/Users/sarenseeley/Dropbox/Postdoc/nwtc_study/data/volBrain/HIPS/_volBrain HIPS log.csv",
stringsAsFactors = FALSE
)
# Extract the two columns you want to add, but only for the rows matching volBrain_Combined_df
record_id <- all_subject_ids[[5]]#[1:nrow(volBrain_Combined_df)]
# Add the new columns to the volBrain_Combined_df data frame as the first two columns
volBrain_Combined_df <- cbind(record_id, volBrain_Combined_df)
snt_data <-
read_excel(
"/Users/sarenseeley/Dropbox/Postdoc/mentoring/Isabella Fonseca/SNT/data/SNT_data/SNT-behavior_n80.xlsx"
) %>% rename(record_id = sub_id)
snt_data$record_id <-
str_replace(snt_data$record_id, "nwtc", "NWTC-")
df <- full_join(volBrain_Combined_df, snt_data, by = "record_id")
df <-
df %>% filter(!record_id == "NWTC-1172" & !record_id == "NWTC-034")
colnames(df) <- gsub(" ", "_", colnames(df))
colnames(df) <- gsub("%", "pc", colnames(df))
other <- readRDS("data/_cleaned/clean_v2_dataset_2024.rds")
grp <- read.csv("notes/_nwtc_bids_key.csv")[1:97, c(1:3)]
df <- left_join(df, other, by = "record_id")
df <- left_join(df, grp, by = "record_id")
df <-
df %>% mutate(
group.use = factor(
group.use,
levels = c("Resilient", "Low-exposed", "PTSD"),
labels = c("Highly resilient", "Lower WTC-exposed", "PTSD"),
ordered = FALSE
),
health_meds_psych.cns_bin = if_else(health_meds_psych.cns == 0, 0, 1),
ptsd = if_else(group.use == "PTSD", "PTSD", "NO_PTSD")
) %>% rename(meds_allPsych = health_meds_psych.cns,
meds_allPsych_bin = health_meds_psych.cns_bin)
df$meds_allPsych_bin[df$bids_id == "sub-084"] <-
1 # zolpidem night before 4pm scan
below_50_df <-
readRDS(
"/Users/sarenseeley/Dropbox/Postdoc/mentoring/Isabella Fonseca/SNT/isabella_r_scripts/below_50_df.rds"
) %>% rename(record_id = sub_id)
drop_ids <-
c(below_50_df$record_id, "NWTC-034") #excludes NWTC-034 [lots of missing trials]
# long dataset
df_long <-
df %>% select(
record_id,
group.use,
Sex,
Age,
tot_ctq,
tot_tleq_nonW,
exposures_count,
ptsd,
(contains("_pc") |
contains("_Asym") |
contains("_cm3")) &
!contains("scid")
) %>% filter(!record_id %in% drop_ids) #scid pcp shouldn't be included
df_long <- df_long %>%
tidyr::pivot_longer(
cols = -c(
record_id,
group.use,
Sex,
Age,
tot_ctq,
tot_tleq_nonW,
exposures_count,
ptsd,
ICV_cm3
),
names_to = "region",
values_to = "value"
)
df_long[12:14] <-
str_split_fixed(df_long$region, "_", 3) # split column into 3 columns, by underscore
df_long <-
df_long %>% select(!region) %>% rename(region = V1,
lat = V2,
measure = V3) # which subfield, which hemisphere, which metric (cm3, percent, asymmetry index)
df_long1 <-
df_long %>% filter(!measure == "" &
!lat == "total" &
measure == "cm3" &
!region == "Hippocampus") # don't include total hippocampus
df_long2 <-
df_long %>% filter(!measure == "" &
!lat == "total" &
measure == "pc" & !region == "Hippocampus")
df_long2b <-
df_long %>% filter(!measure == "" &
lat == "total" &
measure == "pc" )#& !region == "Hippocampus")
df_long3 <-
df_long %>% filter(lat == "asymmetry" & !region == "Hippocampus")
ggplot(df_long2b, aes(ptsd, value, fill = region)) +
stat_summary(
geom = "col",
fun = mean,
position = "dodge",
color = "black"
) +
stat_summary(
geom = "errorbar",
fun.data = mean_se,
position = position_dodge(.9),
width = 0.25,
color = "black"
) + theme_clean(base_size=16) + theme(axis.text.x=element_text(angle=45, hjust=1)) + geom_hline(yintercept = 0) + facet_wrap( ~ region, scales = "free") + labs(title =
"Hippocampal volume (%): subfields and total in PTSD/no PTSD", fill="region")
ggplot(df_long2, aes(ptsd, value, fill = region)) +
stat_summary(
geom = "col",
fun = mean,
position = "dodge",
color = "black"
) +
stat_summary(
geom = "errorbar",
fun.data = mean_se,
position = position_dodge(.9),
width = 0.25,
color = "black"
) + theme_clean(base_size = 10) + theme(axis.text.x=element_text(angle=45, hjust=1)) + geom_hline(yintercept = 0) + facet_wrap( ~ region*lat, scales = "free") + labs(title =
"Normalized hippocampal volume (%) across subfields, hemispheres, and PTSD/No PTSD", fill="region")
NA
NA
NA
SUMMARY: Normalized hippocampal volume in the right CA2-CA3 subfield(s) was positively correlated with mean distance from self (
pov_2d_dist_mean_meanandpov_3d_dist_mean_mean), as well as greaterneu_2d_angle_mean_mean, r’s = .25-.26, p’s = .029-.032.
library(corrplot)
cordf <-
df%>% filter(!record_id %in% drop_ids & !is.na(affil_mean_mean)) %>% select(contains("mean_mean"), ends_with("pc") &
!contains("scid"))
cor_result <- Hmisc::rcorr(as.matrix(cordf))
cor_matrix <- cor_result$r # Correlation matrix
p_matrix <- cor_result$P # P-values matrix
significant_cor_matrix <- cor_matrix * (p_matrix <= 0.05)
corrplot(
significant_cor_matrix,
method = "color",
tl.col = "black",
outline = TRUE,
addrect = TRUE,
tl.cex = 1,
title = "p <.05",
tl.pos = "ld",
type = "lower"
)
ggplot(cordf, aes(y=`CA2-CA3_right_pc`, x=pov_2d_dist_mean_mean)) +
geom_point(size = 1) + theme_clean() + geom_smooth(method = "lm") + labs(title="POV 2D distance mean mean")
`geom_smooth()` using formula = 'y ~ x'
#summary(lm(`CA2-CA3_right_pc` ~ pov_2d_dist_mean_mean,cordf))
cor.test(cordf$`CA2-CA3_right_pc`,cordf$pov_2d_dist_mean_mean)
Pearson's product-moment correlation
data: cordf$`CA2-CA3_right_pc` and cordf$pov_2d_dist_mean_mean
t = 2.1886, df = 71, p-value = 0.03192
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.02264018 0.45514063
sample estimates:
cor
0.251398
ggplot(cordf, aes(y=`CA2-CA3_right_pc`, x=pov_3d_dist_mean_mean)) +
geom_point(size = 1) + theme_clean() + geom_smooth(method = "lm") + labs(title="POV 3D distance mean mean")
`geom_smooth()` using formula = 'y ~ x'
#summary(lm(`CA2-CA3_right_pc` ~ pov_3d_dist_mean_mean,cordf))
cor.test(cordf$`CA2-CA3_right_pc`,cordf$pov_3d_dist_mean_mean)
Pearson's product-moment correlation
data: cordf$`CA2-CA3_right_pc` and cordf$pov_3d_dist_mean_mean
t = 2.2311, df = 71, p-value = 0.02883
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.02751381 0.45899852
sample estimates:
cor
0.2559608
ggplot(cordf, aes(y=`CA2-CA3_right_pc`, x=neu_3d_angle_mean_mean)) +
geom_point(size = 1) + theme_clean() + geom_smooth(method = "lm") + labs(title="Neutral 3D angle mean mean")
`geom_smooth()` using formula = 'y ~ x'
#summary(lm(`CA2-CA3_right_pc` ~ neu_3d_angle_mean_mean,cordf))
cor.test(cordf$`CA2-CA3_right_pc`,cordf$neu_3d_angle_mean_mean)
Pearson's product-moment correlation
data: cordf$`CA2-CA3_right_pc` and cordf$neu_3d_angle_mean_mean
t = 2.2014, df = 71, p-value = 0.03096
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.02410572 0.45630244
sample estimates:
cor
0.2527711