imaging data pulled: 2021-10-12
clinical data pulled: 2020-11-16
code written: 2021-11-15
last run: 2021-11-17
Description. Here, we visualize (1) the distributions of several clinical variables (PHQ-9, MADRS, PSS, GAD-7) and cognitive variables (RBANS, D-KEFS), and (2) univariate correlations between these variables and LC NM. The LC NM and the cognitive variables (RBANS and D-KEFS) have been age- and sex-corrected (`_cor`), and the 4 RT D-KEFS CWI variables have also been normalized (`_normcor`). The multivariate CCA analysis of LC NM-MRI and select cognitive variables (RBANS and D-KEFS) is performed later in `06_CCA`.
Load libraries and data
#clear environment
rm(list = ls())
#list required libraries
packages <- c(
  'tidyverse',
  'purrr', #set_names
  'ggpubr',
  'cowplot'
)
#load required libraries
#library() stops with an error when a package is missing, whereas require()
#only returns FALSE and lets the script fail later at the first missing function
invisible(lapply(packages, library, character.only = TRUE))
#figure settings
knitr::opts_chunk$set(fig.width=9.5, warning=FALSE, message=FALSE)
#read in cleaned participant demographic/clinical
#NOTE(review): dir() must match exactly one file here; read.csv() errors otherwise
df <- read.csv(dir('../clinical', full.names=TRUE, pattern="^df_2021")) #48
#read in cleaned, normed, cognition and LC
df_norm <- read.csv(dir('../clinical', full.names=TRUE, pattern="^dfCorrected")) #48
Data cleaning
#for clarity, keep only depression variables
df <- df[, grep('^id$|pss|gad7|phq9_[0-9]|phq9_total|madrs|^avg_max_cor$', names(df))]
#merge dataframes
df <- merge(df_norm, df, by='id')
#clean up
rm(df_norm)
#helper: names of the columns of `data` that contain `stem`, skipping every name
#that contains any of the `exclude` terms; returned as a self-named character
#vector so that purrr::map() output is labelled by variable name
pick_vars <- function(data, stem, exclude='total'){
  #the exclusion terms are grouped inside the lookahead so that each alternative
  #is rejected anywhere in the name, not only at its start
  pattern <- paste0('^(?!.*(', paste(exclude, collapse='|'), ')).*', stem)
  set_names(grep(pattern, names(data), perl=TRUE, value=TRUE))
}
#pull out variables for each assessment, excluding total
vars_LC <- pick_vars(df, 'avg')
#previously '^(?!.*total|diff).*phq9', which only rejected names *starting* with
#'diff' because '|' binds more loosely than '.*'
vars_phq9 <- pick_vars(df, 'phq9', exclude=c('total', 'diff'))
vars_madrs <- pick_vars(df, 'madrs')
vars_rbans <- pick_vars(df, 'rbans')
vars_dkefs <- pick_vars(df, 'dkefs')
vars_pss <- pick_vars(df, 'pss')
vars_gad7 <- pick_vars(df, 'gad7')
#pull out all total variables
vars_totals <- set_names(grep('_total', names(df), value=TRUE))
#identify the minimum and maximum LC values across every 'avg' column
#na.rm=TRUE keeps a single missing value from turning both limits into NA
LC_values <- df[grep('avg', names(df))]
LC_min <- min(LC_values, na.rm=TRUE)
LC_max <- max(LC_values, na.rm=TRUE)
Functions
#function for ordinal variables - bargraph
#arguments:
#  df        data frame containing id, Diagnosis, Age, Sex plus the columns named below
#  vars      character vector of item-level column names; one facet is drawn per name
#  var_total name (single string) of the total-score column
#returns the ggarrange() layout with the item facets (2/3 width) next to the total (1/3 width)
#the former default `df=df` referred to itself and raised "promise already under
#evaluation" whenever it was relied on, so df is now simply a required argument
plotOrdinal_fn <- function(df, vars, var_total){
  #left panel: counts per response value, stacked by diagnosis, one facet per item
  p1 <- df %>%
    pivot_longer(cols=-c(id, Diagnosis, Age, Sex)) %>%
    filter(name %in% vars) %>%
    ggplot(aes(x=value)) +
    geom_bar(stat='count', aes(fill=Diagnosis)) +
    facet_wrap(~name, scales='free') +
    theme_classic() +
    theme(legend.position = 'none')
  #right panel: counts of the total score; this panel carries the legend
  #.data[[...]] replaces the deprecated aes_string(); xlab() pins the axis title
  #to the column name regardless of the installed ggplot2 version
  p2 <- df %>%
    ggplot(aes(x=.data[[var_total]])) +
    geom_bar(stat='count', aes(fill=Diagnosis)) +
    xlab(var_total) +
    theme_classic() +
    theme(legend.position='top')
  ggarrange(p1, p2, widths=c(2/3, 1/3))
}
#function for continuous variables - histogram
#arguments:
#  df   data frame containing id, Diagnosis, Age, Sex plus the columns named in vars
#  vars character vector of column names; one facet is drawn per name
#returns a ggplot object: density-scaled histograms coloured by diagnosis with a
#density curve overlaid, one free-scaled facet per variable
#the former default `df=df` referred to itself and raised "promise already under
#evaluation" whenever it was relied on, so df is now simply a required argument
plotContinuous_fn <- function(df, vars){
  df %>%
    pivot_longer(cols=-c(id, Diagnosis, Age, Sex)) %>%
    filter(name %in% vars) %>%
    ggplot(aes(x=value, color=Diagnosis, fill=Diagnosis)) +
    #after_stat(density) replaces the deprecated ..density.. notation
    geom_histogram(aes(y=after_stat(density))) +
    geom_density(col='black', alpha=.2) +
    facet_wrap(~name, scales='free') +
    theme_classic() +
    theme(legend.position='top')
}
#function for scatterplot correlation of "continuous" total variables
#arguments:
#  x name (single string) of the column plotted on the x-axis
#  y name (single string) of the column plotted on the y-axis
#returns a ggplot object: points and per-diagnosis linear fits with the
#ggpubr::stat_cor() annotation
#reads df, LC_min and LC_max from the enclosing (global) environment
plotCorrelation_fn <- function(x,y) {
  #copy the arguments to names that cannot be mistaken for a column of df when
  #they are looked up inside aes()
  x_col <- x
  y_col <- y
  #.data[[...]] replaces the deprecated aes_string(); labs() pins the axis titles
  #to the column names regardless of the installed ggplot2 version
  ggplot(df, aes(x=.data[[x_col]], y=.data[[y_col]], color=Diagnosis)) +
    geom_point() +
    geom_smooth(method="lm") +
    #same y-range on every panel, taken from the LC limits computed above
    ylim(LC_min, LC_max) +
    labs(x=x_col, y=y_col) +
    theme_classic() +
    theme(legend.position='none') +
    ggpubr::stat_cor()
}
Distributions
#ordinal questionnaires: item-level bar charts next to the total-score bar chart
plotOrdinal_fn(df=df, vars=vars_phq9, var_total='phq9_total')
plotOrdinal_fn(df=df, vars=vars_madrs, var_total='madrs_total_calc')
plotOrdinal_fn(df=df, vars=vars_pss, var_total='pss_total')
plotOrdinal_fn(df=df, vars=vars_gad7, var_total='gad7_total')
#cognitive scores: histograms with a density overlay, one facet per variable
plotContinuous_fn(df=df, vars=vars_rbans)
plotContinuous_fn(df=df, vars=vars_dkefs)
Note: because the D-KEFS RT variables have been age- and sex-corrected and normalized, their x-axis values no longer represent seconds.
Correlations, clinical scores
#one grid of scatterplots per LC variable: every total score (x) against that LC variable (y)
#each grid is built once, and the loop follows the length of vars_LC instead of a
#hard-coded index 1..7; print() is needed because plots are not auto-printed inside a loop
for (lc_var in vars_LC) {
  print(plot_grid(plotlist = map(vars_totals, plotCorrelation_fn, y=lc_var)))
}
Correlations, RBANS
#one grid of scatterplots per LC variable: every RBANS variable (x) against that LC variable (y)
#each grid is built once, and the loop follows the length of vars_LC instead of a
#hard-coded index 1..7; print() is needed because plots are not auto-printed inside a loop
for (lc_var in vars_LC) {
  print(plot_grid(plotlist = map(vars_rbans, plotCorrelation_fn, y=lc_var)))
}
Correlations, DKEFS
#one grid of scatterplots per LC variable: every D-KEFS variable (x) against that LC variable (y)
#each grid is built once, and the loop follows the length of vars_LC instead of a
#hard-coded index 1..7; print() is needed because plots are not auto-printed inside a loop
for (lc_var in vars_LC) {
  print(plot_grid(plotlist = map(vars_dkefs, plotCorrelation_fn, y=lc_var)))
}