imaging data pulled: 2021-10-12
clinical data pulled: 2020-11-16
code written: 2021-11-15
last run: 2021-11-17

Description. Here, we visualize (1) the distributions of several clinical variables (PHQ-9, MADRS, PSS, GAD-7) and cognitive variables (RBANS, D-KEFS), and (2) the univariate correlations between those variables and LC NM. The LC NM and the cognitive variables (RBANS and D-KEFS) have been age- and sex-corrected (suffix _cor), and the 4 RT D-KEFS CWI variables have additionally been normalized (suffix _normcor). The multivariate CCA of LC NM-MRI and selected cognitive variables (RBANS and D-KEFS) is performed later, in 06_CCA.


Load libraries and data

# Clear user-defined objects so the analysis starts from a known workspace.
# Note: this removes objects only; it does not detach packages or reset options.
rm(list = ls())

# Packages required by this notebook
packages <- c(
  "tidyverse",
  "purrr", # set_names, map
  "ggpubr",
  "cowplot"
)

# Attach the packages. library() stops with an error when a package is not
# installed, whereas require() only returns FALSE and lets the script carry on
# until some later, less obvious failure.
invisible(lapply(packages, library, character.only = TRUE))

# Figure settings
knitr::opts_chunk$set(fig.width = 9.5, warning = FALSE, message = FALSE)

# Read in cleaned participant demographic/clinical data.
# The file is located by pattern, so insist on exactly one match instead of
# handing read.csv() zero or several paths.
clinical_path <- dir("../clinical", full.names = TRUE, pattern = "^df_2021")
if (length(clinical_path) != 1L) {
  stop(
    "Expected exactly one file matching '^df_2021' in ../clinical, found ",
    length(clinical_path),
    call. = FALSE
  )
}
df <- read.csv(clinical_path) #48

# Read in cleaned, normed, cognition and LC data (same single-match check).
corrected_path <- dir("../clinical", full.names = TRUE, pattern = "^dfCorrected")
if (length(corrected_path) != 1L) {
  stop(
    "Expected exactly one file matching '^dfCorrected' in ../clinical, found ",
    length(corrected_path),
    call. = FALSE
  )
}
df_norm <- read.csv(corrected_path) #48

Data cleaning

# For clarity, keep only the id, the clinical questionnaire columns
# (PSS, GAD-7, PHQ-9 items/total, MADRS) and avg_max_cor from the clinical
# data frame. drop = FALSE keeps a data frame even if only one column matches.
df <- df[
  ,
  grep("^id$|pss|gad7|phq9_[0-9]|phq9_total|madrs|^avg_max_cor$", names(df)),
  drop = FALSE
]

# Merge with the corrected cognition/LC data. merge() keeps only ids that are
# present in both data frames.
df <- merge(df_norm, df, by = "id")

# Clean up
rm(df_norm)

# Pull out the variables for each assessment, excluding totals. Each result is
# a character vector named by itself, so that map() output carries the names.
vars_LC <- set_names(grep("^(?!.*total).*avg", names(df), perl = TRUE, value = TRUE))
# NOTE(review): in the pattern below `|diff` is an alternative inside the
# lookahead, so it only rejects names that *start* with "diff" -- confirm
# that this is the intended exclusion.
vars_phq9 <- set_names(grep("^(?!.*total|diff).*phq9", names(df), perl = TRUE, value = TRUE))
vars_madrs <- set_names(grep("^(?!.*total).*madrs", names(df), perl = TRUE, value = TRUE))
vars_rbans <- set_names(grep("^(?!.*total).*rbans", names(df), perl = TRUE, value = TRUE))
vars_dkefs <- set_names(grep("^(?!.*total).*dkefs", names(df), perl = TRUE, value = TRUE))
vars_pss <- set_names(grep("^(?!.*total).*pss", names(df), perl = TRUE, value = TRUE))
vars_gad7 <- set_names(grep("^(?!.*total).*gad7", names(df), perl = TRUE, value = TRUE))

# Pull out all total variables
vars_totals <- set_names(grep("_total", names(df), value = TRUE))

# Identify the minimum and maximum across all LC ("avg") columns; these are
# used as shared y-axis limits in the correlation plots. na.rm = TRUE keeps a
# single missing value from turning both limits into NA.
lc_values <- df[grep("avg", names(df))]
LC_min <- min(lc_values, na.rm = TRUE)
LC_max <- max(lc_values, na.rm = TRUE)

Functions

# Function for ordinal variables - bar graphs.
#
# Draws one bar graph of counts per item in `vars` (faceted, coloured by
# Diagnosis) next to a bar graph of the total score.
#
# Arguments:
#   df        data frame with columns id, Diagnosis, Age, Sex, the item
#             columns named in `vars`, and the column named by `var_total`.
#   vars      character vector of item column names to plot.
#   var_total name (string) of the total-score column.
# Returns the two-panel figure assembled by ggarrange().
#
# `df` has no default: the previous `df = df` default referred to itself and
# raised a recursive-default error whenever it was actually used.
plotOrdinal_fn <- function(df, vars, var_total) {
  # Item-level panel: long format, one facet per item
  items_plot <- df %>%
    pivot_longer(cols = -c(id, Diagnosis, Age, Sex)) %>%
    filter(name %in% vars) %>%
    ggplot(aes(x = value)) +
    geom_bar(stat = "count", aes(fill = Diagnosis)) +
    facet_wrap(~name, scales = "free") +
    theme_classic() +
    theme(legend.position = "none")

  # Total-score panel. .data[[...]] replaces the deprecated aes_string();
  # labs() keeps the column name as the axis title.
  total_plot <- ggplot(df, aes(x = .data[[var_total]])) +
    geom_bar(stat = "count", aes(fill = Diagnosis)) +
    labs(x = var_total) +
    theme_classic() +
    theme(legend.position = "top")

  ggarrange(items_plot, total_plot, widths = c(2 / 3, 1 / 3))
}

# Function for continuous variables - histograms.
#
# Draws a density-scaled histogram with an overlaid density curve for every
# column named in `vars`, one facet per variable, coloured by Diagnosis.
#
# Arguments:
#   df   data frame with columns id, Diagnosis, Age, Sex and the columns
#        named in `vars`.
#   vars character vector of column names to plot.
# Returns a ggplot object.
#
# `df` has no default: the previous `df = df` default referred to itself and
# raised a recursive-default error whenever it was actually used.
plotContinuous_fn <- function(df, vars) {
  df %>%
    pivot_longer(cols = -c(id, Diagnosis, Age, Sex)) %>%
    filter(name %in% vars) %>%
    ggplot(aes(x = value, color = Diagnosis, fill = Diagnosis)) +
    # after_stat(density) replaces the deprecated ..density.. notation
    geom_histogram(aes(y = after_stat(density))) +
    geom_density(col = "black", alpha = .2) +
    facet_wrap(~name, scales = "free") +
    theme_classic() +
    theme(legend.position = "top")
}

# Function for scatterplot correlation of "continuous" total variables.
#
# Plots `y` against `x` as points with a per-Diagnosis linear fit and the
# correlation annotation from ggpubr::stat_cor().
#
# Arguments:
#   x, y     column names (strings) for the x and y axes.
#   data     data frame to plot; defaults to the global `df`, as before.
#   y_limits length-2 numeric vector of y-axis limits; defaults to the global
#            LC_min / LC_max so that all panels share one y scale.
# Returns a ggplot object.
plotCorrelation_fn <- function(x, y, data = df, y_limits = c(LC_min, LC_max)) {
  # .data[[...]] replaces the deprecated aes_string(); labs() keeps the column
  # names as axis titles.
  ggplot(data, aes(x = .data[[x]], y = .data[[y]], color = Diagnosis)) +
    geom_point() +
    geom_smooth(method = "lm") +
    ylim(y_limits[1], y_limits[2]) +
    labs(x = x, y = y) +
    theme_classic() +
    theme(legend.position = "none") +
    ggpubr::stat_cor()
}

Distributions

PHQ-9

# PHQ-9: item-level counts alongside the total score
plotOrdinal_fn(df = df, vars = vars_phq9, var_total = "phq9_total")

MADRS

# MADRS: item-level counts alongside the calculated total score
plotOrdinal_fn(df = df, vars = vars_madrs, var_total = "madrs_total_calc")

PSS

# PSS: item-level counts alongside the total score
plotOrdinal_fn(df = df, vars = vars_pss, var_total = "pss_total")

GAD-7

# GAD-7: item-level counts alongside the total score
plotOrdinal_fn(df = df, vars = vars_gad7, var_total = "gad7_total")

RBANS

# RBANS: histogram and density for each RBANS variable
plotContinuous_fn(df = df, vars = vars_rbans)

DKEFS

# D-KEFS: histogram and density for each D-KEFS variable
plotContinuous_fn(df = df, vars = vars_dkefs)

Note: because the D-KEFS RT variables have been age- and sex-corrected and normalized, their x-axis values no longer represent seconds.


Correlations, clinical scores

Segment 1

# Build only the panels for the 1st LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_totals, plotCorrelation_fn, y = vars_LC[[1]]))

Segment 2

# Build only the panels for the 2nd LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_totals, plotCorrelation_fn, y = vars_LC[[2]]))

Segment 3

# Build only the panels for the 3rd LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_totals, plotCorrelation_fn, y = vars_LC[[3]]))

Segment 4

# Build only the panels for the 4th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_totals, plotCorrelation_fn, y = vars_LC[[4]]))

Segment 5

# Build only the panels for the 5th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_totals, plotCorrelation_fn, y = vars_LC[[5]]))

Segment 6

# Build only the panels for the 6th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_totals, plotCorrelation_fn, y = vars_LC[[6]]))

Combined segments

# Build only the panels for the 7th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_totals, plotCorrelation_fn, y = vars_LC[[7]]))


Correlations, RBANS

Segment 1

# Build only the panels for the 1st LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_rbans, plotCorrelation_fn, y = vars_LC[[1]]))

Segment 2

# Build only the panels for the 2nd LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_rbans, plotCorrelation_fn, y = vars_LC[[2]]))

Segment 3

# Build only the panels for the 3rd LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_rbans, plotCorrelation_fn, y = vars_LC[[3]]))

Segment 4

# Build only the panels for the 4th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_rbans, plotCorrelation_fn, y = vars_LC[[4]]))

Segment 5

# Build only the panels for the 5th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_rbans, plotCorrelation_fn, y = vars_LC[[5]]))

Segment 6

# Build only the panels for the 6th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_rbans, plotCorrelation_fn, y = vars_LC[[6]]))

Combined segments

# Build only the panels for the 7th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_rbans, plotCorrelation_fn, y = vars_LC[[7]]))



Correlations, DKEFS

Segment 1

# Build only the panels for the 1st LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_dkefs, plotCorrelation_fn, y = vars_LC[[1]]))

Segment 2

# Build only the panels for the 2nd LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_dkefs, plotCorrelation_fn, y = vars_LC[[2]]))

Segment 3

# Build only the panels for the 3rd LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_dkefs, plotCorrelation_fn, y = vars_LC[[3]]))

Segment 4

# Build only the panels for the 4th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_dkefs, plotCorrelation_fn, y = vars_LC[[4]]))

Segment 5

# Build only the panels for the 5th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_dkefs, plotCorrelation_fn, y = vars_LC[[5]]))

Segment 6

# Build only the panels for the 6th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_dkefs, plotCorrelation_fn, y = vars_LC[[6]]))

Combined segments

# Build only the panels for the 7th LC variable instead of generating the
# plots for every LC variable and discarding all but one.
plot_grid(plotlist = map(vars_dkefs, plotCorrelation_fn, y = vars_LC[[7]]))