imaging data pulled: 2021-10-12
clinical data pulled: 2020-11-16
code written: 2021-11-05
last ran: 2021-11-15

Description. Here, we review several aspects of the LC NM-MRI data, largely on raw values (i.e., uncorrected for age and sex). The intent is to understand our data, and ensure the distributions are consonant with what we expect.


Load libraries and data

#clear environment
rm(list = ls())

#list required libraries
packages <- c(
  'tidyverse',
  'kableExtra', #pretty tables
  'jtools', #summ,
  'interactions', #interact_plot
  'ggpubr', #plots
  'gridExtra', #grob
  'lm.beta', #model comparison
  'inflection' #check_curve
)

#load required libraries
lapply(packages, require, character.only = TRUE)

#read in LC data (contains raw and corrected for age and sex, but not yet standardized)
df <- read.csv(dir('../clinical', full.names=T, pattern="^df_2021"), stringsAsFactors = T) #48

#keep only variables of interest, for clarity
df <-  df[, grep('^id$|Diagnosis|Age|Sex|avg_max_seg', names(df))] #16

Data munging

#pull out names of LC segment variables, of interest
vars_segmentsCorrected <- names(df[, grep('avg.+_cor', names(df))])
vars_segmentsUncorrected <- names(df[, grep('avg_max_seg.+[0-9]$', names(df))])

#created separate, melted dfs, of corrected and uncorrected data
df_corrected <- reshape::melt(df[c('id', 'Diagnosis', 'Age', 'Sex', vars_segmentsCorrected)], id.vars=c('id', 'Diagnosis', 'Age', 'Sex'))
df_uncorrected <- reshape::melt(df[c('id', 'Diagnosis', 'Age', 'Sex', vars_segmentsUncorrected)], id.vars=c('id', 'Diagnosis', 'Age', 'Sex'))

#to the corrected df, add a 'hemisphere' variable
df_corrected <- df_corrected %>%  
   mutate(hemisphere = case_when(
     grepl("1|2|3", variable) ~ "left",
     grepl("4|5|6", variable) ~"right"))    
    
#to the corrected df, also add a 'ROI' variable
df_corrected <- df_corrected %>%  
   mutate(ROI = case_when(
     grepl("1|4", variable) ~ "rostral",
     grepl("2|5", variable) ~ "medial",
     grepl("3|6", variable) ~ "caudal"))  

#define age limits in data
age_min <- min(df$Age)
age_max <- max(df$Age)

#define LC NM-MRI limits in data
NM_min <- min(df[, vars_segmentsUncorrected])
NM_max <- max(df[, vars_segmentsUncorrected])

Data correction
We have previously corrected all LC and cognition variables for age and sex, and four cognition variables (all RT on the D-KEFS CWI) for normality. Here, we additionally correct LC variables for sex only (not age), for select exploratory analyses. (Note: at present, the sex-corrected-only LC variables are not reviewed in any analyses.)

#make a separate df for variables we will correct for sex
sex <- df[, c('id', 'Sex', vars_segmentsUncorrected)]

#function to add a 'sex corrected only' variant of each of the 6 segments
adjustVars_fn <- function(x, data){ #function to correct for sex
  fit <- lm(x ~ Sex, na.action = na.exclude, data) 
  adjusted <- resid(fit) + coef(fit)[1] #residual intercept
  return(adjusted)
}

#apply sex-correct function to all variables, put in new df for review
df_sexCorrected <- data.frame(df$id,   
    sapply(sex[vars_segmentsUncorrected], function(x) adjustVars_fn(x, data=sex)))

#adjust names in adjusted df, so clear have been corrected
names(df_sexCorrected)[-1] <- paste(names(df_sexCorrected[,-1]), '_sexcor', sep = "")

#add the adjusted variables back to our cognitive df
df_sexCorrected <- merge(df[, c('id', 'Diagnosis', 'Sex', 'Age')], df_sexCorrected, by.x='id', by.y='df.id')

#for clarity, remove the unneeded df
rm(sex)

Define segment-wise statistical models

#linear model 
model_ageDxNM_lin <- lapply(df[, vars_segmentsUncorrected], function(x) lm(as.formula(x ~ Age * Diagnosis), data=df))
model_ageDxNM_lin_summary <- lapply(model_ageDxNM_lin, summ)

#quadratic model
model_ageDxNM_quad <- lapply(df[, vars_segmentsUncorrected], function(x) lm(as.formula(x ~ poly(Age,2) * Diagnosis), data=df))
model_ageDxNM_quad_summary <- lapply(model_ageDxNM_quad, summ)

#linear models - by diagnosis
model_ageDxNM_lin_HC <- lapply(df[df$Diagnosis=='HC', vars_segmentsUncorrected], function(x) lm(as.formula(x ~ Age), df[df$Diagnosis=='HC',]))
model_ageDxNM_lin_LLD <- lapply(df[df$Diagnosis=='LLD', vars_segmentsUncorrected], function(x) lm(as.formula(x ~ Age), df[df$Diagnosis=='LLD',]))

#quadratic model - by diagnosis
model_ageDxNM_quad_HC <- lapply(df[df$Diagnosis=='HC', vars_segmentsUncorrected], function(x) lm(as.formula(x ~ poly(Age,2)), df[df$Diagnosis=='HC',]))
model_ageDxNM_quad_LLD <- lapply(df[df$Diagnosis=='LLD', vars_segmentsUncorrected], function(x) lm(as.formula(x ~ poly(Age,2)), df[df$Diagnosis=='LLD',]))

#cubic model - by diagnosis (required for "inflection point" analysis)  
model_ageDxNM_cubic_fn <- function(df, vars, group){
  model <- lapply(df[df$Diagnosis==group, vars], function(x) lm(as.formula(x ~ poly(Age,3)), df[df$Diagnosis==group,]))
}

#cubic models - by diagnosis
model_ageDxNM_cubicHC <- model_ageDxNM_cubic_fn(df, vars_segmentsUncorrected, 'HC')
model_ageDxNM_cubicLLD <- model_ageDxNM_cubic_fn(df, vars_segmentsUncorrected, 'LLD')

Define plotting functions

#scatter plots (question 1)
plotScatter_fn <- function(df, group=NULL, x, y, color=NULL){
  if(!is.null(group)) df <- df[df$Diagnosis == group,]
  if(is.null(color)){
    scatter <- ggscatter(data=df, x=x, y=y,
      add="reg.line", conf.int=TRUE) +
      stat_cor() +
      facet_wrap(~variable) 
  } else {
    scatter <- ggscatter(data=df, x=x, y=y,
      add="reg.line", conf.int=TRUE, color=color) +
      stat_cor(aes(color=Diagnosis)) +
      facet_wrap(~variable) 
  }
  return(scatter)
}

###########################################################################################

#box plots (question 2)
plotBox_fn <- function(df, group=NULL, x, y){
  if(!is.null(group)){
    df <- df[df$Diagnosis == group,] 
    box <- ggboxplot(df, x=x, y=y, add='jitter') +
      stat_compare_means(method='t.test', vjust=1) +
      facet_wrap(~variable) 
  } else {
   box <- ggboxplot(df, x=x, y=y, add='jitter', color = 'Diagnosis') +
      stat_compare_means(method='t.test', vjust=1) +
      facet_wrap(~variable) 
  }
  return(box)
}

###########################################################################################

#interaction plot
list_ageDxInt = list()
for (i in 1:6){
  plot <- interactions::interact_plot(model_ageDxNM_lin[[i]], pred=Age, modx=Diagnosis, plot.points = TRUE, main.title=paste('Segment', i),legend=NULL)  
  list_ageDxInt[[i]] <- plot
}

###########################################################################################

#ANOVA plot (question 3)
plotAnova_fn <- function(df, x, y, group=NULL){
  if(!is.null(group)) df <- df[df$Diagnosis == group,] 
  ggboxplot(df, x=x, y=y) +
  stat_compare_means() +
  rotate_x_text(10) + 
  ylim(NM_min-5, NM_max+5) +
  theme(text=element_text(size=8),
        axis.title = element_blank()) 
}

###########################################################################################

#function for ANOVA table (question 3)
tblAnova_fn <- function(df, x, y, group=NULL){
  if(!is.null(group)) df <- df[df$Diagnosis == group,] 
  x <- deparse(substitute(x))
  y <- deparse(substitute(y))
  f <- reformulate(x, response=y)
  compare_means(f, data=df)[, c("group1","group2","p.format","p.adj","p.signif")] %>%
  kable(col.names = c('variable 1','variable 2','p value','adj. p value','significance level')) %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed'), fixed_thead=T)
}

###########################################################################################

#age plot, with confidence bands (question 7)
confidencePlot_fn <- function(df, x_var, y_var, title, group=NULL){
  if(!is.null(group)) df <- df[df$Diagnosis == group,]
  ggplot(df, aes_string(x_var, y_var)) + 
  geom_point(size=3) + 
  geom_smooth(method='lm', color='black', alpha=.3) + 
  stat_smooth(method='lm', formula= y ~ poly(x,2), color='red', fill='red', alpha=.2) + 
  theme_bw() + 
  theme(panel.grid = element_blank()) + 
  ggtitle(title) +
  ylim(NM_min - 1, NM_max + 1)
}


###########################################################################################

#linear/quad model comparison function to pull out the r2 values, by segment and diagnosis (question 10)
adjR2_fn <- function(model){
  vector <- c()
  for (i in 1:6){
    value <- summary(model[[i]])$adj.r.squared
    vector[i] <- value
  }
  return(vector)
}

###########################################################################################

#linear/quad model comparison function to pull out the beta values from the non-linear equations, by segment, and diagnosis (question 10)
standBeta_fn <- function(model){
  vector <- c()
  for (i in 1:6){
    beta <- coef(lm.beta(model[[i]]))[2] 
    vector[i] <- beta
  }
  return(vector)
}

###########################################################################################

#linear/quad model comparison function to compute the difference between the linear and quadratic models, and pull the p value (question 10)
compareR2_fn <- function(model_lin, model_quad){
  vector <- c()
  for (i in 1:6){
    pvalue <- anova(model_lin[[i]], model_quad[[i]])$`Pr(>F)`[2]
    vector[i] <- pvalue
  }
  return(vector)
}

###########################################################################################

#function to create formulas of non-linear (quadratic) models (question 11)
equations_fn <- function(model, segments){
  equation_list <- list()
  model <- deparse(substitute(model))
  segments <- deparse(substitute(segments))
  for (i in 1:length(NM_vars_uncorrectedList)){
    term_s <- round(eval(parse(text=paste(model, '$', NM_vars_uncorrectedList[i], '$coefficients[3]', sep=''))),3)
    term_l <- round(eval(parse(text=paste(model, '$', NM_vars_uncorrectedList[i], '$coefficients[2]', sep=''))),3)
    term_i <- round(eval(parse(text=paste(model, '$', NM_vars_uncorrectedList[i], '$coefficients[1]', sep=''))),3)
    equation <- paste(term_s, '(Age)2 + ', term_l, '(Age) + ', term_i, sep='')
    equation_list[[i]] <- equation
  }
  return(equation_list)
}

###########################################################################################

#function to find the points on the quadratic curve - from ggplot (question 12)
getCurve_fn <- function(df, x_var, segments, order=2, group=NULL){
  curveList <- list()
  if(!is.null(group)) df <- df[df$Diagnosis == group,]
  for (i in 1:length(segments)){
    y_var <- segments[[i]]
    myplot <- ggplot(df, aes_string(x_var, y_var)) + geom_point() + stat_smooth(method='lm', formula= y ~ poly(x,order)) 
    x <- ggplot_build(myplot)$data[[2]]$x #pull out smoothing x
    y <- ggplot_build(myplot)$data[[2]]$y #pull out smoothing y
    curveList[[i]] <- list(x_Age=x,y_Segment=y)
  }
  return(curveList)
}

###########################################################################################

#function to find the location of the biggest change (i.e., elbow); irrespective of sign (question 12)
elbow_fn  <- function(quadraticList) {
  elbowList <- list()
  for (i in 1:length(quadraticList)){
    x=unlist(quadraticList[[i]]$x_Age)
    y=unlist(quadraticList[[i]]$y_Segment)
    d1 <- diff(y) / diff(x) # first derivative
    d2 <- diff(d1) / diff(x[-1]) # second derivative
    index <- which.max(abs(d2)) 
    drop <- max(abs(d2))
    xvalue <- x[index]
    yvalue <- y[index]
    elbowList[[i]] <- xvalue
  }
  return(elbowList) 
}

###########################################################################################

#function to find where changes sign, i.e., positive to negative, or vice versa (question 12)
change_fn  <- function(quadraticList) {
  changeList <- list()
  for (i in 1:length(quadraticList)){
    change <- c(FALSE, diff(diff(quadraticList[[i]]$y_Segment)>0)!=0)
    change <- min(which(change == TRUE)) #this is the INDEX out of 80
    change <- quadraticList[[i]]$x_Age[change]
    changeList[[i]] <- change
  }
  return(changeList)
}

###########################################################################################

#function to calculate the actual "inflection" point (uses cubic) (question 12)
getInflection_fn <- function(cubicList) {
  inflectionList <- list()
  for (i in 1:length(cubicList)){
    curveType <- inflection::check_curve(cubicList[[i]]$x_Age, cubicList[[i]]$y_Segment)
    inflection <- bese(cubicList[[i]]$x_Age, cubicList[[i]]$y_Segment, curveType$index)
    inflection <- inflection$iplast
    inflectionList[[i]] <- inflection
  }
  return(inflectionList)
}

###########################################################################################

#function to plot quadratic fit, and the 3 alternative "inflection" points (question 12)
inflectionPlot_fn <- function(df, x_var, y_var, title, group=NULL, i){
  if(!is.null(group)) df <- df[df$Diagnosis == group,]
  ggplot(df, aes_string(x_var, y_var)) + 
  geom_point(size=3) + 
  stat_smooth(method='lm', formula= y ~ poly(x,2), color='black', fill='black', alpha=.2) + 
  theme_bw() + 
  theme(panel.grid = element_blank()) + 
  ggtitle(title) +
  ylim(NM_min - 1, NM_max + 1) + 
  geom_vline(xintercept = eval(parse(text=paste('df_inflectionPts$elbow_', group, '[', i, ']', sep=''))), color='darkslategray3', size=2, linetype='dashed') + 
  geom_vline(xintercept = eval(parse(text=paste('df_inflectionPts$change_', group, '[', i, ']', sep=''))), color='darkgoldenrod1', size=2, linetype='twodash') + 
  geom_vline(xintercept = eval(parse(text=paste('df_inflectionPts$inflection_', group, '[', i, ']', sep=''))), color='coral1', size=2, linetype='longdash') +
  geom_label(aes(
    x=eval(parse(text=paste('df_inflectionPts$elbow_', group, '[', i, ']', sep=''))), y=NM_min+2,
    label=round(eval(parse(text=paste('df_inflectionPts$elbow_', group, '[', i, ']', sep=''))),3)), fill='darkslategray3') +
  geom_label(aes(
    x=eval(parse(text=paste('df_inflectionPts$change_', group, '[', i, ']', sep=''))), y=NM_min+4,
    label=round(eval(parse(text=paste('df_inflectionPts$change_', group, '[', i, ']', sep=''))),3)), fill='darkgoldenrod1') +
 geom_label(aes(
    x=eval(parse(text=paste('df_inflectionPts$inflection_', group, '[', i, ']', sep=''))), y=NM_min+6,
    label=round(eval(parse(text=paste('df_inflectionPts$inflection_', group, '[', i, ']', sep=''))),3)), fill='coral1')
}

###########################################################################################

#function to compare correlations between linear and non-linear fits (question 13)
correlation_fn <- function(df, x_var='Age', y_var=vars_segmentsUncorrected, group=NULL){
  
  #separate calculations for group, if required
  if(!is.null(group)) df <- df[df$Diagnosis == group,]

  #first, calculate correlations
  cor_pearson <-  lapply(df[, y_var], function(x) cor.test(x, df[,x_var], method='pearson'))
  cor_spearman <- lapply(df[, y_var], function(x) cor.test(x, df[,x_var], method='spearman', exact=FALSE))
  cor_kendall <-  lapply(df[, y_var], function(x) cor.test(x, df[,x_var], method='kendall', exact=FALSE))
  
  #make a table for correlation and p values
  tbl_cor <- matrix(ncol=length(y_var), nrow=6)
  
  #pull out correlation values
  for (i in 1:length(y_var)){
    tbl_cor[1, i]  <- round(eval(parse(text=paste('cor_pearson$', y_var[i], '$estimate', sep=''))),3)
    tbl_cor[2, i]  <- round(eval(parse(text=paste('cor_spearman$', y_var[i], '$estimate', sep=''))),3)
    tbl_cor[3, i]  <- round(eval(parse(text=paste('cor_kendall$', y_var[i], '$estimate', sep=''))),3)
    
  #pull out p values
    tbl_cor[4, i]  <- round(eval(parse(text=paste('cor_pearson$', y_var[i], '$p.value', sep=''))),3)
    tbl_cor[5, i]  <- round(eval(parse(text=paste('cor_spearman$', y_var[i], '$p.value', sep=''))),3)
    tbl_cor[6, i]  <- round(eval(parse(text=paste('cor_kendall$', y_var[i], '$p.value', sep=''))),3)
  }
  tbl_cor <- as.data.frame(tbl_cor)
  names(tbl_cor) <- y_var
  tbl_cor <- cbind(method=rep(c('Pearson', 'Spearman', 'Kendall'),2), tbl_cor)
  return(tbl_cor)
}

###########################################################################################

[1] Are there age differences in NM, before correction? (cor.test)
No; however, we nonetheless corrected for age (and sex).

Combined

plotScatter_fn(df=df_uncorrected, x='Age', y='value')

LLD & HC

plotScatter_fn(df_uncorrected, x='Age', y='value', color='Diagnosis')


[2] Is there an interaction between diagnosis and age, on NM, before correction?
No. None of the interaction terms, in any segment, are significant.

#make a list of p values
p_ageDxInt <- list()
for (i in vars_segmentsUncorrected){
  ps <- round(eval(parse(text=paste('model_ageDxNM_lin_summary$', i, '$coeftable[4,4]', sep=''))),3)
  p_ageDxInt[[i]] <- ps
}

#paste together into a dataframe
rbind(vars_segmentsUncorrected, p_ageDxInt) %>%
  kable(col.names=NULL, rownames=NULL) %>% kable_styling()
vars_segmentsUncorrected avg_max_seg_1 avg_max_seg_2 avg_max_seg_3 avg_max_seg_4 avg_max_seg_5 avg_max_seg_6
p_ageDxInt 0.596 0.754 0.532 0.34 0.356 0.697

[3] Are there sex differences in NM, before correction? (t-test)
No; however, we nonetheless corrected for sex (and age).

Combined

plotBox_fn(df_uncorrected, x='Sex', y='value')

LLD

plotBox_fn(df_uncorrected, x='Sex', y='value',group='LLD')

HC

plotBox_fn(df_uncorrected, x='Sex', y='value',group='HC')


[4] Are there differences in the NM values, in the 6 segments, after correction? (ANOVA)
Yes; ANOVA in combined and separate diagnostic groups significant; many/most pairwise comparisons (Wilcox) show a difference.

Combined

plotAnova_fn(df=df_corrected, x='variable', y='value')

tblAnova_fn(df_corrected, variable, value)
variable 1 variable 2 p value adj. p value significance level
avg_max_seg_1_cor avg_max_seg_2_cor 4.5e-13 0.000 ****
avg_max_seg_1_cor avg_max_seg_3_cor < 2e-16 0.000 ****
avg_max_seg_1_cor avg_max_seg_4_cor < 2e-16 0.000 ****
avg_max_seg_1_cor avg_max_seg_5_cor 0.155 0.150 ns
avg_max_seg_1_cor avg_max_seg_6_cor < 2e-16 0.000 ****
avg_max_seg_2_cor avg_max_seg_3_cor < 2e-16 0.000 ****
avg_max_seg_2_cor avg_max_seg_4_cor < 2e-16 0.000 ****
avg_max_seg_2_cor avg_max_seg_5_cor 3.5e-09 0.000 ****
avg_max_seg_2_cor avg_max_seg_6_cor < 2e-16 0.000 ****
avg_max_seg_3_cor avg_max_seg_4_cor 0.048 0.096
avg_max_seg_3_cor avg_max_seg_5_cor < 2e-16 0.000 ****
avg_max_seg_3_cor avg_max_seg_6_cor 1.8e-13 0.000 ****
avg_max_seg_4_cor avg_max_seg_5_cor < 2e-16 0.000 ****
avg_max_seg_4_cor avg_max_seg_6_cor < 2e-16 0.000 ****
avg_max_seg_5_cor avg_max_seg_6_cor < 2e-16 0.000 ****

LLD

plotAnova_fn(df=df_corrected, x='variable', y='value', group='LLD')

tblAnova_fn(df_corrected, variable, value, group='LLD')
variable 1 variable 2 p value adj. p value significance level
avg_max_seg_1_cor avg_max_seg_2_cor 1.9e-06 7.5e-06 ****
avg_max_seg_1_cor avg_max_seg_3_cor 2.8e-09 0.0e+00 ****
avg_max_seg_1_cor avg_max_seg_4_cor 3.3e-11 0.0e+00 ****
avg_max_seg_1_cor avg_max_seg_5_cor 1.00 1.0e+00 ns
avg_max_seg_1_cor avg_max_seg_6_cor 1.6e-14 0.0e+00 ****
avg_max_seg_2_cor avg_max_seg_3_cor 1.1e-12 0.0e+00 ****
avg_max_seg_2_cor avg_max_seg_4_cor 6.3e-14 0.0e+00 ****
avg_max_seg_2_cor avg_max_seg_5_cor 3.1e-05 9.2e-05 ****
avg_max_seg_2_cor avg_max_seg_6_cor 1.6e-14 0.0e+00 ****
avg_max_seg_3_cor avg_max_seg_4_cor 0.22 4.3e-01 ns
avg_max_seg_3_cor avg_max_seg_5_cor 7.0e-08 4.0e-07 ****
avg_max_seg_3_cor avg_max_seg_6_cor 4.8e-09 0.0e+00 ****
avg_max_seg_4_cor avg_max_seg_5_cor 2.7e-08 2.0e-07 ****
avg_max_seg_4_cor avg_max_seg_6_cor 1.9e-13 0.0e+00 ****
avg_max_seg_5_cor avg_max_seg_6_cor 1.6e-14 0.0e+00 ****

HC

plotAnova_fn(df=df_corrected, x='variable', y='value', group='HC')

tblAnova_fn(df_corrected, variable, value, group='HC')
variable 1 variable 2 p value adj. p value significance level
avg_max_seg_1_cor avg_max_seg_2_cor 5.1e-08 3.0e-07 ****
avg_max_seg_1_cor avg_max_seg_3_cor 1.1e-08 1.0e-07 ****
avg_max_seg_1_cor avg_max_seg_4_cor 1.1e-09 0.0e+00 ****
avg_max_seg_1_cor avg_max_seg_5_cor 0.021 4.3e-02
avg_max_seg_1_cor avg_max_seg_6_cor 3.4e-11 0.0e+00 ****
avg_max_seg_2_cor avg_max_seg_3_cor 9.7e-13 0.0e+00 ****
avg_max_seg_2_cor avg_max_seg_4_cor 2.4e-13 0.0e+00 ****
avg_max_seg_2_cor avg_max_seg_5_cor 3.2e-05 9.5e-05 ****
avg_max_seg_2_cor avg_max_seg_6_cor 2.4e-13 0.0e+00 ****
avg_max_seg_3_cor avg_max_seg_4_cor 0.148 1.5e-01 ns
avg_max_seg_3_cor avg_max_seg_5_cor 2.9e-10 0.0e+00 ****
avg_max_seg_3_cor avg_max_seg_6_cor 6.3e-06 2.5e-05 ****
avg_max_seg_4_cor avg_max_seg_5_cor 2.4e-11 0.0e+00 ****
avg_max_seg_4_cor avg_max_seg_6_cor 5.5e-07 2.7e-06 ****
avg_max_seg_5_cor avg_max_seg_6_cor 1.7e-12 0.0e+00 ****


[5] Are there differences in the NM values, by hemisphere (left vs right), after correction? (Wilcoxon test)
Yes, left hemisphere is higher.

Combined

plotAnova_fn(df_corrected, x='hemisphere', y='value')

LLD

plotAnova_fn(df_corrected, x='hemisphere', y='value', group='LLD')

HC

plotAnova_fn(df_corrected, x='hemisphere', y='value', group='HC')


[6] Are there differences in the NM values, by segment, after correction? (ANOVA)
Yes, medial is higher than rostral and caudal, and rostral is higher than caudal.

Combined

plotAnova_fn(df_corrected, 'ROI', 'value')

tblAnova_fn(df_corrected, ROI, value)
variable 1 variable 2 p value adj. p value significance level
rostral medial <2e-16 0 ****
rostral caudal <2e-16 0 ****
medial caudal <2e-16 0 ****

LLD

plotAnova_fn(df_corrected, x='ROI', y='value', group='LLD')

tblAnova_fn(df_corrected, ROI, value, group='LLD')
variable 1 variable 2 p value adj. p value significance level
rostral medial 3.3e-08 0 ****
rostral caudal 3.9e-11 0 ****
medial caudal 3.4e-16 0 ****

HC

plotAnova_fn(df_corrected, x='ROI', y='value', group='HC')

tblAnova_fn(df_corrected, ROI, value, group='HC')
variable 1 variable 2 p value adj. p value significance level
rostral medial 3.7e-13 0 ****
rostral caudal 1.8e-10 0 ****
medial caudal < 2e-16 0 ****


[7] Is the effect of age on NM (before correction), by diagnosis, actually linear?
LLD looks non-linear. Also, the LLD distribution often appears “U”-shaped, opposite of the expected inverted U.

Segment 1

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_1', title='HC, Segment 1', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_1', title='LLD, Segment 1', group='LLD'),
ncol=2)

Segment 2

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_2', title='HC, Segment 2', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_2', title='LLD, Segment 2', group='LLD'),
ncol=2)

Segment 3

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_3', title='HC, Segment 3', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_3', title='LLD, Segment 3', group='LLD'),
ncol=2)

Segment 4

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_4', title='HC, Segment 4', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_4', title='LLD, Segment 4', group='LLD'),
ncol=2)

Segment 5

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_5', title='HC, Segment 5', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_5', title='LLD, Segment 5', group='LLD'),
ncol=2)

Segment 6

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_6', title='HC, Segment 6', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_6', title='LLD, Segment 6', group='LLD'),
ncol=2)


[8] Summary of the linear model, of the effect of age on LC NM-MRI before correction, by diagnosis
Note: the p value associated with the interaction term in the final row of these tables is synonymous with the p values reported in [2].

Segment 1

summ(model_ageDxNM_lin[[1]])
Observations 48
Dependent variable x
Type OLS linear regression
F(3,44) 1.90
0.11
Adj. R² 0.05
Est. S.E. t val. p
(Intercept) 30.03 6.36 4.72 0.00
Age -0.08 0.09 -0.90 0.37
DiagnosisLLD 7.30 10.82 0.68 0.50
Age:DiagnosisLLD -0.08 0.16 -0.53 0.60
Standard errors: OLS

Segment 2

summ(model_ageDxNM_lin[[2]])
Observations 48
Dependent variable x
Type OLS linear regression
F(3,44) 1.10
0.07
Adj. R² 0.01
Est. S.E. t val. p
(Intercept) 39.62 10.68 3.71 0.00
Age -0.18 0.15 -1.20 0.24
DiagnosisLLD 6.28 18.17 0.35 0.73
Age:DiagnosisLLD -0.08 0.26 -0.32 0.75
Standard errors: OLS

Segment 3

summ(model_ageDxNM_lin[[3]])
Observations 48
Dependent variable x
Type OLS linear regression
F(3,44) 0.87
0.06
Adj. R² -0.01
Est. S.E. t val. p
(Intercept) 20.25 8.70 2.33 0.02
Age -0.01 0.12 -0.12 0.91
DiagnosisLLD 11.02 14.80 0.74 0.46
Age:DiagnosisLLD -0.14 0.21 -0.63 0.53
Standard errors: OLS

Segment 4

summ(model_ageDxNM_lin[[4]])
Observations 48
Dependent variable x
Type OLS linear regression
F(3,44) 0.95
0.06
Adj. R² -0.00
Est. S.E. t val. p
(Intercept) 21.58 5.85 3.69 0.00
Age 0.02 0.08 0.28 0.78
DiagnosisLLD 10.74 9.96 1.08 0.29
Age:DiagnosisLLD -0.14 0.14 -0.97 0.34
Standard errors: OLS

Segment 5

summ(model_ageDxNM_lin[[5]])
Observations 48
Dependent variable x
Type OLS linear regression
F(3,44) 1.06
0.07
Adj. R² 0.00
Est. S.E. t val. p
(Intercept) 31.43 8.67 3.62 0.00
Age -0.09 0.12 -0.70 0.49
DiagnosisLLD 13.23 14.75 0.90 0.37
Age:DiagnosisLLD -0.20 0.21 -0.93 0.36
Standard errors: OLS

Segment 6

summ(model_ageDxNM_lin[[6]])
Observations 48
Dependent variable x
Type OLS linear regression
F(3,44) 0.37
0.02
Adj. R² -0.04
Est. S.E. t val. p
(Intercept) 17.20 9.00 1.91 0.06
Age 0.01 0.13 0.08 0.94
DiagnosisLLD 4.61 15.30 0.30 0.76
Age:DiagnosisLLD -0.09 0.22 -0.39 0.70
Standard errors: OLS


[9] Summary of the quadratic model, of the effect of age on LC NM-MRI before correction, by diagnosis

Segment 1

summ(model_ageDxNM_quad[[1]])
Observations 48
Dependent variable x
Type OLS linear regression
F(5,42) 1.58
0.16
Adj. R² 0.06
Est. S.E. t val. p
(Intercept) 24.66 0.74 33.36 0.00
poly(Age, 2)1 -3.23 4.21 -0.77 0.45
poly(Age, 2)2 -5.69 4.77 -1.19 0.24
DiagnosisLLD 1.52 1.04 1.46 0.15
poly(Age, 2)1:DiagnosisLLD -2.87 7.50 -0.38 0.70
poly(Age, 2)2:DiagnosisLLD 10.55 7.32 1.44 0.16
Standard errors: OLS

Segment 2

summ(model_ageDxNM_quad[[2]])
Observations 48
Dependent variable x
Type OLS linear regression
F(5,42) 1.91
0.18
Adj. R² 0.09
Est. S.E. t val. p
(Intercept) 27.06 1.19 22.70 0.00
poly(Age, 2)1 -8.56 6.79 -1.26 0.21
poly(Age, 2)2 1.47 7.69 0.19 0.85
DiagnosisLLD 1.62 1.68 0.97 0.34
poly(Age, 2)1:DiagnosisLLD 3.25 12.09 0.27 0.79
poly(Age, 2)2:DiagnosisLLD 20.32 11.80 1.72 0.09
Standard errors: OLS

Segment 3

summ(model_ageDxNM_quad[[3]])
Observations 48
Dependent variable x
Type OLS linear regression
F(5,42) 0.71
0.08
Adj. R² -0.03
Est. S.E. t val. p
(Intercept) 19.08 1.03 18.61 0.00
poly(Age, 2)1 -1.09 5.84 -0.19 0.85
poly(Age, 2)2 4.41 6.61 0.67 0.51
DiagnosisLLD 2.14 1.45 1.48 0.15
poly(Age, 2)1:DiagnosisLLD -4.02 10.40 -0.39 0.70
poly(Age, 2)2:DiagnosisLLD 1.37 10.16 0.14 0.89
Standard errors: OLS

Segment 4

summ(model_ageDxNM_quad[[4]])
Observations 48
Dependent variable x
Type OLS linear regression
F(5,42) 0.78
0.08
Adj. R² -0.02
Est. S.E. t val. p
(Intercept) 23.26 0.69 33.76 0.00
poly(Age, 2)1 1.28 3.93 0.33 0.75
poly(Age, 2)2 -2.23 4.44 -0.50 0.62
DiagnosisLLD 1.26 0.97 1.29 0.20
poly(Age, 2)1:DiagnosisLLD -5.17 6.99 -0.74 0.46
poly(Age, 2)2:DiagnosisLLD 6.99 6.82 1.02 0.31
Standard errors: OLS

Segment 5

summ(model_ageDxNM_quad[[5]])
Observations 48
Dependent variable x
Type OLS linear regression
F(5,42) 1.43
0.15
Adj. R² 0.04
Est. S.E. t val. p
(Intercept) 25.39 0.99 25.65 0.00
poly(Age, 2)1 -4.16 5.64 -0.74 0.46
poly(Age, 2)2 1.44 6.38 0.22 0.82
DiagnosisLLD 0.21 1.40 0.15 0.88
poly(Age, 2)1:DiagnosisLLD -4.50 10.04 -0.45 0.66
poly(Age, 2)2:DiagnosisLLD 13.05 9.80 1.33 0.19
Standard errors: OLS

Segment 6

summ(model_ageDxNM_quad[[6]])
Observations 48
Dependent variable x
Type OLS linear regression
F(5,42) 1.04
0.11
Adj. R² 0.00
Est. S.E. t val. p
(Intercept) 18.17 1.02 17.74 0.00
poly(Age, 2)1 1.12 5.84 0.19 0.85
poly(Age, 2)2 -6.93 6.60 -1.05 0.30
DiagnosisLLD -1.06 1.44 -0.73 0.47
poly(Age, 2)1:DiagnosisLLD -0.47 10.39 -0.05 0.96
poly(Age, 2)2:DiagnosisLLD 20.17 10.14 1.99 0.05
Standard errors: OLS


[10] Compare the linear and quadratic models, of the effect of age on LC NM-MRI before correction, by diagnosis
The more complex (quadratic) model provides a better fit – but because higher order terms always increase \(R^2\) (by definition), a “much better” fit is required in order to justify increased model complexity. The model comparison value is a p value.

#pull out the r2 values, by segment and diagnosis
adjR2_linear_HC <- do.call(adjR2_fn, list(model=model_ageDxNM_lin_HC))
adjR2_nonLinear_HC <- do.call(adjR2_fn, list(model=model_ageDxNM_quad_HC))
adjR2_linear_LLD <- do.call(adjR2_fn, list(model=model_ageDxNM_lin_LLD))
adjR2_nonLinear_LLD <- do.call(adjR2_fn, list(model=model_ageDxNM_quad_LLD))

#pull out the beta values from the non-linear equations, by segment, and diagnosis
standBeta_nonLinear_HC <- do.call(standBeta_fn, list(model=model_ageDxNM_quad_HC))
standBeta_nonLinear_LLD <- do.call(standBeta_fn, list(model=model_ageDxNM_quad_LLD))

#compute the difference between the linear and quadratic models, and pull the p value
modelComparison_HC <- do.call(compareR2_fn, list(model_ageDxNM_lin_HC, model_ageDxNM_quad_HC))
modelComparison_LLD <- do.call(compareR2_fn, list(model_ageDxNM_lin_LLD, model_ageDxNM_quad_LLD))

#put everything together into a table
tbl_age <- as.data.frame(cbind(
  adjR2_linear_HC, 
  adjR2_nonLinear_HC, 
  adjR2_linear_LLD, 
  adjR2_nonLinear_LLD,
  standBeta_nonLinear_HC, 
  standBeta_nonLinear_LLD,
  modelComparison_HC,
  modelComparison_LLD))

#add row.names
row.names(tbl_age) <- vars_segmentsUncorrected

#make an explicit column for the comparison between R2 values, R2 delta
tbl_age$adjR2_delta_HC <- adjR2_linear_HC - adjR2_nonLinear_HC 
tbl_age$adjR2_delta_LLD <- adjR2_linear_LLD - adjR2_nonLinear_LLD

#nice, scrollable table
tbl_age %>%
  kable(digits=3) %>%
  kable_styling() %>%
  row_spec(0, font_size=9) %>%
  scroll_box(width='800px')
adjR2_linear_HC adjR2_nonLinear_HC adjR2_linear_LLD adjR2_nonLinear_LLD standBeta_nonLinear_HC standBeta_nonLinear_LLD modelComparison_HC modelComparison_LLD adjR2_delta_HC adjR2_delta_LLD
avg_max_seg_1 -0.014 -0.001 0.036 0.031 -0.180 -0.277 0.276 0.360 -0.012 0.005
avg_max_seg_2 0.043 -0.003 0.008 0.158 -0.293 -0.223 0.835 0.034 0.046 -0.150
avg_max_seg_3 -0.047 -0.072 -0.015 -0.037 -0.027 -0.166 0.483 0.486 0.025 0.022
avg_max_seg_4 -0.044 -0.086 0.009 0.012 0.055 -0.224 0.657 0.310 0.041 -0.003
avg_max_seg_5 -0.014 -0.062 0.045 0.127 -0.178 -0.291 0.802 0.089 0.047 -0.082
avg_max_seg_6 -0.047 -0.055 -0.033 0.090 0.015 -0.100 0.370 0.055 0.008 -0.123

[11] What are the quadratic equations, by diagnosis and segment, uncorrected?

#get names of segments in list format -- easier to work with
NM_vars_uncorrectedList <- as.list(vars_segmentsUncorrected)

#apply equations_fn function
nlEquations_HC <- equations_fn(model_ageDxNM_quad_HC, NM_vars_uncorrectedList)
nlEquations_LLD <- equations_fn(model_ageDxNM_quad_LLD, NM_vars_uncorrectedList)

#put together in nice table
tbl_nlEquations <- data.frame(cbind(nlEquations_HC, nlEquations_LLD))
row.names(tbl_nlEquations) <- vars_segmentsUncorrected

#clean up
rm(nlEquations_HC, nlEquations_LLD)

#nice table
tbl_nlEquations %>% kable() %>% kable_styling()
nlEquations_HC nlEquations_LLD
avg_max_seg_1 -4.042(Age)2 + -3.053(Age) + 24.343 2.968(Age)2 + -4.368(Age) + 26.105
avg_max_seg_2 1.046(Age)2 + -6.814(Age) + 26.935 13.281(Age)2 + -7.001(Age) + 27.91
avg_max_seg_3 3.131(Age)2 + -0.54(Age) + 19.25 3.522(Age)2 + -3.963(Age) + 21.086
avg_max_seg_4 -1.585(Age)2 + 0.866(Age) + 23.192 2.9(Age)2 + -3.08(Age) + 24.403
avg_max_seg_5 1.02(Age)2 + -3.258(Age) + 25.363 8.831(Age)2 + -7.578(Age) + 25.185
avg_max_seg_6 -4.928(Age)2 + 0.373(Age) + 17.889 8.069(Age)2 + -2.04(Age) + 16.566

[12] Plot the “inflection” points on the quadratic fit (on uncorrected)
Blue line: “Elbow point”, i.e., the x-axis (age) location of the y axis (NM) has the biggest change.
Yellow line: “Change point”, i.e., the location of the quadratic curve where the curve changes sign (not present if curve goes in only one direction).
Pink line: “Inflection point”, i.e., where the second derivative \(g''(x)\) switches signs (changes concavity), i.e., is equal to 0. (Note that there is by definition no inflection of a quadratic curve, so a cubic curve (3 degrees) was fit to identify this point.)

#apply getQuad_fn function to find points along quadratic (order=2)
quadraticList_HC <- getCurve_fn(df, x_var='Age', segments=vars_segmentsUncorrected, order=2, group='HC')
quadraticList_LLD <- getCurve_fn(df, x_var='Age', segments=vars_segmentsUncorrected, order=2, group='LLD')

#apply getQuad_fn function to find points along cubic (order=3)
cubicList_HC <- getCurve_fn(df, x_var='Age', segments=vars_segmentsUncorrected, order=3, group='HC')
cubicList_LLD <- getCurve_fn(df, x_var='Age', segments=vars_segmentsUncorrected, order=3, group='LLD')

#apply elbow_fn function (location of the biggest change, irrespective of sign)
elbowList_HC <- elbow_fn(quadraticList_HC)
elbowList_LLD <- elbow_fn(quadraticList_LLD)
  
#apply change_fn function (where changes sign from pos to neg, vice versa)
changeList_HC <- change_fn(quadraticList_HC) 
changeList_LLD <- change_fn(quadraticList_LLD) 

#apply getInflection_fn function
inflectionList_HC <-getInflection_fn(cubicList_HC)
inflectionList_LLD <-getInflection_fn(cubicList_LLD)

#get our 3 "inflection point" lists into a dataframe
df_inflectionPts <- do.call(rbind, Map(data.frame,
  elbow_HC = elbowList_HC, 
  elbow_LLD = elbowList_LLD, 
  change_HC = changeList_HC, 
  change_LLD = changeList_LLD,
  inflection_HC = inflectionList_HC,
  inflection_LLD = inflectionList_LLD))

#add segments explicitly
df_inflectionPts <- cbind(Segments = vars_segmentsUncorrected, df_inflectionPts)

#nice table
df_inflectionPts %>%
  kable(digits=3,
        align=rep('c', ncol(df_inflectionPts)),
        col.names=c('Segment', rep(c('HC', 'LLD'),3))) %>%
  kable_styling(bootstrap_options = 'striped') %>%
  add_header_above(c(' '=1,'Elbow' = 2, 'Change' = 2, 'Inflection'=2))
Elbow
Change
Inflection
Segment HC LLD HC LLD HC LLD
avg_max_seg_1 66.405 61.671 69.316 75.316 71.937 75.316
avg_max_seg_2 77.468 63.899 NA 71.975 70.481 72.392
avg_max_seg_3 67.278 61.392 71.937 73.924 69.316 71.278
avg_max_seg_4 80.962 60.835 73.101 73.924 71.209 NaN
avg_max_seg_5 74.848 65.570 80.671 73.089 70.190 75.316
avg_max_seg_6 64.076 65.013 71.646 71.139 73.538 79.354

Segment 1

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_1', title='HC, Segment 1', group='HC', i=1),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_1', title='LLD, Segment 1', group='LLD', i=1),
ncol=2)

Segment 2

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_2', title='HC, Segment 2', group='HC', i=2),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_2', title='LLD, Segment 2', group='LLD', i=2),
ncol=2)

Segment 3

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_3', title='HC, Segment 3', group='HC', i=3),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_3', title='LLD, Segment 3', group='LLD', i=3),
ncol=2)

Segment 4

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_4', title='HC, Segment 4', group='HC', i=4),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_4', title='LLD, Segment 4', group='LLD', i=4),
ncol=2)

Segment 5

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_5', title='HC, Segment 5', group='HC', i=5),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_5', title='LLD, Segment 5', group='LLD', i=5),
ncol=2)

Segment 6

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_6', title='HC, Segment 6', group='HC', i=6),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_6', title='LLD, Segment 6', group='LLD', i=6),
ncol=2)


[13] Test correlations between age and uncorrected NM, for each group, when modelled as linear vs. non-linear.
Non-linear tests based on rank-based correlation coefficients: (1) Spearman’s rank correlation coefficient (rho) and (2) Kendall’s rank correlation coefficient (tau). We do not observe any important differences.

HC

correlation_fn(df, group='HC') %>% kable %>% kable_styling() %>%
  pack_rows('correlation values', 1, 3) %>%
  pack_rows('p values', 4, 6) 
method avg_max_seg_1 avg_max_seg_2 avg_max_seg_3 avg_max_seg_4 avg_max_seg_5 avg_max_seg_6
correlation values
Pearson -0.180 -0.293 -0.027 0.055 -0.178 0.015
Spearman -0.106 -0.240 -0.112 0.123 -0.116 0.106
Kendall -0.077 -0.101 -0.012 0.101 -0.077 0.085
p values
Pearson 0.410 0.174 0.902 0.804 0.416 0.945
Spearman 0.632 0.270 0.611 0.575 0.598 0.632
Kendall 0.614 0.508 0.937 0.508 0.614 0.578

LLD

correlation_fn(df, group='LLD') %>% kable %>% kable_styling() %>%
  pack_rows('correlation values', 1, 3) %>%
  pack_rows('p values', 4, 6) 
method avg_max_seg_1 avg_max_seg_2 avg_max_seg_3 avg_max_seg_4 avg_max_seg_5 avg_max_seg_6
correlation values
Pearson -0.277 -0.223 -0.166 -0.224 -0.291 -0.100
Spearman -0.292 -0.207 -0.112 -0.233 -0.252 -0.124
Kendall -0.196 -0.154 -0.072 -0.202 -0.189 -0.113
p values
Pearson 0.181 0.284 0.428 0.282 0.158 0.635
Spearman 0.157 0.321 0.594 0.262 0.224 0.553
Kendall 0.181 0.290 0.622 0.166 0.196 0.438