04_sanityChecks.utf8

imaging data pulled: 2021-10-12
clinical data pulled: 2020-11-16
code written: 2021-11-05
last ran: 2021-11-15

Description. Here, we review several aspects of the LC NM-MRI data, largely on raw values (i.e., uncorrected for age and sex). The intent is to understand our data, and ensure the distributions are consonant with what we expect.

Load libraries and data

#clear environment
rm(list = ls())

#list required libraries
packages <- c(
  'tidyverse',
  'kableExtra', #pretty tables
  'jtools', #summ,
  'interactions', #interact_plot
  'ggpubr', #plots
  'gridExtra', #grob
  'lm.beta', #model comparison
  'inflection' #check_curve
)

#load required libraries
lapply(packages, require, character.only = TRUE)

#read in LC data (contains raw and corrected for age and sex, but not yet standardized)
df <- read.csv(dir('../clinical', full.names=T, pattern="^df_2021"), stringsAsFactors = T) #48

#keep only variables of interest, for clarity
df <-  df[, grep('^id$|Diagnosis|Age|Sex|avg_max_seg', names(df))] #16

Data munging

#pull out names of LC segment variables, of interest
vars_segmentsCorrected <- names(df[, grep('avg.+_cor', names(df))])
vars_segmentsUncorrected <- names(df[, grep('avg_max_seg.+[0-9]$', names(df))])

#created separate, melted dfs, of corrected and uncorrected data
df_corrected <- reshape::melt(df[c('id', 'Diagnosis', 'Age', 'Sex', vars_segmentsCorrected)], id.vars=c('id', 'Diagnosis', 'Age', 'Sex'))
df_uncorrected <- reshape::melt(df[c('id', 'Diagnosis', 'Age', 'Sex', vars_segmentsUncorrected)], id.vars=c('id', 'Diagnosis', 'Age', 'Sex'))

#to the corrected df, add a 'hemisphere' variable
df_corrected <- df_corrected %>%  
   mutate(hemisphere = case_when(
     grepl("1|2|3", variable) ~ "left",
     grepl("4|5|6", variable) ~"right"))    
    
#to the corrected df, also add a 'ROI' variable
df_corrected <- df_corrected %>%  
   mutate(ROI = case_when(
     grepl("1|4", variable) ~ "rostral",
     grepl("2|5", variable) ~ "medial",
     grepl("3|6", variable) ~ "caudal"))  

#define age limits in data
age_min <- min(df$Age)
age_max <- max(df$Age)

#define LC NM-MRI limits in data
NM_min <- min(df[, vars_segmentsUncorrected])
NM_max <- max(df[, vars_segmentsUncorrected])

Data correction
We have previously corrected all LC and cognition variables for age and sex, and four cognition variables (all RT on the D-KEFS CWI) for normality. Here, we additionally correct LC variables for sex only (not age), for select exploratory analyses. (Note: at present, the sex-corrected-only LC variables are not reviewed in any analyses.)

#make a separate df for variables we will correct for sex
sex <- df[, c('id', 'Sex', vars_segmentsUncorrected)]

#function to add a 'sex corrected only' variant of each of the 6 segments
adjustVars_fn <- function(x, data){ #function to correct for sex
  fit <- lm(x ~ Sex, na.action = na.exclude, data) 
  adjusted <- resid(fit) + coef(fit)[1] #residual intercept
  return(adjusted)
}

#apply sex-correct function to all variables, put in new df for review
df_sexCorrected <- data.frame(df$id,   
    sapply(sex[vars_segmentsUncorrected], function(x) adjustVars_fn(x, data=sex)))

#adjust names in adjusted df, so clear have been corrected
names(df_sexCorrected)[-1] <- paste(names(df_sexCorrected[,-1]), '_sexcor', sep = "")

#add the adjusted variables back to our cognitive df
df_sexCorrected <- merge(df[, c('id', 'Diagnosis', 'Sex', 'Age')], df_sexCorrected, by.x='id', by.y='df.id')

#for clarity, remove the unneeded df
rm(sex)

Define segment-wise statistical models

#linear model 
model_ageDxNM_lin <- lapply(df[, vars_segmentsUncorrected], function(x) lm(as.formula(x ~ Age * Diagnosis), data=df))
model_ageDxNM_lin_summary <- lapply(model_ageDxNM_lin, summ)

#quadratic model
model_ageDxNM_quad <- lapply(df[, vars_segmentsUncorrected], function(x) lm(as.formula(x ~ poly(Age,2) * Diagnosis), data=df))
model_ageDxNM_quad_summary <- lapply(model_ageDxNM_quad, summ)

#linear models - by diagnosis
model_ageDxNM_lin_HC <- lapply(df[df$Diagnosis=='HC', vars_segmentsUncorrected], function(x) lm(as.formula(x ~ Age), df[df$Diagnosis=='HC',]))
model_ageDxNM_lin_LLD <- lapply(df[df$Diagnosis=='LLD', vars_segmentsUncorrected], function(x) lm(as.formula(x ~ Age), df[df$Diagnosis=='LLD',]))

#quadratic model - by diagnosis
model_ageDxNM_quad_HC <- lapply(df[df$Diagnosis=='HC', vars_segmentsUncorrected], function(x) lm(as.formula(x ~ poly(Age,2)), df[df$Diagnosis=='HC',]))
model_ageDxNM_quad_LLD <- lapply(df[df$Diagnosis=='LLD', vars_segmentsUncorrected], function(x) lm(as.formula(x ~ poly(Age,2)), df[df$Diagnosis=='LLD',]))

#cubic model - by diagnosis (required for "inflection point" analysis)  
model_ageDxNM_cubic_fn <- function(df, vars, group){
  model <- lapply(df[df$Diagnosis==group, vars], function(x) lm(as.formula(x ~ poly(Age,3)), df[df$Diagnosis==group,]))
}

#cubic models - by diagnosis
model_ageDxNM_cubicHC <- model_ageDxNM_cubic_fn(df, vars_segmentsUncorrected, 'HC')
model_ageDxNM_cubicLLD <- model_ageDxNM_cubic_fn(df, vars_segmentsUncorrected, 'LLD')

Define plotting functions

#scatter plots (question 1)
plotScatter_fn <- function(df, group=NULL, x, y, color=NULL){
  if(!is.null(group)) df <- df[df$Diagnosis == group,]
  if(is.null(color)){
    scatter <- ggscatter(data=df, x=x, y=y,
      add="reg.line", conf.int=TRUE) +
      stat_cor() +
      facet_wrap(~variable) 
  } else {
    scatter <- ggscatter(data=df, x=x, y=y,
      add="reg.line", conf.int=TRUE, color=color) +
      stat_cor(aes(color=Diagnosis)) +
      facet_wrap(~variable) 
  }
  return(scatter)
}

###########################################################################################

#box plots (question 2)
plotBox_fn <- function(df, group=NULL, x, y){
  if(!is.null(group)){
    df <- df[df$Diagnosis == group,] 
    box <- ggboxplot(df, x=x, y=y, add='jitter') +
      stat_compare_means(method='t.test', vjust=1) +
      facet_wrap(~variable) 
  } else {
   box <- ggboxplot(df, x=x, y=y, add='jitter', color = 'Diagnosis') +
      stat_compare_means(method='t.test', vjust=1) +
      facet_wrap(~variable) 
  }
  return(box)
}

###########################################################################################

#interaction plot
list_ageDxInt = list()
for (i in 1:6){
  plot <- interactions::interact_plot(model_ageDxNM_lin[[i]], pred=Age, modx=Diagnosis, plot.points = TRUE, main.title=paste('Segment', i),legend=NULL)  
  list_ageDxInt[[i]] <- plot
}

###########################################################################################

#ANOVA plot (question 3)
plotAnova_fn <- function(df, x, y, group=NULL){
  if(!is.null(group)) df <- df[df$Diagnosis == group,] 
  ggboxplot(df, x=x, y=y) +
  stat_compare_means() +
  rotate_x_text(10) + 
  ylim(NM_min-5, NM_max+5) +
  theme(text=element_text(size=8),
        axis.title = element_blank()) 
}

###########################################################################################

#function for ANOVA table (question 3)
tblAnova_fn <- function(df, x, y, group=NULL){
  if(!is.null(group)) df <- df[df$Diagnosis == group,] 
  x <- deparse(substitute(x))
  y <- deparse(substitute(y))
  f <- reformulate(x, response=y)
  compare_means(f, data=df)[, c("group1","group2","p.format","p.adj","p.signif")] %>%
  kable(col.names = c('variable 1','variable 2','p value','adj. p value','significance level')) %>%
  kable_styling(bootstrap_options = c('striped', 'hover', 'condensed'), fixed_thead=T)
}

###########################################################################################

#age plot, with confidence bands (question 7)
confidencePlot_fn <- function(df, x_var, y_var, title, group=NULL){
  if(!is.null(group)) df <- df[df$Diagnosis == group,]
  ggplot(df, aes_string(x_var, y_var)) + 
  geom_point(size=3) + 
  geom_smooth(method='lm', color='black', alpha=.3) + 
  stat_smooth(method='lm', formula= y ~ poly(x,2), color='red', fill='red', alpha=.2) + 
  theme_bw() + 
  theme(panel.grid = element_blank()) + 
  ggtitle(title) +
  ylim(NM_min - 1, NM_max + 1)
}


###########################################################################################

#linear/quad model comparison function to pull out the r2 values, by segment and diagnosis (question 10)
adjR2_fn <- function(model){
  vector <- c()
  for (i in 1:6){
    value <- summary(model[[i]])$adj.r.squared
    vector[i] <- value
  }
  return(vector)
}

###########################################################################################

#linear/quad model comparison function to pull out the beta values from the non-linear equations, by segment, and diagnosis (question 10)
standBeta_fn <- function(model){
  vector <- c()
  for (i in 1:6){
    beta <- coef(lm.beta(model[[i]]))[2] 
    vector[i] <- beta
  }
  return(vector)
}

###########################################################################################

#linear/quad model comparison function to compute the difference between the linear and quadratic models, and pull the p value (question 10)
compareR2_fn <- function(model_lin, model_quad){
  vector <- c()
  for (i in 1:6){
    pvalue <- anova(model_lin[[i]], model_quad[[i]])$`Pr(>F)`[2]
    vector[i] <- pvalue
  }
  return(vector)
}

###########################################################################################

#function to create formulas of non-linear (quadratic) models (question 11)
equations_fn <- function(model, segments){
  equation_list <- list()
  model <- deparse(substitute(model))
  segments <- deparse(substitute(segments))
  for (i in 1:length(NM_vars_uncorrectedList)){
    term_s <- round(eval(parse(text=paste(model, '$', NM_vars_uncorrectedList[i], '$coefficients[3]', sep=''))),3)
    term_l <- round(eval(parse(text=paste(model, '$', NM_vars_uncorrectedList[i], '$coefficients[2]', sep=''))),3)
    term_i <- round(eval(parse(text=paste(model, '$', NM_vars_uncorrectedList[i], '$coefficients[1]', sep=''))),3)
    equation <- paste(term_s, '(Age)2 + ', term_l, '(Age) + ', term_i, sep='')
    equation_list[[i]] <- equation
  }
  return(equation_list)
}

###########################################################################################

#function to find the points on the quadratic curve - from ggplot (question 12)
getCurve_fn <- function(df, x_var, segments, order=2, group=NULL){
  curveList <- list()
  if(!is.null(group)) df <- df[df$Diagnosis == group,]
  for (i in 1:length(segments)){
    y_var <- segments[[i]]
    myplot <- ggplot(df, aes_string(x_var, y_var)) + geom_point() + stat_smooth(method='lm', formula= y ~ poly(x,order)) 
    x <- ggplot_build(myplot)$data[[2]]$x #pull out smoothing x
    y <- ggplot_build(myplot)$data[[2]]$y #pull out smoothing y
    curveList[[i]] <- list(x_Age=x,y_Segment=y)
  }
  return(curveList)
}

###########################################################################################

#function to find the location of the biggest change (i.e., elbow); irrespective of sign (question 12)
elbow_fn  <- function(quadraticList) {
  elbowList <- list()
  for (i in 1:length(quadraticList)){
    x=unlist(quadraticList[[i]]$x_Age)
    y=unlist(quadraticList[[i]]$y_Segment)
    d1 <- diff(y) / diff(x) # first derivative
    d2 <- diff(d1) / diff(x[-1]) # second derivative
    index <- which.max(abs(d2)) 
    drop <- max(abs(d2))
    xvalue <- x[index]
    yvalue <- y[index]
    elbowList[[i]] <- xvalue
  }
  return(elbowList) 
}

###########################################################################################

#function to find where changes sign, i.e., positive to negative, or vice versa (question 12)
change_fn  <- function(quadraticList) {
  changeList <- list()
  for (i in 1:length(quadraticList)){
    change <- c(FALSE, diff(diff(quadraticList[[i]]$y_Segment)>0)!=0)
    change <- min(which(change == TRUE)) #this is the INDEX out of 80
    change <- quadraticList[[i]]$x_Age[change]
    changeList[[i]] <- change
  }
  return(changeList)
}

###########################################################################################

#function to calculate the actual "inflection" point (uses cubic) (question 12)
getInflection_fn <- function(cubicList) {
  inflectionList <- list()
  for (i in 1:length(cubicList)){
    curveType <- inflection::check_curve(cubicList[[i]]$x_Age, cubicList[[i]]$y_Segment)
    inflection <- bese(cubicList[[i]]$x_Age, cubicList[[i]]$y_Segment, curveType$index)
    inflection <- inflection$iplast
    inflectionList[[i]] <- inflection
  }
  return(inflectionList)
}

###########################################################################################

#function to plot quadratic fit, and the 3 alternative "inflection" points (question 12)
inflectionPlot_fn <- function(df, x_var, y_var, title, group=NULL, i){
  if(!is.null(group)) df <- df[df$Diagnosis == group,]
  ggplot(df, aes_string(x_var, y_var)) + 
  geom_point(size=3) + 
  stat_smooth(method='lm', formula= y ~ poly(x,2), color='black', fill='black', alpha=.2) + 
  theme_bw() + 
  theme(panel.grid = element_blank()) + 
  ggtitle(title) +
  ylim(NM_min - 1, NM_max + 1) + 
  geom_vline(xintercept = eval(parse(text=paste('df_inflectionPts$elbow_', group, '[', i, ']', sep=''))), color='darkslategray3', size=2, linetype='dashed') + 
  geom_vline(xintercept = eval(parse(text=paste('df_inflectionPts$change_', group, '[', i, ']', sep=''))), color='darkgoldenrod1', size=2, linetype='twodash') + 
  geom_vline(xintercept = eval(parse(text=paste('df_inflectionPts$inflection_', group, '[', i, ']', sep=''))), color='coral1', size=2, linetype='longdash') +
  geom_label(aes(
    x=eval(parse(text=paste('df_inflectionPts$elbow_', group, '[', i, ']', sep=''))), y=NM_min+2,
    label=round(eval(parse(text=paste('df_inflectionPts$elbow_', group, '[', i, ']', sep=''))),3)), fill='darkslategray3') +
  geom_label(aes(
    x=eval(parse(text=paste('df_inflectionPts$change_', group, '[', i, ']', sep=''))), y=NM_min+4,
    label=round(eval(parse(text=paste('df_inflectionPts$change_', group, '[', i, ']', sep=''))),3)), fill='darkgoldenrod1') +
 geom_label(aes(
    x=eval(parse(text=paste('df_inflectionPts$inflection_', group, '[', i, ']', sep=''))), y=NM_min+6,
    label=round(eval(parse(text=paste('df_inflectionPts$inflection_', group, '[', i, ']', sep=''))),3)), fill='coral1')
}

###########################################################################################

#function to compare correlations between linear and non-linear fits (question 13)
correlation_fn <- function(df, x_var='Age', y_var=vars_segmentsUncorrected, group=NULL){
  
  #separate calculations for group, if required
  if(!is.null(group)) df <- df[df$Diagnosis == group,]

  #first, calculate correlations
  cor_pearson <-  lapply(df[, y_var], function(x) cor.test(x, df[,x_var], method='pearson'))
  cor_spearman <- lapply(df[, y_var], function(x) cor.test(x, df[,x_var], method='spearman', exact=FALSE))
  cor_kendall <-  lapply(df[, y_var], function(x) cor.test(x, df[,x_var], method='kendall', exact=FALSE))
  
  #make a table for correlation and p values
  tbl_cor <- matrix(ncol=length(y_var), nrow=6)
  
  #pull out correlation values
  for (i in 1:length(y_var)){
    tbl_cor[1, i]  <- round(eval(parse(text=paste('cor_pearson$', y_var[i], '$estimate', sep=''))),3)
    tbl_cor[2, i]  <- round(eval(parse(text=paste('cor_spearman$', y_var[i], '$estimate', sep=''))),3)
    tbl_cor[3, i]  <- round(eval(parse(text=paste('cor_kendall$', y_var[i], '$estimate', sep=''))),3)
    
  #pull out p values
    tbl_cor[4, i]  <- round(eval(parse(text=paste('cor_pearson$', y_var[i], '$p.value', sep=''))),3)
    tbl_cor[5, i]  <- round(eval(parse(text=paste('cor_spearman$', y_var[i], '$p.value', sep=''))),3)
    tbl_cor[6, i]  <- round(eval(parse(text=paste('cor_kendall$', y_var[i], '$p.value', sep=''))),3)
  }
  tbl_cor <- as.data.frame(tbl_cor)
  names(tbl_cor) <- y_var
  tbl_cor <- cbind(method=rep(c('Pearson', 'Spearman', 'Kendall'),2), tbl_cor)
  return(tbl_cor)
}

###########################################################################################

[1] Are there age differences in NM, before correction? (cor.test)
No; however, we nonetheless corrected for age (and sex).

Combined

plotScatter_fn(df=df_uncorrected, x='Age', y='value')

LLD & HC

plotScatter_fn(df_uncorrected, x='Age', y='value', color='Diagnosis')

[2] Is there an interaction between diagnosis and age, on NM, before correction?
No. None of the interaction terms, in any segment, are significant.

#make a list of p values
p_ageDxInt <- list()
for (i in vars_segmentsUncorrected){
  ps <- round(eval(parse(text=paste('model_ageDxNM_lin_summary$', i, '$coeftable[4,4]', sep=''))),3)
  p_ageDxInt[[i]] <- ps
}

#paste together into a dataframe
rbind(vars_segmentsUncorrected, p_ageDxInt) %>%
  kable(col.names=NULL, rownames=NULL) %>% kable_styling()

vars_segmentsUncorrected	avg_max_seg_1	avg_max_seg_2	avg_max_seg_3	avg_max_seg_4	avg_max_seg_5	avg_max_seg_6
p_ageDxInt	0.596	0.754	0.532	0.34	0.356	0.697

[3] Are there sex differences in NM, before correction? (t-test)
No; however, we nonetheless corrected for sex (and age).

Combined

plotBox_fn(df_uncorrected, x='Sex', y='value')

LLD

plotBox_fn(df_uncorrected, x='Sex', y='value',group='LLD')

HC

plotBox_fn(df_uncorrected, x='Sex', y='value',group='HC')

[4] Are there differences in the NM values, in the 6 segments, after correction? (ANOVA)
Yes; ANOVA in combined and separate diagnostic groups significant; many/most pairwise comparisons (Wilcox) show a difference.

Combined

plotAnova_fn(df=df_corrected, x='variable', y='value')

tblAnova_fn(df_corrected, variable, value)

variable 1	variable 2	p value	adj. p value	significance level
avg_max_seg_1_cor	avg_max_seg_2_cor	4.5e-13	0.000	****
avg_max_seg_1_cor	avg_max_seg_3_cor	< 2e-16	0.000	****
avg_max_seg_1_cor	avg_max_seg_4_cor	< 2e-16	0.000	****
avg_max_seg_1_cor	avg_max_seg_5_cor	0.155	0.150	ns
avg_max_seg_1_cor	avg_max_seg_6_cor	< 2e-16	0.000	****
avg_max_seg_2_cor	avg_max_seg_3_cor	< 2e-16	0.000	****
avg_max_seg_2_cor	avg_max_seg_4_cor	< 2e-16	0.000	****
avg_max_seg_2_cor	avg_max_seg_5_cor	3.5e-09	0.000	****
avg_max_seg_2_cor	avg_max_seg_6_cor	< 2e-16	0.000	****
avg_max_seg_3_cor	avg_max_seg_4_cor	0.048	0.096
avg_max_seg_3_cor	avg_max_seg_5_cor	< 2e-16	0.000	****
avg_max_seg_3_cor	avg_max_seg_6_cor	1.8e-13	0.000	****
avg_max_seg_4_cor	avg_max_seg_5_cor	< 2e-16	0.000	****
avg_max_seg_4_cor	avg_max_seg_6_cor	< 2e-16	0.000	****
avg_max_seg_5_cor	avg_max_seg_6_cor	< 2e-16	0.000	****

LLD

plotAnova_fn(df=df_corrected, x='variable', y='value', group='LLD')

tblAnova_fn(df_corrected, variable, value, group='LLD')

variable 1	variable 2	p value	adj. p value	significance level
avg_max_seg_1_cor	avg_max_seg_2_cor	1.9e-06	7.5e-06	****
avg_max_seg_1_cor	avg_max_seg_3_cor	2.8e-09	0.0e+00	****
avg_max_seg_1_cor	avg_max_seg_4_cor	3.3e-11	0.0e+00	****
avg_max_seg_1_cor	avg_max_seg_5_cor	1.00	1.0e+00	ns
avg_max_seg_1_cor	avg_max_seg_6_cor	1.6e-14	0.0e+00	****
avg_max_seg_2_cor	avg_max_seg_3_cor	1.1e-12	0.0e+00	****
avg_max_seg_2_cor	avg_max_seg_4_cor	6.3e-14	0.0e+00	****
avg_max_seg_2_cor	avg_max_seg_5_cor	3.1e-05	9.2e-05	****
avg_max_seg_2_cor	avg_max_seg_6_cor	1.6e-14	0.0e+00	****
avg_max_seg_3_cor	avg_max_seg_4_cor	0.22	4.3e-01	ns
avg_max_seg_3_cor	avg_max_seg_5_cor	7.0e-08	4.0e-07	****
avg_max_seg_3_cor	avg_max_seg_6_cor	4.8e-09	0.0e+00	****
avg_max_seg_4_cor	avg_max_seg_5_cor	2.7e-08	2.0e-07	****
avg_max_seg_4_cor	avg_max_seg_6_cor	1.9e-13	0.0e+00	****
avg_max_seg_5_cor	avg_max_seg_6_cor	1.6e-14	0.0e+00	****

HC

plotAnova_fn(df=df_corrected, x='variable', y='value', group='HC')

tblAnova_fn(df_corrected, variable, value, group='HC')

variable 1	variable 2	p value	adj. p value	significance level
avg_max_seg_1_cor	avg_max_seg_2_cor	5.1e-08	3.0e-07	****
avg_max_seg_1_cor	avg_max_seg_3_cor	1.1e-08	1.0e-07	****
avg_max_seg_1_cor	avg_max_seg_4_cor	1.1e-09	0.0e+00	****
avg_max_seg_1_cor	avg_max_seg_5_cor	0.021	4.3e-02
avg_max_seg_1_cor	avg_max_seg_6_cor	3.4e-11	0.0e+00	****
avg_max_seg_2_cor	avg_max_seg_3_cor	9.7e-13	0.0e+00	****
avg_max_seg_2_cor	avg_max_seg_4_cor	2.4e-13	0.0e+00	****
avg_max_seg_2_cor	avg_max_seg_5_cor	3.2e-05	9.5e-05	****
avg_max_seg_2_cor	avg_max_seg_6_cor	2.4e-13	0.0e+00	****
avg_max_seg_3_cor	avg_max_seg_4_cor	0.148	1.5e-01	ns
avg_max_seg_3_cor	avg_max_seg_5_cor	2.9e-10	0.0e+00	****
avg_max_seg_3_cor	avg_max_seg_6_cor	6.3e-06	2.5e-05	****
avg_max_seg_4_cor	avg_max_seg_5_cor	2.4e-11	0.0e+00	****
avg_max_seg_4_cor	avg_max_seg_6_cor	5.5e-07	2.7e-06	****
avg_max_seg_5_cor	avg_max_seg_6_cor	1.7e-12	0.0e+00	****

[5] Are there differences in the NM values, by hemisphere (left vs right), after correction? (Wilcoxon test)
Yes, left hemisphere is higher.

Combined

plotAnova_fn(df_corrected, x='hemisphere', y='value')

LLD

plotAnova_fn(df_corrected, x='hemisphere', y='value', group='LLD')

HC

plotAnova_fn(df_corrected, x='hemisphere', y='value', group='HC')

[6] Are there differences in the NM values, by segment, after correction? (ANOVA)
Yes, medial is higher than rostral and caudal, and rostral is higher than caudal.

Combined

plotAnova_fn(df_corrected, 'ROI', 'value')

tblAnova_fn(df_corrected, ROI, value)

variable 1	variable 2	p value	significance level
rostral	medial	<2e-16	****
rostral	caudal	<2e-16	****
medial	caudal	<2e-16	****

LLD

plotAnova_fn(df_corrected, x='ROI', y='value', group='LLD')

tblAnova_fn(df_corrected, ROI, value, group='LLD')

variable 1	variable 2	p value	significance level
rostral	medial	3.3e-08	****
rostral	caudal	3.9e-11	****
medial	caudal	3.4e-16	****

HC

plotAnova_fn(df_corrected, x='ROI', y='value', group='HC')

tblAnova_fn(df_corrected, ROI, value, group='HC')

variable 1	variable 2	p value	significance level
rostral	medial	3.7e-13	****
rostral	caudal	1.8e-10	****
medial	caudal	< 2e-16	****

[7] Is the effect of age on NM (before correction), by diagnosis, actually linear?
LLD looks non-linear. Also, the LLD distribution often appears “U”-shaped, opposite of the expected inverted U.

Segment 1

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_1', title='HC, Segment 1', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_1', title='LLD, Segment 1', group='LLD'),
ncol=2)

Segment 2

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_2', title='HC, Segment 2', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_2', title='LLD, Segment 2', group='LLD'),
ncol=2)

Segment 3

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_3', title='HC, Segment 3', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_3', title='LLD, Segment 3', group='LLD'),
ncol=2)

Segment 4

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_4', title='HC, Segment 4', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_4', title='LLD, Segment 4', group='LLD'),
ncol=2)

Segment 5

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_5', title='HC, Segment 5', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_5', title='LLD, Segment 5', group='LLD'),
ncol=2)

Segment 6

grid.arrange(
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_6', title='HC, Segment 6', group='HC'),
  confidencePlot_fn(df, x_var='Age', y_var='avg_max_seg_6', title='LLD, Segment 6', group='LLD'),
ncol=2)

[8] Summary of the linear model, of the effect of age on LC NM-MRI before correction, by diagnosis
Note: the p value associated with the interaction term in the final row of these tables is synonymous with the p values reported in [2].

Segment 1

summ(model_ageDxNM_lin[[1]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(3,44)	1.90
R²	0.11
Adj. R²	0.05

	Est.	S.E.	t val.	p
(Intercept)	30.03	6.36	4.72	0.00
Age	-0.08	0.09	-0.90	0.37
DiagnosisLLD	7.30	10.82	0.68	0.50
Age:DiagnosisLLD	-0.08	0.16	-0.53	0.60
Standard errors: OLS

Segment 2

summ(model_ageDxNM_lin[[2]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(3,44)	1.10
R²	0.07
Adj. R²	0.01

	Est.	S.E.	t val.	p
(Intercept)	39.62	10.68	3.71	0.00
Age	-0.18	0.15	-1.20	0.24
DiagnosisLLD	6.28	18.17	0.35	0.73
Age:DiagnosisLLD	-0.08	0.26	-0.32	0.75
Standard errors: OLS

Segment 3

summ(model_ageDxNM_lin[[3]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(3,44)	0.87
R²	0.06
Adj. R²	-0.01

	Est.	S.E.	t val.	p
(Intercept)	20.25	8.70	2.33	0.02
Age	-0.01	0.12	-0.12	0.91
DiagnosisLLD	11.02	14.80	0.74	0.46
Age:DiagnosisLLD	-0.14	0.21	-0.63	0.53
Standard errors: OLS

Segment 4

summ(model_ageDxNM_lin[[4]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(3,44)	0.95
R²	0.06
Adj. R²	-0.00

	Est.	S.E.	t val.	p
(Intercept)	21.58	5.85	3.69	0.00
Age	0.02	0.08	0.28	0.78
DiagnosisLLD	10.74	9.96	1.08	0.29
Age:DiagnosisLLD	-0.14	0.14	-0.97	0.34
Standard errors: OLS

Segment 5

summ(model_ageDxNM_lin[[5]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(3,44)	1.06
R²	0.07
Adj. R²	0.00

	Est.	S.E.	t val.	p
(Intercept)	31.43	8.67	3.62	0.00
Age	-0.09	0.12	-0.70	0.49
DiagnosisLLD	13.23	14.75	0.90	0.37
Age:DiagnosisLLD	-0.20	0.21	-0.93	0.36
Standard errors: OLS

Segment 6

summ(model_ageDxNM_lin[[6]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(3,44)	0.37
R²	0.02
Adj. R²	-0.04

	Est.	S.E.	t val.	p
(Intercept)	17.20	9.00	1.91	0.06
Age	0.01	0.13	0.08	0.94
DiagnosisLLD	4.61	15.30	0.30	0.76
Age:DiagnosisLLD	-0.09	0.22	-0.39	0.70
Standard errors: OLS

[9] Summary of the quadratic model, of the effect of age on LC NM-MRI before correction, by diagnosis

Segment 1

summ(model_ageDxNM_quad[[1]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(5,42)	1.58
R²	0.16
Adj. R²	0.06

	Est.	S.E.	t val.	p
(Intercept)	24.66	0.74	33.36	0.00
poly(Age, 2)1	-3.23	4.21	-0.77	0.45
poly(Age, 2)2	-5.69	4.77	-1.19	0.24
DiagnosisLLD	1.52	1.04	1.46	0.15
poly(Age, 2)1:DiagnosisLLD	-2.87	7.50	-0.38	0.70
poly(Age, 2)2:DiagnosisLLD	10.55	7.32	1.44	0.16
Standard errors: OLS

Segment 2

summ(model_ageDxNM_quad[[2]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(5,42)	1.91
R²	0.18
Adj. R²	0.09

	Est.	S.E.	t val.	p
(Intercept)	27.06	1.19	22.70	0.00
poly(Age, 2)1	-8.56	6.79	-1.26	0.21
poly(Age, 2)2	1.47	7.69	0.19	0.85
DiagnosisLLD	1.62	1.68	0.97	0.34
poly(Age, 2)1:DiagnosisLLD	3.25	12.09	0.27	0.79
poly(Age, 2)2:DiagnosisLLD	20.32	11.80	1.72	0.09
Standard errors: OLS

Segment 3

summ(model_ageDxNM_quad[[3]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(5,42)	0.71
R²	0.08
Adj. R²	-0.03

	Est.	S.E.	t val.	p
(Intercept)	19.08	1.03	18.61	0.00
poly(Age, 2)1	-1.09	5.84	-0.19	0.85
poly(Age, 2)2	4.41	6.61	0.67	0.51
DiagnosisLLD	2.14	1.45	1.48	0.15
poly(Age, 2)1:DiagnosisLLD	-4.02	10.40	-0.39	0.70
poly(Age, 2)2:DiagnosisLLD	1.37	10.16	0.14	0.89
Standard errors: OLS

Segment 4

summ(model_ageDxNM_quad[[4]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(5,42)	0.78
R²	0.08
Adj. R²	-0.02

	Est.	S.E.	t val.	p
(Intercept)	23.26	0.69	33.76	0.00
poly(Age, 2)1	1.28	3.93	0.33	0.75
poly(Age, 2)2	-2.23	4.44	-0.50	0.62
DiagnosisLLD	1.26	0.97	1.29	0.20
poly(Age, 2)1:DiagnosisLLD	-5.17	6.99	-0.74	0.46
poly(Age, 2)2:DiagnosisLLD	6.99	6.82	1.02	0.31
Standard errors: OLS

Segment 5

summ(model_ageDxNM_quad[[5]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(5,42)	1.43
R²	0.15
Adj. R²	0.04

	Est.	S.E.	t val.	p
(Intercept)	25.39	0.99	25.65	0.00
poly(Age, 2)1	-4.16	5.64	-0.74	0.46
poly(Age, 2)2	1.44	6.38	0.22	0.82
DiagnosisLLD	0.21	1.40	0.15	0.88
poly(Age, 2)1:DiagnosisLLD	-4.50	10.04	-0.45	0.66
poly(Age, 2)2:DiagnosisLLD	13.05	9.80	1.33	0.19
Standard errors: OLS

Segment 6

summ(model_ageDxNM_quad[[6]])

Observations	48
Dependent variable	x
Type	OLS linear regression

F(5,42)	1.04
R²	0.11
Adj. R²	0.00

	Est.	S.E.	t val.	p
(Intercept)	18.17	1.02	17.74	0.00
poly(Age, 2)1	1.12	5.84	0.19	0.85
poly(Age, 2)2	-6.93	6.60	-1.05	0.30
DiagnosisLLD	-1.06	1.44	-0.73	0.47
poly(Age, 2)1:DiagnosisLLD	-0.47	10.39	-0.05	0.96
poly(Age, 2)2:DiagnosisLLD	20.17	10.14	1.99	0.05
Standard errors: OLS

[10] Compare the linear and quadratic models, of the effect of age on LC NM-MRI before correction, by diagnosis
The more complex (quadratic) model provides a better fit – but because higher order terms always increase \(R^2\) (by definition), a “much better” fit is required in order to justify increased model complexity. The model comparison value is a p value.

#pull out the r2 values, by segment and diagnosis
adjR2_linear_HC <- do.call(adjR2_fn, list(model=model_ageDxNM_lin_HC))
adjR2_nonLinear_HC <- do.call(adjR2_fn, list(model=model_ageDxNM_quad_HC))
adjR2_linear_LLD <- do.call(adjR2_fn, list(model=model_ageDxNM_lin_LLD))
adjR2_nonLinear_LLD <- do.call(adjR2_fn, list(model=model_ageDxNM_quad_LLD))

#pull out the beta values from the non-linear equations, by segment, and diagnosis
standBeta_nonLinear_HC <- do.call(standBeta_fn, list(model=model_ageDxNM_quad_HC))
standBeta_nonLinear_LLD <- do.call(standBeta_fn, list(model=model_ageDxNM_quad_LLD))

#compute the difference between the linear and quadratic models, and pull the p value
modelComparison_HC <- do.call(compareR2_fn, list(model_ageDxNM_lin_HC, model_ageDxNM_quad_HC))
modelComparison_LLD <- do.call(compareR2_fn, list(model_ageDxNM_lin_LLD, model_ageDxNM_quad_LLD))

#put everything together into a table
tbl_age <- as.data.frame(cbind(
  adjR2_linear_HC, 
  adjR2_nonLinear_HC, 
  adjR2_linear_LLD, 
  adjR2_nonLinear_LLD,
  standBeta_nonLinear_HC, 
  standBeta_nonLinear_LLD,
  modelComparison_HC,
  modelComparison_LLD))

#add row.names
row.names(tbl_age) <- vars_segmentsUncorrected

#make an explicit column for the comparison between R2 values, R2 delta
tbl_age$adjR2_delta_HC <- adjR2_linear_HC - adjR2_nonLinear_HC 
tbl_age$adjR2_delta_LLD <- adjR2_linear_LLD - adjR2_nonLinear_LLD

#nice, scrollable table
tbl_age %>%
  kable(digits=3) %>%
  kable_styling() %>%
  row_spec(0, font_size=9) %>%
  scroll_box(width='800px')

	adjR2_linear_HC	adjR2_nonLinear_HC	adjR2_linear_LLD	adjR2_nonLinear_LLD	standBeta_nonLinear_HC	standBeta_nonLinear_LLD	modelComparison_HC	modelComparison_LLD	adjR2_delta_HC	adjR2_delta_LLD
avg_max_seg_1	-0.014	-0.001	0.036	0.031	-0.180	-0.277	0.276	0.360	-0.012	0.005
avg_max_seg_2	0.043	-0.003	0.008	0.158	-0.293	-0.223	0.835	0.034	0.046	-0.150
avg_max_seg_3	-0.047	-0.072	-0.015	-0.037	-0.027	-0.166	0.483	0.486	0.025	0.022
avg_max_seg_4	-0.044	-0.086	0.009	0.012	0.055	-0.224	0.657	0.310	0.041	-0.003
avg_max_seg_5	-0.014	-0.062	0.045	0.127	-0.178	-0.291	0.802	0.089	0.047	-0.082
avg_max_seg_6	-0.047	-0.055	-0.033	0.090	0.015	-0.100	0.370	0.055	0.008	-0.123

[11] What are the quadratic equations, by diagnosis and segment, uncorrected?

#get names of segments in list format -- easier to work with
NM_vars_uncorrectedList <- as.list(vars_segmentsUncorrected)

#apply equations_fn function
nlEquations_HC <- equations_fn(model_ageDxNM_quad_HC, NM_vars_uncorrectedList)
nlEquations_LLD <- equations_fn(model_ageDxNM_quad_LLD, NM_vars_uncorrectedList)

#put together in nice table
tbl_nlEquations <- data.frame(cbind(nlEquations_HC, nlEquations_LLD))
row.names(tbl_nlEquations) <- vars_segmentsUncorrected

#clean up
rm(nlEquations_HC, nlEquations_LLD)

#nice table
tbl_nlEquations %>% kable() %>% kable_styling()

	nlEquations_HC	nlEquations_LLD
avg_max_seg_1	-4.042(Age)2 + -3.053(Age) + 24.343	2.968(Age)2 + -4.368(Age) + 26.105
avg_max_seg_2	1.046(Age)2 + -6.814(Age) + 26.935	13.281(Age)2 + -7.001(Age) + 27.91
avg_max_seg_3	3.131(Age)2 + -0.54(Age) + 19.25	3.522(Age)2 + -3.963(Age) + 21.086
avg_max_seg_4	-1.585(Age)2 + 0.866(Age) + 23.192	2.9(Age)2 + -3.08(Age) + 24.403
avg_max_seg_5	1.02(Age)2 + -3.258(Age) + 25.363	8.831(Age)2 + -7.578(Age) + 25.185
avg_max_seg_6	-4.928(Age)2 + 0.373(Age) + 17.889	8.069(Age)2 + -2.04(Age) + 16.566

[12] Plot the “inflection” points on the quadratic fit (on uncorrected)
Blue line: “Elbow point”, i.e., the x-axis (age) location of the y axis (NM) has the biggest change.
Yellow line: “Change point”, i.e., the location of the quadratic curve where the curve changes sign (not present if curve goes in only one direction).
Pink line: “Inflection point”, i.e., where the second derivative \(g''(x)\) switches signs (changes concavity), i.e., is equal to 0. (Note that there is by definition no inflection of a quadratic curve, so a cubic curve (3 degrees) was fit to identify this point.)

#apply getQuad_fn function to find points along quadratic (order=2)
quadraticList_HC <- getCurve_fn(df, x_var='Age', segments=vars_segmentsUncorrected, order=2, group='HC')
quadraticList_LLD <- getCurve_fn(df, x_var='Age', segments=vars_segmentsUncorrected, order=2, group='LLD')

#apply getQuad_fn function to find points along cubic (order=3)
cubicList_HC <- getCurve_fn(df, x_var='Age', segments=vars_segmentsUncorrected, order=3, group='HC')
cubicList_LLD <- getCurve_fn(df, x_var='Age', segments=vars_segmentsUncorrected, order=3, group='LLD')

#apply elbow_fn function (location of the biggest change, irrespective of sign)
elbowList_HC <- elbow_fn(quadraticList_HC)
elbowList_LLD <- elbow_fn(quadraticList_LLD)
  
#apply change_fn function (where changes sign from pos to neg, vice versa)
changeList_HC <- change_fn(quadraticList_HC) 
changeList_LLD <- change_fn(quadraticList_LLD) 

#apply getInflection_fn function
inflectionList_HC <-getInflection_fn(cubicList_HC)
inflectionList_LLD <-getInflection_fn(cubicList_LLD)

#get our 3 "inflection point" lists into a dataframe
df_inflectionPts <- do.call(rbind, Map(data.frame,
  elbow_HC = elbowList_HC, 
  elbow_LLD = elbowList_LLD, 
  change_HC = changeList_HC, 
  change_LLD = changeList_LLD,
  inflection_HC = inflectionList_HC,
  inflection_LLD = inflectionList_LLD))

#add segments explicitly
df_inflectionPts <- cbind(Segments = vars_segmentsUncorrected, df_inflectionPts)

#nice table
df_inflectionPts %>%
  kable(digits=3,
        align=rep('c', ncol(df_inflectionPts)),
        col.names=c('Segment', rep(c('HC', 'LLD'),3))) %>%
  kable_styling(bootstrap_options = 'striped') %>%
  add_header_above(c(' '=1,'Elbow' = 2, 'Change' = 2, 'Inflection'=2))

	Elbow		Change		Inflection
Segment	HC	LLD	HC	LLD	HC	LLD
avg_max_seg_1	66.405	61.671	69.316	75.316	71.937	75.316
avg_max_seg_2	77.468	63.899	NA	71.975	70.481	72.392
avg_max_seg_3	67.278	61.392	71.937	73.924	69.316	71.278
avg_max_seg_4	80.962	60.835	73.101	73.924	71.209	NaN
avg_max_seg_5	74.848	65.570	80.671	73.089	70.190	75.316
avg_max_seg_6	64.076	65.013	71.646	71.139	73.538	79.354

Segment 1

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_1', title='HC, Segment 1', group='HC', i=1),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_1', title='LLD, Segment 1', group='LLD', i=1),
ncol=2)

Segment 2

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_2', title='HC, Segment 2', group='HC', i=2),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_2', title='LLD, Segment 2', group='LLD', i=2),
ncol=2)

Segment 3

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_3', title='HC, Segment 3', group='HC', i=3),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_3', title='LLD, Segment 3', group='LLD', i=3),
ncol=2)

Segment 4

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_4', title='HC, Segment 4', group='HC', i=4),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_4', title='LLD, Segment 4', group='LLD', i=4),
ncol=2)

Segment 5

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_5', title='HC, Segment 5', group='HC', i=5),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_5', title='LLD, Segment 5', group='LLD', i=5),
ncol=2)

Segment 6

grid.arrange(
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_6', title='HC, Segment 6', group='HC', i=6),
inflectionPlot_fn(df=df, x_var='Age', y_var='avg_max_seg_6', title='LLD, Segment 6', group='LLD', i=6),
ncol=2)

[13] Test correlations between age and uncorrected NM, for each group, when modelled as linear vs. non-linear.
Non-linear tests based on rank-based correlation coefficients: (1) Spearman’s rank correlation coefficient (rho) and (2) Kendall’s rank correlation coefficient (tau). We do not observe any important differences.

HC

correlation_fn(df, group='HC') %>% kable %>% kable_styling() %>%
  pack_rows('correlation values', 1, 3) %>%
  pack_rows('p values', 4, 6)

method	avg_max_seg_1	avg_max_seg_2	avg_max_seg_3	avg_max_seg_4	avg_max_seg_5	avg_max_seg_6
correlation values
Pearson	-0.180	-0.293	-0.027	0.055	-0.178	0.015
Spearman	-0.106	-0.240	-0.112	0.123	-0.116	0.106
Kendall	-0.077	-0.101	-0.012	0.101	-0.077	0.085
p values
Pearson	0.410	0.174	0.902	0.804	0.416	0.945
Spearman	0.632	0.270	0.611	0.575	0.598	0.632
Kendall	0.614	0.508	0.937	0.508	0.614	0.578

LLD

correlation_fn(df, group='LLD') %>% kable %>% kable_styling() %>%
  pack_rows('correlation values', 1, 3) %>%
  pack_rows('p values', 4, 6)

method	avg_max_seg_1	avg_max_seg_2	avg_max_seg_3	avg_max_seg_4	avg_max_seg_5	avg_max_seg_6
correlation values
Pearson	-0.277	-0.223	-0.166	-0.224	-0.291	-0.100
Spearman	-0.292	-0.207	-0.112	-0.233	-0.252	-0.124
Kendall	-0.196	-0.154	-0.072	-0.202	-0.189	-0.113
p values
Pearson	0.181	0.284	0.428	0.282	0.158	0.635
Spearman	0.157	0.321	0.594	0.262	0.224	0.553
Kendall	0.181	0.290	0.622	0.166	0.196	0.438

NM-MRI 04_sanityChecks

Combined

LLD & HC

Combined

LLD

HC

Combined

LLD

HC

Combined

LLD

HC

Combined

LLD

HC

Segment 1

Segment 2

Segment 3

Segment 4

Segment 5

Segment 6

Segment 1

Segment 2

Segment 3

Segment 4

Segment 5

Segment 6

Segment 1

Segment 2

Segment 3

Segment 4

Segment 5

Segment 6

Segment 1

Segment 2

Segment 3

Segment 4

Segment 5

Segment 6

HC

LLD

NM-MRI
04_sanityChecks