Libraries

library(tidyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(corrplot)

## corrplot 0.92 loaded

library(openxlsx)

## Warning: package 'openxlsx' was built under R version 4.3.3

library(readxl)

This markdown can be used to wrangle, visualize and analyze data, from OPT-neuro and PACT-MD

OPT-Neuro

Preprocessing

Load data

OPTData <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/ON_DATASET_08302024.xlsx")

# Run any pre-processing steps here that make the data readable - e.g, turn blanks "" into NA

# Replace blank with NA

replace_blank_with_na <- function(x) {
  if (is.character(x)) {
    x <- na_if(x, "")
  }
  return(x)
}

OPTData <- OPTData %>% mutate(across(everything(), ~ replace_blank_with_na(.)))

# Data collected at CU cannot be used - pending REB/ERB approval

OPTData <- OPTData %>% filter(SITE_TXT_data != "CU")

Demographics

We need to organize a dataframe (OPT_demo) that contains all the demographic information we need - and adjust as needed.

# Variables of interest - site, age, gender, years of education, no cognitive disorder, mild cognitive impairment, dementia, and CDRscore  

OPT_demo <- OPTData %>%
  select(ID_complete, SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU, AGE, GENDER, ED, NCD_01, MCI_01, DEM_01, CDRSCORE_01) %>%
 
  
  
   mutate(
    SEX = factor(GENDER, levels = c(1, 2), labels = c("Male", "Female")), # Confirm levels with Peter
    SITE = case_when(  # Create a column that simplifies site naming 
      SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU == 1 ~ "WU",
      SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU == 2 ~ "UT",
      SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU == 3 ~ "UP",
      SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU == 4 ~ "LA",
      SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU == 5 ~ "CU"
    )
  ) %>%
  
  select(ID_complete, SITE, AGE, SEX, ED, NCD_01, MCI_01, DEM_01, CDRSCORE_01) # Simplified column names for clarity

Neuropsych

OPT_np <- OPTData %>%
  select(ID_complete, AIS_01, IMIS_01, LIS_01, MVCIS_01, MDMIS_01, MTOTALIS_01, INDEXSUM_01)

# Attention RBANS index, immediate memory RBANS index, language RBANS index, visuospatial index, delayed memory index, total index score, sum of all scores.

Final dataframe

# We are now going to construct a new data frame OPT_neuro - that combines our variables of interest from the OPT_demo and OPT_np

# Select columns we want from OPT_demo

OPT_neuro <- OPT_demo %>%
  select(1:9) # adjust as needed

# Now we will join the two data frames using left_join. note that we are using ID_complete to match the columns. 

# adjust OPT_np as needed 1 = ID_complete, 2:8 = rest of the dataframe

OPT_neuro <- left_join(OPT_neuro, OPT_np[, c(1, 2:8)] , by = "ID_complete")

We are going to filter out incomplete datasets (NA) from OPT_model along with invalid scores (CDRSCORE_01 > 10 and < 0)

OPT_neuro <- OPT_neuro %>% filter(CDRSCORE_01 <= 10 & CDRSCORE_01 >= 0 & !is.na(CDRSCORE_01))

OPT-Blood

Preprocessing

Load data

# We are specifically importing the second sheet as this has the complete list of biomarker records for each participant - however, these include baseline and some 6m. We will process these 6m records out.

OPTB <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/O-Neuro_SASP_1Notes_2Round_Sep2023.xlsx", sheet = "SASP_O-Neuro_2Round")

## New names:
## • `Sample ID` -> `Sample ID...1`
## • `Sample ID` -> `Sample ID...2`

# Run any pre-processing steps here that make the data readable - e.g, turn blanks "" into NA

# Replace blank with NA

replace_blank_with_na <- function(x) {
  if (is.character(x)) {
    x <- na_if(x, "")
  }
  return(x)
}

OPTB <- OPTB %>% mutate(across(everything(), ~ replace_blank_with_na(.)))

# The second column "Sample ID..2" contains the actual ID. We will rename this to ID_complete to match the naming convention of the OPT_neuro

OPTB <- OPTB %>%
  rename(ID_complete = `Sample ID...2`) 

# Convert the entire Date column from Excel serial numbers to R dates
OPTB <- OPTB %>%
  mutate(Date = as.Date(as.numeric(Date), origin = "1899-12-30"))

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Date = as.Date(as.numeric(Date), origin = "1899-12-30")`.
## Caused by warning in `as.Date()`:
## ! NAs introduced by coercion

We will remove the second record for every duplicate we find - this second record is the 6m. We will arrange by date and keep the first(earlier) date.

OPTB <- OPTB %>%
  arrange(Date) %>%  # Sort by date ascending, oldest first
  group_by(ID_complete) %>%  # Group by the identifier column
  filter(row_number() == 1) %>%  # Keep only the first row in each group, which is the oldest
  ungroup()  


# some dates are NA - upon cross checking with the source excel sheet, these are also 6m. We will filter them out

OPTB <- OPTB %>%
  filter(!is.na(Date))

# IDs that begin with CU are from ColumbiaU, the use of their data is still pending REB/ERB approval. We are going to filter them out.

OPTB <- OPTB %>%
  filter(!grepl("^CU", ID_complete))

UT participants have a different nomenclature. These participants will need to be renamed to match their IDs in OPT-Neuro

OPT Model

Preprocessing

We will now Merge: OPT_neuro and OPTB by ID_complete

OPT_model <- left_join(OPT_neuro, OPTB[, c(2, 4:25)] , by = "ID_complete")

# We are now going to harmonize  to be numeric (majority are integer)

OPT_model <- OPT_model %>%
  mutate(across(3:38, as.numeric))

# add new MTOTALIS standardized column

OPT_model$MTOTALIS <- (OPT_model$MTOTALIS_01 - 100) / 15

# add new column - group which will identify whether a participant is MCI or not

OPT_model$group <- ifelse(OPT_model$MCI_01 == 1, "MDD + MCI", "MDD")

Filter high SD

Identify our variables of interest here

OPT_variables <- c("IL-1 beta/IL-1F2", "TNF RI/TNFRSF1A", "IL-6", "TNF RII/TNFRSF1B" )

OPT_np_variables <- c("AIS_01",      "IMIS_01",     "LIS_01",     "MVCIS_01",    "MDMIS_01",    "MTOTALIS_01", "INDEXSUM_01")

We will now filter out participants that do not have any blood biomarkers.

OPT_model <- OPT_model %>% filter(!is.na('IL-6'))

# Pre-filter reporting
cat("Pre-filter Mean and Standard Deviation of Biomarkers in OPT_model:\n")

## Pre-filter Mean and Standard Deviation of Biomarkers in OPT_model:

for(var in OPT_variables) {
  cat(sprintf("Pre-filter Mean of %s: %.2f\n", var, mean(OPT_model[[var]], na.rm = TRUE)))
  cat(sprintf("Pre-filter SD of %s: %.2f\n", var, sd(OPT_model[[var]], na.rm = TRUE)))
}

## Pre-filter Mean of IL-1 beta/IL-1F2: 6.50
## Pre-filter SD of IL-1 beta/IL-1F2: 5.98
## Pre-filter Mean of TNF RI/TNFRSF1A: 1389.12
## Pre-filter SD of TNF RI/TNFRSF1A: 611.56
## Pre-filter Mean of IL-6: 3.52
## Pre-filter SD of IL-6: 7.74
## Pre-filter Mean of TNF RII/TNFRSF1B: 3486.25
## Pre-filter SD of TNF RII/TNFRSF1B: 1719.72

cat(sprintf("Pre-filter Number of participants: %d\n\n", nrow(OPT_model)))

## Pre-filter Number of participants: 360

# Calculate means and standard deviations for each biomarker and filter the dataframe
OPT_model_filtered <- OPT_model
for(var in OPT_variables) {
  mean_var <- mean(OPT_model[[var]], na.rm = TRUE)
  sd_var <- sd(OPT_model[[var]], na.rm = TRUE)
  
  OPT_model_filtered <- OPT_model_filtered[
    OPT_model_filtered[[var]] > (mean_var - 5 * sd_var) & 
    OPT_model_filtered[[var]] < (mean_var + 5 * sd_var),
  ]
}

# Post-filter reporting
cat("Post-filter Mean and Standard Deviation of Biomarkers in OPT_model:\n")

## Post-filter Mean and Standard Deviation of Biomarkers in OPT_model:

for(var in OPT_variables) {
  cat(sprintf("Post-filter Mean of %s: %.2f\n", var, mean(OPT_model_filtered[[var]], na.rm = TRUE)))
  cat(sprintf("Post-filter SD of %s: %.2f\n", var, sd(OPT_model_filtered[[var]], na.rm = TRUE)))
}

## Post-filter Mean of IL-1 beta/IL-1F2: 6.19
## Post-filter SD of IL-1 beta/IL-1F2: 2.73
## Post-filter Mean of TNF RI/TNFRSF1A: 1374.57
## Post-filter SD of TNF RI/TNFRSF1A: 576.94
## Post-filter Mean of IL-6: 3.10
## Post-filter SD of IL-6: 3.37
## Post-filter Mean of TNF RII/TNFRSF1B: 3414.74
## Post-filter SD of TNF RII/TNFRSF1B: 1441.35

cat(sprintf("Post-filter Number of participants: %d\n\n", nrow(OPT_model_filtered)))

## Post-filter Number of participants: 357

Correlation plot

data_for_correlation_OPT <- OPT_model %>% select(-ID_complete, -SITE, -group)

# Calculate the correlation matrix
OPT_correlation_matrix <- cor(data_for_correlation_OPT, use = "complete.obs", method = "pearson")

# calculate p values 
p_matrix_OPT <- cor.mtest(data_for_correlation_OPT, use = "complete.obs", method = "pearson")

# Use corrplot to visualize the correlations and their significance
corrplot(OPT_correlation_matrix, p.mat = p_matrix_OPT$p, method = "color",
         sig.level = 0.05, type = "lower", order = "hclust",
         tl.cex = 0.6, tl.col = "black", tl.srt = 45,
         col = colorRampPalette(c("blue", "white", "red"))(200),
         addrect = 2)

Correlation plot- filtered

data_for_correlation_OPT_filtered <- OPT_model_filtered %>% select(-ID_complete, -SITE, -group)

# Calculate the correlation matrix
OPT_correlation_matrix_filtered <- cor(data_for_correlation_OPT_filtered, use = "complete.obs", method = "pearson")

# calculate p values 
p_matrix_OPT_filtered <- cor.mtest(OPT_correlation_matrix_filtered, use = "complete.obs", method = "pearson")

# Use corrplot to visualize the correlations and their significance
corrplot(OPT_correlation_matrix_filtered, p.mat = p_matrix_OPT$p, method = "color",
         sig.level = 0.05, type = "lower", order = "hclust",
         tl.cex = 0.6, tl.col = "black", tl.srt = 45,
         col = colorRampPalette(c("blue", "white", "red"))(200),
         addrect = 2)

####export

OPT_correlation_matrix <- as.data.frame(as.table(OPT_correlation_matrix))

OPT_correlation_matrix_filtered <- as.data.frame(as.table(OPT_correlation_matrix_filtered))

write.xlsx(OPT_correlation_matrix, "/Users/hassanabdulrasul/Desktop/OPT/OPT_correlation_matrix.xlsx")

write.xlsx(OPT_correlation_matrix_filtered, "/Users/hassanabdulrasul/Desktop/OPT/OPT_correlation_matrix_filtered.xlsx")

Visualizations

Scatter (standardized) Plot - prefilter

Plots with standardized MTOTALIS

plots <- lapply(OPT_variables, function(var) {
  # Create the plot
  plot <- ggplot(OPT_model, aes(x = MTOTALIS, y = .data[[var]])) +
    geom_point() +  # Add points
    geom_smooth(method = "lm", se = FALSE, color = "blue") +  
    labs(title = paste("Scatter plot of MTOTALIS vs", var),
         x = "MTOTALIS_01", y = var) +
    geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], use = "complete.obs")),
                   x = Inf, y = Inf), hjust = 1.1, vjust = 1.1, color = "red", size = 5)

  return(plot)
})

plots

## [[1]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 360 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[2]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 360 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[3]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 360 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[4]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 360 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

Scatter (standardized) Plot - Post-filter

plots_2 <- lapply(OPT_variables, function(var) {
  # Create the plot
  plot2 <- ggplot(OPT_model_filtered, aes(x = MTOTALIS, y = .data[[var]])) +
    geom_point() +  # Add points
    geom_smooth(method = "lm", se = FALSE, color = "blue") +  
    labs(title = paste("Scatter plot of MTOTALIS vs", var),
         x = "MTOTALIS_01", y = var) +
    geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], use = "complete.obs")),
                   x = Inf, y = Inf), hjust = 1.1, vjust = 1.1, color = "red", size = 5)

  return(plot2)
})

plots_2

## [[1]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 357 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[2]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 357 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[3]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 357 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

## 
## [[4]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 357 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

Violin Plots (biomarker) - Prefilter

OPT_violin_plots <- lapply(OPT_variables, function(var) {
  ggplot(OPT_model, aes(x = group, y = .data[[var]], fill = group)) +
    geom_violin(trim = FALSE) +
    labs(title = paste("Violin Plot of", var, "by Group"),
         x = "Group",
         y = var) +
    theme_minimal()
})


OPT_violin_plots

## [[1]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[2]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[3]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[4]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

Violin Plots (biomarker) - Postfilter

OPT_violin_plots_post_filter <- lapply(OPT_variables, function(var) {
  ggplot(OPT_model_filtered, aes(x = group, y = .data[[var]], fill = group)) +
    geom_violin(trim = FALSE) +
    labs(title = paste("Violin Plot of", var, "by Group"),
         x = "Group",
         y = var) +
    theme_minimal()
})


OPT_violin_plots_post_filter

## [[1]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[2]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[3]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[4]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

Violin Plots (cog) - Prefilter

OPT_np_violin_plots <- lapply(OPT_np_variables, function(var) {
  ggplot(OPT_model, aes(x = group, y = .data[[var]], fill = group)) +
    geom_violin(trim = FALSE) +
    labs(title = paste("Violin Plot of", var, "by Group"),
         x = "Group",
         y = var) +
    theme_minimal()
})

OPT_np_violin_plots

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[7]]

## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

Violin Plots (cog) - Postfilter

OPT_np_violin_plots_post_filter <- lapply(OPT_np_variables, function(var) {
  ggplot(OPT_model_filtered, aes(x = group, y = .data[[var]], fill = group)) +
    geom_violin(trim = FALSE) +
    labs(title = paste("Violin Plot of", var, "by Group"),
         x = "Group",
         y = var) +
    theme_minimal()
})

OPT_np_violin_plots_post_filter

## [[1]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[2]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[3]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[4]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[5]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[6]]

## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

## 
## [[7]]

## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

Histogram plots

OPT_histogram_plots <- lapply(OPT_variables, function(var) {
  ggplot(OPT_model, aes(x = .data[[var]], fill = group)) +
    geom_histogram(position = "dodge", bins = 30, alpha = 0.7) +  
    
    #facet_wrap(~group, scales = "free") +  
    
    labs(title = paste("Histogram of", var, "by Group Pre Filter"),
         x = var,
         y = "Count") +
    theme_minimal() +
    scale_fill_manual(values = c("MDD" = "blue", "MDD + MCI" = "red"))  # Set colors for each group
})


OPT_histogram_plots

## [[1]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).

## 
## [[2]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).

## 
## [[3]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).

## 
## [[4]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).

OPT_histogram_plots_post_filter <- lapply(OPT_variables, function(var) {
  ggplot(OPT_model_filtered, aes(x = .data[[var]], fill = group)) +
    geom_histogram(position = "dodge", bins = 30, alpha = 0.7) +  
    
    #facet_wrap(~group, scales = "free") +  
    
    labs(title = paste("Histogram of", var, "by Group Post Filter"),
         x = var,
         y = "Count") +
    theme_minimal() +
    scale_fill_manual(values = c("MDD" = "blue", "MDD + MCI" = "red"))  # Set colors for each group
})

OPT_histogram_plots_post_filter

## [[1]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).

## 
## [[2]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).

## 
## [[3]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).

## 
## [[4]]

## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).

Regression - prefilter

model_IL1beta <- lm(MTOTALIS ~ `IL-1 beta/IL-1F2` + AGE + SEX, data = OPT_model)
summary(model_IL1beta)

## 
## Call:
## lm(formula = MTOTALIS ~ `IL-1 beta/IL-1F2` + AGE + SEX, data = OPT_model)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.69745 -0.63401  0.03453  0.61474  3.10105 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         0.082051   0.597158   0.137   0.8908  
## `IL-1 beta/IL-1F2` -0.013366   0.008193  -1.631   0.1038  
## AGE                -0.007588   0.008323  -0.912   0.3626  
## SEX                 0.219779   0.103906   2.115   0.0352 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8768 on 320 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.02253,    Adjusted R-squared:  0.01337 
## F-statistic: 2.459 on 3 and 320 DF,  p-value: 0.06281

model_TNFRIa <- lm(MTOTALIS ~ `TNF RI/TNFRSF1A` + AGE + SEX, data = OPT_model)
summary(model_TNFRIa)

## 
## Call:
## lm(formula = MTOTALIS ~ `TNF RI/TNFRSF1A` + AGE + SEX, data = OPT_model)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6481 -0.5981  0.0578  0.6034  3.1633 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        1.978e-01  5.996e-01   0.330   0.7418  
## `TNF RI/TNFRSF1A` -1.654e-04  7.963e-05  -2.077   0.0386 *
## AGE               -6.878e-03  8.315e-03  -0.827   0.4087  
## SEX                2.070e-01  1.031e-01   2.008   0.0455 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8745 on 320 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.02751,    Adjusted R-squared:  0.01839 
## F-statistic: 3.017 on 3 and 320 DF,  p-value: 0.0301

model_TNFRIb <- lm(MTOTALIS ~ `TNF RII/TNFRSF1B` + AGE + SEX, data = OPT_model)
summary(model_TNFRIb)

## 
## Call:
## lm(formula = MTOTALIS ~ `TNF RII/TNFRSF1B` + AGE + SEX, data = OPT_model)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6451 -0.6269  0.0386  0.5942  3.1246 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         1.846e-01  6.048e-01   0.305   0.7604  
## `TNF RII/TNFRSF1B` -4.076e-05  2.837e-05  -1.437   0.1517  
## AGE                -7.732e-03  8.330e-03  -0.928   0.3540  
## SEX                 1.974e-01  1.035e-01   1.908   0.0573 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8776 on 320 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.02072,    Adjusted R-squared:  0.01154 
## F-statistic: 2.257 on 3 and 320 DF,  p-value: 0.0817

model_IL6 <- lm(MTOTALIS ~ `IL-6` + AGE + SEX, data = OPT_model)
summary(model_IL6)

## 
## Call:
## lm(formula = MTOTALIS ~ `IL-6` + AGE + SEX, data = OPT_model)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.62133 -0.64230  0.02244  0.60405  3.10190 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.102924   0.594573   0.173   0.8627  
## `IL-6`      -0.014809   0.006269  -2.362   0.0188 *
## AGE         -0.008228   0.008284  -0.993   0.3214  
## SEX          0.212746   0.102965   2.066   0.0396 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8728 on 320 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.03129,    Adjusted R-squared:  0.02221 
## F-statistic: 3.446 on 3 and 320 DF,  p-value: 0.01702

Regression - postfilter

model_IL1beta_pf <- lm(MTOTALIS ~ `IL-1 beta/IL-1F2` + AGE + SEX, data = OPT_model_filtered)
summary(model_IL1beta_pf)

## 
## Call:
## lm(formula = MTOTALIS ~ `IL-1 beta/IL-1F2` + AGE + SEX, data = OPT_model_filtered)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.69807 -0.63439  0.03131  0.61250  3.09894 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         0.085360   0.600723   0.142   0.8871  
## `IL-1 beta/IL-1F2` -0.013564   0.018360  -0.739   0.4606  
## AGE                -0.007559   0.008422  -0.898   0.3701  
## SEX                 0.217614   0.105670   2.059   0.0403 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8785 on 317 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.01678,    Adjusted R-squared:  0.007475 
## F-statistic: 1.803 on 3 and 317 DF,  p-value: 0.1464

model_TNFRIa_pf <- lm(MTOTALIS ~ `TNF RI/TNFRSF1A` + AGE + SEX, data = OPT_model_filtered)
summary(model_TNFRIa_pf)

## 
## Call:
## lm(formula = MTOTALIS ~ `TNF RI/TNFRSF1A` + AGE + SEX, data = OPT_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6545 -0.5930  0.0673  0.5988  3.1702 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        1.945e-01  6.001e-01   0.324   0.7461  
## `TNF RI/TNFRSF1A` -1.720e-04  8.501e-05  -2.023   0.0439 *
## AGE               -6.953e-03  8.336e-03  -0.834   0.4048  
## SEX                2.188e-01  1.039e-01   2.106   0.0360 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8736 on 317 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.02764,    Adjusted R-squared:  0.01844 
## F-statistic: 3.004 on 3 and 317 DF,  p-value: 0.03064

model_TNFRIb_pf <- lm(MTOTALIS ~ `TNF RII/TNFRSF1B` + AGE + SEX, data = OPT_model_filtered)
summary(model_TNFRIb_pf)

## 
## Call:
## lm(formula = MTOTALIS ~ `TNF RII/TNFRSF1B` + AGE + SEX, data = OPT_model_filtered)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.65271 -0.61837  0.05012  0.59136  3.13321 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         1.846e-01  6.046e-01   0.305   0.7603  
## `TNF RII/TNFRSF1B` -4.636e-05  3.402e-05  -1.363   0.1739  
## AGE                -7.734e-03  8.349e-03  -0.926   0.3550  
## SEX                 2.099e-01  1.041e-01   2.016   0.0446 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8766 on 317 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.02083,    Adjusted R-squared:  0.01156 
## F-statistic: 2.247 on 3 and 317 DF,  p-value: 0.08276

model_IL6_pf <- lm(MTOTALIS ~ `IL-6` + AGE + SEX, data = OPT_model_filtered)
summary(model_IL6_pf)

## 
## Call:
## lm(formula = MTOTALIS ~ `IL-6` + AGE + SEX, data = OPT_model_filtered)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.59596 -0.57579  0.02356  0.59930  3.08999 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.113414   0.595900   0.190   0.8492  
## `IL-6`      -0.034770   0.014443  -2.407   0.0166 *
## AGE         -0.007523   0.008293  -0.907   0.3650  
## SEX          0.213571   0.103467   2.064   0.0398 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8713 on 317 degrees of freedom
##   (36 observations deleted due to missingness)
## Multiple R-squared:  0.03277,    Adjusted R-squared:  0.02362 
## F-statistic:  3.58 on 3 and 317 DF,  p-value: 0.01424

PACtMD

PACtMD is split up into many sheets. We will be importing the relevant sheets individually and then create a merged dataframe - akin to OPT_model

a group column will also be made as we process the data to ensure we know which participant id is case or control. this will be coded as follows. this information will be obtained using class_enrollment from the Enrollment sheet

case = group == 1 control = group == 2

Demographics

PAC_demo <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", sheet = "Demo")

PAC_demo <- PAC_demo %>% mutate(across(everything(), ~ replace_blank_with_na(.)))

To make things simpler, we are going to just work with the main dataframe of imports - given the multiple sheets.

PAC_demo will contain both control and case

We need to remove the columns we don’t care about and also make a new column - age - using demo_dob and demo_date

# Age - rounded to nearest whole number

PAC_demo <- PAC_demo %>%
  mutate(
    demo_dob = as.Date(demo_dob, format = "%Y-%m-%d"), 
    demo_date = as.Date(demo_date, format = "%Y-%m-%d"),  
    age = round(interval(start = demo_dob, end = demo_date) / years(1), digits = 0)
  )

# rename demo_id to record_id and remove the suffix

PAC_demo <- PAC_demo %>%
  mutate(record_id = gsub("_01$", "", demo_id)) %>%
  select(-demo_id) 

#  select columns we care about - demo_gender and age and demo_id - and begin filling up the PAC_demo df

PAC_demo <- PAC_demo %>%
  select(record_id, demo_gender, age)

# filter out the incomplete rows

PAC_demo <- PAC_demo %>% filter(!is.na(demo_gender))

Group

# Reload the excel sheet again but only the enrollment sheet

class_enrollment <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", 
                               sheet = "Enrollment") 

#  rename the class_enrollment to group 

class_enrollment <- class_enrollment %>%
   rename(group = class_enrollment)

# merge the the class_enrollment dataframe with group information to PAC_demo

PAC_demo <- PAC_demo %>%
  left_join(class_enrollment, by = "record_id")

# select columns we care about

PAC_demo <- PAC_demo %>%
  select(record_id, group, demo_gender, age)

# remove class_enrolment

rm(class_enrollment)

Case assignment

The case population has 3 criterias. remitted Major Depressive Disorder (rMDD), Mild Cognitive impairment and rmDD, MCI only

We will first load up variables we will need into the PAC_demo to allow us to determine this

# load up consensus notes - that determine whether a participant met threshold for MCI

PAC_consensus <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", sheet = "Consensus") %>%
  mutate(across(everything(), ~ replace_blank_with_na(.))) %>%
  mutate(record_id = gsub("_01$", "", cc_par_id)) %>%
  select(record_id, cc_mci_cc_mci, cc_mci_psych_dx) 

# group assignment?

PAC_p_dx <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", sheet = "Provisional Dx")   %>%
  mutate(across(everything(), ~ replace_blank_with_na(.)))

PAC_p_dx <- PAC_p_dx %>%
  mutate(record_id = gsub("_01$", "", provisional_id)) %>%
  select(-provisional_id) 

# Montgomery–Åsberg Depression Rating Scale

PAC_madrs <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", sheet = "MADRS") %>%
  mutate(across(everything(), ~ replace_blank_with_na(.))) %>%
  mutate(record_id = gsub("_01$", "", madrs_id)) %>%
  select(record_id, madrs_madrs_total_calc)


# CDR rating

PAC_cdr <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", sheet = "CDR") %>%
  mutate(across(everything(), ~ replace_blank_with_na(.))) %>%
  mutate(record_id = gsub("_01$", "", cdr_id)) %>%
  select(record_id, cdr_rating)

# Join the resepctive dfs

PAC_demo <- PAC_demo %>%
  left_join(PAC_consensus, by = "record_id") %>%
  left_join(PAC_p_dx, by = "record_id") %>%
  left_join(PAC_madrs, by = "record_id") %>%
  left_join(PAC_cdr, by = "record_id")

Now we will make a new variable - case - which will either be rMDD, rMDD + MCI, MCI

its a bit ambigious so we’re going to use provisional_dx and group - if group = 2 then whatever number provisional_dx is will be 4. this way we have 3 case groups and one HC group

PAC_demo <- PAC_demo %>%
  mutate(provisional_dx = if_else(group == 2, 4, provisional_dx))

Neuropsych

We will load up and process both case(PAC_np_case) and control(PAC_np_control) separately and then merge them to one final data frame - PAC_np

# Load up the NP case excel sheet - specifically the COMPOSITE scores

PAC_np_case <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 NP Case.xlsx", 
                               sheet = "COMPOSITE") 

# Load up the NP control excel sheet - specifically the COMPOSITE scores

PAC_np_control <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 NP Control.xlsx", 
                               sheet = "Composite")

Case

What we need to do - rename ID to record_id (this will allow us to merge), filter out empty scores

# rename ID to record_id and remove the suffix

PAC_np_case <- PAC_np_case %>%
  mutate(record_id = gsub("_01$", "", ID)) %>%
  select(-ID) 

# filter out the incomplete rows

PAC_np_case <- PAC_np_case %>% filter(!is.na(VerbalMemoryDomainT0))

Control

# rename ID to record_id and remove the suffix

PAC_np_control <- PAC_np_control %>%
  mutate(record_id = gsub("_01$", "", ID)) %>%
  select(-ID) 

# filter out the incomplete rows

PAC_np_control <- PAC_np_control %>% filter(!is.na(VerbMemComp))

### REMOVE INVALID CONTROL  PAC01_CMH_3060006 (NOTE : Do not use for analysis. Data no valid due to English language difficulties. Participant ineligible.)

PAC_np_control <- PAC_np_control %>%
  filter(record_id != "PAC01_CMH_3060006")

Merge

We will now merge the two but of course they’re not the same column names. we will first change the column names of PAC_np_control to match PAC_np_case

# Rename columns from control to match case

PAC_np_control <- PAC_np_control %>%
  rename(
    VerbalMemoryDomainT0 = VerbMemComp, 
    VisuospatialMemoryDomainT0 = VisuoMemComp,
    ProcessingSpeedDomainT0 = ProccSpeedComp,
    LanguageDomainT0 = LanguageComp,
    ExecutiveDomainT0 = ExecutiveComp,
    WorkingMemoryDomainT0 = WorKMemComp,
    OverallCompositeT0 = OverallComp
    
  )

# Merge to form final PAC_np

PAC_np <- bind_rows(PAC_np_case, PAC_np_control) %>% arrange(record_id)

#  reorder

PAC_np <- PAC_np %>%
  select(record_id, VerbalMemoryDomainT0, VisuospatialMemoryDomainT0, ProcessingSpeedDomainT0,LanguageDomainT0,ExecutiveDomainT0,WorkingMemoryDomainT0,OverallCompositeT0)

Final dataframe

# We are now going to construct a new data frame PACTMD - that combines our variables of interest from the PAC_demo and PAC_np

# Select columns we want from PAC_demo

PACTMD <- PAC_demo

# Now we will join the two data frames using left_join. note that we are using record_id to match the columns. 

# adjust OPT_np as needed 1 = record_id, 2:8 = rest of the dataframe

PACTMD <- left_join(PACTMD, PAC_np[, c(1, 2:8)] , by = "record_id")

Biomarkers

Now we will begin processing and preparing the biomarker dataframe (PAC_bio)

PAC_bio <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACt-MD_Final_Jan2023.xlsx", sheet = "PACt-MD_SendFile")

## New names:
## • `pg/mL` -> `pg/mL...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...32`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...37`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...42`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...47`
## • `` -> `...48`
## • `` -> `...49`
## • `` -> `...50`
## • `` -> `...51`
## • `` -> `...52`
## • `` -> `...53`
## • `` -> `...54`
## • `` -> `...55`
## • `` -> `...56`
## • `` -> `...57`
## • `pg/mL` -> `pg/mL...60`
## • `` -> `...61`
## • `` -> `...62`
## • `` -> `...63`
## • `` -> `...64`
## • `` -> `...65`
## • `` -> `...66`
## • `` -> `...67`
## • `` -> `...68`
## • `` -> `...69`
## • `` -> `...70`
## • `` -> `...71`
## • `` -> `...72`
## • `` -> `...73`
## • `` -> `...74`
## • `` -> `...75`
## • `` -> `...76`
## • `` -> `...77`
## • `` -> `...78`
## • `` -> `...79`
## • `` -> `...80`

PAC_bio <- PAC_bio %>% mutate(across(everything(), ~ replace_blank_with_na(.)))

The excel sheet is formatted heavily which is impacting the way the data is being imported to R. Specifically, the actual headers for columns 5-80 are in the first row.

We will adjust this first

# Set the first row as the column names using whats written in the first row between 5:80

colnames(PAC_bio)[5:ncol(PAC_bio)] <- as.character(PAC_bio[1, 5:ncol(PAC_bio)])

# Now remove the first row since it's set as the header
PAC_bio <- PAC_bio[-1, ]

PAC_bio doesn’t have a record_id column instead it has Participant Number. Moreover, the participant number is just the numbers at the end of PAC01_CMH_XXXXX. We will now add this prefix (PAC01_CMH_) to the number and then change the column name to record_id

PAC_bio <- PAC_bio %>%
  mutate(record_id = paste0("PAC01_CMH_", `Participant Number`)) %>%
  select(-`Participant Number`)

PAC Model

Preprocessing

We will now Merge: PAC_np and PAC_bio by record_id

PAC_model <- left_join(PACTMD, PAC_bio[, c(4:80)] , by = "record_id")

# We are now going to harmonize  to be numeric (majority are integer)

PAC_model <- PAC_model %>%
  mutate(across(2:92, as.numeric))

Identify variabels of interest here

PAC_variables <- c("TNF-alpha", "IL-1beta/IL-1F2", "IL-6", "TNFRI/TNFRSF1A","TNFRII/TNFRSF1B")

PAC_np_variables <- c( "VerbalMemoryDomainT0" ,"VisuospatialMemoryDomainT0", "ProcessingSpeedDomainT0", "LanguageDomainT0", "ExecutiveDomainT0","WorkingMemoryDomainT0", "OverallCompositeT0" )

Filter High SD

We will now filter out participants that do not have a complete dataset we will us na.omit for this

PAC_model <- na.omit(PAC_model)

# Pre-filter reporting
cat("Pre-filter Mean and Standard Deviation of Biomarkers in PAC_model:\n")

## Pre-filter Mean and Standard Deviation of Biomarkers in PAC_model:

for(var in PAC_variables) {
  cat(sprintf("Pre-filter Mean of %s: %.2f\n", var, mean(PAC_model[[var]], na.rm = TRUE)))
  cat(sprintf("Pre-filter SD of %s: %.2f\n", var, sd(PAC_model[[var]], na.rm = TRUE)))
}

## Pre-filter Mean of TNF-alpha: 44.45
## Pre-filter SD of TNF-alpha: 10.89
## Pre-filter Mean of IL-1beta/IL-1F2: 6.47
## Pre-filter SD of IL-1beta/IL-1F2: 1.46
## Pre-filter Mean of IL-6: 2.90
## Pre-filter SD of IL-6: 9.01
## Pre-filter Mean of TNFRI/TNFRSF1A: 1064.58
## Pre-filter SD of TNFRI/TNFRSF1A: 399.64
## Pre-filter Mean of TNFRII/TNFRSF1B: 2774.44
## Pre-filter SD of TNFRII/TNFRSF1B: 1327.45

cat(sprintf("Pre-filter Number of participants: %d\n\n", nrow(PAC_model)))

## Pre-filter Number of participants: 380

# Calculate means and standard deviations for each biomarker and filter the dataframe
PAC_model_filtered <- PAC_model
for(var in PAC_variables) {
  mean_var <- mean(PAC_model[[var]], na.rm = TRUE)
  sd_var <- sd(PAC_model[[var]], na.rm = TRUE)
  
  PAC_model_filtered <- PAC_model_filtered[
    PAC_model_filtered[[var]] > (mean_var - 5 * sd_var) & 
    PAC_model_filtered[[var]] < (mean_var + 5 * sd_var),
  ]
}

# Post-filter reporting
cat("Post-filter Mean and Standard Deviation of Biomarkers in PAC_model:\n")

## Post-filter Mean and Standard Deviation of Biomarkers in PAC_model:

for(var in PAC_variables) {
  cat(sprintf("Post-filter Mean of %s: %.2f\n", var, mean(PAC_model_filtered[[var]], na.rm = TRUE)))
  cat(sprintf("Post-filter SD of %s: %.2f\n", var, sd(PAC_model_filtered[[var]], na.rm = TRUE)))
}

## Post-filter Mean of TNF-alpha: 43.99
## Post-filter SD of TNF-alpha: 10.02
## Post-filter Mean of IL-1beta/IL-1F2: 6.47
## Post-filter SD of IL-1beta/IL-1F2: 1.46
## Post-filter Mean of IL-6: 2.16
## Post-filter SD of IL-6: 2.30
## Post-filter Mean of TNFRI/TNFRSF1A: 1063.29
## Post-filter SD of TNFRI/TNFRSF1A: 390.16
## Post-filter Mean of TNFRII/TNFRSF1B: 2748.80
## Post-filter SD of TNFRII/TNFRSF1B: 1151.51

cat(sprintf("Post-filter Number of participants: %d\n\n", nrow(PAC_model_filtered)))

## Post-filter Number of participants: 375

Correlation plot

data_for_correlation_PAC <- PAC_model %>% select(-record_id, -group)

data_for_correlation_PAC <- data_for_correlation_PAC %>%
  mutate(across(where(is.character), ~as.numeric(as.character(.))))

# Calculate the correlation matrix
PAC_correlation_matrix <- cor(data_for_correlation_PAC, use = "complete.obs", method = "pearson")

# calculate p values 
p_matrix_pac <- cor.mtest(data_for_correlation_PAC, use = "complete.obs", method = "pearson")

# Use corrplot to visualize the correlations and their significance
corrplot(PAC_correlation_matrix, p.mat = p_matrix_pac$p, method = "color",
         sig.level = 0.05, type = "lower", order = "hclust",
         tl.cex = 0.6, tl.col = "black", tl.srt = 45,
         col = colorRampPalette(c("blue", "white", "red"))(200),
         addrect = 2)

Correlation plot-filtered

data_for_correlation_PAC_filtered <- PAC_model_filtered %>% select(-record_id, -group)

data_for_correlation_PAC_filtered <- data_for_correlation_PAC_filtered %>%
  mutate(across(where(is.character), ~as.numeric(as.character(.))))

# Calculate the correlation matrix
PAC_correlation_matrix_filtered <- cor(data_for_correlation_PAC_filtered, use = "complete.obs", method = "pearson")

# calculate p values 
p_matrix_pac_filtered <- cor.mtest(data_for_correlation_PAC_filtered, use = "complete.obs", method = "pearson")

# Use corrplot to visualize the correlations and their significance
corrplot(PAC_correlation_matrix_filtered, p.mat = p_matrix_pac$p, method = "color",
         sig.level = 0.05, type = "lower", order = "hclust",
         tl.cex = 0.6, tl.col = "black", tl.srt = 45,
         col = colorRampPalette(c("blue", "white", "red"))(200),
         addrect = 2)

export

PAC_correlation_matrix <- as.data.frame(as.table(PAC_correlation_matrix))

PAC_correlation_matrix_filtered <- as.data.frame(as.table(PAC_correlation_matrix_filtered))

write.xlsx(PAC_correlation_matrix, "/Users/hassanabdulrasul/Desktop/OPT/PAC_correlation_matrix.xlsx")

write.xlsx(PAC_correlation_matrix_filtered, "/Users/hassanabdulrasul/Desktop/OPT/PAC_correlation_matrix_filtered.xlsx")

Visualizations

Scatter plot : Prefilter

scatter plot for OverallCompositeT0 vs “TNF-alpha”, “IL-1beta/IL-1F2”, “IL-6”, “TNFRI/TNFRSF1A”,“TNFRII/TNFRSF1B”

# adapted from stack overflow

PAC_plots <- lapply(PAC_variables, function(var) {
  # Create the plot
  PAC_plot <- ggplot(data_for_correlation_PAC, aes(x = OverallCompositeT0, y = .data[[var]])) +
    geom_point() +  # Add points
    geom_smooth(method = "lm", se = FALSE, color = "blue") +  
    labs(title = paste("Scatter plot of Overall Composite vs", var),
         x = "OverallCompositeT0", y = var) +
    geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, .data[[var]], use = "complete.obs")),
                   x = Inf, y = Inf), hjust = 1.1, vjust = 1.1, color = "red", size = 5)

  return(PAC_plot)
})

PAC_plots

## [[1]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 380 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## 
## [[2]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 380 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## 
## [[3]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 380 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## 
## [[4]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 380 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## 
## [[5]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 380 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

Scatter plot : Postfilter

scatter plot for OverallCompositeT0 vs “TNF-alpha”, “IL-1beta/IL-1F2”, “IL-6”, “TNFRI/TNFRSF1A”,“TNFRII/TNFRSF1B”

# adapted from stack overflow

PAC_plots_filtered <- lapply(PAC_variables, function(var) {
  # Create the plot
  PAC_plot_filtered <- ggplot(data_for_correlation_PAC_filtered, aes(x = OverallCompositeT0, y = .data[[var]])) +
    geom_point() +  # Add points
    geom_smooth(method = "lm", se = FALSE, color = "blue") +  
    labs(title = paste("Scatter plot of Overall Composite vs", var),
         x = "OverallCompositeT0", y = var) +
    geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, .data[[var]], use = "complete.obs")),
                   x = Inf, y = Inf), hjust = 1.1, vjust = 1.1, color = "red", size = 5)

  return(PAC_plot_filtered)
})

PAC_plots_filtered

## [[1]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 375 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## 
## [[2]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 375 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## 
## [[3]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 375 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## 
## [[4]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 375 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

## 
## [[5]]

## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 375 rows.
## ℹ Did you mean to use `annotate()`?

## `geom_smooth()` using formula = 'y ~ x'

Violin plot (group) : Prefilter

PAC_violin_np_plots <- lapply(PAC_np_variables, function(var) {
  ggplot(data_for_correlation_PAC, aes(x = provisional_dx, y = .data[[var]], fill = factor(provisional_dx))) +
    geom_violin(trim = FALSE) +  # Displays the full range of data distribution
    labs(title = paste("Violin Plot of", var, "by Provisional Diagnosis Group"),
         x = "Provisional Diagnosis Group",
         y = var) +
    theme_minimal() +
    scale_fill_brewer(palette = "Set1")  # Adds a color palette suitable for categorical data
})
# Display the plots
PAC_violin_np_plots

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

Histogram (group) : Prefilter

PAC_histogram_plots <- lapply(PAC_variables, function(var) {
  ggplot(data_for_correlation_PAC, aes(x = .data[[var]], fill = factor(provisional_dx))) +
    geom_histogram(bins = 30, position = "dodge", alpha = 0.7) +  
    
    #facet_wrap(~provisional_dx, scales = "free") +  
    
    labs(title = paste("Histogram of", var, "by Provisional Diagnosis Group"),
         x = var,
         y = "Count") +
    theme_minimal() +
    scale_fill_brewer(palette = "Set1")  
})

PAC_histogram_plots

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

Violin plot (group) : Postfilter

PAC_violin_np_plots_postfilter <- lapply(PAC_np_variables, function(var) {
  ggplot(data_for_correlation_PAC_filtered, aes(x = provisional_dx, y = .data[[var]], fill = factor(provisional_dx))) +
    geom_violin(trim = FALSE) +  # Displays the full range of data distribution
    labs(title = paste("Violin Plot of", var, "by Provisional Diagnosis (filterd) Group"),
         x = "Provisional Diagnosis Group",
         y = var) +
    theme_minimal() +
    scale_fill_brewer(palette = "Set1")  # Adds a color palette suitable for categorical data
})
# Display the plots
PAC_violin_np_plots_postfilter

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

Histogram (group) : Postfilter

PAC_histogram_plots_filtered <- lapply(PAC_variables, function(var) {
  ggplot(data_for_correlation_PAC_filtered, aes(x = .data[[var]], fill = factor(provisional_dx))) +
    geom_histogram(bins = 30, position = "dodge", alpha = 0.7) +  
    
    #facet_wrap(~provisional_dx, scales = "free") +  
    
    labs(title = paste("Histogram of", var, "by Provisional Diagnosis Group"),
         x = var,
         y = "Count") +
    theme_minimal() +
    scale_fill_brewer(palette = "Set1")  
})

PAC_histogram_plots_filtered

## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

Regression - prefilter

PAC_model_IL1beta <- lm(OverallCompositeT0 ~ `IL-1beta/IL-1F2` + age + demo_gender, data = PAC_model)
summary(PAC_model_IL1beta)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ `IL-1beta/IL-1F2` + age + demo_gender, 
##     data = PAC_model)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.54452 -0.42662  0.06964  0.54713  1.70192 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.406212   0.492851   4.882 1.55e-06 ***
## `IL-1beta/IL-1F2` -0.057404   0.026332  -2.180   0.0299 *  
## age               -0.040334   0.006113  -6.598 1.42e-10 ***
## demo_gender        0.061848   0.078757   0.785   0.4328    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7458 on 376 degrees of freedom
## Multiple R-squared:  0.1218, Adjusted R-squared:  0.1148 
## F-statistic: 17.38 on 3 and 376 DF,  p-value: 1.377e-10

PAC_model_TNF<- lm(OverallCompositeT0 ~ `TNF-alpha` + age + demo_gender, data = PAC_model)
summary(PAC_model_TNF)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ `TNF-alpha` + age + demo_gender, 
##     data = PAC_model)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4318 -0.4166  0.1075  0.5665  1.7127 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.242728   0.486931   4.606 5.63e-06 ***
## `TNF-alpha` -0.004854   0.003551  -1.367    0.172    
## age         -0.040364   0.006146  -6.567 1.70e-10 ***
## demo_gender  0.068006   0.078987   0.861    0.390    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7487 on 376 degrees of freedom
## Multiple R-squared:  0.1151, Adjusted R-squared:  0.108 
## F-statistic:  16.3 on 3 and 376 DF,  p-value: 5.594e-10

PAC_model_TNFRIa <- lm(OverallCompositeT0 ~ `TNFRI/TNFRSF1A` + age + demo_gender, data = PAC_model)
summary(PAC_model_TNFRIa)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ `TNFRI/TNFRSF1A` + age + demo_gender, 
##     data = PAC_model)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5041 -0.4156  0.1204  0.5801  1.6419 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.061e+00  4.698e-01   4.386 1.50e-05 ***
## `TNFRI/TNFRSF1A` -2.706e-05  1.007e-04  -0.269    0.788    
## age              -4.054e-02  6.378e-03  -6.357 5.96e-10 ***
## demo_gender       7.293e-02  7.930e-02   0.920    0.358    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7504 on 376 degrees of freedom
## Multiple R-squared:  0.1108, Adjusted R-squared:  0.1038 
## F-statistic: 15.63 on 3 and 376 DF,  p-value: 1.346e-09

PAC_model_TNFRIb <- lm(OverallCompositeT0 ~ `TNFRII/TNFRSF1B` + age + demo_gender, data = PAC_model)
summary(PAC_model_TNFRIb)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ `TNFRII/TNFRSF1B` + age + demo_gender, 
##     data = PAC_model)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4760 -0.4170  0.1089  0.5630  1.6878 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.057e+00  4.693e-01   4.383 1.52e-05 ***
## `TNFRII/TNFRSF1B`  2.974e-05  2.967e-05   1.002    0.317    
## age               -4.221e-02  6.252e-03  -6.752 5.55e-11 ***
## demo_gender        8.066e-02  7.915e-02   1.019    0.309    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7495 on 376 degrees of freedom
## Multiple R-squared:  0.113,  Adjusted R-squared:  0.106 
## F-statistic: 15.97 on 3 and 376 DF,  p-value: 8.532e-10

PAC_model_IL6 <- lm(OverallCompositeT0 ~ `IL-6` + age + demo_gender, data = PAC_model)
summary(PAC_model_IL6)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ `IL-6` + age + demo_gender, 
##     data = PAC_model)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5026 -0.4038  0.1217  0.5736  1.6580 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.115645   0.470522   4.496 9.22e-06 ***
## `IL-6`      -0.005664   0.004301  -1.317    0.189    
## age         -0.041249   0.006132  -6.727 6.47e-11 ***
## demo_gender  0.062513   0.079390   0.787    0.432    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7488 on 376 degrees of freedom
## Multiple R-squared:  0.1148, Adjusted R-squared:  0.1077 
## F-statistic: 16.25 on 3 and 376 DF,  p-value: 5.973e-10

Regression - postfilter

PAC_model_IL1beta_pf <- lm(OverallCompositeT0 ~ `IL-1beta/IL-1F2` + age + demo_gender, data = PAC_model_filtered)
summary(PAC_model_IL1beta_pf)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ `IL-1beta/IL-1F2` + age + demo_gender, 
##     data = PAC_model_filtered)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.54757 -0.41552  0.07242  0.54686  1.70660 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.500733   0.494493   5.057 6.70e-07 ***
## `IL-1beta/IL-1F2` -0.061139   0.026437  -2.313   0.0213 *  
## age               -0.041013   0.006118  -6.704 7.58e-11 ***
## demo_gender        0.051514   0.079436   0.648   0.5171    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7454 on 371 degrees of freedom
## Multiple R-squared:  0.1268, Adjusted R-squared:  0.1197 
## F-statistic: 17.96 on 3 and 371 DF,  p-value: 6.656e-11

PAC_model_TNF_pf<- lm(OverallCompositeT0 ~ `TNF-alpha` + age + demo_gender, data = PAC_model_filtered)
summary(PAC_model_TNF_pf)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ `TNF-alpha` + age + demo_gender, 
##     data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4402 -0.4150  0.1159  0.5683  1.7039 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.271896   0.489335   4.643 4.78e-06 ***
## `TNF-alpha` -0.004070   0.003894  -1.045    0.297    
## age         -0.041064   0.006176  -6.649 1.06e-10 ***
## demo_gender  0.061378   0.079751   0.770    0.442    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7496 on 371 degrees of freedom
## Multiple R-squared:  0.1168, Adjusted R-squared:  0.1097 
## F-statistic: 16.36 on 3 and 371 DF,  p-value: 5.276e-10

PAC_model_TNFRIa_pf <- lm(OverallCompositeT0 ~ `TNFRI/TNFRSF1A` + age + demo_gender, data = PAC_model_filtered)
summary(PAC_model_TNFRIa_pf)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ `TNFRI/TNFRSF1A` + age + demo_gender, 
##     data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5206 -0.4135  0.1147  0.5834  1.6358 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.130e+00  4.713e-01   4.520 8.35e-06 ***
## `TNFRI/TNFRSF1A` -6.317e-05  1.038e-04  -0.609    0.543    
## age              -4.065e-02  6.391e-03  -6.361 5.91e-10 ***
## demo_gender       6.147e-02  7.998e-02   0.769    0.443    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7504 on 371 degrees of freedom
## Multiple R-squared:  0.1151, Adjusted R-squared:  0.1079 
## F-statistic: 16.09 on 3 and 371 DF,  p-value: 7.511e-10

PAC_model_TNFRIb_pf <- lm(OverallCompositeT0 ~ `TNFRII/TNFRSF1B` + age + demo_gender, data = PAC_model_filtered)
summary(PAC_model_TNFRIb_pf)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ `TNFRII/TNFRSF1B` + age + demo_gender, 
##     data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4815 -0.4143  0.1212  0.5771  1.6736 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        2.130e+00  4.714e-01   4.518 8.42e-06 ***
## `TNFRII/TNFRSF1B`  1.858e-05  3.457e-05   0.537    0.591    
## age               -4.242e-02  6.296e-03  -6.738 6.16e-11 ***
## demo_gender        6.743e-02  7.987e-02   0.844    0.399    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7504 on 371 degrees of freedom
## Multiple R-squared:  0.1149, Adjusted R-squared:  0.1077 
## F-statistic: 16.05 on 3 and 371 DF,  p-value: 7.817e-10

PAC_model_IL6_pf <- lm(OverallCompositeT0 ~ `IL-6` + age + demo_gender, data = PAC_model_filtered)
summary(PAC_model_IL6_pf)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ `IL-6` + age + demo_gender, 
##     data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4778 -0.4162  0.1243  0.5769  1.6449 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.107774   0.473090   4.455 1.11e-05 ***
## `IL-6`       0.010216   0.016866   0.606    0.545    
## age         -0.041717   0.006152  -6.781 4.71e-11 ***
## demo_gender  0.067435   0.079836   0.845    0.399    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7504 on 371 degrees of freedom
## Multiple R-squared:  0.1151, Adjusted R-squared:  0.1079 
## F-statistic: 16.08 on 3 and 371 DF,  p-value: 7.524e-10

Exploratory analysis - women only (filtered)

OPT_model_female <- OPT_model_filtered %>%
  filter(SEX == 2)

PAC_model_female <- PAC_model_filtered %>%
  filter(demo_gender == 2)

Regression models for osteo + cog in OPT

model_OPT_F_MTOTALIS<- lm(MTOTALIS ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
summary(model_OPT_F_MTOTALIS)

## 
## Call:
## lm(formula = MTOTALIS ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.65320 -0.63919 -0.02194  0.66427  1.99018 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                  1.068e-01  7.154e-01   0.149    0.881
## `Osteoprotegerin/TNFRSF11B` -7.056e-05  1.678e-04  -0.420    0.675
## AGE                         -1.742e-03  1.077e-02  -0.162    0.872
## 
## Residual standard error: 0.8864 on 212 degrees of freedom
## Multiple R-squared:  0.001207,   Adjusted R-squared:  -0.008216 
## F-statistic: 0.1281 on 2 and 212 DF,  p-value: 0.8799

model_OPT_F_AIS<- lm(AIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
summary(model_OPT_F_AIS)

## 
## Call:
## lm(formula = AIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.667  -8.347   0.412   9.614  36.713 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 91.376270  11.623365   7.861  1.9e-13 ***
## `Osteoprotegerin/TNFRSF11B` -0.000291   0.002727  -0.107    0.915    
## AGE                          0.123501   0.175049   0.706    0.481    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.4 on 212 degrees of freedom
## Multiple R-squared:  0.002373,   Adjusted R-squared:  -0.007038 
## F-statistic: 0.2522 on 2 and 212 DF,  p-value: 0.7773

model_OPT_F_IMIS <- lm(IMIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
summary(model_OPT_F_IMIS)

## 
## Call:
## lm(formula = IMIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.720 -10.902   2.472  11.074  35.321 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  1.016e+02  1.268e+01   8.011  7.5e-14 ***
## `Osteoprotegerin/TNFRSF11B` -4.525e-04  2.974e-03  -0.152    0.879    
## AGE                         -3.705e-03  1.909e-01  -0.019    0.985    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.71 on 212 degrees of freedom
## Multiple R-squared:  0.000127,   Adjusted R-squared:  -0.009306 
## F-statistic: 0.01346 on 2 and 212 DF,  p-value: 0.9866

model_OPT_F_LIS <- lm(LIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE , data = OPT_model_female)
summary(model_OPT_F_LIS)

## 
## Call:
## lm(formula = LIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.361  -4.414  -1.072   4.647  26.875 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 101.230999   7.448308  13.591   <2e-16 ***
## `Osteoprotegerin/TNFRSF11B`  -0.001760   0.001747  -1.007    0.315    
## AGE                           0.005737   0.112172   0.051    0.959    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.228 on 212 degrees of freedom
## Multiple R-squared:  0.004987,   Adjusted R-squared:  -0.0044 
## F-statistic: 0.5313 on 2 and 212 DF,  p-value: 0.5886

model_OPT_F_MVCIS <- lm(MVCIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE , data = OPT_model_female)
summary(model_OPT_F_MVCIS)

## 
## Call:
## lm(formula = MVCIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.682 -12.150  -0.191  12.758  35.835 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  1.047e+02  1.384e+01   7.568 1.14e-12 ***
## `Osteoprotegerin/TNFRSF11B`  4.792e-04  3.246e-03   0.148    0.883    
## AGE                         -1.254e-01  2.084e-01  -0.602    0.548    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.14 on 212 degrees of freedom
## Multiple R-squared:  0.001706,   Adjusted R-squared:  -0.007712 
## F-statistic: 0.1811 on 2 and 212 DF,  p-value: 0.8345

model_OPT_F_MDMIS <- lm(MDMIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
summary(model_OPT_F_MDMIS)

## 
## Call:
## lm(formula = MDMIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -47.315  -5.388   0.983   9.055  25.113 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 108.156005  11.234635   9.627   <2e-16 ***
## `Osteoprotegerin/TNFRSF11B`  -0.002088   0.002636  -0.792    0.429    
## AGE                          -0.098840   0.169194  -0.584    0.560    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.92 on 212 degrees of freedom
## Multiple R-squared:  0.006098,   Adjusted R-squared:  -0.003279 
## F-statistic: 0.6503 on 2 and 212 DF,  p-value: 0.5229

Regression models for osteo + cog in PAC

model_PAC_F_VERB<- lm(VerbalMemoryDomainT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_PAC_F_VERB)

## 
## Call:
## lm(formula = VerbalMemoryDomainT0 ~ Osteoprotegerin + age, data = PAC_model_female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7426 -0.5786  0.1320  0.9046  2.2800 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      1.5000410  0.9731506   1.541   0.1246  
## Osteoprotegerin -0.0001952  0.0002499  -0.781   0.4354  
## age             -0.0278706  0.0143389  -1.944   0.0532 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.228 on 226 degrees of freedom
## Multiple R-squared:  0.0261, Adjusted R-squared:  0.01748 
## F-statistic: 3.028 on 2 and 226 DF,  p-value: 0.05038

model_PAC_F_VIS<- lm(VisuospatialMemoryDomainT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_PAC_F_VIS)

## 
## Call:
## lm(formula = VisuospatialMemoryDomainT0 ~ Osteoprotegerin + age, 
##     data = PAC_model_female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6586 -0.6546  0.2915  0.8622  2.0244 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      2.401e+00  9.785e-01   2.454  0.01490 * 
## Osteoprotegerin -6.576e-05  2.512e-04  -0.262  0.79374   
## age             -4.172e-02  1.442e-02  -2.894  0.00418 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.235 on 226 degrees of freedom
## Multiple R-squared:  0.04255,    Adjusted R-squared:  0.03408 
## F-statistic: 5.022 on 2 and 226 DF,  p-value: 0.007348

model_PAC_F_LANG<- lm(LanguageDomainT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_PAC_F_LANG)

## 
## Call:
## lm(formula = LanguageDomainT0 ~ Osteoprotegerin + age, data = PAC_model_female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2204 -0.5250  0.2375  0.7792  1.9390 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      1.2790024  0.8848739   1.445   0.1497  
## Osteoprotegerin -0.0002501  0.0002272  -1.101   0.2721  
## age             -0.0251973  0.0130381  -1.933   0.0545 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.117 on 226 degrees of freedom
## Multiple R-squared:  0.03061,    Adjusted R-squared:  0.02203 
## F-statistic: 3.568 on 2 and 226 DF,  p-value: 0.02982

model_PAC_F_EXEC<- lm(ExecutiveDomainT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_PAC_F_EXEC)

## 
## Call:
## lm(formula = ExecutiveDomainT0 ~ Osteoprotegerin + age, data = PAC_model_female)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4082 -0.5380  0.1457  0.6534  2.1276 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.499e+00  7.000e-01   3.569 0.000437 ***
## Osteoprotegerin -6.148e-05  1.797e-04  -0.342 0.732605    
## age             -4.200e-02  1.031e-02  -4.072 6.44e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8837 on 226 degrees of freedom
## Multiple R-squared:  0.0805, Adjusted R-squared:  0.07237 
## F-statistic: 9.893 on 2 and 226 DF,  p-value: 7.606e-05

model_OPT_F_WM <- lm(WorkingMemoryDomainT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_OPT_F_WM)

## 
## Call:
## lm(formula = WorkingMemoryDomainT0 ~ Osteoprotegerin + age, data = PAC_model_female)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.28236 -0.56148  0.06138  0.54501  2.11212 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.887e+00  6.467e-01   4.464 1.27e-05 ***
## Osteoprotegerin -5.988e-06  1.660e-04  -0.036    0.971    
## age             -5.144e-02  9.529e-03  -5.399 1.70e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8164 on 226 degrees of freedom
## Multiple R-squared:  0.127,  Adjusted R-squared:  0.1193 
## F-statistic: 16.44 on 2 and 226 DF,  p-value: 2.167e-07

model_OPT_F_COMPOSITE <- lm(OverallCompositeT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_OPT_F_COMPOSITE)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ Osteoprotegerin + age, data = PAC_model_female)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.53267 -0.44388  0.07337  0.57840  1.67103 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      2.3201432  0.5928589   3.913  0.00012 ***
## Osteoprotegerin -0.0001340  0.0001522  -0.880  0.37968    
## age             -0.0406972  0.0087355  -4.659 5.43e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7484 on 226 degrees of freedom
## Multiple R-squared:  0.1112, Adjusted R-squared:  0.1034 
## F-statistic: 14.14 on 2 and 226 DF,  p-value: 1.631e-06

Exploratory analysis : Tau and age interaction

model_PAC_tau_VERB<- lm(VerbalMemoryDomainT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_VERB)

## 
## Call:
## lm(formula = VerbalMemoryDomainT0 ~ pTau * age + demo_gender, 
##     data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7790 -0.5720  0.0872  0.8362  2.3201 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  1.1767125  1.1231739   1.048  0.29548   
## pTau         0.0315391  0.2331862   0.135  0.89249   
## age         -0.0354468  0.0152037  -2.331  0.02027 * 
## demo_gender  0.3458754  0.1246853   2.774  0.00582 **
## pTau:age    -0.0005023  0.0032484  -0.155  0.87719   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.171 on 370 degrees of freedom
## Multiple R-squared:  0.06318,    Adjusted R-squared:  0.05305 
## F-statistic: 6.238 on 4 and 370 DF,  p-value: 7.243e-05

model_PAC_tau_VIS<- lm(VisuospatialMemoryDomainT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_VIS)

## 
## Call:
## lm(formula = VisuospatialMemoryDomainT0 ~ pTau * age + demo_gender, 
##     data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6349 -0.6021  0.2895  0.8546  2.2493 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  2.031863   1.191779   1.705   0.0891 . 
## pTau         0.073223   0.247429   0.296   0.7674   
## age         -0.043086   0.016132  -2.671   0.0079 **
## demo_gender  0.203706   0.132301   1.540   0.1245   
## pTau:age    -0.001034   0.003447  -0.300   0.7643   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.242 on 370 degrees of freedom
## Multiple R-squared:  0.06315,    Adjusted R-squared:  0.05303 
## F-statistic: 6.235 on 4 and 370 DF,  p-value: 7.276e-05

model_PAC_tau_LANG<- lm(LanguageDomainT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_LANG)

## 
## Call:
## lm(formula = LanguageDomainT0 ~ pTau * age + demo_gender, data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3106 -0.5383  0.1659  0.7693  2.0927 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  1.0332991  1.0488798   0.985   0.3252  
## pTau         0.0832202  0.2177617   0.382   0.7026  
## age         -0.0275275  0.0141980  -1.939   0.0533 .
## demo_gender  0.0446689  0.1164378   0.384   0.7015  
## pTau:age    -0.0008286  0.0030336  -0.273   0.7849  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.093 on 370 degrees of freedom
## Multiple R-squared:  0.03588,    Adjusted R-squared:  0.02545 
## F-statistic: 3.442 on 4 and 370 DF,  p-value: 0.008861

model_PAC_tau_EXEC<- lm(ExecutiveDomainT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_EXEC)

## 
## Call:
## lm(formula = ExecutiveDomainT0 ~ pTau * age + demo_gender, data = PAC_model_filtered)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.63477 -0.53636  0.08739  0.61301  2.19265 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.764284   0.838825   0.911   0.3628  
## pTau         0.362191   0.174152   2.080   0.0382 *
## age         -0.019713   0.011355  -1.736   0.0834 .
## demo_gender  0.029222   0.093119   0.314   0.7538  
## pTau:age    -0.004917   0.002426  -2.027   0.0434 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8743 on 370 degrees of freedom
## Multiple R-squared:  0.07947,    Adjusted R-squared:  0.06952 
## F-statistic: 7.985 on 4 and 370 DF,  p-value: 3.492e-06

model_PAC_tau_WM <- lm(WorkingMemoryDomainT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_WM)

## 
## Call:
## lm(formula = WorkingMemoryDomainT0 ~ pTau * age + demo_gender, 
##     data = PAC_model_filtered)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.70803 -0.61826  0.05493  0.57064  2.23985 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.979595   0.827536   1.184   0.2373  
## pTau         0.379942   0.171808   2.211   0.0276 *
## age         -0.019682   0.011202  -1.757   0.0797 .
## demo_gender -0.216286   0.091866  -2.354   0.0191 *
## pTau:age    -0.004996   0.002393  -2.087   0.0375 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8626 on 370 degrees of freedom
## Multiple R-squared:  0.09495,    Adjusted R-squared:  0.08517 
## F-statistic: 9.705 on 4 and 370 DF,  p-value: 1.79e-07

model_PAC_tau_COMPOSITE <- lm(OverallCompositeT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_COMPOSITE)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ pTau * age + demo_gender, data = PAC_model_filtered)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.48591 -0.41143  0.09383  0.56572  1.66538 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.688331   0.718669   2.349 0.019337 *  
## pTau         0.137115   0.149206   0.919 0.358713    
## age         -0.036182   0.009728  -3.719 0.000231 ***
## demo_gender  0.066317   0.079781   0.831 0.406373    
## pTau:age    -0.001710   0.002079  -0.823 0.411165    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7491 on 370 degrees of freedom
## Multiple R-squared:  0.1205, Adjusted R-squared:  0.111 
## F-statistic: 12.67 on 4 and 370 DF,  p-value: 1.128e-09

Exploratory analysis : Tau and sex interaction

model_PAC_tausex_VERB<- lm(VerbalMemoryDomainT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_VERB)

## 
## Call:
## lm(formula = VerbalMemoryDomainT0 ~ pTau * demo_gender + age, 
##     data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7713 -0.5642  0.0848  0.8289  2.3396 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.549661   0.762919   2.031 0.042946 *  
## pTau             -0.063751   0.053606  -1.189 0.235102    
## demo_gender       0.214528   0.167302   1.282 0.200548    
## age              -0.037831   0.009622  -3.932 0.000101 ***
## pTau:demo_gender  0.038139   0.032826   1.162 0.246045    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.169 on 370 degrees of freedom
## Multiple R-squared:  0.06652,    Adjusted R-squared:  0.05643 
## F-statistic: 6.592 on 4 and 370 DF,  p-value: 3.919e-05

model_PAC_tausex_VIS<- lm(VisuospatialMemoryDomainT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_VIS)

## 
## Call:
## lm(formula = VisuospatialMemoryDomainT0 ~ pTau * demo_gender + 
##     age, data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6120 -0.6042  0.2892  0.8299  2.2251 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.52902    0.80991   3.123  0.00193 ** 
## pTau             -0.05652    0.05691  -0.993  0.32126    
## demo_gender       0.07914    0.17761   0.446  0.65616    
## age              -0.04736    0.01021  -4.637 4.92e-06 ***
## pTau:demo_gender  0.03579    0.03485   1.027  0.30505    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.241 on 370 degrees of freedom
## Multiple R-squared:  0.06559,    Adjusted R-squared:  0.05549 
## F-statistic: 6.493 on 4 and 370 DF,  p-value: 4.655e-05

model_PAC_tausex_LANG<- lm(LanguageDomainT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_LANG)

## 
## Call:
## lm(formula = LanguageDomainT0 ~ pTau * demo_gender + age, data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1452 -0.5662  0.1926  0.7983  2.1725 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.418237   0.713079   1.989 0.047449 *  
## pTau             -0.017445   0.050104  -0.348 0.727911    
## demo_gender      -0.047954   0.156373  -0.307 0.759271    
## age              -0.030922   0.008994  -3.438 0.000652 ***
## pTau:demo_gender  0.026573   0.030681   0.866 0.386999    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.092 on 370 degrees of freedom
## Multiple R-squared:  0.03763,    Adjusted R-squared:  0.02723 
## F-statistic: 3.617 on 4 and 370 DF,  p-value: 0.006592

model_PAC_tausex_EXEC<- lm(ExecutiveDomainT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_EXEC)

## 
## Call:
## lm(formula = ExecutiveDomainT0 ~ pTau * demo_gender + age, data = PAC_model_filtered)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6316 -0.5811  0.1019  0.6101  2.2573 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.888101   0.573130   3.294  0.00108 ** 
## pTau              0.049687   0.040271   1.234  0.21805    
## demo_gender       0.104733   0.125683   0.833  0.40521    
## age              -0.037148   0.007228  -5.139 4.48e-07 ***
## pTau:demo_gender -0.025453   0.024660  -1.032  0.30267    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8779 on 370 degrees of freedom
## Multiple R-squared:  0.07192,    Adjusted R-squared:  0.06189 
## F-statistic: 7.168 on 4 and 370 DF,  p-value: 1.441e-05

model_PAC_tausex_WM <- lm(WorkingMemoryDomainT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_WM)

## 
## Call:
## lm(formula = WorkingMemoryDomainT0 ~ pTau * demo_gender + age, 
##     data = PAC_model_filtered)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.69918 -0.58470  0.07103  0.58773  2.19516 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.200787   0.566189   3.887  0.00012 ***
## pTau              0.042946   0.039783   1.080  0.28106    
## demo_gender      -0.182275   0.124161  -1.468  0.14294    
## age              -0.037583   0.007141  -5.263 2.41e-07 ***
## pTau:demo_gender -0.013349   0.024361  -0.548  0.58406    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8673 on 370 degrees of freedom
## Multiple R-squared:  0.08504,    Adjusted R-squared:  0.07514 
## F-statistic: 8.597 on 4 and 370 DF,  p-value: 1.211e-06

model_PAC_tausex_COMPOSITE <- lm(OverallCompositeT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_COMPOSITE)

## 
## Call:
## lm(formula = OverallCompositeT0 ~ pTau * demo_gender + age, data = PAC_model_filtered)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.47073 -0.40933  0.09054  0.56468  1.70146 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       2.239532   0.489075   4.579 6.39e-06 ***
## pTau             -0.010932   0.034365  -0.318    0.751    
## demo_gender       0.006209   0.107250   0.058    0.954    
## age              -0.042621   0.006168  -6.910 2.13e-11 ***
## pTau:demo_gender  0.016450   0.021043   0.782    0.435    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7492 on 370 degrees of freedom
## Multiple R-squared:  0.1203, Adjusted R-squared:  0.1108 
## F-statistic: 12.65 on 4 and 370 DF,  p-value: 1.165e-09

OPT_grant

2024-10-10

Libraries

OPT-Neuro

Preprocessing

Demographics

Neuropsych

Final dataframe

OPT-Blood

Preprocessing

OPT Model

Preprocessing

Filter high SD

Correlation plot

Correlation plot- filtered

Visualizations

Scatter (standardized) Plot - prefilter

Scatter (standardized) Plot - Post-filter

Violin Plots (biomarker) - Prefilter

Violin Plots (biomarker) - Postfilter

Violin Plots (cog) - Prefilter

Violin Plots (cog) - Postfilter

Histogram plots

Regression - prefilter

Regression - postfilter

PACtMD

Demographics

Group

Case assignment

Neuropsych

Case

Control

Merge

Final dataframe

Biomarkers

PAC Model

Preprocessing

Filter High SD

Correlation plot

Correlation plot-filtered

export

Visualizations

Scatter plot : Prefilter

Scatter plot : Postfilter

Violin plot (group) : Prefilter

Histogram (group) : Prefilter

Violin plot (group) : Postfilter

Histogram (group) : Postfilter

Regression - prefilter

Regression - postfilter

Exploratory analysis - women only (filtered)

Exploratory analysis : Tau and age interaction

Exploratory analysis : Tau and sex interaction