library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(corrplot)
## corrplot 0.92 loaded
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.3.3
library(readxl)
This markdown can be used to wrangle, visualize and analyze data, from OPT-neuro and PACT-MD
Load data
OPTData <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/ON_DATASET_08302024.xlsx")
# Run any pre-processing steps here that make the data readable - e.g, turn blanks "" into NA
# Replace blank with NA
replace_blank_with_na <- function(x) {
if (is.character(x)) {
x <- na_if(x, "")
}
return(x)
}
OPTData <- OPTData %>% mutate(across(everything(), ~ replace_blank_with_na(.)))
# Data collected at CU cannot be used - pending REB/ERB approval
OPTData <- OPTData %>% filter(SITE_TXT_data != "CU")
We need to organize a dataframe (OPT_demo) that contains all the demographic information we need - and adjust as needed.
# Variables of interest - site, age, gender, years of education, no cognitive disorder, mild cognitive impairment, dementia, and CDRscore
OPT_demo <- OPTData %>%
select(ID_complete, SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU, AGE, GENDER, ED, NCD_01, MCI_01, DEM_01, CDRSCORE_01) %>%
mutate(
SEX = factor(GENDER, levels = c(1, 2), labels = c("Male", "Female")), # Confirm levels with Peter
SITE = case_when( # Create a column that simplifies site naming
SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU == 1 ~ "WU",
SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU == 2 ~ "UT",
SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU == 3 ~ "UP",
SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU == 4 ~ "LA",
SITE_MRI_1_WU_2_UT_3_UP_4_LA_5_CU == 5 ~ "CU"
)
) %>%
select(ID_complete, SITE, AGE, SEX, ED, NCD_01, MCI_01, DEM_01, CDRSCORE_01) # Simplified column names for clarity
OPT_np <- OPTData %>%
select(ID_complete, AIS_01, IMIS_01, LIS_01, MVCIS_01, MDMIS_01, MTOTALIS_01, INDEXSUM_01)
# Attention RBANS index, immediate memory RBANS index, language RBANS index, visuospatial index, delayed memory index, total index score, sum of all scores.
# We are now going to construct a new data frame OPT_neuro - that combines our variables of interest from the OPT_demo and OPT_np
# Select columns we want from OPT_demo
OPT_neuro <- OPT_demo %>%
select(1:9) # adjust as needed
# Now we will join the two data frames using left_join. note that we are using ID_complete to match the columns.
# adjust OPT_np as needed 1 = ID_complete, 2:8 = rest of the dataframe
OPT_neuro <- left_join(OPT_neuro, OPT_np[, c(1, 2:8)] , by = "ID_complete")
We are going to filter out incomplete datasets (NA) from OPT_model along with invalid scores (CDRSCORE_01 > 10 and < 0)
OPT_neuro <- OPT_neuro %>% filter(CDRSCORE_01 <= 10 & CDRSCORE_01 >= 0 & !is.na(CDRSCORE_01))
Load data
# We are specifically importing the second sheet as this has the complete list of biomarker records for each participant - however, these include baseline and some 6m. We will process these 6m records out.
OPTB <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/O-Neuro_SASP_1Notes_2Round_Sep2023.xlsx", sheet = "SASP_O-Neuro_2Round")
## New names:
## • `Sample ID` -> `Sample ID...1`
## • `Sample ID` -> `Sample ID...2`
# Run any pre-processing steps here that make the data readable - e.g, turn blanks "" into NA
# Replace blank with NA
replace_blank_with_na <- function(x) {
if (is.character(x)) {
x <- na_if(x, "")
}
return(x)
}
OPTB <- OPTB %>% mutate(across(everything(), ~ replace_blank_with_na(.)))
# The second column "Sample ID..2" contains the actual ID. We will rename this to ID_complete to match the naming convention of the OPT_neuro
OPTB <- OPTB %>%
rename(ID_complete = `Sample ID...2`)
# Convert the entire Date column from Excel serial numbers to R dates
OPTB <- OPTB %>%
mutate(Date = as.Date(as.numeric(Date), origin = "1899-12-30"))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Date = as.Date(as.numeric(Date), origin = "1899-12-30")`.
## Caused by warning in `as.Date()`:
## ! NAs introduced by coercion
We will remove the second record for every duplicate we find - this second record is the 6m. We will arrange by date and keep the first(earlier) date.
OPTB <- OPTB %>%
arrange(Date) %>% # Sort by date ascending, oldest first
group_by(ID_complete) %>% # Group by the identifier column
filter(row_number() == 1) %>% # Keep only the first row in each group, which is the oldest
ungroup()
# some dates are NA - upon cross checking with the source excel sheet, these are also 6m. We will filter them out
OPTB <- OPTB %>%
filter(!is.na(Date))
# IDs that begin with CU are from ColumbiaU, the use of their data is still pending REB/ERB approval. We are going to filter them out.
OPTB <- OPTB %>%
filter(!grepl("^CU", ID_complete))
UT participants have a different nomenclature. These participants will need to be renamed to match their IDs in OPT-Neuro
We will now Merge: OPT_neuro and OPTB by ID_complete
OPT_model <- left_join(OPT_neuro, OPTB[, c(2, 4:25)] , by = "ID_complete")
# We are now going to harmonize to be numeric (majority are integer)
OPT_model <- OPT_model %>%
mutate(across(3:38, as.numeric))
# add new MTOTALIS standardized column
OPT_model$MTOTALIS <- (OPT_model$MTOTALIS_01 - 100) / 15
# add new column - group which will identify whether a participant is MCI or not
OPT_model$group <- ifelse(OPT_model$MCI_01 == 1, "MDD + MCI", "MDD")
Identify our variables of interest here
OPT_variables <- c("IL-1 beta/IL-1F2", "TNF RI/TNFRSF1A", "IL-6", "TNF RII/TNFRSF1B" )
OPT_np_variables <- c("AIS_01", "IMIS_01", "LIS_01", "MVCIS_01", "MDMIS_01", "MTOTALIS_01", "INDEXSUM_01")
We will now filter out participants that do not have any blood biomarkers.
OPT_model <- OPT_model %>% filter(!is.na('IL-6'))
# Pre-filter reporting
cat("Pre-filter Mean and Standard Deviation of Biomarkers in OPT_model:\n")
## Pre-filter Mean and Standard Deviation of Biomarkers in OPT_model:
for(var in OPT_variables) {
cat(sprintf("Pre-filter Mean of %s: %.2f\n", var, mean(OPT_model[[var]], na.rm = TRUE)))
cat(sprintf("Pre-filter SD of %s: %.2f\n", var, sd(OPT_model[[var]], na.rm = TRUE)))
}
## Pre-filter Mean of IL-1 beta/IL-1F2: 6.50
## Pre-filter SD of IL-1 beta/IL-1F2: 5.98
## Pre-filter Mean of TNF RI/TNFRSF1A: 1389.12
## Pre-filter SD of TNF RI/TNFRSF1A: 611.56
## Pre-filter Mean of IL-6: 3.52
## Pre-filter SD of IL-6: 7.74
## Pre-filter Mean of TNF RII/TNFRSF1B: 3486.25
## Pre-filter SD of TNF RII/TNFRSF1B: 1719.72
cat(sprintf("Pre-filter Number of participants: %d\n\n", nrow(OPT_model)))
## Pre-filter Number of participants: 360
# Calculate means and standard deviations for each biomarker and filter the dataframe
OPT_model_filtered <- OPT_model
for(var in OPT_variables) {
mean_var <- mean(OPT_model[[var]], na.rm = TRUE)
sd_var <- sd(OPT_model[[var]], na.rm = TRUE)
OPT_model_filtered <- OPT_model_filtered[
OPT_model_filtered[[var]] > (mean_var - 5 * sd_var) &
OPT_model_filtered[[var]] < (mean_var + 5 * sd_var),
]
}
# Post-filter reporting
cat("Post-filter Mean and Standard Deviation of Biomarkers in OPT_model:\n")
## Post-filter Mean and Standard Deviation of Biomarkers in OPT_model:
for(var in OPT_variables) {
cat(sprintf("Post-filter Mean of %s: %.2f\n", var, mean(OPT_model_filtered[[var]], na.rm = TRUE)))
cat(sprintf("Post-filter SD of %s: %.2f\n", var, sd(OPT_model_filtered[[var]], na.rm = TRUE)))
}
## Post-filter Mean of IL-1 beta/IL-1F2: 6.19
## Post-filter SD of IL-1 beta/IL-1F2: 2.73
## Post-filter Mean of TNF RI/TNFRSF1A: 1374.57
## Post-filter SD of TNF RI/TNFRSF1A: 576.94
## Post-filter Mean of IL-6: 3.10
## Post-filter SD of IL-6: 3.37
## Post-filter Mean of TNF RII/TNFRSF1B: 3414.74
## Post-filter SD of TNF RII/TNFRSF1B: 1441.35
cat(sprintf("Post-filter Number of participants: %d\n\n", nrow(OPT_model_filtered)))
## Post-filter Number of participants: 357
data_for_correlation_OPT <- OPT_model %>% select(-ID_complete, -SITE, -group)
# Calculate the correlation matrix
OPT_correlation_matrix <- cor(data_for_correlation_OPT, use = "complete.obs", method = "pearson")
# calculate p values
p_matrix_OPT <- cor.mtest(data_for_correlation_OPT, use = "complete.obs", method = "pearson")
# Use corrplot to visualize the correlations and their significance
corrplot(OPT_correlation_matrix, p.mat = p_matrix_OPT$p, method = "color",
sig.level = 0.05, type = "lower", order = "hclust",
tl.cex = 0.6, tl.col = "black", tl.srt = 45,
col = colorRampPalette(c("blue", "white", "red"))(200),
addrect = 2)
data_for_correlation_OPT_filtered <- OPT_model_filtered %>% select(-ID_complete, -SITE, -group)
# Calculate the correlation matrix
OPT_correlation_matrix_filtered <- cor(data_for_correlation_OPT_filtered, use = "complete.obs", method = "pearson")
# calculate p values
p_matrix_OPT_filtered <- cor.mtest(OPT_correlation_matrix_filtered, use = "complete.obs", method = "pearson")
# Use corrplot to visualize the correlations and their significance
corrplot(OPT_correlation_matrix_filtered, p.mat = p_matrix_OPT$p, method = "color",
sig.level = 0.05, type = "lower", order = "hclust",
tl.cex = 0.6, tl.col = "black", tl.srt = 45,
col = colorRampPalette(c("blue", "white", "red"))(200),
addrect = 2)
####export
OPT_correlation_matrix <- as.data.frame(as.table(OPT_correlation_matrix))
OPT_correlation_matrix_filtered <- as.data.frame(as.table(OPT_correlation_matrix_filtered))
write.xlsx(OPT_correlation_matrix, "/Users/hassanabdulrasul/Desktop/OPT/OPT_correlation_matrix.xlsx")
write.xlsx(OPT_correlation_matrix_filtered, "/Users/hassanabdulrasul/Desktop/OPT/OPT_correlation_matrix_filtered.xlsx")
Plots with standardized MTOTALIS
plots <- lapply(OPT_variables, function(var) {
# Create the plot
plot <- ggplot(OPT_model, aes(x = MTOTALIS, y = .data[[var]])) +
geom_point() + # Add points
geom_smooth(method = "lm", se = FALSE, color = "blue") +
labs(title = paste("Scatter plot of MTOTALIS vs", var),
x = "MTOTALIS_01", y = var) +
geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], use = "complete.obs")),
x = Inf, y = Inf), hjust = 1.1, vjust = 1.1, color = "red", size = 5)
return(plot)
})
plots
## [[1]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 360 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[2]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 360 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[3]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 360 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[4]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 360 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).
plots_2 <- lapply(OPT_variables, function(var) {
# Create the plot
plot2 <- ggplot(OPT_model_filtered, aes(x = MTOTALIS, y = .data[[var]])) +
geom_point() + # Add points
geom_smooth(method = "lm", se = FALSE, color = "blue") +
labs(title = paste("Scatter plot of MTOTALIS vs", var),
x = "MTOTALIS_01", y = var) +
geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], use = "complete.obs")),
x = Inf, y = Inf), hjust = 1.1, vjust = 1.1, color = "red", size = 5)
return(plot2)
})
plots_2
## [[1]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 357 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[2]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 357 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[3]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 357 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[4]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(MTOTALIS_01, .data[[var]], : All aesthetics have length 1, but the data has 357 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).
OPT_violin_plots <- lapply(OPT_variables, function(var) {
ggplot(OPT_model, aes(x = group, y = .data[[var]], fill = group)) +
geom_violin(trim = FALSE) +
labs(title = paste("Violin Plot of", var, "by Group"),
x = "Group",
y = var) +
theme_minimal()
})
OPT_violin_plots
## [[1]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[2]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[3]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[4]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
OPT_violin_plots_post_filter <- lapply(OPT_variables, function(var) {
ggplot(OPT_model_filtered, aes(x = group, y = .data[[var]], fill = group)) +
geom_violin(trim = FALSE) +
labs(title = paste("Violin Plot of", var, "by Group"),
x = "Group",
y = var) +
theme_minimal()
})
OPT_violin_plots_post_filter
## [[1]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[2]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[3]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[4]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
OPT_np_violin_plots <- lapply(OPT_np_variables, function(var) {
ggplot(OPT_model, aes(x = group, y = .data[[var]], fill = group)) +
geom_violin(trim = FALSE) +
labs(title = paste("Violin Plot of", var, "by Group"),
x = "Group",
y = var) +
theme_minimal()
})
OPT_np_violin_plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[7]]
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
OPT_np_violin_plots_post_filter <- lapply(OPT_np_variables, function(var) {
ggplot(OPT_model_filtered, aes(x = group, y = .data[[var]], fill = group)) +
geom_violin(trim = FALSE) +
labs(title = paste("Violin Plot of", var, "by Group"),
x = "Group",
y = var) +
theme_minimal()
})
OPT_np_violin_plots_post_filter
## [[1]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[2]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[3]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[4]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[5]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[6]]
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
##
## [[7]]
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
OPT_histogram_plots <- lapply(OPT_variables, function(var) {
ggplot(OPT_model, aes(x = .data[[var]], fill = group)) +
geom_histogram(position = "dodge", bins = 30, alpha = 0.7) +
#facet_wrap(~group, scales = "free") +
labs(title = paste("Histogram of", var, "by Group Pre Filter"),
x = var,
y = "Count") +
theme_minimal() +
scale_fill_manual(values = c("MDD" = "blue", "MDD + MCI" = "red")) # Set colors for each group
})
OPT_histogram_plots
## [[1]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).
##
## [[2]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).
##
## [[3]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).
##
## [[4]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).
OPT_histogram_plots_post_filter <- lapply(OPT_variables, function(var) {
ggplot(OPT_model_filtered, aes(x = .data[[var]], fill = group)) +
geom_histogram(position = "dodge", bins = 30, alpha = 0.7) +
#facet_wrap(~group, scales = "free") +
labs(title = paste("Histogram of", var, "by Group Post Filter"),
x = var,
y = "Count") +
theme_minimal() +
scale_fill_manual(values = c("MDD" = "blue", "MDD + MCI" = "red")) # Set colors for each group
})
OPT_histogram_plots_post_filter
## [[1]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).
##
## [[2]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).
##
## [[3]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).
##
## [[4]]
## Warning: Removed 35 rows containing non-finite outside the scale range
## (`stat_bin()`).
model_IL1beta <- lm(MTOTALIS ~ `IL-1 beta/IL-1F2` + AGE + SEX, data = OPT_model)
summary(model_IL1beta)
##
## Call:
## lm(formula = MTOTALIS ~ `IL-1 beta/IL-1F2` + AGE + SEX, data = OPT_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.69745 -0.63401 0.03453 0.61474 3.10105
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.082051 0.597158 0.137 0.8908
## `IL-1 beta/IL-1F2` -0.013366 0.008193 -1.631 0.1038
## AGE -0.007588 0.008323 -0.912 0.3626
## SEX 0.219779 0.103906 2.115 0.0352 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8768 on 320 degrees of freedom
## (36 observations deleted due to missingness)
## Multiple R-squared: 0.02253, Adjusted R-squared: 0.01337
## F-statistic: 2.459 on 3 and 320 DF, p-value: 0.06281
model_TNFRIa <- lm(MTOTALIS ~ `TNF RI/TNFRSF1A` + AGE + SEX, data = OPT_model)
summary(model_TNFRIa)
##
## Call:
## lm(formula = MTOTALIS ~ `TNF RI/TNFRSF1A` + AGE + SEX, data = OPT_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6481 -0.5981 0.0578 0.6034 3.1633
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.978e-01 5.996e-01 0.330 0.7418
## `TNF RI/TNFRSF1A` -1.654e-04 7.963e-05 -2.077 0.0386 *
## AGE -6.878e-03 8.315e-03 -0.827 0.4087
## SEX 2.070e-01 1.031e-01 2.008 0.0455 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8745 on 320 degrees of freedom
## (36 observations deleted due to missingness)
## Multiple R-squared: 0.02751, Adjusted R-squared: 0.01839
## F-statistic: 3.017 on 3 and 320 DF, p-value: 0.0301
model_TNFRIb <- lm(MTOTALIS ~ `TNF RII/TNFRSF1B` + AGE + SEX, data = OPT_model)
summary(model_TNFRIb)
##
## Call:
## lm(formula = MTOTALIS ~ `TNF RII/TNFRSF1B` + AGE + SEX, data = OPT_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6451 -0.6269 0.0386 0.5942 3.1246
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.846e-01 6.048e-01 0.305 0.7604
## `TNF RII/TNFRSF1B` -4.076e-05 2.837e-05 -1.437 0.1517
## AGE -7.732e-03 8.330e-03 -0.928 0.3540
## SEX 1.974e-01 1.035e-01 1.908 0.0573 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8776 on 320 degrees of freedom
## (36 observations deleted due to missingness)
## Multiple R-squared: 0.02072, Adjusted R-squared: 0.01154
## F-statistic: 2.257 on 3 and 320 DF, p-value: 0.0817
model_IL6 <- lm(MTOTALIS ~ `IL-6` + AGE + SEX, data = OPT_model)
summary(model_IL6)
##
## Call:
## lm(formula = MTOTALIS ~ `IL-6` + AGE + SEX, data = OPT_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.62133 -0.64230 0.02244 0.60405 3.10190
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.102924 0.594573 0.173 0.8627
## `IL-6` -0.014809 0.006269 -2.362 0.0188 *
## AGE -0.008228 0.008284 -0.993 0.3214
## SEX 0.212746 0.102965 2.066 0.0396 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8728 on 320 degrees of freedom
## (36 observations deleted due to missingness)
## Multiple R-squared: 0.03129, Adjusted R-squared: 0.02221
## F-statistic: 3.446 on 3 and 320 DF, p-value: 0.01702
model_IL1beta_pf <- lm(MTOTALIS ~ `IL-1 beta/IL-1F2` + AGE + SEX, data = OPT_model_filtered)
summary(model_IL1beta_pf)
##
## Call:
## lm(formula = MTOTALIS ~ `IL-1 beta/IL-1F2` + AGE + SEX, data = OPT_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.69807 -0.63439 0.03131 0.61250 3.09894
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.085360 0.600723 0.142 0.8871
## `IL-1 beta/IL-1F2` -0.013564 0.018360 -0.739 0.4606
## AGE -0.007559 0.008422 -0.898 0.3701
## SEX 0.217614 0.105670 2.059 0.0403 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8785 on 317 degrees of freedom
## (36 observations deleted due to missingness)
## Multiple R-squared: 0.01678, Adjusted R-squared: 0.007475
## F-statistic: 1.803 on 3 and 317 DF, p-value: 0.1464
model_TNFRIa_pf <- lm(MTOTALIS ~ `TNF RI/TNFRSF1A` + AGE + SEX, data = OPT_model_filtered)
summary(model_TNFRIa_pf)
##
## Call:
## lm(formula = MTOTALIS ~ `TNF RI/TNFRSF1A` + AGE + SEX, data = OPT_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6545 -0.5930 0.0673 0.5988 3.1702
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.945e-01 6.001e-01 0.324 0.7461
## `TNF RI/TNFRSF1A` -1.720e-04 8.501e-05 -2.023 0.0439 *
## AGE -6.953e-03 8.336e-03 -0.834 0.4048
## SEX 2.188e-01 1.039e-01 2.106 0.0360 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8736 on 317 degrees of freedom
## (36 observations deleted due to missingness)
## Multiple R-squared: 0.02764, Adjusted R-squared: 0.01844
## F-statistic: 3.004 on 3 and 317 DF, p-value: 0.03064
model_TNFRIb_pf <- lm(MTOTALIS ~ `TNF RII/TNFRSF1B` + AGE + SEX, data = OPT_model_filtered)
summary(model_TNFRIb_pf)
##
## Call:
## lm(formula = MTOTALIS ~ `TNF RII/TNFRSF1B` + AGE + SEX, data = OPT_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.65271 -0.61837 0.05012 0.59136 3.13321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.846e-01 6.046e-01 0.305 0.7603
## `TNF RII/TNFRSF1B` -4.636e-05 3.402e-05 -1.363 0.1739
## AGE -7.734e-03 8.349e-03 -0.926 0.3550
## SEX 2.099e-01 1.041e-01 2.016 0.0446 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8766 on 317 degrees of freedom
## (36 observations deleted due to missingness)
## Multiple R-squared: 0.02083, Adjusted R-squared: 0.01156
## F-statistic: 2.247 on 3 and 317 DF, p-value: 0.08276
model_IL6_pf <- lm(MTOTALIS ~ `IL-6` + AGE + SEX, data = OPT_model_filtered)
summary(model_IL6_pf)
##
## Call:
## lm(formula = MTOTALIS ~ `IL-6` + AGE + SEX, data = OPT_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.59596 -0.57579 0.02356 0.59930 3.08999
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.113414 0.595900 0.190 0.8492
## `IL-6` -0.034770 0.014443 -2.407 0.0166 *
## AGE -0.007523 0.008293 -0.907 0.3650
## SEX 0.213571 0.103467 2.064 0.0398 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8713 on 317 degrees of freedom
## (36 observations deleted due to missingness)
## Multiple R-squared: 0.03277, Adjusted R-squared: 0.02362
## F-statistic: 3.58 on 3 and 317 DF, p-value: 0.01424
PACtMD is split up into many sheets. We will be importing the relevant sheets individually and then create a merged dataframe - akin to OPT_model
a group column will also be made as we process the data to ensure we know which participant id is case or control. this will be coded as follows. this information will be obtained using class_enrollment from the Enrollment sheet
case = group == 1 control = group == 2
PAC_demo <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", sheet = "Demo")
PAC_demo <- PAC_demo %>% mutate(across(everything(), ~ replace_blank_with_na(.)))
To make things simpler, we are going to just work with the main dataframe of imports - given the multiple sheets.
PAC_demo will contain both control and case
We need to remove the columns we don’t care about and also make a new column - age - using demo_dob and demo_date
# Age - rounded to nearest whole number
PAC_demo <- PAC_demo %>%
mutate(
demo_dob = as.Date(demo_dob, format = "%Y-%m-%d"),
demo_date = as.Date(demo_date, format = "%Y-%m-%d"),
age = round(interval(start = demo_dob, end = demo_date) / years(1), digits = 0)
)
# rename demo_id to record_id and remove the suffix
PAC_demo <- PAC_demo %>%
mutate(record_id = gsub("_01$", "", demo_id)) %>%
select(-demo_id)
# select columns we care about - demo_gender and age and demo_id - and begin filling up the PAC_demo df
PAC_demo <- PAC_demo %>%
select(record_id, demo_gender, age)
# filter out the incomplete rows
PAC_demo <- PAC_demo %>% filter(!is.na(demo_gender))
# Reload the excel sheet again but only the enrollment sheet
class_enrollment <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx",
sheet = "Enrollment")
# rename the class_enrollment to group
class_enrollment <- class_enrollment %>%
rename(group = class_enrollment)
# merge the the class_enrollment dataframe with group information to PAC_demo
PAC_demo <- PAC_demo %>%
left_join(class_enrollment, by = "record_id")
# select columns we care about
PAC_demo <- PAC_demo %>%
select(record_id, group, demo_gender, age)
# remove class_enrolment
rm(class_enrollment)
The case population has 3 criterias. remitted Major Depressive Disorder (rMDD), Mild Cognitive impairment and rmDD, MCI only
We will first load up variables we will need into the PAC_demo to allow us to determine this
# load up consensus notes - that determine whether a participant met threshold for MCI
PAC_consensus <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", sheet = "Consensus") %>%
mutate(across(everything(), ~ replace_blank_with_na(.))) %>%
mutate(record_id = gsub("_01$", "", cc_par_id)) %>%
select(record_id, cc_mci_cc_mci, cc_mci_psych_dx)
# group assignment?
PAC_p_dx <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", sheet = "Provisional Dx") %>%
mutate(across(everything(), ~ replace_blank_with_na(.)))
PAC_p_dx <- PAC_p_dx %>%
mutate(record_id = gsub("_01$", "", provisional_id)) %>%
select(-provisional_id)
# Montgomery–Åsberg Depression Rating Scale
PAC_madrs <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", sheet = "MADRS") %>%
mutate(across(everything(), ~ replace_blank_with_na(.))) %>%
mutate(record_id = gsub("_01$", "", madrs_id)) %>%
select(record_id, madrs_madrs_total_calc)
# CDR rating
PAC_cdr <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 Clinical Case.xlsx", sheet = "CDR") %>%
mutate(across(everything(), ~ replace_blank_with_na(.))) %>%
mutate(record_id = gsub("_01$", "", cdr_id)) %>%
select(record_id, cdr_rating)
# Join the resepctive dfs
PAC_demo <- PAC_demo %>%
left_join(PAC_consensus, by = "record_id") %>%
left_join(PAC_p_dx, by = "record_id") %>%
left_join(PAC_madrs, by = "record_id") %>%
left_join(PAC_cdr, by = "record_id")
Now we will make a new variable - case - which will either be rMDD, rMDD + MCI, MCI
its a bit ambigious so we’re going to use provisional_dx and group - if group = 2 then whatever number provisional_dx is will be 4. this way we have 3 case groups and one HC group
PAC_demo <- PAC_demo %>%
mutate(provisional_dx = if_else(group == 2, 4, provisional_dx))
We will load up and process both case(PAC_np_case) and control(PAC_np_control) separately and then merge them to one final data frame - PAC_np
# Load up the NP case excel sheet - specifically the COMPOSITE scores
PAC_np_case <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 NP Case.xlsx",
sheet = "COMPOSITE")
# Load up the NP control excel sheet - specifically the COMPOSITE scores
PAC_np_control <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACtMD T0 NP Control.xlsx",
sheet = "Composite")
What we need to do - rename ID to record_id (this will allow us to merge), filter out empty scores
# rename ID to record_id and remove the suffix
PAC_np_case <- PAC_np_case %>%
mutate(record_id = gsub("_01$", "", ID)) %>%
select(-ID)
# filter out the incomplete rows
PAC_np_case <- PAC_np_case %>% filter(!is.na(VerbalMemoryDomainT0))
# rename ID to record_id and remove the suffix
PAC_np_control <- PAC_np_control %>%
mutate(record_id = gsub("_01$", "", ID)) %>%
select(-ID)
# filter out the incomplete rows
PAC_np_control <- PAC_np_control %>% filter(!is.na(VerbMemComp))
### REMOVE INVALID CONTROL PAC01_CMH_3060006 (NOTE : Do not use for analysis. Data no valid due to English language difficulties. Participant ineligible.)
PAC_np_control <- PAC_np_control %>%
filter(record_id != "PAC01_CMH_3060006")
We will now merge the two but of course they’re not the same column names. we will first change the column names of PAC_np_control to match PAC_np_case
# Rename columns from control to match case
PAC_np_control <- PAC_np_control %>%
rename(
VerbalMemoryDomainT0 = VerbMemComp,
VisuospatialMemoryDomainT0 = VisuoMemComp,
ProcessingSpeedDomainT0 = ProccSpeedComp,
LanguageDomainT0 = LanguageComp,
ExecutiveDomainT0 = ExecutiveComp,
WorkingMemoryDomainT0 = WorKMemComp,
OverallCompositeT0 = OverallComp
)
# Merge to form final PAC_np
PAC_np <- bind_rows(PAC_np_case, PAC_np_control) %>% arrange(record_id)
# reorder
PAC_np <- PAC_np %>%
select(record_id, VerbalMemoryDomainT0, VisuospatialMemoryDomainT0, ProcessingSpeedDomainT0,LanguageDomainT0,ExecutiveDomainT0,WorkingMemoryDomainT0,OverallCompositeT0)
# We are now going to construct a new data frame PACTMD - that combines our variables of interest from the PAC_demo and PAC_np
# Select columns we want from PAC_demo
PACTMD <- PAC_demo
# Now we will join the two data frames using left_join. note that we are using record_id to match the columns.
# adjust OPT_np as needed 1 = record_id, 2:8 = rest of the dataframe
PACTMD <- left_join(PACTMD, PAC_np[, c(1, 2:8)] , by = "record_id")
Now we will begin processing and preparing the biomarker dataframe (PAC_bio)
PAC_bio <- read_excel("/Users/hassanabdulrasul/Desktop/OPT/PACt-MD_Final_Jan2023.xlsx", sheet = "PACt-MD_SendFile")
## New names:
## • `pg/mL` -> `pg/mL...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...22`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...27`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...32`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...37`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...42`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...47`
## • `` -> `...48`
## • `` -> `...49`
## • `` -> `...50`
## • `` -> `...51`
## • `` -> `...52`
## • `` -> `...53`
## • `` -> `...54`
## • `` -> `...55`
## • `` -> `...56`
## • `` -> `...57`
## • `pg/mL` -> `pg/mL...60`
## • `` -> `...61`
## • `` -> `...62`
## • `` -> `...63`
## • `` -> `...64`
## • `` -> `...65`
## • `` -> `...66`
## • `` -> `...67`
## • `` -> `...68`
## • `` -> `...69`
## • `` -> `...70`
## • `` -> `...71`
## • `` -> `...72`
## • `` -> `...73`
## • `` -> `...74`
## • `` -> `...75`
## • `` -> `...76`
## • `` -> `...77`
## • `` -> `...78`
## • `` -> `...79`
## • `` -> `...80`
PAC_bio <- PAC_bio %>% mutate(across(everything(), ~ replace_blank_with_na(.)))
The excel sheet is formatted heavily which is impacting the way the data is being imported to R. Specifically, the actual headers for columns 5-80 are in the first row.
We will adjust this first
# Set the first row as the column names using whats written in the first row between 5:80
colnames(PAC_bio)[5:ncol(PAC_bio)] <- as.character(PAC_bio[1, 5:ncol(PAC_bio)])
# Now remove the first row since it's set as the header
PAC_bio <- PAC_bio[-1, ]
PAC_bio doesn’t have a record_id column instead it has Participant Number. Moreover, the participant number is just the numbers at the end of PAC01_CMH_XXXXX. We will now add this prefix (PAC01_CMH_) to the number and then change the column name to record_id
PAC_bio <- PAC_bio %>%
mutate(record_id = paste0("PAC01_CMH_", `Participant Number`)) %>%
select(-`Participant Number`)
We will now Merge: PAC_np and PAC_bio by record_id
PAC_model <- left_join(PACTMD, PAC_bio[, c(4:80)] , by = "record_id")
# We are now going to harmonize to be numeric (majority are integer)
PAC_model <- PAC_model %>%
mutate(across(2:92, as.numeric))
Identify variabels of interest here
PAC_variables <- c("TNF-alpha", "IL-1beta/IL-1F2", "IL-6", "TNFRI/TNFRSF1A","TNFRII/TNFRSF1B")
PAC_np_variables <- c( "VerbalMemoryDomainT0" ,"VisuospatialMemoryDomainT0", "ProcessingSpeedDomainT0", "LanguageDomainT0", "ExecutiveDomainT0","WorkingMemoryDomainT0", "OverallCompositeT0" )
We will now filter out participants that do not have a complete dataset we will us na.omit for this
PAC_model <- na.omit(PAC_model)
# Pre-filter reporting
cat("Pre-filter Mean and Standard Deviation of Biomarkers in PAC_model:\n")
## Pre-filter Mean and Standard Deviation of Biomarkers in PAC_model:
for(var in PAC_variables) {
cat(sprintf("Pre-filter Mean of %s: %.2f\n", var, mean(PAC_model[[var]], na.rm = TRUE)))
cat(sprintf("Pre-filter SD of %s: %.2f\n", var, sd(PAC_model[[var]], na.rm = TRUE)))
}
## Pre-filter Mean of TNF-alpha: 44.45
## Pre-filter SD of TNF-alpha: 10.89
## Pre-filter Mean of IL-1beta/IL-1F2: 6.47
## Pre-filter SD of IL-1beta/IL-1F2: 1.46
## Pre-filter Mean of IL-6: 2.90
## Pre-filter SD of IL-6: 9.01
## Pre-filter Mean of TNFRI/TNFRSF1A: 1064.58
## Pre-filter SD of TNFRI/TNFRSF1A: 399.64
## Pre-filter Mean of TNFRII/TNFRSF1B: 2774.44
## Pre-filter SD of TNFRII/TNFRSF1B: 1327.45
cat(sprintf("Pre-filter Number of participants: %d\n\n", nrow(PAC_model)))
## Pre-filter Number of participants: 380
# Calculate means and standard deviations for each biomarker and filter the dataframe
PAC_model_filtered <- PAC_model
for(var in PAC_variables) {
mean_var <- mean(PAC_model[[var]], na.rm = TRUE)
sd_var <- sd(PAC_model[[var]], na.rm = TRUE)
PAC_model_filtered <- PAC_model_filtered[
PAC_model_filtered[[var]] > (mean_var - 5 * sd_var) &
PAC_model_filtered[[var]] < (mean_var + 5 * sd_var),
]
}
# Post-filter reporting
cat("Post-filter Mean and Standard Deviation of Biomarkers in PAC_model:\n")
## Post-filter Mean and Standard Deviation of Biomarkers in PAC_model:
for(var in PAC_variables) {
cat(sprintf("Post-filter Mean of %s: %.2f\n", var, mean(PAC_model_filtered[[var]], na.rm = TRUE)))
cat(sprintf("Post-filter SD of %s: %.2f\n", var, sd(PAC_model_filtered[[var]], na.rm = TRUE)))
}
## Post-filter Mean of TNF-alpha: 43.99
## Post-filter SD of TNF-alpha: 10.02
## Post-filter Mean of IL-1beta/IL-1F2: 6.47
## Post-filter SD of IL-1beta/IL-1F2: 1.46
## Post-filter Mean of IL-6: 2.16
## Post-filter SD of IL-6: 2.30
## Post-filter Mean of TNFRI/TNFRSF1A: 1063.29
## Post-filter SD of TNFRI/TNFRSF1A: 390.16
## Post-filter Mean of TNFRII/TNFRSF1B: 2748.80
## Post-filter SD of TNFRII/TNFRSF1B: 1151.51
cat(sprintf("Post-filter Number of participants: %d\n\n", nrow(PAC_model_filtered)))
## Post-filter Number of participants: 375
data_for_correlation_PAC <- PAC_model %>% select(-record_id, -group)
data_for_correlation_PAC <- data_for_correlation_PAC %>%
mutate(across(where(is.character), ~as.numeric(as.character(.))))
# Calculate the correlation matrix
PAC_correlation_matrix <- cor(data_for_correlation_PAC, use = "complete.obs", method = "pearson")
# calculate p values
p_matrix_pac <- cor.mtest(data_for_correlation_PAC, use = "complete.obs", method = "pearson")
# Use corrplot to visualize the correlations and their significance
corrplot(PAC_correlation_matrix, p.mat = p_matrix_pac$p, method = "color",
sig.level = 0.05, type = "lower", order = "hclust",
tl.cex = 0.6, tl.col = "black", tl.srt = 45,
col = colorRampPalette(c("blue", "white", "red"))(200),
addrect = 2)
data_for_correlation_PAC_filtered <- PAC_model_filtered %>% select(-record_id, -group)
data_for_correlation_PAC_filtered <- data_for_correlation_PAC_filtered %>%
mutate(across(where(is.character), ~as.numeric(as.character(.))))
# Calculate the correlation matrix
PAC_correlation_matrix_filtered <- cor(data_for_correlation_PAC_filtered, use = "complete.obs", method = "pearson")
# calculate p values
p_matrix_pac_filtered <- cor.mtest(data_for_correlation_PAC_filtered, use = "complete.obs", method = "pearson")
# Use corrplot to visualize the correlations and their significance
corrplot(PAC_correlation_matrix_filtered, p.mat = p_matrix_pac$p, method = "color",
sig.level = 0.05, type = "lower", order = "hclust",
tl.cex = 0.6, tl.col = "black", tl.srt = 45,
col = colorRampPalette(c("blue", "white", "red"))(200),
addrect = 2)
PAC_correlation_matrix <- as.data.frame(as.table(PAC_correlation_matrix))
PAC_correlation_matrix_filtered <- as.data.frame(as.table(PAC_correlation_matrix_filtered))
write.xlsx(PAC_correlation_matrix, "/Users/hassanabdulrasul/Desktop/OPT/PAC_correlation_matrix.xlsx")
write.xlsx(PAC_correlation_matrix_filtered, "/Users/hassanabdulrasul/Desktop/OPT/PAC_correlation_matrix_filtered.xlsx")
scatter plot for OverallCompositeT0 vs “TNF-alpha”, “IL-1beta/IL-1F2”, “IL-6”, “TNFRI/TNFRSF1A”,“TNFRII/TNFRSF1B”
# adapted from stack overflow
PAC_plots <- lapply(PAC_variables, function(var) {
# Create the plot
PAC_plot <- ggplot(data_for_correlation_PAC, aes(x = OverallCompositeT0, y = .data[[var]])) +
geom_point() + # Add points
geom_smooth(method = "lm", se = FALSE, color = "blue") +
labs(title = paste("Scatter plot of Overall Composite vs", var),
x = "OverallCompositeT0", y = var) +
geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, .data[[var]], use = "complete.obs")),
x = Inf, y = Inf), hjust = 1.1, vjust = 1.1, color = "red", size = 5)
return(PAC_plot)
})
PAC_plots
## [[1]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 380 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
##
## [[2]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 380 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
##
## [[3]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 380 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
##
## [[4]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 380 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
##
## [[5]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 380 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
scatter plot for OverallCompositeT0 vs “TNF-alpha”, “IL-1beta/IL-1F2”, “IL-6”, “TNFRI/TNFRSF1A”,“TNFRII/TNFRSF1B”
# adapted from stack overflow
PAC_plots_filtered <- lapply(PAC_variables, function(var) {
# Create the plot
PAC_plot_filtered <- ggplot(data_for_correlation_PAC_filtered, aes(x = OverallCompositeT0, y = .data[[var]])) +
geom_point() + # Add points
geom_smooth(method = "lm", se = FALSE, color = "blue") +
labs(title = paste("Scatter plot of Overall Composite vs", var),
x = "OverallCompositeT0", y = var) +
geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, .data[[var]], use = "complete.obs")),
x = Inf, y = Inf), hjust = 1.1, vjust = 1.1, color = "red", size = 5)
return(PAC_plot_filtered)
})
PAC_plots_filtered
## [[1]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 375 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
##
## [[2]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 375 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
##
## [[3]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 375 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
##
## [[4]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 375 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
##
## [[5]]
## Warning in geom_text(aes(label = sprintf("r = %.2f", cor(OverallCompositeT0, : All aesthetics have length 1, but the data has 375 rows.
## ℹ Did you mean to use `annotate()`?
## `geom_smooth()` using formula = 'y ~ x'
PAC_violin_np_plots <- lapply(PAC_np_variables, function(var) {
ggplot(data_for_correlation_PAC, aes(x = provisional_dx, y = .data[[var]], fill = factor(provisional_dx))) +
geom_violin(trim = FALSE) + # Displays the full range of data distribution
labs(title = paste("Violin Plot of", var, "by Provisional Diagnosis Group"),
x = "Provisional Diagnosis Group",
y = var) +
theme_minimal() +
scale_fill_brewer(palette = "Set1") # Adds a color palette suitable for categorical data
})
# Display the plots
PAC_violin_np_plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
PAC_histogram_plots <- lapply(PAC_variables, function(var) {
ggplot(data_for_correlation_PAC, aes(x = .data[[var]], fill = factor(provisional_dx))) +
geom_histogram(bins = 30, position = "dodge", alpha = 0.7) +
#facet_wrap(~provisional_dx, scales = "free") +
labs(title = paste("Histogram of", var, "by Provisional Diagnosis Group"),
x = var,
y = "Count") +
theme_minimal() +
scale_fill_brewer(palette = "Set1")
})
PAC_histogram_plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
PAC_violin_np_plots_postfilter <- lapply(PAC_np_variables, function(var) {
ggplot(data_for_correlation_PAC_filtered, aes(x = provisional_dx, y = .data[[var]], fill = factor(provisional_dx))) +
geom_violin(trim = FALSE) + # Displays the full range of data distribution
labs(title = paste("Violin Plot of", var, "by Provisional Diagnosis (filterd) Group"),
x = "Provisional Diagnosis Group",
y = var) +
theme_minimal() +
scale_fill_brewer(palette = "Set1") # Adds a color palette suitable for categorical data
})
# Display the plots
PAC_violin_np_plots_postfilter
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
PAC_histogram_plots_filtered <- lapply(PAC_variables, function(var) {
ggplot(data_for_correlation_PAC_filtered, aes(x = .data[[var]], fill = factor(provisional_dx))) +
geom_histogram(bins = 30, position = "dodge", alpha = 0.7) +
#facet_wrap(~provisional_dx, scales = "free") +
labs(title = paste("Histogram of", var, "by Provisional Diagnosis Group"),
x = var,
y = "Count") +
theme_minimal() +
scale_fill_brewer(palette = "Set1")
})
PAC_histogram_plots_filtered
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
PAC_model_IL1beta <- lm(OverallCompositeT0 ~ `IL-1beta/IL-1F2` + age + demo_gender, data = PAC_model)
summary(PAC_model_IL1beta)
##
## Call:
## lm(formula = OverallCompositeT0 ~ `IL-1beta/IL-1F2` + age + demo_gender,
## data = PAC_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.54452 -0.42662 0.06964 0.54713 1.70192
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.406212 0.492851 4.882 1.55e-06 ***
## `IL-1beta/IL-1F2` -0.057404 0.026332 -2.180 0.0299 *
## age -0.040334 0.006113 -6.598 1.42e-10 ***
## demo_gender 0.061848 0.078757 0.785 0.4328
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7458 on 376 degrees of freedom
## Multiple R-squared: 0.1218, Adjusted R-squared: 0.1148
## F-statistic: 17.38 on 3 and 376 DF, p-value: 1.377e-10
PAC_model_TNF<- lm(OverallCompositeT0 ~ `TNF-alpha` + age + demo_gender, data = PAC_model)
summary(PAC_model_TNF)
##
## Call:
## lm(formula = OverallCompositeT0 ~ `TNF-alpha` + age + demo_gender,
## data = PAC_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4318 -0.4166 0.1075 0.5665 1.7127
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.242728 0.486931 4.606 5.63e-06 ***
## `TNF-alpha` -0.004854 0.003551 -1.367 0.172
## age -0.040364 0.006146 -6.567 1.70e-10 ***
## demo_gender 0.068006 0.078987 0.861 0.390
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7487 on 376 degrees of freedom
## Multiple R-squared: 0.1151, Adjusted R-squared: 0.108
## F-statistic: 16.3 on 3 and 376 DF, p-value: 5.594e-10
PAC_model_TNFRIa <- lm(OverallCompositeT0 ~ `TNFRI/TNFRSF1A` + age + demo_gender, data = PAC_model)
summary(PAC_model_TNFRIa)
##
## Call:
## lm(formula = OverallCompositeT0 ~ `TNFRI/TNFRSF1A` + age + demo_gender,
## data = PAC_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5041 -0.4156 0.1204 0.5801 1.6419
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.061e+00 4.698e-01 4.386 1.50e-05 ***
## `TNFRI/TNFRSF1A` -2.706e-05 1.007e-04 -0.269 0.788
## age -4.054e-02 6.378e-03 -6.357 5.96e-10 ***
## demo_gender 7.293e-02 7.930e-02 0.920 0.358
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7504 on 376 degrees of freedom
## Multiple R-squared: 0.1108, Adjusted R-squared: 0.1038
## F-statistic: 15.63 on 3 and 376 DF, p-value: 1.346e-09
PAC_model_TNFRIb <- lm(OverallCompositeT0 ~ `TNFRII/TNFRSF1B` + age + demo_gender, data = PAC_model)
summary(PAC_model_TNFRIb)
##
## Call:
## lm(formula = OverallCompositeT0 ~ `TNFRII/TNFRSF1B` + age + demo_gender,
## data = PAC_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4760 -0.4170 0.1089 0.5630 1.6878
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.057e+00 4.693e-01 4.383 1.52e-05 ***
## `TNFRII/TNFRSF1B` 2.974e-05 2.967e-05 1.002 0.317
## age -4.221e-02 6.252e-03 -6.752 5.55e-11 ***
## demo_gender 8.066e-02 7.915e-02 1.019 0.309
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7495 on 376 degrees of freedom
## Multiple R-squared: 0.113, Adjusted R-squared: 0.106
## F-statistic: 15.97 on 3 and 376 DF, p-value: 8.532e-10
PAC_model_IL6 <- lm(OverallCompositeT0 ~ `IL-6` + age + demo_gender, data = PAC_model)
summary(PAC_model_IL6)
##
## Call:
## lm(formula = OverallCompositeT0 ~ `IL-6` + age + demo_gender,
## data = PAC_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5026 -0.4038 0.1217 0.5736 1.6580
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.115645 0.470522 4.496 9.22e-06 ***
## `IL-6` -0.005664 0.004301 -1.317 0.189
## age -0.041249 0.006132 -6.727 6.47e-11 ***
## demo_gender 0.062513 0.079390 0.787 0.432
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7488 on 376 degrees of freedom
## Multiple R-squared: 0.1148, Adjusted R-squared: 0.1077
## F-statistic: 16.25 on 3 and 376 DF, p-value: 5.973e-10
PAC_model_IL1beta_pf <- lm(OverallCompositeT0 ~ `IL-1beta/IL-1F2` + age + demo_gender, data = PAC_model_filtered)
summary(PAC_model_IL1beta_pf)
##
## Call:
## lm(formula = OverallCompositeT0 ~ `IL-1beta/IL-1F2` + age + demo_gender,
## data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.54757 -0.41552 0.07242 0.54686 1.70660
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.500733 0.494493 5.057 6.70e-07 ***
## `IL-1beta/IL-1F2` -0.061139 0.026437 -2.313 0.0213 *
## age -0.041013 0.006118 -6.704 7.58e-11 ***
## demo_gender 0.051514 0.079436 0.648 0.5171
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7454 on 371 degrees of freedom
## Multiple R-squared: 0.1268, Adjusted R-squared: 0.1197
## F-statistic: 17.96 on 3 and 371 DF, p-value: 6.656e-11
PAC_model_TNF_pf<- lm(OverallCompositeT0 ~ `TNF-alpha` + age + demo_gender, data = PAC_model_filtered)
summary(PAC_model_TNF_pf)
##
## Call:
## lm(formula = OverallCompositeT0 ~ `TNF-alpha` + age + demo_gender,
## data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4402 -0.4150 0.1159 0.5683 1.7039
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.271896 0.489335 4.643 4.78e-06 ***
## `TNF-alpha` -0.004070 0.003894 -1.045 0.297
## age -0.041064 0.006176 -6.649 1.06e-10 ***
## demo_gender 0.061378 0.079751 0.770 0.442
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7496 on 371 degrees of freedom
## Multiple R-squared: 0.1168, Adjusted R-squared: 0.1097
## F-statistic: 16.36 on 3 and 371 DF, p-value: 5.276e-10
PAC_model_TNFRIa_pf <- lm(OverallCompositeT0 ~ `TNFRI/TNFRSF1A` + age + demo_gender, data = PAC_model_filtered)
summary(PAC_model_TNFRIa_pf)
##
## Call:
## lm(formula = OverallCompositeT0 ~ `TNFRI/TNFRSF1A` + age + demo_gender,
## data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5206 -0.4135 0.1147 0.5834 1.6358
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.130e+00 4.713e-01 4.520 8.35e-06 ***
## `TNFRI/TNFRSF1A` -6.317e-05 1.038e-04 -0.609 0.543
## age -4.065e-02 6.391e-03 -6.361 5.91e-10 ***
## demo_gender 6.147e-02 7.998e-02 0.769 0.443
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7504 on 371 degrees of freedom
## Multiple R-squared: 0.1151, Adjusted R-squared: 0.1079
## F-statistic: 16.09 on 3 and 371 DF, p-value: 7.511e-10
PAC_model_TNFRIb_pf <- lm(OverallCompositeT0 ~ `TNFRII/TNFRSF1B` + age + demo_gender, data = PAC_model_filtered)
summary(PAC_model_TNFRIb_pf)
##
## Call:
## lm(formula = OverallCompositeT0 ~ `TNFRII/TNFRSF1B` + age + demo_gender,
## data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4815 -0.4143 0.1212 0.5771 1.6736
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.130e+00 4.714e-01 4.518 8.42e-06 ***
## `TNFRII/TNFRSF1B` 1.858e-05 3.457e-05 0.537 0.591
## age -4.242e-02 6.296e-03 -6.738 6.16e-11 ***
## demo_gender 6.743e-02 7.987e-02 0.844 0.399
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7504 on 371 degrees of freedom
## Multiple R-squared: 0.1149, Adjusted R-squared: 0.1077
## F-statistic: 16.05 on 3 and 371 DF, p-value: 7.817e-10
PAC_model_IL6_pf <- lm(OverallCompositeT0 ~ `IL-6` + age + demo_gender, data = PAC_model_filtered)
summary(PAC_model_IL6_pf)
##
## Call:
## lm(formula = OverallCompositeT0 ~ `IL-6` + age + demo_gender,
## data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4778 -0.4162 0.1243 0.5769 1.6449
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.107774 0.473090 4.455 1.11e-05 ***
## `IL-6` 0.010216 0.016866 0.606 0.545
## age -0.041717 0.006152 -6.781 4.71e-11 ***
## demo_gender 0.067435 0.079836 0.845 0.399
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7504 on 371 degrees of freedom
## Multiple R-squared: 0.1151, Adjusted R-squared: 0.1079
## F-statistic: 16.08 on 3 and 371 DF, p-value: 7.524e-10
OPT_model_female <- OPT_model_filtered %>%
filter(SEX == 2)
PAC_model_female <- PAC_model_filtered %>%
filter(demo_gender == 2)
Regression models for osteo + cog in OPT
model_OPT_F_MTOTALIS<- lm(MTOTALIS ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
summary(model_OPT_F_MTOTALIS)
##
## Call:
## lm(formula = MTOTALIS ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.65320 -0.63919 -0.02194 0.66427 1.99018
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.068e-01 7.154e-01 0.149 0.881
## `Osteoprotegerin/TNFRSF11B` -7.056e-05 1.678e-04 -0.420 0.675
## AGE -1.742e-03 1.077e-02 -0.162 0.872
##
## Residual standard error: 0.8864 on 212 degrees of freedom
## Multiple R-squared: 0.001207, Adjusted R-squared: -0.008216
## F-statistic: 0.1281 on 2 and 212 DF, p-value: 0.8799
model_OPT_F_AIS<- lm(AIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
summary(model_OPT_F_AIS)
##
## Call:
## lm(formula = AIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.667 -8.347 0.412 9.614 36.713
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91.376270 11.623365 7.861 1.9e-13 ***
## `Osteoprotegerin/TNFRSF11B` -0.000291 0.002727 -0.107 0.915
## AGE 0.123501 0.175049 0.706 0.481
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.4 on 212 degrees of freedom
## Multiple R-squared: 0.002373, Adjusted R-squared: -0.007038
## F-statistic: 0.2522 on 2 and 212 DF, p-value: 0.7773
model_OPT_F_IMIS <- lm(IMIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
summary(model_OPT_F_IMIS)
##
## Call:
## lm(formula = IMIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.720 -10.902 2.472 11.074 35.321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.016e+02 1.268e+01 8.011 7.5e-14 ***
## `Osteoprotegerin/TNFRSF11B` -4.525e-04 2.974e-03 -0.152 0.879
## AGE -3.705e-03 1.909e-01 -0.019 0.985
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.71 on 212 degrees of freedom
## Multiple R-squared: 0.000127, Adjusted R-squared: -0.009306
## F-statistic: 0.01346 on 2 and 212 DF, p-value: 0.9866
model_OPT_F_LIS <- lm(LIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE , data = OPT_model_female)
summary(model_OPT_F_LIS)
##
## Call:
## lm(formula = LIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.361 -4.414 -1.072 4.647 26.875
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 101.230999 7.448308 13.591 <2e-16 ***
## `Osteoprotegerin/TNFRSF11B` -0.001760 0.001747 -1.007 0.315
## AGE 0.005737 0.112172 0.051 0.959
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.228 on 212 degrees of freedom
## Multiple R-squared: 0.004987, Adjusted R-squared: -0.0044
## F-statistic: 0.5313 on 2 and 212 DF, p-value: 0.5886
model_OPT_F_MVCIS <- lm(MVCIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE , data = OPT_model_female)
summary(model_OPT_F_MVCIS)
##
## Call:
## lm(formula = MVCIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.682 -12.150 -0.191 12.758 35.835
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.047e+02 1.384e+01 7.568 1.14e-12 ***
## `Osteoprotegerin/TNFRSF11B` 4.792e-04 3.246e-03 0.148 0.883
## AGE -1.254e-01 2.084e-01 -0.602 0.548
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.14 on 212 degrees of freedom
## Multiple R-squared: 0.001706, Adjusted R-squared: -0.007712
## F-statistic: 0.1811 on 2 and 212 DF, p-value: 0.8345
model_OPT_F_MDMIS <- lm(MDMIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
summary(model_OPT_F_MDMIS)
##
## Call:
## lm(formula = MDMIS_01 ~ `Osteoprotegerin/TNFRSF11B` + AGE, data = OPT_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -47.315 -5.388 0.983 9.055 25.113
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 108.156005 11.234635 9.627 <2e-16 ***
## `Osteoprotegerin/TNFRSF11B` -0.002088 0.002636 -0.792 0.429
## AGE -0.098840 0.169194 -0.584 0.560
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.92 on 212 degrees of freedom
## Multiple R-squared: 0.006098, Adjusted R-squared: -0.003279
## F-statistic: 0.6503 on 2 and 212 DF, p-value: 0.5229
Regression models for osteo + cog in PAC
model_PAC_F_VERB<- lm(VerbalMemoryDomainT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_PAC_F_VERB)
##
## Call:
## lm(formula = VerbalMemoryDomainT0 ~ Osteoprotegerin + age, data = PAC_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7426 -0.5786 0.1320 0.9046 2.2800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.5000410 0.9731506 1.541 0.1246
## Osteoprotegerin -0.0001952 0.0002499 -0.781 0.4354
## age -0.0278706 0.0143389 -1.944 0.0532 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.228 on 226 degrees of freedom
## Multiple R-squared: 0.0261, Adjusted R-squared: 0.01748
## F-statistic: 3.028 on 2 and 226 DF, p-value: 0.05038
model_PAC_F_VIS<- lm(VisuospatialMemoryDomainT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_PAC_F_VIS)
##
## Call:
## lm(formula = VisuospatialMemoryDomainT0 ~ Osteoprotegerin + age,
## data = PAC_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6586 -0.6546 0.2915 0.8622 2.0244
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.401e+00 9.785e-01 2.454 0.01490 *
## Osteoprotegerin -6.576e-05 2.512e-04 -0.262 0.79374
## age -4.172e-02 1.442e-02 -2.894 0.00418 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.235 on 226 degrees of freedom
## Multiple R-squared: 0.04255, Adjusted R-squared: 0.03408
## F-statistic: 5.022 on 2 and 226 DF, p-value: 0.007348
model_PAC_F_LANG<- lm(LanguageDomainT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_PAC_F_LANG)
##
## Call:
## lm(formula = LanguageDomainT0 ~ Osteoprotegerin + age, data = PAC_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2204 -0.5250 0.2375 0.7792 1.9390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2790024 0.8848739 1.445 0.1497
## Osteoprotegerin -0.0002501 0.0002272 -1.101 0.2721
## age -0.0251973 0.0130381 -1.933 0.0545 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.117 on 226 degrees of freedom
## Multiple R-squared: 0.03061, Adjusted R-squared: 0.02203
## F-statistic: 3.568 on 2 and 226 DF, p-value: 0.02982
model_PAC_F_EXEC<- lm(ExecutiveDomainT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_PAC_F_EXEC)
##
## Call:
## lm(formula = ExecutiveDomainT0 ~ Osteoprotegerin + age, data = PAC_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4082 -0.5380 0.1457 0.6534 2.1276
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.499e+00 7.000e-01 3.569 0.000437 ***
## Osteoprotegerin -6.148e-05 1.797e-04 -0.342 0.732605
## age -4.200e-02 1.031e-02 -4.072 6.44e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8837 on 226 degrees of freedom
## Multiple R-squared: 0.0805, Adjusted R-squared: 0.07237
## F-statistic: 9.893 on 2 and 226 DF, p-value: 7.606e-05
model_OPT_F_WM <- lm(WorkingMemoryDomainT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_OPT_F_WM)
##
## Call:
## lm(formula = WorkingMemoryDomainT0 ~ Osteoprotegerin + age, data = PAC_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.28236 -0.56148 0.06138 0.54501 2.11212
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.887e+00 6.467e-01 4.464 1.27e-05 ***
## Osteoprotegerin -5.988e-06 1.660e-04 -0.036 0.971
## age -5.144e-02 9.529e-03 -5.399 1.70e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8164 on 226 degrees of freedom
## Multiple R-squared: 0.127, Adjusted R-squared: 0.1193
## F-statistic: 16.44 on 2 and 226 DF, p-value: 2.167e-07
model_OPT_F_COMPOSITE <- lm(OverallCompositeT0 ~ `Osteoprotegerin` + age, data = PAC_model_female)
summary(model_OPT_F_COMPOSITE)
##
## Call:
## lm(formula = OverallCompositeT0 ~ Osteoprotegerin + age, data = PAC_model_female)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.53267 -0.44388 0.07337 0.57840 1.67103
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.3201432 0.5928589 3.913 0.00012 ***
## Osteoprotegerin -0.0001340 0.0001522 -0.880 0.37968
## age -0.0406972 0.0087355 -4.659 5.43e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7484 on 226 degrees of freedom
## Multiple R-squared: 0.1112, Adjusted R-squared: 0.1034
## F-statistic: 14.14 on 2 and 226 DF, p-value: 1.631e-06
model_PAC_tau_VERB<- lm(VerbalMemoryDomainT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_VERB)
##
## Call:
## lm(formula = VerbalMemoryDomainT0 ~ pTau * age + demo_gender,
## data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7790 -0.5720 0.0872 0.8362 2.3201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.1767125 1.1231739 1.048 0.29548
## pTau 0.0315391 0.2331862 0.135 0.89249
## age -0.0354468 0.0152037 -2.331 0.02027 *
## demo_gender 0.3458754 0.1246853 2.774 0.00582 **
## pTau:age -0.0005023 0.0032484 -0.155 0.87719
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.171 on 370 degrees of freedom
## Multiple R-squared: 0.06318, Adjusted R-squared: 0.05305
## F-statistic: 6.238 on 4 and 370 DF, p-value: 7.243e-05
model_PAC_tau_VIS<- lm(VisuospatialMemoryDomainT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_VIS)
##
## Call:
## lm(formula = VisuospatialMemoryDomainT0 ~ pTau * age + demo_gender,
## data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6349 -0.6021 0.2895 0.8546 2.2493
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.031863 1.191779 1.705 0.0891 .
## pTau 0.073223 0.247429 0.296 0.7674
## age -0.043086 0.016132 -2.671 0.0079 **
## demo_gender 0.203706 0.132301 1.540 0.1245
## pTau:age -0.001034 0.003447 -0.300 0.7643
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.242 on 370 degrees of freedom
## Multiple R-squared: 0.06315, Adjusted R-squared: 0.05303
## F-statistic: 6.235 on 4 and 370 DF, p-value: 7.276e-05
model_PAC_tau_LANG<- lm(LanguageDomainT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_LANG)
##
## Call:
## lm(formula = LanguageDomainT0 ~ pTau * age + demo_gender, data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3106 -0.5383 0.1659 0.7693 2.0927
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0332991 1.0488798 0.985 0.3252
## pTau 0.0832202 0.2177617 0.382 0.7026
## age -0.0275275 0.0141980 -1.939 0.0533 .
## demo_gender 0.0446689 0.1164378 0.384 0.7015
## pTau:age -0.0008286 0.0030336 -0.273 0.7849
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.093 on 370 degrees of freedom
## Multiple R-squared: 0.03588, Adjusted R-squared: 0.02545
## F-statistic: 3.442 on 4 and 370 DF, p-value: 0.008861
model_PAC_tau_EXEC<- lm(ExecutiveDomainT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_EXEC)
##
## Call:
## lm(formula = ExecutiveDomainT0 ~ pTau * age + demo_gender, data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.63477 -0.53636 0.08739 0.61301 2.19265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.764284 0.838825 0.911 0.3628
## pTau 0.362191 0.174152 2.080 0.0382 *
## age -0.019713 0.011355 -1.736 0.0834 .
## demo_gender 0.029222 0.093119 0.314 0.7538
## pTau:age -0.004917 0.002426 -2.027 0.0434 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8743 on 370 degrees of freedom
## Multiple R-squared: 0.07947, Adjusted R-squared: 0.06952
## F-statistic: 7.985 on 4 and 370 DF, p-value: 3.492e-06
model_PAC_tau_WM <- lm(WorkingMemoryDomainT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_WM)
##
## Call:
## lm(formula = WorkingMemoryDomainT0 ~ pTau * age + demo_gender,
## data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.70803 -0.61826 0.05493 0.57064 2.23985
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.979595 0.827536 1.184 0.2373
## pTau 0.379942 0.171808 2.211 0.0276 *
## age -0.019682 0.011202 -1.757 0.0797 .
## demo_gender -0.216286 0.091866 -2.354 0.0191 *
## pTau:age -0.004996 0.002393 -2.087 0.0375 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8626 on 370 degrees of freedom
## Multiple R-squared: 0.09495, Adjusted R-squared: 0.08517
## F-statistic: 9.705 on 4 and 370 DF, p-value: 1.79e-07
model_PAC_tau_COMPOSITE <- lm(OverallCompositeT0 ~ `pTau`* age + demo_gender, data = PAC_model_filtered)
summary(model_PAC_tau_COMPOSITE)
##
## Call:
## lm(formula = OverallCompositeT0 ~ pTau * age + demo_gender, data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.48591 -0.41143 0.09383 0.56572 1.66538
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.688331 0.718669 2.349 0.019337 *
## pTau 0.137115 0.149206 0.919 0.358713
## age -0.036182 0.009728 -3.719 0.000231 ***
## demo_gender 0.066317 0.079781 0.831 0.406373
## pTau:age -0.001710 0.002079 -0.823 0.411165
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7491 on 370 degrees of freedom
## Multiple R-squared: 0.1205, Adjusted R-squared: 0.111
## F-statistic: 12.67 on 4 and 370 DF, p-value: 1.128e-09
model_PAC_tausex_VERB<- lm(VerbalMemoryDomainT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_VERB)
##
## Call:
## lm(formula = VerbalMemoryDomainT0 ~ pTau * demo_gender + age,
## data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7713 -0.5642 0.0848 0.8289 2.3396
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.549661 0.762919 2.031 0.042946 *
## pTau -0.063751 0.053606 -1.189 0.235102
## demo_gender 0.214528 0.167302 1.282 0.200548
## age -0.037831 0.009622 -3.932 0.000101 ***
## pTau:demo_gender 0.038139 0.032826 1.162 0.246045
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.169 on 370 degrees of freedom
## Multiple R-squared: 0.06652, Adjusted R-squared: 0.05643
## F-statistic: 6.592 on 4 and 370 DF, p-value: 3.919e-05
model_PAC_tausex_VIS<- lm(VisuospatialMemoryDomainT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_VIS)
##
## Call:
## lm(formula = VisuospatialMemoryDomainT0 ~ pTau * demo_gender +
## age, data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6120 -0.6042 0.2892 0.8299 2.2251
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.52902 0.80991 3.123 0.00193 **
## pTau -0.05652 0.05691 -0.993 0.32126
## demo_gender 0.07914 0.17761 0.446 0.65616
## age -0.04736 0.01021 -4.637 4.92e-06 ***
## pTau:demo_gender 0.03579 0.03485 1.027 0.30505
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.241 on 370 degrees of freedom
## Multiple R-squared: 0.06559, Adjusted R-squared: 0.05549
## F-statistic: 6.493 on 4 and 370 DF, p-value: 4.655e-05
model_PAC_tausex_LANG<- lm(LanguageDomainT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_LANG)
##
## Call:
## lm(formula = LanguageDomainT0 ~ pTau * demo_gender + age, data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1452 -0.5662 0.1926 0.7983 2.1725
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.418237 0.713079 1.989 0.047449 *
## pTau -0.017445 0.050104 -0.348 0.727911
## demo_gender -0.047954 0.156373 -0.307 0.759271
## age -0.030922 0.008994 -3.438 0.000652 ***
## pTau:demo_gender 0.026573 0.030681 0.866 0.386999
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.092 on 370 degrees of freedom
## Multiple R-squared: 0.03763, Adjusted R-squared: 0.02723
## F-statistic: 3.617 on 4 and 370 DF, p-value: 0.006592
model_PAC_tausex_EXEC<- lm(ExecutiveDomainT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_EXEC)
##
## Call:
## lm(formula = ExecutiveDomainT0 ~ pTau * demo_gender + age, data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.6316 -0.5811 0.1019 0.6101 2.2573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.888101 0.573130 3.294 0.00108 **
## pTau 0.049687 0.040271 1.234 0.21805
## demo_gender 0.104733 0.125683 0.833 0.40521
## age -0.037148 0.007228 -5.139 4.48e-07 ***
## pTau:demo_gender -0.025453 0.024660 -1.032 0.30267
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8779 on 370 degrees of freedom
## Multiple R-squared: 0.07192, Adjusted R-squared: 0.06189
## F-statistic: 7.168 on 4 and 370 DF, p-value: 1.441e-05
model_PAC_tausex_WM <- lm(WorkingMemoryDomainT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_WM)
##
## Call:
## lm(formula = WorkingMemoryDomainT0 ~ pTau * demo_gender + age,
## data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.69918 -0.58470 0.07103 0.58773 2.19516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.200787 0.566189 3.887 0.00012 ***
## pTau 0.042946 0.039783 1.080 0.28106
## demo_gender -0.182275 0.124161 -1.468 0.14294
## age -0.037583 0.007141 -5.263 2.41e-07 ***
## pTau:demo_gender -0.013349 0.024361 -0.548 0.58406
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8673 on 370 degrees of freedom
## Multiple R-squared: 0.08504, Adjusted R-squared: 0.07514
## F-statistic: 8.597 on 4 and 370 DF, p-value: 1.211e-06
model_PAC_tausex_COMPOSITE <- lm(OverallCompositeT0 ~ `pTau`* demo_gender + age, data = PAC_model_filtered)
summary(model_PAC_tausex_COMPOSITE)
##
## Call:
## lm(formula = OverallCompositeT0 ~ pTau * demo_gender + age, data = PAC_model_filtered)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.47073 -0.40933 0.09054 0.56468 1.70146
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.239532 0.489075 4.579 6.39e-06 ***
## pTau -0.010932 0.034365 -0.318 0.751
## demo_gender 0.006209 0.107250 0.058 0.954
## age -0.042621 0.006168 -6.910 2.13e-11 ***
## pTau:demo_gender 0.016450 0.021043 0.782 0.435
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7492 on 370 degrees of freedom
## Multiple R-squared: 0.1203, Adjusted R-squared: 0.1108
## F-statistic: 12.65 on 4 and 370 DF, p-value: 1.165e-09