all |> dplyr::distinct(event, study)
notice some peoples first visit ( V1 ) may be in
HCA or, AABC – same being true for
V2
all <- all |>
dplyr::mutate(
event = factor(
event,
levels = c(
"V1", "F1",
"V2", "F2",
"CR",
"V3", "F3",
"V4",
"AF1"
),
ordered = TRUE
),
study = factor(
study,
levels = c("HCA", "AABC"),
ordered = TRUE
)
)
unique(all$study)
## [1] HCA AABC
## Levels: HCA < AABC
unique(all$event)
## [1] V1 F1 V2 F2 CR V3 F3 AF1 V4
## Levels: V1 < F1 < V2 < F2 < CR < V3 < F3 < V4 < AF1
patient_paths <-
all |>
filter(age_open != "90 or older") |>
mutate(
age_open = as.numeric(age_open),
patient = str_remove(id, "^[A-Z]+")
) |>
distinct(patient, study, event, age_open) |>
arrange(patient, age_open, study, event) |>
group_by(patient) |>
summarise(
n_studies = n_distinct(study),
n_events = n(),
years_passed = if_else(
n() > 1,
max(age_open, na.rm = TRUE) - min(age_open, na.rm = TRUE),
NA_real_
),
study_event_path = paste(
paste(study, event, sep = ":"),
collapse = " → "
),
.groups = "drop"
) |>
dplyr::select(
patient,
years_passed,
study_event_path,
everything()
) |>
arrange(desc(years_passed))
plot(patient_paths$years_passed, patient_paths$n_events, xlab = "Yrs Passed", ylab = "n_events")
patient_paths
library(tidyverse)
patient_steps <-
patient_paths |>
separate_rows(study_event_path, sep = " → ") |>
separate(
study_event_path,
into = c("study", "event"),
sep = ":"
) |>
mutate(
study = factor(
study,
levels = c("HCA", "AABC"),
ordered = TRUE
),
event = factor(
event,
levels = c(
"V1", "F1",
"V2", "F2",
"CR",
"V3", "F3",
"V4",
"AF1"
),
ordered = TRUE
)
) |>
group_by(patient) |>
mutate(step = row_number()) |>
ungroup()
patient_steps <-
patient_steps |>
mutate(
study_event = interaction(
study,
event,
sep = ":",
lex.order = TRUE
)
)
patient_edges <-
patient_steps |>
arrange(patient, step) |>
group_by(patient) |>
mutate(
next_study_event = lead(study_event),
next_step = lead(step)
) |>
filter(!is.na(next_study_event)) |>
ungroup()
transition_counts <-
patient_edges |>
count(
step,
next_step,
study_event,
next_study_event,
name = "n_patients"
)
library(grid)
max_step <- max(patient_steps$step)
path_plot <-
ggplot(transition_counts) +
geom_segment(
aes(
x = step,
xend = next_step,
y = study_event,
yend = next_study_event,
color = n_patients,
linewidth = n_patients
),
arrow = arrow(length = unit(0.15, "cm")),
alpha = 0.8
) +
geom_point(
data = patient_steps |> distinct(step, study_event),
aes(x = step, y = study_event),
size = 3
) +
scale_x_continuous(
breaks = seq(1, max_step, by = 1),
limits = c(1, max_step)
) +
scale_linewidth(range = c(0.5, 3)) +
scale_color_gradient(
low = "mistyrose",
high = "red4"
) +
labs(
x = "Sequence Step",
y = "Study:Event",
color = "Number of Patients",
linewidth = "Number of Patients",
title = "Patient Study/Event Paths"
) +
theme_minimal()
path_plot
library(dplyr)
library(stringr)
event_levels <- c(
"V1", "F1",
"V2", "F2",
"CR",
"V3", "F3",
"V4",
"AF1"
)
time_between_events <-
all |>
filter(age_open != "90 or older") |>
mutate(
age_open = as.numeric(age_open),
patient = str_remove(id, "^[A-Z]+"),
event = factor(
event,
levels = event_levels,
ordered = TRUE
),
study = factor(
study,
levels = c("HCA", "AABC"),
ordered = TRUE
),
study_event = paste(study, event, sep = ":")
) |>
distinct(
patient,
study,
event,
age_open,
.keep_all = TRUE
) |>
arrange(
patient,
event,
study,
age_open
) |>
group_by(patient) |>
mutate(
from_step = row_number(),
to_step = lead(from_step),
from_study = study,
from_event = event,
from_study_event = study_event,
from_age = age_open,
to_study = lead(study),
to_event = lead(event),
to_study_event = lead(study_event),
to_age = lead(age_open),
years_between = to_age - from_age
) |>
ungroup() |>
filter(!is.na(to_study_event)) |>
dplyr::select(
patient,
from_step,
to_step,
from_study,
from_event,
from_study_event,
from_age,
to_study,
to_event,
to_study_event,
to_age,
years_between
)
event_steps <-
bind_rows(
time_between_events |>
transmute(
patient,
step = from_step,
study_event = from_study_event,
age = from_age
),
time_between_events |>
transmute(
patient,
step = to_step,
study_event = to_study_event,
age = to_age
)
) |>
distinct(patient, step, study_event, age) |>
arrange(patient, step)
patient_event_paths <-
event_steps |>
group_by(patient) |>
summarise(
full_path = paste(study_event, collapse = " → "),
n_events = n(),
.groups = "drop"
)
path_direct <- paste(
c("HCA:V1", "HCA:F1", "HCA:F2"),
collapse = " → "
)
path_with_visit <- paste(
c("HCA:V1", "HCA:F1", "HCA:V2", "HCA:F2"),
collapse = " → "
)
direct_patients <-
patient_event_paths |>
filter(full_path == path_direct) |>
pull(patient)
visit_patients <-
patient_event_paths |>
filter(full_path == path_with_visit) |>
pull(patient)
direct_f1_to_f2_time <-
event_steps |>
filter(patient %in% direct_patients) |>
group_by(patient) |>
summarise(
path_type = "HCA:V1 → HCA:F1 → HCA:F2",
age_f1 = age[study_event == "HCA:F1"],
age_f2 = age[study_event == "HCA:F2"],
years_f1_to_f2 = age_f2 - age_f1,
.groups = "drop"
)
visit_f1_to_f2_time <-
event_steps |>
filter(patient %in% visit_patients) |>
group_by(patient) |>
summarise(
path_type = "HCA:V1 → HCA:F1 → HCA:V2 → HCA:F2",
age_f1 = age[study_event == "HCA:F1"],
age_v2 = age[study_event == "HCA:V2"],
age_f2 = age[study_event == "HCA:F2"],
years_f1_to_v2 = age_v2 - age_f1,
years_v2_to_f2 = age_f2 - age_v2,
years_f1_to_f2 = age_f2 - age_f1,
.groups = "drop"
)
f1_to_f2_comparison <-
bind_rows(
direct_f1_to_f2_time,
visit_f1_to_f2_time |>
dplyr::select(patient, path_type, age_f1, age_f2, years_f1_to_f2)
)
f1_to_f2_comparison
f1_to_f2_summary <-
f1_to_f2_comparison |>
group_by(path_type) |>
summarise(
n_patients = n(),
mean_years_f1_to_f2 = mean(years_f1_to_f2, na.rm = TRUE),
median_years_f1_to_f2 = median(years_f1_to_f2, na.rm = TRUE),
sd_years_f1_to_f2 = sd(years_f1_to_f2, na.rm = TRUE),
min_years_f1_to_f2 = min(years_f1_to_f2, na.rm = TRUE),
max_years_f1_to_f2 = max(years_f1_to_f2, na.rm = TRUE),
.groups = "drop"
)
f1_to_f2_summary
ggplot(f1_to_f2_comparison,
aes(x = path_type, y = years_f1_to_f2)) +
geom_boxplot() +
geom_jitter(width = 0.15, alpha = 0.5) +
labs(
x = "Exact patient path",
y = "Years from HCA:F1 to HCA:F2",
title = "Time from HCA:F1 to HCA:F2 by exact event path"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 20, hjust = 1))
A great deal of data is missing ( NA ) – therefore our
data is heavily biased. We would like to determine in
what way its biased. Perhaps by adjusting for
this bias we see strong associations for each variable to the cognitive
markers in studying dimentia.
library(dplyr)
library(naniar)
blood |>
miss_var_summary() |>
as.data.frame() |>
dplyr::select(variable, pct_miss)
direct_ldl is entirely missing, we may discard this feature
Many repeated values
diet |>
miss_var_summary() |>
as.data.frame() |>
dplyr::select(variable, pct_miss)
interpretation :
Notice the only features without missingness
Notice the large amount of missingness is the
| Participant info/Demographics |
data_categoryNotice that the missingness appears to repeat – showing
runs of the same pct_miss for both
features
ct <-
diet |>
miss_var_summary() |>
mutate(
ct = ifelse(pct_miss == 0, 0, 1)
) |> count(ct)
ct
Interpretation :
84.6 percent missing – which is a HUGE
amt.ct <-
blood |>
miss_var_summary()
library(ggplot2)
ct |>
ggplot(aes(pct_miss)) +
geom_histogram(binwidth = 10)
interpretation
this one is slightly more varied
0, about 60, 90 100% percent missing – with 60, 90 happening quite frequently
ct <- ct |>
mutate(
char = format(pct_miss)
)
ct <- as.vector(ct$pct_miss)/100
ct <- ct |> round(1)
ct <- ct |> as.character()
barplot(table(ct))
patient
study & events ctPurpose : identify how often a patients appear in
our data.
For example :
Are they in both studies?
How many unique events?
unique_id, Patient Variablelibrary(dplyr)
diet <-
diet |>
mutate(
patient = str_remove(id, "^[A-Z]+")
) |> dplyr::select(patient, everything())
blood <-
blood |>
mutate(
patient = str_remove(id, "^[A-Z]+")
) |> dplyr::select(patient, everything())
patient
specific missing ctPurpose : identify how often patients appear in
our data to better understand missing values.
diet_patients <-
diet |>
group_by(patient) |>
summarize(
n_rows = n(),
total_missing =
sum(across(everything(), ~ sum(is.na(.x)))),
.groups = "drop"
) |>
mutate(
pct =
round(
total_missing /
(n_rows * (ncol(diet) - 1)),
2
)
) |>
arrange(desc(pct))
diet_patients <-
diet_patients |> mutate(
p_rank = paste0("patient", row_number())
) |> dplyr::select(p_rank, everything(), patient)
plot(diet_patients$n_rows, diet_patients$total_missing)
blood_patients <-
blood |>
group_by(patient) |>
summarize(
n_rows = n(),
total_missing =
sum(across(everything(), ~ sum(is.na(.x)))),
total_cells =
n_rows * ncol(pick(everything())),
pct =
round(total_missing / total_cells, 2),
.groups = "drop"
) |>
arrange(desc(pct))
blood_patients <-
blood_patients |> mutate(
p_rank = paste0("patient", row_number())
) |> dplyr::select(p_rank, everything(), patient)
plot(blood_patients$n_rows,blood_patients$total_missing)
Cognitive Features – ALLNote :
we have repeated observations, so we are taking people who had more visits more into account
Relevant factor analysis Features^
Memory_Tr35_60y,
FluidIQ_Tr35_60y,
CrystIQ_Tr35_60y
Memory <- all$Memory_Tr35_60y |> as.numeric()
fluidIQ <- all$FluidIQ_Tr35_60y |> as.numeric()
CrystIQ <- all$CrystIQ_Tr35_60y |> as.numeric()
Memory,
fluidIQ dot-plt# Memory ~ fluidIQ
Memory_fluidIQ_mdl <- lm(Memory ~ fluidIQ)
plot(Memory, fluidIQ)
abline(Memory_fluidIQ_mdl, col = "red")
Memory,
CrystIQ dot-plt# Memory ~ CrystIQ
Memory_CrystIQ_mdl <- lm(Memory ~ CrystIQ)
plot(Memory, CrystIQ)
abline(Memory_CrystIQ_mdl, col = "red")
fluidIQ,
CrystIQ dot-plt# fluidIQ ~ CrystIQ
fluidIQ_CrystIQ_mdl <- lm(fluidIQ ~ CrystIQ)
plot(fluidIQ, CrystIQ)
abline(fluidIQ_CrystIQ_mdl, col = "red")
diet_num <- diet |> dplyr::select(where(is.numeric))
blood_num <- blood |> dplyr::select(where(is.numeric))
diet_num <- cbind(Memory, fluidIQ, CrystIQ, diet_num)
blood_num <- cbind(Memory, fluidIQ, CrystIQ, blood_num)
Memorymem_cor <-
cor(diet_num$Memory, diet_num[4:length(diet_num)],
use = "complete.obs")
mem_cor <-
mem_cor |>
t() |> as.data.frame()
mem_cor <-
mem_cor |>
rename(Memory = V1) |>
round(3) |>
dplyr::select(Memory)
mem_cor
fluidIQfluid_cor <-
cor(diet_num$fluidIQ, diet_num[4:length(diet_num)],
use = "complete.obs")
fluid_cor <-
fluid_cor |>
t() |> as.data.frame()
fluid_cor |>
rename(fluidIQ = V1) |>
round(3) |>
dplyr::select(fluidIQ)
CrystIQcryst_cor <-
cor(diet_num$CrystIQ, diet_num[4:length(diet_num)],
use = "complete.obs")
cryst_cor <-
cryst_cor |>
t() |> as.data.frame()
cryst_cor <-
cryst_cor |>
rename(CrystIQ = V1) |>
round(3) |> dplyr::select(CrystIQ)
cryst_cor
Overall interpretation
Extremely weak correlations for all variables in Diet Data
Possible Causes
underlying relationship isnt linear
Missing values may be biasing variables
diet_cor_lst <-
list(
cryst_cor=cryst_cor,
fluid_cor=fluid_cor,
mem_cor=mem_cor
)
blood |>
group_by(patient) |>
dplyr::select(patient, where(is.numeric)) |>
summarize(
across(everything(), ~ mean(.x, na.rm = TRUE))
)
Cognitive Featureshere we generate a report of correlations for each individual patient
Suppose we just average out their values across all
Study and event
diet_patients
cognitive_df <-
all |>
dplyr::select(id_event, id, event, study,
Memory_Tr35_60y,
FluidIQ_Tr35_60y,
CrystIQ_Tr35_60y) |>
mutate(
patient = str_remove(id, "^[A-Z]+")
) |> dplyr::select(patient, everything()) |>
mutate(
across(
-c(patient, id_event, id, event, study),
as.numeric
)
)
par(mfrow = c(1, 3))
hist(
cognitive_df$Memory_Tr35_60y,
main = "Memory",
xlab = ""
)
hist(
cognitive_df$FluidIQ_Tr35_60y,
main = "FluidIQ",
xlab = ""
)
hist(
cognitive_df$CrystIQ_Tr35_60y,
main = "CrystIQ",
xlab = ""
)
cognitive_df |>
dplyr::select(where(is.numeric)) |>
summarize(
mean_Memory = mean(Memory_Tr35_60y, na.rm = TRUE),
sd_Memory = sd(Memory_Tr35_60y, na.rm = TRUE),
mean_FluidIQ_y = mean(FluidIQ_Tr35_60y, na.rm = TRUE),
sd_FluidIQ = sd(FluidIQ_Tr35_60y, na.rm = TRUE),
mean_CrystIQ = mean(CrystIQ_Tr35_60y, na.rm = TRUE),
sd_CrystIQ = sd(CrystIQ_Tr35_60y, na.rm = TRUE)
)
tbl <-
all %>%
count(study, event, name = "n") %>%
mutate(percent = round(100 * n / sum(n), 2)) %>%
arrange(study, event)
library(dplyr)
library(gt)
library(scales)
all %>%
count(study, event, name = "n") %>%
mutate(percent = n / sum(n)) %>%
arrange(study, event) %>%
gt(groupname_col = "study") %>%
cols_label(
event = "Event",
n = "Count",
percent = "Percent"
) %>%
fmt_percent(
columns = percent,
decimals = 1
) %>%
tab_header(
title = md("**Event Counts by Study**"),
subtitle = "Distribution of event-study pairs"
) %>%
cols_align(
align = "center",
columns = c(n, percent)
) %>%
tab_options(
table.font.size = px(14),
heading.title.font.size = px(18)
)
| Event Counts by Study | ||
| Distribution of event-study pairs | ||
| Event | Count | Percent |
|---|---|---|
| HCA | ||
| V1 | 1198 | 18.5% |
| F1 | 1136 | 17.6% |
| V2 | 609 | 9.4% |
| F2 | 843 | 13.0% |
| CR | 491 | 7.6% |
| F3 | 550 | 8.5% |
| AABC | ||
| V1 | 198 | 3.1% |
| V2 | 306 | 4.7% |
| V3 | 471 | 7.3% |
| V4 | 96 | 1.5% |
| AF1 | 567 | 8.8% |
patient_age_order <-
all |>
filter(age_open != "90 or older") |>
mutate(
patient = str_remove(id, "^[A-Z]+"),
age_open = as.numeric(age_open),
event = factor(
event,
levels = event_levels,
ordered = TRUE
),
study = factor(
study,
levels = c("HCA", "AABC"),
ordered = TRUE
)
) |>
arrange(
patient,
age_open,
study,
event
) |>
group_by(patient) |>
mutate(
visit_order = row_number()
) |>
ungroup()
patient_age_order |> mutate(
person =
paste0(
"p",
dense_rank(patient)
)
) |>
dplyr::select(person, visit_order,
age_open,
study,
event,
patient
) |>
arrange(
patient,
age_open,
study,
event
) |>
group_by(patient)
library(dplyr)
library(ggplot2)
library(grid)
study_event_levels <-
expand.grid(
study = c("HCA", "AABC"),
event = c(
"V1", "F1",
"V2", "F2",
"CR",
"V3", "F3",
"V4",
"AF1"
)
) |>
arrange(study, event) |>
transmute(
study_event = paste(study, event, sep = ":")
) |>
pull(study_event)
age_ordered_steps <-
patient_age_order |>
mutate(
person =
paste0(
"p",
dense_rank(patient)
),
study = factor(
study,
levels = c("HCA", "AABC"),
ordered = TRUE
),
event = factor(
event,
levels = c(
"V1", "F1",
"V2", "F2",
"CR",
"V3", "F3",
"V4",
"AF1"
),
ordered = TRUE
),
study_event =
factor(
paste(study, event, sep = ":"),
levels = study_event_levels,
ordered = TRUE
)
) |>
arrange(
patient,
age_open,
study,
event
) |>
group_by(patient) |>
mutate(
step = row_number()
) |>
ungroup()
age_ordered_edges <-
age_ordered_steps |>
arrange(patient, step) |>
group_by(patient) |>
mutate(
next_step = lead(step),
next_study_event =
lead(study_event),
next_age =
lead(age_open)
) |>
filter(!is.na(next_study_event)) |>
ungroup()
age_transition_counts <-
age_ordered_edges |>
count(
step,
next_step,
study_event,
next_study_event,
name = "n_patients"
)
max_step <- max(age_ordered_steps$step)
age_path_plot <-
ggplot(age_transition_counts) +
geom_segment(
aes(
x = step,
xend = next_step,
y = study_event,
yend = next_study_event,
linewidth = n_patients,
color = n_patients
),
arrow =
arrow(length = unit(0.12, "cm")),
alpha = 0.75
) +
geom_point(
data =
age_ordered_steps |>
distinct(step, study_event),
aes(
x = step,
y = study_event
),
size = 3
) +
scale_x_continuous(
breaks = seq(1, max_step, by = 1)
) +
scale_linewidth(
range = c(0.5, 4)
) +
scale_color_gradient(
low = "mistyrose",
high = "red4"
) +
labs(
title =
"Age-Ordered Patient Movement Through Study Events",
subtitle =
"Transitions follow youngest → oldest observations",
x = "Sequential Observation Order",
y = "Study : Event",
linewidth = "Patients",
color = "Patients"
) +
theme_minimal()
age_path_plot
retention_df <-
patient_age_order |>
group_by(patient) |>
summarise(
n_visits = n(),
first_age =
min(age_open, na.rm = TRUE),
last_age =
max(age_open, na.rm = TRUE),
years_followed =
last_age - first_age,
first_event =
first(event),
last_event =
last(event),
first_study =
first(study),
last_study =
last(study),
completed_v4 =
any(event == "V4"),
completed_f3 =
any(event == "F3"),
last_observed_event =
paste(last_study, last_event, sep = ":"),
.groups = "drop"
)
retention_df
visit_retention <-
patient_age_order |>
group_by(patient) |>
mutate(
max_visit =
max(visit_order)
) |>
ungroup()
d <- visit_retention |> group_by(patient) |> mutate(mean(max_visit))
barplot(table(d$max_visit))
table(d$max_visit)
##
## 1 2 3 4 5 6 7 8 9
## 130 290 309 1020 1220 1254 1190 672 90
summary(d$max_visit)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 4.000 6.000 5.483 7.000 9.000
library(dplyr)
library(ggplot2)
library(grid)
study_event_levels <-
c(
"HCA:V1",
"HCA:F1",
"HCA:V2",
"HCA:F2",
"HCA:CR",
"HCA:V3",
"HCA:F3",
"HCA:V4",
"HCA:AF1",
"AABC:V1",
"AABC:F1",
"AABC:V2",
"AABC:F2",
"AABC:CR",
"AABC:V3",
"AABC:F3",
"AABC:V4",
"AABC:AF1"
)
retention_paths <-
patient_age_order |>
mutate(
study_event =
factor(
paste(study, event, sep = ":"),
levels = study_event_levels,
ordered = TRUE
)
) |>
arrange(
patient,
age_open,
study,
event
) |>
group_by(patient) |>
mutate(
visit_order = row_number(),
next_visit = lead(visit_order),
next_study_event =
lead(study_event)
) |>
ungroup()
node_counts <-
retention_paths |>
count(
visit_order,
study_event,
name = "n_patients"
)
edge_counts <-
retention_paths |>
filter(!is.na(next_study_event)) |>
count(
visit_order,
next_visit,
study_event,
next_study_event,
name = "n_patients"
)
dropoff_counts <-
retention_paths |>
group_by(patient) |>
summarise(
final_visit =
max(visit_order)
) |>
count(final_visit)
retention_plot <-
ggplot() +
geom_segment(
data = edge_counts,
aes(
x = visit_order,
xend = next_visit,
y = study_event,
yend = next_study_event,
linewidth = n_patients,
color = n_patients
),
alpha = 0.5,
arrow =
arrow(length = unit(0.10, "cm"))
) +
geom_point(
data = node_counts,
aes(
x = visit_order,
y = study_event,
size = n_patients
),
color = "black",
alpha = 0.85
) +
geom_text(
data =
dropoff_counts,
aes(
x = final_visit,
y = 1,
label = paste0(
"dropoff=", n
)
),
size = 3,
vjust = -1
) +
scale_x_continuous(
breaks = 1:9
) +
scale_size(
range = c(2, 12)
) +
scale_linewidth(
range = c(0.3, 4)
) +
scale_color_gradient(
low = "lightblue",
high = "darkred"
) +
labs(
title =
"Longitudinal Retention and Movement Through Study Events",
subtitle =
"Node size = remaining participants | edge thickness = transitions",
x =
"Visit Order",
y =
"Study : Event",
size =
"Patients",
linewidth =
"Transitions",
color =
"Transitions"
) +
theme_minimal()
retention_plot
library(dplyr)
baseline_features <-
all |>
filter(age_open != "90 or older") |>
mutate(
patient = str_remove(id, "^[A-Z]+")
) |>
group_by(patient) |>
summarise(
Memory =
mean(as.numeric(Memory_Tr35_60y), na.rm = TRUE),
FluidIQ =
mean(as.numeric(FluidIQ_Tr35_60y), na.rm = TRUE),
CrystIQ =
mean(as.numeric(CrystIQ_Tr35_60y), na.rm = TRUE),
age =
mean(as.numeric(age_open), na.rm = TRUE),
sex =
first(sex),
.groups = "drop"
)
blood_summary <-
blood |>
group_by(patient) |>
summarise(
glucose =
mean(glucose, na.rm = TRUE),
hba1c =
mean(hba1c, na.rm = TRUE),
hdl =
mean(hdl, na.rm = TRUE),
cholesterol =
mean(cholesterol, na.rm = TRUE),
insulin =
mean(insulin, na.rm = TRUE),
.groups = "drop"
)
diet_summary <-
diet |>
group_by(patient) |>
summarise(
asa24_fibe =
mean(asa24_fibe, na.rm = TRUE),
asa24_magn =
mean(asa24_magn, na.rm = TRUE),
asa24_fola =
mean(asa24_fola, na.rm = TRUE),
asa24_kcal =
mean(asa24_kcal, na.rm = TRUE),
asa24_sugr =
mean(asa24_sugr, na.rm = TRUE),
.groups = "drop"
)
retention_master <-
baseline_features |>
left_join(
blood_summary,
by = "patient"
) |>
left_join(
diet_summary,
by = "patient"
) |>
left_join(
retention_df,
by = "patient"
)
library(MASS)
full_model <-
lm(
Memory ~
age +
sex +
CrystIQ +
glucose +
hba1c +
hdl +
cholesterol +
insulin +
asa24_fibe +
asa24_magn +
asa24_fola +
asa24_kcal +
asa24_sugr +
n_visits +
years_followed +
completed_v4 +
completed_f3,
data = retention_master
)
step_model <-
stepAIC(
full_model,
direction = "both"
)
## Start: AIC=-524.24
## Memory ~ age + sex + CrystIQ + glucose + hba1c + hdl + cholesterol +
## insulin + asa24_fibe + asa24_magn + asa24_fola + asa24_kcal +
## asa24_sugr + n_visits + years_followed + completed_v4 + completed_f3
##
## Df Sum of Sq RSS AIC
## - hdl 1 0.001 397.15 -526.24
## - glucose 1 0.083 397.23 -526.07
## - cholesterol 1 0.089 397.24 -526.06
## - asa24_sugr 1 0.090 397.24 -526.06
## - asa24_fola 1 0.103 397.25 -526.03
## - completed_v4 1 0.453 397.60 -525.33
## - asa24_magn 1 0.529 397.68 -525.18
## - completed_f3 1 0.649 397.80 -524.93
## - hba1c 1 0.758 397.91 -524.71
## - asa24_kcal 1 0.883 398.03 -524.46
## <none> 397.15 -524.24
## - asa24_fibe 1 1.292 398.44 -523.64
## - years_followed 1 1.726 398.88 -522.77
## - n_visits 1 2.142 399.29 -521.94
## - insulin 1 3.269 400.42 -519.68
## - sex 1 30.471 427.62 -467.10
## - age 1 95.106 492.25 -354.49
## - CrystIQ 1 127.146 524.30 -304.05
##
## Step: AIC=-526.24
## Memory ~ age + sex + CrystIQ + glucose + hba1c + cholesterol +
## insulin + asa24_fibe + asa24_magn + asa24_fola + asa24_kcal +
## asa24_sugr + n_visits + years_followed + completed_v4 + completed_f3
##
## Df Sum of Sq RSS AIC
## - glucose 1 0.081 397.23 -528.07
## - asa24_sugr 1 0.089 397.24 -528.06
## - cholesterol 1 0.100 397.25 -528.04
## - asa24_fola 1 0.103 397.25 -528.03
## - completed_v4 1 0.455 397.61 -527.32
## - asa24_magn 1 0.532 397.68 -527.17
## - completed_f3 1 0.649 397.80 -526.93
## - hba1c 1 0.785 397.94 -526.66
## - asa24_kcal 1 0.885 398.04 -526.46
## <none> 397.15 -526.24
## - asa24_fibe 1 1.292 398.44 -525.64
## - years_followed 1 1.725 398.88 -524.77
## + hdl 1 0.001 397.15 -524.24
## - n_visits 1 2.142 399.29 -523.94
## - insulin 1 3.646 400.80 -520.93
## - sex 1 34.525 431.68 -461.55
## - age 1 99.389 496.54 -349.56
## - CrystIQ 1 127.401 524.55 -305.65
##
## Step: AIC=-528.07
## Memory ~ age + sex + CrystIQ + hba1c + cholesterol + insulin +
## asa24_fibe + asa24_magn + asa24_fola + asa24_kcal + asa24_sugr +
## n_visits + years_followed + completed_v4 + completed_f3
##
## Df Sum of Sq RSS AIC
## - asa24_sugr 1 0.097 397.33 -529.88
## - cholesterol 1 0.099 397.33 -529.87
## - asa24_fola 1 0.108 397.34 -529.86
## - completed_v4 1 0.429 397.66 -529.21
## - asa24_magn 1 0.541 397.77 -528.98
## - completed_f3 1 0.674 397.91 -528.72
## - asa24_kcal 1 0.905 398.14 -528.25
## <none> 397.23 -528.07
## - asa24_fibe 1 1.314 398.55 -527.43
## - years_followed 1 1.762 398.99 -526.53
## + glucose 1 0.081 397.15 -526.24
## + hdl 1 0.000 397.23 -526.07
## - n_visits 1 2.127 399.36 -525.80
## - hba1c 1 2.657 399.89 -524.74
## - insulin 1 3.565 400.80 -522.93
## - sex 1 35.472 432.70 -461.65
## - age 1 100.419 497.65 -349.77
## - CrystIQ 1 127.764 525.00 -306.98
##
## Step: AIC=-529.88
## Memory ~ age + sex + CrystIQ + hba1c + cholesterol + insulin +
## asa24_fibe + asa24_magn + asa24_fola + asa24_kcal + n_visits +
## years_followed + completed_v4 + completed_f3
##
## Df Sum of Sq RSS AIC
## - cholesterol 1 0.095 397.42 -531.69
## - asa24_fola 1 0.103 397.43 -531.67
## - completed_v4 1 0.431 397.76 -531.01
## - asa24_magn 1 0.508 397.84 -530.86
## - completed_f3 1 0.670 398.00 -530.53
## - asa24_kcal 1 0.899 398.23 -530.07
## <none> 397.33 -529.88
## - asa24_fibe 1 1.359 398.69 -529.15
## - years_followed 1 1.777 399.11 -528.31
## + asa24_sugr 1 0.097 397.23 -528.07
## + glucose 1 0.089 397.24 -528.06
## + hdl 1 0.000 397.33 -527.88
## - n_visits 1 2.143 399.47 -527.58
## - hba1c 1 2.735 400.06 -526.39
## - insulin 1 3.604 400.93 -524.66
## - sex 1 36.105 433.43 -462.30
## - age 1 102.629 499.96 -348.07
## - CrystIQ 1 127.923 525.25 -308.59
##
## Step: AIC=-531.69
## Memory ~ age + sex + CrystIQ + hba1c + insulin + asa24_fibe +
## asa24_magn + asa24_fola + asa24_kcal + n_visits + years_followed +
## completed_v4 + completed_f3
##
## Df Sum of Sq RSS AIC
## - asa24_fola 1 0.105 397.53 -533.48
## - completed_v4 1 0.445 397.87 -532.79
## - asa24_magn 1 0.503 397.93 -532.68
## - completed_f3 1 0.687 398.11 -532.31
## - asa24_kcal 1 0.897 398.32 -531.88
## <none> 397.42 -531.69
## - asa24_fibe 1 1.362 398.79 -530.95
## - years_followed 1 1.781 399.20 -530.11
## + cholesterol 1 0.095 397.33 -529.88
## + asa24_sugr 1 0.092 397.33 -529.87
## + glucose 1 0.088 397.34 -529.87
## + hdl 1 0.004 397.42 -529.70
## - n_visits 1 2.133 399.56 -529.41
## - hba1c 1 2.764 400.19 -528.14
## - insulin 1 3.541 400.97 -526.59
## - sex 1 38.200 435.62 -460.27
## - age 1 103.679 501.10 -348.24
## - CrystIQ 1 128.562 525.99 -309.47
##
## Step: AIC=-533.48
## Memory ~ age + sex + CrystIQ + hba1c + insulin + asa24_fibe +
## asa24_magn + asa24_kcal + n_visits + years_followed + completed_v4 +
## completed_f3
##
## Df Sum of Sq RSS AIC
## - completed_v4 1 0.469 398.00 -534.53
## - asa24_magn 1 0.555 398.08 -534.36
## - completed_f3 1 0.677 398.20 -534.12
## - asa24_kcal 1 0.799 398.33 -533.87
## <none> 397.53 -533.48
## - years_followed 1 1.802 399.33 -531.86
## + asa24_fola 1 0.105 397.42 -531.69
## + cholesterol 1 0.096 397.43 -531.67
## + glucose 1 0.093 397.43 -531.66
## + asa24_sugr 1 0.088 397.44 -531.65
## - asa24_fibe 1 1.961 399.49 -531.54
## + hdl 1 0.004 397.52 -531.48
## - n_visits 1 2.179 399.71 -531.10
## - hba1c 1 2.727 400.25 -530.01
## - insulin 1 3.542 401.07 -528.38
## - sex 1 38.192 435.72 -462.09
## - age 1 104.200 501.73 -349.24
## - CrystIQ 1 129.113 526.64 -310.47
##
## Step: AIC=-534.53
## Memory ~ age + sex + CrystIQ + hba1c + insulin + asa24_fibe +
## asa24_magn + asa24_kcal + n_visits + years_followed + completed_f3
##
## Df Sum of Sq RSS AIC
## - asa24_magn 1 0.528 398.52 -535.47
## - asa24_kcal 1 0.797 398.79 -534.93
## <none> 398.00 -534.53
## - completed_f3 1 1.179 399.18 -534.17
## + completed_v4 1 0.469 397.53 -533.48
## - years_followed 1 1.541 399.54 -533.44
## - n_visits 1 1.724 399.72 -533.08
## + asa24_fola 1 0.129 397.87 -532.79
## + cholesterol 1 0.110 397.89 -532.76
## + asa24_sugr 1 0.089 397.91 -532.71
## + glucose 1 0.065 397.93 -532.66
## + hdl 1 0.008 397.99 -532.55
## - asa24_fibe 1 2.072 400.07 -532.38
## - hba1c 1 2.737 400.73 -531.05
## - insulin 1 3.547 401.54 -529.44
## - sex 1 39.872 437.87 -460.15
## - age 1 103.804 501.80 -351.13
## - CrystIQ 1 129.252 527.25 -311.55
##
## Step: AIC=-535.47
## Memory ~ age + sex + CrystIQ + hba1c + insulin + asa24_fibe +
## asa24_kcal + n_visits + years_followed + completed_f3
##
## Df Sum of Sq RSS AIC
## - asa24_kcal 1 0.397 398.92 -536.68
## <none> 398.52 -535.47
## - completed_f3 1 1.155 399.68 -535.16
## - years_followed 1 1.466 399.99 -534.54
## + asa24_magn 1 0.528 398.00 -534.53
## + completed_v4 1 0.442 398.08 -534.36
## - n_visits 1 1.624 400.15 -534.22
## + asa24_fola 1 0.183 398.34 -533.84
## + cholesterol 1 0.105 398.42 -533.68
## + glucose 1 0.073 398.45 -533.62
## + asa24_sugr 1 0.057 398.47 -533.59
## + hdl 1 0.013 398.51 -533.50
## - hba1c 1 2.931 401.46 -531.61
## - insulin 1 3.487 402.01 -530.51
## - asa24_fibe 1 7.178 405.70 -523.19
## - sex 1 39.766 438.29 -461.38
## - age 1 103.510 502.03 -352.75
## - CrystIQ 1 129.599 528.12 -312.22
##
## Step: AIC=-536.68
## Memory ~ age + sex + CrystIQ + hba1c + insulin + asa24_fibe +
## n_visits + years_followed + completed_f3
##
## Df Sum of Sq RSS AIC
## <none> 398.92 -536.68
## - completed_f3 1 1.210 400.13 -536.26
## - years_followed 1 1.452 400.37 -535.77
## + completed_v4 1 0.452 398.47 -535.59
## - n_visits 1 1.575 400.50 -535.53
## + asa24_kcal 1 0.397 398.52 -535.47
## + asa24_magn 1 0.127 398.79 -534.93
## + cholesterol 1 0.105 398.82 -534.89
## + glucose 1 0.072 398.85 -534.82
## + asa24_sugr 1 0.041 398.88 -534.76
## + asa24_fola 1 0.038 398.88 -534.75
## + hdl 1 0.021 398.90 -534.72
## - hba1c 1 2.964 401.89 -532.75
## - insulin 1 3.432 402.35 -531.82
## - asa24_fibe 1 7.948 406.87 -522.90
## - sex 1 43.351 442.27 -456.15
## - age 1 103.126 502.05 -354.73
## - CrystIQ 1 130.140 529.06 -312.81
summary(step_model)
##
## Call:
## lm(formula = Memory ~ age + sex + CrystIQ + hba1c + insulin +
## asa24_fibe + n_visits + years_followed + completed_f3, data = retention_master)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.18045 -0.48211 -0.00079 0.48311 2.26815
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.562761 0.257084 6.079 1.88e-09 ***
## age -0.026125 0.001828 -14.291 < 2e-16 ***
## sexM -0.477937 0.051582 -9.266 < 2e-16 ***
## CrystIQ 0.519937 0.032387 16.054 < 2e-16 ***
## hba1c -0.101033 0.041699 -2.423 0.0156 *
## insulin 0.007689 0.002949 2.607 0.0093 **
## asa24_fibe 0.008220 0.002072 3.967 7.93e-05 ***
## n_visits 0.048320 0.027361 1.766 0.0778 .
## years_followed -0.039671 0.023395 -1.696 0.0903 .
## completed_f3TRUE 0.095800 0.061898 1.548 0.1221
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7106 on 790 degrees of freedom
## (550 observations deleted due to missingness)
## Multiple R-squared: 0.4914, Adjusted R-squared: 0.4856
## F-statistic: 84.82 on 9 and 790 DF, p-value: < 2.2e-16
The final step-wise regression model identified several important predictors of Memory performance in the HCA/AABC cohort. Crystallized IQ was the strongest positive predictor, while age showed a strong negative association with memory. Males also scored lower on average than females after adjustment for other variables.
Higher HbA1c levels were associated with worse memory performance, while higher dietary fiber intake was associated with better memory scores. Insulin also remained significant, suggesting a potential relationship between metabolic regulation and cognition.
Unlike earlier cross-sectional analyses, this model incorporated longitudinal retention variables such as number of visits and years followed to partially account for dropout and participation bias. Overall, the model explained approximately 49% of the variance in Memory scores, indicating that cognitive reserve, aging, metabolic health, diet, and retention behavior all contribute to cognitive outcomes in this dataset.
# Percent missing for ALL variables in retention_master
missing_summary <-
retention_master |>
summarise(
across(
everything(),
~ mean(is.na(.x))
)
) |>
pivot_longer(
everything(),
names_to = "variable",
values_to = "pct_missing"
) |>
mutate(
pct_missing =
round(
100 * pct_missing,
2
)
) |>
arrange(desc(pct_missing))
missing_summary
library(mgcv)
memory_gam <-
gam(
Memory ~
sex +
s(age) +
s(CrystIQ) +
s(hba1c) +
s(insulin) +
s(asa24_fibe) +
n_visits +
years_followed,
data = retention_master,
method = "REML"
)
summary(memory_gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Memory ~ sex + s(age) + s(CrystIQ) + s(hba1c) + s(insulin) +
## s(asa24_fibe) + n_visits + years_followed
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.36067 0.06867 -5.252 1.93e-07 ***
## sexM -0.49300 0.05151 -9.571 < 2e-16 ***
## n_visits 0.05030 0.02720 1.849 0.0648 .
## years_followed -0.03312 0.02285 -1.450 0.1475
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(age) 1.000 1.001 200.324 < 2e-16 ***
## s(CrystIQ) 2.626 3.355 81.978 < 2e-16 ***
## s(hba1c) 1.000 1.000 5.357 0.0209 *
## s(insulin) 1.777 2.239 4.162 0.0133 *
## s(asa24_fibe) 1.000 1.001 15.436 9.27e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.491 Deviance explained = 49.8%
## -REML = 879.81 Scale est. = 0.49949 n = 800
library(mgcv)
library(ggplot2)
# visualize GAM smooth terms
par(mfrow = c(2, 2))
plot(
memory_gam,
shade = TRUE,
seWithMean = TRUE,
pages = 1
)
par(mfrow = c(1, 1))
# residual diagnostics
par(mfrow = c(2, 2))
gam.check(memory_gam)
##
## Method: REML Optimizer: outer newton
## full convergence after 12 iterations.
## Gradient range [-0.0001403381,0.0001573871]
## (score 879.8084 & scale 0.4994854).
## Hessian positive definite, eigenvalue range [2.220847e-05,395.5019].
## Model rank = 49 / 49
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(age) 9.00 1.00 1.04 0.90
## s(CrystIQ) 9.00 2.63 0.97 0.20
## s(hba1c) 9.00 1.00 1.01 0.58
## s(insulin) 9.00 1.78 1.03 0.76
## s(asa24_fibe) 9.00 1.00 1.01 0.61
par(mfrow = c(1, 1))
# nonlinear age effect alone
ggplot(
retention_master,
aes(
x = age,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
color = "blue"
) +
labs(
title = "Nonlinear Age Effect on Memory",
x = "Age",
y = "Memory"
) +
theme_minimal()
# nonlinear hba1c effect
ggplot(
retention_master,
aes(
x = hba1c,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
color = "darkred"
) +
labs(
title = "Nonlinear HbA1c Effect on Memory",
x = "HbA1c",
y = "Memory"
) +
theme_minimal()
# Crystallized IQ
ggplot(
retention_master,
aes(
x = CrystIQ,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
se = TRUE
) +
labs(
title = "Nonlinear Effect of Crystallized IQ on Memory",
x = "Crystallized IQ",
y = "Memory"
) +
theme_minimal()
# Insulin
ggplot(
retention_master,
aes(
x = insulin,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
se = TRUE
) +
labs(
title = "Nonlinear Effect of Insulin on Memory",
x = "Insulin",
y = "Memory"
) +
theme_minimal()
hist(retention_master$insulin)
hist(log10(retention_master$insulin))
# Dietary Fiber
ggplot(
retention_master,
aes(
x = asa24_fibe,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
se = TRUE
) +
labs(
title = "Nonlinear Effect of Fiber Intake on Memory",
x = "Dietary Fiber",
y = "Memory"
) +
theme_minimal()
hist(retention_master$asa24_fibe)
hist(log10(retention_master$asa24_fibe))
retention_master <-
retention_master |>
mutate(
log_insulin =
log1p(insulin),
log_fiber =
log1p(asa24_fibe)
)
library(mgcv)
memory_gam_log <-
gam(
Memory ~
sex +
s(age) +
s(CrystIQ) +
s(hba1c) +
s(log_insulin) +
s(log_fiber) +
n_visits +
years_followed,
data = retention_master,
method = "REML"
)
summary(memory_gam_log)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Memory ~ sex + s(age) + s(CrystIQ) + s(hba1c) + s(log_insulin) +
## s(log_fiber) + n_visits + years_followed
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.35988 0.06884 -5.228 2.2e-07 ***
## sexM -0.48646 0.05151 -9.443 < 2e-16 ***
## n_visits 0.05034 0.02727 1.846 0.0652 .
## years_followed -0.03389 0.02290 -1.480 0.1393
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(age) 1.001 1.002 205.934 < 2e-16 ***
## s(CrystIQ) 2.653 3.388 78.419 < 2e-16 ***
## s(hba1c) 1.001 1.002 5.036 0.02505 *
## s(log_insulin) 2.858 3.689 2.306 0.07696 .
## s(log_fiber) 1.450 1.790 7.023 0.00116 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.49 Deviance explained = 49.7%
## -REML = 882.21 Scale est. = 0.50089 n = 800
# LOG INSULIN SPLINE
ggplot(
retention_master,
aes(
x = log_insulin,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
se = TRUE
) +
labs(
title = "Spline Effect of Log(Insulin) on Memory",
x = "log(1 + Insulin)",
y = "Memory"
) +
theme_minimal()
# LOG FIBER SPLINE
ggplot(
retention_master,
aes(
x = log_fiber,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
se = TRUE
) +
labs(
title = "Spline Effect of Log(Fiber Intake) on Memory",
x = "log(1 + Fiber Intake)",
y = "Memory"
) +
theme_minimal()
ggplot(
retention_master,
aes(
x = log_insulin,
y = Memory
)
) +
stat_summary_bin(
fun = mean,
bins = 25,
geom = "point"
) +
stat_summary_bin(
fun = mean,
bins = 25,
geom = "line"
) +
labs(
title = "Binned Mean Memory by Log(Insulin)",
x = "log(1 + Insulin)",
y = "Mean Memory"
) +
theme_minimal()
library(mgcv)
library(rsample)
library(purrr)
library(dplyr)
library(yardstick)
set.seed(141)
# 5-fold cross validation
folds <-
vfold_cv(
retention_master,
v = 20
)
cv_results <-
folds |>
mutate(
model =
map(
splits,
~ gam(
Memory ~
sex +
s(age) +
s(CrystIQ) +
s(hba1c) +
insulin +
asa24_fibe +
n_visits +
years_followed,
data = analysis(.x),
method = "REML"
)
),
preds =
map2(
model,
splits,
~ {
assess_data <- assessment(.y)
assess_data |>
mutate(
pred =
predict(
.x,
newdata = assess_data
)
)
}
),
metrics =
map(
preds,
~ metrics(
.x,
truth = Memory,
estimate = pred
)
)
)
# summarize CV performance
cv_summary <-
cv_results |>
dplyr::select(metrics) |>
unnest(metrics)
cv_summary
cv_summary |>
group_by(.metric) |>
summarise(
mean =
mean(.estimate),
sd =
sd(.estimate)
)
best_fold_index <-
cv_results |>
mutate(
rsq =
map_dbl(
metrics,
~ filter(.x, .metric == "rsq")$.estimate
)
) |>
arrange(desc(rsq)) |>
slice(1) |>
pull(id)
best_fold_index
## [1] "Fold06"
best_model_df <-
cv_results |>
mutate(
rsq =
map_dbl(
metrics,
~ filter(.x, .metric == "rsq")$.estimate
)
) |>
arrange(desc(rsq)) |>
slice(1)
best_model <-
best_model_df$model[[1]]
summary(best_model)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Memory ~ sex + s(age) + s(CrystIQ) + s(hba1c) + insulin + asa24_fibe +
## n_visits + years_followed
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.623252 0.089640 -6.953 7.82e-12 ***
## sexM -0.468244 0.053403 -8.768 < 2e-16 ***
## insulin 0.008033 0.003001 2.677 0.007603 **
## asa24_fibe 0.008122 0.002130 3.814 0.000148 ***
## n_visits 0.041051 0.028110 1.460 0.144609
## years_followed -0.026041 0.023513 -1.107 0.268447
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(age) 1.005 1.010 182.146 <2e-16 ***
## s(CrystIQ) 2.690 3.437 73.504 <2e-16 ***
## s(hba1c) 1.001 1.001 5.263 0.022 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.483 Deviance explained = 49%
## -REML = 843.55 Scale est. = 0.50727 n = 757
par(mfrow = c(2, 2))
plot(
best_model,
shade = TRUE,
pages = 1
)
par(mfrow = c(1, 1))
par(mfrow = c(2, 2))
gam.check(best_model)
##
## Method: REML Optimizer: outer newton
## full convergence after 8 iterations.
## Gradient range [-0.0002795111,0.001470631]
## (score 843.5461 & scale 0.5072726).
## Hessian positive definite, eigenvalue range [9.771001e-05,374.0004].
## Model rank = 33 / 33
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(age) 9.00 1.00 1.04 0.80
## s(CrystIQ) 9.00 2.69 0.97 0.21
## s(hba1c) 9.00 1.00 0.98 0.30
par(mfrow = c(1, 1))
library(ggplot2)
library(patchwork)
# AGE
p_age <-
ggplot(
retention_master,
aes(
x = age,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
se = TRUE
) +
labs(
title = "Age",
x = "Age",
y = "Memory"
) +
theme_minimal()
# CRYSTIQ
p_cryst <-
ggplot(
retention_master,
aes(
x = CrystIQ,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
se = TRUE
) +
labs(
title = "Crystallized IQ",
x = "Crystallized IQ",
y = "Memory"
) +
theme_minimal()
# HBA1C
p_hba1c <-
ggplot(
retention_master,
aes(
x = hba1c,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
se = TRUE
) +
labs(
title = "HbA1c",
x = "HbA1c",
y = "Memory"
) +
theme_minimal()
# INSULIN
p_insulin <-
ggplot(
retention_master,
aes(
x = insulin,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
se = TRUE
) +
labs(
title = "Insulin",
x = "Insulin",
y = "Memory"
) +
theme_minimal()
# FIBER
p_fiber <-
ggplot(
retention_master,
aes(
x = asa24_fibe,
y = Memory
)
) +
geom_point(alpha = 0.25) +
geom_smooth(
method = "gam",
formula = y ~ s(x),
se = TRUE
) +
labs(
title = "Dietary Fiber",
x = "Fiber Intake",
y = "Memory"
) +
theme_minimal()
# COMBINE ALL PLOTS
(p_age | p_cryst) /
(p_hba1c | p_insulin) /
p_fiber
The generalized additive model (GAM) demonstrated good statistical performance and adequate model diagnostics, with successful convergence, stable optimization, and no evidence that spline complexity was insufficient. The spline basis checks suggested that the selected smoothness parameters appropriately captured nonlinear structure without overfitting.
Age showed a strong negative association with memory performance, with the estimated degrees of freedom (edf ≈ 1) indicating an approximately linear decline in memory with increasing age. Crystallized IQ demonstrated a significant nonlinear relationship with memory (edf ≈ 2.7), suggesting that the protective cognitive effects of crystallized intelligence vary across its range rather than following a simple linear trend. HbA1c also showed a significant association with memory, with higher levels linked to poorer cognitive performance, supporting a relationship between glucose dysregulation and cognition.
Among the linear predictors, male sex was associated with lower memory scores, while higher insulin levels and greater dietary fiber intake were associated with better memory performance after adjustment for demographic and metabolic covariates. Number of study visits and years followed were not statistically significant but were retained to partially account for longitudinal retention effects.
Overall, the model explained approximately 49% of the variance in memory scores (adjusted R² = 0.483), indicating substantial explanatory power for observational cognitive aging data. These findings support the broader AABC study hypothesis that modifiable metabolic and dietary factors may contribute to cognitive aging trajectories and resilience in brain health.
“Is there a relationship between changes in diet and changes in cognition over time?”
library(dplyr)
library(stringr)
library(lme4)
library(lmerTest)
# ---------------------------------
# Build longitudinal merged dataset
# ---------------------------------
longitudinal_df <-
all |>
filter(age_open != "90 or older") |>
mutate(
patient =
str_remove(id, "^[A-Z]+"),
age =
as.numeric(age_open),
Memory =
as.numeric(Memory_Tr35_60y)
) |>
dplyr::select(
patient,
id_event,
event,
study,
age,
sex,
Memory
) |>
left_join(
blood |>
mutate(
patient =
str_remove(id, "^[A-Z]+")
) |>
dplyr::select(
patient,
id_event,
hba1c,
insulin,
glucose
),
by = c("patient", "id_event")
) |>
left_join(
diet |>
mutate(
patient =
str_remove(id, "^[A-Z]+")
) |>
dplyr::select(
patient,
id_event,
asa24_fibe,
asa24_sugr,
asa24_kcal
),
by = c("patient", "id_event")
)
# ---------------------------------
# Keep rows with key variables
# ---------------------------------
model_df <-
longitudinal_df |>
filter(
!is.na(Memory),
!is.na(age),
!is.na(hba1c),
!is.na(insulin),
!is.na(asa24_fibe)
)
all |> mutate(
patient = str_remove(id, "^[A-Z]+")) |> distinct(patient) |> nrow()
## [1] 1396
longitudinal_df |> distinct(patient) |> nrow()
## [1] 1350
model_df |> distinct(patient) |> nrow()
## [1] 692
model_df$event |> unique()
## [1] V3 V2 V4 V1
## Levels: V1 < F1 < V2 < F2 < CR < V3 < F3 < V4 < AF1
# -----------------------------------------
# Flexible retention plot function
# Works for:
# all
# longitudinal_df
# model_df
# -----------------------------------------
library(dplyr)
library(stringr)
library(ggplot2)
library(grid)
make_retention_plot <- function(df, title_text) {
# -----------------------------------------
# Build age variable if needed
# -----------------------------------------
if(!"age_open" %in% colnames(df)) {
if("age" %in% colnames(df)) {
df <- df |>
mutate(
age_open = age
)
} else {
stop("No age or age_open column found.")
}
}
# -----------------------------------------
# Build patient variable if needed
# -----------------------------------------
if(!"patient" %in% colnames(df)) {
if("id" %in% colnames(df)) {
df <- df |>
mutate(
patient =
str_remove(id, "^[A-Z]+")
)
} else {
stop("No patient or id column found.")
}
}
# -----------------------------------------
# Event ordering
# -----------------------------------------
study_event_levels <-
c(
"HCA:V1","HCA:F1","HCA:V2","HCA:F2",
"HCA:CR","HCA:V3","HCA:F3","HCA:V4","HCA:AF1",
"AABC:V1","AABC:F1","AABC:V2","AABC:F2",
"AABC:CR","AABC:V3","AABC:F3","AABC:V4","AABC:AF1"
)
# -----------------------------------------
# Build longitudinal structure
# -----------------------------------------
retention_paths <-
df |>
filter(!is.na(age_open)) |>
mutate(
age_open =
suppressWarnings(as.numeric(age_open)),
study_event =
factor(
paste(study, event, sep = ":"),
levels = study_event_levels,
ordered = TRUE
)
) |>
arrange(
patient,
age_open
) |>
group_by(patient) |>
mutate(
visit_order =
row_number(),
next_visit =
lead(visit_order),
next_study_event =
lead(study_event)
) |>
ungroup()
# -----------------------------------------
# Node counts
# -----------------------------------------
node_counts <-
retention_paths |>
count(
visit_order,
study_event,
name = "n_patients"
)
# -----------------------------------------
# Edge counts
# -----------------------------------------
edge_counts <-
retention_paths |>
filter(!is.na(next_study_event)) |>
count(
visit_order,
next_visit,
study_event,
next_study_event,
name = "n_patients"
)
# -----------------------------------------
# Dropoff counts
# -----------------------------------------
dropoff_counts <-
retention_paths |>
group_by(patient) |>
summarise(
final_visit =
max(visit_order),
.groups = "drop"
) |>
count(final_visit)
# -----------------------------------------
# Plot
# -----------------------------------------
ggplot() +
geom_segment(
data = edge_counts,
aes(
x = visit_order,
xend = next_visit,
y = study_event,
yend = next_study_event,
linewidth = n_patients,
color = n_patients
),
alpha = 0.5,
arrow =
arrow(length = unit(0.10, "cm"))
) +
geom_point(
data = node_counts,
aes(
x = visit_order,
y = study_event,
size = n_patients
),
color = "black",
alpha = 0.85
) +
geom_text(
data = dropoff_counts,
aes(
x = final_visit,
y = 1,
label = paste0(
"dropoff=", n
)
),
size = 3,
vjust = -1
) +
scale_x_continuous(
breaks = 1:9
) +
scale_size(
range = c(2, 12)
) +
scale_linewidth(
range = c(0.3, 4)
) +
scale_color_gradient(
low = "lightblue",
high = "darkred"
) +
labs(
title = title_text,
subtitle =
"Node size = remaining participants | edge thickness = transitions",
x =
"Visit Order",
y =
"Study : Event",
size =
"Patients",
linewidth =
"Transitions",
color =
"Transitions"
) +
theme_minimal()
}
# -----------------------------------------
# Generate retention plots
# -----------------------------------------
retention_all <-
make_retention_plot(
all,
"Retention Plot: ALL dataset"
)
retention_longitudinal <-
make_retention_plot(
longitudinal_df,
"Retention Plot: longitudinal_df"
)
retention_model <-
make_retention_plot(
model_df,
"Retention Plot: model_df"
)
# -----------------------------------------
# Print plots
# -----------------------------------------
retention_all
retention_longitudinal
retention_model
# ---------------------------------
# Longitudinal mixed-effects model
# ---------------------------------
memory_lmm <-
lmer(
Memory ~
age +
sex +
hba1c +
insulin +
asa24_fibe +
(1 | patient),
data = model_df
)
summary(memory_lmm)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Memory ~ age + sex + hba1c + insulin + asa24_fibe + (1 | patient)
## Data: model_df
##
## REML criterion at convergence: 2057.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.36665 -0.31327 -0.00039 0.32416 2.16056
##
## Random effects:
## Groups Name Variance Std.Dev.
## patient (Intercept) 0.6950 0.8337
## Residual 0.1612 0.4015
## Number of obs: 794, groups: patient, 692
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.035e+00 2.922e-01 7.867e+02 6.963 7.01e-12 ***
## age -3.187e-02 2.557e-03 7.169e+02 -12.463 < 2e-16 ***
## sexM -4.400e-01 7.167e-02 6.993e+02 -6.139 1.39e-09 ***
## hba1c -9.738e-02 4.558e-02 7.680e+02 -2.136 0.033 *
## insulin -2.633e-04 3.254e-03 6.960e+02 -0.081 0.936
## asa24_fibe 1.058e-02 2.392e-03 6.611e+02 4.424 1.13e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) age sexM hba1c insuln
## age -0.446
## sexM 0.003 -0.054
## hba1c -0.784 -0.142 -0.059
## insulin -0.029 0.117 0.011 -0.203
## asa24_fibe -0.239 0.066 -0.152 0.049 0.055
par(mfrow = c(2,2))
plot(memory_lmm)
qqnorm(residuals(memory_lmm))
qqline(residuals(memory_lmm))
hist(residuals(memory_lmm))
par(mfrow = c(1,1))
icc <-
0.695 / (0.695 + 0.161)
icc
## [1] 0.8119159
memory_lmm_no_corr <-
lmer(
Memory ~
age +
sex +
hba1c +
asa24_fibe +
(1 | patient),
data = model_df
)
summary(memory_lmm_no_corr)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Memory ~ age + sex + hba1c + asa24_fibe + (1 | patient)
## Data: model_df
##
## REML criterion at convergence: 2047.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.36946 -0.31390 0.00029 0.32360 2.16804
##
## Random effects:
## Groups Name Variance Std.Dev.
## patient (Intercept) 0.6944 0.8333
## Residual 0.1609 0.4011
## Number of obs: 794, groups: patient, 692
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.034170 0.291900 787.831518 6.969 6.76e-12 ***
## age -0.031841 0.002538 712.160475 -12.547 < 2e-16 ***
## sexM -0.439949 0.071627 699.893234 -6.142 1.36e-09 ***
## hba1c -0.098134 0.044603 767.634375 -2.200 0.0281 *
## asa24_fibe 0.010591 0.002387 663.369856 4.437 1.07e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) age sexM hba1c
## age -0.446
## sexM 0.003 -0.056
## hba1c -0.807 -0.122 -0.058
## asa24_fibe -0.238 0.060 -0.153 0.061
par(mfrow = c(2,2))
plot(memory_lmm_no_corr)
qqnorm(residuals(memory_lmm_no_corr))
qqline(residuals(memory_lmm_no_corr))
hist(residuals(memory_lmm_no_corr))
par(mfrow = c(1,1))
anova(memory_lmm, memory_lmm_no_corr)
The likelihood-ratio test showed that adding insulin did not improve
the longitudinal mixed-effects model (p = 0.94), meaning
the model with insulin performed essentially identically to the simpler
model without it. Because the simpler model also had a slightly lower
AIC, it is preferred for interpretation and inference. This suggests
that insulin’s earlier association with Memory was likely driven by
between-person differences or confounding rather than a stable
within-person longitudinal relationship. In contrast, age, HbA1c, and
dietary fiber remained meaningful predictors after accounting for
repeated observations within participants, indicating that cognitive
aging, glucose regulation, and diet are the strongest longitudinal
signals associated with Memory performance in this cohort.
model_df <-
longitudinal_df |>
left_join(
baseline_features |>
dplyr::select(
patient,
CrystIQ
),
by = "patient"
) |>
filter(
!is.na(Memory),
!is.na(age),
!is.na(CrystIQ),
!is.na(hba1c),
!is.na(asa24_fibe)
)
library(mgcv)
memory_gamm <-
gamm(
Memory ~
sex +
s(age) +
s(CrystIQ) +
s(hba1c) +
s(asa24_fibe),
random =
list(
patient = ~1
),
data = model_df,
method = "REML"
)
summary(memory_gamm$gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Memory ~ sex + s(age) + s(CrystIQ) + s(hba1c) + s(asa24_fibe)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.31761 0.03991 -7.959 6.05e-15 ***
## sexM -0.45675 0.06190 -7.378 4.08e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(age) 3.735 3.735 52.561 <2e-16 ***
## s(CrystIQ) 2.576 2.576 84.693 <2e-16 ***
## s(hba1c) 1.000 1.000 3.168 0.0755 .
## s(asa24_fibe) 2.543 2.543 6.086 0.0027 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.442
## Scale est. = 0.15491 n = 799
plot(memory_gamm$gam, pages = 1)
gam.check(memory_gamm$gam)
##
## 'gamm' based fit - care required with interpretation.
## Checks based on working residuals may be misleading.
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(age) 9.00 3.74 1.04 0.92
## s(CrystIQ) 9.00 2.58 0.89 <2e-16 ***
## s(hba1c) 9.00 1.00 0.96 0.10 .
## s(asa24_fibe) 9.00 2.54 0.94 0.03 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
AIC(memory_lmm)
## [1] 2073.055
AIC(memory_gamm$lme)
## [1] 1875.152
library(gratia)
library(ggplot2)
draw(
memory_gamm$gam,
residuals = TRUE
)
memory_gamm_2d <-
gamm(
Memory ~
sex +
s(CrystIQ) +
te(age, hba1c) +
s(asa24_fibe),
random =
list(patient = ~1),
data = model_df,
method = "REML"
)
vis.gam(
memory_gamm_2d$gam,
view = c("age", "hba1c"),
plot.type = "contour",
color = "terrain"
)
vis.gam(
memory_gamm_2d$gam,
view = c("age", "hba1c"),
theta = 35,
phi = 30
)
library(mgcv)
library(plotly)
library(dplyr)
# prediction grid
surface_df <-
expand.grid(
age =
seq(
min(model_df$age),
max(model_df$age),
length.out = 60
),
hba1c =
seq(
min(model_df$hba1c),
max(model_df$hba1c),
length.out = 60
),
CrystIQ =
mean(model_df$CrystIQ, na.rm = TRUE),
asa24_fibe =
mean(model_df$asa24_fibe, na.rm = TRUE),
sex = "F"
)
# predicted values
surface_df$pred <-
predict(
memory_gamm_2d$gam,
newdata = surface_df
)
# reshape into matrix
z_matrix <-
matrix(
surface_df$pred,
nrow = 60,
ncol = 60
)
# interactive surface
plot_ly(
x =
unique(surface_df$age),
y =
unique(surface_df$hba1c),
z =
z_matrix,
type = "surface"
) |>
layout(
title =
"Interactive GAM Surface: Age × HbA1c on Memory",
scene = list(
xaxis =
list(title = "Age"),
yaxis =
list(title = "HbA1c"),
zaxis =
list(title = "Predicted Memory")
)
)
library(mgcv)
library(plotly)
library(dplyr)
# -----------------------------
# Prediction grid by sex
# -----------------------------
surface_df <-
expand.grid(
age =
seq(
min(model_df$age),
max(model_df$age),
length.out = 60
),
hba1c =
seq(
min(model_df$hba1c),
max(model_df$hba1c),
length.out = 60
),
sex = c("F", "M"),
CrystIQ =
mean(model_df$CrystIQ, na.rm = TRUE),
asa24_fibe =
mean(model_df$asa24_fibe, na.rm = TRUE)
)
# -----------------------------
# Predictions
# -----------------------------
surface_df$pred <-
predict(
memory_gamm_2d$gam,
newdata = surface_df
)
# -----------------------------
# Split surfaces
# -----------------------------
female_df <-
filter(surface_df, sex == "F")
male_df <-
filter(surface_df, sex == "M")
z_f <-
matrix(
female_df$pred,
nrow = 60,
ncol = 60
)
z_m <-
matrix(
male_df$pred,
nrow = 60,
ncol = 60
)
# -----------------------------
# Interactive subplot surfaces
# -----------------------------
p_f <-
plot_ly(
x = unique(female_df$age),
y = unique(female_df$hba1c),
z = z_f,
type = "surface"
) |>
layout(
title = "Females",
scene = list(
xaxis = list(title = "Age"),
yaxis = list(title = "HbA1c"),
zaxis = list(title = "Predicted Memory")
)
)
p_m <-
plot_ly(
x = unique(male_df$age),
y = unique(male_df$hba1c),
z = z_m,
type = "surface"
) |>
layout(
title = "Males",
scene = list(
xaxis = list(title = "Age"),
yaxis = list(title = "HbA1c"),
zaxis = list(title = "Predicted Memory")
)
)
subplot(
p_f,
p_m,
margin = 0.05
)
model_df <-
longitudinal_df |>
left_join(
baseline_features |>
dplyr::select(
patient,
CrystIQ
),
by = "patient"
) |>
left_join(
retention_df |>
dplyr::select(
patient,
n_visits,
years_followed,
completed_f3,
completed_v4
),
by = "patient"
) |>
filter(
!is.na(Memory),
!is.na(age),
!is.na(CrystIQ),
!is.na(hba1c),
!is.na(asa24_fibe)
)
memory_gamm_dropout <-
gamm(
Memory ~
sex +
s(age) +
s(CrystIQ) +
s(hba1c) +
s(asa24_fibe) +
n_visits +
years_followed +
completed_f3 +
completed_v4,
random =
list(
patient = ~1
),
data = model_df,
method = "REML"
)
summary(memory_gamm_dropout$gam)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Memory ~ sex + s(age) + s(CrystIQ) + s(hba1c) + s(asa24_fibe) +
## n_visits + years_followed + completed_f3 + completed_v4
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.55784 0.08846 -6.306 4.77e-10 ***
## sexM -0.44945 0.06217 -7.229 1.16e-12 ***
## n_visits 0.13772 0.03477 3.961 8.14e-05 ***
## years_followed -0.08107 0.02808 -2.887 0.0040 **
## completed_f3TRUE -0.09440 0.07721 -1.223 0.2218
## completed_v4TRUE -0.19848 0.10794 -1.839 0.0663 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(age) 3.812 3.812 55.064 < 2e-16 ***
## s(CrystIQ) 2.589 2.589 72.003 < 2e-16 ***
## s(hba1c) 1.000 1.000 2.611 0.10652
## s(asa24_fibe) 2.446 2.446 5.668 0.00405 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.453
## Scale est. = 0.15541 n = 799
# ---------------------------------
# Compare ALL cross-sectional
# and longitudinal models by AIC
# ---------------------------------
library(dplyr)
model_comparison <-
tibble(
model = c(
# Cross-sectional models
"Cross-sectional LM",
"Cross-sectional GAM",
"Cross-sectional GAM (log transformed)",
# Longitudinal models
"Longitudinal LMM",
"Longitudinal LMM (no insulin)",
"Longitudinal GAMM",
"Longitudinal GAMM + dropout"
),
AIC = c(
# Cross-sectional
AIC(step_model),
AIC(memory_gam),
AIC(memory_gam_log),
# Longitudinal
AIC(memory_lmm),
AIC(memory_lmm_no_corr),
AIC(memory_gamm$lme),
AIC(memory_gamm_dropout$lme)
)
) |>
arrange(AIC)
model_comparison
library(ggplot2)
ggplot(
model_comparison,
aes(
x = reorder(model, AIC),
y = AIC
)
) +
geom_col() +
coord_flip() +
labs(
title = "Model Comparison by AIC",
x = "",
y = "AIC (lower is better)"
) +
theme_minimal()
The AABC/HCA analyses focused on whether modifiable lifestyle and metabolic factors are associated with cognitive aging, particularly Memory performance. The strongest and most consistent findings across models were:
Older age was associated with lower Memory scores.
Higher HbA1c (poorer glucose regulation) was associated with worse cognition.
Higher dietary fiber intake was associated with better Memory performance.
Crystallized IQ showed a strong protective relationship with cognition.
Several relationships were nonlinear, meaning GAM/GAMM models captured the data substantially better than standard linear models.
Cross-sectional GAM models produced the best overall fit (AIC ≈ 1731), explaining nearly 49% of the variance in Memory scores. Longitudinal mixed-effects models confirmed that repeated measurements within individuals were highly correlated (ICC ≈ 0.81), supporting the importance of participant-specific longitudinal modeling.