ELSA dissertation
# Print the working directory and whether the wave 1 core file can be found.
# Both results are only displayed; nothing stops the script if the file is absent.
getwd()
stata_dir <- file.path("raw data", "RAW_data_stata")
file.exists(file.path(stata_dir, "wave_1_core_data_v3.dta"))

# Wave 1: core interview file plus the IFS derived variables, matched on idauniq.
# Columns that exist in both files come back with .x (core) / .y (derived) suffixes.
w1_core <- read_dta(file.path(stata_dir, "wave_1_core_data_v3.dta"))
w1_derived <- read_dta(file.path(stata_dir, "wave_1_ifs_derived_variables.dta"))
w1 <- w1_core %>%
  left_join(w1_derived, by = "idauniq")

# load waves 2 to 10
w2_core <- read_dta(file.path(stata_dir, "wave_2_core_data_v4.dta"))
w3_core <- read_dta(file.path(stata_dir, "wave_3_elsa_data_v4.dta"))
w4_core <- read_dta(file.path(stata_dir, "wave_4_elsa_data_v3.dta"))
w5_core <- read_dta(file.path(stata_dir, "wave_5_elsa_data_v4.dta"))
w6_core <- read_dta(file.path(stata_dir, "wave_6_elsa_data_v2.dta"))
w7_core <- read_dta(file.path(stata_dir, "wave_7_elsa_data.dta"))
w8_core <- read_dta(file.path(stata_dir, "wave_8_elsa_data_eul_v2.dta"))
w9_core <- read_dta(file.path(stata_dir, "wave_9_elsa_data_eul_v2.dta"))
w10_core <- read_dta("raw data/RAW_data_stata/wave_10_elsa_data_eul_v4.dta")

Table 1 baseline characteristics NO PARTICIPANTS EXCLUDED
## [1] "dhdobyr" "dhager" "didob" "heage" "wprage" "indobyr.x"
## [7] "indager" "aagemab" "aagepab" "aageangi" "aagehart" "aagestro"
## [13] "aagedi" "age" "age_p" "indobyr.y" "indobyr_p" "agebuhead"
## [19] "agebusp" "agehoh" "agehhch1" "agehhch2" "agehhch3" "agehhch4"
## [25] "agehhch5" "agehhch6" "agehhch7" "agehhch8" "agebuch1" "agebuch2"
## [31] "agebuch3" "agebuch4" "agebuch5" "agebuch6" "agebuch7" "agebuch8"
## [37] "chage1" "chage2" "chage3" "chage4" "chage5" "chage6"
## [43] "chage7" "chage8" "chage9" "chage10" "chage11" "chage12"
## [49] "chage13" "chage14" "chage15" "chage16" "ageg5" "ageg5_bu"
## [55] "ageg7" "ageg7_bu" "ageg10" "ageg10_bu" "ageg3" "ageg3_bu"
## [61] "ageg3_spa" "spage" "spage_bu" "agehhldr1" "agehhldr2" "agehhldr3"
## [67] "agehhldr4" "mothage" "magedied" "fathage" "fagedied"
## [1] "dhsex" "disex" "indsex" "asex" "sex" "sex_p"
## [7] "sexbuhead" "sexhoh" "chsex1" "chsex2" "chsex3" "chsex4"
## [13] "chsex5" "chsex6" "chsex7" "chsex8" "chsex9" "chsex10"
## [19] "chsex11" "chsex12" "chsex13" "chsex14" "chsex15" "chsex16"
## [1] "fqqual1" "fqqual2" "fqqual3" "edqual.x" "aqual" "aeducend"
## [7] "edqual.y" "qual2" "qual3" "qual2_p" "qual3_p"
## [1] "fqethnr" "aethnicr"
## [1] "dimar" "wpamar" "partner" "marstat"
## [1] "difjob" "wpjob" "wpjobl" "wpsjoby"
## [5] "wpsjobm" "wpcjob" "wphjob" "iawork"
## [9] "hojob" "aeverjob" "aemploye" "astwork"
## [13] "hhgriddhwork" "hhgriddhwork_p" "worktime" "everwork"
## [17] "exwork" "exworkb" "exwork55" "exwork55b"
## [21] "exwork60" "exwork60b" "exwork65" "exwork65b"
## [25] "difjobm"
## [1] "hecig" "smoker" "smokerstat"
## character(0)
## character(0)
## [1] "cesd_sc" "cesd_na"
## [1] "hedia01" "hedia02" "hedia03" "hedia04" "hedia05" "hedia06" "hedia07"
## [8] "hedia08" "hedia09" "hedia10"
## [1] "hedib01" "hedib02" "hedib03" "hedib04" "hedib05" "hedib06" "hedib07"
## [8] "hedib08" "hedib09" "hedib10"
## [1] "Age variable from HH grid collapsed at 90 plus"
##
## 20 30 31 32 33 34 35 36 37 38
## 1 2 1 1 6 3 4 6 8 15
## [1] "ASK OR CODE RESPONDENT~S SEX"
##
## 1 2
## 5335 6764
## [1] "(D) Highest Educational Qualification at ELSA W1"
##
## -9 -8 -1 1 2 3 4 5 6 7
## 6 11 18 1388 1333 764 1974 582 1015 5008
## [1] "ELSA ethnic group collapsed into White and Non-white to avoid disclosure"
##
## -9 -8 -1 1 2
## 12 2 6810 5111 164
## [1] "marital status - couple1 combined with dimar"
##
## 1 2 3 4 5 6
## 8035 504 575 1951 823 211
## [1] "smoker status (past or present)"
##
## -9 -8 -2 0 1 2 3 4
## 5 11 175 4286 674 4342 445 2161
## [1] "number of cesd questions answered yes"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.000 0.000 1.000 1.504 2.000 8.000
## [1] "number of cesd questions answered"
##
## -2 -1 0 3 5 6 7 8
## 136 175 67 2 5 15 87 11612
# alcohol - broader search
names(w1)[grepl("alco|drink|beer|wine|spirit|units", names(w1), ignore.case = TRUE)]## character(0)
# BMI / body size - broader search
names(w1)[grepl("bmi|body|mass|height|weight|wt|ht", names(w1), ignore.case = TRUE)]## [1] "wpwtx" "wpwtx2" "wpwtx3" "iashty1" "iashty2"
## [6] "iashty3" "iashty4" "hhtot" "refreshtype" "chtype1"
## [11] "chtype2" "chtype3" "chtype4" "chtype5" "chtype6"
## [16] "chtype7" "chtype8" "chtype9" "chtype10" "chtype11"
## [21] "chtype12" "chtype13" "chtype14" "chtype15" "chtype16"
## [26] "nright"
## [1] "Working full time or part time"
##
## -8 -1 1 2
## 118 7476 2741 1764
## [1] "HSE Feed Forward: Are you …{an employee or self-employed}"
##
## -1 1 2
## 563 10101 1435
## [1] "ever worked"
##
## -9 -8 -2 0 1
## 3 1 27 217 11851
## $label
## [1] "smoker status (past or present)"
##
## $format.stata
## [1] "%8.0g"
##
## $labels
## refused don't know not asked
## -9 -8 -2
## never smoked ex smoker - occasional ex smoker - regular
## 0 1 2
## ex smoker - DK freq current smoker
## 3 4
##
## $class
## [1] "haven_labelled" "vctrs_vctr" "double"
## $label
## [1] "(D) Highest Educational Qualification at ELSA W1"
##
## $format.stata
## [1] "%8.0g"
##
## $labels
## Refusal Don't know
## -9 -8
## Not applicable NVQ4/NVQ5/Degree or equiv
## -1 1
## Higher ed below degree NVQ3/GCE A Level equiv
## 2 3
## NVQ2/GCE O Level equiv NVQ1/CSE other grade equiv
## 4 5
## Foreign/other No qualification
## 6 7
##
## $class
## [1] "haven_labelled" "vctrs_vctr" "double"
## $label
## [1] "ELSA ethnic group collapsed into White and Non-white to avoid disclosure"
##
## $format.stata
## [1] "%8.0g"
##
## $labels
## Refusal Don't know Not applicable White Non-white
## -9 -8 -1 1 2
##
## $class
## [1] "haven_labelled" "vctrs_vctr" "double"
## $label
## [1] "marital status - couple1 combined with dimar"
##
## $format.stata
## [1] "%8.0g"
##
## $labels
## married (inc civ pship 06 onwards) cohabiting
## 1 2
## single, never married widowed
## 3 4
## divorced separated
## 5 6
##
## $class
## [1] "haven_labelled" "vctrs_vctr" "double"
# alcohol - wider search
names(w1)[grepl("drnk|drink|alc|wine|beer|spirit|unit|pub", names(w1), ignore.case = TRUE)]## [1] "healc"
# possible nurse / anthropometry style names for BMI
names(w1)[grepl("bm", names(w1), ignore.case = TRUE)]## [1] "wpsjobm" "iabm11" "iabm12" "iabm13" "iabm14" "iabm15" "iabm16"
## [8] "iabm17" "iabm18" "iabm19" "iabm20" "iabm31" "iabm32" "iabm33"
## [15] "iabm34" "iabm35" "iabm36" "iabm37" "iabm38" "iabm39" "iabm40"
## [22] "iabm48" "iabm49" "iabm50" "iabm51" "iabm52" "iabm53" "iabm54"
## [29] "iabm62" "iabm63" "iabm64" "iabm65" "iabm66" "iabm67" "iabm68"
## [36] "iadebm" "hobml" "hobmu" "hobme" "hobmr" "hohbm1" "hohbm2"
## [43] "hohbm3" "difjobm"
## [1] "iashty1" "iashty2" "iashty3" "iashty4" "hhtot"
## [6] "refreshtype" "chtype1" "chtype2" "chtype3" "chtype4"
## [11] "chtype5" "chtype6" "chtype7" "chtype8" "chtype9"
## [16] "chtype10" "chtype11" "chtype12" "chtype13" "chtype14"
## [21] "chtype15" "chtype16" "nright"
## [1] "wpwtx" "wpwtx2" "wpwtx3"
## [1] "Is this before or after tax?"
##
## -9 -8 -1 1 2
## 22 34 11235 172 636
## [1] "Is this before or after tax?"
##
## -9 -8 -1 1 2
## 3 2 12025 16 53
## [1] "Is this before or after tax?"
##
## -1 2
## 12092 7
## [1] "Do you now drink …? {a lot more..}"
##
## -8 -1 1 2 3 4
## 3 10442 41 349 582 682
## $label
## [1] "Do you now drink …? {a lot more..}"
##
## $format.stata
## [1] "%8.0g"
##
## $labels
## Refusal Don't Know Not applicable ... a lot more, a bit more,
## -9 -8 -1 1 2
## a bit less, or, a lot less?
## 3 4
##
## $class
## [1] "haven_labelled" "vctrs_vctr" "double"
# better search for height/weight/BMI
names(w1)[grepl("hei|highm|cm|metre|meter", names(w1), ignore.case = TRUE)]## [1] "heill" "heins" "heiqa" "heiqb" "heiqc"
## [6] "heiqd" "heiqe" "heiqf" "heiqg" "heiqh"
## [11] "heiqi" "heiqj" "heiqk" "heiql" "heiqm"
## [16] "heiqn" "heiqo" "heiqp" "heiqq" "heinct"
## [21] "wplrcm" "wplrcm2" "hoincm1" "hoincm2" "hoincm3"
## [26] "hoincm4" "horpcm" "gaselecmeth" "elecmeth" "rentincme"
## character(0)
## character(0)
# create disease indicator variables for Table 1
# Each indicator is 1 when any of the ten hedia01-hedia10 (or hedib01-hedib10)
# columns equals the diagnosis code used for that condition, otherwise 0.
# It is NA only when no column matches and at least one of them is missing.
# Negative (non-response) codes never equal a diagnosis code, so they count as 0.
hedia_vars <- sprintf("hedia%02d", 1:10)
hedib_vars <- sprintf("hedib%02d", 1:10)

w1 <- w1 %>%
  mutate(
    # diagnoses searched for in the hedia columns
    htn_w1 = if_else(if_any(all_of(hedia_vars), ~ .x == 1), 1, 0),
    hf_w1 = if_else(if_any(all_of(hedia_vars), ~ .x == 4), 1, 0),
    arrhythmia_w1 = if_else(if_any(all_of(hedia_vars), ~ .x == 6), 1, 0),
    diabetes_w1 = if_else(if_any(all_of(hedia_vars), ~ .x == 7), 1, 0),
    stroke_w1 = if_else(if_any(all_of(hedia_vars), ~ .x == 8), 1, 0),
    # diagnoses searched for in the hedib columns
    parkinsons_w1 = if_else(if_any(all_of(hedib_vars), ~ .x == 6), 1, 0),
    alz_w1 = if_else(if_any(all_of(hedib_vars), ~ .x == 8), 1, 0),
    dementia_w1 = if_else(if_any(all_of(hedib_vars), ~ .x == 9), 1, 0)
  )
# check the new disease variables
table(w1$htn_w1, useNA = "ifany")##
## 0 1
## 7627 4472
##
## 0 1
## 12017 82
##
## 0 1
## 11367 732
##
## 0 1
## 11233 866
##
## 0 1
## 11588 511
##
## 0 1
## 12043 56
##
## 0 1
## 12085 14
##
## 0 1
## 12035 64
# keep a clean draft Table 1 dataset - no exclusions
# Labelled categorical columns are converted to factors first, then the
# columns needed for Table 1 are picked and renamed in their final order.
# (This draft is rebuilt further down with PA level and IQCODE columns added.)
table1_w1 <- w1 %>%
  mutate(
    across(
      c(dhsex, edqual.x, fqethnr, marstat, worktime, smokerstat),
      as_factor
    )
  ) %>%
  select(
    idauniq,
    age = dhager,
    sex = dhsex,
    education = edqual.x,
    ethnicity = fqethnr,
    marital_status = marstat,
    employment = worktime,
    smoking = smokerstat,
    depression_score = cesd_sc,
    hypertension = htn_w1,
    heart_failure = hf_w1,
    abnormal_heart_rhythm = arrhythmia_w1,
    diabetes = diabetes_w1,
    stroke = stroke_w1,
    baseline_parkinsons = parkinsons_w1,
    baseline_alzheimers = alz_w1,
    baseline_dementia = dementia_w1
  )
# quick checks
dim(table1_w1)## [1] 12099 17
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.00 55.00 63.00 64.19 72.00 99.00
##
## Refusal Don't Know Not applicable Male Female
## 0 0 0 5335 6764
##
## Refusal Don't know
## 6 11
## Not applicable NVQ4/NVQ5/Degree or equiv
## 18 1388
## Higher ed below degree NVQ3/GCE A Level equiv
## 1333 764
## NVQ2/GCE O Level equiv NVQ1/CSE other grade equiv
## 1974 582
## Foreign/other No qualification
## 1015 5008
##
## Refusal Don't know Not applicable White Non-white
## 12 2 6810 5111 164
##
## married (inc civ pship 06 onwards) cohabiting
## 8035 504
## single, never married widowed
## 575 1951
## divorced separated
## 823 211
##
## unknown Not working Full time (>=35) Part time
## 118 7476 2741 1764
##
## refused don't know not asked
## 5 11 175
## never smoked ex smoker - occasional ex smoker - regular
## 4286 674 4342
## ex smoker - DK freq current smoker
## 445 2161
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.000 0.000 1.000 1.504 2.000 8.000
##
## 0 1
## 7627 4472
##
## 0 1
## 12017 82
##
## 0 1
## 11367 732
##
## 0 1
## 11233 866
##
## 0 1
## 11588 511
##
## 0 1
## 12085 14
##
## 0 1
## 12035 64
# Derive a four-level physical activity category from heacta, heactb and heactc.
# NOTE(review): presumably these are the vigorous / moderate / mild activity
# frequency items, with codes 1-2 the more frequent answers - confirm against
# the wave 1 data dictionary.
# case_when() takes the first matching branch, so "Moderate" is only reached
# when heacta is not 1/2, and "Low" only when neither heacta nor heactb is 1/2.
# Rows where an item is outside 1-4 and no earlier branch matched stay NA.
w1 <- mutate(
  w1,
  w1_palevel = case_when(
    heacta %in% c(1, 2) ~ "High",
    heactb %in% c(1, 2) ~ "Moderate",
    heactc %in% c(1, 2) ~ "Low",
    heacta %in% c(3, 4) & heactb %in% c(3, 4) & heactc %in% c(3, 4) ~ "Sedentary",
    TRUE ~ NA_character_
  )
)
table(w1$w1_palevel, useNA = "ifany")##
## High Low Moderate Sedentary <NA>
## 3302 1756 5607 1240 194
# WAVE 1 IQCODE CREATION
# The 16 IQCODE items are the columns heiqa ... heiqp.
iqcode_vars <- paste0("heiq", letters[1:16])

w1 <- w1 %>%
  # negative codes are non-response codes: turn them into NA and drop the labels
  mutate(
    across(all_of(iqcode_vars), ~ case_when(.x >= 0 ~ as.numeric(.x)))
  ) %>%
  rowwise() %>%
  mutate(
    iqcode_n_answered = sum(!is.na(c_across(all_of(iqcode_vars)))),
    # the mean is only reported when at least 12 of the 16 items were answered
    iqcode_mean = if (iqcode_n_answered >= 12) {
      mean(c_across(all_of(iqcode_vars)), na.rm = TRUE)
    } else {
      NA_real_
    }
  ) %>%
  ungroup() %>%
  mutate(
    # 1 when the mean reaches 3.38, 0 otherwise. Rows WITHOUT an IQCODE mean are
    # also coded 0 (not NA), so the flag is never missing.
    baseline_pathological_cognitive_decline = if_else(
      coalesce(iqcode_mean >= 3.38, FALSE),
      1, 0
    )
  )
summary(w1$iqcode_mean)## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 2.875 3.000 3.261 3.606 4.750 11927
##
## 0 1
## 12047 52
##
## 0 1 6 8 14 15 16
## 11924 1 1 1 5 4 163
# add physical activity group to the draft Table 1 dataset
# Rebuilds table1_w1 from w1: labelled categorical columns become factors, then
# the Table 1 columns are picked and renamed, now including the PA level and
# the IQCODE-based variables.
table1_w1 <- w1 %>%
  mutate(
    across(
      c(dhsex, edqual.x, fqethnr, marstat, worktime, smokerstat),
      as_factor
    )
  ) %>%
  select(
    idauniq,
    pa_level = w1_palevel,
    age = dhager,
    sex = dhsex,
    education = edqual.x,
    ethnicity = fqethnr,
    marital_status = marstat,
    employment = worktime,
    smoking = smokerstat,
    depression_score = cesd_sc,
    hypertension = htn_w1,
    heart_failure = hf_w1,
    abnormal_heart_rhythm = arrhythmia_w1,
    diabetes = diabetes_w1,
    stroke = stroke_w1,
    baseline_alzheimers = alz_w1,
    baseline_dementia = dementia_w1,
    baseline_parkinsons = parkinsons_w1,
    iqcode_mean,
    baseline_pathological_cognitive_decline
  )
# check PA distribution with no exclusions
table(table1_w1$pa_level, useNA = "ifany")##
## High Low Moderate Sedentary <NA>
## 3302 1756 5607 1240 194
##
## 0 1
## 12043 56
# mean age by PA level
# One row per PA level (missing PA forms its own group): group size plus the
# mean and SD of age, ignoring missing ages.
table1_w1 %>%
  group_by(pa_level) %>%
  summarise(
    n = n(),
    across(
      age,
      list(
        mean = ~ mean(.x, na.rm = TRUE),
        sd = ~ sd(.x, na.rm = TRUE)
      ),
      .names = "{.fn}_{.col}"
    )
  )
## # A tibble: 5 × 4
## pa_level n mean_age sd_age
## <chr> <int> <dbl> <dbl>
## 1 High 3302 60.6 9.22
## 2 Low 1756 67.4 12.0
## 3 Moderate 5607 63.6 10.4
## 4 Sedentary 1240 70.8 12.2
## 5 <NA> 194 69.3 15.0
##
## Refusal Don't Know Not applicable Male Female
## High 0 0 0 1593 1709
## Low 0 0 0 552 1204
## Moderate 0 0 0 2507 3100
## Sedentary 0 0 0 590 650
## <NA> 0 0 0 93 101
##
## refused don't know not asked never smoked ex smoker - occasional
## High 0 0 0 1309 194
## Low 0 1 0 587 89
## Moderate 0 1 0 1991 343
## Sedentary 0 0 0 395 48
## <NA> 5 9 175 4 0
##
## ex smoker - regular ex smoker - DK freq current smoker
## High 1200 147 452
## Low 621 62 396
## Moderate 2022 182 1068
## Sedentary 499 54 244
## <NA> 0 0 1
##
## 0 1
## High 2338 964
## Low 960 796
## Moderate 3533 2074
## Sedentary 662 578
## <NA> 134 60
##
## 0 1
## High 3172 130
## Low 1572 184
## Moderate 5240 367
## Sedentary 1074 166
## <NA> 175 19
##
## 0 1
## High 3247 55
## Low 1655 101
## Moderate 5434 173
## Sedentary 1088 152
## <NA> 164 30
##
## 0 1
## High 3142 160
## Low 1634 122
## Moderate 5286 321
## Sedentary 1128 112
## <NA> 177 17
##
## 0 1
## High 3296 6
## Low 1730 26
## Moderate 5587 20
## Sedentary 1215 25
## <NA> 189 5
##
## 0 1
## High 3302 0
## Low 1756 0
## Moderate 5606 1
## Sedentary 1233 7
## <NA> 188 6
##
## 0 1
## High 3297 5
## Low 1748 8
## Moderate 5595 12
## Sedentary 1221 19
## <NA> 174 20
# Cleaned Table 1 dataset: ordered PA factor, unused sex levels dropped,
# three-level smoking, two-level ethnicity and a depression score with
# negative (non-response) codes set to NA.
table1_w1_clean <- table1_w1 %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    sex = fct_drop(sex),
    # refused / don't know / not asked match no branch and become NA
    smoking3 = case_when(
      smoking %in% c("ex smoker - occasional", "ex smoker - regular", "ex smoker - DK freq") ~ "Ex-smoker",
      smoking == "current smoker" ~ "Current",
      smoking == "never smoked" ~ "Never",
      TRUE ~ NA_character_
    ),
    # anything other than the two substantive categories becomes NA
    ethnicity2 = if_else(
      ethnicity %in% c("White", "Non-white"),
      as.character(ethnicity),
      NA_character_
    ),
    depression_score = case_when(
      depression_score >= 0 ~ as.numeric(depression_score)
    )
  )
# check cleaned variables
table(table1_w1_clean$pa_level, useNA = "ifany")##
## High Moderate Low Sedentary <NA>
## 3302 5607 1756 1240 194
##
## Current Ex-smoker Never <NA>
## 2161 5461 4286 191
##
## Non-white White <NA>
## 164 5111 6824
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.000 1.000 1.582 2.000 8.000 311
# Format a logical vector as "count (percent%)". Missing values are not counted
# in the numerator but remain in the denominator, which is the full length of
# the vector (the group size when called inside summarise()).
fmt_n_pct <- function(hit) {
  k <- sum(hit, na.rm = TRUE)
  sprintf("%d (%.1f%%)", k, 100 * k / length(hit))
}

# Format a numeric vector as "mean (SD)" with `digits` decimals, ignoring NA.
fmt_mean_sd <- function(x, digits) {
  spec <- paste0("%.", digits, "f (%.", digits, "f)")
  sprintf(spec, mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
}

# One row per PA level; participants with missing PA are kept as an explicit
# "Missing PA" group.
table1_summary <- table1_w1_clean %>%
  mutate(
    pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
  ) %>%
  group_by(pa_level, .drop = FALSE) %>%
  summarise(
    N = n(),
    `Age, mean (SD)` = fmt_mean_sd(age, 1),
    `Female, n (%)` = fmt_n_pct(sex == "Female"),
    `No qualification, n (%)` = fmt_n_pct(education == "No qualification"),
    `Married/cohabiting, n (%)` = fmt_n_pct(
      marital_status %in% c("married (inc civ pship 06 onwards)", "cohabiting")
    ),
    `Working full/part time, n (%)` = fmt_n_pct(
      employment %in% c("Full time (>=35)", "Part time")
    ),
    `Current smoker, n (%)` = fmt_n_pct(smoking3 == "Current"),
    `Depression score, mean (SD)` = fmt_mean_sd(depression_score, 2),
    `Hypertension, n (%)` = fmt_n_pct(hypertension == 1),
    `Diabetes, n (%)` = fmt_n_pct(diabetes == 1),
    `Stroke, n (%)` = fmt_n_pct(stroke == 1),
    `Abnormal heart rhythm, n (%)` = fmt_n_pct(abnormal_heart_rhythm == 1),
    `Heart failure, n (%)` = fmt_n_pct(heart_failure == 1),
    `Baseline Parkinson's disease, n (%)` = fmt_n_pct(baseline_parkinsons == 1),
    `Baseline Alzheimer’s, n (%)` = fmt_n_pct(baseline_alzheimers == 1),
    `Baseline dementia, n (%)` = fmt_n_pct(baseline_dementia == 1)
  )
knitr::kable(
table1_summary,
caption = "Draft Table 1. Baseline characteristics by physical activity group, wave 1."
)

| pa_level | N | Age, mean (SD) | Female, n (%) | No qualification, n (%) | Married/cohabiting, n (%) | Working full/part time, n (%) | Current smoker, n (%) | Depression score, mean (SD) | Hypertension, n (%) | Diabetes, n (%) | Stroke, n (%) | Abnormal heart rhythm, n (%) | Heart failure, n (%) | Baseline Parkinson’s disease, n (%) | Baseline Alzheimer’s, n (%) | Baseline dementia, n (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| High | 3302 | 60.6 (9.2) | 1709 (51.8%) | 935 (28.3%) | 2582 (78.2%) | 1753 (53.1%) | 452 (13.7%) | 1.06 (1.61) | 964 (29.2%) | 130 (3.9%) | 55 (1.7%) | 160 (4.8%) | 6 (0.2%) | 10 (0.3%) | 0 (0.0%) | 5 (0.2%) |
| Moderate | 5607 | 63.6 (10.4) | 3100 (55.3%) | 2159 (38.5%) | 4067 (72.5%) | 2200 (39.2%) | 1068 (19.0%) | 1.41 (1.88) | 2074 (37.0%) | 367 (6.5%) | 173 (3.1%) | 321 (5.7%) | 20 (0.4%) | 15 (0.3%) | 1 (0.0%) | 12 (0.2%) |
| Low | 1756 | 67.4 (12.0) | 1204 (68.6%) | 1051 (59.9%) | 1049 (59.7%) | 374 (21.3%) | 396 (22.6%) | 2.32 (2.22) | 796 (45.3%) | 184 (10.5%) | 101 (5.8%) | 122 (6.9%) | 26 (1.5%) | 15 (0.9%) | 0 (0.0%) | 8 (0.5%) |
| Sedentary | 1240 | 70.8 (12.2) | 650 (52.4%) | 766 (61.8%) | 705 (56.9%) | 176 (14.2%) | 244 (19.7%) | 2.73 (2.32) | 578 (46.6%) | 166 (13.4%) | 152 (12.3%) | 112 (9.0%) | 25 (2.0%) | 12 (1.0%) | 7 (0.6%) | 19 (1.5%) |
| Missing PA | 194 | 69.3 (15.0) | 101 (52.1%) | 97 (50.0%) | 136 (70.1%) | 2 (1.0%) | 1 (0.5%) | 0.25 (0.62) | 60 (30.9%) | 19 (9.8%) | 30 (15.5%) | 17 (8.8%) | 5 (2.6%) | 4 (2.1%) | 6 (3.1%) | 20 (10.3%) |
# Final long-format summary: identical to table1_summary, with N stored as integer.
table1_summary_final <- mutate(table1_summary, N = as.integer(N))
knitr::kable(
table1_summary_final,
caption = "Table 1. Baseline characteristics by physical activity group at wave 1. No participants were excluded at this stage."
)

| pa_level | N | Age, mean (SD) | Female, n (%) | No qualification, n (%) | Married/cohabiting, n (%) | Working full/part time, n (%) | Current smoker, n (%) | Depression score, mean (SD) | Hypertension, n (%) | Diabetes, n (%) | Stroke, n (%) | Abnormal heart rhythm, n (%) | Heart failure, n (%) | Baseline Parkinson’s disease, n (%) | Baseline Alzheimer’s, n (%) | Baseline dementia, n (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| High | 3302 | 60.6 (9.2) | 1709 (51.8%) | 935 (28.3%) | 2582 (78.2%) | 1753 (53.1%) | 452 (13.7%) | 1.06 (1.61) | 964 (29.2%) | 130 (3.9%) | 55 (1.7%) | 160 (4.8%) | 6 (0.2%) | 10 (0.3%) | 0 (0.0%) | 5 (0.2%) |
| Moderate | 5607 | 63.6 (10.4) | 3100 (55.3%) | 2159 (38.5%) | 4067 (72.5%) | 2200 (39.2%) | 1068 (19.0%) | 1.41 (1.88) | 2074 (37.0%) | 367 (6.5%) | 173 (3.1%) | 321 (5.7%) | 20 (0.4%) | 15 (0.3%) | 1 (0.0%) | 12 (0.2%) |
| Low | 1756 | 67.4 (12.0) | 1204 (68.6%) | 1051 (59.9%) | 1049 (59.7%) | 374 (21.3%) | 396 (22.6%) | 2.32 (2.22) | 796 (45.3%) | 184 (10.5%) | 101 (5.8%) | 122 (6.9%) | 26 (1.5%) | 15 (0.9%) | 0 (0.0%) | 8 (0.5%) |
| Sedentary | 1240 | 70.8 (12.2) | 650 (52.4%) | 766 (61.8%) | 705 (56.9%) | 176 (14.2%) | 244 (19.7%) | 2.73 (2.32) | 578 (46.6%) | 166 (13.4%) | 152 (12.3%) | 112 (9.0%) | 25 (2.0%) | 12 (1.0%) | 7 (0.6%) | 19 (1.5%) |
| Missing PA | 194 | 69.3 (15.0) | 101 (52.1%) | 97 (50.0%) | 136 (70.1%) | 2 (1.0%) | 1 (0.5%) | 0.25 (0.62) | 60 (30.9%) | 19 (9.8%) | 30 (15.5%) | 17 (8.8%) | 5 (2.6%) | 4 (2.1%) | 6 (3.1%) | 20 (10.3%) |
# Transpose the summary: one row per characteristic, one column per PA group.
# N is converted to character first so that every value column shares a type
# when they are stacked into a single "Value" column.
table1_summary_wide <- table1_summary_final %>%
  mutate(across(N, as.character)) %>%
  pivot_longer(
    !pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(names_from = pa_level, values_from = Value) %>%
  select(
    Characteristic,
    all_of(c("High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )
knitr::kable(
table1_summary_wide,
caption = "Table 1. Baseline characteristics at wave 1 by physical activity category. No participants were excluded at this stage."
)

| Characteristic | High | Moderate | Low | Sedentary | Missing PA |
|---|---|---|---|---|---|
| N | 3302 | 5607 | 1756 | 1240 | 194 |
| Age, mean (SD) | 60.6 (9.2) | 63.6 (10.4) | 67.4 (12.0) | 70.8 (12.2) | 69.3 (15.0) |
| Female, n (%) | 1709 (51.8%) | 3100 (55.3%) | 1204 (68.6%) | 650 (52.4%) | 101 (52.1%) |
| No qualification, n (%) | 935 (28.3%) | 2159 (38.5%) | 1051 (59.9%) | 766 (61.8%) | 97 (50.0%) |
| Married/cohabiting, n (%) | 2582 (78.2%) | 4067 (72.5%) | 1049 (59.7%) | 705 (56.9%) | 136 (70.1%) |
| Working full/part time, n (%) | 1753 (53.1%) | 2200 (39.2%) | 374 (21.3%) | 176 (14.2%) | 2 (1.0%) |
| Current smoker, n (%) | 452 (13.7%) | 1068 (19.0%) | 396 (22.6%) | 244 (19.7%) | 1 (0.5%) |
| Depression score, mean (SD) | 1.06 (1.61) | 1.41 (1.88) | 2.32 (2.22) | 2.73 (2.32) | 0.25 (0.62) |
| Hypertension, n (%) | 964 (29.2%) | 2074 (37.0%) | 796 (45.3%) | 578 (46.6%) | 60 (30.9%) |
| Diabetes, n (%) | 130 (3.9%) | 367 (6.5%) | 184 (10.5%) | 166 (13.4%) | 19 (9.8%) |
| Stroke, n (%) | 55 (1.7%) | 173 (3.1%) | 101 (5.8%) | 152 (12.3%) | 30 (15.5%) |
| Abnormal heart rhythm, n (%) | 160 (4.8%) | 321 (5.7%) | 122 (6.9%) | 112 (9.0%) | 17 (8.8%) |
| Heart failure, n (%) | 6 (0.2%) | 20 (0.4%) | 26 (1.5%) | 25 (2.0%) | 5 (2.6%) |
| Baseline Parkinson’s disease, n (%) | 10 (0.3%) | 15 (0.3%) | 15 (0.9%) | 12 (1.0%) | 4 (2.1%) |
| Baseline Alzheimer’s, n (%) | 0 (0.0%) | 1 (0.0%) | 0 (0.0%) | 7 (0.6%) | 6 (3.1%) |
| Baseline dementia, n (%) | 5 (0.2%) | 12 (0.2%) | 8 (0.5%) | 19 (1.5%) | 20 (10.3%) |
# numbers of Alzheimer's and dementia cases in wave 1
# Counts participants flagged with Alzheimer's, with dementia, with both, and
# with at least one of the two; missing flags are never counted.
w1 %>%
  summarise(
    alz_n = sum(alz_w1 %in% 1),
    dem_n = sum(dementia_w1 %in% 1),
    both_n = sum(alz_w1 %in% 1 & dementia_w1 %in% 1),
    either_n = sum(alz_w1 %in% 1 | dementia_w1 %in% 1)
  )
## # A tibble: 1 × 4
## alz_n dem_n both_n either_n
## <int> <int> <int> <int>
## 1 14 64 3 75
# people with both baseline Alzheimer's and baseline dementia at wave 1
both_alz_dem <- filter(
  table1_w1_clean,
  baseline_alzheimers == 1 & baseline_dementia == 1
)
# how many participants have both Alzheimer's and dementia at baseline (wave 1)?
nrow(both_alz_dem)
## [1] 3
# Which PA level are the participants with both Alzheimer's and dementia in?
# .drop = FALSE keeps PA levels that have no such participants (shown as 0).
count(both_alz_dem, pa_level, .drop = FALSE)
## # A tibble: 5 × 2
## pa_level n
## <fct> <int>
## 1 High 0
## 2 Moderate 0
## 3 Low 0
## 4 Sedentary 2
## 5 <NA> 1
# Which IDs are the participants with both Alzheimer's and dementia at baseline (wave 1)?
select(
  both_alz_dem,
  all_of(c("idauniq", "pa_level", "baseline_alzheimers", "baseline_dementia"))
)
## # A tibble: 3 × 4
## idauniq pa_level baseline_alzheimers baseline_dementia
## <dbl> <fct> <dbl> <dbl>
## 1 106735 <NA> 1 1
## 2 108547 Sedentary 1 1
## 3 119099 Sedentary 1 1
# dementia follow up coding chunk
# Builds one two-column table per wave (idauniq + dem_wN), where dem_wN is 1
# when dementia is recorded in that wave's file and 0 otherwise.

# Wave 2 is searched across hedib01-hedib04 for code 9.
w2_dem <- w2_core %>%
  transmute(
    idauniq,
    dem_w2 = if_else(
      if_any(c(hedib01, hedib02, hedib03, hedib04), ~ .x == 9),
      1, 0
    )
  )

# From wave 3 onwards a single column, hedibde, is compared with one code.
flag_hedibde <- function(core, flag_name, code = 1) {
  transmute(core, idauniq, !!flag_name := if_else(hedibde == !!code, 1, 0))
}

# NOTE(review): wave 3 matches hedibde == 3 while waves 4-9 match hedibde == 1;
# confirm the wave 3 code against that wave's data dictionary.
w3_dem <- flag_hedibde(w3_core, "dem_w3", code = 3)
w4_dem <- flag_hedibde(w4_core, "dem_w4")
w5_dem <- flag_hedibde(w5_core, "dem_w5")
w6_dem <- flag_hedibde(w6_core, "dem_w6")
w7_dem <- flag_hedibde(w7_core, "dem_w7")
w8_dem <- flag_hedibde(w8_core, "dem_w8")
w9_dem <- flag_hedibde(w9_core, "dem_w9")
# follow up merge chunk
# Attach the wave 2-9 dementia flags to the cleaned wave 1 table.
followup_w1 <- table1_w1_clean %>%
  mutate(
    across(
      c(
        baseline_alzheimers, baseline_dementia, baseline_parkinsons,
        iqcode_mean, baseline_pathological_cognitive_decline
      ),
      as.numeric
    )
  )

# Left-join each wave in turn so every wave 1 participant is kept.
followup_w1 <- Reduce(
  function(acc, wave_flags) left_join(acc, wave_flags, by = "idauniq"),
  list(w2_dem, w3_dem, w4_dem, w5_dem, w6_dem, w7_dem, w8_dem, w9_dem),
  followup_w1
) %>%
  # Participants with no row in a wave file get NA from the join; those NAs
  # (and any NA flag inside a wave file) are set to 0 here.
  # NOTE(review): after this step "not present in the wave" cannot be told
  # apart from "present, no dementia recorded".
  mutate(
    across(starts_with("dem_w"), ~ coalesce(.x, 0))
  )
dim(followup_w1)## [1] 12099 30
# number of wave 1 participants who met the IQCODE pathological decline threshold
# dplyr::count() has no useNA argument: passing useNA = "ifany" is treated as an
# extra grouping expression and adds a constant column called useNA. count()
# already reports NA as its own group, so the argument is simply removed.
w1 %>%
  count(baseline_pathological_cognitive_decline)
## # A tibble: 2 × 2
## baseline_pathological_cognitive_decline n
## <dbl> <int>
## 1 0 12047
## 2 1 52
# After the other exclusions (prevalent Alzheimer's/dementia, Parkinson's,
# missing PA level), are any IQCODE pathological-decline participants left?
# dplyr::count() has no useNA argument: passing useNA = "ifany" is treated as an
# extra grouping expression and adds a constant column called useNA. count()
# already reports NA as its own group, so the argument is simply removed.
followup_w1 %>%
  mutate(
    prevalent_dem_alz_w1 = if_else(baseline_alzheimers == 1 | baseline_dementia == 1, 1, 0)
  ) %>%
  filter(
    prevalent_dem_alz_w1 == 0,
    baseline_parkinsons == 0,
    !is.na(pa_level)
  ) %>%
  count(baseline_pathological_cognitive_decline)
## # A tibble: 1 × 2
## baseline_pathological_cognitive_decline n
## <dbl> <int>
## 1 0 11805
# analysis dataset chunk
# Applies the baseline exclusions, then derives the survival outcome and the
# binary covariates used in the Cox models.
analysis_w1 <- followup_w1 %>%
  mutate(
    prevalent_dem_alz_w1 = as.numeric(baseline_alzheimers == 1 | baseline_dementia == 1)
  ) %>%
  # keep participants free of Alzheimer's/dementia, Parkinson's and IQCODE
  # decline at baseline, and with a known PA level
  filter(
    prevalent_dem_alz_w1 == 0 &
      baseline_parkinsons == 0 &
      baseline_pathological_cognitive_decline == 0 &
      !is.na(pa_level)
  ) %>%
  mutate(
    # earliest wave (2-9) with a dementia flag; NA when never flagged
    first_dem_wave = case_when(
      dem_w2 == 1 ~ 2,
      dem_w3 == 1 ~ 3,
      dem_w4 == 1 ~ 4,
      dem_w5 == 1 ~ 5,
      dem_w6 == 1 ~ 6,
      dem_w7 == 1 ~ 7,
      dem_w8 == 1 ~ 8,
      dem_w9 == 1 ~ 9,
      TRUE ~ NA_real_
    ),
    event_dementia = as.numeric(!is.na(first_dem_wave)),
    # time is counted in waves since wave 1; everyone without an event gets 8.
    # NOTE(review): this censors all non-events at wave 9 whether or not they
    # were still taking part - consider censoring at the last wave attended.
    time_to_event_waves = coalesce(first_dem_wave - 1, 8),
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    # both indicators stay NA when their source value is missing
    current_smoker = as.numeric(smoking3 == "Current"),
    depression_binary = as.numeric(depression_score >= 4)
  )
dim(analysis_w1)## [1] 11805 36
##
## 0 1
## 11325 480
##
## High Moderate Low Sedentary
## 3287 5580 1733 1205
# Cox model chunk
# Both models use the same outcome: time_to_event_waves (waves since wave 1)
# with event_dementia as the event indicator, fitted on analysis_w1.
# pa_level is a factor whose first level is "High", so the reported
# coefficients compare Moderate, Low and Sedentary against High.
# The calls are kept exactly as written because summary() echoes them.

# Unadjusted model: physical activity level only.
cox_unadjusted <- coxph(
Surv(time_to_event_waves, event_dementia) ~ pa_level,
data = analysis_w1
)
# Adjusted model: adds age, sex, smoking, cardiometabolic conditions and the
# binary depression indicator. Rows with a missing value in any of these terms
# are dropped by coxph, so this model is fitted on fewer rows than the
# unadjusted one.
cox_adjusted <- coxph(
Surv(time_to_event_waves, event_dementia) ~ pa_level + age + sex +
current_smoker + hypertension + diabetes + stroke +
abnormal_heart_rhythm + heart_failure + depression_binary,
data = analysis_w1
)
#output chunk for document
summary(cox_unadjusted)## Call:
## coxph(formula = Surv(time_to_event_waves, event_dementia) ~ pa_level,
## data = analysis_w1)
##
## n= 11805, number of events= 480
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pa_levelModerate 0.5470 1.7281 0.1288 4.246 2.18e-05 ***
## pa_levelLow 0.8084 2.2444 0.1516 5.332 9.69e-08 ***
## pa_levelSedentary 0.8857 2.4246 0.1632 5.427 5.72e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pa_levelModerate 1.728 0.5787 1.342 2.225
## pa_levelLow 2.244 0.4456 1.667 3.021
## pa_levelSedentary 2.425 0.4124 1.761 3.339
##
## Concordance= 0.579 (se = 0.012 )
## Likelihood ratio test= 41.82 on 3 df, p=4e-09
## Wald test = 38.66 on 3 df, p=2e-08
## Score (logrank) test = 40.21 on 3 df, p=1e-08
## Call:
## coxph(formula = Surv(time_to_event_waves, event_dementia) ~ pa_level +
## age + sex + current_smoker + hypertension + diabetes + stroke +
## abnormal_heart_rhythm + heart_failure + depression_binary,
## data = analysis_w1)
##
## n= 11679, number of events= 476
## (126 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pa_levelModerate 0.288026 1.333792 0.131591 2.189 0.0286 *
## pa_levelLow 0.204592 1.227024 0.161119 1.270 0.2041
## pa_levelSedentary 0.020806 1.021024 0.178992 0.116 0.9075
## age 0.061431 1.063357 0.004261 14.418 <2e-16 ***
## sexFemale 0.208323 1.231611 0.096090 2.168 0.0302 *
## current_smoker -0.117040 0.889550 0.140726 -0.832 0.4056
## hypertension 0.085720 1.089501 0.094001 0.912 0.3618
## diabetes 0.160271 1.173829 0.157535 1.017 0.3090
## stroke 0.218415 1.244104 0.178580 1.223 0.2213
## abnormal_heart_rhythm 0.329790 1.390676 0.155400 2.122 0.0338 *
## heart_failure -0.696122 0.498515 0.582639 -1.195 0.2322
## depression_binary 0.258923 1.295534 0.113208 2.287 0.0222 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pa_levelModerate 1.3338 0.7497 1.0306 1.726
## pa_levelLow 1.2270 0.8150 0.8948 1.683
## pa_levelSedentary 1.0210 0.9794 0.7189 1.450
## age 1.0634 0.9404 1.0545 1.072
## sexFemale 1.2316 0.8119 1.0202 1.487
## current_smoker 0.8895 1.1242 0.6751 1.172
## hypertension 1.0895 0.9179 0.9062 1.310
## diabetes 1.1738 0.8519 0.8620 1.598
## stroke 1.2441 0.8038 0.8767 1.765
## abnormal_heart_rhythm 1.3907 0.7191 1.0255 1.886
## heart_failure 0.4985 2.0060 0.1591 1.562
## depression_binary 1.2955 0.7719 1.0377 1.617
##
## Concordance= 0.74 (se = 0.01 )
## Likelihood ratio test= 298.4 on 12 df, p=<2e-16
## Wald test = 306.5 on 12 df, p=<2e-16
## Score (logrank) test = 326.3 on 12 df, p=<2e-16
Sensitivity analysis NO IQCODE
# Sensitivity dataset: same construction as analysis_w1 but WITHOUT the IQCODE
# pathological-decline exclusion.
analysis_w1_no_iqcode <- followup_w1 %>%
  mutate(
    prevalent_dem_alz_w1 = if_else(baseline_alzheimers == 1 | baseline_dementia == 1, 1, 0)
  ) %>%
  filter(
    !is.na(pa_level),
    prevalent_dem_alz_w1 == 0,
    baseline_parkinsons == 0
  ) %>%
  mutate(
    # First wave (2-9) whose dementia flag equals 1; clause order makes it the earliest.
    first_dem_wave = case_when(
      dem_w2 == 1 ~ 2,
      dem_w3 == 1 ~ 3,
      dem_w4 == 1 ~ 4,
      dem_w5 == 1 ~ 5,
      dem_w6 == 1 ~ 6,
      dem_w7 == 1 ~ 7,
      dem_w8 == 1 ~ 8,
      dem_w9 == 1 ~ 9,
      TRUE ~ NA_real_
    ),
    # 1 when a dementia wave was found, 0 otherwise.
    event_dementia = as.numeric(!is.na(first_dem_wave)),
    # Events: first_dem_wave - 1. Non-events: 8.
    # NOTE(review): non-events all receive 8 waves of follow-up - confirm intended.
    time_to_event_waves = coalesce(first_dem_wave - 1, 8),
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    # A missing smoking status / depression score stays NA (if_else default).
    current_smoker = if_else(smoking3 == "Current", 1, 0),
    depression_binary = if_else(depression_score >= 4, 1, 0)
  )
dim(analysis_w1_no_iqcode)
## [1] 11805 36
##
## 0 1
## 11325 480
##
## High Moderate Low Sedentary
## 3287 5580 1733 1205
2. Cox models: NO IQCODE exclusion
# Sensitivity Cox models fitted on the dataset without the IQCODE exclusion:
# unadjusted, then adjusted for the same covariates as the main analysis.
cox_unadjusted_no_iqcode <- coxph(
  formula = Surv(time_to_event_waves, event_dementia) ~ pa_level,
  data = analysis_w1_no_iqcode
)
cox_adjusted_no_iqcode <- coxph(
  formula = Surv(time_to_event_waves, event_dementia) ~
    pa_level + age + sex + current_smoker + hypertension + diabetes +
    stroke + abnormal_heart_rhythm + heart_failure + depression_binary,
  data = analysis_w1_no_iqcode
)
summary(cox_unadjusted_no_iqcode)
## Call:
## coxph(formula = Surv(time_to_event_waves, event_dementia) ~ pa_level,
## data = analysis_w1_no_iqcode)
##
## n= 11805, number of events= 480
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pa_levelModerate 0.5470 1.7281 0.1288 4.246 2.18e-05 ***
## pa_levelLow 0.8084 2.2444 0.1516 5.332 9.69e-08 ***
## pa_levelSedentary 0.8857 2.4246 0.1632 5.427 5.72e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pa_levelModerate 1.728 0.5787 1.342 2.225
## pa_levelLow 2.244 0.4456 1.667 3.021
## pa_levelSedentary 2.425 0.4124 1.761 3.339
##
## Concordance= 0.579 (se = 0.012 )
## Likelihood ratio test= 41.82 on 3 df, p=4e-09
## Wald test = 38.66 on 3 df, p=2e-08
## Score (logrank) test = 40.21 on 3 df, p=1e-08
## Call:
## coxph(formula = Surv(time_to_event_waves, event_dementia) ~ pa_level +
## age + sex + current_smoker + hypertension + diabetes + stroke +
## abnormal_heart_rhythm + heart_failure + depression_binary,
## data = analysis_w1_no_iqcode)
##
## n= 11679, number of events= 476
## (126 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pa_levelModerate 0.288026 1.333792 0.131591 2.189 0.0286 *
## pa_levelLow 0.204592 1.227024 0.161119 1.270 0.2041
## pa_levelSedentary 0.020806 1.021024 0.178992 0.116 0.9075
## age 0.061431 1.063357 0.004261 14.418 <2e-16 ***
## sexFemale 0.208323 1.231611 0.096090 2.168 0.0302 *
## current_smoker -0.117040 0.889550 0.140726 -0.832 0.4056
## hypertension 0.085720 1.089501 0.094001 0.912 0.3618
## diabetes 0.160271 1.173829 0.157535 1.017 0.3090
## stroke 0.218415 1.244104 0.178580 1.223 0.2213
## abnormal_heart_rhythm 0.329790 1.390676 0.155400 2.122 0.0338 *
## heart_failure -0.696122 0.498515 0.582639 -1.195 0.2322
## depression_binary 0.258923 1.295534 0.113208 2.287 0.0222 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pa_levelModerate 1.3338 0.7497 1.0306 1.726
## pa_levelLow 1.2270 0.8150 0.8948 1.683
## pa_levelSedentary 1.0210 0.9794 0.7189 1.450
## age 1.0634 0.9404 1.0545 1.072
## sexFemale 1.2316 0.8119 1.0202 1.487
## current_smoker 0.8895 1.1242 0.6751 1.172
## hypertension 1.0895 0.9179 0.9062 1.310
## diabetes 1.1738 0.8519 0.8620 1.598
## stroke 1.2441 0.8038 0.8767 1.765
## abnormal_heart_rhythm 1.3907 0.7191 1.0255 1.886
## heart_failure 0.4985 2.0060 0.1591 1.562
## depression_binary 1.2955 0.7719 1.0377 1.617
##
## Concordance= 0.74 (se = 0.01 )
## Likelihood ratio test= 298.4 on 12 df, p=<2e-16
## Wald test = 306.5 on 12 df, p=<2e-16
## Score (logrank) test = 326.3 on 12 df, p=<2e-16
participants WITH IQCODE PATHOLOGICAL COGNITIVE DECLINE
# Count participants by IQCODE pathological-decline status after the other
# exclusions (prevalent Alzheimer's/dementia, Parkinson's, missing PA level).
# count() reports NA as its own group, so no `useNA` argument is needed; passing
# one (a base::table() option) would only add a constant column named `useNA`.
followup_w1 %>%
  mutate(
    prevalent_dem_alz_w1 = if_else(baseline_alzheimers == 1 | baseline_dementia == 1, 1, 0)
  ) %>%
  filter(
    prevalent_dem_alz_w1 == 0,
    baseline_parkinsons == 0,
    !is.na(pa_level)
  ) %>%
  count(baseline_pathological_cognitive_decline)
Wave 2 Table 1
# Wave 2: join the derived variables onto the core file, flag each condition when
# its code appears in any of the four hedia / hedib slots used here, and label
# the PA level from palevel.
w2_derived <- read_dta("raw data/RAW_data_stata/wave_2_derived_variables.dta")
w2 <- w2_core %>%
  left_join(w2_derived, by = "idauniq")
w2 <- local({
  # Element-wise "does any slot equal `code`?", combined with `|` left to right,
  # so NA propagates exactly as in a hand-written a == k | b == k | ... chain.
  any_slot <- function(code, ...) {
    Reduce(`|`, lapply(list(...), function(slot) slot == code))
  }
  w2 %>%
    mutate(
      htn_w2 = if_else(any_slot(1, hedia01, hedia02, hedia03, hedia04), 1, 0),
      hf_w2 = if_else(any_slot(4, hedia01, hedia02, hedia03, hedia04), 1, 0),
      arrhythmia_w2 = if_else(any_slot(6, hedia01, hedia02, hedia03, hedia04), 1, 0),
      diabetes_w2 = if_else(any_slot(7, hedia01, hedia02, hedia03, hedia04), 1, 0),
      stroke_w2 = if_else(any_slot(8, hedia01, hedia02, hedia03, hedia04), 1, 0),
      alz_w2 = if_else(any_slot(8, hedib01, hedib02, hedib03, hedib04), 1, 0),
      dementia_w2 = if_else(any_slot(9, hedib01, hedib02, hedib03, hedib04), 1, 0),
      # Any palevel value other than 0-3 (including NA) becomes NA.
      pa_level = case_when(
        palevel == 0 ~ "Sedentary",
        palevel == 1 ~ "Low",
        palevel == 2 ~ "Moderate",
        palevel == 3 ~ "High",
        TRUE ~ NA_character_
      )
    )
})
# Columns used by the wave 2 Table 1, under wave-independent names.
w2_table1 <- w2 %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = dhager,
    sex = as_factor(DhSex),
    hypertension = htn_w2,
    heart_failure = hf_w2,
    abnormal_heart_rhythm = arrhythmia_w2,
    diabetes = diabetes_w2,
    stroke = stroke_w2,
    baseline_alzheimers = alz_w2,
    baseline_dementia = dementia_w2
  )
# Wave 2 Table 1 cells: one row per PA category (NA becomes "Missing PA") with N,
# age as "mean (SD)" and each characteristic as "count (percent)". The percent
# denominator is the full group size, so rows with NA still count toward it.
# NOTE(review): forcats::fct_explicit_na() is deprecated since forcats 1.0.0
# (successor: fct_na_value_to_level()); left unchanged to preserve behaviour.
w2_table1_summary <- local({
  # "k (p%)" for the TRUE entries of a logical vector.
  n_pct <- function(hit) {
    k <- sum(hit, na.rm = TRUE)
    sprintf("%d (%.1f%%)", k, 100 * k / length(hit))
  }
  # "mean (SD)" to one decimal place, ignoring NA.
  mean_sd <- function(v) {
    sprintf("%.1f (%.1f)", mean(v, na.rm = TRUE), sd(v, na.rm = TRUE))
  }

  w2_table1 %>%
    mutate(pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 2 summary: characteristics become rows, PA categories become
# columns. N is cast to character so it can share the value column with the
# formatted strings. The table is then rendered with kable.
w2_table1_summary_wide <- w2_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(cols = -pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = pa_level, values_from = Value) %>%
  select(all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA")))

w2_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 2 by physical activity category."
  )
| Characteristic | High | Moderate | Low | Sedentary | Missing PA |
|---|---|---|---|---|---|
| N | 1744 | 4684 | 2309 | 556 | 139 |
| Age, mean (SD) | 61.8 (8.7) | 64.5 (9.8) | 68.8 (11.2) | 75.2 (11.6) | 70.6 (15.8) |
| Female, n (%) | 835 (47.9%) | 2615 (55.8%) | 1465 (63.4%) | 325 (58.5%) | 66 (47.5%) |
| Hypertension, n (%) | 253 (14.5%) | 784 (16.7%) | 487 (21.1%) | 156 (28.1%) | 26 (18.7%) |
| Diabetes, n (%) | 28 (1.6%) | 151 (3.2%) | 139 (6.0%) | 55 (9.9%) | 10 (7.2%) |
| Stroke, n (%) | 3 (0.2%) | 49 (1.0%) | 59 (2.6%) | 39 (7.0%) | 13 (9.4%) |
| Abnormal heart rhythm, n (%) | 48 (2.8%) | 138 (2.9%) | 79 (3.4%) | 44 (7.9%) | 5 (3.6%) |
| Heart failure, n (%) | 0 (0.0%) | 8 (0.2%) | 8 (0.3%) | 4 (0.7%) | 0 (0.0%) |
| Baseline Alzheimer’s, n (%) | 1 (0.1%) | 2 (0.0%) | 2 (0.1%) | 1 (0.2%) | 5 (3.6%) |
| Baseline dementia, n (%) | 2 (0.1%) | 11 (0.2%) | 9 (0.4%) | 7 (1.3%) | 14 (10.1%) |
# Wave 3 Table 1 inputs. Each condition flag is 1 when the core item matches the
# listed code(s) and 0 otherwise; a missing item is scored 0.
w3_table1_raw <- local({
  # Logical condition -> 1/0, with NA scored as 0.
  flag01 <- function(cond) coalesce(as.numeric(cond), 0)

  w3_core %>%
    mutate(
      hypertension = flag01(hediabp == 1),
      heart_failure = flag01(hediahf == 1),
      abnormal_heart_rhythm = flag01(hediaar == 1),
      diabetes = flag01(hediadi == 1),
      stroke = flag01(hediast == 1),
      baseline_alzheimers = flag01(hedibad %in% c(1, 2, 3)),
      baseline_dementia = flag01(hedibde %in% c(1, 2, 3)),
      # Any palevel value other than 0-3 (including NA) becomes NA.
      pa_level = case_when(
        palevel == 0 ~ "Sedentary",
        palevel == 1 ~ "Low",
        palevel == 2 ~ "Moderate",
        palevel == 3 ~ "High",
        TRUE ~ NA_character_
      )
    )
})
# Keep only the Table 1 columns, with PA level as an ordered-by-level factor.
w3_table1 <- w3_table1_raw %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = dhager,
    sex = as_factor(dhsex)
  ) %>%
  select(
    idauniq, pa_level, age, sex,
    hypertension, heart_failure, abnormal_heart_rhythm, diabetes, stroke,
    baseline_alzheimers, baseline_dementia
  )
# Wave 3 Table 1 cells: one row per PA category (NA becomes "Missing PA") with N,
# age as "mean (SD)" and each characteristic as "count (percent)". The percent
# denominator is the full group size.
# NOTE(review): forcats::fct_explicit_na() is deprecated since forcats 1.0.0
# (successor: fct_na_value_to_level()); left unchanged to preserve behaviour.
w3_table1_summary <- local({
  # "k (p%)" for the TRUE entries of a logical vector.
  n_pct <- function(hit) {
    k <- sum(hit, na.rm = TRUE)
    sprintf("%d (%.1f%%)", k, 100 * k / length(hit))
  }
  # "mean (SD)" to one decimal place, ignoring NA.
  mean_sd <- function(v) {
    sprintf("%.1f (%.1f)", mean(v, na.rm = TRUE), sd(v, na.rm = TRUE))
  }

  w3_table1 %>%
    mutate(pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 3 summary: characteristics become rows, PA categories become
# columns. N is cast to character so it can share the value column with the
# formatted strings. The table is then rendered with kable.
w3_table1_summary_wide <- w3_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(cols = -pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = pa_level, values_from = Value) %>%
  select(all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA")))

w3_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 3 by physical activity category."
  )
| Characteristic | High | Moderate | Low | Sedentary | Missing PA |
|---|---|---|---|---|---|
| N | 1969 | 4838 | 2263 | 686 | 15 |
| Age, mean (SD) | 59.8 (8.9) | 63.3 (10.5) | 68.1 (12.0) | 74.7 (14.2) | 61.6 (12.2) |
| Female, n (%) | 939 (47.7%) | 2669 (55.2%) | 1458 (64.4%) | 399 (58.2%) | 11 (73.3%) |
| Hypertension, n (%) | 174 (8.8%) | 473 (9.8%) | 221 (9.8%) | 59 (8.6%) | 0 (0.0%) |
| Diabetes, n (%) | 36 (1.8%) | 115 (2.4%) | 75 (3.3%) | 30 (4.4%) | 0 (0.0%) |
| Stroke, n (%) | 15 (0.8%) | 31 (0.6%) | 30 (1.3%) | 31 (4.5%) | 0 (0.0%) |
| Abnormal heart rhythm, n (%) | 44 (2.2%) | 90 (1.9%) | 62 (2.7%) | 21 (3.1%) | 0 (0.0%) |
| Heart failure, n (%) | 1 (0.1%) | 1 (0.0%) | 8 (0.4%) | 4 (0.6%) | 0 (0.0%) |
| Baseline Alzheimer’s, n (%) | 1 (0.1%) | 5 (0.1%) | 9 (0.4%) | 17 (2.5%) | 0 (0.0%) |
| Baseline dementia, n (%) | 3 (0.2%) | 20 (0.4%) | 33 (1.5%) | 56 (8.2%) | 0 (0.0%) |
wave 4 table 1
# Wave 4 Table 1 inputs. Each condition flag is 1 when the core item equals 1 and
# 0 otherwise; a missing item is scored 0.
w4_table1_raw <- local({
  # Logical condition -> 1/0, with NA scored as 0.
  flag01 <- function(cond) coalesce(as.numeric(cond), 0)

  w4_core %>%
    mutate(
      hypertension = flag01(hediabp == 1),
      heart_failure = flag01(hediahf == 1),
      abnormal_heart_rhythm = flag01(hediaar == 1),
      diabetes = flag01(hediadi == 1),
      stroke = flag01(hediast == 1),
      baseline_alzheimers = flag01(hedibad == 1),
      baseline_dementia = flag01(hedibde == 1),
      # Any palevel value other than 0-3 (including NA) becomes NA.
      pa_level = case_when(
        palevel == 0 ~ "Sedentary",
        palevel == 1 ~ "Low",
        palevel == 2 ~ "Moderate",
        palevel == 3 ~ "High",
        TRUE ~ NA_character_
      )
    )
})
# Keep only the Table 1 columns.
# NOTE(review): age is taken from indager as-is; the wave 5-8 sections recode
# negative indager values to NA - confirm wave 4 has no such codes.
w4_table1 <- w4_table1_raw %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = indager,
    sex = as_factor(dhsex)
  ) %>%
  select(
    idauniq, pa_level, age, sex,
    hypertension, heart_failure, abnormal_heart_rhythm, diabetes, stroke,
    baseline_alzheimers, baseline_dementia
  )
# Wave 4 Table 1 cells: one row per PA category (NA becomes "Missing PA") with N,
# age as "mean (SD)" and each characteristic as "count (percent)". The percent
# denominator is the full group size.
# NOTE(review): forcats::fct_explicit_na() is deprecated since forcats 1.0.0
# (successor: fct_na_value_to_level()); left unchanged to preserve behaviour.
w4_table1_summary <- local({
  # "k (p%)" for the TRUE entries of a logical vector.
  n_pct <- function(hit) {
    k <- sum(hit, na.rm = TRUE)
    sprintf("%d (%.1f%%)", k, 100 * k / length(hit))
  }
  # "mean (SD)" to one decimal place, ignoring NA.
  mean_sd <- function(v) {
    sprintf("%.1f (%.1f)", mean(v, na.rm = TRUE), sd(v, na.rm = TRUE))
  }

  w4_table1 %>%
    mutate(pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 4 summary: characteristics become rows, PA categories become
# columns. N is cast to character so it can share the value column with the
# formatted strings. The table is then rendered with kable.
w4_table1_summary_wide <- w4_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(cols = -pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = pa_level, values_from = Value) %>%
  select(all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA")))

w4_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 4 by physical activity category."
  )
| Characteristic | High | Moderate | Low | Sedentary | Missing PA |
|---|---|---|---|---|---|
| N | 2254 | 5384 | 2562 | 835 | 15 |
| Age, mean (SD) | 61.7 (8.4) | 63.9 (9.5) | 68.2 (11.3) | 74.2 (12.5) | 59.1 (9.2) |
| Female, n (%) | 1070 (47.5%) | 2929 (54.4%) | 1649 (64.4%) | 472 (56.5%) | 5 (33.3%) |
| Hypertension, n (%) | 201 (8.9%) | 603 (11.2%) | 350 (13.7%) | 135 (16.2%) | 0 (0.0%) |
| Diabetes, n (%) | 43 (1.9%) | 149 (2.8%) | 125 (4.9%) | 61 (7.3%) | 1 (6.7%) |
| Stroke, n (%) | 7 (0.3%) | 55 (1.0%) | 46 (1.8%) | 68 (8.1%) | 0 (0.0%) |
| Abnormal heart rhythm, n (%) | 32 (1.4%) | 111 (2.1%) | 88 (3.4%) | 45 (5.4%) | 0 (0.0%) |
| Heart failure, n (%) | 1 (0.0%) | 3 (0.1%) | 6 (0.2%) | 15 (1.8%) | 0 (0.0%) |
| Baseline Alzheimer’s, n (%) | 0 (0.0%) | 5 (0.1%) | 3 (0.1%) | 29 (3.5%) | 0 (0.0%) |
| Baseline dementia, n (%) | 1 (0.0%) | 17 (0.3%) | 19 (0.7%) | 60 (7.2%) | 0 (0.0%) |
wave 5 table 1
## Wave 5 table 1
# Wave 5 Table 1 inputs. Each condition flag is 1 when the core item equals 1 and
# 0 otherwise; a missing item is scored 0.
w5_table1_raw <- local({
  # Logical condition -> 1/0, with NA scored as 0.
  flag01 <- function(cond) coalesce(as.numeric(cond), 0)

  w5_core %>%
    mutate(
      hypertension = flag01(hediabp == 1),
      heart_failure = flag01(hediahf == 1),
      abnormal_heart_rhythm = flag01(hediaar == 1),
      diabetes = flag01(hediadi == 1),
      stroke = flag01(hediast == 1),
      baseline_alzheimers = flag01(hedibad == 1),
      baseline_dementia = flag01(hedibde == 1),
      # Any palevel value other than 0-3 (including NA) becomes NA.
      pa_level = case_when(
        palevel == 0 ~ "Sedentary",
        palevel == 1 ~ "Low",
        palevel == 2 ~ "Moderate",
        palevel == 3 ~ "High",
        TRUE ~ NA_character_
      )
    )
})
# ORIGINAL VERSION OF W5 TABLE 1 (performed first; kept as a methodological note).
# It used `age = indager` directly and produced implausible age summaries because
# wave 5 indager contains negative special missing codes, which distorted the mean
# and SD when not recoded. Its result was immediately overwritten by the corrected
# version below, so the assignment is commented out rather than run, matching how
# the wave 6-8 sections record their original age handling.
# w5_table1 <- w5_table1_raw %>%
#   transmute(
#     idauniq,
#     pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
#     age = indager,
#     sex = as_factor(dhsex),
#     hypertension,
#     heart_failure,
#     abnormal_heart_rhythm,
#     diabetes,
#     stroke,
#     baseline_alzheimers,
#     baseline_dementia
#   )

# Wave 5 table 1 CORRECTED VERSION (amended final version).
# Negative indager values (special missing codes) are recoded to NA before any
# summary statistics are generated.
w5_table1 <- w5_table1_raw %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = if_else(indager < 0, NA_real_, as.numeric(indager)),
    sex = as_factor(dhsex),
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )
# Wave 5 Table 1 cells: one row per PA category (NA becomes "Missing PA") with N,
# age as "mean (SD)" and each characteristic as "count (percent)". The percent
# denominator is the full group size.
# NOTE(review): forcats::fct_explicit_na() is deprecated since forcats 1.0.0
# (successor: fct_na_value_to_level()); left unchanged to preserve behaviour.
w5_table1_summary <- local({
  # "k (p%)" for the TRUE entries of a logical vector.
  n_pct <- function(hit) {
    k <- sum(hit, na.rm = TRUE)
    sprintf("%d (%.1f%%)", k, 100 * k / length(hit))
  }
  # "mean (SD)" to one decimal place, ignoring NA.
  mean_sd <- function(v) {
    sprintf("%.1f (%.1f)", mean(v, na.rm = TRUE), sd(v, na.rm = TRUE))
  }

  w5_table1 %>%
    mutate(pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 5 summary: characteristics become rows, PA categories become
# columns. N is cast to character so it can share the value column with the
# formatted strings. The table is then rendered with kable.
w5_table1_summary_wide <- w5_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(cols = -pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = pa_level, values_from = Value) %>%
  select(all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA")))

w5_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 5 by physical activity category."
  )
| Characteristic | High | Moderate | Low | Sedentary | Missing PA |
|---|---|---|---|---|---|
| N | 1981 | 4936 | 2432 | 801 | 124 |
| Age, mean (SD) | 62.9 (7.8) | 65.5 (8.9) | 68.9 (10.0) | 73.0 (10.2) | 60.7 (5.5) |
| Female, n (%) | 934 (47.1%) | 2736 (55.4%) | 1507 (62.0%) | 472 (58.9%) | 56 (45.2%) |
| Hypertension, n (%) | 141 (7.1%) | 335 (6.8%) | 202 (8.3%) | 90 (11.2%) | 8 (6.5%) |
| Diabetes, n (%) | 33 (1.7%) | 112 (2.3%) | 93 (3.8%) | 49 (6.1%) | 1 (0.8%) |
| Stroke, n (%) | 10 (0.5%) | 32 (0.6%) | 43 (1.8%) | 64 (8.0%) | 0 (0.0%) |
| Abnormal heart rhythm, n (%) | 25 (1.3%) | 99 (2.0%) | 78 (3.2%) | 52 (6.5%) | 2 (1.6%) |
| Heart failure, n (%) | 1 (0.1%) | 4 (0.1%) | 8 (0.3%) | 11 (1.4%) | 0 (0.0%) |
| Baseline Alzheimer’s, n (%) | 0 (0.0%) | 7 (0.1%) | 6 (0.2%) | 33 (4.1%) | 0 (0.0%) |
| Baseline dementia, n (%) | 0 (0.0%) | 11 (0.2%) | 26 (1.1%) | 73 (9.1%) | 0 (0.0%) |
wave 6 table 1
# Wave 6 Table 1 inputs. Each condition flag is 1 when the core item equals 1 and
# 0 otherwise; a missing item is scored 0.
w6_table1_raw <- local({
  # Logical condition -> 1/0, with NA scored as 0.
  flag01 <- function(cond) coalesce(as.numeric(cond), 0)

  w6_core %>%
    mutate(
      hypertension = flag01(hediabp == 1),
      heart_failure = flag01(hediahf == 1),
      abnormal_heart_rhythm = flag01(hediaar == 1),
      diabetes = flag01(hediadi == 1),
      stroke = flag01(hediast == 1),
      baseline_alzheimers = flag01(hedibad == 1),
      baseline_dementia = flag01(hedibde == 1),
      # PA level from the three HeAct items (presumably activity-frequency items
      # of decreasing intensity - confirm against the codebook). case_when takes
      # the first matching clause, so each level only tests its own item: a row
      # reaching "Moderate" already failed "High", and so on. "Sedentary" needs
      # all three items to be 3 or 4; anything else is NA.
      pa_level = case_when(
        HeActa %in% c(1, 2) ~ "High",
        HeActb %in% c(1, 2) ~ "Moderate",
        HeActc %in% c(1, 2) ~ "Low",
        HeActa %in% c(3, 4) & HeActb %in% c(3, 4) & HeActc %in% c(3, 4) ~ "Sedentary",
        TRUE ~ NA_character_
      )
    )
})
# Methodological note: the original age handling was simply `age = indager`.
# It is not used for the final summaries because negative indager values are
# special missing codes.
# Wave 6 table 1, corrected version: negative ages become NA.
w6_table1 <- w6_table1_raw %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = as.numeric(indager),
    age = if_else(age < 0, NA_real_, age),
    sex = as_factor(DhSex)
  ) %>%
  select(
    idauniq, pa_level, age, sex,
    hypertension, heart_failure, abnormal_heart_rhythm, diabetes, stroke,
    baseline_alzheimers, baseline_dementia
  )
# Diagnostic: age distribution within each PA level after the negative-code
# recode, plus how many ages are NA.
w6_table1 %>%
  group_by(pa_level) %>%
  summarise(
    min_age = min(age, na.rm = TRUE),
    q1_age = quantile(age, probs = 0.25, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    mean_age = mean(age, na.rm = TRUE),
    q3_age = quantile(age, probs = 0.75, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    sd_age = sd(age, na.rm = TRUE),
    n_missing_age = sum(is.na(age))
  )
## # A tibble: 5 × 9
## pa_level min_age q1_age median_age mean_age q3_age max_age sd_age
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 High 28 57 63 63.3 69 89 8.44
## 2 Moderate 31 59 65 65.8 73 89 9.35
## 3 Low 41 61 69 69.4 78 89 10.5
## 4 Sedentary 40 62 72 71.1 80 89 10.8
## 5 <NA> 63 66.8 71 73 77.2 87 10.4
## # ℹ 1 more variable: n_missing_age <int>
# Wave 6 Table 1 cells: one row per PA category (NA becomes "Missing PA") with N,
# age as "mean (SD)" and each characteristic as "count (percent)". The percent
# denominator is the full group size.
# NOTE(review): forcats::fct_explicit_na() is deprecated since forcats 1.0.0
# (successor: fct_na_value_to_level()); left unchanged to preserve behaviour.
w6_table1_summary <- local({
  # "k (p%)" for the TRUE entries of a logical vector.
  n_pct <- function(hit) {
    k <- sum(hit, na.rm = TRUE)
    sprintf("%d (%.1f%%)", k, 100 * k / length(hit))
  }
  # "mean (SD)" to one decimal place, ignoring NA.
  mean_sd <- function(v) {
    sprintf("%.1f (%.1f)", mean(v, na.rm = TRUE), sd(v, na.rm = TRUE))
  }

  w6_table1 %>%
    mutate(pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 6 summary: characteristics become rows, PA categories become
# columns. N is cast to character so it can share the value column with the
# formatted strings. The table is then rendered with kable.
w6_table1_summary_wide <- w6_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(cols = -pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = pa_level, values_from = Value) %>%
  select(all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA")))

w6_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 6 by physical activity category."
  )
| Characteristic | High | Moderate | Low | Sedentary | Missing PA |
|---|---|---|---|---|---|
| N | 3156 | 4810 | 1626 | 1005 | 4 |
| Age, mean (SD) | 63.3 (8.4) | 65.8 (9.4) | 69.4 (10.5) | 71.1 (10.8) | 73.0 (10.4) |
| Female, n (%) | 1526 (48.4%) | 2713 (56.4%) | 1087 (66.9%) | 527 (52.4%) | 4 (100.0%) |
| Hypertension, n (%) | 217 (6.9%) | 350 (7.3%) | 117 (7.2%) | 138 (13.7%) | 0 (0.0%) |
| Diabetes, n (%) | 57 (1.8%) | 130 (2.7%) | 50 (3.1%) | 79 (7.9%) | 0 (0.0%) |
| Stroke, n (%) | 10 (0.3%) | 47 (1.0%) | 44 (2.7%) | 54 (5.4%) | 0 (0.0%) |
| Abnormal heart rhythm, n (%) | 69 (2.2%) | 107 (2.2%) | 72 (4.4%) | 38 (3.8%) | 0 (0.0%) |
| Heart failure, n (%) | 2 (0.1%) | 6 (0.1%) | 4 (0.2%) | 13 (1.3%) | 0 (0.0%) |
| Baseline Alzheimer’s, n (%) | 2 (0.1%) | 7 (0.1%) | 4 (0.2%) | 37 (3.7%) | 0 (0.0%) |
| Baseline dementia, n (%) | 2 (0.1%) | 15 (0.3%) | 18 (1.1%) | 77 (7.7%) | 0 (0.0%) |
wave 7 table 1
## Wave 7 table 1
# Wave 7 Table 1 inputs. Each condition flag is 1 when the core item equals 1 and
# 0 otherwise; a missing item is scored 0.
w7_table1_raw <- local({
  # Logical condition -> 1/0, with NA scored as 0.
  flag01 <- function(cond) coalesce(as.numeric(cond), 0)

  w7_core %>%
    mutate(
      hypertension = flag01(hediabp == 1),
      heart_failure = flag01(hediahf == 1),
      abnormal_heart_rhythm = flag01(hediaar == 1),
      diabetes = flag01(hediadi == 1),
      stroke = flag01(hediast == 1),
      baseline_alzheimers = flag01(hedibad == 1),
      baseline_dementia = flag01(hedibde == 1),
      # PA level from the three HeAct items (presumably activity-frequency items
      # of decreasing intensity - confirm against the codebook). case_when takes
      # the first matching clause, so each level only tests its own item: a row
      # reaching "Moderate" already failed "High", and so on. "Sedentary" needs
      # all three items to be 3 or 4; anything else is NA.
      pa_level = case_when(
        HeActa %in% c(1, 2) ~ "High",
        HeActb %in% c(1, 2) ~ "Moderate",
        HeActc %in% c(1, 2) ~ "Low",
        HeActa %in% c(3, 4) & HeActb %in% c(3, 4) & HeActc %in% c(3, 4) ~ "Sedentary",
        TRUE ~ NA_character_
      )
    )
})
# Methodological note: the original age handling was simply `age = indager`.
# It is not used for the final summaries because negative indager values are
# special missing codes.
# Wave 7 table 1, corrected version: negative ages become NA.
w7_table1 <- w7_table1_raw %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = as.numeric(indager),
    age = if_else(age < 0, NA_real_, age),
    sex = as_factor(DhSex)
  ) %>%
  select(
    idauniq, pa_level, age, sex,
    hypertension, heart_failure, abnormal_heart_rhythm, diabetes, stroke,
    baseline_alzheimers, baseline_dementia
  )
# Diagnostic: age distribution within each PA level after the negative-code
# recode, plus how many ages are NA.
w7_table1 %>%
  group_by(pa_level) %>%
  summarise(
    min_age = min(age, na.rm = TRUE),
    q1_age = quantile(age, probs = 0.25, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    mean_age = mean(age, na.rm = TRUE),
    q3_age = quantile(age, probs = 0.75, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    sd_age = sd(age, na.rm = TRUE),
    n_missing_age = sum(is.na(age))
  )
## # A tibble: 5 × 9
## pa_level min_age q1_age median_age mean_age q3_age max_age sd_age
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 High 29 58 64 63.9 69 89 8.68
## 2 Moderate 33 60 66 66.7 73 89 9.21
## 3 Low 39 62 70 70.1 78 89 10.3
## 4 Sedentary 38 64 74 72.3 81 89 10.6
## 5 <NA> 63 68.5 74 73 78 82 9.54
## # ℹ 1 more variable: n_missing_age <int>
# Wave 7 Table 1 cells: one row per PA category (NA becomes "Missing PA") with N,
# age as "mean (SD)" and each characteristic as "count (percent)". The percent
# denominator is the full group size.
# NOTE(review): forcats::fct_explicit_na() is deprecated since forcats 1.0.0
# (successor: fct_na_value_to_level()); left unchanged to preserve behaviour.
w7_table1_summary <- local({
  # "k (p%)" for the TRUE entries of a logical vector.
  n_pct <- function(hit) {
    k <- sum(hit, na.rm = TRUE)
    sprintf("%d (%.1f%%)", k, 100 * k / length(hit))
  }
  # "mean (SD)" to one decimal place, ignoring NA.
  mean_sd <- function(v) {
    sprintf("%.1f (%.1f)", mean(v, na.rm = TRUE), sd(v, na.rm = TRUE))
  }

  w7_table1 %>%
    mutate(pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 7 summary: characteristics become rows, PA categories become
# columns. N is cast to character so it can share the value column with the
# formatted strings. The table is then rendered with kable.
w7_table1_summary_wide <- w7_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(cols = -pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = pa_level, values_from = Value) %>%
  select(all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA")))

w7_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 7 by physical activity category."
  )
| Characteristic | High | Moderate | Low | Sedentary | Missing PA |
|---|---|---|---|---|---|
| N | 2848 | 4426 | 1483 | 906 | 3 |
| Age, mean (SD) | 63.9 (8.7) | 66.7 (9.2) | 70.1 (10.3) | 72.3 (10.6) | 73.0 (9.5) |
| Female, n (%) | 1400 (49.2%) | 2529 (57.1%) | 963 (64.9%) | 474 (52.3%) | 2 (66.7%) |
| Hypertension, n (%) | 141 (5.0%) | 291 (6.6%) | 111 (7.5%) | 111 (12.3%) | 0 (0.0%) |
| Diabetes, n (%) | 45 (1.6%) | 110 (2.5%) | 60 (4.0%) | 64 (7.1%) | 0 (0.0%) |
| Stroke, n (%) | 13 (0.5%) | 39 (0.9%) | 35 (2.4%) | 49 (5.4%) | 1 (33.3%) |
| Abnormal heart rhythm, n (%) | 55 (1.9%) | 116 (2.6%) | 54 (3.6%) | 49 (5.4%) | 0 (0.0%) |
| Heart failure, n (%) | 4 (0.1%) | 6 (0.1%) | 11 (0.7%) | 12 (1.3%) | 0 (0.0%) |
| Baseline Alzheimer’s, n (%) | 0 (0.0%) | 7 (0.2%) | 7 (0.5%) | 29 (3.2%) | 0 (0.0%) |
| Baseline dementia, n (%) | 3 (0.1%) | 22 (0.5%) | 13 (0.9%) | 76 (8.4%) | 0 (0.0%) |
Wave 8 table 1
# Wave 8: derive the comorbidity indicators and the physical-activity
# category used for Table 1 from the wave 8 core data.
w8_table1_raw <- w8_core %>%
  mutate(
    # Each indicator is 1 when the source variable equals 1 and 0 for every
    # other value, including missing.
    hypertension = case_when(hediabp == 1 ~ 1, TRUE ~ 0),
    heart_failure = case_when(hediahf == 1 ~ 1, TRUE ~ 0),
    abnormal_heart_rhythm = case_when(hediaar == 1 ~ 1, TRUE ~ 0),
    diabetes = case_when(hediadi == 1 ~ 1, TRUE ~ 0),
    stroke = case_when(hediast == 1 ~ 1, TRUE ~ 0),
    baseline_alzheimers = case_when(hedibad == 1 ~ 1, TRUE ~ 0),
    baseline_dementia = case_when(hedibde == 1 ~ 1, TRUE ~ 0),
    # case_when() applies the first matching rule, so each rule is reached
    # only when the activity variables tested above it are not coded 1 or 2.
    pa_level = case_when(
      heacta %in% c(1, 2) ~ "High",
      heactb %in% c(1, 2) ~ "Moderate",
      heactc %in% c(1, 2) ~ "Low",
      heacta %in% c(3, 4) &
        heactb %in% c(3, 4) &
        heactc %in% c(3, 4) ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )
# Methodological note (wave 8 age handling): an earlier version used
#   age = indager
# directly. That version is not used in the final summaries because negative
# indager values are special missing codes; they are set to NA below.

# Wave 8 analysis table: identifier, ordered PA category, cleaned age, sex
# as a factor, and the comorbidity indicators.
w8_table1 <- w8_table1_raw %>%
  mutate(
    pa_level = factor(
      pa_level,
      levels = c("High", "Moderate", "Low", "Sedentary")
    ),
    # Keep non-negative ages; negative codes and missing values become NA.
    age = case_when(
      indager >= 0 ~ as.numeric(indager),
      TRUE ~ NA_real_
    ),
    sex = as_factor(indsex)
  ) %>%
  select(
    idauniq,
    pa_level,
    age,
    sex,
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )
# Diagnostic check: age distribution and number of missing ages within each
# physical-activity category of the wave 8 table.
summarise(
  group_by(w8_table1, pa_level),
  min_age = min(age, na.rm = TRUE),
  q1_age = quantile(age, 0.25, na.rm = TRUE),
  median_age = median(age, na.rm = TRUE),
  mean_age = mean(age, na.rm = TRUE),
  q3_age = quantile(age, 0.75, na.rm = TRUE),
  max_age = max(age, na.rm = TRUE),
  sd_age = sd(age, na.rm = TRUE),
  n_missing_age = sum(is.na(age))
)
## # A tibble: 5 × 9
## pa_level min_age q1_age median_age mean_age q3_age max_age sd_age
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 High 31 60 65 65.5 71 89 8.14
## 2 Moderate 34 62 68 68.4 75 89 8.84
## 3 Low 40 64 71 71.3 80 89 9.74
## 4 Sedentary 40 66 74 73.4 81 89 9.61
## 5 <NA> 66 67.5 69 70.3 72.5 76 5.13
## # ℹ 1 more variable: n_missing_age <int>
# Wave 8 Table 1 summary: one row per physical-activity category, giving the
# group size, age as "mean (SD)", and each characteristic as "n (%)".
w8_table1_summary <- local({
  # Format a logical vector as "count (percent%)". NA entries are left out
  # of the count but remain in the denominator, which is the group size.
  n_pct <- function(hit) {
    hits <- sum(hit, na.rm = TRUE)
    sprintf("%d (%.1f%%)", hits, 100 * hits / length(hit))
  }

  w8_table1 %>%
    # Participants without a PA category get an explicit "Missing PA" level
    # so they appear as a named group.
    mutate(
      pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
    ) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = sprintf(
        "%.1f (%.1f)",
        mean(age, na.rm = TRUE),
        sd(age, na.rm = TRUE)
      ),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Reshape the wave 8 summary so each characteristic is a row and each
# physical-activity category is a column (the Table 1 layout).
w8_table1_summary_wide <- w8_table1_summary %>%
  # N is a count while the other columns are formatted text; convert it so
  # all of them can be stacked in one "Value" column.
  mutate(N = as.character(N)) %>%
  pivot_longer(
    cols = -pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = pa_level,
    values_from = Value
  ) %>%
  # any_of() keeps the intended column order but tolerates a category that
  # produced no column (for example, "Missing PA" when every participant has
  # a PA category); naming such a column directly would make select() stop
  # with an error.
  select(
    Characteristic,
    any_of(c("High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )
# Display the wave 8 Table 1 summary as a captioned table.
knitr::kable(
  w8_table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 8 by physical activity category."
)

| Characteristic | High | Moderate | Low | Sedentary | Missing PA |
|---|---|---|---|---|---|
| N | 2443 | 3932 | 1281 | 786 | 3 |
| Age, mean (SD) | 65.5 (8.1) | 68.4 (8.8) | 71.3 (9.7) | 73.4 (9.6) | 70.3 (5.1) |
| Female, n (%) | 1204 (49.3%) | 2253 (57.3%) | 848 (66.2%) | 390 (49.6%) | 0 (0.0%) |
| Hypertension, n (%) | 111 (4.5%) | 204 (5.2%) | 92 (7.2%) | 78 (9.9%) | 0 (0.0%) |
| Diabetes, n (%) | 35 (1.4%) | 73 (1.9%) | 43 (3.4%) | 48 (6.1%) | 0 (0.0%) |
| Stroke, n (%) | 14 (0.6%) | 42 (1.1%) | 30 (2.3%) | 62 (7.9%) | 0 (0.0%) |
| Abnormal heart rhythm, n (%) | 49 (2.0%) | 99 (2.5%) | 49 (3.8%) | 50 (6.4%) | 0 (0.0%) |
| Heart failure, n (%) | 4 (0.2%) | 14 (0.4%) | 7 (0.5%) | 18 (2.3%) | 0 (0.0%) |
| Baseline Alzheimer’s, n (%) | 2 (0.1%) | 12 (0.3%) | 12 (0.9%) | 29 (3.7%) | 0 (0.0%) |
| Baseline dementia, n (%) | 7 (0.3%) | 18 (0.5%) | 25 (2.0%) | 76 (9.7%) | 0 (0.0%) |
Wave 9 table 1
## Wave 9 table 1
# Wave 9: derive the comorbidity indicators and the physical-activity
# category used for Table 1 from the wave 9 core data.
w9_table1_raw <- w9_core %>%
  mutate(
    # Each indicator is 1 when the source variable equals 1 and 0 for every
    # other value, including missing.
    hypertension = case_when(hediabp == 1 ~ 1, TRUE ~ 0),
    heart_failure = case_when(hediahf == 1 ~ 1, TRUE ~ 0),
    abnormal_heart_rhythm = case_when(hediaar == 1 ~ 1, TRUE ~ 0),
    diabetes = case_when(hediadi == 1 ~ 1, TRUE ~ 0),
    stroke = case_when(hediast == 1 ~ 1, TRUE ~ 0),
    baseline_alzheimers = case_when(hedibad == 1 ~ 1, TRUE ~ 0),
    baseline_dementia = case_when(hedibde == 1 ~ 1, TRUE ~ 0),
    # case_when() applies the first matching rule, so each rule is reached
    # only when the activity variables tested above it are not coded 1 or 2.
    pa_level = case_when(
      heacta %in% c(1, 2) ~ "High",
      heactb %in% c(1, 2) ~ "Moderate",
      heactc %in% c(1, 2) ~ "Low",
      heacta %in% c(3, 4) &
        heactb %in% c(3, 4) &
        heactc %in% c(3, 4) ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )
# Methodological note (wave 9 age handling): an earlier version used
#   age = indager
# directly. That version is not used in the final summaries because negative
# indager values are special missing codes; they are set to NA below.

# Wave 9 analysis table: identifier, ordered PA category, cleaned age, sex
# as a factor, and the comorbidity indicators.
w9_table1 <- w9_table1_raw %>%
  mutate(
    pa_level = factor(
      pa_level,
      levels = c("High", "Moderate", "Low", "Sedentary")
    ),
    # Keep non-negative ages; negative codes and missing values become NA.
    age = case_when(
      indager >= 0 ~ as.numeric(indager),
      TRUE ~ NA_real_
    ),
    sex = as_factor(indsex)
  ) %>%
  select(
    idauniq,
    pa_level,
    age,
    sex,
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )
# Diagnostic check: age distribution and number of missing ages within each
# physical-activity category of the wave 9 table.
summarise(
  group_by(w9_table1, pa_level),
  min_age = min(age, na.rm = TRUE),
  q1_age = quantile(age, 0.25, na.rm = TRUE),
  median_age = median(age, na.rm = TRUE),
  mean_age = mean(age, na.rm = TRUE),
  q3_age = quantile(age, 0.75, na.rm = TRUE),
  max_age = max(age, na.rm = TRUE),
  sd_age = sd(age, na.rm = TRUE),
  n_missing_age = sum(is.na(age))
)
## # A tibble: 5 × 9
## pa_level min_age q1_age median_age mean_age q3_age max_age sd_age
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 High 33 56 64 63.9 71 89 9.20
## 2 Moderate 33 61 68 67.7 75 90 9.95
## 3 Low 30 64 72 71.2 80.5 90 10.9
## 4 Sedentary 42 65 74 73.4 83 90 10.9
## 5 <NA> 56 56 56 56 56 56 NA
## # ℹ 1 more variable: n_missing_age <int>
# Wave 9 Table 1 summary: one row per physical-activity category, giving the
# group size, age as "mean (SD)", and each characteristic as "n (%)".
w9_table1_summary <- local({
  # Format a logical vector as "count (percent%)". NA entries are left out
  # of the count but remain in the denominator, which is the group size.
  n_pct <- function(hit) {
    hits <- sum(hit, na.rm = TRUE)
    sprintf("%d (%.1f%%)", hits, 100 * hits / length(hit))
  }

  w9_table1 %>%
    # Participants without a PA category get an explicit "Missing PA" level
    # so they appear as a named group.
    mutate(
      pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
    ) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = sprintf(
        "%.1f (%.1f)",
        mean(age, na.rm = TRUE),
        sd(age, na.rm = TRUE)
      ),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Reshape the wave 9 summary so each characteristic is a row and each
# physical-activity category is a column (the Table 1 layout).
w9_table1_summary_wide <- w9_table1_summary %>%
  # N is a count while the other columns are formatted text; convert it so
  # all of them can be stacked in one "Value" column.
  mutate(N = as.character(N)) %>%
  pivot_longer(
    cols = -pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = pa_level,
    values_from = Value
  ) %>%
  # any_of() keeps the intended column order but tolerates a category that
  # produced no column (for example, "Missing PA" when every participant has
  # a PA category); naming such a column directly would make select() stop
  # with an error.
  select(
    Characteristic,
    any_of(c("High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )
# Display the wave 9 Table 1 summary as a captioned table.
knitr::kable(
  w9_table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 9 by physical activity category."
)

| Characteristic | High | Moderate | Low | Sedentary | Missing PA |
|---|---|---|---|---|---|
| N | 2741 | 3876 | 1337 | 780 | 2 |
| Age, mean (SD) | 63.9 (9.2) | 67.7 (9.9) | 71.2 (10.9) | 73.4 (10.9) | 56.0 (NA) |
| Female, n (%) | 1384 (50.5%) | 2203 (56.8%) | 892 (66.7%) | 402 (51.5%) | 1 (50.0%) |
| Hypertension, n (%) | 198 (7.2%) | 289 (7.5%) | 102 (7.6%) | 113 (14.5%) | 0 (0.0%) |
| Diabetes, n (%) | 88 (3.2%) | 139 (3.6%) | 60 (4.5%) | 61 (7.8%) | 0 (0.0%) |
| Stroke, n (%) | 14 (0.5%) | 39 (1.0%) | 28 (2.1%) | 39 (5.0%) | 0 (0.0%) |
| Abnormal heart rhythm, n (%) | 68 (2.5%) | 168 (4.3%) | 64 (4.8%) | 59 (7.6%) | 0 (0.0%) |
| Heart failure, n (%) | 2 (0.1%) | 8 (0.2%) | 18 (1.3%) | 16 (2.1%) | 0 (0.0%) |
| Baseline Alzheimer’s, n (%) | 1 (0.0%) | 9 (0.2%) | 9 (0.7%) | 28 (3.6%) | 0 (0.0%) |
| Baseline dementia, n (%) | 4 (0.1%) | 23 (0.6%) | 23 (1.7%) | 76 (9.7%) | 1 (50.0%) |
Wave summary numbers
### Total participant numbers by wave
# Number of rows in each wave's data set, waves 1 to 9. Wave 1 uses the
# joined data `w1`; the other waves use their core files.
wave_total_participant_numbers <- tibble(
  wave = paste("Wave", 1:9),
  total_n = vapply(
    list(
      w1, w2_core, w3_core, w4_core, w5_core,
      w6_core, w7_core, w8_core, w9_core
    ),
    nrow,
    integer(1)
  )
)
# Display the per-wave participant totals as a captioned table.
knitr::kable(
  wave_total_participant_numbers,
  caption = "Total participant numbers by wave."
)

| wave | total_n |
|---|---|
| Wave 1 | 12099 |
| Wave 2 | 9432 |
| Wave 3 | 9771 |
| Wave 4 | 11050 |
| Wave 5 | 10274 |
| Wave 6 | 10601 |
| Wave 7 | 9666 |
| Wave 8 | 8445 |
| Wave 9 | 8736 |
### Diagnosis summary numbers by wave
# Wave 1 diagnosis summary numbers: participants flagged with Alzheimer's
# (alz_w1 == 1), dementia (dementia_w1 == 1), both, or either.
wave1_diagnosis_summary_numbers <- w1 %>%
  # Evaluate each flag once; NA flags are ignored when counting (na.rm).
  transmute(
    has_alz = alz_w1 == 1,
    has_dem = dementia_w1 == 1
  ) %>%
  summarise(
    wave = "Wave 1",
    alzheimers_n = sum(has_alz, na.rm = TRUE),
    dementia_n = sum(has_dem, na.rm = TRUE),
    both_n = sum(has_alz & has_dem, na.rm = TRUE),
    either_n = sum(has_alz | has_dem, na.rm = TRUE)
  )
# Wave 2 diagnosis summary numbers.
# Wave 2 diagnoses are read from the four response slots hedib01-hedib04;
# this summary counts code 8 as Alzheimer's and code 9 as dementia.
wave2_diagnosis_summary_numbers <- w2_core %>%
  transmute(
    # %in% never returns NA, so a missing slot counts as "not mentioned".
    has_alz = hedib01 %in% 8 | hedib02 %in% 8 |
      hedib03 %in% 8 | hedib04 %in% 8,
    has_dem = hedib01 %in% 9 | hedib02 %in% 9 |
      hedib03 %in% 9 | hedib04 %in% 9
  ) %>%
  summarise(
    wave = "Wave 2",
    alzheimers_n = sum(has_alz),
    dementia_n = sum(has_dem),
    # "Both" requires an Alzheimer's mention AND a dementia mention, matching
    # the definition used for the other waves. Counting any two slots coded
    # 8 or 9 would also count the same code recorded twice.
    both_n = sum(has_alz & has_dem),
    either_n = sum(has_alz | has_dem)
  )
# Wave 3 diagnosis summary numbers: in this wave hedibad / hedibde values of
# 1, 2 or 3 are all counted as a reported diagnosis.
wave3_diagnosis_summary_numbers <- w3_core %>%
  transmute(
    has_alz = hedibad %in% c(1, 2, 3),
    has_dem = hedibde %in% c(1, 2, 3)
  ) %>%
  summarise(
    wave = "Wave 3",
    alzheimers_n = sum(has_alz, na.rm = TRUE),
    dementia_n = sum(has_dem, na.rm = TRUE),
    both_n = sum(has_alz & has_dem, na.rm = TRUE),
    either_n = sum(has_alz | has_dem, na.rm = TRUE)
  )
# Waves 4 to 9 diagnosis summary numbers.
# These waves are all summarised with the same rule (hedibad == 1 for
# Alzheimer's, hedibde == 1 for dementia), so one helper builds each row.

# Count Alzheimer's and dementia reports in one wave's core data.
#   core:       wave data containing the columns hedibad and hedibde
#   wave_label: text stored in the `wave` column, e.g. "Wave 4"
# Returns a one-row data frame with the columns wave, alzheimers_n,
# dementia_n, both_n and either_n.
summarise_wave_diagnoses <- function(core, wave_label) {
  core %>%
    summarise(
      # `!!` injects the label's value, so it cannot be confused with a
      # column of `core` that happens to be called `wave_label`.
      wave = !!wave_label,
      alzheimers_n = sum(hedibad == 1, na.rm = TRUE),
      dementia_n = sum(hedibde == 1, na.rm = TRUE),
      both_n = sum(hedibad == 1 & hedibde == 1, na.rm = TRUE),
      either_n = sum(hedibad == 1 | hedibde == 1, na.rm = TRUE)
    )
}

wave4_diagnosis_summary_numbers <- summarise_wave_diagnoses(w4_core, "Wave 4")
wave5_diagnosis_summary_numbers <- summarise_wave_diagnoses(w5_core, "Wave 5")
wave6_diagnosis_summary_numbers <- summarise_wave_diagnoses(w6_core, "Wave 6")
wave7_diagnosis_summary_numbers <- summarise_wave_diagnoses(w7_core, "Wave 7")
wave8_diagnosis_summary_numbers <- summarise_wave_diagnoses(w8_core, "Wave 8")
wave9_diagnosis_summary_numbers <- summarise_wave_diagnoses(w9_core, "Wave 9")
# Stack the one-row diagnosis summaries for waves 1 to 9 into one table,
# in wave order.
wave_diagnosis_summary_numbers <- bind_rows(
  list(
    wave1_diagnosis_summary_numbers,
    wave2_diagnosis_summary_numbers,
    wave3_diagnosis_summary_numbers,
    wave4_diagnosis_summary_numbers,
    wave5_diagnosis_summary_numbers,
    wave6_diagnosis_summary_numbers,
    wave7_diagnosis_summary_numbers,
    wave8_diagnosis_summary_numbers,
    wave9_diagnosis_summary_numbers
  )
)
# Display the per-wave diagnosis counts as a captioned table.
knitr::kable(
  wave_diagnosis_summary_numbers,
  caption = "Diagnosis summary numbers by wave."
)

| wave | alzheimers_n | dementia_n | both_n | either_n |
|---|---|---|---|---|
| Wave 1 | 14 | 64 | 3 | 75 |
| Wave 2 | 11 | 43 | 2 | 52 |
| Wave 3 | 32 | 112 | 11 | 133 |
| Wave 4 | 37 | 97 | 13 | 121 |
| Wave 5 | 46 | 110 | 19 | 137 |
| Wave 6 | 50 | 112 | 17 | 145 |
| Wave 7 | 43 | 114 | 19 | 138 |
| Wave 8 | 55 | 126 | 23 | 158 |
| Wave 9 | 47 | 127 | 21 | 153 |
### Missing physical activity summary numbers by wave
# Wave 1: number of participants whose pa_level is NA in the cleaned
# wave 1 table.
wave1_missing_pa_summary_numbers <- summarise(
  table1_w1_clean,
  wave = "Wave 1",
  missing_pa_n = sum(is.na(pa_level))
)
# Wave 2 missing PA: load the wave 2 derived variables, attach them to the
# core data by participant id, and count participants without a valid
# `palevel`.
w2_derived <- read_dta("raw data/RAW_data_stata/wave_2_derived_variables.dta")
w2 <- w2_core %>%
  left_join(w2_derived, by = "idauniq")
wave2_missing_pa_summary_numbers <- w2 %>%
  summarise(
    wave = "Wave 2",
    # Only the codes 0-3 are treated as valid; any other value (including
    # NA) counts as missing.
    missing_pa_n = sum(!(palevel %in% c(0, 1, 2, 3)))
  )
# Waves 3 to 9: missing-PA counts are NA placeholders for now, because the PA
# variables for these waves have not been standardised yet.
wave3_missing_pa_summary_numbers <- tibble(
  wave = "Wave 3",
  missing_pa_n = NA_integer_
)
wave4_missing_pa_summary_numbers <- tibble(
  wave = "Wave 4",
  missing_pa_n = NA_integer_
)
wave5_missing_pa_summary_numbers <- tibble(
  wave = "Wave 5",
  missing_pa_n = NA_integer_
)
wave6_missing_pa_summary_numbers <- tibble(
  wave = "Wave 6",
  missing_pa_n = NA_integer_
)
wave7_missing_pa_summary_numbers <- tibble(
  wave = "Wave 7",
  missing_pa_n = NA_integer_
)
wave8_missing_pa_summary_numbers <- tibble(
  wave = "Wave 8",
  missing_pa_n = NA_integer_
)
wave9_missing_pa_summary_numbers <- tibble(
  wave = "Wave 9",
  missing_pa_n = NA_integer_
)
# Stack the nine one-row per-wave missing-PA summaries (waves 1 to 9, in
# order) into a single table with one row per wave.
wave_missing_pa_summary_numbers <- bind_rows(list(
  wave1_missing_pa_summary_numbers,
  wave2_missing_pa_summary_numbers,
  wave3_missing_pa_summary_numbers,
  wave4_missing_pa_summary_numbers,
  wave5_missing_pa_summary_numbers,
  wave6_missing_pa_summary_numbers,
  wave7_missing_pa_summary_numbers,
  wave8_missing_pa_summary_numbers,
  wave9_missing_pa_summary_numbers
))
# Render the per-wave missing-PA counts as a captioned table.
knitr::kable(
  wave_missing_pa_summary_numbers,
  caption = "Missing physical activity summary numbers by wave."
)

| wave | missing_pa_n |
|---|---|
| Wave 1 | 194 |
| Wave 2 | 139 |
| Wave 3 | NA |
| Wave 4 | NA |
| Wave 5 | NA |
| Wave 6 | NA |
| Wave 7 | NA |
| Wave 8 | NA |
| Wave 9 | NA |
## correction for PA missing
# Rebuild the missing-PA table with real counts for every wave. Each row is
# the number of NA pa_level values in that wave's table; for wave 2, palevel
# values outside 0-3 are first recoded to NA.
wave_missing_pa_summary_numbers <- bind_rows(
  summarise(table1_w1_clean, wave = "Wave 1", missing_pa_n = sum(is.na(pa_level))),
  summarise(
    mutate(w2, pa_level = if_else(palevel %in% c(0, 1, 2, 3), palevel, NA_real_)),
    wave = "Wave 2",
    missing_pa_n = sum(is.na(pa_level))
  ),
  summarise(w3_table1, wave = "Wave 3", missing_pa_n = sum(is.na(pa_level))),
  summarise(w4_table1, wave = "Wave 4", missing_pa_n = sum(is.na(pa_level))),
  summarise(w5_table1, wave = "Wave 5", missing_pa_n = sum(is.na(pa_level))),
  summarise(w6_table1, wave = "Wave 6", missing_pa_n = sum(is.na(pa_level))),
  summarise(w7_table1, wave = "Wave 7", missing_pa_n = sum(is.na(pa_level))),
  summarise(w8_table1, wave = "Wave 8", missing_pa_n = sum(is.na(pa_level))),
  summarise(w9_table1, wave = "Wave 9", missing_pa_n = sum(is.na(pa_level)))
)
### Combined wave summary numbers
# Combine the per-wave totals, diagnosis counts and missing-PA counts into one
# table keyed on `wave`. final_analysis_n is filled in for Wave 1 only (the
# row count of analysis_w1); every other wave gets NA.
wave_summary_numbers_table <- wave_total_participant_numbers %>%
  left_join(wave_diagnosis_summary_numbers, by = "wave") %>%
  left_join(wave_missing_pa_summary_numbers, by = "wave") %>%
  mutate(
    final_analysis_n = if_else(wave == "Wave 1", nrow(analysis_w1), NA_integer_)
  )
# Render the combined per-wave summary as a captioned table.
knitr::kable(
  wave_summary_numbers_table,
  caption = "Combined wave summary numbers."
)

| wave | total_n | alzheimers_n | dementia_n | both_n | either_n | missing_pa_n | final_analysis_n |
|---|---|---|---|---|---|---|---|
| Wave 1 | 12099 | 14 | 64 | 3 | 75 | 194 | 11805 |
| Wave 2 | 9432 | 11 | 43 | 2 | 52 | 139 | NA |
| Wave 3 | 9771 | 32 | 112 | 11 | 133 | 15 | NA |
| Wave 4 | 11050 | 37 | 97 | 13 | 121 | 15 | NA |
| Wave 5 | 10274 | 46 | 110 | 19 | 137 | 124 | NA |
| Wave 6 | 10601 | 50 | 112 | 17 | 145 | 4 | NA |
| Wave 7 | 9666 | 43 | 114 | 19 | 138 | 3 | NA |
| Wave 8 | 8445 | 55 | 126 | 23 | 158 | 3 | NA |
| Wave 9 | 8736 | 47 | 127 | 21 | 153 | 2 | NA |
Part 2: Data Analysis
2.1 Aims and objectives
The principal aim of the following data analysis is to investigate whether there is any directional association between PA and incident dementia risk in adults over fifty years old across a ten-year follow-up period in the longitudinal dataset provided by ELSA. ELSA is a repository of data from the population residing in private domiciles in England.
The ELSA data includes social demographic variables, in addition to lifestyle and health characteristics. The original sample was based on participants who responded between 1998 to 2001 to the Health Survey for England (HSE), following which the original respondents participated in interviews every two years and were organised into ‘waves’. Interviews included the same questions being inquired about health and lifestyle every two-years. The participant pool was supplemented across certain waves. The above details make the ELSA data-set suitable to meet the primary aim of the analysis by providing a long follow-up period from a representative population whereby new diagnoses of dementia, and baseline levels of physical activity can be matched to assess for any present association.(33)
An objective of the data analysis was to complement the literature review rather than replicate any particular study that formed part of it. Additionally other objectives included the derivation of a baseline cohort from ELSA wave 1 that depicted different levels of PA, dementia, alzheimer’s dementia, alongside age, gender, vascular disease, and other comorbidities to elucidate the raw dataset at inception. Secondly, to categorise the raw dataset based on PA level to assess whether a pattern emerged between different PA levels and different ages and comorbidities. Thirdly the exclusion of participants who have at baseline any dementia or pathological cognitive state for the purpose of minimising reverse causation and finally to utilise Cox proportional hazard to assess for any association between PA and incident dementia.
The final objective was to see if the results could be practically relevant to population health and inform discussions between clinicians and patients.
2.2 Design and Methods
2.2.1 Study design
The details of the data source, ELSA, used to perform this data analysis is documented in section 2.1 (Aims and Objectives).
In order to appreciate any emergent patterns at baseline prior to longitudinal analysis, the wave 1 core dataset was joined using the distinct participant ID (‘idauniq’) with the wave 1 derived variables which led to the creation of a unique wave 1 (‘w1’) dataset that combined the variables of concern from the core dataset (dementia, Alzheimer’s, PA, and IQCODE) with variables from the ‘wave 1 ifs derived variables’ dataset such as smoking status, education, employment and depression score means. This process is part of data stewardship by establishing whether the data is likely to be accurate by enabling expected patterns to be readily visualised, and similarly to highlight any peculiarities within the data. The same process was performed for subsequent waves, for the additional purpose of producing descriptive summaries for each wave.
For the longitudinal analysis, the wave 1 baseline dataset with dementia, Alzheimer’s, Parkinson’s disease and pathological cognitive decline excluded (termed ‘analysis_w1’), was followed up across subsequent waves to observe for cases of incident dementia via the participant ID.
2.2.2 Physical activity
In ELSA, PA was gathered via self-reported questionnaires in which participants indicated the type of activity that they engaged in, which was then apportioned into distinct categories to reflect the intensity of activity (vigorous, moderate and mild). The frequency with which they engaged in activity was also gathered. In certain studies that conducted research on the ELSA dataset, including one in the literature review, the vigorous and moderate categories were combined, potentially reducing the granularity of the information and losing the ability to adequately discriminate between various PA groups.(24) As such, a methodological decision was taken to produce four categories in this analysis: ‘high’, ‘moderate’, ‘low’ and ‘sedentary’, where ‘high’ equated to ELSA’s vigorous (coded as: heacta), ‘moderate’ to ELSA’s moderate (coded as: heactb), ‘low’ to ELSA’s mild (coded as: heactc), and ‘sedentary’ to participants who did not meet the thresholds for ‘high’, ‘moderate’, or ‘low’ (namely ELSA’s vigorous, moderate or mild respectively).
```r
w1_palevel = case_when(
  heacta %in% c(1, 2) ~ "High",
  !heacta %in% c(1, 2) & heactb %in% c(1, 2) ~ "Moderate",
  !heacta %in% c(1, 2) & !heactb %in% c(1, 2) & heactc %in% c(1, 2) ~ "Low",
  heacta %in% c(3, 4) & heactb %in% c(3, 4) & heactc %in% c(3, 4) ~ "Sedentary",
```
Each PA level created in the data analysis related to participating at the PA level for a frequency of at least once per week. If a PA level(s) was established less than once per week they were classed as ‘sedentary’. As per ELSA’s description, ‘high’ (ELSA’s vigorous) equated to activities that included digging, aerobics and cycling; ‘moderate’ (ELSA’s moderate) related to activities such as a moderately paced walk, dancing or cleaning a car; ‘low’ (ELSA’s mild) was housework to include laundry. To qualify as any of ‘high’, ‘moderate’ or ‘low’, to establish a unique cohort for each, the participants could not be able to be included in any of the other PA levels.
Choosing to delineate PA into these distinct categories could prove more useful when providing patients with information about types of activities that could prove profitable to reducing the risk of dementia, by creating specific PA levels that relate to particular types of activities rather than diluting the nuances between PA levels by combining disparate levels together, and therefore not being able to decipher what type of activity is driving any change produced. Furthermore commercial fitness based opportunities where everyday activities are promoted to encourage against sedentary lifestyles could be leveraged to improve healthier lifestyles in a stepwise moderated way.
2.2.3 Incident dementia
Dementia was identified in ELSA by a participant’s positive response to the following question: ‘Has a doctor ever told you that you have (or have had) any of the conditions on this card (with option ’9’ responding to ‘Dementia, organic brain syndrome, senility or any other serious memory impairment’)?’.(34 pg.24) A distinction should be made regarding the use of IQCODE which could be used to denote dementia, but instead was used to establish pathological cognitive impairment in this data analysis. Dementia was solely appropriated to participants identified by the above self-assessment question measure streamlining the process in which dementia was identified in ELSA, whilst IQCODE was used to eliminate pathological cognitive decline participants in an effort to curb reverse causality and as part of sensitivity analysis.
Furthermore the interviewer query that relates to dementia is not reserved only for ‘dementia’ but also senility and other named syndromes and impairments that impact cognition resulting in a heterogenous compilation of diagnostic states as per the design of the ELSA study. Secondly in relation to the ELSA design, Alzheimer’s disease was captured as a separate disease state, and not combined with ‘dementia’, and so as to maintain integrity to the ELSA original construct dementia cases were identified by the aforementioned ‘dementia’ question solely.
In order to locate cases of incident dementia in waves subsequent to wave 1, a longitudinal analysis dataset was created in which dementia indicators from waves 2 to 9 were joined to eligible participants via a unique identifier. By this method the first wave in which any case of incident dementia was found could be isolated.
2.2.4 Covariates, data stewardship and data quality
Variables that could influence the dementia outcome variable were chosen to include modifiable, nonmodifiable, pathological and lifestyle factors, in order to assist with discussions with diverse patient populations about how these findings in conjunction with other research and national guidance could be used to live healthier lives in context of dementia risk. Modifiable covariates chosen included smoking and hypertension. Smoking has been noted to increase dementia risk by between fifty to eighty percent (35 pg.466) and as such was noted as important to include here.
The smoking variable provided an example of how data stewardship and quality were approached within the analysis. The value labels within wave 1 for smoking (‘smokerstat’) were: never smoked, current smoker, three ex-smoker categories (ex-smoker occasional, ex-smoker regular, ex-smoker DK-frequency) and negative codes that included ‘refusal’, ‘didn’t know’, and ‘wasn’t asked’. These value labels were then recoded into three categories (Never; Ex-smoker; Current) in a newly created ‘smoking3’ variable, and in the Cox model a binary variable was created:

```r
current_smoker = if_else(smoking3 == "Current", 1, 0, missing = NA_real_)
```
By performing the above the aim was the responsible management of data by assessing the original coding variables, handling negative categories explicitly and producing a current smoker indicator appropriate for analysis.
In England current national guidance refers to cardiovascular disease as a risk factor for dementia, and states that the early prevention of cardiovascular and stroke states are thought to attenuate both vascular and mixed dementias. The inclusion of hypertension, diabetes, abnormal heart rhythms and heart failure were chosen as variables that could affect our outcome variable. Similarly certain publications have noted a significant risk of stroke for incident dementia, which led to the methodological choice of the inclusion of stroke. (36, 37). Age and sex are notable confounding factors and were therefore added to our variable model to assess how these demographic factors may influence dementia.
2.2.5 Reverse causation and sensitivity analysis
To reduce the risk that participants who already had signs of cognitive decline which could impact their ability to engage with PA, participants with an IQCODE of 3.38 and above were excluded from the cox analysis. A score of 3.38 was observed by authors in the field as suitably equating to pathological cognitive decline whilst maintaining both sensitivity and specificity (38). The above was a method used to reduce reverse causation by excluding participants who possibly had prodromal features of dementia and who if included could increase the chance of adding participants that did not engage with PA due to early cognitive states.
Excluding participants who may have dementia in the context of other disease states (namely Parkinson’s disease) was a methodological choice, as parkinson’s disease is known to impact cognitive abilities to varying degrees. Those with Alzheimer’s disease were also excluded for similar reasons. Furthermore, by choosing a follow-up period of ten years, this increased the chance that dementia which is known to have a long prodromal period was more likely to occur after the PA exposure helping to limit issues of reverse causation additionally.
Sensitivity analysis was conducted comparing models with and without IQCODE exclusion to assess whether excluding participants with IQCODE-determined pathological cognitive decline impacted the outcome in any way. This was considered methodologically pertinent in order to analyse the assumption that the presence of those with informant-reported functional cognitive decline may change the outcome by reflecting reduced PA due to prodromal disease, thus inadvertently weakening any cautious causal interpretation, and impacting temporality by attenuating the likelihood that any reduced PA was reflecting ability outside of pre-existing cognitive deterioration, thus creating ambiguity as to whether low PA or sedentary levels preceded any already existing signs of cognitive impairment.
2.2.6 Statistical analyses
Descriptive statistical analysis was carried out on the baseline wave 1 dataset prior to any exclusions to illustrate any trends in age, gender, socio-demographic, and medical factors according to PA levels. Standard deviations were produced for continuous variables and categorical variables were represented with percentages. Presentation of the findings in a baseline table provided a transparent account of the dataset, allowing readily accessible deductions to be made, a broad appreciation of whether the trends presented were expected, and unexpected findings to be highlighted early for further inspection.
Cox proportional hazard regression models were used to assess for associations between PA levels and incident dementia. Two models were produced: unadjusted for covariates, and adjusted. Moderate, low and sedentary PA levels were compared with high PA levels in both models and the results were articulated through hazard ratios, 95% confidence intervals and p-values.
2.3 Results
2.3.1 Descriptive analyses and baseline cohort
Prior to any exclusions the baseline wave 1 dataset was comprised of a total of 12,099 participants. Following the exclusion of prevalent dementia, Alzheimer’s disease, Parkinson’s and pathological cognitive decline the number decreased to 11,805. Table 1 shows the baseline characteristics of the original 12,099. Table 1 evidences that when high PA and sedentary groups were compared, the sedentary group was ten years older (mean ages of 60.6 and 70.8 respectively) and the sedentary group had the highest percentages of hypertension, diabetes, stroke, abnormal heart rhythm, and heart failure compared to all the other PA levels, showing that the sedentary group were older, and burdened with more comorbidities than the high PA and moderate PA groups especially.
The unadjusted Cox modelling was performed on the 11,805 participants following the above exclusions and upon follow-up there were 480 cases of incident dementia. Missing covariate information led to a further loss of participants such that a total of 11,679 participants were analysed in the adjusted model, with 476 incident dementia occurrences.
2.3.2 Unadjusted Cox model
There was an inverse relationship between PA level and the risk of incident dementia: lower PA levels were associated with a higher risk, in a dose-response manner. Hazard ratios were calculated with the high PA level as the reference group. Moderate PA had a hazard ratio of 1.73 with CI: 1.34 to 2.23 and p = $2.18 \times 10^{-5}$; low PA hazard ratio: 2.24 with CI: 1.67 to 3.02 and p = $9.69 \times 10^{-8}$; sedentary activity hazard ratio: 2.42 with CI: 1.76 to 3.34 and p = $5.72 \times 10^{-8}$.
2.3.3 Adjusted Cox model
The following covariates were accounted for in the adjusted model: age, gender, current smoker, hypertension, diabetes, stroke, abnormal heart rhythms, heart failure and depression. The hazard ratios for moderate, and low activity in comparison to the high PA level remained illustrative of exercise producing a protective effect with hazard ratios of 1.33 and 1.23 respectively, whilst the sedentary group with a HR 1.02 demonstrates no clear difference between sedentary and high PA groups.
The CIs and p-values provide further information with Moderate PA HR 1.33 CI: 1.03 to 1.73, p = 0.0286; low PA HR 1.23 CI: 0.89 to 1.68, p = 0.204; sedentary HR: 1.02 CI: 0.72 to 1.45, p = 0.91. Moderate PA exhibiting a 95% CI that does not intersect 1, and a p value below 0.05 is a significant result, whilst low PA and sedentary groups have a smaller impact when the above covariates are taken into account, and no association in both low PA and sedentary groups can not be ruled out given the CIs for these groups.
Age, with a HR of 1.06 (CI: 1.05 to 1.07) and a p-value of $< 2 \times 10^{-16}$, was the most highly significant covariate for dementia incidence. Female gender, abnormal heart rhythms and depression were comparable with moderate PA in terms of significance for their respective hazard towards dementia incidence. Their CIs and p-values are as follows.

- Female gender: HR 1.23 (CI: 1.02 to 1.49); p = 0.03
- Abnormal heart rhythm: HR 1.39 (CI: 1.03 to 1.89); p = 0.03
- Depression: HR 1.30 (CI: 1.04 to 1.62); p = 0.02
2.3.4 Sensitivity analysis
Sensitivity analysis was performed both with and without the exclusion of pathological cognitive decline (an IQCODE score of >3.38), to assess for any change to the results. The unadjusted and adjusted Cox analyses produced the same results whether or not pathological cognitive decline determined by IQCODE was excluded. When this result was investigated for data quality by counting participants with IQCODE >3.38 prior to the exclusion of dementia, Alzheimer’s and Parkinson’s disease, the expected number of 52 was generated. When participants with IQCODE >3.38 were counted following the above exclusions, the count became ‘0’.
The output of the above is represented below.
# Unadjusted Cox model: PA level only, fitted on analysis_w1_no_iqcode.
cox_unadjusted_no_iqcode <- coxph(
  Surv(time_to_event_waves, event_dementia) ~ pa_level,
  data = analysis_w1_no_iqcode
)

# Adjusted Cox model: PA level plus the demographic, smoking, comorbidity and
# depression covariates, fitted on the same data.
cox_adjusted_no_iqcode <- coxph(
  Surv(time_to_event_waves, event_dementia) ~ pa_level + age + sex +
    current_smoker + hypertension + diabetes + stroke +
    abnormal_heart_rhythm + heart_failure + depression_binary,
  data = analysis_w1_no_iqcode
)

# Print each model summary as its own statement.
summary(cox_unadjusted_no_iqcode)
summary(cox_adjusted_no_iqcode)
Call:
coxph(formula = Surv(time_to_event_waves, event_dementia) ~ pa_level,
data = analysis_w1_no_iqcode)
n= 11805, number of events= 480
coef exp(coef) se(coef) z Pr(>|z|)
pa_levelModerate 0.5470 1.7281 0.1288 4.246 2.18e-05 ***
pa_levelLow 0.8084 2.2444 0.1516 5.332 9.69e-08 ***
pa_levelSedentary 0.8857 2.4246 0.1632 5.427 5.72e-08 ***
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
pa_levelModerate 1.728 0.5787 1.342 2.225
pa_levelLow 2.244 0.4456 1.667 3.021
pa_levelSedentary 2.425 0.4124 1.761 3.339
Concordance= 0.579 (se = 0.012 )
Likelihood ratio test= 41.82 on 3 df, p=4e-09
Wald test = 38.66 on 3 df, p=2e-08
Score (logrank) test = 40.21 on 3 df, p=1e-08
Call:
coxph(formula = Surv(time_to_event_waves, event_dementia) ~ pa_level +
age + sex + current_smoker + hypertension + diabetes + stroke +
abnormal_heart_rhythm + heart_failure + depression_binary,
data = analysis_w1_no_iqcode)
n= 11679, number of events= 476
(126 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
pa_levelModerate 0.288026 1.333792 0.131591 2.189 0.0286 *
pa_levelLow 0.204592 1.227024 0.161119 1.270 0.2041
pa_levelSedentary 0.020806 1.021024 0.178992 0.116 0.9075
age 0.061431 1.063357 0.004261 14.418 <2e-16 ***
sexFemale 0.208323 1.231611 0.096090 2.168 0.0302 *
current_smoker -0.117040 0.889550 0.140726 -0.832 0.4056
hypertension 0.085720 1.089501 0.094001 0.912 0.3618
diabetes 0.160271 1.173829 0.157535 1.017 0.3090
stroke 0.218415 1.244104 0.178580 1.223 0.2213
abnormal_heart_rhythm 0.329790 1.390676 0.155400 2.122 0.0338 *
heart_failure -0.696122 0.498515 0.582639 -1.195 0.2322
depression_binary 0.258923 1.295534 0.113208 2.287 0.0222 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
pa_levelModerate 1.3338 0.7497 1.0306 1.726
pa_levelLow 1.2270 0.8150 0.8948 1.683
pa_levelSedentary 1.0210 0.9794 0.7189 1.450
age 1.0634 0.9404 1.0545 1.072
sexFemale 1.2316 0.8119 1.0202 1.487
current_smoker 0.8895 1.1242 0.6751 1.172
hypertension 1.0895 0.9179 0.9062 1.310
diabetes 1.1738 0.8519 0.8620 1.598
stroke 1.2441 0.8038 0.8767 1.765
abnormal_heart_rhythm 1.3907 0.7191 1.0255 1.886
heart_failure 0.4985 2.0060 0.1591 1.562
depression_binary 1.2955 0.7719 1.0377 1.617
Concordance= 0.74 (se = 0.01 )
Likelihood ratio test= 298.4 on 12 df, p=<2e-16
Wald test = 306.5 on 12 df, p=<2e-16
Score (logrank) test = 326.3 on 12 df, p=<2e-16
Sensitivity analysis comparing
# Tabulate the baseline pathological-cognitive-decline flag in w1.
# dplyr::count() has no `useNA` argument (that belongs to base::table());
# passing useNA = "ifany" adds a constant column called `useNA` instead.
# count() already reports NA as its own group, so no extra argument is needed.
w1 %>%
  count(baseline_pathological_cognitive_decline)
A tibble: 2 × 3

| baseline_pathological_cognitive_decline <dbl> | useNA <chr> | n <int> |
|---|---|---|
| 0 | ifany | 12047 |
| 1 | ifany | 52 |

2 rows
# After removing prevalent dementia/Alzheimer's, Parkinson's and rows with no
# PA level, tabulate how many flagged pathological-cognitive-decline cases
# remain in followup_w1.
# dplyr::count() has no `useNA` argument (that belongs to base::table());
# passing useNA = "ifany" adds a constant column called `useNA` instead.
# count() already reports NA as its own group, so no extra argument is needed.
followup_w1 %>%
  mutate(
    prevalent_dem_alz_w1 = if_else(baseline_alzheimers == 1 | baseline_dementia == 1, 1, 0)
  ) %>%
  filter(
    prevalent_dem_alz_w1 == 0,
    baseline_parkinsons == 0,
    !is.na(pa_level)
  ) %>%
  count(baseline_pathological_cognitive_decline)

| baseline_pathological_cognitive_decline <dbl> | useNA <chr> | n <int> |
|---|---|---|
| 0 | ifany | 11805 |

#### 2.4 Discussion
The principal aim of the data analysis was to assess for any association between PA and incident dementia in adults above fifty years old from a UK dataset (ELSA). The other associated objectives acted to serve the above aim by creating stepwise goals to describe the data, assess for patterns and perform formal statistical analyses to answer whether different PA levels could exert an impact on dementia. The data analysis transparently shows that increased levels of PA have a protective effect on incident dementia; however, this association was weakened when covariates were adjusted for. These findings are more cautious than the trends discussed in the literature review studies, which included studies with different cognitive baselines (MCI), whilst the overall conclusion is the same, which is also in keeping with wider research on this subject.
The data analysis is informative when compared to the literature review and as such complements it, in that it included 'abnormal heart rhythm' as a distinct covariate, rather than compiling all cardiovascular diseases and medical comorbidities into one grouping, thus losing granularity. By choosing abnormal heart rhythm as a sole covariate it showed the potential importance of abnormal heart rhythms as a unique comorbidity towards dementia risk (HR 1.39, p-value 0.03). Abnormal cardiac rhythms have the most direct plausible implications for vascular dementia sufferers by damaging arterial vessels that provide blood flow to the brain. If the transport of vital nutrients to the brain is compromised, this can lead to deleterious effects on cognitive function, and PA such as walking has been noted to have a beneficial impact on mortality and wider CVD.(39, 40)
The above findings are useful in considering that the benefits of PA are most potent when viewed as part of a protective lifestyle choice whose advantages are not only relevant to dementia, but other risk factors that can contribute towards incident dementia as documented in a wide variety of clinical and academic sources. The risk factors that are repeatedly associated with dementia and cardiovascular comorbidities include the covariates analysed in this dissertation, which aligns with the proposed pathophysiology of dementia which encompasses oxidative stress and inflammation. Furthermore, PA is also proposed as a management strategy for depression, depression being a covariate that in the adjustment model was a significant hazard for dementia. The reasons for depression having an impact on incident dementia could be wide ranging, to include social isolation leading to reduced social engagement and therefore limiting broader cognitive functioning.
The statistical analyses support the almost universal finding that advanced age is the greatest risk factor for dementia. Older participants in this analysis were also evidenced to have the greatest burden of comorbidities, which may assist in explaining the smaller impact of PA in the adjustment model in that low and sedentary groups were older, and were more likely to have hypertension, diabetes and depression as examples. Indeed the baseline characteristics evidenced an average age of 60 in the high PA group and 70 years old in the sedentary group, which is akin to comparing the non elderly (less than 65 years old) with the elderly (more than 65 years old). Attempts to mitigate reverse causation were used to limit participants with undiagnosed cognitive decline that could impact their ability to engage in physical activity. Additionally, whilst the reasoning behind approaching the sensitivity analysis by examining the impact of pathological cognitive decline was logically sound (to avoid cognitively impaired inclusions that were not formally screened out by other means), these participants were already subsumed in either Alzheimer's, Parkinson's or prevalent dementia and thus were already accounted for.
#### 2.5 Limitations
The following limitations in the data analysis should be stated. The initial limitation involves the description of 'dementia' within ELSA. The dementia outcome measured includes 'organic brain syndrome, senility or any other serious memory impairment' in addition to 'dementia'. This creates heterogeneity in the outcome analysed, and furthermore the definition of 'dementia' itself is lacking, so the degree to which mixed-dementias, vascular dementia and other dementias are included in this is unknown. This may also partly explain why pathological cognitive impairment as expressed through IQCODE produced the same exact output in the sensitivity analysis.
Self-reporting of dementia and PA was exclusively relied upon, to demonstrate the dementia counts and PA levels. This was chosen to examine the findings from the self-reporting process as one clear source. Self reporting is prone to recall bias, possibly resulting in misclassification which could have hampered the results.
Dementia incidence was based on the wave at which dementia was diagnosed, as opposed to a diagnosis date, making the timing of incident dementia less precise. Additionally, as is typical for longitudinal studies, confounding cannot be entirely excluded despite adjustment modelling, and similarly actions to do so as performed in this analysis can reduce the possibility of observing pathways of interest. An example of this is that adjusting for vascular conditions, which account for the majority of the covariates (such as hypertension, diabetes, stroke), may obscure more nuanced physical activity findings, as physical activity also produces some of its benefits through vascular processes (impacting oxidative stress and inflammation).
Detailed sub-analysis on the characteristics of the group with missing PA information (a total of 194 individuals) may have produced a more rounded analysis in that this sub-group was older (mean age 69 years old), after the sedentary group, had the highest percentage of participants with abnormal heart rhythm, and had the highest percentage of participants with Alzheimer's, Parkinson's and prevalent dementia at baseline.
#### 2.6 Conclusion
The findings of the data analysis resonate with everyday clinical primary care in that physical activity is rarely discussed in terms of being the sole solution to any one condition, but instead physical activity advice is provided in the context of a suite of broad positive lifestyle factors that include healthy eating and appropriate hydration, stress management, appropriate sleep, and avoidance of hazardous habits such as smoking, illicit drug use, and excessive alcohol consumption. The above factors could limit a range of causes of cognitive impairment that may or may not progress to dementia, in addition to dementia itself, in the context of healthy ageing conversations with individual patients and populations. The adjusted Cox hazard model findings provide a cautious estimation of the impact that PA as a solitary factor may have on incident dementia, and as additional comorbidities and advancing age can also influence dementia occurrence, the promotion of only physical activity to those with functional symptoms of cognitive impairment may limit the breadth of useful advice that could be provided to assist in supporting protective lifestyles against dementia.
Furthermore, physical activity is additionally relevant in preventing a host of other conditions to include depression, hypertension, diabetes and stroke, which are also associated with dementia, making physical activity as an exposure an important factor to discuss both in primary care and public health settings.
Using physical activity as an exposure as opposed to exercise is useful as it enables the discussion of everyday activities that could if done mindfully make the concept of movement more accessible to a variety of patients who may due to frailty be less able to engage in more formal activities such as walking at pace, jogging or aerobics. The above data analysis informs examples of moderate activity such as cleaning a car, and laundry (low activity) that can be provided to patients. Guidance can be tailored to their ability, different days of the week, and can be built on over time.
The data analysis also suggests the impact of other factors on incident dementia, to include the impact of gender with women having increased hazards, and depression. The above analysis can be similarly tailored to the diversity of patients seen in primary care, by finding out more about, for example, a woman's daily activities, and creating systems of how what they already do can be modified to exert increased METs acknowledging that a leisurely walk could have a different result to walking at pace, as could higher BMI. The data analysis is therefore particularly useful in informing how the information can be communicated to patients, making it accessible to their lives.
Future work of interest to further the above analysis would include exploration of the other lifestyle factors within the ELSA data for their possible association to dementia, to produce a lifestyle factors package which could further the advice provided to patients and the public in the actions that can be taken to reduce their risk of both the factors that contribute to dementia and possibly dementia itself.
## Summaries your findings,discuss them in the context of other similar work or questions and suggestions for future work. Conclude your portfolio with what started your data exploration and what have the data contributed in the decisions for patient care or health service delivery.
### covering data access requirements, ethics, metadata and all methodological aspects of your project
### Results
### Use this section to showcase the results of your data manipulation that will contribute to the project
###Table 1 shows the baseline characteristics at wave 1, organised around physical activity (PA) categories: high, moderate, low and sedentary. As mean age increases the proportion of participants in low and sedentary PA categories increases. Furthermore the low and sedentary categories have higher prevalence of comorbidities that include hypertension, diabetes, stroke, abnormal heart rhythms, and heart failure.
### Discussion
### Conclusion
## Summaries your findings,discuss them in the context of other similar work or questions and suggestions for future work. Conclude your portfolio with what started your data exploration and what have the data contributed in the decisions for patient care or health service delivery.
# In text elements
##Some examples of having in-text elements as you develop your portfolio are provided here.
#* "# Headings"
#* "## Subheading 1"
#* "### subheading 2"
#* "#### subheading 3"
## Hyperlinks
#[Healthcare Data Science](https://github.com/CambridgeICE-HDS/MSt-Healthcare-Data-Science)
## Notice box
#::: {.infobox .caution data-latex="{caution}"}
#The format to add boxes to your portfolio
#:::
## Tables
### In-text table
#A template format to add a table into
#the document you can use the following md code structure.
#|Data table | Coverage |Area |
#|--------------|--------------|-------------------------|
#|Health survey | 2015 |Self-reported outcomes |
#|EHR | 2000 onwards |Electronic health records|
# Data tables
## defining a dataframe
``` r
# Example data frame with one row per data source (name, coverage period and
# subject area), printed as a table with kable().
CREL <- data.frame(
  Data = c("Health survey", "EHR"),
  Coverage = c("2015", "2000 onwards"),
  Area = c("Self-reported outcomes", "Electronic Health Records")
)
kable(CREL)
```

| Data | Coverage | Area |
|---|---|---|
| Health survey | 2015 | Self-reported outcomes |
| EHR | 2000 onwards | Electronic Health Records |