Table 1. Baseline characteristics (no participants excluded)
# Demographic / social variables
# Column names mentioning age or date of birth; "age" already covers "ager".
grep("age|dob", names(w1), ignore.case = TRUE, value = TRUE)
## [1] "dhdobyr" "dhager" "didob" "heage" "wprage" "indobyr.x"
## [7] "indager" "aagemab" "aagepab" "aageangi" "aagehart" "aagestro"
## [13] "aagedi" "age" "age_p" "indobyr.y" "indobyr_p" "agebuhead"
## [19] "agebusp" "agehoh" "agehhch1" "agehhch2" "agehhch3" "agehhch4"
## [25] "agehhch5" "agehhch6" "agehhch7" "agehhch8" "agebuch1" "agebuch2"
## [31] "agebuch3" "agebuch4" "agebuch5" "agebuch6" "agebuch7" "agebuch8"
## [37] "chage1" "chage2" "chage3" "chage4" "chage5" "chage6"
## [43] "chage7" "chage8" "chage9" "chage10" "chage11" "chage12"
## [49] "chage13" "chage14" "chage15" "chage16" "ageg5" "ageg5_bu"
## [55] "ageg7" "ageg7_bu" "ageg10" "ageg10_bu" "ageg3" "ageg3_bu"
## [61] "ageg3_spa" "spage" "spage_bu" "agehhldr1" "agehhldr2" "agehhldr3"
## [67] "agehhldr4" "mothage" "magedied" "fathage" "fagedied"
names(w1)[grepl("sex", names(w1), ignore.case = TRUE)]
## [1] "dhsex" "disex" "indsex" "asex" "sex" "sex_p"
## [7] "sexbuhead" "sexhoh" "chsex1" "chsex2" "chsex3" "chsex4"
## [13] "chsex5" "chsex6" "chsex7" "chsex8" "chsex9" "chsex10"
## [19] "chsex11" "chsex12" "chsex13" "chsex14" "chsex15" "chsex16"
names(w1)[grepl("educ|qual|school", names(w1), ignore.case = TRUE)]
## [1] "fqqual1" "fqqual2" "fqqual3" "edqual.x" "aqual" "aeducend"
## [7] "edqual.y" "qual2" "qual3" "qual2_p" "qual3_p"
# Column names mentioning ethnicity or race; "ethn" already covers "ethnic".
grep("ethn|race", names(w1), ignore.case = TRUE, value = TRUE)
## [1] "fqethnr" "aethnicr"
names(w1)[grepl("mar|partner|spouse|widow|single", names(w1), ignore.case = TRUE)]
## [1] "dimar" "wpamar" "partner" "marstat"
names(w1)[grepl("employ|work|job|retir", names(w1), ignore.case = TRUE)]
## [1] "difjob" "wpjob" "wpjobl" "wpsjoby"
## [5] "wpsjobm" "wpcjob" "wphjob" "iawork"
## [9] "hojob" "aeverjob" "aemploye" "astwork"
## [13] "hhgriddhwork" "hhgriddhwork_p" "worktime" "everwork"
## [17] "exwork" "exworkb" "exwork55" "exwork55b"
## [21] "exwork60" "exwork60b" "exwork65" "exwork65b"
## [25] "difjobm"
# Lifestyle variables
names(w1)[grepl("smok|cig", names(w1), ignore.case = TRUE)]
## [1] "hecig" "smoker" "smokerstat"
names(w1)[grepl("alcohol|drink", names(w1), ignore.case = TRUE)]
## character(0)
# BMI / body size
names(w1)[grepl("bmi|height|weight", names(w1), ignore.case = TRUE)]
## character(0)
# Mood / depression
names(w1)[grepl("depress|cesd|mood", names(w1), ignore.case = TRUE)]
## [1] "cesd_sc" "cesd_na"
# Disease-history blocks
names(w1)[grepl("^hedia", names(w1), ignore.case = TRUE)]
## [1] "hedia01" "hedia02" "hedia03" "hedia04" "hedia05" "hedia06" "hedia07"
## [8] "hedia08" "hedia09" "hedia10"
names(w1)[grepl("^hedib", names(w1), ignore.case = TRUE)]
## [1] "hedib01" "hedib02" "hedib03" "hedib04" "hedib05" "hedib06" "hedib07"
## [8] "hedib08" "hedib09" "hedib10"
# age
attr(w1$dhager, "label")
## [1] "Age variable from HH grid collapsed at 90 plus"
table(w1$dhager, useNA = "ifany")[1:10]
##
## 20 30 31 32 33 34 35 36 37 38
## 1 2 1 1 6 3 4 6 8 15
# sex
attr(w1$dhsex, "label")
## [1] "ASK OR CODE RESPONDENT~S SEX"
table(w1$dhsex, useNA = "ifany")
##
## 1 2
## 5335 6764
# education
attr(w1$edqual.x, "label")
## [1] "(D) Highest Educational Qualification at ELSA W1"
table(w1$edqual.x, useNA = "ifany")
##
## -9 -8 -1 1 2 3 4 5 6 7
## 6 11 18 1388 1333 764 1974 582 1015 5008
# ethnicity
attr(w1$fqethnr, "label")
## [1] "ELSA ethnic group collapsed into White and Non-white to avoid disclosure"
table(w1$fqethnr, useNA = "ifany")
##
## -9 -8 -1 1 2
## 12 2 6810 5111 164
# marital status
attr(w1$marstat, "label")
## [1] "marital status - couple1 combined with dimar"
table(w1$marstat, useNA = "ifany")
##
## 1 2 3 4 5 6
## 8035 504 575 1951 823 211
# smoking
attr(w1$smokerstat, "label")
## [1] "smoker status (past or present)"
table(w1$smokerstat, useNA = "ifany")
##
## -9 -8 -2 0 1 2 3 4
## 5 11 175 4286 674 4342 445 2161
# depression
attr(w1$cesd_sc, "label")
## [1] "number of cesd questions answered yes"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.000 0.000 1.000 1.504 2.000 8.000
attr(w1$cesd_na, "label")
## [1] "number of cesd questions answered"
table(w1$cesd_na, useNA = "ifany")
##
## -2 -1 0 3 5 6 7 8
## 136 175 67 2 5 15 87 11612
# alcohol - broader search
names(w1)[grepl("alco|drink|beer|wine|spirit|units", names(w1), ignore.case = TRUE)]
## character(0)
# BMI / body size - broader search
# Broad body-size probe. The two-letter fragments "wt" and "ht" match any name
# containing them (so they also cover "weight" and "height"), which is why the
# result is dominated by unrelated columns.
grep("bmi|body|mass|wt|ht", names(w1), ignore.case = TRUE, value = TRUE)
## [1] "wpwtx" "wpwtx2" "wpwtx3" "iashty1" "iashty2"
## [6] "iashty3" "iashty4" "hhtot" "refreshtype" "chtype1"
## [11] "chtype2" "chtype3" "chtype4" "chtype5" "chtype6"
## [16] "chtype7" "chtype8" "chtype9" "chtype10" "chtype11"
## [21] "chtype12" "chtype13" "chtype14" "chtype15" "chtype16"
## [26] "nright"
# employment - inspect strongest candidates
attr(w1$worktime, "label")
## [1] "Working full time or part time"
table(w1$worktime, useNA = "ifany")
##
## -8 -1 1 2
## 118 7476 2741 1764
attr(w1$aemploye, "label")
## [1] "HSE Feed Forward: Are you …{an employee or self-employed}"
table(w1$aemploye, useNA = "ifany")
##
## -1 1 2
## 563 10101 1435
attr(w1$everwork, "label")
## [1] "ever worked"
table(w1$everwork, useNA = "ifany")
##
## -9 -8 -2 0 1
## 3 1 27 217 11851
# smoking labels
attributes(w1$smokerstat)
## $label
## [1] "smoker status (past or present)"
##
## $format.stata
## [1] "%8.0g"
##
## $labels
## refused don't know not asked
## -9 -8 -2
## never smoked ex smoker - occasional ex smoker - regular
## 0 1 2
## ex smoker - DK freq current smoker
## 3 4
##
## $class
## [1] "haven_labelled" "vctrs_vctr" "double"
# education labels
attributes(w1$edqual.x)
## $label
## [1] "(D) Highest Educational Qualification at ELSA W1"
##
## $format.stata
## [1] "%8.0g"
##
## $labels
## Refusal Don't know
## -9 -8
## Not applicable NVQ4/NVQ5/Degree or equiv
## -1 1
## Higher ed below degree NVQ3/GCE A Level equiv
## 2 3
## NVQ2/GCE O Level equiv NVQ1/CSE other grade equiv
## 4 5
## Foreign/other No qualification
## 6 7
##
## $class
## [1] "haven_labelled" "vctrs_vctr" "double"
# ethnicity labels
attributes(w1$fqethnr)
## $label
## [1] "ELSA ethnic group collapsed into White and Non-white to avoid disclosure"
##
## $format.stata
## [1] "%8.0g"
##
## $labels
## Refusal Don't know Not applicable White Non-white
## -9 -8 -1 1 2
##
## $class
## [1] "haven_labelled" "vctrs_vctr" "double"
# marital status labels
attributes(w1$marstat)
## $label
## [1] "marital status - couple1 combined with dimar"
##
## $format.stata
## [1] "%8.0g"
##
## $labels
## married (inc civ pship 06 onwards) cohabiting
## 1 2
## single, never married widowed
## 3 4
## divorced separated
## 5 6
##
## $class
## [1] "haven_labelled" "vctrs_vctr" "double"
# alcohol - wider search
names(w1)[grepl("drnk|drink|alc|wine|beer|spirit|unit|pub", names(w1), ignore.case = TRUE)]
## [1] "healc"
# possible nurse / anthropometry style names for BMI
names(w1)[grepl("bm", names(w1), ignore.case = TRUE)]
## [1] "wpsjobm" "iabm11" "iabm12" "iabm13" "iabm14" "iabm15" "iabm16"
## [8] "iabm17" "iabm18" "iabm19" "iabm20" "iabm31" "iabm32" "iabm33"
## [15] "iabm34" "iabm35" "iabm36" "iabm37" "iabm38" "iabm39" "iabm40"
## [22] "iabm48" "iabm49" "iabm50" "iabm51" "iabm52" "iabm53" "iabm54"
## [29] "iabm62" "iabm63" "iabm64" "iabm65" "iabm66" "iabm67" "iabm68"
## [36] "iadebm" "hobml" "hobmu" "hobme" "hobmr" "hohbm1" "hohbm2"
## [43] "hohbm3" "difjobm"
names(w1)[grepl("ht", names(w1), ignore.case = TRUE)]
## [1] "iashty1" "iashty2" "iashty3" "iashty4" "hhtot"
## [6] "refreshtype" "chtype1" "chtype2" "chtype3" "chtype4"
## [11] "chtype5" "chtype6" "chtype7" "chtype8" "chtype9"
## [16] "chtype10" "chtype11" "chtype12" "chtype13" "chtype14"
## [21] "chtype15" "chtype16" "nright"
names(w1)[grepl("wt", names(w1), ignore.case = TRUE)]
## [1] "wpwtx" "wpwtx2" "wpwtx3"
# inspect the most plausible body-size candidates already found
attr(w1$wpwtx, "label")
## [1] "Is this before or after tax?"
table(w1$wpwtx, useNA = "ifany")
##
## -9 -8 -1 1 2
## 22 34 11235 172 636
## [1] "Is this before or after tax?"
table(w1$wpwtx2, useNA = "ifany")
##
## -9 -8 -1 1 2
## 3 2 12025 16 53
## [1] "Is this before or after tax?"
table(w1$wpwtx3, useNA = "ifany")
##
## -1 2
## 12092 7
# alcohol
attr(w1$healc, "label")
## [1] "Do you now drink …? {a lot more..}"
table(w1$healc, useNA = "ifany")
##
## -8 -1 1 2 3 4
## 3 10442 41 349 582 682
## $label
## [1] "Do you now drink …? {a lot more..}"
##
## $format.stata
## [1] "%8.0g"
##
## $labels
## Refusal Don't Know Not applicable ... a lot more, a bit more,
## -9 -8 -1 1 2
## a bit less, or, a lot less?
## 3 4
##
## $class
## [1] "haven_labelled" "vctrs_vctr" "double"
# better search for height/weight/BMI
names(w1)[grepl("hei|highm|cm|metre|meter", names(w1), ignore.case = TRUE)]
## [1] "heill" "heins" "heiqa" "heiqb" "heiqc"
## [6] "heiqd" "heiqe" "heiqf" "heiqg" "heiqh"
## [11] "heiqi" "heiqj" "heiqk" "heiql" "heiqm"
## [16] "heiqn" "heiqo" "heiqp" "heiqq" "heinct"
## [21] "wplrcm" "wplrcm2" "hoincm1" "hoincm2" "hoincm3"
## [26] "hoincm4" "horpcm" "gaselecmeth" "elecmeth" "rentincme"
names(w1)[grepl("weigh|kilo|kg|stone|pound", names(w1), ignore.case = TRUE)]
## character(0)
names(w1)[grepl("bmi", names(w1), ignore.case = TRUE)]
## character(0)
# Create wave-1 disease indicator variables for Table 1.
#
# Each indicator is 1 when ANY of the ten mention slots of the relevant block
# (hedia01-hedia10 or hedib01-hedib10) equals the diagnosis code, and 0
# otherwise. Any other slot value simply fails the equality test, so it
# contributes to 0.
# NOTE(review): if these slots carry negative "refused / not asked" codes,
# such rows are counted as disease-free rather than missing - confirm intent.
hedia_slots <- sprintf("hedia%02d", 1:10)
hedib_slots <- sprintf("hedib%02d", 1:10)

w1 <- w1 %>%
  mutate(
    # hedia block
    htn_w1        = if_else(if_any(all_of(hedia_slots), ~ .x == 1), 1, 0),
    hf_w1         = if_else(if_any(all_of(hedia_slots), ~ .x == 4), 1, 0),
    arrhythmia_w1 = if_else(if_any(all_of(hedia_slots), ~ .x == 6), 1, 0),
    diabetes_w1   = if_else(if_any(all_of(hedia_slots), ~ .x == 7), 1, 0),
    stroke_w1     = if_else(if_any(all_of(hedia_slots), ~ .x == 8), 1, 0),
    # hedib block
    alz_w1        = if_else(if_any(all_of(hedib_slots), ~ .x == 8), 1, 0),
    dementia_w1   = if_else(if_any(all_of(hedib_slots), ~ .x == 9), 1, 0)
  )
# check the new disease variables
table(w1$htn_w1, useNA = "ifany")
##
## 0 1
## 7627 4472
table(w1$hf_w1, useNA = "ifany")
##
## 0 1
## 12017 82
table(w1$arrhythmia_w1, useNA = "ifany")
##
## 0 1
## 11367 732
table(w1$diabetes_w1, useNA = "ifany")
##
## 0 1
## 11233 866
table(w1$stroke_w1, useNA = "ifany")
##
## 0 1
## 11588 511
table(w1$alz_w1, useNA = "ifany")
##
## 0 1
## 12085 14
table(w1$dementia_w1, useNA = "ifany")
##
## 0 1
## 12035 64
# Draft Table 1 dataset with no exclusions: the participant identifier plus the
# renamed baseline columns only (transmute() drops everything else).
table1_w1 <- transmute(
  w1,
  idauniq,
  # NOTE(review): the variable label says "collapsed at 90 plus" while the
  # values run up to 99 - check whether 99 is a top-code before treating age
  # as continuous.
  age = dhager,
  # as_factor() turns the labelled codes into factors named by value label.
  sex = as_factor(dhsex),
  education = as_factor(edqual.x),
  ethnicity = as_factor(fqethnr),
  marital_status = as_factor(marstat),
  employment = as_factor(worktime),
  smoking = as_factor(smokerstat),
  # Still contains negative codes at this point.
  depression_score = cesd_sc,
  # 0/1 indicators built from the diagnosis mention slots.
  hypertension = htn_w1,
  heart_failure = hf_w1,
  abnormal_heart_rhythm = arrhythmia_w1,
  diabetes = diabetes_w1,
  stroke = stroke_w1,
  baseline_alzheimers = alz_w1,
  baseline_dementia = dementia_w1
)
# quick checks
dim(table1_w1)
## [1] 12099 16
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.00 55.00 63.00 64.19 72.00 99.00
table(table1_w1$sex, useNA = "ifany")
##
## Refusal Don't Know Not applicable Male Female
## 0 0 0 5335 6764
table(table1_w1$education, useNA = "ifany")
##
## Refusal Don't know
## 6 11
## Not applicable NVQ4/NVQ5/Degree or equiv
## 18 1388
## Higher ed below degree NVQ3/GCE A Level equiv
## 1333 764
## NVQ2/GCE O Level equiv NVQ1/CSE other grade equiv
## 1974 582
## Foreign/other No qualification
## 1015 5008
table(table1_w1$ethnicity, useNA = "ifany")
##
## Refusal Don't know Not applicable White Non-white
## 12 2 6810 5111 164
table(table1_w1$marital_status, useNA = "ifany")
##
## married (inc civ pship 06 onwards) cohabiting
## 8035 504
## single, never married widowed
## 575 1951
## divorced separated
## 823 211
table(table1_w1$employment, useNA = "ifany")
##
## unknown Not working Full time (>=35) Part time
## 118 7476 2741 1764
table(table1_w1$smoking, useNA = "ifany")
##
## refused don't know not asked
## 5 11 175
## never smoked ex smoker - occasional ex smoker - regular
## 4286 674 4342
## ex smoker - DK freq current smoker
## 445 2161
summary(table1_w1$depression_score)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.000 0.000 1.000 1.504 2.000 8.000
table(table1_w1$hypertension, useNA = "ifany")
##
## 0 1
## 7627 4472
table(table1_w1$heart_failure, useNA = "ifany")
##
## 0 1
## 12017 82
table(table1_w1$abnormal_heart_rhythm, useNA = "ifany")
##
## 0 1
## 11367 732
table(table1_w1$diabetes, useNA = "ifany")
##
## 0 1
## 11233 866
table(table1_w1$stroke, useNA = "ifany")
##
## 0 1
## 11588 511
table(table1_w1$baseline_alzheimers, useNA = "ifany")
##
## 0 1
## 12085 14
table(table1_w1$baseline_dementia, useNA = "ifany")
##
## 0 1
## 12035 64
# Derive a four-level physical activity group from heacta / heactb / heactc.
# case_when() uses the first branch that matches, so each branch below is only
# reached by rows that failed every branch above it.
# NOTE(review): a value outside 1-2 on an item (including any negative
# missing code) is treated the same as "not at that intensity" for the
# Moderate and Low branches - confirm this is intended.
w1 <- w1 %>%
  mutate(
    w1_palevel = case_when(
      heacta %in% c(1, 2) ~ "High",
      heactb %in% c(1, 2) ~ "Moderate",
      heactc %in% c(1, 2) ~ "Low",
      # Sedentary requires an explicit 3 or 4 on all three items.
      heacta %in% c(3, 4) & heactb %in% c(3, 4) & heactc %in% c(3, 4) ~ "Sedentary",
      # Anything else cannot be classified.
      TRUE ~ NA_character_
    )
  )
table(w1$w1_palevel, useNA = "ifany")
##
## High Low Moderate Sedentary <NA>
## 3302 1756 5607 1240 194
# Rebuild the draft Table 1 dataset, now with the physical activity group as
# the second column; all other columns are derived exactly as before.
table1_w1 <- transmute(
  w1,
  idauniq,
  pa_level = w1_palevel,
  age = dhager,
  # Labelled survey codes become factors named by their value labels.
  sex = as_factor(dhsex),
  education = as_factor(edqual.x),
  ethnicity = as_factor(fqethnr),
  marital_status = as_factor(marstat),
  employment = as_factor(worktime),
  smoking = as_factor(smokerstat),
  depression_score = cesd_sc,
  # 0/1 disease indicators.
  hypertension = htn_w1,
  heart_failure = hf_w1,
  abnormal_heart_rhythm = arrhythmia_w1,
  diabetes = diabetes_w1,
  stroke = stroke_w1,
  baseline_alzheimers = alz_w1,
  baseline_dementia = dementia_w1
)
# check PA distribution with no exclusions
table(table1_w1$pa_level, useNA = "ifany")
##
## High Low Moderate Sedentary <NA>
## 3302 1756 5607 1240 194
# mean age by PA level
table1_w1 %>%
group_by(pa_level) %>%
summarise(
n = n(),
mean_age = mean(age, na.rm = TRUE),
sd_age = sd(age, na.rm = TRUE)
)
## # A tibble: 5 × 4
## pa_level n mean_age sd_age
## <chr> <int> <dbl> <dbl>
## 1 High 3302 60.6 9.22
## 2 Low 1756 67.4 12.0
## 3 Moderate 5607 63.6 10.4
## 4 Sedentary 1240 70.8 12.2
## 5 <NA> 194 69.3 15.0
# sex by PA level
table(table1_w1$pa_level, table1_w1$sex, useNA = "ifany")
##
## Refusal Don't Know Not applicable Male Female
## High 0 0 0 1593 1709
## Low 0 0 0 552 1204
## Moderate 0 0 0 2507 3100
## Sedentary 0 0 0 590 650
## <NA> 0 0 0 93 101
# smoking by PA level
table(table1_w1$pa_level, table1_w1$smoking, useNA = "ifany")
##
## refused don't know not asked never smoked ex smoker - occasional
## High 0 0 0 1309 194
## Low 0 1 0 587 89
## Moderate 0 1 0 1991 343
## Sedentary 0 0 0 395 48
## <NA> 5 9 175 4 0
##
## ex smoker - regular ex smoker - DK freq current smoker
## High 1200 147 452
## Low 621 62 396
## Moderate 2022 182 1068
## Sedentary 499 54 244
## <NA> 0 0 1
# disease variables by PA level
table(table1_w1$pa_level, table1_w1$hypertension, useNA = "ifany")
##
## 0 1
## High 2338 964
## Low 960 796
## Moderate 3533 2074
## Sedentary 662 578
## <NA> 134 60
table(table1_w1$pa_level, table1_w1$diabetes, useNA = "ifany")
##
## 0 1
## High 3172 130
## Low 1572 184
## Moderate 5240 367
## Sedentary 1074 166
## <NA> 175 19
table(table1_w1$pa_level, table1_w1$stroke, useNA = "ifany")
##
## 0 1
## High 3247 55
## Low 1655 101
## Moderate 5434 173
## Sedentary 1088 152
## <NA> 164 30
table(table1_w1$pa_level, table1_w1$abnormal_heart_rhythm, useNA = "ifany")
##
## 0 1
## High 3142 160
## Low 1634 122
## Moderate 5286 321
## Sedentary 1128 112
## <NA> 177 17
table(table1_w1$pa_level, table1_w1$heart_failure, useNA = "ifany")
##
## 0 1
## High 3296 6
## Low 1730 26
## Moderate 5587 20
## Sedentary 1215 25
## <NA> 189 5
table(table1_w1$pa_level, table1_w1$baseline_alzheimers, useNA = "ifany")
##
## 0 1
## High 3302 0
## Low 1756 0
## Moderate 5606 1
## Sedentary 1233 7
## <NA> 188 6
table(table1_w1$pa_level, table1_w1$baseline_dementia, useNA = "ifany")
##
## 0 1
## High 3297 5
## Low 1748 8
## Moderate 5595 12
## Sedentary 1221 19
## <NA> 174 20
# Tidy the draft dataset for tabulation: order the PA groups, collapse smoking
# and ethnicity to their substantive categories, and blank out negative
# depression scores.
table1_w1_clean <- mutate(
  table1_w1,
  # Explicit level order (otherwise alphabetical); unclassified rows stay NA.
  pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
  # Drop factor levels that have no rows.
  sex = fct_drop(sex),
  # Three-level smoking status; every other level becomes NA.
  smoking3 = case_when(
    smoking %in% c("ex smoker - occasional", "ex smoker - regular", "ex smoker - DK freq") ~ "Ex-smoker",
    smoking == "current smoker" ~ "Current",
    smoking == "never smoked" ~ "Never",
    TRUE ~ NA_character_
  ),
  # Keep only the two substantive ethnicity labels; the rest become NA.
  ethnicity2 = case_when(
    ethnicity %in% c("White", "Non-white") ~ as.character(ethnicity),
    TRUE ~ NA_character_
  ),
  # Negative values are not real scores, so they are set to NA.
  depression_score = if_else(depression_score >= 0, as.numeric(depression_score), NA_real_)
)
# check cleaned variables
table(table1_w1_clean$pa_level, useNA = "ifany")
##
## High Moderate Low Sedentary <NA>
## 3302 5607 1756 1240 194
table(table1_w1_clean$smoking3, useNA = "ifany")
##
## Current Ex-smoker Never <NA>
## 2161 5461 4286 191
table(table1_w1_clean$ethnicity2, useNA = "ifany")
##
## Non-white White <NA>
## 164 5111 6824
summary(table1_w1_clean$depression_score)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.000 1.000 1.582 2.000 8.000 311
# Format a logical flag as "count (percent%)".
# NA flags are not counted, and the denominator is the full length of the
# vector (the whole group), so rows with a missing value lower the percentage
# rather than being excluded from it.
fmt_n_pct <- function(flag) {
  hits <- sum(flag, na.rm = TRUE)
  sprintf("%d (%.1f%%)", hits, 100 * hits / length(flag))
}

# One row per physical activity group (plus an explicit "Missing PA" group),
# one pre-formatted text column per Table 1 characteristic.
table1_summary <- table1_w1_clean %>%
  mutate(
    pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
  ) %>%
  group_by(pa_level, .drop = FALSE) %>%
  summarise(
    N = n(),
    `Age, mean (SD)` = sprintf("%.1f (%.1f)", mean(age, na.rm = TRUE), sd(age, na.rm = TRUE)),
    `Female, n (%)` = fmt_n_pct(sex == "Female"),
    `No qualification, n (%)` = fmt_n_pct(education == "No qualification"),
    `Married/cohabiting, n (%)` = fmt_n_pct(
      marital_status %in% c("married (inc civ pship 06 onwards)", "cohabiting")
    ),
    `Working full/part time, n (%)` = fmt_n_pct(
      employment %in% c("Full time (>=35)", "Part time")
    ),
    `Current smoker, n (%)` = fmt_n_pct(smoking3 == "Current"),
    `Depression score, mean (SD)` = sprintf(
      "%.2f (%.2f)",
      mean(depression_score, na.rm = TRUE),
      sd(depression_score, na.rm = TRUE)
    ),
    `Hypertension, n (%)` = fmt_n_pct(hypertension == 1),
    `Diabetes, n (%)` = fmt_n_pct(diabetes == 1),
    `Stroke, n (%)` = fmt_n_pct(stroke == 1),
    `Abnormal heart rhythm, n (%)` = fmt_n_pct(abnormal_heart_rhythm == 1),
    `Heart failure, n (%)` = fmt_n_pct(heart_failure == 1),
    `Baseline Alzheimer’s, n (%)` = fmt_n_pct(baseline_alzheimers == 1),
    `Baseline dementia, n (%)` = fmt_n_pct(baseline_dementia == 1)
  )

# Render the draft table (one row per PA group).
knitr::kable(
  table1_summary,
  caption = "Draft Table 1. Baseline characteristics by physical activity group, wave 1."
)
Draft Table 1. Baseline characteristics by physical activity
group, wave 1.
| High |
3302 |
60.6 (9.2) |
1709 (51.8%) |
935 (28.3%) |
2582 (78.2%) |
1753 (53.1%) |
452 (13.7%) |
1.06 (1.61) |
964 (29.2%) |
130 (3.9%) |
55 (1.7%) |
160 (4.8%) |
6 (0.2%) |
0 (0.0%) |
5 (0.2%) |
| Moderate |
5607 |
63.6 (10.4) |
3100 (55.3%) |
2159 (38.5%) |
4067 (72.5%) |
2200 (39.2%) |
1068 (19.0%) |
1.41 (1.88) |
2074 (37.0%) |
367 (6.5%) |
173 (3.1%) |
321 (5.7%) |
20 (0.4%) |
1 (0.0%) |
12 (0.2%) |
| Low |
1756 |
67.4 (12.0) |
1204 (68.6%) |
1051 (59.9%) |
1049 (59.7%) |
374 (21.3%) |
396 (22.6%) |
2.32 (2.22) |
796 (45.3%) |
184 (10.5%) |
101 (5.8%) |
122 (6.9%) |
26 (1.5%) |
0 (0.0%) |
8 (0.5%) |
| Sedentary |
1240 |
70.8 (12.2) |
650 (52.4%) |
766 (61.8%) |
705 (56.9%) |
176 (14.2%) |
244 (19.7%) |
2.73 (2.32) |
578 (46.6%) |
166 (13.4%) |
152 (12.3%) |
112 (9.0%) |
25 (2.0%) |
7 (0.6%) |
19 (1.5%) |
| Missing PA |
194 |
69.3 (15.0) |
101 (52.1%) |
97 (50.0%) |
136 (70.1%) |
2 (1.0%) |
1 (0.5%) |
0.25 (0.62) |
60 (30.9%) |
19 (9.8%) |
30 (15.5%) |
17 (8.8%) |
5 (2.6%) |
6 (3.1%) |
20 (10.3%) |
# Final long-format Table 1. N is cast to integer explicitly; the remaining
# columns are already formatted text.
table1_summary_final <- mutate(table1_summary, N = as.integer(N))

knitr::kable(
  table1_summary_final,
  caption = "Table 1. Baseline characteristics by physical activity group at wave 1. No participants were excluded at this stage."
)
Table 1. Baseline characteristics by physical activity group at
wave 1. No participants were excluded at this stage.
| High |
3302 |
60.6 (9.2) |
1709 (51.8%) |
935 (28.3%) |
2582 (78.2%) |
1753 (53.1%) |
452 (13.7%) |
1.06 (1.61) |
964 (29.2%) |
130 (3.9%) |
55 (1.7%) |
160 (4.8%) |
6 (0.2%) |
0 (0.0%) |
5 (0.2%) |
| Moderate |
5607 |
63.6 (10.4) |
3100 (55.3%) |
2159 (38.5%) |
4067 (72.5%) |
2200 (39.2%) |
1068 (19.0%) |
1.41 (1.88) |
2074 (37.0%) |
367 (6.5%) |
173 (3.1%) |
321 (5.7%) |
20 (0.4%) |
1 (0.0%) |
12 (0.2%) |
| Low |
1756 |
67.4 (12.0) |
1204 (68.6%) |
1051 (59.9%) |
1049 (59.7%) |
374 (21.3%) |
396 (22.6%) |
2.32 (2.22) |
796 (45.3%) |
184 (10.5%) |
101 (5.8%) |
122 (6.9%) |
26 (1.5%) |
0 (0.0%) |
8 (0.5%) |
| Sedentary |
1240 |
70.8 (12.2) |
650 (52.4%) |
766 (61.8%) |
705 (56.9%) |
176 (14.2%) |
244 (19.7%) |
2.73 (2.32) |
578 (46.6%) |
166 (13.4%) |
152 (12.3%) |
112 (9.0%) |
25 (2.0%) |
7 (0.6%) |
19 (1.5%) |
| Missing PA |
194 |
69.3 (15.0) |
101 (52.1%) |
97 (50.0%) |
136 (70.1%) |
2 (1.0%) |
1 (0.5%) |
0.25 (0.62) |
60 (30.9%) |
19 (9.8%) |
30 (15.5%) |
17 (8.8%) |
5 (2.6%) |
6 (3.1%) |
20 (10.3%) |
# Transpose Table 1 so that characteristics are rows and PA groups are columns.
table1_summary_wide <- table1_summary_final %>%
  # N must be text like every other column before they can share one value column.
  mutate(N = as.character(N)) %>%
  pivot_longer(cols = -pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = pa_level, values_from = Value) %>%
  # Fix the column order explicitly.
  select(
    Characteristic,
    all_of(c("High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )

knitr::kable(
  table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 1 by physical activity category. No participants were excluded at this stage."
)
Table 1. Baseline characteristics at wave 1 by physical
activity category. No participants were excluded at this
stage.
| N |
3302 |
5607 |
1756 |
1240 |
194 |
| Age, mean (SD) |
60.6 (9.2) |
63.6 (10.4) |
67.4 (12.0) |
70.8 (12.2) |
69.3 (15.0) |
| Female, n (%) |
1709 (51.8%) |
3100 (55.3%) |
1204 (68.6%) |
650 (52.4%) |
101 (52.1%) |
| No qualification, n (%) |
935 (28.3%) |
2159 (38.5%) |
1051 (59.9%) |
766 (61.8%) |
97 (50.0%) |
| Married/cohabiting, n (%) |
2582 (78.2%) |
4067 (72.5%) |
1049 (59.7%) |
705 (56.9%) |
136 (70.1%) |
| Working full/part time, n (%) |
1753 (53.1%) |
2200 (39.2%) |
374 (21.3%) |
176 (14.2%) |
2 (1.0%) |
| Current smoker, n (%) |
452 (13.7%) |
1068 (19.0%) |
396 (22.6%) |
244 (19.7%) |
1 (0.5%) |
| Depression score, mean (SD) |
1.06 (1.61) |
1.41 (1.88) |
2.32 (2.22) |
2.73 (2.32) |
0.25 (0.62) |
| Hypertension, n (%) |
964 (29.2%) |
2074 (37.0%) |
796 (45.3%) |
578 (46.6%) |
60 (30.9%) |
| Diabetes, n (%) |
130 (3.9%) |
367 (6.5%) |
184 (10.5%) |
166 (13.4%) |
19 (9.8%) |
| Stroke, n (%) |
55 (1.7%) |
173 (3.1%) |
101 (5.8%) |
152 (12.3%) |
30 (15.5%) |
| Abnormal heart rhythm, n (%) |
160 (4.8%) |
321 (5.7%) |
122 (6.9%) |
112 (9.0%) |
17 (8.8%) |
| Heart failure, n (%) |
6 (0.2%) |
20 (0.4%) |
26 (1.5%) |
25 (2.0%) |
5 (2.6%) |
| Baseline Alzheimer’s, n (%) |
0 (0.0%) |
1 (0.0%) |
0 (0.0%) |
7 (0.6%) |
6 (3.1%) |
| Baseline dementia, n (%) |
5 (0.2%) |
12 (0.2%) |
8 (0.5%) |
19 (1.5%) |
20 (10.3%) |
# Wave-1 counts: Alzheimer's, dementia, both flags set, and at least one set.
summarise(
  w1,
  alz_n    = sum(alz_w1 == 1, na.rm = TRUE),
  dem_n    = sum(dementia_w1 == 1, na.rm = TRUE),
  both_n   = sum(alz_w1 == 1 & dementia_w1 == 1, na.rm = TRUE),
  either_n = sum(alz_w1 == 1 | dementia_w1 == 1, na.rm = TRUE)
)
## # A tibble: 1 × 4
## alz_n dem_n both_n either_n
## <int> <int> <int> <int>
## 1 14 64 3 75
# Participants flagged with both Alzheimer's and dementia at baseline (wave 1).
both_alz_dem <- filter(
  table1_w1_clean,
  baseline_alzheimers == 1 & baseline_dementia == 1
)
# Number of participants carrying both baseline flags.
nrow(both_alz_dem)
## [1] 3
# Which PA level are the participants with both Alzheimer's and dementia at baseline in?
both_alz_dem %>%
count(pa_level, .drop = FALSE)
## # A tibble: 5 × 2
## pa_level n
## <fct> <int>
## 1 High 0
## 2 Moderate 0
## 3 Low 0
## 4 Sedentary 2
## 5 <NA> 1
# Which participant IDs have both Alzheimer's and dementia at baseline (wave 1)?
both_alz_dem %>%
select(idauniq, pa_level, baseline_alzheimers, baseline_dementia)
## # A tibble: 3 × 4
## idauniq pa_level baseline_alzheimers baseline_dementia
## <dbl> <fct> <dbl> <dbl>
## 1 106735 <NA> 1 1
## 2 108547 Sedentary 1 1
## 3 119099 Sedentary 1 1
# Dementia follow-up coding chunk: one two-column table per wave
# (idauniq + a 0/1 dem_w<N> flag).

# Wave 2 uses mention slots: flag is 1 when any of hedib01-hedib04 equals 9.
# NOTE(review): the wave-1 coding scans slots 01-10; confirm wave 2 really has
# only four slots.
w2_dem <- w2_core %>%
  transmute(
    idauniq,
    dem_w2 = if_else(
      hedib01 == 9 | hedib02 == 9 | hedib03 == 9 | hedib04 == 9,
      1, 0
    )
  )

# Waves 3-9 read a single column, hedibde.
# Returns idauniq plus a 0/1 column named `flag_name` that is 1 where hedibde
# equals `mention_code`.
flag_hedibde <- function(core, flag_name, mention_code) {
  transmute(
    core,
    idauniq,
    !!flag_name := if_else(hedibde == !!mention_code, 1, 0)
  )
}

# NOTE(review): wave 3 tests hedibde == 3 whereas waves 4-9 test == 1 - verify
# against the wave 3 data dictionary that 3 is the intended code.
w3_dem <- flag_hedibde(w3_core, "dem_w3", 3)
w4_dem <- flag_hedibde(w4_core, "dem_w4", 1)
w5_dem <- flag_hedibde(w5_core, "dem_w5", 1)
w6_dem <- flag_hedibde(w6_core, "dem_w6", 1)
w7_dem <- flag_hedibde(w7_core, "dem_w7", 1)
w8_dem <- flag_hedibde(w8_core, "dem_w8", 1)
w9_dem <- flag_hedibde(w9_core, "dem_w9", 1)
# Follow-up merge chunk: attach the wave 2-9 dementia flags to the cleaned
# baseline table, keeping every baseline row (left joins on idauniq).
followup_w1 <- Reduce(
  function(acc, wave_flags) left_join(acc, wave_flags, by = "idauniq"),
  list(w2_dem, w3_dem, w4_dem, w5_dem, w6_dem, w7_dem, w8_dem, w9_dem),
  # Starting table: baseline data with the two baseline flags forced to numeric.
  mutate(
    table1_w1_clean,
    baseline_alzheimers = as.numeric(baseline_alzheimers),
    baseline_dementia = as.numeric(baseline_dementia)
  )
) %>%
  # A flag is NA when the participant has no row in that wave's table (or the
  # flag itself was NA); every such NA is turned into 0 here, so after this
  # step "not observed" can no longer be told apart from "no dementia".
  mutate(
    across(starts_with("dem_w"), ~ replace_na(., 0))
  )
dim(followup_w1)
## [1] 12099 27
# Analysis dataset chunk.
#
# Starts from followup_w1, removes participants with Alzheimer's or dementia
# at baseline and those without a physical activity group, then derives the
# survival outcome (time in waves since wave 1, event = first dementia flag).
analysis_w1 <- followup_w1 %>%
  mutate(
    prevalent_dem_alz_w1 = if_else(baseline_alzheimers == 1 | baseline_dementia == 1, 1, 0)
  ) %>%
  filter(
    prevalent_dem_alz_w1 == 0,
    !is.na(pa_level)
  ) %>%
  mutate(
    # Earliest wave with a dementia flag; NA when no wave is flagged.
    first_dem_wave = case_when(
      dem_w2 == 1 ~ 2,
      dem_w3 == 1 ~ 3,
      dem_w4 == 1 ~ 4,
      dem_w5 == 1 ~ 5,
      dem_w6 == 1 ~ 6,
      dem_w7 == 1 ~ 7,
      dem_w8 == 1 ~ 8,
      dem_w9 == 1 ~ 9,
      TRUE ~ NA_real_
    ),
    # Latest wave whose follow-up table contains the participant (1 = baseline
    # only). The dem_w* columns cannot be used for this because their missing
    # values have already been replaced by 0 in followup_w1.
    # Assumes each w<N>_dem table only holds participants interviewed at that
    # wave - TODO confirm against how the w<N>_core files are built.
    last_wave_seen = case_when(
      idauniq %in% w9_dem$idauniq ~ 9,
      idauniq %in% w8_dem$idauniq ~ 8,
      idauniq %in% w7_dem$idauniq ~ 7,
      idauniq %in% w6_dem$idauniq ~ 6,
      idauniq %in% w5_dem$idauniq ~ 5,
      idauniq %in% w4_dem$idauniq ~ 4,
      idauniq %in% w3_dem$idauniq ~ 3,
      idauniq %in% w2_dem$idauniq ~ 2,
      TRUE ~ 1
    ),
    event_dementia = if_else(!is.na(first_dem_wave), 1, 0),
    # Events: waves elapsed until the first flag. Non-events: censored at the
    # last wave they were observed, so participants who left the study early
    # do not contribute dementia-free follow-up all the way to wave 9.
    time_to_event_waves = if_else(
      event_dementia == 1,
      first_dem_wave - 1,
      last_wave_seen - 1
    ),
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    # Binary covariates; a missing input stays missing.
    current_smoker = if_else(smoking3 == "Current", 1, 0, missing = NA_real_),
    depression_binary = if_else(depression_score >= 4, 1, 0, missing = NA_real_)
  )
dim(analysis_w1)
## [1] 11855 33
table(analysis_w1$event_dementia, useNA = "ifany")
##
## 0 1
## 11366 489
table(analysis_w1$pa_level, useNA = "ifany")
##
## High Moderate Low Sedentary
## 3297 5594 1748 1216
# Cox model chunk.
# Both models use the same outcome: time_to_event_waves (waves elapsed since
# wave 1) with event_dementia as the event indicator, fitted on analysis_w1.
# pa_level is a factor whose first level is "High", so the pa_level
# coefficients are contrasts against the High group.

# Unadjusted model: physical activity group only.
cox_unadjusted <- coxph(
Surv(time_to_event_waves, event_dementia) ~ pa_level,
data = analysis_w1
)
# Adjusted model: adds age, sex, smoking, the cardiovascular/metabolic disease
# indicators and the binary depression flag.
# Rows with a missing covariate are dropped from this fit (the printed summary
# reports the number deleted), so it is estimated on fewer rows than the
# unadjusted model.
cox_adjusted <- coxph(
Surv(time_to_event_waves, event_dementia) ~ pa_level + age + sex +
current_smoker + hypertension + diabetes + stroke +
abnormal_heart_rhythm + heart_failure + depression_binary,
data = analysis_w1
)
#output chunk for document
summary(cox_unadjusted)
## Call:
## coxph(formula = Surv(time_to_event_waves, event_dementia) ~ pa_level,
## data = analysis_w1)
##
## n= 11855, number of events= 489
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pa_levelModerate 0.5030 1.6536 0.1265 3.976 7.01e-05 ***
## pa_levelLow 0.7862 2.1950 0.1486 5.291 1.21e-07 ***
## pa_levelSedentary 0.8453 2.3287 0.1608 5.258 1.46e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pa_levelModerate 1.654 0.6047 1.291 2.119
## pa_levelLow 2.195 0.4556 1.640 2.937
## pa_levelSedentary 2.329 0.4294 1.699 3.191
##
## Concordance= 0.577 (se = 0.012 )
## Likelihood ratio test= 39.91 on 3 df, p=1e-08
## Wald test = 37.44 on 3 df, p=4e-08
## Score (logrank) test = 38.81 on 3 df, p=2e-08
## Call:
## coxph(formula = Surv(time_to_event_waves, event_dementia) ~ pa_level +
## age + sex + current_smoker + hypertension + diabetes + stroke +
## abnormal_heart_rhythm + heart_failure + depression_binary,
## data = analysis_w1)
##
## n= 11728, number of events= 485
## (127 observations deleted due to missingness)
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pa_levelModerate 0.24344 1.27563 0.12921 1.884 0.0595 .
## pa_levelLow 0.18022 1.19748 0.15806 1.140 0.2542
## pa_levelSedentary -0.01925 0.98094 0.17645 -0.109 0.9131
## age 0.06162 1.06356 0.00423 14.566 <2e-16 ***
## sexFemale 0.19684 1.21754 0.09505 2.071 0.0384 *
## current_smoker -0.13331 0.87520 0.14047 -0.949 0.3426
## hypertension 0.09580 1.10054 0.09312 1.029 0.3036
## diabetes 0.16264 1.17661 0.15589 1.043 0.2968
## stroke 0.23524 1.26521 0.17619 1.335 0.1818
## abnormal_heart_rhythm 0.30517 1.35685 0.15522 1.966 0.0493 *
## heart_failure -0.70697 0.49313 0.58253 -1.214 0.2249
## depression_binary 0.24828 1.28182 0.11251 2.207 0.0273 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## pa_levelModerate 1.2756 0.7839 0.9903 1.643
## pa_levelLow 1.1975 0.8351 0.8785 1.632
## pa_levelSedentary 0.9809 1.0194 0.6941 1.386
## age 1.0636 0.9402 1.0548 1.072
## sexFemale 1.2175 0.8213 1.0106 1.467
## current_smoker 0.8752 1.1426 0.6646 1.153
## hypertension 1.1005 0.9086 0.9169 1.321
## diabetes 1.1766 0.8499 0.8668 1.597
## stroke 1.2652 0.7904 0.8957 1.787
## abnormal_heart_rhythm 1.3569 0.7370 1.0009 1.839
## heart_failure 0.4931 2.0278 0.1574 1.545
## depression_binary 1.2818 0.7801 1.0282 1.598
##
## Concordance= 0.74 (se = 0.01 )
## Likelihood ratio test= 302 on 12 df, p=<2e-16
## Wald test = 310.1 on 12 df, p=<2e-16
## Score (logrank) test = 330 on 12 df, p=<2e-16
Wave 2 Table 1
# Wave 2: read the derived-variables file, attach it to the core file on
# idauniq, and derive condition flags and the physical-activity label.
w2_derived <- read_dta("raw data/RAW_data_stata/wave_2_derived_variables.dta")
w2 <- local({
  # 1 when any supplied column equals `code`, 0 when none does; NA when no
  # column matches and at least one comparison is NA.
  any_mention <- function(code, ...) {
    as.numeric(Reduce(`|`, lapply(list(...), function(slot) slot == code)))
  }

  w2_core %>%
    left_join(w2_derived, by = "idauniq") %>%
    mutate(
      # Flags from the four hedia0x columns.
      htn_w2 = any_mention(1, hedia01, hedia02, hedia03, hedia04),
      hf_w2 = any_mention(4, hedia01, hedia02, hedia03, hedia04),
      arrhythmia_w2 = any_mention(6, hedia01, hedia02, hedia03, hedia04),
      diabetes_w2 = any_mention(7, hedia01, hedia02, hedia03, hedia04),
      stroke_w2 = any_mention(8, hedia01, hedia02, hedia03, hedia04),
      # Flags from the four hedib0x columns.
      alz_w2 = any_mention(8, hedib01, hedib02, hedib03, hedib04),
      dementia_w2 = any_mention(9, hedib01, hedib02, hedib03, hedib04),
      # Any palevel value other than 0-3 (including NA) becomes NA.
      pa_level = case_when(
        palevel == 0 ~ "Sedentary",
        palevel == 1 ~ "Low",
        palevel == 2 ~ "Moderate",
        palevel == 3 ~ "High",
        TRUE ~ NA_character_
      )
    )
})
# Wave 2 Table 1 input: one row per participant with the analysis variables
# under the common names shared by the per-wave tables.
# NOTE(review): dhager is used as-is here; later waves recode negative age
# codes to NA -- confirm wave 2 has no special codes in dhager.
w2_table1 <- w2 %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = dhager,
    sex = as_factor(DhSex),
    hypertension = htn_w2,
    heart_failure = hf_w2,
    abnormal_heart_rhythm = arrhythmia_w2,
    diabetes = diabetes_w2,
    stroke = stroke_w2,
    baseline_alzheimers = alz_w2,
    baseline_dementia = dementia_w2
  ) %>%
  select(
    idauniq, pa_level, age, sex,
    hypertension, heart_failure, abnormal_heart_rhythm, diabetes, stroke,
    baseline_alzheimers, baseline_dementia
  )
# Wave 2 Table 1 summary: one row per physical-activity category (NA pa_level
# is relabelled "Missing PA"), with every cell pre-formatted as text.
# NOTE(review): forcats::fct_explicit_na() is deprecated in forcats >= 1.0.0
# in favour of fct_na_value_to_level() -- consider switching once verified.
w2_table1_summary <- local({
  # "n (p%)" for a logical flag: NAs are not counted as hits, but the
  # denominator is the full group size.
  n_pct <- function(flag) {
    hits <- sum(flag, na.rm = TRUE)
    sprintf("%d (%.1f%%)", hits, 100 * hits / length(flag))
  }
  # "mean (SD)" to one decimal place, ignoring NAs.
  mean_sd <- function(x) {
    sprintf("%.1f (%.1f)", mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
  }

  w2_table1 %>%
    mutate(
      pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
    ) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 2 summary: characteristics become rows and the
# physical-activity categories become columns, in a fixed order.
w2_table1_summary_wide <- w2_table1_summary %>%
  # N is made character so all cells share one type when stacked.
  mutate(across(N, as.character)) %>%
  pivot_longer(!pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = "pa_level", values_from = "Value") %>%
  select(
    all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )
# Render the table for the document.
w2_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 2 by physical activity category."
  )
Table 1. Baseline characteristics at wave 2 by physical
activity category.
| N |
1744 |
4684 |
2309 |
556 |
139 |
| Age, mean (SD) |
61.8 (8.7) |
64.5 (9.8) |
68.8 (11.2) |
75.2 (11.6) |
70.6 (15.8) |
| Female, n (%) |
835 (47.9%) |
2615 (55.8%) |
1465 (63.4%) |
325 (58.5%) |
66 (47.5%) |
| Hypertension, n (%) |
253 (14.5%) |
784 (16.7%) |
487 (21.1%) |
156 (28.1%) |
26 (18.7%) |
| Diabetes, n (%) |
28 (1.6%) |
151 (3.2%) |
139 (6.0%) |
55 (9.9%) |
10 (7.2%) |
| Stroke, n (%) |
3 (0.2%) |
49 (1.0%) |
59 (2.6%) |
39 (7.0%) |
13 (9.4%) |
| Abnormal heart rhythm, n (%) |
48 (2.8%) |
138 (2.9%) |
79 (3.4%) |
44 (7.9%) |
5 (3.6%) |
| Heart failure, n (%) |
0 (0.0%) |
8 (0.2%) |
8 (0.3%) |
4 (0.7%) |
0 (0.0%) |
| Baseline Alzheimer’s, n (%) |
1 (0.1%) |
2 (0.0%) |
2 (0.1%) |
1 (0.2%) |
5 (3.6%) |
| Baseline dementia, n (%) |
2 (0.1%) |
11 (0.2%) |
9 (0.4%) |
7 (1.3%) |
14 (10.1%) |
# Wave 3: derive 0/1 condition flags and the physical-activity label.
# Each flag is 1 when its source variable matches and 0 otherwise; a missing
# source value also yields 0.
w3_table1_raw <- w3_core %>%
  mutate(
    hypertension = case_when(hediabp == 1 ~ 1, TRUE ~ 0),
    heart_failure = case_when(hediahf == 1 ~ 1, TRUE ~ 0),
    abnormal_heart_rhythm = case_when(hediaar == 1 ~ 1, TRUE ~ 0),
    diabetes = case_when(hediadi == 1 ~ 1, TRUE ~ 0),
    stroke = case_when(hediast == 1 ~ 1, TRUE ~ 0),
    # NOTE(review): wave 3 accepts codes 1, 2 and 3 here, whereas the later
    # waves only accept 1 -- confirm against the wave 3 data dictionary.
    baseline_alzheimers = case_when(hedibad %in% c(1, 2, 3) ~ 1, TRUE ~ 0),
    baseline_dementia = case_when(hedibde %in% c(1, 2, 3) ~ 1, TRUE ~ 0),
    # Any palevel value other than 0-3 (including NA) becomes NA.
    pa_level = case_when(
      palevel == 0 ~ "Sedentary",
      palevel == 1 ~ "Low",
      palevel == 2 ~ "Moderate",
      palevel == 3 ~ "High",
      TRUE ~ NA_character_
    )
  )
# Wave 3 Table 1 input: keep only the analysis variables, with pa_level as
# an ordered-by-level factor and sex converted to a factor.
# NOTE(review): dhager is used as-is here; later waves recode negative age
# codes to NA -- confirm wave 3 has no special codes in dhager.
w3_table1 <- w3_table1_raw %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = dhager,
    sex = as_factor(dhsex)
  ) %>%
  select(
    idauniq, pa_level, age, sex,
    hypertension, heart_failure, abnormal_heart_rhythm, diabetes, stroke,
    baseline_alzheimers, baseline_dementia
  )
# Wave 3 Table 1 summary: one row per physical-activity category (NA pa_level
# is relabelled "Missing PA"), with every cell pre-formatted as text.
# NOTE(review): forcats::fct_explicit_na() is deprecated in forcats >= 1.0.0
# in favour of fct_na_value_to_level() -- consider switching once verified.
w3_table1_summary <- local({
  # "n (p%)" for a logical flag: NAs are not counted as hits, but the
  # denominator is the full group size.
  n_pct <- function(flag) {
    hits <- sum(flag, na.rm = TRUE)
    sprintf("%d (%.1f%%)", hits, 100 * hits / length(flag))
  }
  # "mean (SD)" to one decimal place, ignoring NAs.
  mean_sd <- function(x) {
    sprintf("%.1f (%.1f)", mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
  }

  w3_table1 %>%
    mutate(
      pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
    ) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 3 summary: characteristics become rows and the
# physical-activity categories become columns, in a fixed order.
w3_table1_summary_wide <- w3_table1_summary %>%
  # N is made character so all cells share one type when stacked.
  mutate(across(N, as.character)) %>%
  pivot_longer(!pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = "pa_level", values_from = "Value") %>%
  select(
    all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )
# Render the table for the document.
w3_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 3 by physical activity category."
  )
Table 1. Baseline characteristics at wave 3 by physical
activity category.
| N |
1969 |
4838 |
2263 |
686 |
15 |
| Age, mean (SD) |
59.8 (8.9) |
63.3 (10.5) |
68.1 (12.0) |
74.7 (14.2) |
61.6 (12.2) |
| Female, n (%) |
939 (47.7%) |
2669 (55.2%) |
1458 (64.4%) |
399 (58.2%) |
11 (73.3%) |
| Hypertension, n (%) |
174 (8.8%) |
473 (9.8%) |
221 (9.8%) |
59 (8.6%) |
0 (0.0%) |
| Diabetes, n (%) |
36 (1.8%) |
115 (2.4%) |
75 (3.3%) |
30 (4.4%) |
0 (0.0%) |
| Stroke, n (%) |
15 (0.8%) |
31 (0.6%) |
30 (1.3%) |
31 (4.5%) |
0 (0.0%) |
| Abnormal heart rhythm, n (%) |
44 (2.2%) |
90 (1.9%) |
62 (2.7%) |
21 (3.1%) |
0 (0.0%) |
| Heart failure, n (%) |
1 (0.1%) |
1 (0.0%) |
8 (0.4%) |
4 (0.6%) |
0 (0.0%) |
| Baseline Alzheimer’s, n (%) |
1 (0.1%) |
5 (0.1%) |
9 (0.4%) |
17 (2.5%) |
0 (0.0%) |
| Baseline dementia, n (%) |
3 (0.2%) |
20 (0.4%) |
33 (1.5%) |
56 (8.2%) |
0 (0.0%) |
Wave 4 Table 1
# Wave 4: derive 0/1 condition flags and the physical-activity label.
# Each flag is 1 when its source variable equals 1 and 0 otherwise; a missing
# source value also yields 0.
w4_table1_raw <- w4_core %>%
  mutate(
    hypertension = case_when(hediabp == 1 ~ 1, TRUE ~ 0),
    heart_failure = case_when(hediahf == 1 ~ 1, TRUE ~ 0),
    abnormal_heart_rhythm = case_when(hediaar == 1 ~ 1, TRUE ~ 0),
    diabetes = case_when(hediadi == 1 ~ 1, TRUE ~ 0),
    stroke = case_when(hediast == 1 ~ 1, TRUE ~ 0),
    baseline_alzheimers = case_when(hedibad == 1 ~ 1, TRUE ~ 0),
    baseline_dementia = case_when(hedibde == 1 ~ 1, TRUE ~ 0),
    # Any palevel value other than 0-3 (including NA) becomes NA.
    pa_level = case_when(
      palevel == 0 ~ "Sedentary",
      palevel == 1 ~ "Low",
      palevel == 2 ~ "Moderate",
      palevel == 3 ~ "High",
      TRUE ~ NA_character_
    )
  )
# Wave 4 Table 1 input: keep only the analysis variables, with pa_level as a
# factor (High, Moderate, Low, Sedentary) and sex converted to a factor.
w4_table1 <- w4_table1_raw %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    # Waves 5-8 found that negative indager values are special missing codes
    # that distort the age mean and SD. The same guard is applied here for
    # consistency; it changes nothing if wave 4 has no negative values.
    age = if_else(indager < 0, NA_real_, as.numeric(indager)),
    sex = as_factor(dhsex),
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )
# Wave 4 Table 1 summary: one row per physical-activity category (NA pa_level
# is relabelled "Missing PA"), with every cell pre-formatted as text.
# NOTE(review): forcats::fct_explicit_na() is deprecated in forcats >= 1.0.0
# in favour of fct_na_value_to_level() -- consider switching once verified.
w4_table1_summary <- local({
  # "n (p%)" for a logical flag: NAs are not counted as hits, but the
  # denominator is the full group size.
  n_pct <- function(flag) {
    hits <- sum(flag, na.rm = TRUE)
    sprintf("%d (%.1f%%)", hits, 100 * hits / length(flag))
  }
  # "mean (SD)" to one decimal place, ignoring NAs.
  mean_sd <- function(x) {
    sprintf("%.1f (%.1f)", mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
  }

  w4_table1 %>%
    mutate(
      pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
    ) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 4 summary: characteristics become rows and the
# physical-activity categories become columns, in a fixed order.
w4_table1_summary_wide <- w4_table1_summary %>%
  # N is made character so all cells share one type when stacked.
  mutate(across(N, as.character)) %>%
  pivot_longer(!pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = "pa_level", values_from = "Value") %>%
  select(
    all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )
# Render the table for the document.
w4_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 4 by physical activity category."
  )
Table 1. Baseline characteristics at wave 4 by physical
activity category.
| N |
2254 |
5384 |
2562 |
835 |
15 |
| Age, mean (SD) |
61.7 (8.4) |
63.9 (9.5) |
68.2 (11.3) |
74.2 (12.5) |
59.1 (9.2) |
| Female, n (%) |
1070 (47.5%) |
2929 (54.4%) |
1649 (64.4%) |
472 (56.5%) |
5 (33.3%) |
| Hypertension, n (%) |
201 (8.9%) |
603 (11.2%) |
350 (13.7%) |
135 (16.2%) |
0 (0.0%) |
| Diabetes, n (%) |
43 (1.9%) |
149 (2.8%) |
125 (4.9%) |
61 (7.3%) |
1 (6.7%) |
| Stroke, n (%) |
7 (0.3%) |
55 (1.0%) |
46 (1.8%) |
68 (8.1%) |
0 (0.0%) |
| Abnormal heart rhythm, n (%) |
32 (1.4%) |
111 (2.1%) |
88 (3.4%) |
45 (5.4%) |
0 (0.0%) |
| Heart failure, n (%) |
1 (0.0%) |
3 (0.1%) |
6 (0.2%) |
15 (1.8%) |
0 (0.0%) |
| Baseline Alzheimer’s, n (%) |
0 (0.0%) |
5 (0.1%) |
3 (0.1%) |
29 (3.5%) |
0 (0.0%) |
| Baseline dementia, n (%) |
1 (0.0%) |
17 (0.3%) |
19 (0.7%) |
60 (7.2%) |
0 (0.0%) |
Wave 5 Table 1
## Wave 5 table 1
# Wave 5: derive 0/1 condition flags and the physical-activity label.
# Each flag is 1 when its source variable equals 1 and 0 otherwise; a missing
# source value also yields 0.
w5_table1_raw <- w5_core %>%
  mutate(
    hypertension = case_when(hediabp == 1 ~ 1, TRUE ~ 0),
    heart_failure = case_when(hediahf == 1 ~ 1, TRUE ~ 0),
    abnormal_heart_rhythm = case_when(hediaar == 1 ~ 1, TRUE ~ 0),
    diabetes = case_when(hediadi == 1 ~ 1, TRUE ~ 0),
    stroke = case_when(hediast == 1 ~ 1, TRUE ~ 0),
    baseline_alzheimers = case_when(hedibad == 1 ~ 1, TRUE ~ 0),
    baseline_dementia = case_when(hedibde == 1 ~ 1, TRUE ~ 0),
    # Any palevel value other than 0-3 (including NA) becomes NA.
    pa_level = case_when(
      palevel == 0 ~ "Sedentary",
      palevel == 1 ~ "Low",
      palevel == 2 ~ "Moderate",
      palevel == 3 ~ "High",
      TRUE ~ NA_character_
    )
  )
# ORIGINAL VERSION OF W5 TABLE 1 (performed first; superseded by the amended
# final version that follows, which reassigns w5_table1).
# This version produced implausible age summaries because wave 5 indager
# contains negative special missing codes, which distorted the mean and SD
# when left unrecoded.
# Kept here as part of the analytic process, to be described in the methods.
w5_table1 <- w5_table1_raw %>%
transmute(
idauniq,
pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
# Age taken as-is, including the negative special missing codes.
age = indager,
sex = as_factor(dhsex),
hypertension,
heart_failure,
abnormal_heart_rhythm,
diabetes,
stroke,
baseline_alzheimers,
baseline_dementia
)
# Wave 5 table 1, CORRECTED (amended final) VERSION
# Wave 5 indager includes negative values that represent special missing
# codes; they are recoded to NA before any summary statistics are computed.
w5_table1 <- w5_table1_raw %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = case_when(indager >= 0 ~ as.numeric(indager), TRUE ~ NA_real_),
    sex = as_factor(dhsex)
  ) %>%
  select(
    idauniq, pa_level, age, sex,
    hypertension, heart_failure, abnormal_heart_rhythm, diabetes, stroke,
    baseline_alzheimers, baseline_dementia
  )
# Wave 5 Table 1 summary: one row per physical-activity category (NA pa_level
# is relabelled "Missing PA"), with every cell pre-formatted as text.
# NOTE(review): forcats::fct_explicit_na() is deprecated in forcats >= 1.0.0
# in favour of fct_na_value_to_level() -- consider switching once verified.
w5_table1_summary <- local({
  # "n (p%)" for a logical flag: NAs are not counted as hits, but the
  # denominator is the full group size.
  n_pct <- function(flag) {
    hits <- sum(flag, na.rm = TRUE)
    sprintf("%d (%.1f%%)", hits, 100 * hits / length(flag))
  }
  # "mean (SD)" to one decimal place, ignoring NAs.
  mean_sd <- function(x) {
    sprintf("%.1f (%.1f)", mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
  }

  w5_table1 %>%
    mutate(
      pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
    ) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 5 summary: characteristics become rows and the
# physical-activity categories become columns, in a fixed order.
w5_table1_summary_wide <- w5_table1_summary %>%
  # N is made character so all cells share one type when stacked.
  mutate(across(N, as.character)) %>%
  pivot_longer(!pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = "pa_level", values_from = "Value") %>%
  select(
    all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )
# Render the table for the document.
w5_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 5 by physical activity category."
  )
Table 1. Baseline characteristics at wave 5 by physical
activity category.
| N |
1981 |
4936 |
2432 |
801 |
124 |
| Age, mean (SD) |
62.9 (7.8) |
65.5 (8.9) |
68.9 (10.0) |
73.0 (10.2) |
60.7 (5.5) |
| Female, n (%) |
934 (47.1%) |
2736 (55.4%) |
1507 (62.0%) |
472 (58.9%) |
56 (45.2%) |
| Hypertension, n (%) |
141 (7.1%) |
335 (6.8%) |
202 (8.3%) |
90 (11.2%) |
8 (6.5%) |
| Diabetes, n (%) |
33 (1.7%) |
112 (2.3%) |
93 (3.8%) |
49 (6.1%) |
1 (0.8%) |
| Stroke, n (%) |
10 (0.5%) |
32 (0.6%) |
43 (1.8%) |
64 (8.0%) |
0 (0.0%) |
| Abnormal heart rhythm, n (%) |
25 (1.3%) |
99 (2.0%) |
78 (3.2%) |
52 (6.5%) |
2 (1.6%) |
| Heart failure, n (%) |
1 (0.1%) |
4 (0.1%) |
8 (0.3%) |
11 (1.4%) |
0 (0.0%) |
| Baseline Alzheimer’s, n (%) |
0 (0.0%) |
7 (0.1%) |
6 (0.2%) |
33 (4.1%) |
0 (0.0%) |
| Baseline dementia, n (%) |
0 (0.0%) |
11 (0.2%) |
26 (1.1%) |
73 (9.1%) |
0 (0.0%) |
Wave 6 Table 1
# Wave 6: derive 0/1 condition flags and the physical-activity label.
# Each flag is 1 when its source variable equals 1 and 0 otherwise; a missing
# source value also yields 0.
w6_table1_raw <- w6_core %>%
  mutate(
    hypertension = case_when(hediabp == 1 ~ 1, TRUE ~ 0),
    heart_failure = case_when(hediahf == 1 ~ 1, TRUE ~ 0),
    abnormal_heart_rhythm = case_when(hediaar == 1 ~ 1, TRUE ~ 0),
    diabetes = case_when(hediadi == 1 ~ 1, TRUE ~ 0),
    stroke = case_when(hediast == 1 ~ 1, TRUE ~ 0),
    baseline_alzheimers = case_when(hedibad == 1 ~ 1, TRUE ~ 0),
    baseline_dementia = case_when(hedibde == 1 ~ 1, TRUE ~ 0),
    # pa_level is built from the three activity items HeActa, HeActb and
    # HeActc. case_when() takes the first matching branch and %in% never
    # returns NA, so each branch only needs its own positive test: a row
    # reaches "Moderate" only if "High" failed, and so on.
    pa_level = case_when(
      HeActa %in% c(1, 2) ~ "High",
      HeActb %in% c(1, 2) ~ "Moderate",
      HeActc %in% c(1, 2) ~ "Low",
      # Sedentary requires a valid 3/4 answer on all three items.
      HeActa %in% c(3, 4) & HeActb %in% c(3, 4) & HeActc %in% c(3, 4) ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )
# ORIGINAL VERSION OF W6 AGE HANDLING (kept as methodological note)
#   age = indager
# Not used in the final summaries because negative indager values are
# special missing codes.
# Wave 6 table 1, CORRECTED VERSION: negative ages are recoded to NA.
w6_table1 <- w6_table1_raw %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = case_when(indager >= 0 ~ as.numeric(indager), TRUE ~ NA_real_),
    sex = as_factor(DhSex)
  ) %>%
  select(
    idauniq, pa_level, age, sex,
    hypertension, heart_failure, abnormal_heart_rhythm, diabetes, stroke,
    baseline_alzheimers, baseline_dementia
  )
# Diagnostic age check: distribution of the recoded age within each
# physical-activity level, plus the number of ages set to NA.
w6_table1 %>%
  group_by(pa_level) %>%
  summarise(
    min_age = min(age, na.rm = TRUE),
    q1_age = quantile(age, probs = 0.25, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    mean_age = mean(age, na.rm = TRUE),
    q3_age = quantile(age, probs = 0.75, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    sd_age = sd(age, na.rm = TRUE),
    n_missing_age = sum(is.na(age))
  )
## # A tibble: 5 × 9
## pa_level min_age q1_age median_age mean_age q3_age max_age sd_age
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 High 28 57 63 63.3 69 89 8.44
## 2 Moderate 31 59 65 65.8 73 89 9.35
## 3 Low 41 61 69 69.4 78 89 10.5
## 4 Sedentary 40 62 72 71.1 80 89 10.8
## 5 <NA> 63 66.8 71 73 77.2 87 10.4
## # ℹ 1 more variable: n_missing_age <int>
# Wave 6 Table 1 summary: one row per physical-activity category (NA pa_level
# is relabelled "Missing PA"), with every cell pre-formatted as text.
# NOTE(review): forcats::fct_explicit_na() is deprecated in forcats >= 1.0.0
# in favour of fct_na_value_to_level() -- consider switching once verified.
w6_table1_summary <- local({
  # "n (p%)" for a logical flag: NAs are not counted as hits, but the
  # denominator is the full group size.
  n_pct <- function(flag) {
    hits <- sum(flag, na.rm = TRUE)
    sprintf("%d (%.1f%%)", hits, 100 * hits / length(flag))
  }
  # "mean (SD)" to one decimal place, ignoring NAs.
  mean_sd <- function(x) {
    sprintf("%.1f (%.1f)", mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
  }

  w6_table1 %>%
    mutate(
      pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
    ) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 6 summary: characteristics become rows and the
# physical-activity categories become columns, in a fixed order.
w6_table1_summary_wide <- w6_table1_summary %>%
  # N is made character so all cells share one type when stacked.
  mutate(across(N, as.character)) %>%
  pivot_longer(!pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = "pa_level", values_from = "Value") %>%
  select(
    all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )
# Render the table for the document.
w6_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 6 by physical activity category."
  )
Table 1. Baseline characteristics at wave 6 by physical
activity category.
| N |
3156 |
4810 |
1626 |
1005 |
4 |
| Age, mean (SD) |
63.3 (8.4) |
65.8 (9.4) |
69.4 (10.5) |
71.1 (10.8) |
73.0 (10.4) |
| Female, n (%) |
1526 (48.4%) |
2713 (56.4%) |
1087 (66.9%) |
527 (52.4%) |
4 (100.0%) |
| Hypertension, n (%) |
217 (6.9%) |
350 (7.3%) |
117 (7.2%) |
138 (13.7%) |
0 (0.0%) |
| Diabetes, n (%) |
57 (1.8%) |
130 (2.7%) |
50 (3.1%) |
79 (7.9%) |
0 (0.0%) |
| Stroke, n (%) |
10 (0.3%) |
47 (1.0%) |
44 (2.7%) |
54 (5.4%) |
0 (0.0%) |
| Abnormal heart rhythm, n (%) |
69 (2.2%) |
107 (2.2%) |
72 (4.4%) |
38 (3.8%) |
0 (0.0%) |
| Heart failure, n (%) |
2 (0.1%) |
6 (0.1%) |
4 (0.2%) |
13 (1.3%) |
0 (0.0%) |
| Baseline Alzheimer’s, n (%) |
2 (0.1%) |
7 (0.1%) |
4 (0.2%) |
37 (3.7%) |
0 (0.0%) |
| Baseline dementia, n (%) |
2 (0.1%) |
15 (0.3%) |
18 (1.1%) |
77 (7.7%) |
0 (0.0%) |
Wave 7 Table 1
## Wave 7 table 1
# Wave 7: derive 0/1 condition flags and the physical-activity label.
# Each flag is 1 when its source variable equals 1 and 0 otherwise; a missing
# source value also yields 0.
w7_table1_raw <- w7_core %>%
  mutate(
    hypertension = case_when(hediabp == 1 ~ 1, TRUE ~ 0),
    heart_failure = case_when(hediahf == 1 ~ 1, TRUE ~ 0),
    abnormal_heart_rhythm = case_when(hediaar == 1 ~ 1, TRUE ~ 0),
    diabetes = case_when(hediadi == 1 ~ 1, TRUE ~ 0),
    stroke = case_when(hediast == 1 ~ 1, TRUE ~ 0),
    baseline_alzheimers = case_when(hedibad == 1 ~ 1, TRUE ~ 0),
    baseline_dementia = case_when(hedibde == 1 ~ 1, TRUE ~ 0),
    # pa_level is built from the three activity items HeActa, HeActb and
    # HeActc. case_when() takes the first matching branch and %in% never
    # returns NA, so each branch only needs its own positive test: a row
    # reaches "Moderate" only if "High" failed, and so on.
    pa_level = case_when(
      HeActa %in% c(1, 2) ~ "High",
      HeActb %in% c(1, 2) ~ "Moderate",
      HeActc %in% c(1, 2) ~ "Low",
      # Sedentary requires a valid 3/4 answer on all three items.
      HeActa %in% c(3, 4) & HeActb %in% c(3, 4) & HeActc %in% c(3, 4) ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )
# ORIGINAL VERSION OF W7 AGE HANDLING (kept as methodological note)
#   age = indager
# Not used in the final summaries because negative indager values are
# special missing codes.
# Wave 7 table 1, CORRECTED VERSION: negative ages are recoded to NA.
w7_table1 <- w7_table1_raw %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = case_when(indager >= 0 ~ as.numeric(indager), TRUE ~ NA_real_),
    sex = as_factor(DhSex)
  ) %>%
  select(
    idauniq, pa_level, age, sex,
    hypertension, heart_failure, abnormal_heart_rhythm, diabetes, stroke,
    baseline_alzheimers, baseline_dementia
  )
# Diagnostic age check: distribution of the recoded age within each
# physical-activity level, plus the number of ages set to NA.
w7_table1 %>%
  group_by(pa_level) %>%
  summarise(
    min_age = min(age, na.rm = TRUE),
    q1_age = quantile(age, probs = 0.25, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    mean_age = mean(age, na.rm = TRUE),
    q3_age = quantile(age, probs = 0.75, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    sd_age = sd(age, na.rm = TRUE),
    n_missing_age = sum(is.na(age))
  )
## # A tibble: 5 × 9
## pa_level min_age q1_age median_age mean_age q3_age max_age sd_age
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 High 29 58 64 63.9 69 89 8.68
## 2 Moderate 33 60 66 66.7 73 89 9.21
## 3 Low 39 62 70 70.1 78 89 10.3
## 4 Sedentary 38 64 74 72.3 81 89 10.6
## 5 <NA> 63 68.5 74 73 78 82 9.54
## # ℹ 1 more variable: n_missing_age <int>
# Wave 7 Table 1 summary: one row per physical-activity category (NA pa_level
# is relabelled "Missing PA"), with every cell pre-formatted as text.
# NOTE(review): forcats::fct_explicit_na() is deprecated in forcats >= 1.0.0
# in favour of fct_na_value_to_level() -- consider switching once verified.
w7_table1_summary <- local({
  # "n (p%)" for a logical flag: NAs are not counted as hits, but the
  # denominator is the full group size.
  n_pct <- function(flag) {
    hits <- sum(flag, na.rm = TRUE)
    sprintf("%d (%.1f%%)", hits, 100 * hits / length(flag))
  }
  # "mean (SD)" to one decimal place, ignoring NAs.
  mean_sd <- function(x) {
    sprintf("%.1f (%.1f)", mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
  }

  w7_table1 %>%
    mutate(
      pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
    ) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 7 summary: characteristics become rows and the
# physical-activity categories become columns, in a fixed order.
w7_table1_summary_wide <- w7_table1_summary %>%
  # N is made character so all cells share one type when stacked.
  mutate(across(N, as.character)) %>%
  pivot_longer(!pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = "pa_level", values_from = "Value") %>%
  select(
    all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )
# Render the table for the document.
w7_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 7 by physical activity category."
  )
Table 1. Baseline characteristics at wave 7 by physical
activity category.
| N |
2848 |
4426 |
1483 |
906 |
3 |
| Age, mean (SD) |
63.9 (8.7) |
66.7 (9.2) |
70.1 (10.3) |
72.3 (10.6) |
73.0 (9.5) |
| Female, n (%) |
1400 (49.2%) |
2529 (57.1%) |
963 (64.9%) |
474 (52.3%) |
2 (66.7%) |
| Hypertension, n (%) |
141 (5.0%) |
291 (6.6%) |
111 (7.5%) |
111 (12.3%) |
0 (0.0%) |
| Diabetes, n (%) |
45 (1.6%) |
110 (2.5%) |
60 (4.0%) |
64 (7.1%) |
0 (0.0%) |
| Stroke, n (%) |
13 (0.5%) |
39 (0.9%) |
35 (2.4%) |
49 (5.4%) |
1 (33.3%) |
| Abnormal heart rhythm, n (%) |
55 (1.9%) |
116 (2.6%) |
54 (3.6%) |
49 (5.4%) |
0 (0.0%) |
| Heart failure, n (%) |
4 (0.1%) |
6 (0.1%) |
11 (0.7%) |
12 (1.3%) |
0 (0.0%) |
| Baseline Alzheimer’s, n (%) |
0 (0.0%) |
7 (0.2%) |
7 (0.5%) |
29 (3.2%) |
0 (0.0%) |
| Baseline dementia, n (%) |
3 (0.1%) |
22 (0.5%) |
13 (0.9%) |
76 (8.4%) |
0 (0.0%) |
Wave 8 Table 1
# Wave 8: derive 0/1 condition flags and the physical-activity label.
# Each flag is 1 when its source variable equals 1 and 0 otherwise; a missing
# source value also yields 0.
w8_table1_raw <- w8_core %>%
  mutate(
    hypertension = case_when(hediabp == 1 ~ 1, TRUE ~ 0),
    heart_failure = case_when(hediahf == 1 ~ 1, TRUE ~ 0),
    abnormal_heart_rhythm = case_when(hediaar == 1 ~ 1, TRUE ~ 0),
    diabetes = case_when(hediadi == 1 ~ 1, TRUE ~ 0),
    stroke = case_when(hediast == 1 ~ 1, TRUE ~ 0),
    baseline_alzheimers = case_when(hedibad == 1 ~ 1, TRUE ~ 0),
    baseline_dementia = case_when(hedibde == 1 ~ 1, TRUE ~ 0),
    # pa_level is built from the three activity items heacta, heactb and
    # heactc (lower-case names in wave 8). case_when() takes the first
    # matching branch and %in% never returns NA, so each branch only needs
    # its own positive test: a row reaches "Moderate" only if "High" failed.
    pa_level = case_when(
      heacta %in% c(1, 2) ~ "High",
      heactb %in% c(1, 2) ~ "Moderate",
      heactc %in% c(1, 2) ~ "Low",
      # Sedentary requires a valid 3/4 answer on all three items.
      heacta %in% c(3, 4) & heactb %in% c(3, 4) & heactc %in% c(3, 4) ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )
# ORIGINAL VERSION OF W8 AGE HANDLING (kept as methodological note)
#   age = indager
# Not used in the final summaries because negative indager values are
# special missing codes.
# Wave 8 table 1, CORRECTED VERSION: negative ages are recoded to NA.
# Sex is taken from indsex in this wave.
w8_table1 <- w8_table1_raw %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = case_when(indager >= 0 ~ as.numeric(indager), TRUE ~ NA_real_),
    sex = as_factor(indsex)
  ) %>%
  select(
    idauniq, pa_level, age, sex,
    hypertension, heart_failure, abnormal_heart_rhythm, diabetes, stroke,
    baseline_alzheimers, baseline_dementia
  )
# Diagnostic age check: distribution of the recoded age within each
# physical-activity level, plus the number of ages set to NA.
w8_table1 %>%
  group_by(pa_level) %>%
  summarise(
    min_age = min(age, na.rm = TRUE),
    q1_age = quantile(age, probs = 0.25, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    mean_age = mean(age, na.rm = TRUE),
    q3_age = quantile(age, probs = 0.75, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    sd_age = sd(age, na.rm = TRUE),
    n_missing_age = sum(is.na(age))
  )
## # A tibble: 5 × 9
## pa_level min_age q1_age median_age mean_age q3_age max_age sd_age
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 High 31 60 65 65.5 71 89 8.14
## 2 Moderate 34 62 68 68.4 75 89 8.84
## 3 Low 40 64 71 71.3 80 89 9.74
## 4 Sedentary 40 66 74 73.4 81 89 9.61
## 5 <NA> 66 67.5 69 70.3 72.5 76 5.13
## # ℹ 1 more variable: n_missing_age <int>
# Wave 8 Table 1 summary: one row per physical-activity category (NA pa_level
# is relabelled "Missing PA"), with every cell pre-formatted as text.
# NOTE(review): forcats::fct_explicit_na() is deprecated in forcats >= 1.0.0
# in favour of fct_na_value_to_level() -- consider switching once verified.
w8_table1_summary <- local({
  # "n (p%)" for a logical flag: NAs are not counted as hits, but the
  # denominator is the full group size.
  n_pct <- function(flag) {
    hits <- sum(flag, na.rm = TRUE)
    sprintf("%d (%.1f%%)", hits, 100 * hits / length(flag))
  }
  # "mean (SD)" to one decimal place, ignoring NAs.
  mean_sd <- function(x) {
    sprintf("%.1f (%.1f)", mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))
  }

  w8_table1 %>%
    mutate(
      pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
    ) %>%
    group_by(pa_level, .drop = FALSE) %>%
    summarise(
      N = n(),
      `Age, mean (SD)` = mean_sd(age),
      `Female, n (%)` = n_pct(sex == "Female"),
      `Hypertension, n (%)` = n_pct(hypertension == 1),
      `Diabetes, n (%)` = n_pct(diabetes == 1),
      `Stroke, n (%)` = n_pct(stroke == 1),
      `Abnormal heart rhythm, n (%)` = n_pct(abnormal_heart_rhythm == 1),
      `Heart failure, n (%)` = n_pct(heart_failure == 1),
      `Baseline Alzheimer’s, n (%)` = n_pct(baseline_alzheimers == 1),
      `Baseline dementia, n (%)` = n_pct(baseline_dementia == 1)
    )
})
# Transpose the wave 8 summary: characteristics become rows and the
# physical-activity categories become columns, in a fixed order.
w8_table1_summary_wide <- w8_table1_summary %>%
  # N is made character so all cells share one type when stacked.
  mutate(across(N, as.character)) %>%
  pivot_longer(!pa_level, names_to = "Characteristic", values_to = "Value") %>%
  pivot_wider(names_from = "pa_level", values_from = "Value") %>%
  select(
    all_of(c("Characteristic", "High", "Moderate", "Low", "Sedentary", "Missing PA"))
  )
# Render the table for the document.
w8_table1_summary_wide %>%
  knitr::kable(
    caption = "Table 1. Baseline characteristics at wave 8 by physical activity category."
  )
Table 1. Baseline characteristics at wave 8 by physical
activity category.
| N |
2443 |
3932 |
1281 |
786 |
3 |
| Age, mean (SD) |
65.5 (8.1) |
68.4 (8.8) |
71.3 (9.7) |
73.4 (9.6) |
70.3 (5.1) |
| Female, n (%) |
1204 (49.3%) |
2253 (57.3%) |
848 (66.2%) |
390 (49.6%) |
0 (0.0%) |
| Hypertension, n (%) |
111 (4.5%) |
204 (5.2%) |
92 (7.2%) |
78 (9.9%) |
0 (0.0%) |
| Diabetes, n (%) |
35 (1.4%) |
73 (1.9%) |
43 (3.4%) |
48 (6.1%) |
0 (0.0%) |
| Stroke, n (%) |
14 (0.6%) |
42 (1.1%) |
30 (2.3%) |
62 (7.9%) |
0 (0.0%) |
| Abnormal heart rhythm, n (%) |
49 (2.0%) |
99 (2.5%) |
49 (3.8%) |
50 (6.4%) |
0 (0.0%) |
| Heart failure, n (%) |
4 (0.2%) |
14 (0.4%) |
7 (0.5%) |
18 (2.3%) |
0 (0.0%) |
| Baseline Alzheimer’s, n (%) |
2 (0.1%) |
12 (0.3%) |
12 (0.9%) |
29 (3.7%) |
0 (0.0%) |
| Baseline dementia, n (%) |
7 (0.3%) |
18 (0.5%) |
25 (2.0%) |
76 (9.7%) |
0 (0.0%) |
Wave summary numbers
### Total participant numbers by wave
# Row counts of each wave's data set, listed in wave order (1 to 9) so that
# they line up with the generated "Wave <k>" labels.
wave_total_participant_numbers <- tibble(
  wave = paste0("Wave ", 1:9),
  total_n = vapply(
    list(
      w1,
      w2_core,
      w3_core,
      w4_core,
      w5_core,
      w6_core,
      w7_core,
      w8_core,
      w9_core
    ),
    nrow,
    integer(1)
  )
)

wave_total_participant_numbers %>%
  knitr::kable(caption = "Total participant numbers by wave.")
Total participant numbers by wave.
| Wave 1 |
12099 |
| Wave 2 |
9432 |
| Wave 3 |
9771 |
| Wave 4 |
11050 |
| Wave 5 |
10274 |
| Wave 6 |
10601 |
| Wave 7 |
9666 |
| Wave 8 |
8445 |
| Wave 9 |
8736 |
### Diagnosis summary numbers by wave
# Wave 1 diagnosis summary numbers
# Derive one logical flag per diagnosis first, then count the flags and their
# overlap. NA flags are dropped from every count via na.rm = TRUE.
wave1_diagnosis_summary_numbers <- w1 %>%
  transmute(
    has_alzheimers = alz_w1 == 1,
    has_dementia = dementia_w1 == 1
  ) %>%
  summarise(
    wave = "Wave 1",
    alzheimers_n = sum(has_alzheimers, na.rm = TRUE),
    dementia_n = sum(has_dementia, na.rm = TRUE),
    both_n = sum(has_alzheimers & has_dementia, na.rm = TRUE),
    either_n = sum(has_alzheimers | has_dementia, na.rm = TRUE)
  )
# Wave 2 diagnosis summary numbers
# Diagnoses are spread over the four slots hedib01-hedib04; code 8 is counted
# as Alzheimer's and code 9 as dementia.
#
# `both_n` is defined as "has a code 8 AND has a code 9", matching the
# definition used for the other waves. The previous version counted anyone
# with more than one slot coded 8 or 9, which would also count a respondent
# whose slots repeat the same code.
#
# `%in%` never returns NA, so missing slots simply count as "not reported" and
# no na.rm handling is needed.
wave2_diagnosis_summary_numbers <- w2_core %>%
  transmute(
    has_alzheimers =
      hedib01 %in% 8 | hedib02 %in% 8 | hedib03 %in% 8 | hedib04 %in% 8,
    has_dementia =
      hedib01 %in% 9 | hedib02 %in% 9 | hedib03 %in% 9 | hedib04 %in% 9
  ) %>%
  summarise(
    wave = "Wave 2",
    alzheimers_n = sum(has_alzheimers),
    dementia_n = sum(has_dementia),
    both_n = sum(has_alzheimers & has_dementia),
    either_n = sum(has_alzheimers | has_dementia)
  )
# Wave 3 diagnosis summary numbers
# In this wave the values 1, 2 and 3 of hedibad / hedibde are all counted as a
# reported diagnosis. The flags come from %in%, so they are never NA.
wave3_diagnosis_summary_numbers <- w3_core %>%
  transmute(
    has_alzheimers = hedibad %in% c(1, 2, 3),
    has_dementia = hedibde %in% c(1, 2, 3)
  ) %>%
  summarise(
    wave = "Wave 3",
    alzheimers_n = sum(has_alzheimers, na.rm = TRUE),
    dementia_n = sum(has_dementia, na.rm = TRUE),
    both_n = sum(has_alzheimers & has_dementia, na.rm = TRUE),
    either_n = sum(has_alzheimers | has_dementia, na.rm = TRUE)
  )
# Waves 4 to 9 diagnosis summary numbers
# These six waves are summarised identically, so the counting logic lives in
# one helper rather than six copies of the same pipeline.

# Count reported diagnoses in one wave's core data set.
#
# core:       data frame with columns `hedibad` and `hedibde`; a value of 1 is
#             counted as a reported Alzheimer's / dementia diagnosis.
# wave_label: label stored in the `wave` column, e.g. "Wave 4".
#
# Returns a one-row summary with columns wave, alzheimers_n, dementia_n,
# both_n and either_n. Comparisons that evaluate to NA are dropped from every
# count.
summarise_hedib_diagnoses <- function(core, wave_label) {
  core %>%
    summarise(
      # .env$ makes sure the function argument is used even if `core` happens
      # to contain a column called `wave_label`.
      wave = .env$wave_label,
      alzheimers_n = sum(hedibad == 1, na.rm = TRUE),
      dementia_n = sum(hedibde == 1, na.rm = TRUE),
      both_n = sum(hedibad == 1 & hedibde == 1, na.rm = TRUE),
      either_n = sum(hedibad == 1 | hedibde == 1, na.rm = TRUE)
    )
}

wave4_diagnosis_summary_numbers <- summarise_hedib_diagnoses(w4_core, "Wave 4")
wave5_diagnosis_summary_numbers <- summarise_hedib_diagnoses(w5_core, "Wave 5")
wave6_diagnosis_summary_numbers <- summarise_hedib_diagnoses(w6_core, "Wave 6")
wave7_diagnosis_summary_numbers <- summarise_hedib_diagnoses(w7_core, "Wave 7")
wave8_diagnosis_summary_numbers <- summarise_hedib_diagnoses(w8_core, "Wave 8")
wave9_diagnosis_summary_numbers <- summarise_hedib_diagnoses(w9_core, "Wave 9")
# Stack the nine one-row wave summaries, in wave order, into a single table.
wave_diagnosis_summary_numbers <- list(
  wave1_diagnosis_summary_numbers,
  wave2_diagnosis_summary_numbers,
  wave3_diagnosis_summary_numbers,
  wave4_diagnosis_summary_numbers,
  wave5_diagnosis_summary_numbers,
  wave6_diagnosis_summary_numbers,
  wave7_diagnosis_summary_numbers,
  wave8_diagnosis_summary_numbers,
  wave9_diagnosis_summary_numbers
) %>%
  bind_rows()

wave_diagnosis_summary_numbers %>%
  knitr::kable(caption = "Diagnosis summary numbers by wave.")
Diagnosis summary numbers by wave.
| Wave 1 |
14 |
64 |
3 |
75 |
| Wave 2 |
11 |
43 |
2 |
52 |
| Wave 3 |
32 |
112 |
11 |
133 |
| Wave 4 |
37 |
97 |
13 |
121 |
| Wave 5 |
46 |
110 |
19 |
137 |
| Wave 6 |
50 |
112 |
17 |
145 |
| Wave 7 |
43 |
114 |
19 |
138 |
| Wave 8 |
55 |
126 |
23 |
158 |
| Wave 9 |
47 |
127 |
21 |
153 |
### Missing physical activity summary numbers by wave
# Wave 1 missing PA
# Flag rows whose pa_level is NA, then count the flagged rows.
wave1_missing_pa_summary_numbers <- table1_w1_clean %>%
  transmute(pa_is_missing = is.na(pa_level)) %>%
  summarise(
    wave = "Wave 1",
    missing_pa_n = sum(pa_is_missing)
  )
# Wave 2 missing PA
# Load the wave 2 derived-variables file and attach it to the core file by
# respondent id.
w2_derived <- read_dta("raw data/RAW_data_stata/wave_2_derived_variables.dta")
w2 <- w2_core %>%
  left_join(w2_derived, by = "idauniq")

# A palevel value outside 0-3 (including NA) is treated as missing physical
# activity, so counting those rows directly gives the same number as recoding
# them to NA first and then counting the NAs.
wave2_missing_pa_summary_numbers <- w2 %>%
  summarise(
    wave = "Wave 2",
    missing_pa_n = sum(!(palevel %in% c(0, 1, 2, 3)))
  )
# Waves 3 to 9: the physical activity variables have not been standardised
# yet, so the missing-PA count is left as NA as a placeholder for now.

# Build a one-row placeholder (missing_pa_n = NA) for the given wave label.
missing_pa_placeholder <- function(wave_label) {
  tibble(wave = wave_label, missing_pa_n = NA_integer_)
}

wave3_missing_pa_summary_numbers <- missing_pa_placeholder("Wave 3")
wave4_missing_pa_summary_numbers <- missing_pa_placeholder("Wave 4")
wave5_missing_pa_summary_numbers <- missing_pa_placeholder("Wave 5")
wave6_missing_pa_summary_numbers <- missing_pa_placeholder("Wave 6")
wave7_missing_pa_summary_numbers <- missing_pa_placeholder("Wave 7")
wave8_missing_pa_summary_numbers <- missing_pa_placeholder("Wave 8")
wave9_missing_pa_summary_numbers <- missing_pa_placeholder("Wave 9")
# Stack the per-wave missing-PA rows, in wave order, into a single table.
wave_missing_pa_summary_numbers <- list(
  wave1_missing_pa_summary_numbers,
  wave2_missing_pa_summary_numbers,
  wave3_missing_pa_summary_numbers,
  wave4_missing_pa_summary_numbers,
  wave5_missing_pa_summary_numbers,
  wave6_missing_pa_summary_numbers,
  wave7_missing_pa_summary_numbers,
  wave8_missing_pa_summary_numbers,
  wave9_missing_pa_summary_numbers
) %>%
  bind_rows()

wave_missing_pa_summary_numbers %>%
  knitr::kable(caption = "Missing physical activity summary numbers by wave.")
Missing physical activity summary numbers by wave.
| Wave 1 |
194 |
| Wave 2 |
139 |
| Wave 3 |
NA |
| Wave 4 |
NA |
| Wave 5 |
NA |
| Wave 6 |
NA |
| Wave 7 |
NA |
| Wave 8 |
NA |
| Wave 9 |
NA |
### Combined wave summary numbers
# Join the per-wave totals, diagnosis counts and missing-PA counts on the wave
# label, then add the final analysis sample size.
wave_summary_numbers_table <- wave_total_participant_numbers %>%
  left_join(wave_diagnosis_summary_numbers, by = "wave") %>%
  left_join(wave_missing_pa_summary_numbers, by = "wave") %>%
  mutate(
    # Only Wave 1 gets a value (the row count of analysis_w1); every other
    # wave is left as NA.
    final_analysis_n = if_else(
      wave == "Wave 1",
      nrow(analysis_w1),
      NA_integer_
    )
  )

wave_summary_numbers_table %>%
  knitr::kable(caption = "Combined wave summary numbers.")
Combined wave summary numbers.
| Wave 1 |
12099 |
14 |
64 |
3 |
75 |
194 |
11855 |
| Wave 2 |
9432 |
11 |
43 |
2 |
52 |
139 |
NA |
| Wave 3 |
9771 |
32 |
112 |
11 |
133 |
NA |
NA |
| Wave 4 |
11050 |
37 |
97 |
13 |
121 |
NA |
NA |
| Wave 5 |
10274 |
46 |
110 |
19 |
137 |
NA |
NA |
| Wave 6 |
10601 |
50 |
112 |
17 |
145 |
NA |
NA |
| Wave 7 |
9666 |
43 |
114 |
19 |
138 |
NA |
NA |
| Wave 8 |
8445 |
55 |
126 |
23 |
158 |
NA |
NA |
| Wave 9 |
8736 |
47 |
127 |
21 |
153 |
NA |
NA |