ELSA dissertation

w1_core <- read_dta("raw data/RAW_data_stata/wave_1_core_data_v3.dta")
w1_derived <- read_dta("raw data/RAW_data_stata/wave_1_ifs_derived_variables.dta")

w1 <- left_join(w1_core, w1_derived, by = "idauniq")
#load waves 2 to 9
w2_core  <- read_dta("raw data/RAW_data_stata/wave_2_core_data_v4.dta")
w3_core  <- read_dta("raw data/RAW_data_stata/wave_3_elsa_data_v4.dta")
w4_core  <- read_dta("raw data/RAW_data_stata/wave_4_elsa_data_v3.dta")
w5_core  <- read_dta("raw data/RAW_data_stata/wave_5_elsa_data_v4.dta")
w6_core  <- read_dta("raw data/RAW_data_stata/wave_6_elsa_data_v2.dta")
w7_core  <- read_dta("raw data/RAW_data_stata/wave_7_elsa_data.dta")
w8_core  <- read_dta("raw data/RAW_data_stata/wave_8_elsa_data_eul_v2.dta")
w9_core  <- read_dta("raw data/RAW_data_stata/wave_9_elsa_data_eul_v2.dta")

Table 1 baseline characteristics NO PARTICIPANTS EXCLUDED

# Demographic / social variables
names(w1)[grepl("age|ager|dob", names(w1), ignore.case = TRUE)]
##  [1] "dhdobyr"   "dhager"    "didob"     "heage"     "wprage"    "indobyr.x"
##  [7] "indager"   "aagemab"   "aagepab"   "aageangi"  "aagehart"  "aagestro" 
## [13] "aagedi"    "age"       "age_p"     "indobyr.y" "indobyr_p" "agebuhead"
## [19] "agebusp"   "agehoh"    "agehhch1"  "agehhch2"  "agehhch3"  "agehhch4" 
## [25] "agehhch5"  "agehhch6"  "agehhch7"  "agehhch8"  "agebuch1"  "agebuch2" 
## [31] "agebuch3"  "agebuch4"  "agebuch5"  "agebuch6"  "agebuch7"  "agebuch8" 
## [37] "chage1"    "chage2"    "chage3"    "chage4"    "chage5"    "chage6"   
## [43] "chage7"    "chage8"    "chage9"    "chage10"   "chage11"   "chage12"  
## [49] "chage13"   "chage14"   "chage15"   "chage16"   "ageg5"     "ageg5_bu" 
## [55] "ageg7"     "ageg7_bu"  "ageg10"    "ageg10_bu" "ageg3"     "ageg3_bu" 
## [61] "ageg3_spa" "spage"     "spage_bu"  "agehhldr1" "agehhldr2" "agehhldr3"
## [67] "agehhldr4" "mothage"   "magedied"  "fathage"   "fagedied"
names(w1)[grepl("sex", names(w1), ignore.case = TRUE)]
##  [1] "dhsex"     "disex"     "indsex"    "asex"      "sex"       "sex_p"    
##  [7] "sexbuhead" "sexhoh"    "chsex1"    "chsex2"    "chsex3"    "chsex4"   
## [13] "chsex5"    "chsex6"    "chsex7"    "chsex8"    "chsex9"    "chsex10"  
## [19] "chsex11"   "chsex12"   "chsex13"   "chsex14"   "chsex15"   "chsex16"
names(w1)[grepl("educ|qual|school", names(w1), ignore.case = TRUE)]
##  [1] "fqqual1"  "fqqual2"  "fqqual3"  "edqual.x" "aqual"    "aeducend"
##  [7] "edqual.y" "qual2"    "qual3"    "qual2_p"  "qual3_p"
names(w1)[grepl("ethnic|ethn|race", names(w1), ignore.case = TRUE)]
## [1] "fqethnr"  "aethnicr"
names(w1)[grepl("mar|partner|spouse|widow|single", names(w1), ignore.case = TRUE)]
## [1] "dimar"   "wpamar"  "partner" "marstat"
names(w1)[grepl("employ|work|job|retir", names(w1), ignore.case = TRUE)]
##  [1] "difjob"         "wpjob"          "wpjobl"         "wpsjoby"       
##  [5] "wpsjobm"        "wpcjob"         "wphjob"         "iawork"        
##  [9] "hojob"          "aeverjob"       "aemploye"       "astwork"       
## [13] "hhgriddhwork"   "hhgriddhwork_p" "worktime"       "everwork"      
## [17] "exwork"         "exworkb"        "exwork55"       "exwork55b"     
## [21] "exwork60"       "exwork60b"      "exwork65"       "exwork65b"     
## [25] "difjobm"
# Lifestyle variables
names(w1)[grepl("smok|cig", names(w1), ignore.case = TRUE)]
## [1] "hecig"      "smoker"     "smokerstat"
names(w1)[grepl("alcohol|drink", names(w1), ignore.case = TRUE)]
## character(0)
# BMI / body size
names(w1)[grepl("bmi|height|weight", names(w1), ignore.case = TRUE)]
## character(0)
# Mood / depression
names(w1)[grepl("depress|cesd|mood", names(w1), ignore.case = TRUE)]
## [1] "cesd_sc" "cesd_na"
# Disease-history blocks
names(w1)[grepl("^hedia", names(w1), ignore.case = TRUE)]
##  [1] "hedia01" "hedia02" "hedia03" "hedia04" "hedia05" "hedia06" "hedia07"
##  [8] "hedia08" "hedia09" "hedia10"
names(w1)[grepl("^hedib", names(w1), ignore.case = TRUE)]
##  [1] "hedib01" "hedib02" "hedib03" "hedib04" "hedib05" "hedib06" "hedib07"
##  [8] "hedib08" "hedib09" "hedib10"
# age
attr(w1$dhager, "label")
## [1] "Age variable from HH grid collapsed at 90 plus"
table(w1$dhager, useNA = "ifany")[1:10]
## 
## 20 30 31 32 33 34 35 36 37 38 
##  1  2  1  1  6  3  4  6  8 15
# sex
attr(w1$dhsex, "label")
## [1] "ASK OR CODE RESPONDENT~S SEX"
table(w1$dhsex, useNA = "ifany")
## 
##    1    2 
## 5335 6764
# education
attr(w1$edqual.x, "label")
## [1] "(D) Highest Educational Qualification at ELSA W1"
table(w1$edqual.x, useNA = "ifany")
## 
##   -9   -8   -1    1    2    3    4    5    6    7 
##    6   11   18 1388 1333  764 1974  582 1015 5008
# ethnicity
attr(w1$fqethnr, "label")
## [1] "ELSA ethnic group collapsed into White and Non-white to avoid disclosure"
table(w1$fqethnr, useNA = "ifany")
## 
##   -9   -8   -1    1    2 
##   12    2 6810 5111  164
# marital status
attr(w1$marstat, "label")
## [1] "marital status - couple1 combined with dimar"
table(w1$marstat, useNA = "ifany")
## 
##    1    2    3    4    5    6 
## 8035  504  575 1951  823  211
# smoking
attr(w1$smokerstat, "label")
## [1] "smoker status (past or present)"
table(w1$smokerstat, useNA = "ifany")
## 
##   -9   -8   -2    0    1    2    3    4 
##    5   11  175 4286  674 4342  445 2161
# depression
attr(w1$cesd_sc, "label")
## [1] "number of cesd questions answered yes"
summary(w1$cesd_sc)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -2.000   0.000   1.000   1.504   2.000   8.000
attr(w1$cesd_na, "label")
## [1] "number of cesd questions answered"
table(w1$cesd_na, useNA = "ifany")
## 
##    -2    -1     0     3     5     6     7     8 
##   136   175    67     2     5    15    87 11612
# alcohol - broader search
names(w1)[grepl("alco|drink|beer|wine|spirit|units", names(w1), ignore.case = TRUE)]
## character(0)
# BMI / body size - broader search
names(w1)[grepl("bmi|body|mass|height|weight|wt|ht", names(w1), ignore.case = TRUE)]
##  [1] "wpwtx"       "wpwtx2"      "wpwtx3"      "iashty1"     "iashty2"    
##  [6] "iashty3"     "iashty4"     "hhtot"       "refreshtype" "chtype1"    
## [11] "chtype2"     "chtype3"     "chtype4"     "chtype5"     "chtype6"    
## [16] "chtype7"     "chtype8"     "chtype9"     "chtype10"    "chtype11"   
## [21] "chtype12"    "chtype13"    "chtype14"    "chtype15"    "chtype16"   
## [26] "nright"
# employment - inspect strongest candidates
attr(w1$worktime, "label")
## [1] "Working full time or part time"
table(w1$worktime, useNA = "ifany")
## 
##   -8   -1    1    2 
##  118 7476 2741 1764
attr(w1$aemploye, "label")
## [1] "HSE Feed Forward: Are you …{an employee or self-employed}"
table(w1$aemploye, useNA = "ifany")
## 
##    -1     1     2 
##   563 10101  1435
attr(w1$everwork, "label")
## [1] "ever worked"
table(w1$everwork, useNA = "ifany")
## 
##    -9    -8    -2     0     1 
##     3     1    27   217 11851
# smoking labels
attributes(w1$smokerstat)
## $label
## [1] "smoker status (past or present)"
## 
## $format.stata
## [1] "%8.0g"
## 
## $labels
##                refused             don't know              not asked 
##                     -9                     -8                     -2 
##           never smoked ex smoker - occasional    ex smoker - regular 
##                      0                      1                      2 
##    ex smoker - DK freq         current smoker 
##                      3                      4 
## 
## $class
## [1] "haven_labelled" "vctrs_vctr"     "double"
# education labels
attributes(w1$edqual.x)
## $label
## [1] "(D) Highest Educational Qualification at ELSA W1"
## 
## $format.stata
## [1] "%8.0g"
## 
## $labels
##                    Refusal                 Don't know 
##                         -9                         -8 
##             Not applicable  NVQ4/NVQ5/Degree or equiv 
##                         -1                          1 
##     Higher ed below degree     NVQ3/GCE A Level equiv 
##                          2                          3 
##     NVQ2/GCE O Level equiv NVQ1/CSE other grade equiv 
##                          4                          5 
##              Foreign/other           No qualification 
##                          6                          7 
## 
## $class
## [1] "haven_labelled" "vctrs_vctr"     "double"
# ethnicity labels
attributes(w1$fqethnr)
## $label
## [1] "ELSA ethnic group collapsed into White and Non-white to avoid disclosure"
## 
## $format.stata
## [1] "%8.0g"
## 
## $labels
##        Refusal     Don't know Not applicable          White      Non-white 
##             -9             -8             -1              1              2 
## 
## $class
## [1] "haven_labelled" "vctrs_vctr"     "double"
# marital status labels
attributes(w1$marstat)
## $label
## [1] "marital status - couple1 combined with dimar"
## 
## $format.stata
## [1] "%8.0g"
## 
## $labels
## married (inc civ pship 06 onwards)                         cohabiting 
##                                  1                                  2 
##              single, never married                            widowed 
##                                  3                                  4 
##                           divorced                          separated 
##                                  5                                  6 
## 
## $class
## [1] "haven_labelled" "vctrs_vctr"     "double"
# alcohol - wider search
names(w1)[grepl("drnk|drink|alc|wine|beer|spirit|unit|pub", names(w1), ignore.case = TRUE)]
## [1] "healc"
# possible nurse / anthropometry style names for BMI
names(w1)[grepl("bm", names(w1), ignore.case = TRUE)]
##  [1] "wpsjobm" "iabm11"  "iabm12"  "iabm13"  "iabm14"  "iabm15"  "iabm16" 
##  [8] "iabm17"  "iabm18"  "iabm19"  "iabm20"  "iabm31"  "iabm32"  "iabm33" 
## [15] "iabm34"  "iabm35"  "iabm36"  "iabm37"  "iabm38"  "iabm39"  "iabm40" 
## [22] "iabm48"  "iabm49"  "iabm50"  "iabm51"  "iabm52"  "iabm53"  "iabm54" 
## [29] "iabm62"  "iabm63"  "iabm64"  "iabm65"  "iabm66"  "iabm67"  "iabm68" 
## [36] "iadebm"  "hobml"   "hobmu"   "hobme"   "hobmr"   "hohbm1"  "hohbm2" 
## [43] "hohbm3"  "difjobm"
names(w1)[grepl("ht", names(w1), ignore.case = TRUE)]
##  [1] "iashty1"     "iashty2"     "iashty3"     "iashty4"     "hhtot"      
##  [6] "refreshtype" "chtype1"     "chtype2"     "chtype3"     "chtype4"    
## [11] "chtype5"     "chtype6"     "chtype7"     "chtype8"     "chtype9"    
## [16] "chtype10"    "chtype11"    "chtype12"    "chtype13"    "chtype14"   
## [21] "chtype15"    "chtype16"    "nright"
names(w1)[grepl("wt", names(w1), ignore.case = TRUE)]
## [1] "wpwtx"  "wpwtx2" "wpwtx3"
# inspect the most plausible body-size candidates already found
attr(w1$wpwtx, "label")
## [1] "Is this before or after tax?"
table(w1$wpwtx, useNA = "ifany")
## 
##    -9    -8    -1     1     2 
##    22    34 11235   172   636
attr(w1$wpwtx2, "label")
## [1] "Is this before or after tax?"
table(w1$wpwtx2, useNA = "ifany")
## 
##    -9    -8    -1     1     2 
##     3     2 12025    16    53
attr(w1$wpwtx3, "label")
## [1] "Is this before or after tax?"
table(w1$wpwtx3, useNA = "ifany")
## 
##    -1     2 
## 12092     7
# alcohol
attr(w1$healc, "label")
## [1] "Do you now drink …? {a lot more..}"
table(w1$healc, useNA = "ifany")
## 
##    -8    -1     1     2     3     4 
##     3 10442    41   349   582   682
attributes(w1$healc)
## $label
## [1] "Do you now drink …? {a lot more..}"
## 
## $format.stata
## [1] "%8.0g"
## 
## $labels
##         Refusal      Don't Know  Not applicable ... a lot more,     a bit more, 
##              -9              -8              -1               1               2 
##     a bit less, or, a lot less? 
##               3               4 
## 
## $class
## [1] "haven_labelled" "vctrs_vctr"     "double"
# better search for height/weight/BMI
names(w1)[grepl("hei|highm|cm|metre|meter", names(w1), ignore.case = TRUE)]
##  [1] "heill"       "heins"       "heiqa"       "heiqb"       "heiqc"      
##  [6] "heiqd"       "heiqe"       "heiqf"       "heiqg"       "heiqh"      
## [11] "heiqi"       "heiqj"       "heiqk"       "heiql"       "heiqm"      
## [16] "heiqn"       "heiqo"       "heiqp"       "heiqq"       "heinct"     
## [21] "wplrcm"      "wplrcm2"     "hoincm1"     "hoincm2"     "hoincm3"    
## [26] "hoincm4"     "horpcm"      "gaselecmeth" "elecmeth"    "rentincme"
names(w1)[grepl("weigh|kilo|kg|stone|pound", names(w1), ignore.case = TRUE)]
## character(0)
names(w1)[grepl("bmi", names(w1), ignore.case = TRUE)]
## character(0)
# create disease indicator variables for Table 1
w1 <- w1 %>%
  mutate(
    htn_w1 = if_else(
      hedia01 == 1 | hedia02 == 1 | hedia03 == 1 | hedia04 == 1 | hedia05 == 1 |
      hedia06 == 1 | hedia07 == 1 | hedia08 == 1 | hedia09 == 1 | hedia10 == 1,
      1, 0
    ),
    hf_w1 = if_else(
      hedia01 == 4 | hedia02 == 4 | hedia03 == 4 | hedia04 == 4 | hedia05 == 4 |
      hedia06 == 4 | hedia07 == 4 | hedia08 == 4 | hedia09 == 4 | hedia10 == 4,
      1, 0
    ),
    arrhythmia_w1 = if_else(
      hedia01 == 6 | hedia02 == 6 | hedia03 == 6 | hedia04 == 6 | hedia05 == 6 |
      hedia06 == 6 | hedia07 == 6 | hedia08 == 6 | hedia09 == 6 | hedia10 == 6,
      1, 0
    ),
    diabetes_w1 = if_else(
      hedia01 == 7 | hedia02 == 7 | hedia03 == 7 | hedia04 == 7 | hedia05 == 7 |
      hedia06 == 7 | hedia07 == 7 | hedia08 == 7 | hedia09 == 7 | hedia10 == 7,
      1, 0
    ),
    stroke_w1 = if_else(
      hedia01 == 8 | hedia02 == 8 | hedia03 == 8 | hedia04 == 8 | hedia05 == 8 |
      hedia06 == 8 | hedia07 == 8 | hedia08 == 8 | hedia09 == 8 | hedia10 == 8,
      1, 0
    ),
    alz_w1 = if_else(
      hedib01 == 8 | hedib02 == 8 | hedib03 == 8 | hedib04 == 8 | hedib05 == 8 |
      hedib06 == 8 | hedib07 == 8 | hedib08 == 8 | hedib09 == 8 | hedib10 == 8,
      1, 0
    ),
    dementia_w1 = if_else(
      hedib01 == 9 | hedib02 == 9 | hedib03 == 9 | hedib04 == 9 | hedib05 == 9 |
      hedib06 == 9 | hedib07 == 9 | hedib08 == 9 | hedib09 == 9 | hedib10 == 9,
      1, 0
    )
  )

# check the new disease variables
table(w1$htn_w1, useNA = "ifany")
## 
##    0    1 
## 7627 4472
table(w1$hf_w1, useNA = "ifany")
## 
##     0     1 
## 12017    82
table(w1$arrhythmia_w1, useNA = "ifany")
## 
##     0     1 
## 11367   732
table(w1$diabetes_w1, useNA = "ifany")
## 
##     0     1 
## 11233   866
table(w1$stroke_w1, useNA = "ifany")
## 
##     0     1 
## 11588   511
table(w1$alz_w1, useNA = "ifany")
## 
##     0     1 
## 12085    14
table(w1$dementia_w1, useNA = "ifany")
## 
##     0     1 
## 12035    64
# keep a clean draft Table 1 dataset - no exclusions
table1_w1 <- w1 %>%
  transmute(
    idauniq,
    age = dhager,
    sex = as_factor(dhsex),
    education = as_factor(edqual.x),
    ethnicity = as_factor(fqethnr),
    marital_status = as_factor(marstat),
    employment = as_factor(worktime),
    smoking = as_factor(smokerstat),
    depression_score = cesd_sc,
    hypertension = htn_w1,
    heart_failure = hf_w1,
    abnormal_heart_rhythm = arrhythmia_w1,
    diabetes = diabetes_w1,
    stroke = stroke_w1,
    baseline_alzheimers = alz_w1,
    baseline_dementia = dementia_w1
  )
# quick checks
dim(table1_w1)
## [1] 12099    16
summary(table1_w1$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   20.00   55.00   63.00   64.19   72.00   99.00
table(table1_w1$sex, useNA = "ifany")
## 
##        Refusal     Don't Know Not applicable           Male         Female 
##              0              0              0           5335           6764
table(table1_w1$education, useNA = "ifany")
## 
##                    Refusal                 Don't know 
##                          6                         11 
##             Not applicable  NVQ4/NVQ5/Degree or equiv 
##                         18                       1388 
##     Higher ed below degree     NVQ3/GCE A Level equiv 
##                       1333                        764 
##     NVQ2/GCE O Level equiv NVQ1/CSE other grade equiv 
##                       1974                        582 
##              Foreign/other           No qualification 
##                       1015                       5008
table(table1_w1$ethnicity, useNA = "ifany")
## 
##        Refusal     Don't know Not applicable          White      Non-white 
##             12              2           6810           5111            164
table(table1_w1$marital_status, useNA = "ifany")
## 
## married (inc civ pship 06 onwards)                         cohabiting 
##                               8035                                504 
##              single, never married                            widowed 
##                                575                               1951 
##                           divorced                          separated 
##                                823                                211
table(table1_w1$employment, useNA = "ifany")
## 
##          unknown      Not working Full time (>=35)        Part time 
##              118             7476             2741             1764
table(table1_w1$smoking, useNA = "ifany")
## 
##                refused             don't know              not asked 
##                      5                     11                    175 
##           never smoked ex smoker - occasional    ex smoker - regular 
##                   4286                    674                   4342 
##    ex smoker - DK freq         current smoker 
##                    445                   2161
summary(table1_w1$depression_score)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -2.000   0.000   1.000   1.504   2.000   8.000
table(table1_w1$hypertension, useNA = "ifany")
## 
##    0    1 
## 7627 4472
table(table1_w1$heart_failure, useNA = "ifany")
## 
##     0     1 
## 12017    82
table(table1_w1$abnormal_heart_rhythm, useNA = "ifany")
## 
##     0     1 
## 11367   732
table(table1_w1$diabetes, useNA = "ifany")
## 
##     0     1 
## 11233   866
table(table1_w1$stroke, useNA = "ifany")
## 
##     0     1 
## 11588   511
table(table1_w1$baseline_alzheimers, useNA = "ifany")
## 
##     0     1 
## 12085    14
table(table1_w1$baseline_dementia, useNA = "ifany")
## 
##     0     1 
## 12035    64
w1 <- w1 %>%
  mutate(
    w1_palevel = case_when(
      heacta %in% c(1, 2) ~ "High",
      !heacta %in% c(1, 2) & heactb %in% c(1, 2) ~ "Moderate",
      !heacta %in% c(1, 2) & !heactb %in% c(1, 2) & heactc %in% c(1, 2) ~ "Low",
      heacta %in% c(3, 4) & heactb %in% c(3, 4) & heactc %in% c(3, 4) ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )

table(w1$w1_palevel, useNA = "ifany")
## 
##      High       Low  Moderate Sedentary      <NA> 
##      3302      1756      5607      1240       194
# add physical activity group to the draft Table 1 dataset
table1_w1 <- w1 %>%
  transmute(
    idauniq,
    pa_level = w1_palevel,
    age = dhager,
    sex = as_factor(dhsex),
    education = as_factor(edqual.x),
    ethnicity = as_factor(fqethnr),
    marital_status = as_factor(marstat),
    employment = as_factor(worktime),
    smoking = as_factor(smokerstat),
    depression_score = cesd_sc,
    hypertension = htn_w1,
    heart_failure = hf_w1,
    abnormal_heart_rhythm = arrhythmia_w1,
    diabetes = diabetes_w1,
    stroke = stroke_w1,
    baseline_alzheimers = alz_w1,
    baseline_dementia = dementia_w1
  )
# check PA distribution with no exclusions
table(table1_w1$pa_level, useNA = "ifany")
## 
##      High       Low  Moderate Sedentary      <NA> 
##      3302      1756      5607      1240       194
# mean age by PA level
table1_w1 %>%
  group_by(pa_level) %>%
  summarise(
    n = n(),
    mean_age = mean(age, na.rm = TRUE),
    sd_age = sd(age, na.rm = TRUE)
  )
## # A tibble: 5 × 4
##   pa_level      n mean_age sd_age
##   <chr>     <int>    <dbl>  <dbl>
## 1 High       3302     60.6   9.22
## 2 Low        1756     67.4  12.0 
## 3 Moderate   5607     63.6  10.4 
## 4 Sedentary  1240     70.8  12.2 
## 5 <NA>        194     69.3  15.0
# sex by PA level
table(table1_w1$pa_level, table1_w1$sex, useNA = "ifany")
##            
##             Refusal Don't Know Not applicable Male Female
##   High            0          0              0 1593   1709
##   Low             0          0              0  552   1204
##   Moderate        0          0              0 2507   3100
##   Sedentary       0          0              0  590    650
##   <NA>            0          0              0   93    101
# smoking by PA level
table(table1_w1$pa_level, table1_w1$smoking, useNA = "ifany")
##            
##             refused don't know not asked never smoked ex smoker - occasional
##   High            0          0         0         1309                    194
##   Low             0          1         0          587                     89
##   Moderate        0          1         0         1991                    343
##   Sedentary       0          0         0          395                     48
##   <NA>            5          9       175            4                      0
##            
##             ex smoker - regular ex smoker - DK freq current smoker
##   High                     1200                 147            452
##   Low                       621                  62            396
##   Moderate                 2022                 182           1068
##   Sedentary                 499                  54            244
##   <NA>                        0                   0              1
# disease variables by PA level
table(table1_w1$pa_level, table1_w1$hypertension, useNA = "ifany")
##            
##                0    1
##   High      2338  964
##   Low        960  796
##   Moderate  3533 2074
##   Sedentary  662  578
##   <NA>       134   60
table(table1_w1$pa_level, table1_w1$diabetes, useNA = "ifany")
##            
##                0    1
##   High      3172  130
##   Low       1572  184
##   Moderate  5240  367
##   Sedentary 1074  166
##   <NA>       175   19
table(table1_w1$pa_level, table1_w1$stroke, useNA = "ifany")
##            
##                0    1
##   High      3247   55
##   Low       1655  101
##   Moderate  5434  173
##   Sedentary 1088  152
##   <NA>       164   30
table(table1_w1$pa_level, table1_w1$abnormal_heart_rhythm, useNA = "ifany")
##            
##                0    1
##   High      3142  160
##   Low       1634  122
##   Moderate  5286  321
##   Sedentary 1128  112
##   <NA>       177   17
table(table1_w1$pa_level, table1_w1$heart_failure, useNA = "ifany")
##            
##                0    1
##   High      3296    6
##   Low       1730   26
##   Moderate  5587   20
##   Sedentary 1215   25
##   <NA>       189    5
table(table1_w1$pa_level, table1_w1$baseline_alzheimers, useNA = "ifany")
##            
##                0    1
##   High      3302    0
##   Low       1756    0
##   Moderate  5606    1
##   Sedentary 1233    7
##   <NA>       188    6
table(table1_w1$pa_level, table1_w1$baseline_dementia, useNA = "ifany")
##            
##                0    1
##   High      3297    5
##   Low       1748    8
##   Moderate  5595   12
##   Sedentary 1221   19
##   <NA>       174   20
table1_w1_clean <- table1_w1 %>%
  mutate(
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),

    sex = fct_drop(sex),

    smoking3 = case_when(
      smoking == "never smoked" ~ "Never",
      smoking %in% c("ex smoker - occasional", "ex smoker - regular", "ex smoker - DK freq") ~ "Ex-smoker",
      smoking == "current smoker" ~ "Current",
      TRUE ~ NA_character_
    ),

    ethnicity2 = case_when(
      ethnicity == "White" ~ "White",
      ethnicity == "Non-white" ~ "Non-white",
      TRUE ~ NA_character_
    ),

    depression_score = if_else(depression_score < 0, NA_real_, as.numeric(depression_score))
  )

# check cleaned variables
table(table1_w1_clean$pa_level, useNA = "ifany")
## 
##      High  Moderate       Low Sedentary      <NA> 
##      3302      5607      1756      1240       194
table(table1_w1_clean$smoking3, useNA = "ifany")
## 
##   Current Ex-smoker     Never      <NA> 
##      2161      5461      4286       191
table(table1_w1_clean$ethnicity2, useNA = "ifany")
## 
## Non-white     White      <NA> 
##       164      5111      6824
summary(table1_w1_clean$depression_score)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.000   1.000   1.582   2.000   8.000     311
table1_summary <- table1_w1_clean %>%
  mutate(
    pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
  ) %>%
  group_by(pa_level, .drop = FALSE) %>%
  summarise(
    N = n(),
    `Age, mean (SD)` = sprintf("%.1f (%.1f)", mean(age, na.rm = TRUE), sd(age, na.rm = TRUE)),
    `Female, n (%)` = sprintf("%d (%.1f%%)",
                              sum(sex == "Female", na.rm = TRUE),
                              100 * sum(sex == "Female", na.rm = TRUE) / n()),
    `No qualification, n (%)` = sprintf("%d (%.1f%%)",
                                        sum(education == "No qualification", na.rm = TRUE),
                                        100 * sum(education == "No qualification", na.rm = TRUE) / n()),
    `Married/cohabiting, n (%)` = sprintf("%d (%.1f%%)",
                                          sum(marital_status %in% c("married (inc civ pship 06 onwards)", "cohabiting"), na.rm = TRUE),
                                          100 * sum(marital_status %in% c("married (inc civ pship 06 onwards)", "cohabiting"), na.rm = TRUE) / n()),
    `Working full/part time, n (%)` = sprintf("%d (%.1f%%)",
                                              sum(employment %in% c("Full time (>=35)", "Part time"), na.rm = TRUE),
                                              100 * sum(employment %in% c("Full time (>=35)", "Part time"), na.rm = TRUE) / n()),
    `Current smoker, n (%)` = sprintf("%d (%.1f%%)",
                                      sum(smoking3 == "Current", na.rm = TRUE),
                                      100 * sum(smoking3 == "Current", na.rm = TRUE) / n()),
    `Depression score, mean (SD)` = sprintf("%.2f (%.2f)",
                                            mean(depression_score, na.rm = TRUE),
                                            sd(depression_score, na.rm = TRUE)),
    `Hypertension, n (%)` = sprintf("%d (%.1f%%)",
                                    sum(hypertension == 1, na.rm = TRUE),
                                    100 * sum(hypertension == 1, na.rm = TRUE) / n()),
    `Diabetes, n (%)` = sprintf("%d (%.1f%%)",
                                sum(diabetes == 1, na.rm = TRUE),
                                100 * sum(diabetes == 1, na.rm = TRUE) / n()),
    `Stroke, n (%)` = sprintf("%d (%.1f%%)",
                              sum(stroke == 1, na.rm = TRUE),
                              100 * sum(stroke == 1, na.rm = TRUE) / n()),
    `Abnormal heart rhythm, n (%)` = sprintf("%d (%.1f%%)",
                                             sum(abnormal_heart_rhythm == 1, na.rm = TRUE),
                                             100 * sum(abnormal_heart_rhythm == 1, na.rm = TRUE) / n()),
    `Heart failure, n (%)` = sprintf("%d (%.1f%%)",
                                     sum(heart_failure == 1, na.rm = TRUE),
                                     100 * sum(heart_failure == 1, na.rm = TRUE) / n()),
    `Baseline Alzheimer’s, n (%)` = sprintf("%d (%.1f%%)",
                                            sum(baseline_alzheimers == 1, na.rm = TRUE),
                                            100 * sum(baseline_alzheimers == 1, na.rm = TRUE) / n()),
    `Baseline dementia, n (%)` = sprintf("%d (%.1f%%)",
                                         sum(baseline_dementia == 1, na.rm = TRUE),
                                         100 * sum(baseline_dementia == 1, na.rm = TRUE) / n())
  )

knitr::kable(
  table1_summary,
  caption = "Draft Table 1. Baseline characteristics by physical activity group, wave 1."
)
Draft Table 1. Baseline characteristics by physical activity group, wave 1.
pa_level N Age, mean (SD) Female, n (%) No qualification, n (%) Married/cohabiting, n (%) Working full/part time, n (%) Current smoker, n (%) Depression score, mean (SD) Hypertension, n (%) Diabetes, n (%) Stroke, n (%) Abnormal heart rhythm, n (%) Heart failure, n (%) Baseline Alzheimer’s, n (%) Baseline dementia, n (%)
High 3302 60.6 (9.2) 1709 (51.8%) 935 (28.3%) 2582 (78.2%) 1753 (53.1%) 452 (13.7%) 1.06 (1.61) 964 (29.2%) 130 (3.9%) 55 (1.7%) 160 (4.8%) 6 (0.2%) 0 (0.0%) 5 (0.2%)
Moderate 5607 63.6 (10.4) 3100 (55.3%) 2159 (38.5%) 4067 (72.5%) 2200 (39.2%) 1068 (19.0%) 1.41 (1.88) 2074 (37.0%) 367 (6.5%) 173 (3.1%) 321 (5.7%) 20 (0.4%) 1 (0.0%) 12 (0.2%)
Low 1756 67.4 (12.0) 1204 (68.6%) 1051 (59.9%) 1049 (59.7%) 374 (21.3%) 396 (22.6%) 2.32 (2.22) 796 (45.3%) 184 (10.5%) 101 (5.8%) 122 (6.9%) 26 (1.5%) 0 (0.0%) 8 (0.5%)
Sedentary 1240 70.8 (12.2) 650 (52.4%) 766 (61.8%) 705 (56.9%) 176 (14.2%) 244 (19.7%) 2.73 (2.32) 578 (46.6%) 166 (13.4%) 152 (12.3%) 112 (9.0%) 25 (2.0%) 7 (0.6%) 19 (1.5%)
Missing PA 194 69.3 (15.0) 101 (52.1%) 97 (50.0%) 136 (70.1%) 2 (1.0%) 1 (0.5%) 0.25 (0.62) 60 (30.9%) 19 (9.8%) 30 (15.5%) 17 (8.8%) 5 (2.6%) 6 (3.1%) 20 (10.3%)
table1_summary_final <- table1_summary %>%
  mutate(N = as.integer(N))

knitr::kable(
  table1_summary_final,
  caption = "Table 1. Baseline characteristics by physical activity group at wave 1. No participants were excluded at this stage."
)
Table 1. Baseline characteristics by physical activity group at wave 1. No participants were excluded at this stage.
pa_level N Age, mean (SD) Female, n (%) No qualification, n (%) Married/cohabiting, n (%) Working full/part time, n (%) Current smoker, n (%) Depression score, mean (SD) Hypertension, n (%) Diabetes, n (%) Stroke, n (%) Abnormal heart rhythm, n (%) Heart failure, n (%) Baseline Alzheimer’s, n (%) Baseline dementia, n (%)
High 3302 60.6 (9.2) 1709 (51.8%) 935 (28.3%) 2582 (78.2%) 1753 (53.1%) 452 (13.7%) 1.06 (1.61) 964 (29.2%) 130 (3.9%) 55 (1.7%) 160 (4.8%) 6 (0.2%) 0 (0.0%) 5 (0.2%)
Moderate 5607 63.6 (10.4) 3100 (55.3%) 2159 (38.5%) 4067 (72.5%) 2200 (39.2%) 1068 (19.0%) 1.41 (1.88) 2074 (37.0%) 367 (6.5%) 173 (3.1%) 321 (5.7%) 20 (0.4%) 1 (0.0%) 12 (0.2%)
Low 1756 67.4 (12.0) 1204 (68.6%) 1051 (59.9%) 1049 (59.7%) 374 (21.3%) 396 (22.6%) 2.32 (2.22) 796 (45.3%) 184 (10.5%) 101 (5.8%) 122 (6.9%) 26 (1.5%) 0 (0.0%) 8 (0.5%)
Sedentary 1240 70.8 (12.2) 650 (52.4%) 766 (61.8%) 705 (56.9%) 176 (14.2%) 244 (19.7%) 2.73 (2.32) 578 (46.6%) 166 (13.4%) 152 (12.3%) 112 (9.0%) 25 (2.0%) 7 (0.6%) 19 (1.5%)
Missing PA 194 69.3 (15.0) 101 (52.1%) 97 (50.0%) 136 (70.1%) 2 (1.0%) 1 (0.5%) 0.25 (0.62) 60 (30.9%) 19 (9.8%) 30 (15.5%) 17 (8.8%) 5 (2.6%) 6 (3.1%) 20 (10.3%)
table1_summary_wide <- table1_summary_final %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(
    cols = -pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = pa_level,
    values_from = Value
  ) %>%
  select(
    Characteristic,
    High,
    Moderate,
    Low,
    Sedentary,
    `Missing PA`
  )

knitr::kable(
  table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 1 by physical activity category. No participants were excluded at this stage."
)
Table 1. Baseline characteristics at wave 1 by physical activity category. No participants were excluded at this stage.
Characteristic High Moderate Low Sedentary Missing PA
N 3302 5607 1756 1240 194
Age, mean (SD) 60.6 (9.2) 63.6 (10.4) 67.4 (12.0) 70.8 (12.2) 69.3 (15.0)
Female, n (%) 1709 (51.8%) 3100 (55.3%) 1204 (68.6%) 650 (52.4%) 101 (52.1%)
No qualification, n (%) 935 (28.3%) 2159 (38.5%) 1051 (59.9%) 766 (61.8%) 97 (50.0%)
Married/cohabiting, n (%) 2582 (78.2%) 4067 (72.5%) 1049 (59.7%) 705 (56.9%) 136 (70.1%)
Working full/part time, n (%) 1753 (53.1%) 2200 (39.2%) 374 (21.3%) 176 (14.2%) 2 (1.0%)
Current smoker, n (%) 452 (13.7%) 1068 (19.0%) 396 (22.6%) 244 (19.7%) 1 (0.5%)
Depression score, mean (SD) 1.06 (1.61) 1.41 (1.88) 2.32 (2.22) 2.73 (2.32) 0.25 (0.62)
Hypertension, n (%) 964 (29.2%) 2074 (37.0%) 796 (45.3%) 578 (46.6%) 60 (30.9%)
Diabetes, n (%) 130 (3.9%) 367 (6.5%) 184 (10.5%) 166 (13.4%) 19 (9.8%)
Stroke, n (%) 55 (1.7%) 173 (3.1%) 101 (5.8%) 152 (12.3%) 30 (15.5%)
Abnormal heart rhythm, n (%) 160 (4.8%) 321 (5.7%) 122 (6.9%) 112 (9.0%) 17 (8.8%)
Heart failure, n (%) 6 (0.2%) 20 (0.4%) 26 (1.5%) 25 (2.0%) 5 (2.6%)
Baseline Alzheimer’s, n (%) 0 (0.0%) 1 (0.0%) 0 (0.0%) 7 (0.6%) 6 (3.1%)
Baseline dementia, n (%) 5 (0.2%) 12 (0.2%) 8 (0.5%) 19 (1.5%) 20 (10.3%)
#numbers of alzheimers and dementia in wave 1
w1 %>%
  summarise(
    alz_n = sum(alz_w1 == 1, na.rm = TRUE),
    dem_n = sum(dementia_w1 == 1, na.rm = TRUE),
    both_n = sum(alz_w1 == 1 & dementia_w1 == 1, na.rm = TRUE),
    either_n = sum(alz_w1 == 1 | dementia_w1 == 1, na.rm = TRUE)
  )
## # A tibble: 1 × 4
##   alz_n dem_n both_n either_n
##   <int> <int>  <int>    <int>
## 1    14    64      3       75
# people with both baseline Alzheimer's and baseline dementia wave1
both_alz_dem <- table1_w1_clean %>%
  filter(baseline_alzheimers == 1, baseline_dementia == 1)

# how many participants are there with both alzheimer's and dementia at baseline wave 1?
nrow(both_alz_dem)
## [1] 3
#what category of PA level are the participants who are categorised as both alzheimer's and dementia in?
both_alz_dem %>%
  count(pa_level, .drop = FALSE)
## # A tibble: 5 × 2
##   pa_level      n
##   <fct>     <int>
## 1 High          0
## 2 Moderate      0
## 3 Low           0
## 4 Sedentary     2
## 5 <NA>          1
# which IDs are the participants with both alzheimers and dementa at baseline wave 1?
both_alz_dem %>%
  select(idauniq, pa_level, baseline_alzheimers, baseline_dementia)
## # A tibble: 3 × 4
##   idauniq pa_level  baseline_alzheimers baseline_dementia
##     <dbl> <fct>                   <dbl>             <dbl>
## 1  106735 <NA>                        1                 1
## 2  108547 Sedentary                   1                 1
## 3  119099 Sedentary                   1                 1
#dementia follow up coding chunk
w2_dem <- w2_core %>%
  transmute(
    idauniq,
    dem_w2 = if_else(
      hedib01 == 9 | hedib02 == 9 | hedib03 == 9 | hedib04 == 9,
      1, 0
    )
  )

w3_dem <- w3_core %>%
  transmute(
    idauniq,
    dem_w3 = if_else(hedibde == 3, 1, 0)
  )

w4_dem <- w4_core %>% transmute(idauniq, dem_w4 = if_else(hedibde == 1, 1, 0))
w5_dem <- w5_core %>% transmute(idauniq, dem_w5 = if_else(hedibde == 1, 1, 0))
w6_dem <- w6_core %>% transmute(idauniq, dem_w6 = if_else(hedibde == 1, 1, 0))
w7_dem <- w7_core %>% transmute(idauniq, dem_w7 = if_else(hedibde == 1, 1, 0))
w8_dem <- w8_core %>% transmute(idauniq, dem_w8 = if_else(hedibde == 1, 1, 0))
w9_dem <- w9_core %>% transmute(idauniq, dem_w9 = if_else(hedibde == 1, 1, 0))
#follow up merge chunk
followup_w1 <- table1_w1_clean %>%
  mutate(
    baseline_alzheimers = as.numeric(baseline_alzheimers),
    baseline_dementia = as.numeric(baseline_dementia)
  ) %>%
  left_join(w2_dem, by = "idauniq") %>%
  left_join(w3_dem, by = "idauniq") %>%
  left_join(w4_dem, by = "idauniq") %>%
  left_join(w5_dem, by = "idauniq") %>%
  left_join(w6_dem, by = "idauniq") %>%
  left_join(w7_dem, by = "idauniq") %>%
  left_join(w8_dem, by = "idauniq") %>%
  left_join(w9_dem, by = "idauniq") %>%
  mutate(
    across(starts_with("dem_w"), ~replace_na(., 0))
  )

dim(followup_w1)
## [1] 12099    27
#analysis dataset chunk
analysis_w1 <- followup_w1 %>%
  mutate(
    prevalent_dem_alz_w1 = if_else(baseline_alzheimers == 1 | baseline_dementia == 1, 1, 0)
  ) %>%
  filter(
    prevalent_dem_alz_w1 == 0,
    !is.na(pa_level)
  ) %>%
  mutate(
    first_dem_wave = case_when(
      dem_w2 == 1 ~ 2,
      dem_w3 == 1 ~ 3,
      dem_w4 == 1 ~ 4,
      dem_w5 == 1 ~ 5,
      dem_w6 == 1 ~ 6,
      dem_w7 == 1 ~ 7,
      dem_w8 == 1 ~ 8,
      dem_w9 == 1 ~ 9,
      TRUE ~ NA_real_
    ),
    event_dementia = if_else(!is.na(first_dem_wave), 1, 0),
    time_to_event_waves = if_else(event_dementia == 1, first_dem_wave - 1, 8),
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    current_smoker = if_else(smoking3 == "Current", 1, 0, missing = NA_real_),
    depression_binary = if_else(depression_score >= 4, 1, 0, missing = NA_real_)
  )

dim(analysis_w1)
## [1] 11855    33
table(analysis_w1$event_dementia, useNA = "ifany")
## 
##     0     1 
## 11366   489
table(analysis_w1$pa_level, useNA = "ifany")
## 
##      High  Moderate       Low Sedentary 
##      3297      5594      1748      1216
#cox model chunk
cox_unadjusted <- coxph(
  Surv(time_to_event_waves, event_dementia) ~ pa_level,
  data = analysis_w1
)

cox_adjusted <- coxph(
  Surv(time_to_event_waves, event_dementia) ~ pa_level + age + sex +
    current_smoker + hypertension + diabetes + stroke +
    abnormal_heart_rhythm + heart_failure + depression_binary,
  data = analysis_w1
)

#output chunk for document 
summary(cox_unadjusted)
## Call:
## coxph(formula = Surv(time_to_event_waves, event_dementia) ~ pa_level, 
##     data = analysis_w1)
## 
##   n= 11855, number of events= 489 
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)    
## pa_levelModerate  0.5030    1.6536   0.1265 3.976 7.01e-05 ***
## pa_levelLow       0.7862    2.1950   0.1486 5.291 1.21e-07 ***
## pa_levelSedentary 0.8453    2.3287   0.1608 5.258 1.46e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## pa_levelModerate      1.654     0.6047     1.291     2.119
## pa_levelLow           2.195     0.4556     1.640     2.937
## pa_levelSedentary     2.329     0.4294     1.699     3.191
## 
## Concordance= 0.577  (se = 0.012 )
## Likelihood ratio test= 39.91  on 3 df,   p=1e-08
## Wald test            = 37.44  on 3 df,   p=4e-08
## Score (logrank) test = 38.81  on 3 df,   p=2e-08
summary(cox_adjusted)
## Call:
## coxph(formula = Surv(time_to_event_waves, event_dementia) ~ pa_level + 
##     age + sex + current_smoker + hypertension + diabetes + stroke + 
##     abnormal_heart_rhythm + heart_failure + depression_binary, 
##     data = analysis_w1)
## 
##   n= 11728, number of events= 485 
##    (127 observations deleted due to missingness)
## 
##                           coef exp(coef) se(coef)      z Pr(>|z|)    
## pa_levelModerate       0.24344   1.27563  0.12921  1.884   0.0595 .  
## pa_levelLow            0.18022   1.19748  0.15806  1.140   0.2542    
## pa_levelSedentary     -0.01925   0.98094  0.17645 -0.109   0.9131    
## age                    0.06162   1.06356  0.00423 14.566   <2e-16 ***
## sexFemale              0.19684   1.21754  0.09505  2.071   0.0384 *  
## current_smoker        -0.13331   0.87520  0.14047 -0.949   0.3426    
## hypertension           0.09580   1.10054  0.09312  1.029   0.3036    
## diabetes               0.16264   1.17661  0.15589  1.043   0.2968    
## stroke                 0.23524   1.26521  0.17619  1.335   0.1818    
## abnormal_heart_rhythm  0.30517   1.35685  0.15522  1.966   0.0493 *  
## heart_failure         -0.70697   0.49313  0.58253 -1.214   0.2249    
## depression_binary      0.24828   1.28182  0.11251  2.207   0.0273 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                       exp(coef) exp(-coef) lower .95 upper .95
## pa_levelModerate         1.2756     0.7839    0.9903     1.643
## pa_levelLow              1.1975     0.8351    0.8785     1.632
## pa_levelSedentary        0.9809     1.0194    0.6941     1.386
## age                      1.0636     0.9402    1.0548     1.072
## sexFemale                1.2175     0.8213    1.0106     1.467
## current_smoker           0.8752     1.1426    0.6646     1.153
## hypertension             1.1005     0.9086    0.9169     1.321
## diabetes                 1.1766     0.8499    0.8668     1.597
## stroke                   1.2652     0.7904    0.8957     1.787
## abnormal_heart_rhythm    1.3569     0.7370    1.0009     1.839
## heart_failure            0.4931     2.0278    0.1574     1.545
## depression_binary        1.2818     0.7801    1.0282     1.598
## 
## Concordance= 0.74  (se = 0.01 )
## Likelihood ratio test= 302  on 12 df,   p=<2e-16
## Wald test            = 310.1  on 12 df,   p=<2e-16
## Score (logrank) test = 330  on 12 df,   p=<2e-16

Wave 2 Table 1

w2_derived <- read_dta("raw data/RAW_data_stata/wave_2_derived_variables.dta")
w2 <- left_join(w2_core, w2_derived, by = "idauniq")

w2 <- w2 %>%
  mutate(
    htn_w2 = if_else(
      hedia01 == 1 | hedia02 == 1 | hedia03 == 1 | hedia04 == 1,
      1, 0
    ),
    hf_w2 = if_else(
      hedia01 == 4 | hedia02 == 4 | hedia03 == 4 | hedia04 == 4,
      1, 0
    ),
    arrhythmia_w2 = if_else(
      hedia01 == 6 | hedia02 == 6 | hedia03 == 6 | hedia04 == 6,
      1, 0
    ),
    diabetes_w2 = if_else(
      hedia01 == 7 | hedia02 == 7 | hedia03 == 7 | hedia04 == 7,
      1, 0
    ),
    stroke_w2 = if_else(
      hedia01 == 8 | hedia02 == 8 | hedia03 == 8 | hedia04 == 8,
      1, 0
    ),
    alz_w2 = if_else(
      hedib01 == 8 | hedib02 == 8 | hedib03 == 8 | hedib04 == 8,
      1, 0
    ),
    dementia_w2 = if_else(
      hedib01 == 9 | hedib02 == 9 | hedib03 == 9 | hedib04 == 9,
      1, 0
    ),
    pa_level = case_when(
      palevel == 3 ~ "High",
      palevel == 2 ~ "Moderate",
      palevel == 1 ~ "Low",
      palevel == 0 ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )

w2_table1 <- w2 %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = dhager,
    sex = as_factor(DhSex),
    hypertension = htn_w2,
    heart_failure = hf_w2,
    abnormal_heart_rhythm = arrhythmia_w2,
    diabetes = diabetes_w2,
    stroke = stroke_w2,
    baseline_alzheimers = alz_w2,
    baseline_dementia = dementia_w2
  )

w2_table1_summary <- w2_table1 %>%
  mutate(
    pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
  ) %>%
  group_by(pa_level, .drop = FALSE) %>%
  summarise(
    N = n(),
    `Age, mean (SD)` = sprintf("%.1f (%.1f)", mean(age, na.rm = TRUE), sd(age, na.rm = TRUE)),
    `Female, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(sex == "Female", na.rm = TRUE),
      100 * sum(sex == "Female", na.rm = TRUE) / n()
    ),
    `Hypertension, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(hypertension == 1, na.rm = TRUE),
      100 * sum(hypertension == 1, na.rm = TRUE) / n()
    ),
    `Diabetes, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(diabetes == 1, na.rm = TRUE),
      100 * sum(diabetes == 1, na.rm = TRUE) / n()
    ),
    `Stroke, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(stroke == 1, na.rm = TRUE),
      100 * sum(stroke == 1, na.rm = TRUE) / n()
    ),
    `Abnormal heart rhythm, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(abnormal_heart_rhythm == 1, na.rm = TRUE),
      100 * sum(abnormal_heart_rhythm == 1, na.rm = TRUE) / n()
    ),
    `Heart failure, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(heart_failure == 1, na.rm = TRUE),
      100 * sum(heart_failure == 1, na.rm = TRUE) / n()
    ),
    `Baseline Alzheimer’s, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_alzheimers == 1, na.rm = TRUE),
      100 * sum(baseline_alzheimers == 1, na.rm = TRUE) / n()
    ),
    `Baseline dementia, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_dementia == 1, na.rm = TRUE),
      100 * sum(baseline_dementia == 1, na.rm = TRUE) / n()
    )
  )

w2_table1_summary_wide <- w2_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(
    cols = -pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = pa_level,
    values_from = Value
  ) %>%
  select(
    Characteristic,
    High,
    Moderate,
    Low,
    Sedentary,
    `Missing PA`
  )

knitr::kable(
  w2_table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 2 by physical activity category."
)
Table 1. Baseline characteristics at wave 2 by physical activity category.
Characteristic High Moderate Low Sedentary Missing PA
N 1744 4684 2309 556 139
Age, mean (SD) 61.8 (8.7) 64.5 (9.8) 68.8 (11.2) 75.2 (11.6) 70.6 (15.8)
Female, n (%) 835 (47.9%) 2615 (55.8%) 1465 (63.4%) 325 (58.5%) 66 (47.5%)
Hypertension, n (%) 253 (14.5%) 784 (16.7%) 487 (21.1%) 156 (28.1%) 26 (18.7%)
Diabetes, n (%) 28 (1.6%) 151 (3.2%) 139 (6.0%) 55 (9.9%) 10 (7.2%)
Stroke, n (%) 3 (0.2%) 49 (1.0%) 59 (2.6%) 39 (7.0%) 13 (9.4%)
Abnormal heart rhythm, n (%) 48 (2.8%) 138 (2.9%) 79 (3.4%) 44 (7.9%) 5 (3.6%)
Heart failure, n (%) 0 (0.0%) 8 (0.2%) 8 (0.3%) 4 (0.7%) 0 (0.0%)
Baseline Alzheimer’s, n (%) 1 (0.1%) 2 (0.0%) 2 (0.1%) 1 (0.2%) 5 (3.6%)
Baseline dementia, n (%) 2 (0.1%) 11 (0.2%) 9 (0.4%) 7 (1.3%) 14 (10.1%)
## Wave 3 table 1
w3_table1_raw <- w3_core %>%
  mutate(
    hypertension = if_else(hediabp == 1, 1, 0, missing = 0),
    heart_failure = if_else(hediahf == 1, 1, 0, missing = 0),
    abnormal_heart_rhythm = if_else(hediaar == 1, 1, 0, missing = 0),
    diabetes = if_else(hediadi == 1, 1, 0, missing = 0),
    stroke = if_else(hediast == 1, 1, 0, missing = 0),

    baseline_alzheimers = if_else(hedibad %in% c(1, 2, 3), 1, 0, missing = 0),
    baseline_dementia   = if_else(hedibde %in% c(1, 2, 3), 1, 0, missing = 0),

    pa_level = case_when(
      palevel == 3 ~ "High",
      palevel == 2 ~ "Moderate",
      palevel == 1 ~ "Low",
      palevel == 0 ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )

w3_table1 <- w3_table1_raw %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = dhager,
    sex = as_factor(dhsex),
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )

w3_table1_summary <- w3_table1 %>%
  mutate(
    pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
  ) %>%
  group_by(pa_level, .drop = FALSE) %>%
  summarise(
    N = n(),
    `Age, mean (SD)` = sprintf("%.1f (%.1f)", mean(age, na.rm = TRUE), sd(age, na.rm = TRUE)),
    `Female, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(sex == "Female", na.rm = TRUE),
      100 * sum(sex == "Female", na.rm = TRUE) / n()
    ),
    `Hypertension, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(hypertension == 1, na.rm = TRUE),
      100 * sum(hypertension == 1, na.rm = TRUE) / n()
    ),
    `Diabetes, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(diabetes == 1, na.rm = TRUE),
      100 * sum(diabetes == 1, na.rm = TRUE) / n()
    ),
    `Stroke, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(stroke == 1, na.rm = TRUE),
      100 * sum(stroke == 1, na.rm = TRUE) / n()
    ),
    `Abnormal heart rhythm, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(abnormal_heart_rhythm == 1, na.rm = TRUE),
      100 * sum(abnormal_heart_rhythm == 1, na.rm = TRUE) / n()
    ),
    `Heart failure, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(heart_failure == 1, na.rm = TRUE),
      100 * sum(heart_failure == 1, na.rm = TRUE) / n()
    ),
    `Baseline Alzheimer’s, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_alzheimers == 1, na.rm = TRUE),
      100 * sum(baseline_alzheimers == 1, na.rm = TRUE) / n()
    ),
    `Baseline dementia, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_dementia == 1, na.rm = TRUE),
      100 * sum(baseline_dementia == 1, na.rm = TRUE) / n()
    )
  )

w3_table1_summary_wide <- w3_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(
    cols = -pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = pa_level,
    values_from = Value
  ) %>%
  select(
    Characteristic,
    High,
    Moderate,
    Low,
    Sedentary,
    `Missing PA`
  )

knitr::kable(
  w3_table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 3 by physical activity category."
)
Table 1. Baseline characteristics at wave 3 by physical activity category.
Characteristic High Moderate Low Sedentary Missing PA
N 1969 4838 2263 686 15
Age, mean (SD) 59.8 (8.9) 63.3 (10.5) 68.1 (12.0) 74.7 (14.2) 61.6 (12.2)
Female, n (%) 939 (47.7%) 2669 (55.2%) 1458 (64.4%) 399 (58.2%) 11 (73.3%)
Hypertension, n (%) 174 (8.8%) 473 (9.8%) 221 (9.8%) 59 (8.6%) 0 (0.0%)
Diabetes, n (%) 36 (1.8%) 115 (2.4%) 75 (3.3%) 30 (4.4%) 0 (0.0%)
Stroke, n (%) 15 (0.8%) 31 (0.6%) 30 (1.3%) 31 (4.5%) 0 (0.0%)
Abnormal heart rhythm, n (%) 44 (2.2%) 90 (1.9%) 62 (2.7%) 21 (3.1%) 0 (0.0%)
Heart failure, n (%) 1 (0.1%) 1 (0.0%) 8 (0.4%) 4 (0.6%) 0 (0.0%)
Baseline Alzheimer’s, n (%) 1 (0.1%) 5 (0.1%) 9 (0.4%) 17 (2.5%) 0 (0.0%)
Baseline dementia, n (%) 3 (0.2%) 20 (0.4%) 33 (1.5%) 56 (8.2%) 0 (0.0%)

wave 4 table 1

w4_table1_raw <- w4_core %>%
  mutate(
    hypertension = if_else(hediabp == 1, 1, 0, missing = 0),
    heart_failure = if_else(hediahf == 1, 1, 0, missing = 0),
    abnormal_heart_rhythm = if_else(hediaar == 1, 1, 0, missing = 0),
    diabetes = if_else(hediadi == 1, 1, 0, missing = 0),
    stroke = if_else(hediast == 1, 1, 0, missing = 0),

    baseline_alzheimers = if_else(hedibad == 1, 1, 0, missing = 0),
    baseline_dementia   = if_else(hedibde == 1, 1, 0, missing = 0),

    pa_level = case_when(
      palevel == 3 ~ "High",
      palevel == 2 ~ "Moderate",
      palevel == 1 ~ "Low",
      palevel == 0 ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )

w4_table1 <- w4_table1_raw %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = indager,
    sex = as_factor(dhsex),
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )

w4_table1_summary <- w4_table1 %>%
  mutate(
    pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
  ) %>%
  group_by(pa_level, .drop = FALSE) %>%
  summarise(
    N = n(),
    `Age, mean (SD)` = sprintf("%.1f (%.1f)", mean(age, na.rm = TRUE), sd(age, na.rm = TRUE)),
    `Female, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(sex == "Female", na.rm = TRUE),
      100 * sum(sex == "Female", na.rm = TRUE) / n()
    ),
    `Hypertension, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(hypertension == 1, na.rm = TRUE),
      100 * sum(hypertension == 1, na.rm = TRUE) / n()
    ),
    `Diabetes, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(diabetes == 1, na.rm = TRUE),
      100 * sum(diabetes == 1, na.rm = TRUE) / n()
    ),
    `Stroke, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(stroke == 1, na.rm = TRUE),
      100 * sum(stroke == 1, na.rm = TRUE) / n()
    ),
    `Abnormal heart rhythm, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(abnormal_heart_rhythm == 1, na.rm = TRUE),
      100 * sum(abnormal_heart_rhythm == 1, na.rm = TRUE) / n()
    ),
    `Heart failure, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(heart_failure == 1, na.rm = TRUE),
      100 * sum(heart_failure == 1, na.rm = TRUE) / n()
    ),
    `Baseline Alzheimer’s, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_alzheimers == 1, na.rm = TRUE),
      100 * sum(baseline_alzheimers == 1, na.rm = TRUE) / n()
    ),
    `Baseline dementia, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_dementia == 1, na.rm = TRUE),
      100 * sum(baseline_dementia == 1, na.rm = TRUE) / n()
    )
  )

w4_table1_summary_wide <- w4_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(
    cols = -pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = pa_level,
    values_from = Value
  ) %>%
  select(
    Characteristic,
    High,
    Moderate,
    Low,
    Sedentary,
    `Missing PA`
  )

knitr::kable(
  w4_table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 4 by physical activity category."
)
Table 1. Baseline characteristics at wave 4 by physical activity category.
Characteristic High Moderate Low Sedentary Missing PA
N 2254 5384 2562 835 15
Age, mean (SD) 61.7 (8.4) 63.9 (9.5) 68.2 (11.3) 74.2 (12.5) 59.1 (9.2)
Female, n (%) 1070 (47.5%) 2929 (54.4%) 1649 (64.4%) 472 (56.5%) 5 (33.3%)
Hypertension, n (%) 201 (8.9%) 603 (11.2%) 350 (13.7%) 135 (16.2%) 0 (0.0%)
Diabetes, n (%) 43 (1.9%) 149 (2.8%) 125 (4.9%) 61 (7.3%) 1 (6.7%)
Stroke, n (%) 7 (0.3%) 55 (1.0%) 46 (1.8%) 68 (8.1%) 0 (0.0%)
Abnormal heart rhythm, n (%) 32 (1.4%) 111 (2.1%) 88 (3.4%) 45 (5.4%) 0 (0.0%)
Heart failure, n (%) 1 (0.0%) 3 (0.1%) 6 (0.2%) 15 (1.8%) 0 (0.0%)
Baseline Alzheimer’s, n (%) 0 (0.0%) 5 (0.1%) 3 (0.1%) 29 (3.5%) 0 (0.0%)
Baseline dementia, n (%) 1 (0.0%) 17 (0.3%) 19 (0.7%) 60 (7.2%) 0 (0.0%)

wave 5 table 1

## Wave 5 table 1
w5_table1_raw <- w5_core %>%
  mutate(
    hypertension = if_else(hediabp == 1, 1, 0, missing = 0),
    heart_failure = if_else(hediahf == 1, 1, 0, missing = 0),
    abnormal_heart_rhythm = if_else(hediaar == 1, 1, 0, missing = 0),
    diabetes = if_else(hediadi == 1, 1, 0, missing = 0),
    stroke = if_else(hediast == 1, 1, 0, missing = 0),

    baseline_alzheimers = if_else(hedibad == 1, 1, 0, missing = 0),
    baseline_dementia   = if_else(hedibde == 1, 1, 0, missing = 0),

    pa_level = case_when(
      palevel == 3 ~ "High",
      palevel == 2 ~ "Moderate",
      palevel == 1 ~ "Low",
      palevel == 0 ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )
# ORIGINAL VERSION OF W5 TABLE 1 PERFORMED FIRST - SEE BELOW FOR AMENDED FINAL VERSION
# Original version produced implausible age summaries because wave 5 #..indager contains negative
# ..special missing codes, which distorted the mean and SD if not #..recoded.
# Kept here as part of the analytic process to include in methods

w5_table1 <- w5_table1_raw %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = indager,
    sex = as_factor(dhsex),
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )

# Wave 5 table 1 CORRECTED VERSION
# Wave 5 indager includes negative values representing special missing codes.
# These were recoded to NA before summary statistics were generated for wave 5 table 1 amended final version

w5_table1 <- w5_table1_raw %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = if_else(indager < 0, NA_real_, as.numeric(indager)),
    sex = as_factor(dhsex),
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )

w5_table1_summary <- w5_table1 %>%
  mutate(
    pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
  ) %>%
  group_by(pa_level, .drop = FALSE) %>%
  summarise(
    N = n(),
    `Age, mean (SD)` = sprintf("%.1f (%.1f)", mean(age, na.rm = TRUE), sd(age, na.rm = TRUE)),
    `Female, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(sex == "Female", na.rm = TRUE),
      100 * sum(sex == "Female", na.rm = TRUE) / n()
    ),
    `Hypertension, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(hypertension == 1, na.rm = TRUE),
      100 * sum(hypertension == 1, na.rm = TRUE) / n()
    ),
    `Diabetes, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(diabetes == 1, na.rm = TRUE),
      100 * sum(diabetes == 1, na.rm = TRUE) / n()
    ),
    `Stroke, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(stroke == 1, na.rm = TRUE),
      100 * sum(stroke == 1, na.rm = TRUE) / n()
    ),
    `Abnormal heart rhythm, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(abnormal_heart_rhythm == 1, na.rm = TRUE),
      100 * sum(abnormal_heart_rhythm == 1, na.rm = TRUE) / n()
    ),
    `Heart failure, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(heart_failure == 1, na.rm = TRUE),
      100 * sum(heart_failure == 1, na.rm = TRUE) / n()
    ),
    `Baseline Alzheimer’s, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_alzheimers == 1, na.rm = TRUE),
      100 * sum(baseline_alzheimers == 1, na.rm = TRUE) / n()
    ),
    `Baseline dementia, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_dementia == 1, na.rm = TRUE),
      100 * sum(baseline_dementia == 1, na.rm = TRUE) / n()
    )
  )

w5_table1_summary_wide <- w5_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(
    cols = -pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = pa_level,
    values_from = Value
  ) %>%
  select(
    Characteristic,
    High,
    Moderate,
    Low,
    Sedentary,
    `Missing PA`
  )

knitr::kable(
  w5_table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 5 by physical activity category."
)
Table 1. Baseline characteristics at wave 5 by physical activity category.
Characteristic High Moderate Low Sedentary Missing PA
N 1981 4936 2432 801 124
Age, mean (SD) 62.9 (7.8) 65.5 (8.9) 68.9 (10.0) 73.0 (10.2) 60.7 (5.5)
Female, n (%) 934 (47.1%) 2736 (55.4%) 1507 (62.0%) 472 (58.9%) 56 (45.2%)
Hypertension, n (%) 141 (7.1%) 335 (6.8%) 202 (8.3%) 90 (11.2%) 8 (6.5%)
Diabetes, n (%) 33 (1.7%) 112 (2.3%) 93 (3.8%) 49 (6.1%) 1 (0.8%)
Stroke, n (%) 10 (0.5%) 32 (0.6%) 43 (1.8%) 64 (8.0%) 0 (0.0%)
Abnormal heart rhythm, n (%) 25 (1.3%) 99 (2.0%) 78 (3.2%) 52 (6.5%) 2 (1.6%)
Heart failure, n (%) 1 (0.1%) 4 (0.1%) 8 (0.3%) 11 (1.4%) 0 (0.0%)
Baseline Alzheimer’s, n (%) 0 (0.0%) 7 (0.1%) 6 (0.2%) 33 (4.1%) 0 (0.0%)
Baseline dementia, n (%) 0 (0.0%) 11 (0.2%) 26 (1.1%) 73 (9.1%) 0 (0.0%)

wave 6 table 1

w6_table1_raw <- w6_core %>%
  mutate(
    hypertension = if_else(hediabp == 1, 1, 0, missing = 0),
    heart_failure = if_else(hediahf == 1, 1, 0, missing = 0),
    abnormal_heart_rhythm = if_else(hediaar == 1, 1, 0, missing = 0),
    diabetes = if_else(hediadi == 1, 1, 0, missing = 0),
    stroke = if_else(hediast == 1, 1, 0, missing = 0),

    baseline_alzheimers = if_else(hedibad == 1, 1, 0, missing = 0),
    baseline_dementia   = if_else(hedibde == 1, 1, 0, missing = 0),

    pa_level = case_when(
      HeActa %in% c(1, 2) ~ "High",
      !HeActa %in% c(1, 2) & HeActb %in% c(1, 2) ~ "Moderate",
      !HeActa %in% c(1, 2) & !HeActb %in% c(1, 2) & HeActc %in% c(1, 2) ~ "Low",
      HeActa %in% c(3, 4) & HeActb %in% c(3, 4) & HeActc %in% c(3, 4) ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )

# ORIGINAL VERSION OF W6 AGE HANDLING (kept as methodological note)
# age = indager
# This is not used in final summaries because negative indager values are special missing codes.

# wave 6 table 1 CORRECTED VERSION
w6_table1 <- w6_table1_raw %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = if_else(indager < 0, NA_real_, as.numeric(indager)),
    sex = as_factor(DhSex),
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )

# diagnostic age check
w6_table1 %>%
  group_by(pa_level) %>%
  summarise(
    min_age = min(age, na.rm = TRUE),
    q1_age = quantile(age, 0.25, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    mean_age = mean(age, na.rm = TRUE),
    q3_age = quantile(age, 0.75, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    sd_age = sd(age, na.rm = TRUE),
    n_missing_age = sum(is.na(age))
  )
## # A tibble: 5 × 9
##   pa_level  min_age q1_age median_age mean_age q3_age max_age sd_age
##   <fct>       <dbl>  <dbl>      <dbl>    <dbl>  <dbl>   <dbl>  <dbl>
## 1 High           28   57           63     63.3   69        89   8.44
## 2 Moderate       31   59           65     65.8   73        89   9.35
## 3 Low            41   61           69     69.4   78        89  10.5 
## 4 Sedentary      40   62           72     71.1   80        89  10.8 
## 5 <NA>           63   66.8         71     73     77.2      87  10.4 
## # ℹ 1 more variable: n_missing_age <int>
w6_table1_summary <- w6_table1 %>%
  mutate(
    pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
  ) %>%
  group_by(pa_level, .drop = FALSE) %>%
  summarise(
    N = n(),
    `Age, mean (SD)` = sprintf("%.1f (%.1f)", mean(age, na.rm = TRUE), sd(age, na.rm = TRUE)),
    `Female, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(sex == "Female", na.rm = TRUE),
      100 * sum(sex == "Female", na.rm = TRUE) / n()
    ),
    `Hypertension, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(hypertension == 1, na.rm = TRUE),
      100 * sum(hypertension == 1, na.rm = TRUE) / n()
    ),
    `Diabetes, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(diabetes == 1, na.rm = TRUE),
      100 * sum(diabetes == 1, na.rm = TRUE) / n()
    ),
    `Stroke, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(stroke == 1, na.rm = TRUE),
      100 * sum(stroke == 1, na.rm = TRUE) / n()
    ),
    `Abnormal heart rhythm, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(abnormal_heart_rhythm == 1, na.rm = TRUE),
      100 * sum(abnormal_heart_rhythm == 1, na.rm = TRUE) / n()
    ),
    `Heart failure, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(heart_failure == 1, na.rm = TRUE),
      100 * sum(heart_failure == 1, na.rm = TRUE) / n()
    ),
    `Baseline Alzheimer’s, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_alzheimers == 1, na.rm = TRUE),
      100 * sum(baseline_alzheimers == 1, na.rm = TRUE) / n()
    ),
    `Baseline dementia, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_dementia == 1, na.rm = TRUE),
      100 * sum(baseline_dementia == 1, na.rm = TRUE) / n()
    )
  )

w6_table1_summary_wide <- w6_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(
    cols = -pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = pa_level,
    values_from = Value
  ) %>%
  select(
    Characteristic,
    High,
    Moderate,
    Low,
    Sedentary,
    `Missing PA`
  )

knitr::kable(
  w6_table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 6 by physical activity category."
)
Table 1. Baseline characteristics at wave 6 by physical activity category.
Characteristic High Moderate Low Sedentary Missing PA
N 3156 4810 1626 1005 4
Age, mean (SD) 63.3 (8.4) 65.8 (9.4) 69.4 (10.5) 71.1 (10.8) 73.0 (10.4)
Female, n (%) 1526 (48.4%) 2713 (56.4%) 1087 (66.9%) 527 (52.4%) 4 (100.0%)
Hypertension, n (%) 217 (6.9%) 350 (7.3%) 117 (7.2%) 138 (13.7%) 0 (0.0%)
Diabetes, n (%) 57 (1.8%) 130 (2.7%) 50 (3.1%) 79 (7.9%) 0 (0.0%)
Stroke, n (%) 10 (0.3%) 47 (1.0%) 44 (2.7%) 54 (5.4%) 0 (0.0%)
Abnormal heart rhythm, n (%) 69 (2.2%) 107 (2.2%) 72 (4.4%) 38 (3.8%) 0 (0.0%)
Heart failure, n (%) 2 (0.1%) 6 (0.1%) 4 (0.2%) 13 (1.3%) 0 (0.0%)
Baseline Alzheimer’s, n (%) 2 (0.1%) 7 (0.1%) 4 (0.2%) 37 (3.7%) 0 (0.0%)
Baseline dementia, n (%) 2 (0.1%) 15 (0.3%) 18 (1.1%) 77 (7.7%) 0 (0.0%)

wave 7 table 1

## Wave 7 table 1

w7_table1_raw <- w7_core %>%
  mutate(
    hypertension = if_else(hediabp == 1, 1, 0, missing = 0),
    heart_failure = if_else(hediahf == 1, 1, 0, missing = 0),
    abnormal_heart_rhythm = if_else(hediaar == 1, 1, 0, missing = 0),
    diabetes = if_else(hediadi == 1, 1, 0, missing = 0),
    stroke = if_else(hediast == 1, 1, 0, missing = 0),

    baseline_alzheimers = if_else(hedibad == 1, 1, 0, missing = 0),
    baseline_dementia   = if_else(hedibde == 1, 1, 0, missing = 0),

    pa_level = case_when(
      HeActa %in% c(1, 2) ~ "High",
      !HeActa %in% c(1, 2) & HeActb %in% c(1, 2) ~ "Moderate",
      !HeActa %in% c(1, 2) & !HeActb %in% c(1, 2) & HeActc %in% c(1, 2) ~ "Low",
      HeActa %in% c(3, 4) & HeActb %in% c(3, 4) & HeActc %in% c(3, 4) ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )

# ORIGINAL VERSION OF W7 AGE HANDLING (kept as methodological note)
# age = indager
# This is not used in final summaries because negative indager values are special missing codes.

# Wave 7 corrected version
w7_table1 <- w7_table1_raw %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = if_else(indager < 0, NA_real_, as.numeric(indager)),
    sex = as_factor(DhSex),
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )

# diagnostic age check
w7_table1 %>%
  group_by(pa_level) %>%
  summarise(
    min_age = min(age, na.rm = TRUE),
    q1_age = quantile(age, 0.25, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    mean_age = mean(age, na.rm = TRUE),
    q3_age = quantile(age, 0.75, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    sd_age = sd(age, na.rm = TRUE),
    n_missing_age = sum(is.na(age))
  )
## # A tibble: 5 × 9
##   pa_level  min_age q1_age median_age mean_age q3_age max_age sd_age
##   <fct>       <dbl>  <dbl>      <dbl>    <dbl>  <dbl>   <dbl>  <dbl>
## 1 High           29   58           64     63.9     69      89   8.68
## 2 Moderate       33   60           66     66.7     73      89   9.21
## 3 Low            39   62           70     70.1     78      89  10.3 
## 4 Sedentary      38   64           74     72.3     81      89  10.6 
## 5 <NA>           63   68.5         74     73       78      82   9.54
## # ℹ 1 more variable: n_missing_age <int>
w7_table1_summary <- w7_table1 %>%
  mutate(
    pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
  ) %>%
  group_by(pa_level, .drop = FALSE) %>%
  summarise(
    N = n(),
    `Age, mean (SD)` = sprintf("%.1f (%.1f)", mean(age, na.rm = TRUE), sd(age, na.rm = TRUE)),
    `Female, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(sex == "Female", na.rm = TRUE),
      100 * sum(sex == "Female", na.rm = TRUE) / n()
    ),
    `Hypertension, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(hypertension == 1, na.rm = TRUE),
      100 * sum(hypertension == 1, na.rm = TRUE) / n()
    ),
    `Diabetes, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(diabetes == 1, na.rm = TRUE),
      100 * sum(diabetes == 1, na.rm = TRUE) / n()
    ),
    `Stroke, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(stroke == 1, na.rm = TRUE),
      100 * sum(stroke == 1, na.rm = TRUE) / n()
    ),
    `Abnormal heart rhythm, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(abnormal_heart_rhythm == 1, na.rm = TRUE),
      100 * sum(abnormal_heart_rhythm == 1, na.rm = TRUE) / n()
    ),
    `Heart failure, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(heart_failure == 1, na.rm = TRUE),
      100 * sum(heart_failure == 1, na.rm = TRUE) / n()
    ),
    `Baseline Alzheimer’s, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_alzheimers == 1, na.rm = TRUE),
      100 * sum(baseline_alzheimers == 1, na.rm = TRUE) / n()
    ),
    `Baseline dementia, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_dementia == 1, na.rm = TRUE),
      100 * sum(baseline_dementia == 1, na.rm = TRUE) / n()
    )
  )

w7_table1_summary_wide <- w7_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(
    cols = -pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = pa_level,
    values_from = Value
  ) %>%
  select(
    Characteristic,
    High,
    Moderate,
    Low,
    Sedentary,
    `Missing PA`
  )

knitr::kable(
  w7_table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 7 by physical activity category."
)
Table 1. Baseline characteristics at wave 7 by physical activity category.
Characteristic High Moderate Low Sedentary Missing PA
N 2848 4426 1483 906 3
Age, mean (SD) 63.9 (8.7) 66.7 (9.2) 70.1 (10.3) 72.3 (10.6) 73.0 (9.5)
Female, n (%) 1400 (49.2%) 2529 (57.1%) 963 (64.9%) 474 (52.3%) 2 (66.7%)
Hypertension, n (%) 141 (5.0%) 291 (6.6%) 111 (7.5%) 111 (12.3%) 0 (0.0%)
Diabetes, n (%) 45 (1.6%) 110 (2.5%) 60 (4.0%) 64 (7.1%) 0 (0.0%)
Stroke, n (%) 13 (0.5%) 39 (0.9%) 35 (2.4%) 49 (5.4%) 1 (33.3%)
Abnormal heart rhythm, n (%) 55 (1.9%) 116 (2.6%) 54 (3.6%) 49 (5.4%) 0 (0.0%)
Heart failure, n (%) 4 (0.1%) 6 (0.1%) 11 (0.7%) 12 (1.3%) 0 (0.0%)
Baseline Alzheimer’s, n (%) 0 (0.0%) 7 (0.2%) 7 (0.5%) 29 (3.2%) 0 (0.0%)
Baseline dementia, n (%) 3 (0.1%) 22 (0.5%) 13 (0.9%) 76 (8.4%) 0 (0.0%)

wave 8 table 1

w8_table1_raw <- w8_core %>%
  mutate(
    hypertension = if_else(hediabp == 1, 1, 0, missing = 0),
    heart_failure = if_else(hediahf == 1, 1, 0, missing = 0),
    abnormal_heart_rhythm = if_else(hediaar == 1, 1, 0, missing = 0),
    diabetes = if_else(hediadi == 1, 1, 0, missing = 0),
    stroke = if_else(hediast == 1, 1, 0, missing = 0),

    baseline_alzheimers = if_else(hedibad == 1, 1, 0, missing = 0),
    baseline_dementia   = if_else(hedibde == 1, 1, 0, missing = 0),

    pa_level = case_when(
      heacta %in% c(1, 2) ~ "High",
      !heacta %in% c(1, 2) & heactb %in% c(1, 2) ~ "Moderate",
      !heacta %in% c(1, 2) & !heactb %in% c(1, 2) & heactc %in% c(1, 2) ~ "Low",
      heacta %in% c(3, 4) & heactb %in% c(3, 4) & heactc %in% c(3, 4) ~ "Sedentary",
      TRUE ~ NA_character_
    )
  )

# ORIGINAL VERSION OF W8 AGE HANDLING (kept as methodological note)
# age = indager
# This is not used in final summaries because negative indager values are special missing codes.

# Wave 8 corrected version
w8_table1 <- w8_table1_raw %>%
  transmute(
    idauniq,
    pa_level = factor(pa_level, levels = c("High", "Moderate", "Low", "Sedentary")),
    age = if_else(indager < 0, NA_real_, as.numeric(indager)),
    sex = as_factor(indsex),
    hypertension,
    heart_failure,
    abnormal_heart_rhythm,
    diabetes,
    stroke,
    baseline_alzheimers,
    baseline_dementia
  )

# diagnostic age check
w8_table1 %>%
  group_by(pa_level) %>%
  summarise(
    min_age = min(age, na.rm = TRUE),
    q1_age = quantile(age, 0.25, na.rm = TRUE),
    median_age = median(age, na.rm = TRUE),
    mean_age = mean(age, na.rm = TRUE),
    q3_age = quantile(age, 0.75, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    sd_age = sd(age, na.rm = TRUE),
    n_missing_age = sum(is.na(age))
  )
## # A tibble: 5 × 9
##   pa_level  min_age q1_age median_age mean_age q3_age max_age sd_age
##   <fct>       <dbl>  <dbl>      <dbl>    <dbl>  <dbl>   <dbl>  <dbl>
## 1 High           31   60           65     65.5   71        89   8.14
## 2 Moderate       34   62           68     68.4   75        89   8.84
## 3 Low            40   64           71     71.3   80        89   9.74
## 4 Sedentary      40   66           74     73.4   81        89   9.61
## 5 <NA>           66   67.5         69     70.3   72.5      76   5.13
## # ℹ 1 more variable: n_missing_age <int>
w8_table1_summary <- w8_table1 %>%
  mutate(
    pa_level = forcats::fct_explicit_na(pa_level, na_level = "Missing PA")
  ) %>%
  group_by(pa_level, .drop = FALSE) %>%
  summarise(
    N = n(),
    `Age, mean (SD)` = sprintf("%.1f (%.1f)", mean(age, na.rm = TRUE), sd(age, na.rm = TRUE)),
    `Female, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(sex == "Female", na.rm = TRUE),
      100 * sum(sex == "Female", na.rm = TRUE) / n()
    ),
    `Hypertension, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(hypertension == 1, na.rm = TRUE),
      100 * sum(hypertension == 1, na.rm = TRUE) / n()
    ),
    `Diabetes, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(diabetes == 1, na.rm = TRUE),
      100 * sum(diabetes == 1, na.rm = TRUE) / n()
    ),
    `Stroke, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(stroke == 1, na.rm = TRUE),
      100 * sum(stroke == 1, na.rm = TRUE) / n()
    ),
    `Abnormal heart rhythm, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(abnormal_heart_rhythm == 1, na.rm = TRUE),
      100 * sum(abnormal_heart_rhythm == 1, na.rm = TRUE) / n()
    ),
    `Heart failure, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(heart_failure == 1, na.rm = TRUE),
      100 * sum(heart_failure == 1, na.rm = TRUE) / n()
    ),
    `Baseline Alzheimer’s, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_alzheimers == 1, na.rm = TRUE),
      100 * sum(baseline_alzheimers == 1, na.rm = TRUE) / n()
    ),
    `Baseline dementia, n (%)` = sprintf(
      "%d (%.1f%%)",
      sum(baseline_dementia == 1, na.rm = TRUE),
      100 * sum(baseline_dementia == 1, na.rm = TRUE) / n()
    )
  )

w8_table1_summary_wide <- w8_table1_summary %>%
  mutate(N = as.character(N)) %>%
  pivot_longer(
    cols = -pa_level,
    names_to = "Characteristic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = pa_level,
    values_from = Value
  ) %>%
  select(
    Characteristic,
    High,
    Moderate,
    Low,
    Sedentary,
    `Missing PA`
  )

knitr::kable(
  w8_table1_summary_wide,
  caption = "Table 1. Baseline characteristics at wave 8 by physical activity category."
)
Table 1. Baseline characteristics at wave 8 by physical activity category.
Characteristic High Moderate Low Sedentary Missing PA
N 2443 3932 1281 786 3
Age, mean (SD) 65.5 (8.1) 68.4 (8.8) 71.3 (9.7) 73.4 (9.6) 70.3 (5.1)
Female, n (%) 1204 (49.3%) 2253 (57.3%) 848 (66.2%) 390 (49.6%) 0 (0.0%)
Hypertension, n (%) 111 (4.5%) 204 (5.2%) 92 (7.2%) 78 (9.9%) 0 (0.0%)
Diabetes, n (%) 35 (1.4%) 73 (1.9%) 43 (3.4%) 48 (6.1%) 0 (0.0%)
Stroke, n (%) 14 (0.6%) 42 (1.1%) 30 (2.3%) 62 (7.9%) 0 (0.0%)
Abnormal heart rhythm, n (%) 49 (2.0%) 99 (2.5%) 49 (3.8%) 50 (6.4%) 0 (0.0%)
Heart failure, n (%) 4 (0.2%) 14 (0.4%) 7 (0.5%) 18 (2.3%) 0 (0.0%)
Baseline Alzheimer’s, n (%) 2 (0.1%) 12 (0.3%) 12 (0.9%) 29 (3.7%) 0 (0.0%)
Baseline dementia, n (%) 7 (0.3%) 18 (0.5%) 25 (2.0%) 76 (9.7%) 0 (0.0%)

Wave summary numbers

### Total participant numbers by wave

wave_total_participant_numbers <- tibble(
  wave = c("Wave 1", "Wave 2", "Wave 3", "Wave 4", "Wave 5", "Wave 6", "Wave 7", "Wave 8", "Wave 9"),
  total_n = c(
    nrow(w1),
    nrow(w2_core),
    nrow(w3_core),
    nrow(w4_core),
    nrow(w5_core),
    nrow(w6_core),
    nrow(w7_core),
    nrow(w8_core),
    nrow(w9_core)
  )
)

knitr::kable(
  wave_total_participant_numbers,
  caption = "Total participant numbers by wave."
)
Total participant numbers by wave.
wave total_n
Wave 1 12099
Wave 2 9432
Wave 3 9771
Wave 4 11050
Wave 5 10274
Wave 6 10601
Wave 7 9666
Wave 8 8445
Wave 9 8736
### Diagnosis summary numbers by wave

# Wave 1 diagnosis summary numbers
wave1_diagnosis_summary_numbers <- w1 %>%
  summarise(
    wave = "Wave 1",
    alzheimers_n = sum(alz_w1 == 1, na.rm = TRUE),
    dementia_n = sum(dementia_w1 == 1, na.rm = TRUE),
    both_n = sum(alz_w1 == 1 & dementia_w1 == 1, na.rm = TRUE),
    either_n = sum(alz_w1 == 1 | dementia_w1 == 1, na.rm = TRUE)
  )

# Wave 2 diagnosis summary numbers
wave2_diagnosis_summary_numbers <- w2_core %>%
  summarise(
    wave = "Wave 2",
    alzheimers_n = sum(hedib01 == 8 | hedib02 == 8 | hedib03 == 8 | hedib04 == 8, na.rm = TRUE),
    dementia_n   = sum(hedib01 == 9 | hedib02 == 9 | hedib03 == 9 | hedib04 == 9, na.rm = TRUE),
    both_n = sum(
      (hedib01 %in% c(8, 9)) +
      (hedib02 %in% c(8, 9)) +
      (hedib03 %in% c(8, 9)) +
      (hedib04 %in% c(8, 9)) > 1,
      na.rm = TRUE
    ),
    either_n = sum(
      hedib01 %in% c(8, 9) | hedib02 %in% c(8, 9) |
      hedib03 %in% c(8, 9) | hedib04 %in% c(8, 9),
      na.rm = TRUE
    )
  )

# Wave 3 diagnosis summary numbers
wave3_diagnosis_summary_numbers <- w3_core %>%
  summarise(
    wave = "Wave 3",
    alzheimers_n = sum(hedibad %in% c(1, 2, 3), na.rm = TRUE),
    dementia_n   = sum(hedibde %in% c(1, 2, 3), na.rm = TRUE),
    both_n       = sum(hedibad %in% c(1, 2, 3) & hedibde %in% c(1, 2, 3), na.rm = TRUE),
    either_n     = sum(hedibad %in% c(1, 2, 3) | hedibde %in% c(1, 2, 3), na.rm = TRUE)
  )

# Waves 4 to 9 diagnosis summary numbers
wave4_diagnosis_summary_numbers <- w4_core %>%
  summarise(
    wave = "Wave 4",
    alzheimers_n = sum(hedibad == 1, na.rm = TRUE),
    dementia_n   = sum(hedibde == 1, na.rm = TRUE),
    both_n       = sum(hedibad == 1 & hedibde == 1, na.rm = TRUE),
    either_n     = sum(hedibad == 1 | hedibde == 1, na.rm = TRUE)
  )

wave5_diagnosis_summary_numbers <- w5_core %>%
  summarise(
    wave = "Wave 5",
    alzheimers_n = sum(hedibad == 1, na.rm = TRUE),
    dementia_n   = sum(hedibde == 1, na.rm = TRUE),
    both_n       = sum(hedibad == 1 & hedibde == 1, na.rm = TRUE),
    either_n     = sum(hedibad == 1 | hedibde == 1, na.rm = TRUE)
  )

wave6_diagnosis_summary_numbers <- w6_core %>%
  summarise(
    wave = "Wave 6",
    alzheimers_n = sum(hedibad == 1, na.rm = TRUE),
    dementia_n   = sum(hedibde == 1, na.rm = TRUE),
    both_n       = sum(hedibad == 1 & hedibde == 1, na.rm = TRUE),
    either_n     = sum(hedibad == 1 | hedibde == 1, na.rm = TRUE)
  )

wave7_diagnosis_summary_numbers <- w7_core %>%
  summarise(
    wave = "Wave 7",
    alzheimers_n = sum(hedibad == 1, na.rm = TRUE),
    dementia_n   = sum(hedibde == 1, na.rm = TRUE),
    both_n       = sum(hedibad == 1 & hedibde == 1, na.rm = TRUE),
    either_n     = sum(hedibad == 1 | hedibde == 1, na.rm = TRUE)
  )

wave8_diagnosis_summary_numbers <- w8_core %>%
  summarise(
    wave = "Wave 8",
    alzheimers_n = sum(hedibad == 1, na.rm = TRUE),
    dementia_n   = sum(hedibde == 1, na.rm = TRUE),
    both_n       = sum(hedibad == 1 & hedibde == 1, na.rm = TRUE),
    either_n     = sum(hedibad == 1 | hedibde == 1, na.rm = TRUE)
  )

wave9_diagnosis_summary_numbers <- w9_core %>%
  summarise(
    wave = "Wave 9",
    alzheimers_n = sum(hedibad == 1, na.rm = TRUE),
    dementia_n   = sum(hedibde == 1, na.rm = TRUE),
    both_n       = sum(hedibad == 1 & hedibde == 1, na.rm = TRUE),
    either_n     = sum(hedibad == 1 | hedibde == 1, na.rm = TRUE)
  )

wave_diagnosis_summary_numbers <- bind_rows(
  wave1_diagnosis_summary_numbers,
  wave2_diagnosis_summary_numbers,
  wave3_diagnosis_summary_numbers,
  wave4_diagnosis_summary_numbers,
  wave5_diagnosis_summary_numbers,
  wave6_diagnosis_summary_numbers,
  wave7_diagnosis_summary_numbers,
  wave8_diagnosis_summary_numbers,
  wave9_diagnosis_summary_numbers
)

knitr::kable(
  wave_diagnosis_summary_numbers,
  caption = "Diagnosis summary numbers by wave."
)
Diagnosis summary numbers by wave.
wave alzheimers_n dementia_n both_n either_n
Wave 1 14 64 3 75
Wave 2 11 43 2 52
Wave 3 32 112 11 133
Wave 4 37 97 13 121
Wave 5 46 110 19 137
Wave 6 50 112 17 145
Wave 7 43 114 19 138
Wave 8 55 126 23 158
Wave 9 47 127 21 153
### Missing physical activity summary numbers by wave

# Wave 1 missing PA
wave1_missing_pa_summary_numbers <- table1_w1_clean %>%
  summarise(
    wave = "Wave 1",
    missing_pa_n = sum(is.na(pa_level))
  )

# Wave 2 missing PA
w2_derived <- read_dta("raw data/RAW_data_stata/wave_2_derived_variables.dta")
w2 <- left_join(w2_core, w2_derived, by = "idauniq")

wave2_missing_pa_summary_numbers <- w2 %>%
  mutate(pa_level = if_else(palevel %in% c(0, 1, 2, 3), palevel, NA_real_)) %>%
  summarise(
    wave = "Wave 2",
    missing_pa_n = sum(is.na(pa_level))
  )
#waves 3 to 9 MISSING PA FOR NOW -> 'NA' AS PLACEHOLDER AS HAVEN'T STANDARDISED PA VARIABLES FOR WAVES 3 - 9 AS YET
# Placeholder for waves 3 to 9
wave3_missing_pa_summary_numbers <- tibble(wave = "Wave 3", missing_pa_n = NA_integer_)
wave4_missing_pa_summary_numbers <- tibble(wave = "Wave 4", missing_pa_n = NA_integer_)
wave5_missing_pa_summary_numbers <- tibble(wave = "Wave 5", missing_pa_n = NA_integer_)
wave6_missing_pa_summary_numbers <- tibble(wave = "Wave 6", missing_pa_n = NA_integer_)
wave7_missing_pa_summary_numbers <- tibble(wave = "Wave 7", missing_pa_n = NA_integer_)
wave8_missing_pa_summary_numbers <- tibble(wave = "Wave 8", missing_pa_n = NA_integer_)
wave9_missing_pa_summary_numbers <- tibble(wave = "Wave 9", missing_pa_n = NA_integer_)

wave_missing_pa_summary_numbers <- bind_rows(
  wave1_missing_pa_summary_numbers,
  wave2_missing_pa_summary_numbers,
  wave3_missing_pa_summary_numbers,
  wave4_missing_pa_summary_numbers,
  wave5_missing_pa_summary_numbers,
  wave6_missing_pa_summary_numbers,
  wave7_missing_pa_summary_numbers,
  wave8_missing_pa_summary_numbers,
  wave9_missing_pa_summary_numbers
)

knitr::kable(
  wave_missing_pa_summary_numbers,
  caption = "Missing physical activity summary numbers by wave."
)
Missing physical activity summary numbers by wave.
wave missing_pa_n
Wave 1 194
Wave 2 139
Wave 3 NA
Wave 4 NA
Wave 5 NA
Wave 6 NA
Wave 7 NA
Wave 8 NA
Wave 9 NA
### Combined wave summary numbers

wave_summary_numbers_table <- wave_total_participant_numbers %>%
  left_join(wave_diagnosis_summary_numbers, by = "wave") %>%
  left_join(wave_missing_pa_summary_numbers, by = "wave") %>%
  mutate(
    final_analysis_n = case_when(
      wave == "Wave 1" ~ nrow(analysis_w1),
      TRUE ~ NA_integer_
    )
  )

knitr::kable(
  wave_summary_numbers_table,
  caption = "Combined wave summary numbers."
)
Combined wave summary numbers.
wave total_n alzheimers_n dementia_n both_n either_n missing_pa_n final_analysis_n
Wave 1 12099 14 64 3 75 194 11855
Wave 2 9432 11 43 2 52 139 NA
Wave 3 9771 32 112 11 133 NA NA
Wave 4 11050 37 97 13 121 NA NA
Wave 5 10274 46 110 19 137 NA NA
Wave 6 10601 50 112 17 145 NA NA
Wave 7 9666 43 114 19 138 NA NA
Wave 8 8445 55 126 23 158 NA NA
Wave 9 8736 47 127 21 153 NA NA

Structure of the portfolio

Abstract

Provide the reader with a succinct summary of your work

Introduction

Provide an introduction to you portfolio to reader.

Method

covering data access requirements, ethics, metadata and all methodological aspects of your project

Results

Use this section to showcase the results of your data manipulation that will contribute to the project

###Table 1 shows the baseline characteristics at wave 1, organised around physical activity (PA) categories: high, moderate, low and sedentary. As mean age increases the proportion of participants in low and sedentary PA categories increases. Furthermore the low and sedentary categories have higher prevalence of comorbidities that include hypertension, diabetes, stroke, abnormal heart rhythms, and heart failure.

Conclusion

Summaries your findings,discuss them in the context of other similar work or questions and suggestions for future work. Conclude your portfolio with what started your data exploration and what have the data contributed in the decisions for patient care or health service delivery.

In text elements

##Some examples of having in-text elements as you develop your portfolio are provided here.

#* “# Headings” #* “## Subheading 1” #* “### subheading 2” #* “#### subheading 3”

Notice box

#::: {.infobox .caution data-latex=“{caution}”}

#The format to add boxes to your portfolio #:::

Tables

In-text table

#A template format to add a table into #the document you can use the following md code structure.

#|Data table | Coverage |Area | #|————–|————–|————————-| #|Health survey | 2015 |Self-reported outcomes | #|EHR | 2000 onwards |Electronic health records|

Data tables

defining a dataframe

CREL <- data.frame(
  Data = c("Health survey","EHR"), 
  Coverage=c(
    "2015","2000 onwards"
  ),
  Area=c(
    "Self-reported outcomes",
    "Electronic Health Records"
  ))

kable(CREL)
Data Coverage Area
Health survey 2015 Self-reported outcomes
EHR 2000 onwards Electronic Health Records

Interactive data elements

DT::datatable(
  CREL,
  extensions = 'Buttons',
  options = list(
              paging = TRUE,
              searching = TRUE,
              fixedColumns = TRUE,
              autoWidth = TRUE,
              ordering = TRUE,
              dom = 'tB',
              buttons = c('copy', 'excel')
              ),
              class = "display"
)