df$sex <- df$n622
df$sc_prenatal <- df$n492
df$smoking_preg <- df$n503
df$birth_weight <- df$n646

df$physical_activity_age7 <- df$n121
df$mon_read_child <- df$n179
df$dad_read_child <- df$n180
df$mon_read_book <- df$n186
df$mon_read_news <- df$n184
df$dad_read_book <- df$n187
df$dad_read_news <- df$n185
df$sc_age7 <- df$n190
df$breast_f <- df$n222
df$oral_ability_age7 <- df$n65
df$awareness_age7 <- df$n67
df$reading_ability_age7 <- df$n68
df$creativity_ability_age7 <- df$n69
df$numbers_ability_age7 <- df$n70
df$bsag_age7 <- df$n455
df$arithmetic_age7 <- df$n90
df$copying_age7 <- df$n457
df$draw_man_age7 <- df$n1840
df$reading_test_age7 <- df$n92

df$mon_walk_child_age11 <- df$n1145
df$dad_walk_child_age11 <- df$n1146
df$sc_age11 <- df$n1687
df$knowledge_age11 <- df$n876
df$numbers_ability_age11 <- df$n877
df$child_use_book_age11 <- df$n878
df$oral_ability_age11 <- df$n879
df$bsag_age11 <- df$n1008
df$verbal_ability_age11 <- df$n914
df$nonverbal_ability_age11 <- df$n917
df$general_ability_age11 <- df$n920
df$reading_ability_age11 <- df$n923
df$maths_ability_age11 <- df$n926
df$copying_age11 <- df$n929
df$child_bor_books_lib_age11 <- df$n931
df$child_read_books_age11 <- df$n935
df$child_read_news_age11 <- df$n936
df$physical_activity_age11 <- df$n941
df$child_writes_stories_age11 <- df$n942
df$child_draws_age11 <- df$n943
df$tv_age11 <- df$n949

df$sc_age16 <- df$n2384
df$iq_ag16 <- df$n1897
df$maths_ability_age16 <-df$n2244
df$english_ability_age16 <- df$n2245
df$language_age16 <- df$n2246
df$science_age16 <- df$n2247
df$practical_ability_age16 <- df$n2248
df$social_std_ability_age16 <- df$n2249
df$school_attendance_age16 <- df$n1722
df$child_read_age16 <- df$n2864
df$play_outdoor_age16 <- df$n2865
df$play_indoor_age16 <- df$n2867
df$swims_age16 <- df$n2866
df$tv_age16 <- df$n2868
df$smoking_age16 <- df$n2887
df$alcohol_age16 <- df$n2888
df$reading_test_age16 <- df$n2928
df$maths_test_age16 <- df$n2930
df$height_age16 <- df$dvht11
df$weight_age16 <- df$dvwt11

df$tv_age23 <- df$n5913_sweep4
df$read_book_age23 <- df$n5914_sweep4
df$read_news_age23 <- df$n5956_sweep4
df$physical_activity_age23 <- df$n5916_sweep4
df$alcohol_age23 <- df$n5920_sweep4
df$qual_age23 <- df$maxqual2_sweep4
df$smoking_age23 <- df$currentn_sweep4
df$income_age23 <- df$famnet_sweep4
df$job_time <- df$fullpart_sweep4
df$malaise_age23 <- df$malaise_sweep4

df$main_activity_age33 <- df$n500520_sweep5
df$sc_age33 <- df$n540056_sweep5
df$smoking_1_age33 <- df$n504262_sweep5
df$smoking_2_age33 <- df$n504265_sweep5
df$alcohol_age33 <- df$n504273_sweep5
df$occ_pa_age33 <- df$n504361_sweep5
df$physical_activity_1_age33 <- df$n504362_sweep5
df$physical_activity_2_age33 <- df$n504363_sweep5
df$qual_age33 <- df$hqual33_sweep5

df$occ_pa_age42 <- df$jdemand2_sweep6
df$smoking_age42 <- df$smoking_sweep6
df$alcohol_age42 <-df$drinks_sweep6
df$physical_activity_1_age42 <- df$exercise_sweep6
df$physical_activity_2_age42 <- df$breathls_sweep6
df$physical_activity_3_age42 <- df$sweat_sweep6
df$sc_age42 <- df$sc_sweep6

df$marital_age46 <- df$nd7ms_sweep7
df$income_age46 <- df$nd7iamtc_sweep7
df$main_activity_age46 <- df$nd7ecact_sweep7
df$qual_age46 <- df$nd7achq1_sweep7
df$computer_home_age46 <- df$n7pchome_sweep7
df$computer_home_freq_age46 <- df$n7hpcuse_sweep7
df$computer_work_age46 <- df$n7pcwork_sweep7
df$computer_work_freq_age46 <- df$n7wpcuse_sweep7
df$internet_leisure <- df$n7intacc_sweep7
df$read_news_age46 <- df$n7pread1_sweep7
df$read_book_age46 <- df$n7pread2_sweep7
df$diseases_age46 <- df$n7lsiany_sweep7
df$smoking_age46 <- df$nd7smoke_sweep7
df$alcohol_age46 <- df$n7drinks_sweep7
df$physical_activity_1_46 <- df$n7exers1_sweep7
df$physical_activity_2_46 <- df$n7breals_sweep7
df$physical_activity_3_46 <- df$n7sweat_sweep7
df$transport_age46 <- df$n7tranrt_sweep7

df$computer_home_age50 <- df$N8PCHOME_sweep8
df$computer_home_freq_age50 <- df$N8HPCUSE_sweep8
df$computer_work_age50 <- df$N8PCWORK_sweep8
df$computer_work_freq_age50 <- df$N8WPCUSE_sweep8
df$general_health_age50 <- df$N8HLTHGN_sweep8
df$diabetes_age50 <- df$N8KHPB03_sweep8
df$hypertension_age50 <- df$N8KHPB09_sweep8
df$words_immed_age50 <- df$N8CFLISN_sweep8
df$animals_named <- df$N8CFANI_sweep8
df$letter_canc_correct <- df$N8CFCOR_sweep8
df$letter_canc_accuracy <- df$N8CFMIS_sweep8
df$words_delayed <- df$N8CFLISD_sweep8
df$smoking_age50 <- df$N8SMOKIG_sweep8
df$alcohol_age50 <- df$N8DRINKS_sweep8
df$physical_activity_1_age50 <- df$N8EXERSE_sweep8
df$physical_activity_2_age50 <- df$N8BREALS_sweep8
df$physical_activity_3_age50 <- df$N8SWEAT_sweep8
df$occ_pa_age50 <- df$N8PHYSWK_sweep8
df$malaise_age50 <- df$ND8MALG_sweep8

df$physical_activity_age55 <- df$N9LEIS01_sweep9
df$internet_use_age55 <- df$N9INTRNT_sweep9
df$marital_age55 <- df$ND9MS_sweep9
df$main_activity_age55 <- df$ND9ECACT_sweep9
df$qual_age55 <- df$ND9HNVQ_sweep9
df$weight_age55 <- df$ND9WGHTK_sweep9
df$bmi_age55 <- df$ND9BMI_sweep9

df$marital_age62 <- df$n10hms_sweep10
df$main_activity_age62 <- df$n10econact2_sweep10
df$letter_can_accuracy <- df$n10cfmis_sweep10
df$animals_named_age62 <- df$n10cfani_sweep10
df$words_delayed_age62 <- df$n10cflisd_sweep10
df$general_health_age62 <- df$n10hlthgen_sweep10
df$diabetes_age62 <- df$n10khlprb04_sweep10
df$hypertension_age62 <- df$n10khlprb06_sweep10
df$dementia_age62 <- df$n10khlprb10_sweep10
df$physical_activity_1_age62 <- df$n10exercise_sweep10
df$physical_activity_2_age62 <- df$n10freqexer_sweep10
df$physical_activity_3_age62 <- df$n10sweat_sweep10
df$smoking_age62 <- df$n10smoking_sweep10
df$read_book_age62 <- df$n10ylq26_sweep10
df$tv_age62 <- df$n10ylq27_sweep10
df$bmi_age62 <- df$nd10bmi_rec_sweep10
df$malaise_age62 <- df$nd10mal_sweep10
df$qual_age62 <- df$nd10hnvq_sweep10

df$sc_age7 <- relevel(df$sc_age7, ref = "I")

df$breast_f <- relevel(df$breast_f, ref = "Under one month")

df$sc_age11 <- relevel(df$sc_age11, ref = "I")

df$sc_age16 <- relevel(df$sc_age16, ref = "I")

df$smoking_age16 <- relevel(df$smoking_age16, ref = "None,dont smoke")

df$alcohol_age16 <- relevel(df$alcohol_age16, ref = "Never had one")

df$qual_age33 <- relevel(df$qual_age33, ref = "No qualification")

df$diabetes_age50 <- relevel(df$diabetes_age50, ref = "Not mentioned")

df$hypertension_age50 <- relevel(df$hypertension_age50, ref = "Not mentioned")

df$malaise_age50 <- relevel(df$malaise_age50, ref = "Low malaise 0-3")

COGNITIVE FUNCTION AT AGE 7

library(broom)
library(dplyr)

df_complete <- df %>%
  filter(arithmetic_age7!=-1 &
           copying_age7!=-1  &
           draw_man_age7!=-1 &
           reading_test_age7!=-1)

# Age 7
# Problem Arithmetic test
df_complete$mean_arithmetic_age7 <- mean(df_complete$arithmetic_age7)
df_complete$sd_arithmetic_age7 <- sd(df_complete$arithmetic_age7)
df_complete$z_arithmetic_age7 <- (df_complete$arithmetic_age7-df_complete$mean_arithmetic_age7)/df_complete$sd_arithmetic_age7
df_complete$z_arithmetic_age7 = scale(df_complete$z_arithmetic_age7)[,1]

hist(df_complete$z_arithmetic_age7)

#Copying designs test
df_complete$mean_copying_age7 <- mean(df_complete$copying_age7)
df_complete$sd_copying_age7 <- sd(df_complete$copying_age7)
df_complete$z_copying_age7 <- (df_complete$copying_age7-df_complete$mean_copying_age7)/df_complete$sd_copying_age7
df_complete$z_copying_age7 = scale(df_complete$z_copying_age7)[,1]

hist(df_complete$z_copying_age7)

#Draw-a-man test
df_complete$mean_draw_man_age7 <- mean(df_complete$draw_man_age7)
df_complete$sd_draw_man_age7 <- sd(df_complete$draw_man_age7)
df_complete$z_draw_man_age7 <- (df_complete$draw_man_age7-df_complete$mean_draw_man_age7)/df_complete$sd_draw_man_age7
df_complete$z_draw_man_age7 = scale(df_complete$z_draw_man_age7)[,1]

hist(df_complete$z_draw_man_age7)

#Southgate Group Reading test
df_complete$mean_reading_test_age7 <- mean(df_complete$reading_test_age7)
df_complete$sd_reading_test_age7 <- sd(df_complete$reading_test_age7)
df_complete$z_reading_test_age7 <- (df_complete$reading_test_age7-df_complete$mean_reading_test_age7)/df_complete$sd_reading_test_age7
df_complete$z_reading_test_age7 = scale(df_complete$z_draw_man_age7)[,1]

hist(df_complete$z_reading_test_age7)

# averaging all z scores
df_complete$global_cog_age7 <- (df_complete$z_arithmetic_age7 + df_complete$z_copying_age7 + df_complete$z_draw_man_age7 + df_complete$z_reading_test_age7) / 4

# creating a z score for the averaged variable
df_complete$z_global_cog_age7 = scale(df_complete$global_cog_age7)[,1]

hist(df_complete$z_global_cog_age7)

# Reference levels
df_complete <- df_complete %>%
  mutate(
    dad_read_score = case_when(
      dad_read_book %in% c("Yes most weeks") ~ 2,
      dad_read_book %in% c("Yes occasionally") ~ 1,
      dad_read_book %in% c("Hardly ever") ~ 0,
      TRUE ~ NA_real_
    ),
    
    mom_read_score = case_when(
      mon_read_child %in% c("Yes,every week") ~ 2,
      mon_read_child %in% c("Yes,occasionally") ~ 1,
      mon_read_child %in% c("Hardly ever") ~ 0,
      TRUE ~ NA_real_
    )
  )

df_complete <- df_complete %>%
  mutate(
    parent_read = case_when(
      dad_read_score == 0 & mom_read_score == 0 ~ 0,
      dad_read_score > 0 | mom_read_score > 0 ~ 1,
      TRUE ~ NA_real_
    )
  )

df_complete <- df_complete %>%
  mutate(
    parent_read_level = pmax(dad_read_score, mom_read_score, na.rm = TRUE),
    parent_read_level = ifelse(
      is.infinite(parent_read_level), NA, parent_read_level
    )
  )


df_complete$parent_read_level <- factor(
  df_complete$parent_read_level,
  levels = c(0, 1, 2),
  labels = c("Hardly ever", "Occasionally", "Weekly"))

DescTools::Desc(df_complete$parent_read_level)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete$parent_read_level (factor)
## 
##   length      n    NAs unique levels  dupes
##   14'404 13'651    753      3      3      y
##           94.8%   5.2%                     
## 
##           level   freq   perc  cumfreq  cumperc
## 1        Weekly  9'459  69.3%    9'459    69.3%
## 2  Occasionally  3'170  23.2%   12'629    92.5%
## 3   Hardly ever  1'022   7.5%   13'651   100.0%

global_age7 <- lm(z_global_cog_age7 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f, data = df_complete)

tab_model(global_age7, digits = 3)
  z global cog age 7
Predictors Estimates CI p
(Intercept) -0.060 -0.278 – 0.158 0.590
sex [Female] 0.006 -0.027 – 0.038 0.736
sc prenatal
[Unemployed,sick]
-0.593 -1.541 – 0.355 0.220
sc prenatal [I] 0.331 0.180 – 0.482 <0.001
sc prenatal [II] 0.231 0.106 – 0.356 <0.001
sc prenatal [III] 0.172 0.058 – 0.287 0.003
sc prenatal [IV] 0.089 -0.034 – 0.211 0.155
sc prenatal [V] -0.029 -0.154 – 0.096 0.650
sc prenatal [Students] 0.254 -0.283 – 0.791 0.354
sc prenatal [Dead or
away]
-0.671 -2.010 – 0.668 0.326
sc prenatal [Retired] 0.263 -1.076 – 1.603 0.700
sc prenatal [Single,no
husbnd]
-0.095 -0.250 – 0.059 0.227
smoking preg [5mths-no
change]
0.029 -0.128 – 0.186 0.715
smoking preg [Now
non-smoker]
0.092 -0.076 – 0.259 0.283
smoking preg [1 to 4 now] 0.037 -0.143 – 0.218 0.684
smoking preg [5 to 9 now] -0.019 -0.207 – 0.169 0.840
smoking preg [10 to 14
now]
0.038 -0.177 – 0.254 0.729
smoking preg [15 to 19
now]
-0.058 -0.322 – 0.206 0.668
smoking preg [20 to 24
now]
-0.233 -0.537 – 0.071 0.133
smoking preg [25 to 29
now]
0.029 -0.441 – 0.499 0.903
smoking preg [30 or more
now]
0.025 -0.501 – 0.552 0.925
smoking_pregVariable-5mths 0.010 -0.160 – 0.180 0.908
birth weight 0.000 0.000 – 0.001 0.002
parent read level
[Occasionally]
0.122 0.053 – 0.191 0.001
parent read level
[Weekly]
0.199 0.134 – 0.263 <0.001
sc age7 [NA, unclear] -0.578 -0.795 – -0.361 <0.001
sc age7 [No male head] -0.441 -0.576 – -0.306 <0.001
sc age7 [II] -0.118 -0.215 – -0.021 0.017
sc age7 [III non-manual] -0.146 -0.250 – -0.042 0.006
sc age7 [III manual] -0.391 -0.487 – -0.294 <0.001
sc age7 [IV non-manual] -0.377 -0.536 – -0.219 <0.001
sc age7 [IV manual] -0.481 -0.584 – -0.378 <0.001
sc age7 [V] -0.620 -0.738 – -0.502 <0.001
breast f [NA] 0.237 -0.704 – 1.179 0.621
breast f [Dont know] -0.206 -0.435 – 0.022 0.077
breast f [No] -0.086 -0.130 – -0.042 <0.001
breast f [Over one month] 0.095 0.053 – 0.136 <0.001
Observations 13225
R2 / R2 adjusted 0.071 / 0.069
arith_age7 <- lm(z_arithmetic_age7 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f, data = df_complete)

arith_age7 %>%
  broom::tidy(conf.int = TRUE) %>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value conf.low conf.high
(Intercept) 0.084 0.113 0.745 0.456 -0.137 0.304
sexFemale -0.088 0.017 -5.197 0.000 -0.121 -0.055
sc_prenatalUnemployed,sick 0.384 0.489 0.786 0.432 -0.573 1.342
sc_prenatalI 0.313 0.078 4.024 0.000 0.161 0.466
sc_prenatalII 0.205 0.064 3.174 0.002 0.078 0.331
sc_prenatalIII 0.162 0.059 2.751 0.006 0.047 0.277
sc_prenatalIV 0.085 0.063 1.344 0.179 -0.039 0.208
sc_prenatalV 0.003 0.065 0.053 0.958 -0.123 0.130
sc_prenatalStudents 0.189 0.277 0.683 0.495 -0.353 0.732
sc_prenatalDead or away -0.066 0.690 -0.096 0.923 -1.419 1.286
sc_prenatalRetired -0.128 0.690 -0.186 0.853 -1.482 1.225
sc_prenatalSingle,no husbnd -0.013 0.080 -0.160 0.872 -0.169 0.144
smoking_preg5mths-no change -0.010 0.081 -0.126 0.900 -0.169 0.148
smoking_pregNow non-smoker 0.067 0.086 0.782 0.434 -0.101 0.236
smoking_preg1 to 4 now -0.030 0.093 -0.321 0.748 -0.212 0.152
smoking_preg5 to 9 now -0.005 0.097 -0.051 0.959 -0.195 0.185
smoking_preg10 to 14 now -0.006 0.111 -0.056 0.955 -0.224 0.212
smoking_preg15 to 19 now -0.148 0.136 -1.091 0.275 -0.415 0.118
smoking_preg20 to 24 now -0.240 0.157 -1.534 0.125 -0.547 0.067
smoking_preg25 to 29 now -0.533 0.242 -2.199 0.028 -1.008 -0.058
smoking_preg30 or more now -0.131 0.271 -0.482 0.630 -0.663 0.401
smoking_pregVariable-5mths -0.010 0.088 -0.117 0.907 -0.182 0.162
birth_weight 0.000 0.000 2.700 0.007 0.000 0.001
parent_read_levelOccasionally 0.094 0.036 2.625 0.009 0.024 0.163
parent_read_levelWeekly 0.155 0.033 4.672 0.000 0.090 0.221
sc_age7NA, unclear -0.458 0.112 -4.099 0.000 -0.677 -0.239
sc_age7No male head -0.438 0.070 -6.294 0.000 -0.575 -0.302
sc_age7II -0.126 0.050 -2.518 0.012 -0.224 -0.028
sc_age7III non-manual -0.172 0.053 -3.225 0.001 -0.277 -0.068
sc_age7III manual -0.415 0.050 -8.336 0.000 -0.512 -0.317
sc_age7IV non-manual -0.359 0.082 -4.389 0.000 -0.519 -0.199
sc_age7IV manual -0.490 0.053 -9.213 0.000 -0.595 -0.386
sc_age7V -0.558 0.061 -9.187 0.000 -0.677 -0.439
breast_fNA -0.041 0.485 -0.085 0.932 -0.992 0.910
breast_fDont know -0.312 0.118 -2.650 0.008 -0.544 -0.081
breast_fNo -0.039 0.023 -1.716 0.086 -0.083 0.006
breast_fOver one month 0.059 0.021 2.746 0.006 0.017 0.101
copying_age7 <- lm(z_copying_age7 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f, data = df_complete)

copying_age7 %>%
  broom::tidy(conf.int = TRUE) %>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value conf.low conf.high
(Intercept) 0.065 0.114 0.572 0.567 -0.158 0.289
sexFemale -0.048 0.017 -2.784 0.005 -0.081 -0.014
sc_prenatalUnemployed,sick 0.304 0.495 0.614 0.539 -0.666 1.274
sc_prenatalI 0.220 0.079 2.785 0.005 0.065 0.374
sc_prenatalII 0.103 0.065 1.575 0.115 -0.025 0.231
sc_prenatalIII 0.071 0.060 1.196 0.232 -0.046 0.188
sc_prenatalIV 0.007 0.064 0.117 0.907 -0.118 0.132
sc_prenatalV -0.116 0.065 -1.771 0.077 -0.244 0.012
sc_prenatalStudents 0.267 0.281 0.952 0.341 -0.283 0.817
sc_prenatalDead or away -0.503 0.699 -0.719 0.472 -1.874 0.868
sc_prenatalRetired 0.788 0.700 1.126 0.260 -0.584 2.160
sc_prenatalSingle,no husbnd -0.154 0.081 -1.906 0.057 -0.312 0.004
smoking_preg5mths-no change 0.021 0.082 0.251 0.802 -0.140 0.181
smoking_pregNow non-smoker 0.024 0.087 0.274 0.784 -0.147 0.195
smoking_preg1 to 4 now 0.019 0.094 0.197 0.844 -0.166 0.203
smoking_preg5 to 9 now -0.073 0.098 -0.745 0.456 -0.266 0.119
smoking_preg10 to 14 now 0.039 0.113 0.351 0.726 -0.181 0.260
smoking_preg15 to 19 now 0.051 0.138 0.372 0.710 -0.219 0.322
smoking_preg20 to 24 now -0.114 0.159 -0.716 0.474 -0.425 0.198
smoking_preg25 to 29 now 0.069 0.246 0.279 0.780 -0.413 0.550
smoking_preg30 or more now -0.224 0.275 -0.815 0.415 -0.763 0.315
smoking_pregVariable-5mths -0.044 0.089 -0.490 0.624 -0.218 0.131
birth_weight 0.000 0.000 2.217 0.027 0.000 0.001
parent_read_levelOccasionally 0.053 0.036 1.459 0.144 -0.018 0.124
parent_read_levelWeekly 0.120 0.034 3.563 0.000 0.054 0.186
sc_age7NA, unclear -0.350 0.113 -3.091 0.002 -0.572 -0.128
sc_age7No male head -0.308 0.071 -4.362 0.000 -0.446 -0.169
sc_age7II -0.069 0.051 -1.368 0.171 -0.169 0.030
sc_age7III non-manual -0.094 0.054 -1.730 0.084 -0.200 0.012
sc_age7III manual -0.267 0.050 -5.302 0.000 -0.366 -0.169
sc_age7IV non-manual -0.263 0.083 -3.173 0.002 -0.425 -0.100
sc_age7IV manual -0.307 0.054 -5.692 0.000 -0.413 -0.201
sc_age7V -0.428 0.062 -6.949 0.000 -0.549 -0.307
breast_fNA -0.165 0.492 -0.335 0.738 -1.129 0.799
breast_fDont know 0.035 0.119 0.294 0.769 -0.199 0.269
breast_fNo -0.089 0.023 -3.849 0.000 -0.134 -0.043
breast_fOver one month 0.070 0.022 3.213 0.001 0.027 0.112
draw_age7 <- lm(z_draw_man_age7 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f, data = df_complete)

draw_age7 %>%
  broom::tidy(conf.int = TRUE) %>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value conf.low conf.high
(Intercept) -0.167 0.113 -1.470 0.141 -0.389 0.056
sexFemale 0.076 0.017 4.488 0.000 0.043 0.110
sc_prenatalUnemployed,sick -1.255 0.492 -2.551 0.011 -2.220 -0.291
sc_prenatalI 0.242 0.078 3.085 0.002 0.088 0.395
sc_prenatalII 0.201 0.065 3.097 0.002 0.074 0.329
sc_prenatalIII 0.148 0.059 2.494 0.013 0.032 0.264
sc_prenatalIV 0.090 0.063 1.420 0.156 -0.034 0.214
sc_prenatalV 0.012 0.065 0.180 0.857 -0.116 0.139
sc_prenatalStudents 0.162 0.279 0.582 0.561 -0.384 0.709
sc_prenatalDead or away -0.747 0.695 -1.074 0.283 -2.109 0.616
sc_prenatalRetired 0.075 0.696 0.108 0.914 -1.288 1.438
sc_prenatalSingle,no husbnd -0.063 0.080 -0.784 0.433 -0.220 0.094
smoking_preg5mths-no change 0.040 0.081 0.487 0.626 -0.120 0.199
smoking_pregNow non-smoker 0.095 0.087 1.095 0.274 -0.075 0.265
smoking_preg1 to 4 now 0.063 0.094 0.674 0.500 -0.120 0.246
smoking_preg5 to 9 now 0.009 0.098 0.096 0.924 -0.182 0.201
smoking_preg10 to 14 now 0.042 0.112 0.375 0.708 -0.177 0.261
smoking_preg15 to 19 now -0.040 0.137 -0.294 0.769 -0.309 0.228
smoking_preg20 to 24 now -0.181 0.158 -1.148 0.251 -0.490 0.128
smoking_preg25 to 29 now 0.277 0.244 1.136 0.256 -0.201 0.756
smoking_preg30 or more now 0.216 0.273 0.791 0.429 -0.320 0.752
smoking_pregVariable-5mths 0.042 0.088 0.480 0.631 -0.131 0.215
birth_weight 0.000 0.000 2.205 0.027 0.000 0.001
parent_read_levelOccasionally 0.115 0.036 3.188 0.001 0.044 0.185
parent_read_levelWeekly 0.168 0.033 5.006 0.000 0.102 0.233
sc_age7NA, unclear -0.484 0.113 -4.297 0.000 -0.705 -0.263
sc_age7No male head -0.304 0.070 -4.337 0.000 -0.442 -0.167
sc_age7II -0.083 0.050 -1.654 0.098 -0.182 0.015
sc_age7III non-manual -0.091 0.054 -1.695 0.090 -0.197 0.014
sc_age7III manual -0.259 0.050 -5.172 0.000 -0.358 -0.161
sc_age7IV non-manual -0.269 0.082 -3.265 0.001 -0.430 -0.107
sc_age7IV manual -0.340 0.054 -6.348 0.000 -0.445 -0.235
sc_age7V -0.459 0.061 -7.508 0.000 -0.579 -0.340
breast_fNA 0.467 0.489 0.956 0.339 -0.491 1.426
breast_fDont know -0.178 0.119 -1.502 0.133 -0.411 0.054
breast_fNo -0.068 0.023 -2.994 0.003 -0.113 -0.024
breast_fOver one month 0.081 0.022 3.775 0.000 0.039 0.123
reading_age7 <- lm(z_reading_test_age7 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f, data = df_complete)

reading_age7 %>%
  broom::tidy(conf.int = TRUE) %>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value conf.low conf.high
(Intercept) -0.167 0.113 -1.470 0.141 -0.389 0.056
sexFemale 0.076 0.017 4.488 0.000 0.043 0.110
sc_prenatalUnemployed,sick -1.255 0.492 -2.551 0.011 -2.220 -0.291
sc_prenatalI 0.242 0.078 3.085 0.002 0.088 0.395
sc_prenatalII 0.201 0.065 3.097 0.002 0.074 0.329
sc_prenatalIII 0.148 0.059 2.494 0.013 0.032 0.264
sc_prenatalIV 0.090 0.063 1.420 0.156 -0.034 0.214
sc_prenatalV 0.012 0.065 0.180 0.857 -0.116 0.139
sc_prenatalStudents 0.162 0.279 0.582 0.561 -0.384 0.709
sc_prenatalDead or away -0.747 0.695 -1.074 0.283 -2.109 0.616
sc_prenatalRetired 0.075 0.696 0.108 0.914 -1.288 1.438
sc_prenatalSingle,no husbnd -0.063 0.080 -0.784 0.433 -0.220 0.094
smoking_preg5mths-no change 0.040 0.081 0.487 0.626 -0.120 0.199
smoking_pregNow non-smoker 0.095 0.087 1.095 0.274 -0.075 0.265
smoking_preg1 to 4 now 0.063 0.094 0.674 0.500 -0.120 0.246
smoking_preg5 to 9 now 0.009 0.098 0.096 0.924 -0.182 0.201
smoking_preg10 to 14 now 0.042 0.112 0.375 0.708 -0.177 0.261
smoking_preg15 to 19 now -0.040 0.137 -0.294 0.769 -0.309 0.228
smoking_preg20 to 24 now -0.181 0.158 -1.148 0.251 -0.490 0.128
smoking_preg25 to 29 now 0.277 0.244 1.136 0.256 -0.201 0.756
smoking_preg30 or more now 0.216 0.273 0.791 0.429 -0.320 0.752
smoking_pregVariable-5mths 0.042 0.088 0.480 0.631 -0.131 0.215
birth_weight 0.000 0.000 2.205 0.027 0.000 0.001
parent_read_levelOccasionally 0.115 0.036 3.188 0.001 0.044 0.185
parent_read_levelWeekly 0.168 0.033 5.006 0.000 0.102 0.233
sc_age7NA, unclear -0.484 0.113 -4.297 0.000 -0.705 -0.263
sc_age7No male head -0.304 0.070 -4.337 0.000 -0.442 -0.167
sc_age7II -0.083 0.050 -1.654 0.098 -0.182 0.015
sc_age7III non-manual -0.091 0.054 -1.695 0.090 -0.197 0.014
sc_age7III manual -0.259 0.050 -5.172 0.000 -0.358 -0.161
sc_age7IV non-manual -0.269 0.082 -3.265 0.001 -0.430 -0.107
sc_age7IV manual -0.340 0.054 -6.348 0.000 -0.445 -0.235
sc_age7V -0.459 0.061 -7.508 0.000 -0.579 -0.340
breast_fNA 0.467 0.489 0.956 0.339 -0.491 1.426
breast_fDont know -0.178 0.119 -1.502 0.133 -0.411 0.054
breast_fNo -0.068 0.023 -2.994 0.003 -0.113 -0.024
breast_fOver one month 0.081 0.022 3.775 0.000 0.039 0.123
extract_parent_read <- function(model, outcome_label) {
  
  coef_df <- tidy(model, conf.int = TRUE) %>%
    filter(term %in% c(
      "parent_read_levelOccasionally",
      "parent_read_levelWeekly"
    )) %>%
    mutate(
      exposure = case_when(
        term == "parent_read_levelOccasionally" ~ "Occasionally",
        term == "parent_read_levelWeekly" ~ "Weekly"
      ),
      outcome = outcome_label
    ) %>%
    dplyr::select(outcome, exposure, estimate, conf.low, conf.high)
  
  # referência (Hardly ever)
  ref_df <- tibble(
    outcome   = outcome_label,
    exposure  = "Hardly ever",
    estimate  = 0,
    conf.low  = 0,
    conf.high = 0
  )
  
  bind_rows(ref_df, coef_df)
}

plot_df <- bind_rows(
  extract_parent_read(global_age7,  "Global cognition"),
  extract_parent_read(arith_age7,   "Arithmetic"),
  extract_parent_read(copying_age7, "Copying"),
  extract_parent_read(draw_age7,    "Draw-a-man"),
  extract_parent_read(reading_age7, "Reading")
)

plot_df$exposure <- factor(
  plot_df$exposure,
  levels = c("Hardly ever", "Occasionally", "Weekly")
)


ggplot(plot_df, aes(x = estimate, y = outcome)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  
  geom_pointrange(
    aes(xmin = conf.low, xmax = conf.high, shape = exposure),
    size = 1, position = position_dodge2(width = 0.9)
  ) +
  
  scale_shape_manual(
    values = c(
      "Hardly ever" = 15,
      "Occasionally" = 16,
      "Weekly" = 17
    )
  ) +
  
  labs(
    x = "Standardized difference in Z score (95% CI)",
    y = NULL,
    shape = "Parental reading frequency"
  ) +
  
  theme_minimal(base_size = 13) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )

COGNITIVE FUNCTION AT AGE 11

df_complete_7_11 <- df_complete %>%
  filter(verbal_ability_age11!=-1 &
           nonverbal_ability_age11!=-1  &
           reading_ability_age11!=-1 &
           maths_ability_age11!=-1 &
           copying_age11!=-1)

# Age 11
# Verbal score – general ability test
df_complete_7_11$mean_verbal_ability_age11 <- mean(df_complete_7_11$verbal_ability_age11)
df_complete_7_11$sd_verbal_ability_age11 <- sd(df_complete_7_11$verbal_ability_age11)
df_complete_7_11$z_verbal_ability_age11 <- (df_complete_7_11$arithmetic_age7-df_complete_7_11$mean_verbal_ability_age11)/df_complete_7_11$sd_verbal_ability_age11
df_complete_7_11$z_verbal_ability_age11 = scale(df_complete_7_11$z_verbal_ability_age11)[,1]

hist(df_complete_7_11$z_verbal_ability_age11)

# Non-verbal score – general ability test
df_complete_7_11$mean_n_verbal_ability_age11 <- mean(df_complete_7_11$nonverbal_ability_age11)
df_complete_7_11$sd_n_verbal_ability_age11 <- sd(df_complete_7_11$nonverbal_ability_age11)
df_complete_7_11$z_n_verbal_ability_age11 <- (df_complete_7_11$nonverbal_ability_age11-df_complete_7_11$mean_n_verbal_ability_age11)/df_complete_7_11$sd_n_verbal_ability_age11
df_complete_7_11$z_n_verbal_ability_age11 = scale(df_complete_7_11$z_n_verbal_ability_age11)[,1]

hist(df_complete_7_11$z_n_verbal_ability_age11)

#Reading comprehension test
df_complete_7_11$mean_reading_ability_age11 <- mean(df_complete_7_11$reading_ability_age11)
df_complete_7_11$sd_reading_ability_age11 <- sd(df_complete_7_11$reading_ability_age11)
df_complete_7_11$z_reading_ability_age11 <- (df_complete_7_11$reading_ability_age11-df_complete_7_11$mean_reading_ability_age11)/df_complete_7_11$sd_reading_ability_age11
df_complete_7_11$z_reading_ability_age11 = scale(df_complete_7_11$z_reading_ability_age11)[,1]

hist(df_complete_7_11$z_reading_ability_age11)

#Mathematics test score
df_complete_7_11$mean_maths_ability_age11 <- mean(df_complete_7_11$maths_ability_age11)
df_complete_7_11$sd_maths_ability_age11 <- sd(df_complete_7_11$maths_ability_age11)
df_complete_7_11$z_maths_ability_age11 <- (df_complete_7_11$maths_ability_age11-df_complete_7_11$mean_maths_ability_age11)/df_complete_7_11$sd_maths_ability_age11
df_complete_7_11$z_maths_ability_age11 = scale(df_complete_7_11$z_maths_ability_age11)[,1]

hist(df_complete_7_11$z_maths_ability_age11)

#Copying designs test score
df_complete_7_11$mean_copying_age11 <- mean(df_complete_7_11$copying_age11)
df_complete_7_11$sd_copying_age11 <- sd(df_complete_7_11$copying_age11)
df_complete_7_11$z_copying_age11 <- (df_complete_7_11$copying_age11-df_complete_7_11$mean_copying_age11)/df_complete_7_11$sd_copying_age11
df_complete_7_11$z_copying_age11 = scale(df_complete_7_11$z_copying_age11)[,1]

hist(df_complete_7_11$z_copying_age11)

# averaging all z scores
df_complete_7_11$global_cog_age11 <- (df_complete_7_11$z_verbal_ability_age11 + df_complete_7_11$z_n_verbal_ability_age11 + df_complete_7_11$z_reading_ability_age11 + df_complete_7_11$z_maths_ability_age11 + df_complete_7_11$z_copying_age11) / 5

# creating a z score for the averaged variable
df_complete_7_11$z_global_cog_age11 = scale(df_complete_7_11$global_cog_age11)[,1]

hist(df_complete_7_11$z_global_cog_age11)

# Reference levels
DescTools::Desc(df_complete_7_11$tv_age11)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$tv_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level    freq   perc  cumfreq  cumperc
## 1    most days  10'134  82.4%   10'134    82.4%
## 2    Sometimes   1'430  11.6%   11'564    94.0%
## 3           NA     438   3.6%   12'002    97.6%
## 4  Hardly ever     299   2.4%   12'301   100.0%

DescTools::Desc(df_complete_7_11$child_read_books_age11)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$child_read_books_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1    Sometimes  5'273  42.9%    5'273    42.9%
## 2    most days  5'223  42.5%   10'496    85.3%
## 3  Hardly ever  1'368  11.1%   11'864    96.4%
## 4           NA    437   3.6%   12'301   100.0%

DescTools::Desc(df_complete_7_11$child_read_news_age11)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$child_read_news_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1    most days  6'156  50.0%    6'156    50.0%
## 2    Sometimes  4'613  37.5%   10'769    87.5%
## 3  Hardly ever  1'033   8.4%   11'802    95.9%
## 4           NA    499   4.1%   12'301   100.0%

DescTools::Desc(df_complete_7_11$physical_activity_age11)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$physical_activity_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1    most days  5'476  44.5%    5'476    44.5%
## 2    Sometimes  5'049  41.0%   10'525    85.6%
## 3  Hardly ever  1'334  10.8%   11'859    96.4%
## 4           NA    442   3.6%   12'301   100.0%

DescTools::Desc(df_complete_7_11$child_writes_stories_age11)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$child_writes_stories_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1  Hardly ever  6'685  54.3%    6'685    54.3%
## 2    Sometimes  4'032  32.8%   10'717    87.1%
## 3    most days  1'082   8.8%   11'799    95.9%
## 4           NA    502   4.1%   12'301   100.0%

DescTools::Desc(df_complete_7_11$child_draws_age11)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$child_draws_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1    Sometimes  6'007  48.8%    6'007    48.8%
## 2  Hardly ever  3'022  24.6%    9'029    73.4%
## 3    most days  2'799  22.8%   11'828    96.2%
## 4           NA    473   3.8%   12'301   100.0%

DescTools::Desc(df_complete_7_11$child_writes_stories_age11)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$child_writes_stories_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1  Hardly ever  6'685  54.3%    6'685    54.3%
## 2    Sometimes  4'032  32.8%   10'717    87.1%
## 3    most days  1'082   8.8%   11'799    95.9%
## 4           NA    502   4.1%   12'301   100.0%

recode_freq <- function(x) {
  case_when(
    x == "Hardly ever" ~ 0,
    x == "Sometimes"  ~ 1,
    x == "most days"  ~ 2,
    TRUE ~ NA_real_
  )
}
df_complete_7_11 <- df_complete_7_11 %>%
  mutate(
    read_books_score = recode_freq(child_read_books_age11),
    read_news_score  = recode_freq(child_read_news_age11),
    
    child_reading_age11 = pmax(read_books_score, read_news_score, na.rm = TRUE),
    child_reading_age11 = ifelse(
      is.infinite(child_reading_age11), NA, child_reading_age11
    )
  )


df_complete_7_11$child_reading_age11 <- factor(
  df_complete_7_11$child_reading_age11,
  levels = c(0, 1, 2),
  labels = c("Hardly ever", "Sometimes", "Most days"))

table(df_complete_7_11$child_reading_age11, useNA = "ifany")
## 
## Hardly ever   Sometimes   Most days        <NA> 
##         233        3248        8536         284
df_complete_7_11 <- df_complete_7_11 %>%
  mutate(
    write_story_score = recode_freq(child_writes_stories_age11),
    draw_score        = recode_freq(child_draws_age11),
    
    child_creative_age11 = pmax(write_story_score, draw_score, na.rm = TRUE),
    child_creative_age11 = ifelse(
      is.infinite(child_creative_age11), NA, child_creative_age11
    )
  )

df_complete_7_11$child_creative_age11 <- factor(
  df_complete_7_11$child_creative_age11,
  levels = c(0, 1, 2),
  labels = c("Hardly ever", "Sometimes", "Most days"))

table(df_complete_7_11$child_creative_age11, useNA = "ifany")
## 
## Hardly ever   Sometimes   Most days        <NA> 
##        2074        6548        3391         288
df_complete_7_11$tv_age11 <- factor(
  df_complete_7_11$tv_age11,
  levels = c("Hardly ever", "Sometimes", "most days")
)

table(df_complete_7_11$tv_age11, useNA = "ifany")
## 
## Hardly ever   Sometimes   most days        <NA> 
##         299        1430       10134         438
df_complete_7_11$physical_activity_age11 <- factor(
  df_complete_7_11$physical_activity_age11,
  levels = c("Hardly ever", "Sometimes", "most days")
)

table(df_complete_7_11$physical_activity_age11, useNA = "ifany")
## 
## Hardly ever   Sometimes   most days        <NA> 
##        1334        5049        5476         442
global_age11 <- lm(z_global_cog_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

global_age11 %>%
  broom::tidy(conf.int = TRUE) %>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value conf.low conf.high
(Intercept) -0.683 0.140 -4.869 0.000 -0.959 -0.408
sexFemale -0.053 0.017 -3.182 0.001 -0.086 -0.020
sc_prenatalUnemployed,sick 0.251 0.873 0.288 0.774 -1.460 1.962
sc_prenatalI 0.418 0.077 5.429 0.000 0.267 0.569
sc_prenatalII 0.267 0.064 4.175 0.000 0.142 0.392
sc_prenatalIII 0.134 0.058 2.295 0.022 0.019 0.248
sc_prenatalIV 0.073 0.062 1.179 0.239 -0.049 0.195
sc_prenatalV -0.090 0.064 -1.404 0.160 -0.216 0.036
sc_prenatalStudents 0.599 0.283 2.114 0.035 0.044 1.155
sc_prenatalDead or away -1.144 0.875 -1.308 0.191 -2.859 0.570
sc_prenatalRetired 0.758 0.622 1.219 0.223 -0.461 1.978
sc_prenatalSingle,no husbnd 0.023 0.080 0.285 0.776 -0.134 0.179
smoking_preg5mths-no change -0.033 0.082 -0.405 0.686 -0.194 0.128
smoking_pregNow non-smoker 0.005 0.087 0.057 0.955 -0.166 0.176
smoking_preg1 to 4 now -0.111 0.094 -1.178 0.239 -0.295 0.074
smoking_preg5 to 9 now -0.140 0.097 -1.440 0.150 -0.331 0.051
smoking_preg10 to 14 now -0.152 0.111 -1.370 0.171 -0.370 0.066
smoking_preg15 to 19 now -0.203 0.138 -1.468 0.142 -0.474 0.068
smoking_preg20 to 24 now -0.373 0.156 -2.395 0.017 -0.679 -0.068
smoking_preg25 to 29 now -0.178 0.264 -0.675 0.500 -0.697 0.340
smoking_preg30 or more now -0.478 0.288 -1.663 0.096 -1.042 0.085
smoking_pregVariable-5mths -0.124 0.089 -1.391 0.164 -0.298 0.051
birth_weight 0.000 0.000 1.759 0.079 0.000 0.000
parent_read_levelOccasionally 0.120 0.035 3.394 0.001 0.051 0.189
parent_read_levelWeekly 0.277 0.033 8.387 0.000 0.212 0.341
sc_age7NA, unclear -0.444 0.117 -3.803 0.000 -0.672 -0.215
sc_age7No male head -0.436 0.080 -5.458 0.000 -0.593 -0.280
sc_age7II -0.155 0.056 -2.774 0.006 -0.264 -0.045
sc_age7III non-manual -0.161 0.059 -2.729 0.006 -0.277 -0.046
sc_age7III manual -0.406 0.056 -7.232 0.000 -0.516 -0.296
sc_age7IV non-manual -0.395 0.084 -4.690 0.000 -0.561 -0.230
sc_age7IV manual -0.490 0.060 -8.221 0.000 -0.607 -0.373
sc_age7V -0.644 0.068 -9.527 0.000 -0.777 -0.512
breast_fNA 0.684 0.436 1.567 0.117 -0.171 1.539
breast_fDont know -0.217 0.125 -1.732 0.083 -0.463 0.029
breast_fNo -0.065 0.023 -2.884 0.004 -0.109 -0.021
breast_fOver one month 0.086 0.021 4.079 0.000 0.045 0.127
sc_age11NA,unclear -0.314 0.057 -5.555 0.000 -0.425 -0.203
sc_age11II -0.004 0.052 -0.086 0.932 -0.107 0.098
sc_age11III non manual -0.083 0.058 -1.437 0.151 -0.197 0.030
sc_age11III manual -0.229 0.053 -4.317 0.000 -0.333 -0.125
sc_age11IV -0.308 0.056 -5.463 0.000 -0.419 -0.198
sc_age11V -0.329 0.067 -4.925 0.000 -0.460 -0.198
sc_age11No male head -0.302 0.069 -4.399 0.000 -0.437 -0.167
tv_age11Sometimes 0.173 0.058 2.958 0.003 0.058 0.287
tv_age11most days 0.282 0.054 5.227 0.000 0.176 0.388
child_reading_age11Sometimes 0.492 0.063 7.806 0.000 0.369 0.616
child_reading_age11Most days 0.782 0.062 12.642 0.000 0.661 0.904
arith_age11 <- lm(z_maths_ability_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

tab_model(arith_age11, digits = 3)
  z maths ability age 11
Predictors Estimates CI p
(Intercept) -0.658 -0.941 – -0.376 <0.001
sex [Female] -0.056 -0.089 – -0.022 0.001
sc prenatal
[Unemployed,sick]
-0.213 -1.970 – 1.543 0.812
sc prenatal [I] 0.429 0.274 – 0.584 <0.001
sc prenatal [II] 0.316 0.187 – 0.444 <0.001
sc prenatal [III] 0.120 0.003 – 0.237 0.045
sc prenatal [IV] 0.070 -0.055 – 0.195 0.274
sc prenatal [V] -0.092 -0.221 – 0.037 0.161
sc prenatal [Students] 0.784 0.213 – 1.354 0.007
sc prenatal [Dead or
away]
-0.915 -2.675 – 0.845 0.308
sc prenatal [Retired] 0.799 -0.452 – 2.051 0.211
sc prenatal [Single,no
husbnd]
0.004 -0.156 – 0.165 0.957
smoking preg [5mths-no
change]
0.032 -0.133 – 0.197 0.705
smoking preg [Now
non-smoker]
0.026 -0.150 – 0.201 0.775
smoking preg [1 to 4 now] -0.044 -0.233 – 0.145 0.647
smoking preg [5 to 9 now] -0.059 -0.255 – 0.136 0.552
smoking preg [10 to 14
now]
-0.123 -0.346 – 0.101 0.282
smoking preg [15 to 19
now]
-0.150 -0.428 – 0.128 0.291
smoking preg [20 to 24
now]
-0.172 -0.486 – 0.141 0.281
smoking preg [25 to 29
now]
-0.112 -0.643 – 0.420 0.681
smoking preg [30 or more
now]
-0.510 -1.089 – 0.068 0.084
smoking_pregVariable-5mths -0.072 -0.251 – 0.107 0.431
birth weight 0.000 0.000 – 0.001 0.036
parent read level
[Occasionally]
0.092 0.021 – 0.163 0.011
parent read level
[Weekly]
0.258 0.192 – 0.325 <0.001
sc age7 [NA, unclear] -0.381 -0.616 – -0.147 0.001
sc age7 [No male head] -0.456 -0.616 – -0.295 <0.001
sc age7 [II] -0.188 -0.300 – -0.076 0.001
sc age7 [III non-manual] -0.204 -0.323 – -0.085 0.001
sc age7 [III manual] -0.446 -0.559 – -0.333 <0.001
sc age7 [IV non-manual] -0.446 -0.616 – -0.277 <0.001
sc age7 [IV manual] -0.521 -0.641 – -0.401 <0.001
sc age7 [V] -0.639 -0.775 – -0.503 <0.001
breast f [NA] 0.979 0.101 – 1.856 0.029
breast f [Dont know] -0.318 -0.570 – -0.066 0.013
breast f [No] -0.047 -0.092 – -0.002 0.042
breast f [Over one month] 0.078 0.035 – 0.120 <0.001
sc age11 [NA,unclear] -0.255 -0.369 – -0.141 <0.001
sc age11 [II] 0.071 -0.034 – 0.176 0.183
sc age11 [III non manual] 0.022 -0.094 – 0.139 0.706
sc age11 [III manual] -0.143 -0.250 – -0.036 0.009
sc age11 [IV] -0.241 -0.354 – -0.127 <0.001
sc age11 [V] -0.245 -0.379 – -0.110 <0.001
sc age11 [No male head] -0.266 -0.404 – -0.128 <0.001
tv age11 [Sometimes] 0.173 0.056 – 0.290 0.004
tv age11 [most days] 0.261 0.153 – 0.370 <0.001
child reading age11
[Sometimes]
0.393 0.267 – 0.520 <0.001
child reading age11 [Most
days]
0.676 0.552 – 0.801 <0.001
Observations 10966
R2 / R2 adjusted 0.186 / 0.182
copying_age11 <- lm(z_copying_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

tab_model(copying_age11, digits = 3)
  z copying age 11
Predictors Estimates CI p
(Intercept) 0.028 -0.270 – 0.327 0.853
sex [Female] -0.049 -0.085 – -0.014 0.007
sc prenatal
[Unemployed,sick]
-0.115 -1.971 – 1.741 0.903
sc prenatal [I] 0.043 -0.121 – 0.207 0.608
sc prenatal [II] 0.005 -0.131 – 0.141 0.946
sc prenatal [III] -0.057 -0.181 – 0.067 0.365
sc prenatal [IV] -0.047 -0.179 – 0.085 0.487
sc prenatal [V] -0.190 -0.326 – -0.054 0.006
sc prenatal [Students] 0.055 -0.548 – 0.657 0.859
sc prenatal [Dead or
away]
-0.972 -2.832 – 0.888 0.306
sc prenatal [Retired] 1.512 0.190 – 2.835 0.025
sc prenatal [Single,no
husbnd]
-0.134 -0.304 – 0.035 0.120
smoking preg [5mths-no
change]
-0.113 -0.287 – 0.062 0.205
smoking preg [Now
non-smoker]
-0.122 -0.308 – 0.063 0.196
smoking preg [1 to 4 now] -0.191 -0.391 – 0.009 0.062
smoking preg [5 to 9 now] -0.206 -0.413 – 0.001 0.051
smoking preg [10 to 14
now]
-0.242 -0.479 – -0.006 0.044
smoking preg [15 to 19
now]
-0.234 -0.528 – 0.060 0.119
smoking preg [20 to 24
now]
-0.610 -0.942 – -0.279 <0.001
smoking preg [25 to 29
now]
-0.425 -0.987 – 0.137 0.138
smoking preg [30 or more
now]
-0.643 -1.255 – -0.032 0.039
smoking_pregVariable-5mths -0.194 -0.383 – -0.005 0.044
birth weight 0.000 -0.000 – 0.000 0.745
parent read level
[Occasionally]
0.057 -0.018 – 0.132 0.136
parent read level
[Weekly]
0.096 0.026 – 0.166 0.007
sc age7 [NA, unclear] -0.084 -0.332 – 0.164 0.505
sc age7 [No male head] -0.174 -0.344 – -0.004 0.045
sc age7 [II] -0.017 -0.136 – 0.101 0.777
sc age7 [III non-manual] -0.012 -0.138 – 0.114 0.849
sc age7 [III manual] -0.087 -0.206 – 0.033 0.155
sc age7 [IV non-manual] -0.117 -0.296 – 0.062 0.200
sc age7 [IV manual] -0.157 -0.284 – -0.031 0.015
sc age7 [V] -0.226 -0.370 – -0.083 0.002
breast f [NA] 0.402 -0.526 – 1.329 0.396
breast f [Dont know] -0.098 -0.364 – 0.168 0.471
breast f [No] -0.062 -0.110 – -0.014 0.011
breast f [Over one month] 0.034 -0.011 – 0.079 0.134
sc age11 [NA,unclear] -0.252 -0.372 – -0.131 <0.001
sc age11 [II] -0.096 -0.207 – 0.015 0.089
sc age11 [III non manual] -0.172 -0.295 – -0.049 0.006
sc age11 [III manual] -0.211 -0.323 – -0.098 <0.001
sc age11 [IV] -0.212 -0.332 – -0.092 0.001
sc age11 [V] -0.262 -0.404 – -0.119 <0.001
sc age11 [No male head] -0.180 -0.326 – -0.034 0.016
tv age11 [Sometimes] 0.013 -0.111 – 0.137 0.841
tv age11 [most days] 0.088 -0.026 – 0.203 0.131
child reading age11
[Sometimes]
0.295 0.161 – 0.429 <0.001
child reading age11 [Most
days]
0.330 0.198 – 0.461 <0.001
Observations 10966
R2 / R2 adjusted 0.037 / 0.033
non_verbal_age11 <- lm(z_n_verbal_ability_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

tab_model(non_verbal_age11, digits = 3)
  z n verbal ability age 11
Predictors Estimates CI p
(Intercept) -0.693 -0.978 – -0.408 <0.001
sex [Female] 0.017 -0.017 – 0.051 0.326
sc prenatal
[Unemployed,sick]
0.198 -1.574 – 1.970 0.827
sc prenatal [I] 0.320 0.164 – 0.476 <0.001
sc prenatal [II] 0.203 0.073 – 0.333 0.002
sc prenatal [III] 0.110 -0.009 – 0.228 0.069
sc prenatal [IV] 0.033 -0.093 – 0.160 0.604
sc prenatal [V] -0.108 -0.238 – 0.022 0.105
sc prenatal [Students] 0.370 -0.205 – 0.946 0.207
sc prenatal [Dead or
away]
-0.693 -2.469 – 1.083 0.444
sc prenatal [Retired] -0.236 -1.499 – 1.027 0.714
sc prenatal [Single,no
husbnd]
0.031 -0.131 – 0.193 0.705
smoking preg [5mths-no
change]
-0.004 -0.171 – 0.162 0.960
smoking preg [Now
non-smoker]
0.021 -0.156 – 0.198 0.813
smoking preg [1 to 4 now] -0.086 -0.277 – 0.104 0.375
smoking preg [5 to 9 now] -0.145 -0.343 – 0.052 0.150
smoking preg [10 to 14
now]
-0.077 -0.303 – 0.148 0.501
smoking preg [15 to 19
now]
-0.204 -0.485 – 0.076 0.154
smoking preg [20 to 24
now]
-0.107 -0.423 – 0.210 0.508
smoking preg [25 to 29
now]
0.176 -0.361 – 0.712 0.520
smoking preg [30 or more
now]
-0.249 -0.833 – 0.335 0.403
smoking_pregVariable-5mths -0.119 -0.299 – 0.062 0.198
birth weight 0.000 -0.000 – 0.000 0.247
parent read level
[Occasionally]
0.160 0.089 – 0.232 <0.001
parent read level
[Weekly]
0.292 0.225 – 0.359 <0.001
sc age7 [NA, unclear] -0.368 -0.605 – -0.131 0.002
sc age7 [No male head] -0.407 -0.570 – -0.245 <0.001
sc age7 [II] -0.148 -0.261 – -0.035 0.011
sc age7 [III non-manual] -0.162 -0.282 – -0.042 0.008
sc age7 [III manual] -0.357 -0.471 – -0.243 <0.001
sc age7 [IV non-manual] -0.360 -0.531 – -0.188 <0.001
sc age7 [IV manual] -0.426 -0.547 – -0.305 <0.001
sc age7 [V] -0.613 -0.750 – -0.476 <0.001
breast f [NA] 0.759 -0.126 – 1.644 0.093
breast f [Dont know] -0.029 -0.283 – 0.226 0.825
breast f [No] -0.085 -0.130 – -0.039 <0.001
breast f [Over one month] 0.080 0.037 – 0.123 <0.001
sc age11 [NA,unclear] -0.304 -0.419 – -0.189 <0.001
sc age11 [II] -0.026 -0.132 – 0.080 0.629
sc age11 [III non manual] -0.113 -0.231 – 0.004 0.059
sc age11 [III manual] -0.225 -0.332 – -0.117 <0.001
sc age11 [IV] -0.280 -0.394 – -0.165 <0.001
sc age11 [V] -0.343 -0.479 – -0.208 <0.001
sc age11 [No male head] -0.291 -0.430 – -0.151 <0.001
tv age11 [Sometimes] 0.207 0.089 – 0.325 0.001
tv age11 [most days] 0.308 0.199 – 0.418 <0.001
child reading age11
[Sometimes]
0.437 0.309 – 0.565 <0.001
child reading age11 [Most
days]
0.677 0.552 – 0.803 <0.001
Observations 10966
R2 / R2 adjusted 0.152 / 0.149
verbal_age11 <- lm(z_verbal_ability_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

tab_model(verbal_age11, digits = 3)
  z verbal ability age 11
Predictors Estimates CI p
(Intercept) -0.416 -0.719 – -0.113 0.007
sex [Female] -0.104 -0.140 – -0.068 <0.001
sc prenatal
[Unemployed,sick]
0.719 -1.167 – 2.606 0.455
sc prenatal [I] 0.302 0.135 – 0.468 <0.001
sc prenatal [II] 0.225 0.087 – 0.363 0.001
sc prenatal [III] 0.200 0.074 – 0.325 0.002
sc prenatal [IV] 0.149 0.015 – 0.283 0.030
sc prenatal [V] 0.069 -0.069 – 0.208 0.326
sc prenatal [Students] 0.452 -0.160 – 1.065 0.148
sc prenatal [Dead or
away]
-0.574 -2.465 – 1.317 0.552
sc prenatal [Retired] -0.040 -1.384 – 1.305 0.954
sc prenatal [Single,no
husbnd]
0.103 -0.069 – 0.275 0.242
smoking preg [5mths-no
change]
-0.059 -0.236 – 0.118 0.515
smoking preg [Now
non-smoker]
0.012 -0.177 – 0.200 0.902
smoking preg [1 to 4 now] -0.083 -0.286 – 0.120 0.424
smoking preg [5 to 9 now] -0.081 -0.292 – 0.129 0.448
smoking preg [10 to 14
now]
-0.064 -0.304 – 0.176 0.602
smoking preg [15 to 19
now]
-0.205 -0.503 – 0.094 0.179
smoking preg [20 to 24
now]
-0.313 -0.650 – 0.024 0.068
smoking preg [25 to 29
now]
-0.500 -1.071 – 0.072 0.086
smoking preg [30 or more
now]
-0.061 -0.682 – 0.561 0.848
smoking_pregVariable-5mths -0.084 -0.277 – 0.108 0.388
birth weight 0.000 -0.000 – 0.000 0.242
parent read level
[Occasionally]
0.064 -0.012 – 0.140 0.100
parent read level
[Weekly]
0.125 0.054 – 0.196 0.001
sc age7 [NA, unclear] -0.315 -0.567 – -0.063 0.014
sc age7 [No male head] -0.271 -0.444 – -0.098 0.002
sc age7 [II] -0.110 -0.230 – 0.011 0.074
sc age7 [III non-manual] -0.139 -0.267 – -0.012 0.033
sc age7 [III manual] -0.306 -0.427 – -0.185 <0.001
sc age7 [IV non-manual] -0.252 -0.434 – -0.070 0.007
sc age7 [IV manual] -0.352 -0.481 – -0.223 <0.001
sc age7 [V] -0.421 -0.567 – -0.275 <0.001
breast f [NA] -0.014 -0.957 – 0.928 0.976
breast f [Dont know] -0.301 -0.571 – -0.030 0.029
breast f [No] -0.011 -0.060 – 0.038 0.654
breast f [Over one month] 0.058 0.013 – 0.104 0.012
sc age11 [NA,unclear] -0.199 -0.321 – -0.077 0.001
sc age11 [II] -0.042 -0.155 – 0.070 0.461
sc age11 [III non manual] -0.080 -0.205 – 0.045 0.208
sc age11 [III manual] -0.146 -0.260 – -0.031 0.013
sc age11 [IV] -0.211 -0.333 – -0.089 0.001
sc age11 [V] -0.154 -0.298 – -0.009 0.037
sc age11 [No male head] -0.194 -0.342 – -0.045 0.011
tv age11 [Sometimes] 0.099 -0.027 – 0.226 0.122
tv age11 [most days] 0.174 0.057 – 0.290 0.004
child reading age11
[Sometimes]
0.340 0.204 – 0.477 <0.001
child reading age11 [Most
days]
0.517 0.383 – 0.651 <0.001
Observations 10966
R2 / R2 adjusted 0.064 / 0.060
reading_age11 <- lm(z_reading_ability_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

tab_model(reading_age11, digits = 3)
  z reading ability age 11
Predictors Estimates CI p
(Intercept) -0.888 -1.166 – -0.610 <0.001
sex [Female] -0.012 -0.046 – 0.021 0.460
sc prenatal
[Unemployed,sick]
0.376 -1.351 – 2.104 0.669
sc prenatal [I] 0.514 0.362 – 0.667 <0.001
sc prenatal [II] 0.278 0.152 – 0.405 <0.001
sc prenatal [III] 0.142 0.027 – 0.257 0.016
sc prenatal [IV] 0.076 -0.047 – 0.199 0.225
sc prenatal [V] -0.025 -0.152 – 0.101 0.694
sc prenatal [Students] 0.642 0.081 – 1.203 0.025
sc prenatal [Dead or
away]
-1.246 -2.977 – 0.485 0.158
sc prenatal [Retired] 0.878 -0.352 – 2.109 0.162
sc prenatal [Single,no
husbnd]
0.083 -0.075 – 0.241 0.301
smoking preg [5mths-no
change]
0.017 -0.146 – 0.179 0.841
smoking preg [Now
non-smoker]
0.082 -0.090 – 0.255 0.349
smoking preg [1 to 4 now] -0.022 -0.208 – 0.165 0.820
smoking preg [5 to 9 now] -0.047 -0.240 – 0.145 0.631
smoking preg [10 to 14
now]
-0.079 -0.299 – 0.141 0.480
smoking preg [15 to 19
now]
0.012 -0.261 – 0.286 0.930
smoking preg [20 to 24
now]
-0.232 -0.541 – 0.076 0.140
smoking preg [25 to 29
now]
0.175 -0.348 – 0.698 0.513
smoking preg [30 or more
now]
-0.375 -0.944 – 0.194 0.196
smoking_pregVariable-5mths -0.007 -0.182 – 0.169 0.942
birth weight 0.000 -0.000 – 0.000 0.080
parent read level
[Occasionally]
0.087 0.017 – 0.156 0.015
parent read level
[Weekly]
0.292 0.227 – 0.358 <0.001
sc age7 [NA, unclear] -0.556 -0.787 – -0.325 <0.001
sc age7 [No male head] -0.370 -0.528 – -0.211 <0.001
sc age7 [II] -0.132 -0.242 – -0.022 0.019
sc age7 [III non-manual] -0.103 -0.220 – 0.014 0.085
sc age7 [III manual] -0.367 -0.478 – -0.255 <0.001
sc age7 [IV non-manual] -0.345 -0.512 – -0.178 <0.001
sc age7 [IV manual] -0.428 -0.546 – -0.310 <0.001
sc age7 [V] -0.577 -0.711 – -0.443 <0.001
breast f [NA] 0.503 -0.360 – 1.366 0.253
breast f [Dont know] -0.089 -0.337 – 0.159 0.483
breast f [No] -0.045 -0.090 – -0.001 0.047
breast f [Over one month] 0.080 0.039 – 0.122 <0.001
sc age11 [NA,unclear] -0.200 -0.312 – -0.088 <0.001
sc age11 [II] 0.076 -0.027 – 0.180 0.148
sc age11 [III non manual] 0.023 -0.092 – 0.137 0.695
sc age11 [III manual] -0.155 -0.260 – -0.050 0.004
sc age11 [IV] -0.241 -0.353 – -0.130 <0.001
sc age11 [V] -0.262 -0.395 – -0.130 <0.001
sc age11 [No male head] -0.231 -0.367 – -0.096 0.001
tv age11 [Sometimes] 0.171 0.056 – 0.287 0.004
tv age11 [most days] 0.252 0.146 – 0.359 <0.001
child reading age11
[Sometimes]
0.426 0.301 – 0.551 <0.001
child reading age11 [Most
days]
0.808 0.685 – 0.930 <0.001
Observations 10966
R2 / R2 adjusted 0.199 / 0.195
extract_age11_exposures <- function(model, outcome_label) {
  
  coef_df <- tidy(model, conf.int = TRUE) %>%
    filter(term %in% c(
      "tv_age11Sometimes",
      "tv_age11most days",
      "child_reading_age11Sometimes",
      "child_reading_age11Most days"
    )) %>%
    mutate(
      exposure = case_when(
        term == "tv_age11Sometimes" ~ "TV: Sometimes",
        term == "tv_age11most days" ~ "TV: Most days",
        term == "child_reading_age11Sometimes" ~ "Reading: Sometimes",
        term == "child_reading_age11Most days" ~ "Reading: Most days"
      ),
      domain = case_when(
        grepl("^tv_", term) ~ "Passive SB (TV)",
        grepl("^child_reading_", term) ~ "Cognitively engaged SB (Reading)"
      ),
      outcome = outcome_label
    ) %>%
    dplyr::select(outcome, domain, exposure, estimate, conf.low, conf.high)
  
  # referências (Hardly ever)
  ref_df <- tibble(
    outcome   = rep(outcome_label, 2),
    domain    = c("Passive SB (TV)", "Cognitively engaged SB (Reading)"),
    exposure  = c("TV: Hardly ever", "Reading: Hardly ever"),
    estimate  = 0,
    conf.low  = 0,
    conf.high = 0
  )
  
  bind_rows(ref_df, coef_df)
}

plot_df_age11 <- bind_rows(
  extract_age11_exposures(global_age11,     "Global cognition"),
  extract_age11_exposures(arith_age11,      "Arithmetic"),
  extract_age11_exposures(copying_age11,    "Copying"),
  extract_age11_exposures(non_verbal_age11, "Non-verbal ability"),
  extract_age11_exposures(verbal_age11,     "Verbal ability"),
  extract_age11_exposures(reading_age11,    "Reading ability")
)

plot_df_age11$exposure <- factor(
  plot_df_age11$exposure,
  levels = c(
    "TV: Hardly ever", "TV: Sometimes", "TV: Most days",
    "Reading: Hardly ever", "Reading: Sometimes", "Reading: Most days"
  )
)

plot_df_age11$dose <- NA

plot_df_age11$dose[plot_df_age11$exposure=="Reading: Hardly ever"] <- "Hardly ever"
plot_df_age11$dose[plot_df_age11$exposure=="TV: Hardly ever"] <- "Hardly ever"
plot_df_age11$dose[plot_df_age11$exposure=="Reading: Sometimes"] <- "Sometimes"
plot_df_age11$dose[plot_df_age11$exposure=="TV: Sometimes"] <- "Sometimes"
plot_df_age11$dose[plot_df_age11$exposure=="TV: Most days"] <- "Most days"
plot_df_age11$dose[plot_df_age11$exposure=="Reading: Most days"] <- "Most days"

library(ggplot2)
plot_df_age11$dose <- factor(plot_df_age11$dose, levels = c("Most days", 
                                                            "Sometimes",
                                                            "Hardly ever"), 
                             ordered = T)

ggplot(plot_df_age11,
       aes(x = estimate, y = outcome, colour = domain, fill=domain, shape = dose)) +
  
  scale_color_jama() +
  scale_fill_jama() +

  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  
  geom_pointrange(
    aes(xmin = conf.low, xmax = conf.high, shape = dose),
    size = 1, position = position_dodge2(width = 0.9)
  ) +
  
  facet_wrap(~ domain, ncol = 1, scales = "free_y") +
  
  labs(
    x = "Standardized difference in cognitive score (z)",
    y = NULL,
    shape = "Exposure category"
  ) +
  
  theme_minimal(base_size = 13) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    strip.text = element_text(face = "bold")
  )

library(broom)
library(dplyr)

extract_age7_11_exposures <- function(model, outcome_label) {
  
  coef_df <- tidy(model, conf.int = TRUE) %>%
    filter(term %in% c(
      # Age 7
      "parent_read_levelOccasionally",
      "parent_read_levelWeekly",
      # Age 11
      "tv_age11Sometimes",
      "tv_age11most days",
      "child_reading_age11Sometimes",
      "child_reading_age11Most days"
    )) %>%
    mutate(
      exposure = case_when(
        term == "parent_read_levelOccasionally" ~ "Reading: Sometimes",
        term == "parent_read_levelWeekly"       ~ "Reading: Most days",
        term == "child_reading_age11Sometimes"  ~ "Reading: Sometimes",
        term == "child_reading_age11Most days"  ~ "Reading: Most days",
        term == "tv_age11Sometimes"              ~ "TV: Sometimes",
        term == "tv_age11most days"              ~ "TV: Most days"
      ),
      domain = case_when(
        grepl("^tv_", term) ~ "Passive SB (TV)",
        TRUE                ~ "Cognitively engaged SB (Reading)"
      ),
      age = case_when(
        grepl("^parent_", term)         ~ "Age 7",
        grepl("^tv_", term)             ~ "Age 11",
        grepl("^child_reading_", term)  ~ "Age 11"
      ),
      outcome = outcome_label
    ) %>%
    dplyr::select(outcome, domain, age, exposure, estimate, conf.low, conf.high)
  
  # referências (Hardly ever)
  ref_df <- tibble(
    outcome   = outcome_label,
    domain    = c(
      "Cognitively engaged SB (Reading)",
      "Cognitively engaged SB (Reading)",
      "Passive SB (TV)"
    ),
    age       = c("Age 7", "Age 11", "Age 11"),
    exposure  = c(
      "Reading: Hardly ever",
      "Reading: Hardly ever",
      "TV: Hardly ever"
    ),
    estimate  = 0,
    conf.low  = 0,
    conf.high = 0
  )
  
  bind_rows(ref_df, coef_df)
}


plot_df <- bind_rows(
  extract_age7_11_exposures(global_age7,  "Global cognition"),
  extract_age7_11_exposures(arith_age7,   "Arithmetic"),
  extract_age7_11_exposures(copying_age7, "Copying"),
  extract_age7_11_exposures(draw_age7,    "Draw-a-man"),
  extract_age7_11_exposures(reading_age7, "Reading"),
  
  extract_age7_11_exposures(global_age11,     "Global cognition"),
  extract_age7_11_exposures(arith_age11,      "Arithmetic"),
  extract_age7_11_exposures(copying_age11,    "Copying"),
  extract_age7_11_exposures(non_verbal_age11, "Non-verbal ability"),
  extract_age7_11_exposures(verbal_age11,     "Verbal ability"),
  extract_age7_11_exposures(reading_age11,    "Reading ability")
)

plot_df$dose <- factor(
  gsub(".*: ", "", plot_df$exposure),
  levels = c("Hardly ever", "Sometimes", "Most days"),
  ordered = TRUE
)


library(ggplot2)
library(ggsci)

ggplot(plot_df,
       aes(x = estimate, y = outcome,
           colour = age, shape = dose)) +
  
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  
  geom_pointrange(
    aes(xmin = conf.low, xmax = conf.high),
    position = position_dodge(width = 0.6),
    size = 0.9
  ) +
  
  facet_wrap(~ domain, ncol = 1, scales = "free_y") +
  
  scale_color_jama() +
  
  labs(
    x = "Standardized difference in cognitive score (z)",
    y = NULL,
    colour = "Age",
    shape  = "Exposure frequency"
  ) +
  
  theme_minimal(base_size = 13) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    strip.text = element_text(face = "bold")
  )

COGNITIVE FUNCTION AT AGE 16

df_complete_7_16 <- df_complete_7_11 %>%
  filter(reading_test_age16!=-1 &
            maths_test_age16!=-1)

# Age 16
#Reading comprehension test
df_complete_7_16$mean_reading_ability_age16 <- mean(df_complete_7_16$reading_test_age16)
df_complete_7_16$sd_reading_ability_age16 <- sd(df_complete_7_16$reading_test_age16)
df_complete_7_16$z_reading_ability_age16 <- (df_complete_7_16$reading_test_age16-df_complete_7_16$mean_reading_ability_age16)/df_complete_7_16$sd_reading_ability_age16
df_complete_7_16$z_reading_ability_age16 = scale(df_complete_7_16$z_reading_ability_age16)[,1]

hist(df_complete_7_16$z_reading_ability_age16)

#Mathematics test score
df_complete_7_16$mean_maths_ability_age16 <- mean(df_complete_7_16$maths_test_age16)
df_complete_7_16$sd_maths_ability_age16 <- sd(df_complete_7_16$maths_test_age16)
df_complete_7_16$z_maths_ability_age16 <- (df_complete_7_16$maths_test_age16-df_complete_7_16$mean_maths_ability_age16)/df_complete_7_16$sd_maths_ability_age16
df_complete_7_16$z_maths_ability_age16 = scale(df_complete_7_16$z_maths_ability_age16)[,1]

hist(df_complete_7_16$z_maths_ability_age16)

# averaging all z scores
df_complete_7_16$global_cog_age16 <- (df_complete_7_16$z_reading_ability_age16 + df_complete_7_16$z_maths_ability_age16) / 2

# creating a z score for the averaged variable
df_complete_7_16$z_global_cog_age16 = scale(df_complete_7_16$global_cog_age16)[,1]

hist(df_complete_7_16$z_global_cog_age16)

# Reference levels
library(dplyr)
DescTools::Desc(df_complete_7_16$tv_age16)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_16$tv_age16 (factor)
## 
##   length      n    NAs unique levels  dupes
##    9'082  9'082      0      5      5      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1        Often  5'736  63.2%    5'736    63.2%
## 2    Sometimes  2'546  28.0%    8'282    91.2%
## 3  Hardly ever    426   4.7%    8'708    95.9%
## 4           NA    324   3.6%    9'032    99.4%
## 5    No chance     50   0.6%    9'082   100.0%

DescTools::Desc(df_complete_7_16$child_read_age16)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_16$child_read_age16 (factor)
## 
##   length      n    NAs unique levels  dupes
##    9'082  9'082      0      5      5      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1    Sometimes  3'838  42.3%    3'838    42.3%
## 2        Often  2'242  24.7%    6'080    66.9%
## 3  Hardly ever  2'136  23.5%    8'216    90.5%
## 4           NA    592   6.5%    8'808    97.0%
## 5    No chance    274   3.0%    9'082   100.0%

DescTools::Desc(df_complete_7_16$play_indoor_age16)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_16$play_indoor_age16 (factor)
## 
##   length      n    NAs unique levels  dupes
##    9'082  9'082      0      5      5      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1  Hardly ever  2'716  29.9%    2'716    29.9%
## 2    Sometimes  2'655  29.2%    5'371    59.1%
## 3        Often  2'096  23.1%    7'467    82.2%
## 4    No chance    851   9.4%    8'318    91.6%
## 5           NA    764   8.4%    9'082   100.0%

DescTools::Desc(df_complete_7_16$play_outdoor_age16)
## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_16$play_outdoor_age16 (factor)
## 
##   length      n    NAs unique levels  dupes
##    9'082  9'082      0      5      5      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1        Often  3'176  35.0%    3'176    35.0%
## 2    Sometimes  3'009  33.1%    6'185    68.1%
## 3  Hardly ever  2'048  22.6%    8'233    90.7%
## 4           NA    585   6.4%    8'818    97.1%
## 5    No chance    264   2.9%    9'082   100.0%

df_complete_7_16$pa_age16 <- dplyr::case_when(
  
  is.na(df_complete_7_16$play_indoor_age16) &
  is.na(df_complete_7_16$play_outdoor_age16) ~ NA_character_,
  
  df_complete_7_16$play_indoor_age16 == "Most days" |
  df_complete_7_16$play_outdoor_age16 == "Most days" ~ "Most days",
  
  df_complete_7_16$play_indoor_age16 == "Sometimes" |
  df_complete_7_16$play_outdoor_age16 == "Sometimes" ~ "Sometimes",
  
  df_complete_7_16$play_indoor_age16 == "Hardly ever" &
  df_complete_7_16$play_outdoor_age16 == "Hardly ever" ~ "Hardly ever"
)

df_complete_7_16$pa_age16 <- factor(
  df_complete_7_16$pa_age16,
  levels = c("Hardly ever", "Sometimes", "Most days")
)

df_complete_7_16$tv_age16 <- factor(
  df_complete_7_16$tv_age16,
  levels = c("No chance", "Hardly ever", "Sometimes", "Often")
)

df_complete_7_16$tv_age16 <- fct_collapse(df_complete_7_16$tv_age16,
                                                "Hardly ever" = c("No chance", "Hardly ever"))


df_complete_7_16$child_read_age16 <- factor(
  df_complete_7_16$child_read_age16,
  levels = c("Hardly ever", "Sometimes", "Most days")
)


global_age16 <- lm(z_global_cog_age16 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11 +
                   sc_age16 + smoking_age16 + alcohol_age16 +
                   child_read_age16 + tv_age16 + school_attendance_age16, data = df_complete_7_16)

tab_model(global_age16, digits = 3)
  z global cog age 16
Predictors Estimates CI p
(Intercept) -1.079 -1.485 – -0.674 <0.001
sex [Female] -0.216 -0.261 – -0.172 <0.001
sc prenatal [I] 0.400 0.190 – 0.609 <0.001
sc prenatal [II] 0.358 0.190 – 0.527 <0.001
sc prenatal [III] 0.185 0.032 – 0.339 0.018
sc prenatal [IV] 0.126 -0.037 – 0.289 0.129
sc prenatal [V] 0.013 -0.156 – 0.183 0.877
sc prenatal [Students] 0.539 -0.089 – 1.167 0.093
sc prenatal [Retired] -0.056 -1.674 – 1.562 0.946
sc prenatal [Single,no
husbnd]
0.233 0.022 – 0.444 0.031
smoking preg [5mths-no
change]
0.120 -0.090 – 0.331 0.262
smoking preg [Now
non-smoker]
0.133 -0.092 – 0.358 0.246
smoking preg [1 to 4 now] 0.023 -0.218 – 0.265 0.850
smoking preg [5 to 9 now] 0.075 -0.178 – 0.328 0.562
smoking preg [10 to 14
now]
0.094 -0.189 – 0.378 0.513
smoking preg [15 to 19
now]
0.038 -0.342 – 0.418 0.845
smoking preg [20 to 24
now]
0.265 -0.154 – 0.684 0.216
smoking preg [25 to 29
now]
0.380 -0.301 – 1.060 0.274
smoking preg [30 or more
now]
-0.003 -0.686 – 0.679 0.993
smoking_pregVariable-5mths 0.079 -0.148 – 0.306 0.496
birth weight 0.000 0.000 – 0.001 0.038
parent read level
[Occasionally]
0.040 -0.053 – 0.133 0.402
parent read level
[Weekly]
0.246 0.159 – 0.333 <0.001
sc age7 [NA, unclear] -0.424 -0.731 – -0.118 0.007
sc age7 [No male head] -0.434 -0.657 – -0.210 <0.001
sc age7 [II] -0.148 -0.307 – 0.010 0.067
sc age7 [III non-manual] -0.162 -0.332 – 0.007 0.060
sc age7 [III manual] -0.330 -0.489 – -0.170 <0.001
sc age7 [IV non-manual] -0.353 -0.585 – -0.121 0.003
sc age7 [IV manual] -0.427 -0.595 – -0.259 <0.001
sc age7 [V] -0.453 -0.643 – -0.263 <0.001
breast f [NA] 0.640 -0.484 – 1.764 0.264
breast f [Dont know] -0.120 -0.434 – 0.194 0.455
breast f [No] -0.048 -0.107 – 0.010 0.103
breast f [Over one month] 0.097 0.042 – 0.152 0.001
sc age11 [NA,unclear] -0.231 -0.391 – -0.070 0.005
sc age11 [II] 0.021 -0.127 – 0.169 0.781
sc age11 [III non manual] -0.009 -0.172 – 0.153 0.912
sc age11 [III manual] -0.093 -0.243 – 0.058 0.226
sc age11 [IV] -0.195 -0.354 – -0.036 0.016
sc age11 [V] -0.178 -0.366 – 0.009 0.062
sc age11 [No male head] -0.083 -0.275 – 0.110 0.400
tv age11 [Sometimes] 0.153 -0.006 – 0.312 0.059
tv age11 [most days] 0.248 0.101 – 0.396 0.001
child reading age11
[Sometimes]
0.347 0.188 – 0.507 <0.001
child reading age11 [Most
days]
0.542 0.385 – 0.699 <0.001
sc age16 [NA,NMH,forces] -0.267 -0.424 – -0.109 0.001
sc age16 [II] -0.111 -0.270 – 0.049 0.174
sc age16 [III non-manual] -0.108 -0.282 – 0.067 0.226
sc age16 [III manual] -0.198 -0.358 – -0.039 0.015
sc age16 [IV non-manual] -0.174 -0.446 – 0.098 0.210
sc age16 [IV manual] -0.228 -0.402 – -0.054 0.010
sc age16 [V] -0.321 -0.519 – -0.122 0.002
sc age16 [Unclear] -0.214 -0.503 – 0.074 0.145
smoking age16 [NA] -0.677 -0.928 – -0.426 <0.001
smoking age16 [Less 1 a
week]
-0.127 -0.259 – 0.005 0.058
smoking age16 [1 to 9 a
week]
-0.262 -0.354 – -0.169 <0.001
smoking age16 [10 to 19 a
week]
-0.284 -0.387 – -0.181 <0.001
smoking age16 [20 to 29 a
week]
-0.355 -0.457 – -0.253 <0.001
smoking age16 [30 to 39 a
week]
-0.355 -0.460 – -0.250 <0.001
smoking age16 [40 to 49 a
week]
-0.386 -0.496 – -0.276 <0.001
smoking age16 [50 to 59 a
week]
-0.312 -0.431 – -0.193 <0.001
smoking age16 [60 or more
a wk]
-0.355 -0.451 – -0.260 <0.001
alcohol age16 [NA] -0.157 -0.517 – 0.204 0.394
alcohol age16 [Less than
1 week]
0.429 0.325 – 0.533 <0.001
alcohol age16 [2 to 4
weeks]
0.359 0.249 – 0.469 <0.001
alcohol age16 [5 to 8
weeks]
0.302 0.172 – 0.433 <0.001
alcohol age16 [9 to 12
weeks]
0.231 0.074 – 0.387 0.004
alcohol age16 [Over 12
weeks]
0.207 0.074 – 0.340 0.002
alcohol age16 [Do not
remember]
0.213 0.097 – 0.328 <0.001
child read age16
[Sometimes]
0.208 0.161 – 0.254 <0.001
tv age16 [Sometimes] -0.004 -0.108 – 0.100 0.939
tv age16 [Often] -0.024 -0.123 – 0.075 0.632
school attendance age16 0.002 0.001 – 0.002 <0.001
Observations 5309
R2 / R2 adjusted 0.271 / 0.260
arith_age16 <- lm(z_maths_ability_age16 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11 +
                   sc_age16 + smoking_age16 + alcohol_age16 +
                   child_read_age16 + tv_age16 + school_attendance_age16, data = df_complete_7_16)

tab_model(arith_age16, digits = 3)
  z maths ability age 16
Predictors Estimates CI p
(Intercept) -0.454 -0.881 – -0.027 0.037
sex [Female] -0.271 -0.318 – -0.224 <0.001
sc prenatal [I] 0.308 0.087 – 0.529 0.006
sc prenatal [II] 0.288 0.111 – 0.466 0.001
sc prenatal [III] 0.070 -0.091 – 0.231 0.393
sc prenatal [IV] 0.034 -0.138 – 0.205 0.701
sc prenatal [V] -0.074 -0.252 – 0.105 0.419
sc prenatal [Students] 0.380 -0.281 – 1.041 0.260
sc prenatal [Retired] -0.548 -2.251 – 1.155 0.528
sc prenatal [Single,no
husbnd]
0.151 -0.071 – 0.373 0.184
smoking preg [5mths-no
change]
0.178 -0.044 – 0.400 0.115
smoking preg [Now
non-smoker]
0.134 -0.102 – 0.371 0.266
smoking preg [1 to 4 now] 0.086 -0.168 – 0.341 0.505
smoking preg [5 to 9 now] 0.138 -0.129 – 0.404 0.311
smoking preg [10 to 14
now]
0.093 -0.205 – 0.391 0.541
smoking preg [15 to 19
now]
0.095 -0.305 – 0.495 0.642
smoking preg [20 to 24
now]
0.250 -0.192 – 0.692 0.267
smoking preg [25 to 29
now]
0.400 -0.316 – 1.117 0.273
smoking preg [30 or more
now]
0.015 -0.703 – 0.734 0.966
smoking_pregVariable-5mths 0.103 -0.136 – 0.343 0.398
birth weight 0.000 -0.000 – 0.001 0.084
parent read level
[Occasionally]
-0.002 -0.100 – 0.096 0.964
parent read level
[Weekly]
0.180 0.088 – 0.272 <0.001
sc age7 [NA, unclear] -0.348 -0.671 – -0.025 0.035
sc age7 [No male head] -0.342 -0.577 – -0.106 0.004
sc age7 [II] -0.153 -0.320 – 0.014 0.072
sc age7 [III non-manual] -0.192 -0.371 – -0.014 0.034
sc age7 [III manual] -0.310 -0.478 – -0.142 <0.001
sc age7 [IV non-manual] -0.377 -0.622 – -0.133 0.002
sc age7 [IV manual] -0.391 -0.568 – -0.214 <0.001
sc age7 [V] -0.389 -0.589 – -0.189 <0.001
breast f [NA] 0.267 -0.917 – 1.451 0.659
breast f [Dont know] -0.289 -0.619 – 0.042 0.087
breast f [No] -0.024 -0.086 – 0.037 0.437
breast f [Over one month] 0.104 0.046 – 0.161 <0.001
sc age11 [NA,unclear] -0.222 -0.391 – -0.053 0.010
sc age11 [II] 0.043 -0.113 – 0.198 0.591
sc age11 [III non manual] 0.039 -0.133 – 0.210 0.659
sc age11 [III manual] -0.075 -0.234 – 0.083 0.353
sc age11 [IV] -0.154 -0.321 – 0.013 0.071
sc age11 [V] -0.121 -0.318 – 0.076 0.229
sc age11 [No male head] -0.124 -0.327 – 0.078 0.228
tv age11 [Sometimes] 0.105 -0.063 – 0.272 0.219
tv age11 [most days] 0.167 0.011 – 0.322 0.036
child reading age11
[Sometimes]
0.240 0.072 – 0.407 0.005
child reading age11 [Most
days]
0.377 0.212 – 0.543 <0.001
sc age16 [NA,NMH,forces] -0.355 -0.521 – -0.190 <0.001
sc age16 [II] -0.198 -0.366 – -0.030 0.021
sc age16 [III non-manual] -0.195 -0.378 – -0.011 0.038
sc age16 [III manual] -0.278 -0.446 – -0.110 0.001
sc age16 [IV non-manual] -0.233 -0.520 – 0.053 0.111
sc age16 [IV manual] -0.290 -0.473 – -0.107 0.002
sc age16 [V] -0.370 -0.579 – -0.161 0.001
sc age16 [Unclear] -0.282 -0.586 – 0.022 0.069
smoking age16 [NA] -0.544 -0.808 – -0.280 <0.001
smoking age16 [Less 1 a
week]
-0.137 -0.276 – 0.002 0.053
smoking age16 [1 to 9 a
week]
-0.245 -0.343 – -0.148 <0.001
smoking age16 [10 to 19 a
week]
-0.258 -0.367 – -0.150 <0.001
smoking age16 [20 to 29 a
week]
-0.348 -0.455 – -0.240 <0.001
smoking age16 [30 to 39 a
week]
-0.335 -0.446 – -0.225 <0.001
smoking age16 [40 to 49 a
week]
-0.452 -0.568 – -0.336 <0.001
smoking age16 [50 to 59 a
week]
-0.354 -0.479 – -0.228 <0.001
smoking age16 [60 or more
a wk]
-0.422 -0.522 – -0.321 <0.001
alcohol age16 [NA] -0.231 -0.610 – 0.149 0.233
alcohol age16 [Less than
1 week]
0.211 0.101 – 0.320 <0.001
alcohol age16 [2 to 4
weeks]
0.151 0.035 – 0.267 0.011
alcohol age16 [5 to 8
weeks]
0.154 0.017 – 0.292 0.028
alcohol age16 [9 to 12
weeks]
0.090 -0.075 – 0.255 0.285
alcohol age16 [Over 12
weeks]
0.066 -0.074 – 0.206 0.355
alcohol age16 [Do not
remember]
0.079 -0.042 – 0.201 0.201
child read age16
[Sometimes]
0.139 0.090 – 0.188 <0.001
tv age16 [Sometimes] -0.022 -0.131 – 0.087 0.688
tv age16 [Often] -0.050 -0.154 – 0.054 0.345
school attendance age16 0.002 0.001 – 0.003 <0.001
Observations 5309
R2 / R2 adjusted 0.221 / 0.210
reading_age16 <- lm(z_reading_ability_age16 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11 +
                   sc_age16 + smoking_age16 + alcohol_age16 +
                   child_read_age16 + tv_age16 + school_attendance_age16, data = df_complete_7_16)

tab_model(reading_age16, digits = 3)
  z reading ability age 16
Predictors Estimates CI p
(Intercept) -1.505 -1.919 – -1.092 <0.001
sex [Female] -0.122 -0.168 – -0.076 <0.001
sc prenatal [I] 0.417 0.203 – 0.632 <0.001
sc prenatal [II] 0.362 0.191 – 0.534 <0.001
sc prenatal [III] 0.266 0.110 – 0.423 0.001
sc prenatal [IV] 0.195 0.029 – 0.362 0.021
sc prenatal [V] 0.098 -0.075 – 0.271 0.267
sc prenatal [Students] 0.598 -0.043 – 1.239 0.067
sc prenatal [Retired] 0.447 -1.205 – 2.098 0.596
sc prenatal [Single,no
husbnd]
0.272 0.056 – 0.487 0.013
smoking preg [5mths-no
change]
0.041 -0.174 – 0.256 0.710
smoking preg [Now
non-smoker]
0.108 -0.122 – 0.337 0.358
smoking preg [1 to 4 now] -0.044 -0.291 – 0.202 0.725
smoking preg [5 to 9 now] -0.002 -0.260 – 0.256 0.989
smoking preg [10 to 14
now]
0.078 -0.211 – 0.368 0.595
smoking preg [15 to 19
now]
-0.026 -0.414 – 0.362 0.895
smoking preg [20 to 24
now]
0.231 -0.197 – 0.659 0.290
smoking preg [25 to 29
now]
0.289 -0.405 – 0.984 0.414
smoking preg [30 or more
now]
-0.021 -0.718 – 0.675 0.952
smoking_pregVariable-5mths 0.040 -0.192 – 0.272 0.736
birth weight 0.000 -0.000 – 0.001 0.056
parent read level
[Occasionally]
0.074 -0.020 – 0.169 0.124
parent read level
[Weekly]
0.267 0.177 – 0.356 <0.001
sc age7 [NA, unclear] -0.423 -0.736 – -0.110 0.008
sc age7 [No male head] -0.446 -0.674 – -0.218 <0.001
sc age7 [II] -0.116 -0.278 – 0.046 0.160
sc age7 [III non-manual] -0.102 -0.275 – 0.071 0.246
sc age7 [III manual] -0.289 -0.452 – -0.126 0.001
sc age7 [IV non-manual] -0.264 -0.501 – -0.027 0.029
sc age7 [IV manual] -0.385 -0.556 – -0.213 <0.001
sc age7 [V] -0.433 -0.627 – -0.239 <0.001
breast f [NA] 0.895 -0.252 – 2.043 0.126
breast f [Dont know] 0.071 -0.250 – 0.392 0.664
breast f [No] -0.064 -0.123 – -0.004 0.036
breast f [Over one month] 0.073 0.017 – 0.129 0.011
sc age11 [NA,unclear] -0.197 -0.361 – -0.033 0.018
sc age11 [II] -0.005 -0.155 – 0.146 0.952
sc age11 [III non manual] -0.055 -0.221 – 0.111 0.514
sc age11 [III manual] -0.094 -0.247 – 0.060 0.233
sc age11 [IV] -0.200 -0.362 – -0.038 0.015
sc age11 [V] -0.203 -0.394 – -0.011 0.038
sc age11 [No male head] -0.025 -0.221 – 0.171 0.800
tv age11 [Sometimes] 0.173 0.010 – 0.335 0.037
tv age11 [most days] 0.284 0.133 – 0.435 <0.001
child reading age11
[Sometimes]
0.391 0.228 – 0.553 <0.001
child reading age11 [Most
days]
0.606 0.446 – 0.767 <0.001
sc age16 [NA,NMH,forces] -0.129 -0.289 – 0.032 0.117
sc age16 [II] -0.003 -0.166 – 0.160 0.970
sc age16 [III non-manual] -0.001 -0.179 – 0.177 0.991
sc age16 [III manual] -0.082 -0.245 – 0.081 0.323
sc age16 [IV non-manual] -0.082 -0.360 – 0.195 0.561
sc age16 [IV manual] -0.123 -0.301 – 0.054 0.173
sc age16 [V] -0.212 -0.415 – -0.009 0.040
sc age16 [Unclear] -0.107 -0.402 – 0.187 0.475
smoking age16 [NA] -0.685 -0.941 – -0.429 <0.001
smoking age16 [Less 1 a
week]
-0.094 -0.229 – 0.040 0.170
smoking age16 [1 to 9 a
week]
-0.230 -0.324 – -0.135 <0.001
smoking age16 [10 to 19 a
week]
-0.257 -0.362 – -0.152 <0.001
smoking age16 [20 to 29 a
week]
-0.297 -0.401 – -0.193 <0.001
smoking age16 [30 to 39 a
week]
-0.309 -0.416 – -0.202 <0.001
smoking age16 [40 to 49 a
week]
-0.249 -0.361 – -0.136 <0.001
smoking age16 [50 to 59 a
week]
-0.213 -0.334 – -0.091 0.001
smoking age16 [60 or more
a wk]
-0.223 -0.321 – -0.126 <0.001
alcohol age16 [NA] -0.054 -0.422 – 0.314 0.774
alcohol age16 [Less than
1 week]
0.568 0.462 – 0.674 <0.001
alcohol age16 [2 to 4
weeks]
0.500 0.388 – 0.613 <0.001
alcohol age16 [5 to 8
weeks]
0.395 0.261 – 0.528 <0.001
alcohol age16 [9 to 12
weeks]
0.329 0.169 – 0.489 <0.001
alcohol age16 [Over 12
weeks]
0.310 0.174 – 0.445 <0.001
alcohol age16 [Do not
remember]
0.307 0.189 – 0.425 <0.001
child read age16
[Sometimes]
0.239 0.191 – 0.286 <0.001
tv age16 [Sometimes] 0.015 -0.091 – 0.121 0.780
tv age16 [Often] 0.006 -0.095 – 0.107 0.903
school attendance age16 0.001 0.000 – 0.002 0.002
Observations 5309
R2 / R2 adjusted 0.237 / 0.227
extract_age7_11_16_exposures <- function(model, outcome_label) {

  coef_df <- tidy(model, conf.int = TRUE) %>%
    filter(term %in% c(
      # Age 7
      "parent_read_levelOccasionally",
      "parent_read_levelWeekly",
      
      # Age 11
      "child_reading_age11Sometimes",
      "child_reading_age11Most days",
      "tv_age11Sometimes",
      "tv_age11most days",
      
      # Age 16
      "child_read_age16Sometimes",
      "tv_age16Sometimes"
    )) %>%
    mutate(
      exposure = case_when(
        term == "parent_read_levelOccasionally" ~ "Reading: Sometimes",
        term == "parent_read_levelWeekly"       ~ "Reading: Most days",
        
        term == "child_reading_age11Sometimes"  ~ "Reading: Sometimes",
        term == "child_reading_age11Most days"  ~ "Reading: Most days",
        
        term == "child_read_age16Sometimes"     ~ "Reading: Sometimes",
        
        term == "tv_age11Sometimes"             ~ "TV: Sometimes",
        term == "tv_age11most days"             ~ "TV: Most days",
        term == "tv_age16Sometimes"             ~ "TV: Sometimes"
      ),
      domain = case_when(
        grepl("^tv_", term) ~ "Passive SB (TV)",
        TRUE                ~ "Cognitively engaged SB (Reading)"
      ),
      age = case_when(
        grepl("^parent_", term)         ~ "Age 7",
        grepl("age11", term)            ~ "Age 11",
        grepl("age16", term)            ~ "Age 16"
      ),
      outcome = outcome_label
    ) %>%
    dplyr::select(outcome, domain, age, exposure, estimate, conf.low, conf.high)

  # referências
  ref_df <- tibble(
    outcome  = outcome_label,
    domain   = c(
      "Cognitively engaged SB (Reading)",
      "Cognitively engaged SB (Reading)",
      "Cognitively engaged SB (Reading)",
      "Passive SB (TV)",
      "Passive SB (TV)"
    ),
    age      = c("Age 7", "Age 11", "Age 16", "Age 11", "Age 16"),
    exposure = c(
      "Reading: Hardly ever",
      "Reading: Hardly ever",
      "Reading: Hardly ever",
      "TV: Hardly ever",
      "TV: Hardly ever"
    ),
    estimate = 0,
    conf.low = 0,
    conf.high = 0
  )

  bind_rows(ref_df, coef_df)
}

plot_df <- bind_rows(
  extract_age7_11_16_exposures(global_age16,  "Global cognition"),
  extract_age7_11_16_exposures(arith_age16,   "Maths ability"),
  extract_age7_11_16_exposures(reading_age16, "Reading ability")
)


plot_df$dose <- factor(
  gsub(".*: ", "", plot_df$exposure),
  levels = c("Most days", "Sometimes", "Hardly ever"),
  ordered = TRUE
)

plot_df$age <- factor(plot_df$age, 
  levels = c("Age 7", "Age 11", "Age 16"),
  ordered = TRUE
)

plot_age16 <- ggplot(plot_df,
       aes(x = estimate,
           y = outcome,
           colour = age,
           shape = dose)) +

  geom_vline(xintercept = 0,
             linetype = "dashed",
             color = "grey50") +

  geom_pointrange(
    aes(xmin = conf.low, xmax = conf.high),
    position = position_dodge(width = 0.6),
    size = 0.9
  ) +

  facet_wrap(~ domain, ncol = 2, scales = "free_y") +

  scale_color_jama() +

  labs(
    x = "Standardized difference in cognitive score (z)",
    y = NULL,
    colour = "Age",
    shape  = "Exposure frequency"
  ) +

  theme_minimal(base_size = 13) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    strip.text = element_text(face = "bold")
  )

plot_age16

COGNITIVE DECLINE

library(dplyr)
library(tidyr)
library(lme4)
library(emmeans)
library(ggplot2)
library(ggsci)
library(broom)
library(broom.mixed)

df_long <- df_complete_7_16 %>%
  dplyr::select(
    ncdsid,
    z_global_cog_age7,
    z_global_cog_age11,
    z_global_cog_age16,
    parent_read_level,
    sex,
    sc_prenatal,
    smoking_preg,
    birth_weight,
    sc_age7,
    breast_f,
    sc_age11,
    sc_age16,
    smoking_age16,
    alcohol_age16,
    school_attendance_age16
  ) %>%
  pivot_longer(
    cols = starts_with("z_global_cog"),
    names_to = "wave",
    values_to = "z_global_cog"
  ) %>%
  mutate(
    age = case_when(
      wave == "z_global_cog_age7"  ~ 7,
      wave == "z_global_cog_age11" ~ 11,
      wave == "z_global_cog_age16" ~ 16
    ),
    id_num = as.numeric(as.factor(ncdsid)),
    parent_read_level = factor(
      parent_read_level,
      levels = c("Hardly ever", "Occasionally", "Weekly")
    )
  ) %>%
  filter(!is.na(z_global_cog), !is.na(parent_read_level))

mixed <- lmer(
  z_global_cog ~ age * parent_read_level +
    sex + sc_prenatal + smoking_preg +
    birth_weight + sc_age7 + breast_f +
    sc_age11 + sc_age16 +
    smoking_age16 + alcohol_age16 +
    school_attendance_age16 +
    (1 | id_num),
  data = df_long,
  REML = TRUE
)

tab_model(mixed, digits = 3, digits.p = 3)
  z global cog
Predictors Estimates CI p
(Intercept) 0.087 -0.175 – 0.348 0.516
age -0.028 -0.035 – -0.021 <0.001
parent read level
[Occasionally]
0.059 -0.057 – 0.174 0.317
parent read level
[Weekly]
-0.137 -0.243 – -0.030 0.012
sex [Female] -0.014 -0.047 – 0.019 0.413
sc prenatal [I] 0.341 0.192 – 0.489 <0.001
sc prenatal [II] 0.269 0.145 – 0.393 <0.001
sc prenatal [III] 0.130 0.017 – 0.242 0.024
sc prenatal [IV] 0.067 -0.053 – 0.188 0.272
sc prenatal [V] -0.043 -0.168 – 0.082 0.498
sc prenatal [Students] 0.445 -0.070 – 0.960 0.091
sc prenatal [Retired] 0.415 -0.653 – 1.483 0.446
sc prenatal [Single,no
husbnd]
-0.025 -0.180 – 0.129 0.747
smoking preg [5mths-no
change]
-0.012 -0.175 – 0.151 0.885
smoking preg [Now
non-smoker]
0.024 -0.148 – 0.197 0.782
smoking preg [1 to 4 now] -0.056 -0.241 – 0.129 0.553
smoking preg [5 to 9 now] -0.062 -0.254 – 0.130 0.528
smoking preg [10 to 14
now]
-0.034 -0.252 – 0.185 0.763
smoking preg [15 to 19
now]
-0.195 -0.465 – 0.075 0.156
smoking preg [20 to 24
now]
-0.371 -0.692 – -0.050 0.024
smoking preg [25 to 29
now]
0.368 -0.156 – 0.893 0.169
smoking preg [30 or more
now]
-0.423 -0.976 – 0.130 0.134
smoking_pregVariable-5mths -0.072 -0.247 – 0.103 0.419
birth weight 0.000 0.000 – 0.001 <0.001
sc age7 [NA, unclear] -0.291 -0.512 – -0.069 0.010
sc age7 [No male head] -0.332 -0.494 – -0.170 <0.001
sc age7 [II] -0.107 -0.218 – 0.003 0.057
sc age7 [III non-manual] -0.090 -0.208 – 0.029 0.137
sc age7 [III manual] -0.303 -0.414 – -0.191 <0.001
sc age7 [IV non-manual] -0.256 -0.422 – -0.090 0.003
sc age7 [IV manual] -0.349 -0.468 – -0.230 <0.001
sc age7 [V] -0.471 -0.607 – -0.336 <0.001
breast f [NA] 0.464 -0.400 – 1.327 0.293
breast f [Dont know] -0.166 -0.403 – 0.071 0.169
breast f [No] -0.069 -0.112 – -0.025 0.002
breast f [Over one month] 0.097 0.056 – 0.138 <0.001
sc age11 [NA,unclear] -0.273 -0.390 – -0.156 <0.001
sc age11 [II] -0.055 -0.162 – 0.053 0.321
sc age11 [III non manual] -0.080 -0.199 – 0.038 0.184
sc age11 [III manual] -0.207 -0.317 – -0.096 <0.001
sc age11 [IV] -0.271 -0.388 – -0.154 <0.001
sc age11 [V] -0.289 -0.428 – -0.150 <0.001
sc age11 [No male head] -0.247 -0.389 – -0.105 0.001
sc age16 [NA,NMH,forces] -0.185 -0.296 – -0.074 0.001
sc age16 [II] -0.087 -0.200 – 0.025 0.129
sc age16 [III non-manual] -0.164 -0.288 – -0.040 0.009
sc age16 [III manual] -0.143 -0.256 – -0.030 0.013
sc age16 [IV non-manual] -0.140 -0.333 – 0.054 0.157
sc age16 [IV manual] -0.174 -0.297 – -0.050 0.006
sc age16 [V] -0.285 -0.430 – -0.141 <0.001
sc age16 [Unclear] 0.015 -0.188 – 0.218 0.884
smoking age16 [NA] -0.405 -0.582 – -0.228 <0.001
smoking age16 [Less 1 a
week]
-0.098 -0.191 – -0.004 0.041
smoking age16 [1 to 9 a
week]
-0.208 -0.279 – -0.138 <0.001
smoking age16 [10 to 19 a
week]
-0.270 -0.348 – -0.191 <0.001
smoking age16 [20 to 29 a
week]
-0.289 -0.366 – -0.212 <0.001
smoking age16 [30 to 39 a
week]
-0.240 -0.320 – -0.160 <0.001
smoking age16 [40 to 49 a
week]
-0.302 -0.387 – -0.217 <0.001
smoking age16 [50 to 59 a
week]
-0.190 -0.284 – -0.095 <0.001
smoking age16 [60 or more
a wk]
-0.202 -0.275 – -0.129 <0.001
alcohol age16 [NA] 0.126 -0.083 – 0.336 0.236
alcohol age16 [Less than
1 week]
0.604 0.528 – 0.680 <0.001
alcohol age16 [2 to 4
weeks]
0.535 0.454 – 0.615 <0.001
alcohol age16 [5 to 8
weeks]
0.476 0.381 – 0.572 <0.001
alcohol age16 [9 to 12
weeks]
0.418 0.306 – 0.531 <0.001
alcohol age16 [Over 12
weeks]
0.370 0.274 – 0.466 <0.001
alcohol age16 [Do not
remember]
0.322 0.238 – 0.406 <0.001
school attendance age16 0.001 0.001 – 0.002 <0.001
age × parent read level
[Occasionally]
0.001 -0.007 – 0.009 0.873
age × parent read level
[Weekly]
0.031 0.024 – 0.038 <0.001
Random Effects
σ2 0.31
τ00 id_num 0.47
ICC 0.60
N id_num 8473
Observations 25419
Marginal R2 / Conditional R2 0.189 / 0.680
emm <- emmeans(
  mixed,
  ~ parent_read_level | age,
  at = list(age = c(7, 11, 16)),
  nuisance = c(
    "sex", "sc_prenatal", "smoking_preg", "sc_age7", "sc_age11", "sc_age16",
    "smoking_age16", "alcohol_age16", "school_attendance_age16", "breast_f"
  )
)

emm_df <- as.data.frame(emm)

plot_trajectory_7_16 <- ggplot(
  emm_df,
  aes(
    x = age,
    y = emmean,
    colour = parent_read_level,
    fill   = parent_read_level
  )
) +
  geom_line(size = 1.1) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
  scale_x_continuous(breaks = c(7, 11, 16)) +
  labs(
    x = "Age (years)",
    y = "Predicted global cognitive score (z)",
    colour = "Parental reading frequency",
    fill   = "Parental reading frequency"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank()
  ) +
  scale_color_bmj() +
  scale_fill_bmj()

int_term <- tidy(
  mixed,
  effects = "fixed",
  conf.int = TRUE
) %>%
  dplyr::filter(grepl("^age:parent_read_levelWeekly", term)) %>%
  mutate(
    p.value = 2 * (1 - pnorm(abs(statistic)))
  )

int_term
## # A tibble: 1 × 8
##   effect term           estimate std.error statistic conf.low conf.high  p.value
##   <chr>  <chr>             <dbl>     <dbl>     <dbl>    <dbl>     <dbl>    <dbl>
## 1 fixed  age:parent_re…   0.0309   0.00376      8.22   0.0236    0.0383 2.22e-16
int_label <- with(int_term, {
  paste0(
    "Interaction (age × weekly parental reading (age 7):\n",
    "β = ", sprintf("%.4f", estimate),
    " (95% CI ", sprintf("%.4f", conf.low),
    ", ", sprintf("%.4f", conf.high), "), ",
    "p = ", ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  )
})

int_label
## [1] "Interaction (age × weekly parental reading (age 7):\nβ = 0.0309 (95% CI 0.0236, 0.0383), p = <0.001"
plot_trajectory_7_16 <- plot_trajectory_7_16 +
  annotate(
    "text",
    x = 7.2,
    y = min(emm_df$emmean) + 0.05,
    label = int_label,
    hjust = 0,
    size = 4.2
  )

plot_trajectory_7_16

GROUP-BASED TRAJECTORY MODELLING

library(lcmm)

df_read_long <- df_complete_7_16 %>%
  mutate(
    reading_age7  = parent_read_level,
    reading_age11 = child_reading_age11,
    reading_age16 = child_read_age16
  ) %>%
  mutate(
    read7 = as.numeric(
      factor(
        reading_age7,
        levels = c("Hardly ever", "Occasionally", "Weekly")
      )
    ) - 1,
    read11 = as.numeric(
      factor(
        reading_age11,
        levels = c("Hardly ever", "Sometimes", "Most days")
      )
    ) - 1,
    read16 = as.numeric(
      factor(
        reading_age16,
        levels = c("Hardly ever", "Sometimes", "Most days")
      )
    ) - 1
  ) %>%
  dplyr::select(
    ncdsid,
    read7,
    read11,
    read16
  ) %>%
  pivot_longer(
    cols = starts_with("read"),
    names_to = "wave",
    values_to = "reading_num"
  ) %>%
  mutate(
    age = case_when(
      wave == "read7"  ~ 7,
      wave == "read11" ~ 11,
      wave == "read16" ~ 16
    ),
    id_num = as.numeric(as.factor(ncdsid))
  ) %>%
  filter(!is.na(reading_num))

m1 <- hlme(
  fixed   = reading_num ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 1,
  data    = df_read_long
)

summary(m1)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = reading_num ~ age, random = ~1, subject = "id_num", 
##     ng = 1, data = df_read_long)
##  
## Statistical Model: 
##      Dataset: df_read_long 
##      Number of subjects: 9080 
##      Number of observations: 23579 
##      Number of latent classes: 1 
##      Number of parameters: 4  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  34 
##      Convergence criteria: parameters= 5e-13 
##                          : likelihood= 4.9e-07 
##                          : second derivatives= 7.4e-06 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -21067.55  
##      AIC: 42143.11  
##      BIC: 42171.56  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##               coef      Se    Wald p-value
## intercept  2.55734 0.01250 204.535 0.00000
## age       -0.10674 0.00110 -96.814 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept         0
## 
##                              coef      Se
## Residual standard error:  0.59129 0.00272
m2 <- hlme(
  fixed   = reading_num ~ age,
  mixture = ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 2,
  data    = df_read_long,
  nwg     = TRUE,
  B       = m1
)

summary(m2)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = reading_num ~ age, mixture = ~age, random = ~1, 
##     subject = "id_num", ng = 2, nwg = TRUE, data = df_read_long)
##  
## Statistical Model: 
##      Dataset: df_read_long 
##      Number of subjects: 9080 
##      Number of observations: 23579 
##      Number of latent classes: 2 
##      Number of parameters: 8  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  206 
##      Convergence criteria: parameters= 4.8e-10 
##                          : likelihood= 1.6e-06 
##                          : second derivatives= 9e-07 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -20627.31  
##      AIC: 41270.62  
##      BIC: 41327.53  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                      coef       Se     Wald p-value
## intercept class1 -2.21667  0.05951  -37.249 0.00000
## 
## Fixed effects in the longitudinal model:
## 
##                      coef       Se     Wald p-value
## intercept class1  0.55147  0.05724    9.635 0.00000
## intercept class2  2.77983  0.01391  199.814 0.00000
## age class1        0.02617  0.00484    5.404 0.00000
## age class2       -0.12158  0.00117 -104.255 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept         0
## 
##                                     coef       Se
## Proportional coefficient class1  0.95921 11.73219
## Residual standard error:         0.53844  0.00279
postprob(m2)
##  
## Posterior classification: 
##   class1  class2
## N 660.00 8420.00
## %   7.27   92.73
##  
## Posterior classification table: 
##      --> mean of posterior probabilities in each class 
##         prob1  prob2
## class1 0.8537 0.1463
## class2 0.0390 0.9610
##  
## Posterior probabilities above a threshold (%): 
##          class1 class2
## prob>0.7  98.79  95.63
## prob>0.8  83.64  92.66
## prob>0.9  33.03  91.69
## 
age_grid <- data.frame(
  age = seq(7, 16, by = 0.1)
)

pred <- predictY(
  m2,
  newdata = age_grid,
  var.time = "age",
  draws = TRUE,
  ndraws = 1000
)

str(pred)
## List of 2
##  $ pred : num [1:91, 1:6] 0.735 0.737 0.74 0.743 0.745 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:6] "Ypred_class1" "Ypred_class2" "lower.Ypred_class1" "lower.Ypred_class2" ...
##  $ times:'data.frame':   91 obs. of  1 variable:
##   ..$ age: num [1:91] 7 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 ...
##  - attr(*, "class")= chr "predictY"
traj_df <- bind_rows(

  data.frame(
    age   = pred$times$age,
    mean  = pred$pred[, "Ypred_class1"],
    lower = pred$pred[, "lower.Ypred_class1"],
    upper = pred$pred[, "upper.Ypred_class1"],
    class = "Class 1"
  ),

  data.frame(
    age   = pred$times$age,
    mean  = pred$pred[, "Ypred_class2"],
    lower = pred$pred[, "lower.Ypred_class2"],
    upper = pred$pred[, "upper.Ypred_class2"],
    class = "Class 2"
  )
)

traj_df$class <- factor(
  traj_df$class,
  levels = c("Class 1", "Class 2"),
  labels = c(
    "High initial / rapidly decreasing",
    "Moderate initial / slowly increasing"
  )
)

fig_traj_reading <- ggplot(traj_df, aes(x = age, y = mean, colour = class, fill = class)) +

  geom_ribbon(
    aes(ymin = lower, ymax = upper),
    alpha = 0.20,
    colour = NA
  ) +

  geom_line(size = 1.2) +

  scale_x_continuous(
    breaks = c(7, 11, 16)
  ) +

  scale_y_continuous(
    breaks = c(0, 1, 2),
    labels = c("Low", "Moderate", "High")
  ) +

  labs(
    x = "Age (years)",
    y = "Reading frequency",
    colour = NULL,
    fill   = NULL
  ) +

  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    axis.title = element_text(face = "bold")
  ) +
  coord_cartesian(ylim = c(0,2)) +
  scale_color_bmj() +
  scale_fill_bmj()

fig_traj_reading

GBTM + COGNITIVE DECLINE

library(lme4)
library(ggeffects)
library(ggplot2)
library(ggsci)

pp <- postprob(m2)
##  
## Posterior classification: 
##   class1  class2
## N 660.00 8420.00
## %   7.27   92.73
##  
## Posterior classification table: 
##      --> mean of posterior probabilities in each class 
##         prob1  prob2
## class1 0.8537 0.1463
## class2 0.0390 0.9610
##  
## Posterior probabilities above a threshold (%): 
##          class1 class2
## prob>0.7  98.79  95.63
## prob>0.8  83.64  92.66
## prob>0.9  33.03  91.69
## 
str(pp)
## List of 3
##  $ : num [1:2, 1:2] 660 7.27 8420 92.73
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "N" "%"
##   .. ..$ : chr [1:2] "class1" "class2"
##  $ : num [1:2, 1:2] 0.854 0.039 0.146 0.961
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "class1" "class2"
##   .. ..$ : chr [1:2] "prob1" "prob2"
##  $ : num [1:3, 1:2] 98.8 83.6 33 95.6 92.7 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:3] "prob>0.7" "prob>0.8" "prob>0.9"
##   .. ..$ : chr [1:2] "class1" "class2"
class_df <- m2$pprob %>%
  mutate(
    read_class = factor(
      class,
      labels = c(
        "High initial / rapidly decreasing",
        "Moderate initial / slowly increasing"
      )
    )
  ) %>%
  dplyr::select(id_num, read_class)

table(class_df$read_class)
## 
##    High initial / rapidly decreasing Moderate initial / slowly increasing 
##                                  660                                 8420
df_cog <- df_long %>%
  left_join(class_df, by = "id_num")

mixed_cog <- lmer(
  z_global_cog ~ age * read_class + 
                sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 +sc_age16 + smoking_age16 + alcohol_age16 +
                   school_attendance_age16 + (1 | ncdsid), data = df_cog
)
tab_model(mixed_cog, digits = 3)
  z global cog
Predictors Estimates CI p
(Intercept) 0.092 -0.169 – 0.352 0.491
age -0.028 -0.035 – -0.021 <0.001
read class [Moderate
initial / slowly
increasing]
0.226 -0.041 – 0.492 0.097
sex [Female] -0.014 -0.047 – 0.019 0.406
sc prenatal [I] 0.343 0.194 – 0.491 <0.001
sc prenatal [II] 0.270 0.146 – 0.394 <0.001
sc prenatal [III] 0.132 0.020 – 0.245 0.021
sc prenatal [IV] 0.069 -0.052 – 0.189 0.263
sc prenatal [V] -0.037 -0.162 – 0.087 0.557
sc prenatal [Students] 0.447 -0.068 – 0.962 0.089
sc prenatal [Retired] 0.413 -0.654 – 1.480 0.448
sc prenatal [Single,no
husbnd]
-0.024 -0.179 – 0.130 0.757
smoking preg [5mths-no
change]
-0.021 -0.184 – 0.142 0.800
smoking preg [Now
non-smoker]
0.015 -0.158 – 0.187 0.867
smoking preg [1 to 4 now] -0.067 -0.253 – 0.118 0.475
smoking preg [5 to 9 now] -0.071 -0.263 – 0.121 0.468
smoking preg [10 to 14
now]
-0.045 -0.264 – 0.173 0.685
smoking preg [15 to 19
now]
-0.189 -0.458 – 0.081 0.170
smoking preg [20 to 24
now]
-0.384 -0.705 – -0.063 0.019
smoking preg [25 to 29
now]
0.355 -0.169 – 0.879 0.185
smoking preg [30 or more
now]
-0.434 -0.987 – 0.118 0.124
smoking_pregVariable-5mths -0.082 -0.256 – 0.093 0.358
birth weight 0.000 0.000 – 0.001 <0.001
parent read level
[Occasionally]
-0.419 -0.678 – -0.160 0.002
parent read level
[Weekly]
-0.280 -0.542 – -0.018 0.037
sc age7 [NA, unclear] -0.294 -0.516 – -0.073 0.009
sc age7 [No male head] -0.335 -0.497 – -0.173 <0.001
sc age7 [II] -0.106 -0.216 – 0.005 0.060
sc age7 [III non-manual] -0.090 -0.208 – 0.028 0.135
sc age7 [III manual] -0.302 -0.413 – -0.190 <0.001
sc age7 [IV non-manual] -0.260 -0.426 – -0.094 0.002
sc age7 [IV manual] -0.348 -0.467 – -0.230 <0.001
sc age7 [V] -0.469 -0.604 – -0.333 <0.001
breast f [NA] 0.626 -0.241 – 1.493 0.157
breast f [Dont know] -0.155 -0.391 – 0.082 0.200
breast f [No] -0.068 -0.112 – -0.025 0.002
breast f [Over one month] 0.097 0.056 – 0.138 <0.001
sc age11 [NA,unclear] -0.269 -0.385 – -0.152 <0.001
sc age11 [II] -0.056 -0.163 – 0.052 0.309
sc age11 [III non manual] -0.081 -0.200 – 0.037 0.180
sc age11 [III manual] -0.208 -0.319 – -0.098 <0.001
sc age11 [IV] -0.271 -0.387 – -0.154 <0.001
sc age11 [V] -0.293 -0.432 – -0.154 <0.001
sc age11 [No male head] -0.247 -0.389 – -0.105 0.001
sc age16 [NA,NMH,forces] -0.185 -0.295 – -0.075 0.001
sc age16 [II] -0.087 -0.200 – 0.025 0.128
sc age16 [III non-manual] -0.163 -0.287 – -0.040 0.010
sc age16 [III manual] -0.142 -0.255 – -0.029 0.014
sc age16 [IV non-manual] -0.132 -0.325 – 0.062 0.182
sc age16 [IV manual] -0.174 -0.298 – -0.051 0.006
sc age16 [V] -0.286 -0.430 – -0.141 <0.001
sc age16 [Unclear] 0.019 -0.184 – 0.222 0.856
smoking age16 [NA] -0.398 -0.574 – -0.221 <0.001
smoking age16 [Less 1 a
week]
-0.097 -0.191 – -0.004 0.041
smoking age16 [1 to 9 a
week]
-0.208 -0.278 – -0.137 <0.001
smoking age16 [10 to 19 a
week]
-0.271 -0.350 – -0.193 <0.001
smoking age16 [20 to 29 a
week]
-0.290 -0.367 – -0.213 <0.001
smoking age16 [30 to 39 a
week]
-0.237 -0.317 – -0.157 <0.001
smoking age16 [40 to 49 a
week]
-0.304 -0.389 – -0.219 <0.001
smoking age16 [50 to 59 a
week]
-0.187 -0.282 – -0.092 <0.001
smoking age16 [60 or more
a wk]
-0.203 -0.276 – -0.130 <0.001
alcohol age16 [NA] 0.144 -0.065 – 0.353 0.178
alcohol age16 [Less than
1 week]
0.605 0.529 – 0.681 <0.001
alcohol age16 [2 to 4
weeks]
0.537 0.456 – 0.617 <0.001
alcohol age16 [5 to 8
weeks]
0.476 0.381 – 0.572 <0.001
alcohol age16 [9 to 12
weeks]
0.418 0.305 – 0.530 <0.001
alcohol age16 [Over 12
weeks]
0.371 0.274 – 0.467 <0.001
alcohol age16 [Do not
remember]
0.325 0.241 – 0.409 <0.001
school attendance age16 0.001 0.001 – 0.002 <0.001
age × read class
[Moderate initial /
slowly increasing]
0.024 0.017 – 0.031 <0.001
Random Effects
σ2 0.31
τ00 ncdsid 0.47
ICC 0.60
N ncdsid 8473
Observations 25419
Marginal R2 / Conditional R2 0.188 / 0.676
pred_cog <- ggpredict(
  mixed_cog,
  terms = c("age [7:16]", "read_class")
)

fig2 <- ggplot(
  pred_cog,
  aes(
    x = x,
    y = predicted,
    colour = group,
    fill = group
  )
)  +
  geom_line(size = 1.2) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
  scale_color_jama() +
  scale_fill_jama() +
  scale_x_continuous(breaks = c(7, 11, 16)) +
  labs(
    x = "Age (years)",
    y = "Global cognitive function (z-score)",
    colour = "Reading trajectory",
    fill   = "Reading trajectory"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "top",
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

int_term <- tidy(
  mixed_cog,
  effects = "fixed",
  conf.int = TRUE
) %>%
  dplyr::filter(grepl("^age:read_class", term)) %>%
  mutate(
    p.value = 2 * (1 - pnorm(abs(statistic)))
  )

int_term
## # A tibble: 1 × 8
##   effect term           estimate std.error statistic conf.low conf.high  p.value
##   <chr>  <chr>             <dbl>     <dbl>     <dbl>    <dbl>     <dbl>    <dbl>
## 1 fixed  age:read_clas…   0.0237   0.00364      6.51   0.0166    0.0308 7.40e-11
int_label <- with(int_term, {
  paste0(
    "Interaction (age × reading trajectory):\n",
    "β = ", sprintf("%.4f", estimate),
    " (95% CI ", sprintf("%.4f", conf.low),
    ", ", sprintf("%.4f", conf.high), "), ",
    "p = ", ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  )
})

int_label
## [1] "Interaction (age × reading trajectory):\nβ = 0.0237 (95% CI 0.0166, 0.0308), p = <0.001"
fig2 <- fig2 +
  annotate(
    "text",
    x = 7.5,
    y = min(pred_cog$predicted) + 0.35,
    label = int_label,
    hjust = 0,
    size = 4.2
  )

fig2

SB ACROSS ADULTHOOD

table(df_complete_7_16$tv_age11)
## 
## Hardly ever   Sometimes   most days 
##         208        1043        7536
table(df_complete_7_16$tv_age16)
## 
## Hardly ever   Sometimes       Often 
##         476        2546        5736
table(df_complete_7_16$tv_age23)
## 
## 5 TIMES A WEEK +  3-4TIMES A WEEK  1-2TIMES A WEEK 2-3TIMES LST4WKS 
##             4976             1288              728              236 
## ONCE IN LST 4WKS NOT DONE LST4WKS 
##               59              106
table(df_complete_7_16$read_book_age23)
## 
## 5 TIMES A WEEK +  3-4TIMES A WEEK  1-2TIMES A WEEK 2-3TIMES LST4WKS 
##             1564              816              941              579 
## ONCE IN LST 4WKS NOT DONE LST4WKS 
##              706             2780
table(df_complete_7_16$read_book_age46)
## 
##                           Refused                        Don't know 
##                                 0                                 0 
##                     Other missing                    Not applicable 
##                                 1                                 0 
##                         Every day Most days (4 or 5 days each week) 
##                              1407                               757 
##              At least once a week             At least once a month 
##                               772                               684 
##              Less often than that                             Never 
##                              1412                               701
table(df_complete_7_16$read_news_age46)
## 
##                           Refused                        Don't know 
##                                 0                                 0 
##                     Other missing                    Not applicable 
##                                 1                                 0 
##                         Every day Most days (4 or 5 days each week) 
##                              2713                              1030 
##              At least once a week             At least once a month 
##                              1431                               280 
##              Less often than that                             Never 
##                               146                               133
table(df_complete_7_16$computer_home_age46)
## 
##        Refused     Don't know  Other missing Not applicable             No 
##              0              0              1              0            763 
##            yes 
##           4970
table(df_complete_7_16$computer_home_freq_age46)
## 
##               Refused            Don't know         Other missing 
##                     0                     2                     0 
##        Not applicable                 Daily      2-4 times a week 
##                   764                  1731                  1243 
##           Once a week Less than once a week                 Never 
##                   603                   750                   641
table(df_complete_7_16$computer_work_age46)
## 
##        Refused     Don't know  Other missing Not applicable             No 
##              0              0              1            690           1417 
##            yes 
##           3626
table(df_complete_7_16$computer_work_freq_age46)
## 
##               Refused            Don't know         Other missing 
##                     0                     2                     0 
##        Not applicable                 Daily      2-4 times a week 
##                  2108                  3165                   246 
##           Once a week Less than once a week 
##                   123                    90
table(df_complete_7_16$computer_home_age50)
## 
##                 Refusal              Don't Know Schedule not applicable 
##                       0                       0                       0 
##     Item not applicable                     Yes                      No 
##                       8                    5288                     517
table(df_complete_7_16$computer_home_freq_age46)
## 
##               Refused            Don't know         Other missing 
##                     0                     2                     0 
##        Not applicable                 Daily      2-4 times a week 
##                   764                  1731                  1243 
##           Once a week Less than once a week                 Never 
##                   603                   750                   641
table(df_complete_7_16$computer_work_age50)
## 
##                 Refusal              Don't Know Schedule not applicable 
##                       0                       0                       0 
##     Item not applicable                     Yes                      No 
##                     818                    3751                    1244
table(df_complete_7_16$computer_work_freq_age46)
## 
##               Refused            Don't know         Other missing 
##                     0                     2                     0 
##        Not applicable                 Daily      2-4 times a week 
##                  2108                  3165                   246 
##           Once a week Less than once a week 
##                   123                    90
table(df_complete_7_16$internet_use_age55)
## 
##                     Refused                  Don't know 
##                           3                           2 
##              Not applicable Everyday or almost everyday 
##                          68                        3962 
##        Several times a week        Once or twice a week 
##                         478                         373 
##       At least once a month         Less often or never 
##                         164                         453
table(df_complete_7_16$read_book_age62)
## 
##                  Not answered                   Multi-coded 
##                            10                             2 
##                Not applicable Every day or almost every day 
##                           745                          1469 
##          Several times a week          Once or twice a week 
##                           525                           392 
##         At least once a month              Every few months 
##                           315                           536 
##          At least once a year           Less often or never 
##                           314                           722
table(df_complete_7_16$tv_age62)
## 
##            Not answered             Multi-coded          Not applicable 
##                      12                       2                     745 
##                    None  Less than 1 hour a day      1 to 2 hours a day 
##                      76                     289                     781 
##      2 to 3 hours a day      3 to 4 hours a day More than 4 hours a day 
##                    1071                     967                    1087
df_complete_7_16$read3_age11 <- factor(
  df_complete_7_16$child_read_books_age11,
  levels = c("Hardly ever", "Sometimes", "most days"),
  labels = c("Low", "Moderate", "High")
)

df_complete_7_16$read3_age16 <- factor(
  df_complete_7_16$child_read_age16,
  levels = c("Hardly ever", "Sometimes", "Most days"),
  labels = c("Low", "Moderate", "High")
)

df_complete_7_16$tv3_age11 <- factor(
  df_complete_7_16$tv_age11,
  levels = c("Hardly ever", "Sometimes", "most days"),
  labels = c("Low", "Moderate", "High")
)

df_complete_7_16$tv3_age16 <- factor(
  df_complete_7_16$tv_age16,
  levels = c("Hardly ever", "Sometimes", "Often"),
  labels = c("Low", "Moderate", "High")
)

df_complete_7_16$tv3_age23 <- case_when(
  df_complete_7_16$tv_age23 %in% c("NOT DONE LST4WKS", "ONCE IN LST 4WKS") ~ "Low",
  df_complete_7_16$tv_age23 %in% c("2-3TIMES LST4WKS", "1-2TIMES A WEEK") ~ "Moderate",
  df_complete_7_16$tv_age23 %in% c("3-4TIMES A WEEK", "5 TIMES A WEEK +") ~ "High",
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))

df_complete_7_16$tv3_age62 <- case_when(
  df_complete_7_16$tv_age62 %in% c("None", "Less than 1 hour a day") ~ "Low",
  df_complete_7_16$tv_age62 %in% c("1 to 2 hours a day", "2 to 3 hours a day") ~ "Moderate",
  df_complete_7_16$tv_age62 %in% c("3 to 4 hours a day", "More than 4 hours a day") ~ "High",
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$read3_age23 <- case_when(
  df_complete_7_16$read_book_age23 %in% c("NOT DONE LST4WKS") ~ "Low",
  df_complete_7_16$read_book_age23 %in% c("ONCE IN LST 4WKS", 
                                          "1-2TIMES A WEEK") ~ "Moderate",
  df_complete_7_16$read_book_age23 %in% c("2-3TIMES LST4WKS",
                                          "3-4TIMES A WEEK", 
                                          "5 TIMES A WEEK +") ~ "High",
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$read3_age46 <- case_when(
  df_complete_7_16$read_book_age46 %in% c("Never", "Less often than that") &
    df_complete_7_16$read_news_age46 %in% c("Never", "Less often than that") ~ "Low",
  
  df_complete_7_16$read_book_age46 %in% c("At least once a week", "At least once a month") |
    df_complete_7_16$read_news_age46 %in% c("At least once a week", "At least once a month") ~ "Moderate",
  
  df_complete_7_16$read_book_age46 %in% c("Every day", "Most days (4 or 5 days each week)") |
    df_complete_7_16$read_news_age46 %in% c("Every day", "Most days (4 or 5 days each week)") ~ "High",
  
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$read3_age62 <- case_when(
  df_complete_7_16$read_book_age62 %in% c("Less often or never", "Every few months" ,"At least once a year") ~ "Low",
  df_complete_7_16$read_book_age62 %in% c("At least once a month", "At least once a year") ~ "Moderate",
  df_complete_7_16$read_book_age62 %in% c("Several times a week", "Once or twice a week", "Every day or almost every day") ~ "High",
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$computer3_home_age46 <- case_when(
  df_complete_7_16$computer_home_age46 == "No" ~ "Low",
  
  df_complete_7_16$computer_home_freq_age46 %in%
    c("Never", "Less than once a week") ~ "Low",
  
  df_complete_7_16$computer_home_freq_age46 %in%
    c("Once a week", "2-4 times a week") ~ "Moderate",
  
  df_complete_7_16$computer_home_freq_age46 %in%
    c("Daily") ~ "High",
  
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$computer3_work_age46 <- case_when(
  df_complete_7_16$computer_work_age46 == "No" ~ "Low",
  
  df_complete_7_16$computer_work_freq_age46 %in%
    c("Less than once a week") ~ "Low",
  
  df_complete_7_16$computer_work_freq_age46 %in%
    c("Once a week") ~ "Moderate",
  
  df_complete_7_16$computer_work_freq_age46 %in%
    c("2-4 times a week", "Daily") ~ "High",
  
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$computer3_age46 <- pmax(
  as.numeric(df_complete_7_16$computer3_home_age46),
  as.numeric(df_complete_7_16$computer3_work_age46),
  na.rm = TRUE
)

df_complete_7_16$computer3_age46 <- factor(
  df_complete_7_16$computer3_age46,
  levels = 1:3,
  labels = c("Low", "Moderate", "High")
)


df_complete_7_16$computer3_age50 <- case_when(
  df_complete_7_16$computer_home_age50 == "No" ~ "Low",
  df_complete_7_16$computer_home_age50 == "Yes" ~ "High",
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "High"))


df_complete_7_16$internet3_age55 <- case_when(
  df_complete_7_16$internet_use_age55 %in%
    c("Less often or never") ~ "Low",
  
  df_complete_7_16$internet_use_age55 %in%
    c("Once or twice a week", "Several times a week") ~ "Moderate",
  
  df_complete_7_16$internet_use_age55 %in%
    c("Everyday or almost everyday") ~ "High",
  
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))

GROUP-BASED TRAJECTORY MODELLING - ADULTHOOD - TV

library(tidyr)
library(lcmm)
library(dplyr)

df_tv <- df_complete_7_16 %>%
  dplyr::select(
    ncdsid,
    tv3_age11,
    tv3_age16,
    tv3_age23,
    tv3_age62
  ) %>%
  mutate(
    tv11 = as.numeric(tv3_age11) - 1,
    tv16 = as.numeric(tv3_age16) - 1,
    tv23 = as.numeric(tv3_age23) - 1,
    tv62 = as.numeric(tv3_age62) - 1
  )

df_tv_long <- df_tv %>%
  pivot_longer(
    cols = c(tv11, tv16, tv23, tv62),
    names_to  = "wave",
    values_to = "tv_num"
  ) %>%
  mutate(
    age = case_when(
      wave == "tv11" ~ 11,
      wave == "tv16" ~ 16,
      wave == "tv23" ~ 23,
      wave == "tv62" ~ 62
    )
  ) %>%
  filter(!is.na(tv_num))

df_tv_long <- df_tv_long %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

m1 <- hlme(
  fixed   = tv_num ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 1,
  data    = df_tv_long
)

summary(m1)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = tv_num ~ age, random = ~1, subject = "id_num", ng = 1, 
##     data = df_tv_long)
##  
## Statistical Model: 
##      Dataset: df_tv_long 
##      Number of subjects: 9079 
##      Number of observations: 29209 
##      Number of latent classes: 1 
##      Number of parameters: 4  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  11 
##      Convergence criteria: parameters= 2.4e-13 
##                          : likelihood= 1.1e-10 
##                          : second derivatives= 4.6e-14 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -22691.85  
##      AIC: 45391.7  
##      BIC: 45420.16  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##               coef      Se     Wald p-value
## intercept  1.86267 0.00531  350.680 0.00000
## age       -0.00720 0.00018  -40.286 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept   0.02617
## 
##                              coef      Se
## Residual standard error:  0.50339 0.00248
m2 <- hlme(
  fixed   = tv_num ~ age,
  mixture = ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 2,
  nwg     = TRUE,
  data    = df_tv_long,
  B       = random(m1)
)

summary(m2)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = tv_num ~ age, mixture = ~age, random = ~1, subject = "id_num", 
##     ng = 2, nwg = TRUE, data = df_tv_long)
##  
## Statistical Model: 
##      Dataset: df_tv_long 
##      Number of subjects: 9079 
##      Number of observations: 29209 
##      Number of latent classes: 2 
##      Number of parameters: 8  
##  
## Iteration process: 
##      Maximum number of iteration reached without convergence 
##      Number of iterations:  500 
##      Convergence criteria: parameters= 0.01 
##                          : likelihood= 5.5e-05 
##                          : second derivatives= 1 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -22019.76  
##      AIC: 44055.51  
##      BIC: 44112.42  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                       coef Se Wald p-value
## intercept class1  -1.16542                
## 
## Fixed effects in the longitudinal model:
## 
##                       coef Se Wald p-value
## intercept class1   1.78087                
## intercept class2   1.88621                
## age class1        -0.01716                
## age class2        -0.00402                
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept         0
## 
##                                      coef Se
## Proportional coefficient class1 141.89345   
## Residual standard error:          0.47405
age_grid <- data.frame(
  age = c(11, 16, 23, 62)
)

pred_tv <- predictY(
  m2,
  newdata = age_grid,
  var.time = "age",
  draws = TRUE
)

str(pred_tv)
## List of 2
##  $ pred : num [1:4, 1:6] 1.592 1.506 1.386 0.717 1.842 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:6] "Ypred_class1" "Ypred_class2" "lower.Ypred_class1" "lower.Ypred_class2" ...
##  $ times:'data.frame':   4 obs. of  1 variable:
##   ..$ age: num [1:4] 11 16 23 62
##  - attr(*, "class")= chr "predictY"
pred_df <- data.frame(
  age = pred_tv$times$age,
  pred_tv$pred
)

traj_df <- pred_df %>%
  pivot_longer(
    cols = -age,
    names_to = "term",
    values_to = "value"
  ) %>%
  separate(term, into = c("type", "class"), sep = "\\.") %>%
  pivot_wider(
    names_from  = type,
    values_from = value
  ) %>%
  mutate(
  class = dplyr::recode(
    class,
    "class1" = "Low / slowly increasing TV",
    "class2" = "High / rapidly increasing TV"
  )
)

# Grid de idades observado
age_grid <- data.frame(
  age = seq(
    min(df_tv_long$age, na.rm = TRUE),
    max(df_tv_long$age, na.rm = TRUE),
    by = 0.5
  )
)

# Predições com IC95%
pred_tv <- predictY(
  m2,
  newdata = age_grid,
  var.time = "age",
  draws = TRUE
)

traj_df <- data.frame(
  age = pred_tv$times$age,

  mean_class1  = pred_tv$pred[, "Ypred_class1"],
  lower_class1 = pred_tv$pred[, "lower.Ypred_class1"],
  upper_class1 = pred_tv$pred[, "upper.Ypred_class1"],

  mean_class2  = pred_tv$pred[, "Ypred_class2"],
  lower_class2 = pred_tv$pred[, "lower.Ypred_class2"],
  upper_class2 = pred_tv$pred[, "upper.Ypred_class2"]
) %>%
  tidyr::pivot_longer(
    cols = -age,
    names_to = c("stat", "class"),
    names_sep = "_class",
    values_to = "value"
  ) %>%
  tidyr::pivot_wider(
    names_from  = stat,
    values_from = value
  ) %>%
  mutate(
    class = factor(
      class,
      levels = c("1", "2"),
      labels = c(
        "High / slowly declining",
        "High / rapidly declining"
      )
    )
  )

fig_tv <- ggplot(traj_df, aes(x = age, y = mean, color = class, fill = class))  +
  geom_line(size = 1.2) +
  scale_color_bmj() +
  scale_fill_bmj() +
  labs(
    x = "Age (years)",
    y = "TV watching (ordinal score)",
    color = "Trajectory TV watching",
    fill  = "Trajectory TV watching"
  ) +
  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 12),
    legend.text  = element_text(size = 11),
    axis.title   = element_text(size = 13),
    axis.text    = element_text(size = 11)
  ) +

  scale_x_continuous(
    breaks = c(11, 16, 23, 62)
  ) +
  scale_y_continuous(
    breaks = c(0, 1, 2),
    labels = c("Low", "Mid", "High")
  )  +
  coord_cartesian(ylim = c(0,2))
fig_tv

COGNITIVE FUNCION - ADULTHOOD

df_complete_7_62 <- df_complete_7_16 %>%
  filter(words_immed_age50>-1 &
            words_delayed>-1 & 
            letter_canc_correct>-1 &
            letter_canc_accuracy>-1 &
            animals_named>-1 & 
            
            words_delayed_age62>-1 &
            letter_can_accuracy>-1 &
            animals_named_age62>-1)

df_complete_7_50 <- df_complete_7_16 %>%
  filter(words_immed_age50>-1 &
            words_delayed>-1 & 
            letter_canc_correct>-1 &
            letter_canc_accuracy>-1 &
            animals_named>-1)

            #### #####

# Age 50
#Word recall test - immediate recall
df_complete_7_50$mean_words_immed_age50 <- mean(df_complete_7_50$words_immed_age50)
df_complete_7_50$sd_words_immed_age50 <- sd(df_complete_7_50$words_immed_age50)
df_complete_7_50$z_words_immed_age50 <- (df_complete_7_50$words_immed_age50-df_complete_7_50$mean_words_immed_age50)/df_complete_7_50$sd_words_immed_age50
df_complete_7_50$z_words_immed_age50 = scale(df_complete_7_50$z_words_immed_age50)[,1]

hist(df_complete_7_50$z_words_immed_age50)

#Word recall test - Delayed recall
df_complete_7_50$mean_words_delayed <- mean(df_complete_7_50$words_delayed)
df_complete_7_50$sd_words_delayed <- sd(df_complete_7_50$words_delayed)
df_complete_7_50$z_words_delayed <- (df_complete_7_50$words_delayed-df_complete_7_50$mean_words_delayed)/df_complete_7_50$sd_words_delayed
df_complete_7_50$z_words_delayed = scale(df_complete_7_50$z_words_delayed)[,1]

hist(df_complete_7_50$z_words_delayed)

#Animal naming - Number of animals named
df_complete_7_50$mean_animals_named <- mean(df_complete_7_50$animals_named)
df_complete_7_50$sd_animals_named <- sd(df_complete_7_50$animals_named)
df_complete_7_50$z_animals_named <- (df_complete_7_50$animals_named-df_complete_7_50$mean_animals_named)/df_complete_7_50$sd_animals_named
df_complete_7_50$z_animals_named = scale(df_complete_7_50$z_animals_named)[,1]

hist(df_complete_7_50$z_animals_named)

#Letter cancelation - accuracy
df_complete_7_50$mean_letter_canc_accuracy <- mean(df_complete_7_50$letter_canc_accuracy)
df_complete_7_50$sd_letter_canc_accuracy <- sd(df_complete_7_50$letter_canc_accuracy)
df_complete_7_50$z_letter_canc_accuracy <- (df_complete_7_50$letter_canc_accuracy-df_complete_7_50$mean_letter_canc_accuracy)/df_complete_7_50$sd_letter_canc_accuracy
df_complete_7_50$z_letter_canc_accuracy = scale(df_complete_7_50$z_letter_canc_accuracy)[,1]

df_complete_7_50$z_letter_canc_accuracy <- df_complete_7_50$z_letter_canc_accuracy*-1
hist(df_complete_7_50$z_letter_canc_accuracy)

# averaging all z scores
df_complete_7_50$global_cog_age50 <- (df_complete_7_50$z_words_immed_age50 + df_complete_7_50$z_words_delayed  + df_complete_7_50$z_animals_named  + df_complete_7_50$z_letter_canc_accuracy) / 4

# creating a z score for the averaged variable
df_complete_7_50$global_cog_age50 = scale(df_complete_7_50$global_cog_age50)[,1]

hist(df_complete_7_50$global_cog_age50)

# Age 50
#Word recall test - immediate recall
df_complete_7_62$mean_words_immed_age50 <- mean(df_complete_7_62$words_immed_age50)
df_complete_7_62$sd_words_immed_age50 <- sd(df_complete_7_62$words_immed_age50)
df_complete_7_62$z_words_immed_age50 <- (df_complete_7_62$words_immed_age50-df_complete_7_62$mean_words_immed_age50)/df_complete_7_62$sd_words_immed_age50
df_complete_7_62$z_words_immed_age50 = scale(df_complete_7_62$z_words_immed_age50)[,1]

hist(df_complete_7_62$z_words_immed_age50)

#Word recall test - Delayed recall
df_complete_7_62$mean_words_delayed <- mean(df_complete_7_62$words_delayed)
df_complete_7_62$sd_words_delayed <- sd(df_complete_7_62$words_delayed)
df_complete_7_62$z_words_delayed <- (df_complete_7_62$words_delayed-df_complete_7_62$mean_words_delayed)/df_complete_7_62$sd_words_delayed
df_complete_7_62$z_words_delayed = scale(df_complete_7_62$z_words_delayed)[,1]

hist(df_complete_7_62$z_words_delayed)

#Animal naming - Number of animals named
df_complete_7_62$mean_animals_named <- mean(df_complete_7_62$animals_named)
df_complete_7_62$sd_animals_named <- sd(df_complete_7_62$animals_named)
df_complete_7_62$z_animals_named <- (df_complete_7_62$animals_named-df_complete_7_62$mean_animals_named)/df_complete_7_62$sd_animals_named
df_complete_7_62$z_animals_named = scale(df_complete_7_62$z_animals_named)[,1]

hist(df_complete_7_62$z_animals_named)

#Letter cancelation - accuracy
df_complete_7_62$mean_letter_canc_accuracy <- mean(df_complete_7_62$letter_canc_accuracy)
df_complete_7_62$sd_letter_canc_accuracy <- sd(df_complete_7_62$letter_canc_accuracy)
df_complete_7_62$z_letter_canc_accuracy <- (df_complete_7_62$letter_canc_accuracy-df_complete_7_62$mean_letter_canc_accuracy)/df_complete_7_62$sd_letter_canc_accuracy
df_complete_7_62$z_letter_canc_accuracy = scale(df_complete_7_62$z_letter_canc_accuracy)[,1]

df_complete_7_62$z_letter_canc_accuracy <- df_complete_7_62$z_letter_canc_accuracy*-1
hist(df_complete_7_62$z_letter_canc_accuracy)

# averaging all z scores
df_complete_7_62$global_cog_age50 <- (df_complete_7_62$z_words_immed_age50 + df_complete_7_62$z_words_delayed  + df_complete_7_62$z_animals_named  + df_complete_7_62$z_letter_canc_accuracy) / 4

# creating a z score for the averaged variable
df_complete_7_62$global_cog_age50 = scale(df_complete_7_62$global_cog_age50)[,1]

hist(df_complete_7_62$global_cog_age50)

# Age 62
#Word recall test - Delayed recall
df_complete_7_62$mean_words_delayed_age62 <- mean(df_complete_7_62$words_delayed_age62)
df_complete_7_62$sd_words_delayed_age62 <- sd(df_complete_7_62$words_delayed_age62)
df_complete_7_62$z_words_delayed_age62 <- (df_complete_7_62$words_delayed_age62-df_complete_7_62$mean_words_delayed_age62)/df_complete_7_62$sd_words_delayed_age62
df_complete_7_62$z_words_delayed_age62 = scale(df_complete_7_62$z_words_delayed_age62)[,1]

hist(df_complete_7_62$z_words_delayed_age62)

#Animal naming - Number of animals named
df_complete_7_62$mean_animals_named_age62 <- mean(df_complete_7_62$animals_named_age62)
df_complete_7_62$sd_animals_named_age62 <- sd(df_complete_7_62$animals_named_age62)
df_complete_7_62$z_animals_named_age62 <- (df_complete_7_62$animals_named_age62-df_complete_7_62$mean_animals_named_age62)/df_complete_7_62$sd_animals_named_age62
df_complete_7_62$z_animals_named_age62 = scale(df_complete_7_62$z_animals_named_age62)[,1]

hist(df_complete_7_62$z_animals_named_age62)

#Letter cancelation - accuracy
df_complete_7_62$mean_letter_can_accuracy <- mean(df_complete_7_62$letter_can_accuracy)
df_complete_7_62$sd_letter_can_accuracy <- sd(df_complete_7_62$letter_can_accuracy)
df_complete_7_62$z_letter_can_accuracy <- (df_complete_7_62$letter_canc_accuracy-df_complete_7_62$mean_letter_can_accuracy)/df_complete_7_62$sd_letter_can_accuracy
df_complete_7_62$z_letter_can_accuracy = scale(df_complete_7_62$z_letter_can_accuracy)[,1]

df_complete_7_62$z_letter_can_accuracy <- df_complete_7_62$z_letter_can_accuracy*-1
hist(df_complete_7_62$z_letter_can_accuracy)

# averaging all z scores
df_complete_7_62$global_cog_age62 <- (df_complete_7_62$z_words_delayed_age62 + df_complete_7_62$z_animals_named_age62  + df_complete_7_62$z_letter_can_accuracy) / 3

# creating a z score for the averaged variable
df_complete_7_62$global_cog_age62 = scale(df_complete_7_62$global_cog_age62)[,1]

hist(df_complete_7_62$global_cog_age62)

COGNITIVE DECLINE - ADULTHOOD

library(dplyr)
library(tidyr)
library(ggeffects)
library(lme4)
library(broom.mixed)

tv_class_df <- m2$pprob %>%
  mutate(
    tv_class = factor(
      class,
      levels = c(1, 2),
      labels = c(
        "High / slowly declining TV",
        "High / rapidly declining TV"
      )
    )
  ) %>%
  dplyr::select(id_num, tv_class)

df_cog_long <- df_complete_7_62 %>%
  dplyr::select(
    ncdsid,
    z_global_cog_age7,
    z_global_cog_age11,
    z_global_cog_age16,
    global_cog_age50,
    global_cog_age62
  ) %>%
  pivot_longer(
    cols = -ncdsid,
    names_to  = "wave",
    values_to = "z_cog"
  ) %>%
  mutate(
    age = case_when(
      wave == "z_global_cog_age7"   ~ 7,
      wave == "z_global_cog_age11"  ~ 11,
      wave == "z_global_cog_age16"  ~ 16,
      wave == "global_cog_age50"    ~ 50,
      wave == "global_cog_age62"    ~ 62
    )
  ) %>%
  filter(!is.na(z_cog)) %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

df_cov <- df_complete_7_62 %>%
  dplyr::select(
    ncdsid,

    # Demográficas
    sex,

    # Gestação / início da vida
    sc_prenatal,
    smoking_preg,
    birth_weight,
    breast_f,

    # Ambiente cognitivo precoce
    parent_read_level,

    # SEP ao longo da infância/adolescência
    sc_age7,
    sc_age11,
    sc_age16,

    # Comportamentos na adolescência
    smoking_age16,
    alcohol_age16,

    # Escolaridade
    school_attendance_age16,
    qual_age33,
    
    #Late adulthood
    malaise_age50,
    diabetes_age50,
    hypertension_age50
  ) %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

df_tv_class <- m2$pprob %>%
  mutate(
    tv_class = factor(
      class,
      labels = c(
        "High / slowly decline TV",
        "High / rapidly decline TV"
      )
    )
  ) %>%
  dplyr::select(id_num, tv_class)

df_cog_tv <- df_cog_long %>%
  left_join(df_tv_class, by = "id_num") %>%
  left_join(df_cov,       by = "id_num")

cog_mixed_adj <- lmer(
  z_cog ~ age * tv_class +

    sex +
    sc_prenatal +
    smoking_preg +
    birth_weight +
    breast_f +
    parent_read_level +

    sc_age7 +
    sc_age11 +
    sc_age16 +

    smoking_age16 +
    alcohol_age16 +
    
    school_attendance_age16 +
    qual_age33 +
    
    malaise_age50 +
    diabetes_age50 +
    hypertension_age50 +

    (1 | id_num),

  data = df_cog_tv,
  REML = TRUE
)

tab_model(cog_mixed_adj, digits = 3)
  z cog
Predictors Estimates CI p
(Intercept) -0.566 -0.914 – -0.218 0.001
age -0.006 -0.008 – -0.005 <0.001
tv class [High / rapidly
decline TV]
-0.062 -0.135 – 0.012 0.099
sex [Female] 0.107 0.064 – 0.150 <0.001
sc prenatal [I] 0.242 0.059 – 0.426 0.010
sc prenatal [II] 0.222 0.061 – 0.384 0.007
sc prenatal [III] 0.142 -0.009 – 0.294 0.065
sc prenatal [IV] 0.147 -0.014 – 0.307 0.074
sc prenatal [V] 0.114 -0.058 – 0.285 0.195
sc prenatal [Students] 0.014 -0.650 – 0.678 0.968
sc prenatal [Single,no
husbnd]
0.079 -0.131 – 0.288 0.462
smoking preg [5mths-no
change]
-0.168 -0.398 – 0.061 0.150
smoking preg [Now
non-smoker]
-0.147 -0.387 – 0.094 0.232
smoking preg [1 to 4 now] -0.193 -0.450 – 0.064 0.140
smoking preg [5 to 9 now] -0.118 -0.382 – 0.146 0.382
smoking preg [10 to 14
now]
-0.126 -0.419 – 0.166 0.398
smoking preg [15 to 19
now]
-0.022 -0.417 – 0.374 0.914
smoking preg [20 to 24
now]
-0.410 -0.817 – -0.004 0.048
smoking preg [25 to 29
now]
-0.225 -0.908 – 0.458 0.518
smoking preg [30 or more
now]
-0.079 -0.767 – 0.609 0.822
smoking_pregVariable-5mths -0.167 -0.411 – 0.076 0.178
birth weight 0.000 -0.000 – 0.001 0.308
breast f [NA] -0.063 -0.855 – 0.730 0.877
breast f [Dont know] 0.071 -0.276 – 0.418 0.689
breast f [No] -0.038 -0.096 – 0.019 0.193
breast f [Over one month] 0.058 0.006 – 0.109 0.028
parent read level
[Occasionally]
0.066 -0.035 – 0.168 0.200
parent read level
[Weekly]
0.099 0.004 – 0.194 0.041
sc age7 [NA, unclear] -0.190 -0.514 – 0.133 0.249
sc age7 [No male head] 0.154 -0.058 – 0.367 0.155
sc age7 [II] -0.005 -0.127 – 0.117 0.938
sc age7 [III non-manual] 0.031 -0.103 – 0.165 0.650
sc age7 [III manual] -0.058 -0.183 – 0.067 0.364
sc age7 [IV non-manual] 0.125 -0.078 – 0.328 0.226
sc age7 [IV manual] -0.024 -0.160 – 0.112 0.725
sc age7 [V] -0.045 -0.212 – 0.123 0.603
sc age11 [NA,unclear] -0.081 -0.221 – 0.058 0.253
sc age11 [II] -0.084 -0.206 – 0.038 0.175
sc age11 [III non manual] -0.020 -0.157 – 0.116 0.773
sc age11 [III manual] -0.079 -0.205 – 0.048 0.224
sc age11 [IV] -0.120 -0.257 – 0.018 0.088
sc age11 [V] -0.069 -0.244 – 0.105 0.437
sc age11 [No male head] -0.137 -0.313 – 0.038 0.125
sc age16 [NA,NMH,forces] -0.096 -0.219 – 0.027 0.127
sc age16 [II] 0.018 -0.106 – 0.142 0.781
sc age16 [III non-manual] -0.095 -0.235 – 0.046 0.186
sc age16 [III manual] -0.072 -0.199 – 0.055 0.266
sc age16 [IV non-manual] 0.086 -0.139 – 0.312 0.453
sc age16 [IV manual] -0.171 -0.316 – -0.026 0.021
sc age16 [V] -0.162 -0.343 – 0.019 0.079
sc age16 [Unclear] 0.060 -0.177 – 0.296 0.621
smoking age16 [NA] -0.125 -0.409 – 0.160 0.391
smoking age16 [Less 1 a
week]
0.024 -0.084 – 0.131 0.666
smoking age16 [1 to 9 a
week]
-0.048 -0.141 – 0.044 0.304
smoking age16 [10 to 19 a
week]
0.009 -0.099 – 0.117 0.872
smoking age16 [20 to 29 a
week]
0.018 -0.084 – 0.120 0.728
smoking age16 [30 to 39 a
week]
-0.052 -0.157 – 0.053 0.330
smoking age16 [40 to 49 a
week]
-0.003 -0.117 – 0.111 0.959
smoking age16 [50 to 59 a
week]
0.082 -0.054 – 0.219 0.238
smoking age16 [60 or more
a wk]
0.007 -0.093 – 0.107 0.886
alcohol age16 [NA] 0.146 -0.181 – 0.474 0.382
alcohol age16 [Less than
1 week]
0.227 0.117 – 0.336 <0.001
alcohol age16 [2 to 4
weeks]
0.180 0.066 – 0.294 0.002
alcohol age16 [5 to 8
weeks]
0.140 0.008 – 0.272 0.038
alcohol age16 [9 to 12
weeks]
0.144 -0.007 – 0.294 0.061
alcohol age16 [Over 12
weeks]
0.120 -0.012 – 0.252 0.076
alcohol age16 [Do not
remember]
0.090 -0.030 – 0.210 0.140
school attendance age16 0.000 -0.001 – 0.001 0.980
qual age33 [No
information]
0.579 0.394 – 0.764 <0.001
qual_age33CSE 2-5/equiv NVQ1 0.256 0.148 – 0.363 <0.001
qual age33 [O Level/equiv
NVQ2]
0.664 0.569 – 0.758 <0.001
qual age33 [A Level/equiv
NVQ3]
0.882 0.778 – 0.985 <0.001
qual age33 [Higher qual
NVQ4]
0.931 0.828 – 1.034 <0.001
qual age33 [Degree/higher
NVQ5,6]
1.360 1.252 – 1.468 <0.001
malaise age50 [incomplete
info]
-0.251 -0.567 – 0.064 0.119
malaise age50 [ High
malaise 4+]
-0.143 -0.207 – -0.080 <0.001
diabetes age50
[Mentioned]
-0.071 -0.181 – 0.038 0.202
hypertension age50
[Mentioned]
-0.041 -0.101 – 0.018 0.173
age × tv class [High /
rapidly decline TV]
0.001 -0.000 – 0.003 0.087
Random Effects
σ2 0.49
τ00 id_num 0.22
ICC 0.31
N id_num 3025
Observations 15125
Marginal R2 / Conditional R2 0.223 / 0.464
df_cog_tv$tv_class <- factor(
  df_cog_tv$tv_class,
  levels = c(
    "High / slowly declining TV",
    "High / rapidly declining TV"
  )
)

pred_cog <- ggpredict(
  cog_mixed_adj,
  terms = c("age [7,11,16,50,62]", "tv_class")
)

pred_df <- as.data.frame(pred_cog)

fig_cog <- ggplot(
  pred_df,
  aes(
    x = x,
    y = predicted,
    color = group,
    fill  = group
  )
) +
  geom_line(size = 1.3) +

  scale_color_manual(
    values = c("#1B9E77", "#D95F02")
  ) +
  scale_fill_manual(
    values = c("#1B9E77", "#D95F02")
  ) +

  scale_x_continuous(
    breaks = c(7, 11, 16, 50, 62)
  ) +

  labs(
    x = "Age (years)",
    y = "Global cognitive function (z-score)",
    color = "TV viewing trajectory",
    fill  = "TV viewing trajectory"
  ) +

  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 12),
    legend.text  = element_text(size = 11),
    axis.title   = element_text(size = 13),
    axis.text    = element_text(size = 11)
  )

int_term <- tidy(
  cog_mixed_adj,
  effects = "fixed",
  conf.int = TRUE
) %>%
  dplyr::filter(grepl("^age:tv_class", term)) %>%
  mutate(
    p.value = 2 * (1 - pnorm(abs(statistic)))
  )

int_term
## # A tibble: 1 × 8
##   effect term            estimate std.error statistic conf.low conf.high p.value
##   <chr>  <chr>              <dbl>     <dbl>     <dbl>    <dbl>     <dbl>   <dbl>
## 1 fixed  age:tv_classHi…  0.00128  0.000747      1.71 -1.87e-4   0.00274  0.0874
int_label <- with(int_term, {
  paste0(
    "Interaction (age × TV trajectory):\n",
    "β = ", sprintf("%.4f", estimate),
    " (95% CI ", sprintf("%.4f", conf.low),
    ", ", sprintf("%.4f", conf.high), "), ",
    "p = ", ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  )
})

int_label
## [1] "Interaction (age × TV trajectory):\nβ = 0.0013 (95% CI -0.0002, 0.0027), p = 0.087"
fig_cog <- fig_cog +
  annotate(
    "text",
    x = 8,
    y = min(pred_df$predicted) + 0.05,
    label = int_label,
    hjust = 0,
    size = 4.2
  )

fig_cog

GROUP-BASED TRAJECTORY MODELLING - READING

library(lcmm)

# df_complete_7_62$read3_age11 <- factor(
#   df_complete_7_62$child_read_books_age11,
#   levels = c("Hardly ever", "Sometimes", "most days"),
#   labels = c("Low", "Moderate", "High")
# )
# 
# df_complete_7_62$read3_age16 <- factor(
#   df_complete_7_62$child_read_age16,
#   levels = c("Hardly ever", "Sometimes", "Most days"),
#   labels = c("Low", "Moderate", "High")
# )


df_read <- df_complete_7_62 %>%
  dplyr::select(
    ncdsid,
    read3_age11,
    read3_age16,
    read3_age23,
    read3_age46,
    read3_age62
  ) %>%
  mutate(
    rdbk11 = as.numeric(read3_age11) - 1,
    rdbk16 = as.numeric(read3_age16) - 1,
    rdbk23 = as.numeric(read3_age23) - 1,
    rdbk46 = as.numeric(read3_age46) - 1,
    rdbk62 = as.numeric(read3_age62) - 1
  )

df_read_long <- df_read %>%
  pivot_longer(
    cols = c(rdbk11, rdbk16, rdbk23, rdbk46, rdbk62),
    names_to  = "wave",
    values_to = "rdbk_num"
  ) %>%
  mutate(
    age = case_when(
      wave == "rdbk11" ~ 11,
      wave == "rdbk16" ~ 16,
      wave == "rdbk23" ~ 23,
      wave == "rdbk46" ~ 46,
      wave == "rdbk62" ~ 62
    )
  ) %>%
  filter(!is.na(rdbk_num))


df_read_long <- df_read_long %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

m1 <- hlme(
  fixed   = rdbk_num ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 1,
  data    = df_read_long
)

summary(m1)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = rdbk_num ~ age, random = ~1, subject = "id_num", 
##     ng = 1, data = df_read_long)
##  
## Statistical Model: 
##      Dataset: df_read_long 
##      Number of subjects: 3548 
##      Number of observations: 15512 
##      Number of latent classes: 1 
##      Number of parameters: 4  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  10 
##      Convergence criteria: parameters= 6.6e-12 
##                          : likelihood= 4.3e-08 
##                          : second derivatives= 2.4e-16 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -17712.83  
##      AIC: 35433.65  
##      BIC: 35458.35  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##              coef      Se    Wald p-value
## intercept 1.11983 0.01224  91.507 0.00000
## age       0.00314 0.00029  10.867 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept   0.09587
## 
##                             coef      Se
## Residual standard error: 0.70727 0.00458
m2 <- hlme(
  fixed   = rdbk_num ~ age,
  mixture = ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 2,
  nwg     = TRUE,
  data    = df_read_long,
  B       = random(m1)
)

summary(m2)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = rdbk_num ~ age, mixture = ~age, random = ~1, subject = "id_num", 
##     ng = 2, nwg = TRUE, data = df_read_long)
##  
## Statistical Model: 
##      Dataset: df_read_long 
##      Number of subjects: 3548 
##      Number of observations: 15512 
##      Number of latent classes: 2 
##      Number of parameters: 8  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  20 
##      Convergence criteria: parameters= 1.9e-10 
##                          : likelihood= 2.2e-06 
##                          : second derivatives= 1.8e-06 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -17536.53  
##      AIC: 35089.07  
##      BIC: 35138.46  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                      coef       Se    Wald p-value
## intercept class1  0.55494  0.05744   9.661 0.00000
## 
## Fixed effects in the longitudinal model:
## 
##                      coef       Se    Wald p-value
## intercept class1  1.19188  0.01694  70.342 0.00000
## intercept class2  0.96949  0.02273  42.648 0.00000
## age class1        0.00833  0.00045  18.701 0.00000
## age class2       -0.00548  0.00066  -8.309 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept         0
## 
##                                     coef       Se
## Proportional coefficient class1  1.01109 11.38158
## Residual standard error:         0.68754  0.00430
age_grid_rdbk <- data.frame(
  age = c(11, 16, 23, 46, 62)
)

pred_read <- predictY(
  m2,
  newdata = age_grid_rdbk,
  var.time = "age",
  draws = TRUE
)

str(pred_read)
## List of 2
##  $ pred : num [1:5, 1:6] 1.28 1.33 1.38 1.57 1.71 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:6] "Ypred_class1" "Ypred_class2" "lower.Ypred_class1" "lower.Ypred_class2" ...
##  $ times:'data.frame':   5 obs. of  1 variable:
##   ..$ age: num [1:5] 11 16 23 46 62
##  - attr(*, "class")= chr "predictY"
pred_df <- data.frame(
  age = pred_read$times$age,
  pred_read$pred
)

traj_df <- pred_df %>%
  pivot_longer(
    cols = -age,
    names_to = "term",
    values_to = "value"
  ) %>%
  separate(term, into = c("type", "class"), sep = "\\.") %>%
  pivot_wider(
    names_from  = type,
    values_from = value
  ) %>%
  mutate(
  class = dplyr::recode(
    class,
    "class1" = "Mid / slowly increasing",
    "class2" = "Mid / slowly declining"
  )
)

# Grid de idades observado
age_grid_rdbk <- data.frame(
  age = seq(
    min(df_read_long$age, na.rm = TRUE),
    max(df_read_long$age, na.rm = TRUE),
    by = 0.5
  )
)

# Predições com IC95%
pred_read <- predictY(
  m2,
  newdata = age_grid_rdbk,
  var.time = "age",
  draws = TRUE
)

traj_df <- data.frame(
  age = pred_read$times$age,

  mean_class1  = pred_read$pred[, "Ypred_class1"],
  lower_class1 = pred_read$pred[, "lower.Ypred_class1"],
  upper_class1 = pred_read$pred[, "upper.Ypred_class1"],

  mean_class2  = pred_read$pred[, "Ypred_class2"],
  lower_class2 = pred_read$pred[, "lower.Ypred_class2"],
  upper_class2 = pred_read$pred[, "upper.Ypred_class2"]
) %>%
  tidyr::pivot_longer(
    cols = -age,
    names_to = c("stat", "class"),
    names_sep = "_class",
    values_to = "value"
  ) %>%
  tidyr::pivot_wider(
    names_from  = stat,
    values_from = value
  ) %>%
  mutate(
    class = factor(
      class,
      levels = c("1", "2"),
      labels = c(
        "Mid / slowly increasing",
        "Mid / slowly declining"
      )
    )
  )

fig_read <- ggplot(traj_df, aes(x = age, y = mean, color = class, fill = class)) +
  geom_ribbon(
    aes(ymin = lower, ymax = upper),
    alpha = 0.25,
    color = NA
  ) +
  geom_line(size = 1.2) +
  scale_color_bmj() +
  scale_fill_bmj() +
  labs(
    x = "Age (years)",
    y = "Reading books (ordinal score)",
    color = "Trajectory",
    fill  = "Trajectory"
  ) +
  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 12),
    legend.text  = element_text(size = 11),
    axis.title   = element_text(size = 13),
    axis.text    = element_text(size = 11)
  ) +

  scale_x_continuous(
    breaks = c(11, 16, 23, 46, 62)
  ) +
  scale_y_continuous(
    breaks = c(0, 1, 2),
    labels = c("Low", "Mid", "High")
  )  +
  coord_cartesian(ylim = c(0,2))
fig_read

COGNITIVE FUNCION - READING ACROSS LIFE COURSE

read_class_df <- m2$pprob %>%
  mutate(
    class = factor(
      class,
      levels = c(1, 2),
      labels = c("Mid / slowly increasing",
                 "Mid / slowly declining"
      )
    )
  ) %>%
  dplyr::select(id_num, class)

df_cog_long <- df_complete_7_62 %>%
  dplyr::select(
    ncdsid,
    z_global_cog_age7,
    z_global_cog_age11,
    z_global_cog_age16,
    global_cog_age50,
    global_cog_age62
  ) %>%
  pivot_longer(
    cols = -ncdsid,
    names_to  = "wave",
    values_to = "z_cog"
  ) %>%
  mutate(
    age = case_when(
      wave == "z_global_cog_age7"   ~ 7,
      wave == "z_global_cog_age11"  ~ 11,
      wave == "z_global_cog_age16"  ~ 16,
      wave == "global_cog_age50"    ~ 50,
      wave == "global_cog_age62"    ~ 62
    )
  ) %>%
  filter(!is.na(z_cog)) %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

df_cov <- df_complete_7_62 %>%
  dplyr::select(
    ncdsid,

    # Demográficas
    sex,

    # Gestação / início da vida
    sc_prenatal,
    smoking_preg,
    birth_weight,
    breast_f,

    # Ambiente cognitivo precoce
    parent_read_level,

    # SEP ao longo da infância/adolescência
    sc_age7,
    sc_age11,
    sc_age16,

    # Comportamentos na adolescência
    smoking_age16,
    alcohol_age16,

    # Escolaridade
    school_attendance_age16,
    qual_age33,
    
    #Late adulthood
    malaise_age50,
    diabetes_age50,
    hypertension_age50
  ) %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

df_read_class <- m2$pprob %>%
  mutate(
    class = factor(
      class,
      labels = c("Mid / slowly increasing",
                 "Mid / slowly declining"
      )
    )
  ) %>%
  dplyr::select(id_num, class)

df_cog_read <- df_cog_long %>%
  left_join(df_read_class, by = "id_num") %>%
  left_join(df_cov,       by = "id_num")


library(lme4)

cog_mixed_adj_read <- lmer(
  z_cog ~ age * class +

    sex +
    sc_prenatal +
    smoking_preg +
    birth_weight +
    breast_f +
    parent_read_level +

    sc_age7 +
    sc_age11 +
    sc_age16 +

    smoking_age16 +
    alcohol_age16 +
    school_attendance_age16 +
    qual_age33 +
    
    malaise_age50 +
    diabetes_age50 +
    hypertension_age50 +
    
    (1 | id_num),

  data = df_cog_read,
  REML = TRUE
)

tab_model(cog_mixed_adj_read, digits = 3)
  z cog
Predictors Estimates CI p
(Intercept) -0.508 -0.850 – -0.166 0.004
age -0.005 -0.006 – -0.004 <0.001
class [Mid / slowly
declining]
-0.149 -0.203 – -0.095 <0.001
sex [Female] 0.070 0.027 – 0.113 0.001
sc prenatal [I] 0.228 0.046 – 0.410 0.014
sc prenatal [II] 0.218 0.058 – 0.378 0.008
sc prenatal [III] 0.132 -0.018 – 0.282 0.084
sc prenatal [IV] 0.137 -0.022 – 0.297 0.091
sc prenatal [V] 0.106 -0.064 – 0.276 0.223
sc prenatal [Students] 0.034 -0.623 – 0.691 0.919
sc prenatal [Single,no
husbnd]
0.071 -0.137 – 0.279 0.502
smoking preg [5mths-no
change]
-0.173 -0.400 – 0.054 0.135
smoking preg [Now
non-smoker]
-0.145 -0.383 – 0.093 0.233
smoking preg [1 to 4 now] -0.207 -0.461 – 0.048 0.112
smoking preg [5 to 9 now] -0.139 -0.401 – 0.123 0.298
smoking preg [10 to 14
now]
-0.144 -0.434 – 0.146 0.331
smoking preg [15 to 19
now]
-0.029 -0.420 – 0.363 0.886
smoking preg [20 to 24
now]
-0.406 -0.809 – -0.004 0.048
smoking preg [25 to 29
now]
-0.166 -0.842 – 0.510 0.630
smoking preg [30 or more
now]
-0.032 -0.713 – 0.650 0.927
smoking_pregVariable-5mths -0.169 -0.410 – 0.072 0.170
birth weight 0.000 -0.000 – 0.000 0.354
breast f [NA] -0.115 -0.900 – 0.670 0.775
breast f [Dont know] 0.063 -0.281 – 0.407 0.720
breast f [No] -0.032 -0.089 – 0.024 0.264
breast f [Over one month] 0.063 0.012 – 0.114 0.015
parent read level
[Occasionally]
0.051 -0.049 – 0.151 0.320
parent read level
[Weekly]
0.077 -0.017 – 0.171 0.110
sc age7 [NA, unclear] -0.190 -0.510 – 0.130 0.245
sc age7 [No male head] 0.175 -0.035 – 0.386 0.103
sc age7 [II] -0.006 -0.127 – 0.114 0.917
sc age7 [III non-manual] 0.027 -0.106 – 0.159 0.694
sc age7 [III manual] -0.057 -0.181 – 0.067 0.367
sc age7 [IV non-manual] 0.117 -0.084 – 0.317 0.255
sc age7 [IV manual] -0.024 -0.159 – 0.111 0.729
sc age7 [V] -0.044 -0.210 – 0.122 0.605
sc age11 [NA,unclear] -0.080 -0.218 – 0.058 0.257
sc age11 [II] -0.077 -0.198 – 0.044 0.214
sc age11 [III non manual] -0.013 -0.148 – 0.122 0.853
sc age11 [III manual] -0.068 -0.193 – 0.058 0.291
sc age11 [IV] -0.104 -0.240 – 0.032 0.134
sc age11 [V] -0.057 -0.230 – 0.115 0.515
sc age11 [No male head] -0.137 -0.311 – 0.037 0.123
sc age16 [NA,NMH,forces] -0.090 -0.212 – 0.032 0.149
sc age16 [II] 0.029 -0.094 – 0.152 0.646
sc age16 [III non-manual] -0.096 -0.235 – 0.043 0.174
sc age16 [III manual] -0.065 -0.191 – 0.060 0.308
sc age16 [IV non-manual] 0.087 -0.137 – 0.310 0.448
sc age16 [IV manual] -0.167 -0.311 – -0.023 0.023
sc age16 [V] -0.157 -0.336 – 0.022 0.086
sc age16 [Unclear] 0.053 -0.181 – 0.287 0.659
smoking age16 [NA] -0.131 -0.413 – 0.151 0.363
smoking age16 [Less 1 a
week]
0.026 -0.080 – 0.133 0.626
smoking age16 [1 to 9 a
week]
-0.051 -0.142 – 0.040 0.274
smoking age16 [10 to 19 a
week]
0.012 -0.095 – 0.118 0.830
smoking age16 [20 to 29 a
week]
0.016 -0.085 – 0.117 0.751
smoking age16 [30 to 39 a
week]
-0.058 -0.162 – 0.046 0.274
smoking age16 [40 to 49 a
week]
0.002 -0.110 – 0.115 0.968
smoking age16 [50 to 59 a
week]
0.087 -0.048 – 0.222 0.208
smoking age16 [60 or more
a wk]
0.018 -0.081 – 0.117 0.726
alcohol age16 [NA] 0.167 -0.157 – 0.492 0.311
alcohol age16 [Less than
1 week]
0.235 0.127 – 0.344 <0.001
alcohol age16 [2 to 4
weeks]
0.185 0.073 – 0.298 0.001
alcohol age16 [5 to 8
weeks]
0.147 0.016 – 0.277 0.028
alcohol age16 [9 to 12
weeks]
0.158 0.009 – 0.307 0.037
alcohol age16 [Over 12
weeks]
0.119 -0.012 – 0.250 0.076
alcohol age16 [Do not
remember]
0.109 -0.010 – 0.228 0.073
school attendance age16 0.000 -0.001 – 0.001 0.979
qual age33 [No
information]
0.588 0.404 – 0.771 <0.001
qual_age33CSE 2-5/equiv NVQ1 0.248 0.142 – 0.355 <0.001
qual age33 [O Level/equiv
NVQ2]
0.639 0.545 – 0.733 <0.001
qual age33 [A Level/equiv
NVQ3]
0.849 0.746 – 0.951 <0.001
qual age33 [Higher qual
NVQ4]
0.888 0.785 – 0.990 <0.001
qual age33 [Degree/higher
NVQ5,6]
1.293 1.185 – 1.401 <0.001
malaise age50 [incomplete
info]
-0.249 -0.561 – 0.063 0.118
malaise age50 [ High
malaise 4+]
-0.130 -0.194 – -0.067 <0.001
diabetes age50
[Mentioned]
-0.085 -0.193 – 0.023 0.124
hypertension age50
[Mentioned]
-0.040 -0.099 – 0.019 0.180
age × class [Mid / slowly
declining]
-0.001 -0.002 – 0.000 0.100
Random Effects
σ2 0.49
τ00 id_num 0.21
ICC 0.30
N id_num 3025
Observations 15125
Marginal R2 / Conditional R2 0.229 / 0.464
df_cog_read$class <- factor(
  df_cog_read$class,
  levels = c(
    "High / stable",
    "Low / slow increase"
  )
)

pred_cog <- ggpredict(
  cog_mixed_adj_read,
  terms = c("age [11,16,23,46,50, 62]", "class")
)

pred_df <- as.data.frame(pred_cog)

fig_cog_read <- ggplot(
  pred_df,
  aes(
    x = x,
    y = predicted,
    color = group,
    fill  = group
  )
) +
  geom_line(size = 1.3) +

  scale_color_bmj() +
  scale_fill_bmj() +

  scale_x_continuous(
    breaks = c(11, 16, 23, 46, 50, 62)
  ) +

  labs(
    x = "Age (years)",
    y = "Global cognitive function (z-score)",
    color = "Reading books trajectory",
    fill  = "Reading books trajectory"
  ) +

  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 12),
    legend.text  = element_text(size = 11),
    axis.title   = element_text(size = 13),
    axis.text    = element_text(size = 11)
  )

int_term <- tidy(
  cog_mixed_adj_read,
  effects = "fixed",
  conf.int = TRUE
) %>%
  dplyr::filter(grepl("^age:class", term)) %>%
  mutate(
    p.value = 2 * (1 - pnorm(abs(statistic)))
  )

int_term
## # A tibble: 1 × 8
##   effect term            estimate std.error statistic conf.low conf.high p.value
##   <chr>  <chr>              <dbl>     <dbl>     <dbl>    <dbl>     <dbl>   <dbl>
## 1 fixed  age:classMid /… -8.74e-4  0.000532     -1.64 -0.00192  0.000168   0.100
int_label <- with(int_term, {
  paste0(
    "Interaction (age × reading books trajectory):\n",
    "β = ", sprintf("%.4f", estimate),
    " (95% CI ", sprintf("%.4f", conf.low),
    ", ", sprintf("%.4f", conf.high), "), ",
    "p = ", ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  )
})

int_label
## [1] "Interaction (age × reading books trajectory):\nβ = -0.0009 (95% CI -0.0019, 0.0002), p = 0.100"
fig_cog_read <- fig_cog_read +
  annotate(
    "text",
    x = 8,
    y = min(pred_df$predicted) + 0.05,
    label = int_label,
    hjust = 0,
    size = 4.2
  )

fig_cog_read