SB life course

df$sex <- df$n622
df$sc_prenatal <- df$n492
df$smoking_preg <- df$n503
df$birth_weight <- df$n646

df$physical_activity_age7 <- df$n121
df$mon_read_child <- df$n179
df$dad_read_child <- df$n180
df$mon_read_book <- df$n186
df$mon_read_news <- df$n184
df$dad_read_book <- df$n187
df$dad_read_news <- df$n185
df$sc_age7 <- df$n190
df$breast_f <- df$n222
df$oral_ability_age7 <- df$n65
df$awareness_age7 <- df$n67
df$reading_ability_age7 <- df$n68
df$creativity_ability_age7 <- df$n69
df$numbers_ability_age7 <- df$n70
df$bsag_age7 <- df$n455
df$arithmetic_age7 <- df$n90
df$copying_age7 <- df$n457
df$draw_man_age7 <- df$n1840
df$reading_test_age7 <- df$n92

df$mon_walk_child_age11 <- df$n1145
df$dad_walk_child_age11 <- df$n1146
df$sc_age11 <- df$n1687
df$knowledge_age11 <- df$n876
df$numbers_ability_age11 <- df$n877
df$child_use_book_age11 <- df$n878
df$oral_ability_age11 <- df$n879
df$bsag_age11 <- df$n1008
df$verbal_ability_age11 <- df$n914
df$nonverbal_ability_age11 <- df$n917
df$general_ability_age11 <- df$n920
df$reading_ability_age11 <- df$n923
df$maths_ability_age11 <- df$n926
df$copying_age11 <- df$n929
df$child_bor_books_lib_age11 <- df$n931
df$child_read_books_age11 <- df$n935
df$child_read_news_age11 <- df$n936
df$physical_activity_age11 <- df$n941
df$child_writes_stories_age11 <- df$n942
df$child_draws_age11 <- df$n943
df$tv_age11 <- df$n949

df$sc_age16 <- df$n2384
df$iq_ag16 <- df$n1897
df$maths_ability_age16 <-df$n2244
df$english_ability_age16 <- df$n2245
df$language_age16 <- df$n2246
df$science_age16 <- df$n2247
df$practical_ability_age16 <- df$n2248
df$social_std_ability_age16 <- df$n2249
df$school_attendance_age16 <- df$n1722
df$child_read_age16 <- df$n2864
df$play_outdoor_age16 <- df$n2865
df$play_indoor_age16 <- df$n2867
df$swims_age16 <- df$n2866
df$tv_age16 <- df$n2868
df$smoking_age16 <- df$n2887
df$alcohol_age16 <- df$n2888
df$reading_test_age16 <- df$n2928
df$maths_test_age16 <- df$n2930
df$height_age16 <- df$dvht11
df$weight_age16 <- df$dvwt11

df$tv_age23 <- df$n5913_sweep4
df$read_book_age23 <- df$n5914_sweep4
df$read_news_age23 <- df$n5956_sweep4
df$physical_activity_age23 <- df$n5916_sweep4
df$alcohol_age23 <- df$n5920_sweep4
df$qual_age23 <- df$maxqual2_sweep4
df$smoking_age23 <- df$currentn_sweep4
df$income_age23 <- df$famnet_sweep4
df$job_time <- df$fullpart_sweep4
df$malaise_age23 <- df$malaise_sweep4

df$main_activity_age33 <- df$n500520_sweep5
df$sc_age33 <- df$n540056_sweep5
df$smoking_1_age33 <- df$n504262_sweep5
df$smoking_2_age33 <- df$n504265_sweep5
df$alcohol_age33 <- df$n504273_sweep5
df$occ_pa_age33 <- df$n504361_sweep5
df$physical_activity_1_age33 <- df$n504362_sweep5
df$physical_activity_2_age33 <- df$n504363_sweep5
df$qual_age33 <- df$hqual33_sweep5

df$occ_pa_age42 <- df$jdemand2_sweep6
df$smoking_age42 <- df$smoking_sweep6
df$alcohol_age42 <-df$drinks_sweep6
df$physical_activity_1_age42 <- df$exercise_sweep6
df$physical_activity_2_age42 <- df$breathls_sweep6
df$physical_activity_3_age42 <- df$sweat_sweep6
df$sc_age42 <- df$sc_sweep6

df$marital_age46 <- df$nd7ms_sweep7
df$income_age46 <- df$nd7iamtc_sweep7
df$main_activity_age46 <- df$nd7ecact_sweep7
df$qual_age46 <- df$nd7achq1_sweep7
df$computer_home_age46 <- df$n7pchome_sweep7
df$computer_home_freq_age46 <- df$n7hpcuse_sweep7
df$computer_work_age46 <- df$n7pcwork_sweep7
df$computer_work_freq_age46 <- df$n7wpcuse_sweep7
df$internet_leisure <- df$n7intacc_sweep7
df$read_news_age46 <- df$n7pread1_sweep7
df$read_book_age46 <- df$n7pread2_sweep7
df$diseases_age46 <- df$n7lsiany_sweep7
df$smoking_age46 <- df$nd7smoke_sweep7
df$alcohol_age46 <- df$n7drinks_sweep7
df$physical_activity_1_46 <- df$n7exers1_sweep7
df$physical_activity_2_46 <- df$n7breals_sweep7
df$physical_activity_3_46 <- df$n7sweat_sweep7
df$transport_age46 <- df$n7tranrt_sweep7

df$computer_home_age50 <- df$N8PCHOME_sweep8
df$computer_home_freq_age50 <- df$N8HPCUSE_sweep8
df$computer_work_age50 <- df$N8PCWORK_sweep8
df$computer_work_freq_age50 <- df$N8WPCUSE_sweep8
df$general_health_age50 <- df$N8HLTHGN_sweep8
df$diabetes_age50 <- df$N8KHPB03_sweep8
df$hypertension_age50 <- df$N8KHPB09_sweep8
df$words_immed_age50 <- df$N8CFLISN_sweep8
df$animals_named <- df$N8CFANI_sweep8
df$letter_canc_correct <- df$N8CFCOR_sweep8
df$letter_canc_accuracy <- df$N8CFMIS_sweep8
df$words_delayed <- df$N8CFLISD_sweep8
df$smoking_age50 <- df$N8SMOKIG_sweep8
df$alcohol_age50 <- df$N8DRINKS_sweep8
df$physical_activity_1_age50 <- df$N8EXERSE_sweep8
df$physical_activity_2_age50 <- df$N8BREALS_sweep8
df$physical_activity_3_age50 <- df$N8SWEAT_sweep8
df$occ_pa_age50 <- df$N8PHYSWK_sweep8
df$malaise_age50 <- df$ND8MALG_sweep8

df$physical_activity_age55 <- df$N9LEIS01_sweep9
df$internet_use_age55 <- df$N9INTRNT_sweep9
df$marital_age55 <- df$ND9MS_sweep9
df$main_activity_age55 <- df$ND9ECACT_sweep9
df$qual_age55 <- df$ND9HNVQ_sweep9
df$weight_age55 <- df$ND9WGHTK_sweep9
df$bmi_age55 <- df$ND9BMI_sweep9

df$marital_age62 <- df$n10hms_sweep10
df$main_activity_age62 <- df$n10econact2_sweep10
df$letter_can_accuracy <- df$n10cfmis_sweep10
df$animals_named_age62 <- df$n10cfani_sweep10
df$words_delayed_age62 <- df$n10cflisd_sweep10
df$general_health_age62 <- df$n10hlthgen_sweep10
df$diabetes_age62 <- df$n10khlprb04_sweep10
df$hypertension_age62 <- df$n10khlprb06_sweep10
df$dementia_age62 <- df$n10khlprb10_sweep10
df$physical_activity_1_age62 <- df$n10exercise_sweep10
df$physical_activity_2_age62 <- df$n10freqexer_sweep10
df$physical_activity_3_age62 <- df$n10sweat_sweep10
df$smoking_age62 <- df$n10smoking_sweep10
df$read_book_age62 <- df$n10ylq26_sweep10
df$tv_age62 <- df$n10ylq27_sweep10
df$bmi_age62 <- df$nd10bmi_rec_sweep10
df$malaise_age62 <- df$nd10mal_sweep10
df$qual_age62 <- df$nd10hnvq_sweep10

df$sc_age7 <- relevel(df$sc_age7, ref = "I")

df$breast_f <- relevel(df$breast_f, ref = "Under one month")

df$sc_age11 <- relevel(df$sc_age11, ref = "I")

df$sc_age16 <- relevel(df$sc_age16, ref = "I")

df$smoking_age16 <- relevel(df$smoking_age16, ref = "None,dont smoke")

df$alcohol_age16 <- relevel(df$alcohol_age16, ref = "Never had one")

df$qual_age33 <- relevel(df$qual_age33, ref = "No qualification")

df$diabetes_age50 <- relevel(df$diabetes_age50, ref = "Not mentioned")

df$hypertension_age50 <- relevel(df$hypertension_age50, ref = "Not mentioned")

df$malaise_age50 <- relevel(df$malaise_age50, ref = "Low malaise 0-3")

COGNITIVE FUNCTION AT AGE 7

library(broom)
library(dplyr)

df_complete <- df %>%
  filter(arithmetic_age7!=-1 &
           copying_age7!=-1  &
           draw_man_age7!=-1 &
           reading_test_age7!=-1)

# Age 7
# Problem Arithmetic test
df_complete$mean_arithmetic_age7 <- mean(df_complete$arithmetic_age7)
df_complete$sd_arithmetic_age7 <- sd(df_complete$arithmetic_age7)
df_complete$z_arithmetic_age7 <- (df_complete$arithmetic_age7-df_complete$mean_arithmetic_age7)/df_complete$sd_arithmetic_age7
df_complete$z_arithmetic_age7 = scale(df_complete$z_arithmetic_age7)[,1]

hist(df_complete$z_arithmetic_age7)

#Copying designs test
df_complete$mean_copying_age7 <- mean(df_complete$copying_age7)
df_complete$sd_copying_age7 <- sd(df_complete$copying_age7)
df_complete$z_copying_age7 <- (df_complete$copying_age7-df_complete$mean_copying_age7)/df_complete$sd_copying_age7
df_complete$z_copying_age7 = scale(df_complete$z_copying_age7)[,1]

hist(df_complete$z_copying_age7)

#Draw-a-man test
df_complete$mean_draw_man_age7 <- mean(df_complete$draw_man_age7)
df_complete$sd_draw_man_age7 <- sd(df_complete$draw_man_age7)
df_complete$z_draw_man_age7 <- (df_complete$draw_man_age7-df_complete$mean_draw_man_age7)/df_complete$sd_draw_man_age7
df_complete$z_draw_man_age7 = scale(df_complete$z_draw_man_age7)[,1]

hist(df_complete$z_draw_man_age7)

#Southgate Group Reading test
df_complete$mean_reading_test_age7 <- mean(df_complete$reading_test_age7)
df_complete$sd_reading_test_age7 <- sd(df_complete$reading_test_age7)
df_complete$z_reading_test_age7 <- (df_complete$reading_test_age7-df_complete$mean_reading_test_age7)/df_complete$sd_reading_test_age7
df_complete$z_reading_test_age7 = scale(df_complete$z_draw_man_age7)[,1]

hist(df_complete$z_reading_test_age7)

# averaging all z scores
df_complete$global_cog_age7 <- (df_complete$z_arithmetic_age7 + df_complete$z_copying_age7 + df_complete$z_draw_man_age7 + df_complete$z_reading_test_age7) / 4

# creating a z score for the averaged variable
df_complete$z_global_cog_age7 = scale(df_complete$global_cog_age7)[,1]

hist(df_complete$z_global_cog_age7)

# Reference levels
df_complete <- df_complete %>%
  mutate(
    dad_read_score = case_when(
      dad_read_book %in% c("Yes most weeks") ~ 2,
      dad_read_book %in% c("Yes occasionally") ~ 1,
      dad_read_book %in% c("Hardly ever") ~ 0,
      TRUE ~ NA_real_
    ),
    
    mom_read_score = case_when(
      mon_read_child %in% c("Yes,every week") ~ 2,
      mon_read_child %in% c("Yes,occasionally") ~ 1,
      mon_read_child %in% c("Hardly ever") ~ 0,
      TRUE ~ NA_real_
    )
  )

df_complete <- df_complete %>%
  mutate(
    parent_read = case_when(
      dad_read_score == 0 & mom_read_score == 0 ~ 0,
      dad_read_score > 0 | mom_read_score > 0 ~ 1,
      TRUE ~ NA_real_
    )
  )

df_complete <- df_complete %>%
  mutate(
    parent_read_level = pmax(dad_read_score, mom_read_score, na.rm = TRUE),
    parent_read_level = ifelse(
      is.infinite(parent_read_level), NA, parent_read_level
    )
  )


df_complete$parent_read_level <- factor(
  df_complete$parent_read_level,
  levels = c(0, 1, 2),
  labels = c("Hardly ever", "Occasionally", "Weekly"))

DescTools::Desc(df_complete$parent_read_level)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete$parent_read_level (factor)
## 
##   length      n    NAs unique levels  dupes
##   14'404 13'651    753      3      3      y
##           94.8%   5.2%                     
## 
##           level   freq   perc  cumfreq  cumperc
## 1        Weekly  9'459  69.3%    9'459    69.3%
## 2  Occasionally  3'170  23.2%   12'629    92.5%
## 3   Hardly ever  1'022   7.5%   13'651   100.0%

global_age7 <- lm(z_global_cog_age7 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f, data = df_complete)

tab_model(global_age7, digits = 3)

	z global cog age 7
Predictors	Estimates	CI	p
(Intercept)	-0.060	-0.278 – 0.158	0.590
sex [Female]	0.006	-0.027 – 0.038	0.736
sc prenatal [Unemployed,sick]	-0.593	-1.541 – 0.355	0.220
sc prenatal [I]	0.331	0.180 – 0.482	<0.001
sc prenatal [II]	0.231	0.106 – 0.356	<0.001
sc prenatal [III]	0.172	0.058 – 0.287	0.003
sc prenatal [IV]	0.089	-0.034 – 0.211	0.155
sc prenatal [V]	-0.029	-0.154 – 0.096	0.650
sc prenatal [Students]	0.254	-0.283 – 0.791	0.354
sc prenatal [Dead or away]	-0.671	-2.010 – 0.668	0.326
sc prenatal [Retired]	0.263	-1.076 – 1.603	0.700
sc prenatal [Single,no husbnd]	-0.095	-0.250 – 0.059	0.227
smoking preg [5mths-no change]	0.029	-0.128 – 0.186	0.715
smoking preg [Now non-smoker]	0.092	-0.076 – 0.259	0.283
smoking preg [1 to 4 now]	0.037	-0.143 – 0.218	0.684
smoking preg [5 to 9 now]	-0.019	-0.207 – 0.169	0.840
smoking preg [10 to 14 now]	0.038	-0.177 – 0.254	0.729
smoking preg [15 to 19 now]	-0.058	-0.322 – 0.206	0.668
smoking preg [20 to 24 now]	-0.233	-0.537 – 0.071	0.133
smoking preg [25 to 29 now]	0.029	-0.441 – 0.499	0.903
smoking preg [30 or more now]	0.025	-0.501 – 0.552	0.925
smoking_pregVariable-5mths	0.010	-0.160 – 0.180	0.908
birth weight	0.000	0.000 – 0.001	0.002
parent read level [Occasionally]	0.122	0.053 – 0.191	0.001
parent read level [Weekly]	0.199	0.134 – 0.263	<0.001
sc age7 [NA, unclear]	-0.578	-0.795 – -0.361	<0.001
sc age7 [No male head]	-0.441	-0.576 – -0.306	<0.001
sc age7 [II]	-0.118	-0.215 – -0.021	0.017
sc age7 [III non-manual]	-0.146	-0.250 – -0.042	0.006
sc age7 [III manual]	-0.391	-0.487 – -0.294	<0.001
sc age7 [IV non-manual]	-0.377	-0.536 – -0.219	<0.001
sc age7 [IV manual]	-0.481	-0.584 – -0.378	<0.001
sc age7 [V]	-0.620	-0.738 – -0.502	<0.001
breast f [NA]	0.237	-0.704 – 1.179	0.621
breast f [Dont know]	-0.206	-0.435 – 0.022	0.077
breast f [No]	-0.086	-0.130 – -0.042	<0.001
breast f [Over one month]	0.095	0.053 – 0.136	<0.001
Observations	13225
R² / R² adjusted	0.071 / 0.069

arith_age7 <- lm(z_arithmetic_age7 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f, data = df_complete)

arith_age7 %>%
  broom::tidy(conf.int = TRUE) %>%
  knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	0.084	0.113	0.745	0.456	-0.137	0.304
sexFemale	-0.088	0.017	-5.197	0.000	-0.121	-0.055
sc_prenatalUnemployed,sick	0.384	0.489	0.786	0.432	-0.573	1.342
sc_prenatalI	0.313	0.078	4.024	0.000	0.161	0.466
sc_prenatalII	0.205	0.064	3.174	0.002	0.078	0.331
sc_prenatalIII	0.162	0.059	2.751	0.006	0.047	0.277
sc_prenatalIV	0.085	0.063	1.344	0.179	-0.039	0.208
sc_prenatalV	0.003	0.065	0.053	0.958	-0.123	0.130
sc_prenatalStudents	0.189	0.277	0.683	0.495	-0.353	0.732
sc_prenatalDead or away	-0.066	0.690	-0.096	0.923	-1.419	1.286
sc_prenatalRetired	-0.128	0.690	-0.186	0.853	-1.482	1.225
sc_prenatalSingle,no husbnd	-0.013	0.080	-0.160	0.872	-0.169	0.144
smoking_preg5mths-no change	-0.010	0.081	-0.126	0.900	-0.169	0.148
smoking_pregNow non-smoker	0.067	0.086	0.782	0.434	-0.101	0.236
smoking_preg1 to 4 now	-0.030	0.093	-0.321	0.748	-0.212	0.152
smoking_preg5 to 9 now	-0.005	0.097	-0.051	0.959	-0.195	0.185
smoking_preg10 to 14 now	-0.006	0.111	-0.056	0.955	-0.224	0.212
smoking_preg15 to 19 now	-0.148	0.136	-1.091	0.275	-0.415	0.118
smoking_preg20 to 24 now	-0.240	0.157	-1.534	0.125	-0.547	0.067
smoking_preg25 to 29 now	-0.533	0.242	-2.199	0.028	-1.008	-0.058
smoking_preg30 or more now	-0.131	0.271	-0.482	0.630	-0.663	0.401
smoking_pregVariable-5mths	-0.010	0.088	-0.117	0.907	-0.182	0.162
birth_weight	0.000	0.000	2.700	0.007	0.000	0.001
parent_read_levelOccasionally	0.094	0.036	2.625	0.009	0.024	0.163
parent_read_levelWeekly	0.155	0.033	4.672	0.000	0.090	0.221
sc_age7NA, unclear	-0.458	0.112	-4.099	0.000	-0.677	-0.239
sc_age7No male head	-0.438	0.070	-6.294	0.000	-0.575	-0.302
sc_age7II	-0.126	0.050	-2.518	0.012	-0.224	-0.028
sc_age7III non-manual	-0.172	0.053	-3.225	0.001	-0.277	-0.068
sc_age7III manual	-0.415	0.050	-8.336	0.000	-0.512	-0.317
sc_age7IV non-manual	-0.359	0.082	-4.389	0.000	-0.519	-0.199
sc_age7IV manual	-0.490	0.053	-9.213	0.000	-0.595	-0.386
sc_age7V	-0.558	0.061	-9.187	0.000	-0.677	-0.439
breast_fNA	-0.041	0.485	-0.085	0.932	-0.992	0.910
breast_fDont know	-0.312	0.118	-2.650	0.008	-0.544	-0.081
breast_fNo	-0.039	0.023	-1.716	0.086	-0.083	0.006
breast_fOver one month	0.059	0.021	2.746	0.006	0.017	0.101

copying_age7 <- lm(z_copying_age7 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f, data = df_complete)

copying_age7 %>%
  broom::tidy(conf.int = TRUE) %>%
  knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	0.065	0.114	0.572	0.567	-0.158	0.289
sexFemale	-0.048	0.017	-2.784	0.005	-0.081	-0.014
sc_prenatalUnemployed,sick	0.304	0.495	0.614	0.539	-0.666	1.274
sc_prenatalI	0.220	0.079	2.785	0.005	0.065	0.374
sc_prenatalII	0.103	0.065	1.575	0.115	-0.025	0.231
sc_prenatalIII	0.071	0.060	1.196	0.232	-0.046	0.188
sc_prenatalIV	0.007	0.064	0.117	0.907	-0.118	0.132
sc_prenatalV	-0.116	0.065	-1.771	0.077	-0.244	0.012
sc_prenatalStudents	0.267	0.281	0.952	0.341	-0.283	0.817
sc_prenatalDead or away	-0.503	0.699	-0.719	0.472	-1.874	0.868
sc_prenatalRetired	0.788	0.700	1.126	0.260	-0.584	2.160
sc_prenatalSingle,no husbnd	-0.154	0.081	-1.906	0.057	-0.312	0.004
smoking_preg5mths-no change	0.021	0.082	0.251	0.802	-0.140	0.181
smoking_pregNow non-smoker	0.024	0.087	0.274	0.784	-0.147	0.195
smoking_preg1 to 4 now	0.019	0.094	0.197	0.844	-0.166	0.203
smoking_preg5 to 9 now	-0.073	0.098	-0.745	0.456	-0.266	0.119
smoking_preg10 to 14 now	0.039	0.113	0.351	0.726	-0.181	0.260
smoking_preg15 to 19 now	0.051	0.138	0.372	0.710	-0.219	0.322
smoking_preg20 to 24 now	-0.114	0.159	-0.716	0.474	-0.425	0.198
smoking_preg25 to 29 now	0.069	0.246	0.279	0.780	-0.413	0.550
smoking_preg30 or more now	-0.224	0.275	-0.815	0.415	-0.763	0.315
smoking_pregVariable-5mths	-0.044	0.089	-0.490	0.624	-0.218	0.131
birth_weight	0.000	0.000	2.217	0.027	0.000	0.001
parent_read_levelOccasionally	0.053	0.036	1.459	0.144	-0.018	0.124
parent_read_levelWeekly	0.120	0.034	3.563	0.000	0.054	0.186
sc_age7NA, unclear	-0.350	0.113	-3.091	0.002	-0.572	-0.128
sc_age7No male head	-0.308	0.071	-4.362	0.000	-0.446	-0.169
sc_age7II	-0.069	0.051	-1.368	0.171	-0.169	0.030
sc_age7III non-manual	-0.094	0.054	-1.730	0.084	-0.200	0.012
sc_age7III manual	-0.267	0.050	-5.302	0.000	-0.366	-0.169
sc_age7IV non-manual	-0.263	0.083	-3.173	0.002	-0.425	-0.100
sc_age7IV manual	-0.307	0.054	-5.692	0.000	-0.413	-0.201
sc_age7V	-0.428	0.062	-6.949	0.000	-0.549	-0.307
breast_fNA	-0.165	0.492	-0.335	0.738	-1.129	0.799
breast_fDont know	0.035	0.119	0.294	0.769	-0.199	0.269
breast_fNo	-0.089	0.023	-3.849	0.000	-0.134	-0.043
breast_fOver one month	0.070	0.022	3.213	0.001	0.027	0.112

draw_age7 <- lm(z_draw_man_age7 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f, data = df_complete)

draw_age7 %>%
  broom::tidy(conf.int = TRUE) %>%
  knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	-0.167	0.113	-1.470	0.141	-0.389	0.056
sexFemale	0.076	0.017	4.488	0.000	0.043	0.110
sc_prenatalUnemployed,sick	-1.255	0.492	-2.551	0.011	-2.220	-0.291
sc_prenatalI	0.242	0.078	3.085	0.002	0.088	0.395
sc_prenatalII	0.201	0.065	3.097	0.002	0.074	0.329
sc_prenatalIII	0.148	0.059	2.494	0.013	0.032	0.264
sc_prenatalIV	0.090	0.063	1.420	0.156	-0.034	0.214
sc_prenatalV	0.012	0.065	0.180	0.857	-0.116	0.139
sc_prenatalStudents	0.162	0.279	0.582	0.561	-0.384	0.709
sc_prenatalDead or away	-0.747	0.695	-1.074	0.283	-2.109	0.616
sc_prenatalRetired	0.075	0.696	0.108	0.914	-1.288	1.438
sc_prenatalSingle,no husbnd	-0.063	0.080	-0.784	0.433	-0.220	0.094
smoking_preg5mths-no change	0.040	0.081	0.487	0.626	-0.120	0.199
smoking_pregNow non-smoker	0.095	0.087	1.095	0.274	-0.075	0.265
smoking_preg1 to 4 now	0.063	0.094	0.674	0.500	-0.120	0.246
smoking_preg5 to 9 now	0.009	0.098	0.096	0.924	-0.182	0.201
smoking_preg10 to 14 now	0.042	0.112	0.375	0.708	-0.177	0.261
smoking_preg15 to 19 now	-0.040	0.137	-0.294	0.769	-0.309	0.228
smoking_preg20 to 24 now	-0.181	0.158	-1.148	0.251	-0.490	0.128
smoking_preg25 to 29 now	0.277	0.244	1.136	0.256	-0.201	0.756
smoking_preg30 or more now	0.216	0.273	0.791	0.429	-0.320	0.752
smoking_pregVariable-5mths	0.042	0.088	0.480	0.631	-0.131	0.215
birth_weight	0.000	0.000	2.205	0.027	0.000	0.001
parent_read_levelOccasionally	0.115	0.036	3.188	0.001	0.044	0.185
parent_read_levelWeekly	0.168	0.033	5.006	0.000	0.102	0.233
sc_age7NA, unclear	-0.484	0.113	-4.297	0.000	-0.705	-0.263
sc_age7No male head	-0.304	0.070	-4.337	0.000	-0.442	-0.167
sc_age7II	-0.083	0.050	-1.654	0.098	-0.182	0.015
sc_age7III non-manual	-0.091	0.054	-1.695	0.090	-0.197	0.014
sc_age7III manual	-0.259	0.050	-5.172	0.000	-0.358	-0.161
sc_age7IV non-manual	-0.269	0.082	-3.265	0.001	-0.430	-0.107
sc_age7IV manual	-0.340	0.054	-6.348	0.000	-0.445	-0.235
sc_age7V	-0.459	0.061	-7.508	0.000	-0.579	-0.340
breast_fNA	0.467	0.489	0.956	0.339	-0.491	1.426
breast_fDont know	-0.178	0.119	-1.502	0.133	-0.411	0.054
breast_fNo	-0.068	0.023	-2.994	0.003	-0.113	-0.024
breast_fOver one month	0.081	0.022	3.775	0.000	0.039	0.123

reading_age7 <- lm(z_reading_test_age7 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f, data = df_complete)

reading_age7 %>%
  broom::tidy(conf.int = TRUE) %>%
  knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	-0.167	0.113	-1.470	0.141	-0.389	0.056
sexFemale	0.076	0.017	4.488	0.000	0.043	0.110
sc_prenatalUnemployed,sick	-1.255	0.492	-2.551	0.011	-2.220	-0.291
sc_prenatalI	0.242	0.078	3.085	0.002	0.088	0.395
sc_prenatalII	0.201	0.065	3.097	0.002	0.074	0.329
sc_prenatalIII	0.148	0.059	2.494	0.013	0.032	0.264
sc_prenatalIV	0.090	0.063	1.420	0.156	-0.034	0.214
sc_prenatalV	0.012	0.065	0.180	0.857	-0.116	0.139
sc_prenatalStudents	0.162	0.279	0.582	0.561	-0.384	0.709
sc_prenatalDead or away	-0.747	0.695	-1.074	0.283	-2.109	0.616
sc_prenatalRetired	0.075	0.696	0.108	0.914	-1.288	1.438
sc_prenatalSingle,no husbnd	-0.063	0.080	-0.784	0.433	-0.220	0.094
smoking_preg5mths-no change	0.040	0.081	0.487	0.626	-0.120	0.199
smoking_pregNow non-smoker	0.095	0.087	1.095	0.274	-0.075	0.265
smoking_preg1 to 4 now	0.063	0.094	0.674	0.500	-0.120	0.246
smoking_preg5 to 9 now	0.009	0.098	0.096	0.924	-0.182	0.201
smoking_preg10 to 14 now	0.042	0.112	0.375	0.708	-0.177	0.261
smoking_preg15 to 19 now	-0.040	0.137	-0.294	0.769	-0.309	0.228
smoking_preg20 to 24 now	-0.181	0.158	-1.148	0.251	-0.490	0.128
smoking_preg25 to 29 now	0.277	0.244	1.136	0.256	-0.201	0.756
smoking_preg30 or more now	0.216	0.273	0.791	0.429	-0.320	0.752
smoking_pregVariable-5mths	0.042	0.088	0.480	0.631	-0.131	0.215
birth_weight	0.000	0.000	2.205	0.027	0.000	0.001
parent_read_levelOccasionally	0.115	0.036	3.188	0.001	0.044	0.185
parent_read_levelWeekly	0.168	0.033	5.006	0.000	0.102	0.233
sc_age7NA, unclear	-0.484	0.113	-4.297	0.000	-0.705	-0.263
sc_age7No male head	-0.304	0.070	-4.337	0.000	-0.442	-0.167
sc_age7II	-0.083	0.050	-1.654	0.098	-0.182	0.015
sc_age7III non-manual	-0.091	0.054	-1.695	0.090	-0.197	0.014
sc_age7III manual	-0.259	0.050	-5.172	0.000	-0.358	-0.161
sc_age7IV non-manual	-0.269	0.082	-3.265	0.001	-0.430	-0.107
sc_age7IV manual	-0.340	0.054	-6.348	0.000	-0.445	-0.235
sc_age7V	-0.459	0.061	-7.508	0.000	-0.579	-0.340
breast_fNA	0.467	0.489	0.956	0.339	-0.491	1.426
breast_fDont know	-0.178	0.119	-1.502	0.133	-0.411	0.054
breast_fNo	-0.068	0.023	-2.994	0.003	-0.113	-0.024
breast_fOver one month	0.081	0.022	3.775	0.000	0.039	0.123

extract_parent_read <- function(model, outcome_label) {
  
  coef_df <- tidy(model, conf.int = TRUE) %>%
    filter(term %in% c(
      "parent_read_levelOccasionally",
      "parent_read_levelWeekly"
    )) %>%
    mutate(
      exposure = case_when(
        term == "parent_read_levelOccasionally" ~ "Occasionally",
        term == "parent_read_levelWeekly" ~ "Weekly"
      ),
      outcome = outcome_label
    ) %>%
    dplyr::select(outcome, exposure, estimate, conf.low, conf.high)
  
  # referência (Hardly ever)
  ref_df <- tibble(
    outcome   = outcome_label,
    exposure  = "Hardly ever",
    estimate  = 0,
    conf.low  = 0,
    conf.high = 0
  )
  
  bind_rows(ref_df, coef_df)
}

plot_df <- bind_rows(
  extract_parent_read(global_age7,  "Global cognition"),
  extract_parent_read(arith_age7,   "Arithmetic"),
  extract_parent_read(copying_age7, "Copying"),
  extract_parent_read(draw_age7,    "Draw-a-man"),
  extract_parent_read(reading_age7, "Reading")
)

plot_df$exposure <- factor(
  plot_df$exposure,
  levels = c("Hardly ever", "Occasionally", "Weekly")
)


ggplot(plot_df, aes(x = estimate, y = outcome)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  
  geom_pointrange(
    aes(xmin = conf.low, xmax = conf.high, shape = exposure),
    size = 1, position = position_dodge2(width = 0.9)
  ) +
  
  scale_shape_manual(
    values = c(
      "Hardly ever" = 15,
      "Occasionally" = 16,
      "Weekly" = 17
    )
  ) +
  
  labs(
    x = "Standardized difference in Z score (95% CI)",
    y = NULL,
    shape = "Parental reading frequency"
  ) +
  
  theme_minimal(base_size = 13) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  )

COGNITIVE FUNCTION AT AGE 11

df_complete_7_11 <- df_complete %>%
  filter(verbal_ability_age11!=-1 &
           nonverbal_ability_age11!=-1  &
           reading_ability_age11!=-1 &
           maths_ability_age11!=-1 &
           copying_age11!=-1)

# Age 11
# Verbal score – general ability test
df_complete_7_11$mean_verbal_ability_age11 <- mean(df_complete_7_11$verbal_ability_age11)
df_complete_7_11$sd_verbal_ability_age11 <- sd(df_complete_7_11$verbal_ability_age11)
df_complete_7_11$z_verbal_ability_age11 <- (df_complete_7_11$arithmetic_age7-df_complete_7_11$mean_verbal_ability_age11)/df_complete_7_11$sd_verbal_ability_age11
df_complete_7_11$z_verbal_ability_age11 = scale(df_complete_7_11$z_verbal_ability_age11)[,1]

hist(df_complete_7_11$z_verbal_ability_age11)

# Non-verbal score – general ability test
df_complete_7_11$mean_n_verbal_ability_age11 <- mean(df_complete_7_11$nonverbal_ability_age11)
df_complete_7_11$sd_n_verbal_ability_age11 <- sd(df_complete_7_11$nonverbal_ability_age11)
df_complete_7_11$z_n_verbal_ability_age11 <- (df_complete_7_11$nonverbal_ability_age11-df_complete_7_11$mean_n_verbal_ability_age11)/df_complete_7_11$sd_n_verbal_ability_age11
df_complete_7_11$z_n_verbal_ability_age11 = scale(df_complete_7_11$z_n_verbal_ability_age11)[,1]

hist(df_complete_7_11$z_n_verbal_ability_age11)

#Reading comprehension test
df_complete_7_11$mean_reading_ability_age11 <- mean(df_complete_7_11$reading_ability_age11)
df_complete_7_11$sd_reading_ability_age11 <- sd(df_complete_7_11$reading_ability_age11)
df_complete_7_11$z_reading_ability_age11 <- (df_complete_7_11$reading_ability_age11-df_complete_7_11$mean_reading_ability_age11)/df_complete_7_11$sd_reading_ability_age11
df_complete_7_11$z_reading_ability_age11 = scale(df_complete_7_11$z_reading_ability_age11)[,1]

hist(df_complete_7_11$z_reading_ability_age11)

#Mathematics test score
df_complete_7_11$mean_maths_ability_age11 <- mean(df_complete_7_11$maths_ability_age11)
df_complete_7_11$sd_maths_ability_age11 <- sd(df_complete_7_11$maths_ability_age11)
df_complete_7_11$z_maths_ability_age11 <- (df_complete_7_11$maths_ability_age11-df_complete_7_11$mean_maths_ability_age11)/df_complete_7_11$sd_maths_ability_age11
df_complete_7_11$z_maths_ability_age11 = scale(df_complete_7_11$z_maths_ability_age11)[,1]

hist(df_complete_7_11$z_maths_ability_age11)

#Copying designs test score
df_complete_7_11$mean_copying_age11 <- mean(df_complete_7_11$copying_age11)
df_complete_7_11$sd_copying_age11 <- sd(df_complete_7_11$copying_age11)
df_complete_7_11$z_copying_age11 <- (df_complete_7_11$copying_age11-df_complete_7_11$mean_copying_age11)/df_complete_7_11$sd_copying_age11
df_complete_7_11$z_copying_age11 = scale(df_complete_7_11$z_copying_age11)[,1]

hist(df_complete_7_11$z_copying_age11)

# averaging all z scores
df_complete_7_11$global_cog_age11 <- (df_complete_7_11$z_verbal_ability_age11 + df_complete_7_11$z_n_verbal_ability_age11 + df_complete_7_11$z_reading_ability_age11 + df_complete_7_11$z_maths_ability_age11 + df_complete_7_11$z_copying_age11) / 5

# creating a z score for the averaged variable
df_complete_7_11$z_global_cog_age11 = scale(df_complete_7_11$global_cog_age11)[,1]

hist(df_complete_7_11$z_global_cog_age11)

# Reference levels
DescTools::Desc(df_complete_7_11$tv_age11)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$tv_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level    freq   perc  cumfreq  cumperc
## 1    most days  10'134  82.4%   10'134    82.4%
## 2    Sometimes   1'430  11.6%   11'564    94.0%
## 3           NA     438   3.6%   12'002    97.6%
## 4  Hardly ever     299   2.4%   12'301   100.0%

DescTools::Desc(df_complete_7_11$child_read_books_age11)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$child_read_books_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1    Sometimes  5'273  42.9%    5'273    42.9%
## 2    most days  5'223  42.5%   10'496    85.3%
## 3  Hardly ever  1'368  11.1%   11'864    96.4%
## 4           NA    437   3.6%   12'301   100.0%

DescTools::Desc(df_complete_7_11$child_read_news_age11)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$child_read_news_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1    most days  6'156  50.0%    6'156    50.0%
## 2    Sometimes  4'613  37.5%   10'769    87.5%
## 3  Hardly ever  1'033   8.4%   11'802    95.9%
## 4           NA    499   4.1%   12'301   100.0%

DescTools::Desc(df_complete_7_11$physical_activity_age11)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$physical_activity_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1    most days  5'476  44.5%    5'476    44.5%
## 2    Sometimes  5'049  41.0%   10'525    85.6%
## 3  Hardly ever  1'334  10.8%   11'859    96.4%
## 4           NA    442   3.6%   12'301   100.0%

DescTools::Desc(df_complete_7_11$child_writes_stories_age11)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$child_writes_stories_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1  Hardly ever  6'685  54.3%    6'685    54.3%
## 2    Sometimes  4'032  32.8%   10'717    87.1%
## 3    most days  1'082   8.8%   11'799    95.9%
## 4           NA    502   4.1%   12'301   100.0%

DescTools::Desc(df_complete_7_11$child_draws_age11)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$child_draws_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1    Sometimes  6'007  48.8%    6'007    48.8%
## 2  Hardly ever  3'022  24.6%    9'029    73.4%
## 3    most days  2'799  22.8%   11'828    96.2%
## 4           NA    473   3.8%   12'301   100.0%

DescTools::Desc(df_complete_7_11$child_writes_stories_age11)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_11$child_writes_stories_age11 (factor)
## 
##   length      n    NAs unique levels  dupes
##   12'301 12'301      0      4      4      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1  Hardly ever  6'685  54.3%    6'685    54.3%
## 2    Sometimes  4'032  32.8%   10'717    87.1%
## 3    most days  1'082   8.8%   11'799    95.9%
## 4           NA    502   4.1%   12'301   100.0%

recode_freq <- function(x) {
  case_when(
    x == "Hardly ever" ~ 0,
    x == "Sometimes"  ~ 1,
    x == "most days"  ~ 2,
    TRUE ~ NA_real_
  )
}
df_complete_7_11 <- df_complete_7_11 %>%
  mutate(
    read_books_score = recode_freq(child_read_books_age11),
    read_news_score  = recode_freq(child_read_news_age11),
    
    child_reading_age11 = pmax(read_books_score, read_news_score, na.rm = TRUE),
    child_reading_age11 = ifelse(
      is.infinite(child_reading_age11), NA, child_reading_age11
    )
  )


df_complete_7_11$child_reading_age11 <- factor(
  df_complete_7_11$child_reading_age11,
  levels = c(0, 1, 2),
  labels = c("Hardly ever", "Sometimes", "Most days"))

table(df_complete_7_11$child_reading_age11, useNA = "ifany")

## 
## Hardly ever   Sometimes   Most days        <NA> 
##         233        3248        8536         284

df_complete_7_11 <- df_complete_7_11 %>%
  mutate(
    write_story_score = recode_freq(child_writes_stories_age11),
    draw_score        = recode_freq(child_draws_age11),
    
    child_creative_age11 = pmax(write_story_score, draw_score, na.rm = TRUE),
    child_creative_age11 = ifelse(
      is.infinite(child_creative_age11), NA, child_creative_age11
    )
  )

df_complete_7_11$child_creative_age11 <- factor(
  df_complete_7_11$child_creative_age11,
  levels = c(0, 1, 2),
  labels = c("Hardly ever", "Sometimes", "Most days"))

table(df_complete_7_11$child_creative_age11, useNA = "ifany")

## 
## Hardly ever   Sometimes   Most days        <NA> 
##        2074        6548        3391         288

df_complete_7_11$tv_age11 <- factor(
  df_complete_7_11$tv_age11,
  levels = c("Hardly ever", "Sometimes", "most days")
)

table(df_complete_7_11$tv_age11, useNA = "ifany")

## 
## Hardly ever   Sometimes   most days        <NA> 
##         299        1430       10134         438

df_complete_7_11$physical_activity_age11 <- factor(
  df_complete_7_11$physical_activity_age11,
  levels = c("Hardly ever", "Sometimes", "most days")
)

table(df_complete_7_11$physical_activity_age11, useNA = "ifany")

## 
## Hardly ever   Sometimes   most days        <NA> 
##        1334        5049        5476         442

global_age11 <- lm(z_global_cog_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

global_age11 %>%
  broom::tidy(conf.int = TRUE) %>%
  knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	-0.683	0.140	-4.869	0.000	-0.959	-0.408
sexFemale	-0.053	0.017	-3.182	0.001	-0.086	-0.020
sc_prenatalUnemployed,sick	0.251	0.873	0.288	0.774	-1.460	1.962
sc_prenatalI	0.418	0.077	5.429	0.000	0.267	0.569
sc_prenatalII	0.267	0.064	4.175	0.000	0.142	0.392
sc_prenatalIII	0.134	0.058	2.295	0.022	0.019	0.248
sc_prenatalIV	0.073	0.062	1.179	0.239	-0.049	0.195
sc_prenatalV	-0.090	0.064	-1.404	0.160	-0.216	0.036
sc_prenatalStudents	0.599	0.283	2.114	0.035	0.044	1.155
sc_prenatalDead or away	-1.144	0.875	-1.308	0.191	-2.859	0.570
sc_prenatalRetired	0.758	0.622	1.219	0.223	-0.461	1.978
sc_prenatalSingle,no husbnd	0.023	0.080	0.285	0.776	-0.134	0.179
smoking_preg5mths-no change	-0.033	0.082	-0.405	0.686	-0.194	0.128
smoking_pregNow non-smoker	0.005	0.087	0.057	0.955	-0.166	0.176
smoking_preg1 to 4 now	-0.111	0.094	-1.178	0.239	-0.295	0.074
smoking_preg5 to 9 now	-0.140	0.097	-1.440	0.150	-0.331	0.051
smoking_preg10 to 14 now	-0.152	0.111	-1.370	0.171	-0.370	0.066
smoking_preg15 to 19 now	-0.203	0.138	-1.468	0.142	-0.474	0.068
smoking_preg20 to 24 now	-0.373	0.156	-2.395	0.017	-0.679	-0.068
smoking_preg25 to 29 now	-0.178	0.264	-0.675	0.500	-0.697	0.340
smoking_preg30 or more now	-0.478	0.288	-1.663	0.096	-1.042	0.085
smoking_pregVariable-5mths	-0.124	0.089	-1.391	0.164	-0.298	0.051
birth_weight	0.000	0.000	1.759	0.079	0.000	0.000
parent_read_levelOccasionally	0.120	0.035	3.394	0.001	0.051	0.189
parent_read_levelWeekly	0.277	0.033	8.387	0.000	0.212	0.341
sc_age7NA, unclear	-0.444	0.117	-3.803	0.000	-0.672	-0.215
sc_age7No male head	-0.436	0.080	-5.458	0.000	-0.593	-0.280
sc_age7II	-0.155	0.056	-2.774	0.006	-0.264	-0.045
sc_age7III non-manual	-0.161	0.059	-2.729	0.006	-0.277	-0.046
sc_age7III manual	-0.406	0.056	-7.232	0.000	-0.516	-0.296
sc_age7IV non-manual	-0.395	0.084	-4.690	0.000	-0.561	-0.230
sc_age7IV manual	-0.490	0.060	-8.221	0.000	-0.607	-0.373
sc_age7V	-0.644	0.068	-9.527	0.000	-0.777	-0.512
breast_fNA	0.684	0.436	1.567	0.117	-0.171	1.539
breast_fDont know	-0.217	0.125	-1.732	0.083	-0.463	0.029
breast_fNo	-0.065	0.023	-2.884	0.004	-0.109	-0.021
breast_fOver one month	0.086	0.021	4.079	0.000	0.045	0.127
sc_age11NA,unclear	-0.314	0.057	-5.555	0.000	-0.425	-0.203
sc_age11II	-0.004	0.052	-0.086	0.932	-0.107	0.098
sc_age11III non manual	-0.083	0.058	-1.437	0.151	-0.197	0.030
sc_age11III manual	-0.229	0.053	-4.317	0.000	-0.333	-0.125
sc_age11IV	-0.308	0.056	-5.463	0.000	-0.419	-0.198
sc_age11V	-0.329	0.067	-4.925	0.000	-0.460	-0.198
sc_age11No male head	-0.302	0.069	-4.399	0.000	-0.437	-0.167
tv_age11Sometimes	0.173	0.058	2.958	0.003	0.058	0.287
tv_age11most days	0.282	0.054	5.227	0.000	0.176	0.388
child_reading_age11Sometimes	0.492	0.063	7.806	0.000	0.369	0.616
child_reading_age11Most days	0.782	0.062	12.642	0.000	0.661	0.904

arith_age11 <- lm(z_maths_ability_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

tab_model(arith_age11, digits = 3)

	z maths ability age 11
Predictors	Estimates	CI	p
(Intercept)	-0.658	-0.941 – -0.376	<0.001
sex [Female]	-0.056	-0.089 – -0.022	0.001
sc prenatal [Unemployed,sick]	-0.213	-1.970 – 1.543	0.812
sc prenatal [I]	0.429	0.274 – 0.584	<0.001
sc prenatal [II]	0.316	0.187 – 0.444	<0.001
sc prenatal [III]	0.120	0.003 – 0.237	0.045
sc prenatal [IV]	0.070	-0.055 – 0.195	0.274
sc prenatal [V]	-0.092	-0.221 – 0.037	0.161
sc prenatal [Students]	0.784	0.213 – 1.354	0.007
sc prenatal [Dead or away]	-0.915	-2.675 – 0.845	0.308
sc prenatal [Retired]	0.799	-0.452 – 2.051	0.211
sc prenatal [Single,no husbnd]	0.004	-0.156 – 0.165	0.957
smoking preg [5mths-no change]	0.032	-0.133 – 0.197	0.705
smoking preg [Now non-smoker]	0.026	-0.150 – 0.201	0.775
smoking preg [1 to 4 now]	-0.044	-0.233 – 0.145	0.647
smoking preg [5 to 9 now]	-0.059	-0.255 – 0.136	0.552
smoking preg [10 to 14 now]	-0.123	-0.346 – 0.101	0.282
smoking preg [15 to 19 now]	-0.150	-0.428 – 0.128	0.291
smoking preg [20 to 24 now]	-0.172	-0.486 – 0.141	0.281
smoking preg [25 to 29 now]	-0.112	-0.643 – 0.420	0.681
smoking preg [30 or more now]	-0.510	-1.089 – 0.068	0.084
smoking_pregVariable-5mths	-0.072	-0.251 – 0.107	0.431
birth weight	0.000	0.000 – 0.001	0.036
parent read level [Occasionally]	0.092	0.021 – 0.163	0.011
parent read level [Weekly]	0.258	0.192 – 0.325	<0.001
sc age7 [NA, unclear]	-0.381	-0.616 – -0.147	0.001
sc age7 [No male head]	-0.456	-0.616 – -0.295	<0.001
sc age7 [II]	-0.188	-0.300 – -0.076	0.001
sc age7 [III non-manual]	-0.204	-0.323 – -0.085	0.001
sc age7 [III manual]	-0.446	-0.559 – -0.333	<0.001
sc age7 [IV non-manual]	-0.446	-0.616 – -0.277	<0.001
sc age7 [IV manual]	-0.521	-0.641 – -0.401	<0.001
sc age7 [V]	-0.639	-0.775 – -0.503	<0.001
breast f [NA]	0.979	0.101 – 1.856	0.029
breast f [Dont know]	-0.318	-0.570 – -0.066	0.013
breast f [No]	-0.047	-0.092 – -0.002	0.042
breast f [Over one month]	0.078	0.035 – 0.120	<0.001
sc age11 [NA,unclear]	-0.255	-0.369 – -0.141	<0.001
sc age11 [II]	0.071	-0.034 – 0.176	0.183
sc age11 [III non manual]	0.022	-0.094 – 0.139	0.706
sc age11 [III manual]	-0.143	-0.250 – -0.036	0.009
sc age11 [IV]	-0.241	-0.354 – -0.127	<0.001
sc age11 [V]	-0.245	-0.379 – -0.110	<0.001
sc age11 [No male head]	-0.266	-0.404 – -0.128	<0.001
tv age11 [Sometimes]	0.173	0.056 – 0.290	0.004
tv age11 [most days]	0.261	0.153 – 0.370	<0.001
child reading age11 [Sometimes]	0.393	0.267 – 0.520	<0.001
child reading age11 [Most days]	0.676	0.552 – 0.801	<0.001
Observations	10966
R² / R² adjusted	0.186 / 0.182

copying_age11 <- lm(z_copying_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

tab_model(copying_age11, digits = 3)

	z copying age 11
Predictors	Estimates	CI	p
(Intercept)	0.028	-0.270 – 0.327	0.853
sex [Female]	-0.049	-0.085 – -0.014	0.007
sc prenatal [Unemployed,sick]	-0.115	-1.971 – 1.741	0.903
sc prenatal [I]	0.043	-0.121 – 0.207	0.608
sc prenatal [II]	0.005	-0.131 – 0.141	0.946
sc prenatal [III]	-0.057	-0.181 – 0.067	0.365
sc prenatal [IV]	-0.047	-0.179 – 0.085	0.487
sc prenatal [V]	-0.190	-0.326 – -0.054	0.006
sc prenatal [Students]	0.055	-0.548 – 0.657	0.859
sc prenatal [Dead or away]	-0.972	-2.832 – 0.888	0.306
sc prenatal [Retired]	1.512	0.190 – 2.835	0.025
sc prenatal [Single,no husbnd]	-0.134	-0.304 – 0.035	0.120
smoking preg [5mths-no change]	-0.113	-0.287 – 0.062	0.205
smoking preg [Now non-smoker]	-0.122	-0.308 – 0.063	0.196
smoking preg [1 to 4 now]	-0.191	-0.391 – 0.009	0.062
smoking preg [5 to 9 now]	-0.206	-0.413 – 0.001	0.051
smoking preg [10 to 14 now]	-0.242	-0.479 – -0.006	0.044
smoking preg [15 to 19 now]	-0.234	-0.528 – 0.060	0.119
smoking preg [20 to 24 now]	-0.610	-0.942 – -0.279	<0.001
smoking preg [25 to 29 now]	-0.425	-0.987 – 0.137	0.138
smoking preg [30 or more now]	-0.643	-1.255 – -0.032	0.039
smoking_pregVariable-5mths	-0.194	-0.383 – -0.005	0.044
birth weight	0.000	-0.000 – 0.000	0.745
parent read level [Occasionally]	0.057	-0.018 – 0.132	0.136
parent read level [Weekly]	0.096	0.026 – 0.166	0.007
sc age7 [NA, unclear]	-0.084	-0.332 – 0.164	0.505
sc age7 [No male head]	-0.174	-0.344 – -0.004	0.045
sc age7 [II]	-0.017	-0.136 – 0.101	0.777
sc age7 [III non-manual]	-0.012	-0.138 – 0.114	0.849
sc age7 [III manual]	-0.087	-0.206 – 0.033	0.155
sc age7 [IV non-manual]	-0.117	-0.296 – 0.062	0.200
sc age7 [IV manual]	-0.157	-0.284 – -0.031	0.015
sc age7 [V]	-0.226	-0.370 – -0.083	0.002
breast f [NA]	0.402	-0.526 – 1.329	0.396
breast f [Dont know]	-0.098	-0.364 – 0.168	0.471
breast f [No]	-0.062	-0.110 – -0.014	0.011
breast f [Over one month]	0.034	-0.011 – 0.079	0.134
sc age11 [NA,unclear]	-0.252	-0.372 – -0.131	<0.001
sc age11 [II]	-0.096	-0.207 – 0.015	0.089
sc age11 [III non manual]	-0.172	-0.295 – -0.049	0.006
sc age11 [III manual]	-0.211	-0.323 – -0.098	<0.001
sc age11 [IV]	-0.212	-0.332 – -0.092	0.001
sc age11 [V]	-0.262	-0.404 – -0.119	<0.001
sc age11 [No male head]	-0.180	-0.326 – -0.034	0.016
tv age11 [Sometimes]	0.013	-0.111 – 0.137	0.841
tv age11 [most days]	0.088	-0.026 – 0.203	0.131
child reading age11 [Sometimes]	0.295	0.161 – 0.429	<0.001
child reading age11 [Most days]	0.330	0.198 – 0.461	<0.001
Observations	10966
R² / R² adjusted	0.037 / 0.033

non_verbal_age11 <- lm(z_n_verbal_ability_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

tab_model(non_verbal_age11, digits = 3)

	z n verbal ability age 11
Predictors	Estimates	CI	p
(Intercept)	-0.693	-0.978 – -0.408	<0.001
sex [Female]	0.017	-0.017 – 0.051	0.326
sc prenatal [Unemployed,sick]	0.198	-1.574 – 1.970	0.827
sc prenatal [I]	0.320	0.164 – 0.476	<0.001
sc prenatal [II]	0.203	0.073 – 0.333	0.002
sc prenatal [III]	0.110	-0.009 – 0.228	0.069
sc prenatal [IV]	0.033	-0.093 – 0.160	0.604
sc prenatal [V]	-0.108	-0.238 – 0.022	0.105
sc prenatal [Students]	0.370	-0.205 – 0.946	0.207
sc prenatal [Dead or away]	-0.693	-2.469 – 1.083	0.444
sc prenatal [Retired]	-0.236	-1.499 – 1.027	0.714
sc prenatal [Single,no husbnd]	0.031	-0.131 – 0.193	0.705
smoking preg [5mths-no change]	-0.004	-0.171 – 0.162	0.960
smoking preg [Now non-smoker]	0.021	-0.156 – 0.198	0.813
smoking preg [1 to 4 now]	-0.086	-0.277 – 0.104	0.375
smoking preg [5 to 9 now]	-0.145	-0.343 – 0.052	0.150
smoking preg [10 to 14 now]	-0.077	-0.303 – 0.148	0.501
smoking preg [15 to 19 now]	-0.204	-0.485 – 0.076	0.154
smoking preg [20 to 24 now]	-0.107	-0.423 – 0.210	0.508
smoking preg [25 to 29 now]	0.176	-0.361 – 0.712	0.520
smoking preg [30 or more now]	-0.249	-0.833 – 0.335	0.403
smoking_pregVariable-5mths	-0.119	-0.299 – 0.062	0.198
birth weight	0.000	-0.000 – 0.000	0.247
parent read level [Occasionally]	0.160	0.089 – 0.232	<0.001
parent read level [Weekly]	0.292	0.225 – 0.359	<0.001
sc age7 [NA, unclear]	-0.368	-0.605 – -0.131	0.002
sc age7 [No male head]	-0.407	-0.570 – -0.245	<0.001
sc age7 [II]	-0.148	-0.261 – -0.035	0.011
sc age7 [III non-manual]	-0.162	-0.282 – -0.042	0.008
sc age7 [III manual]	-0.357	-0.471 – -0.243	<0.001
sc age7 [IV non-manual]	-0.360	-0.531 – -0.188	<0.001
sc age7 [IV manual]	-0.426	-0.547 – -0.305	<0.001
sc age7 [V]	-0.613	-0.750 – -0.476	<0.001
breast f [NA]	0.759	-0.126 – 1.644	0.093
breast f [Dont know]	-0.029	-0.283 – 0.226	0.825
breast f [No]	-0.085	-0.130 – -0.039	<0.001
breast f [Over one month]	0.080	0.037 – 0.123	<0.001
sc age11 [NA,unclear]	-0.304	-0.419 – -0.189	<0.001
sc age11 [II]	-0.026	-0.132 – 0.080	0.629
sc age11 [III non manual]	-0.113	-0.231 – 0.004	0.059
sc age11 [III manual]	-0.225	-0.332 – -0.117	<0.001
sc age11 [IV]	-0.280	-0.394 – -0.165	<0.001
sc age11 [V]	-0.343	-0.479 – -0.208	<0.001
sc age11 [No male head]	-0.291	-0.430 – -0.151	<0.001
tv age11 [Sometimes]	0.207	0.089 – 0.325	0.001
tv age11 [most days]	0.308	0.199 – 0.418	<0.001
child reading age11 [Sometimes]	0.437	0.309 – 0.565	<0.001
child reading age11 [Most days]	0.677	0.552 – 0.803	<0.001
Observations	10966
R² / R² adjusted	0.152 / 0.149

verbal_age11 <- lm(z_verbal_ability_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

tab_model(verbal_age11, digits = 3)

	z verbal ability age 11
Predictors	Estimates	CI	p
(Intercept)	-0.416	-0.719 – -0.113	0.007
sex [Female]	-0.104	-0.140 – -0.068	<0.001
sc prenatal [Unemployed,sick]	0.719	-1.167 – 2.606	0.455
sc prenatal [I]	0.302	0.135 – 0.468	<0.001
sc prenatal [II]	0.225	0.087 – 0.363	0.001
sc prenatal [III]	0.200	0.074 – 0.325	0.002
sc prenatal [IV]	0.149	0.015 – 0.283	0.030
sc prenatal [V]	0.069	-0.069 – 0.208	0.326
sc prenatal [Students]	0.452	-0.160 – 1.065	0.148
sc prenatal [Dead or away]	-0.574	-2.465 – 1.317	0.552
sc prenatal [Retired]	-0.040	-1.384 – 1.305	0.954
sc prenatal [Single,no husbnd]	0.103	-0.069 – 0.275	0.242
smoking preg [5mths-no change]	-0.059	-0.236 – 0.118	0.515
smoking preg [Now non-smoker]	0.012	-0.177 – 0.200	0.902
smoking preg [1 to 4 now]	-0.083	-0.286 – 0.120	0.424
smoking preg [5 to 9 now]	-0.081	-0.292 – 0.129	0.448
smoking preg [10 to 14 now]	-0.064	-0.304 – 0.176	0.602
smoking preg [15 to 19 now]	-0.205	-0.503 – 0.094	0.179
smoking preg [20 to 24 now]	-0.313	-0.650 – 0.024	0.068
smoking preg [25 to 29 now]	-0.500	-1.071 – 0.072	0.086
smoking preg [30 or more now]	-0.061	-0.682 – 0.561	0.848
smoking_pregVariable-5mths	-0.084	-0.277 – 0.108	0.388
birth weight	0.000	-0.000 – 0.000	0.242
parent read level [Occasionally]	0.064	-0.012 – 0.140	0.100
parent read level [Weekly]	0.125	0.054 – 0.196	0.001
sc age7 [NA, unclear]	-0.315	-0.567 – -0.063	0.014
sc age7 [No male head]	-0.271	-0.444 – -0.098	0.002
sc age7 [II]	-0.110	-0.230 – 0.011	0.074
sc age7 [III non-manual]	-0.139	-0.267 – -0.012	0.033
sc age7 [III manual]	-0.306	-0.427 – -0.185	<0.001
sc age7 [IV non-manual]	-0.252	-0.434 – -0.070	0.007
sc age7 [IV manual]	-0.352	-0.481 – -0.223	<0.001
sc age7 [V]	-0.421	-0.567 – -0.275	<0.001
breast f [NA]	-0.014	-0.957 – 0.928	0.976
breast f [Dont know]	-0.301	-0.571 – -0.030	0.029
breast f [No]	-0.011	-0.060 – 0.038	0.654
breast f [Over one month]	0.058	0.013 – 0.104	0.012
sc age11 [NA,unclear]	-0.199	-0.321 – -0.077	0.001
sc age11 [II]	-0.042	-0.155 – 0.070	0.461
sc age11 [III non manual]	-0.080	-0.205 – 0.045	0.208
sc age11 [III manual]	-0.146	-0.260 – -0.031	0.013
sc age11 [IV]	-0.211	-0.333 – -0.089	0.001
sc age11 [V]	-0.154	-0.298 – -0.009	0.037
sc age11 [No male head]	-0.194	-0.342 – -0.045	0.011
tv age11 [Sometimes]	0.099	-0.027 – 0.226	0.122
tv age11 [most days]	0.174	0.057 – 0.290	0.004
child reading age11 [Sometimes]	0.340	0.204 – 0.477	<0.001
child reading age11 [Most days]	0.517	0.383 – 0.651	<0.001
Observations	10966
R² / R² adjusted	0.064 / 0.060

reading_age11 <- lm(z_reading_ability_age11 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11, data = df_complete_7_11)

tab_model(reading_age11, digits = 3)

	z reading ability age 11
Predictors	Estimates	CI	p
(Intercept)	-0.888	-1.166 – -0.610	<0.001
sex [Female]	-0.012	-0.046 – 0.021	0.460
sc prenatal [Unemployed,sick]	0.376	-1.351 – 2.104	0.669
sc prenatal [I]	0.514	0.362 – 0.667	<0.001
sc prenatal [II]	0.278	0.152 – 0.405	<0.001
sc prenatal [III]	0.142	0.027 – 0.257	0.016
sc prenatal [IV]	0.076	-0.047 – 0.199	0.225
sc prenatal [V]	-0.025	-0.152 – 0.101	0.694
sc prenatal [Students]	0.642	0.081 – 1.203	0.025
sc prenatal [Dead or away]	-1.246	-2.977 – 0.485	0.158
sc prenatal [Retired]	0.878	-0.352 – 2.109	0.162
sc prenatal [Single,no husbnd]	0.083	-0.075 – 0.241	0.301
smoking preg [5mths-no change]	0.017	-0.146 – 0.179	0.841
smoking preg [Now non-smoker]	0.082	-0.090 – 0.255	0.349
smoking preg [1 to 4 now]	-0.022	-0.208 – 0.165	0.820
smoking preg [5 to 9 now]	-0.047	-0.240 – 0.145	0.631
smoking preg [10 to 14 now]	-0.079	-0.299 – 0.141	0.480
smoking preg [15 to 19 now]	0.012	-0.261 – 0.286	0.930
smoking preg [20 to 24 now]	-0.232	-0.541 – 0.076	0.140
smoking preg [25 to 29 now]	0.175	-0.348 – 0.698	0.513
smoking preg [30 or more now]	-0.375	-0.944 – 0.194	0.196
smoking_pregVariable-5mths	-0.007	-0.182 – 0.169	0.942
birth weight	0.000	-0.000 – 0.000	0.080
parent read level [Occasionally]	0.087	0.017 – 0.156	0.015
parent read level [Weekly]	0.292	0.227 – 0.358	<0.001
sc age7 [NA, unclear]	-0.556	-0.787 – -0.325	<0.001
sc age7 [No male head]	-0.370	-0.528 – -0.211	<0.001
sc age7 [II]	-0.132	-0.242 – -0.022	0.019
sc age7 [III non-manual]	-0.103	-0.220 – 0.014	0.085
sc age7 [III manual]	-0.367	-0.478 – -0.255	<0.001
sc age7 [IV non-manual]	-0.345	-0.512 – -0.178	<0.001
sc age7 [IV manual]	-0.428	-0.546 – -0.310	<0.001
sc age7 [V]	-0.577	-0.711 – -0.443	<0.001
breast f [NA]	0.503	-0.360 – 1.366	0.253
breast f [Dont know]	-0.089	-0.337 – 0.159	0.483
breast f [No]	-0.045	-0.090 – -0.001	0.047
breast f [Over one month]	0.080	0.039 – 0.122	<0.001
sc age11 [NA,unclear]	-0.200	-0.312 – -0.088	<0.001
sc age11 [II]	0.076	-0.027 – 0.180	0.148
sc age11 [III non manual]	0.023	-0.092 – 0.137	0.695
sc age11 [III manual]	-0.155	-0.260 – -0.050	0.004
sc age11 [IV]	-0.241	-0.353 – -0.130	<0.001
sc age11 [V]	-0.262	-0.395 – -0.130	<0.001
sc age11 [No male head]	-0.231	-0.367 – -0.096	0.001
tv age11 [Sometimes]	0.171	0.056 – 0.287	0.004
tv age11 [most days]	0.252	0.146 – 0.359	<0.001
child reading age11 [Sometimes]	0.426	0.301 – 0.551	<0.001
child reading age11 [Most days]	0.808	0.685 – 0.930	<0.001
Observations	10966
R² / R² adjusted	0.199 / 0.195

extract_age11_exposures <- function(model, outcome_label) {
  
  coef_df <- tidy(model, conf.int = TRUE) %>%
    filter(term %in% c(
      "tv_age11Sometimes",
      "tv_age11most days",
      "child_reading_age11Sometimes",
      "child_reading_age11Most days"
    )) %>%
    mutate(
      exposure = case_when(
        term == "tv_age11Sometimes" ~ "TV: Sometimes",
        term == "tv_age11most days" ~ "TV: Most days",
        term == "child_reading_age11Sometimes" ~ "Reading: Sometimes",
        term == "child_reading_age11Most days" ~ "Reading: Most days"
      ),
      domain = case_when(
        grepl("^tv_", term) ~ "Passive SB (TV)",
        grepl("^child_reading_", term) ~ "Cognitively engaged SB (Reading)"
      ),
      outcome = outcome_label
    ) %>%
    dplyr::select(outcome, domain, exposure, estimate, conf.low, conf.high)
  
  # referências (Hardly ever)
  ref_df <- tibble(
    outcome   = rep(outcome_label, 2),
    domain    = c("Passive SB (TV)", "Cognitively engaged SB (Reading)"),
    exposure  = c("TV: Hardly ever", "Reading: Hardly ever"),
    estimate  = 0,
    conf.low  = 0,
    conf.high = 0
  )
  
  bind_rows(ref_df, coef_df)
}

plot_df_age11 <- bind_rows(
  extract_age11_exposures(global_age11,     "Global cognition"),
  extract_age11_exposures(arith_age11,      "Arithmetic"),
  extract_age11_exposures(copying_age11,    "Copying"),
  extract_age11_exposures(non_verbal_age11, "Non-verbal ability"),
  extract_age11_exposures(verbal_age11,     "Verbal ability"),
  extract_age11_exposures(reading_age11,    "Reading ability")
)

plot_df_age11$exposure <- factor(
  plot_df_age11$exposure,
  levels = c(
    "TV: Hardly ever", "TV: Sometimes", "TV: Most days",
    "Reading: Hardly ever", "Reading: Sometimes", "Reading: Most days"
  )
)

plot_df_age11$dose <- NA

plot_df_age11$dose[plot_df_age11$exposure=="Reading: Hardly ever"] <- "Hardly ever"
plot_df_age11$dose[plot_df_age11$exposure=="TV: Hardly ever"] <- "Hardly ever"
plot_df_age11$dose[plot_df_age11$exposure=="Reading: Sometimes"] <- "Sometimes"
plot_df_age11$dose[plot_df_age11$exposure=="TV: Sometimes"] <- "Sometimes"
plot_df_age11$dose[plot_df_age11$exposure=="TV: Most days"] <- "Most days"
plot_df_age11$dose[plot_df_age11$exposure=="Reading: Most days"] <- "Most days"

library(ggplot2)
plot_df_age11$dose <- factor(plot_df_age11$dose, levels = c("Most days", 
                                                            "Sometimes",
                                                            "Hardly ever"), 
                             ordered = T)

ggplot(plot_df_age11,
       aes(x = estimate, y = outcome, colour = domain, fill=domain, shape = dose)) +
  
  scale_color_jama() +
  scale_fill_jama() +

  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  
  geom_pointrange(
    aes(xmin = conf.low, xmax = conf.high, shape = dose),
    size = 1, position = position_dodge2(width = 0.9)
  ) +
  
  facet_wrap(~ domain, ncol = 1, scales = "free_y") +
  
  labs(
    x = "Standardized difference in cognitive score (z)",
    y = NULL,
    shape = "Exposure category"
  ) +
  
  theme_minimal(base_size = 13) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    strip.text = element_text(face = "bold")
  )

library(broom)
library(dplyr)

extract_age7_11_exposures <- function(model, outcome_label) {
  
  coef_df <- tidy(model, conf.int = TRUE) %>%
    filter(term %in% c(
      # Age 7
      "parent_read_levelOccasionally",
      "parent_read_levelWeekly",
      # Age 11
      "tv_age11Sometimes",
      "tv_age11most days",
      "child_reading_age11Sometimes",
      "child_reading_age11Most days"
    )) %>%
    mutate(
      exposure = case_when(
        term == "parent_read_levelOccasionally" ~ "Reading: Sometimes",
        term == "parent_read_levelWeekly"       ~ "Reading: Most days",
        term == "child_reading_age11Sometimes"  ~ "Reading: Sometimes",
        term == "child_reading_age11Most days"  ~ "Reading: Most days",
        term == "tv_age11Sometimes"              ~ "TV: Sometimes",
        term == "tv_age11most days"              ~ "TV: Most days"
      ),
      domain = case_when(
        grepl("^tv_", term) ~ "Passive SB (TV)",
        TRUE                ~ "Cognitively engaged SB (Reading)"
      ),
      age = case_when(
        grepl("^parent_", term)         ~ "Age 7",
        grepl("^tv_", term)             ~ "Age 11",
        grepl("^child_reading_", term)  ~ "Age 11"
      ),
      outcome = outcome_label
    ) %>%
    dplyr::select(outcome, domain, age, exposure, estimate, conf.low, conf.high)
  
  # referências (Hardly ever)
  ref_df <- tibble(
    outcome   = outcome_label,
    domain    = c(
      "Cognitively engaged SB (Reading)",
      "Cognitively engaged SB (Reading)",
      "Passive SB (TV)"
    ),
    age       = c("Age 7", "Age 11", "Age 11"),
    exposure  = c(
      "Reading: Hardly ever",
      "Reading: Hardly ever",
      "TV: Hardly ever"
    ),
    estimate  = 0,
    conf.low  = 0,
    conf.high = 0
  )
  
  bind_rows(ref_df, coef_df)
}


plot_df <- bind_rows(
  extract_age7_11_exposures(global_age7,  "Global cognition"),
  extract_age7_11_exposures(arith_age7,   "Arithmetic"),
  extract_age7_11_exposures(copying_age7, "Copying"),
  extract_age7_11_exposures(draw_age7,    "Draw-a-man"),
  extract_age7_11_exposures(reading_age7, "Reading"),
  
  extract_age7_11_exposures(global_age11,     "Global cognition"),
  extract_age7_11_exposures(arith_age11,      "Arithmetic"),
  extract_age7_11_exposures(copying_age11,    "Copying"),
  extract_age7_11_exposures(non_verbal_age11, "Non-verbal ability"),
  extract_age7_11_exposures(verbal_age11,     "Verbal ability"),
  extract_age7_11_exposures(reading_age11,    "Reading ability")
)

plot_df$dose <- factor(
  gsub(".*: ", "", plot_df$exposure),
  levels = c("Hardly ever", "Sometimes", "Most days"),
  ordered = TRUE
)


library(ggplot2)
library(ggsci)

ggplot(plot_df,
       aes(x = estimate, y = outcome,
           colour = age, shape = dose)) +
  
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  
  geom_pointrange(
    aes(xmin = conf.low, xmax = conf.high),
    position = position_dodge(width = 0.6),
    size = 0.9
  ) +
  
  facet_wrap(~ domain, ncol = 1, scales = "free_y") +
  
  scale_color_jama() +
  
  labs(
    x = "Standardized difference in cognitive score (z)",
    y = NULL,
    colour = "Age",
    shape  = "Exposure frequency"
  ) +
  
  theme_minimal(base_size = 13) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    strip.text = element_text(face = "bold")
  )

COGNITIVE FUNCTION AT AGE 16

df_complete_7_16 <- df_complete_7_11 %>%
  filter(reading_test_age16!=-1 &
            maths_test_age16!=-1)

# Age 16
#Reading comprehension test
df_complete_7_16$mean_reading_ability_age16 <- mean(df_complete_7_16$reading_test_age16)
df_complete_7_16$sd_reading_ability_age16 <- sd(df_complete_7_16$reading_test_age16)
df_complete_7_16$z_reading_ability_age16 <- (df_complete_7_16$reading_test_age16-df_complete_7_16$mean_reading_ability_age16)/df_complete_7_16$sd_reading_ability_age16
df_complete_7_16$z_reading_ability_age16 = scale(df_complete_7_16$z_reading_ability_age16)[,1]

hist(df_complete_7_16$z_reading_ability_age16)

#Mathematics test score
df_complete_7_16$mean_maths_ability_age16 <- mean(df_complete_7_16$maths_test_age16)
df_complete_7_16$sd_maths_ability_age16 <- sd(df_complete_7_16$maths_test_age16)
df_complete_7_16$z_maths_ability_age16 <- (df_complete_7_16$maths_test_age16-df_complete_7_16$mean_maths_ability_age16)/df_complete_7_16$sd_maths_ability_age16
df_complete_7_16$z_maths_ability_age16 = scale(df_complete_7_16$z_maths_ability_age16)[,1]

hist(df_complete_7_16$z_maths_ability_age16)

# averaging all z scores
df_complete_7_16$global_cog_age16 <- (df_complete_7_16$z_reading_ability_age16 + df_complete_7_16$z_maths_ability_age16) / 2

# creating a z score for the averaged variable
df_complete_7_16$z_global_cog_age16 = scale(df_complete_7_16$global_cog_age16)[,1]

hist(df_complete_7_16$z_global_cog_age16)

# Reference levels
library(dplyr)
DescTools::Desc(df_complete_7_16$tv_age16)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_16$tv_age16 (factor)
## 
##   length      n    NAs unique levels  dupes
##    9'082  9'082      0      5      5      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1        Often  5'736  63.2%    5'736    63.2%
## 2    Sometimes  2'546  28.0%    8'282    91.2%
## 3  Hardly ever    426   4.7%    8'708    95.9%
## 4           NA    324   3.6%    9'032    99.4%
## 5    No chance     50   0.6%    9'082   100.0%

DescTools::Desc(df_complete_7_16$child_read_age16)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_16$child_read_age16 (factor)
## 
##   length      n    NAs unique levels  dupes
##    9'082  9'082      0      5      5      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1    Sometimes  3'838  42.3%    3'838    42.3%
## 2        Often  2'242  24.7%    6'080    66.9%
## 3  Hardly ever  2'136  23.5%    8'216    90.5%
## 4           NA    592   6.5%    8'808    97.0%
## 5    No chance    274   3.0%    9'082   100.0%

DescTools::Desc(df_complete_7_16$play_indoor_age16)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_16$play_indoor_age16 (factor)
## 
##   length      n    NAs unique levels  dupes
##    9'082  9'082      0      5      5      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1  Hardly ever  2'716  29.9%    2'716    29.9%
## 2    Sometimes  2'655  29.2%    5'371    59.1%
## 3        Often  2'096  23.1%    7'467    82.2%
## 4    No chance    851   9.4%    8'318    91.6%
## 5           NA    764   8.4%    9'082   100.0%

DescTools::Desc(df_complete_7_16$play_outdoor_age16)

## ────────────────────────────────────────────────────────────────────────────── 
## df_complete_7_16$play_outdoor_age16 (factor)
## 
##   length      n    NAs unique levels  dupes
##    9'082  9'082      0      5      5      y
##          100.0%   0.0%                     
## 
##          level   freq   perc  cumfreq  cumperc
## 1        Often  3'176  35.0%    3'176    35.0%
## 2    Sometimes  3'009  33.1%    6'185    68.1%
## 3  Hardly ever  2'048  22.6%    8'233    90.7%
## 4           NA    585   6.4%    8'818    97.1%
## 5    No chance    264   2.9%    9'082   100.0%

df_complete_7_16$pa_age16 <- dplyr::case_when(
  
  is.na(df_complete_7_16$play_indoor_age16) &
  is.na(df_complete_7_16$play_outdoor_age16) ~ NA_character_,
  
  df_complete_7_16$play_indoor_age16 == "Most days" |
  df_complete_7_16$play_outdoor_age16 == "Most days" ~ "Most days",
  
  df_complete_7_16$play_indoor_age16 == "Sometimes" |
  df_complete_7_16$play_outdoor_age16 == "Sometimes" ~ "Sometimes",
  
  df_complete_7_16$play_indoor_age16 == "Hardly ever" &
  df_complete_7_16$play_outdoor_age16 == "Hardly ever" ~ "Hardly ever"
)

df_complete_7_16$pa_age16 <- factor(
  df_complete_7_16$pa_age16,
  levels = c("Hardly ever", "Sometimes", "Most days")
)

df_complete_7_16$tv_age16 <- factor(
  df_complete_7_16$tv_age16,
  levels = c("No chance", "Hardly ever", "Sometimes", "Often")
)

df_complete_7_16$tv_age16 <- fct_collapse(df_complete_7_16$tv_age16,
                                                "Hardly ever" = c("No chance", "Hardly ever"))


df_complete_7_16$child_read_age16 <- factor(
  df_complete_7_16$child_read_age16,
  levels = c("Hardly ever", "Sometimes", "Most days")
)


global_age16 <- lm(z_global_cog_age16 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11 +
                   sc_age16 + smoking_age16 + alcohol_age16 +
                   child_read_age16 + tv_age16 + school_attendance_age16, data = df_complete_7_16)

tab_model(global_age16, digits = 3)

	z global cog age 16
Predictors	Estimates	CI	p
(Intercept)	-1.079	-1.485 – -0.674	<0.001
sex [Female]	-0.216	-0.261 – -0.172	<0.001
sc prenatal [I]	0.400	0.190 – 0.609	<0.001
sc prenatal [II]	0.358	0.190 – 0.527	<0.001
sc prenatal [III]	0.185	0.032 – 0.339	0.018
sc prenatal [IV]	0.126	-0.037 – 0.289	0.129
sc prenatal [V]	0.013	-0.156 – 0.183	0.877
sc prenatal [Students]	0.539	-0.089 – 1.167	0.093
sc prenatal [Retired]	-0.056	-1.674 – 1.562	0.946
sc prenatal [Single,no husbnd]	0.233	0.022 – 0.444	0.031
smoking preg [5mths-no change]	0.120	-0.090 – 0.331	0.262
smoking preg [Now non-smoker]	0.133	-0.092 – 0.358	0.246
smoking preg [1 to 4 now]	0.023	-0.218 – 0.265	0.850
smoking preg [5 to 9 now]	0.075	-0.178 – 0.328	0.562
smoking preg [10 to 14 now]	0.094	-0.189 – 0.378	0.513
smoking preg [15 to 19 now]	0.038	-0.342 – 0.418	0.845
smoking preg [20 to 24 now]	0.265	-0.154 – 0.684	0.216
smoking preg [25 to 29 now]	0.380	-0.301 – 1.060	0.274
smoking preg [30 or more now]	-0.003	-0.686 – 0.679	0.993
smoking_pregVariable-5mths	0.079	-0.148 – 0.306	0.496
birth weight	0.000	0.000 – 0.001	0.038
parent read level [Occasionally]	0.040	-0.053 – 0.133	0.402
parent read level [Weekly]	0.246	0.159 – 0.333	<0.001
sc age7 [NA, unclear]	-0.424	-0.731 – -0.118	0.007
sc age7 [No male head]	-0.434	-0.657 – -0.210	<0.001
sc age7 [II]	-0.148	-0.307 – 0.010	0.067
sc age7 [III non-manual]	-0.162	-0.332 – 0.007	0.060
sc age7 [III manual]	-0.330	-0.489 – -0.170	<0.001
sc age7 [IV non-manual]	-0.353	-0.585 – -0.121	0.003
sc age7 [IV manual]	-0.427	-0.595 – -0.259	<0.001
sc age7 [V]	-0.453	-0.643 – -0.263	<0.001
breast f [NA]	0.640	-0.484 – 1.764	0.264
breast f [Dont know]	-0.120	-0.434 – 0.194	0.455
breast f [No]	-0.048	-0.107 – 0.010	0.103
breast f [Over one month]	0.097	0.042 – 0.152	0.001
sc age11 [NA,unclear]	-0.231	-0.391 – -0.070	0.005
sc age11 [II]	0.021	-0.127 – 0.169	0.781
sc age11 [III non manual]	-0.009	-0.172 – 0.153	0.912
sc age11 [III manual]	-0.093	-0.243 – 0.058	0.226
sc age11 [IV]	-0.195	-0.354 – -0.036	0.016
sc age11 [V]	-0.178	-0.366 – 0.009	0.062
sc age11 [No male head]	-0.083	-0.275 – 0.110	0.400
tv age11 [Sometimes]	0.153	-0.006 – 0.312	0.059
tv age11 [most days]	0.248	0.101 – 0.396	0.001
child reading age11 [Sometimes]	0.347	0.188 – 0.507	<0.001
child reading age11 [Most days]	0.542	0.385 – 0.699	<0.001
sc age16 [NA,NMH,forces]	-0.267	-0.424 – -0.109	0.001
sc age16 [II]	-0.111	-0.270 – 0.049	0.174
sc age16 [III non-manual]	-0.108	-0.282 – 0.067	0.226
sc age16 [III manual]	-0.198	-0.358 – -0.039	0.015
sc age16 [IV non-manual]	-0.174	-0.446 – 0.098	0.210
sc age16 [IV manual]	-0.228	-0.402 – -0.054	0.010
sc age16 [V]	-0.321	-0.519 – -0.122	0.002
sc age16 [Unclear]	-0.214	-0.503 – 0.074	0.145
smoking age16 [NA]	-0.677	-0.928 – -0.426	<0.001
smoking age16 [Less 1 a week]	-0.127	-0.259 – 0.005	0.058
smoking age16 [1 to 9 a week]	-0.262	-0.354 – -0.169	<0.001
smoking age16 [10 to 19 a week]	-0.284	-0.387 – -0.181	<0.001
smoking age16 [20 to 29 a week]	-0.355	-0.457 – -0.253	<0.001
smoking age16 [30 to 39 a week]	-0.355	-0.460 – -0.250	<0.001
smoking age16 [40 to 49 a week]	-0.386	-0.496 – -0.276	<0.001
smoking age16 [50 to 59 a week]	-0.312	-0.431 – -0.193	<0.001
smoking age16 [60 or more a wk]	-0.355	-0.451 – -0.260	<0.001
alcohol age16 [NA]	-0.157	-0.517 – 0.204	0.394
alcohol age16 [Less than 1 week]	0.429	0.325 – 0.533	<0.001
alcohol age16 [2 to 4 weeks]	0.359	0.249 – 0.469	<0.001
alcohol age16 [5 to 8 weeks]	0.302	0.172 – 0.433	<0.001
alcohol age16 [9 to 12 weeks]	0.231	0.074 – 0.387	0.004
alcohol age16 [Over 12 weeks]	0.207	0.074 – 0.340	0.002
alcohol age16 [Do not remember]	0.213	0.097 – 0.328	<0.001
child read age16 [Sometimes]	0.208	0.161 – 0.254	<0.001
tv age16 [Sometimes]	-0.004	-0.108 – 0.100	0.939
tv age16 [Often]	-0.024	-0.123 – 0.075	0.632
school attendance age16	0.002	0.001 – 0.002	<0.001
Observations	5309
R² / R² adjusted	0.271 / 0.260

arith_age16 <- lm(z_maths_ability_age16 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11 +
                   sc_age16 + smoking_age16 + alcohol_age16 +
                   child_read_age16 + tv_age16 + school_attendance_age16, data = df_complete_7_16)

tab_model(arith_age16, digits = 3)

	z maths ability age 16
Predictors	Estimates	CI	p
(Intercept)	-0.454	-0.881 – -0.027	0.037
sex [Female]	-0.271	-0.318 – -0.224	<0.001
sc prenatal [I]	0.308	0.087 – 0.529	0.006
sc prenatal [II]	0.288	0.111 – 0.466	0.001
sc prenatal [III]	0.070	-0.091 – 0.231	0.393
sc prenatal [IV]	0.034	-0.138 – 0.205	0.701
sc prenatal [V]	-0.074	-0.252 – 0.105	0.419
sc prenatal [Students]	0.380	-0.281 – 1.041	0.260
sc prenatal [Retired]	-0.548	-2.251 – 1.155	0.528
sc prenatal [Single,no husbnd]	0.151	-0.071 – 0.373	0.184
smoking preg [5mths-no change]	0.178	-0.044 – 0.400	0.115
smoking preg [Now non-smoker]	0.134	-0.102 – 0.371	0.266
smoking preg [1 to 4 now]	0.086	-0.168 – 0.341	0.505
smoking preg [5 to 9 now]	0.138	-0.129 – 0.404	0.311
smoking preg [10 to 14 now]	0.093	-0.205 – 0.391	0.541
smoking preg [15 to 19 now]	0.095	-0.305 – 0.495	0.642
smoking preg [20 to 24 now]	0.250	-0.192 – 0.692	0.267
smoking preg [25 to 29 now]	0.400	-0.316 – 1.117	0.273
smoking preg [30 or more now]	0.015	-0.703 – 0.734	0.966
smoking_pregVariable-5mths	0.103	-0.136 – 0.343	0.398
birth weight	0.000	-0.000 – 0.001	0.084
parent read level [Occasionally]	-0.002	-0.100 – 0.096	0.964
parent read level [Weekly]	0.180	0.088 – 0.272	<0.001
sc age7 [NA, unclear]	-0.348	-0.671 – -0.025	0.035
sc age7 [No male head]	-0.342	-0.577 – -0.106	0.004
sc age7 [II]	-0.153	-0.320 – 0.014	0.072
sc age7 [III non-manual]	-0.192	-0.371 – -0.014	0.034
sc age7 [III manual]	-0.310	-0.478 – -0.142	<0.001
sc age7 [IV non-manual]	-0.377	-0.622 – -0.133	0.002
sc age7 [IV manual]	-0.391	-0.568 – -0.214	<0.001
sc age7 [V]	-0.389	-0.589 – -0.189	<0.001
breast f [NA]	0.267	-0.917 – 1.451	0.659
breast f [Dont know]	-0.289	-0.619 – 0.042	0.087
breast f [No]	-0.024	-0.086 – 0.037	0.437
breast f [Over one month]	0.104	0.046 – 0.161	<0.001
sc age11 [NA,unclear]	-0.222	-0.391 – -0.053	0.010
sc age11 [II]	0.043	-0.113 – 0.198	0.591
sc age11 [III non manual]	0.039	-0.133 – 0.210	0.659
sc age11 [III manual]	-0.075	-0.234 – 0.083	0.353
sc age11 [IV]	-0.154	-0.321 – 0.013	0.071
sc age11 [V]	-0.121	-0.318 – 0.076	0.229
sc age11 [No male head]	-0.124	-0.327 – 0.078	0.228
tv age11 [Sometimes]	0.105	-0.063 – 0.272	0.219
tv age11 [most days]	0.167	0.011 – 0.322	0.036
child reading age11 [Sometimes]	0.240	0.072 – 0.407	0.005
child reading age11 [Most days]	0.377	0.212 – 0.543	<0.001
sc age16 [NA,NMH,forces]	-0.355	-0.521 – -0.190	<0.001
sc age16 [II]	-0.198	-0.366 – -0.030	0.021
sc age16 [III non-manual]	-0.195	-0.378 – -0.011	0.038
sc age16 [III manual]	-0.278	-0.446 – -0.110	0.001
sc age16 [IV non-manual]	-0.233	-0.520 – 0.053	0.111
sc age16 [IV manual]	-0.290	-0.473 – -0.107	0.002
sc age16 [V]	-0.370	-0.579 – -0.161	0.001
sc age16 [Unclear]	-0.282	-0.586 – 0.022	0.069
smoking age16 [NA]	-0.544	-0.808 – -0.280	<0.001
smoking age16 [Less 1 a week]	-0.137	-0.276 – 0.002	0.053
smoking age16 [1 to 9 a week]	-0.245	-0.343 – -0.148	<0.001
smoking age16 [10 to 19 a week]	-0.258	-0.367 – -0.150	<0.001
smoking age16 [20 to 29 a week]	-0.348	-0.455 – -0.240	<0.001
smoking age16 [30 to 39 a week]	-0.335	-0.446 – -0.225	<0.001
smoking age16 [40 to 49 a week]	-0.452	-0.568 – -0.336	<0.001
smoking age16 [50 to 59 a week]	-0.354	-0.479 – -0.228	<0.001
smoking age16 [60 or more a wk]	-0.422	-0.522 – -0.321	<0.001
alcohol age16 [NA]	-0.231	-0.610 – 0.149	0.233
alcohol age16 [Less than 1 week]	0.211	0.101 – 0.320	<0.001
alcohol age16 [2 to 4 weeks]	0.151	0.035 – 0.267	0.011
alcohol age16 [5 to 8 weeks]	0.154	0.017 – 0.292	0.028
alcohol age16 [9 to 12 weeks]	0.090	-0.075 – 0.255	0.285
alcohol age16 [Over 12 weeks]	0.066	-0.074 – 0.206	0.355
alcohol age16 [Do not remember]	0.079	-0.042 – 0.201	0.201
child read age16 [Sometimes]	0.139	0.090 – 0.188	<0.001
tv age16 [Sometimes]	-0.022	-0.131 – 0.087	0.688
tv age16 [Often]	-0.050	-0.154 – 0.054	0.345
school attendance age16	0.002	0.001 – 0.003	<0.001
Observations	5309
R² / R² adjusted	0.221 / 0.210

reading_age16 <- lm(z_reading_ability_age16 ~ sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 + tv_age11 + child_reading_age11 +
                   sc_age16 + smoking_age16 + alcohol_age16 +
                   child_read_age16 + tv_age16 + school_attendance_age16, data = df_complete_7_16)

tab_model(reading_age16, digits = 3)

	z reading ability age 16
Predictors	Estimates	CI	p
(Intercept)	-1.505	-1.919 – -1.092	<0.001
sex [Female]	-0.122	-0.168 – -0.076	<0.001
sc prenatal [I]	0.417	0.203 – 0.632	<0.001
sc prenatal [II]	0.362	0.191 – 0.534	<0.001
sc prenatal [III]	0.266	0.110 – 0.423	0.001
sc prenatal [IV]	0.195	0.029 – 0.362	0.021
sc prenatal [V]	0.098	-0.075 – 0.271	0.267
sc prenatal [Students]	0.598	-0.043 – 1.239	0.067
sc prenatal [Retired]	0.447	-1.205 – 2.098	0.596
sc prenatal [Single,no husbnd]	0.272	0.056 – 0.487	0.013
smoking preg [5mths-no change]	0.041	-0.174 – 0.256	0.710
smoking preg [Now non-smoker]	0.108	-0.122 – 0.337	0.358
smoking preg [1 to 4 now]	-0.044	-0.291 – 0.202	0.725
smoking preg [5 to 9 now]	-0.002	-0.260 – 0.256	0.989
smoking preg [10 to 14 now]	0.078	-0.211 – 0.368	0.595
smoking preg [15 to 19 now]	-0.026	-0.414 – 0.362	0.895
smoking preg [20 to 24 now]	0.231	-0.197 – 0.659	0.290
smoking preg [25 to 29 now]	0.289	-0.405 – 0.984	0.414
smoking preg [30 or more now]	-0.021	-0.718 – 0.675	0.952
smoking_pregVariable-5mths	0.040	-0.192 – 0.272	0.736
birth weight	0.000	-0.000 – 0.001	0.056
parent read level [Occasionally]	0.074	-0.020 – 0.169	0.124
parent read level [Weekly]	0.267	0.177 – 0.356	<0.001
sc age7 [NA, unclear]	-0.423	-0.736 – -0.110	0.008
sc age7 [No male head]	-0.446	-0.674 – -0.218	<0.001
sc age7 [II]	-0.116	-0.278 – 0.046	0.160
sc age7 [III non-manual]	-0.102	-0.275 – 0.071	0.246
sc age7 [III manual]	-0.289	-0.452 – -0.126	0.001
sc age7 [IV non-manual]	-0.264	-0.501 – -0.027	0.029
sc age7 [IV manual]	-0.385	-0.556 – -0.213	<0.001
sc age7 [V]	-0.433	-0.627 – -0.239	<0.001
breast f [NA]	0.895	-0.252 – 2.043	0.126
breast f [Dont know]	0.071	-0.250 – 0.392	0.664
breast f [No]	-0.064	-0.123 – -0.004	0.036
breast f [Over one month]	0.073	0.017 – 0.129	0.011
sc age11 [NA,unclear]	-0.197	-0.361 – -0.033	0.018
sc age11 [II]	-0.005	-0.155 – 0.146	0.952
sc age11 [III non manual]	-0.055	-0.221 – 0.111	0.514
sc age11 [III manual]	-0.094	-0.247 – 0.060	0.233
sc age11 [IV]	-0.200	-0.362 – -0.038	0.015
sc age11 [V]	-0.203	-0.394 – -0.011	0.038
sc age11 [No male head]	-0.025	-0.221 – 0.171	0.800
tv age11 [Sometimes]	0.173	0.010 – 0.335	0.037
tv age11 [most days]	0.284	0.133 – 0.435	<0.001
child reading age11 [Sometimes]	0.391	0.228 – 0.553	<0.001
child reading age11 [Most days]	0.606	0.446 – 0.767	<0.001
sc age16 [NA,NMH,forces]	-0.129	-0.289 – 0.032	0.117
sc age16 [II]	-0.003	-0.166 – 0.160	0.970
sc age16 [III non-manual]	-0.001	-0.179 – 0.177	0.991
sc age16 [III manual]	-0.082	-0.245 – 0.081	0.323
sc age16 [IV non-manual]	-0.082	-0.360 – 0.195	0.561
sc age16 [IV manual]	-0.123	-0.301 – 0.054	0.173
sc age16 [V]	-0.212	-0.415 – -0.009	0.040
sc age16 [Unclear]	-0.107	-0.402 – 0.187	0.475
smoking age16 [NA]	-0.685	-0.941 – -0.429	<0.001
smoking age16 [Less 1 a week]	-0.094	-0.229 – 0.040	0.170
smoking age16 [1 to 9 a week]	-0.230	-0.324 – -0.135	<0.001
smoking age16 [10 to 19 a week]	-0.257	-0.362 – -0.152	<0.001
smoking age16 [20 to 29 a week]	-0.297	-0.401 – -0.193	<0.001
smoking age16 [30 to 39 a week]	-0.309	-0.416 – -0.202	<0.001
smoking age16 [40 to 49 a week]	-0.249	-0.361 – -0.136	<0.001
smoking age16 [50 to 59 a week]	-0.213	-0.334 – -0.091	0.001
smoking age16 [60 or more a wk]	-0.223	-0.321 – -0.126	<0.001
alcohol age16 [NA]	-0.054	-0.422 – 0.314	0.774
alcohol age16 [Less than 1 week]	0.568	0.462 – 0.674	<0.001
alcohol age16 [2 to 4 weeks]	0.500	0.388 – 0.613	<0.001
alcohol age16 [5 to 8 weeks]	0.395	0.261 – 0.528	<0.001
alcohol age16 [9 to 12 weeks]	0.329	0.169 – 0.489	<0.001
alcohol age16 [Over 12 weeks]	0.310	0.174 – 0.445	<0.001
alcohol age16 [Do not remember]	0.307	0.189 – 0.425	<0.001
child read age16 [Sometimes]	0.239	0.191 – 0.286	<0.001
tv age16 [Sometimes]	0.015	-0.091 – 0.121	0.780
tv age16 [Often]	0.006	-0.095 – 0.107	0.903
school attendance age16	0.001	0.000 – 0.002	0.002
Observations	5309
R² / R² adjusted	0.237 / 0.227

extract_age7_11_16_exposures <- function(model, outcome_label) {

  coef_df <- tidy(model, conf.int = TRUE) %>%
    filter(term %in% c(
      # Age 7
      "parent_read_levelOccasionally",
      "parent_read_levelWeekly",
      
      # Age 11
      "child_reading_age11Sometimes",
      "child_reading_age11Most days",
      "tv_age11Sometimes",
      "tv_age11most days",
      
      # Age 16
      "child_read_age16Sometimes",
      "tv_age16Sometimes"
    )) %>%
    mutate(
      exposure = case_when(
        term == "parent_read_levelOccasionally" ~ "Reading: Sometimes",
        term == "parent_read_levelWeekly"       ~ "Reading: Most days",
        
        term == "child_reading_age11Sometimes"  ~ "Reading: Sometimes",
        term == "child_reading_age11Most days"  ~ "Reading: Most days",
        
        term == "child_read_age16Sometimes"     ~ "Reading: Sometimes",
        
        term == "tv_age11Sometimes"             ~ "TV: Sometimes",
        term == "tv_age11most days"             ~ "TV: Most days",
        term == "tv_age16Sometimes"             ~ "TV: Sometimes"
      ),
      domain = case_when(
        grepl("^tv_", term) ~ "Passive SB (TV)",
        TRUE                ~ "Cognitively engaged SB (Reading)"
      ),
      age = case_when(
        grepl("^parent_", term)         ~ "Age 7",
        grepl("age11", term)            ~ "Age 11",
        grepl("age16", term)            ~ "Age 16"
      ),
      outcome = outcome_label
    ) %>%
    dplyr::select(outcome, domain, age, exposure, estimate, conf.low, conf.high)

  # referências
  ref_df <- tibble(
    outcome  = outcome_label,
    domain   = c(
      "Cognitively engaged SB (Reading)",
      "Cognitively engaged SB (Reading)",
      "Cognitively engaged SB (Reading)",
      "Passive SB (TV)",
      "Passive SB (TV)"
    ),
    age      = c("Age 7", "Age 11", "Age 16", "Age 11", "Age 16"),
    exposure = c(
      "Reading: Hardly ever",
      "Reading: Hardly ever",
      "Reading: Hardly ever",
      "TV: Hardly ever",
      "TV: Hardly ever"
    ),
    estimate = 0,
    conf.low = 0,
    conf.high = 0
  )

  bind_rows(ref_df, coef_df)
}

plot_df <- bind_rows(
  extract_age7_11_16_exposures(global_age16,  "Global cognition"),
  extract_age7_11_16_exposures(arith_age16,   "Maths ability"),
  extract_age7_11_16_exposures(reading_age16, "Reading ability")
)


plot_df$dose <- factor(
  gsub(".*: ", "", plot_df$exposure),
  levels = c("Most days", "Sometimes", "Hardly ever"),
  ordered = TRUE
)

plot_df$age <- factor(plot_df$age, 
  levels = c("Age 7", "Age 11", "Age 16"),
  ordered = TRUE
)

plot_age16 <- ggplot(plot_df,
       aes(x = estimate,
           y = outcome,
           colour = age,
           shape = dose)) +

  geom_vline(xintercept = 0,
             linetype = "dashed",
             color = "grey50") +

  geom_pointrange(
    aes(xmin = conf.low, xmax = conf.high),
    position = position_dodge(width = 0.6),
    size = 0.9
  ) +

  facet_wrap(~ domain, ncol = 2, scales = "free_y") +

  scale_color_jama() +

  labs(
    x = "Standardized difference in cognitive score (z)",
    y = NULL,
    colour = "Age",
    shape  = "Exposure frequency"
  ) +

  theme_minimal(base_size = 13) +
  theme(
    panel.grid.minor = element_blank(),
    legend.position = "bottom",
    strip.text = element_text(face = "bold")
  )

plot_age16

COGNITIVE DECLINE

library(dplyr)
library(tidyr)
library(lme4)
library(emmeans)
library(ggplot2)
library(ggsci)
library(broom)
library(broom.mixed)

df_long <- df_complete_7_16 %>%
  dplyr::select(
    ncdsid,
    z_global_cog_age7,
    z_global_cog_age11,
    z_global_cog_age16,
    parent_read_level,
    sex,
    sc_prenatal,
    smoking_preg,
    birth_weight,
    sc_age7,
    breast_f,
    sc_age11,
    sc_age16,
    smoking_age16,
    alcohol_age16,
    school_attendance_age16
  ) %>%
  pivot_longer(
    cols = starts_with("z_global_cog"),
    names_to = "wave",
    values_to = "z_global_cog"
  ) %>%
  mutate(
    age = case_when(
      wave == "z_global_cog_age7"  ~ 7,
      wave == "z_global_cog_age11" ~ 11,
      wave == "z_global_cog_age16" ~ 16
    ),
    id_num = as.numeric(as.factor(ncdsid)),
    parent_read_level = factor(
      parent_read_level,
      levels = c("Hardly ever", "Occasionally", "Weekly")
    )
  ) %>%
  filter(!is.na(z_global_cog), !is.na(parent_read_level))

mixed <- lmer(
  z_global_cog ~ age * parent_read_level +
    sex + sc_prenatal + smoking_preg +
    birth_weight + sc_age7 + breast_f +
    sc_age11 + sc_age16 +
    smoking_age16 + alcohol_age16 +
    school_attendance_age16 +
    (1 | id_num),
  data = df_long,
  REML = TRUE
)

tab_model(mixed, digits = 3, digits.p = 3)

	z global cog
Predictors	Estimates	CI	p
(Intercept)	0.087	-0.175 – 0.348	0.516
age	-0.028	-0.035 – -0.021	<0.001
parent read level [Occasionally]	0.059	-0.057 – 0.174	0.317
parent read level [Weekly]	-0.137	-0.243 – -0.030	0.012
sex [Female]	-0.014	-0.047 – 0.019	0.413
sc prenatal [I]	0.341	0.192 – 0.489	<0.001
sc prenatal [II]	0.269	0.145 – 0.393	<0.001
sc prenatal [III]	0.130	0.017 – 0.242	0.024
sc prenatal [IV]	0.067	-0.053 – 0.188	0.272
sc prenatal [V]	-0.043	-0.168 – 0.082	0.498
sc prenatal [Students]	0.445	-0.070 – 0.960	0.091
sc prenatal [Retired]	0.415	-0.653 – 1.483	0.446
sc prenatal [Single,no husbnd]	-0.025	-0.180 – 0.129	0.747
smoking preg [5mths-no change]	-0.012	-0.175 – 0.151	0.885
smoking preg [Now non-smoker]	0.024	-0.148 – 0.197	0.782
smoking preg [1 to 4 now]	-0.056	-0.241 – 0.129	0.553
smoking preg [5 to 9 now]	-0.062	-0.254 – 0.130	0.528
smoking preg [10 to 14 now]	-0.034	-0.252 – 0.185	0.763
smoking preg [15 to 19 now]	-0.195	-0.465 – 0.075	0.156
smoking preg [20 to 24 now]	-0.371	-0.692 – -0.050	0.024
smoking preg [25 to 29 now]	0.368	-0.156 – 0.893	0.169
smoking preg [30 or more now]	-0.423	-0.976 – 0.130	0.134
smoking_pregVariable-5mths	-0.072	-0.247 – 0.103	0.419
birth weight	0.000	0.000 – 0.001	<0.001
sc age7 [NA, unclear]	-0.291	-0.512 – -0.069	0.010
sc age7 [No male head]	-0.332	-0.494 – -0.170	<0.001
sc age7 [II]	-0.107	-0.218 – 0.003	0.057
sc age7 [III non-manual]	-0.090	-0.208 – 0.029	0.137
sc age7 [III manual]	-0.303	-0.414 – -0.191	<0.001
sc age7 [IV non-manual]	-0.256	-0.422 – -0.090	0.003
sc age7 [IV manual]	-0.349	-0.468 – -0.230	<0.001
sc age7 [V]	-0.471	-0.607 – -0.336	<0.001
breast f [NA]	0.464	-0.400 – 1.327	0.293
breast f [Dont know]	-0.166	-0.403 – 0.071	0.169
breast f [No]	-0.069	-0.112 – -0.025	0.002
breast f [Over one month]	0.097	0.056 – 0.138	<0.001
sc age11 [NA,unclear]	-0.273	-0.390 – -0.156	<0.001
sc age11 [II]	-0.055	-0.162 – 0.053	0.321
sc age11 [III non manual]	-0.080	-0.199 – 0.038	0.184
sc age11 [III manual]	-0.207	-0.317 – -0.096	<0.001
sc age11 [IV]	-0.271	-0.388 – -0.154	<0.001
sc age11 [V]	-0.289	-0.428 – -0.150	<0.001
sc age11 [No male head]	-0.247	-0.389 – -0.105	0.001
sc age16 [NA,NMH,forces]	-0.185	-0.296 – -0.074	0.001
sc age16 [II]	-0.087	-0.200 – 0.025	0.129
sc age16 [III non-manual]	-0.164	-0.288 – -0.040	0.009
sc age16 [III manual]	-0.143	-0.256 – -0.030	0.013
sc age16 [IV non-manual]	-0.140	-0.333 – 0.054	0.157
sc age16 [IV manual]	-0.174	-0.297 – -0.050	0.006
sc age16 [V]	-0.285	-0.430 – -0.141	<0.001
sc age16 [Unclear]	0.015	-0.188 – 0.218	0.884
smoking age16 [NA]	-0.405	-0.582 – -0.228	<0.001
smoking age16 [Less 1 a week]	-0.098	-0.191 – -0.004	0.041
smoking age16 [1 to 9 a week]	-0.208	-0.279 – -0.138	<0.001
smoking age16 [10 to 19 a week]	-0.270	-0.348 – -0.191	<0.001
smoking age16 [20 to 29 a week]	-0.289	-0.366 – -0.212	<0.001
smoking age16 [30 to 39 a week]	-0.240	-0.320 – -0.160	<0.001
smoking age16 [40 to 49 a week]	-0.302	-0.387 – -0.217	<0.001
smoking age16 [50 to 59 a week]	-0.190	-0.284 – -0.095	<0.001
smoking age16 [60 or more a wk]	-0.202	-0.275 – -0.129	<0.001
alcohol age16 [NA]	0.126	-0.083 – 0.336	0.236
alcohol age16 [Less than 1 week]	0.604	0.528 – 0.680	<0.001
alcohol age16 [2 to 4 weeks]	0.535	0.454 – 0.615	<0.001
alcohol age16 [5 to 8 weeks]	0.476	0.381 – 0.572	<0.001
alcohol age16 [9 to 12 weeks]	0.418	0.306 – 0.531	<0.001
alcohol age16 [Over 12 weeks]	0.370	0.274 – 0.466	<0.001
alcohol age16 [Do not remember]	0.322	0.238 – 0.406	<0.001
school attendance age16	0.001	0.001 – 0.002	<0.001
age × parent read level [Occasionally]	0.001	-0.007 – 0.009	0.873
age × parent read level [Weekly]	0.031	0.024 – 0.038	<0.001
Random Effects
σ²	0.31
τ₀₀ _{id_num}	0.47
ICC	0.60
N _{id_num}	8473
Observations	25419
Marginal R² / Conditional R²	0.189 / 0.680

emm <- emmeans(
  mixed,
  ~ parent_read_level | age,
  at = list(age = c(7, 11, 16)),
  nuisance = c(
    "sex", "sc_prenatal", "smoking_preg", "sc_age7", "sc_age11", "sc_age16",
    "smoking_age16", "alcohol_age16", "school_attendance_age16", "breast_f"
  )
)

emm_df <- as.data.frame(emm)

plot_trajectory_7_16 <- ggplot(
  emm_df,
  aes(
    x = age,
    y = emmean,
    colour = parent_read_level,
    fill   = parent_read_level
  )
) +
  geom_line(size = 1.1) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
  scale_x_continuous(breaks = c(7, 11, 16)) +
  labs(
    x = "Age (years)",
    y = "Predicted global cognitive score (z)",
    colour = "Parental reading frequency",
    fill   = "Parental reading frequency"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank()
  ) +
  scale_color_bmj() +
  scale_fill_bmj()

int_term <- tidy(
  mixed,
  effects = "fixed",
  conf.int = TRUE
) %>%
  dplyr::filter(grepl("^age:parent_read_levelWeekly", term)) %>%
  mutate(
    p.value = 2 * (1 - pnorm(abs(statistic)))
  )

int_term

## # A tibble: 1 × 8
##   effect term           estimate std.error statistic conf.low conf.high  p.value
##   <chr>  <chr>             <dbl>     <dbl>     <dbl>    <dbl>     <dbl>    <dbl>
## 1 fixed  age:parent_re…   0.0309   0.00376      8.22   0.0236    0.0383 2.22e-16

int_label <- with(int_term, {
  paste0(
    "Interaction (age × weekly parental reading (age 7):\n",
    "β = ", sprintf("%.4f", estimate),
    " (95% CI ", sprintf("%.4f", conf.low),
    ", ", sprintf("%.4f", conf.high), "), ",
    "p = ", ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  )
})

int_label

## [1] "Interaction (age × weekly parental reading (age 7):\nβ = 0.0309 (95% CI 0.0236, 0.0383), p = <0.001"

plot_trajectory_7_16 <- plot_trajectory_7_16 +
  annotate(
    "text",
    x = 7.2,
    y = min(emm_df$emmean) + 0.05,
    label = int_label,
    hjust = 0,
    size = 4.2
  )

plot_trajectory_7_16

GROUP-BASED TRAJECTORY MODELLING

library(lcmm)

df_read_long <- df_complete_7_16 %>%
  mutate(
    reading_age7  = parent_read_level,
    reading_age11 = child_reading_age11,
    reading_age16 = child_read_age16
  ) %>%
  mutate(
    read7 = as.numeric(
      factor(
        reading_age7,
        levels = c("Hardly ever", "Occasionally", "Weekly")
      )
    ) - 1,
    read11 = as.numeric(
      factor(
        reading_age11,
        levels = c("Hardly ever", "Sometimes", "Most days")
      )
    ) - 1,
    read16 = as.numeric(
      factor(
        reading_age16,
        levels = c("Hardly ever", "Sometimes", "Most days")
      )
    ) - 1
  ) %>%
  dplyr::select(
    ncdsid,
    read7,
    read11,
    read16
  ) %>%
  pivot_longer(
    cols = starts_with("read"),
    names_to = "wave",
    values_to = "reading_num"
  ) %>%
  mutate(
    age = case_when(
      wave == "read7"  ~ 7,
      wave == "read11" ~ 11,
      wave == "read16" ~ 16
    ),
    id_num = as.numeric(as.factor(ncdsid))
  ) %>%
  filter(!is.na(reading_num))

m1 <- hlme(
  fixed   = reading_num ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 1,
  data    = df_read_long
)

summary(m1)

## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = reading_num ~ age, random = ~1, subject = "id_num", 
##     ng = 1, data = df_read_long)
##  
## Statistical Model: 
##      Dataset: df_read_long 
##      Number of subjects: 9080 
##      Number of observations: 23579 
##      Number of latent classes: 1 
##      Number of parameters: 4  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  34 
##      Convergence criteria: parameters= 5e-13 
##                          : likelihood= 4.9e-07 
##                          : second derivatives= 7.4e-06 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -21067.55  
##      AIC: 42143.11  
##      BIC: 42171.56  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##               coef      Se    Wald p-value
## intercept  2.55734 0.01250 204.535 0.00000
## age       -0.10674 0.00110 -96.814 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept         0
## 
##                              coef      Se
## Residual standard error:  0.59129 0.00272

m2 <- hlme(
  fixed   = reading_num ~ age,
  mixture = ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 2,
  data    = df_read_long,
  nwg     = TRUE,
  B       = m1
)

summary(m2)

## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = reading_num ~ age, mixture = ~age, random = ~1, 
##     subject = "id_num", ng = 2, nwg = TRUE, data = df_read_long)
##  
## Statistical Model: 
##      Dataset: df_read_long 
##      Number of subjects: 9080 
##      Number of observations: 23579 
##      Number of latent classes: 2 
##      Number of parameters: 8  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  206 
##      Convergence criteria: parameters= 4.8e-10 
##                          : likelihood= 1.6e-06 
##                          : second derivatives= 9e-07 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -20627.31  
##      AIC: 41270.62  
##      BIC: 41327.53  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                      coef       Se     Wald p-value
## intercept class1 -2.21667  0.05951  -37.249 0.00000
## 
## Fixed effects in the longitudinal model:
## 
##                      coef       Se     Wald p-value
## intercept class1  0.55147  0.05724    9.635 0.00000
## intercept class2  2.77983  0.01391  199.814 0.00000
## age class1        0.02617  0.00484    5.404 0.00000
## age class2       -0.12158  0.00117 -104.255 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept         0
## 
##                                     coef       Se
## Proportional coefficient class1  0.95921 11.73219
## Residual standard error:         0.53844  0.00279

postprob(m2)

##  
## Posterior classification: 
##   class1  class2
## N 660.00 8420.00
## %   7.27   92.73
##  
## Posterior classification table: 
##      --> mean of posterior probabilities in each class 
##         prob1  prob2
## class1 0.8537 0.1463
## class2 0.0390 0.9610
##  
## Posterior probabilities above a threshold (%): 
##          class1 class2
## prob>0.7  98.79  95.63
## prob>0.8  83.64  92.66
## prob>0.9  33.03  91.69
##

age_grid <- data.frame(
  age = seq(7, 16, by = 0.1)
)

pred <- predictY(
  m2,
  newdata = age_grid,
  var.time = "age",
  draws = TRUE,
  ndraws = 1000
)

str(pred)

## List of 2
##  $ pred : num [1:91, 1:6] 0.735 0.737 0.74 0.743 0.745 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:6] "Ypred_class1" "Ypred_class2" "lower.Ypred_class1" "lower.Ypred_class2" ...
##  $ times:'data.frame':   91 obs. of  1 variable:
##   ..$ age: num [1:91] 7 7.1 7.2 7.3 7.4 7.5 7.6 7.7 7.8 7.9 ...
##  - attr(*, "class")= chr "predictY"

traj_df <- bind_rows(

  data.frame(
    age   = pred$times$age,
    mean  = pred$pred[, "Ypred_class1"],
    lower = pred$pred[, "lower.Ypred_class1"],
    upper = pred$pred[, "upper.Ypred_class1"],
    class = "Class 1"
  ),

  data.frame(
    age   = pred$times$age,
    mean  = pred$pred[, "Ypred_class2"],
    lower = pred$pred[, "lower.Ypred_class2"],
    upper = pred$pred[, "upper.Ypred_class2"],
    class = "Class 2"
  )
)

traj_df$class <- factor(
  traj_df$class,
  levels = c("Class 1", "Class 2"),
  labels = c(
    "High initial / rapidly decreasing",
    "Moderate initial / slowly increasing"
  )
)

fig_traj_reading <- ggplot(traj_df, aes(x = age, y = mean, colour = class, fill = class)) +

  geom_ribbon(
    aes(ymin = lower, ymax = upper),
    alpha = 0.20,
    colour = NA
  ) +

  geom_line(size = 1.2) +

  scale_x_continuous(
    breaks = c(7, 11, 16)
  ) +

  scale_y_continuous(
    breaks = c(0, 1, 2),
    labels = c("Low", "Moderate", "High")
  ) +

  labs(
    x = "Age (years)",
    y = "Reading frequency",
    colour = NULL,
    fill   = NULL
  ) +

  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    axis.title = element_text(face = "bold")
  ) +
  coord_cartesian(ylim = c(0,2)) +
  scale_color_bmj() +
  scale_fill_bmj()

fig_traj_reading

GBTM + COGNITIVE DECLINE

library(lme4)
library(ggeffects)
library(ggplot2)
library(ggsci)

pp <- postprob(m2)

##  
## Posterior classification: 
##   class1  class2
## N 660.00 8420.00
## %   7.27   92.73
##  
## Posterior classification table: 
##      --> mean of posterior probabilities in each class 
##         prob1  prob2
## class1 0.8537 0.1463
## class2 0.0390 0.9610
##  
## Posterior probabilities above a threshold (%): 
##          class1 class2
## prob>0.7  98.79  95.63
## prob>0.8  83.64  92.66
## prob>0.9  33.03  91.69
##

str(pp)

## List of 3
##  $ : num [1:2, 1:2] 660 7.27 8420 92.73
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "N" "%"
##   .. ..$ : chr [1:2] "class1" "class2"
##  $ : num [1:2, 1:2] 0.854 0.039 0.146 0.961
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "class1" "class2"
##   .. ..$ : chr [1:2] "prob1" "prob2"
##  $ : num [1:3, 1:2] 98.8 83.6 33 95.6 92.7 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:3] "prob>0.7" "prob>0.8" "prob>0.9"
##   .. ..$ : chr [1:2] "class1" "class2"

class_df <- m2$pprob %>%
  mutate(
    read_class = factor(
      class,
      labels = c(
        "High initial / rapidly decreasing",
        "Moderate initial / slowly increasing"
      )
    )
  ) %>%
  dplyr::select(id_num, read_class)

table(class_df$read_class)

## 
##    High initial / rapidly decreasing Moderate initial / slowly increasing 
##                                  660                                 8420

df_cog <- df_long %>%
  left_join(class_df, by = "id_num")

mixed_cog <- lmer(
  z_global_cog ~ age * read_class + 
                sex + sc_prenatal  + smoking_preg + 
                 birth_weight +  parent_read_level  + sc_age7  + breast_f +
                   sc_age11 +sc_age16 + smoking_age16 + alcohol_age16 +
                   school_attendance_age16 + (1 | ncdsid), data = df_cog
)
tab_model(mixed_cog, digits = 3)

	z global cog
Predictors	Estimates	CI	p
(Intercept)	0.092	-0.169 – 0.352	0.491
age	-0.028	-0.035 – -0.021	<0.001
read class [Moderate initial / slowly increasing]	0.226	-0.041 – 0.492	0.097
sex [Female]	-0.014	-0.047 – 0.019	0.406
sc prenatal [I]	0.343	0.194 – 0.491	<0.001
sc prenatal [II]	0.270	0.146 – 0.394	<0.001
sc prenatal [III]	0.132	0.020 – 0.245	0.021
sc prenatal [IV]	0.069	-0.052 – 0.189	0.263
sc prenatal [V]	-0.037	-0.162 – 0.087	0.557
sc prenatal [Students]	0.447	-0.068 – 0.962	0.089
sc prenatal [Retired]	0.413	-0.654 – 1.480	0.448
sc prenatal [Single,no husbnd]	-0.024	-0.179 – 0.130	0.757
smoking preg [5mths-no change]	-0.021	-0.184 – 0.142	0.800
smoking preg [Now non-smoker]	0.015	-0.158 – 0.187	0.867
smoking preg [1 to 4 now]	-0.067	-0.253 – 0.118	0.475
smoking preg [5 to 9 now]	-0.071	-0.263 – 0.121	0.468
smoking preg [10 to 14 now]	-0.045	-0.264 – 0.173	0.685
smoking preg [15 to 19 now]	-0.189	-0.458 – 0.081	0.170
smoking preg [20 to 24 now]	-0.384	-0.705 – -0.063	0.019
smoking preg [25 to 29 now]	0.355	-0.169 – 0.879	0.185
smoking preg [30 or more now]	-0.434	-0.987 – 0.118	0.124
smoking_pregVariable-5mths	-0.082	-0.256 – 0.093	0.358
birth weight	0.000	0.000 – 0.001	<0.001
parent read level [Occasionally]	-0.419	-0.678 – -0.160	0.002
parent read level [Weekly]	-0.280	-0.542 – -0.018	0.037
sc age7 [NA, unclear]	-0.294	-0.516 – -0.073	0.009
sc age7 [No male head]	-0.335	-0.497 – -0.173	<0.001
sc age7 [II]	-0.106	-0.216 – 0.005	0.060
sc age7 [III non-manual]	-0.090	-0.208 – 0.028	0.135
sc age7 [III manual]	-0.302	-0.413 – -0.190	<0.001
sc age7 [IV non-manual]	-0.260	-0.426 – -0.094	0.002
sc age7 [IV manual]	-0.348	-0.467 – -0.230	<0.001
sc age7 [V]	-0.469	-0.604 – -0.333	<0.001
breast f [NA]	0.626	-0.241 – 1.493	0.157
breast f [Dont know]	-0.155	-0.391 – 0.082	0.200
breast f [No]	-0.068	-0.112 – -0.025	0.002
breast f [Over one month]	0.097	0.056 – 0.138	<0.001
sc age11 [NA,unclear]	-0.269	-0.385 – -0.152	<0.001
sc age11 [II]	-0.056	-0.163 – 0.052	0.309
sc age11 [III non manual]	-0.081	-0.200 – 0.037	0.180
sc age11 [III manual]	-0.208	-0.319 – -0.098	<0.001
sc age11 [IV]	-0.271	-0.387 – -0.154	<0.001
sc age11 [V]	-0.293	-0.432 – -0.154	<0.001
sc age11 [No male head]	-0.247	-0.389 – -0.105	0.001
sc age16 [NA,NMH,forces]	-0.185	-0.295 – -0.075	0.001
sc age16 [II]	-0.087	-0.200 – 0.025	0.128
sc age16 [III non-manual]	-0.163	-0.287 – -0.040	0.010
sc age16 [III manual]	-0.142	-0.255 – -0.029	0.014
sc age16 [IV non-manual]	-0.132	-0.325 – 0.062	0.182
sc age16 [IV manual]	-0.174	-0.298 – -0.051	0.006
sc age16 [V]	-0.286	-0.430 – -0.141	<0.001
sc age16 [Unclear]	0.019	-0.184 – 0.222	0.856
smoking age16 [NA]	-0.398	-0.574 – -0.221	<0.001
smoking age16 [Less 1 a week]	-0.097	-0.191 – -0.004	0.041
smoking age16 [1 to 9 a week]	-0.208	-0.278 – -0.137	<0.001
smoking age16 [10 to 19 a week]	-0.271	-0.350 – -0.193	<0.001
smoking age16 [20 to 29 a week]	-0.290	-0.367 – -0.213	<0.001
smoking age16 [30 to 39 a week]	-0.237	-0.317 – -0.157	<0.001
smoking age16 [40 to 49 a week]	-0.304	-0.389 – -0.219	<0.001
smoking age16 [50 to 59 a week]	-0.187	-0.282 – -0.092	<0.001
smoking age16 [60 or more a wk]	-0.203	-0.276 – -0.130	<0.001
alcohol age16 [NA]	0.144	-0.065 – 0.353	0.178
alcohol age16 [Less than 1 week]	0.605	0.529 – 0.681	<0.001
alcohol age16 [2 to 4 weeks]	0.537	0.456 – 0.617	<0.001
alcohol age16 [5 to 8 weeks]	0.476	0.381 – 0.572	<0.001
alcohol age16 [9 to 12 weeks]	0.418	0.305 – 0.530	<0.001
alcohol age16 [Over 12 weeks]	0.371	0.274 – 0.467	<0.001
alcohol age16 [Do not remember]	0.325	0.241 – 0.409	<0.001
school attendance age16	0.001	0.001 – 0.002	<0.001
age × read class [Moderate initial / slowly increasing]	0.024	0.017 – 0.031	<0.001
Random Effects
σ²	0.31
τ₀₀ _ncdsid	0.47
ICC	0.60
N _ncdsid	8473
Observations	25419
Marginal R² / Conditional R²	0.188 / 0.676

pred_cog <- ggpredict(
  mixed_cog,
  terms = c("age [7:16]", "read_class")
)

fig2 <- ggplot(
  pred_cog,
  aes(
    x = x,
    y = predicted,
    colour = group,
    fill = group
  )
)  +
  geom_line(size = 1.2) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
  scale_color_jama() +
  scale_fill_jama() +
  scale_x_continuous(breaks = c(7, 11, 16)) +
  labs(
    x = "Age (years)",
    y = "Global cognitive function (z-score)",
    colour = "Reading trajectory",
    fill   = "Reading trajectory"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "top",
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )

int_term <- tidy(
  mixed_cog,
  effects = "fixed",
  conf.int = TRUE
) %>%
  dplyr::filter(grepl("^age:read_class", term)) %>%
  mutate(
    p.value = 2 * (1 - pnorm(abs(statistic)))
  )

int_term

## # A tibble: 1 × 8
##   effect term           estimate std.error statistic conf.low conf.high  p.value
##   <chr>  <chr>             <dbl>     <dbl>     <dbl>    <dbl>     <dbl>    <dbl>
## 1 fixed  age:read_clas…   0.0237   0.00364      6.51   0.0166    0.0308 7.40e-11

int_label <- with(int_term, {
  paste0(
    "Interaction (age × reading trajectory):\n",
    "β = ", sprintf("%.4f", estimate),
    " (95% CI ", sprintf("%.4f", conf.low),
    ", ", sprintf("%.4f", conf.high), "), ",
    "p = ", ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  )
})

int_label

## [1] "Interaction (age × reading trajectory):\nβ = 0.0237 (95% CI 0.0166, 0.0308), p = <0.001"

fig2 <- fig2 +
  annotate(
    "text",
    x = 7.5,
    y = min(pred_cog$predicted) + 0.35,
    label = int_label,
    hjust = 0,
    size = 4.2
  )

fig2

SB ACROSS ADULTHOOD

table(df_complete_7_16$tv_age11)

## 
## Hardly ever   Sometimes   most days 
##         208        1043        7536

table(df_complete_7_16$tv_age16)

## 
## Hardly ever   Sometimes       Often 
##         476        2546        5736

table(df_complete_7_16$tv_age23)

## 
## 5 TIMES A WEEK +  3-4TIMES A WEEK  1-2TIMES A WEEK 2-3TIMES LST4WKS 
##             4976             1288              728              236 
## ONCE IN LST 4WKS NOT DONE LST4WKS 
##               59              106

table(df_complete_7_16$read_book_age23)

## 
## 5 TIMES A WEEK +  3-4TIMES A WEEK  1-2TIMES A WEEK 2-3TIMES LST4WKS 
##             1564              816              941              579 
## ONCE IN LST 4WKS NOT DONE LST4WKS 
##              706             2780

table(df_complete_7_16$read_book_age46)

## 
##                           Refused                        Don't know 
##                                 0                                 0 
##                     Other missing                    Not applicable 
##                                 1                                 0 
##                         Every day Most days (4 or 5 days each week) 
##                              1407                               757 
##              At least once a week             At least once a month 
##                               772                               684 
##              Less often than that                             Never 
##                              1412                               701

table(df_complete_7_16$read_news_age46)

## 
##                           Refused                        Don't know 
##                                 0                                 0 
##                     Other missing                    Not applicable 
##                                 1                                 0 
##                         Every day Most days (4 or 5 days each week) 
##                              2713                              1030 
##              At least once a week             At least once a month 
##                              1431                               280 
##              Less often than that                             Never 
##                               146                               133

table(df_complete_7_16$computer_home_age46)

## 
##        Refused     Don't know  Other missing Not applicable             No 
##              0              0              1              0            763 
##            yes 
##           4970

table(df_complete_7_16$computer_home_freq_age46)

## 
##               Refused            Don't know         Other missing 
##                     0                     2                     0 
##        Not applicable                 Daily      2-4 times a week 
##                   764                  1731                  1243 
##           Once a week Less than once a week                 Never 
##                   603                   750                   641

table(df_complete_7_16$computer_work_age46)

## 
##        Refused     Don't know  Other missing Not applicable             No 
##              0              0              1            690           1417 
##            yes 
##           3626

table(df_complete_7_16$computer_work_freq_age46)

## 
##               Refused            Don't know         Other missing 
##                     0                     2                     0 
##        Not applicable                 Daily      2-4 times a week 
##                  2108                  3165                   246 
##           Once a week Less than once a week 
##                   123                    90

table(df_complete_7_16$computer_home_age50)

## 
##                 Refusal              Don't Know Schedule not applicable 
##                       0                       0                       0 
##     Item not applicable                     Yes                      No 
##                       8                    5288                     517

table(df_complete_7_16$computer_home_freq_age46)

## 
##               Refused            Don't know         Other missing 
##                     0                     2                     0 
##        Not applicable                 Daily      2-4 times a week 
##                   764                  1731                  1243 
##           Once a week Less than once a week                 Never 
##                   603                   750                   641

table(df_complete_7_16$computer_work_age50)

## 
##                 Refusal              Don't Know Schedule not applicable 
##                       0                       0                       0 
##     Item not applicable                     Yes                      No 
##                     818                    3751                    1244

table(df_complete_7_16$computer_work_freq_age46)

## 
##               Refused            Don't know         Other missing 
##                     0                     2                     0 
##        Not applicable                 Daily      2-4 times a week 
##                  2108                  3165                   246 
##           Once a week Less than once a week 
##                   123                    90

table(df_complete_7_16$internet_use_age55)

## 
##                     Refused                  Don't know 
##                           3                           2 
##              Not applicable Everyday or almost everyday 
##                          68                        3962 
##        Several times a week        Once or twice a week 
##                         478                         373 
##       At least once a month         Less often or never 
##                         164                         453

table(df_complete_7_16$read_book_age62)

## 
##                  Not answered                   Multi-coded 
##                            10                             2 
##                Not applicable Every day or almost every day 
##                           745                          1469 
##          Several times a week          Once or twice a week 
##                           525                           392 
##         At least once a month              Every few months 
##                           315                           536 
##          At least once a year           Less often or never 
##                           314                           722

table(df_complete_7_16$tv_age62)

## 
##            Not answered             Multi-coded          Not applicable 
##                      12                       2                     745 
##                    None  Less than 1 hour a day      1 to 2 hours a day 
##                      76                     289                     781 
##      2 to 3 hours a day      3 to 4 hours a day More than 4 hours a day 
##                    1071                     967                    1087

df_complete_7_16$read3_age11 <- factor(
  df_complete_7_16$child_read_books_age11,
  levels = c("Hardly ever", "Sometimes", "most days"),
  labels = c("Low", "Moderate", "High")
)

df_complete_7_16$read3_age16 <- factor(
  df_complete_7_16$child_read_age16,
  levels = c("Hardly ever", "Sometimes", "Most days"),
  labels = c("Low", "Moderate", "High")
)

df_complete_7_16$tv3_age11 <- factor(
  df_complete_7_16$tv_age11,
  levels = c("Hardly ever", "Sometimes", "most days"),
  labels = c("Low", "Moderate", "High")
)

df_complete_7_16$tv3_age16 <- factor(
  df_complete_7_16$tv_age16,
  levels = c("Hardly ever", "Sometimes", "Often"),
  labels = c("Low", "Moderate", "High")
)

df_complete_7_16$tv3_age23 <- case_when(
  df_complete_7_16$tv_age23 %in% c("NOT DONE LST4WKS", "ONCE IN LST 4WKS") ~ "Low",
  df_complete_7_16$tv_age23 %in% c("2-3TIMES LST4WKS", "1-2TIMES A WEEK") ~ "Moderate",
  df_complete_7_16$tv_age23 %in% c("3-4TIMES A WEEK", "5 TIMES A WEEK +") ~ "High",
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))

df_complete_7_16$tv3_age62 <- case_when(
  df_complete_7_16$tv_age62 %in% c("None", "Less than 1 hour a day") ~ "Low",
  df_complete_7_16$tv_age62 %in% c("1 to 2 hours a day", "2 to 3 hours a day") ~ "Moderate",
  df_complete_7_16$tv_age62 %in% c("3 to 4 hours a day", "More than 4 hours a day") ~ "High",
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$read3_age23 <- case_when(
  df_complete_7_16$read_book_age23 %in% c("NOT DONE LST4WKS") ~ "Low",
  df_complete_7_16$read_book_age23 %in% c("ONCE IN LST 4WKS", 
                                          "1-2TIMES A WEEK") ~ "Moderate",
  df_complete_7_16$read_book_age23 %in% c("2-3TIMES LST4WKS",
                                          "3-4TIMES A WEEK", 
                                          "5 TIMES A WEEK +") ~ "High",
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$read3_age46 <- case_when(
  df_complete_7_16$read_book_age46 %in% c("Never", "Less often than that") &
    df_complete_7_16$read_news_age46 %in% c("Never", "Less often than that") ~ "Low",
  
  df_complete_7_16$read_book_age46 %in% c("At least once a week", "At least once a month") |
    df_complete_7_16$read_news_age46 %in% c("At least once a week", "At least once a month") ~ "Moderate",
  
  df_complete_7_16$read_book_age46 %in% c("Every day", "Most days (4 or 5 days each week)") |
    df_complete_7_16$read_news_age46 %in% c("Every day", "Most days (4 or 5 days each week)") ~ "High",
  
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$read3_age62 <- case_when(
  df_complete_7_16$read_book_age62 %in% c("Less often or never", "Every few months" ,"At least once a year") ~ "Low",
  df_complete_7_16$read_book_age62 %in% c("At least once a month", "At least once a year") ~ "Moderate",
  df_complete_7_16$read_book_age62 %in% c("Several times a week", "Once or twice a week", "Every day or almost every day") ~ "High",
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$computer3_home_age46 <- case_when(
  df_complete_7_16$computer_home_age46 == "No" ~ "Low",
  
  df_complete_7_16$computer_home_freq_age46 %in%
    c("Never", "Less than once a week") ~ "Low",
  
  df_complete_7_16$computer_home_freq_age46 %in%
    c("Once a week", "2-4 times a week") ~ "Moderate",
  
  df_complete_7_16$computer_home_freq_age46 %in%
    c("Daily") ~ "High",
  
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$computer3_work_age46 <- case_when(
  df_complete_7_16$computer_work_age46 == "No" ~ "Low",
  
  df_complete_7_16$computer_work_freq_age46 %in%
    c("Less than once a week") ~ "Low",
  
  df_complete_7_16$computer_work_freq_age46 %in%
    c("Once a week") ~ "Moderate",
  
  df_complete_7_16$computer_work_freq_age46 %in%
    c("2-4 times a week", "Daily") ~ "High",
  
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))


df_complete_7_16$computer3_age46 <- pmax(
  as.numeric(df_complete_7_16$computer3_home_age46),
  as.numeric(df_complete_7_16$computer3_work_age46),
  na.rm = TRUE
)

df_complete_7_16$computer3_age46 <- factor(
  df_complete_7_16$computer3_age46,
  levels = 1:3,
  labels = c("Low", "Moderate", "High")
)


df_complete_7_16$computer3_age50 <- case_when(
  df_complete_7_16$computer_home_age50 == "No" ~ "Low",
  df_complete_7_16$computer_home_age50 == "Yes" ~ "High",
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "High"))


df_complete_7_16$internet3_age55 <- case_when(
  df_complete_7_16$internet_use_age55 %in%
    c("Less often or never") ~ "Low",
  
  df_complete_7_16$internet_use_age55 %in%
    c("Once or twice a week", "Several times a week") ~ "Moderate",
  
  df_complete_7_16$internet_use_age55 %in%
    c("Everyday or almost everyday") ~ "High",
  
  TRUE ~ NA_character_
) |> factor(levels = c("Low", "Moderate", "High"))

GROUP-BASED TRAJECTORY MODELLING - ADULTHOOD - TV

library(tidyr)
library(lcmm)
library(dplyr)

df_tv <- df_complete_7_16 %>%
  dplyr::select(
    ncdsid,
    tv3_age11,
    tv3_age16,
    tv3_age23,
    tv3_age62
  ) %>%
  mutate(
    tv11 = as.numeric(tv3_age11) - 1,
    tv16 = as.numeric(tv3_age16) - 1,
    tv23 = as.numeric(tv3_age23) - 1,
    tv62 = as.numeric(tv3_age62) - 1
  )

df_tv_long <- df_tv %>%
  pivot_longer(
    cols = c(tv11, tv16, tv23, tv62),
    names_to  = "wave",
    values_to = "tv_num"
  ) %>%
  mutate(
    age = case_when(
      wave == "tv11" ~ 11,
      wave == "tv16" ~ 16,
      wave == "tv23" ~ 23,
      wave == "tv62" ~ 62
    )
  ) %>%
  filter(!is.na(tv_num))

df_tv_long <- df_tv_long %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

m1 <- hlme(
  fixed   = tv_num ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 1,
  data    = df_tv_long
)

summary(m1)

## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = tv_num ~ age, random = ~1, subject = "id_num", ng = 1, 
##     data = df_tv_long)
##  
## Statistical Model: 
##      Dataset: df_tv_long 
##      Number of subjects: 9079 
##      Number of observations: 29209 
##      Number of latent classes: 1 
##      Number of parameters: 4  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  11 
##      Convergence criteria: parameters= 2.4e-13 
##                          : likelihood= 1.1e-10 
##                          : second derivatives= 4.6e-14 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -22691.85  
##      AIC: 45391.7  
##      BIC: 45420.16  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##               coef      Se     Wald p-value
## intercept  1.86267 0.00531  350.680 0.00000
## age       -0.00720 0.00018  -40.286 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept   0.02617
## 
##                              coef      Se
## Residual standard error:  0.50339 0.00248

m2 <- hlme(
  fixed   = tv_num ~ age,
  mixture = ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 2,
  nwg     = TRUE,
  data    = df_tv_long,
  B       = random(m1)
)

summary(m2)

## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = tv_num ~ age, mixture = ~age, random = ~1, subject = "id_num", 
##     ng = 2, nwg = TRUE, data = df_tv_long)
##  
## Statistical Model: 
##      Dataset: df_tv_long 
##      Number of subjects: 9079 
##      Number of observations: 29209 
##      Number of latent classes: 2 
##      Number of parameters: 8  
##  
## Iteration process: 
##      Maximum number of iteration reached without convergence 
##      Number of iterations:  500 
##      Convergence criteria: parameters= 0.01 
##                          : likelihood= 5.5e-05 
##                          : second derivatives= 1 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -22019.76  
##      AIC: 44055.51  
##      BIC: 44112.42  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                       coef Se Wald p-value
## intercept class1  -1.16542                
## 
## Fixed effects in the longitudinal model:
## 
##                       coef Se Wald p-value
## intercept class1   1.78087                
## intercept class2   1.88621                
## age class1        -0.01716                
## age class2        -0.00402                
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept         0
## 
##                                      coef Se
## Proportional coefficient class1 141.89345   
## Residual standard error:          0.47405

age_grid <- data.frame(
  age = c(11, 16, 23, 62)
)

pred_tv <- predictY(
  m2,
  newdata = age_grid,
  var.time = "age",
  draws = TRUE
)

str(pred_tv)

## List of 2
##  $ pred : num [1:4, 1:6] 1.592 1.506 1.386 0.717 1.842 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:6] "Ypred_class1" "Ypred_class2" "lower.Ypred_class1" "lower.Ypred_class2" ...
##  $ times:'data.frame':   4 obs. of  1 variable:
##   ..$ age: num [1:4] 11 16 23 62
##  - attr(*, "class")= chr "predictY"

pred_df <- data.frame(
  age = pred_tv$times$age,
  pred_tv$pred
)

traj_df <- pred_df %>%
  pivot_longer(
    cols = -age,
    names_to = "term",
    values_to = "value"
  ) %>%
  separate(term, into = c("type", "class"), sep = "\\.") %>%
  pivot_wider(
    names_from  = type,
    values_from = value
  ) %>%
  mutate(
  class = dplyr::recode(
    class,
    "class1" = "Low / slowly increasing TV",
    "class2" = "High / rapidly increasing TV"
  )
)

# Grid de idades observado
age_grid <- data.frame(
  age = seq(
    min(df_tv_long$age, na.rm = TRUE),
    max(df_tv_long$age, na.rm = TRUE),
    by = 0.5
  )
)

# Predições com IC95%
pred_tv <- predictY(
  m2,
  newdata = age_grid,
  var.time = "age",
  draws = TRUE
)

traj_df <- data.frame(
  age = pred_tv$times$age,

  mean_class1  = pred_tv$pred[, "Ypred_class1"],
  lower_class1 = pred_tv$pred[, "lower.Ypred_class1"],
  upper_class1 = pred_tv$pred[, "upper.Ypred_class1"],

  mean_class2  = pred_tv$pred[, "Ypred_class2"],
  lower_class2 = pred_tv$pred[, "lower.Ypred_class2"],
  upper_class2 = pred_tv$pred[, "upper.Ypred_class2"]
) %>%
  tidyr::pivot_longer(
    cols = -age,
    names_to = c("stat", "class"),
    names_sep = "_class",
    values_to = "value"
  ) %>%
  tidyr::pivot_wider(
    names_from  = stat,
    values_from = value
  ) %>%
  mutate(
    class = factor(
      class,
      levels = c("1", "2"),
      labels = c(
        "High / slowly declining",
        "High / rapidly declining"
      )
    )
  )

fig_tv <- ggplot(traj_df, aes(x = age, y = mean, color = class, fill = class))  +
  geom_line(size = 1.2) +
  scale_color_bmj() +
  scale_fill_bmj() +
  labs(
    x = "Age (years)",
    y = "TV watching (ordinal score)",
    color = "Trajectory TV watching",
    fill  = "Trajectory TV watching"
  ) +
  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 12),
    legend.text  = element_text(size = 11),
    axis.title   = element_text(size = 13),
    axis.text    = element_text(size = 11)
  ) +

  scale_x_continuous(
    breaks = c(11, 16, 23, 62)
  ) +
  scale_y_continuous(
    breaks = c(0, 1, 2),
    labels = c("Low", "Mid", "High")
  )  +
  coord_cartesian(ylim = c(0,2))
fig_tv

COGNITIVE FUNCION - ADULTHOOD

df_complete_7_62 <- df_complete_7_16 %>%
  filter(words_immed_age50>-1 &
            words_delayed>-1 & 
            letter_canc_correct>-1 &
            letter_canc_accuracy>-1 &
            animals_named>-1 & 
            
            words_delayed_age62>-1 &
            letter_can_accuracy>-1 &
            animals_named_age62>-1)

df_complete_7_50 <- df_complete_7_16 %>%
  filter(words_immed_age50>-1 &
            words_delayed>-1 & 
            letter_canc_correct>-1 &
            letter_canc_accuracy>-1 &
            animals_named>-1)

            #### #####

# Age 50
#Word recall test - immediate recall
df_complete_7_50$mean_words_immed_age50 <- mean(df_complete_7_50$words_immed_age50)
df_complete_7_50$sd_words_immed_age50 <- sd(df_complete_7_50$words_immed_age50)
df_complete_7_50$z_words_immed_age50 <- (df_complete_7_50$words_immed_age50-df_complete_7_50$mean_words_immed_age50)/df_complete_7_50$sd_words_immed_age50
df_complete_7_50$z_words_immed_age50 = scale(df_complete_7_50$z_words_immed_age50)[,1]

hist(df_complete_7_50$z_words_immed_age50)

#Word recall test - Delayed recall
df_complete_7_50$mean_words_delayed <- mean(df_complete_7_50$words_delayed)
df_complete_7_50$sd_words_delayed <- sd(df_complete_7_50$words_delayed)
df_complete_7_50$z_words_delayed <- (df_complete_7_50$words_delayed-df_complete_7_50$mean_words_delayed)/df_complete_7_50$sd_words_delayed
df_complete_7_50$z_words_delayed = scale(df_complete_7_50$z_words_delayed)[,1]

hist(df_complete_7_50$z_words_delayed)

#Animal naming - Number of animals named
df_complete_7_50$mean_animals_named <- mean(df_complete_7_50$animals_named)
df_complete_7_50$sd_animals_named <- sd(df_complete_7_50$animals_named)
df_complete_7_50$z_animals_named <- (df_complete_7_50$animals_named-df_complete_7_50$mean_animals_named)/df_complete_7_50$sd_animals_named
df_complete_7_50$z_animals_named = scale(df_complete_7_50$z_animals_named)[,1]

hist(df_complete_7_50$z_animals_named)

#Letter cancelation - accuracy
df_complete_7_50$mean_letter_canc_accuracy <- mean(df_complete_7_50$letter_canc_accuracy)
df_complete_7_50$sd_letter_canc_accuracy <- sd(df_complete_7_50$letter_canc_accuracy)
df_complete_7_50$z_letter_canc_accuracy <- (df_complete_7_50$letter_canc_accuracy-df_complete_7_50$mean_letter_canc_accuracy)/df_complete_7_50$sd_letter_canc_accuracy
df_complete_7_50$z_letter_canc_accuracy = scale(df_complete_7_50$z_letter_canc_accuracy)[,1]

df_complete_7_50$z_letter_canc_accuracy <- df_complete_7_50$z_letter_canc_accuracy*-1
hist(df_complete_7_50$z_letter_canc_accuracy)

# averaging all z scores
df_complete_7_50$global_cog_age50 <- (df_complete_7_50$z_words_immed_age50 + df_complete_7_50$z_words_delayed  + df_complete_7_50$z_animals_named  + df_complete_7_50$z_letter_canc_accuracy) / 4

# creating a z score for the averaged variable
df_complete_7_50$global_cog_age50 = scale(df_complete_7_50$global_cog_age50)[,1]

hist(df_complete_7_50$global_cog_age50)

# Age 50
#Word recall test - immediate recall
df_complete_7_62$mean_words_immed_age50 <- mean(df_complete_7_62$words_immed_age50)
df_complete_7_62$sd_words_immed_age50 <- sd(df_complete_7_62$words_immed_age50)
df_complete_7_62$z_words_immed_age50 <- (df_complete_7_62$words_immed_age50-df_complete_7_62$mean_words_immed_age50)/df_complete_7_62$sd_words_immed_age50
df_complete_7_62$z_words_immed_age50 = scale(df_complete_7_62$z_words_immed_age50)[,1]

hist(df_complete_7_62$z_words_immed_age50)

#Word recall test - Delayed recall
df_complete_7_62$mean_words_delayed <- mean(df_complete_7_62$words_delayed)
df_complete_7_62$sd_words_delayed <- sd(df_complete_7_62$words_delayed)
df_complete_7_62$z_words_delayed <- (df_complete_7_62$words_delayed-df_complete_7_62$mean_words_delayed)/df_complete_7_62$sd_words_delayed
df_complete_7_62$z_words_delayed = scale(df_complete_7_62$z_words_delayed)[,1]

hist(df_complete_7_62$z_words_delayed)

#Animal naming - Number of animals named
df_complete_7_62$mean_animals_named <- mean(df_complete_7_62$animals_named)
df_complete_7_62$sd_animals_named <- sd(df_complete_7_62$animals_named)
df_complete_7_62$z_animals_named <- (df_complete_7_62$animals_named-df_complete_7_62$mean_animals_named)/df_complete_7_62$sd_animals_named
df_complete_7_62$z_animals_named = scale(df_complete_7_62$z_animals_named)[,1]

hist(df_complete_7_62$z_animals_named)

#Letter cancelation - accuracy
df_complete_7_62$mean_letter_canc_accuracy <- mean(df_complete_7_62$letter_canc_accuracy)
df_complete_7_62$sd_letter_canc_accuracy <- sd(df_complete_7_62$letter_canc_accuracy)
df_complete_7_62$z_letter_canc_accuracy <- (df_complete_7_62$letter_canc_accuracy-df_complete_7_62$mean_letter_canc_accuracy)/df_complete_7_62$sd_letter_canc_accuracy
df_complete_7_62$z_letter_canc_accuracy = scale(df_complete_7_62$z_letter_canc_accuracy)[,1]

df_complete_7_62$z_letter_canc_accuracy <- df_complete_7_62$z_letter_canc_accuracy*-1
hist(df_complete_7_62$z_letter_canc_accuracy)

# averaging all z scores
df_complete_7_62$global_cog_age50 <- (df_complete_7_62$z_words_immed_age50 + df_complete_7_62$z_words_delayed  + df_complete_7_62$z_animals_named  + df_complete_7_62$z_letter_canc_accuracy) / 4

# creating a z score for the averaged variable
df_complete_7_62$global_cog_age50 = scale(df_complete_7_62$global_cog_age50)[,1]

hist(df_complete_7_62$global_cog_age50)

# Age 62
#Word recall test - Delayed recall
df_complete_7_62$mean_words_delayed_age62 <- mean(df_complete_7_62$words_delayed_age62)
df_complete_7_62$sd_words_delayed_age62 <- sd(df_complete_7_62$words_delayed_age62)
df_complete_7_62$z_words_delayed_age62 <- (df_complete_7_62$words_delayed_age62-df_complete_7_62$mean_words_delayed_age62)/df_complete_7_62$sd_words_delayed_age62
df_complete_7_62$z_words_delayed_age62 = scale(df_complete_7_62$z_words_delayed_age62)[,1]

hist(df_complete_7_62$z_words_delayed_age62)

#Animal naming - Number of animals named
df_complete_7_62$mean_animals_named_age62 <- mean(df_complete_7_62$animals_named_age62)
df_complete_7_62$sd_animals_named_age62 <- sd(df_complete_7_62$animals_named_age62)
df_complete_7_62$z_animals_named_age62 <- (df_complete_7_62$animals_named_age62-df_complete_7_62$mean_animals_named_age62)/df_complete_7_62$sd_animals_named_age62
df_complete_7_62$z_animals_named_age62 = scale(df_complete_7_62$z_animals_named_age62)[,1]

hist(df_complete_7_62$z_animals_named_age62)

#Letter cancelation - accuracy
df_complete_7_62$mean_letter_can_accuracy <- mean(df_complete_7_62$letter_can_accuracy)
df_complete_7_62$sd_letter_can_accuracy <- sd(df_complete_7_62$letter_can_accuracy)
df_complete_7_62$z_letter_can_accuracy <- (df_complete_7_62$letter_canc_accuracy-df_complete_7_62$mean_letter_can_accuracy)/df_complete_7_62$sd_letter_can_accuracy
df_complete_7_62$z_letter_can_accuracy = scale(df_complete_7_62$z_letter_can_accuracy)[,1]

df_complete_7_62$z_letter_can_accuracy <- df_complete_7_62$z_letter_can_accuracy*-1
hist(df_complete_7_62$z_letter_can_accuracy)

# averaging all z scores
df_complete_7_62$global_cog_age62 <- (df_complete_7_62$z_words_delayed_age62 + df_complete_7_62$z_animals_named_age62  + df_complete_7_62$z_letter_can_accuracy) / 3

# creating a z score for the averaged variable
df_complete_7_62$global_cog_age62 = scale(df_complete_7_62$global_cog_age62)[,1]

hist(df_complete_7_62$global_cog_age62)

COGNITIVE DECLINE - ADULTHOOD

library(dplyr)
library(tidyr)
library(ggeffects)
library(lme4)
library(broom.mixed)

tv_class_df <- m2$pprob %>%
  mutate(
    tv_class = factor(
      class,
      levels = c(1, 2),
      labels = c(
        "High / slowly declining TV",
        "High / rapidly declining TV"
      )
    )
  ) %>%
  dplyr::select(id_num, tv_class)

df_cog_long <- df_complete_7_62 %>%
  dplyr::select(
    ncdsid,
    z_global_cog_age7,
    z_global_cog_age11,
    z_global_cog_age16,
    global_cog_age50,
    global_cog_age62
  ) %>%
  pivot_longer(
    cols = -ncdsid,
    names_to  = "wave",
    values_to = "z_cog"
  ) %>%
  mutate(
    age = case_when(
      wave == "z_global_cog_age7"   ~ 7,
      wave == "z_global_cog_age11"  ~ 11,
      wave == "z_global_cog_age16"  ~ 16,
      wave == "global_cog_age50"    ~ 50,
      wave == "global_cog_age62"    ~ 62
    )
  ) %>%
  filter(!is.na(z_cog)) %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

df_cov <- df_complete_7_62 %>%
  dplyr::select(
    ncdsid,

    # Demográficas
    sex,

    # Gestação / início da vida
    sc_prenatal,
    smoking_preg,
    birth_weight,
    breast_f,

    # Ambiente cognitivo precoce
    parent_read_level,

    # SEP ao longo da infância/adolescência
    sc_age7,
    sc_age11,
    sc_age16,

    # Comportamentos na adolescência
    smoking_age16,
    alcohol_age16,

    # Escolaridade
    school_attendance_age16,
    qual_age33,
    
    #Late adulthood
    malaise_age50,
    diabetes_age50,
    hypertension_age50
  ) %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

df_tv_class <- m2$pprob %>%
  mutate(
    tv_class = factor(
      class,
      labels = c(
        "High / slowly decline TV",
        "High / rapidly decline TV"
      )
    )
  ) %>%
  dplyr::select(id_num, tv_class)

df_cog_tv <- df_cog_long %>%
  left_join(df_tv_class, by = "id_num") %>%
  left_join(df_cov,       by = "id_num")

cog_mixed_adj <- lmer(
  z_cog ~ age * tv_class +

    sex +
    sc_prenatal +
    smoking_preg +
    birth_weight +
    breast_f +
    parent_read_level +

    sc_age7 +
    sc_age11 +
    sc_age16 +

    smoking_age16 +
    alcohol_age16 +
    
    school_attendance_age16 +
    qual_age33 +
    
    malaise_age50 +
    diabetes_age50 +
    hypertension_age50 +

    (1 | id_num),

  data = df_cog_tv,
  REML = TRUE
)

tab_model(cog_mixed_adj, digits = 3)

	z cog
Predictors	Estimates	CI	p
(Intercept)	-0.566	-0.914 – -0.218	0.001
age	-0.006	-0.008 – -0.005	<0.001
tv class [High / rapidly decline TV]	-0.062	-0.135 – 0.012	0.099
sex [Female]	0.107	0.064 – 0.150	<0.001
sc prenatal [I]	0.242	0.059 – 0.426	0.010
sc prenatal [II]	0.222	0.061 – 0.384	0.007
sc prenatal [III]	0.142	-0.009 – 0.294	0.065
sc prenatal [IV]	0.147	-0.014 – 0.307	0.074
sc prenatal [V]	0.114	-0.058 – 0.285	0.195
sc prenatal [Students]	0.014	-0.650 – 0.678	0.968
sc prenatal [Single,no husbnd]	0.079	-0.131 – 0.288	0.462
smoking preg [5mths-no change]	-0.168	-0.398 – 0.061	0.150
smoking preg [Now non-smoker]	-0.147	-0.387 – 0.094	0.232
smoking preg [1 to 4 now]	-0.193	-0.450 – 0.064	0.140
smoking preg [5 to 9 now]	-0.118	-0.382 – 0.146	0.382
smoking preg [10 to 14 now]	-0.126	-0.419 – 0.166	0.398
smoking preg [15 to 19 now]	-0.022	-0.417 – 0.374	0.914
smoking preg [20 to 24 now]	-0.410	-0.817 – -0.004	0.048
smoking preg [25 to 29 now]	-0.225	-0.908 – 0.458	0.518
smoking preg [30 or more now]	-0.079	-0.767 – 0.609	0.822
smoking_pregVariable-5mths	-0.167	-0.411 – 0.076	0.178
birth weight	0.000	-0.000 – 0.001	0.308
breast f [NA]	-0.063	-0.855 – 0.730	0.877
breast f [Dont know]	0.071	-0.276 – 0.418	0.689
breast f [No]	-0.038	-0.096 – 0.019	0.193
breast f [Over one month]	0.058	0.006 – 0.109	0.028
parent read level [Occasionally]	0.066	-0.035 – 0.168	0.200
parent read level [Weekly]	0.099	0.004 – 0.194	0.041
sc age7 [NA, unclear]	-0.190	-0.514 – 0.133	0.249
sc age7 [No male head]	0.154	-0.058 – 0.367	0.155
sc age7 [II]	-0.005	-0.127 – 0.117	0.938
sc age7 [III non-manual]	0.031	-0.103 – 0.165	0.650
sc age7 [III manual]	-0.058	-0.183 – 0.067	0.364
sc age7 [IV non-manual]	0.125	-0.078 – 0.328	0.226
sc age7 [IV manual]	-0.024	-0.160 – 0.112	0.725
sc age7 [V]	-0.045	-0.212 – 0.123	0.603
sc age11 [NA,unclear]	-0.081	-0.221 – 0.058	0.253
sc age11 [II]	-0.084	-0.206 – 0.038	0.175
sc age11 [III non manual]	-0.020	-0.157 – 0.116	0.773
sc age11 [III manual]	-0.079	-0.205 – 0.048	0.224
sc age11 [IV]	-0.120	-0.257 – 0.018	0.088
sc age11 [V]	-0.069	-0.244 – 0.105	0.437
sc age11 [No male head]	-0.137	-0.313 – 0.038	0.125
sc age16 [NA,NMH,forces]	-0.096	-0.219 – 0.027	0.127
sc age16 [II]	0.018	-0.106 – 0.142	0.781
sc age16 [III non-manual]	-0.095	-0.235 – 0.046	0.186
sc age16 [III manual]	-0.072	-0.199 – 0.055	0.266
sc age16 [IV non-manual]	0.086	-0.139 – 0.312	0.453
sc age16 [IV manual]	-0.171	-0.316 – -0.026	0.021
sc age16 [V]	-0.162	-0.343 – 0.019	0.079
sc age16 [Unclear]	0.060	-0.177 – 0.296	0.621
smoking age16 [NA]	-0.125	-0.409 – 0.160	0.391
smoking age16 [Less 1 a week]	0.024	-0.084 – 0.131	0.666
smoking age16 [1 to 9 a week]	-0.048	-0.141 – 0.044	0.304
smoking age16 [10 to 19 a week]	0.009	-0.099 – 0.117	0.872
smoking age16 [20 to 29 a week]	0.018	-0.084 – 0.120	0.728
smoking age16 [30 to 39 a week]	-0.052	-0.157 – 0.053	0.330
smoking age16 [40 to 49 a week]	-0.003	-0.117 – 0.111	0.959
smoking age16 [50 to 59 a week]	0.082	-0.054 – 0.219	0.238
smoking age16 [60 or more a wk]	0.007	-0.093 – 0.107	0.886
alcohol age16 [NA]	0.146	-0.181 – 0.474	0.382
alcohol age16 [Less than 1 week]	0.227	0.117 – 0.336	<0.001
alcohol age16 [2 to 4 weeks]	0.180	0.066 – 0.294	0.002
alcohol age16 [5 to 8 weeks]	0.140	0.008 – 0.272	0.038
alcohol age16 [9 to 12 weeks]	0.144	-0.007 – 0.294	0.061
alcohol age16 [Over 12 weeks]	0.120	-0.012 – 0.252	0.076
alcohol age16 [Do not remember]	0.090	-0.030 – 0.210	0.140
school attendance age16	0.000	-0.001 – 0.001	0.980
qual age33 [No information]	0.579	0.394 – 0.764	<0.001
qual_age33CSE 2-5/equiv NVQ1	0.256	0.148 – 0.363	<0.001
qual age33 [O Level/equiv NVQ2]	0.664	0.569 – 0.758	<0.001
qual age33 [A Level/equiv NVQ3]	0.882	0.778 – 0.985	<0.001
qual age33 [Higher qual NVQ4]	0.931	0.828 – 1.034	<0.001
qual age33 [Degree/higher NVQ5,6]	1.360	1.252 – 1.468	<0.001
malaise age50 [incomplete info]	-0.251	-0.567 – 0.064	0.119
malaise age50 [ High malaise 4+]	-0.143	-0.207 – -0.080	<0.001
diabetes age50 [Mentioned]	-0.071	-0.181 – 0.038	0.202
hypertension age50 [Mentioned]	-0.041	-0.101 – 0.018	0.173
age × tv class [High / rapidly decline TV]	0.001	-0.000 – 0.003	0.087
Random Effects
σ²	0.49
τ₀₀ _{id_num}	0.22
ICC	0.31
N _{id_num}	3025
Observations	15125
Marginal R² / Conditional R²	0.223 / 0.464

df_cog_tv$tv_class <- factor(
  df_cog_tv$tv_class,
  levels = c(
    "High / slowly declining TV",
    "High / rapidly declining TV"
  )
)

pred_cog <- ggpredict(
  cog_mixed_adj,
  terms = c("age [7,11,16,50,62]", "tv_class")
)

pred_df <- as.data.frame(pred_cog)

fig_cog <- ggplot(
  pred_df,
  aes(
    x = x,
    y = predicted,
    color = group,
    fill  = group
  )
) +
  geom_line(size = 1.3) +

  scale_color_manual(
    values = c("#1B9E77", "#D95F02")
  ) +
  scale_fill_manual(
    values = c("#1B9E77", "#D95F02")
  ) +

  scale_x_continuous(
    breaks = c(7, 11, 16, 50, 62)
  ) +

  labs(
    x = "Age (years)",
    y = "Global cognitive function (z-score)",
    color = "TV viewing trajectory",
    fill  = "TV viewing trajectory"
  ) +

  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 12),
    legend.text  = element_text(size = 11),
    axis.title   = element_text(size = 13),
    axis.text    = element_text(size = 11)
  )

int_term <- tidy(
  cog_mixed_adj,
  effects = "fixed",
  conf.int = TRUE
) %>%
  dplyr::filter(grepl("^age:tv_class", term)) %>%
  mutate(
    p.value = 2 * (1 - pnorm(abs(statistic)))
  )

int_term

## # A tibble: 1 × 8
##   effect term            estimate std.error statistic conf.low conf.high p.value
##   <chr>  <chr>              <dbl>     <dbl>     <dbl>    <dbl>     <dbl>   <dbl>
## 1 fixed  age:tv_classHi…  0.00128  0.000747      1.71 -1.87e-4   0.00274  0.0874

int_label <- with(int_term, {
  paste0(
    "Interaction (age × TV trajectory):\n",
    "β = ", sprintf("%.4f", estimate),
    " (95% CI ", sprintf("%.4f", conf.low),
    ", ", sprintf("%.4f", conf.high), "), ",
    "p = ", ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  )
})

int_label

## [1] "Interaction (age × TV trajectory):\nβ = 0.0013 (95% CI -0.0002, 0.0027), p = 0.087"

fig_cog <- fig_cog +
  annotate(
    "text",
    x = 8,
    y = min(pred_df$predicted) + 0.05,
    label = int_label,
    hjust = 0,
    size = 4.2
  )

fig_cog

GROUP-BASED TRAJECTORY MODELLING - READING

library(lcmm)

# df_complete_7_62$read3_age11 <- factor(
#   df_complete_7_62$child_read_books_age11,
#   levels = c("Hardly ever", "Sometimes", "most days"),
#   labels = c("Low", "Moderate", "High")
# )
# 
# df_complete_7_62$read3_age16 <- factor(
#   df_complete_7_62$child_read_age16,
#   levels = c("Hardly ever", "Sometimes", "Most days"),
#   labels = c("Low", "Moderate", "High")
# )


df_read <- df_complete_7_62 %>%
  dplyr::select(
    ncdsid,
    read3_age11,
    read3_age16,
    read3_age23,
    read3_age46,
    read3_age62
  ) %>%
  mutate(
    rdbk11 = as.numeric(read3_age11) - 1,
    rdbk16 = as.numeric(read3_age16) - 1,
    rdbk23 = as.numeric(read3_age23) - 1,
    rdbk46 = as.numeric(read3_age46) - 1,
    rdbk62 = as.numeric(read3_age62) - 1
  )

df_read_long <- df_read %>%
  pivot_longer(
    cols = c(rdbk11, rdbk16, rdbk23, rdbk46, rdbk62),
    names_to  = "wave",
    values_to = "rdbk_num"
  ) %>%
  mutate(
    age = case_when(
      wave == "rdbk11" ~ 11,
      wave == "rdbk16" ~ 16,
      wave == "rdbk23" ~ 23,
      wave == "rdbk46" ~ 46,
      wave == "rdbk62" ~ 62
    )
  ) %>%
  filter(!is.na(rdbk_num))


df_read_long <- df_read_long %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

m1 <- hlme(
  fixed   = rdbk_num ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 1,
  data    = df_read_long
)

summary(m1)

## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = rdbk_num ~ age, random = ~1, subject = "id_num", 
##     ng = 1, data = df_read_long)
##  
## Statistical Model: 
##      Dataset: df_read_long 
##      Number of subjects: 3548 
##      Number of observations: 15512 
##      Number of latent classes: 1 
##      Number of parameters: 4  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  10 
##      Convergence criteria: parameters= 6.6e-12 
##                          : likelihood= 4.3e-08 
##                          : second derivatives= 2.4e-16 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -17712.83  
##      AIC: 35433.65  
##      BIC: 35458.35  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##              coef      Se    Wald p-value
## intercept 1.11983 0.01224  91.507 0.00000
## age       0.00314 0.00029  10.867 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept   0.09587
## 
##                             coef      Se
## Residual standard error: 0.70727 0.00458

m2 <- hlme(
  fixed   = rdbk_num ~ age,
  mixture = ~ age,
  random  = ~ 1,
  subject = "id_num",
  ng      = 2,
  nwg     = TRUE,
  data    = df_read_long,
  B       = random(m1)
)

summary(m2)

## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = rdbk_num ~ age, mixture = ~age, random = ~1, subject = "id_num", 
##     ng = 2, nwg = TRUE, data = df_read_long)
##  
## Statistical Model: 
##      Dataset: df_read_long 
##      Number of subjects: 3548 
##      Number of observations: 15512 
##      Number of latent classes: 2 
##      Number of parameters: 8  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  20 
##      Convergence criteria: parameters= 1.9e-10 
##                          : likelihood= 2.2e-06 
##                          : second derivatives= 1.8e-06 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -17536.53  
##      AIC: 35089.07  
##      BIC: 35138.46  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                      coef       Se    Wald p-value
## intercept class1  0.55494  0.05744   9.661 0.00000
## 
## Fixed effects in the longitudinal model:
## 
##                      coef       Se    Wald p-value
## intercept class1  1.19188  0.01694  70.342 0.00000
## intercept class2  0.96949  0.02273  42.648 0.00000
## age class1        0.00833  0.00045  18.701 0.00000
## age class2       -0.00548  0.00066  -8.309 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept
## intercept         0
## 
##                                     coef       Se
## Proportional coefficient class1  1.01109 11.38158
## Residual standard error:         0.68754  0.00430

age_grid_rdbk <- data.frame(
  age = c(11, 16, 23, 46, 62)
)

pred_read <- predictY(
  m2,
  newdata = age_grid_rdbk,
  var.time = "age",
  draws = TRUE
)

str(pred_read)

## List of 2
##  $ pred : num [1:5, 1:6] 1.28 1.33 1.38 1.57 1.71 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:6] "Ypred_class1" "Ypred_class2" "lower.Ypred_class1" "lower.Ypred_class2" ...
##  $ times:'data.frame':   5 obs. of  1 variable:
##   ..$ age: num [1:5] 11 16 23 46 62
##  - attr(*, "class")= chr "predictY"

pred_df <- data.frame(
  age = pred_read$times$age,
  pred_read$pred
)

traj_df <- pred_df %>%
  pivot_longer(
    cols = -age,
    names_to = "term",
    values_to = "value"
  ) %>%
  separate(term, into = c("type", "class"), sep = "\\.") %>%
  pivot_wider(
    names_from  = type,
    values_from = value
  ) %>%
  mutate(
  class = dplyr::recode(
    class,
    "class1" = "Mid / slowly increasing",
    "class2" = "Mid / slowly declining"
  )
)

# Grid de idades observado
age_grid_rdbk <- data.frame(
  age = seq(
    min(df_read_long$age, na.rm = TRUE),
    max(df_read_long$age, na.rm = TRUE),
    by = 0.5
  )
)

# Predições com IC95%
pred_read <- predictY(
  m2,
  newdata = age_grid_rdbk,
  var.time = "age",
  draws = TRUE
)

traj_df <- data.frame(
  age = pred_read$times$age,

  mean_class1  = pred_read$pred[, "Ypred_class1"],
  lower_class1 = pred_read$pred[, "lower.Ypred_class1"],
  upper_class1 = pred_read$pred[, "upper.Ypred_class1"],

  mean_class2  = pred_read$pred[, "Ypred_class2"],
  lower_class2 = pred_read$pred[, "lower.Ypred_class2"],
  upper_class2 = pred_read$pred[, "upper.Ypred_class2"]
) %>%
  tidyr::pivot_longer(
    cols = -age,
    names_to = c("stat", "class"),
    names_sep = "_class",
    values_to = "value"
  ) %>%
  tidyr::pivot_wider(
    names_from  = stat,
    values_from = value
  ) %>%
  mutate(
    class = factor(
      class,
      levels = c("1", "2"),
      labels = c(
        "Mid / slowly increasing",
        "Mid / slowly declining"
      )
    )
  )

fig_read <- ggplot(traj_df, aes(x = age, y = mean, color = class, fill = class)) +
  geom_ribbon(
    aes(ymin = lower, ymax = upper),
    alpha = 0.25,
    color = NA
  ) +
  geom_line(size = 1.2) +
  scale_color_bmj() +
  scale_fill_bmj() +
  labs(
    x = "Age (years)",
    y = "Reading books (ordinal score)",
    color = "Trajectory",
    fill  = "Trajectory"
  ) +
  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 12),
    legend.text  = element_text(size = 11),
    axis.title   = element_text(size = 13),
    axis.text    = element_text(size = 11)
  ) +

  scale_x_continuous(
    breaks = c(11, 16, 23, 46, 62)
  ) +
  scale_y_continuous(
    breaks = c(0, 1, 2),
    labels = c("Low", "Mid", "High")
  )  +
  coord_cartesian(ylim = c(0,2))
fig_read

COGNITIVE FUNCION - READING ACROSS LIFE COURSE

read_class_df <- m2$pprob %>%
  mutate(
    class = factor(
      class,
      levels = c(1, 2),
      labels = c("Mid / slowly increasing",
                 "Mid / slowly declining"
      )
    )
  ) %>%
  dplyr::select(id_num, class)

df_cog_long <- df_complete_7_62 %>%
  dplyr::select(
    ncdsid,
    z_global_cog_age7,
    z_global_cog_age11,
    z_global_cog_age16,
    global_cog_age50,
    global_cog_age62
  ) %>%
  pivot_longer(
    cols = -ncdsid,
    names_to  = "wave",
    values_to = "z_cog"
  ) %>%
  mutate(
    age = case_when(
      wave == "z_global_cog_age7"   ~ 7,
      wave == "z_global_cog_age11"  ~ 11,
      wave == "z_global_cog_age16"  ~ 16,
      wave == "global_cog_age50"    ~ 50,
      wave == "global_cog_age62"    ~ 62
    )
  ) %>%
  filter(!is.na(z_cog)) %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

df_cov <- df_complete_7_62 %>%
  dplyr::select(
    ncdsid,

    # Demográficas
    sex,

    # Gestação / início da vida
    sc_prenatal,
    smoking_preg,
    birth_weight,
    breast_f,

    # Ambiente cognitivo precoce
    parent_read_level,

    # SEP ao longo da infância/adolescência
    sc_age7,
    sc_age11,
    sc_age16,

    # Comportamentos na adolescência
    smoking_age16,
    alcohol_age16,

    # Escolaridade
    school_attendance_age16,
    qual_age33,
    
    #Late adulthood
    malaise_age50,
    diabetes_age50,
    hypertension_age50
  ) %>%
  mutate(
    id_num = as.numeric(as.factor(ncdsid))
  )

df_read_class <- m2$pprob %>%
  mutate(
    class = factor(
      class,
      labels = c("Mid / slowly increasing",
                 "Mid / slowly declining"
      )
    )
  ) %>%
  dplyr::select(id_num, class)

df_cog_read <- df_cog_long %>%
  left_join(df_read_class, by = "id_num") %>%
  left_join(df_cov,       by = "id_num")


library(lme4)

cog_mixed_adj_read <- lmer(
  z_cog ~ age * class +

    sex +
    sc_prenatal +
    smoking_preg +
    birth_weight +
    breast_f +
    parent_read_level +

    sc_age7 +
    sc_age11 +
    sc_age16 +

    smoking_age16 +
    alcohol_age16 +
    school_attendance_age16 +
    qual_age33 +
    
    malaise_age50 +
    diabetes_age50 +
    hypertension_age50 +
    
    (1 | id_num),

  data = df_cog_read,
  REML = TRUE
)

tab_model(cog_mixed_adj_read, digits = 3)

	z cog
Predictors	Estimates	CI	p
(Intercept)	-0.508	-0.850 – -0.166	0.004
age	-0.005	-0.006 – -0.004	<0.001
class [Mid / slowly declining]	-0.149	-0.203 – -0.095	<0.001
sex [Female]	0.070	0.027 – 0.113	0.001
sc prenatal [I]	0.228	0.046 – 0.410	0.014
sc prenatal [II]	0.218	0.058 – 0.378	0.008
sc prenatal [III]	0.132	-0.018 – 0.282	0.084
sc prenatal [IV]	0.137	-0.022 – 0.297	0.091
sc prenatal [V]	0.106	-0.064 – 0.276	0.223
sc prenatal [Students]	0.034	-0.623 – 0.691	0.919
sc prenatal [Single,no husbnd]	0.071	-0.137 – 0.279	0.502
smoking preg [5mths-no change]	-0.173	-0.400 – 0.054	0.135
smoking preg [Now non-smoker]	-0.145	-0.383 – 0.093	0.233
smoking preg [1 to 4 now]	-0.207	-0.461 – 0.048	0.112
smoking preg [5 to 9 now]	-0.139	-0.401 – 0.123	0.298
smoking preg [10 to 14 now]	-0.144	-0.434 – 0.146	0.331
smoking preg [15 to 19 now]	-0.029	-0.420 – 0.363	0.886
smoking preg [20 to 24 now]	-0.406	-0.809 – -0.004	0.048
smoking preg [25 to 29 now]	-0.166	-0.842 – 0.510	0.630
smoking preg [30 or more now]	-0.032	-0.713 – 0.650	0.927
smoking_pregVariable-5mths	-0.169	-0.410 – 0.072	0.170
birth weight	0.000	-0.000 – 0.000	0.354
breast f [NA]	-0.115	-0.900 – 0.670	0.775
breast f [Dont know]	0.063	-0.281 – 0.407	0.720
breast f [No]	-0.032	-0.089 – 0.024	0.264
breast f [Over one month]	0.063	0.012 – 0.114	0.015
parent read level [Occasionally]	0.051	-0.049 – 0.151	0.320
parent read level [Weekly]	0.077	-0.017 – 0.171	0.110
sc age7 [NA, unclear]	-0.190	-0.510 – 0.130	0.245
sc age7 [No male head]	0.175	-0.035 – 0.386	0.103
sc age7 [II]	-0.006	-0.127 – 0.114	0.917
sc age7 [III non-manual]	0.027	-0.106 – 0.159	0.694
sc age7 [III manual]	-0.057	-0.181 – 0.067	0.367
sc age7 [IV non-manual]	0.117	-0.084 – 0.317	0.255
sc age7 [IV manual]	-0.024	-0.159 – 0.111	0.729
sc age7 [V]	-0.044	-0.210 – 0.122	0.605
sc age11 [NA,unclear]	-0.080	-0.218 – 0.058	0.257
sc age11 [II]	-0.077	-0.198 – 0.044	0.214
sc age11 [III non manual]	-0.013	-0.148 – 0.122	0.853
sc age11 [III manual]	-0.068	-0.193 – 0.058	0.291
sc age11 [IV]	-0.104	-0.240 – 0.032	0.134
sc age11 [V]	-0.057	-0.230 – 0.115	0.515
sc age11 [No male head]	-0.137	-0.311 – 0.037	0.123
sc age16 [NA,NMH,forces]	-0.090	-0.212 – 0.032	0.149
sc age16 [II]	0.029	-0.094 – 0.152	0.646
sc age16 [III non-manual]	-0.096	-0.235 – 0.043	0.174
sc age16 [III manual]	-0.065	-0.191 – 0.060	0.308
sc age16 [IV non-manual]	0.087	-0.137 – 0.310	0.448
sc age16 [IV manual]	-0.167	-0.311 – -0.023	0.023
sc age16 [V]	-0.157	-0.336 – 0.022	0.086
sc age16 [Unclear]	0.053	-0.181 – 0.287	0.659
smoking age16 [NA]	-0.131	-0.413 – 0.151	0.363
smoking age16 [Less 1 a week]	0.026	-0.080 – 0.133	0.626
smoking age16 [1 to 9 a week]	-0.051	-0.142 – 0.040	0.274
smoking age16 [10 to 19 a week]	0.012	-0.095 – 0.118	0.830
smoking age16 [20 to 29 a week]	0.016	-0.085 – 0.117	0.751
smoking age16 [30 to 39 a week]	-0.058	-0.162 – 0.046	0.274
smoking age16 [40 to 49 a week]	0.002	-0.110 – 0.115	0.968
smoking age16 [50 to 59 a week]	0.087	-0.048 – 0.222	0.208
smoking age16 [60 or more a wk]	0.018	-0.081 – 0.117	0.726
alcohol age16 [NA]	0.167	-0.157 – 0.492	0.311
alcohol age16 [Less than 1 week]	0.235	0.127 – 0.344	<0.001
alcohol age16 [2 to 4 weeks]	0.185	0.073 – 0.298	0.001
alcohol age16 [5 to 8 weeks]	0.147	0.016 – 0.277	0.028
alcohol age16 [9 to 12 weeks]	0.158	0.009 – 0.307	0.037
alcohol age16 [Over 12 weeks]	0.119	-0.012 – 0.250	0.076
alcohol age16 [Do not remember]	0.109	-0.010 – 0.228	0.073
school attendance age16	0.000	-0.001 – 0.001	0.979
qual age33 [No information]	0.588	0.404 – 0.771	<0.001
qual_age33CSE 2-5/equiv NVQ1	0.248	0.142 – 0.355	<0.001
qual age33 [O Level/equiv NVQ2]	0.639	0.545 – 0.733	<0.001
qual age33 [A Level/equiv NVQ3]	0.849	0.746 – 0.951	<0.001
qual age33 [Higher qual NVQ4]	0.888	0.785 – 0.990	<0.001
qual age33 [Degree/higher NVQ5,6]	1.293	1.185 – 1.401	<0.001
malaise age50 [incomplete info]	-0.249	-0.561 – 0.063	0.118
malaise age50 [ High malaise 4+]	-0.130	-0.194 – -0.067	<0.001
diabetes age50 [Mentioned]	-0.085	-0.193 – 0.023	0.124
hypertension age50 [Mentioned]	-0.040	-0.099 – 0.019	0.180
age × class [Mid / slowly declining]	-0.001	-0.002 – 0.000	0.100
Random Effects
σ²	0.49
τ₀₀ _{id_num}	0.21
ICC	0.30
N _{id_num}	3025
Observations	15125
Marginal R² / Conditional R²	0.229 / 0.464

df_cog_read$class <- factor(
  df_cog_read$class,
  levels = c(
    "High / stable",
    "Low / slow increase"
  )
)

pred_cog <- ggpredict(
  cog_mixed_adj_read,
  terms = c("age [11,16,23,46,50, 62]", "class")
)

pred_df <- as.data.frame(pred_cog)

fig_cog_read <- ggplot(
  pred_df,
  aes(
    x = x,
    y = predicted,
    color = group,
    fill  = group
  )
) +
  geom_line(size = 1.3) +

  scale_color_bmj() +
  scale_fill_bmj() +

  scale_x_continuous(
    breaks = c(11, 16, 23, 46, 50, 62)
  ) +

  labs(
    x = "Age (years)",
    y = "Global cognitive function (z-score)",
    color = "Reading books trajectory",
    fill  = "Reading books trajectory"
  ) +

  theme_classic(base_size = 14) +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 12),
    legend.text  = element_text(size = 11),
    axis.title   = element_text(size = 13),
    axis.text    = element_text(size = 11)
  )

int_term <- tidy(
  cog_mixed_adj_read,
  effects = "fixed",
  conf.int = TRUE
) %>%
  dplyr::filter(grepl("^age:class", term)) %>%
  mutate(
    p.value = 2 * (1 - pnorm(abs(statistic)))
  )

int_term

## # A tibble: 1 × 8
##   effect term            estimate std.error statistic conf.low conf.high p.value
##   <chr>  <chr>              <dbl>     <dbl>     <dbl>    <dbl>     <dbl>   <dbl>
## 1 fixed  age:classMid /… -8.74e-4  0.000532     -1.64 -0.00192  0.000168   0.100

int_label <- with(int_term, {
  paste0(
    "Interaction (age × reading books trajectory):\n",
    "β = ", sprintf("%.4f", estimate),
    " (95% CI ", sprintf("%.4f", conf.low),
    ", ", sprintf("%.4f", conf.high), "), ",
    "p = ", ifelse(p.value < 0.001, "<0.001", sprintf("%.3f", p.value))
  )
})

int_label

## [1] "Interaction (age × reading books trajectory):\nβ = -0.0009 (95% CI -0.0019, 0.0002), p = 0.100"

fig_cog_read <- fig_cog_read +
  annotate(
    "text",
    x = 8,
    y = min(pred_df$predicted) + 0.05,
    label = int_label,
    hjust = 0,
    size = 4.2
  )

fig_cog_read

SB life course

Feter et al.

2026-01-27