Nuffield - Longitudinal - EF Development Analysis

Author

Fionnuala O’Reilly

Set working directory

Code
getwd()

Packages

Code
load_or_install <- function(pkg) {
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
}

load_or_install("tidyverse")
load_or_install("glue")
load_or_install("here")
load_or_install("viridis")
load_or_install("purrr")
load_or_install("readr")
load_or_install("dplyr")
load_or_install("tidyr")
load_or_install("quarto")
load_or_install("ggthemes")
load_or_install("GGally")
load_or_install("stringr")
load_or_install("readxl")
load_or_install("lubridate")
load_or_install("psych")
load_or_install("flextable")
load_or_install("officer")
load_or_install("ggplot2")
load_or_install("reshape2")
load_or_install("corrplot")
load_or_install("writexl")
load_or_install("lme4")
load_or_install("lmerTest")
load_or_install("lavaan")
load_or_install("semPlot")
load_or_install("miceadds")
load_or_install("car")

Part 1: Executive Funciton, longitudinal data

Clean data

Code
#| echo: false
#| results: 'hide'
#| message: false
#| warning: false

# Gender

# Convert gender column to numeric (0 = Female, 1 = Male)
data_ef <- data_ef %>%
  mutate(gender = case_when(
    gender == "f" ~ 0,
    gender == "m" ~ 1,
    TRUE ~ NA_real_  # Assigns NA if value is neither "f" nor "m"
  ))

table(data_ef$gender, useNA = "ifany")

 0  1 
82 88 
Code
# Rename gender so as not to cause confusion when merging witht he parental data
data_ef <- data_ef %>%
  rename(male_female = gender)

# Year (3y or 4y)

# Remove 'y' and convert to numeric
data_ef <- data_ef %>%
  mutate(year = as.numeric(gsub("y", "", year)))

# Check if conversion worked
str(data_ef$year)
 num [1:170] 3 3 4 3 4 3 4 3 4 3 ...
Code
table(data_ef$year, useNA = "ifany")

 3  4 
88 82 
Code
# Converting to numeric

# List of columns to convert
cols_to_convert <- c("nogoT1", "nogoT2", "MrantT1", "MrantT2", "stroopT1", "stroopT2", "giveNT1", "giveNT2", "counthighT1", "countobjT1", "countobjT2", "numnamT1", "numnamT2", "cancT1", "cancT2", "magcomT1", "magcomT2", "BPVS", "BASr", "BAS")

# Convert selected columns to numeric, ensuring "NA" strings become actual NA values
data_ef <- data_ef %>%
  mutate(across(all_of(cols_to_convert), ~ as.numeric(ifelse(. == "NA", NA, .))))

# Check the structure after conversion
str(data_ef[cols_to_convert])
tibble [170 × 20] (S3: tbl_df/tbl/data.frame)
 $ nogoT1     : num [1:170] NA NA 0.748 0.478 0.891 ...
 $ nogoT2     : num [1:170] 0.205 0.617 0.574 0.66 0.735 ...
 $ MrantT1    : num [1:170] 1 NA 2.33 1 2 ...
 $ MrantT2    : num [1:170] 1.33 NA 1.67 1.33 3 ...
 $ stroopT1   : num [1:170] 62.5 37.5 100 96 96 87.5 91.5 70.5 96 45.5 ...
 $ stroopT2   : num [1:170] 83 79 87.5 96 100 96 96 100 92 96 ...
 $ giveNT1    : num [1:170] 3 5 8 3 8 1 6 8 8 5 ...
 $ giveNT2    : num [1:170] 5 16 5 6 11 4 3 10 8 11 ...
 $ counthighT1: num [1:170] 4 NA 29 11 45 3 10 49 49 13 ...
 $ countobjT1 : num [1:170] 2 NA 8 6 10 2 10 10 10 10 ...
 $ countobjT2 : num [1:170] 12 10 NA 10 15 11 6 18 13 4 ...
 $ numnamT1   : num [1:170] 6 NA 18 13 18 2 11 0 7 2 ...
 $ numnamT2   : num [1:170] 14 NA 18 18 18 10 17 18 17 8 ...
 $ cancT1     : num [1:170] 0.55 0.446 1.121 0.676 1.014 ...
 $ cancT2     : num [1:170] 0.643 NA 0.902 0.609 1.052 ...
 $ magcomT1   : num [1:170] 0.46 NA 0.915 0.7 0.875 0.58 0.63 0.61 0.85 0.8 ...
 $ magcomT2   : num [1:170] 0.725 0.55 1 0.895 0.92 0.785 0.87 0.935 0.915 0.85 ...
 $ BPVS       : num [1:170] 95 101 115 98 122 92 108 112 114 113 ...
 $ BASr       : num [1:170] 18 25 21 25 20 17 9 20 14 17 ...
 $ BAS        : num [1:170] 89 107 125 107 96 86 90 96 107 86 ...
Code
# Summary to verify missing values and correct conversion
summary(data_ef[cols_to_convert])
     nogoT1           nogoT2           MrantT1         MrantT2     
 Min.   :0.0000   Min.   :0.07896   Min.   :0.000   Min.   :0.000  
 1st Qu.:0.3281   1st Qu.:0.55367   1st Qu.:1.000   1st Qu.:1.333  
 Median :0.5348   Median :0.67433   Median :1.667   Median :2.000  
 Mean   :0.5129   Mean   :0.63951   Mean   :1.495   Mean   :1.775  
 3rd Qu.:0.6748   3rd Qu.:0.78000   3rd Qu.:2.000   3rd Qu.:2.333  
 Max.   :0.9500   Max.   :1.00000   Max.   :3.333   Max.   :3.333  
 NA's   :11       NA's   :3         NA's   :3       NA's   :1      
    stroopT1         stroopT2         giveNT1         giveNT2      
 Min.   :  0.00   Min.   : 29.50   Min.   :1.000   Min.   : 0.000  
 1st Qu.: 50.00   1st Qu.: 66.50   1st Qu.:3.000   1st Qu.: 4.000  
 Median : 83.50   Median : 96.00   Median :5.000   Median : 6.000  
 Mean   : 74.82   Mean   : 83.07   Mean   :4.929   Mean   : 7.012  
 3rd Qu.: 96.00   3rd Qu.:100.00   3rd Qu.:8.000   3rd Qu.:10.000  
 Max.   :100.00   Max.   :100.00   Max.   :8.000   Max.   :16.000  
 NA's   :1        NA's   :1        NA's   :1       NA's   :5       
  counthighT1       countobjT1       countobjT2        numnamT1    
 Min.   :  0.00   Min.   : 0.000   Min.   : 2.000   Min.   : 0.00  
 1st Qu.: 10.00   1st Qu.: 6.000   1st Qu.: 7.000   1st Qu.: 4.50  
 Median : 13.00   Median : 8.000   Median : 9.500   Median :14.00  
 Mean   : 16.74   Mean   : 7.146   Mean   : 9.952   Mean   :11.52  
 3rd Qu.: 20.00   3rd Qu.: 9.000   3rd Qu.:12.000   3rd Qu.:18.00  
 Max.   :100.00   Max.   :10.000   Max.   :20.000   Max.   :18.00  
 NA's   :4        NA's   :6        NA's   :2        NA's   :3      
    numnamT2         cancT1           cancT2          magcomT1     
 Min.   : 0.00   Min.   :0.1977   Min.   :0.1849   Min.   :0.3300  
 1st Qu.:12.00   1st Qu.:0.4661   1st Qu.:0.6175   1st Qu.:0.6525  
 Median :16.00   Median :0.5849   Median :0.7236   Median :0.8000  
 Mean   :13.71   Mean   :0.6051   Mean   :0.7344   Mean   :0.7655  
 3rd Qu.:18.00   3rd Qu.:0.7333   3rd Qu.:0.8358   3rd Qu.:0.8900  
 Max.   :18.00   Max.   :1.1862   Max.   :1.3527   Max.   :1.0000  
 NA's   :1       NA's   :1        NA's   :6        NA's   :8       
    magcomT2           BPVS          BASr            BAS        
 Min.   :0.4850   Min.   : 72   Min.   : 7.00   Min.   : 62.00  
 1st Qu.:0.7850   1st Qu.: 91   1st Qu.:15.00   1st Qu.: 86.00  
 Median :0.8550   Median :103   Median :18.00   Median : 92.00  
 Mean   :0.8281   Mean   :103   Mean   :17.95   Mean   : 92.43  
 3rd Qu.:0.9150   3rd Qu.:114   3rd Qu.:20.00   3rd Qu.:100.00  
 Max.   :1.0000   Max.   :131   Max.   :25.00   Max.   :141.00  
 NA's   :2        NA's   :3     NA's   :1       NA's   :1       
Code
str(data_ef)
tibble [170 × 27] (S3: tbl_df/tbl/data.frame)
 $ ID         : num [1:170] 1 2 4 6 7 8 9 10 12 13 ...
 $ male_female: num [1:170] 0 1 1 0 1 1 1 1 1 1 ...
 $ year       : num [1:170] 3 3 4 3 4 3 4 3 4 3 ...
 $ ageT1      : num [1:170] 41 43 49 44 52 44 51 45 51 45 ...
 $ ageT2      : num [1:170] 46 48 53 48 57 48 56 50 56 50 ...
 $ nogoT1     : num [1:170] NA NA 0.748 0.478 0.891 ...
 $ nogoT2     : num [1:170] 0.205 0.617 0.574 0.66 0.735 ...
 $ MrantT1    : num [1:170] 1 NA 2.33 1 2 ...
 $ MrantT2    : num [1:170] 1.33 NA 1.67 1.33 3 ...
 $ stroopT1   : num [1:170] 62.5 37.5 100 96 96 87.5 91.5 70.5 96 45.5 ...
 $ stroopT2   : num [1:170] 83 79 87.5 96 100 96 96 100 92 96 ...
 $ giveNT1    : num [1:170] 3 5 8 3 8 1 6 8 8 5 ...
 $ giveNT2    : num [1:170] 5 16 5 6 11 4 3 10 8 11 ...
 $ counthighT1: num [1:170] 4 NA 29 11 45 3 10 49 49 13 ...
 $ counthighT2: num [1:170] 14 14 29 13 12 5 20 22 100 9 ...
 $ countobjT1 : num [1:170] 2 NA 8 6 10 2 10 10 10 10 ...
 $ countobjT2 : num [1:170] 12 10 NA 10 15 11 6 18 13 4 ...
 $ numnamT1   : num [1:170] 6 NA 18 13 18 2 11 0 7 2 ...
 $ numnamT2   : num [1:170] 14 NA 18 18 18 10 17 18 17 8 ...
 $ cancT1     : num [1:170] 0.55 0.446 1.121 0.676 1.014 ...
 $ cancT2     : num [1:170] 0.643 NA 0.902 0.609 1.052 ...
 $ magcomT1   : num [1:170] 0.46 NA 0.915 0.7 0.875 0.58 0.63 0.61 0.85 0.8 ...
 $ magcomT2   : num [1:170] 0.725 0.55 1 0.895 0.92 0.785 0.87 0.935 0.915 0.85 ...
 $ BPVSr      : num [1:170] 48 55 70 52 89 46 65 66 72 68 ...
 $ BPVS       : num [1:170] 95 101 115 98 122 92 108 112 114 113 ...
 $ BASr       : num [1:170] 18 25 21 25 20 17 9 20 14 17 ...
 $ BAS        : num [1:170] 89 107 125 107 96 86 90 96 107 86 ...

CFA on EF

Code
run_cfa <- function(data, time_point) {
  # Update variable names based on time point
  task_vars <- paste0(c("nogo", "Mrant", "stroop", "canc"), time_point) 
  
  # Define CFA model
  cfa_model <- paste0("EF =~ ", paste(task_vars, collapse = " + "))
  
  # Fit CFA model using Robust Maximum Likelihood (MLR) with FIML for missing data
  fit_cfa <- cfa(cfa_model, data = data, estimator = "MLR", std.lv=TRUE, missing = "FIML")
  
  # Print summary with fit indices and standardized loadings
  print(summary(fit_cfa, fit.measures = TRUE, standardized = TRUE, rsquare=TRUE))
  
  # Extract key fit indices
  fit_indices <- fitMeasures(fit_cfa, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))
  
  # Print fit indices
  print(fit_indices)
  
  # Extract Factor Scores and Append to DataFrame
  factor_scores <- lavPredict(fit_cfa)
  
  # Assign the time-specific column name
  score_col_name <- paste0("EF_factor_score_", time_point)
  data[[score_col_name]] <- factor_scores  # Ensure the column is created dynamically

  # Define dynamic labels based on time point
  nodeLabels <- c(
    paste("No-go (n", time_point, ")", sep = ""),
    paste("Mr.Ant (MT", time_point, ")", sep = ""),
    paste("Stroop (s", time_point, ")", sep = ""),
    paste("Cancellation (c", time_point, ")", sep = ""),
    "Executive Function" 
  )

  # Generate the path diagram
  semPaths(
    fit_cfa,                      
    what = "std",                     
    layout = "tree2",                 
    edge.label.cex = 1.5,             
    edge.color = "black",             
    edge.label.color = "black",       
    esize = 1.5,                        
    node.label.cex = .75,              
    lwd = 3,                          
    residuals = FALSE,               
    curveAdjacent = FALSE,            
    title = TRUE,                     
    nodeNames =  nodeLabels,
    node.width = 2,                   # Increase node spacing
    mar = c(3, 3, 3, 5),              # Adjust margin to prevent text overlap
    legend = TRUE,                    # Ensure the legend is displayed
    legend.cex = 0.3,                 # Reduce legend font size
    legend.position = "topright"       # Move legend for better visibility
  )

  return(list(fit = fit_cfa, data = data))
}

# Run CFA for Time 1 (T1)
cfa_results_T1 <- run_cfa(data_ef, "T1")
Warning: lavaan->lav_data_full():  
   some observed variances are (at least) a factor 1000 times larger than 
   others; use varTable(fit) to investigate
lavaan 0.6-19 ended normally after 32 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        12

  Number of observations                           170
  Number of missing patterns                         5

Model Test User Model:
                                              Standard      Scaled
  Test Statistic                                 0.411       0.431
  Degrees of freedom                                 2           2
  P-value (Chi-square)                           0.814       0.806
  Scaling correction factor                                  0.954
    Yuan-Bentler correction (Mplus variant)                       

Model Test Baseline Model:

  Test statistic                                49.454      44.811
  Degrees of freedom                                 6           6
  P-value                                        0.000       0.000
  Scaling correction factor                                  1.104

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    1.000       1.000
  Tucker-Lewis Index (TLI)                       1.110       1.121
                                                                  
  Robust Comparative Fit Index (CFI)                         1.000
  Robust Tucker-Lewis Index (TLI)                            1.103

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)               -876.818    -876.818
  Scaling correction factor                                  0.979
      for the MLR correction                                      
  Loglikelihood unrestricted model (H1)       -876.613    -876.613
  Scaling correction factor                                  0.976
      for the MLR correction                                      
                                                                  
  Akaike (AIC)                                1777.636    1777.636
  Bayesian (BIC)                              1815.266    1815.266
  Sample-size adjusted Bayesian (SABIC)       1777.269    1777.269

Root Mean Square Error of Approximation:

  RMSEA                                          0.000       0.000
  90 Percent confidence interval - lower         0.000       0.000
  90 Percent confidence interval - upper         0.092       0.098
  P-value H_0: RMSEA <= 0.050                    0.873       0.858
  P-value H_0: RMSEA >= 0.080                    0.070       0.080
                                                                  
  Robust RMSEA                                               0.000
  90 Percent confidence interval - lower                     0.000
  90 Percent confidence interval - upper                     0.093
  P-value H_0: Robust RMSEA <= 0.050                         0.870
  P-value H_0: Robust RMSEA >= 0.080                         0.072

Standardized Root Mean Square Residual:

  SRMR                                           0.011       0.011

Parameter Estimates:

  Standard errors                             Sandwich
  Information bread                           Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  EF =~                                                                 
    nogoT1            0.135    0.022    6.260    0.000    0.135    0.627
    MrantT1           0.355    0.077    4.602    0.000    0.355    0.466
    stroopT1          5.419    2.350    2.306    0.021    5.419    0.231
    cancT1            0.109    0.021    5.101    0.000    0.109    0.584

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .nogoT1            0.511    0.017   29.878    0.000    0.511    2.369
   .MrantT1           1.495    0.059   25.396    0.000    1.495    1.965
   .stroopT1         74.815    1.802   41.523    0.000   74.815    3.194
   .cancT1            0.605    0.014   42.340    0.000    0.605    3.255

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .nogoT1            0.028    0.006    4.800    0.000    0.028    0.607
   .MrantT1           0.453    0.065    6.987    0.000    0.453    0.783
   .stroopT1        519.477   49.529   10.488    0.000  519.477    0.946
   .cancT1            0.023    0.005    4.498    0.000    0.023    0.658
    EF                1.000                               1.000    1.000

R-Square:
                   Estimate
    nogoT1            0.393
    MrantT1           0.217
    stroopT1          0.054
    cancT1            0.342

 chisq     df pvalue    cfi    tli  rmsea   srmr 
 0.411  2.000  0.814  1.000  1.110  0.000  0.011 
Warning in qgraph::qgraph(Edgelist, labels = nLab, bidirectional = Bidir, : The
following arguments are not documented and likely not arguments of qgraph and
thus ignored: node.label.cex; lwd; legend.position

Code
fit_cfa_T1 <- cfa_results_T1$fit
data_ef_T1 <- cfa_results_T1$data  # Updated data with factor scores for T1

# Run CFA for Time 2 (T2)
cfa_results_T2 <- run_cfa(data_ef, "T2")
Warning: lavaan->lav_data_full():  
   some observed variances are (at least) a factor 1000 times larger than 
   others; use varTable(fit) to investigate
lavaan 0.6-19 ended normally after 35 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        12

  Number of observations                           170
  Number of missing patterns                         5

Model Test User Model:
                                              Standard      Scaled
  Test Statistic                                 2.517       2.170
  Degrees of freedom                                 2           2
  P-value (Chi-square)                           0.284       0.338
  Scaling correction factor                                  1.160
    Yuan-Bentler correction (Mplus variant)                       

Model Test Baseline Model:

  Test statistic                                52.175      45.097
  Degrees of freedom                                 6           6
  P-value                                        0.000       0.000
  Scaling correction factor                                  1.157

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.989       0.996
  Tucker-Lewis Index (TLI)                       0.966       0.987
                                                                  
  Robust Comparative Fit Index (CFI)                         0.997
  Robust Tucker-Lewis Index (TLI)                            0.991

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)               -848.730    -848.730
  Scaling correction factor                                  1.057
      for the MLR correction                                      
  Loglikelihood unrestricted model (H1)       -847.472    -847.472
  Scaling correction factor                                  1.071
      for the MLR correction                                      
                                                                  
  Akaike (AIC)                                1721.460    1721.460
  Bayesian (BIC)                              1759.089    1759.089
  Sample-size adjusted Bayesian (SABIC)       1721.093    1721.093

Root Mean Square Error of Approximation:

  RMSEA                                          0.039       0.022
  90 Percent confidence interval - lower         0.000       0.000
  90 Percent confidence interval - upper         0.162       0.147
  P-value H_0: RMSEA <= 0.050                    0.424       0.497
  P-value H_0: RMSEA >= 0.080                    0.402       0.325
                                                                  
  Robust RMSEA                                               0.021
  90 Percent confidence interval - lower                     0.000
  90 Percent confidence interval - upper                     0.169
  P-value H_0: Robust RMSEA <= 0.050                         0.467
  P-value H_0: Robust RMSEA >= 0.080                         0.384

Standardized Root Mean Square Residual:

  SRMR                                           0.027       0.027

Parameter Estimates:

  Standard errors                             Sandwich
  Information bread                           Observed
  Observed information based on                Hessian

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  EF =~                                                                 
    nogoT2            0.111    0.020    5.638    0.000    0.111    0.561
    MrantT2           0.254    0.084    3.024    0.002    0.254    0.331
    stroopT2          8.413    2.026    4.153    0.000    8.413    0.411
    cancT2            0.124    0.026    4.742    0.000    0.124    0.648

Intercepts:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .nogoT2            0.638    0.015   41.664    0.000    0.638    3.232
   .MrantT2           1.775    0.059   30.015    0.000    1.775    2.309
   .stroopT2         83.080    1.574   52.793    0.000   83.080    4.057
   .cancT2            0.735    0.015   49.128    0.000    0.735    3.830

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .nogoT2            0.027    0.005    5.848    0.000    0.027    0.686
   .MrantT2           0.526    0.068    7.698    0.000    0.526    0.890
   .stroopT2        348.473   41.244    8.449    0.000  348.473    0.831
   .cancT2            0.021    0.006    3.463    0.001    0.021    0.580
    EF                1.000                               1.000    1.000

R-Square:
                   Estimate
    nogoT2            0.314
    MrantT2           0.110
    stroopT2          0.169
    cancT2            0.420

 chisq     df pvalue    cfi    tli  rmsea   srmr 
 2.517  2.000  0.284  0.989  0.966  0.039  0.027 
Warning in qgraph::qgraph(Edgelist, labels = nLab, bidirectional = Bidir, : The
following arguments are not documented and likely not arguments of qgraph and
thus ignored: node.label.cex; lwd; legend.position

Code
fit_cfa_T2 <- cfa_results_T2$fit
data_ef_T2 <- cfa_results_T2$data  # Updated data with factor scores for T2

# Merge scores into data_ef
data_ef <- data_ef %>%
  left_join(dplyr::select(data_ef_T1, ID, EF_factor_score_T1), by = "ID") %>%
  left_join(dplyr::select(data_ef_T2, ID, EF_factor_score_T2), by = "ID")

colnames(data_ef)
 [1] "ID"                 "male_female"        "year"              
 [4] "ageT1"              "ageT2"              "nogoT1"            
 [7] "nogoT2"             "MrantT1"            "MrantT2"           
[10] "stroopT1"           "stroopT2"           "giveNT1"           
[13] "giveNT2"            "counthighT1"        "counthighT2"       
[16] "countobjT1"         "countobjT2"         "numnamT1"          
[19] "numnamT2"           "cancT1"             "cancT2"            
[22] "magcomT1"           "magcomT2"           "BPVSr"             
[25] "BPVS"               "BASr"               "BAS"               
[28] "EF_factor_score_T1" "EF_factor_score_T2"

Part 2: Parent questionnaire

Code
# Parent questionnaire
data_par <- read_excel("~/OneDrive - Nexus365/Nuffield1/R datasets/All_data-copy for R.xlsx")
colnames(data_par)
 [1] "pat_id"               "longitudinal"         "age_mths_t1"         
 [4] "age_years_t1"         "wave"                 "gender"              
 [7] "mean_acc_congru_t1"   "mean_acc_incongru_t1" "stroop_cs"           
[10] "nogo_cs"              "mr_ant_cs"            "cancel_cs"           
[13] "isci_raw"             "fi_raw"               "emi_raw"             
[16] "gec_raw"              "behav_adjust"         "lang_cog"            
[19] "daily_living"         "fam_support"          "he2_n1"              
[22] "he3_ll1"              "he5_ll2"              "he6_ls1"             
[25] "he7_n2"               "he9_n3"               "he10_ls2"            
[28] "he11_ll3"             "he13_n4"              "he15_ll4"            
[31] "he17_ll5"             "he18_ls3"             "he20_ls4"            
[34] "he21_n5"              "he23_ls5"             "he24_ll6"            
[37] "he25_n6"              "he27_ls6"             "he28_ll7"            
[40] "he29_n7"              "he31_n8"              "he32_ls7"            
[43] "he33_ll8"             "re1_pse1"             "re2_pse2"            
[46] "re3_pse3"             "re4_pse4"             "re5_pse5"            
[49] "imd_decile"           "imd_rank"             "mat_edu"             
[52] "pat_edu"              "pat_or_other_edu"     "combined_pat_edu"    
[55] "bas3_raw"             "bpvs_raw"             "give_n"              
[58] "count_high"           "num_screen"           "coun_amounts_acc"    

Data cleaning

Perform PCA on HLE items

Code
# Define Home Learning Environment (HLE) variables for PCA
hle_vars <- c("he2_n1", "he3_ll1", "he5_ll2", "he6_ls1", "he7_n2", "he9_n3", "he10_ls2", "he11_ll3", "he13_n4", "he15_ll4", "he17_ll5", "he18_ls3", "he20_ls4", "he21_n5", "he23_ls5", "he24_ll6", "he25_n6", "he27_ls6", "he28_ll7", "he29_n7", "he31_n8", "he32_ls7", "he33_ll8" 
)

summary_stats_hle <- data_par %>%
  dplyr::select(any_of(hle_vars)) %>%  # Use any_of() to avoid errors if some variables are missing
  summarise(across(everything(), list(
    n = ~ sum(!is.na(.)),               
    mean = ~ mean(. , na.rm = TRUE),    
    sd = ~ sd(. , na.rm = TRUE),        
    median = ~ median(. , na.rm = TRUE),
    min = ~ min(. , na.rm = TRUE),      
    max = ~ max(. , na.rm = TRUE)       
  ))) %>%
  pivot_longer(cols = everything(), names_to = "Variable_Statistic", values_to = "Value") %>%
  separate(Variable_Statistic, into = c("Variable", "Statistic"), sep = "_", extra = "merge")


# Convert to flextable for visualization
summary_table <- flextable(summary_stats_hle) %>%
  theme_vanilla() %>%
  autofit() %>%
  set_caption("Summary Statistics for HLE Variables")

# Print table
summary_table

Run the PCA

Export loadings as csv

Scree plot to determine the number of factors to retain

Apply Factor Rotation

Save Factor Loadings to Word

Extract & Merge Factor Scores

Visualisations

Creating the composite HLE score

Part 3: Merging datasets and running multi-group models

Part 3: Simple linear models


Call:
lm(formula = EF_factor_score_T2 ~ EF_factor_score_T1 + imd + 
    combined_pat_edu, data = merged_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.68708 -0.44999  0.02961  0.35907  1.53082 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)        -0.130673   0.252453  -0.518    0.606    
EF_factor_score_T1  0.586728   0.073479   7.985 9.19e-13 ***
imd                 0.015627   0.024882   0.628    0.531    
combined_pat_edu    0.002577   0.030176   0.085    0.932    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6334 on 121 degrees of freedom
  (45 observations deleted due to missingness)
Multiple R-squared:  0.3627,    Adjusted R-squared:  0.3469 
F-statistic: 22.95 on 3 and 121 DF,  p-value: 7.843e-12

Call:
lm(formula = EF_factor_score_T2 ~ EF_factor_score_T1 + imd + 
    combined_pat_edu + ageT1 + gender, data = merged_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.53191 -0.50342  0.04815  0.33416  1.41610 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)        -0.721630   0.845758  -0.853    0.395    
EF_factor_score_T1  0.560018   0.076636   7.308  3.4e-11 ***
imd                 0.015487   0.025168   0.615    0.540    
combined_pat_edu    0.008178   0.030699   0.266    0.790    
ageT1               0.015896   0.016995   0.935    0.351    
gender             -0.128632   0.115461  -1.114    0.267    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6337 on 119 degrees of freedom
  (45 observations deleted due to missingness)
Multiple R-squared:  0.3726,    Adjusted R-squared:  0.3462 
F-statistic: 14.13 on 5 and 119 DF,  p-value: 7.625e-11

Call:
lm(formula = EF_factor_score_T2 ~ EF_factor_score_T1 + imd + 
    combined_pat_edu + ageT1 + gender + BPVS + BAS, data = merged_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.62814 -0.42890  0.02759  0.38063  1.41084 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)        -1.561786   0.907232  -1.721   0.0879 .  
EF_factor_score_T1  0.468229   0.085153   5.499 2.37e-07 ***
imd                 0.006216   0.026942   0.231   0.8179    
combined_pat_edu   -0.003372   0.030974  -0.109   0.9135    
ageT1               0.005115   0.017452   0.293   0.7700    
gender             -0.103801   0.115746  -0.897   0.3717    
BPVS                0.002882   0.005673   0.508   0.6125    
BAS                 0.012323   0.005800   2.125   0.0358 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6245 on 114 degrees of freedom
  (48 observations deleted due to missingness)
Multiple R-squared:  0.3909,    Adjusted R-squared:  0.3535 
F-statistic: 10.45 on 7 and 114 DF,  p-value: 4.339e-10

Call:
lm(formula = EF_factor_score_T2 ~ EF_factor_score_T1 + imd + 
    combined_pat_edu + ageT1 + gender + BPVS + BAS + TC1 + TC2 + 
    TC3 + TC4 + TC5 + TC6, data = merged_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.33088 -0.29519 -0.02629  0.35799  1.37566 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)        -0.9118231  1.0818579  -0.843   0.4014    
EF_factor_score_T1  0.4655421  0.0977548   4.762  6.7e-06 ***
imd                 0.0005392  0.0294910   0.018   0.9854    
combined_pat_edu   -0.0303042  0.0395884  -0.765   0.4458    
ageT1               0.0035562  0.0202901   0.175   0.8612    
gender             -0.1465601  0.1355719  -1.081   0.2824    
BPVS               -0.0001149  0.0065466  -0.018   0.9860    
BAS                 0.0125298  0.0061618   2.033   0.0447 *  
TC1                 0.0006646  0.0679780   0.010   0.9922    
TC2                 0.1107694  0.0829700   1.335   0.1850    
TC3                 0.0683802  0.0757245   0.903   0.3688    
TC4                -0.0389784  0.0690822  -0.564   0.5739    
TC5                -0.0620557  0.0699877  -0.887   0.3775    
TC6                 0.0564839  0.0724643   0.779   0.4376    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6132 on 97 degrees of freedom
  (59 observations deleted due to missingness)
Multiple R-squared:  0.3951,    Adjusted R-squared:  0.314 
F-statistic: 4.873 on 13 and 97 DF,  p-value: 1.733e-06

Longitudinal Models Predicting EF Factor Score at Time 2
==================================================================================================================
                                                         Dependent variable:                                      
                    ----------------------------------------------------------------------------------------------
                                                         EF Factor Score (T2)                                     
                            Model 1                 Model 2                 Model 3                Model 4        
                              (1)                     (2)                     (3)                    (4)          
------------------------------------------------------------------------------------------------------------------
EF_factor_score_T1         0.587***                0.560***                0.468***                0.466***       
                        (0.443, 0.731)          (0.410, 0.710)          (0.301, 0.635)          (0.274, 0.657)    
imd                          0.016                   0.015                   0.006                  0.001         
                        (-0.033, 0.064)         (-0.034, 0.065)         (-0.047, 0.059)        (-0.057, 0.058)    
combined_pat_edu             0.003                   0.008                  -0.003                  -0.030        
                        (-0.057, 0.062)         (-0.052, 0.068)         (-0.064, 0.057)        (-0.108, 0.047)    
ageT1                                                0.016                   0.005                  0.004         
                                                (-0.017, 0.049)         (-0.029, 0.039)        (-0.036, 0.043)    
gender                                              -0.129                  -0.104                  -0.147        
                                                (-0.355, 0.098)         (-0.331, 0.123)        (-0.412, 0.119)    
BPVS                                                                         0.003                 -0.0001        
                                                                        (-0.008, 0.014)        (-0.013, 0.013)    
BAS                                                                         0.012*                  0.013*        
                                                                        (0.001, 0.024)         (0.0005, 0.025)    
TC1                                                                                                 0.001         
                                                                                               (-0.133, 0.134)    
TC2                                                                                                 0.111         
                                                                                               (-0.052, 0.273)    
TC3                                                                                                 0.068         
                                                                                               (-0.080, 0.217)    
TC4                                                                                                 -0.039        
                                                                                               (-0.174, 0.096)    
TC5                                                                                                 -0.062        
                                                                                               (-0.199, 0.075)    
TC6                                                                                                 0.056         
                                                                                               (-0.086, 0.199)    
Constant                    -0.131                  -0.722                  -1.562                  -0.912        
                        (-0.625, 0.364)         (-2.379, 0.936)         (-3.340, 0.216)        (-3.032, 1.209)    
------------------------------------------------------------------------------------------------------------------
Observations                  125                     125                     122                    111          
R2                           0.363                   0.373                   0.391                  0.395         
Adjusted R2                  0.347                   0.346                   0.354                  0.314         
Residual Std. Error    0.633 (df = 121)        0.634 (df = 119)        0.625 (df = 114)        0.613 (df = 97)    
F Statistic         22.955*** (df = 3; 121) 14.132*** (df = 5; 119) 10.452*** (df = 7; 114) 4.873*** (df = 13; 97)
==================================================================================================================
Note:                                                                                *p<0.05; **p<0.01; ***p<0.001

Part 4: Creating long form dataset for mixed effects models

Linear mixed effects models - sequentially adding variables - complete case (no imputation)

Code
#| echo: true
#| results: 'hide'
#| message: false
#| warning: false

library(dplyr)

merged_data_long_complete <- merged_data_long %>%
  filter(complete.cases(EF_factor_score, Time, imd, combined_pat_edu))

library(lme4)

lmm_1 <- lmer(EF_factor_score ~ Time * (imd + combined_pat_edu) + (1 | ID), 
              data = merged_data_long, REML = FALSE)

lmm_2 <- lmer(EF_factor_score ~ Time * (imd + combined_pat_edu) + ageT1 + gender + (1 | ID), 
              data = merged_data_long, REML = FALSE)

lmm_3 <- lmer(EF_factor_score ~ Time * (imd + combined_pat_edu) + ageT1 + gender + BPVS + BAS + (1 | ID), 
              data = merged_data_long, REML = FALSE)

lmm_4 <- lmer(EF_factor_score ~ Time * (imd + combined_pat_edu) + ageT1 + gender + BPVS + BAS + 
              TC1 + TC2 + TC3 + TC4 + TC5 + TC6 + (1 | ID), data = merged_data_long, REML = FALSE)

# Additional - modelling the Time*HLE interactions
lmm_5 <- lmer(EF_factor_score ~ Time * (imd + combined_pat_edu + TC1 + TC2 + TC3 + TC4 + TC5 + TC6) + ageT1 + gender + BPVS + BAS + 
              (1 | ID), data = merged_data_long, REML = FALSE)


# Print Model Summaries
summary(lmm_1)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: EF_factor_score ~ Time * (imd + combined_pat_edu) + (1 | ID)
   Data: merged_data_long

      AIC       BIC    logLik -2*log(L)  df.resid 
    542.2     570.4    -263.1     526.2       242 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.14786 -0.55663  0.02281  0.50378  1.99960 

Random effects:
 Groups   Name        Variance Std.Dev.
 ID       (Intercept) 0.3488   0.5906  
 Residual             0.2449   0.4949  
Number of obs: 250, groups:  ID, 125

Fixed effects:
                          Estimate Std. Error         df t value Pr(>|t|)  
(Intercept)              -0.570505   0.302846 185.860905  -1.884   0.0612 .
TimeT2                    0.105101   0.275092 125.000001   0.382   0.7031  
imd                       0.063105   0.029739 185.860904   2.122   0.0352 *
combined_pat_edu          0.018901   0.036671 185.860905   0.515   0.6069  
TimeT2:imd               -0.010453   0.027014 125.000001  -0.387   0.6994  
TimeT2:combined_pat_edu  -0.005235   0.033310 125.000001  -0.157   0.8754  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) TimeT2 imd    cmbn__ TmT2:m
TimeT2      -0.454                            
imd         -0.667  0.303                     
combnd_pt_d -0.635  0.288 -0.105              
TimeT2:imd   0.303 -0.667 -0.454  0.048       
TmT2:cmbn__  0.288 -0.635  0.048 -0.454 -0.105
Code
summary(lmm_2)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: EF_factor_score ~ Time * (imd + combined_pat_edu) + ageT1 + gender +  
    (1 | ID)
   Data: merged_data_long

      AIC       BIC    logLik -2*log(L)  df.resid 
    534.3     569.5    -257.2     514.3       240 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.9633 -0.5650 -0.0571  0.5674  2.0711 

Random effects:
 Groups   Name        Variance Std.Dev.
 ID       (Intercept) 0.3060   0.5531  
 Residual             0.2449   0.4949  
Number of obs: 250, groups:  ID, 125

Fixed effects:
                          Estimate Std. Error         df t value Pr(>|t|)   
(Intercept)              -2.784835   0.850918 131.702655  -3.273  0.00136 **
TimeT2                    0.105101   0.275092 125.000000   0.382  0.70307   
imd                       0.053459   0.029004 189.296966   1.843  0.06687 . 
combined_pat_edu          0.035528   0.035696 189.569800   0.995  0.32086   
ageT1                     0.053401   0.016936 125.000000   3.153  0.00202 **
gender                   -0.226154   0.118116 125.000000  -1.915  0.05782 . 
TimeT2:imd               -0.010453   0.027014 125.000000  -0.387  0.69945   
TimeT2:combined_pat_edu  -0.005235   0.033310 125.000000  -0.157  0.87538   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) TimeT2 imd    cmbn__ ageT1  gender TmT2:m
TimeT2      -0.162                                          
imd         -0.084  0.311                                   
combnd_pt_d -0.346  0.296 -0.122                            
ageT1       -0.920  0.000 -0.140  0.144                     
gender      -0.091  0.000 -0.054 -0.023 -0.104              
TimeT2:imd   0.108 -0.667 -0.466  0.049  0.000  0.000       
TmT2:cmbn__  0.103 -0.635  0.049 -0.467  0.000  0.000 -0.105
Code
summary(lmm_3)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: EF_factor_score ~ Time * (imd + combined_pat_edu) + ageT1 + gender +  
    BPVS + BAS + (1 | ID)
   Data: merged_data_long

      AIC       BIC    logLik -2*log(L)  df.resid 
    495.3     537.2    -235.6     471.3       232 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.16293 -0.57193 -0.02532  0.56731  1.89913 

Random effects:
 Groups   Name        Variance Std.Dev.
 ID       (Intercept) 0.2046   0.4524  
 Residual             0.2482   0.4982  
Number of obs: 244, groups:  ID, 122

Fixed effects:
                          Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)              -3.780755   0.783940 129.986315  -4.823 3.89e-06 ***
TimeT2                    0.101381   0.279269 122.000000   0.363  0.71722    
imd                       0.006309   0.028397 192.491246   0.222  0.82441    
combined_pat_edu          0.011350   0.033059 198.578607   0.343  0.73172    
ageT1                     0.020680   0.015905 122.000001   1.300  0.19596    
gender                   -0.163903   0.105588 122.000000  -1.552  0.12319    
BPVS                      0.014680   0.005014 121.999999   2.928  0.00407 ** 
BAS                       0.015317   0.005234 122.000000   2.926  0.00409 ** 
TimeT2:imd               -0.007270   0.027902 122.000000  -0.261  0.79488    
TimeT2:combined_pat_edu  -0.009173   0.033770 122.000000  -0.272  0.78637    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) TimeT2 imd    cmbn__ ageT1  gender BPVS   BAS    TmT2:m
TimeT2      -0.178                                                        
imd         -0.073  0.330                                                 
combnd_pt_d -0.288  0.315 -0.083                                          
ageT1       -0.755  0.000 -0.017  0.189                                   
gender      -0.100  0.000 -0.078 -0.046 -0.143                            
BPVS        -0.013  0.000 -0.308 -0.114 -0.244  0.044                     
BAS         -0.234  0.000  0.089 -0.052 -0.091  0.061 -0.499              
TimeT2:imd   0.120 -0.672 -0.491  0.063  0.000  0.000  0.000  0.000       
TmT2:cmbn__  0.110 -0.617  0.060 -0.511  0.000  0.000  0.000  0.000 -0.123
Code
summary(lmm_4)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: EF_factor_score ~ Time * (imd + combined_pat_edu) + ageT1 + gender +  
    BPVS + BAS + TC1 + TC2 + TC3 + TC4 + TC5 + TC6 + (1 | ID)
   Data: merged_data_long

      AIC       BIC    logLik -2*log(L)  df.resid 
    441.4     502.7    -202.7     405.4       204 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.11678 -0.59005 -0.05825  0.59811  1.91171 

Random effects:
 Groups   Name        Variance Std.Dev.
 ID       (Intercept) 0.1512   0.3889  
 Residual             0.2426   0.4926  
Number of obs: 222, groups:  ID, 111

Fixed effects:
                          Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)              -3.894309   0.853164 118.130396  -4.565 1.23e-05 ***
TimeT2                    0.217794   0.301159 111.000001   0.723  0.47109    
imd                       0.008701   0.029165 180.180190   0.298  0.76579    
combined_pat_edu         -0.015600   0.038096 168.495744  -0.409  0.68270    
ageT1                     0.025934   0.017053 111.000004   1.521  0.13117    
gender                   -0.054520   0.115391 110.999999  -0.472  0.63751    
BPVS                      0.013606   0.005353 110.999999   2.542  0.01240 *  
BAS                       0.014315   0.005165 110.999999   2.771  0.00655 ** 
TC1                      -0.068274   0.057342 110.999999  -1.191  0.23633    
TC2                       0.171640   0.069386 110.999999   2.474  0.01489 *  
TC3                       0.145259   0.063217 110.999999   2.298  0.02345 *  
TC4                      -0.056738   0.058656 110.999999  -0.967  0.33550    
TC5                      -0.014027   0.059548 110.999999  -0.236  0.81421    
TC6                      -0.176250   0.057150 110.999999  -3.084  0.00258 ** 
TimeT2:imd               -0.011443   0.029692 111.000001  -0.385  0.70068    
TimeT2:combined_pat_edu  -0.018155   0.035595 111.000001  -0.510  0.61102    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation matrix not shown by default, as p = 16 > 12.
Use print(x, correlation=TRUE)  or
    vcov(x)        if you need it
Code
summary(lmm_5)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
  method [lmerModLmerTest]
Formula: EF_factor_score ~ Time * (imd + combined_pat_edu + TC1 + TC2 +  
    TC3 + TC4 + TC5 + TC6) + ageT1 + gender + BPVS + BAS + (1 |      ID)
   Data: merged_data_long

      AIC       BIC    logLik -2*log(L)  df.resid 
    444.9     526.6    -198.5     396.9       198 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.19141 -0.54307 -0.03021  0.53822  2.11131 

Random effects:
 Groups   Name        Variance Std.Dev.
 ID       (Intercept) 0.1602   0.4002  
 Residual             0.2247   0.4741  
Number of obs: 222, groups:  ID, 111

Fixed effects:
                          Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)              -3.894703   0.857951 120.697888  -4.540 1.34e-05 ***
TimeT2                    0.218583   0.351373 111.000001   0.622 0.535164    
imd                       0.010779   0.029460 184.305688   0.366 0.714872    
combined_pat_edu         -0.019037   0.039345 182.410813  -0.484 0.629070    
TC1                      -0.089181   0.067224 183.895529  -1.327 0.186276    
TC2                       0.168653   0.082002 187.090704   2.057 0.041103 *  
TC3                       0.147189   0.074112 183.898803   1.986 0.048517 *  
TC4                      -0.028796   0.068418 181.829303  -0.421 0.674336    
TC5                       0.020336   0.069829 183.999968   0.291 0.771211    
TC6                      -0.260159   0.065995 177.626201  -3.942 0.000116 ***
ageT1                     0.025934   0.017053 110.999998   1.521 0.131168    
gender                   -0.054520   0.115391 110.999999  -0.472 0.637509    
BPVS                      0.013606   0.005353 111.000002   2.542 0.012404 *  
BAS                       0.014315   0.005165 110.999999   2.771 0.006547 ** 
TimeT2:imd               -0.015599   0.030835 111.000001  -0.506 0.613929    
TimeT2:combined_pat_edu  -0.011282   0.040665 111.000001  -0.277 0.781968    
TimeT2:TC1                0.041814   0.070171 111.000001   0.596 0.552464    
TimeT2:TC2                0.005973   0.087403 111.000001   0.068 0.945639    
TimeT2:TC3               -0.003859   0.077363 111.000001  -0.050 0.960307    
TimeT2:TC4               -0.055884   0.070438 111.000001  -0.793 0.429249    
TimeT2:TC5               -0.068726   0.072940 111.000001  -0.942 0.348126    
TimeT2:TC6                0.167818   0.066007 111.000001   2.542 0.012387 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation matrix not shown by default, as p = 22 > 12.
Use print(x, correlation=TRUE)  or
    vcov(x)        if you need it
Code
# Compare Model Fit using AIC/BIC
AIC(lmm_1, lmm_2, lmm_3, lmm_4)
Warning in AIC.default(lmm_1, lmm_2, lmm_3, lmm_4): models are not all fitted
to the same number of observations
Code
BIC(lmm_1, lmm_2, lmm_3, lmm_4)
Warning in BIC.default(lmm_1, lmm_2, lmm_3, lmm_4): models are not all fitted
to the same number of observations
Code
# Compute confidence intervals for all models
ci_lmm_1 <- confint(lmm_1, method = "Wald")  # or "profile" for profiling CIs
ci_lmm_2 <- confint(lmm_2, method = "Wald")
ci_lmm_3 <- confint(lmm_3, method = "Wald")
ci_lmm_4 <- confint(lmm_4, method = "Wald")
ci_lmm_5 <- confint(lmm_5, method = "Wald")


# Print confidence intervals
ci_lmm_1
                               2.5 %     97.5 %
.sig01                            NA         NA
.sigma                            NA         NA
(Intercept)             -1.164072450 0.02306300
TimeT2                  -0.434069718 0.64427118
imd                      0.004817489 0.12139213
combined_pat_edu        -0.052972458 0.09077407
TimeT2:imd              -0.063398567 0.04249264
TimeT2:combined_pat_edu -0.070521062 0.06005187
Code
ci_lmm_2
                              2.5 %       97.5 %
.sig01                           NA           NA
.sigma                           NA           NA
(Intercept)             -4.45260256 -1.117066443
TimeT2                  -0.43406972  0.644271180
imd                     -0.00338875  0.110306307
combined_pat_edu        -0.03443452  0.105490366
ageT1                    0.02020693  0.086594662
gender                  -0.45765809  0.005350071
TimeT2:imd              -0.06339857  0.042492641
TimeT2:combined_pat_edu -0.07052106  0.060051871
Code
ci_lmm_3
                               2.5 %      97.5 %
.sig01                            NA          NA
.sigma                            NA          NA
(Intercept)             -5.317249526 -2.24426058
TimeT2                  -0.445976588  0.64873892
imd                     -0.049347845  0.06196635
combined_pat_edu        -0.053444146  0.07614323
ageT1                   -0.010492192  0.05185317
gender                  -0.370852242  0.04304695
BPVS                     0.004853367  0.02450759
BAS                      0.005058362  0.02557606
TimeT2:imd              -0.061956135  0.04741638
TimeT2:combined_pat_edu -0.075360004  0.05701463
Code
ci_lmm_4
                               2.5 %      97.5 %
.sig01                            NA          NA
.sigma                            NA          NA
(Intercept)             -5.566479504 -2.22213856
TimeT2                  -0.372466112  0.80805365
imd                     -0.048460963  0.06586241
combined_pat_edu        -0.090267807  0.05906729
ageT1                   -0.007490199  0.05935797
gender                  -0.280681817  0.17164144
BPVS                     0.003115018  0.02409712
BAS                      0.004191086  0.02443828
TC1                     -0.180662211  0.04411330
TC2                      0.035645184  0.30763429
TC3                      0.021356178  0.26916243
TC4                     -0.171702405  0.05822619
TC5                     -0.130739810  0.10268503
TC6                     -0.288260965 -0.06423902
TimeT2:imd              -0.069637675  0.04675143
TimeT2:combined_pat_edu -0.087920010  0.05160925
Code
ci_lmm_5
                               2.5 %      97.5 %
.sig01                            NA          NA
.sigma                            NA          NA
(Intercept)             -5.576256886 -2.21315006
TimeT2                  -0.470095744  0.90726104
imd                     -0.046961093  0.06851868
combined_pat_edu        -0.096151311  0.05807712
TC1                     -0.220938111  0.04257524
TC2                      0.007932881  0.32937363
TC3                      0.001931000  0.29244657
TC4                     -0.162891759  0.10529994
TC5                     -0.116526494  0.15719767
TC6                     -0.389506062 -0.13081190
ageT1                   -0.007490199  0.05935797
gender                  -0.280681817  0.17164144
BPVS                     0.003115018  0.02409712
BAS                      0.004191086  0.02443828
TimeT2:imd              -0.076033950  0.04483542
TimeT2:combined_pat_edu -0.090984519  0.06842112
TimeT2:TC1              -0.095718793  0.17934671
TimeT2:TC2              -0.165334013  0.17727994
TimeT2:TC3              -0.155487980  0.14777005
TimeT2:TC4              -0.193941022  0.08217222
TimeT2:TC5              -0.211686661  0.07423475
TimeT2:TC6               0.038447042  0.29718891
Code
library(modelsummary)

Attaching package: 'modelsummary'
The following object is masked from 'package:psych':

    SD
Code
model_list <- list(
  "Model 1" = lmm_1,
  "Model 2" = lmm_2,
  "Model 3" = lmm_3,
  "Model 4" = lmm_4
)

modelsummary(
  model_list,
  statistic = c("std.error", "conf.int"),
  stars = TRUE,
  output = "html"
)
Model 1 Model 2 Model 3 Model 4
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) −0.571+ −2.785** −3.781*** −3.894***
(0.303) (0.851) (0.784) (0.853)
[−1.167, 0.026] [−4.461, −1.109] [−5.325, −2.236] [−5.576, −2.212]
TimeT2 0.105 0.105 0.101 0.218
(0.275) (0.275) (0.279) (0.301)
[−0.437, 0.647] [−0.437, 0.647] [−0.449, 0.652] [−0.376, 0.812]
imd 0.063* 0.053+ 0.006 0.009
(0.030) (0.029) (0.028) (0.029)
[0.005, 0.122] [−0.004, 0.111] [−0.050, 0.062] [−0.049, 0.066]
combined_pat_edu 0.019 0.036 0.011 −0.016
(0.037) (0.036) (0.033) (0.038)
[−0.053, 0.091] [−0.035, 0.106] [−0.054, 0.076] [−0.091, 0.060]
TimeT2 × imd −0.010 −0.010 −0.007 −0.011
(0.027) (0.027) (0.028) (0.030)
[−0.064, 0.043] [−0.064, 0.043] [−0.062, 0.048] [−0.070, 0.047]
TimeT2 × combined_pat_edu −0.005 −0.005 −0.009 −0.018
(0.033) (0.033) (0.034) (0.036)
[−0.071, 0.060] [−0.071, 0.060] [−0.076, 0.057] [−0.088, 0.052]
ageT1 0.053** 0.021 0.026
(0.017) (0.016) (0.017)
[0.020, 0.087] [−0.011, 0.052] [−0.008, 0.060]
gender −0.226+ −0.164 −0.055
(0.118) (0.106) (0.115)
[−0.459, 0.007] [−0.372, 0.044] [−0.282, 0.173]
BPVS 0.015** 0.014*
(0.005) (0.005)
[0.005, 0.025] [0.003, 0.024]
BAS 0.015** 0.014**
(0.005) (0.005)
[0.005, 0.026] [0.004, 0.024]
TC1 −0.068
(0.057)
[−0.181, 0.045]
TC2 0.172*
(0.069)
[0.035, 0.308]
TC3 0.145*
(0.063)
[0.021, 0.270]
TC4 −0.057
(0.059)
[−0.172, 0.059]
TC5 −0.014
(0.060)
[−0.131, 0.103]
TC6 −0.176**
(0.057)
[−0.289, −0.064]
SD (Intercept ID) 0.591 0.553 0.452 0.389
SD (Observations) 0.495 0.495 0.498 0.493
Num.Obs. 250 250 244 222
R2 Marg. 0.033 0.103 0.247 0.299
R2 Cond. 0.601 0.601 0.587 0.568
AIC 570.4 571.5 551.1 520.1
BIC 598.6 606.7 593.1 581.3
ICC 0.6 0.6 0.5 0.4
RMSE 0.39 0.40 0.41 0.42

We conducted a sequential series of mixed-effects models to predict executive function (EF) across two time points. EF at each time point was modeled as a repeated measure nested within participants. Across all models, there was no significant main effect of time, suggesting that, on average, EF scores did not change substantially from T1 to T2. Sociodemographic variables (IMD, parental education) were not consistently associated with EF, and their interaction terms with time were non-significant, indicating little evidence of differential change by SES. Vocabulary (BPVS) and self-regulation (BAS) emerged as significant positive predictors of EF across time, even after adjusting for age and gender (Model 3 and 4).

In the final model, specific features of the home learning environment showed differential associations: TC2 and TC3 were positively associated with EF, while TC6 (potentially a less structured or overstimulating activity) was negatively associated. Model fit improved progressively across steps, with the full model (Model 4) providing the best fit to the data (ΔAIC = -100.8 from Model 1).

Checking for collinearity

                 Time                   imd      combined_pat_edu 
            20.746303              2.297141              2.622301 
                ageT1                gender                  BPVS 
             1.474493              1.350426              2.048013 
                  BAS                   TC1                   TC2 
             1.530029              1.443421              1.690488 
                  TC3                   TC4                   TC5 
             1.367579              1.341283              1.296603 
                  TC6              Time:imd Time:combined_pat_edu 
             1.457576             13.902384             12.143878 

Additional - Bi-variate correlations - HLE and EF

# A tibble: 6 × 3
  Variable Correlation p_value
  <chr>          <dbl>   <dbl>
1 TC1            0.05    0.578
2 TC2            0.11    0.22 
3 TC3            0.201   0.024
4 TC4            0.041   0.65 
5 TC5           -0.006   0.944
6 TC6           -0.04    0.66 
      TC1                 TC2                 TC3                TC4         
 Min.   :-3.143278   Min.   :-2.954707   Min.   :-1.80753   Min.   :-2.4591  
 1st Qu.:-0.594508   1st Qu.:-0.651689   1st Qu.:-0.46882   1st Qu.:-0.4736  
 Median : 0.014395   Median : 0.018919   Median : 0.07675   Median : 0.2777  
 Mean   : 0.005591   Mean   :-0.000754   Mean   : 0.18385   Mean   : 0.1389  
 3rd Qu.: 0.814902   3rd Qu.: 0.860155   3rd Qu.: 0.84347   3rd Qu.: 0.8114  
 Max.   : 2.553194   Max.   : 1.375402   Max.   : 2.55887   Max.   : 2.0921  
 NA's   :45          NA's   :45          NA's   :45         NA's   :45       
      TC5                TC6          
 Min.   :-2.54617   Min.   :-2.10938  
 1st Qu.:-0.56102   1st Qu.:-0.82219  
 Median : 0.12227   Median :-0.04232  
 Mean   :-0.02383   Mean   : 0.01264  
 3rd Qu.: 0.64146   3rd Qu.: 0.83072  
 Max.   : 1.97381   Max.   : 2.80200  
 NA's   :45         NA's   :45        

Visualisations of the distribution of HLE factor scores

Scatterplots to examine the EF_T2 v HLE factors

[1] "TC1" "TC2" "TC3" "TC4" "TC5" "TC6"