Galton Data as Multi-Group Regression (By Sex) (Figure 14.2)

Load Galton Data

<a https://docs.google.com/spreadsheets/d/1K7ZR3wd_Yer8CdDwT2Db6wCmfPKeIARC0Q1HhI8_hmg/edit?usp=sharing

Plot shows regression lines, confidence interval for regression line and 50% confidence ellipse to communicate variation and slope

R Program for Scatterplot (Figure 14.2)

# Load necessary libraries
library(readxl)
library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Read the data
Galton <- read_excel("Galton.xlsx")

# Create scatterplot with regression lines and 75% confidence ellipses
ggplot(Galton, aes(x = midparentHeight, y = childHeight, color = gender)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE) +
  stat_ellipse(type = "norm", level = 0.75, linetype = "dashed", linewidth = 1) +
  labs(
    title = "Figure 14.2: Regression of Child Height on Midparent Height by Gender",
    x = "Midparent Height (inches)",
    y = "Child Height (inches)"
  ) +
  scale_color_manual(values = c("male" = "red", "female" = "blue")) +
  theme_minimal(base_size = 14)

## `geom_smooth()` using formula = 'y ~ x'

Multi-Group Two Predictor Regression Model (Figure 14.3)

Path Diagram

Two-Group Loading and Intercept Invariance

R Program

Fig4.3Model <- '
  alcf3mw1 ~ c(b1m,b1m)*rridcw1 + c(b2m,b2m)*rridqw1
  zFun =~ c(sdcm,sdcf)*rridcw1+NA*rridcw1
  zFeel =~ c(sdqm,sdqf)*rridqw1+NA*rridqw1
  rridcw1~~0*rridcw1
  rridqw1~~0*rridqw1
  rridcw1~~0*rridqw1
  zFun~~1*zFun
  zFeel~~1*zFeel
  zFun~~c(Corrm,Corrm)*zFeel
  alcf3mw1~c(Im,If)*1
'
summary(Fig4.3Model, standardized=TRUE, fit.measures=TRUE)

##    Length     Class      Mode 
##         1 character character

#plot_lavaan(Fig4.3Model)

Multi-Group Random Intercept Model

Load Simulated Reasons for Drinking Data

<a https://docs.google.com/document/d/1aRUQ1A_f3IZXc2WpSjjWWRyZfnJnAkvFgTXCg6YnBhs/edit?usp=sharing

Path Diagram for Two-Group Model (Figure 14.4)

Two-Group Loading and Intercept Invariance

Two Group Invariance Model with Loading and Intercept Invariance

RFD <- read.table("SIMMg.txt", 
                 sep = "\t", 
                 header = TRUE, 
                 stringsAsFactors = FALSE)

Fig14.4Model <- '
  F =~ c(NA,NA)*rridaw1+  c("LAM","LAM")*rridaw1+  c("LBM","LBM")*rridbw1+ c("LCM","LCM")*rridcw1+ c("LDM","LDM")*rriddw1+ c("LEM","LEM")*rridew1
  RI=~ 1*rridaw1+  1*rridbw1+ 1*rridcw1+ 1*rriddw1+ 1*rridew1
  F~~c(NA,1)*F
  RI~~c("RIM","RIF")*RI
  F~~c(NA,0)*RI
  rridaw1~c(ai,ai)*1
  rridbw1~c(bi,bi)*1
  rridcw1~c(ci,ci)*1
  rriddw1~c(di,di)*1
  rridew1~c(ei,ei)*1
  F~c(NA,0)*1
  RI~c(NA,0)*1
  '
require(lavaan)

## Loading required package: lavaan

## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.

Fig14.4Result <- sem(Fig14.4Model, data = RFD, group = "sexw0", fixed.x = TRUE)
summary(Fig14.4Result,fit.measures=TRUE, standardized=TRUE)

## lavaan 0.6-19 ended normally after 50 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        36
##   Number of equality constraints                    10
## 
##   Number of observations per group:                   
##     0                                              881
##     1                                             1296
## 
## Model Test User Model:
##                                                       
##   Test statistic                               267.284
##   Degrees of freedom                                14
##   P-value (Chi-square)                           0.000
##   Test statistic for each group:
##     0                                          109.500
##     1                                          157.784
## 
## Model Test Baseline Model:
## 
##   Test statistic                              8568.268
##   Degrees of freedom                                20
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.970
##   Tucker-Lewis Index (TLI)                       0.958
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -9694.973
##   Loglikelihood unrestricted model (H1)      -9561.331
##                                                       
##   Akaike (AIC)                               19441.945
##   Bayesian (BIC)                             19589.773
##   Sample-size adjusted Bayesian (SABIC)      19507.168
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.129
##   90 Percent confidence interval - lower         0.116
##   90 Percent confidence interval - upper         0.143
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.036
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## 
## Group 1 [0]:
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   F =~                                                                  
##     rridaw1  (LAM)    0.543    0.045   12.093    0.000    0.428    0.496
##     rridbw1  (LBM)    0.418    0.049    8.482    0.000    0.330    0.394
##     rridcw1  (LCM)    0.681    0.040   16.901    0.000    0.537    0.631
##     rriddw1  (LDM)    0.717    0.039   18.451    0.000    0.566    0.651
##     rridew1  (LEM)    0.346    0.053    6.538    0.000    0.273    0.330
##   RI =~                                                                 
##     rridaw1           1.000                               0.471    0.546
##     rridbw1           1.000                               0.471    0.562
##     rridcw1           1.000                               0.471    0.553
##     rriddw1           1.000                               0.471    0.542
##     rridew1           1.000                               0.471    0.570
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   F ~~                                                                  
##     RI                0.081    0.088    0.927    0.354    0.218    0.218
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .rridaw1   (ai)    2.923    0.023  125.666    0.000    2.923    3.387
##    .rridbw1   (bi)    2.965    0.022  134.742    0.000    2.965    3.536
##    .rridcw1   (ci)    3.045    0.024  125.976    0.000    3.045    3.576
##    .rriddw1   (di)    2.975    0.025  118.069    0.000    2.975    3.426
##    .rridew1   (ei)    3.107    0.022  139.719    0.000    3.107    3.758
##     F                 0.155    0.072    2.135    0.033    0.196    0.196
##     RI               -0.011    0.045   -0.239    0.811   -0.023   -0.023
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     F                 0.622    0.161    3.852    0.000    1.000    1.000
##     RI       (RIM)    0.222    0.056    3.938    0.000    1.000    1.000
##    .rridaw1           0.251    0.014   18.496    0.000    0.251    0.337
##    .rridbw1           0.304    0.017   17.936    0.000    0.304    0.433
##    .rridcw1           0.104    0.008   12.640    0.000    0.104    0.143
##    .rriddw1           0.096    0.009   10.951    0.000    0.096    0.127
##    .rridew1           0.331    0.020   16.394    0.000    0.331    0.484
## 
## 
## Group 2 [1]:
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   F =~                                                                  
##     rridaw1  (LAM)    0.543    0.045   12.093    0.000    0.543    0.611
##     rridbw1  (LBM)    0.418    0.049    8.482    0.000    0.418    0.500
##     rridcw1  (LCM)    0.681    0.040   16.901    0.000    0.681    0.776
##     rriddw1  (LDM)    0.717    0.039   18.451    0.000    0.717    0.783
##     rridew1  (LEM)    0.346    0.053    6.538    0.000    0.346    0.409
##   RI =~                                                                 
##     rridaw1           1.000                               0.483    0.543
##     rridbw1           1.000                               0.483    0.578
##     rridcw1           1.000                               0.483    0.550
##     rriddw1           1.000                               0.483    0.528
##     rridew1           1.000                               0.483    0.571
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   F ~~                                                                  
##     RI                0.000                               0.000    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .rridaw1   (ai)    2.923    0.023  125.666    0.000    2.923    3.286
##    .rridbw1   (bi)    2.965    0.022  134.742    0.000    2.965    3.544
##    .rridcw1   (ci)    3.045    0.024  125.976    0.000    3.045    3.469
##    .rriddw1   (di)    2.975    0.025  118.069    0.000    2.975    3.248
##    .rridew1   (ei)    3.107    0.022  139.719    0.000    3.107    3.669
##     F                 0.000                               0.000    0.000
##     RI                0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     F                 1.000                               1.000    1.000
##     RI       (RIF)    0.233    0.046    5.030    0.000    1.000    1.000
##    .rridaw1           0.263    0.011   22.859    0.000    0.263    0.332
##    .rridbw1           0.291    0.014   21.270    0.000    0.291    0.416
##    .rridcw1           0.073    0.006   11.365    0.000    0.073    0.095
##    .rriddw1           0.091    0.008   11.990    0.000    0.091    0.108
##    .rridew1           0.364    0.018   20.062    0.000    0.364    0.507

#plot_lavaan(Fig14.4Result)

Weak Invariance Latent Basis Model BSI data (Figure 14.5)

Diagram

Two-Group Latent Basis Model

Program Code

SIMBSIMg <- read.table("SIMBSIMg.txt", 
                 sep = "\t", 
                 header = TRUE, 
                 stringsAsFactors = FALSE)

Fig14.5Model <- '
  # General factor loadings (first fixed to identify scale)
  g =~ c(ml1,ml1)*BSIW1 + + c(NA,NA)*BSIW1+c(ml2,ml2)*BSIW2 + c(ml3,ml3)*BSIW3 + c(ml4,ml4)*BSIW4 + 
       c(ml5,ml5)*BSIW5 + c(ml6,ml6)*BSIW6 + c(ml7,ml7)*BSIW7 + c(ml8,ml8)*BSIW8

  # Random intercept factor loadings fixed at 1
  ri =~ 1*BSIW1 + 1*BSIW2 + 1*BSIW3 + 1*BSIW4 + 
        1*BSIW5 + 1*BSIW6 + 1*BSIW7 + 1*BSIW8

  # Factor covariances
  g ~~ c(0,NA)*ri    # covariance between g and ri (set to 0 in your original)
  

  # Factor variances
  g ~~ c(1,NA)*g
  
  ri ~~ c(mvri,fvri)*ri

  # Factor means
  g ~ c(NA,NA)*1
  ri ~ c(NA,NA)*1

  # Item intercepts
  BSIW1 ~ c(0,0)*1
  BSIW2 ~ c(0,0)*1
  BSIW3 ~ c(0,0)*1
  BSIW4 ~ c(0,0)*1
  BSIW5 ~ c(0,0)*1
  BSIW6 ~ c(0,0)*1
  BSIW7 ~ c(0,0)*1
  BSIW8 ~ c(0,0)*1

  # Residual variances
  BSIW1 ~~ c(me1,fe1)*BSIW1
  BSIW2 ~~ c(me2,fe2)*BSIW2
  BSIW3 ~~ c(me3,fe3)*BSIW3
  BSIW4 ~~ c(me4,fe4)*BSIW4
  BSIW5 ~~ c(me5,fe5)*BSIW5
  BSIW6 ~~ c(me6,fe6)*BSIW6
  BSIW7 ~~ c(me7,fe7)*BSIW7
  BSIW8 ~~ c(me8,fe8)*BSIW8
'


Fig14.5Result <- sem(Fig14.5Model, data = SIMBSIMg, group = "sex", fixed.x = TRUE)
summary(Fig14.5Result,fit.measures=TRUE, standardized=TRUE)

## lavaan 0.6-19 ended normally after 150 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        40
##   Number of equality constraints                     8
## 
##   Number of observations per group:                   
##     0                                             1724
##     1                                             1991
## 
## Model Test User Model:
##                                                       
##   Test statistic                               451.725
##   Degrees of freedom                                56
##   P-value (Chi-square)                           0.000
##   Test statistic for each group:
##     0                                          293.230
##     1                                          158.495
## 
## Model Test Baseline Model:
## 
##   Test statistic                             13234.152
##   Degrees of freedom                                56
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.970
##   Tucker-Lewis Index (TLI)                       0.970
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -18112.042
##   Loglikelihood unrestricted model (H1)     -17886.179
##                                                       
##   Akaike (AIC)                               36288.083
##   Bayesian (BIC)                             36487.128
##   Sample-size adjusted Bayesian (SABIC)      36385.447
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.062
##   90 Percent confidence interval - lower         0.056
##   90 Percent confidence interval - upper         0.067
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.043
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## 
## Group 1 [0]:
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   g =~                                                                  
##     BSIW1    (ml1)   -0.146    0.020   -7.313    0.000   -0.146   -0.233
##     BSIW2    (ml2)   -0.130    0.018   -7.038    0.000   -0.130   -0.259
##     BSIW3    (ml3)    0.002    0.017    0.128    0.898    0.002    0.004
##     BSIW4    (ml4)    0.038    0.016    2.317    0.020    0.038    0.072
##     BSIW5    (ml5)    0.141    0.017    8.499    0.000    0.141    0.257
##     BSIW6    (ml6)    0.179    0.017   10.693    0.000    0.179    0.326
##     BSIW7    (ml7)    0.228    0.018   12.504    0.000    0.228    0.371
##     BSIW8    (ml8)    0.162    0.017    9.679    0.000    0.162    0.297
##   ri =~                                                                 
##     BSIW1             1.000                               0.368    0.589
##     BSIW2             1.000                               0.368    0.737
##     BSIW3             1.000                               0.368    0.690
##     BSIW4             1.000                               0.368    0.704
##     BSIW5             1.000                               0.368    0.673
##     BSIW6             1.000                               0.368    0.672
##     BSIW7             1.000                               0.368    0.599
##     BSIW8             1.000                               0.368    0.677
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   g ~~                                                                  
##     ri                0.000                               0.000    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     g                -0.078    0.035   -2.243    0.025   -0.078   -0.078
##     ri                0.438    0.010   45.671    0.000    1.190    1.190
##    .BSIW1             0.000                               0.000    0.000
##    .BSIW2             0.000                               0.000    0.000
##    .BSIW3             0.000                               0.000    0.000
##    .BSIW4             0.000                               0.000    0.000
##    .BSIW5             0.000                               0.000    0.000
##    .BSIW6             0.000                               0.000    0.000
##    .BSIW7             0.000                               0.000    0.000
##    .BSIW8             0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     g                 1.000                               1.000    1.000
##     ri      (mvri)    0.136    0.005   25.279    0.000    1.000    1.000
##    .BSIW1    (me1)    0.234    0.010   22.851    0.000    0.234    0.599
##    .BSIW2    (me2)    0.097    0.006   15.617    0.000    0.097    0.389
##    .BSIW3    (me3)    0.149    0.006   25.840    0.000    0.149    0.524
##    .BSIW4    (me4)    0.136    0.005   25.883    0.000    0.136    0.499
##    .BSIW5    (me5)    0.144    0.006   24.685    0.000    0.144    0.480
##    .BSIW6    (me6)    0.133    0.006   22.766    0.000    0.133    0.442
##    .BSIW7    (me7)    0.191    0.008   22.875    0.000    0.191    0.504
##    .BSIW8    (me8)    0.134    0.006   23.504    0.000    0.134    0.453
## 
## 
## Group 2 [1]:
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   g =~                                                                  
##     BSIW1    (ml1)   -0.146    0.020   -7.313    0.000   -0.121   -0.199
##     BSIW2    (ml2)   -0.130    0.018   -7.038    0.000   -0.107   -0.191
##     BSIW3    (ml3)    0.002    0.017    0.128    0.898    0.002    0.003
##     BSIW4    (ml4)    0.038    0.016    2.317    0.020    0.031    0.057
##     BSIW5    (ml5)    0.141    0.017    8.499    0.000    0.117    0.217
##     BSIW6    (ml6)    0.179    0.017   10.693    0.000    0.148    0.285
##     BSIW7    (ml7)    0.228    0.018   12.504    0.000    0.189    0.345
##     BSIW8    (ml8)    0.162    0.017    9.679    0.000    0.134    0.242
##   ri =~                                                                 
##     BSIW1             1.000                               0.408    0.672
##     BSIW2             1.000                               0.408    0.726
##     BSIW3             1.000                               0.408    0.747
##     BSIW4             1.000                               0.408    0.750
##     BSIW5             1.000                               0.408    0.759
##     BSIW6             1.000                               0.408    0.784
##     BSIW7             1.000                               0.408    0.745
##     BSIW8             1.000                               0.408    0.735
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   g ~~                                                                  
##     ri               -0.058    0.017   -3.483    0.000   -0.170   -0.170
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     g                -0.249    0.030   -8.255    0.000   -0.300   -0.300
##     ri                0.514    0.010   49.899    0.000    1.261    1.261
##    .BSIW1             0.000                               0.000    0.000
##    .BSIW2             0.000                               0.000    0.000
##    .BSIW3             0.000                               0.000    0.000
##    .BSIW4             0.000                               0.000    0.000
##    .BSIW5             0.000                               0.000    0.000
##    .BSIW6             0.000                               0.000    0.000
##    .BSIW7             0.000                               0.000    0.000
##    .BSIW8             0.000                               0.000    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     g                 0.687    0.075    9.145    0.000    1.000    1.000
##     ri      (fvri)    0.166    0.006   27.026    0.000    1.000    1.000
##    .BSIW1    (fe1)    0.170    0.008   22.515    0.000    0.170    0.463
##    .BSIW2    (fe2)    0.123    0.006   20.291    0.000    0.123    0.389
##    .BSIW3    (fe3)    0.132    0.005   27.293    0.000    0.132    0.443
##    .BSIW4    (fe4)    0.133    0.005   27.875    0.000    0.133    0.449
##    .BSIW5    (fe5)    0.125    0.005   26.967    0.000    0.125    0.433
##    .BSIW6    (fe6)    0.103    0.004   24.482    0.000    0.103    0.380
##    .BSIW7    (fe7)    0.124    0.005   23.305    0.000    0.124    0.414
##    .BSIW8    (fe8)    0.142    0.005   27.046    0.000    0.142    0.462

#plot_lavaan(Fig14.5Result)

Percentile Plot of Predicted Longitudinal BSI Scores (Figure 14.6)

library(lavaan)
library(dplyr)
library(tidyr)
library(ggplot2)

## 0) Build predictions per group, using case indices to align rows
pred_list <- lavPredict(Fig14.5Result, type = "ov")                      # list of matrices
case_idx  <- lavInspect(Fig14.5Result, "case.idx")                       # row indices per group
group_lab <- unique(SIMBSIMg$sex)                                        # labels as in data

pred_df <- bind_rows(lapply(seq_along(pred_list), function(g){
  out <- as.data.frame(pred_list[[g]])
  out$sex <- SIMBSIMg$sex[case_idx[[g]]]                                  # exact rows
  out
}))

## 1) Recode sex -> 0/1 (edit mapping to match your coding)
pred_df <- pred_df %>%
  mutate(
    sex01 = case_when(
      sex %in% c("Male","male","M", 1, "1")   ~ 1,   # your requested coding
      sex %in% c("Female","female","F", 0, "0") ~ 0,
      TRUE ~ NA_real_
    )
  )

## 2) Long format (create pred_long here, then mutate its wave order)
pred_long <- pred_df %>%
  pivot_longer(
    cols = dplyr::starts_with("BSIW"),
    names_to = "wave",
    values_to = "pred_score"
  ) %>%
  mutate(
    wave = factor(wave, levels = paste0("BSIW", 1:8)),
    wave_num = as.integer(gsub("BSIW","", wave))
  )

## 3) Percentile bins within sex×wave
pred_binned <- pred_long %>%
  group_by(sex01, wave, wave_num) %>%
  mutate(
    pct_group = cut(
      pred_score,
      breaks = quantile(pred_score, probs = c(0, .10, .25, .50, .75, .90, 1), na.rm = TRUE),
      include.lowest = TRUE,
      right = TRUE,
      labels = c("0–10","10–25","25–50","50–75","75–90","90–100")
    )
  ) %>%
  ungroup()

# ...
pct_summary <- pred_binned %>%
  group_by(sex01, wave, wave_num, pct_group) %>%
  summarise(
    n    = sum(!is.na(pred_score)),
    mean = mean(pred_score, na.rm = TRUE),
    sd   = sd(pred_score,   na.rm = TRUE),
    .groups = "drop"
  )

# Now plot
ggplot2::ggplot(pct_summary,
       aes(x = wave_num, y = mean, color = pct_group, group = pct_group)) +
  geom_line(linewidth = 1) +
  geom_point(size = 1.5) +
  geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.15) +
  facet_wrap(~ sex01, labeller = label_both) +
  scale_x_continuous(breaks = 1:8, labels = paste0("BSIW", 1:8)) +
  labs(
    x = "Wave",
    y = "Predicted BSI Score",
    color = "Percentile group",
    title = "Figure 14.6 Predicted BSI by Wave and Sex (0/1) with SDs per Percentile Group",
    subtitle = "Lines/points = group means; error bars = +/- 1 SD within percentile group"
  ) +
  theme_minimal(base_size = 9)

Chapter 14:

Phil Wood

2025-08-01