<a https://docs.google.com/spreadsheets/d/1K7ZR3wd_Yer8CdDwT2Db6wCmfPKeIARC0Q1HhI8_hmg/edit?usp=sharing
Plot shows regression lines, confidence interval for regression line and 50% confidence ellipse to communicate variation and slope
# Load necessary libraries
library(readxl)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the data
Galton <- read_excel("Galton.xlsx")
# Create scatterplot with regression lines and 75% confidence ellipses
ggplot(Galton, aes(x = midparentHeight, y = childHeight, color = gender)) +
geom_point(alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE) +
stat_ellipse(type = "norm", level = 0.75, linetype = "dashed", linewidth = 1) +
labs(
title = "Figure 14.2: Regression of Child Height on Midparent Height by Gender",
x = "Midparent Height (inches)",
y = "Child Height (inches)"
) +
scale_color_manual(values = c("male" = "red", "female" = "blue")) +
theme_minimal(base_size = 14)
## `geom_smooth()` using formula = 'y ~ x'
Fig4.3Model <- '
alcf3mw1 ~ c(b1m,b1m)*rridcw1 + c(b2m,b2m)*rridqw1
zFun =~ c(sdcm,sdcf)*rridcw1+NA*rridcw1
zFeel =~ c(sdqm,sdqf)*rridqw1+NA*rridqw1
rridcw1~~0*rridcw1
rridqw1~~0*rridqw1
rridcw1~~0*rridqw1
zFun~~1*zFun
zFeel~~1*zFeel
zFun~~c(Corrm,Corrm)*zFeel
alcf3mw1~c(Im,If)*1
'
summary(Fig4.3Model, standardized=TRUE, fit.measures=TRUE)
## Length Class Mode
## 1 character character
#plot_lavaan(Fig4.3Model)
<a https://docs.google.com/document/d/1aRUQ1A_f3IZXc2WpSjjWWRyZfnJnAkvFgTXCg6YnBhs/edit?usp=sharing
RFD <- read.table("SIMMg.txt",
sep = "\t",
header = TRUE,
stringsAsFactors = FALSE)
Fig14.4Model <- '
F =~ c(NA,NA)*rridaw1+ c("LAM","LAM")*rridaw1+ c("LBM","LBM")*rridbw1+ c("LCM","LCM")*rridcw1+ c("LDM","LDM")*rriddw1+ c("LEM","LEM")*rridew1
RI=~ 1*rridaw1+ 1*rridbw1+ 1*rridcw1+ 1*rriddw1+ 1*rridew1
F~~c(NA,1)*F
RI~~c("RIM","RIF")*RI
F~~c(NA,0)*RI
rridaw1~c(ai,ai)*1
rridbw1~c(bi,bi)*1
rridcw1~c(ci,ci)*1
rriddw1~c(di,di)*1
rridew1~c(ei,ei)*1
F~c(NA,0)*1
RI~c(NA,0)*1
'
require(lavaan)
## Loading required package: lavaan
## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
Fig14.4Result <- sem(Fig14.4Model, data = RFD, group = "sexw0", fixed.x = TRUE)
summary(Fig14.4Result,fit.measures=TRUE, standardized=TRUE)
## lavaan 0.6-19 ended normally after 50 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 36
## Number of equality constraints 10
##
## Number of observations per group:
## 0 881
## 1 1296
##
## Model Test User Model:
##
## Test statistic 267.284
## Degrees of freedom 14
## P-value (Chi-square) 0.000
## Test statistic for each group:
## 0 109.500
## 1 157.784
##
## Model Test Baseline Model:
##
## Test statistic 8568.268
## Degrees of freedom 20
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.970
## Tucker-Lewis Index (TLI) 0.958
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -9694.973
## Loglikelihood unrestricted model (H1) -9561.331
##
## Akaike (AIC) 19441.945
## Bayesian (BIC) 19589.773
## Sample-size adjusted Bayesian (SABIC) 19507.168
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.129
## 90 Percent confidence interval - lower 0.116
## 90 Percent confidence interval - upper 0.143
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 1.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.036
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
##
## Group 1 [0]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F =~
## rridaw1 (LAM) 0.543 0.045 12.093 0.000 0.428 0.496
## rridbw1 (LBM) 0.418 0.049 8.482 0.000 0.330 0.394
## rridcw1 (LCM) 0.681 0.040 16.901 0.000 0.537 0.631
## rriddw1 (LDM) 0.717 0.039 18.451 0.000 0.566 0.651
## rridew1 (LEM) 0.346 0.053 6.538 0.000 0.273 0.330
## RI =~
## rridaw1 1.000 0.471 0.546
## rridbw1 1.000 0.471 0.562
## rridcw1 1.000 0.471 0.553
## rriddw1 1.000 0.471 0.542
## rridew1 1.000 0.471 0.570
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F ~~
## RI 0.081 0.088 0.927 0.354 0.218 0.218
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .rridaw1 (ai) 2.923 0.023 125.666 0.000 2.923 3.387
## .rridbw1 (bi) 2.965 0.022 134.742 0.000 2.965 3.536
## .rridcw1 (ci) 3.045 0.024 125.976 0.000 3.045 3.576
## .rriddw1 (di) 2.975 0.025 118.069 0.000 2.975 3.426
## .rridew1 (ei) 3.107 0.022 139.719 0.000 3.107 3.758
## F 0.155 0.072 2.135 0.033 0.196 0.196
## RI -0.011 0.045 -0.239 0.811 -0.023 -0.023
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F 0.622 0.161 3.852 0.000 1.000 1.000
## RI (RIM) 0.222 0.056 3.938 0.000 1.000 1.000
## .rridaw1 0.251 0.014 18.496 0.000 0.251 0.337
## .rridbw1 0.304 0.017 17.936 0.000 0.304 0.433
## .rridcw1 0.104 0.008 12.640 0.000 0.104 0.143
## .rriddw1 0.096 0.009 10.951 0.000 0.096 0.127
## .rridew1 0.331 0.020 16.394 0.000 0.331 0.484
##
##
## Group 2 [1]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F =~
## rridaw1 (LAM) 0.543 0.045 12.093 0.000 0.543 0.611
## rridbw1 (LBM) 0.418 0.049 8.482 0.000 0.418 0.500
## rridcw1 (LCM) 0.681 0.040 16.901 0.000 0.681 0.776
## rriddw1 (LDM) 0.717 0.039 18.451 0.000 0.717 0.783
## rridew1 (LEM) 0.346 0.053 6.538 0.000 0.346 0.409
## RI =~
## rridaw1 1.000 0.483 0.543
## rridbw1 1.000 0.483 0.578
## rridcw1 1.000 0.483 0.550
## rriddw1 1.000 0.483 0.528
## rridew1 1.000 0.483 0.571
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F ~~
## RI 0.000 0.000 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .rridaw1 (ai) 2.923 0.023 125.666 0.000 2.923 3.286
## .rridbw1 (bi) 2.965 0.022 134.742 0.000 2.965 3.544
## .rridcw1 (ci) 3.045 0.024 125.976 0.000 3.045 3.469
## .rriddw1 (di) 2.975 0.025 118.069 0.000 2.975 3.248
## .rridew1 (ei) 3.107 0.022 139.719 0.000 3.107 3.669
## F 0.000 0.000 0.000
## RI 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## F 1.000 1.000 1.000
## RI (RIF) 0.233 0.046 5.030 0.000 1.000 1.000
## .rridaw1 0.263 0.011 22.859 0.000 0.263 0.332
## .rridbw1 0.291 0.014 21.270 0.000 0.291 0.416
## .rridcw1 0.073 0.006 11.365 0.000 0.073 0.095
## .rriddw1 0.091 0.008 11.990 0.000 0.091 0.108
## .rridew1 0.364 0.018 20.062 0.000 0.364 0.507
#plot_lavaan(Fig14.4Result)
SIMBSIMg <- read.table("SIMBSIMg.txt",
sep = "\t",
header = TRUE,
stringsAsFactors = FALSE)
Fig14.5Model <- '
# General factor loadings (first fixed to identify scale)
g =~ c(ml1,ml1)*BSIW1 + + c(NA,NA)*BSIW1+c(ml2,ml2)*BSIW2 + c(ml3,ml3)*BSIW3 + c(ml4,ml4)*BSIW4 +
c(ml5,ml5)*BSIW5 + c(ml6,ml6)*BSIW6 + c(ml7,ml7)*BSIW7 + c(ml8,ml8)*BSIW8
# Random intercept factor loadings fixed at 1
ri =~ 1*BSIW1 + 1*BSIW2 + 1*BSIW3 + 1*BSIW4 +
1*BSIW5 + 1*BSIW6 + 1*BSIW7 + 1*BSIW8
# Factor covariances
g ~~ c(0,NA)*ri # covariance between g and ri (set to 0 in your original)
# Factor variances
g ~~ c(1,NA)*g
ri ~~ c(mvri,fvri)*ri
# Factor means
g ~ c(NA,NA)*1
ri ~ c(NA,NA)*1
# Item intercepts
BSIW1 ~ c(0,0)*1
BSIW2 ~ c(0,0)*1
BSIW3 ~ c(0,0)*1
BSIW4 ~ c(0,0)*1
BSIW5 ~ c(0,0)*1
BSIW6 ~ c(0,0)*1
BSIW7 ~ c(0,0)*1
BSIW8 ~ c(0,0)*1
# Residual variances
BSIW1 ~~ c(me1,fe1)*BSIW1
BSIW2 ~~ c(me2,fe2)*BSIW2
BSIW3 ~~ c(me3,fe3)*BSIW3
BSIW4 ~~ c(me4,fe4)*BSIW4
BSIW5 ~~ c(me5,fe5)*BSIW5
BSIW6 ~~ c(me6,fe6)*BSIW6
BSIW7 ~~ c(me7,fe7)*BSIW7
BSIW8 ~~ c(me8,fe8)*BSIW8
'
Fig14.5Result <- sem(Fig14.5Model, data = SIMBSIMg, group = "sex", fixed.x = TRUE)
summary(Fig14.5Result,fit.measures=TRUE, standardized=TRUE)
## lavaan 0.6-19 ended normally after 150 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 40
## Number of equality constraints 8
##
## Number of observations per group:
## 0 1724
## 1 1991
##
## Model Test User Model:
##
## Test statistic 451.725
## Degrees of freedom 56
## P-value (Chi-square) 0.000
## Test statistic for each group:
## 0 293.230
## 1 158.495
##
## Model Test Baseline Model:
##
## Test statistic 13234.152
## Degrees of freedom 56
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.970
## Tucker-Lewis Index (TLI) 0.970
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -18112.042
## Loglikelihood unrestricted model (H1) -17886.179
##
## Akaike (AIC) 36288.083
## Bayesian (BIC) 36487.128
## Sample-size adjusted Bayesian (SABIC) 36385.447
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.062
## 90 Percent confidence interval - lower 0.056
## 90 Percent confidence interval - upper 0.067
## P-value H_0: RMSEA <= 0.050 0.000
## P-value H_0: RMSEA >= 0.080 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.043
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
##
## Group 1 [0]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## BSIW1 (ml1) -0.146 0.020 -7.313 0.000 -0.146 -0.233
## BSIW2 (ml2) -0.130 0.018 -7.038 0.000 -0.130 -0.259
## BSIW3 (ml3) 0.002 0.017 0.128 0.898 0.002 0.004
## BSIW4 (ml4) 0.038 0.016 2.317 0.020 0.038 0.072
## BSIW5 (ml5) 0.141 0.017 8.499 0.000 0.141 0.257
## BSIW6 (ml6) 0.179 0.017 10.693 0.000 0.179 0.326
## BSIW7 (ml7) 0.228 0.018 12.504 0.000 0.228 0.371
## BSIW8 (ml8) 0.162 0.017 9.679 0.000 0.162 0.297
## ri =~
## BSIW1 1.000 0.368 0.589
## BSIW2 1.000 0.368 0.737
## BSIW3 1.000 0.368 0.690
## BSIW4 1.000 0.368 0.704
## BSIW5 1.000 0.368 0.673
## BSIW6 1.000 0.368 0.672
## BSIW7 1.000 0.368 0.599
## BSIW8 1.000 0.368 0.677
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g ~~
## ri 0.000 0.000 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g -0.078 0.035 -2.243 0.025 -0.078 -0.078
## ri 0.438 0.010 45.671 0.000 1.190 1.190
## .BSIW1 0.000 0.000 0.000
## .BSIW2 0.000 0.000 0.000
## .BSIW3 0.000 0.000 0.000
## .BSIW4 0.000 0.000 0.000
## .BSIW5 0.000 0.000 0.000
## .BSIW6 0.000 0.000 0.000
## .BSIW7 0.000 0.000 0.000
## .BSIW8 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g 1.000 1.000 1.000
## ri (mvri) 0.136 0.005 25.279 0.000 1.000 1.000
## .BSIW1 (me1) 0.234 0.010 22.851 0.000 0.234 0.599
## .BSIW2 (me2) 0.097 0.006 15.617 0.000 0.097 0.389
## .BSIW3 (me3) 0.149 0.006 25.840 0.000 0.149 0.524
## .BSIW4 (me4) 0.136 0.005 25.883 0.000 0.136 0.499
## .BSIW5 (me5) 0.144 0.006 24.685 0.000 0.144 0.480
## .BSIW6 (me6) 0.133 0.006 22.766 0.000 0.133 0.442
## .BSIW7 (me7) 0.191 0.008 22.875 0.000 0.191 0.504
## .BSIW8 (me8) 0.134 0.006 23.504 0.000 0.134 0.453
##
##
## Group 2 [1]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## BSIW1 (ml1) -0.146 0.020 -7.313 0.000 -0.121 -0.199
## BSIW2 (ml2) -0.130 0.018 -7.038 0.000 -0.107 -0.191
## BSIW3 (ml3) 0.002 0.017 0.128 0.898 0.002 0.003
## BSIW4 (ml4) 0.038 0.016 2.317 0.020 0.031 0.057
## BSIW5 (ml5) 0.141 0.017 8.499 0.000 0.117 0.217
## BSIW6 (ml6) 0.179 0.017 10.693 0.000 0.148 0.285
## BSIW7 (ml7) 0.228 0.018 12.504 0.000 0.189 0.345
## BSIW8 (ml8) 0.162 0.017 9.679 0.000 0.134 0.242
## ri =~
## BSIW1 1.000 0.408 0.672
## BSIW2 1.000 0.408 0.726
## BSIW3 1.000 0.408 0.747
## BSIW4 1.000 0.408 0.750
## BSIW5 1.000 0.408 0.759
## BSIW6 1.000 0.408 0.784
## BSIW7 1.000 0.408 0.745
## BSIW8 1.000 0.408 0.735
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g ~~
## ri -0.058 0.017 -3.483 0.000 -0.170 -0.170
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g -0.249 0.030 -8.255 0.000 -0.300 -0.300
## ri 0.514 0.010 49.899 0.000 1.261 1.261
## .BSIW1 0.000 0.000 0.000
## .BSIW2 0.000 0.000 0.000
## .BSIW3 0.000 0.000 0.000
## .BSIW4 0.000 0.000 0.000
## .BSIW5 0.000 0.000 0.000
## .BSIW6 0.000 0.000 0.000
## .BSIW7 0.000 0.000 0.000
## .BSIW8 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g 0.687 0.075 9.145 0.000 1.000 1.000
## ri (fvri) 0.166 0.006 27.026 0.000 1.000 1.000
## .BSIW1 (fe1) 0.170 0.008 22.515 0.000 0.170 0.463
## .BSIW2 (fe2) 0.123 0.006 20.291 0.000 0.123 0.389
## .BSIW3 (fe3) 0.132 0.005 27.293 0.000 0.132 0.443
## .BSIW4 (fe4) 0.133 0.005 27.875 0.000 0.133 0.449
## .BSIW5 (fe5) 0.125 0.005 26.967 0.000 0.125 0.433
## .BSIW6 (fe6) 0.103 0.004 24.482 0.000 0.103 0.380
## .BSIW7 (fe7) 0.124 0.005 23.305 0.000 0.124 0.414
## .BSIW8 (fe8) 0.142 0.005 27.046 0.000 0.142 0.462
#plot_lavaan(Fig14.5Result)
library(lavaan)
library(dplyr)
library(tidyr)
library(ggplot2)
## 0) Build predictions per group, using case indices to align rows
pred_list <- lavPredict(Fig14.5Result, type = "ov") # list of matrices
case_idx <- lavInspect(Fig14.5Result, "case.idx") # row indices per group
group_lab <- unique(SIMBSIMg$sex) # labels as in data
pred_df <- bind_rows(lapply(seq_along(pred_list), function(g){
out <- as.data.frame(pred_list[[g]])
out$sex <- SIMBSIMg$sex[case_idx[[g]]] # exact rows
out
}))
## 1) Recode sex -> 0/1 (edit mapping to match your coding)
pred_df <- pred_df %>%
mutate(
sex01 = case_when(
sex %in% c("Male","male","M", 1, "1") ~ 1, # your requested coding
sex %in% c("Female","female","F", 0, "0") ~ 0,
TRUE ~ NA_real_
)
)
## 2) Long format (create pred_long here, then mutate its wave order)
pred_long <- pred_df %>%
pivot_longer(
cols = dplyr::starts_with("BSIW"),
names_to = "wave",
values_to = "pred_score"
) %>%
mutate(
wave = factor(wave, levels = paste0("BSIW", 1:8)),
wave_num = as.integer(gsub("BSIW","", wave))
)
## 3) Percentile bins within sex×wave
pred_binned <- pred_long %>%
group_by(sex01, wave, wave_num) %>%
mutate(
pct_group = cut(
pred_score,
breaks = quantile(pred_score, probs = c(0, .10, .25, .50, .75, .90, 1), na.rm = TRUE),
include.lowest = TRUE,
right = TRUE,
labels = c("0–10","10–25","25–50","50–75","75–90","90–100")
)
) %>%
ungroup()
# ...
pct_summary <- pred_binned %>%
group_by(sex01, wave, wave_num, pct_group) %>%
summarise(
n = sum(!is.na(pred_score)),
mean = mean(pred_score, na.rm = TRUE),
sd = sd(pred_score, na.rm = TRUE),
.groups = "drop"
)
# Now plot
ggplot2::ggplot(pct_summary,
aes(x = wave_num, y = mean, color = pct_group, group = pct_group)) +
geom_line(linewidth = 1) +
geom_point(size = 1.5) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.15) +
facet_wrap(~ sex01, labeller = label_both) +
scale_x_continuous(breaks = 1:8, labels = paste0("BSIW", 1:8)) +
labs(
x = "Wave",
y = "Predicted BSI Score",
color = "Percentile group",
title = "Figure 14.6 Predicted BSI by Wave and Sex (0/1) with SDs per Percentile Group",
subtitle = "Lines/points = group means; error bars = +/- 1 SD within percentile group"
) +
theme_minimal(base_size = 9)