Read in Data

setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#Load lavaan (Install if you haven't already)

#Install lavaan if needed
if (!require("lavaan", character.only = TRUE)) {
  install.packages("lavaan")
  library(lavaan, character.only = TRUE)
}

## Loading required package: lavaan

## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.

library(lavaan)

#Install HistData if needed
if (!require("HistData", character.only = TRUE)) {
  install.packages("HistData")
  library(HistData, character.only = TRUE)
}

## Loading required package: HistData

library(HistData)

# Load the Galton data
data(GaltonFamilies)


# Confirm variable names
names(GaltonFamilies)

## [1] "family"          "father"          "mother"          "midparentHeight"
## [5] "children"        "childNum"        "gender"          "childHeight"

Figure 3.3 a, b, and c (Also Figure 3.4a): Regression model with named parameters

model <- '
 # Regression path with named parameter
  childHeight ~ b1*midparentHeight

  # Variance of predictor
  midparentHeight ~~ var_mph*midparentHeight

  # Residual (error) variance of outcome
  childHeight ~~ var_e*childHeight

  # Intercept of outcome
  childHeight ~ b0*1

  # Mean of predictor
  midparentHeight ~ m*1
'

# Fit the model
fit <- sem(model, data = GaltonFamilies)

# Show results
summary(fit, standardized = TRUE, fit.measures = TRUE)

## lavaan 0.6-19 ended normally after 8 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           934
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                               101.534
##   Degrees of freedom                                 1
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -4340.025
##   Loglikelihood unrestricted model (H1)      -4340.025
##                                                       
##   Akaike (AIC)                                8690.049
##   Bayesian (BIC)                              8714.247
##   Sample-size adjusted Bayesian (SABIC)       8698.367
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   childHeight ~                                                         
##     mdprntHgh (b1)    0.637    0.062   10.357    0.000    0.637    0.321
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .childHght (b0)   22.636    4.261    5.313    0.000   22.636    6.328
##     mdprntHgh  (m)   69.207    0.059 1174.115    0.000   69.207   38.418
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     mdprntH (vr_m)    3.245    0.150   21.610    0.000    3.245    1.000
##    .chldHgh (var_)   11.479    0.531   21.610    0.000   11.479    0.897

Make a Path Diagram

#Make a Path Diagram
#lavaan Shiny GUI is a graphical user interface (GUI) built with Shiny,
#It allows users to specify and run SEM models without writing full R code.

#How to Launch lavaan GUI
#1. Install required packages
#Install shiny if you haven't already:
#Install lavaangui if needed

if (!require("shiny", character.only = TRUE)) {
  install.packages("shiny")
  library(shiny, character.only = TRUE)
}

## Loading required package: shiny

if (!require("lavaangui", character.only = TRUE)) {
  install.packages("lavaangui")
  library(lavaangui, character.only = TRUE)
}

## Loading required package: lavaangui

## This is lavaangui 0.2.4
## lavaangui is BETA software! Please report any bugs at https://github.com/karchjd/lavaangui/issues

require(lavaangui)
#After executing the following line, you will see a window open. You can then
#move path elements around, save as an SVG or other format and export to the
#graphics package of your choice for further processing
#You'll have to run this interactively. The plot looks as follows:
#plot_lavaan(fit)

Figure 3.3a One Predictor Standardized Model

Figure3.3b Unstandardized One Predictor Regression Model

Figure3.3c Unstandardized One Predictor Regression Model with Intercept

Figure 3.4b .Model with Error Standard Deviation

modelErrSD <- '
# Regression path from predictor to outcome
  childHeight ~ b1*midparentHeight

  # Latent error factor with fixed variance
  e =~ stddev_e*childHeight + NA*childHeight
  e ~~ 1*e

  # Residual variance removed from observed childHeight
  childHeight ~~ 0*childHeight

  # Variance of predictor
  midparentHeight ~~ var_mph*midparentHeight

  # Intercept of outcome
  childHeight ~ b0*1

  # Mean of predictor
  midparentHeight ~ m*1
'

# Fit the model
fitErrSD <- sem(modelErrSD, data = GaltonFamilies)

# Show results
summary(fitErrSD, standardized = TRUE, fit.measures = TRUE)

## lavaan 0.6-19 ended normally after 28 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           934
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                               101.534
##   Degrees of freedom                                 1
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -4340.025
##   Loglikelihood unrestricted model (H1)      -4340.025
##                                                       
##   Akaike (AIC)                                8690.049
##   Bayesian (BIC)                              8714.247
##   Sample-size adjusted Bayesian (SABIC)       8698.367
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   e =~                                                                  
##     chldHgh (std_)    3.388    0.078   43.220    0.000    3.388    0.947
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   childHeight ~                                                         
##     mdprntHgh (b1)    0.637    0.062   10.357    0.000    0.637    0.321
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .childHght (b0)   22.636    4.261    5.313    0.000   22.636    6.328
##     mdprntHgh  (m)   69.207    0.059 1174.115    0.000   69.207   38.418
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     e                 1.000                               1.000    1.000
##    .chldHgh           0.000                               0.000    0.000
##     mdprntH (vr_m)    3.245    0.150   21.610    0.000    3.245    1.000

#plot_lavaan(fitErrSD)

Figure 3.4b One Predictor Model with Intercept and Error Standard Deviation

Path Diagram for Correlation Coefficient

#Path Diagram for Correlation Coefficient (Z Score Factors)
modelCorr <- '
  # Latent factors with unit variance
  ChildZ =~ NA*childHeight+SDChild*childHeight
  ChildZ ~~ 1*ChildZ
  ParentZ =~ NA*midparentHeight+SDParent*midparentHeight
  ParentZ ~~ 1*ParentZ

  # Residual variance removed from observed childHeight
  childHeight ~~ 0*childHeight
  midparentHeight ~~ 0*midparentHeight

  # Correlation of Z Score Child and Z Score Parent
  ChildZ ~~ R*ParentZ

  # Mean of Child and Parent
  childHeight ~ b0*1
  midparentHeight ~ m*1
'

# Fit the model
fitCorr <- sem(modelCorr, data = GaltonFamilies)

# Show results
summary(fitCorr, standardized = TRUE, fit.measures = TRUE)

## lavaan 0.6-19 ended normally after 15 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           934
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                               101.534
##   Degrees of freedom                                 1
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -4340.025
##   Loglikelihood unrestricted model (H1)      -4340.025
##                                                       
##   Akaike (AIC)                                8690.049
##   Bayesian (BIC)                              8714.247
##   Sample-size adjusted Bayesian (SABIC)       8698.367
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   ChildZ =~                                                             
##     chldHgh (SDCh)    3.577    0.083   43.220    0.000    3.577    1.000
##   ParentZ =~                                                            
##     mdprntH (SDPr)    1.801    0.042   43.220    0.000    1.801    1.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   ChildZ ~~                                                             
##     ParentZ    (R)    0.321    0.029   10.935    0.000    0.321    0.321
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .childHght (b0)   66.746    0.117  570.215    0.000   66.746   18.658
##    .mdprntHgh  (m)   69.207    0.059 1174.115    0.000   69.207   38.418
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     ChildZ            1.000                               1.000    1.000
##     ParentZ           1.000                               1.000    1.000
##    .childHeight       0.000                               0.000    0.000
##    .midparentHeght    0.000                               0.000    0.000

#plot_lavaan(fitCorr)

Figure3.4c Path Diagram of a Correlation Coefficient based on Adjoined Matrix

Ch. 3 Path Notation

Phil Wood

2025-07-18

Read in Data

Figure 3.3 a, b, and c (Also Figure 3.4a): Regression model with named parameters

Make a Path Diagram

Figure 3.4b .Model with Error Standard Deviation

Path Diagram for Correlation Coefficient