Read in Data

setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#Load lavaan (Install if you haven't already)

#Install lavaan if needed
if (!require("lavaan", character.only = TRUE)) {
  install.packages("lavaan")
  library(lavaan, character.only = TRUE)
}
## Loading required package: lavaan
## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
library(lavaan)

#Install HistData if needed
if (!require("HistData", character.only = TRUE)) {
  install.packages("HistData")
  library(HistData, character.only = TRUE)
}
## Loading required package: HistData
library(HistData)

# Load the Galton data
data(GaltonFamilies)


# Confirm variable names
names(GaltonFamilies)
## [1] "family"          "father"          "mother"          "midparentHeight"
## [5] "children"        "childNum"        "gender"          "childHeight"

Figure 3.3 a, b, and c (Also Figure 3.4a): Regression model with named parameters

model <- '
 # Regression path with named parameter
  childHeight ~ b1*midparentHeight

  # Variance of predictor
  midparentHeight ~~ var_mph*midparentHeight

  # Residual (error) variance of outcome
  childHeight ~~ var_e*childHeight

  # Intercept of outcome
  childHeight ~ b0*1

  # Mean of predictor
  midparentHeight ~ m*1
'

# Fit the model
fit <- sem(model, data = GaltonFamilies)

# Show results
summary(fit, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-19 ended normally after 8 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           934
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                               101.534
##   Degrees of freedom                                 1
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -4340.025
##   Loglikelihood unrestricted model (H1)      -4340.025
##                                                       
##   Akaike (AIC)                                8690.049
##   Bayesian (BIC)                              8714.247
##   Sample-size adjusted Bayesian (SABIC)       8698.367
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   childHeight ~                                                         
##     mdprntHgh (b1)    0.637    0.062   10.357    0.000    0.637    0.321
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .childHght (b0)   22.636    4.261    5.313    0.000   22.636    6.328
##     mdprntHgh  (m)   69.207    0.059 1174.115    0.000   69.207   38.418
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     mdprntH (vr_m)    3.245    0.150   21.610    0.000    3.245    1.000
##    .chldHgh (var_)   11.479    0.531   21.610    0.000   11.479    0.897

Make a Path Diagram

#Make a Path Diagram
#lavaan Shiny GUI is a graphical user interface (GUI) built with Shiny,
#It allows users to specify and run SEM models without writing full R code.

#How to Launch lavaan GUI
#1. Install required packages
#Install shiny if you haven't already:
#Install lavaangui if needed

if (!require("shiny", character.only = TRUE)) {
  install.packages("shiny")
  library(shiny, character.only = TRUE)
}
## Loading required package: shiny
if (!require("lavaangui", character.only = TRUE)) {
  install.packages("lavaangui")
  library(lavaangui, character.only = TRUE)
}
## Loading required package: lavaangui
## This is lavaangui 0.2.4
## lavaangui is BETA software! Please report any bugs at https://github.com/karchjd/lavaangui/issues
require(lavaangui)
#After executing the following line, you will see a window open. You can then
#move path elements around, save as an SVG or other format and export to the
#graphics package of your choice for further processing
#You'll have to run this interactively. The plot looks as follows:
#plot_lavaan(fit)
Figure 3.3a One Predictor Standardized Model
Figure 3.3a One Predictor Standardized Model
Figure3.3b Unstandardized One Predictor Regression Model
Figure3.3b Unstandardized One Predictor Regression Model
Figure3.3c Unstandardized One Predictor Regression Model with Intercept
Figure3.3c Unstandardized One Predictor Regression Model with Intercept

Figure 3.4b .Model with Error Standard Deviation

modelErrSD <- '
# Regression path from predictor to outcome
  childHeight ~ b1*midparentHeight

  # Latent error factor with fixed variance
  e =~ stddev_e*childHeight + NA*childHeight
  e ~~ 1*e

  # Residual variance removed from observed childHeight
  childHeight ~~ 0*childHeight

  # Variance of predictor
  midparentHeight ~~ var_mph*midparentHeight

  # Intercept of outcome
  childHeight ~ b0*1

  # Mean of predictor
  midparentHeight ~ m*1
'

# Fit the model
fitErrSD <- sem(modelErrSD, data = GaltonFamilies)

# Show results
summary(fitErrSD, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-19 ended normally after 28 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           934
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                               101.534
##   Degrees of freedom                                 1
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -4340.025
##   Loglikelihood unrestricted model (H1)      -4340.025
##                                                       
##   Akaike (AIC)                                8690.049
##   Bayesian (BIC)                              8714.247
##   Sample-size adjusted Bayesian (SABIC)       8698.367
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   e =~                                                                  
##     chldHgh (std_)    3.388    0.078   43.220    0.000    3.388    0.947
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   childHeight ~                                                         
##     mdprntHgh (b1)    0.637    0.062   10.357    0.000    0.637    0.321
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .childHght (b0)   22.636    4.261    5.313    0.000   22.636    6.328
##     mdprntHgh  (m)   69.207    0.059 1174.115    0.000   69.207   38.418
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     e                 1.000                               1.000    1.000
##    .chldHgh           0.000                               0.000    0.000
##     mdprntH (vr_m)    3.245    0.150   21.610    0.000    3.245    1.000
#plot_lavaan(fitErrSD)
Figure 3.4b One Predictor Model with Intercept and Error Standard Deviation
Figure 3.4b One Predictor Model with Intercept and Error Standard Deviation

Path Diagram for Correlation Coefficient

#Path Diagram for Correlation Coefficient (Z Score Factors)
modelCorr <- '
  # Latent factors with unit variance
  ChildZ =~ NA*childHeight+SDChild*childHeight
  ChildZ ~~ 1*ChildZ
  ParentZ =~ NA*midparentHeight+SDParent*midparentHeight
  ParentZ ~~ 1*ParentZ

  # Residual variance removed from observed childHeight
  childHeight ~~ 0*childHeight
  midparentHeight ~~ 0*midparentHeight

  # Correlation of Z Score Child and Z Score Parent
  ChildZ ~~ R*ParentZ

  # Mean of Child and Parent
  childHeight ~ b0*1
  midparentHeight ~ m*1
'

# Fit the model
fitCorr <- sem(modelCorr, data = GaltonFamilies)

# Show results
summary(fitCorr, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-19 ended normally after 15 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         5
## 
##   Number of observations                           934
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Model Test Baseline Model:
## 
##   Test statistic                               101.534
##   Degrees of freedom                                 1
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    1.000
##   Tucker-Lewis Index (TLI)                       1.000
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -4340.025
##   Loglikelihood unrestricted model (H1)      -4340.025
##                                                       
##   Akaike (AIC)                                8690.049
##   Bayesian (BIC)                              8714.247
##   Sample-size adjusted Bayesian (SABIC)       8698.367
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.000
##   90 Percent confidence interval - lower         0.000
##   90 Percent confidence interval - upper         0.000
##   P-value H_0: RMSEA <= 0.050                       NA
##   P-value H_0: RMSEA >= 0.080                       NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.000
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   ChildZ =~                                                             
##     chldHgh (SDCh)    3.577    0.083   43.220    0.000    3.577    1.000
##   ParentZ =~                                                            
##     mdprntH (SDPr)    1.801    0.042   43.220    0.000    1.801    1.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   ChildZ ~~                                                             
##     ParentZ    (R)    0.321    0.029   10.935    0.000    0.321    0.321
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .childHght (b0)   66.746    0.117  570.215    0.000   66.746   18.658
##    .mdprntHgh  (m)   69.207    0.059 1174.115    0.000   69.207   38.418
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     ChildZ            1.000                               1.000    1.000
##     ParentZ           1.000                               1.000    1.000
##    .childHeight       0.000                               0.000    0.000
##    .midparentHeght    0.000                               0.000    0.000
#plot_lavaan(fitCorr)
Figure3.4c Path Diagram of a Correlation Coefficient based on Adjoined Matrix
Figure3.4c Path Diagram of a Correlation Coefficient based on Adjoined Matrix