This document shows you how to set up and run the example from my
path analysis lecture using lavaan in R. Some additional
packages (semPlot and semptools) are used for
creating the path diagrams. This document was created in R Markdown.
Note that there are some differences in the parameter estimates for
Model 1C compared to those from the piece-wise OLS regression method
used in the slides.
Required Packages
knitr::opts_chunk$set(echo = TRUE)
if (!require("pacman")) install.packages("pacman")
library(pacman)
# Load installed packages, install and load the package if not installed
pacman::p_load(here, rio, conflicted, tidyverse, gtsummary, broom,
janitor, summarytools, skimr, DataExplorer, inspectdf, Hmisc,
lavaan, semTools, semoutput, semPlot, sjPlot, psych, readr, dplyr,
GPArotation, psychTools, semptools, tinytex, stargazer)
# reading in corr matrix
corr <- '
1.000
.256 1.000
.536 .313 1.000
.505 .637 .653 1.000 '
# add variable names and convert to full correlation matrix
corrfull <- lavaan::getCov(corr, names=c("SES","MA","Mot","AA"))
# add SDs and convert to full covariance matrix
covfull <- lavaan::cor2cov(corrfull, sds=c(1,1,1,1))
# observed covariance matrix
#covfull
Using psych::lmCor to run the two OLS regressions with
correlation matrix as input. Compare the parameter estimates to those we
get from lavaan.
psych::lmCor(Mot ~ SES + MA, data=corrfull, n.obs=30, plot=T, main="First Regression Model")
## Call: psych::lmCor(y = Mot ~ SES + MA, data = corrfull, n.obs = 30,
## main = "First Regression Model", plot = T)
##
## Multiple Regression from matrix input
##
## DV = Mot
## slope se t p lower.ci upper.ci VIF Vy.x
## SES 0.49 0.16 2.97 0.0061 0.15 0.82 1.07 0.26
## MA 0.19 0.16 1.15 0.2600 -0.15 0.52 1.07 0.06
##
## Residual Standard Error = 0.85 with 27 degrees of freedom
##
## Multiple Regression
## R R2 Ruw R2uw Shrunken R2 SE of R2 overall F df1 df2 p
## Mot 0.57 0.32 0.54 0.29 0.27 0.12 6.36 2 27 0.00544
psych::lmCor(AA ~ SES + MA + Mot, data=corrfull, n.obs=30, plot=T, main="Second Regression Model")
## Call: psych::lmCor(y = AA ~ SES + MA + Mot, data = corrfull, n.obs = 30,
## main = "Second Regression Model", plot = T)
##
## Multiple Regression from matrix input
##
## DV = AA
## slope se t p lower.ci upper.ci VIF Vy.x
## SES 0.16 0.14 1.16 0.26000 -0.12 0.44 1.42 0.08
## MA 0.46 0.12 3.79 0.00082 0.21 0.72 1.12 0.30
## Mot 0.42 0.14 3.01 0.00580 0.13 0.71 1.47 0.28
##
## Residual Standard Error = 0.62 with 26 degrees of freedom
##
## Multiple Regression
## R R2 Ruw R2uw Shrunken R2 SE of R2 overall F df1 df2 p
## AA 0.81 0.65 0.79 0.62 0.61 0.08 16.23 3 26 3.79e-06
# specify the path model
mod1b <- '
Mot ~ a1*SES + a2*MA
AA ~ c1*SES + c2*MA + b*Mot
#indirect & total effects
SES_Mot_AA := a1*b
MA_Mot_AA := a2*b
SES_AA_tot := c1 + a1*b
MA_AA_tot := c2 + a2*b
'
# fit the path model
mod1b.fit <- lavaan::sem(mod1b, sample.cov=covfull, sample.nobs=30)
#--making path diagram w my locations specified in 'm'
m <- matrix(c("SES", NA, NA, NA,
NA, "Mot", NA, "AA",
"MA", NA, NA, NA), byrow=T, 3, 4)
fig1 <- semPlot::semPaths(mod1b.fit, layout=m, whatLabels = "std",
sizeMan=10, edge.label.cex=1.2,
nCharNodes=0, nCharEdges = 0,
rotation = 2, style = "lisrel",
DoNotPlot=T, residScale=10,
edge.color = "black")
#--working with semptools to modify path diagram
my_curve_list <- c("SES~~MA" = -3)
my_position_list <- c("AA~SES" =.60, "AA~MA" = .50, "Mot~SES"=.40)
my_rotate_resid_list <- c(Mot=.90, AA=.90)
#using piping to apply my lists
fig1_1 <- fig1 |> set_curve(my_curve_list) |>
set_edge_label_position(my_position_list)|>
rotate_resid(my_rotate_resid_list) |>
mark_sig(mod1b.fit, alpha = c("(n.s.)" = 1.00, "*" = .05)) #adding *, ns
plot(fig1_1)
Figure 1. Path diagram (Model 1B.
Model 1B is just-identified, so df = 0 and fit is perfect.
summary(mod1b.fit, fit.measures=T, standardized=T, rsquare=T)
## lavaan 0.6.16 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 7
##
## Number of observations 30
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Model Test Baseline Model:
##
## Test statistic 43.246
## Degrees of freedom 5
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000
## Tucker-Lewis Index (TLI) 1.000
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -62.496
## Loglikelihood unrestricted model (H1) -62.496
##
## Akaike (AIC) 138.993
## Bayesian (BIC) 148.801
## Sample-size adjusted Bayesian (SABIC) 127.007
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.000
## P-value H_0: RMSEA <= 0.050 NA
## P-value H_0: RMSEA >= 0.080 NA
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Mot ~
## SES (a1) 0.488 0.156 3.133 0.002 0.488 0.488
## MA (a2) 0.188 0.156 1.208 0.227 0.188 0.188
## AA ~
## SES (c1) 0.160 0.128 1.247 0.213 0.160 0.160
## MA (c2) 0.464 0.114 4.066 0.000 0.464 0.464
## Mot (b) 0.422 0.131 3.230 0.001 0.422 0.422
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Mot 0.657 0.170 3.873 0.000 0.657 0.680
## .AA 0.336 0.087 3.873 0.000 0.336 0.348
##
## R-Square:
## Estimate
## Mot 0.320
## AA 0.652
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## SES_Mot_AA 0.206 0.092 2.249 0.025 0.206 0.206
## MA_Mot_AA 0.079 0.070 1.132 0.258 0.079 0.079
## SES_AA_tot 0.366 0.129 2.829 0.005 0.366 0.366
## MA_AA_tot 0.543 0.129 4.200 0.000 0.543 0.543
# specify the path model
mod1c <- '
Mot ~ a1*SES
AA ~ MA + b*Mot
#indirect effects
SES_Mot_AA := a1*b
'
# fit the path model
mod1c.fit <- lavaan::sem(mod1c, sample.cov=covfull, sample.nobs=30)
Note that lavaan calculates the coefficients for MA and
Mot as predictors of AA a bit differently than the piece-wise OLS
regression method used in the lecture slides. Compare the values for
‘Estimates’ with those for ‘Std.all’ in the Full Output below. Also the
\(R^2\) value is a bit different.
#--making path diagram w my locations specified in 'm'
m <- matrix(c("SES", NA, NA, NA,
NA, "Mot", NA, "AA",
"MA", NA, NA, NA), byrow=T, 3, 4)
fig2 <- semPlot::semPaths(mod1c.fit, layout=m, whatLabels = "std",
sizeMan=10, edge.label.cex=1.2,
nCharNodes=0, nCharEdges = 0,
rotation = 2, style = "lisrel",
DoNotPlot=T, residScale=10,
edge.color = "black")
#--working with semptools to modify path diagram
my_curve_list <- c("SES~~MA" = -3)
my_position_list <- c("AA~MA" = .50, "Mot~SES"=.40)
my_rotate_resid_list <- c(Mot=.90, AA=.90)
#using piping to apply my lists
fig2_1 <- fig2 |> set_curve(my_curve_list) |>
set_edge_label_position(my_position_list)|>
rotate_resid(my_rotate_resid_list) |>
mark_sig(mod1c.fit, alpha = c("(n.s.)" = 1.00, "*" = .05)) #adding *, ns
plot(fig2_1)
Figure 2. Path diagram (Model 1C.
summary(mod1c.fit, fit.measures=T, standardized=T, rsquare=T)
## lavaan 0.6.16 ended normally after 1 iteration
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 5
##
## Number of observations 30
##
## Model Test User Model:
##
## Test statistic 2.940
## Degrees of freedom 2
## P-value (Chi-square) 0.230
##
## Model Test Baseline Model:
##
## Test statistic 43.246
## Degrees of freedom 5
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.975
## Tucker-Lewis Index (TLI) 0.939
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -63.967
## Loglikelihood unrestricted model (H1) -62.496
##
## Akaike (AIC) 137.933
## Bayesian (BIC) 144.939
## Sample-size adjusted Bayesian (SABIC) 129.372
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.125
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.405
## P-value H_0: RMSEA <= 0.050 0.255
## P-value H_0: RMSEA >= 0.080 0.707
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.081
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## Mot ~
## SES (a1) 0.536 0.154 3.478 0.001 0.536 0.536
## AA ~
## MA 0.480 0.112 4.300 0.000 0.480 0.501
## Mot (b) 0.503 0.112 4.509 0.000 0.503 0.526
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .Mot 0.689 0.178 3.873 0.000 0.689 0.713
## .AA 0.354 0.091 3.873 0.000 0.354 0.400
##
## R-Square:
## Estimate
## Mot 0.287
## AA 0.600
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## SES_Mot_AA 0.270 0.098 2.754 0.006 0.270 0.282
All z values < |2.54|.
lavResiduals(mod1c.fit)
## $type
## [1] "cor.bentler"
##
## $cov
## Mot AA SES MA
## Mot 0.000
## AA 0.084 0.085
## SES 0.000 0.113 0.000
## MA 0.176 0.088 0.000 0.000
##
## $cov.z
## Mot AA SES MA
## Mot 0.000
## AA 1.208 1.208
## SES 0.000 1.213 0.000
## MA 1.208 1.208 0.000 0.000
##
## $summary
## cov
## srmr 0.081
## srmr.se 0.039
## srmr.exactfit.z 0.650
## srmr.exactfit.pvalue 0.258
## usrmr 0.067
## usrmr.se 0.096
## usrmr.ci.lower -0.091
## usrmr.ci.upper 0.226
## usrmr.closefit.h0.value 0.050
## usrmr.closefit.z 0.180
## usrmr.closefit.pvalue 0.429
# comparing nested models via chisq likelihood ratio test
lavTestLRT(mod1b.fit, mod1c.fit, type = "chisq")
##
## Chi-Squared Difference Test
##
## Df AIC BIC Chisq Chisq diff RMSEA Df diff Pr(>Chisq)
## mod1b.fit 0 138.99 148.80 0.0000
## mod1c.fit 2 137.93 144.94 2.9404 2.9404 0.1252 2 0.2299
Note that the model comparison test is nonsignificant; the more
parsimonious Model 1C fits the data as well as the just-identified Model
1B. The \(\chi^2\) value is a bit
different than the hand calculation due to the differences in \(R^2\) values between OLS and
lavaan.