rm(list=ls()) #Remove all existing objects
setwd("D:/r/sem") #Set the working directory
library(tidyverse) #Data Science package
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lavaan) #SEM
## This is lavaan 0.6-16
## lavaan is FREE software! Please report any bugs.
# Load Data
data <- read.csv("CFAdat.csv")
#Take a quick look at the data
head(data) # Overview
## ID x1 x2 x3 x4 x5 x6
## 1 1 44.58603 39.18660 47.05583 53.84292 59.35734 67.03329
## 2 2 41.71302 55.82479 57.34313 41.96656 50.62482 56.27188
## 3 3 40.47189 53.06506 49.23380 58.85288 62.63306 46.98520
## 4 4 45.70096 53.21960 69.43753 68.94695 64.08602 63.07206
## 5 5 56.75433 80.66200 65.81849 61.65908 53.01208 49.33261
## 6 6 57.79563 44.03558 48.18220 54.30853 62.18502 66.18146
str(data) # Data Structure
## 'data.frame': 385 obs. of 7 variables:
## $ ID: int 1 2 3 4 5 6 7 8 9 10 ...
## $ x1: num 44.6 41.7 40.5 45.7 56.8 ...
## $ x2: num 39.2 55.8 53.1 53.2 80.7 ...
## $ x3: num 47.1 57.3 49.2 69.4 65.8 ...
## $ x4: num 53.8 42 58.9 68.9 61.7 ...
## $ x5: num 59.4 50.6 62.6 64.1 53 ...
## $ x6: num 67 56.3 47 63.1 49.3 ...
summary(data) # Quick Summary
## ID x1 x2 x3
## Min. : 1 Min. : 17.04 Min. :12.36 Min. :17.87
## 1st Qu.: 97 1st Qu.: 43.96 1st Qu.:43.74 1st Qu.:44.23
## Median :193 Median : 48.45 Median :48.44 Median :49.29
## Mean :193 Mean : 49.76 Mean :49.71 Mean :49.73
## 3rd Qu.:289 3rd Qu.: 53.91 3rd Qu.:54.65 3rd Qu.:54.81
## Max. :385 Max. :117.68 Max. :86.54 Max. :85.63
## x4 x5 x6
## Min. : 13.84 Min. : 20.25 Min. :26.47
## 1st Qu.: 43.20 1st Qu.: 43.95 1st Qu.:44.37
## Median : 49.08 Median : 49.62 Median :49.50
## Mean : 49.99 Mean : 50.11 Mean :50.02
## 3rd Qu.: 55.08 3rd Qu.: 55.03 3rd Qu.:54.89
## Max. :106.72 Max. :108.79 Max. :90.12
dim(data) # Data Dimensions
## [1] 385 7
names(data) # Column names
## [1] "ID" "x1" "x2" "x3" "x4" "x5" "x6"
Use the default setting for scaling latent factors in Mplus to fit a standard CFA model in which x1-x3 load on factor 1 and x4-x6 load on factor 2. Copy and paste the syntax you used in a Word document and insert a path diagram with standardized estimates. (1 point)
# Create the CFA model
m1a <- '
# Define the latent factors and their indicators
f1 =~ x1 + x2 + x3
f2 =~ x4 + x5 + x6
# Specify the factor variances and covariances
f1 ~~ f1
f2 ~~ f2
'
# Fit the model
m1af <- cfa(m1a, data = data, fixed.x = FALSE) # fit the model
# Summarize the results
summary(m1af, standardized = TRUE)
## lavaan 0.6.16 ended normally after 87 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 13
##
## Number of observations 385
##
## Model Test User Model:
##
## Test statistic 47.721
## Degrees of freedom 8
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## x1 1.000 6.071 0.602
## x2 1.126 0.102 11.095 0.000 6.837 0.697
## x3 1.534 0.123 12.510 0.000 9.316 0.943
## f2 =~
## x4 1.000 8.060 0.749
## x5 0.954 0.068 13.930 0.000 7.690 0.793
## x6 0.584 0.065 8.989 0.000 4.703 0.498
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 ~~
## f2 41.666 5.095 8.177 0.000 0.851 0.851
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 36.862 5.937 6.208 0.000 1.000 1.000
## f2 64.969 8.213 7.910 0.000 1.000 1.000
## .x1 64.968 5.024 12.932 0.000 64.968 0.638
## .x2 49.615 4.108 12.078 0.000 49.615 0.515
## .x3 10.893 3.507 3.106 0.002 10.893 0.112
## .x4 50.687 4.966 10.207 0.000 50.687 0.438
## .x5 35.013 3.913 8.948 0.000 35.013 0.372
## .x6 67.017 5.163 12.980 0.000 67.017 0.752
# Visualize the output
library(lavaanPlot) # package for lavaan visualization
# All paths standarized
lavaanPlot(model = m1af, node_options = list(shape = "box", fontname = "Helvetica"), edge_options = list(color = "grey"), coefs = TRUE, stand = TRUE)
Refit the same standard CFA model in Q1 but scale the latent factors by fixing the loadings of x2 and x5 for factor 1 and factor 2, respectively, instead. Copy and paste the syntax in the Word document and insert a path diagram with unstandardized estimates. (1 point)
# Create the CFA model, change the default marker method to what's specified in the question
m2a <- '
# Define the latent factors and their indicators
f1 =~ NA*x1 + 1*x2 + x3
f2 =~ NA*x4 + 1*x5 + x6
# Specify the factor variances and covariances
f1 ~~ f1
f2 ~~ f2
'
# Fit the model
m2af <- cfa(m2a, data = data, fixed.x = FALSE) # fit the model
# Summarize the results
summary(m2af, standardized = TRUE)
## lavaan 0.6.16 ended normally after 83 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 13
##
## Number of observations 385
##
## Model Test User Model:
##
## Test statistic 47.721
## Degrees of freedom 8
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## x1 0.888 0.080 11.095 0.000 6.071 0.602
## x2 1.000 6.837 0.697
## x3 1.362 0.091 14.961 0.000 9.316 0.943
## f2 =~
## x4 1.048 0.075 13.930 0.000 8.060 0.749
## x5 1.000 7.690 0.793
## x6 0.612 0.067 9.164 0.000 4.703 0.498
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 ~~
## f2 44.769 4.972 9.004 0.000 0.851 0.851
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 46.751 6.288 7.435 0.000 1.000 1.000
## f2 59.141 6.973 8.481 0.000 1.000 1.000
## .x1 64.968 5.024 12.932 0.000 64.968 0.638
## .x2 49.615 4.108 12.078 0.000 49.615 0.515
## .x3 10.893 3.507 3.106 0.002 10.893 0.112
## .x4 50.687 4.966 10.207 0.000 50.687 0.438
## .x5 35.013 3.913 8.948 0.000 35.013 0.372
## .x6 67.017 5.163 12.980 0.000 67.017 0.752
# Visualize the output
# All paths standarized
lavaanPlot(model = m2af, node_options = list(shape = "box", fontname = "Helvetica"), edge_options = list(color = "grey"), coefs = TRUE, stand = FALSE)
Refit the same standard CFA model in Q1 but scale the latent factors by fixing their variances to 1 instead.
# Create the modified CFA model with scaled latent factors (assuming no factor covariance)
m3a <- '
# Define the latent factors and their indicators
f1 =~ NA*x1 + x2 + x3
f2 =~ NA*x4 + x5 + x6
# Fix the variances of the latent factors to 1
f1 ~~ 1* f1
f2 ~~ 1* f2
'
# Fit the modified model
m3af <- cfa(m3a, data = data) # Fit the model
# Summarize the results
summary(m3af, standardized = TRUE) # Standardized estimates
## lavaan 0.6.16 ended normally after 24 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 13
##
## Number of observations 385
##
## Model Test User Model:
##
## Test statistic 47.721
## Degrees of freedom 8
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## x1 6.071 0.489 12.417 0.000 6.071 0.602
## x2 6.837 0.460 14.870 0.000 6.837 0.697
## x3 9.316 0.418 22.291 0.000 9.316 0.943
## f2 =~
## x4 8.060 0.509 15.821 0.000 8.060 0.749
## x5 7.690 0.453 16.962 0.000 7.690 0.793
## x6 4.703 0.490 9.593 0.000 4.703 0.498
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 ~~
## f2 0.851 0.030 28.640 0.000 0.851 0.851
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 1.000 1.000 1.000
## f2 1.000 1.000 1.000
## .x1 64.968 5.024 12.932 0.000 64.968 0.638
## .x2 49.615 4.108 12.078 0.000 49.615 0.515
## .x3 10.893 3.507 3.106 0.002 10.893 0.112
## .x4 50.687 4.966 10.206 0.000 50.687 0.438
## .x5 35.013 3.913 8.948 0.000 35.013 0.372
## .x6 67.017 5.163 12.980 0.000 67.017 0.752
# Visualize the output
# All paths standarized
lavaanPlot(model = m3af, node_options = list(shape = "box", fontname = "Helvetica"), edge_options = list(color = "grey"), coefs = TRUE, stand = FALSE)
Unstandardized coefficients are different between Q1 and Q2, Q1 and Q3, and Q2 and Q3
Standardized coefficients (std.all) are the same across Q1, Q2, and Q3.
The Chi-squares, degree of freedom, and p value are the same across Q1, Q2, and Q3. This is because we’re just using two different methods to scale the latent factors (Technically, Q1 and Q2 employ the same scaling strategy)
# Specify the model, having x3 crossload on both factors
m4a <- '
# Define the latent factors and their indicators
f1 =~ x1 + x2 + x3
f2 =~ x4 + x5 + x6 + x3
'
# Fit the modified model
m4af <- cfa(m4a, data = data) # Fit the model
# Summarize the results
summary(m4af, standardized = TRUE) # Standardized estimates
## lavaan 0.6.16 ended normally after 95 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 14
##
## Number of observations 385
##
## Model Test User Model:
##
## Test statistic 17.109
## Degrees of freedom 7
## P-value (Chi-square) 0.017
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## x1 1.000 6.953 0.689
## x2 1.142 0.093 12.339 0.000 7.942 0.809
## x3 0.666 0.094 7.059 0.000 4.628 0.468
## f2 =~
## x4 1.000 7.936 0.738
## x5 0.982 0.070 13.956 0.000 7.794 0.803
## x6 0.593 0.066 8.965 0.000 4.704 0.498
## x3 0.630 0.081 7.811 0.000 4.996 0.506
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 ~~
## f2 36.634 4.908 7.464 0.000 0.664 0.664
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .x1 53.490 4.882 10.956 0.000 53.490 0.525
## .x2 33.298 4.586 7.260 0.000 33.298 0.346
## .x3 20.598 2.723 7.565 0.000 20.598 0.211
## .x4 52.681 4.992 10.554 0.000 52.681 0.455
## .x5 33.404 3.858 8.659 0.000 33.404 0.355
## .x6 67.008 5.153 13.003 0.000 67.008 0.752
## f1 48.340 6.927 6.979 0.000 1.000 1.000
## f2 62.976 8.098 7.777 0.000 1.000 1.000
According to the output: Model chi-squares: 17.109 DF: 7 p-value: .017
First, let’s load the dataset:
# Load Data
dat <- read.csv("pathdat.csv")
#Take a quick look at the data
head(dat) # Overview
## ID MOT ENG Math Read Sci SEL
## 1 1 50.91526 40.72218 41.34665 40.18896 50.20612 46.75275
## 2 2 43.13290 36.69724 44.91700 68.14945 28.63248 62.98196
## 3 3 57.71078 50.92025 51.67867 46.86790 51.89710 54.03885
## 4 4 52.15350 63.86352 46.09058 49.50678 43.20759 49.15102
## 5 5 59.45522 35.22761 51.29142 43.29821 42.42249 29.31352
## 6 6 50.06800 56.10331 41.11826 59.17902 48.12415 47.98657
str(dat) # Data Structure
## 'data.frame': 125 obs. of 7 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ MOT : num 50.9 43.1 57.7 52.2 59.5 ...
## $ ENG : num 40.7 36.7 50.9 63.9 35.2 ...
## $ Math: num 41.3 44.9 51.7 46.1 51.3 ...
## $ Read: num 40.2 68.1 46.9 49.5 43.3 ...
## $ Sci : num 50.2 28.6 51.9 43.2 42.4 ...
## $ SEL : num 46.8 63 54 49.2 29.3 ...
summary(dat) # Quick Summary
## ID MOT ENG Math
## Min. : 1 Min. :11.66 Min. :-48.65 Min. :-0.261
## 1st Qu.: 32 1st Qu.:44.52 1st Qu.: 43.02 1st Qu.:44.419
## Median : 63 Median :50.87 Median : 49.26 Median :50.993
## Mean : 63 Mean :49.43 Mean : 47.04 Mean :48.610
## 3rd Qu.: 94 3rd Qu.:54.66 3rd Qu.: 55.21 3rd Qu.:55.548
## Max. :125 Max. :77.75 Max. : 75.85 Max. :72.214
## Read Sci SEL
## Min. :15.46 Min. : 2.713 Min. : 2.963
## 1st Qu.:44.99 1st Qu.:45.668 1st Qu.:44.759
## Median :50.99 Median :51.261 Median :51.778
## Mean :49.31 Mean :50.337 Mean :49.529
## 3rd Qu.:55.19 3rd Qu.:57.289 3rd Qu.:55.960
## Max. :68.81 Max. :71.354 Max. :80.390
dim(dat) # Data Dimensions
## [1] 125 7
names(dat) # Column names
## [1] "ID" "MOT" "ENG" "Math" "Read" "Sci" "SEL"
# Specify the path model
pm1 <- '
# Define the relationships in the path model
MOT ~ SEL
ENG ~ MOT
Math ~ MOT
Read ~ MOT
Sci ~ MOT
# Correlate all disturbances (just in case not default in lavaan)
ENG ~~ Math
ENG ~~ Read
ENG ~~ Sci
Math ~~ Read
Math ~~ Sci
Read ~~ Sci
'
# Fit the model
pm1f <- sem(pm1, data = dat)
# Summarize the results
summary(pm1f, standardized=TRUE)
## lavaan 0.6.16 ended normally after 53 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 16
##
## Number of observations 125
##
## Model Test User Model:
##
## Test statistic 7.626
## Degrees of freedom 4
## P-value (Chi-square) 0.106
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## MOT ~
## SEL 0.433 0.071 6.065 0.000 0.433 0.477
## ENG ~
## MOT 0.848 0.123 6.870 0.000 0.848 0.524
## Math ~
## MOT 0.279 0.108 2.577 0.010 0.279 0.225
## Read ~
## MOT 0.136 0.090 1.512 0.131 0.136 0.134
## Sci ~
## MOT 0.090 0.099 0.906 0.365 0.090 0.081
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .ENG ~~
## .Math 46.640 14.208 3.283 0.001 46.640 0.307
## .Read 17.153 11.422 1.502 0.133 17.153 0.136
## .Sci 24.616 12.631 1.949 0.051 24.616 0.177
## .Math ~~
## .Read 28.725 10.266 2.798 0.005 28.725 0.258
## .Sci 10.710 10.964 0.977 0.329 10.710 0.088
## .Read ~~
## .Sci 3.802 9.109 0.417 0.676 3.802 0.037
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .MOT 70.215 8.882 7.906 0.000 70.215 0.773
## .ENG 172.913 21.872 7.906 0.000 172.913 0.726
## .Math 133.341 16.867 7.906 0.000 133.341 0.950
## .Read 92.618 11.715 7.906 0.000 92.618 0.982
## .Sci 111.821 14.144 7.906 0.000 111.821 0.993
# Visualize the output
# All paths standarized
lavaanPlot(model = pm1f, node_options = list(shape = "box", fontname = "Helvetica"), edge_options = list(color = "grey"), coefs = TRUE, stand = TRUE)
The unstandardized value of the direct effect estimate of SEL on MOT is 0.433. It means that for every one unit increase in social emotional learning, there is a corresponding increase of 0.433 units in motivation.
The direct effect of SEL on MOT is significant at p <.05 level. The z-score is 6.065, nd the p-value is 0.000 (rounded).
The standardized value of the direct effect of SEL on MOT is 0.477. This means that for every one standard deviation in crease in social emotional learning, there is a corresponding 0.477 standard deviation increase in motivation.
The Mediation Model
# Specify the mediation model
pm2 <- '
# Define the relationships in the path model
MOT ~ d*SEL
ENG ~ p*SEL
Math ~ q*SEL
Read ~ r*SEL
Sci ~ s*SEL
ENG ~ u*MOT
Math ~ v*MOT
Read ~ w*MOT
Sci ~ x*MOT
# Specify the mediation effect
du := d * u
vd := d * v
wd := d * w
xd := d * x
'
# Fit the model
pm2f <- sem(pm2, data = dat)
# Summarize the results
summary(pm2f, standardized = TRUE)
## lavaan 0.6.16 ended normally after 84 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 20
##
## Number of observations 125
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## MOT ~
## SEL (d) 0.433 0.071 6.065 0.000 0.433 0.477
## ENG ~
## SEL (p) 0.296 0.125 2.369 0.018 0.296 0.201
## Math ~
## SEL (q) -0.040 0.112 -0.356 0.722 -0.040 -0.035
## Read ~
## SEL (r) 0.021 0.093 0.227 0.820 0.021 0.023
## Sci ~
## SEL (s) 0.125 0.102 1.223 0.221 0.125 0.123
## ENG ~
## MOT (u) 0.692 0.137 5.043 0.000 0.692 0.428
## Math ~
## MOT (v) 0.300 0.123 2.436 0.015 0.300 0.241
## Read ~
## MOT (w) 0.125 0.103 1.221 0.222 0.125 0.123
## Sci ~
## MOT (x) 0.024 0.112 0.218 0.827 0.024 0.022
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .ENG ~~
## .Math 47.650 13.947 3.417 0.001 47.650 0.321
## .Read 16.620 11.170 1.488 0.137 16.620 0.134
## .Sci 21.459 12.246 1.752 0.080 21.459 0.159
## .Math ~~
## .Read 28.801 10.262 2.807 0.005 28.801 0.259
## .Sci 11.125 10.897 1.021 0.307 11.125 0.092
## .Read ~~
## .Sci 3.572 9.052 0.395 0.693 3.572 0.035
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .MOT 70.215 8.882 7.906 0.000 70.215 0.773
## .ENG 165.482 20.932 7.906 0.000 165.482 0.695
## .Math 133.213 16.850 7.906 0.000 133.213 0.949
## .Read 92.582 11.711 7.906 0.000 92.582 0.982
## .Sci 110.492 13.976 7.906 0.000 110.492 0.982
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## du 0.300 0.077 3.878 0.000 0.300 0.204
## vd 0.130 0.058 2.260 0.024 0.130 0.115
## wd 0.054 0.045 1.197 0.231 0.054 0.059
## xd 0.011 0.049 0.218 0.827 0.011 0.010
Therefore, the interpretation of the unstandardized coefficients is as followed:
For every one unit increase in SEL, there is a corresponding 0.3 unit increase in English score through the medicating effect of MOT. The mediation effect is significant at p <.05 level.
For every one unit increase in SEL, there is a corresponding 0.13 unit increase in math score through the medicating effect of MOT. The mediation effect is significant at p <.05 level.
The interpretation for read and science follows the same logic. However, the results are not significant at p <.05 level.
The interpretation of the unstandardized coefficient of the indirect effect of SEL on ENG is: For every one unit increase in SEL, there is a corresponding 0.3 unit increase in English score through the mediation effect of motivation.
The interpretation of the standardized coefficient of the indirect effect of SEL on ENG is: For every one standard deviation increase in SEL, there is a corresponding 0.204 standard deviation increase in English score through the mediation effect of motivation.
According to the model output, the covariance between disturbances of Math and Read is significant at P < .05 level. The z-statistics is 2.807, the p-value estimate is 0.005 (rounded), which falls below the cut-off value we use here, which is p = .05.
This results mean that we found a significant relationship between the errors of ‘Math’ and ‘Read’, which suggests that there is something common that affects both subjects.
Put it in another way, this result tells us that ‘Math’ and ‘Read’ are not entirely independent, and there’s a common factor affecting them. So when we see changes in ‘Math’ scores, it’s likely to be associated with changes in ‘Read’ scores as well.
# Specify the mediation model with fixed covariances
pm3 <- '
# Define the relationships in the path model
MOT ~ d*SEL
ENG ~ p*SEL
Math ~ q*SEL
Read ~ r*SEL
Sci ~ s*SEL
ENG ~ u*MOT
Math ~ v*MOT
Read ~ w*MOT
Sci ~ x*MOT
# Specify the mediation effect
du := d * u
vd := d * v
wd := d * w
xd := d * x
# Fix the covariances between disturbances
ENG ~~ 0*Read
ENG ~~ 0*Sci
Math ~~ 0*Sci
Read ~~ 0*Sci
'
# Fit the model
pm3f <- sem(pm3, data = dat)
# Summarize the results
summary(pm3f, fit.measures=TRUE, standardized=TRUE)
## lavaan 0.6.16 ended normally after 29 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 16
##
## Number of observations 125
##
## Model Test User Model:
##
## Test statistic 5.703
## Degrees of freedom 4
## P-value (Chi-square) 0.222
##
## Model Test Baseline Model:
##
## Test statistic 115.103
## Degrees of freedom 15
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.983
## Tucker-Lewis Index (TLI) 0.936
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -2344.471
## Loglikelihood unrestricted model (H1) -2341.620
##
## Akaike (AIC) 4720.943
## Bayesian (BIC) 4766.196
## Sample-size adjusted Bayesian (SABIC) 4715.601
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.058
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.157
## P-value H_0: RMSEA <= 0.050 0.365
## P-value H_0: RMSEA >= 0.080 0.441
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.044
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## MOT ~
## SEL (d) 0.433 0.071 6.065 0.000 0.433 0.477
## ENG ~
## SEL (p) 0.296 0.125 2.369 0.018 0.296 0.201
## Math ~
## SEL (q) -0.040 0.111 -0.359 0.719 -0.040 -0.036
## Read ~
## SEL (r) 0.021 0.093 0.227 0.820 0.021 0.023
## Sci ~
## SEL (s) 0.125 0.102 1.223 0.221 0.125 0.123
## ENG ~
## MOT (u) 0.692 0.137 5.043 0.000 0.692 0.428
## Math ~
## MOT (v) 0.300 0.122 2.457 0.014 0.300 0.243
## Read ~
## MOT (w) 0.125 0.103 1.221 0.222 0.125 0.123
## Sci ~
## MOT (x) 0.024 0.112 0.218 0.827 0.024 0.022
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .ENG ~~
## .Read 0.000 0.000 0.000
## .Sci 0.000 0.000 0.000
## .Math ~~
## .Sci 0.000 0.000 0.000
## .Read ~~
## .Sci 0.000 0.000 0.000
## .ENG ~~
## .Math 43.260 13.406 3.227 0.001 43.260 0.294
## .Math ~~
## .Read 24.456 9.663 2.531 0.011 24.456 0.222
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .MOT 70.215 8.882 7.906 0.000 70.215 0.773
## .ENG 165.482 20.932 7.906 0.000 165.482 0.695
## .Math 130.917 16.489 7.940 0.000 130.917 0.948
## .Read 92.582 11.711 7.906 0.000 92.582 0.982
## .Sci 110.492 13.976 7.906 0.000 110.492 0.982
##
## Defined Parameters:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## du 0.300 0.077 3.878 0.000 0.300 0.204
## vd 0.130 0.057 2.277 0.023 0.130 0.116
## wd 0.054 0.045 1.197 0.231 0.054 0.059
## xd 0.011 0.049 0.218 0.827 0.011 0.010
# Visualize the output
# All paths standarized
lavaanPlot(model = pm3f, node_options = list(shape = "box", fontname = "Helvetica"), edge_options = list(color = "grey"), coefs = TRUE, stand = TRUE)
# Load Data
df <- read.csv("pathmissdat.csv")
#Take a quick look at the data
head(df) # Overview
## ID MOT ENG Math Read Sci SEL
## 1 1 50.91526 40.72218 41.34665 40.18896 50.20612 46.75275
## 2 2 43.13290 36.69724 44.91700 68.14945 28.63248 62.98196
## 3 3 57.71078 50.92025 51.67867 46.86790 51.89710 54.03885
## 4 4 52.15350 63.86352 46.09058 49.50678 43.20759 49.15102
## 5 5 59.45522 35.22761 51.29142 43.29821 42.42249 29.31352
## 6 6 50.06800 56.10331 41.11826 59.17902 48.12415 47.98657
str(df) # Data Structure
## 'data.frame': 125 obs. of 7 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ MOT : num 50.9 43.1 57.7 52.2 59.5 ...
## $ ENG : num 40.7 36.7 50.9 63.9 35.2 ...
## $ Math: num 41.3 44.9 51.7 46.1 51.3 ...
## $ Read: num 40.2 68.1 46.9 49.5 43.3 ...
## $ Sci : num 50.2 28.6 51.9 43.2 42.4 ...
## $ SEL : num 46.8 63 54 49.2 29.3 ...
summary(df) # Quick Summary
## ID MOT ENG Math
## Min. : 1 Min. :11.66 Min. :-48.65 Min. :-0.261
## 1st Qu.: 32 1st Qu.:44.52 1st Qu.: 43.02 1st Qu.:44.419
## Median : 63 Median :50.87 Median : 49.26 Median :50.993
## Mean : 63 Mean :49.43 Mean : 47.04 Mean :48.610
## 3rd Qu.: 94 3rd Qu.:54.66 3rd Qu.: 55.21 3rd Qu.:55.548
## Max. :125 Max. :77.75 Max. : 75.85 Max. :72.214
## Read Sci SEL
## Min. :15.46 Min. : 2.713 Min. :-999.00
## 1st Qu.:44.99 1st Qu.:45.668 1st Qu.: 37.57
## Median :50.99 Median :51.261 Median : 49.07
## Mean :49.31 Mean :50.337 Mean :-110.13
## 3rd Qu.:55.19 3rd Qu.:57.289 3rd Qu.: 55.19
## Max. :68.81 Max. :71.354 Max. : 71.38
dim(df) # Data Dimensions
## [1] 125 7
names(df) # Column names
## [1] "ID" "MOT" "ENG" "Math" "Read" "Sci" "SEL"
# Replace -999 with "NA" so R can recognize
df[] <- lapply(df, function(x) ifelse(x == -999, NA, x))
# Show missing cells numbers
sum(is.na(df))
## [1] 19
# For Missing Pattern Plot
library(VIM)
## Loading required package: colorspace
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
# Visualize missing data
aggr(df, numbers = TRUE)
NOw, let’s refit the Q1 model with FIML:
# Create the CFA model
pm1fi <- '
# Define the relationships in the path model
MOT ~ SEL
ENG ~ MOT
Math ~ MOT
Read ~ MOT
Sci ~ MOT
# Correlate all disturbances (just in case not default in lavaan)
ENG ~~ Math
ENG ~~ Read
ENG ~~ Sci
Math ~~ Read
Math ~~ Sci
Read ~~ Sci
'
# Fit the model
pm1fif <- sem(pm1fi, data = df, missing = 'ml.x') # fit the model
# Summarize the results
summary(pm1fif, standardized = TRUE)
## lavaan 0.6.16 ended normally after 78 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 21
##
## Number of observations 125
## Number of missing patterns 2
##
## Model Test User Model:
##
## Test statistic 6.789
## Degrees of freedom 4
## P-value (Chi-square) 0.147
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Observed
## Observed information based on Hessian
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## MOT ~
## SEL 0.409 0.078 5.264 0.000 0.409 0.447
## ENG ~
## MOT 0.848 0.123 6.870 0.000 0.848 0.524
## Math ~
## MOT 0.279 0.108 2.577 0.010 0.279 0.225
## Read ~
## MOT 0.136 0.090 1.512 0.131 0.136 0.134
## Sci ~
## MOT 0.090 0.099 0.906 0.365 0.090 0.081
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .ENG ~~
## .Math 46.640 14.207 3.283 0.001 46.640 0.307
## .Read 17.153 11.422 1.502 0.133 17.153 0.136
## .Sci 24.616 12.631 1.949 0.051 24.616 0.177
## .Math ~~
## .Read 28.725 10.266 2.798 0.005 28.725 0.258
## .Sci 10.710 10.964 0.977 0.329 10.710 0.088
## .Read ~~
## .Sci 3.801 9.109 0.417 0.676 3.801 0.037
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .MOT 29.229 3.925 7.447 0.000 29.229 3.065
## .ENG 5.149 6.210 0.829 0.407 5.149 0.334
## .Math 34.811 5.454 6.383 0.000 34.811 2.938
## .Read 42.563 4.545 9.364 0.000 42.563 4.383
## .Sci 45.892 4.994 9.189 0.000 45.892 4.326
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .MOT 72.794 9.491 7.670 0.000 72.794 0.800
## .ENG 172.913 21.872 7.906 0.000 172.913 0.726
## .Math 133.342 16.866 7.906 0.000 133.342 0.950
## .Read 92.618 11.715 7.906 0.000 92.618 0.982
## .Sci 111.821 14.145 7.905 0.000 111.821 0.993