In the mediation and moderation Facebook group, a question was asked about latent interaction terms and problems with model fit statistics. I remember the package ‘semTools’ offered several methods for creating latent interaction terms, so I decided to throw this tutorial together to see if I could figure out what might be happening.
Load relevant packages (really only need ‘lavaan’ and ‘semTools’)
library(lavaan)
library(semTools)
library(mike) # a few functions I made for printing lavaan results
library(stargazer) # makes nice tables
Read in data. For this example, I slapped generic names on some data from my thesis.
data <- read.csv("thesis.data.csv",header=TRUE)
data <- data[,c(2:7,26:28,29:35)]
names(data) <- c('x1','x2','x3','z1','z2','z3','w1','w2','w3','y1','y2','y3','y4','y5','y6','y7')
cor <- cor(data)
cor[upper.tri(cor, diag=TRUE)] <- as.numeric("")
stargazer(cor[,1:15],
summary = FALSE,
type = "html",
digits = 2,
digits.extra = 0,
initial.zero = FALSE)
| x1 | x2 | x3 | z1 | z2 | z3 | w1 | w2 | w3 | y1 | y2 | y3 | y4 | y5 | y6 | |
| x1 | |||||||||||||||
| x2 | .50 | ||||||||||||||
| x3 | .57 | .38 | |||||||||||||
| z1 | -.35 | -.19 | -.29 | ||||||||||||
| z2 | -.33 | -.21 | -.30 | .66 | |||||||||||
| z3 | -.42 | -.21 | -.33 | .61 | .57 | ||||||||||
| w1 | .12 | .13 | .04 | -.15 | -.07 | -.09 | |||||||||
| w2 | .08 | .14 | .05 | -.18 | -.09 | -.13 | .59 | ||||||||
| w3 | .11 | .13 | .07 | -.11 | -.07 | -.08 | .51 | .60 | |||||||
| y1 | .03 | .07 | -.13 | -.16 | -.12 | -.09 | .32 | .37 | .37 | ||||||
| y2 | .16 | .09 | .04 | -.36 | -.32 | -.24 | .38 | .35 | .34 | .57 | |||||
| y3 | .19 | .13 | .09 | -.29 | -.24 | -.23 | .30 | .29 | .29 | .48 | .71 | ||||
| y4 | .19 | .08 | .02 | -.33 | -.29 | -.25 | .37 | .36 | .31 | .45 | .63 | .62 | |||
| y5 | .19 | .10 | .03 | -.19 | -.17 | -.15 | .31 | .28 | .24 | .41 | .43 | .40 | .43 | ||
| y6 | .11 | .11 | -0.00 | -.13 | -.12 | -.04 | .31 | .36 | .30 | .49 | .43 | .42 | .43 | .57 | |
| y7 | .15 | .06 | .04 | -.14 | -.03 | -.05 | .28 | .20 | .23 | .32 | .38 | .39 | .37 | .39 | .41 |
Specify a model without interaction term.
m1.syntax <-'
# exogeneous latent factors
x =~ x1 + x2 + x3
z =~ z1 + z2 + z3
w =~ w1 + w2 + w3
# endogenous latent factor
y =~ y1 + y2 + y3 + y4 + y5 + y6 + y7
# structural model
y ~ x + z + w
'
Fit the data - I’m using maximum likelihood with robust standard errors. And store fit statistics.
m1.fit <-sem(m1.syntax, data=data, std.lv=TRUE, estimator="mlr")
model1 <- semFitR(m1.fit)
Create interaction term - I decided to compare double mean centering vs. orthogonalizing.
data.dmc <- indProd(data,var1 = c('x1','x2','x3'), var2 = c('z1','z2','z3'), match=FALSE, meanC=TRUE, residualC=FALSE, doubleMC=TRUE)
data.orth <- orthogonalize(data, c('x1','x2','x3'), c('z1','z2','z3'))
Specify and fit double mean centered (dmc) model with restricted interaction effect.
m2.dmc.syntax <-'
# exogeneous latent factors
x =~ x1 + x2 + x3
z =~ z1 + z2 + z3
w =~ w1 + w2 + w3
# interaction latent factor - ***DOUBLE MEAN CENTERED***
xz =~ x1.z1 + x1.z2 + x1.z3 + x2.z1 + x2.z2 + x2.z3 + x3.z1 + x3.z2 + x3.z3
# endogenous latent factor
y =~ y1 + y2 + y3 + y4 + y5 + y6 + y7
# structural model
y ~ x + z + w + 0*xz
'
m2.dmc.fit <-sem(m2.dmc.syntax, data=data.dmc, std.lv=TRUE, estimator="mlr")
model2.dmc <- semFitR(m2.dmc.fit)
Specify and fit orthogonalized (orth) model with restricted interaction effect.
m2.orth.syntax <-'
# exogeneous latent factors
x =~ x1 + x2 + x3
z =~ z1 + z2 + z3
w =~ w1 + w2 + w3
# interaction latent factor - ***ORTHOGONALIZED***
xz =~ x1.z1 + x2.z2 + x3.z3
# endogenous latent factor
y =~ y1 + y2 + y3 + y4 + y5 + y6 + y7
# structural model
y ~ x + z + w + 0*xz
'
m2.orth.fit <-sem(m2.orth.syntax, data=data.orth, std.lv=TRUE, estimator="mlr")
model2.orth <- semFitR(m2.orth.fit)
Specify and fit double mean centered (dmc) model with freely estimated interaction effect.
m3.dmc.syntax <-'
# exogeneous latent factors
x =~ x1 + x2 + x3
z =~ z1 + z2 + z3
w =~ w1 + w2 + w3
# interaction latent factor - ***DOUBLE MEAN CENTERED***
xz =~ x1.z1 + x1.z2 + x1.z3 + x2.z1 + x2.z2 + x2.z3 + x3.z1 + x3.z2 + x3.z3
# endogenous latent factor
y =~ y1 + y2 + y3 + y4 + y5 + y6 + y7
# structural model
y ~ x + z + w + xz
'
m3.dmc.fit <-sem(m3.dmc.syntax, data=data.dmc, std.lv=TRUE, estimator="mlr")
model3.dmc <- semFitR(m3.dmc.fit)
Specify and fit orthogonalized (orth) model with freely estimated interaction effect.
m3.orth.syntax <-'
# exogeneous latent factors
x =~ x1 + x2 + x3
z =~ z1 + z2 + z3
w =~ w1 + w2 + w3
# interaction latent factor - ***ORTHOGONALIZED***
xz =~ x1.z1 + x2.z2 + x3.z3
# endogenous latent factor
y =~ y1 + y2 + y3 + y4 + y5 + y6 + y7
# structural model
y ~ x + z + w + xz
'
m3.orth.fit <-sem(m3.orth.syntax, data=data.orth, std.lv=TRUE, estimator="mlr")
model3.orth <- semFitR(m3.orth.fit)
For both double mean centering and orthogonalizing, model 2 is significantly worse than model 3.
anova(m2.dmc.fit, m3.dmc.fit)
## Scaled Chi Square Difference Test (method = "satorra.bentler.2001")
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## m3.dmc.fit 265 28415 28766 1593.9
## m2.dmc.fit 266 28417 28763 1597.6 4.4172 1 0.03558 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m2.orth.fit, m3.orth.fit)
## Scaled Chi Square Difference Test (method = "satorra.bentler.2001")
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## m3.orth.fit 142 21065 21341 375.16
## m2.orth.fit 143 21070 21342 381.90 5.9512 1 0.01471 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
But when both methods are compared using a table with fit indices from all models, it’s clear the double mean centering method produced much worse overall model fit.
table <- data.frame(rbind(model1, model2.dmc, model3.dmc, model2.orth, model3.orth))
names(table) <- c('chisq', 'df', 'p', 'rmsea', 'lower', 'upper', 'cfi', 'tli', 'srmr')
stargazer(table[,c(1:4,7:9)],
summary=FALSE,
type='html',
digits = 2,
digits.extra = 0,
initial.zero = FALSE,
title='Model Fit Statistics')
| chisq | df | p | rmsea | cfi | tli | srmr | |
| model1 | 282.19 | 98 | 0 | .06 | .92 | .91 | .05 |
| model2.dmc | 998.55 | 266 | 0 | .08 | .74 | .71 | .07 |
| model3.dmc | 994.45 | 265 | 0 | .08 | .74 | .71 | .07 |
| model2.orth | 316.98 | 143 | 0 | .05 | .92 | .91 | .05 |
| model3.orth | 311.26 | 142 | 0 | .05 | .93 | .91 | .05 |
I don’t know enough to explain why this is the case – or even why orthogonalizing is different from double mean centering (it could be that using only ‘matching’ terms would work), but I do know that Todd Little has written extensively about this.
And that’s it!