library(pacman); p_load(lavaan, psych)
# Experiment 1
lowerExp1 <- '
1
0.82 1
0.42 0.42 1
0.5 0.6 0.4 1
0.19 0.22 0.24 0.39 1
0.46 0.51 0.33 0.5 0.26 1'
lowerCon1 <- '
1
0.73 1
0.49 0.38 1
0.37 0.42 0.39 1
0.35 0.44 0.44 0.34 1
0.45 0.37 0.41 0.57 0.36 1'
Names1 = c("MatCon", "MatDis", "LPS2GK", "LPS2NumSeq", "LPS2MenRot", "LPS2Add")
Exp1Cor = getCov(lowerExp1, names = Names1)
Con1Cor = getCov(lowerCon1, names = Names1)
Exp1SDs <- c(4.39, 4.10, 7.86, 3.65, 6.69, 6)
Con1SDs <- c(5.48, 4.83, 9.66, 3.37, 7.54, 5.98)
Exp1Cov = lavaan::cor2cov(Exp1Cor, Exp1SDs)
Con1Cov = lavaan::cor2cov(Con1Cor, Con1SDs)
Exp1Means <- c(13.59, 14.16, 34.59, 20.59, 20.98, 14.46)
Con1Means <- c(7.21, 9.07, 35.91, 20.89, 22.95, 14.79)
ExpCon1N <- 56
# Experiment 2
lowerExp2 <- '
1
0.45 1
0.37 0.8 1
0.5 0.67 0.44 1
0.22 0.73 0.24 0.32 1'
lowerCon2 <- '
1
0.38 1
0.23 0.76 1
0.46 0.57 0.22 1
0.24 0.81 0.31 0.35 1'
Names2 = c("FigMat", "LPS2Sum", "LPS2GK", "LPS2NumSeq", "LPS2MenRot")
Exp2Cor = getCov(lowerExp2, names = Names2)
Con2Cor = getCov(lowerCon2, names = Names2)
Exp2SDs <- c(6.02, 15.79, 8.87, 4.1, 8.12)
Con2SDs <- c(7.46, 14.53, 7.77, 3.77, 8.03)
Exp2Cov = lavaan::cor2cov(Exp2Cor, Exp2SDs)
Con2Cov = lavaan::cor2cov(Con2Cor, Con2SDs)
Exp2Means <- c(17.63, 79.87, 36.04, 21.57, 22.26)
Con2Means <- c(9.43, 79.22, 35.73, 21.37, 22.12)
Exp2N <- 114
Con2N <- 115
# Consolidate
Exp1Covs <- list(Exp1Cov, Con1Cov)
Exp2Covs <- list(Exp2Cov, Con2Cov)
Exp1Means <- list(Exp1Means, Con1Means)
Exp2Means <- list(Exp2Means, Con2Means)
Exp1Ns <- list(ExpCon1N, ExpCon1N)
Exp2Ns <- list(Exp2N, Con2N)
# Fit Measures
FITM <- c("chisq", "df", "npar", "cfi", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper", "aic", "bic")
# Bias Metrics
# Signed (SDI2) and unsigned (UDI2) item-level bias indices
SDI2.UDI2 = function(p, loading1, loading2, intercept1, intercept2, stdev2, fmean2, fsd2,
                     nodewidth = 0.01, lowerLV = -5, upperLV = 5) {
  LV = seq(lowerLV, upperLV, nodewidth)        # grid of latent-trait values
  DiffExpItem12 = matrix(NA, length(LV), p)    # group difference in expected item score at each node
  pdfLV2 = matrix(NA, length(LV), 1)           # latent density at each node
  SDI2numerator = matrix(NA, length(LV), p)
  UDI2numerator = matrix(NA, length(LV), p)
  SDI2 = matrix(NA, p, 1)
  UDI2 = matrix(NA, p, 1)
  for (j in 1:p) {
    for (k in 1:length(LV)) {
      DiffExpItem12[k, j] <- (intercept1[j] - intercept2[j]) + (loading1[j] - loading2[j]) * LV[k]
      pdfLV2[k] = dnorm(LV[k], mean = fmean2, sd = fsd2)
      SDI2numerator[k, j] = DiffExpItem12[k, j] * pdfLV2[k] * nodewidth
      UDI2numerator[k, j] = abs(SDI2numerator[k, j])
    }
    SDI2[j] <- sum(SDI2numerator[, j]) / stdev2[j]   # signed: offsetting biases can cancel
    UDI2[j] <- sum(UDI2numerator[, j]) / stdev2[j]   # unsigned: offsetting biases accumulate
  }
  colnames(SDI2) = "SDI2"
  colnames(UDI2) = "UDI2"
  return(list(SDI2 = round(SDI2, 4), UDI2 = round(UDI2, 4)))}
# Attach the sign of the signed index to the unsigned index
SUDI <- function(SDI, UDI){
  D <- ifelse(SDI < 0, -1, 1)
  H = D * UDI
  return(H)}
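In other words, for item j, SDI2 integrates the group difference in expected item scores, (intercept1[j] - intercept2[j]) + (loading1[j] - loading2[j]) * theta, over a normal latent distribution with mean fmean2 and SD fsd2, and scales the result by that item's SD in the anchor group (stdev2[j]); UDI2 does the same with the absolute difference, so biases in opposite directions cannot cancel. The integral is approximated by a Riemann sum over a grid running from lowerLV to upperLV in steps of nodewidth, and SUDI simply attaches the sign of SDI2 to UDI2.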
# Cohen's d Function
Cohensd <- function(M1, M2, SD1, SD2, N1 = 1, N2 = 1, rnd = 3){
  SDP = sqrt((SD1^2 + SD2^2)/2)                                        # pooled SD from the two SDs alone
  SDPW = sqrt((((N1 - 1) * SD1^2) + ((N2 - 1) * SD2^2))/(N1 + N2))     # n-weighted pooled SD (note: divides by N1 + N2, not N1 + N2 - 2)
  d = (M2 - M1)/SDP
  delta = (M2 - M1)/SD1                                                # Glass' delta uses group 1's SD
  g = (M2 - M1)/SDPW
  if (N1 <= 1 && N2 <= 1) {                                            # no sample sizes supplied: skip g
    cat(paste0("With group means of ", M1, " and ", M2," with SDs of ", SD1, " and ", SD2, ", Cohen's d is ", round(d, rnd), " Glass' Delta is ", round(delta, rnd), ". \n"))
  } else {
    cat(paste0("With group means of ", M1, " and ", M2," with SDs of ", SD1, " and ", SD2, " Cohen's d is ", round(d, rnd), " Glass' Delta is ", round(delta, rnd), " and Hedge's g is ", round(g, rnd), ". \n"))
  }
}
Schneider et al. (2020) ran two experiments in which they taught samples the rules underlying figural matrices, and test scores rose considerably. In both the experimental and control groups, figural matrix performance was related to intelligence before and after the experiment; the scores were simply boosted by more than one Cohen’s d.
Experimental manipulations that affect scores are especially interesting for theory: an experiment that genuinely boosts intelligence should leave the measurement model invariant, whereas one that merely changes measurement properties will bias a model’s loadings, intercepts, and/or residual variances. If an intervention raises a score by changing how an item is interpreted heterogeneously across people, it should change the loadings, altering the item’s scaling with respect to intelligence between the control and intervention groups. If it raises a score homogeneously, it will shift the intercepts up. In the former case, but not always the latter, the residual variance should also change, since the intervention introduces additional forces acting on and differentiating people within the intervention group. A change in residual variance could also reflect effects on item reliability, but it is harder to detect than an intercept change for the same reason that mean differences generally require smaller samples to detect than variance differences. I’ve simulated this for variance ratios in general here: https://rpubs.com/JLLJ/SDVR.
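To make the intercept case concrete, here is a minimal sketch with simulated data (entirely hypothetical numbers and variable names, not Schneider et al.’s): a uniform boost to a single indicator in one group leaves the metric (equal loadings) step intact but should fail the scalar (equal intercepts) step.
set.seed(1) # toy simulation; lavaan is already loaded above
n <- 500
make_items <- function(theta, boost = 0) { # three indicators of one latent trait
  data.frame(figmat = 0.8 * theta + boost + rnorm(length(theta), sd = 0.6),
             vocab  = 0.7 * theta + rnorm(length(theta), sd = 0.7),
             numseq = 0.6 * theta + rnorm(length(theta), sd = 0.8))
}
simdat <- rbind(cbind(make_items(rnorm(n), boost = 1), group = "trained"), # homogeneous boost on figmat only
                cbind(make_items(rnorm(n)), group = "control"))
simmod <- 'G =~ figmat + vocab + numseq'
simMetric <- cfa(simmod, data = simdat, group = "group", group.equal = "loadings", meanstructure = TRUE)
simScalar <- cfa(simmod, data = simdat, group = "group", group.equal = c("loadings", "intercepts"))
anova(simMetric, simScalar) # the scalar step should fail, flagging the shifted figmat intercept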
All that said, it could be useful to see whether Schneider et al.’s intervention really altered intelligence or merely caused measurement bias when it boosted scores. Presumably, the answer is not a change in intelligence, since the non-matrix tests were not part of the training.
Exp1Mod <- '
g =~ MatCon + MatDis + LPS2GK + LPS2NumSeq + LPS2MenRot + LPS2Add'
Exp1InitFit <- cfa(Exp1Mod,
sample.cov = Exp1Cov,
sample.mean = Exp1Means,
sample.nobs = ExpCon1N,
std.lv = T)
Con1InitFit <- cfa(Exp1Mod,
sample.cov = Con1Cov,
sample.mean = Con1Means,
sample.nobs = ExpCon1N,
std.lv = T)
fitMeasures(Exp1InitFit, FITM)
## chisq df npar cfi rmsea
## 12.518 9.000 18.000 0.971 0.084
## rmsea.ci.lower rmsea.ci.upper aic bic
## 0.000 0.184 1974.637 2011.093
fitMeasures(Con1InitFit, FITM)
## chisq df npar cfi rmsea
## 22.346 9.000 18.000 0.876 0.163
## rmsea.ci.lower rmsea.ci.upper aic bic
## 0.079 0.249 2066.590 2103.047
Exp2Mod <- '
g =~ FigMat + LPS2GK + LPS2NumSeq + LPS2MenRot'
Exp2InitFit <- cfa(Exp2Mod,
sample.cov = Exp2Cov,
sample.mean = Exp2Means,
sample.nobs = Exp2N,
std.lv = T)
Con2InitFit <- cfa(Exp2Mod,
sample.cov = Con2Cov,
sample.mean = Con2Means,
sample.nobs = Con2N,
std.lv = T)
fitMeasures(Exp2InitFit, FITM)
## chisq df npar cfi rmsea
## 0.340 2.000 12.000 1.000 0.000
## rmsea.ci.lower rmsea.ci.upper aic bic
## 0.000 0.104 2944.704 2977.539
fitMeasures(Con2InitFit, FITM)
## chisq df npar cfi rmsea
## 4.571 2.000 12.000 0.951 0.106
## rmsea.ci.lower rmsea.ci.upper aic bic
## 0.000 0.237 2989.309 3022.248
I cannot explain why the initial fit is worse for the control group in both cases, but it is consistent with sampling error across groups in these small samples, so it should not be dwelt on.
Exp1Config <- cfa(Exp1Mod,
sample.cov = Exp1Covs,
sample.mean = Exp1Means,
sample.nobs = Exp1Ns,
std.lv = T)
Exp1Metric <- cfa(Exp1Mod,
sample.cov = Exp1Covs,
sample.mean = Exp1Means,
sample.nobs = Exp1Ns,
std.lv = F,
group.equal = "loadings")
Exp1Scalar <- cfa(Exp1Mod,
sample.cov = Exp1Covs,
sample.mean = Exp1Means,
sample.nobs = Exp1Ns,
std.lv = F,
group.equal = c("loadings", "intercepts"))
Exp1ScalarP <- cfa(Exp1Mod,
sample.cov = Exp1Covs,
sample.mean = Exp1Means,
sample.nobs = Exp1Ns,
std.lv = F,
group.equal = c("loadings", "intercepts"),
group.partial = c("MatCon ~ 1", "MatDis ~ 1"))
Exp1Strict <- cfa(Exp1Mod,
sample.cov = Exp1Covs,
sample.mean = Exp1Means,
sample.nobs = Exp1Ns,
std.lv = F,
group.equal = c("loadings", "intercepts", "residuals"),
group.partial = c("MatCon ~ 1", "MatDis ~ 1"))
Exp1StrictP <- cfa(Exp1Mod,
sample.cov = Exp1Covs,
sample.mean = Exp1Means,
sample.nobs = Exp1Ns,
std.lv = F,
group.equal = c("loadings", "intercepts", "residuals"),
group.partial = c("MatCon ~ 1", "MatDis ~ 1",
"MatDis ~~ MatDis"))
# Freeing MatCon ~~ MatCon alone does not yield an acceptable model, but freeing it alongside MatDis ~~ MatDis actually makes the model fit better than the configural one (!). To avoid overfitting, I freed only what was sufficient.
Exp1LVars <- cfa(Exp1Mod,
sample.cov = Exp1Covs,
sample.mean = Exp1Means,
sample.nobs = Exp1Ns,
std.lv = F,
group.equal = c("loadings", "intercepts", "residuals", "lv.variances"),
group.partial = c("MatCon ~ 1", "MatDis ~ 1",
"MatDis ~~ MatDis"))
Exp1Means <- cfa(Exp1Mod,
sample.cov = Exp1Covs,
sample.mean = Exp1Means,
sample.nobs = Exp1Ns,
std.lv = F,
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "means"),
group.partial = c("MatCon ~ 1", "MatDis ~ 1",
"MatDis ~~ MatDis"))
round(cbind(Configural = fitMeasures(Exp1Config, FITM),
Metric = fitMeasures(Exp1Metric, FITM),
Scalar = fitMeasures(Exp1Scalar, FITM),
"Partial Scalar" = fitMeasures(Exp1ScalarP, FITM),
Strict = fitMeasures(Exp1Strict, FITM),
"Partial Strict" = fitMeasures(Exp1StrictP, FITM),
Variances = fitMeasures(Exp1LVars, FITM),
Means = fitMeasures(Exp1Means, FITM)), 3)
## Configural Metric Scalar Partial Scalar Strict
## chisq 34.865 40.156 103.483 41.686 54.317
## df 18.000 23.000 28.000 26.000 32.000
## npar 36.000 31.000 26.000 28.000 22.000
## cfi 0.926 0.925 0.668 0.931 0.902
## rmsea 0.129 0.115 0.219 0.104 0.112
## rmsea.ci.lower 0.062 0.051 0.175 0.036 0.057
## rmsea.ci.upper 0.193 0.174 0.265 0.160 0.161
## aic 4041.227 4036.518 4089.846 4032.049 4032.680
## bic 4139.093 4120.792 4160.527 4108.167 4092.487
## Partial Strict Variances Means
## chisq 46.938 47.514 48.440
## df 31.000 32.000 33.000
## npar 23.000 22.000 21.000
## cfi 0.930 0.932 0.932
## rmsea 0.096 0.093 0.091
## rmsea.ci.lower 0.028 0.023 0.020
## rmsea.ci.upper 0.149 0.146 0.144
## aic 4027.301 4025.877 4024.802
## bic 4089.826 4085.684 4081.891
pchisq(40.156 - 34.865, 5, lower.tail = F) #Configural to Metric. Pass.
## [1] 0.3814078
pchisq(103.483 - 40.156, 5, lower.tail = F) #Metric to Scalar. Abysmal fail.
## [1] 2.490724e-12
pchisq(41.686 - 40.156, 3, lower.tail = F) #Metric to Partial Scalar. Pass.
## [1] 0.6753639
pchisq(54.317 - 41.686, 6, lower.tail = F) #Partial Scalar to Strict. Fail.
## [1] 0.04928463
pchisq(46.938 - 41.686, 5, lower.tail = F) #Partial Scalar to Partial Strict. Pass
## [1] 0.3859062
pchisq(47.514 - 46.938, 1, lower.tail = F) #Partial Strict to Latent Variances. Pass
## [1] 0.4478845
pchisq(48.440 - 47.514, 1, lower.tail = F) #Latent Variances to Means. Pass
## [1] 0.3359045
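As a cross-check, lavaan’s built-in likelihood-ratio test can run the same chain of retained models in one call; the chi-square differences and p-values should match the hand-computed pchisq() comparisons above.
lavTestLRT(Exp1Config, Exp1Metric, Exp1ScalarP, Exp1StrictP, Exp1LVars, Exp1Means) # sequential nested comparisons, ordered by df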
How much bias was detected? The control group will be the SD anchor.
summary(Exp1Means, stand = T, fit = T)
## lavaan 0.6.14 ended normally after 50 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 36
## Number of equality constraints 15
##
## Number of observations per group:
## Group 1 56
## Group 2 56
##
## Model Test User Model:
##
## Test statistic 48.440
## Degrees of freedom 33
## P-value (Chi-square) 0.041
## Test statistic for each group:
## Group 1 19.889
## Group 2 28.551
##
## Model Test Baseline Model:
##
## Test statistic 257.333
## Degrees of freedom 30
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.932
## Tucker-Lewis Index (TLI) 0.938
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -1991.401
## Loglikelihood unrestricted model (H1) -1967.181
##
## Akaike (AIC) 4024.802
## Bayesian (BIC) 4081.891
## Sample-size adjusted Bayesian (SABIC) 4015.523
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.091
## 90 Percent confidence interval - lower 0.020
## 90 Percent confidence interval - upper 0.144
## P-value H_0: RMSEA <= 0.050 0.123
## P-value H_0: RMSEA >= 0.080 0.658
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.121
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
##
## Group 1 [Group 1]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## MatCon 1.000 4.211 0.854
## MatDis (.p2.) 0.932 0.088 10.535 0.000 3.924 0.938
## LPS2GK (.p3.) 1.113 0.192 5.812 0.000 4.689 0.536
## LPS2NmS (.p4.) 0.492 0.075 6.583 0.000 2.071 0.594
## LPS2MnR (.p5.) 0.684 0.162 4.221 0.000 2.880 0.404
## LPS2Add (.p6.) 0.799 0.129 6.211 0.000 3.364 0.567
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .MatCon 13.889 0.580 23.957 0.000 13.889 2.818
## .MatDis 14.439 0.477 30.261 0.000 14.439 3.451
## .LPS2GK (.16.) 35.250 0.827 42.624 0.000 35.250 4.028
## .LPS2NmS (.17.) 20.740 0.329 62.991 0.000 20.740 5.952
## .LPS2MnR (.18.) 21.965 0.674 32.593 0.000 21.965 3.080
## .LPS2Add (.19.) 14.625 0.561 26.063 0.000 14.625 2.463
## g 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .MatCon (.p7.) 6.565 1.456 4.508 0.000 6.565 0.270
## .MatDis 2.106 1.149 1.834 0.067 2.106 0.120
## .LPS2GK (.p9.) 54.615 7.682 7.110 0.000 54.615 0.713
## .LPS2NmS (.10.) 7.853 1.126 6.972 0.000 7.853 0.647
## .LPS2MnR (.11.) 42.574 5.829 7.305 0.000 42.574 0.837
## .LPS2Add (.12.) 23.948 3.400 7.043 0.000 23.948 0.679
## g (.13.) 17.736 3.336 5.317 0.000 1.000 1.000
##
##
## Group 2 [Group 2]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## MatCon 1.000 4.211 0.854
## MatDis (.p2.) 0.932 0.088 10.535 0.000 3.924 0.814
## LPS2GK (.p3.) 1.113 0.192 5.812 0.000 4.689 0.536
## LPS2NmS (.p4.) 0.492 0.075 6.583 0.000 2.071 0.594
## LPS2MnR (.p5.) 0.684 0.162 4.221 0.000 2.880 0.404
## LPS2Add (.p6.) 0.799 0.129 6.211 0.000 3.364 0.567
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .MatCon 6.911 0.580 11.920 0.000 6.911 1.402
## .MatDis 8.791 0.575 15.296 0.000 8.791 1.823
## .LPS2GK (.16.) 35.250 0.827 42.624 0.000 35.250 4.028
## .LPS2NmS (.17.) 20.740 0.329 62.991 0.000 20.740 5.952
## .LPS2MnR (.18.) 21.965 0.674 32.593 0.000 21.965 3.080
## .LPS2Add (.19.) 14.625 0.561 26.063 0.000 14.625 2.463
## g 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .MatCon (.p7.) 6.565 1.456 4.508 0.000 6.565 0.270
## .MatDis 7.853 2.134 3.681 0.000 7.853 0.338
## .LPS2GK (.p9.) 54.615 7.682 7.110 0.000 54.615 0.713
## .LPS2NmS (.10.) 7.853 1.126 6.972 0.000 7.853 0.647
## .LPS2MnR (.11.) 42.574 5.829 7.305 0.000 42.574 0.837
## .LPS2Add (.12.) 23.948 3.400 7.043 0.000 23.948 0.679
## g (.13.) 17.736 3.336 5.317 0.000 1.000 1.000
Exp1p = 6
Exp1l1 = c(0, 0, 0, 0, 0, 0); Exp1l2 = c(0, 0, 0, 0, 0, 0) # No loading differences
Exp1i1 = c(6.911, 8.791, 0, 0, 0, 0); Exp1i2 = c(13.889, 14.439, 0, 0, 0, 0) # Matrix test intercept differences (i1 = control-group intercepts, i2 = experimental-group intercepts from the model above)
Exp1sd = c(5.48, 4.83, 9.66, 3.37, 7.54, 5.98) # Control-group observed SDs (the anchor)
Exp1fm = 0
Exp1fsd = 1
Exp1ES <- SDI2.UDI2(Exp1p, Exp1l1, Exp1l2, Exp1i1, Exp1i2, Exp1sd, Exp1fm, Exp1fsd)
SUDI(Exp1ES$SDI2, Exp1ES$UDI2)
## SDI2
## [1,] -1.2734
## [2,] -1.1694
## [3,] 0.0000
## [4,] 0.0000
## [5,] 0.0000
## [6,] 0.0000
The bias induced in favor of the experimental group can be thought of as something like Hedge’s g because of where the summary statistics involved in its calculation come from (i.e., the SEM). Plainly, the experimental group received intercepts 1.27 and 1.17 g higher than expected given their latent ability levels. This compares to the total score gaps of
Cohensd(13.59, 7.21, 4.39, 5.48, 56, 56)
## With group means of 13.59 and 7.21 with SDs of 4.39 and 5.48 Cohen's d is -1.285 Glass' Delta is -1.453 and Hedge's g is -1.297.
Cohensd(14.16, 9.07, 4.10, 4.83, 56, 56)
## With group means of 14.16 and 9.07 with SDs of 4.1 and 4.83 Cohen's d is -1.136 Glass' Delta is -1.241 and Hedge's g is -1.146.
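As a sanity check on the SDI values: with the loadings held equal, the latent-trait term drops out of the integral, so each matrix test’s SDI reduces (in absolute value) to its intercept difference divided by the control-group SD.
(13.889 - 6.911)/5.48 # MatCon: ~1.27
(14.439 - 8.791)/4.83 # MatDis: ~1.17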
Randomization probably worked for this experimental comparison. Great job, Schneider et al. (2020)!
Now on to the larger experiment.
Exp2Config <- cfa(Exp2Mod,
sample.cov = Exp2Covs,
sample.mean = Exp2Means,
sample.nobs = Exp2Ns,
std.lv = T)
Exp2Metric <- cfa(Exp2Mod,
sample.cov = Exp2Covs,
sample.mean = Exp2Means,
sample.nobs = Exp2Ns,
std.lv = F,
group.equal = "loadings")
Exp2Scalar <- cfa(Exp2Mod,
sample.cov = Exp2Covs,
sample.mean = Exp2Means,
sample.nobs = Exp2Ns,
std.lv = F,
group.equal = c("loadings", "intercepts"))
## Warning in lav_object_post_check(object): lavaan WARNING: some estimated ov
## variances are negative
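Before freeing any intercepts, it is worth locating that Heywood case; one quick option is to pull the estimated residual (co)variance matrices for each group and look for the negative diagonal entry.
lavInspect(Exp2Scalar, "theta") # per-group residual (co)variances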
Exp2ScalarP <- cfa(Exp2Mod,
sample.cov = Exp2Covs,
sample.mean = Exp2Means,
sample.nobs = Exp2Ns,
std.lv = F,
group.equal = c("loadings", "intercepts"),
group.partial = "FigMat ~ 1")
Exp2Strict <- cfa(Exp2Mod,
sample.cov = Exp2Covs,
sample.mean = Exp2Means,
sample.nobs = Exp2Ns,
std.lv = F,
group.equal = c("loadings", "intercepts", "residuals"),
group.partial = "FigMat ~ 1")
Exp2LVars <- cfa(Exp2Mod,
sample.cov = Exp2Covs,
sample.mean = Exp2Means,
sample.nobs = Exp2Ns,
std.lv = F,
group.equal = c("loadings", "intercepts", "residuals", "lv.variances"),
group.partial = "FigMat ~ 1")
Exp2Means <- cfa(Exp2Mod,
sample.cov = Exp2Covs,
sample.mean = Exp2Means,
sample.nobs = Exp2Ns,
std.lv = F,
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "means"),
group.partial = "FigMat ~ 1")
round(cbind(Configural = fitMeasures(Exp2Config, FITM),
Metric = fitMeasures(Exp2Metric, FITM),
Scalar = fitMeasures(Exp2Scalar, FITM),
"Partial Scalar" = fitmeasures(Exp2ScalarP, FITM),
Strict = fitMeasures(Exp2Strict, FITM),
Variances = fitMeasures(Exp2LVars, FITM),
Means = fitMeasures(Exp2Means, FITM)), 3)
## Configural Metric Scalar Partial Scalar Strict Variances
## chisq 4.911 9.619 61.008 9.632 17.320 17.610
## df 4.000 7.000 10.000 9.000 13.000 14.000
## npar 24.000 21.000 18.000 19.000 15.000 14.000
## cfi 0.993 0.979 0.584 0.995 0.965 0.971
## rmsea 0.045 0.057 0.211 0.025 0.054 0.047
## rmsea.ci.lower 0.000 0.000 0.162 0.000 0.000 0.000
## rmsea.ci.upper 0.154 0.137 0.263 0.110 0.114 0.108
## aic 5934.014 5932.722 5978.111 5928.735 5928.423 5926.713
## bic 6016.423 6004.830 6039.918 5993.976 5979.928 5974.786
## Means
## chisq 17.771
## df 15.000
## npar 13.000
## cfi 0.977
## rmsea 0.040
## rmsea.ci.lower 0.000
## rmsea.ci.upper 0.101
## aic 5924.874
## bic 5969.512
pchisq(9.619 - 4.911, 3, lower.tail = F) #Configural to Metric. Pass.
## [1] 0.1944707
This is a very dubious “pass” that I assume comes down to low power. The drop in CFI is bad, the RMSEA is borderline (but not significantly so), the AIC and BIC both improve, and the p-value is of course fine.
pchisq(61.008 - 9.619, 3, lower.tail = F) #Metric to Scalar. Abysmal fail.
## [1] 4.042153e-11
pchisq(9.632 - 9.619, 2, lower.tail = F) #Metric to Partial Scalar. Pass.
## [1] 0.9935211
pchisq(17.320 - 9.632, 4, lower.tail = F) #Partial Scalar to Strict. Pass.
## [1] 0.1036994
This is another very dubious “pass” that I also assume comes down to low power. The drop in CFI is horrible, the change in RMSEA is horrible (but not significantly so), the AIC and BIC both improve, and the p-value is of course fine.
pchisq(17.610 - 17.320, 1, lower.tail = F) #Partial Scalar to Latent Variances. Pass
## [1] 0.5902205
pchisq(17.771 - 17.610, 1, lower.tail = F) #Latent Variances to Means. Pass
## [1] 0.6882375
Now to quantify the bias.
summary(Exp2Means, stand = T, fit = T)
## lavaan 0.6.14 ended normally after 55 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 24
## Number of equality constraints 11
##
## Number of observations per group:
## Group 1 114
## Group 2 115
##
## Model Test User Model:
##
## Test statistic 17.771
## Degrees of freedom 15
## P-value (Chi-square) 0.275
## Test statistic for each group:
## Group 1 7.321
## Group 2 10.450
##
## Model Test Baseline Model:
##
## Test statistic 134.619
## Degrees of freedom 12
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.977
## Tucker-Lewis Index (TLI) 0.982
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -2949.437
## Loglikelihood unrestricted model (H1) -2940.551
##
## Akaike (AIC) 5924.874
## Bayesian (BIC) 5969.512
## Sample-size adjusted Bayesian (SABIC) 5928.310
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.040
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.101
## P-value H_0: RMSEA <= 0.050 0.544
## P-value H_0: RMSEA >= 0.080 0.168
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.090
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
##
## Group 1 [Group 1]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## FigMat 1.000 4.123 0.611
## LPS2GK (.p2.) 0.962 0.182 5.285 0.000 3.967 0.478
## LPS2NmS (.p3.) 0.719 0.124 5.801 0.000 2.964 0.756
## LPS2MnR (.p4.) 0.861 0.173 4.981 0.000 3.550 0.442
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .FigMat 17.542 0.593 29.601 0.000 17.542 2.598
## .LPS2GK (.11.) 35.884 0.549 65.420 0.000 35.884 4.323
## .LPS2NmS (.12.) 21.470 0.259 82.843 0.000 21.470 5.474
## .LPS2MnR (.13.) 22.190 0.531 41.766 0.000 22.190 2.760
## g 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .FigMat (.p5.) 28.600 3.798 7.529 0.000 28.600 0.627
## .LPS2GK (.p6.) 53.168 5.722 9.291 0.000 53.168 0.772
## .LPS2NmS (.p7.) 6.597 1.475 4.472 0.000 6.597 0.429
## .LPS2MnR (.p8.) 52.035 5.439 9.568 0.000 52.035 0.805
## g (.p9.) 16.995 4.278 3.973 0.000 1.000 1.000
##
##
## Group 2 [Group 2]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## FigMat 1.000 4.123 0.611
## LPS2GK (.p2.) 0.962 0.182 5.285 0.000 3.967 0.478
## LPS2NmS (.p3.) 0.719 0.124 5.801 0.000 2.964 0.756
## LPS2MnR (.p4.) 0.861 0.173 4.981 0.000 3.550 0.442
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .FigMat 9.518 0.590 16.121 0.000 9.518 1.410
## .LPS2GK (.11.) 35.884 0.549 65.420 0.000 35.884 4.323
## .LPS2NmS (.12.) 21.470 0.259 82.843 0.000 21.470 5.474
## .LPS2MnR (.13.) 22.190 0.531 41.766 0.000 22.190 2.760
## g 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .FigMat (.p5.) 28.600 3.798 7.529 0.000 28.600 0.627
## .LPS2GK (.p6.) 53.168 5.722 9.291 0.000 53.168 0.772
## .LPS2NmS (.p7.) 6.597 1.475 4.472 0.000 6.597 0.429
## .LPS2MnR (.p8.) 52.035 5.439 9.568 0.000 52.035 0.805
## g (.p9.) 16.995 4.278 3.973 0.000 1.000 1.000
Exp2p = 4
Exp2l1 = c(0, 0, 0, 0); Exp2l2 = c(0, 0, 0, 0) # No loading differences
Exp2i1 = c(9.518, 0, 0, 0); Exp2i2 = c(17.542, 0, 0, 0) # Matrix test intercept differences (i1 = control group, i2 = experimental group)
Exp2sd = c(7.46, 7.77, 3.77, 8.03) # Control-group observed SDs (the anchor)
Exp2fm = 0
Exp2fsd = 1
Exp2ES <- SDI2.UDI2(Exp2p, Exp2l1, Exp2l2, Exp2i1, Exp2i2, Exp2sd, Exp2fm, Exp2fsd)
SUDI(Exp2ES$SDI2, Exp2ES$UDI2)
## SDI2
## [1,] -1.0756
## [2,] 0.0000
## [3,] 0.0000
## [4,] 0.0000
Cohensd(17.63, 9.43, 6.02, 7.46, 114, 115)
## With group means of 17.63 and 9.43 with SDs of 6.02 and 7.46 Cohen's d is -1.21 Glass' Delta is -1.362 and Hedge's g is -1.214.
Again, almost all of the gap is explained by measurement bias. What a consistent finding.
Everything came out as expected if measurement invariance testing and bias quantification work as they should: the experimental group became better at matrix tasks but not at other tasks, and its gains on the matrix tasks were essentially entirely attributable to bias in the intercepts, with the residual gaps small enough to be dismissed as little more than random variation given modest power.
The first experiment’s model, but not the second’s, significantly supported differences in the residual variances. The first model had more indicators and thus more power to detect such differences, and the second model’s estimates also looked consistent with residual variance differences; the significance tests simply failed to bear them out. Many factors influence statistical power, and this is a good example of how the number of indicators can sometimes matter more than the sample size.
This experiment provided good data for supplementing the paper discussed at https://rpubs.com/JLLJ/PMI22, which used a different questionnaire and likewise found bias where it was expected. The present experiment shows that manipulating results with a mostly or completely homogeneous effect in a sample also produces bias that can be discovered where expected (i.e., in the intercepts) and quantified accurately.
Two related trials, one described clearly and the other much less so, also support the idea that interventions or formatting differences are detectable by assessing measurement invariance. The first, and better documented, of the two is Arendasy & Sommer (2013). That study used a higher-order model with the group factors Crystallized, Fluid, and Quantitative intelligence and showed that variations in the rules involved in figural matrices tasks resulted in measurement non-invariance in the loadings, intercepts, and residual variances of the figural matrices items alone.
[The] weak measurement invariance model exhibited a significantly worse model fit than the configural invariance model. To test whether the misfit can be attributed to differences in the Gf/g-saturation of the figural matrices test between the latent classes, we allowed the factor loading of the figural matrices test on Gf to be estimated separately for both latent classes. This partial weak measurement invariance model exhibited a good fit and fitted no worse than the configural invariance model. [… The] Gf/g-saturation of the figural matrices test was lower in the latent class assumed to use response elimination more often to solve figural matrices items. […] Next, we restricted the intercepts and error variances of our cognitive ability tests to be equal across both latent classes. Unfortunately, these models failed to fit the data no worse than their less restrictive precursor. However, allowing the intercept and error variance of the figural matrices test to be estimated separately for the two latent classes resolved the problem.
Becker et al. (2016) did something similar and found essentially the same result, concluding, like Arendasy & Sommer (2013), that preventing response elimination strategies improves the validity of figural matrices tests. Unlike Arendasy & Sommer (2013), their model seems to have involved correlated group factors, with intelligence mistreated as if it were not a higher-order factor. They fit three models, one of which is difficult to interpret.
The first model, the hard-to-interpret one, grouped the matrices tests as a factor that covaried with a working memory factor, and it seemed to show evidence for up to strict invariance despite the rule differences. Unfortunately, strict invariance is not really tested in the correlated group factor model without also testing the typically structural equality of latent covariances, since things like measurement error can reside there (and may have driven their results accordingly) when every other measurement parameter is constrained. As Millsap & Olivera-Aguilar (2012) implied, the correlated group factor model can turn some parameters from structural into measurement ones. The same interpretive issue also applies to higher-order factor loadings.
In the models in which the matrices factor was covaried with intelligence, or with both intelligence and working memory, measurement invariance was rejected at every level. Unfortunately, without their raw data, it is uncertain what really went on here. Intelligence was obviously misidentified if it did not include both working memory and matrices as indicators. Ideally, their data could be reacquired and modeled properly. In any case, their results seem at least largely, if not totally, consistent with those of Arendasy & Sommer (2013) and the results presented here.
Dr. Becker has been contacted for his data and this post will be updated with results in the post-script if I receive it.
Schneider, B., Becker, N., Krieger, F., Spinath, F. M., & Sparfeldt, J. R. (2020). Teaching the underlying rules of figural matrices in a short video increases test scores. Intelligence, 82, 101473. https://doi.org/10.1016/j.intell.2020.101473
Arendasy, M. E., & Sommer, M. (2013). Reducing response elimination strategies enhances the construct validity of figural matrices. Intelligence, 41(4), 234–243. https://doi.org/10.1016/j.intell.2013.03.006
Becker, N., Schmitz, F., Falk, A. M., Feldbrügge, J., Recktenwald, D. R., Wilhelm, O., Preckel, F., & Spinath, F. M. (2016). Preventing Response Elimination Strategies Improves the Convergent Validity of Figural Matrices. Journal of Intelligence, 4(1), Article 1. https://doi.org/10.3390/jintelligence4010002
Millsap, R. E., & Olivera-Aguilar, M. (2012). Investigating measurement invariance using confirmatory factor analysis. In R. H. Hoyle (Ed.), Handbook of structural equation modeling (pp. 380-392). New York, NY: Guilford Press.
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] psych_2.2.9 lavaan_0.6-14 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.14 knitr_1.42 MASS_7.3-58.1 mnormt_2.1.1
## [5] pbivnorm_0.6.0 lattice_0.20-45 R6_2.5.1 rlang_1.0.6
## [9] quadprog_1.5-8 fastmap_1.1.0 tools_4.2.2 parallel_4.2.2
## [13] grid_4.2.2 nlme_3.1-160 xfun_0.37 cli_3.6.0
## [17] jquerylib_0.1.4 htmltools_0.5.4 yaml_2.3.7 digest_0.6.31
## [21] sass_0.4.5 cachem_1.0.6 evaluate_0.20 rmarkdown_2.20
## [25] compiler_4.2.2 bslib_0.4.2 stats4_4.2.2 jsonlite_1.8.4