library(pacman); p_load(ggplot2, lavaan, PIAAC, kirkegaard, haven)
FITM <- c("chisq", "df", "nPar", "cfi", "rmsea", "rmsea.ci.lower", "rmsea.ci.upper", "aic", "bic")
usa <- read_sas("PIAACRSAS.sas7bdat")
usa <- usa[usa$RACETHN_5CAT %in% 1:4, ]
usa$race <- ''
usa$race[usa$RACETHN_5CAT == 1] <- 'Hispanic'
usa$race[usa$RACETHN_5CAT == 2] <- 'White'
usa$race[usa$RACETHN_5CAT == 3] <- 'Black'
usa$race[usa$RACETHN_5CAT == 4] <- 'AAPI'
SDI2.UDI2 = function(p, loading1, loading2, intercept1, intercept2, stdev2,
fmean2, fsd2, nodewidth = 0.01, lowerLV = -5, upperLV = 5) {
LV = seq(lowerLV,upperLV,nodewidth)
DiffExpItem12 = matrix(NA,length(LV),p)
pdfLV2 = matrix(NA,length(LV),1)
SDI2numerator = matrix(NA,length(LV),p)
UDI2numerator = matrix(NA,length(LV),p)
SDI2 = matrix(NA,p,1)
UDI2 = matrix(NA,p,1)
for(j in 1:p){
for(k in 1:length(LV)){
DiffExpItem12[k,j] <- (intercept1[j]-intercept2[j]) + (loading1[j]-loading2[j])*LV[k]
pdfLV2[k] = dnorm(LV[k], mean=fmean2, sd=fsd2)
SDI2numerator[k,j] = DiffExpItem12[k,j]*pdfLV2[k]*nodewidth
UDI2numerator[k,j] = abs(SDI2numerator[k,j])
}
SDI2[j] <- sum(SDI2numerator[,j])/stdev2[j]
UDI2[j] <- sum(UDI2numerator[,j])/stdev2[j]
}
colnames(SDI2) = "SDI2"
colnames(UDI2) = "UDI2"
return(list(SDI2=round(SDI2,4), UDI2=round(UDI2,4)))}
SUDI <- function(SDI, UDI){
D <- ifelse(SDI < 0, -1, 1)
H = D * UDI
return(H)}
MOES <- function(T1, T2, S1, S2) {
ES = (T1 - T2) / (S1^2 - S2^2)
return(ES)}
I was tasked with performing an MGCFA of the PIAAC sample. Because it is a fairly large sample but it has few indicators, I will use a p-value threshold of .001.
PIAACG <- '
g =~ PVPSL1 + PVLIT1 + PVNUM1'
PIAACFit <- cfa(PIAACG, std.lv = T, data = usa)
summary(PIAACFit, stand = T, fit = T)
## lavaan 0.6.16 ended normally after 18 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 6
##
## Used Total
## Number of observations 3983 4755
##
## Model Test User Model:
##
## Test statistic 0.000
## Degrees of freedom 0
##
## Model Test Baseline Model:
##
## Test statistic 10012.001
## Degrees of freedom 3
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 1.000
## Tucker-Lewis Index (TLI) 1.000
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -57699.769
## Loglikelihood unrestricted model (H1) -57699.769
##
## Akaike (AIC) 115411.539
## Bayesian (BIC) 115449.277
## Sample-size adjusted Bayesian (SABIC) 115430.212
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.000
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.000
## P-value H_0: RMSEA <= 0.050 NA
## P-value H_0: RMSEA >= 0.080 NA
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.000
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## PVPSL1 37.272 0.563 66.167 0.000 37.272 0.853
## PVLIT1 42.507 0.529 80.282 0.000 42.507 0.961
## PVNUM1 45.190 0.632 71.471 0.000 45.190 0.897
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .PVPSL1 519.812 14.305 36.338 0.000 519.812 0.272
## .PVLIT1 148.877 11.304 13.170 0.000 148.877 0.076
## .PVNUM1 498.621 16.548 30.131 0.000 498.621 0.196
## g 1.000 1.000 1.000
PIAACG <- '
g =~ PVPSL1 + PVLIT1 + PVNUM1'
PIAACONFIG <- cfa(PIAACG, data = usa, std.lv = T, group = "race", orthogonal = F)
PIAACMETRIC <- cfa(PIAACG, data = usa, std.lv = F, group = "race", orthogonal = F,
group.equal = "loadings")
PIAACSCALAR <- cfa(PIAACG, data = usa, std.lv = F, group = "race", orthogonal = F,
group.equal = c("loadings", "intercepts"))
PIAACG <- '
g =~ PVPSL1 + PVLIT1 + PVNUM1
PVNUM1 ~ c(a, a, b, a)*1'
PIAACPSCALAR <- cfa(PIAACG, data = usa, std.lv = F, group = "race", orthogonal = F,
group.equal = c("loadings", "intercepts"))
PIAACSTRICT <- cfa(PIAACG, data = usa, std.lv = F, group = "race", orthogonal = F,
group.equal = c("loadings", "intercepts", "residuals"))
PIAACG <- '
g =~ PVPSL1 + PVLIT1 + PVNUM1
PVNUM1 ~ c(a, a, b, a)*1
PVNUM1 ~~ c(y, x, y, z)*PVNUM1'
PIAACPSTRICT <- cfa(PIAACG, data = usa, std.lv = F, group = "race", orthogonal = F,
group.equal = c("loadings", "intercepts", "residuals"))
PIAACLVs <- cfa(PIAACG, data = usa, std.lv = F, group = "race", orthogonal = F,
group.equal = c("loadings", "intercepts", "residuals", "lv.variances"))
PIACCMEANS <- cfa(PIAACG, data = usa, std.lv = F, group = "race", orthogonal = F,
group.equal = c("loadings", "intercepts", "residuals", "lv.variances", "means"))
round(cbind(CONFIGURAL = fitMeasures(PIAACONFIG, FITM),
METRIC = fitMeasures(PIAACMETRIC, FITM),
SCALAR = fitMeasures(PIAACSCALAR, FITM),
"Partial Scalar" = fitMeasures(PIAACPSCALAR, FITM),
STRICT = fitMeasures(PIAACSTRICT, FITM),
"Partial Strict" = fitMeasures(PIAACPSTRICT, FITM),
LVARS = fitMeasures(PIAACLVs, FITM),
MEANS = fitMeasures(PIACCMEANS, FITM)), 3)
## CONFIGURAL METRIC SCALAR Partial Scalar STRICT
## chisq 0.0 6.001 167.251 20.601 70.532
## df 0.0 6.000 12.000 11.000 20.000
## npar 36.0 30.000 24.000 25.000 16.000
## cfi 1.0 1.000 0.983 0.999 0.995
## rmsea 0.0 0.000 0.114 0.030 0.050
## rmsea.ci.lower 0.0 0.000 0.099 0.007 0.038
## rmsea.ci.upper 0.0 0.041 0.130 0.049 0.063
## aic 114809.5 114803.470 114952.719 114808.069 114840.000
## bic 115035.9 114992.163 115103.674 114965.314 114940.637
## Partial Strict LVARS MEANS
## chisq 39.869 41.504 438.311
## df 18.000 21.000 24.000
## npar 18.000 15.000 12.000
## cfi 0.998 0.998 0.956
## rmsea 0.035 0.031 0.132
## rmsea.ci.lower 0.020 0.017 0.121
## rmsea.ci.upper 0.050 0.045 0.143
## aic 114813.338 114808.972 115199.779
## bic 114926.554 114903.319 115275.256
#Metric vs Configural
1-pchisq(5.995, 6)
## [1] 0.4237504
#Partial Scalar vs Metric
1-pchisq(20.601-5.995, 5)
## [1] 0.01218542
#Partial Strict vs Partial Scalar
1-pchisq(39.869-20.601, 7)
## [1] 0.007387983
#Latent Means vs Latent Variances
1-pchisq(437.311-41.504, 3)
## [1] 0
summary(PIAACLVs, stand = T, fit = T)
## lavaan 0.6.16 ended normally after 177 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 39
## Number of equality constraints 24
##
## Number of observations per group: Used Total
## White 2903 3318
## Hispanic 382 557
## Black 497 643
## AAPI 201 237
##
## Model Test User Model:
##
## Test statistic 41.504
## Degrees of freedom 21
## P-value (Chi-square) 0.005
## Test statistic for each group:
## White 3.044
## Hispanic 8.978
## Black 16.814
## AAPI 12.668
##
## Model Test Baseline Model:
##
## Test statistic 9408.218
## Degrees of freedom 12
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.998
## Tucker-Lewis Index (TLI) 0.999
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -57389.486
## Loglikelihood unrestricted model (H1) -57368.734
##
## Akaike (AIC) 114808.972
## Bayesian (BIC) 114903.319
## Sample-size adjusted Bayesian (SABIC) 114855.656
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.031
## 90 Percent confidence interval - lower 0.017
## 90 Percent confidence interval - upper 0.045
## P-value H_0: RMSEA <= 0.050 0.988
## P-value H_0: RMSEA >= 0.080 0.000
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.024
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
##
## Group 1 [White]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## PVPSL1 1.000 35.299 0.839
## PVLIT1 (.p2.) 1.144 0.014 83.978 0.000 40.367 0.958
## PVNUM1 (.p3.) 1.184 0.016 75.838 0.000 41.788 0.892
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .PVNUM1 (a) 274.558 0.861 318.977 0.000 274.558 5.862
## .PVPSL1 (.p9.) 285.625 0.757 377.490 0.000 285.625 6.790
## .PVLIT1 (.10.) 287.467 0.780 368.730 0.000 287.467 6.826
## g 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .PVNUM1 (y) 447.400 15.994 27.973 0.000 447.400 0.204
## .PVPSL1 (.p6.) 523.712 14.286 36.659 0.000 523.712 0.296
## .PVLIT1 (.p7.) 144.206 11.082 13.013 0.000 144.206 0.081
## g (.p8.) 1246.051 37.823 32.944 0.000 1.000 1.000
##
##
## Group 2 [Hispanic]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## PVPSL1 1.000 35.299 0.839
## PVLIT1 (.p2.) 1.144 0.014 83.978 0.000 40.367 0.958
## PVNUM1 (.p3.) 1.184 0.016 75.838 0.000 41.788 0.863
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .PVNUM1 (a) 274.558 0.861 318.977 0.000 274.558 5.673
## .PVPSL1 (.p9.) 285.625 0.757 377.490 0.000 285.625 6.790
## .PVLIT1 (.10.) 287.467 0.780 368.730 0.000 287.467 6.826
## g -28.728 1.998 -14.381 0.000 -0.814 -0.814
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .PVNUM1 (x) 596.089 52.384 11.379 0.000 596.089 0.254
## .PVPSL1 (.p6.) 523.712 14.286 36.659 0.000 523.712 0.296
## .PVLIT1 (.p7.) 144.206 11.082 13.013 0.000 144.206 0.081
## g (.p8.) 1246.051 37.823 32.944 0.000 1.000 1.000
##
##
## Group 3 [Black]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## PVPSL1 1.000 35.299 0.839
## PVLIT1 (.p2.) 1.144 0.014 83.978 0.000 40.367 0.958
## PVNUM1 (.p3.) 1.184 0.016 75.838 0.000 41.788 0.892
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .PVNUM1 (b) 259.678 1.385 187.434 0.000 259.678 5.544
## .PVPSL1 (.p9.) 285.625 0.757 377.490 0.000 285.625 6.790
## .PVLIT1 (.10.) 287.467 0.780 368.730 0.000 287.467 6.826
## g -28.622 1.797 -15.929 0.000 -0.811 -0.811
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .PVNUM1 (y) 447.400 15.994 27.973 0.000 447.400 0.204
## .PVPSL1 (.p6.) 523.712 14.286 36.659 0.000 523.712 0.296
## .PVLIT1 (.p7.) 144.206 11.082 13.013 0.000 144.206 0.081
## g (.p8.) 1246.051 37.823 32.944 0.000 1.000 1.000
##
##
## Group 4 [AAPI]:
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## g =~
## PVPSL1 1.000 35.299 0.839
## PVLIT1 (.p2.) 1.144 0.014 83.978 0.000 40.367 0.958
## PVNUM1 (.p3.) 1.184 0.016 75.838 0.000 41.788 0.831
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .PVNUM1 (a) 274.558 0.861 318.977 0.000 274.558 5.461
## .PVPSL1 (.p9.) 285.625 0.757 377.490 0.000 285.625 6.790
## .PVLIT1 (.10.) 287.467 0.780 368.730 0.000 287.467 6.826
## g -6.251 2.655 -2.355 0.019 -0.177 -0.177
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .PVNUM1 (z) 781.570 90.201 8.665 0.000 781.570 0.309
## .PVPSL1 (.p6.) 523.712 14.286 36.659 0.000 523.712 0.296
## .PVLIT1 (.p7.) 144.206 11.082 13.013 0.000 144.206 0.081
## g (.p8.) 1246.051 37.823 32.944 0.000 1.000 1.000
White people will be used as the baseline group, since they are the largest in this sample and are thus the most precisely estimated.
usa %>%
group_by(race) %>%
summarise(
SD_NUM = sd(PVNUM1, na.rm = TRUE),
MEAN_NUM = mean(PVNUM1, na.rm = TRUE),
SD_SL1 = sd(PVLIT1, na.rm = TRUE),
MEAN_SL1 = mean(PVLIT1, na.rm = TRUE),
SD_LIT1 = sd(PVPSL1, na.rm = TRUE),
MEAN_LIT1 = mean(PVPSL1, na.rm = TRUE))
Pp = 3
Pl1 = c(0, 0, 0); Pl2 = c(0, 0, 0)
Pi1 = c(259.678, 0, 0); Pi2 = c(274.558, 0, 0)
Psd = c(41.36568, 43.83578, 49.89643)
Pfm = -.811; Pfsd = 1
PES <- SDI2.UDI2(Pp, Pl1, Pl2, Pi1, Pi2, Psd, Pfm, Pfsd)
SUDI(PES$SDI2, PES$UDI2)
## SDI2
## [1,] -0.3597
## [2,] 0.0000
## [3,] 0.0000
The African American group was disadvantaged by roughly -.36 d in the PVNUM1 score compared to the AAPI, Hispanic, and White groups.
This compares the residual variances of the group of White people with the group of Hispanic people, and then it compares the group of White people to the group of Asian people.
MOES(447.400, 596.089, 49.89643, 58.51150)
## [1] 0.1592058
MOES(447.400, 781.570, 49.89643, 54.12603)
## [1] 0.7595232
16% of the differences in the variances of the Hispanic group and the White group be attributed to differences in the residual variances. For the Asian group, this percentage is 76%.
I am not sure why the PIAAC shows any bias at all, but it does. However, this bias is also not especially large. The score gap between Black and White participants in the PIAAC for PVNUM1 is 268.421-214.254 = 54.167 points, whereas the standard deviations for Black and White participants are 49.896 and 50.292 points, respectively. Splitting the difference and calling it a 50-point standard deviation for both groups, reducing the gap in PVNUM1 by .36 standard deviations brings it down to 36.167 points. For comparison, the differences in PVPSL1 and PVLIT1 are 31.711 and 36.217 points.
The biased composite score difference is .925 d, but the debiased composite score gap is .777 d. On the other hand, the difference in g factor scores is .811 g. Factor scores gaps for the other groups were somewhat strange. As a whole, the Hispanic group performed .814 g worse than the White group and the Asian and Pacific Islander group performed .177 g worse than the White group.
Gunn, H. J., Grimm, K. J., & Edwards, M. C. (2020). Evaluation of Six Effect Size Measures of Measurement Non-Invariance for Continuous Outcomes. Structural Equation Modeling: A Multidisciplinary Journal, 27(4), 503–514. https://doi.org/10.1080/10705511.2019.1689507