load("/Users/TARDIS/Dropbox/SEM/ICPSR_34877/DS0002/34877-0002-Data.rda")
# select only children for which we have both fall and spring participation in English
data <- subset(da34877.0002, as.numeric(ASMSTATPF)==2 & as.numeric(ASMSTATPS)==2)

# classroom level data
load("/Users/TARDIS/Dropbox/SEM/ICPSR_34877/DS0001/34877-0001-Data.rda")

# =====================================
# EDA on language measures
child_info <- subset(data, select=c("ASMTAGEPF", "ASMTAGEPS", "CHGENP", "CHMATEDNP", "CHINCOMNP", "POORP"))
child_info$POORP <- as.numeric(child_info$POORP) - 1
child_info$CHGENP <- as.numeric(child_info$CHGENP) - 1
fall_lit <- subset(data, select=c("LETTEREPF", "PPVREPF", "OWLSEPF",  "WJ21AEPF", "CSLANGPF", "EWTNMEPF"))
spring_lit <- subset(data, select=c("LETTEREPS", "PPVREPS", "OWLSEPS", "WJ21AEPS", "CSLANGPS", "EWTNMEPS"))

fall_comb <- fall_lit
spring_comb <- spring_lit
colnames(fall_comb) <- c("LETTEREP", "PPVREP",  "OWLSEP", "WJ21AEP", "CSLANGP", "EWTNMEP")
colnames(spring_comb) <- c("LETTEREP", "PPVREP", "OWLSEP", "WJ21AEP", "CSLANGP", "EWTNMEP")
fall_comb$time <- 0
spring_comb$time <- 1
comb_measures <- rbind(fall_comb, spring_comb)
comb_measures$time <- as.factor(comb_measures$time)

# histograms
require(reshape2)

## Loading required package: reshape2

melt <- melt(comb_measures)

## Using time as id variables

require(ggplot2)

## Loading required package: ggplot2

ggplot(melt,aes(x = value, fill=time)) + 
  facet_wrap(~variable,scales = "free") + 
  geom_histogram(position="identity", alpha=.5)

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-1

# EDA: 
# The distributions for PPVT and OWLS look good, CSLANGP looks okay (a little closer to uniform than normal). LETTER (how many written letters the child can correctly identify, min=0 max=26) appears to have a floor effect in the fall and a ceiling effect in the spring. WJ21AEP (rhyming) is positively skewed at both timepoints, but more so in the fall. 
# The only super problematic variable is EWTNM (what percent of his or her name the child can write legibly). There is a pronounced ceiling effect: The mode is 100% in both fall and spring (and it's even more pronounced in the spring than in the fall)
# I'll drop that variable since it's going to mess with my covariance matrix, which will throw off everything. 
fall_lit <- subset(data, select=c("LETTEREPF", "PPVREPF", "OWLSEPF",  "WJ21AEPF", "CSLANGPF"))
spring_lit <- subset(data, select=c("LETTEREPS", "PPVREPS", "OWLSEPS", "WJ21AEPS", "CSLANGPS"))

# regenerate data for histograms
comb_measures <- subset(comb_measures, select=c("LETTEREP", "PPVREP", "OWLSEP", "WJ21AEP", "CSLANGP", "time"))

# histgrams
melt <- melt(comb_measures)

## Using time as id variables

ggplot(melt,aes(x = value, fill=time)) + 
  facet_wrap(~variable,scales = "free_x") + 
  geom_histogram(position="identity", alpha=.5)

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

plot of chunk unnamed-chunk-2


# histograms for child info (SES, etc.)
melt <- melt(child_info, id.vars=NULL)
ggplot(melt,aes(x = value)) + 
  facet_wrap(~variable,scales = "free") + 
  geom_histogram()

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## Warning: position_stack requires constant width: output may be incorrect
## Warning: position_stack requires constant width: output may be incorrect

plot of chunk unnamed-chunk-2

# corr matrices
round(cor(fall_lit, use="pairwise.complete.obs"), 3)

##           LETTEREPF PPVREPF OWLSEPF WJ21AEPF CSLANGPF
## LETTEREPF     1.000   0.339   0.277    0.392    0.461
## PPVREPF       0.339   1.000   0.646    0.493    0.371
## OWLSEPF       0.277   0.646   1.000    0.497    0.354
## WJ21AEPF      0.392   0.493   0.497    1.000    0.356
## CSLANGPF      0.461   0.371   0.354    0.356    1.000

round(cor(spring_lit, use="pairwise.complete.obs"), 3)

##           LETTEREPS PPVREPS OWLSEPS WJ21AEPS CSLANGPS
## LETTEREPS     1.000   0.326   0.269    0.365    0.470
## PPVREPS       0.326   1.000   0.626    0.526    0.336
## OWLSEPS       0.269   0.626   1.000    0.484    0.342
## WJ21AEPS      0.365   0.526   0.484    1.000    0.377
## CSLANGPS      0.470   0.336   0.342    0.377    1.000

# the pattern looks very similar in fall and spring

# PCA
require(psych)

## Loading required package: psych
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:ggplot2':
## 
##     %+%

# note you need to specify the number of factors to extract. I ran it first and got the scree plot, which helped me decide to extract 2.
fit <- principal(comb_measures[1:5], rotate="promax", nfactors=5) 

plot(fit$values,type="lines") # scree plot

## Warning: plot type 'lines' will be truncated to first character

abline(h=1,lty=2) # add a line at eigen value = 1

plot of chunk unnamed-chunk-4


fit <- principal(comb_measures[1:5], rotate="promax", nfactors=2)
summary(fit)

## 
## Factor analysis with Call: principal(r = comb_measures[1:5], nfactors = 2, rotate = "promax")
## 
## Test of the hypothesis that 2 factors are sufficient.
## The degrees of freedom for the model is 1  and the objective function was  0.49 
## The number of observations was  4238  with Chi Square =  2072  with prob <  0 
## 
##  With component correlations of 
##      PC1  PC2
## PC1 1.00 0.53
## PC2 0.53 1.00

loadings(fit)

## 
## Loadings:
##          PC1    PC2   
## LETTEREP         0.932
## PPVREP    0.860       
## OWLSEP    0.958 -0.149
## WJ21AEP   0.638  0.222
## CSLANGP          0.832
## 
##                  PC1   PC2
## SS loadings    2.074 1.634
## Proportion Var 0.415 0.327
## Cumulative Var 0.415 0.742

# =====================================
# CFA 
df <- cbind(fall_lit, spring_lit, child_info)

require(lavaan)

## Loading required package: lavaan
## This is lavaan 0.5-16
## lavaan is BETA software! Please report any bugs.

model <- '
fall_literacy =~ LETTEREPF + PPVREPF + OWLSEPF + WJ21AEPF + CSLANGPF
spring_literacy =~ LETTEREPS + PPVREPS + OWLSEPS + WJ21AEPS + CSLANGPS 
'

fit <- cfa(model, data=df)

## Warning: lavaan WARNING: covariance matrix of latent variables is not
## positive definite; use inspect(fit,"cov.lv") to investigate.

summary(fit, fit.measures=TRUE, standardized = TRUE)

## lavaan (0.5-16) converged normally after  77 iterations
## 
##                                                   Used       Total
##   Number of observations                          1715        2119
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic             3058.511
##   Degrees of freedom                                34
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             9933.629
##   Degrees of freedom                                45
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.694
##   Tucker-Lewis Index (TLI)                       0.595
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -51098.595
##   Loglikelihood unrestricted model (H1)     -49569.339
## 
##   Number of free parameters                         21
##   Akaike (AIC)                              102239.190
##   Bayesian (BIC)                            102353.580
##   Sample-size adjusted Bayesian (BIC)       102286.865
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.228
##   90 Percent Confidence Interval          0.221  0.235
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.115
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
##                    Estimate  Std.err  Z-value  P(>|z|)   Std.lv  Std.all
## Latent variables:
##   fall_literacy =~
##     LETTEREPF         1.000                               4.752    0.526
##     PPVREPF           2.941    0.130   22.646    0.000   13.975    0.789
##     OWLSEPF           1.991    0.091   21.946    0.000    9.460    0.742
##     WJ21AEPF          0.430    0.021   20.174    0.000    2.043    0.639
##     CSLANGPF          0.095    0.005   17.427    0.000    0.450    0.511
##   spring_literacy =~
##     LETTEREPS         1.000                               4.822    0.514
##     PPVREPS           2.830    0.128   22.083    0.000   13.646    0.785
##     OWLSEPS           1.883    0.088   21.293    0.000    9.079    0.729
##     WJ21AEPS          0.549    0.027   20.066    0.000    2.645    0.653
##     CSLANGPS          0.101    0.006   17.372    0.000    0.489    0.518
## 
## Covariances:
##   fall_literacy ~~
##     spring_litrcy    24.975    1.639   15.241    0.000    1.090    1.090
## 
## Variances:
##     LETTEREPF        59.130    2.054                     59.130    0.724
##     PPVREPF         118.211    4.600                    118.211    0.377
##     OWLSEPF          72.931    2.706                     72.931    0.449
##     WJ21AEPF          6.035    0.214                      6.035    0.591
##     CSLANGPF          0.572    0.020                      0.572    0.739
##     LETTEREPS        64.603    2.240                     64.603    0.735
##     PPVREPS         116.244    4.494                    116.244    0.384
##     OWLSEPS          72.678    2.670                     72.678    0.469
##     WJ21AEPS          9.405    0.334                      9.405    0.573
##     CSLANGPS          0.650    0.023                      0.650    0.731
##     fall_literacy    22.581    1.963                      1.000    1.000
##     spring_litrcy    23.254    2.070                      1.000    1.000

# The argument standardized=TRUE augments the output with standardized parameter values. Two extra columns of standardized parameter values are printed. In the first column (labeled Std.lv), only the latent variables are standardized. In the second column (labeled Std.all), both latent and observed variables are standardized. The latter is often called the 'completely standardized solution'.
# The fit is pretty bad (RMSEA=.228, CFI=.694). Check mod indices to see where the problem might be.

varTable(fit)

##         name idx nobs    type exo user   mean     var nlev lnam
## 1  LETTEREPF   1 2116 numeric   0    0  8.699  81.520    0     
## 2    PPVREPF   2 2118 numeric   0    0 52.441 332.503    0     
## 3    OWLSEPF   3 2109 numeric   0    0 91.590 170.417    0     
## 4   WJ21AEPF   4 2117 numeric   0    0  2.261  10.427    0     
## 5   CSLANGPF   5 1992 numeric   0    0  2.361   0.786    0     
## 6  LETTEREPS   6 2119 numeric   0    0 14.252  87.813    0     
## 7    PPVREPS   7 2118 numeric   0    0 62.060 315.679    0     
## 8    OWLSEPS   8 2118 numeric   0    0 94.402 162.442    0     
## 9   WJ21AEPS   9 2119 numeric   0    0  3.764  16.357    0     
## 10  CSLANGPS  10 1809 numeric   0    0  3.088   0.900    0

inspect(fit,"cov.lv")

##                 fll_lt sprng_
## fall_literacy   22.58        
## spring_literacy 24.98  23.25


mi <- modindices(fit)
# show modification indices, sorting by modification index in decending order
subset(mi[with(mi, order(-mi)), ], mi>4)

##                lhs op       rhs      mi     epc sepc.lv sepc.all sepc.nox
## 1        LETTEREPF ~~ LETTEREPS 892.078  46.326  46.326    0.547    0.547
## 2          OWLSEPF ~~   OWLSEPS 388.621  42.539  42.539    0.268    0.268
## 3          PPVREPF ~~   PPVREPS 368.752  75.750  75.750    0.246    0.246
## 4         CSLANGPF ~~  CSLANGPS 336.934   0.281   0.281    0.338    0.338
## 5         WJ21AEPF ~~  WJ21AEPS 219.913   2.974   2.974    0.230    0.230
## 6        LETTEREPF ~~  CSLANGPF 146.497   1.735   1.735    0.218    0.218
## 7        LETTEREPS ~~  CSLANGPS 129.611   1.817   1.817    0.206    0.206
## 8        LETTEREPF ~~   OWLSEPS 125.862 -19.458 -19.458   -0.173   -0.173
## 9        LETTEREPF ~~  CSLANGPS 123.207   1.728   1.728    0.203    0.203
## 10         OWLSEPF ~~ LETTEREPS 110.533 -19.181 -19.181   -0.161   -0.161
## 11        CSLANGPF ~~   PPVREPS  87.118  -2.066  -2.066   -0.135   -0.135
## 12       LETTEREPF ~~   PPVREPS  85.045 -20.856 -20.856   -0.133   -0.133
## 13       LETTEREPF ~~   OWLSEPF  78.884 -14.925 -14.925   -0.130   -0.130
## 14         PPVREPF ~~  WJ21AEPS  63.684  -7.783  -7.783   -0.109   -0.109
## 15         PPVREPF ~~  CSLANGPS  58.369  -1.833  -1.833   -0.110   -0.110
## 16         PPVREPF ~~ LETTEREPS  51.227 -17.093 -17.093   -0.103   -0.103
## 17       LETTEREPS ~~   OWLSEPS  48.508 -12.127 -12.127   -0.104   -0.104
## 18        WJ21AEPF ~~   PPVREPS  47.313  -5.246  -5.246   -0.094   -0.094
## 19         OWLSEPF ~~  CSLANGPS  46.616  -1.251  -1.251   -0.104   -0.104
## 20         OWLSEPF ~~  WJ21AEPS  46.232  -5.001  -5.001   -0.097   -0.097
## 21        CSLANGPF ~~ LETTEREPS  45.105   1.023   1.023    0.124    0.124
## 22        WJ21AEPF ~~   OWLSEPS  38.365  -3.582  -3.582   -0.090   -0.090
## 23         PPVREPS ~~  CSLANGPS  36.149  -1.358  -1.358   -0.083   -0.083
## 24         PPVREPF ~~   OWLSEPF  34.433  16.334  16.334    0.072    0.072
## 25        CSLANGPF ~~   OWLSEPS  31.412  -0.952  -0.952   -0.087   -0.087
## 26       LETTEREPF ~~  WJ21AEPF  28.024   2.499   2.499    0.087    0.087
## 27         PPVREPS ~~   OWLSEPS  24.228  13.292  13.292    0.061    0.061
## 28       LETTEREPS ~~   PPVREPS  24.124 -11.048 -11.048   -0.068   -0.068
## 29       LETTEREPF ~~   PPVREPF  22.447 -10.345 -10.345   -0.065   -0.065
## 30   fall_literacy =~  WJ21AEPS  12.431   0.231   1.097    0.271    0.271
## 31         PPVREPF ~~   OWLSEPS  11.761  -9.934  -9.934   -0.045   -0.045
## 32       LETTEREPS ~~  WJ21AEPS   9.420   1.891   1.891    0.050    0.050
## 33         PPVREPF ~~  WJ21AEPF   8.908  -2.177  -2.177   -0.038   -0.038
## 34         OWLSEPS ~~  CSLANGPS   7.953  -0.493  -0.493   -0.042   -0.042
## 35         OWLSEPF ~~   PPVREPS   7.651  -8.023  -8.023   -0.036   -0.036
## 36         PPVREPF ~~  CSLANGPF   7.247  -0.576  -0.576   -0.037   -0.037
## 37        WJ21AEPS ~~  CSLANGPS   6.077   0.152   0.152    0.040    0.040
## 38        CSLANGPF ~~  WJ21AEPS   5.540  -0.140  -0.140   -0.039   -0.039
## 39        WJ21AEPF ~~  CSLANGPF   5.225   0.106   0.106    0.038    0.038
## 40         OWLSEPF ~~  CSLANGPF   5.147  -0.374  -0.374   -0.033   -0.033
## 41 spring_literacy =~ LETTEREPF   4.872  -0.301  -1.453   -0.161   -0.161
## 42 spring_literacy =~  CSLANGPF   4.669   0.029   0.138    0.157    0.157
## 43   fall_literacy =~ LETTEREPS   4.570  -0.309  -1.466   -0.156   -0.156

# The biggest mi's are all covariances between indicators from the same method (e.g. the Woodcock-Johnson measures, PPVT fall and spring, etc.)
# Add method factors: WJ, PPVT, OWLS, LETTER, Teacher Report (CSLANGP)

# =====================================
# Adding Method Factors
model <- '
# measurement model
fall_literacy =~ LETTEREPF + PPVREPF + OWLSEPF + WJ21AEPF + CSLANGPF
spring_literacy =~ LETTEREPS + PPVREPS + OWLSEPS + WJ21AEPS + CSLANGPS 

# method factors
WJ =~ WJ21AEPF + WJ21AEPS
PPVT =~ PPVREPF + PPVREPS
OWLS =~ OWLSEPF + OWLSEPS
LETTER =~ LETTEREPF + LETTEREPS
Teacher =~ CSLANGPF + CSLANGPS
'
fit <- sem(model, data=df)

## Warning: lavaan WARNING: could not compute standard errors!
## 
## Warning: lavaan WARNING: some estimated variances are negative
## Warning: lavaan WARNING: covariance matrix of latent variables is not positive definite; use inspect(fit,"cov.lv") to investigate.
## Warning: lavaan WARNING: residual covariance matrix is not positive definite; use inspect(fit,"cov.ov") to investigate.

# It won't run.

# =====================================
# Correlated errors by method instead
model <- '
# measurement model
fall_literacy =~ LETTEREPF + PPVREPF + OWLSEPF + WJ21AEPF + CSLANGPF
spring_literacy =~ LETTEREPS + PPVREPS + OWLSEPS + WJ21AEPS + CSLANGPS 

# correlated errors
WJ21AEPF ~~ WJ21AEPS
PPVREPF ~~ PPVREPS
OWLSEPF ~~ OWLSEPS
LETTEREPF ~~ LETTEREPS
CSLANGPF ~~ CSLANGPS
'
fit <- sem(model, data=df)
summary(fit, fit.measures=TRUE, standardized = TRUE)

## lavaan (0.5-16) converged normally after 133 iterations
## 
##                                                   Used       Total
##   Number of observations                          1715        2119
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic              423.081
##   Degrees of freedom                                29
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             9933.629
##   Degrees of freedom                                45
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.960
##   Tucker-Lewis Index (TLI)                       0.938
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -49780.879
##   Loglikelihood unrestricted model (H1)     -49569.339
## 
##   Number of free parameters                         26
##   Akaike (AIC)                               99613.759
##   Bayesian (BIC)                             99755.385
##   Sample-size adjusted Bayesian (BIC)        99672.786
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.089
##   90 Percent Confidence Interval          0.082  0.097
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.066
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
##                    Estimate  Std.err  Z-value  P(>|z|)   Std.lv  Std.all
## Latent variables:
##   fall_literacy =~
##     LETTEREPF         1.000                               4.634    0.512
##     PPVREPF           2.983    0.150   19.838    0.000   13.825    0.781
##     OWLSEPF           2.046    0.105   19.506    0.000    9.482    0.744
##     WJ21AEPF          0.451    0.025   18.297    0.000    2.090    0.653
##     CSLANGPF          0.099    0.006   16.007    0.000    0.458    0.520
##   spring_literacy =~
##     LETTEREPS         1.000                               4.671    0.498
##     PPVREPS           2.901    0.151   19.239    0.000   13.553    0.780
##     OWLSEPS           1.936    0.103   18.740    0.000    9.041    0.726
##     WJ21AEPS          0.589    0.032   18.184    0.000    2.750    0.679
##     CSLANGPS          0.104    0.007   15.601    0.000    0.487    0.516
## 
## Covariances:
##   WJ21AEPF ~~
##     WJ21AEPS          2.995    0.232   12.900    0.000    2.995    0.416
##   PPVREPF ~~
##     PPVREPS          63.557    5.173   12.286    0.000   63.557    0.530
##   OWLSEPF ~~
##     OWLSEPS          38.787    2.754   14.082    0.000   38.787    0.532
##   LETTEREPF ~~
##     LETTEREPS        46.096    2.031   22.699    0.000   46.096    0.729
##   CSLANGPF ~~
##     CSLANGPS          0.290    0.018   16.383    0.000    0.290    0.477
##   fall_literacy ~~
##     spring_litrcy    20.912    1.910   10.949    0.000    0.966    0.966
## 
## Variances:
##     LETTEREPF        60.412    2.219                     60.412    0.738
##     PPVREPF         121.879    6.279                    121.879    0.389
##     OWLSEPF          72.512    3.362                     72.512    0.446
##     WJ21AEPF          5.858    0.238                      5.858    0.573
##     CSLANGPF          0.564    0.021                      0.564    0.729
##     LETTEREPS        66.237    2.421                     66.237    0.752
##     PPVREPS         118.048    6.078                    118.048    0.391
##     OWLSEPS          73.269    3.300                     73.269    0.473
##     WJ21AEPS          8.851    0.369                      8.851    0.539
##     CSLANGPS          0.654    0.024                      0.654    0.734
##     fall_literacy    21.474    2.043                      1.000    1.000
##     spring_litrcy    21.818    2.146                      1.000    1.000

# The argument standardized=TRUE augments the output with standardized parameter values. Two extra columns of standardized parameter values are printed. In the first column (labeled Std.lv), only the latent variables are standardized. In the second column (labeled Std.all), both latent and observed variables are standardized. The latter is often called the 'completely standardized solution'.
# Way better fit! Still not ideal, but good (RMSEA=.089, CFI=.960).

mi <- modindices(fit)
# show modification indices, sorting by modification index in decending order (and only showing mi's > 4)
subset(mi[with(mi, order(-mi)), ], mi>4)

##                lhs op       rhs     mi    epc sepc.lv sepc.all sepc.nox
## 1        LETTEREPF ~~  CSLANGPF 43.459  0.589   0.589    0.074    0.074
## 2        LETTEREPS ~~  CSLANGPS 20.951  0.459   0.459    0.052    0.052
## 3        LETTEREPF ~~  WJ21AEPF 19.226  1.344   1.344    0.046    0.046
## 4        LETTEREPF ~~   OWLSEPF 14.687 -4.059  -4.059   -0.035   -0.035
## 5          PPVREPS ~~  CSLANGPS 14.682 -0.700  -0.700   -0.043   -0.043
## 6          PPVREPF ~~  WJ21AEPF 12.693 -2.176  -2.176   -0.038   -0.038
## 7          PPVREPF ~~   OWLSEPF  5.954  5.480   5.480    0.024    0.024
## 8  spring_literacy =~   PPVREPF  5.619  1.630   7.613    0.430    0.430
## 9          OWLSEPF ~~   PPVREPS  5.040  4.768   4.768    0.022    0.022
## 10        CSLANGPF ~~ LETTEREPS  4.925 -0.206  -0.206   -0.025   -0.025
## 11       LETTEREPF ~~  CSLANGPS  4.493  0.203   0.203    0.024    0.024

# Interestingly, the biggest mi is now a covariance between how many letters the child can identify and teacher rating of literacy, which makes sense now that I think about it, but adding that covariance would be pretty posthoc. And it could easily be a fluke. Either way, I don't want to add a covariance there for fear of over-fitting.

# =====================================
# Measurement invariance in fall vs. spring?

# rearranging the dataset so time is represented as a factor
f <- subset(data, select=c("ICPSR_STUDY_ID", "LETTEREPF", "PPVREPF", "OWLSEPF",  "WJ21AEPF", "CSLANGPF"))
s <- subset(data, select=c("ICPSR_STUDY_ID", "LETTEREPS", "PPVREPS", "OWLSEPS", "WJ21AEPS", "CSLANGPS")) 
child <- subset(data, select=c("ICPSR_STUDY_ID", "ASMTAGEPF", "ASMTAGEPS", "CHGENP", "CHMATEDNP", "CHINCOMNP", "POORP"))
child$POORP <- as.numeric(child$POORP) - 1
child$CHGENP <- as.numeric(child$CHGENP) - 1
fall_comb <- f
spring_comb <- s
colnames(fall_comb) <- c("ICPSR_STUDY_ID", "LETTEREP", "PPVREP", "OWLSEP", "WJ21AEP", "CSLANGP")
colnames(spring_comb) <- c("ICPSR_STUDY_ID", "LETTEREP", "PPVREP", "OWLSEP", "WJ21AEP", "CSLANGP")
fall_comb$time <- 0
spring_comb$time <- 1
comb_measures <- rbind(fall_comb, spring_comb)
comb_measures$time <- as.factor(comb_measures$time)
inv_df <- merge(comb_measures, child)


model <- '
literacy =~ LETTEREP + PPVREP + OWLSEP + WJ21AEP + CSLANGP
'
fit <- cfa(model, data=inv_df, group="time")
summary(fit, fit.measures=TRUE, standardized = TRUE)

## lavaan (0.5-16) converged normally after 101 iterations
## 
##                                                   Used       Total
##   Number of observations per group         
##   0                                               1980        2119
##   1                                               1807        2119
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic              542.173
##   Degrees of freedom                                10
##   P-value (Chi-square)                           0.000
## 
## Chi-square for each group:
## 
##   0                                            282.717
##   1                                            259.456
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic             5334.161
##   Degrees of freedom                                20
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.900
##   Tucker-Lewis Index (TLI)                       0.800
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -57928.749
##   Loglikelihood unrestricted model (H1)     -57657.662
## 
##   Number of free parameters                         30
##   Akaike (AIC)                              115917.498
##   Bayesian (BIC)                            116104.678
##   Sample-size adjusted Bayesian (BIC)       116009.352
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.168
##   90 Percent Confidence Interval          0.156  0.180
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.058
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
## Group 1 [0]:
## 
##                    Estimate  Std.err  Z-value  P(>|z|)   Std.lv  Std.all
## Latent variables:
##   literacy =~
##     LETTEREP          1.000                               4.293    0.475
##     PPVREP            3.268    0.171   19.094    0.000   14.028    0.778
##     OWLSEP            2.291    0.121   18.990    0.000    9.834    0.761
##     WJ21AEP           0.493    0.027   17.968    0.000    2.115    0.656
##     CSLANGP           0.107    0.007   15.910    0.000    0.458    0.518
## 
## Intercepts:
##     LETTEREP          8.698    0.203   42.867    0.000    8.698    0.963
##     PPVREP           52.647    0.405  129.946    0.000   52.647    2.920
##     OWLSEP           91.700    0.290  315.673    0.000   91.700    7.094
##     WJ21AEP           2.267    0.072   31.275    0.000    2.267    0.703
##     CSLANGP           2.367    0.020  119.024    0.000    2.367    2.675
##     literacy          0.000                               0.000    0.000
## 
## Variances:
##     LETTEREP         63.098    2.153                     63.098    0.774
##     PPVREP          128.221    6.575                    128.221    0.395
##     OWLSEP           70.366    3.404                     70.366    0.421
##     WJ21AEP           5.930    0.230                      5.930    0.570
##     CSLANGP           0.573    0.020                      0.573    0.731
##     literacy         18.431    1.819                      1.000    1.000
## 
## 
## 
## Group 2 [1]:
## 
##                    Estimate  Std.err  Z-value  P(>|z|)   Std.lv  Std.all
## Latent variables:
##   literacy =~
##     LETTEREP          1.000                               4.642    0.494
##     PPVREP            2.973    0.156   19.031    0.000   13.804    0.786
##     OWLSEP            1.996    0.107   18.661    0.000    9.267    0.735
##     WJ21AEP           0.597    0.033   18.172    0.000    2.771    0.688
##     CSLANGP           0.104    0.007   15.444    0.000    0.482    0.509
## 
## Intercepts:
##     LETTEREP         14.115    0.221   63.892    0.000   14.115    1.503
##     PPVREP           61.782    0.413  149.447    0.000   61.782    3.516
##     OWLSEP           94.467    0.297  318.346    0.000   94.467    7.489
##     WJ21AEP           3.752    0.095   39.570    0.000    3.752    0.931
##     CSLANGP           3.088    0.022  138.379    0.000    3.088    3.255
##     literacy          0.000                               0.000    0.000
## 
## Variances:
##     LETTEREP         66.635    2.398                     66.635    0.756
##     PPVREP          118.274    6.470                    118.274    0.383
##     OWLSEP           73.247    3.425                     73.247    0.460
##     WJ21AEP           8.561    0.364                      8.561    0.527
##     CSLANGP           0.667    0.024                      0.667    0.741
##     literacy         21.551    2.128                      1.000    1.000

# pretty bad fit (RMSEA=.168, CFI=.900)

require(semTools)

## Loading required package: semTools
## Loading required package: MASS
##  
## ###############################################################################
## This is semTools 0.4-0
## All users of R (or SEM) are invited to submit functions or ideas for functions.
## ###############################################################################
## 
## Attaching package: 'semTools'
## 
## The following object is masked from 'package:psych':
## 
##     skew

measurementInvariance(model, data=inv_df, group="time")

## 
## Measurement invariance tests:
## 
## Model 1: configural invariance:
##     chisq        df    pvalue       cfi     rmsea       bic 
## 5.422e+02 1.000e+01 0.000e+00 9.000e-01 1.680e-01 1.161e+05 
## 
## Model 2: weak invariance (equal loadings):
##     chisq        df    pvalue       cfi     rmsea       bic 
## 5.785e+02 1.400e+01 0.000e+00 8.940e-01 1.460e-01 1.161e+05 
## 
## [Model 1 versus model 2]
##   delta.chisq      delta.df delta.p.value     delta.cfi 
##        36.345         4.000         0.000         0.006 
## 
## Model 3: strong invariance (equal loadings + intercepts):
##     chisq        df    pvalue       cfi     rmsea       bic 
## 9.961e+02 1.800e+01 0.000e+00 8.160e-01 1.690e-01 1.165e+05 
## 
## [Model 1 versus model 3]
##   delta.chisq      delta.df delta.p.value     delta.cfi 
##       453.881         8.000         0.000         0.084 
## 
## [Model 2 versus model 3]
##   delta.chisq      delta.df delta.p.value     delta.cfi 
##       417.536         4.000         0.000         0.078 
## 
## Model 4: equal loadings + intercepts + means:
##     chisq        df    pvalue       cfi     rmsea       bic 
## 1.343e+03 1.900e+01 0.000e+00 7.510e-01 1.920e-01 1.168e+05 
## 
## [Model 1 versus model 4]
##   delta.chisq      delta.df delta.p.value     delta.cfi 
##       800.893         9.000         0.000         0.149 
## 
## [Model 3 versus model 4]
##   delta.chisq      delta.df delta.p.value     delta.cfi 
##       347.012         1.000         0.000         0.065

# Looks like I can't really assume invariant loadings...

# =====================================
# SEM, adding child's age at fall assessment as a predictor of change
# see http://davidakenny.net/cm/ar.htm#CH Models of Change: Standard Model, "two-wave autoregressive model"
model <- '
# measurement model
fall_literacy =~ LETTEREPF + PPVREPF + OWLSEPF + WJ21AEPF + CSLANGPF
spring_literacy =~ LETTEREPS + PPVREPS + OWLSEPS + WJ21AEPS + CSLANGPS 

# error covariances
WJ21AEPF ~~ WJ21AEPS
PPVREPF ~~ PPVREPS
OWLSEPF ~~ OWLSEPS
LETTEREPF ~~ LETTEREPS
CSLANGPF ~~ CSLANGPS

# structural model
spring_literacy ~ fall_literacy + ASMTAGEPF
fall_literacy ~~ ASMTAGEPF
'
fit <- sem(model, data=df)

## Warning: lavaan WARNING: some observed variances are (at least) a factor 1000 times larger than others; use varTable(fit) to investigate
## Warning: lavaan WARNING: model syntax contains variance/covariance/intercept formulas
##   involving (an) exogenous variable(s): [ASMTAGEPF];
##   Please use fixed.x=FALSE or leave them alone

summary(fit, fit.measures=TRUE, standardized = TRUE)

## lavaan (0.5-16) converged normally after 155 iterations
## 
##                                                   Used       Total
##   Number of observations                          1715        2119
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic              761.490
##   Degrees of freedom                                37
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic            10367.869
##   Degrees of freedom                                55
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.930
##   Tucker-Lewis Index (TLI)                       0.896
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -50169.956
##   Loglikelihood unrestricted model (H1)     -49789.211
## 
##   Number of free parameters                         29
##   Akaike (AIC)                              100397.912
##   Bayesian (BIC)                            100555.879
##   Sample-size adjusted Bayesian (BIC)       100463.750
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.107
##   90 Percent Confidence Interval          0.100  0.114
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.072
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
##                    Estimate  Std.err  Z-value  P(>|z|)   Std.lv  Std.all
## Latent variables:
##   fall_literacy =~
##     LETTEREPF         1.000                               4.696    0.519
##     PPVREPF           2.980    0.147   20.251    0.000   13.994    0.791
##     OWLSEPF           1.969    0.100   19.597    0.000    9.248    0.725
##     WJ21AEPF          0.446    0.024   18.572    0.000    2.093    0.655
##     CSLANGPF          0.098    0.006   16.255    0.000    0.461    0.524
##   spring_literacy =~
##     LETTEREPS         1.000                               4.723    0.503
##     PPVREPS           2.897    0.148   19.524    0.000   13.685    0.787
##     OWLSEPS           1.870    0.100   18.790    0.000    8.833    0.710
##     WJ21AEPS          0.584    0.032   18.416    0.000    2.760    0.681
##     CSLANGPS          0.104    0.007   15.815    0.000    0.491    0.520
## 
## Regressions:
##   spring_literacy ~
##     fall_literacy     0.976    0.034   28.931    0.000    0.970    0.970
##     ASMTAGEPF        -0.236    0.203   -1.166    0.244   -0.050   -0.016
## 
## Covariances:
##   WJ21AEPF ~~
##     WJ21AEPS          2.963    0.231   12.828    0.000    2.963    0.413
##   PPVREPF ~~
##     PPVREPS          59.533    5.101   11.671    0.000   59.533    0.514
##   OWLSEPF ~~
##     OWLSEPS          42.828    2.807   15.258    0.000   42.828    0.556
##   LETTEREPF ~~
##     LETTEREPS        45.603    2.016   22.625    0.000   45.603    0.727
##   CSLANGPF ~~
##     CSLANGPS          0.286    0.018   16.274    0.000    0.286    0.474
##   fall_literacy ~~
##     ASMTAGEPF         0.386    0.043    8.873    0.000    0.386    0.386
## 
## Variances:
##     LETTEREPF        59.811    2.200                     59.811    0.731
##     PPVREPF         116.889    6.176                    116.889    0.374
##     OWLSEPF          77.310    3.424                     77.310    0.475
##     WJ21AEPF          5.842    0.237                      5.842    0.572
##     CSLANGPF          0.561    0.021                      0.561    0.726
##     LETTEREPS        65.806    2.409                     65.806    0.747
##     PPVREPS         114.734    6.041                    114.734    0.380
##     OWLSEPS          76.733    3.341                     76.733    0.496
##     WJ21AEPS          8.799    0.367                      8.799    0.536
##     CSLANGPS          0.650    0.024                      0.650    0.730
##     ASMTAGEPF         0.097    0.003                      0.097    1.000
##     fall_literacy    22.055    2.062                      1.000    1.000
##     spring_litrcy     1.481    0.262                      0.066    0.066


mi <- modindices(fit)
# show modification indices, sorting by modification index in decending order
subset(mi[with(mi, order(-mi)), ], mi>4)

##              lhs op       rhs     mi    epc sepc.lv sepc.all sepc.nox
## 1      LETTEREPF ~~  CSLANGPF 42.475  0.580   0.580    0.073    0.073
## 2      LETTEREPS ~~  CSLANGPS 20.918  0.458   0.458    0.052    0.052
## 3      LETTEREPF ~~  WJ21AEPF 18.117  1.302   1.302    0.045    0.045
## 4        PPVREPS ~~  CSLANGPS 16.641 -0.747  -0.747   -0.046   -0.046
## 5        PPVREPF ~~  WJ21AEPF 16.133 -2.453  -2.453   -0.043   -0.043
## 6      LETTEREPF ~~   OWLSEPF 12.031 -3.665  -3.665   -0.032   -0.032
## 7        PPVREPF ~~   OWLSEPF  7.694  6.166   6.166    0.027    0.027
## 8  fall_literacy =~   PPVREPS  6.289  1.671   7.848    0.452    0.452
## 9      LETTEREPF ~~   PPVREPF  5.719 -3.385  -3.385   -0.021   -0.021
## 10       OWLSEPF ~~   PPVREPS  4.904  4.684   4.684    0.021    0.021
## 11      CSLANGPF ~~ LETTEREPS  4.860 -0.205  -0.205   -0.025   -0.025
## 12     LETTEREPF ~~  CSLANGPS  4.285  0.197   0.197    0.023    0.023
## 13       PPVREPF ~~   OWLSEPS  4.260  4.347   4.347    0.020    0.020

# =====================================
# Set loadings equal in fall and spring?
model <- '
# measurement model
fall_literacy =~ LETTEREPF + ppvt*PPVREPF + owls*OWLSEPF + rhyme*WJ21AEPF + teacher*CSLANGPF
spring_literacy =~ LETTEREPS + ppvt*PPVREPS + owls*OWLSEPS + rhyme*WJ21AEPS + teacher*CSLANGPS 

# error covariances
WJ21AEPF ~~ WJ21AEPS
PPVREPF ~~ PPVREPS
OWLSEPF ~~ OWLSEPS
LETTEREPF ~~ LETTEREPS
CSLANGPF ~~ CSLANGPS 

# structural model
spring_literacy ~ fall_literacy + ASMTAGEPF
fall_literacy ~~ ASMTAGEPF
'
fit <- sem(model, data=df)

## Warning: lavaan WARNING: some observed variances are (at least) a factor 1000 times larger than others; use varTable(fit) to investigate
## Warning: lavaan WARNING: model syntax contains variance/covariance/intercept formulas
##   involving (an) exogenous variable(s): [ASMTAGEPF];
##   Please use fixed.x=FALSE or leave them alone

summary(fit, fit.measures=TRUE, standardized = TRUE)

## lavaan (0.5-16) converged normally after 143 iterations
## 
##                                                   Used       Total
##   Number of observations                          1715        2119
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic              831.747
##   Degrees of freedom                                41
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic            10367.869
##   Degrees of freedom                                55
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.923
##   Tucker-Lewis Index (TLI)                       0.897
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -50205.084
##   Loglikelihood unrestricted model (H1)     -49789.211
## 
##   Number of free parameters                         25
##   Akaike (AIC)                              100460.168
##   Bayesian (BIC)                            100596.347
##   Sample-size adjusted Bayesian (BIC)       100516.925
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.106
##   90 Percent Confidence Interval          0.100  0.112
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.079
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
##                    Estimate  Std.err  Z-value  P(>|z|)   Std.lv  Std.all
## Latent variables:
##   fall_literacy =~
##     LETTER            1.000                               4.664    0.517
##     PPVREP (ppvt)     2.918    0.135   21.578    0.000   13.611    0.778
##     OWLSEP (owls)     1.912    0.091   20.909    0.000    8.918    0.709
##     WJ21AE (rhym)     0.490    0.024   20.366    0.000    2.285    0.692
##     CSLANG (tchr)     0.101    0.006   17.911    0.000    0.469    0.531
##   spring_literacy =~
##     LETTER            1.000                               4.779    0.507
##     PPVREP (ppvt)     2.918    0.135   21.578    0.000   13.948    0.795
##     OWLSEP (owls)     1.912    0.091   20.909    0.000    9.138    0.725
##     WJ21AE (rhym)     0.490    0.024   20.366    0.000    2.342    0.608
##     CSLANG (tchr)     0.101    0.006   17.911    0.000    0.480    0.511
## 
## Regressions:
##   spring_literacy ~
##     fall_literacy     0.995    0.016   62.170    0.000    0.971    0.971
##     ASMTAGEPF        -0.286    0.206   -1.389    0.165   -0.060   -0.019
## 
## Covariances:
##   WJ21AEPF ~~
##     WJ21AEPS          2.951    0.231   12.791    0.000    2.951    0.404
##   PPVREPF ~~
##     PPVREPS          60.361    5.150   11.720    0.000   60.361    0.517
##   OWLSEPF ~~
##     OWLSEPS          42.528    2.817   15.099    0.000   42.528    0.553
##   LETTEREPF ~~
##     LETTEREPS        45.591    2.018   22.589    0.000   45.591    0.727
##   CSLANGPF ~~
##     CSLANGPS          0.286    0.018   16.242    0.000    0.286    0.473
##   fall_literacy ~~
##     ASMTAGEPF         0.382    0.043    8.895    0.000    0.382    0.382
## 
## Variances:
##     LETTEREPF        59.725    2.195                     59.725    0.733
##     PPVREPF         120.713    6.126                    120.713    0.395
##     OWLSEPF          78.545    3.392                     78.545    0.497
##     WJ21AEPF          5.686    0.239                      5.686    0.521
##     CSLANGPF          0.559    0.021                      0.559    0.718
##     LETTEREPS        65.852    2.413                     65.852    0.742
##     PPVREPS         113.050    6.074                    113.050    0.368
##     OWLSEPS          75.315    3.352                     75.315    0.474
##     WJ21AEPS          9.365    0.363                      9.365    0.631
##     CSLANGPS          0.653    0.024                      0.653    0.739
##     ASMTAGEPF         0.097    0.003                      0.097    1.000
##     fall_literacy    21.750    1.957                      1.000    1.000
##     spring_litrcy     1.505    0.266                      0.066    0.066


mi <- modindices(fit)
# show modification indices, sorting by modification index in decending order
subset(mi[with(mi, order(-mi)), ], mi>4)

##                lhs op       rhs     mi    epc sepc.lv sepc.all sepc.nox
## 1    fall_literacy =~  WJ21AEPF 64.249 -0.074  -0.347   -0.105   -0.105
## 2  spring_literacy =~  WJ21AEPS 64.249  0.074   0.355    0.092    0.092
## 3    fall_literacy =~  WJ21AEPS 52.839  0.126   0.587    0.152    0.152
## 4        LETTEREPF ~~  CSLANGPF 41.249  0.568   0.568    0.071    0.071
## 5  spring_literacy =~  WJ21AEPF 38.950 -0.087  -0.414   -0.125   -0.125
## 6          PPVREPF ~~  WJ21AEPF 23.085 -2.934  -2.934   -0.051   -0.051
## 7        LETTEREPS ~~  CSLANGPS 21.151  0.460   0.460    0.052    0.052
## 8          PPVREPS ~~  CSLANGPS 17.011 -0.744  -0.744   -0.045   -0.045
## 9        LETTEREPF ~~  WJ21AEPF 15.530  1.210   1.210    0.041    0.041
## 10         PPVREPF ~~   OWLSEPF 13.108  7.609   7.609    0.035    0.035
## 11   fall_literacy =~   OWLSEPF 10.835  0.092   0.430    0.034    0.034
## 12 spring_literacy =~   OWLSEPS 10.835 -0.092  -0.440   -0.035   -0.035
## 13 spring_literacy =~   OWLSEPF  9.961  0.154   0.734    0.058    0.058
## 14       LETTEREPF ~~   OWLSEPF  9.889 -3.278  -3.278   -0.029   -0.029
## 15   fall_literacy =~   OWLSEPS  9.757 -0.156  -0.727   -0.058   -0.058
## 16 spring_literacy =~   PPVREPS  6.669 -0.102  -0.487   -0.028   -0.028
## 17   fall_literacy =~   PPVREPF  6.669  0.102   0.475    0.027    0.027
## 18 spring_literacy =~   PPVREPF  6.126  0.169   0.806    0.046    0.046
## 19        CSLANGPF ~~ LETTEREPS  4.940 -0.206  -0.206   -0.025   -0.025
## 20        WJ21AEPS ~~  CSLANGPS  4.803  0.111   0.111    0.031    0.031
## 21         OWLSEPF ~~   PPVREPS  4.507  4.411   4.411    0.020    0.020
## 22       LETTEREPF ~~  CSLANGPS  4.382  0.199   0.199    0.023    0.023
## 23   fall_literacy =~   PPVREPS  4.102 -0.143  -0.665   -0.038   -0.038
## 24       LETTEREPF ~~   PPVREPF  4.037 -2.785  -2.785   -0.018   -0.018

# =====================================
# SEM, with SES (mother education) as a predictor of change
# see http://davidakenny.net/cm/ar.htm#CH Models of Change: Standard Model, "two-wave autoregressive model"
model <- '
# measurement model
fall_literacy =~ LETTEREPF + PPVREPF + OWLSEPF + WJ21AEPF + CSLANGPF
spring_literacy =~ LETTEREPS + PPVREPS + OWLSEPS + WJ21AEPS + CSLANGPS

# error covariances
WJ21AEPF ~~ WJ21AEPS
PPVREPF ~~ PPVREPS
OWLSEPF ~~ OWLSEPS
LETTEREPF ~~ LETTEREPS
CSLANGPF ~~ CSLANGPS

# structural model
spring_literacy ~ fall_literacy + CHMATEDNP
fall_literacy ~~ CHMATEDNP
'
fit <- sem(model, data=df)

## Warning: lavaan WARNING: model syntax contains variance/covariance/intercept formulas
##   involving (an) exogenous variable(s): [CHMATEDNP];
##   Please use fixed.x=FALSE or leave them alone

summary(fit, fit.measures=TRUE, standardized = TRUE)

## lavaan (0.5-16) converged normally after 143 iterations
## 
##                                                   Used       Total
##   Number of observations                          1681        2119
## 
##   Estimator                                         ML
##   Minimum Function Test Statistic              421.953
##   Degrees of freedom                                37
##   P-value (Chi-square)                           0.000
## 
## Model test baseline model:
## 
##   Minimum Function Test Statistic            10053.620
##   Degrees of freedom                                55
##   P-value                                        0.000
## 
## User model versus baseline model:
## 
##   Comparative Fit Index (CFI)                    0.961
##   Tucker-Lewis Index (TLI)                       0.943
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -52400.307
##   Loglikelihood unrestricted model (H1)     -52189.330
## 
##   Number of free parameters                         29
##   Akaike (AIC)                              104858.613
##   Bayesian (BIC)                            105016.000
##   Sample-size adjusted Bayesian (BIC)       104923.871
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.079
##   90 Percent Confidence Interval          0.072  0.086
##   P-value RMSEA <= 0.05                          0.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.061
## 
## Parameter estimates:
## 
##   Information                                 Expected
##   Standard Errors                             Standard
## 
##                    Estimate  Std.err  Z-value  P(>|z|)   Std.lv  Std.all
## Latent variables:
##   fall_literacy =~
##     LETTEREPF         1.000                               4.655    0.515
##     PPVREPF           2.984    0.150   19.919    0.000   13.890    0.783
##     OWLSEPF           2.029    0.104   19.499    0.000    9.444    0.740
##     WJ21AEPF          0.446    0.024   18.256    0.000    2.076    0.651
##     CSLANGPF          0.099    0.006   16.048    0.000    0.460    0.524
##   spring_literacy =~
##     LETTEREPS         1.000                               4.687    0.500
##     PPVREPS           2.894    0.150   19.251    0.000   13.563    0.780
##     OWLSEPS           1.919    0.103   18.706    0.000    8.994    0.724
##     WJ21AEPS          0.588    0.032   18.176    0.000    2.757    0.680
##     CSLANGPS          0.104    0.007   15.621    0.000    0.487    0.519
## 
## Regressions:
##   spring_literacy ~
##     fall_literacy     0.970    0.035   27.793    0.000    0.964    0.964
##     CHMATEDNP         0.010    0.031    0.322    0.748    0.002    0.005
## 
## Covariances:
##   WJ21AEPF ~~
##     WJ21AEPS          2.940    0.231   12.728    0.000    2.940    0.408
##   PPVREPF ~~
##     PPVREPS          63.732    5.083   12.538    0.000   63.732    0.532
##   OWLSEPF ~~
##     OWLSEPS          39.453    2.734   14.432    0.000   39.453    0.537
##   LETTEREPF ~~
##     LETTEREPS        45.754    2.035   22.486    0.000   45.754    0.727
##   CSLANGPF ~~
##     CSLANGPS          0.285    0.018   16.196    0.000    0.285    0.475
##   fall_literacy ~~
##     CHMATEDNP         4.800    0.372   12.891    0.000    4.800    4.800
## 
## Variances:
##     LETTEREPF        60.109    2.225                     60.109    0.735
##     PPVREPF         121.578    6.187                    121.578    0.387
##     OWLSEPF          73.546    3.352                     73.546    0.452
##     WJ21AEPF          5.873    0.238                      5.873    0.577
##     CSLANGPF          0.561    0.021                      0.561    0.726
##     LETTEREPS        65.943    2.430                     65.943    0.750
##     PPVREPS         118.099    6.007                    118.099    0.391
##     OWLSEPS          73.456    3.277                     73.456    0.476
##     WJ21AEPS          8.820    0.368                      8.820    0.537
##     CSLANGPS          0.643    0.024                      0.643    0.730
##     CHMATEDNP         5.226    0.180                      5.226    1.000
##     fall_literacy    21.670    2.061                      1.000    1.000
##     spring_litrcy     1.465    0.260                      0.067    0.067


mi <- modindices(fit)
# show modification indices, sorting by modification index in decending order
subset(mi[with(mi, order(-mi)), ], mi>4)

##                lhs op       rhs     mi    epc sepc.lv sepc.all sepc.nox
## 1        LETTEREPF ~~  CSLANGPF 37.144  0.548   0.548    0.069    0.069
## 2        LETTEREPS ~~  CSLANGPS 19.620  0.446   0.446    0.051    0.051
## 3        LETTEREPF ~~  WJ21AEPF 18.431  1.331   1.331    0.046    0.046
## 4          PPVREPF ~~  WJ21AEPF 12.955 -2.207  -2.207   -0.039   -0.039
## 5        LETTEREPF ~~   OWLSEPF 12.615 -3.783  -3.783   -0.033   -0.033
## 6          PPVREPS ~~  CSLANGPS 11.954 -0.631  -0.631   -0.039   -0.039
## 7          OWLSEPF ~~   PPVREPS  6.224  5.292   5.292    0.024    0.024
## 8        LETTEREPF ~~  CSLANGPS  5.944  0.234   0.234    0.028    0.028
## 9          PPVREPF ~~   OWLSEPF  5.251  5.138   5.138    0.023    0.023
## 10 spring_literacy =~   PPVREPF  4.511  1.453   6.810    0.384    0.384
## 11        CSLANGPF ~~ LETTEREPS  4.072 -0.189  -0.189   -0.023   -0.023