library(psych)
library(lavaan)
## This is lavaan 0.6-20
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
## 
##     cor2cov
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(stats)
final_df <- read.csv("clean_for_final.csv", header = TRUE)
part1 <- ' #latent variable definitions
 educ =~ educ_years
health =~ h2o_source + h2o_non_potable + toilet_type + share_toilet
work =~ radio + television + fridge + car_truck + cell_phone + watch + sewing_machine + computer
'
part2 <- ' #fix covariance between educ & health to zero
educ ~~ 0*health
'
#fit <- cfa(model = c(part1, part2), std.lv = TRUE, ordered = c("share_toilet", "radio", "television", "fridge", #"car_truck", "cell_phone", "watch", "sewing_machine", "computer"), data = final_df)
pov.model <- 'educ =~ educ_years
health =~ h2o_source + h2o_non_potable + toilet_type + share_toilet
work =~ radio + television + fridge + car_truck + cell_phone + watch + sewing_machine + computer' 
fit <- cfa(pov.model, std.lv = TRUE, ordered = c("share_toilet", "radio", "television", "fridge", "car_truck", "cell_phone", "watch", "sewing_machine", "computer"), data = final_df)
summary(fit, fit.measures = TRUE)
## lavaan 0.6-20 ended normally after 40 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of model parameters                        32
## 
##   Number of observations                         14722
## 
## Model Test User Model:
##                                                Standard      Scaled
##   Test Statistic                              19875.347   18573.916
##   Degrees of freedom                                 63          63
##   P-value (Chi-square)                            0.000       0.000
##   Scaling correction factor                                   1.071
##   Shift parameter                                            17.214
##     simple second-order correction                                 
## 
## Model Test Baseline Model:
## 
##   Test statistic                             95916.528   64015.458
##   Degrees of freedom                                78          78
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.499
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.793       0.710
##   Tucker-Lewis Index (TLI)                       0.744       0.642
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.146       0.141
##   90 Percent confidence interval - lower         0.144       0.140
##   90 Percent confidence interval - upper         0.148       0.143
##   P-value H_0: RMSEA <= 0.050                    0.000       0.000
##   P-value H_0: RMSEA >= 0.080                    1.000       1.000
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
##   P-value H_0: Robust RMSEA <= 0.050                            NA
##   P-value H_0: Robust RMSEA >= 0.080                            NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.180       0.180
## 
## Parameter Estimates:
## 
##   Parameterization                               Delta
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   educ =~                                             
##     educ_years        4.491    0.043  104.095    0.000
##   health =~                                           
##     h2o_source        7.338    0.045  164.787    0.000
##     h2o_non_potabl    7.706    0.042  184.589    0.000
##     toilet_type       2.670    0.069   38.508    0.000
##     share_toilet     -0.322    0.011  -30.209    0.000
##   work =~                                             
##     radio             0.734    0.008   87.376    0.000
##     television        0.915    0.007  133.612    0.000
##     fridge            0.816    0.012   70.887    0.000
##     car_truck         0.790    0.012   65.395    0.000
##     cell_phone        0.891    0.006  144.663    0.000
##     watch             0.741    0.008   95.660    0.000
##     sewing_machine    0.510    0.017   29.684    0.000
##     computer          0.911    0.014   63.337    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   educ ~~                                             
##     health            0.083    0.007   11.115    0.000
##     work              0.337    0.009   38.508    0.000
##   health ~~                                           
##     work              0.255    0.008   31.866    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .educ_years        3.341    0.061   54.606    0.000
##    .h2o_source       51.095    0.070  726.075    0.000
##    .h2o_non_potabl   42.151    0.069  614.258    0.000
##    .toilet_type      31.517    0.104  304.110    0.000
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)
##     share_toilt|t1   -0.511    0.011  -47.136    0.000
##     radio|t1         -0.458    0.011  -42.658    0.000
##     television|t1     1.095    0.013   84.692    0.000
##     fridge|t1         1.728    0.018   93.699    0.000
##     car_truck|t1      1.729    0.018   93.690    0.000
##     cell_phone|t1     0.067    0.010    6.461    0.000
##     watch|t1         -0.174    0.010  -16.788    0.000
##     sewing_mchn|t1    1.740    0.019   93.546    0.000
##     computer|t1       2.070    0.024   85.635    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .educ_years        0.000                           
##    .h2o_source       13.793    0.531   25.965    0.000
##    .h2o_non_potabl    6.218    0.564   11.026    0.000
##    .toilet_type      79.860    1.059   75.428    0.000
##    .share_toilet      0.896                           
##    .radio             0.461                           
##    .television        0.163                           
##    .fridge            0.335                           
##    .car_truck         0.376                           
##    .cell_phone        0.206                           
##    .watch             0.452                           
##    .sewing_machine    0.740                           
##    .computer          0.169                           
##     educ              1.000                           
##     health            1.000                           
##     work              1.000
### Run the SEM model

pov2_model <- '
#measurement model
educ =~ educ_years
health =~ h2o_source + h2o_non_potable + toilet_type + share_toilet
work =~ radio + television + fridge + car_truck + cell_phone + watch + sewing_machine + computer

### regressions
educ ~ work
educ ~ health
health ~ work


### residual correlations
educ ~~ computer
share_toilet ~~ car_truck
sewing_machine ~~ educ'
fit <- sem(pov2_model, std.lv = TRUE, ordered = c("share_toilet", "radio", "television", "fridge", "car_truck", "cell_phone", "watch", "sewing_machine", "computer"), group = "urb_rural", data = final_df)
## Warning: lavaan->lav_samplestats_step2():  
##    correlation between variables computer and toilet_type is (nearly) 1.0
## Warning: lavaan->lav_samplestats_step2():  
##    correlation between variables cell_phone and television is (nearly) 1.0
## Warning: lavaan->lav_samplestats_step2():  
##    correlation between variables cell_phone and fridge is (nearly) 1.0
## Warning: lavaan->lav_model_vcov():  
##    Could not compute standard errors! The information matrix could not be 
##    inverted. This may be a symptom that the model is not identified.
## Warning: lavaan->lav_test_satorra_bentler():  
##    could not invert information matrix needed for robust test statistic
## Warning: lavaan->lav_object_post_check():  
##    some estimated ov variances are negative
summary(fit, standardized = TRUE,  fit.measures = TRUE)
## lavaan 0.6-20 ended normally after 970 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of model parameters                        70
## 
##   Number of observations per group:                   
##     1                                             8539
##     2                                             6183
## 
## Model Test User Model:
##                                                Standard      Scaled
##   Test Statistic                              12300.261          NA
##   Degrees of freedom                                120         120
##   P-value (Chi-square)                            0.000          NA
##   Scaling correction factor                                      NA
##   Shift parameter                                                NA
##                                                                    
##   Test statistic for each group:
##     1                                               NA          NA
##     2                                               NA          NA
## 
## Model Test Baseline Model:
## 
##   Test statistic                             76833.239   47851.975
##   Degrees of freedom                               156         156
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.608
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.841          NA
##   Tucker-Lewis Index (TLI)                       0.793          NA
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.117          NA
##   90 Percent confidence interval - lower         0.116          NA
##   90 Percent confidence interval - upper         0.119          NA
##   P-value H_0: RMSEA <= 0.050                    0.000          NA
##   P-value H_0: RMSEA >= 0.080                    1.000          NA
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
##   P-value H_0: Robust RMSEA <= 0.050                            NA
##   P-value H_0: Robust RMSEA >= 0.080                            NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.158       0.158
## 
## Parameter Estimates:
## 
##   Parameterization                               Delta
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## 
## Group 1 [1]:
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   educ =~                                                               
##     educ_years        4.652       NA                      4.849    1.000
##   health =~                                                             
##     h2o_source        8.942       NA                      8.958    0.916
##     h2o_non_potabl   10.102       NA                     10.120    1.055
##     toilet_type       1.047       NA                      1.049    0.106
##     share_toilet     -0.095       NA                     -0.096   -0.096
##   work =~                                                               
##     radio             0.717       NA                      0.717    0.717
##     television        0.894       NA                      0.894    0.894
##     fridge            0.810       NA                      0.810    0.810
##     car_truck         0.783       NA                      0.783    0.783
##     cell_phone        0.819       NA                      0.819    0.819
##     watch             0.692       NA                      0.692    0.692
##     sewing_machine    0.472       NA                      0.472    0.472
##     computer          0.899       NA                      0.899    0.899
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   educ ~                                                                
##     work              0.295       NA                      0.283    0.283
##     health           -0.017       NA                     -0.016   -0.016
##   health ~                                                              
##     work              0.060       NA                      0.059    0.059
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##  .educ ~~                                                               
##    .computer         -0.035       NA                     -0.035   -0.079
##  .share_toilet ~~                                                       
##    .car_truck        -0.503       NA                     -0.503   -0.811
##  .educ ~~                                                               
##    .sewing_machine   -0.025       NA                     -0.025   -0.029
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .educ_years        4.177       NA                      4.177    0.861
##    .h2o_source       52.199       NA                     52.199    5.335
##    .h2o_non_potabl   43.295       NA                     43.295    4.514
##    .toilet_type      32.792       NA                     32.792    3.301
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     share_toilt|t1   -0.335       NA                     -0.335   -0.335
##     radio|t1         -0.712       NA                     -0.712   -0.712
##     television|t1     0.825       NA                      0.825    0.825
##     fridge|t1         1.662       NA                      1.662    1.662
##     car_truck|t1      1.531       NA                      1.531    1.531
##     cell_phone|t1    -0.378       NA                     -0.378   -0.378
##     watch|t1         -0.463       NA                     -0.463   -0.463
##     sewing_mchn|t1    1.579       NA                      1.579    1.579
##     computer|t1       1.876       NA                      1.876    1.876
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .educ_years        0.000                               0.000    0.000
##    .h2o_source       15.479       NA                     15.479    0.162
##    .h2o_non_potabl  -10.418       NA                    -10.418   -0.113
##    .toilet_type      97.564       NA                     97.564    0.989
##    .share_toilet      0.991                               0.991    0.991
##    .radio             0.486                               0.486    0.486
##    .television        0.201                               0.201    0.201
##    .fridge            0.344                               0.344    0.344
##    .car_truck         0.388                               0.388    0.388
##    .cell_phone        0.330                               0.330    0.330
##    .watch             0.521                               0.521    0.521
##    .sewing_machine    0.778                               0.778    0.778
##    .computer          0.192                               0.192    0.192
##    .educ              1.000                               0.920    0.920
##    .health            1.000                               0.996    0.996
##     work              1.000                               1.000    1.000
## 
## 
## Group 2 [2]:
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   educ =~                                                               
##     educ_years        0.039       NA                      3.645    1.000
##   health =~                                                             
##     h2o_source        0.122       NA                      1.152    0.231
##     h2o_non_potabl    0.122       NA                      1.152    0.231
##     toilet_type       0.555       NA                      5.238    0.647
##     share_toilet     -0.042       NA                     -0.392   -0.392
##   work =~                                                               
##     radio             0.631       NA                      0.631    0.631
##     television        1.243       NA                      1.243    1.243
##     fridge            0.817       NA                      0.817    0.817
##     car_truck         0.653       NA                      0.653    0.653
##     cell_phone        0.887       NA                      0.887    0.887
##     watch             0.635       NA                      0.635    0.635
##     sewing_machine    0.397       NA                      0.397    0.397
##     computer          0.824       NA                      0.824    0.824
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   educ ~                                                                
##     work            864.354       NA                      9.277    9.277
##     health          -89.173       NA                     -9.038   -9.038
##   health ~                                                              
##     work              9.390       NA                      0.994    0.994
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##  .educ ~~                                                               
##    .computer          4.441       NA                      4.441    7.839
##  .share_toilet ~~                                                       
##    .car_truck        -0.026       NA                     -0.026   -0.037
##  .educ ~~                                                               
##    .sewing_machine    5.597       NA                      5.597    6.099
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .educ_years        2.186       NA                      2.186    0.600
##    .h2o_source       49.570       NA                     49.570    9.946
##    .h2o_non_potabl   40.570       NA                     40.570    8.140
##    .toilet_type      29.756       NA                     29.756    3.676
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     share_toilt|t1   -0.784       NA                     -0.784   -0.784
##     radio|t1         -0.147       NA                     -0.147   -0.147
##     television|t1     1.717       NA                      1.717    1.717
##     fridge|t1         1.834       NA                      1.834    1.834
##     car_truck|t1      2.228       NA                      2.228    2.228
##     cell_phone|t1     0.728       NA                      0.728    0.728
##     watch|t1          0.206       NA                      0.206    0.206
##     sewing_mchn|t1    2.087       NA                      2.087    2.087
##     computer|t1       2.662       NA                      2.662    2.662
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .educ_years        0.000                               0.000    0.000
##    .h2o_source       23.514       NA                     23.514    0.947
##    .h2o_non_potabl   23.514       NA                     23.514    0.947
##    .toilet_type      38.083       NA                     38.083    0.581
##    .share_toilet      0.846                               0.846    0.846
##    .radio             0.602                               0.602    0.602
##    .television       -0.544                              -0.544   -0.544
##    .fridge            0.333                               0.333    0.333
##    .car_truck         0.573                               0.573    0.573
##    .cell_phone        0.212                               0.212    0.212
##    .watch             0.597                               0.597    0.597
##    .sewing_machine    0.842                               0.842    0.842
##    .computer          0.321                               0.321    0.321
##    .educ              1.000                               0.000    0.000
##    .health            1.000                               0.011    0.011
##     work              1.000                               1.000    1.000