library(psych)
library(lavaan)
## This is lavaan 0.6-20
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
## 
##     cor2cov
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(stats)
final_df <- read.csv("clean_for_final.csv", header = TRUE)
part1 <- ' #latent variable definitions
 educ =~ educ_years
health =~ h2o_source + h2o_non_potable + toilet_type + share_toilet
work =~ radio + television + fridge + car_truck + cell_phone + watch + sewing_machine + computer
'
part2 <- ' #fix covariance between educ & health to zero
educ ~~ 0*health
'
fit <- cfa(model = c(part1, part2), data = final_df)
## Warning: lavaan->lav_data_full():  
##    some observed variances are (at least) a factor 1000 times larger than 
##    others; use varTable(fit) to investigate
## Warning: lavaan->lav_object_post_check():  
##    some estimated ov variances are negative
pov.model <- 'educ =~ educ_years
health =~ h2o_source + h2o_non_potable + toilet_type + share_toilet
work =~ radio + television + fridge + car_truck + cell_phone + watch + sewing_machine + computer' 
fit <- cfa(pov.model, data = final_df)
## Warning: lavaan->lav_data_full():  
##    some observed variances are (at least) a factor 1000 times larger than 
##    others; use varTable(fit) to investigate
## Warning: lavaan->lav_object_post_check():  
##    some estimated ov variances are negative
summary(fit, fit.measures = TRUE)
## lavaan 0.6-20 ended normally after 197 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        28
## 
##   Number of observations                         14722
## 
## Model Test User Model:
##                                                        
##   Test statistic                              10003.074
##   Degrees of freedom                                 63
##   P-value (Chi-square)                            0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             76010.571
##   Degrees of freedom                                78
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.869
##   Tucker-Lewis Index (TLI)                       0.838
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)            -196068.572
##   Loglikelihood unrestricted model (H1)    -191067.036
##                                                       
##   Akaike (AIC)                              392193.145
##   Bayesian (BIC)                            392405.864
##   Sample-size adjusted Bayesian (SABIC)     392316.882
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.104
##   90 Percent confidence interval - lower         0.102
##   90 Percent confidence interval - upper         0.105
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.107
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   educ =~                                             
##     educ_years        1.000                           
##   health =~                                           
##     h2o_source        1.000                           
##     h2o_non_potabl    1.091    0.014   78.801    0.000
##     toilet_type       0.106    0.009   11.762    0.000
##     share_toilet     -0.002    0.000   -3.900    0.000
##   work =~                                             
##     radio             1.000                           
##     television        1.079    0.024   45.819    0.000
##     fridge            0.436    0.011   38.517    0.000
##     car_truck         0.434    0.011   38.430    0.000
##     cell_phone        1.428    0.032   44.275    0.000
##     watch             1.170    0.029   40.347    0.000
##     sewing_machine    0.240    0.010   25.274    0.000
##     computer          0.255    0.007   34.871    0.000
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##   educ ~~                                             
##     health            1.609    0.260    6.191    0.000
##     work              0.346    0.011   30.078    0.000
##   health ~~                                           
##     work              0.207    0.016   12.642    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .educ_years        0.000                           
##    .h2o_source        7.670    0.757   10.132    0.000
##    .h2o_non_potabl   -5.736    0.897   -6.398    0.000
##    .toilet_type      86.309    1.005   85.883    0.000
##    .share_toilet      0.212    0.002   85.811    0.000
##    .radio             0.172    0.002   78.193    0.000
##    .television        0.063    0.001   61.542    0.000
##    .fridge            0.031    0.000   77.832    0.000
##    .car_truck         0.031    0.000   77.910    0.000
##    .cell_phone        0.153    0.002   68.189    0.000
##    .watch             0.181    0.002   75.884    0.000
##    .sewing_machine    0.037    0.000   83.749    0.000
##    .computer          0.016    0.000   80.427    0.000
##     educ             20.169    0.235   85.796    0.000
##     health           59.971    1.086   55.240    0.000
##     work              0.047    0.002   25.844    0.000
###?PoliticalDemocracy
### Run the SEM model

model <- '
#measurement model
educ =~ educ_years
health =~ h2o_source + h2o_non_potable + toilet_type + share_toilet
work =~ radio + television + fridge + car_truck + cell_phone + watch + sewing_machine + computer

### regressions
educ ~ work
health ~ work
work ~ educ

### residual correlations
educ ~~ computer
share_toilet ~~ car_truck
sewing_machine ~~ educ'
fit <- sem(model, data = final_df)
## Warning: lavaan->lav_data_full():  
##    some observed variances are (at least) a factor 1000 times larger than 
##    others; use varTable(fit) to investigate
## Warning: lavaan->lav_object_post_check():  
##    some estimated ov variances are negative
summary(fit, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-20 ended normally after 431 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        31
## 
##   Number of observations                         14722
## 
## Model Test User Model:
##                                                       
##   Test statistic                              9947.225
##   Degrees of freedom                                60
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                             76010.571
##   Degrees of freedom                                78
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.870
##   Tucker-Lewis Index (TLI)                       0.831
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)            -196040.648
##   Loglikelihood unrestricted model (H1)    -191067.036
##                                                       
##   Akaike (AIC)                              392143.297
##   Bayesian (BIC)                            392378.807
##   Sample-size adjusted Bayesian (SABIC)     392280.291
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.106
##   90 Percent confidence interval - lower         0.104
##   90 Percent confidence interval - upper         0.108
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.106
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   educ =~                                                               
##     educ_years        1.000                               4.491    1.000
##   health =~                                                             
##     h2o_source        1.000                               7.753    0.943
##     h2o_non_potabl    1.088    0.014   79.714    0.000    8.436    1.042
##     toilet_type       0.107    0.009   11.826    0.000    0.831    0.089
##     share_toilet     -0.002    0.000   -4.016    0.000   -0.014   -0.029
##   work =~                                                               
##     radio             1.000                               0.218    0.465
##     television        1.072    0.023   45.850    0.000    0.233    0.679
##     fridge            0.433    0.011   38.445    0.000    0.094    0.470
##     car_truck         0.413    0.011   37.600    0.000    0.090    0.452
##     cell_phone        1.426    0.032   44.374    0.000    0.310    0.621
##     watch             1.169    0.029   40.455    0.000    0.254    0.513
##     sewing_machine    0.212    0.054    3.893    0.000    0.046    0.232
##     computer          0.215    0.075    2.878    0.004    0.047    0.341
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   educ ~                                                                
##     work             45.334   69.871    0.649    0.516    2.196    2.196
##   health ~                                                              
##     work              4.426    0.343   12.917    0.000    0.124    0.124
##   work ~                                                                
##     educ             -0.432    3.376   -0.128    0.898   -8.929   -8.929
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##  .educ ~~                                                               
##    .computer         -0.091    0.268   -0.341    0.733   -0.010   -0.079
##  .share_toilet ~~                                                       
##    .car_truck        -0.005    0.001   -6.483    0.000   -0.005   -0.055
##  .educ ~~                                                               
##    .sewing_machine   -0.067    0.194   -0.342    0.732   -0.007   -0.038
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .educ_years        0.000                               0.000    0.000
##    .h2o_source        7.536    0.750   10.049    0.000    7.536    0.111
##    .h2o_non_potabl   -5.576    0.884   -6.306    0.000   -5.576   -0.085
##    .toilet_type      86.297    1.005   85.881    0.000   86.297    0.992
##    .share_toilet      0.212    0.002   85.811    0.000    0.212    0.999
##    .radio             0.172    0.002   78.118    0.000    0.172    0.784
##    .television        0.064    0.001   61.749    0.000    0.064    0.540
##    .fridge            0.031    0.000   77.919    0.000    0.031    0.780
##    .car_truck         0.031    0.000   78.655    0.000    0.031    0.796
##    .cell_phone        0.153    0.002   68.061    0.000    0.153    0.614
##    .watch             0.181    0.002   75.795    0.000    0.181    0.736
##    .sewing_machine    0.037    0.000   80.380    0.000    0.037    0.931
##    .computer          0.016    0.000   46.725    0.000    0.016    0.840
##    .educ             85.359  250.328    0.341    0.733    4.232    4.232
##    .health           59.179    1.031   57.418    0.000    0.985    0.985
##    .work              4.125   61.272    0.067    0.946   87.181   87.181