library(psych)
library(lavaan)
## This is lavaan 0.6-20
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
## 
##     cor2cov
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(stats)
final_df <- read.csv("clean_for_final.csv", header = TRUE)
 #Assuming 'my_data' is your data frame and 'categorical_var' is the categorical variable
# And you want to restrict to the value 'specific_value'
#rural <- final_df[final_df$urb_rural == '2' ]

# Now, perform your procedure on 'subset_data'
# For example, a linear model:
# model <- lm(dependent_var ~ other_var, data = subset_data)
urban <- final_df %>%
  filter(urb_rural == "1")
#mean(rural$wi_continuous) [1] 4550.573

#mean(urban$wi_continuous) [1] 112430.3
pov.model <- '
health =~ h2o_source + share_toilet
wealth =~ radio +   watch + computer + car_truck + sewing_machine + cell_phone 
wealth ~~ health  + educ_years'
fit <- cfa(pov.model, std.lv = TRUE,  ordered = c("share_toilet", "radio",  "watch", "car_truck" , "sewing_machine", "computer", "cell_phone" ), data = urban)
## Warning: lavaan->lav_object_post_check():  
##    covariance matrix of latent variables is not positive definite ; use 
##    lavInspect(fit, "cov.lv") to investigate.
#Error: lavaan->lav_data_full():
#some variables have no variance in group 1 : computer car_truck
#Error: lavaan->lav_data_full():
#some variables have no variance in group 1 : computer
#3Error: lavaan->lav_data_full():
#some variables have no variance in group 4 : car_truck sewing_machine
#Error: lavaan->lav_data_full():
#some variables have no variance in group 5 : cell_phone
#Error in c("share_toilet", "radio", "watch", ) : argument 4 is empty
#Error in c("share_toilet", "radio", "watch", ) : argument 4 is empty This with urb_rural as the group; no 4 in this group
summary(fit, fit.measures = TRUE, standardized = TRUE)
## lavaan 0.6-20 ended normally after 86 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of model parameters                        21
## 
##   Number of observations                          8539
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                               345.465     412.287
##   Degrees of freedom                                26          26
##   P-value (Chi-square)                           0.000       0.000
##   Scaling correction factor                                  0.841
##   Shift parameter                                            1.573
##     simple second-order correction                                
## 
## Model Test Baseline Model:
## 
##   Test statistic                             11839.749   10011.806
##   Degrees of freedom                                36          36
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.183
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.973       0.961
##   Tucker-Lewis Index (TLI)                       0.963       0.946
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.038       0.042
##   90 Percent confidence interval - lower         0.034       0.038
##   90 Percent confidence interval - upper         0.042       0.045
##   P-value H_0: RMSEA <= 0.050                    1.000       1.000
##   P-value H_0: RMSEA >= 0.080                    0.000       0.000
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
##   P-value H_0: Robust RMSEA <= 0.050                            NA
##   P-value H_0: Robust RMSEA >= 0.080                            NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.059       0.059
## 
## Parameter Estimates:
## 
##   Parameterization                               Delta
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   health =~                                                             
##     h2o_source        0.129    0.213    0.604    0.546    0.129    0.013
##     share_toilet     -0.358    0.494   -0.724    0.469   -0.358   -0.358
##   wealth =~                                                             
##     radio             0.715    0.013   53.733    0.000    0.715    0.715
##     watch             0.729    0.012   61.076    0.000    0.729    0.729
##     computer          0.852    0.025   34.128    0.000    0.852    0.852
##     car_truck         0.805    0.019   41.838    0.000    0.805    0.805
##     sewing_machine    0.476    0.025   19.432    0.000    0.476    0.476
##     cell_phone        0.802    0.011   70.561    0.000    0.802    0.802
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   health ~~                                                             
##     wealth            1.498    2.070    0.724    0.469    1.498    1.498
##   wealth ~~                                                             
##     educ_years        1.359    0.069   19.730    0.000    1.359    0.280
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .h2o_source       52.199    0.114  457.268    0.000   52.199    5.335
##     educ_years        4.177    0.074   56.298    0.000    4.177    0.861
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     share_toilt|t1   -0.335    0.014  -24.226    0.000   -0.335   -0.335
##     radio|t1         -0.712    0.015  -47.822    0.000   -0.712   -0.712
##     watch|t1         -0.463    0.014  -32.842    0.000   -0.463   -0.463
##     computer|t1       1.876    0.027   69.402    0.000    1.876    1.876
##     car_truck|t1      1.531    0.021   72.013    0.000    1.531    1.531
##     sewing_mchn|t1    1.579    0.022   72.074    0.000    1.579    1.579
##     cell_phone|t1    -0.378    0.014  -27.148    0.000   -0.378   -0.378
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .h2o_source       95.714    0.923  103.728    0.000   95.714    1.000
##    .share_toilet      0.872                               0.872    0.872
##    .radio             0.488                               0.488    0.488
##    .watch             0.469                               0.469    0.469
##    .computer          0.274                               0.274    0.274
##    .car_truck         0.351                               0.351    0.351
##    .sewing_machine    0.773                               0.773    0.773
##    .cell_phone        0.357                               0.357    0.357
##     educ_years       23.517    0.625   37.621    0.000   23.517    1.000
##     health            1.000                               1.000    1.000
##     wealth            1.000                               1.000    1.000
### Run the SEM model

pov2_model <- '
#measurement model
health =~ h2o_source  + share_toilet
wealth =~ radio+ car_truck + cell_phone + watch + sewing_machine +computer
wealth ~~ health + educ_years'

### regressions

educ ~ health
## educ ~ health
health ~ wealth
## health ~ wealth
educ ~ wealth 
## educ ~ wealth
fit <- sem(pov2_model, std.lv = TRUE,, ordered = c("share_toilet", "radio", "car_truck", "cell_phone", "watch", "sewing_machine", "computer"),data = urban)
## Warning: lavaan->lav_object_post_check():  
##    covariance matrix of latent variables is not positive definite ; use 
##    lavInspect(fit, "cov.lv") to investigate.
summary(fit, standardized = TRUE,  fit.measures = TRUE)
## lavaan 0.6-20 ended normally after 86 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of model parameters                        21
## 
##   Number of observations                          8539
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                               345.465     412.287
##   Degrees of freedom                                26          26
##   P-value (Chi-square)                           0.000       0.000
##   Scaling correction factor                                  0.841
##   Shift parameter                                            1.573
##     simple second-order correction                                
## 
## Model Test Baseline Model:
## 
##   Test statistic                             11839.749   10011.806
##   Degrees of freedom                                36          36
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.183
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.973       0.961
##   Tucker-Lewis Index (TLI)                       0.963       0.946
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.038       0.042
##   90 Percent confidence interval - lower         0.034       0.038
##   90 Percent confidence interval - upper         0.042       0.045
##   P-value H_0: RMSEA <= 0.050                    1.000       1.000
##   P-value H_0: RMSEA >= 0.080                    0.000       0.000
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
##   P-value H_0: Robust RMSEA <= 0.050                            NA
##   P-value H_0: Robust RMSEA >= 0.080                            NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.059       0.059
## 
## Parameter Estimates:
## 
##   Parameterization                               Delta
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   health =~                                                             
##     h2o_source        0.129    0.213    0.604    0.546    0.129    0.013
##     share_toilet     -0.358    0.494   -0.724    0.469   -0.358   -0.358
##   wealth =~                                                             
##     radio             0.715    0.013   53.733    0.000    0.715    0.715
##     car_truck         0.805    0.019   41.838    0.000    0.805    0.805
##     cell_phone        0.802    0.011   70.561    0.000    0.802    0.802
##     watch             0.729    0.012   61.076    0.000    0.729    0.729
##     sewing_machine    0.476    0.025   19.432    0.000    0.476    0.476
##     computer          0.852    0.025   34.128    0.000    0.852    0.852
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   health ~~                                                             
##     wealth            1.498    2.070    0.724    0.469    1.498    1.498
##   wealth ~~                                                             
##     educ_years        1.359    0.069   19.730    0.000    1.359    0.280
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .h2o_source       52.199    0.114  457.268    0.000   52.199    5.335
##     educ_years        4.177    0.074   56.298    0.000    4.177    0.861
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     share_toilt|t1   -0.335    0.014  -24.226    0.000   -0.335   -0.335
##     radio|t1         -0.712    0.015  -47.822    0.000   -0.712   -0.712
##     car_truck|t1      1.531    0.021   72.013    0.000    1.531    1.531
##     cell_phone|t1    -0.378    0.014  -27.148    0.000   -0.378   -0.378
##     watch|t1         -0.463    0.014  -32.842    0.000   -0.463   -0.463
##     sewing_mchn|t1    1.579    0.022   72.074    0.000    1.579    1.579
##     computer|t1       1.876    0.027   69.402    0.000    1.876    1.876
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .h2o_source       95.714    0.923  103.728    0.000   95.714    1.000
##    .share_toilet      0.872                               0.872    0.872
##    .radio             0.488                               0.488    0.488
##    .car_truck         0.351                               0.351    0.351
##    .cell_phone        0.357                               0.357    0.357
##    .watch             0.469                               0.469    0.469
##    .sewing_machine    0.773                               0.773    0.773
##    .computer          0.274                               0.274    0.274
##     educ_years       23.517    0.625   37.621    0.000   23.517    1.000
##     health            1.000                               1.000    1.000
##     wealth            1.000                               1.000    1.000