library(psych)
library(lavaan)
## This is lavaan 0.6-20
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
## 
##     cor2cov
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(stats)
final_df <- read.csv("clean_for_final.csv", header = TRUE)
 #Assuming 'my_data' is your data frame and 'categorical_var' is the categorical variable
# And you want to restrict to the value 'specific_value'
#rural <- final_df[final_df$urb_rural == '2' ]

# Now, perform your procedure on 'subset_data'
# For example, a linear model:
# model <- lm(dependent_var ~ other_var, data = subset_data)
rural <- final_df %>%
  filter(urb_rural == "2")
pov.model <- '
health =~ h2o_source + share_toilet
wealth =~ radio +   watch + computer + car_truck + sewing_machine + cell_phone 
wealth ~~ health  + educ_years'
fit <- cfa(pov.model, std.lv = TRUE, ordered = c("share_toilet", "radio",  "watch", "car_truck" , "sewing_machine", "computer", "cell_phone" ), data = rural)
## Warning: lavaan->lav_model_vcov():  
##    Could not compute standard errors! The information matrix could not be 
##    inverted. This may be a symptom that the model is not identified.
## Warning: lavaan->lav_test_satorra_bentler():  
##    could not invert information matrix needed for robust test statistic
## Warning: lavaan->lav_object_post_check():  
##    covariance matrix of latent variables is not positive definite ; use 
##    lavInspect(fit, "cov.lv") to investigate.
#Error: lavaan->lav_data_full():
#some variables have no variance in group 1 : computer
#3Error: lavaan->lav_data_full():
#some variables have no variance in group 4 : car_truck sewing_machine
#Error: lavaan->lav_data_full():
#some variables have no variance in group 5 : cell_phone
#Error in c("share_toilet", "radio", "watch", ) : argument 4 is empty
#Error in c("share_toilet", "radio", "watch", ) : argument 4 is empty This with urb_rural as the group; no 4 in this group
summary(fit, fit.measures = TRUE, standardized = TRUE)
## lavaan 0.6-20 ended normally after 1325 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of model parameters                        21
## 
##   Number of observations                          6183
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                               278.482          NA
##   Degrees of freedom                                26          26
##   P-value (Chi-square)                           0.000          NA
##   Scaling correction factor                                     NA
##   Shift parameter                                               NA
##                                                                   
## 
## Model Test Baseline Model:
## 
##   Test statistic                              8477.404    6214.294
##   Degrees of freedom                                36          36
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.366
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.970          NA
##   Tucker-Lewis Index (TLI)                       0.959          NA
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.040          NA
##   90 Percent confidence interval - lower         0.036          NA
##   90 Percent confidence interval - upper         0.044          NA
##   P-value H_0: RMSEA <= 0.050                    1.000          NA
##   P-value H_0: RMSEA >= 0.080                    0.000          NA
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
##   P-value H_0: Robust RMSEA <= 0.050                            NA
##   P-value H_0: Robust RMSEA >= 0.080                            NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.103       0.103
## 
## Parameter Estimates:
## 
##   Parameterization                               Delta
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   health =~                                                             
##     h2o_source        0.002       NA                      0.002    0.000
##     share_toilet     -0.000       NA                     -0.000   -0.000
##   wealth =~                                                             
##     radio             0.737       NA                      0.737    0.737
##     watch             0.731       NA                      0.731    0.731
##     computer          0.853       NA                      0.853    0.853
##     car_truck         0.757       NA                      0.757    0.757
##     sewing_machine    0.516       NA                      0.516    0.516
##     cell_phone        0.871       NA                      0.871    0.871
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   health ~~                                                             
##     wealth          901.718       NA                    901.718  901.718
##   wealth ~~                                                             
##     educ_years        1.154       NA                      1.154    0.317
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .h2o_source       49.570       NA                     49.570    9.946
##     educ_years        2.186       NA                      2.186    0.601
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     share_toilt|t1   -0.784       NA                     -0.784   -0.784
##     radio|t1         -0.147       NA                     -0.147   -0.147
##     watch|t1          0.206       NA                      0.206    0.206
##     computer|t1       2.662       NA                      2.662    2.662
##     car_truck|t1      2.228       NA                      2.228    2.228
##     sewing_mchn|t1    2.087       NA                      2.087    2.087
##     cell_phone|t1     0.728       NA                      0.728    0.728
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .h2o_source       24.840       NA                     24.840    1.000
##    .share_toilet      1.000                               1.000    1.000
##    .radio             0.457                               0.457    0.457
##    .watch             0.465                               0.465    0.465
##    .computer          0.272                               0.272    0.272
##    .car_truck         0.427                               0.427    0.427
##    .sewing_machine    0.734                               0.734    0.734
##    .cell_phone        0.240                               0.240    0.240
##     educ_years       13.244       NA                     13.244    1.000
##     health            1.000                               1.000    1.000
##     wealth            1.000                               1.000    1.000
### Run the SEM model

pov2_model <- '
#measurement model
health =~ h2o_source  + share_toilet
wealth =~ radio+ car_truck + cell_phone + watch + sewing_machine +computer
wealth ~~ health + educ_years'

### regressions

educ ~ health
## educ ~ health
health ~ wealth
## health ~ wealth
educ ~ wealth 
## educ ~ wealth
fit <- sem(pov2_model, std.lv = TRUE,  ordered = c("share_toilet", "radio", "car_truck", "cell_phone", "watch", "sewing_machine", "computer"),data = rural)
## Warning: lavaan->lav_model_vcov():  
##    Could not compute standard errors! The information matrix could not be 
##    inverted. This may be a symptom that the model is not identified.
## Warning: lavaan->lav_test_satorra_bentler():  
##    could not invert information matrix needed for robust test statistic
## Warning: lavaan->lav_object_post_check():  
##    covariance matrix of latent variables is not positive definite ; use 
##    lavInspect(fit, "cov.lv") to investigate.
summary(fit, standardized = TRUE,  fit.measures = TRUE)
## lavaan 0.6-20 ended normally after 1380 iterations
## 
##   Estimator                                       DWLS
##   Optimization method                           NLMINB
##   Number of model parameters                        21
## 
##   Number of observations                          6183
## 
## Model Test User Model:
##                                               Standard      Scaled
##   Test Statistic                               278.482          NA
##   Degrees of freedom                                26          26
##   P-value (Chi-square)                           0.000          NA
##   Scaling correction factor                                     NA
##   Shift parameter                                               NA
##                                                                   
## 
## Model Test Baseline Model:
## 
##   Test statistic                              8477.404    6214.294
##   Degrees of freedom                                36          36
##   P-value                                        0.000       0.000
##   Scaling correction factor                                  1.366
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.970          NA
##   Tucker-Lewis Index (TLI)                       0.959          NA
##                                                                   
##   Robust Comparative Fit Index (CFI)                            NA
##   Robust Tucker-Lewis Index (TLI)                               NA
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.040          NA
##   90 Percent confidence interval - lower         0.036          NA
##   90 Percent confidence interval - upper         0.044          NA
##   P-value H_0: RMSEA <= 0.050                    1.000          NA
##   P-value H_0: RMSEA >= 0.080                    0.000          NA
##                                                                   
##   Robust RMSEA                                                  NA
##   90 Percent confidence interval - lower                        NA
##   90 Percent confidence interval - upper                        NA
##   P-value H_0: Robust RMSEA <= 0.050                            NA
##   P-value H_0: Robust RMSEA >= 0.080                            NA
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.103       0.103
## 
## Parameter Estimates:
## 
##   Parameterization                               Delta
##   Standard errors                           Robust.sem
##   Information                                 Expected
##   Information saturated (h1) model        Unstructured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   health =~                                                             
##     h2o_source        0.002       NA                      0.002    0.000
##     share_toilet     -0.000       NA                     -0.000   -0.000
##   wealth =~                                                             
##     radio             0.737       NA                      0.737    0.737
##     car_truck         0.757       NA                      0.757    0.757
##     cell_phone        0.871       NA                      0.871    0.871
##     watch             0.731       NA                      0.731    0.731
##     sewing_machine    0.516       NA                      0.516    0.516
##     computer          0.853       NA                      0.853    0.853
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   health ~~                                                             
##     wealth          993.544       NA                    993.544  993.544
##   wealth ~~                                                             
##     educ_years        1.154       NA                      1.154    0.317
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .h2o_source       49.570       NA                     49.570    9.946
##     educ_years        2.186       NA                      2.186    0.601
## 
## Thresholds:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     share_toilt|t1   -0.784       NA                     -0.784   -0.784
##     radio|t1         -0.147       NA                     -0.147   -0.147
##     car_truck|t1      2.228       NA                      2.228    2.228
##     cell_phone|t1     0.728       NA                      0.728    0.728
##     watch|t1          0.206       NA                      0.206    0.206
##     sewing_mchn|t1    2.087       NA                      2.087    2.087
##     computer|t1       2.662       NA                      2.662    2.662
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .h2o_source       24.840       NA                     24.840    1.000
##    .share_toilet      1.000                               1.000    1.000
##    .radio             0.457                               0.457    0.457
##    .car_truck         0.427                               0.427    0.427
##    .cell_phone        0.240                               0.240    0.240
##    .watch             0.465                               0.465    0.465
##    .sewing_machine    0.734                               0.734    0.734
##    .computer          0.272                               0.272    0.272
##     educ_years       13.244       NA                     13.244    1.000
##     health            1.000                               1.000    1.000
##     wealth            1.000                               1.000    1.000