library(psych)
library(lavaan)
## This is lavaan 0.6-20
## lavaan is FREE software! Please report any bugs.
##
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
##
## cor2cov
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(stats)
final_df <- read.csv("clean_for_final.csv", header = TRUE)
#Assuming 'my_data' is your data frame and 'categorical_var' is the categorical variable
# And you want to restrict to the value 'specific_value'
#rural <- final_df[final_df$urb_rural == '2' ]
# Now, perform your procedure on 'subset_data'
# For example, a linear model:
# model <- lm(dependent_var ~ other_var, data = subset_data)
urban <- final_df %>%
filter(urb_rural == "1")
#mean(rural$wi_continuous) [1] 4550.573
#mean(urban$wi_continuous) [1] 112430.3
pov.model <- '
health =~ h2o_source + share_toilet
wealth =~ radio + watch + computer + car_truck + sewing_machine + cell_phone
wealth ~~ health + educ_years'
fit <- cfa(pov.model, std.lv = TRUE, ordered = c("share_toilet", "radio", "watch", "car_truck" , "sewing_machine", "computer", "cell_phone" ), data = urban)
## Warning: lavaan->lav_object_post_check():
## covariance matrix of latent variables is not positive definite ; use
## lavInspect(fit, "cov.lv") to investigate.
#Error: lavaan->lav_data_full():
#some variables have no variance in group 1 : computer car_truck
#Error: lavaan->lav_data_full():
#some variables have no variance in group 1 : computer
#3Error: lavaan->lav_data_full():
#some variables have no variance in group 4 : car_truck sewing_machine
#Error: lavaan->lav_data_full():
#some variables have no variance in group 5 : cell_phone
#Error in c("share_toilet", "radio", "watch", ) : argument 4 is empty
#Error in c("share_toilet", "radio", "watch", ) : argument 4 is empty This with urb_rural as the group; no 4 in this group
summary(fit, fit.measures = TRUE, standardized = TRUE)
## lavaan 0.6-20 ended normally after 86 iterations
##
## Estimator DWLS
## Optimization method NLMINB
## Number of model parameters 21
##
## Number of observations 8539
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 345.465 412.287
## Degrees of freedom 26 26
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 0.841
## Shift parameter 1.573
## simple second-order correction
##
## Model Test Baseline Model:
##
## Test statistic 11839.749 10011.806
## Degrees of freedom 36 36
## P-value 0.000 0.000
## Scaling correction factor 1.183
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.973 0.961
## Tucker-Lewis Index (TLI) 0.963 0.946
##
## Robust Comparative Fit Index (CFI) NA
## Robust Tucker-Lewis Index (TLI) NA
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.038 0.042
## 90 Percent confidence interval - lower 0.034 0.038
## 90 Percent confidence interval - upper 0.042 0.045
## P-value H_0: RMSEA <= 0.050 1.000 1.000
## P-value H_0: RMSEA >= 0.080 0.000 0.000
##
## Robust RMSEA NA
## 90 Percent confidence interval - lower NA
## 90 Percent confidence interval - upper NA
## P-value H_0: Robust RMSEA <= 0.050 NA
## P-value H_0: Robust RMSEA >= 0.080 NA
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.059 0.059
##
## Parameter Estimates:
##
## Parameterization Delta
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Unstructured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## health =~
## h2o_source 0.129 0.213 0.604 0.546 0.129 0.013
## share_toilet -0.358 0.494 -0.724 0.469 -0.358 -0.358
## wealth =~
## radio 0.715 0.013 53.733 0.000 0.715 0.715
## watch 0.729 0.012 61.076 0.000 0.729 0.729
## computer 0.852 0.025 34.128 0.000 0.852 0.852
## car_truck 0.805 0.019 41.838 0.000 0.805 0.805
## sewing_machine 0.476 0.025 19.432 0.000 0.476 0.476
## cell_phone 0.802 0.011 70.561 0.000 0.802 0.802
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## health ~~
## wealth 1.498 2.070 0.724 0.469 1.498 1.498
## wealth ~~
## educ_years 1.359 0.069 19.730 0.000 1.359 0.280
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .h2o_source 52.199 0.114 457.268 0.000 52.199 5.335
## educ_years 4.177 0.074 56.298 0.000 4.177 0.861
##
## Thresholds:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## share_toilt|t1 -0.335 0.014 -24.226 0.000 -0.335 -0.335
## radio|t1 -0.712 0.015 -47.822 0.000 -0.712 -0.712
## watch|t1 -0.463 0.014 -32.842 0.000 -0.463 -0.463
## computer|t1 1.876 0.027 69.402 0.000 1.876 1.876
## car_truck|t1 1.531 0.021 72.013 0.000 1.531 1.531
## sewing_mchn|t1 1.579 0.022 72.074 0.000 1.579 1.579
## cell_phone|t1 -0.378 0.014 -27.148 0.000 -0.378 -0.378
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .h2o_source 95.714 0.923 103.728 0.000 95.714 1.000
## .share_toilet 0.872 0.872 0.872
## .radio 0.488 0.488 0.488
## .watch 0.469 0.469 0.469
## .computer 0.274 0.274 0.274
## .car_truck 0.351 0.351 0.351
## .sewing_machine 0.773 0.773 0.773
## .cell_phone 0.357 0.357 0.357
## educ_years 23.517 0.625 37.621 0.000 23.517 1.000
## health 1.000 1.000 1.000
## wealth 1.000 1.000 1.000
### Run the SEM model
pov2_model <- '
#measurement model
health =~ h2o_source + share_toilet
wealth =~ radio+ car_truck + cell_phone + watch + sewing_machine +computer
wealth ~~ health + educ_years'
### regressions
educ ~ health
## educ ~ health
health ~ wealth
## health ~ wealth
educ ~ wealth
## educ ~ wealth
fit <- sem(pov2_model, std.lv = TRUE,, ordered = c("share_toilet", "radio", "car_truck", "cell_phone", "watch", "sewing_machine", "computer"),data = urban)
## Warning: lavaan->lav_object_post_check():
## covariance matrix of latent variables is not positive definite ; use
## lavInspect(fit, "cov.lv") to investigate.
summary(fit, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-20 ended normally after 86 iterations
##
## Estimator DWLS
## Optimization method NLMINB
## Number of model parameters 21
##
## Number of observations 8539
##
## Model Test User Model:
## Standard Scaled
## Test Statistic 345.465 412.287
## Degrees of freedom 26 26
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 0.841
## Shift parameter 1.573
## simple second-order correction
##
## Model Test Baseline Model:
##
## Test statistic 11839.749 10011.806
## Degrees of freedom 36 36
## P-value 0.000 0.000
## Scaling correction factor 1.183
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.973 0.961
## Tucker-Lewis Index (TLI) 0.963 0.946
##
## Robust Comparative Fit Index (CFI) NA
## Robust Tucker-Lewis Index (TLI) NA
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.038 0.042
## 90 Percent confidence interval - lower 0.034 0.038
## 90 Percent confidence interval - upper 0.042 0.045
## P-value H_0: RMSEA <= 0.050 1.000 1.000
## P-value H_0: RMSEA >= 0.080 0.000 0.000
##
## Robust RMSEA NA
## 90 Percent confidence interval - lower NA
## 90 Percent confidence interval - upper NA
## P-value H_0: Robust RMSEA <= 0.050 NA
## P-value H_0: Robust RMSEA >= 0.080 NA
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.059 0.059
##
## Parameter Estimates:
##
## Parameterization Delta
## Standard errors Robust.sem
## Information Expected
## Information saturated (h1) model Unstructured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## health =~
## h2o_source 0.129 0.213 0.604 0.546 0.129 0.013
## share_toilet -0.358 0.494 -0.724 0.469 -0.358 -0.358
## wealth =~
## radio 0.715 0.013 53.733 0.000 0.715 0.715
## car_truck 0.805 0.019 41.838 0.000 0.805 0.805
## cell_phone 0.802 0.011 70.561 0.000 0.802 0.802
## watch 0.729 0.012 61.076 0.000 0.729 0.729
## sewing_machine 0.476 0.025 19.432 0.000 0.476 0.476
## computer 0.852 0.025 34.128 0.000 0.852 0.852
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## health ~~
## wealth 1.498 2.070 0.724 0.469 1.498 1.498
## wealth ~~
## educ_years 1.359 0.069 19.730 0.000 1.359 0.280
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .h2o_source 52.199 0.114 457.268 0.000 52.199 5.335
## educ_years 4.177 0.074 56.298 0.000 4.177 0.861
##
## Thresholds:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## share_toilt|t1 -0.335 0.014 -24.226 0.000 -0.335 -0.335
## radio|t1 -0.712 0.015 -47.822 0.000 -0.712 -0.712
## car_truck|t1 1.531 0.021 72.013 0.000 1.531 1.531
## cell_phone|t1 -0.378 0.014 -27.148 0.000 -0.378 -0.378
## watch|t1 -0.463 0.014 -32.842 0.000 -0.463 -0.463
## sewing_mchn|t1 1.579 0.022 72.074 0.000 1.579 1.579
## computer|t1 1.876 0.027 69.402 0.000 1.876 1.876
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .h2o_source 95.714 0.923 103.728 0.000 95.714 1.000
## .share_toilet 0.872 0.872 0.872
## .radio 0.488 0.488 0.488
## .car_truck 0.351 0.351 0.351
## .cell_phone 0.357 0.357 0.357
## .watch 0.469 0.469 0.469
## .sewing_machine 0.773 0.773 0.773
## .computer 0.274 0.274 0.274
## educ_years 23.517 0.625 37.621 0.000 23.517 1.000
## health 1.000 1.000 1.000
## wealth 1.000 1.000 1.000