library(rio)
mor=import("Libro1.xlsx")
mor <- mor[, c(1, 2, 5, 7, 9, 11, 14, ncol(mor))]
mor$indsan.1=NULL
mor = na.omit(mor)
dontselect=c("Paises")
select=setdiff(names(mor),dontselect)
theData=mor[,select]
library(polycor)
corMatrix=polycor::hetcor(theData)$correlations
library(ggcorrplot)
## Loading required package: ggplot2
ggcorrplot(corMatrix)

library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:polycor':
##
## polyserial
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.79
## MSA for each item =
## Mort Dsnt Expvidnc atenpre polineq indsan
## 0.72 0.88 0.70 0.82 0.88 0.89
cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)
is.singular.matrix(corMatrix)
## [1] FALSE
fa.parallel(theData, fa = 'fa',correct = T,plot = F)
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
library(GPArotation)
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
resfa <- fa(theData,
nfactors = 2,
cor = 'mixed',
rotate = "varimax",
fm="minres")
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
print(resfa$loadings)
##
## Loadings:
## MR1 MR2
## Mort -0.823 -0.447
## Dsnt -0.281 -0.597
## Expvidnc 0.959 0.287
## atenpre 0.313 0.758
## polineq 0.288 0.663
## indsan 0.600 0.342
##
## MR1 MR2
## SS loadings 2.217 1.769
## Proportion Var 0.370 0.295
## Cumulative Var 0.370 0.664
print(resfa$loadings,cutoff = 0.5)
##
## Loadings:
## MR1 MR2
## Mort -0.823
## Dsnt -0.597
## Expvidnc 0.959
## atenpre 0.758
## polineq 0.663
## indsan 0.600
##
## MR1 MR2
## SS loadings 2.217 1.769
## Proportion Var 0.370 0.295
## Cumulative Var 0.370 0.664
fa.diagram(resfa,main = "Resultados del EFA")

sort(resfa$communality)
## Dsnt indsan polineq atenpre Mort Expvidnc
## 0.4351651 0.4769198 0.5230808 0.6720955 0.8772670 1.0020055
sort(resfa$complexity)
## Expvidnc atenpre polineq Dsnt Mort indsan
## 1.177805 1.332037 1.363804 1.422830 1.542320 1.586288
library(magrittr)
as.data.frame(resfa$scores)%>%head()
## MR1 MR2
## 1 -2.032936 -1.23832122
## 2 1.960973 -0.07597925
## 6 -0.921940 -0.48434247
## 11 1.019871 0.99987927
## 18 1.457863 -1.20770884
## 23 -1.030113 0.05570630
mor$mor_efa=resfa$scores[,1]
mor$atpod_efa=resfa$scores[,2]
ggplot(data=mor,aes(x=Mort,y=mor_efa)) + geom_point() + theme_minimal() + labs(x="Mortalidad Infantil (original)", y="Mortalidad Infantil EFA")

library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
##
## isFALSE
efa_scores_ok=normalize(resfa$scores,
method = "range",
margin=2, # by column
range = c(0, 10))
mor$mor_efa_ok=efa_scores_ok[,1]
mor$atpod_efa_ok=efa_scores_ok[,2]
ggplot(data=mor,aes(x=Mort,y=mor_efa_ok)) + geom_point() + theme_minimal() + labs(x="Mortalidad Infantil (original)", y="Mortalidad Infantil EFA (cambiado)")

modelCFA <- 'IndiceBienestarSanitario =~ Expvidnc + Mort + indsan
IndiceSaludyEquidadAlimentaria=~atenpre+polineq+Dsnt'
theDataNorm=scale(theData)
library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
##
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
##
## cor2cov
cfa_fit <- cfa(modelCFA, data=theDataNorm,
std.lv=TRUE,
missing="fiml")
summary(cfa_fit)
## lavaan 0.6.15 ended normally after 28 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 19
##
## Number of observations 74
## Number of missing patterns 1
##
## Model Test User Model:
##
## Test statistic 13.589
## Degrees of freedom 8
## P-value (Chi-square) 0.093
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Observed
## Observed information based on Hessian
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## IndiceBienestarSanitario =~
## Expvidnc 0.940 0.088 10.687 0.000
## Mort -0.970 0.086 -11.298 0.000
## indsan 0.668 0.105 6.337 0.000
## IndiceSaludyEquidadAlimentaria =~
## atenpre 0.831 0.105 7.940 0.000
## polineq 0.710 0.109 6.508 0.000
## Dsnt -0.646 0.112 -5.774 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## IndiceBienestarSanitario ~~
## IndcSldyEqddAl 0.736 0.073 10.106 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .Expvidnc 0.000 0.115 0.000 1.000
## .Mort -0.000 0.115 -0.000 1.000
## .indsan 0.000 0.115 0.000 1.000
## .atenpre 0.000 0.115 0.000 1.000
## .polineq -0.000 0.115 -0.000 1.000
## .Dsnt -0.000 0.115 -0.000 1.000
## IndicBnstrSntr 0.000
## IndcSldyEqddAl 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .Expvidnc 0.103 0.040 2.591 0.010
## .Mort 0.046 0.039 1.162 0.245
## .indsan 0.541 0.096 5.613 0.000
## .atenpre 0.297 0.093 3.188 0.001
## .polineq 0.482 0.101 4.759 0.000
## .Dsnt 0.569 0.110 5.168 0.000
## IndicBnstrSntr 1.000
## IndcSldyEqddAl 1.000
allParamCFA=parameterEstimates(cfa_fit,standardized = T)
allFitCFA=as.list(fitMeasures(cfa_fit))
allFitCFA[c("chisq", "df", "pvalue")] # pvalue>0.05
## $chisq
## [1] 13.58943
##
## $df
## [1] 8
##
## $pvalue
## [1] 0.0931147
allFitCFA$tli
## [1] 0.9633248
allFitCFA[c('rmsea.ci.lower','rmsea' ,'rmsea.ci.upper')]
## $rmsea.ci.lower
## [1] 0
##
## $rmsea
## [1] 0.09716789
##
## $rmsea.ci.upper
## [1] 0.1833325
scorescfa=normalize(lavPredict(cfa_fit),
method = "range",
margin=2, # by column
range = c(0, 10))
mor$mor_efa_ok=scorescfa[,1]
mor$atopd_efa_ok=scorescfa[,2]
ggplot(data=mor,aes(x=Mort,y=mor_efa_ok)) + geom_point() + theme_minimal() + labs(x="Indice de Mortalidad (original)", y="Indice de Mortalidad EFA (cambiado)")

library(lavaanPlot)
lavaanPlot(model = cfa_fit, node_options = list(shape = "box", fontname = "Helvetica"), edge_options = list(color = "grey"), coefs = T)
hipotesis=formula(mor_efa_ok~atopd_efa_ok)
reg1=lm(hipotesis, data=mor)
summary(reg1)
##
## Call:
## lm(formula = hipotesis, data = mor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7455 -0.7014 -0.0748 0.8760 2.9047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7241 0.5359 1.351 0.181
## atopd_efa_ok 0.8431 0.0726 11.613 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.294 on 72 degrees of freedom
## Multiple R-squared: 0.6519, Adjusted R-squared: 0.6471
## F-statistic: 134.9 on 1 and 72 DF, p-value: < 2.2e-16
modelSEM <- 'IndiceBienestarSanitario =~ Expvidnc + Mort + indsan
IndiceSaludyEquidadAlimentaria=~atenpre+polineq+Dsnt
IndiceBienestarSanitario~IndiceSaludyEquidadAlimentaria'
sem_fit <- sem(modelSEM,
data=theDataNorm)
summary(sem_fit)
## lavaan 0.6.15 ended normally after 26 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 13
##
## Number of observations 74
##
## Model Test User Model:
##
## Test statistic 13.589
## Degrees of freedom 8
## P-value (Chi-square) 0.093
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## IndiceBienestarSanitario =~
## Expvidnc 1.000
## Mort -1.032 0.062 -16.604 0.000
## indsan 0.710 0.097 7.305 0.000
## IndiceSaludyEquidadAlimentaria =~
## atenpre 1.000
## polineq 0.855 0.144 5.924 0.000
## Dsnt -0.778 0.144 -5.393 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## IndiceBienestarSanitario ~
## IndcSldyEqddAl 0.832 0.143 5.806 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .Expvidnc 0.103 0.038 2.729 0.006
## .Mort 0.046 0.037 1.244 0.213
## .indsan 0.541 0.092 5.902 0.000
## .atenpre 0.297 0.093 3.199 0.001
## .polineq 0.482 0.101 4.759 0.000
## .Dsnt 0.569 0.110 5.178 0.000
## .IndicBnstrSntr 0.405 0.097 4.165 0.000
## IndcSldyEqddAl 0.690 0.174 3.974 0.000
lavaanPlot(model = sem_fit,
node_options = list(shape = "box",
fontname = "Helvetica"),
edge_options = list(color = "grey"), coefs = T,stand = T)
library(semPlot)
semPaths(sem_fit, residuals=F,
sizeMan=7,sizeLat=12,
what = "std",
nCharNodes=10,
posCol=c("skyblue4", "red"),
edge.color="orange",
edge.label.cex=1.2,layout="circle2")
