library(rio)
mor=import("Libro1.xlsx")
str(mor)
## 'data.frame': 238 obs. of 14 variables:
## $ Paises : chr "Afghanistan" "Albania" "Algeria" "American Samoa" ...
## $ Mort : num 103.06 10.54 19.22 9.87 3.39 ...
## $ GastEdu : num 2.9 3.1 7 NA 2.9 2.4 4 3.8 NA 5 ...
## $ Car : num 34903 3945 104 241 320 ...
## $ Dsnt : num 19.1 1.5 NA NA NA 19 NA NA 2.7 2 ...
## $ Men5bp : num 19.1 1.5 2.7 NA NA 19 NA NA NA 1.7 ...
## $ Expvidnc: num 54 79.7 78.3 75.6 83.6 ...
## $ Emco2 : num 7.89e+06 3.79e+06 1.52e+08 3.55e+05 NA ...
## $ atenpre : num 65 88 95 NA NA 82 NA 100 NA 95 ...
## $ sobrep : num 4.1 16.4 12.8 NA NA 3.4 NA NA 12.8 12.4 ...
## $ polineq : num 2.7 3.5 NA NA NA 2.7 NA NA NA NA ...
## $ Ginico : num 29.4 30.8 27.6 NA NA 51.3 NA NA NA 42.3 ...
## $ tasdesm : num 13.3 11.8 12.7 29.8 3.7 ...
## $ indsan : num 50.5 99.3 86 NA 100 ...
mor <- mor[, c(1, 2, 5, 7, 9, 11, 14, ncol(mor))]
mor$indsan.1=NULL
mor = na.omit(mor)
dontselect=c("Paises","Mort")
select=setdiff(names(mor),dontselect)
theData=mor[,select]
library(polycor)
corMatrix=polycor::hetcor(theData)$correlations
library(ggcorrplot)
## Loading required package: ggplot2
ggcorrplot(corMatrix)

library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:polycor':
##
## polyserial
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.77
## MSA for each item =
## Dsnt Expvidnc atenpre polineq indsan
## 0.84 0.73 0.75 0.83 0.71
cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)
is.singular.matrix(corMatrix)
## [1] FALSE
fa.parallel(theData, fa = 'fa',correct = T,plot = F)
## Parallel analysis suggests that the number of factors = 2 and the number of components = NA
library(GPArotation)
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
resfa <- fa(theData,
nfactors = 2,
cor = 'mixed',
rotate = "varimax",
fm="minres")
print(resfa$loadings)
##
## Loadings:
## MR1 MR2
## Dsnt -0.571 -0.290
## Expvidnc 0.478 0.576
## atenpre 0.854 0.187
## polineq 0.652 0.277
## indsan 0.244 0.969
##
## MR1 MR2
## SS loadings 1.767 1.466
## Proportion Var 0.353 0.293
## Cumulative Var 0.353 0.647
print(resfa$loadings,cutoff = 0.5)
##
## Loadings:
## MR1 MR2
## Dsnt -0.571
## Expvidnc 0.576
## atenpre 0.854
## polineq 0.652
## indsan 0.969
##
## MR1 MR2
## SS loadings 1.767 1.466
## Proportion Var 0.353 0.293
## Cumulative Var 0.353 0.647
fa.diagram(resfa,main = "Resultados del EFA")

sort(resfa$communality)
## Dsnt polineq Expvidnc atenpre indsan
## 0.4093547 0.5013865 0.5597950 0.7640284 0.9985950
sort(resfa$complexity)
## atenpre indsan polineq Dsnt Expvidnc
## 1.095878 1.126291 1.349948 1.483333 1.933790
library(magrittr)
as.data.frame(resfa$scores)%>%head()
## MR1 MR2
## 1 -1.80614447 0.3621007
## 2 0.12858711 1.5983085
## 6 -0.78215017 0.1549476
## 11 0.91182468 1.2328279
## 18 -0.63823284 0.1810011
## 23 0.04189252 -1.3164003
mor$mor_efa=resfa$scores[,1]
mor$atpod_efa=resfa$scores[,2]
ggplot(data=mor,aes(x=Mort,y=mor_efa)) + geom_point() + theme_minimal() + labs(x="Mortalidad Infantil (original)", y="Mortalidad Infantil EFA")

library(BBmisc)
##
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
##
## isFALSE
efa_scores_ok=normalize(resfa$scores,
method = "range",
margin=2, # by column
range = c(0, 10))
mor$mor_efa_ok=efa_scores_ok[,1]
mor$atpod_efa_ok=efa_scores_ok[,2]
ggplot(data=mor,aes(x=Mort,y=mor_efa_ok)) + geom_point() + theme_minimal() + labs(x="Mortalidad Infantil (original)", y="Mortalidad Infantil EFA (cambiado)")

modelCFA="IndiceSaludyEquidadAlimentaria=~atenpre+polineq+Dsnt
IndiceBienestarSanitario=~indsan+Expvidnc"
theDataNorm=scale(theData)
library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
##
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
##
## cor2cov
cfa_fit <- cfa(modelCFA, data=theDataNorm,
std.lv=TRUE,
missing="fiml")
summary(cfa_fit)
## lavaan 0.6.15 ended normally after 16 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 16
##
## Number of observations 74
## Number of missing patterns 1
##
## Model Test User Model:
##
## Test statistic 3.564
## Degrees of freedom 4
## P-value (Chi-square) 0.468
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Observed
## Observed information based on Hessian
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## IndiceSaludyEquidadAlimentaria =~
## atenpre 0.808 0.108 7.476 0.000
## polineq 0.727 0.110 6.610 0.000
## Dsnt -0.655 0.113 -5.791 0.000
## IndiceBienestarSanitario =~
## indsan 0.749 0.113 6.630 0.000
## Expvidnc 0.894 0.112 7.997 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## IndiceSaludyEquidadAlimentaria ~~
## IndicBnstrSntr 0.716 0.092 7.804 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .atenpre 0.000 0.115 0.000 1.000
## .polineq -0.000 0.115 -0.000 1.000
## .Dsnt -0.000 0.115 -0.000 1.000
## .indsan 0.000 0.115 0.000 1.000
## .Expvidnc 0.000 0.115 0.000 1.000
## IndcSldyEqddAl 0.000
## IndicBnstrSntr 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .atenpre 0.334 0.101 3.312 0.001
## .polineq 0.458 0.103 4.439 0.000
## .Dsnt 0.558 0.112 4.999 0.000
## .indsan 0.426 0.110 3.870 0.000
## .Expvidnc 0.186 0.125 1.492 0.136
## IndcSldyEqddAl 1.000
## IndicBnstrSntr 1.000
allParamCFA=parameterEstimates(cfa_fit,standardized = T)
allFitCFA=as.list(fitMeasures(cfa_fit))
allFitCFA[c("chisq", "df", "pvalue")] # pvalue>0.05
## $chisq
## [1] 3.563979
##
## $df
## [1] 4
##
## $pvalue
## [1] 0.4682171
allFitCFA$tli
## [1] 1.008219
allFitCFA[c('rmsea.ci.lower','rmsea' ,'rmsea.ci.upper')]
## $rmsea.ci.lower
## [1] 0
##
## $rmsea
## [1] 0
##
## $rmsea.ci.upper
## [1] 0.1668309
scorescfa=normalize(lavPredict(cfa_fit),
method = "range",
margin=2, # by column
range = c(0, 10))
mor$mor_efa_ok=scorescfa[,1]
mor$atopd_efa_ok=scorescfa[,2]
ggplot(data=mor,aes(x=Mort,y=mor_efa_ok)) + geom_point() + theme_minimal() + labs(x="Indice de Mortalidad (original)", y="Indice de Mortalidad EFA (cambiado)")

library(lavaanPlot)
lavaanPlot(model = cfa_fit, node_options = list(shape = "box", fontname = "Helvetica"), edge_options = list(color = "grey"), coefs = T)
hipotesis=formula(atopd_efa_ok~mor_efa_ok)
reg1=lm(hipotesis, data=mor)
summary(reg1)
##
## Call:
## lm(formula = hipotesis, data = mor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6129 -1.0543 -0.1102 1.0794 3.3771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.98597 0.58568 -1.683 0.0966 .
## mor_efa_ok 0.95619 0.08172 11.701 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.499 on 72 degrees of freedom
## Multiple R-squared: 0.6554, Adjusted R-squared: 0.6506
## F-statistic: 136.9 on 1 and 72 DF, p-value: < 2.2e-16
modelSEM <- 'IndiceBienestarSanitario =~ Expvidnc+ indsan
IndiceSaludyEquidadAlimentaria=~atenpre+polineq+Dsnt
IndiceSaludyEquidadAlimentaria~IndiceBienestarSanitario'
sem_fit <- sem(modelSEM,
data=theDataNorm)
summary(sem_fit)
## lavaan 0.6.15 ended normally after 19 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 11
##
## Number of observations 74
##
## Model Test User Model:
##
## Test statistic 3.564
## Degrees of freedom 4
## P-value (Chi-square) 0.468
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## IndiceBienestarSanitario =~
## Expvidnc 1.000
## indsan 0.837 0.152 5.514 0.000
## IndiceSaludyEquidadAlimentaria =~
## atenpre 1.000
## polineq 0.900 0.157 5.728 0.000
## Dsnt -0.811 0.155 -5.242 0.000
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## IndiceSaludyEquidadAlimentaria ~
## IndicBnstrSntr 0.647 0.138 4.672 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .Expvidnc 0.186 0.122 1.525 0.127
## .indsan 0.426 0.109 3.924 0.000
## .atenpre 0.334 0.099 3.367 0.001
## .polineq 0.458 0.103 4.432 0.000
## .Dsnt 0.558 0.111 5.020 0.000
## IndicBnstrSntr 0.800 0.198 4.032 0.000
## .IndcSldyEqddAl 0.318 0.112 2.839 0.005
lavaanPlot(model = sem_fit,
node_options = list(shape = "box",
fontname = "Helvetica"),
edge_options = list(color = "grey"), coefs = T,stand = T)
library(semPlot)
semPaths(sem_fit, residuals=F,
sizeMan=7,sizeLat=12,
what = "std",
nCharNodes=10,
posCol=c("skyblue4", "red"),
edge.color="orange",
edge.label.cex=1.2,layout="circle2")
