library(rio)
mor=import("Libro1.xlsx")
mor <- mor[, c(1, 2, 5, 7, 9, 11, 14, ncol(mor))]
mor$indsan.1=NULL
mor = na.omit(mor)
dontselect=c("Paises")
select=setdiff(names(mor),dontselect) 
theData=mor[,select]
library(polycor)
corMatrix=polycor::hetcor(theData)$correlations
library(ggcorrplot)
## Loading required package: ggplot2
ggcorrplot(corMatrix)

library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## The following object is masked from 'package:polycor':
## 
##     polyserial
psych::KMO(corMatrix) 
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.79
## MSA for each item = 
##     Mort     Dsnt Expvidnc  atenpre  polineq   indsan 
##     0.72     0.88     0.70     0.82     0.88     0.89
cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)

is.singular.matrix(corMatrix)
## [1] FALSE
fa.parallel(theData, fa = 'fa',correct = T,plot = F)
## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA
library(GPArotation)
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
resfa <- fa(theData,
            nfactors = 2,
            cor = 'mixed',
            rotate = "varimax",
            fm="minres")
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
print(resfa$loadings)
## 
## Loadings:
##          MR1    MR2   
## Mort     -0.823 -0.447
## Dsnt     -0.281 -0.597
## Expvidnc  0.959  0.287
## atenpre   0.313  0.758
## polineq   0.288  0.663
## indsan    0.600  0.342
## 
##                  MR1   MR2
## SS loadings    2.217 1.769
## Proportion Var 0.370 0.295
## Cumulative Var 0.370 0.664
print(resfa$loadings,cutoff = 0.5)
## 
## Loadings:
##          MR1    MR2   
## Mort     -0.823       
## Dsnt            -0.597
## Expvidnc  0.959       
## atenpre          0.758
## polineq          0.663
## indsan    0.600       
## 
##                  MR1   MR2
## SS loadings    2.217 1.769
## Proportion Var 0.370 0.295
## Cumulative Var 0.370 0.664
fa.diagram(resfa,main = "Resultados del EFA")

sort(resfa$communality)
##      Dsnt    indsan   polineq   atenpre      Mort  Expvidnc 
## 0.4351651 0.4769198 0.5230808 0.6720955 0.8772670 1.0020055
sort(resfa$complexity)
## Expvidnc  atenpre  polineq     Dsnt     Mort   indsan 
## 1.177805 1.332037 1.363804 1.422830 1.542320 1.586288
library(magrittr)
as.data.frame(resfa$scores)%>%head()
##          MR1         MR2
## 1  -2.032936 -1.23832122
## 2   1.960973 -0.07597925
## 6  -0.921940 -0.48434247
## 11  1.019871  0.99987927
## 18  1.457863 -1.20770884
## 23 -1.030113  0.05570630
mor$mor_efa=resfa$scores[,1]
mor$atpod_efa=resfa$scores[,2]

ggplot(data=mor,aes(x=Mort,y=mor_efa)) + geom_point() + theme_minimal() + labs(x="Mortalidad Infantil (original)", y="Mortalidad Infantil EFA")

library(BBmisc)
## 
## Attaching package: 'BBmisc'
## The following object is masked from 'package:base':
## 
##     isFALSE
efa_scores_ok=normalize(resfa$scores, 
                       method = "range", 
                       margin=2, # by column
                       range = c(0, 10))

mor$mor_efa_ok=efa_scores_ok[,1]
mor$atpod_efa_ok=efa_scores_ok[,2]

ggplot(data=mor,aes(x=Mort,y=mor_efa_ok)) + geom_point() + theme_minimal() + labs(x="Mortalidad Infantil (original)", y="Mortalidad Infantil EFA (cambiado)")

modelCFA <- 'IndiceBienestarSanitario  =~ Expvidnc + Mort + indsan 

IndiceSaludyEquidadAlimentaria=~atenpre+polineq+Dsnt'
theDataNorm=scale(theData)

library(lavaan)
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
## 
##     cor2cov
cfa_fit <- cfa(modelCFA, data=theDataNorm, 
           std.lv=TRUE,  
           missing="fiml")
summary(cfa_fit)
## lavaan 0.6.15 ended normally after 28 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        19
## 
##   Number of observations                            74
##   Number of missing patterns                         1
## 
## Model Test User Model:
##                                                       
##   Test statistic                                13.589
##   Degrees of freedom                                 8
##   P-value (Chi-square)                           0.093
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Observed
##   Observed information based on                Hessian
## 
## Latent Variables:
##                                     Estimate  Std.Err  z-value  P(>|z|)
##   IndiceBienestarSanitario =~                                          
##     Expvidnc                           0.940    0.088   10.687    0.000
##     Mort                              -0.970    0.086  -11.298    0.000
##     indsan                             0.668    0.105    6.337    0.000
##   IndiceSaludyEquidadAlimentaria =~                                    
##     atenpre                            0.831    0.105    7.940    0.000
##     polineq                            0.710    0.109    6.508    0.000
##     Dsnt                              -0.646    0.112   -5.774    0.000
## 
## Covariances:
##                               Estimate  Std.Err  z-value  P(>|z|)
##   IndiceBienestarSanitario ~~                                    
##     IndcSldyEqddAl               0.736    0.073   10.106    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Expvidnc          0.000    0.115    0.000    1.000
##    .Mort             -0.000    0.115   -0.000    1.000
##    .indsan            0.000    0.115    0.000    1.000
##    .atenpre           0.000    0.115    0.000    1.000
##    .polineq          -0.000    0.115   -0.000    1.000
##    .Dsnt             -0.000    0.115   -0.000    1.000
##     IndicBnstrSntr    0.000                           
##     IndcSldyEqddAl    0.000                           
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Expvidnc          0.103    0.040    2.591    0.010
##    .Mort              0.046    0.039    1.162    0.245
##    .indsan            0.541    0.096    5.613    0.000
##    .atenpre           0.297    0.093    3.188    0.001
##    .polineq           0.482    0.101    4.759    0.000
##    .Dsnt              0.569    0.110    5.168    0.000
##     IndicBnstrSntr    1.000                           
##     IndcSldyEqddAl    1.000
allParamCFA=parameterEstimates(cfa_fit,standardized = T)
allFitCFA=as.list(fitMeasures(cfa_fit))
allFitCFA[c("chisq", "df", "pvalue")] # pvalue>0.05
## $chisq
## [1] 13.58943
## 
## $df
## [1] 8
## 
## $pvalue
## [1] 0.0931147
allFitCFA$tli 
## [1] 0.9633248
allFitCFA[c('rmsea.ci.lower','rmsea' ,'rmsea.ci.upper')] 
## $rmsea.ci.lower
## [1] 0
## 
## $rmsea
## [1] 0.09716789
## 
## $rmsea.ci.upper
## [1] 0.1833325
scorescfa=normalize(lavPredict(cfa_fit),
                    method = "range", 
                    margin=2, # by column
                    range = c(0, 10))

mor$mor_efa_ok=scorescfa[,1]
mor$atopd_efa_ok=scorescfa[,2]
ggplot(data=mor,aes(x=Mort,y=mor_efa_ok)) + geom_point() + theme_minimal() + labs(x="Indice de Mortalidad (original)", y="Indice de Mortalidad EFA (cambiado)")

library(lavaanPlot)
lavaanPlot(model = cfa_fit, node_options = list(shape = "box", fontname = "Helvetica"), edge_options = list(color = "grey"), coefs = T)
hipotesis=formula(mor_efa_ok~atopd_efa_ok)
reg1=lm(hipotesis, data=mor)
summary(reg1)
## 
## Call:
## lm(formula = hipotesis, data = mor)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7455 -0.7014 -0.0748  0.8760  2.9047 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.7241     0.5359   1.351    0.181    
## atopd_efa_ok   0.8431     0.0726  11.613   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.294 on 72 degrees of freedom
## Multiple R-squared:  0.6519, Adjusted R-squared:  0.6471 
## F-statistic: 134.9 on 1 and 72 DF,  p-value: < 2.2e-16
modelSEM <- 'IndiceBienestarSanitario  =~ Expvidnc + Mort + indsan 

IndiceSaludyEquidadAlimentaria=~atenpre+polineq+Dsnt

IndiceBienestarSanitario~IndiceSaludyEquidadAlimentaria'
sem_fit <- sem(modelSEM, 
              data=theDataNorm)
summary(sem_fit)
## lavaan 0.6.15 ended normally after 26 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        13
## 
##   Number of observations                            74
## 
## Model Test User Model:
##                                                       
##   Test statistic                                13.589
##   Degrees of freedom                                 8
##   P-value (Chi-square)                           0.093
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                                     Estimate  Std.Err  z-value  P(>|z|)
##   IndiceBienestarSanitario =~                                          
##     Expvidnc                           1.000                           
##     Mort                              -1.032    0.062  -16.604    0.000
##     indsan                             0.710    0.097    7.305    0.000
##   IndiceSaludyEquidadAlimentaria =~                                    
##     atenpre                            1.000                           
##     polineq                            0.855    0.144    5.924    0.000
##     Dsnt                              -0.778    0.144   -5.393    0.000
## 
## Regressions:
##                              Estimate  Std.Err  z-value  P(>|z|)
##   IndiceBienestarSanitario ~                                    
##     IndcSldyEqddAl              0.832    0.143    5.806    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Expvidnc          0.103    0.038    2.729    0.006
##    .Mort              0.046    0.037    1.244    0.213
##    .indsan            0.541    0.092    5.902    0.000
##    .atenpre           0.297    0.093    3.199    0.001
##    .polineq           0.482    0.101    4.759    0.000
##    .Dsnt              0.569    0.110    5.178    0.000
##    .IndicBnstrSntr    0.405    0.097    4.165    0.000
##     IndcSldyEqddAl    0.690    0.174    3.974    0.000
lavaanPlot(model = sem_fit,
           node_options = list(shape = "box",
                               fontname = "Helvetica"),
           edge_options = list(color = "grey"), coefs = T,stand = T)
library(semPlot)
semPaths(sem_fit, residuals=F,
         sizeMan=7,sizeLat=12,
         what = "std",
         nCharNodes=10,
         posCol=c("skyblue4", "red"),
         edge.color="orange",
         edge.label.cex=1.2,layout="circle2")