Entregable-3.knit

library(rio)
mor=import("Libro1.xlsx")

str(mor)

## 'data.frame':    238 obs. of  14 variables:
##  $ Paises  : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ Mort    : num  103.06 10.54 19.22 9.87 3.39 ...
##  $ GastEdu : num  2.9 3.1 7 NA 2.9 2.4 4 3.8 NA 5 ...
##  $ Car     : num  34903 3945 104 241 320 ...
##  $ Dsnt    : num  19.1 1.5 NA NA NA 19 NA NA 2.7 2 ...
##  $ Men5bp  : num  19.1 1.5 2.7 NA NA 19 NA NA NA 1.7 ...
##  $ Expvidnc: num  54 79.7 78.3 75.6 83.6 ...
##  $ Emco2   : num  7.89e+06 3.79e+06 1.52e+08 3.55e+05 NA ...
##  $ atenpre : num  65 88 95 NA NA 82 NA 100 NA 95 ...
##  $ sobrep  : num  4.1 16.4 12.8 NA NA 3.4 NA NA 12.8 12.4 ...
##  $ polineq : num  2.7 3.5 NA NA NA 2.7 NA NA NA NA ...
##  $ Ginico  : num  29.4 30.8 27.6 NA NA 51.3 NA NA NA 42.3 ...
##  $ tasdesm : num  13.3 11.8 12.7 29.8 3.7 ...
##  $ indsan  : num  50.5 99.3 86 NA 100 ...

mor <- mor[, c(1, 2, 5, 7, 9, 11, 14, ncol(mor))]
mor$indsan.1=NULL

mor = na.omit(mor)

dontselect=c("Paises","Mort")
select=setdiff(names(mor),dontselect) 
theData=mor[,select]

library(polycor)
corMatrix=polycor::hetcor(theData)$correlations

library(ggcorrplot)

## Loading required package: ggplot2

ggcorrplot(corMatrix)

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## The following object is masked from 'package:polycor':
## 
##     polyserial

psych::KMO(corMatrix)

## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.77
## MSA for each item = 
##     Dsnt Expvidnc  atenpre  polineq   indsan 
##     0.84     0.73     0.75     0.83     0.71

cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05

## [1] FALSE

library(matrixcalc)

is.singular.matrix(corMatrix)

## [1] FALSE

fa.parallel(theData, fa = 'fa',correct = T,plot = F)

## Parallel analysis suggests that the number of factors =  2  and the number of components =  NA

library(GPArotation)

## 
## Attaching package: 'GPArotation'

## The following objects are masked from 'package:psych':
## 
##     equamax, varimin

resfa <- fa(theData,
            nfactors = 2,
            cor = 'mixed',
            rotate = "varimax",
            fm="minres")
print(resfa$loadings)

## 
## Loadings:
##          MR1    MR2   
## Dsnt     -0.571 -0.290
## Expvidnc  0.478  0.576
## atenpre   0.854  0.187
## polineq   0.652  0.277
## indsan    0.244  0.969
## 
##                  MR1   MR2
## SS loadings    1.767 1.466
## Proportion Var 0.353 0.293
## Cumulative Var 0.353 0.647

print(resfa$loadings,cutoff = 0.5)

## 
## Loadings:
##          MR1    MR2   
## Dsnt     -0.571       
## Expvidnc         0.576
## atenpre   0.854       
## polineq   0.652       
## indsan           0.969
## 
##                  MR1   MR2
## SS loadings    1.767 1.466
## Proportion Var 0.353 0.293
## Cumulative Var 0.353 0.647

fa.diagram(resfa,main = "Resultados del EFA")

sort(resfa$communality)

##      Dsnt   polineq  Expvidnc   atenpre    indsan 
## 0.4093547 0.5013865 0.5597950 0.7640284 0.9985950

sort(resfa$complexity)

##  atenpre   indsan  polineq     Dsnt Expvidnc 
## 1.095878 1.126291 1.349948 1.483333 1.933790

library(magrittr)
as.data.frame(resfa$scores)%>%head()

##            MR1        MR2
## 1  -1.80614447  0.3621007
## 2   0.12858711  1.5983085
## 6  -0.78215017  0.1549476
## 11  0.91182468  1.2328279
## 18 -0.63823284  0.1810011
## 23  0.04189252 -1.3164003

mor$mor_efa=resfa$scores[,1]
mor$atpod_efa=resfa$scores[,2]

ggplot(data=mor,aes(x=Mort,y=mor_efa)) + geom_point() + theme_minimal() + labs(x="Mortalidad Infantil (original)", y="Mortalidad Infantil EFA")

library(BBmisc)

## 
## Attaching package: 'BBmisc'

## The following object is masked from 'package:base':
## 
##     isFALSE

efa_scores_ok=normalize(resfa$scores, 
                       method = "range", 
                       margin=2, # by column
                       range = c(0, 10))

mor$mor_efa_ok=efa_scores_ok[,1]
mor$atpod_efa_ok=efa_scores_ok[,2]

ggplot(data=mor,aes(x=Mort,y=mor_efa_ok)) + geom_point() + theme_minimal() + labs(x="Mortalidad Infantil (original)", y="Mortalidad Infantil EFA (cambiado)")

modelCFA="IndiceSaludyEquidadAlimentaria=~atenpre+polineq+Dsnt
IndiceBienestarSanitario=~indsan+Expvidnc"

theDataNorm=scale(theData)

library(lavaan)

## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.

## 
## Attaching package: 'lavaan'

## The following object is masked from 'package:psych':
## 
##     cor2cov

cfa_fit <- cfa(modelCFA, data=theDataNorm, 
           std.lv=TRUE,  
           missing="fiml")
summary(cfa_fit)

## lavaan 0.6.15 ended normally after 16 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        16
## 
##   Number of observations                            74
##   Number of missing patterns                         1
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 3.564
##   Degrees of freedom                                 4
##   P-value (Chi-square)                           0.468
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Observed
##   Observed information based on                Hessian
## 
## Latent Variables:
##                                     Estimate  Std.Err  z-value  P(>|z|)
##   IndiceSaludyEquidadAlimentaria =~                                    
##     atenpre                            0.808    0.108    7.476    0.000
##     polineq                            0.727    0.110    6.610    0.000
##     Dsnt                              -0.655    0.113   -5.791    0.000
##   IndiceBienestarSanitario =~                                          
##     indsan                             0.749    0.113    6.630    0.000
##     Expvidnc                           0.894    0.112    7.997    0.000
## 
## Covariances:
##                                     Estimate  Std.Err  z-value  P(>|z|)
##   IndiceSaludyEquidadAlimentaria ~~                                    
##     IndicBnstrSntr                     0.716    0.092    7.804    0.000
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .atenpre           0.000    0.115    0.000    1.000
##    .polineq          -0.000    0.115   -0.000    1.000
##    .Dsnt             -0.000    0.115   -0.000    1.000
##    .indsan            0.000    0.115    0.000    1.000
##    .Expvidnc          0.000    0.115    0.000    1.000
##     IndcSldyEqddAl    0.000                           
##     IndicBnstrSntr    0.000                           
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .atenpre           0.334    0.101    3.312    0.001
##    .polineq           0.458    0.103    4.439    0.000
##    .Dsnt              0.558    0.112    4.999    0.000
##    .indsan            0.426    0.110    3.870    0.000
##    .Expvidnc          0.186    0.125    1.492    0.136
##     IndcSldyEqddAl    1.000                           
##     IndicBnstrSntr    1.000

allParamCFA=parameterEstimates(cfa_fit,standardized = T)
allFitCFA=as.list(fitMeasures(cfa_fit))

allFitCFA[c("chisq", "df", "pvalue")] # pvalue>0.05

## $chisq
## [1] 3.563979
## 
## $df
## [1] 4
## 
## $pvalue
## [1] 0.4682171

allFitCFA$tli

## [1] 1.008219

allFitCFA[c('rmsea.ci.lower','rmsea' ,'rmsea.ci.upper')]

## $rmsea.ci.lower
## [1] 0
## 
## $rmsea
## [1] 0
## 
## $rmsea.ci.upper
## [1] 0.1668309

scorescfa=normalize(lavPredict(cfa_fit),
                    method = "range", 
                    margin=2, # by column
                    range = c(0, 10))

mor$mor_efa_ok=scorescfa[,1]
mor$atopd_efa_ok=scorescfa[,2]

ggplot(data=mor,aes(x=Mort,y=mor_efa_ok)) + geom_point() + theme_minimal() + labs(x="Indice de Mortalidad (original)", y="Indice de Mortalidad EFA (cambiado)")

library(lavaanPlot)
lavaanPlot(model = cfa_fit, node_options = list(shape = "box", fontname = "Helvetica"), edge_options = list(color = "grey"), coefs = T)

hipotesis=formula(atopd_efa_ok~mor_efa_ok)
reg1=lm(hipotesis, data=mor)
summary(reg1)

## 
## Call:
## lm(formula = hipotesis, data = mor)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6129 -1.0543 -0.1102  1.0794  3.3771 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.98597    0.58568  -1.683   0.0966 .  
## mor_efa_ok   0.95619    0.08172  11.701   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.499 on 72 degrees of freedom
## Multiple R-squared:  0.6554, Adjusted R-squared:  0.6506 
## F-statistic: 136.9 on 1 and 72 DF,  p-value: < 2.2e-16

modelSEM <- 'IndiceBienestarSanitario  =~ Expvidnc+ indsan 

IndiceSaludyEquidadAlimentaria=~atenpre+polineq+Dsnt

IndiceSaludyEquidadAlimentaria~IndiceBienestarSanitario'

sem_fit <- sem(modelSEM, 
              data=theDataNorm)
summary(sem_fit)

## lavaan 0.6.15 ended normally after 19 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        11
## 
##   Number of observations                            74
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 3.564
##   Degrees of freedom                                 4
##   P-value (Chi-square)                           0.468
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                                     Estimate  Std.Err  z-value  P(>|z|)
##   IndiceBienestarSanitario =~                                          
##     Expvidnc                           1.000                           
##     indsan                             0.837    0.152    5.514    0.000
##   IndiceSaludyEquidadAlimentaria =~                                    
##     atenpre                            1.000                           
##     polineq                            0.900    0.157    5.728    0.000
##     Dsnt                              -0.811    0.155   -5.242    0.000
## 
## Regressions:
##                                    Estimate  Std.Err  z-value  P(>|z|)
##   IndiceSaludyEquidadAlimentaria ~                                    
##     IndicBnstrSntr                    0.647    0.138    4.672    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .Expvidnc          0.186    0.122    1.525    0.127
##    .indsan            0.426    0.109    3.924    0.000
##    .atenpre           0.334    0.099    3.367    0.001
##    .polineq           0.458    0.103    4.432    0.000
##    .Dsnt              0.558    0.111    5.020    0.000
##     IndicBnstrSntr    0.800    0.198    4.032    0.000
##    .IndcSldyEqddAl    0.318    0.112    2.839    0.005

lavaanPlot(model = sem_fit,
           node_options = list(shape = "box",
                               fontname = "Helvetica"),
           edge_options = list(color = "grey"), coefs = T,stand = T)

library(semPlot)
semPaths(sem_fit, residuals=F,
         sizeMan=7,sizeLat=12,
         what = "std",
         nCharNodes=10,
         posCol=c("skyblue4", "red"),
         edge.color="orange",
         edge.label.cex=1.2,layout="circle2")