rm(list = ls())
library(rio)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data1= import("reporte.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
data1 = data1[,-c(1,2)] 
filas_a_eliminar <- c(1, 2, 3, 4, 202, 203, 204)
data1 <- data1[-filas_a_eliminar, ]
str(data1)
## 'data.frame':    197 obs. of  13 variables:
##  $ ...3 : chr  "Provincia" "Amazonas, provincia: Chachapoyas" "Amazonas, provincia: Bagua" "Amazonas, provincia: Bongara" ...
##  $ ...4 : chr  "No usa electricidad" "14763" "20313" "7689" ...
##  $ ...5 : chr  "Sí usa electricidad" "574" "161" "124" ...
##  $ ...6 : chr  "Total" "15337" "20474" "7813" ...
##  $ ...7 : chr  "No usa gas (balón GLP)" "4696" "10557" "3154" ...
##  $ ...8 : chr  "Sí usa gas (balón GLP)" "10641" "9917" "4659" ...
##  $ ...9 : chr  "Total" "15337" "20474" "7813" ...
##  $ ...10: chr  "No usa carbón" "15161" "20185" "7755" ...
##  $ ...11: chr  "Sí usa carbón" "176" "289" "58" ...
##  $ ...12: chr  "Total" "15337" "20474" "7813" ...
##  $ ...13: chr  "No usa leña" "7236" "7357" "2345" ...
##  $ ...14: chr  "Sí usa leña" "8101" "13117" "5468" ...
##  $ ...15: chr  "Total" "15337" "20474" "7813" ...
data1 <- data1 %>%
  rename (provincia = "...3",
         no_electricidad = "...4",
         si_electricidad = "...5",
         total_electricidad = "...6",
         no_gas = "...7",
         si_gas = "...8",
         total_gas = "...9",
         no_carbon = "...10",
         si_carbon = "...11",
         total_carbon = "...12",
         no_leña = "...13",
         si_leña = "...14",
         total_leña = "...15")
data1 <- data1[-1, ]
str(data1)
## 'data.frame':    196 obs. of  13 variables:
##  $ provincia         : chr  "Amazonas, provincia: Chachapoyas" "Amazonas, provincia: Bagua" "Amazonas, provincia: Bongara" "Amazonas, provincia: Condorcanqui" ...
##  $ no_electricidad   : chr  "14763" "20313" "7689" "9853" ...
##  $ si_electricidad   : chr  "574" "161" "124" "14" ...
##  $ total_electricidad: chr  "15337" "20474" "7813" "9867" ...
##  $ no_gas            : chr  "4696" "10557" "3154" "8331" ...
##  $ si_gas            : chr  "10641" "9917" "4659" "1536" ...
##  $ total_gas         : chr  "15337" "20474" "7813" "9867" ...
##  $ no_carbon         : chr  "15161" "20185" "7755" "9841" ...
##  $ si_carbon         : chr  "176" "289" "58" "26" ...
##  $ total_carbon      : chr  "15337" "20474" "7813" "9867" ...
##  $ no_leña           : chr  "7236" "7357" "2345" "1059" ...
##  $ si_leña           : chr  "8101" "13117" "5468" "8808" ...
##  $ total_leña        : chr  "15337" "20474" "7813" "9867" ...
data1[,-c(1)]=lapply(data1[,-c(1)], readr::parse_number)
str(data1)
## 'data.frame':    196 obs. of  13 variables:
##  $ provincia         : chr  "Amazonas, provincia: Chachapoyas" "Amazonas, provincia: Bagua" "Amazonas, provincia: Bongara" "Amazonas, provincia: Condorcanqui" ...
##  $ no_electricidad   : num  14763 20313 7689 9853 13112 ...
##  $ si_electricidad   : num  574 161 124 14 90 65 255 921 16 33 ...
##  $ total_electricidad: num  15337 20474 7813 9867 13202 ...
##  $ no_gas            : num  4696 10557 3154 8331 6863 ...
##  $ si_gas            : num  10641 9917 4659 1536 6339 ...
##  $ total_gas         : num  15337 20474 7813 9867 13202 ...
##  $ no_carbon         : num  15161 20185 7755 9841 13169 ...
##  $ si_carbon         : num  176 289 58 26 33 26 335 218 4 4 ...
##  $ total_carbon      : num  15337 20474 7813 9867 13202 ...
##  $ no_leña           : num  7236 7357 2345 1059 1833 ...
##  $ si_leña           : num  8101 13117 5468 8808 11369 ...
##  $ total_leña        : num  15337 20474 7813 9867 13202 ...
data1$por_electricidad <- (data1$si_electricidad / data1$total_electricidad) * 100
data1$por_gas <- (data1$si_gas / data1$total_gas) * 100
data1$por_carbon <- (data1$si_carbon / data1$total_carbon) * 100
data1$por_leña <- (data1$si_leña / data1$total_leña) * 100
dontselect=c("provincia","no_electricidad","si_electricidad","total_electricidad","no_gas","si_gas", "total_gas", "si_carbon", "no_carbon","total_carbon","si_leña","total_leña", "no_leña")
select=setdiff(names(data1),dontselect) 
theData= data1[,select]
corMatrix=polycor::hetcor(theData)$correlations
round(corMatrix,2)
##                  por_electricidad por_gas por_carbon por_leña
## por_electricidad             1.00    0.47       0.10    -0.46
## por_gas                      0.47    1.00       0.23    -0.87
## por_carbon                   0.10    0.23       1.00    -0.31
## por_leña                    -0.46   -0.87      -0.31     1.00
library(psych)
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.63
## MSA for each item = 
## por_electricidad          por_gas       por_carbon         por_leña 
##             0.91             0.59             0.72             0.59
cortest.bartlett(corMatrix,n=nrow(theData))$p.value>0.05
## [1] FALSE
library(matrixcalc)

is.singular.matrix(corMatrix)
## [1] FALSE
fa.parallel(theData, fa = 'fa',correct = T,plot = F)
## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA
library(GPArotation)
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
resfa <- fa(theData,
            nfactors = 1,
            cor = 'mixed',
            rotate = "varimax",
            fm="minres")
print(resfa$loadings)
## 
## Loadings:
##                  MR1   
## por_electricidad  0.491
## por_gas           0.912
## por_carbon        0.280
## por_leña         -0.956
## 
##                  MR1
## SS loadings    2.065
## Proportion Var 0.516
print(resfa$loadings,cutoff = 0.5)
## 
## Loadings:
##                  MR1   
## por_electricidad       
## por_gas           0.912
## por_carbon             
## por_leña         -0.956
## 
##                  MR1
## SS loadings    2.065
## Proportion Var 0.516
library(GPArotation)
resfa2<- fa(theData,
            nfactors = 1,
            cor = 'mixed',
            rotate = "oblimin",
            fm="minres")
print(resfa2$loadings)
## 
## Loadings:
##                  MR1   
## por_electricidad  0.491
## por_gas           0.912
## por_carbon        0.280
## por_leña         -0.956
## 
##                  MR1
## SS loadings    2.065
## Proportion Var 0.516
print(resfa2$loadings,cutoff = 0.5)
## 
## Loadings:
##                  MR1   
## por_electricidad       
## por_gas           0.912
## por_carbon             
## por_leña         -0.956
## 
##                  MR1
## SS loadings    2.065
## Proportion Var 0.516
fa.diagram(resfa,main = "Resultados del EFA")

as.data.frame(resfa$scores)%>%head()
##           MR1
## 6   0.4475047
## 7  -0.1693039
## 8  -0.1326194
## 9  -1.2306794
## 10 -0.6894937
## 11 -0.6444460
data1$resfa=resfa$scores[,1]
resfa$TLI
## [1] 0.9607796
resfa$rms
## [1] 0.03000416
resfa$RMSEA
##      RMSEA      lower      upper confidence 
## 0.10505967 0.01732496 0.20271043 0.90000000
resfa$BIC
## [1] -4.21926