library(readxl)

# Load the workbook into a tibble. readxl reports the automatic names it gives
# to the columns that have no label in the first row (see the message below).
reporte <- read_excel("reporte.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`

# View() opens a GUI data viewer, so only call it when someone is at the
# console; in batch runs (Rscript, rendering) it is of no use and may fail.
if (interactive()) {
  View(reporte)
}
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Rows 1-4 of the sheet are discarded and row 5 carries the column labels:
# promote row 5 to the header, then drop all five leading rows in one step.
names(reporte) <- reporte[5, ]
reporte <- reporte[-(1:5), ]
names(reporte)
##  [1] NA                       "Código"                 "Provincia"             
##  [4] "No usa electricidad"    "Sí usa electricidad"    "Total"                 
##  [7] "No usa gas (balón GLP)" "Sí usa gas (balón GLP)" "Total"                 
## [10] "No usa carbón"          "Sí usa carbón"          "Total"                 
## [13] "No usa leña"            "Sí usa leña"            "Total"

# Make the labels syntactic (spaces and parentheses become dots), then remove
# the first column, the one whose label was missing.
names(reporte) <- make.names(names(reporte))
reporte <- reporte[, -1]

# Discard every row that still has a missing value in any column.
reporte <- na.omit(reporte)
names(reporte)
##  [1] "Código"                 "Provincia"              "No.usa.electricidad"   
##  [4] "Sí.usa.electricidad"    "Total"                  "No.usa.gas..balón.GLP."
##  [7] "Sí.usa.gas..balón.GLP." "Total"                  "No.usa.carbón"         
## [10] "Sí.usa.carbón"          "Total"                  "No.usa.leña"           
## [13] "Sí.usa.leña"            "Total"

# Four columns are still called "Total". Assign the final labels by position,
# numbering each total after the group of columns it closes.
names(reporte) <- c(
  "Código", "Provincia",
  "No.usa.electricidad", "Sí.usa.electricidad", "Total1",
  "No.usa.gas..balón.GLP.", "Sí.usa.gas..balón.GLP.", "Total2",
  "No.usa.carbón", "Sí.usa.carbón", "Total3",
  "No.usa.leña", "Sí.usa.leña", "Total4"
)
# Coerce every column except the two identifier columns to numeric in one pass.
columnas_numericas <- setdiff(names(reporte), c("Código", "Provincia"))
reporte[columnas_numericas] <- lapply(reporte[columnas_numericas], as.numeric)

# Share of "Sí usa ..." over the matching total, as a percentage. The division
# is evaluated before the multiplication by 100.
reporte$ele_percent <- with(reporte, Sí.usa.electricidad / Total1 * 100)
reporte$gas_percent <- with(reporte, Sí.usa.gas..balón.GLP. / Total2 * 100)
reporte$carbon_percent <- with(reporte, Sí.usa.carbón / Total3 * 100)
reporte$leña_percent <- with(reporte, Sí.usa.leña / Total4 * 100)
names(reporte)
##  [1] "Código"                 "Provincia"              "No.usa.electricidad"   
##  [4] "Sí.usa.electricidad"    "Total1"                 "No.usa.gas..balón.GLP."
##  [7] "Sí.usa.gas..balón.GLP." "Total2"                 "No.usa.carbón"         
## [10] "Sí.usa.carbón"          "Total3"                 "No.usa.leña"           
## [13] "Sí.usa.leña"            "Total4"                 "ele_percent"           
## [16] "gas_percent"            "carbon_percent"         "leña_percent"
# Columns to leave out of the analysis data: identifiers, totals and the raw
# yes/no counts. Whatever remains in `reporte` is kept.
dontselect <- c(
  "Provincia", "Código",
  "Total1", "Total2", "Total3", "Total4",
  "Sí.usa.electricidad", "No.usa.electricidad",
  "Sí.usa.gas..balón.GLP.", "No.usa.gas..balón.GLP.",
  "No.usa.carbón", "Sí.usa.carbón",
  "No.usa.leña", "Sí.usa.leña"
)
select <- setdiff(names(reporte), dontselect)
theData <- reporte[, select]

# we will use:
library(magrittr)

# Show the first ten rows as a paged table, then the structure of the subset.
theData %>%
  head(10) %>%
  rmarkdown::paged_table()
str(theData)
## tibble [196 × 4] (S3: tbl_df/tbl/data.frame)
##  $ ele_percent   : num [1:196] 3.743 0.786 1.587 0.142 0.682 ...
##  $ gas_percent   : num [1:196] 69.4 48.4 59.6 15.6 48 ...
##  $ carbon_percent: num [1:196] 1.148 1.412 0.742 0.264 0.25 ...
##  $ leña_percent  : num [1:196] 52.8 64.1 70 89.3 86.1 ...
##  - attr(*, "na.action")= 'omit' Named int [1:3] 197 198 199
##   ..- attr(*, "names")= chr [1:3] "197" "198" "199"
# Remove rows with missing values and convert the tibble to a plain
# data.frame before passing it to the correlation and factor-analysis calls.
theData <- na.omit(theData)
theData <- as.data.frame(theData)
#install.packages("polycor")
library(polycor)
## Warning: package 'polycor' was built under R version 4.3.3

# Correlation matrix used by the adequacy checks below.
corMatrix <- hetcor(theData)$correlations
library(psych)
## Warning: package 'psych' was built under R version 4.3.2
## 
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
## 
##     polyserial

# Kaiser-Meyer-Olkin sampling adequacy, overall and per variable.
psych::KMO(corMatrix) 
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.63
## MSA for each item = 
##    ele_percent    gas_percent carbon_percent   leña_percent 
##           0.91           0.59           0.72           0.59

# Bartlett test on the correlation matrix: TRUE here would mean its p-value
# is above 0.05.
cortest.bartlett(corMatrix, n = nrow(theData))$p.value > 0.05
## [1] FALSE
library(matrixcalc)

# TRUE here would mean the correlation matrix is singular.
is.singular.matrix(corMatrix)
## [1] FALSE

# Parallel analysis for the number of factors. Spell out TRUE/FALSE: T and F
# are ordinary variables that can be reassigned.
# NOTE(review): psych documents `correct` as a numeric continuity correction
# (default 0.5) for tetrachoric/polychoric correlations; TRUE is kept to
# preserve the current call, but confirm it is what was intended.
fa.parallel(theData, fa = "fa", correct = TRUE, plot = FALSE)
## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA
library(GPArotation)
## Warning: package 'GPArotation' was built under R version 4.3.3
## 
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin

# One-factor minres solution, requesting a varimax rotation.
resfa <- fa(
  theData,
  nfactors = 1,
  cor = "mixed",
  rotate = "varimax",
  fm = "minres"
)
print(resfa[["loadings"]])
## 
## Loadings:
##                MR1   
## ele_percent     0.491
## gas_percent     0.912
## carbon_percent  0.280
## leña_percent   -0.956
## 
##                  MR1
## SS loadings    2.065
## Proportion Var 0.516

# Same model requesting an oblimin rotation instead; with a single factor the
# printed loadings are identical to the varimax run above.
resfa2 <- fa(
  theData,
  nfactors = 1,
  cor = "mixed",
  rotate = "oblimin",
  fm = "minres"
)
print(resfa2[["loadings"]])
## 
## Loadings:
##                MR1   
## ele_percent     0.491
## gas_percent     0.912
## carbon_percent  0.280
## leña_percent   -0.956
## 
##                  MR1
## SS loadings    2.065
## Proportion Var 0.516

# Communality and complexity of each variable in the first fit, in ascending
# order.
sort(resfa[["communality"]])
## carbon_percent    ele_percent    gas_percent   leña_percent 
##     0.07838591     0.24104291     0.83166545     0.91346690
sort(resfa[["complexity"]])
##    gas_percent    ele_percent carbon_percent   leña_percent 
##              1              1              1              1