library(readxl)
# Import the workbook. readxl auto-names the columns whose header cell is
# blank (`...2` through `...15`, as listed in the message below).
reporte <- read_excel(path = "reporte.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
# Open the spreadsheet-style viewer only when someone is at the console.
# View() is a GUI side effect that the analysis itself does not depend on,
# so batch or knitted runs skip it.
if (interactive()) {
  View(reporte)
}
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Discard the four preamble rows, then promote the next row to column names
# (passed as a plain character vector) and remove it from the data.
reporte <- slice(reporte, -seq_len(4))
names(reporte) <- as.character(unlist(reporte[1, ], use.names = FALSE))
reporte <- reporte[-1, ]
names(reporte)
## [1] NA "Código" "Provincia"
## [4] "No usa electricidad" "Sí usa electricidad" "Total"
## [7] "No usa gas (balón GLP)" "Sí usa gas (balón GLP)" "Total"
## [10] "No usa carbón" "Sí usa carbón" "Total"
## [13] "No usa leña" "Sí usa leña" "Total"
# Make the header syntactic first (the leading column's name is NA at this
# point), then drop that leading column and every row with a missing value.
names(reporte) <- make.names(names(reporte))
reporte <- na.omit(reporte[, -1])
names(reporte)
## [1] "Código" "Provincia" "No.usa.electricidad"
## [4] "Sí.usa.electricidad" "Total" "No.usa.gas..balón.GLP."
## [7] "Sí.usa.gas..balón.GLP." "Total" "No.usa.carbón"
## [10] "Sí.usa.carbón" "Total" "No.usa.leña"
## [13] "Sí.usa.leña" "Total"
# Each fuel group ends in a column called "Total"; number them
# (Total1 ... Total4) so that every column name is unique.
reporte <- setNames(
  reporte,
  c(
    "Código", "Provincia",
    "No.usa.electricidad", "Sí.usa.electricidad", "Total1",
    "No.usa.gas..balón.GLP.", "Sí.usa.gas..balón.GLP.", "Total2",
    "No.usa.carbón", "Sí.usa.carbón", "Total3",
    "No.usa.leña", "Sí.usa.leña", "Total4"
  )
)
# Convert every column except the two identifier columns to numeric.
# One column-wise pass replaces twelve copy-pasted statements, so a column
# cannot be forgotten or mistyped.
count_cols <- setdiff(names(reporte), c("Código", "Provincia"))
reporte[count_cols] <- lapply(reporte[count_cols], as.numeric)
# Percentage of "Sí usa" counts over the matching group total, one new
# column per fuel.
reporte$ele_percent <- with(reporte, 100 * (Sí.usa.electricidad / Total1))
reporte$gas_percent <- with(reporte, 100 * (Sí.usa.gas..balón.GLP. / Total2))
reporte$carbon_percent <- with(reporte, 100 * (Sí.usa.carbón / Total3))
reporte$leña_percent <- with(reporte, 100 * (Sí.usa.leña / Total4))
names(reporte)
## [1] "Código" "Provincia" "No.usa.electricidad"
## [4] "Sí.usa.electricidad" "Total1" "No.usa.gas..balón.GLP."
## [7] "Sí.usa.gas..balón.GLP." "Total2" "No.usa.carbón"
## [10] "Sí.usa.carbón" "Total3" "No.usa.leña"
## [13] "Sí.usa.leña" "Total4" "ele_percent"
## [16] "gas_percent" "carbon_percent" "leña_percent"
# Keep only the derived percentage columns: list everything to exclude and
# take the complement of the current column names.
dontselect <- c(
  "Provincia", "Código",
  "Total1", "Total2", "Total3", "Total4",
  "Sí.usa.electricidad", "No.usa.electricidad",
  "Sí.usa.gas..balón.GLP.", "No.usa.gas..balón.GLP.",
  "No.usa.carbón", "Sí.usa.carbón",
  "No.usa.leña", "Sí.usa.leña"
)
select <- setdiff(names(reporte), dontselect)
theData <- reporte[, select]
# We will use magrittr for its %>% pipe.
library(magrittr)
# Show the first ten rows as a paged table, then inspect the structure.
theData %>%
  head(n = 10) %>%
  rmarkdown::paged_table()
str(theData)
## tibble [196 × 4] (S3: tbl_df/tbl/data.frame)
## $ ele_percent : num [1:196] 3.743 0.786 1.587 0.142 0.682 ...
## $ gas_percent : num [1:196] 69.4 48.4 59.6 15.6 48 ...
## $ carbon_percent: num [1:196] 1.148 1.412 0.742 0.264 0.25 ...
## $ leña_percent : num [1:196] 52.8 64.1 70 89.3 86.1 ...
## - attr(*, "na.action")= 'omit' Named int [1:3] 197 198 199
## ..- attr(*, "names")= chr [1:3] "197" "198" "199"
# Drop rows with missing values and convert the tibble to a plain data.frame.
theData <- as.data.frame(na.omit(theData))
#install.packages("polycor")
library(polycor)
## Warning: package 'polycor' was built under R version 4.3.3
# Keep only the correlation matrix from the hetcor() result.
corMatrix <- hetcor(theData)[["correlations"]]
library(psych)
## Warning: package 'psych' was built under R version 4.3.2
##
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
# Kaiser-Meyer-Olkin factor adequacy, overall and per item.
psych::KMO(r = corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.63
## MSA for each item =
## ele_percent gas_percent carbon_percent leña_percent
## 0.91 0.59 0.72 0.59
# Bartlett's test on the correlation matrix: TRUE here means p > 0.05.
cortest.bartlett(R = corMatrix, n = nrow(theData))[["p.value"]] > 0.05
## [1] FALSE
library(matrixcalc)
# Check whether the correlation matrix is singular.
matrixcalc::is.singular.matrix(corMatrix)
## [1] FALSE
# Parallel analysis to suggest how many factors to extract, without a plot.
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
fa.parallel(theData, fa = "fa", correct = TRUE, plot = FALSE)
## Parallel analysis suggests that the number of factors = 1 and the number of components = NA
library(GPArotation)
## Warning: package 'GPArotation' was built under R version 4.3.3
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
# One-factor minres solution requested with a varimax rotation. The varimax
# and oblimin runs in this script print identical loadings.
resfa <- fa(
  r = theData,
  nfactors = 1,
  cor = "mixed",
  rotate = "varimax",
  fm = "minres"
)
print(resfa[["loadings"]])
##
## Loadings:
## MR1
## ele_percent 0.491
## gas_percent 0.912
## carbon_percent 0.280
## leña_percent -0.956
##
## MR1
## SS loadings 2.065
## Proportion Var 0.516
library(GPArotation)
# Same one-factor minres model, this time requesting an oblimin rotation,
# to compare its loadings with the varimax run.
resfa2 <- fa(
  r = theData,
  nfactors = 1,
  cor = "mixed",
  rotate = "oblimin",
  fm = "minres"
)
print(resfa2[["loadings"]])
##
## Loadings:
## MR1
## ele_percent 0.491
## gas_percent 0.912
## carbon_percent 0.280
## leña_percent -0.956
##
## MR1
## SS loadings 2.065
## Proportion Var 0.516
# Communalities from the varimax fit, smallest first.
sort(resfa$communality, decreasing = FALSE)
## carbon_percent ele_percent gas_percent leña_percent
## 0.07838591 0.24104291 0.83166545 0.91346690
# Item complexity from the varimax fit, smallest first.
sort(resfa$complexity, decreasing = FALSE)
## gas_percent ele_percent carbon_percent leña_percent
## 1 1 1 1