library(rio)
## Warning: package 'rio' was built under R version 4.3.3
# Read the workbook into a data frame. The echoed "New names" messages show
# that most header cells are blank, so columns get placeholder names (`...2`
# onwards) until the real header row is promoted later in the script.
data1 <- rio::import("reporte.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
# Discard rows 1-4 and rows 202-204 by position.
# NOTE(review): presumably these are title/notes rows surrounding the table in
# the workbook; the indices are hard-coded, so confirm them if the file changes.
data1 <- data1[-c(seq_len(4L), 202:204), ]
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Use the values of the first remaining row as the column names.
colnames(data1) <- as.character(unlist(data1[1,]))
# Remove that header row from the data.
# NOTE(review): the later str() output lists Total, Total.1, Total.2, Total.3,
# so the header apparently repeats "Total" and the names are presumably made
# unique as a side effect of this subsetting. Keep the statement order unless
# that is confirmed.
data1 <- data1[-c(1), ]
# Drop the first column.
data1<- data1[, -1]
# Split the combined location label in `Provincia` into two new columns,
# DEPARTAMENTO and PROVINCIA. Two label layouts are handled:
#   "Departamento, provincia: Provincia"
#   "Departamento prov. de Provincia"

# DEPARTAMENTO: cut everything from ", provincia: " onwards (inner call),
# then everything from " prov. de " onwards (outer call).
data1$DEPARTAMENTO <- sub(
  " prov\\. de .*", "",
  gsub(", provincia: .*", "", data1[["Provincia"]])
)

# PROVINCIA: strip everything up to and including " prov. de " (inner call),
# then everything up to and including ", provincia: " (outer call).
data1$PROVINCIA <- sub(
  ".*, provincia: ", "",
  sub(".* prov\\. de ", "", data1[["Provincia"]])
)
Convertimos las columnas de conteo a tipo numérico para poder crear los porcentajes.
# Inspect the structure before any type conversion; in the recorded output
# below every column, including the counts, is still character.
str(data1)
## 'data.frame': 196 obs. of 16 variables:
## $ Código : chr "101" "102" "103" "104" ...
## $ Provincia : chr "Amazonas, provincia: Chachapoyas" "Amazonas, provincia: Bagua" "Amazonas, provincia: Bongara" "Amazonas, provincia: Condorcanqui" ...
## $ No usa electricidad : chr "14763" "20313" "7689" "9853" ...
## $ Sí usa electricidad : chr "574" "161" "124" "14" ...
## $ Total : chr "15337" "20474" "7813" "9867" ...
## $ No usa gas (balón GLP): chr "4696" "10557" "3154" "8331" ...
## $ Sí usa gas (balón GLP): chr "10641" "9917" "4659" "1536" ...
## $ Total.1 : chr "15337" "20474" "7813" "9867" ...
## $ No usa carbón : chr "15161" "20185" "7755" "9841" ...
## $ Sí usa carbón : chr "176" "289" "58" "26" ...
## $ Total.2 : chr "15337" "20474" "7813" "9867" ...
## $ No usa leña : chr "7236" "7357" "2345" "1059" ...
## $ Sí usa leña : chr "8101" "13117" "5468" "8808" ...
## $ Total.3 : chr "15337" "20474" "7813" "9867" ...
## $ DEPARTAMENTO : chr "Amazonas" "Amazonas" "Amazonas" "Amazonas" ...
## $ PROVINCIA : chr "Chachapoyas" "Bagua" "Bongara" "Condorcanqui" ...
# Columns 3 to 14 (the "No usa" / "Sí usa" / "Total" counts) were imported as
# text; coerce each of them to numeric in place.
data1[3:14] <- lapply(data1[3:14], as.numeric)

# Re-inspect the structure to confirm the new column types.
str(data1)
## 'data.frame': 196 obs. of 16 variables:
## $ Código : chr "101" "102" "103" "104" ...
## $ Provincia : chr "Amazonas, provincia: Chachapoyas" "Amazonas, provincia: Bagua" "Amazonas, provincia: Bongara" "Amazonas, provincia: Condorcanqui" ...
## $ No usa electricidad : num 14763 20313 7689 9853 13112 ...
## $ Sí usa electricidad : num 574 161 124 14 90 65 255 921 16 33 ...
## $ Total : num 15337 20474 7813 9867 13202 ...
## $ No usa gas (balón GLP): num 4696 10557 3154 8331 6863 ...
## $ Sí usa gas (balón GLP): num 10641 9917 4659 1536 6339 ...
## $ Total.1 : num 15337 20474 7813 9867 13202 ...
## $ No usa carbón : num 15161 20185 7755 9841 13169 ...
## $ Sí usa carbón : num 176 289 58 26 33 26 335 218 4 4 ...
## $ Total.2 : num 15337 20474 7813 9867 13202 ...
## $ No usa leña : num 7236 7357 2345 1059 1833 ...
## $ Sí usa leña : num 8101 13117 5468 8808 11369 ...
## $ Total.3 : num 15337 20474 7813 9867 13202 ...
## $ DEPARTAMENTO : chr "Amazonas" "Amazonas" "Amazonas" "Amazonas" ...
## $ PROVINCIA : chr "Chachapoyas" "Bagua" "Bongara" "Condorcanqui" ...
Creamos los porcentajes de uso de cada fuente de energía (electricidad, gas, carbón y leña).
# Share of each row's total that reports using each energy source.
# The values are plain ratios (count / total); nothing multiplies them by 100,
# so despite the `_pct` suffix they are proportions.
data1[["Electricidad_pct"]] <- data1[["Sí usa electricidad"]] / data1[["Total"]]
data1[["gas_pct"]] <- data1[["Sí usa gas (balón GLP)"]] / data1[["Total.1"]]
data1[["carbon_pct"]] <- data1[["Sí usa carbón"]] / data1[["Total.2"]]
data1[["lena_pct"]] <- data1[["Sí usa leña"]] / data1[["Total.3"]]
Seleccionamos las variables que entrarán en el análisis factorial.
# Names of the four proportion columns used as input for the factor analysis.
# NOTE(review): `factor` is also the name of a base R function; the variable
# name is kept because code outside this view may refer to it.
factor <- c("Electricidad_pct", "gas_pct", "carbon_pct", "lena_pct")

# Data frame restricted to those four columns.
datafactor <- data1[factor]
library(polycor)
## Warning: package 'polycor' was built under R version 4.3.3
# Correlation matrix of the four proportions, taken from the `correlations`
# element of the polycor::hetcor() result.
corMatrix <- polycor::hetcor(datafactor)[["correlations"]]
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
##
## Attaching package: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
# Kaiser-Meyer-Olkin factor adequacy computed from the correlation matrix;
# prints the overall MSA and the MSA for each item.
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.63
## MSA for each item =
## Electricidad_pct gas_pct carbon_pct lena_pct
## 0.91 0.59 0.72 0.59
# Bartlett's test on the correlation matrix, using the number of rows in
# `datafactor` as the sample size. Prints TRUE when the p-value is above 0.05.
psych::cortest.bartlett(corMatrix, n = nrow(datafactor))[["p.value"]] > 0.05
## [1] FALSE
library(matrixcalc)
# Check whether the correlation matrix is singular (function presumably
# provided by matrixcalc, loaded just above); prints TRUE or FALSE.
is.singular.matrix(corMatrix)
## [1] FALSE
# Parallel analysis to suggest how many factors to extract (factor solution
# only, no scree plot).
# `TRUE`/`FALSE` replace `T`/`F`, which are ordinary variables that can be
# reassigned and would then silently change these arguments.
# NOTE(review): psych documents `correct` as a numeric continuity correction
# used for tetrachoric/polychoric correlations; the logical value is kept to
# preserve the original call — confirm it is intended.
fa.parallel(datafactor, fa = "fa", correct = TRUE, plot = FALSE)
## Parallel analysis suggests that the number of factors = 1 and the number of components = NA
Análisis factorial con rotación varimax.
library(GPArotation)
## Warning: package 'GPArotation' was built under R version 4.3.3
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
# Fit a one-factor model on the four proportions with the minres method,
# requesting a varimax rotation, and print the loadings.
# NOTE(review): the recorded loadings are identical to those of the oblimin
# fit further down, as one would expect when a single factor is extracted.
# The object name `varimax` is kept because later code refers to it.
varimax <- psych::fa(
  datafactor,
  nfactors = 1,
  cor = "mixed",
  rotate = "varimax",
  fm = "minres"
)
print(varimax[["loadings"]])
##
## Loadings:
## MR1
## Electricidad_pct 0.491
## gas_pct 0.912
## carbon_pct 0.280
## lena_pct -0.956
##
## MR1
## SS loadings 2.065
## Proportion Var 0.516
# Fit statistics stored on the varimax fa() result.
# TLI element (presumably the Tucker-Lewis index — see psych::fa docs).
varimax$TLI
## [1] 0.9607796
# rms element (presumably a root-mean-square residual measure — see psych::fa docs).
varimax$rms
## [1] 0.03000416
library(GPArotation)
# Same one-factor minres model as the varimax fit, but requesting an oblimin
# rotation; print the loadings.
# NOTE(review): the recorded loadings match the varimax fit exactly, as one
# would expect when a single factor is extracted.
# The object name `oblimin` is kept because later code refers to it.
oblimin <- psych::fa(
  datafactor,
  nfactors = 1,
  cor = "mixed",
  rotate = "oblimin",
  fm = "minres"
)
print(oblimin[["loadings"]])
##
## Loadings:
## MR1
## Electricidad_pct 0.491
## gas_pct 0.912
## carbon_pct 0.280
## lena_pct -0.956
##
## MR1
## SS loadings 2.065
## Proportion Var 0.516
# Fit statistics stored on the oblimin fa() result.
# TLI element (presumably the Tucker-Lewis index — see psych::fa docs).
oblimin$TLI
## [1] 0.9607796
# rms element (presumably a root-mean-square residual measure — see psych::fa docs).
oblimin$rms
## [1] 0.03000416