# Point the session at the project folder so that the relative file path
# passed to import() further down resolves there.
# NOTE(review): this is a hard-coded, user-specific path; setwd() errors on any
# machine where the folder does not exist. Consider a project-relative setup.
setwd("~/FINAL EAP2")
library(rio)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
library(polycor)
library(psych)
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:polycor':
##
## polyserial
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(matrixcalc)
library(GPArotation)
##
## Attaching package: 'GPArotation'
##
## The following objects are masked from 'package:psych':
##
## equamax, varimin
library(BBmisc)
##
## Attaching package: 'BBmisc'
##
## The following objects are masked from 'package:dplyr':
##
## coalesce, collapse, symdiff
##
## The following object is masked from 'package:base':
##
## isFALSE
# Load the workbook; the path is relative to the current working directory.
data_energia <- import(file = "DATA5_HOGARES.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
# Show the column names assigned on import (only the first column came with a
# header; the remaining ones were auto-numbered).
colnames(data_energia)
## [1] "Título" "...2" "...3" "...4" "...5" "...6" "...7" "...8"
## [9] "...9" "...10" "...11" "...12" "...13" "...14" "...15"
# Drop the first four rows
# (presumably title/blank rows above the table -- confirm against the workbook).
data_energia <- data_energia[-seq_len(4), ]
# Drop the first column ("Título").
data_energia <- data_energia[, -1]
# Drop rows 198 to 200 of what is left
# (presumably trailing notes/totals -- confirm against the workbook).
data_energia <- data_energia[-(198:200), ]
# Convert the columns ...4 through ...15 from character to numeric; any cell
# that is not a number becomes NA (hence the coercion warnings).
data_energia <- mutate(data_energia, across(c(...4:...15), as.numeric))
## Warning: There were 12 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(c(...4:...15), as.numeric)`.
## Caused by warning:
## ! NAs introducidos por coerción
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 11 remaining warnings.
library(scraEP)
# Clean the identifier columns, label the usage counts and drop the leftover
# first row.

# The first two remaining columns are the province code and the province label.
names(data_energia)[1:2] <- c("UBIGEO", "PROVINCIA")

# Strip accents and upper-case the labels. Both functions accept a whole
# character vector, so no element-wise sapply() is needed.
data_energia$PROVINCIA <- toupper(unaccent(data_energia$PROVINCIA))

# Labels are assumed to look like "<DEPARTMENT>, PROVINCIA: <PROVINCE>"
# (confirm against the workbook); keep only the province part. The prefix is
# matched from the start of the string so that multi-word department names
# (e.g. "SAN MARTIN") are removed in full instead of leaving their leading
# words in front of the province name.
data_energia$PROVINCIA <- sub(
  "^.*, PROVINCIA:\\s*", "", data_energia$PROVINCIA
)

# Labels that do not follow the pattern above and need an explicit short name.
province_aliases <- c(
  "MADRE DE DIOS PROV. DE TAMBOPATA" = "TAMBOPATA",
  "MADRE DE DIOS PROV. DE MANU" = "MANU",
  "MADRE DE DIOS PROV. DE TAHUAMANU" = "TAHUAMANU",
  "PROVINCIA CONSTITUCIONAL DEL CALLAO" = "CALLAO"
)
# %in% never yields NA, so a missing label cannot break the assignment the way
# an `==` mask inside a data-frame subscript would.
has_alias <- data_energia$PROVINCIA %in% names(province_aliases)
data_energia$PROVINCIA[has_alias] <-
  unname(province_aliases[data_energia$PROVINCIA[has_alias]])

# Name the twelve count columns: for each energy source, the "does not use"
# count, the "uses" count and the total, in that order.
colnames(data_energia)[3:14] <- c(
  "NO USA ELECTRICIDAD", "SI USA ELECTRICIDAD", "TOTAL.1",
  "NO USA GAS (BALON GLP)", "SI USA GAS (BALON GLP)", "TOTAL.2",
  "NO USA CARBON", "SI USA CARBON", "TOTAL.3",
  "NO USA LENA", "SI USA LENA", "TOTAL.4"
)

# Drop the first remaining row (presumably the spreadsheet's own header row,
# which is not a data record -- confirm against the workbook).
data_energia <- data_energia[-1, ]
# Pregunta 1
# For each energy source, express the positive ("SI USA") count as a
# percentage of that source's total.
data_energia[["% SI USA ELECTRICIDAD"]] <-
  data_energia[["SI USA ELECTRICIDAD"]] / data_energia[["TOTAL.1"]] * 100
data_energia[["% SI USA GAS (BALON GLP)"]] <-
  data_energia[["SI USA GAS (BALON GLP)"]] / data_energia[["TOTAL.2"]] * 100
data_energia[["% SI USA CARBON"]] <-
  data_energia[["SI USA CARBON"]] / data_energia[["TOTAL.3"]] * 100
data_energia[["% SI USA LENA"]] <-
  data_energia[["SI USA LENA"]] / data_energia[["TOTAL.4"]] * 100
# Inspect the first rows to check the new columns.
head(data_energia)
## UBIGEO PROVINCIA NO USA ELECTRICIDAD SI USA ELECTRICIDAD TOTAL.1
## 6 101 CHACHAPOYAS 14763 574 15337
## 7 102 BAGUA 20313 161 20474
## 8 103 BONGARA 7689 124 7813
## 9 104 CONDORCANQUI 9853 14 9867
## 10 105 LUYA 13112 90 13202
## 11 106 RODRIGUEZ DE MENDOZA 9103 65 9168
## NO USA GAS (BALON GLP) SI USA GAS (BALON GLP) TOTAL.2 NO USA CARBON
## 6 4696 10641 15337 15161
## 7 10557 9917 20474 20185
## 8 3154 4659 7813 7755
## 9 8331 1536 9867 9841
## 10 6863 6339 13202 13169
## 11 5387 3781 9168 9142
## SI USA CARBON TOTAL.3 NO USA LENA SI USA LENA TOTAL.4 % SI USA ELECTRICIDAD
## 6 176 15337 7236 8101 15337 3.7425833
## 7 289 20474 7357 13117 20474 0.7863632
## 8 58 7813 2345 5468 7813 1.5870984
## 9 26 9867 1059 8808 9867 0.1418871
## 10 33 13202 1833 11369 13202 0.6817149
## 11 26 9168 1824 7344 9168 0.7089878
## % SI USA GAS (BALON GLP) % SI USA CARBON % SI USA LENA
## 6 69.38123 1.1475517 52.81998
## 7 48.43704 1.4115464 64.06662
## 8 59.63138 0.7423525 69.98592
## 9 15.56704 0.2635046 89.26725
## 10 48.01545 0.2499621 86.11574
## 11 41.24127 0.2835951 80.10471
# Keep only the four percentage indicators as input for the factor analysis.
data_energia_factorial <- select(
  data_energia,
  all_of(c(
    "% SI USA ELECTRICIDAD",
    "% SI USA GAS (BALON GLP)",
    "% SI USA CARBON",
    "% SI USA LENA"
  ))
)
# Correlation matrix of the indicators; each pair uses the rows where both
# values are present.
cor_matrix <- data_energia_factorial %>%
  cor(use = "pairwise.complete.obs")
# Bartlett's test of sphericity on the correlation matrix.
# NOTE(review): n is the full row count; if any indicator has missing values
# the pairwise correlations rest on fewer observations than this.
cortest.bartlett(cor_matrix, n = nrow(data_energia_factorial))
## $chisq
## [1] 338.9104
##
## $p.value
## [1] 3.704739e-70
##
## $df
## [1] 6
# Kaiser-Meyer-Olkin factor adequacy, computed from the correlation matrix.
KMO(r = cor_matrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = cor_matrix)
## Overall MSA = 0.63
## MSA for each item =
## % SI USA ELECTRICIDAD % SI USA GAS (BALON GLP) % SI USA CARBON
## 0.91 0.59 0.72
## % SI USA LENA
## 0.59
library(psych)
# One-factor maximum-likelihood factor analysis, requested with varimax.
# NOTE(review): with nfactors = 1 there is nothing to rotate; the loadings
# printed here are the same as those of the oblimin fit further down.
efa_varimax <- fa(
  r = data_energia_factorial,
  nfactors = 1,
  fm = "ml",
  rotate = "varimax"
)
# Print the factor loadings.
efa_varimax[["loadings"]]
##
## Loadings:
## ML1
## % SI USA ELECTRICIDAD 0.493
## % SI USA GAS (BALON GLP) 0.912
## % SI USA CARBON 0.300
## % SI USA LENA -0.949
##
## ML1
## SS loadings 2.065
## Proportion Var 0.516
# One-factor maximum-likelihood factor analysis, requested with oblimin.
# NOTE(review): with nfactors = 1 there is nothing to rotate; the loadings
# printed here are the same as those of the varimax fit above.
efa_oblimin <- fa(
  r = data_energia_factorial,
  nfactors = 1,
  fm = "ml",
  rotate = "oblimin"
)
# Print the factor loadings.
efa_oblimin[["loadings"]]
##
## Loadings:
## ML1
## % SI USA ELECTRICIDAD 0.493
## % SI USA GAS (BALON GLP) 0.912
## % SI USA CARBON 0.300
## % SI USA LENA -0.949
##
## ML1
## SS loadings 2.065
## Proportion Var 0.516
# Unrotated one-factor maximum-likelihood solution.
efa_one_factor <- fa(
  r = data_energia_factorial,
  nfactors = 1,
  fm = "ml",
  rotate = "none"
)
# Variance accounted for by the factor (SS loadings and proportion).
efa_one_factor[["Vaccounted"]]
## ML1
## SS loadings 2.0651889
## Proportion Var 0.5162972