# Make the project folder the working directory; the relative file name passed
# to import() further down is resolved against it.
# NOTE(review): hard-coded, user-specific location - setwd() stops with an
# error on any machine where this folder does not exist.
setwd("~/FINAL EAP2")
library(rio)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(polycor)
library(psych)
## 
## Attaching package: 'psych'
## 
## The following object is masked from 'package:polycor':
## 
##     polyserial
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(matrixcalc)
library(GPArotation)
## 
## Attaching package: 'GPArotation'
## 
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
library(BBmisc)
## 
## Attaching package: 'BBmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     coalesce, collapse, symdiff
## 
## The following object is masked from 'package:base':
## 
##     isFALSE
# Read the Excel workbook (path relative to the working directory) into
# `data_energia`.
data_energia <- import("DATA5_HOGARES.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
# Show the column names produced by the import.
colnames(data_energia)
##  [1] "Título" "...2"   "...3"   "...4"   "...5"   "...6"   "...7"   "...8"  
##  [9] "...9"   "...10"  "...11"  "...12"  "...13"  "...14"  "...15"
# Trim the sheet in a single step:
#   * rows 1-4 at the top,
#   * the rows that end up at positions 198-200 once those four are gone,
#     i.e. rows 202-204 of the imported sheet,
#   * the first column.
data_energia <- data_energia[-c(1:4, 202:204), -1]

# Columns `...4` to `...15` are turned into numeric; any cell that cannot be
# parsed as a number becomes NA and raises a coercion warning.
data_energia <- mutate(data_energia, across(c(...4:...15), as.numeric))
## Warning: There were 12 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(c(...4:...15), as.numeric)`.
## Caused by warning:
## ! NAs introducidos por coerción
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 11 remaining warnings.
library(scraEP)
# Normalise the text of the second column: strip accents, then upper-case.
# Both functions are vectorised, so they are applied to the whole column at
# once instead of element by element through sapply() (which also attaches
# names and changes its return type on empty input).
data_energia[[2]] <- toupper(unaccent(data_energia[[2]]))

# Name the two identifier columns.
names(data_energia)[1:2] <- c("UBIGEO", "PROVINCIA")

# Remove the leading "<DEPARTMENT>, PROVINCIA: " prefix so only the province
# name remains. The pattern is anchored at the start and accepts any text
# before the comma, so departments written with several words (for example
# "LA LIBERTAD" or "MADRE DE DIOS") are removed completely; a character class
# without a space would only strip the last word of such a prefix.
data_energia$PROVINCIA <- sub(
  "^.*, PROVINCIA:\\s*", "", data_energia$PROVINCIA
)

# Manual recodes for labels that do not follow the prefix pattern.
# `%in%` never yields NA, so a missing province name cannot break the
# subscripted assignment the way `==` would.
data_energia$PROVINCIA[
  data_energia$PROVINCIA %in% "MADRE DE DIOS PROV. DE TAMBOPATA"
] <- "TAMBOPATA"
data_energia$PROVINCIA[
  data_energia$PROVINCIA %in% "MADRE DE DIOS PROV. DE MANU"
] <- "MANU"
data_energia$PROVINCIA[
  data_energia$PROVINCIA %in% "MADRE DE DIOS PROV. DE TAHUAMANU"
] <- "TAHUAMANU"
data_energia$PROVINCIA[
  data_energia$PROVINCIA %in% "PROVINCIA CONSTITUCIONAL DEL CALLAO"
] <- "CALLAO"
# Label columns 3 to 14: for each of the four energy sources there is a
# "NO USA" count, a "SI USA" count and a total.
colnames(data_energia)[3:14] <- c(
  "NO USA ELECTRICIDAD", "SI USA ELECTRICIDAD", "TOTAL.1",
  "NO USA GAS (BALON GLP)", "SI USA GAS (BALON GLP)", "TOTAL.2",
  "NO USA CARBON", "SI USA CARBON", "TOTAL.3",
  "NO USA LENA", "SI USA LENA", "TOTAL.4"
)

# Drop the first remaining row.
# NOTE(review): presumably a leftover header row from the sheet - confirm.
data_energia <- data_energia[-1, ]

# Pregunta 1

# Percentage of "SI USA" answers for each energy source: the positive count
# divided by the matching total, scaled to 0-100. The four new columns are
# appended at the end of the data frame.
data_energia[["% SI USA ELECTRICIDAD"]] <-
  100 * (data_energia[["SI USA ELECTRICIDAD"]] / data_energia[["TOTAL.1"]])
data_energia[["% SI USA GAS (BALON GLP)"]] <-
  100 * (data_energia[["SI USA GAS (BALON GLP)"]] / data_energia[["TOTAL.2"]])
data_energia[["% SI USA CARBON"]] <-
  100 * (data_energia[["SI USA CARBON"]] / data_energia[["TOTAL.3"]])
data_energia[["% SI USA LENA"]] <-
  100 * (data_energia[["SI USA LENA"]] / data_energia[["TOTAL.4"]])

# Preview the first six rows to check the result.
head(data_energia, n = 6L)
##    UBIGEO            PROVINCIA NO USA ELECTRICIDAD SI USA ELECTRICIDAD TOTAL.1
## 6     101          CHACHAPOYAS               14763                 574   15337
## 7     102                BAGUA               20313                 161   20474
## 8     103              BONGARA                7689                 124    7813
## 9     104         CONDORCANQUI                9853                  14    9867
## 10    105                 LUYA               13112                  90   13202
## 11    106 RODRIGUEZ DE MENDOZA                9103                  65    9168
##    NO USA GAS (BALON GLP) SI USA GAS (BALON GLP) TOTAL.2 NO USA CARBON
## 6                    4696                  10641   15337         15161
## 7                   10557                   9917   20474         20185
## 8                    3154                   4659    7813          7755
## 9                    8331                   1536    9867          9841
## 10                   6863                   6339   13202         13169
## 11                   5387                   3781    9168          9142
##    SI USA CARBON TOTAL.3 NO USA LENA SI USA LENA TOTAL.4 % SI USA ELECTRICIDAD
## 6            176   15337        7236        8101   15337             3.7425833
## 7            289   20474        7357       13117   20474             0.7863632
## 8             58    7813        2345        5468    7813             1.5870984
## 9             26    9867        1059        8808    9867             0.1418871
## 10            33   13202        1833       11369   13202             0.6817149
## 11            26    9168        1824        7344    9168             0.7089878
##    % SI USA GAS (BALON GLP) % SI USA CARBON % SI USA LENA
## 6                  69.38123       1.1475517      52.81998
## 7                  48.43704       1.4115464      64.06662
## 8                  59.63138       0.7423525      69.98592
## 9                  15.56704       0.2635046      89.26725
## 10                 48.01545       0.2499621      86.11574
## 11                 41.24127       0.2835951      80.10471
# Keep only the four percentage indicators; they are the input to the
# factor analysis.
data_energia_factorial <- data_energia[c(
  "% SI USA ELECTRICIDAD",
  "% SI USA GAS (BALON GLP)",
  "% SI USA CARBON",
  "% SI USA LENA"
)]

# Correlation matrix of the indicators, using every pair of observations
# that is complete for the two variables involved.
cor_matrix <- cor(x = data_energia_factorial, use = "pairwise.complete.obs")

# Bartlett's test of sphericity on that matrix, with the number of rows of
# the indicator table as sample size.
cortest.bartlett(R = cor_matrix, n = nrow(data_energia_factorial))
## $chisq
## [1] 338.9104
## 
## $p.value
## [1] 3.704739e-70
## 
## $df
## [1] 6
# Kaiser-Meyer-Olkin measure of sampling adequacy, computed from the
# correlation matrix.
KMO(r = cor_matrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = cor_matrix)
## Overall MSA =  0.63
## MSA for each item = 
##    % SI USA ELECTRICIDAD % SI USA GAS (BALON GLP)          % SI USA CARBON 
##                     0.91                     0.59                     0.72 
##            % SI USA LENA 
##                     0.59
library(psych)

# One-factor maximum-likelihood factor analysis, requesting a varimax
# rotation.
efa_varimax <- fa(
  r = data_energia_factorial,
  nfactors = 1,
  fm = "ml",
  rotate = "varimax"
)

# Print the factor loadings.
efa_varimax[["loadings"]]
## 
## Loadings:
##                          ML1   
## % SI USA ELECTRICIDAD     0.493
## % SI USA GAS (BALON GLP)  0.912
## % SI USA CARBON           0.300
## % SI USA LENA            -0.949
## 
##                  ML1
## SS loadings    2.065
## Proportion Var 0.516
# Same one-factor maximum-likelihood model, this time requesting an oblimin
# rotation.
efa_oblimin <- fa(
  r = data_energia_factorial,
  nfactors = 1,
  fm = "ml",
  rotate = "oblimin"
)

# Print the factor loadings.
efa_oblimin[["loadings"]]
## 
## Loadings:
##                          ML1   
## % SI USA ELECTRICIDAD     0.493
## % SI USA GAS (BALON GLP)  0.912
## % SI USA CARBON           0.300
## % SI USA LENA            -0.949
## 
##                  ML1
## SS loadings    2.065
## Proportion Var 0.516
# Unrotated one-factor maximum-likelihood solution; report the variance
# accounted for by the factor.
efa_one_factor <- fa(
  r = data_energia_factorial,
  nfactors = 1,
  fm = "ml",
  rotate = "none"
)
efa_one_factor[["Vaccounted"]]
##                      ML1
## SS loadings    2.0651889
## Proportion Var 0.5162972