#paquetes
library(AER)
## Cargando paquete requerido: car
## Cargando paquete requerido: carData
## Cargando paquete requerido: lmtest
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Cargando paquete requerido: sandwich
## Cargando paquete requerido: survival
library(rio)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(arm)
## Cargando paquete requerido: MASS
## 
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Cargando paquete requerido: Matrix
## Cargando paquete requerido: lme4
## 
## Adjuntando el paquete: 'lme4'
## The following object is masked from 'package:rio':
## 
##     factorize
## 
## arm (Version 1.14-4, built: 2024-4-1)
## Working directory is F:/PUCP/PUCP Sociales/Ciclo 6/Estadistica 2/Examen Final
## 
## Adjuntando el paquete: 'arm'
## The following object is masked from 'package:car':
## 
##     logit
library(Matrix)
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
##   backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
## 
## Revert to `kableExtra` for one session:
## 
##   options(modelsummary_factory_default = 'kableExtra')
##   options(modelsummary_factory_latex = 'kableExtra')
##   options(modelsummary_factory_html = 'kableExtra')
## 
## Silence this message forever:
## 
##   config_modelsummary(startup_message = FALSE)
library(kableExtra)
## 
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(lme4)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand()          masks Matrix::expand()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ tidyr::pack()            masks Matrix::pack()
## ✖ dplyr::recode()          masks car::recode()
## ✖ MASS::select()           masks dplyr::select()
## ✖ purrr::some()            masks car::some()
## ✖ tidyr::unpack()          masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Pregunta 1

data1 = import("data1_final.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
names(data1)=data1[5,]   #cambio el número según la fila
head(data1,10)%>%
    rmarkdown::paged_table()
data1=data1[-c(1:5),]  #borrar filas, de la primera a la quinta

head(data1,10)%>%
    rmarkdown::paged_table()
data1 <- data1[, -1]   #borrar columna específica (la 1)
data1 <- data1 %>%
  mutate(DEPARTAMENTO = trimws(substring(Provincia, 1, regexpr(",", Provincia) - 1)),
         PROVINCIA = trimws(substring(Provincia, regexpr(":", Provincia) + 1)))
# Eliminar columnas por su posición
data1 <- data1[, -c(1, 2)]
names(data1)
##  [1] "No usa electricidad"    "Sí usa electricidad"    "Total"                 
##  [4] "No usa gas (balón GLP)" "Sí usa gas (balón GLP)" "Total.1"               
##  [7] "No usa carbón"          "Sí usa carbón"          "Total.2"               
## [10] "No usa leña"            "Sí usa leña"            "Total.3"               
## [13] "DEPARTAMENTO"           "PROVINCIA"
names(data1)[2]='Electricidad_si'
names(data1)[5]='Gas_si'
names(data1)[8]='Carbon_si'
names(data1)[11]='Leña_si'
names(data1)
##  [1] "No usa electricidad"    "Electricidad_si"        "Total"                 
##  [4] "No usa gas (balón GLP)" "Gas_si"                 "Total.1"               
##  [7] "No usa carbón"          "Carbon_si"              "Total.2"               
## [10] "No usa leña"            "Leña_si"                "Total.3"               
## [13] "DEPARTAMENTO"           "PROVINCIA"
str(data1)
## 'data.frame':    199 obs. of  14 variables:
##  $ No usa electricidad   : chr  "14763" "20313" "7689" "9853" ...
##  $ Electricidad_si       : chr  "574" "161" "124" "14" ...
##  $ Total                 : chr  "15337" "20474" "7813" "9867" ...
##  $ No usa gas (balón GLP): chr  "4696" "10557" "3154" "8331" ...
##  $ Gas_si                : chr  "10641" "9917" "4659" "1536" ...
##  $ Total.1               : chr  "15337" "20474" "7813" "9867" ...
##  $ No usa carbón         : chr  "15161" "20185" "7755" "9841" ...
##  $ Carbon_si             : chr  "176" "289" "58" "26" ...
##  $ Total.2               : chr  "15337" "20474" "7813" "9867" ...
##  $ No usa leña           : chr  "7236" "7357" "2345" "1059" ...
##  $ Leña_si               : chr  "8101" "13117" "5468" "8808" ...
##  $ Total.3               : chr  "15337" "20474" "7813" "9867" ...
##  $ DEPARTAMENTO          : chr  "Amazonas" "Amazonas" "Amazonas" "Amazonas" ...
##  $ PROVINCIA             : chr  "Chachapoyas" "Bagua" "Bongara" "Condorcanqui" ...
data1$Electricidad_si = as.numeric(data1$Electricidad_si)
data1$Total = as.numeric(data1$Total)
data1$Gas_si = as.numeric(data1$Gas_si)
data1$Total.1 = as.numeric(data1$Total.1)
data1$Carbon_si = as.numeric(data1$Carbon_si)
data1$Total.2 = as.numeric(data1$Total.2)
data1$Leña_si = as.numeric(data1$Leña_si)
data1$Total.3 = as.numeric(data1$Total.3)
str(data1)
## 'data.frame':    199 obs. of  14 variables:
##  $ No usa electricidad   : chr  "14763" "20313" "7689" "9853" ...
##  $ Electricidad_si       : num  574 161 124 14 90 65 255 921 16 33 ...
##  $ Total                 : num  15337 20474 7813 9867 13202 ...
##  $ No usa gas (balón GLP): chr  "4696" "10557" "3154" "8331" ...
##  $ Gas_si                : num  10641 9917 4659 1536 6339 ...
##  $ Total.1               : num  15337 20474 7813 9867 13202 ...
##  $ No usa carbón         : chr  "15161" "20185" "7755" "9841" ...
##  $ Carbon_si             : num  176 289 58 26 33 26 335 218 4 4 ...
##  $ Total.2               : num  15337 20474 7813 9867 13202 ...
##  $ No usa leña           : chr  "7236" "7357" "2345" "1059" ...
##  $ Leña_si               : num  8101 13117 5468 8808 11369 ...
##  $ Total.3               : num  15337 20474 7813 9867 13202 ...
##  $ DEPARTAMENTO          : chr  "Amazonas" "Amazonas" "Amazonas" "Amazonas" ...
##  $ PROVINCIA             : chr  "Chachapoyas" "Bagua" "Bongara" "Condorcanqui" ...
data1$Electricidad_pct = data1$Electricidad_si/data1$Total
data1$Gas_pct = data1$Gas_si/data1$Total.1
data1$Carbon_pct = data1$Carbon_si/data1$Total.2
data1$Leña_pct = data1$Leña_si/data1$Total.3
names(data1)
##  [1] "No usa electricidad"    "Electricidad_si"        "Total"                 
##  [4] "No usa gas (balón GLP)" "Gas_si"                 "Total.1"               
##  [7] "No usa carbón"          "Carbon_si"              "Total.2"               
## [10] "No usa leña"            "Leña_si"                "Total.3"               
## [13] "DEPARTAMENTO"           "PROVINCIA"              "Electricidad_pct"      
## [16] "Gas_pct"                "Carbon_pct"             "Leña_pct"
data1_nueva <- data1[, c("Electricidad_pct","Gas_pct","Carbon_pct", "Leña_pct")]  
library(polycor)
corMatrix=polycor::hetcor(data1_nueva)$correlations
round(corMatrix,2)
##                  Electricidad_pct Gas_pct Carbon_pct Leña_pct
## Electricidad_pct             1.00    0.47       0.10    -0.46
## Gas_pct                      0.47    1.00       0.23    -0.87
## Carbon_pct                   0.10    0.23       1.00    -0.31
## Leña_pct                    -0.46   -0.87      -0.31     1.00
library(ggcorrplot)

ggcorrplot(corMatrix)

Verificamos si permite factorizar

library(psych)
## 
## Adjuntando el paquete: 'psych'
## The following object is masked from 'package:polycor':
## 
##     polyserial
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## The following object is masked from 'package:modelsummary':
## 
##     SD
## The following objects are masked from 'package:arm':
## 
##     logit, rescale, sim
## The following object is masked from 'package:car':
## 
##     logit
psych::KMO(corMatrix) 
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA =  0.63
## MSA for each item = 
## Electricidad_pct          Gas_pct       Carbon_pct         Leña_pct 
##             0.91             0.59             0.72             0.59

Prueba Matriz identidad

cortest.bartlett(corMatrix,n=nrow(data1_nueva))$p.value>0.05
## [1] FALSE

Prueba Matriz singular

library(matrixcalc)

is.singular.matrix(corMatrix)
## [1] FALSE

En cuantos factores podríamos dimensionar

fa.parallel(data1_nueva, fa = 'fa',correct = T,plot = F)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Parallel analysis suggests that the number of factors =  1  and the number of components =  NA

Nos señala que en uno, (pero tener cuidado)

Varimax

library(GPArotation)
## 
## Adjuntando el paquete: 'GPArotation'
## The following objects are masked from 'package:psych':
## 
##     equamax, varimin
resfa1 <- fa(data1_nueva,
            nfactors = 1,
            cor = 'mixed',
            rotate = "varimax", #oblimin?
            fm="minres")
print(resfa1$loadings)
## 
## Loadings:
##                  MR1   
## Electricidad_pct  0.491
## Gas_pct           0.912
## Carbon_pct        0.280
## Leña_pct         -0.956
## 
##                  MR1
## SS loadings    2.065
## Proportion Var 0.516
print(resfa1$loadings,cutoff = 0.5)
## 
## Loadings:
##                  MR1   
## Electricidad_pct       
## Gas_pct           0.912
## Carbon_pct             
## Leña_pct         -0.956
## 
##                  MR1
## SS loadings    2.065
## Proportion Var 0.516

Oblimin

library(GPArotation)
resfa2 <- fa(data1_nueva,
            nfactors = 1,
            cor = 'mixed',
            rotate = "oblimin", #oblimin?
            fm="minres")
print(resfa2$loadings)
## 
## Loadings:
##                  MR1   
## Electricidad_pct  0.491
## Gas_pct           0.912
## Carbon_pct        0.280
## Leña_pct         -0.956
## 
##                  MR1
## SS loadings    2.065
## Proportion Var 0.516
print(resfa2$loadings,cutoff = 0.5)
## 
## Loadings:
##                  MR1   
## Electricidad_pct       
## Gas_pct           0.912
## Carbon_pct             
## Leña_pct         -0.956
## 
##                  MR1
## SS loadings    2.065
## Proportion Var 0.516

Rpta: sí es viable resumirlo en un factor