#paquetes
library(AER)
## Cargando paquete requerido: car
## Cargando paquete requerido: carData
## Cargando paquete requerido: lmtest
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Cargando paquete requerido: sandwich
## Cargando paquete requerido: survival
library(rio)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(arm)
## Cargando paquete requerido: MASS
##
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Cargando paquete requerido: Matrix
## Cargando paquete requerido: lme4
##
## Adjuntando el paquete: 'lme4'
## The following object is masked from 'package:rio':
##
## factorize
##
## arm (Version 1.14-4, built: 2024-4-1)
## Working directory is F:/PUCP/PUCP Sociales/Ciclo 6/Estadistica 2/Examen Final
##
## Adjuntando el paquete: 'arm'
## The following object is masked from 'package:car':
##
## logit
library(Matrix)
library(modelsummary)
## `modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
## backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
##
## Revert to `kableExtra` for one session:
##
## options(modelsummary_factory_default = 'kableExtra')
## options(modelsummary_factory_latex = 'kableExtra')
## options(modelsummary_factory_html = 'kableExtra')
##
## Silence this message forever:
##
## config_modelsummary(startup_message = FALSE)
library(kableExtra)
##
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(lme4)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ dplyr::recode() masks car::recode()
## ✖ MASS::select() masks dplyr::select()
## ✖ purrr::some() masks car::some()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data1 = import("data1_final.xlsx")
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
names(data1)=data1[5,] #cambio el número según la fila
head(data1,10)%>%
rmarkdown::paged_table()
data1=data1[-c(1:5),] #borrar filas, de la primera a la quinta
head(data1,10)%>%
rmarkdown::paged_table()
data1 <- data1[, -1] #borrar columna específica (la 1)
data1 <- data1 %>%
mutate(DEPARTAMENTO = trimws(substring(Provincia, 1, regexpr(",", Provincia) - 1)),
PROVINCIA = trimws(substring(Provincia, regexpr(":", Provincia) + 1)))
# Eliminar columnas por su posición
data1 <- data1[, -c(1, 2)]
names(data1)
## [1] "No usa electricidad" "Sí usa electricidad" "Total"
## [4] "No usa gas (balón GLP)" "Sí usa gas (balón GLP)" "Total.1"
## [7] "No usa carbón" "Sí usa carbón" "Total.2"
## [10] "No usa leña" "Sí usa leña" "Total.3"
## [13] "DEPARTAMENTO" "PROVINCIA"
names(data1)[2]='Electricidad_si'
names(data1)[5]='Gas_si'
names(data1)[8]='Carbon_si'
names(data1)[11]='Leña_si'
names(data1)
## [1] "No usa electricidad" "Electricidad_si" "Total"
## [4] "No usa gas (balón GLP)" "Gas_si" "Total.1"
## [7] "No usa carbón" "Carbon_si" "Total.2"
## [10] "No usa leña" "Leña_si" "Total.3"
## [13] "DEPARTAMENTO" "PROVINCIA"
str(data1)
## 'data.frame': 199 obs. of 14 variables:
## $ No usa electricidad : chr "14763" "20313" "7689" "9853" ...
## $ Electricidad_si : chr "574" "161" "124" "14" ...
## $ Total : chr "15337" "20474" "7813" "9867" ...
## $ No usa gas (balón GLP): chr "4696" "10557" "3154" "8331" ...
## $ Gas_si : chr "10641" "9917" "4659" "1536" ...
## $ Total.1 : chr "15337" "20474" "7813" "9867" ...
## $ No usa carbón : chr "15161" "20185" "7755" "9841" ...
## $ Carbon_si : chr "176" "289" "58" "26" ...
## $ Total.2 : chr "15337" "20474" "7813" "9867" ...
## $ No usa leña : chr "7236" "7357" "2345" "1059" ...
## $ Leña_si : chr "8101" "13117" "5468" "8808" ...
## $ Total.3 : chr "15337" "20474" "7813" "9867" ...
## $ DEPARTAMENTO : chr "Amazonas" "Amazonas" "Amazonas" "Amazonas" ...
## $ PROVINCIA : chr "Chachapoyas" "Bagua" "Bongara" "Condorcanqui" ...
data1$Electricidad_si = as.numeric(data1$Electricidad_si)
data1$Total = as.numeric(data1$Total)
data1$Gas_si = as.numeric(data1$Gas_si)
data1$Total.1 = as.numeric(data1$Total.1)
data1$Carbon_si = as.numeric(data1$Carbon_si)
data1$Total.2 = as.numeric(data1$Total.2)
data1$Leña_si = as.numeric(data1$Leña_si)
data1$Total.3 = as.numeric(data1$Total.3)
str(data1)
## 'data.frame': 199 obs. of 14 variables:
## $ No usa electricidad : chr "14763" "20313" "7689" "9853" ...
## $ Electricidad_si : num 574 161 124 14 90 65 255 921 16 33 ...
## $ Total : num 15337 20474 7813 9867 13202 ...
## $ No usa gas (balón GLP): chr "4696" "10557" "3154" "8331" ...
## $ Gas_si : num 10641 9917 4659 1536 6339 ...
## $ Total.1 : num 15337 20474 7813 9867 13202 ...
## $ No usa carbón : chr "15161" "20185" "7755" "9841" ...
## $ Carbon_si : num 176 289 58 26 33 26 335 218 4 4 ...
## $ Total.2 : num 15337 20474 7813 9867 13202 ...
## $ No usa leña : chr "7236" "7357" "2345" "1059" ...
## $ Leña_si : num 8101 13117 5468 8808 11369 ...
## $ Total.3 : num 15337 20474 7813 9867 13202 ...
## $ DEPARTAMENTO : chr "Amazonas" "Amazonas" "Amazonas" "Amazonas" ...
## $ PROVINCIA : chr "Chachapoyas" "Bagua" "Bongara" "Condorcanqui" ...
data1$Electricidad_pct = data1$Electricidad_si/data1$Total
data1$Gas_pct = data1$Gas_si/data1$Total.1
data1$Carbon_pct = data1$Carbon_si/data1$Total.2
data1$Leña_pct = data1$Leña_si/data1$Total.3
names(data1)
## [1] "No usa electricidad" "Electricidad_si" "Total"
## [4] "No usa gas (balón GLP)" "Gas_si" "Total.1"
## [7] "No usa carbón" "Carbon_si" "Total.2"
## [10] "No usa leña" "Leña_si" "Total.3"
## [13] "DEPARTAMENTO" "PROVINCIA" "Electricidad_pct"
## [16] "Gas_pct" "Carbon_pct" "Leña_pct"
data1_nueva <- data1[, c("Electricidad_pct","Gas_pct","Carbon_pct", "Leña_pct")]
library(polycor)
corMatrix=polycor::hetcor(data1_nueva)$correlations
round(corMatrix,2)
## Electricidad_pct Gas_pct Carbon_pct Leña_pct
## Electricidad_pct 1.00 0.47 0.10 -0.46
## Gas_pct 0.47 1.00 0.23 -0.87
## Carbon_pct 0.10 0.23 1.00 -0.31
## Leña_pct -0.46 -0.87 -0.31 1.00
library(ggcorrplot)
ggcorrplot(corMatrix)
library(psych)
##
## Adjuntando el paquete: 'psych'
## The following object is masked from 'package:polycor':
##
## polyserial
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:modelsummary':
##
## SD
## The following objects are masked from 'package:arm':
##
## logit, rescale, sim
## The following object is masked from 'package:car':
##
## logit
psych::KMO(corMatrix)
## Kaiser-Meyer-Olkin factor adequacy
## Call: psych::KMO(r = corMatrix)
## Overall MSA = 0.63
## MSA for each item =
## Electricidad_pct Gas_pct Carbon_pct Leña_pct
## 0.91 0.59 0.72 0.59
cortest.bartlett(corMatrix,n=nrow(data1_nueva))$p.value>0.05
## [1] FALSE
library(matrixcalc)
is.singular.matrix(corMatrix)
## [1] FALSE
fa.parallel(data1_nueva, fa = 'fa',correct = T,plot = F)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Parallel analysis suggests that the number of factors = 1 and the number of components = NA
Nos señala que en uno, (pero tener cuidado)
library(GPArotation)
##
## Adjuntando el paquete: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
resfa1 <- fa(data1_nueva,
nfactors = 1,
cor = 'mixed',
rotate = "varimax", #oblimin?
fm="minres")
print(resfa1$loadings)
##
## Loadings:
## MR1
## Electricidad_pct 0.491
## Gas_pct 0.912
## Carbon_pct 0.280
## Leña_pct -0.956
##
## MR1
## SS loadings 2.065
## Proportion Var 0.516
print(resfa1$loadings,cutoff = 0.5)
##
## Loadings:
## MR1
## Electricidad_pct
## Gas_pct 0.912
## Carbon_pct
## Leña_pct -0.956
##
## MR1
## SS loadings 2.065
## Proportion Var 0.516
library(GPArotation)
resfa2 <- fa(data1_nueva,
nfactors = 1,
cor = 'mixed',
rotate = "oblimin", #oblimin?
fm="minres")
print(resfa2$loadings)
##
## Loadings:
## MR1
## Electricidad_pct 0.491
## Gas_pct 0.912
## Carbon_pct 0.280
## Leña_pct -0.956
##
## MR1
## SS loadings 2.065
## Proportion Var 0.516
print(resfa2$loadings,cutoff = 0.5)
##
## Loadings:
## MR1
## Electricidad_pct
## Gas_pct 0.912
## Carbon_pct
## Leña_pct -0.956
##
## MR1
## SS loadings 2.065
## Proportion Var 0.516