# SUBIR LOS MÓDULOS 100 Y 612. ## MIDES LA VARIABLE DEPENDIENTE Y TE QUEDAS SOLO CON LAS VARIABLES ÚTILES; LUEGO EXPORTAMOS LA BASE DE DATOS.
library(rio)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
## Warning: package 'stringr' was built under R version 4.4.3
library(magrittr)
library(readr)
## Warning: package 'readr' was built under R version 4.4.2
library(rvest)
##
## Adjuntando el paquete: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
library(knitr)
library(modelsummary)
## Warning: package 'modelsummary' was built under R version 4.4.2
library(arm)
## Warning: package 'arm' was built under R version 4.4.2
## Cargando paquete requerido: MASS
## Warning: package 'MASS' was built under R version 4.4.2
##
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Cargando paquete requerido: Matrix
## Cargando paquete requerido: lme4
##
## Adjuntando el paquete: 'lme4'
## The following object is masked from 'package:rio':
##
## factorize
##
## arm (Version 1.14-4, built: 2024-4-1)
## Working directory is C:/Users/G ALEJANDRA ROJAS/Documents/ESTAPOL2/PROYECTO TF
library(magrittr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.2
# With the "LIMPIEZA 1_TF" cleaning step done, move on to building the
# dependent variable.
# Load the working table from the CSV hosted in the project's GitHub repository.
data <- "https://raw.githubusercontent.com/GeraldyAle16/Estapol_2/refs/heads/main/Basegeneral.csv"
bdpe <- rio::import(data)
# Structural overview of what was imported (column names and types).
utils::str(bdpe)
## 'data.frame': 1359 obs. of 26 variables:
## $ CONGLOME : int 18636 18636 18636 18636 18644 18581 18576 18576 18599 18598 ...
## $ VIVIENDA : int 28 64 82 100 46 113 120 120 86 149 ...
## $ HOGAR : int 11 11 11 11 11 11 12 22 11 11 ...
## $ UBIGEO : int 160101 160101 160101 160101 160101 160101 160101 160101 160101 160101 ...
## $ P1131 : int 0 1 1 0 1 0 0 0 0 0 ...
## $ P1132 : int 1 1 1 1 0 1 0 1 1 1 ...
## $ P1133 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ P1135 : int 1 1 1 1 1 1 1 1 1 0 ...
## $ P1136 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ P1137 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ P1139 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ P1121 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ P1141 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ P1142 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ P101 : int 1 1 1 1 1 1 1 8 1 1 ...
## $ P102 : int 1 1 1 1 7 1 1 9 1 1 ...
## $ P1172$02 : int 97 164 213 10 35 97 15 20 72 78 ...
## $ radio : int 2 2 2 2 1 2 1 1 2 1 ...
## $ tv_color : int 1 1 1 2 2 1 2 1 1 1 ...
## $ tv_bn : int 2 2 2 2 2 2 2 2 2 2 ...
## $ refrigeradora: int 1 1 1 2 2 1 2 1 1 1 ...
## $ P207 : int 2 1 2 2 1 1 1 1 2 1 ...
## $ P208A : int 52 60 58 58 85 38 58 36 40 58 ...
## $ P301A : int 8 10 10 8 9 10 7 7 8 6 ...
## $ MIEPERHO : int 2 4 7 3 3 4 4 6 4 2 ...
## $ INGHOG2D : num 40074 186510 154248 42930 12586 ...
# Build the dependent variable: energy poverty (energy poor / not energy poor).
# Step 1: one deprivation flag per MEPI dimension (five in total), coded
# 1 = deprived, 0 = not deprived. A missing input yields NA unless another
# input of the same flag already settles the result.
# NOTE(review): the meaning of each survey code (P1135, P1121, ...) and of the
# value 1 in the asset columns is taken from the flag names only; confirm
# against the survey codebook.
datos <- dplyr::mutate(
  bdpe,
  # Cooking: deprived when any of these four indicators equals 1.
  privacion_cocina = ifelse(
    P1135 == 1 | P1136 == 1 | P1137 == 1 | P1139 == 1,
    1, 0
  ),
  # Lighting: deprived when P1121 is anything other than 1.
  privacion_ilumina = ifelse(P1121 != 1, 1, 0),
  # Communication: deprived only when neither P1141 nor P1142 equals 1.
  privacion_comuni = ifelse(P1141 != 1 & P1142 != 1, 1, 0),
  # Household appliances: deprived when `refrigeradora` is not 1.
  privacion_electro = ifelse(refrigeradora != 1, 1, 0),
  # Entertainment: deprived only when radio, colour TV and B/W TV are all != 1.
  privacion_entreteni = ifelse(
    radio != 1 & tv_color != 1 & tv_bn != 1,
    1, 0
  )
)
# Step 2: weighted deprivation score, using the MEPI weights.
# As written, the five weights add up to 0.99, so that is the highest score a
# household can reach. The terms are summed left to right in this fixed order.
datos <- dplyr::mutate(
  datos,
  puntaje = 0.40 * privacion_cocina +
    0.20 * privacion_ilumina +
    0.13 * privacion_comuni +
    0.13 * privacion_electro +
    0.13 * privacion_entreteni
)
# Step 3: derive the binary dependent variable from the deprivation score.
#   puntaje >= 0.26 -> energy-poor household (1)
#   puntaje <  0.26 -> not energy-poor household (0)
# `puntaje` is read through mutate()'s data mask instead of `datos$puntaje`:
# the `$` form pulls the column from the global object, which silently
# misaligns (or errors) if the piped data is ever grouped or filtered first.
datos <- datos %>%
  mutate(Pobre_energético = ifelse(puntaje >= 0.26, 1, 0))
# Distribution of the dependent variable: one bar per class showing the number
# of households, labelled with that class's share of all households.
ggplot(datos, aes(x = factor(Pobre_energético))) +
  # Sea-green fill (#2E8B57) and narrower bars; bar height is the row count.
  # after_stat(count) replaces the `..count..` notation deprecated in
  # ggplot2 3.4.0.
  geom_bar(aes(y = after_stat(count)), fill = "#2E8B57", width = 0.5) +
  # Percentage label just above each bar: class count over the total count.
  geom_text(
    stat = "count",
    aes(label = scales::percent(after_stat(count / sum(count)))),
    vjust = -0.5, size = 3.5, fontface = "bold"
  ) +
  labs(
    title = "Distribución de hogares en pobreza energética",
    x = "¿Está en pobreza energética? (0 = No, 1 = Sí)",
    y = "Número de hogares"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 15),
    axis.text = element_text(face = "bold")
  )
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Inspect the enriched table: the imported columns plus the five deprivation
# flags, the score and the binary outcome.
utils::str(datos)
## 'data.frame': 1359 obs. of 33 variables:
## $ CONGLOME : int 18636 18636 18636 18636 18644 18581 18576 18576 18599 18598 ...
## $ VIVIENDA : int 28 64 82 100 46 113 120 120 86 149 ...
## $ HOGAR : int 11 11 11 11 11 11 12 22 11 11 ...
## $ UBIGEO : int 160101 160101 160101 160101 160101 160101 160101 160101 160101 160101 ...
## $ P1131 : int 0 1 1 0 1 0 0 0 0 0 ...
## $ P1132 : int 1 1 1 1 0 1 0 1 1 1 ...
## $ P1133 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ P1135 : int 1 1 1 1 1 1 1 1 1 0 ...
## $ P1136 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ P1137 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ P1139 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ P1121 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ P1141 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ P1142 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ P101 : int 1 1 1 1 1 1 1 8 1 1 ...
## $ P102 : int 1 1 1 1 7 1 1 9 1 1 ...
## $ P1172$02 : int 97 164 213 10 35 97 15 20 72 78 ...
## $ radio : int 2 2 2 2 1 2 1 1 2 1 ...
## $ tv_color : int 1 1 1 2 2 1 2 1 1 1 ...
## $ tv_bn : int 2 2 2 2 2 2 2 2 2 2 ...
## $ refrigeradora : int 1 1 1 2 2 1 2 1 1 1 ...
## $ P207 : int 2 1 2 2 1 1 1 1 2 1 ...
## $ P208A : int 52 60 58 58 85 38 58 36 40 58 ...
## $ P301A : int 8 10 10 8 9 10 7 7 8 6 ...
## $ MIEPERHO : int 2 4 7 3 3 4 4 6 4 2 ...
## $ INGHOG2D : num 40074 186510 154248 42930 12586 ...
## $ privacion_cocina : num 1 1 1 1 1 1 1 1 1 0 ...
## $ privacion_ilumina : num 0 0 0 0 0 0 0 0 0 0 ...
## $ privacion_comuni : num 0 0 0 0 0 0 0 0 0 0 ...
## $ privacion_electro : num 0 0 0 1 1 0 1 0 0 0 ...
## $ privacion_entreteni: num 0 0 0 1 0 0 0 0 0 0 ...
## $ puntaje : num 0.4 0.4 0.4 0.66 0.53 0.4 0.53 0.4 0.4 0 ...
## $ Pobre_energético : num 1 1 1 1 1 1 1 1 1 0 ...
library(dplyr)
# Reduced table for the next stage: household identifiers, the retained survey
# variables and the dependent variable. The deprivation flags and the score
# are intentionally left out.
datos_limpios <- dplyr::select(
  datos,
  # Household identifiers
  CONGLOME, VIVIENDA, HOGAR, UBIGEO,
  # Retained survey variables (`P1172$02` needs backticks because of the `$`)
  P101, P102, `P1172$02`, P207, P208A, P301A,
  MIEPERHO, INGHOG2D,
  # Dependent variable
  Pobre_energético
)
# Written relative to the current working directory.
rio::export(datos_limpios, "Consolidado.csv")