R Markdown

#SUBIR EL MÓDULO 100 Y 612 ##MIDES LA VARIABLE DEPENDIENTE Y EMPIEZAS A SOLO TENER LAS VARIABLES ÚTILES, LUEGO EXPORTAMOS LA BASE DE DATOS.

library(rio)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
## Warning: package 'stringr' was built under R version 4.4.3
library(magrittr)
library(readr)
## Warning: package 'readr' was built under R version 4.4.2
library(rvest)
## 
## Adjuntando el paquete: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(knitr)
library(modelsummary)
## Warning: package 'modelsummary' was built under R version 4.4.2
library(arm)
## Warning: package 'arm' was built under R version 4.4.2
## Cargando paquete requerido: MASS
## Warning: package 'MASS' was built under R version 4.4.2
## 
## Adjuntando el paquete: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Cargando paquete requerido: Matrix
## Cargando paquete requerido: lme4
## 
## Adjuntando el paquete: 'lme4'
## The following object is masked from 'package:rio':
## 
##     factorize
## 
## arm (Version 1.14-4, built: 2024-4-1)
## Working directory is C:/Users/G ALEJANDRA ROJAS/Documents/ESTAPOL2/PROYECTO TF
library(magrittr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.2
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.2

#AHORA LUEGO DE “LIMPIEZA 1_TF”, PASAREMOS A CREAR LA VARIABLE DEPENDIENTE:

#CARGAMOS BD:

# URL of the consolidated CSV (raw file hosted on GitHub).
data <- "https://raw.githubusercontent.com/GeraldyAle16/Estapol_2/refs/heads/main/Basegeneral.csv"
# Download and parse the CSV into a data frame.
bdpe <- rio::import(data)
# Print column names, types and leading values as a quick sanity check.
str(bdpe)
## 'data.frame':    1359 obs. of  26 variables:
##  $ CONGLOME     : int  18636 18636 18636 18636 18644 18581 18576 18576 18599 18598 ...
##  $ VIVIENDA     : int  28 64 82 100 46 113 120 120 86 149 ...
##  $ HOGAR        : int  11 11 11 11 11 11 12 22 11 11 ...
##  $ UBIGEO       : int  160101 160101 160101 160101 160101 160101 160101 160101 160101 160101 ...
##  $ P1131        : int  0 1 1 0 1 0 0 0 0 0 ...
##  $ P1132        : int  1 1 1 1 0 1 0 1 1 1 ...
##  $ P1133        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ P1135        : int  1 1 1 1 1 1 1 1 1 0 ...
##  $ P1136        : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ P1137        : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ P1139        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ P1121        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ P1141        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ P1142        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ P101         : int  1 1 1 1 1 1 1 8 1 1 ...
##  $ P102         : int  1 1 1 1 7 1 1 9 1 1 ...
##  $ P1172$02     : int  97 164 213 10 35 97 15 20 72 78 ...
##  $ radio        : int  2 2 2 2 1 2 1 1 2 1 ...
##  $ tv_color     : int  1 1 1 2 2 1 2 1 1 1 ...
##  $ tv_bn        : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ refrigeradora: int  1 1 1 2 2 1 2 1 1 1 ...
##  $ P207         : int  2 1 2 2 1 1 1 1 2 1 ...
##  $ P208A        : int  52 60 58 58 85 38 58 36 40 58 ...
##  $ P301A        : int  8 10 10 8 9 10 7 7 8 6 ...
##  $ MIEPERHO     : int  2 4 7 3 3 4 4 6 4 2 ...
##  $ INGHOG2D     : num  40074 186510 154248 42930 12586 ...

#CONSTRUIMOS VARIABLE DEPENDIENTE: POBREZA ENERGÉTICA: POBRE O NO POBRE ENERGÉTICO

#CALCULAMOS EL NIVEL DE PRIVACIÓN DE ACUERDO A las 5 dimensiones del MEPI

# Build one 0/1 deprivation flag per MEPI dimension (1 = deprived, 0 = not).
# Every flag is written as "deprivation condition -> 1" so the five rules read
# the same way. A missing input gives NA unless the remaining conditions of
# the same test already decide the outcome.
datos <- dplyr::mutate(
  bdpe,
  # Cooking: deprived when any of P1135, P1136, P1137 or P1139 equals 1.
  # NOTE(review): presumably the non-clean cooking-fuel items of the survey;
  # confirm against the questionnaire.
  privacion_cocina = ifelse(
    P1135 == 1 | P1136 == 1 | P1137 == 1 | P1139 == 1, 1, 0
  ),
  # Lighting: deprived when P1121 is not 1.
  privacion_ilumina = ifelse(P1121 != 1, 1, 0),
  # Communication: deprived when neither P1141 nor P1142 equals 1.
  privacion_comuni = ifelse(P1141 != 1 & P1142 != 1, 1, 0),
  # Appliances: deprived when `refrigeradora` is not 1
  # (appears to be coded 1 = owns, 2 = does not own; TODO confirm).
  privacion_electro = ifelse(refrigeradora != 1, 1, 0),
  # Entertainment: deprived when none of radio, colour TV or B/W TV equals 1.
  privacion_entreteni = ifelse(radio != 1 & tv_color != 1 & tv_bn != 1, 1, 0)
)

#CALCULAMOS PUNTAJE DE PRIVACION CON SUS PESOS (DEL MEPI)

# Weighted deprivation score per household: each 0/1 flag is multiplied by its
# dimension weight and the products are added. The weights used here
# (0.40, 0.20, 0.13, 0.13, 0.13) add up to 0.99, which is the maximum score.
datos <- dplyr::mutate(
  datos,
  puntaje = 0.40 * privacion_cocina +
    0.20 * privacion_ilumina +
    0.13 * privacion_comuni +
    0.13 * privacion_electro +
    0.13 * privacion_entreteni
)

#CREAMOS VARIABLE DEPENDIENTE A PARTIR DEL PUNTAJE: - Si Di≥0.26, el hogar será pobre energético (etiqueta 1). - Si Di<0.26, el hogar será no pobre energético (etiqueta 0).

# Binary dependent variable: 1 when the weighted score reaches the 0.26
# cut-off (energy poor), 0 otherwise. An NA score stays NA.
# `puntaje` is referenced through mutate()'s data mask rather than as
# `datos$puntaje`, so the rule always uses the rows flowing through the pipe
# (the `$` form would misalign if the data were grouped or filtered upstream).
datos <- datos %>%
  mutate(Pobre_energético = ifelse(puntaje >= 0.26, 1, 0))

#VEMOS LA DISTRIBUCIÓN DE LA VARIABLE DEPENDIENTE

# Bar chart of the binary energy-poverty indicator: one bar per category
# (0 = no, 1 = yes) showing the number of households, with each bar's share of
# all households printed above it.
# after_stat() replaces the `..count..` notation deprecated in ggplot2 3.4.0.
ggplot(datos, aes(x = factor(Pobre_energético))) +
  # Sea-green fill (#2E8B57), narrower-than-default bars.
  geom_bar(aes(y = after_stat(count)), fill = "#2E8B57", width = 0.5) +
  # Percentage label: bar count divided by the total count across bars.
  geom_text(
    stat = "count",
    aes(label = after_stat(scales::percent(count / sum(count)))),
    vjust = -0.5, size = 3.5, fontface = "bold"
  ) +
  labs(
    title = "Distribución de hogares en pobreza energética",
    x = "¿Está en pobreza energética? (0 = No, 1 = Sí)",
    y = "Número de hogares"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 15),
    axis.text = element_text(face = "bold")
  )
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Print the structure of `datos` to check that the derived columns
# (deprivation flags, score and binary indicator) were added.
str(datos)
## 'data.frame':    1359 obs. of  33 variables:
##  $ CONGLOME           : int  18636 18636 18636 18636 18644 18581 18576 18576 18599 18598 ...
##  $ VIVIENDA           : int  28 64 82 100 46 113 120 120 86 149 ...
##  $ HOGAR              : int  11 11 11 11 11 11 12 22 11 11 ...
##  $ UBIGEO             : int  160101 160101 160101 160101 160101 160101 160101 160101 160101 160101 ...
##  $ P1131              : int  0 1 1 0 1 0 0 0 0 0 ...
##  $ P1132              : int  1 1 1 1 0 1 0 1 1 1 ...
##  $ P1133              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ P1135              : int  1 1 1 1 1 1 1 1 1 0 ...
##  $ P1136              : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ P1137              : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ P1139              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ P1121              : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ P1141              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ P1142              : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ P101               : int  1 1 1 1 1 1 1 8 1 1 ...
##  $ P102               : int  1 1 1 1 7 1 1 9 1 1 ...
##  $ P1172$02           : int  97 164 213 10 35 97 15 20 72 78 ...
##  $ radio              : int  2 2 2 2 1 2 1 1 2 1 ...
##  $ tv_color           : int  1 1 1 2 2 1 2 1 1 1 ...
##  $ tv_bn              : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ refrigeradora      : int  1 1 1 2 2 1 2 1 1 1 ...
##  $ P207               : int  2 1 2 2 1 1 1 1 2 1 ...
##  $ P208A              : int  52 60 58 58 85 38 58 36 40 58 ...
##  $ P301A              : int  8 10 10 8 9 10 7 7 8 6 ...
##  $ MIEPERHO           : int  2 4 7 3 3 4 4 6 4 2 ...
##  $ INGHOG2D           : num  40074 186510 154248 42930 12586 ...
##  $ privacion_cocina   : num  1 1 1 1 1 1 1 1 1 0 ...
##  $ privacion_ilumina  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ privacion_comuni   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ privacion_electro  : num  0 0 0 1 1 0 1 0 0 0 ...
##  $ privacion_entreteni: num  0 0 0 1 0 0 0 0 0 0 ...
##  $ puntaje            : num  0.4 0.4 0.4 0.66 0.53 0.4 0.53 0.4 0.4 0 ...
##  $ Pobre_energético   : num  1 1 1 1 1 1 1 1 1 0 ...
library(dplyr)

# Build the reduced data set that is carried forward: only the columns listed
# below, in this order. select() is namespaced because MASS (attached by arm)
# masks dplyr::select.
datos_limpios <- dplyr::select(
  datos,
  dplyr::all_of(c(
    "CONGLOME", "VIVIENDA", "HOGAR", "UBIGEO",
    "P101", "P102", "P1172$02", "P207", "P208A", "P301A",
    "MIEPERHO", "INGHOG2D", "Pobre_energético"
  ))
)
# Write the reduced data set as CSV (relative to the working directory).
rio::export(datos_limpios, "Consolidado.csv")