library(tidyverse)
library(readxl)
library(ggplot2)
# Load the individual-level workbook into a tibble.
# NOTE(review): the absolute path only resolves on the original author's
# machine; the file name suggests EPH individual microdata -- confirm.
usu_individual_T325 <- read_excel(
  path = "C:/Users/eb/Downloads/usu_individual_T325.xlsx"
)

# Check how the two coded columns are stored before recoding them.
# Recorded output (earlier run): both calls returned "numeric".
class(usu_individual_T325[["NIVEL_ED"]])
class(usu_individual_T325[["ESTADO"]])

# Frequency of each raw ESTADO code.
# Recorded output (earlier run):
#   0: 64 | 1: 20066 | 2: 1303 | 3: 18630 | 4: 4883
table(usu_individual_T325[["ESTADO"]])

# Replace the numeric ESTADO codes 0-4 with a labelled factor, keeping the
# levels in code order.
usu_individual_T325 <- dplyr::mutate(
  usu_individual_T325,
  ESTADO = factor(
    ESTADO,
    levels = 0:4,
    labels = c(
      "No realizada",
      "Ocupado",
      "Desocupado",
      "Inactivo",
      "Menor de 10 años"
    )
  )
)

# Same tabulation after recoding, to confirm no code was lost.
# Recorded output (earlier run):
#   No realizada: 64 | Ocupado: 20066 | Desocupado: 1303 |
#   Inactivo: 18630 | Menor de 10 años: 4883
table(usu_individual_T325[["ESTADO"]])
# Keep only rows whose PP08D1 is present and non-negative (zeros are kept).
# dplyr::filter() drops rows where a condition evaluates to NA, so the result
# matches a base-R mask built with !is.na().
# NOTE(review): this overwrites the full data set, so every later section of
# the script works on this filtered subset -- confirm that is intended.
usu_individual_T325 <- dplyr::filter(
  usu_individual_T325,
  !is.na(PP08D1),
  PP08D1 >= 0
)

# Check the result of the filter.
# Recorded output (earlier run):
#   Min 0 | 1st Qu. 0 | Median 400000 | Mean 583559 |
#   3rd Qu. 900000 | Max 12000000
summary(usu_individual_T325[["PP08D1"]])
# Education level (NIVEL_ED) while it is still stored as numeric codes.
# Absolute frequencies per code.
# Recorded output (earlier run):
#   1: 475 | 2: 1926 | 3: 2911 | 4: 5698 | 5: 2524 | 6: 4019 | 7: 44
table(usu_individual_T325[["NIVEL_ED"]])

# Relative frequencies per code.
# Recorded output (earlier run):
#   1: 0.026993237 | 2: 0.109450475 | 3: 0.165425925 | 4: 0.323805194
#   5: 0.143433540 | 6: 0.228391203 | 7: 0.002500426
prop.table(table(usu_individual_T325[["NIVEL_ED"]]))

# Attach labels to the codes 1-7. Levels stay in code order, so
# "Sin instrucción" (code 7) is the last level of the resulting factor.
usu_individual_T325 <- dplyr::mutate(
  usu_individual_T325,
  NIVEL_ED = factor(
    NIVEL_ED,
    levels = 1:7,
    labels = c(
      "Primario incompleto",
      "Primario completo",
      "Secundario incompleto",
      "Secundario completo",
      "Superior incompleto",
      "Superior completo",
      "Sin instrucción"
    )
  )
)
# Distribution of the numeric ITF column before any cleaning.
# Recorded output (earlier run):
#   Min 0 | 1st Qu. 560000 | Median 1380000 | Mean 1671223 |
#   3rd Qu. 2300000 | Max 71300000
summary(usu_individual_T325[["ITF"]])

# Data cleaning: keep rows with a strictly positive, non-missing ITF in a
# separate data frame, so the source table is left untouched.
usu_individual_T325_ITF <- dplyr::filter(
  usu_individual_T325,
  !is.na(ITF),
  ITF > 0
)

# Distribution of ITF after cleaning.
# Recorded output (earlier run):
#   Min 20000 | 1st Qu. 1070000 | Median 1700000 | Mean 2085857 |
#   3rd Qu. 2550000 | Max 71300000
summary(usu_individual_T325_ITF[["ITF"]])

# Mean ITF within each education level.
# Recorded output (earlier run):
#   Primario incompleto 1508459 | Primario completo 1516886
#   Secundario incompleto 1631071 | Secundario completo 1943457
#   Superior incompleto 2203015 | Superior completo 2862818
#   Sin instrucción 1714944
with(
  usu_individual_T325_ITF,
  tapply(X = ITF, INDEX = NIVEL_ED, FUN = mean, na.rm = TRUE)
)
# Bar chart of mean ITF per education level, using the positive-ITF subset.
#
# "Sin instrucción" is the last level of NIVEL_ED (it carries code 7), which
# would draw its bar after "Superior completo". fct_relevel() moves it to the
# front for this plot only, so the x axis runs from least to most education;
# the factor stored in the data frame is not modified.
# NOTE(review): the means are unweighted; if this is survey microdata,
# sampling weights may be required -- confirm.
ggplot(
  usu_individual_T325_ITF,
  aes(x = fct_relevel(NIVEL_ED, "Sin instrucción"), y = ITF)
) +
  # stat_summary() collapses each x group to its mean and draws it as a bar.
  stat_summary(fun = mean, geom = "bar") +
  labs(
    title = "Ingreso promedio por nivel educativo",
    x = "Nivel educativo",
    y = "Ingreso promedio (ITF)"
  ) +
  # Tilt the long category labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))