# Lectura de datos y uso de otras variables, como el sexo del jefe del hogar, con perspectiva de género
#https://www.inegi.org.mx/programas/enigh/nc/2024/#microdatos
# Cuadro 2.1
# Ingreso corriente promedio trimestral por hogar en deciles de
# hogares y su coeficiente de GINI
# 2024
# Start from an empty workspace so objects left over from an earlier run
# cannot leak into this one.
rm(list = ls())

# Load libraries.
# NOTE(review): no library() calls follow this heading, yet the script later
# calls left_join(), orderBy(), ggplot(), str_wrap(), breaks_width(), etc.
# unqualified -- presumably the packages are attached elsewhere; confirm.

# Read concentradohogar.CSV and keep only the columns used in this section.
ruta <- "C:/Users/cguer/Documents/Claudia/Midropbox/Investigacion y escritos/karamanis/data/"
infile <- "concentradohogar.CSV"
nomarchi <- paste0(ruta, infile)
Conc <- read.csv(nomarchi, stringsAsFactors = FALSE)[
  c(
    "folioviv", "foliohog", "ing_cor", "factor", "upm", "est_dis",
    "sexo_jefe", "ubica_geo", "gasto_mon", "alimentos", "ali_dentro",
    "ali_fuera", "leche", "bebidas", "carnes"
  )
]
# Roman-numeral labels for the ten deciles. (An earlier assignment that also
# carried a "Total" label was overwritten immediately, so only this value
# ever survives.)
Numdec <- c("I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X")

# Read ubicageo.CSV and left-join it onto the household table by ubica_geo.
ruta <- "C:/Users/cguer/Documents/Claudia/Midropbox/Investigacion y escritos/karamanis/data/"
infile <- "ubicageo.CSV"
nomarchi <- paste0(ruta, infile)
ubicageo <- read.csv(nomarchi, stringsAsFactors = FALSE)
Conc <- Conc %>%
  left_join(ubicageo, by = "ubica_geo")
# Households
# Constant flag column; every row counts as one household.
Conc[["Nhog"]] <- 1
# Optional filter kept from the original for reference:
# Conc <- filter(Conc, entidad == 5)

# Sort by ing_cor, then folioviv, then foliohog.
Conc <- orderBy(~+ing_cor+folioviv+foliohog, data = Conc)

# Sum of the `factor` column over all rows.
tot_hogares <- sum(Conc[["factor"]])

# Decile size: a tenth of that sum with the decimals truncated.
tam_dec <- trunc(tot_hogares / 10)

# Store the decile size on every row.
Conc[["tam_dec"]] <- tam_dec

# Decile construction works on a copy named BD1.
BD1 <- Conc
## Build household deciles: accumulate the `factor` weights in ascending
## income order and split the row that straddles each decile boundary, so
## that the cumulative weight hits tam_dec * i exactly.
## MAXT is a copy of ing_cor used as the sort key.
BD1$MAXT <- BD1$ing_cor
## Sort ascending by MAXT.
BD1 <- BD1[with(BD1, order(rank(MAXT))),]
## Running total of the factor column in that order.
BD1$ACUMULA <- cumsum(BD1$factor)
## For each of the nine interior boundaries (tam_dec * i, i = 1..9):
##  - k = number of rows with ACUMULA < tam_dec * i; row k + 1 is the first
##    row that reaches or crosses the boundary
##  - a1 = original factor of that boundary row
##  - rbind() duplicates the boundary row (it ends one slice and starts the next)
##  - b1 = weight still missing after row k to reach exactly tam_dec * i
##  - the first copy (row k + 1) receives b1, the second copy (row k + 2)
##    receives the remainder a1 - b1
## ACUMULA is not recomputed inside the loop; the duplicated row carries the
## same ACUMULA value, so the row count k stays valid.
for(i in 1:9)
{
a1<-BD1[dim(BD1[BD1$ACUMULA<tam_dec*i,])[1]+1,]$factor
BD1<-rbind(BD1[1:(dim(BD1[BD1$ACUMULA<tam_dec*i,])[1]+1),],
BD1[(dim(BD1[BD1$ACUMULA<tam_dec*i,])[1]+1):dim(BD1[1])[1],])
b1<-tam_dec*i-BD1[dim(BD1[BD1$ACUMULA<tam_dec*i,])[1],]$ACUMULA
BD1[(dim(BD1[BD1$ACUMULA<tam_dec*i,])[1]+1),]$factor<-b1
BD1[(dim(BD1[BD1$ACUMULA<tam_dec*i,])[1]+2),]$factor<-(a1-b1)
}
# Assign each row to a decile from the cumulative weight recomputed after the
# boundary rows were split.
BD1$ACUMULA2 <- cumsum(BD1$factor)
BD1$DECIL <- 0
# Column-wise logical assignment is used instead of `BD1[mask, ]$DECIL <- v`
# because the latter raises an error when the mask selects no rows.
BD1$DECIL[BD1$ACUMULA2 <= tam_dec] <- 1
for (i in 1:9) {
  in_decile <- BD1$ACUMULA2 > tam_dec * i & BD1$ACUMULA2 <= tam_dec * (i + 1)
  BD1$DECIL[in_decile] <- i + 1
}
# tam_dec was truncated, so the cumulative weight of the last rows can exceed
# tam_dec * 10; those rows are still 0 here and belong to the top decile.
# (The original had this fallback commented out, leaving a spurious decile 0.)
BD1$DECIL[BD1$DECIL == 0] <- 10
# Sum of `factor` over all rows (single Nhog group) ...
x <- with(BD1, tapply(factor, Nhog, sum))
# ... and per decile.
y <- with(BD1, tapply(factor, as.numeric(DECIL), sum))

# factor-weighted mean of ing_cor: overall and for each decile.
ing_cormed_t <- with(BD1, tapply(factor * ing_cor, Nhog, sum)) / x
ing_cormed_d <- with(BD1, tapply(factor * ing_cor, as.numeric(DECIL), sum)) / y
# factor-weighted spending totals by decile and sexo_jefe, computed in one
# pass (the original built four grouped tables and inner-joined them on the
# same keys, which yields the same columns).
#   gasmon : sum of gasto_mon * factor
#   n      : number of rows in the group
#   gasali : sum of alimentos * factor
#   aliden : sum of ali_dentro * factor
#   alifue : sum of ali_fuera * factor
# Fix: `aliden` previously summed `carnes`, although it feeds the
# "alimentación dentro" share; it now uses ali_dentro.
# NOTE(review): if the meat share was intended, rename the column and the
# chart title instead of reverting this.
alitod <- BD1 %>%
  group_by(DECIL, sexo_jefe) %>%
  summarise(
    gasmon = sum(gasto_mon * factor),
    n = n(),
    gasali = sum(alimentos * factor),
    aliden = sum(ali_dentro * factor),
    alifue = sum(ali_fuera * factor),
    .groups = "drop"
  )

# Shares in percent.
# Fix: the third assignment used to overwrite promaligtot with the same
# at-home share as promdengali, so the "food share of total spending" was
# lost and alifue was never used. It now fills its own column.
alitod$promaligtot <- alitod$gasali / alitod$gasmon * 100  # food / total spending
alitod$promdengali <- alitod$aliden / alitod$gasali * 100  # at home / food
alitod$promfuegali <- alitod$alifue / alitod$gasali * 100  # away from home / food
# Proporción del gasto en alimentos por decil, según se consuman dentro o fuera de casa
# Dot plot of promaligtot by decile, one panel per sexo_jefe value, with the
# rounded value printed next to each dot.
# Columns are referenced by bare name inside aes(): `alitod$col` bypasses
# ggplot2's data masking (ggplot2 warns that it is discouraged) and is not
# safe once layers or facets subset the data.
A <- ggplot(alitod, aes(x = as.numeric(DECIL), y = promaligtot)) +
  ggdist::geom_dots(aes(fill = promaligtot), size = .5, color = "black") +
  ggrepel::geom_text_repel(
    aes(label = str_wrap(round(promaligtot, 2), 4)),
    lineheight = 0.9, seed = 999, segment.size = 0.1, size = 3.5,
    direction = "x", fontface = "bold"
  ) +
  scale_x_continuous(breaks = breaks_width(1), limits = c(0, 11)) +
  scale_y_continuous(labels = scales::label_comma()) +
  colorspace::scale_fill_binned_sequential(
    palette = "YlGnBu", rev = TRUE, n.breaks = 15,
    guide = guide_colorsteps(show.limits = TRUE, title = "% alimento"),
    labels = scales::label_comma()
  ) +
  facet_wrap(vars(sexo_jefe), ncol = 2) +
  labs(
    title = "% Gasto en alimentos por Decil-sexo",
    subtitle = str_wrap("1:Masculino 2:Femenino"),
    caption = "Fuente: ENIGH 2024 "
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.key.height = unit(3, "lines"),
    legend.key.width = unit(1, "lines"),
    plot.background = element_rect(fill = "grey99", color = NA),
    axis.title = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(face = "bold", size = 12, color = "grey10"),
    strip.text = element_text(size = 15, face = "bold"),
    panel.spacing.y = unit(1.5, "lines"),
    plot.margin = margin(10, 10, 10, 10),
    plot.title = element_text(size = 18, face = "bold"),
    plot.subtitle = element_text(size = 12, margin = margin(0, 0, 10, 0)),
    plot.caption = element_text(margin = margin(10, 0, 0, 0))
  )
A
# Dot plot of promdengali by decile, one panel per sexo_jefe value, with the
# rounded value printed next to each dot.
# Columns are referenced by bare name inside aes(): `alitod$col` bypasses
# ggplot2's data masking (ggplot2 warns that it is discouraged) and is not
# safe once layers or facets subset the data.
B <- ggplot(alitod, aes(x = as.numeric(DECIL), y = promdengali)) +
  ggdist::geom_dots(aes(fill = promdengali), size = .5, color = "black") +
  ggrepel::geom_text_repel(
    aes(label = str_wrap(round(promdengali, 2), 4)),
    lineheight = 0.9, seed = 999, segment.size = 0.1, size = 3.5,
    direction = "x", fontface = "bold"
  ) +
  scale_x_continuous(breaks = breaks_width(1), limits = c(0, 11)) +
  scale_y_continuous(labels = scales::label_comma()) +
  colorspace::scale_fill_binned_sequential(
    palette = "RdPu", rev = TRUE, n.breaks = 15,
    guide = guide_colorsteps(show.limits = TRUE, title = "% alimento"),
    labels = scales::label_comma()
  ) +
  facet_wrap(vars(sexo_jefe), ncol = 2) +
  labs(
    title = "% gasto alimentación dentro Decil-sexo",
    subtitle = str_wrap("1:Masculino 2:Femenino"),
    caption = "Gráfica: Claudia Guerrero "
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.key.height = unit(3, "lines"),
    legend.key.width = unit(1, "lines"),
    plot.background = element_rect(fill = "grey99", color = NA),
    axis.title = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(face = "bold", size = 12, color = "grey10"),
    strip.text = element_text(size = 15, face = "bold"),
    panel.spacing.y = unit(1.5, "lines"),
    plot.margin = margin(10, 10, 10, 10),
    plot.title = element_text(size = 18, face = "bold"),
    plot.subtitle = element_text(size = 12, margin = margin(0, 0, 10, 0)),
    plot.caption = element_text(margin = margin(10, 0, 0, 0))
  )
B
library(ggpubr)
library(glue)

# Put the two charts side by side, each with its own legend.
# Fix: the original passed `C`, which this script never defines (the name
# resolves to the stats::C function, not a plot); the second chart is `B`.
AB <- ggarrange(A, B, common.legend = FALSE)
AB

map <- "inegi"
pal <- "purples"
# recursive = TRUE so the call also works when the parent "images" folder
# does not exist yet.
if (!dir.exists(glue("images/{map}"))) {
  dir.create(glue("images/{map}"), recursive = TRUE)
}
outfile <- str_to_lower(glue("images/{map}/{map}_{pal}alim.png"))
# Save the arranged panel explicitly rather than relying on whatever
# ggplot2 currently considers the last plot.
ggsave(outfile, plot = AB, width = 300, height = 300, units = "mm", dpi = "retina", limitsize = FALSE)
# Overlay both sexo_jefe groups in a single panel, each with its own fill
# scale (ggnewscale::new_scale lets two fill scales coexist).
alitod1 <- alitod |> filter(sexo_jefe == 1)
alitod2 <- alitod |> filter(sexo_jefe == 2)
library(ggnewscale)
# Changes versus the original:
#  - columns are referenced by bare name inside aes() instead of
#    `alitod1$col` / `alitod2$col`, which bypasses ggplot2's data masking;
#  - the x and y scales are added once; the original added identical scales
#    twice, which only triggers a "scale already present" replacement message.
A <- ggplot() +
  # First group (sexo_jefe == 1).
  ggdist::geom_dots(
    data = alitod1,
    mapping = aes(x = as.numeric(DECIL), y = promaligtot, fill = promaligtot),
    size = .5, color = "black"
  ) +
  ggrepel::geom_text_repel(
    data = alitod1,
    mapping = aes(
      x = as.numeric(DECIL), y = promaligtot,
      label = str_wrap(round(promaligtot, 2), 4)
    ),
    lineheight = 0.9, seed = 999, segment.size = 0.1, size = 3.5,
    direction = "x", fontface = "bold"
  ) +
  colorspace::scale_fill_binned_sequential(
    palette = "YlGnBu", rev = TRUE, n.breaks = 15,
    guide = guide_colorsteps(show.limits = TRUE, title = "jefe hombre"),
    labels = scales::label_comma()
  ) +
  new_scale("fill") +
  # Second group (sexo_jefe == 2), drawn with a separate fill scale.
  ggdist::geom_dots(
    data = alitod2,
    mapping = aes(x = as.numeric(DECIL), y = promaligtot, fill = promaligtot),
    size = .5, color = "black"
  ) +
  ggrepel::geom_text_repel(
    data = alitod2,
    mapping = aes(
      x = as.numeric(DECIL), y = promaligtot,
      label = str_wrap(round(promaligtot, 2), 4)
    ),
    lineheight = 0.9, seed = 999, segment.size = 0.1, size = 3.5,
    direction = "x", fontface = "bold"
  ) +
  colorspace::scale_fill_binned_sequential(
    palette = "Oranges", rev = TRUE, n.breaks = 15,
    guide = guide_colorsteps(show.limits = TRUE, title = "jefe mujer"),
    labels = scales::label_comma()
  ) +
  scale_x_continuous(breaks = breaks_width(1), limits = c(0, 11)) +
  scale_y_continuous(labels = scales::label_comma()) +
  labs(
    title = "% Gasto en alimentos por Decil-sexo",
    subtitle = str_wrap("1:Masculino 2:Femenino"),
    caption = "Fuente: ENIGH 2024 "
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.key.height = unit(1, "lines"),
    legend.key.width = unit(3, "lines"),
    plot.background = element_rect(fill = "grey99", color = NA),
    axis.title = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(face = "bold", size = 12, color = "grey10"),
    strip.text = element_text(size = 15, face = "bold"),
    panel.spacing.y = unit(1.5, "lines"),
    plot.margin = margin(10, 10, 10, 10),
    plot.title = element_text(size = 18, face = "bold"),
    plot.subtitle = element_text(size = 12, margin = margin(0, 0, 10, 0)),
    plot.caption = element_text(margin = margin(10, 0, 0, 0))
  )
A
# ¿En qué alimento se gasta más, de acuerdo con el decil?
# Read concentradohogar.CSV again, this time keeping the individual
# food-category columns as well.
ruta <- "C:/Users/cguer/Documents/Claudia/Midropbox/Investigacion y escritos/karamanis/data/"
infile <- "concentradohogar.CSV"
nomarchi <- paste0(ruta, infile)
Conc <- read.csv(nomarchi, stringsAsFactors = FALSE)[
  c(
    "folioviv", "foliohog", "ing_cor", "factor", "upm", "est_dis",
    "sexo_jefe", "ubica_geo", "gasto_mon", "alimentos", "ali_dentro",
    "ali_fuera",
    "cereales", "carnes", "pescado", "leche", "huevo", "aceites",
    "tuberculo", "verduras", "frutas", "azucar", "cafe", "especias",
    "otros_alim"
  )
]
# Roman-numeral labels for the ten deciles. (An earlier assignment that also
# carried a "Total" label was overwritten immediately, so only this value
# ever survives.)
Numdec <- c("I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X")

# Read ubicageo.CSV and left-join it onto the household table by ubica_geo.
ruta <- "C:/Users/cguer/Documents/Claudia/Midropbox/Investigacion y escritos/karamanis/data/"
infile <- "ubicageo.CSV"
nomarchi <- paste0(ruta, infile)
ubicageo <- read.csv(nomarchi, stringsAsFactors = FALSE)
Conc <- Conc %>%
  left_join(ubicageo, by = "ubica_geo")
# Households
# Constant flag column; every row counts as one household.
Conc[["Nhog"]] <- 1
# Optional filter kept from the original for reference:
# Conc <- filter(Conc, entidad == 5)

# Sort by ing_cor, then folioviv, then foliohog.
Conc <- orderBy(~+ing_cor+folioviv+foliohog, data = Conc)

# Sum of the `factor` column over all rows.
tot_hogares <- sum(Conc[["factor"]])

# Decile size: a tenth of that sum with the decimals truncated.
tam_dec <- trunc(tot_hogares / 10)

# Store the decile size on every row.
Conc[["tam_dec"]] <- tam_dec

# Decile construction works on a copy named BD1.
BD1 <- Conc
## Build household deciles: accumulate the `factor` weights in ascending
## income order and split the row that straddles each decile boundary, so
## that the cumulative weight hits tam_dec * i exactly.
## MAXT is a copy of ing_cor used as the sort key.
BD1$MAXT <- BD1$ing_cor
## Sort ascending by MAXT.
BD1 <- BD1[with(BD1, order(rank(MAXT))),]
## Running total of the factor column in that order.
BD1$ACUMULA <- cumsum(BD1$factor)
## For each of the nine interior boundaries (tam_dec * i, i = 1..9):
##  - k = number of rows with ACUMULA < tam_dec * i; row k + 1 is the first
##    row that reaches or crosses the boundary
##  - a1 = original factor of that boundary row
##  - rbind() duplicates the boundary row (it ends one slice and starts the next)
##  - b1 = weight still missing after row k to reach exactly tam_dec * i
##  - the first copy (row k + 1) receives b1, the second copy (row k + 2)
##    receives the remainder a1 - b1
## ACUMULA is not recomputed inside the loop; the duplicated row carries the
## same ACUMULA value, so the row count k stays valid.
for(i in 1:9)
{
a1<-BD1[dim(BD1[BD1$ACUMULA<tam_dec*i,])[1]+1,]$factor
BD1<-rbind(BD1[1:(dim(BD1[BD1$ACUMULA<tam_dec*i,])[1]+1),],
BD1[(dim(BD1[BD1$ACUMULA<tam_dec*i,])[1]+1):dim(BD1[1])[1],])
b1<-tam_dec*i-BD1[dim(BD1[BD1$ACUMULA<tam_dec*i,])[1],]$ACUMULA
BD1[(dim(BD1[BD1$ACUMULA<tam_dec*i,])[1]+1),]$factor<-b1
BD1[(dim(BD1[BD1$ACUMULA<tam_dec*i,])[1]+2),]$factor<-(a1-b1)
}
# Assign each row to a decile from the cumulative weight recomputed after the
# boundary rows were split.
BD1$ACUMULA2 <- cumsum(BD1$factor)
BD1$DECIL <- 0
# Column-wise logical assignment is used instead of `BD1[mask, ]$DECIL <- v`
# because the latter raises an error when the mask selects no rows.
BD1$DECIL[BD1$ACUMULA2 <= tam_dec] <- 1
for (i in 1:9) {
  in_decile <- BD1$ACUMULA2 > tam_dec * i & BD1$ACUMULA2 <= tam_dec * (i + 1)
  BD1$DECIL[in_decile] <- i + 1
}
# tam_dec was truncated, so the cumulative weight of the last rows can exceed
# tam_dec * 10; those rows are still 0 here and belong to the top decile.
# (The original had this fallback commented out, leaving a spurious decile 0.)
BD1$DECIL[BD1$DECIL == 0] <- 10
# Drop columns 32 through 35 of BD1 by position.
# NOTE(review): presumably these are the helper columns added during decile
# construction (MAXT, ACUMULA, ACUMULA2, DECIL); that only holds if ubicageo
# contributes the expected number of columns -- confirm, or select by name.
bd1col <- BD1[, -(32:35)]

# Rows of bd1col that have no identical row in Conc (presumably via dplyr's
# data-frame setdiff; verify which setdiff is in scope).
rows_in_df1_only <- setdiff(bd1col, Conc)

# Left-join BD1 back onto Conc by household keys, income and weight; columns
# present in both tables come back with .x / .y suffixes.
bd1col2 <- Conc %>%
  left_join(BD1, by = c("folioviv", "foliohog", "ing_cor", "factor"))
#BD1[as.numeric(BD1$DECIL) %in% '0',]$DECIL <- 10
# Sum of `factor` over all rows (single Nhog group) ...
x <- with(BD1, tapply(factor, Nhog, sum))
# ... and per decile.
y <- with(BD1, tapply(factor, as.numeric(DECIL), sum))

# factor-weighted mean of ing_cor: overall and for each decile.
ing_cormed_t <- with(BD1, tapply(factor * ing_cor, Nhog, sum)) / x
ing_cormed_d <- with(BD1, tapply(factor * ing_cor, as.numeric(DECIL), sum)) / y
# For every row of BD1, find the food category with the largest spending
# (max_col_base) and that spending amount (selected_value).
#
# Fix: the original took the category from a table re-joined onto Conc and
# attached it to BD1 with merge(by = 0), i.e. by row names. BD1 was reordered
# and had rows duplicated with rbind(), whereas the joined table carries fresh
# sequential row names, so matching row names does not reliably pair the same
# household. The category is now computed directly on BD1's own columns, so
# no re-matching is needed.
food_cols <- c(
  "cereales", "carnes", "pescado", "leche", "huevo", "aceites", "tuberculo",
  "verduras", "frutas", "azucar", "cafe", "especias", "otros_alim"
)
BD1al <- BD1[food_cols]
food_mat <- as.matrix(BD1al)

# Index of the first maximum in each row (which.max semantics, as before).
# A row with no non-missing value yields NA instead of a zero-length result,
# which would otherwise turn the column into a list.
# NOTE(review): a household with zero spending in every category is still
# labelled with the first column ("cereales"), exactly as in the original --
# confirm whether such rows should be excluded.
top_idx <- unname(apply(food_mat, 1, function(v) {
  k <- which.max(v)
  if (length(k) == 0L) NA_integer_ else k
}))
BD1al$max_col_base <- food_cols[top_idx]

merged_BD1_inner <- cbind(BD1, BD1al["max_col_base"])
BD1nvo <- merged_BD1_inner[c(
  "folioviv", "foliohog", "ing_cor", "factor", "upm", "est_dis", "sexo_jefe",
  "ubica_geo", "gasto_mon", "alimentos", food_cols, "DECIL", "max_col_base"
)]

# Spending in the dominant category, picked with (row, column) matrix
# indexing instead of a rowwise() pass using the deprecated cur_data().
df_dplyr <- dplyr::as_tibble(BD1nvo)
df_dplyr$selected_value <- food_mat[cbind(seq_len(nrow(food_mat)), top_idx)]
# Per decile and dominant category: total spending in that category (gasrub)
# and its share of the decile's total food spending (porrubalim, in percent).
# NOTE(review): these sums and the counts below are not weighted by `factor`,
# unlike the decile construction -- confirm that sample totals are intended.
BD1decil2 <- df_dplyr %>%
  group_by(DECIL, max_col_base) %>%
  summarise(gasrub = sum(selected_value), .groups = "drop")
BD1decil2 <- na.omit(BD1decil2)

# Total food spending per decile.
BD1decil3 <- df_dplyr %>%
  group_by(DECIL) %>%
  summarise(gasali = sum(alimentos))
BD1decil3 <- na.omit(BD1decil3)

BD1decil2 <- left_join(BD1decil2, BD1decil3, by = "DECIL")
BD1decil2$porrubalim <- BD1decil2$gasrub / BD1decil2$gasali * 100
BD1decil2 <- na.omit(BD1decil2)

# Number of rows per decile and dominant category.
# Fix: the original wrote `summarise(n = n(), na.rm = TRUE)`, which does not
# drop missing values -- it only adds a constant column named `na.rm`.
# The extra ".x" clean-up of max_col_base was dropped because BD1nvo already
# holds the cleaned names.
BD1decil <- BD1nvo %>%
  group_by(DECIL, max_col_base) %>%
  summarise(n = n(), .groups = "drop")
library(ggsci)

# Count of rows whose largest food category is each rubro, by decile.
# Only groups with n >= 5 are drawn; labels are shown for n >= 200.
A <- ggplot(
  data = subset(BD1decil, n >= 5),
  mapping = aes(x = as.numeric(DECIL), y = n, color = max_col_base)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "loess", alpha = 0.1, size = 1, span = 1) +
  ggrepel::geom_text_repel(
    data = subset(BD1decil, n >= 200),
    mapping = aes(
      x = as.numeric(DECIL), y = n,
      label = str_wrap(round(n, 0), 4)
    ),
    lineheight = 0.9, seed = 999, segment.size = 0.1, size = 3.5,
    direction = "y", fontface = "bold"
  ) +
  scale_x_continuous(breaks = breaks_width(1), limits = c(0, 11)) +
  scale_y_continuous(labels = scales::label_comma()) +
  scale_color_ucscgb(name = "rubro") +
  labs(
    title = "Rubro gasto máximo por decil de ingreso corriente",
    subtitle = str_wrap("Conteo de viviendas"),
    caption = "Fuente: ENIGH 2024 Gráfica:Claudia Guerrero"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.key.height = unit(3, "lines"),
    legend.key.width = unit(2, "lines"),
    plot.background = element_rect(fill = "grey99", color = NA),
    axis.title = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(face = "bold", size = 12, color = "grey10"),
    strip.text = element_text(size = 15, face = "bold"),
    panel.spacing.y = unit(1.5, "lines"),
    plot.margin = margin(10, 10, 10, 10),
    plot.title = element_text(size = 18, face = "bold"),
    plot.subtitle = element_text(size = 12, margin = margin(0, 0, 10, 0)),
    plot.caption = element_text(margin = margin(10, 0, 0, 0))
  )
A
map <- "inegi"
pal <- "purples"
# recursive = TRUE so the call also works when the parent "images" folder
# does not exist yet.
if (!dir.exists(glue("images/{map}"))) {
  dir.create(glue("images/{map}"), recursive = TRUE)
}
outfile <- str_to_lower(glue("images/{map}/{map}_{pal}rub.png"))
# Save chart A explicitly rather than relying on whatever ggplot2 currently
# considers the last plot.
ggsave(outfile, plot = A, width = 300, height = 300, units = "mm", dpi = "retina", limitsize = FALSE)
# Share (porrubalim, in percent) of each decile's food spending that goes to
# the dominant category, one colour per rubro, every point labelled.
B <- ggplot(
  data = BD1decil2,
  mapping = aes(x = as.numeric(DECIL), y = porrubalim, color = max_col_base)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "loess", alpha = 0.1, size = 1, span = 1) +
  ggrepel::geom_text_repel(
    data = BD1decil2,
    mapping = aes(
      x = as.numeric(DECIL), y = porrubalim,
      label = str_wrap(round(porrubalim, 2), 4)
    ),
    lineheight = 0.9, seed = 999, segment.size = 0.1, size = 3.5,
    direction = "y", fontface = "bold"
  ) +
  scale_x_continuous(breaks = breaks_width(1), limits = c(0, 11)) +
  scale_y_continuous(labels = scales::label_comma()) +
  scale_color_ucscgb(name = "rubro") +
  labs(
    title = " % Rubro gasto máximo por vivienda",
    subtitle = str_wrap("por decil de ingreso corriente"),
    caption = "Fuente: ENIGH 2024 Gráfica:Claudia Guerrero"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.key.height = unit(3, "lines"),
    legend.key.width = unit(2, "lines"),
    plot.background = element_rect(fill = "grey99", color = NA),
    axis.title = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(face = "bold", size = 12, color = "grey10"),
    strip.text = element_text(size = 15, face = "bold"),
    panel.spacing.y = unit(1.5, "lines"),
    plot.margin = margin(10, 10, 10, 10),
    plot.title = element_text(size = 18, face = "bold"),
    plot.subtitle = element_text(size = 12, margin = margin(0, 0, 10, 0)),
    plot.caption = element_text(margin = margin(10, 0, 0, 0))
  )
B
outfile <- str_to_lower(glue("images/{map}/{map}_{pal}porrub.png"))
# Save chart B explicitly rather than relying on whatever ggplot2 currently
# considers the last plot.
ggsave(outfile, plot = B, width = 300, height = 300, units = "mm", dpi = "retina", limitsize = FALSE)