Ajustes: - Usamos P47T y quintiles de ingreso (Con P21 el quintil 1
no obtiene acceso en toda la CABA, y si usamos cuartiles los rangos de
ingreso quedan muy amplios) - Unificamos cuentapropistas e informales en
un solo conjunto (trabajadores informales) - Para las estadísticas
descriptivas usamos el raster con las constantes agregadas (35 m2 y
0.9289)
# 1. Funciones auxiliares
get_quintile_table <- function(df) {
calculate_quintile_stats <- function(data, weights, income) {
quintile_cuts <- wtd.quantile(income, weights = weights, probs = seq(0.2, 0.8, 0.2))
q1_median <- weighted.median(income[income <= quintile_cuts[1]], w = weights[income <= quintile_cuts[1]], na.rm = TRUE)
q2_median <- weighted.median(income[income > quintile_cuts[1] & income <= quintile_cuts[2]], w = weights[income > quintile_cuts[1] & income <= quintile_cuts[2]], na.rm = TRUE)
q3_median <- weighted.median(income[income > quintile_cuts[2] & income <= quintile_cuts[3]], w = weights[income > quintile_cuts[2] & income <= quintile_cuts[3]], na.rm = TRUE)
q4_median <- weighted.median(income[income > quintile_cuts[3] & income <= quintile_cuts[4]], w = weights[income > quintile_cuts[3] & income <= quintile_cuts[4]], na.rm = TRUE)
q5_median <- weighted.median(income[income > quintile_cuts[4]], w = weights[income > quintile_cuts[4]], na.rm = TRUE)
c(Q1 = q1_median/1291, Q2 = q2_median/1291, Q3 = q3_median/1291, Q4 = q4_median/1291, Q5 = q5_median/1291)
}
results <- calculate_quintile_stats(df, df$PONDIH, df$P47T) # Usa P47T
tibble(!!!results, n = sum(df$PONDIH))
}
make_categorical <- function(val, medians, umbral) {
medians <- as.numeric(medians)
q1 <- medians[1] * umbral
q2 <- medians[2] * umbral
q3 <- medians[3] * umbral
q4 <- medians[4] * umbral
q5 <- medians[5] * umbral
val_transformed <- val * 0.9289 * 35
m <- matrix(c(-Inf, q1, 1, q1, q2, 2, q2, q3, 3, q3, q4, 4, q4, q5, 5, q5, Inf, 6), ncol=3, byrow=TRUE)
result <- classify(val_transformed, m)
return(result)
}
## AJUSTE: Las categorías se ajustan a 5 quintiles.
create_raster_groups <- function(val, tabla) {
medians <- tabla %>% select(Q1, Q2, Q3, Q4, Q5)
medians <- as.numeric(medians[1, ])
thresholds <- c(0.3, 0.5, 0.7)
name_suffix <- c("0-30%", "31-50%", "50-70%")
result_list <- vector("list", length(thresholds))
for(i in seq_along(thresholds)) {
rast <- make_categorical(val, medians, thresholds[i])
cats <- data.frame(ID = 1:6, category = c("Q1", "Q2", "Q3", "Q4", "Q5", "No access"))
levels(rast) <- cats
result_list[[i]] <- rast
}
out <- do.call(c, result_list)
names(out) <- sprintf("Informales_%s", name_suffix)
return(out)
}
# (El resto de funciones auxiliares no requieren cambios)
create_lorenz_data <- function(gini_df) {
value_cols <- setdiff(names(gini_df), c("Group", "Income_Dedication"))
map_df(1:nrow(gini_df), function(i) {
values <- as.numeric(gini_df[i, value_cols])
if(sum(values) > 0) {
l_curve <- Lc(values)
data.frame(Income_Dedication = gini_df$Income_Dedication[i], Cumulative_Population_Share = l_curve$p, Cumulative_Accessibility_Share = l_curve$L)
} else { data.frame(Income_Dedication = gini_df$Income_Dedication[i], Cumulative_Population_Share = 0, Cumulative_Accessibility_Share = 0) }
})
}
create_lorenz_plot <- function(lorenz_data) {
ggplot(lorenz_data, aes(x = Cumulative_Population_Share, y = Cumulative_Accessibility_Share)) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "grey") +
geom_line(size = 1.2, color = "blue") + facet_wrap(~ Income_Dedication) +
labs(x = "Proporción acumulada de población", y = "Proporción acumulada de accesibilidad") +
scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) +
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) +
theme_minimal() + theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"), axis.title = element_text(size = 10), axis.text = element_text(size = 8), legend.position = "none", strip.text = element_text(size = 11, face = "bold"), panel.spacing = unit(1, "lines"), aspect.ratio = 1)
}
create_maps <- function(all_groups, pal, nombres_leyenda) {
maps <- map(1:nlyr(all_groups), ~{
tm_shape(all_groups[[.x]]) +
tm_raster(palette = pal, legend.show = FALSE, alpha = 0.8) +
tm_layout(main.title = names(all_groups)[.x], main.title.size = 1.2) +
tm_legend(show = FALSE)
})
names(maps) <- names(all_groups)
# AJUSTE: Se elimina el argumento 'legend.position' de la leyenda
legend <- tm_shape(all_groups[[1]]) +
tm_raster(palette = pal, title = "Quintiles", labels = nombres_leyenda,
legend.show = TRUE, legend.is.portrait = FALSE) +
tm_layout(legend.only = TRUE)
list(maps = maps, legend = legend)
}
# 2. Preparación de datos EPH
assign("has_internet_via_proxy", TRUE, environment(curl::has_internet))
eph_raw <- get_microdata(2024, 4, "individual")
eph <- eph_raw %>%
mutate(
clase = case_when(
CAT_OCUP == 2 ~ "Cuentapropista",
CAT_OCUP == 3 & PP07G1 == 1 & PP07G2 == 1 & PP07G3 == 1 & PP07G4 == 1 ~ "Asalariado formal",
CAT_OCUP == 3 ~ "Asalariado informal",
TRUE ~ "Otro"
)
) %>%
filter(
CH06 >= 18, CH06 <= 65,
AGLOMERADO == 32,
P47T > 0, ## AJUSTE: Se usa P47T en lugar de P21
clase %in% c("Cuentapropista", "Asalariado informal")
) %>%
mutate(
grupo_analisis = "Trabajadores Informales"
)
# 3. Tabla Auxiliar: Poblacion representada por Quintil
# Calcular los puntos de corte para los quintiles
quintile_breaks <- Hmisc::wtd.quantile(eph$P47T, weights = eph$PONDIH, probs = seq(0, 1, 0.2), na.rm = TRUE)
# Crear una columna temporal para asignar cada persona a su quintil
eph_con_quintil <- eph %>%
mutate(quintil = cut(P47T, breaks = quintile_breaks, labels = paste0("Quintil ", 1:5), include.lowest = TRUE))
# Generar la tabla resumen
tabla_conteo_quintiles <- eph_con_quintil %>%
group_by(quintil) %>%
summarise(
`Población Estimada (Ponderada)` = sum(PONDIH)
) %>%
ungroup()
# Calcular porcentajes sobre la población estimada
total_poblacion <- sum(tabla_conteo_quintiles$`Población Estimada (Ponderada)`)
tabla_conteo_quintiles <- tabla_conteo_quintiles %>%
mutate(
`Porcentaje (%)` = (`Población Estimada (Ponderada)` / total_poblacion) * 100
)
# Mostrar la tabla formateada
kable(tabla_conteo_quintiles,
caption = "Distribución de la Población por Quintil de Ingreso (P47T)",
digits = c(0, 0, 0, 2),
format.args = list(big.mark = ".", decimal.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Distribución de la Población por Quintil de Ingreso (P47T)
|
quintil
|
Población Estimada (Ponderada)
|
Porcentaje (%)
|
|
Quintil 1
|
104.516
|
21
|
|
Quintil 2
|
103.213
|
20
|
|
Quintil 3
|
97.447
|
19
|
|
Quintil 4
|
125.778
|
25
|
|
Quintil 5
|
72.528
|
14
|
# 3. Estadisticas descriptivas de costos de acceso e ingresos por quintil
val_r <- rast("C:/Users/pcusu/Desktop/Tesis Di Tella 2025/vut_sin_calles_ni_ev.tif") / 1291.81 * 0.9289 * 35 # Convertir a USD/m² con las constantes
costo_usd_m2 <- values(val_r, na.rm = TRUE)
tabla_costo <- tibble(
Estadística = c("Media", "Mediana", "Mínimo", "Máximo", "Desv. Estándar"),
Costo_Acceso_USD_m2 = c(
mean(costo_usd_m2, na.rm = TRUE),
median(costo_usd_m2, na.rm = TRUE),
min(costo_usd_m2, na.rm = TRUE),
max(costo_usd_m2, na.rm = TRUE),
sd(costo_usd_m2, na.rm = TRUE)
)
)
umbrales <- c("30%" = 0.3, "50%" = 0.5, "70%" = 0.7)
if (!exists("eph_con_quintil")) {
quintile_breaks <- Hmisc::wtd.quantile(eph$P47T, weights = eph$PONDIH, probs = seq(0, 1, 0.2), na.rm = TRUE)
eph_con_quintil <- eph %>%
mutate(quintil = cut(P47T, breaks = quintile_breaks, labels = paste0("Q", 1:5), include.lowest = TRUE))
}
lista_stats_ingreso <- lapply(names(umbrales), function(nombre_umbral) {
umbral_val <- umbrales[[nombre_umbral]]
eph_con_quintil %>%
mutate(ingreso_disponible = P47T * umbral_val) %>%
group_by(quintil) %>%
filter(!is.na(quintil)) %>%
summarise(
# Se aplica la corrección con as.numeric() para evitar errores
Media = as.numeric(Hmisc::wtd.mean(ingreso_disponible, weights = PONDIH, na.rm = TRUE)),
Mediana = as.numeric(Hmisc::wtd.quantile(ingreso_disponible, weights = PONDIH, probs = 0.5, na.rm = TRUE)),
Mínimo = min(ingreso_disponible, na.rm = TRUE),
Máximo = max(ingreso_disponible, na.rm = TRUE),
`Desv. Estándar` = as.numeric(sqrt(Hmisc::wtd.var(ingreso_disponible, weights = PONDIH, na.rm = TRUE))),
.groups = 'drop'
) %>%
# Se convierten los valores a USD
mutate(across(where(is.numeric), ~ . / 1291)) %>%
pivot_longer(cols = -quintil, names_to = "Estadística") %>%
pivot_wider(names_from = quintil, values_from = value)
})
tabla_ingreso_final <- reduce(lista_stats_ingreso, left_join, by = "Estadística")
tabla_final_descriptiva <- left_join(tabla_costo, tabla_ingreso_final, by = "Estadística")
kable(tabla_final_descriptiva,
digits = 2,
caption = "Estadísticas Descriptivas de Costos (USD/m²) e Ingresos Disponibles (USD) por Quintil",
col.names = c("Estadística", "Costo Acceso (USD/m²)", rep(paste0("Q", 1:5), 3)),
format.args = list(big.mark = ".", decimal.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), font_size = 10) %>%
add_header_above(c(" " = 2, "Ingreso Disponible al 30%" = 5, "Ingreso Disponible al 50%" = 5, "Ingreso Disponible al 70%" = 5),
background = "#D3D3D3", color = "black", bold = TRUE) %>%
scroll_box(width = "100%")
Estadísticas Descriptivas de Costos (USD/m²) e Ingresos Disponibles
(USD) por Quintil
|
|
Ingreso Disponible al 30%
|
Ingreso Disponible al 50%
|
Ingreso Disponible al 70%
|
|
Estadística
|
Costo Acceso (USD/m²)
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
|
Media
|
287,84
|
53,22
|
120,98
|
175,83
|
284,42
|
772,42
|
88,71
|
201,63
|
293,04
|
474,03
|
1.287,36
|
124,19
|
282,28
|
410,26
|
663,64
|
1.802,31
|
|
Mediana
|
292,39
|
46,48
|
116,19
|
162,66
|
278,85
|
487,99
|
77,46
|
193,65
|
271,11
|
464,76
|
813,32
|
108,44
|
271,11
|
379,55
|
650,66
|
1.138,65
|
|
Mínimo
|
67,80
|
1,39
|
95,27
|
144,07
|
222,62
|
371,80
|
2,32
|
158,79
|
240,12
|
371,03
|
619,67
|
3,25
|
222,31
|
336,17
|
519,44
|
867,54
|
|
Máximo
|
783,99
|
92,95
|
139,43
|
220,76
|
348,57
|
3.834,24
|
154,92
|
232,38
|
367,93
|
580,95
|
6.390,40
|
216,89
|
325,33
|
515,10
|
813,32
|
8.946,55
|
|
Desv. Estándar
|
78,65
|
27,13
|
13,02
|
22,19
|
50,30
|
624,06
|
45,21
|
21,70
|
36,98
|
83,84
|
1.040,10
|
63,30
|
30,39
|
51,78
|
117,37
|
1.456,14
|
# 4 .Mapa de precio de alquiler por m2
val <- rast("C:/Users/pcusu/Desktop/Tesis Di Tella 2025/vut_sin_calles_ni_ev.tif") / 1291.81 #acá usamos el valor sin transformar
tm_shape(val) +
tm_raster(
title = "USD/m²",
palette = "viridis",
style = "quantile",
n = 5,
alpha = 0.85
) +
tm_layout(
main.title = "Precio de alquiler en USD/m² (CABA)",
main.title.size = 1.2,
legend.position = c("left", "bottom"),
legend.bg.color = "white",
legend.bg.alpha = 0.7
)

# 5. Cálculo de acceso por quintiles y procesamiento espacial
tabla_quintiles <- get_quintile_table(eph)
all_group_maps <- create_raster_groups(val, tabla_quintiles)
colores_quintiles <- rev(brewer.pal(5, "YlOrRd"))
# Añadimos el color gris para "No access" al final de la lista.
pal <- c(colores_quintiles, "#4D4D4D")
nombres_leyenda <- c("Q1", "Q2", "Q3", "Q4", "Q5", "No access")
# 6. Cálculo de frecuencias y Gini
freq_list <- map(1:nlyr(all_group_maps), ~{
freq_data <- freq(all_group_maps[[.x]])
data.frame(Group = names(all_group_maps)[.x], value = freq_data$value, count = freq_data$count)
})
a <- do.call(rbind, freq_list) %>%
filter(!is.na(value)) %>%
mutate(
value = factor(value, levels = c("Q1", "Q2", "Q3", "Q4", "Q5", "No access"))
) %>%
pivot_wider(names_from = value, values_from = count, values_fill = 0)
expected_cols <- c("Q1", "Q2", "Q3", "Q4", "Q5", "No access")
for (col in expected_cols) {
if (!col %in% names(a)) { a[[col]] <- 0 }
}
a <- a %>% mutate(across(-Group, ~ . * (100/1000000)))
a$Total <- rowSums(a %>% select(-Group))
a <- a %>% mutate(Income_Dedication = case_when(grepl("0-30%", Group) ~ "0-30%", grepl("31-50%", Group) ~ "31-50%", grepl("50-70%", Group) ~ "50-70%"))
gini <- a %>%
select(-Total) %>%
mutate(across(c(Q1, Q2, Q3, Q4, Q5, `No access`), ~ ./rowSums(across(c(Q1, Q2, Q3, Q4, Q5, `No access`)))))
# 7. Tabla de porcentajes y Gini por Quintil
gini_coefs <- map_dbl(1:nrow(gini), function(i) {
values <- as.numeric(gini[i, c("Q1", "Q2", "Q3", "Q4", "Q5", "No access")]) ## AJUSTE
if(sum(values) > 0) { ineq(values, type="Gini") } else { NA_real_ }
})
gini_table <- gini %>%
mutate(across(c(Q1, Q2, Q3, Q4, Q5, `No access`), ~scales::percent(., accuracy=0.01))) %>% ## AJUSTE
mutate(`Dedicación de Ingreso` = Income_Dedication, Gini = round(gini_coefs, 3)) %>%
select(`Dedicación de Ingreso`, Q1, Q2, Q3, Q4, Q5, `No access`, Gini)
kable(gini_table,
caption = "Porcentaje de accesibilidad al alquiler por dedicación de ingresos y quintil",
format = "html",
align = c('l', rep('r', 8))) %>%
kable_styling(bootstrap_options = c("striped", "hover"))
Porcentaje de accesibilidad al alquiler por dedicación de ingresos y
quintil
|
Dedicación de Ingreso
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
No access
|
Gini
|
|
0-30%
|
0.00%
|
1.62%
|
0.76%
|
37.06%
|
59.18%
|
1.38%
|
0.675
|
|
31-50%
|
0.23%
|
12.35%
|
23.38%
|
62.47%
|
1.58%
|
0.00%
|
0.654
|
|
50-70%
|
1.62%
|
34.34%
|
57.50%
|
6.01%
|
0.54%
|
0.00%
|
0.655
|
# 8. Visualización: Mapas
map_list <- create_maps(all_group_maps, pal, nombres_leyenda)
tmap_arrange(map_list$maps[[1]], map_list$maps[[2]], map_list$maps[[3]], ncol=3)

print(map_list$legend)

# 9. Visualización: Curvas de Lorenz
lorenz_data <- create_lorenz_data(gini)
lorenz_plot <- create_lorenz_plot(lorenz_data)
print(lorenz_plot)
