# Configura el entorno y carga las librerĆas necesarias
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(Hmisc)
library(dplyr)
library(tidyr)
library(purrr)
library(ineq)
library(terra)
library(ggplot2)
library(tmap)
library(eph)
library(tidyverse)
library(kableExtra)
library(scales)
library(spatstat.geom)
library(RColorBrewer)
library(viridis)
# 1. Funciones auxiliares
get_quintile_table <- function(df) {
calculate_quintile_stats <- function(data, weights, income) {
quintile_cuts <- wtd.quantile(income, weights = weights, probs = seq(0.2, 0.8, 0.2))
q1_median <- weighted.median(income[income <= quintile_cuts[1]], w = weights[income <= quintile_cuts[1]], na.rm = TRUE)
q2_median <- weighted.median(income[income > quintile_cuts[1] & income <= quintile_cuts[2]], w = weights[income > quintile_cuts[1] & income <= quintile_cuts[2]], na.rm = TRUE)
q3_median <- weighted.median(income[income > quintile_cuts[2] & income <= quintile_cuts[3]], w = weights[income > quintile_cuts[2] & income <= quintile_cuts[3]], na.rm = TRUE)
q4_median <- weighted.median(income[income > quintile_cuts[3] & income <= quintile_cuts[4]], w = weights[income > quintile_cuts[3] & income <= quintile_cuts[4]], na.rm = TRUE)
q5_median <- weighted.median(income[income > quintile_cuts[4]], w = weights[income > quintile_cuts[4]], na.rm = TRUE)
c(Q1 = q1_median/1291, Q2 = q2_median/1291, Q3 = q3_median/1291, Q4 = q4_median/1291, Q5 = q5_median/1291)
}
results <- calculate_quintile_stats(df, df$PONDIH, df$P47T)
tibble(!!!results, n = sum(df$PONDIH))
}
make_categorical <- function(val, medians, umbral) {
medians <- as.numeric(medians)
q1 <- medians[1] * umbral
q2 <- medians[2] * umbral
q3 <- medians[3] * umbral
q4 <- medians[4] * umbral
q5 <- medians[5] * umbral
val_transformed <- val * 0.9289 * 35
m <- matrix(c(-Inf, q1, 1, q1, q2, 2, q2, q3, 3, q3, q4, 4, q4, q5, 5, q5, Inf, 6), ncol=3, byrow=TRUE)
result <- classify(val_transformed, m)
return(result)
}
create_raster_groups <- function(val, tabla, group_name) {
medians <- tabla %>% select(Q1, Q2, Q3, Q4, Q5)
medians <- as.numeric(medians[1, ])
thresholds <- c(0.3, 0.5, 0.7)
name_suffix <- c("0-30%", "31-50%", "50-70%")
result_list <- vector("list", length(thresholds))
for(i in seq_along(thresholds)) {
rast <- make_categorical(val, medians, thresholds[i])
cats <- data.frame(ID = 1:6, category = c("Q1", "Q2", "Q3", "Q4", "Q5", "No access"))
levels(rast) <- cats
result_list[[i]] <- rast
}
out <- do.call(c, result_list)
names(out) <- sprintf("%s_%s", group_name, name_suffix)
return(out)
}
create_lorenz_data <- function(gini_df) {
map_df(1:nrow(gini_df), function(i) {
grupo <- gini_df$Grupo[i]
income_dedication <- gini_df$Income_Dedication[i]
values <- as.numeric(gini_df[i, setdiff(names(gini_df), c("Grupo", "Income_Dedication", "Gini"))])
if(sum(values, na.rm = TRUE) > 0) {
l_curve <- Lc(values)
data.frame(
Grupo = grupo,
Income_Dedication = income_dedication,
Cumulative_Population_Share = l_curve$p,
Cumulative_Accessibility_Share = l_curve$L
)
} else { NULL }
})
}
create_lorenz_plot_comparative <- function(lorenz_data) {
line_data <- distinct(lorenz_data, Income_Dedication)
ggplot(lorenz_data, aes(x = Cumulative_Population_Share, y = Cumulative_Accessibility_Share, color = Grupo)) +
geom_abline(data = line_data, aes(intercept = 0, slope = 1), linetype = "dashed", color = "grey", inherit.aes = FALSE) +
geom_line(size = 1.2) +
facet_wrap(~ Income_Dedication) +
labs(
x = "Proporción acumulada de población",
y = "Proporción acumulada de accesibilidad",
color = "Grupo de Trabajadores"
) +
scale_color_manual(values = c("Formales" = "blue", "Informales" = "red")) +
scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) +
scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 10),
axis.text = element_text(size = 8),
legend.position = "bottom",
strip.text = element_text(size = 11, face = "bold"),
panel.spacing = unit(1, "lines"),
aspect.ratio = 1
)
}
create_maps <- function(all_groups, pal, nombres_leyenda) {
maps <- map(1:nlyr(all_groups), ~{
tm_shape(all_groups[[.x]]) +
tm_raster(palette = pal, legend.show = FALSE, alpha = 0.8) +
tm_layout(main.title = names(all_groups)[.x], main.title.size = 1.2) +
tm_legend(show = FALSE)
})
names(maps) <- names(all_groups)
legend <- tm_shape(all_groups[[1]]) +
tm_raster(palette = pal, title = "Quintiles", labels = nombres_leyenda,
legend.show = TRUE, legend.is.portrait = FALSE) +
tm_layout(legend.only = TRUE)
list(maps = maps, legend = legend)
}
# 2. Filtrado de microdatos
assign("has_internet_via_proxy", TRUE, environment(curl::has_internet))
eph_raw <- get_microdata(2024, 4, "individual")
eph_base <- eph_raw %>%
mutate(
clase = case_when(
CAT_OCUP == 2 ~ "Cuentapropista",
CAT_OCUP == 3 & PP07G1 == 1 & PP07G2 == 1 & PP07G3 == 1 & PP07G4 == 1 ~ "Asalariado formal",
CAT_OCUP == 3 ~ "Asalariado informal",
TRUE ~ "Otro"
)
) %>%
filter(
CH06 >= 18, CH06 <= 65,
AGLOMERADO == 32,
P47T > 0
)
# Grupo 1: Trabajadores Informales
eph_informales <- eph_base %>%
filter(clase %in% c("Cuentapropista", "Asalariado informal")) %>%
mutate(grupo_analisis = "Trabajadores Informales")
# Grupo 2: Trabajadores Formales
eph_formales <- eph_base %>%
filter(clase == "Asalariado formal") %>%
mutate(grupo_analisis = "Trabajadores Formales")
# 3. Tabla Comparativa de Quintiles
generar_tabla_conteo <- function(df, nombre_grupo) {
quintile_breaks <- Hmisc::wtd.quantile(df$P47T, weights = df$PONDIH, probs = seq(0, 1, 0.2), na.rm = TRUE)
df %>%
mutate(quintil = cut(P47T, breaks = quintile_breaks, labels = paste0("Quintil ", 1:5), include.lowest = TRUE)) %>%
group_by(quintil) %>%
summarise(`Población Estimada` = sum(PONDIH), .groups = 'drop') %>%
mutate(Grupo = nombre_grupo)
}
# Generar tablas para ambos grupos y unirlas
tabla_informales <- generar_tabla_conteo(eph_informales, "Informales")
tabla_formales <- generar_tabla_conteo(eph_formales, "Formales")
tabla_comparativa_quintiles <- bind_rows(tabla_informales, tabla_formales) %>%
select(Grupo, quintil, `Población Estimada`)
kable(tabla_comparativa_quintiles,
caption = "Distribución de la Población por Quintil de Ingreso (P47T) - Comparativa",
digits = 0,
format.args = list(big.mark = ".", decimal.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
group_rows(index = c("Informales" = 5, "Formales" = 5))
Distribución de la Población por Quintil de Ingreso (P47T) - Comparativa
|
Grupo
|
quintil
|
Población Estimada
|
|
Informales
|
|
Informales
|
Quintil 1
|
104.516
|
|
Informales
|
Quintil 2
|
103.213
|
|
Informales
|
Quintil 3
|
97.447
|
|
Informales
|
Quintil 4
|
125.778
|
|
Informales
|
Quintil 5
|
72.528
|
|
Formales
|
|
Formales
|
Quintil 1
|
188.993
|
|
Formales
|
Quintil 2
|
170.815
|
|
Formales
|
Quintil 3
|
163.945
|
|
Formales
|
Quintil 4
|
172.394
|
|
Formales
|
Quintil 5
|
171.605
|
# 4. Estadisticas descriptivas
val_r_const <- rast("C:/Users/pcusu/Desktop/Tesis Di Tella 2025/vut_sin_calles_ni_ev.tif") / 1291.81 * 0.9289 * 35
costo_usd_m2 <- values(val_r_const, na.rm = TRUE)
tabla_costo <- tibble(
EstadĆstica = c("Media", "Mediana", "MĆnimo", "MĆ”ximo", "Desv. EstĆ”ndar"),
`Costo Acceso (USD/m²)` = c(
mean(costo_usd_m2, na.rm = TRUE), median(costo_usd_m2, na.rm = TRUE),
min(costo_usd_m2, na.rm = TRUE), max(costo_usd_m2, na.rm = TRUE),
sd(costo_usd_m2, na.rm = TRUE)
)
)
# Función para generar la tabla de estadĆsticas de ingreso
generar_tabla_ingreso <- function(eph_df) {
quintile_breaks <- Hmisc::wtd.quantile(eph_df$P47T, weights = eph_df$PONDIH, probs = seq(0, 1, 0.2), na.rm = TRUE)
eph_con_quintil <- eph_df %>%
mutate(quintil = cut(P47T, breaks = quintile_breaks, labels = paste0("Q", 1:5), include.lowest = TRUE))
umbrales <- c("30%" = 0.3, "50%" = 0.5, "70%" = 0.7)
lista_stats_ingreso <- lapply(names(umbrales), function(nombre_umbral) {
umbral_val <- umbrales[[nombre_umbral]]
eph_con_quintil %>%
mutate(ingreso_disponible = P47T * umbral_val) %>%
group_by(quintil) %>%
filter(!is.na(quintil)) %>%
summarise(
Media = as.numeric(Hmisc::wtd.mean(ingreso_disponible, weights = PONDIH, na.rm = TRUE)),
Mediana = as.numeric(Hmisc::wtd.quantile(ingreso_disponible, weights = PONDIH, probs = 0.5, na.rm = TRUE)),
MĆnimo = min(ingreso_disponible, na.rm = TRUE),
MƔximo = max(ingreso_disponible, na.rm = TRUE),
`Desv. EstƔndar` = as.numeric(sqrt(Hmisc::wtd.var(ingreso_disponible, weights = PONDIH, na.rm = TRUE))),
.groups = 'drop'
) %>%
mutate(across(where(is.numeric), ~ . / 1291)) %>%
pivot_longer(cols = -quintil, names_to = "EstadĆstica") %>%
pivot_wider(names_from = quintil, values_from = value) %>%
setNames(c("EstadĆstica", paste0(names(.)[-1], "_", nombre_umbral)))
})
reduce(lista_stats_ingreso, full_join, by = "EstadĆstica")
}
# Generar y mostrar la tabla para informales
tabla_ingreso_informales <- generar_tabla_ingreso(eph_informales)
tabla_final_informales <- left_join(tabla_costo, tabla_ingreso_informales, by = "EstadĆstica")
kable(tabla_final_informales,
digits = 2,
caption = "EstadĆsticas Descriptivas (Trabajadores Informales)",
col.names = c("EstadĆstica", "Costo Acceso (USD/m²)", rep(paste0("Q", 1:5), 3)),
format.args = list(big.mark = ".", decimal.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), font_size = 10) %>%
add_header_above(c(" " = 2, "Ingreso Disponible al 30%" = 5, "Ingreso Disponible al 50%" = 5, "Ingreso Disponible al 70%" = 5),
background = "#D3D3D3", color = "black", bold = TRUE) %>%
scroll_box(width = "100%")
EstadĆsticas Descriptivas (Trabajadores Informales)
|
|
Ingreso Disponible al 30%
|
Ingreso Disponible al 50%
|
Ingreso Disponible al 70%
|
|
EstadĆstica
|
Costo Acceso (USD/m²)
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
|
Media
|
287,84
|
53,22
|
120,98
|
175,83
|
284,42
|
772,42
|
88,71
|
201,63
|
293,04
|
474,03
|
1.287,36
|
124,19
|
282,28
|
410,26
|
663,64
|
1.802,31
|
|
Mediana
|
292,39
|
46,48
|
116,19
|
162,66
|
278,85
|
487,99
|
77,46
|
193,65
|
271,11
|
464,76
|
813,32
|
108,44
|
271,11
|
379,55
|
650,66
|
1.138,65
|
|
MĆnimo
|
67,80
|
1,39
|
95,27
|
144,07
|
222,62
|
371,80
|
2,32
|
158,79
|
240,12
|
371,03
|
619,67
|
3,25
|
222,31
|
336,17
|
519,44
|
867,54
|
|
MƔximo
|
783,99
|
92,95
|
139,43
|
220,76
|
348,57
|
3.834,24
|
154,92
|
232,38
|
367,93
|
580,95
|
6.390,40
|
216,89
|
325,33
|
515,10
|
813,32
|
8.946,55
|
|
Desv. EstƔndar
|
78,65
|
27,13
|
13,02
|
22,19
|
50,30
|
624,06
|
45,21
|
21,70
|
36,98
|
83,84
|
1.040,10
|
63,30
|
30,39
|
51,78
|
117,37
|
1.456,14
|
# 5. EstadĆsticas Descriptivas: Trabajadores Formales
# Generar y mostrar la tabla para formales
tabla_ingreso_formales <- generar_tabla_ingreso(eph_formales)
tabla_final_formales <- left_join(tabla_costo, tabla_ingreso_formales, by = "EstadĆstica")
kable(tabla_final_formales,
digits = 2,
caption = "EstadĆsticas Descriptivas (Trabajadores Formales)",
col.names = c("EstadĆstica", "Costo Acceso (USD/m²)", rep(paste0("Q", 1:5), 3)),
format.args = list(big.mark = ".", decimal.mark = ",")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), font_size = 10) %>%
add_header_above(c(" " = 2, "Ingreso Disponible al 30%" = 5, "Ingreso Disponible al 50%" = 5, "Ingreso Disponible al 70%" = 5),
background = "#D3D3D3", color = "black", bold = TRUE) %>%
scroll_box(width = "100%")
EstadĆsticas Descriptivas (Trabajadores Formales)
|
|
Ingreso Disponible al 30%
|
Ingreso Disponible al 50%
|
Ingreso Disponible al 70%
|
|
EstadĆstica
|
Costo Acceso (USD/m²)
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
|
Media
|
287,84
|
128,16
|
195,92
|
251,76
|
365,38
|
674,54
|
213,60
|
326,53
|
419,59
|
608,97
|
1.124,24
|
299,04
|
457,14
|
587,43
|
852,55
|
1.573,93
|
|
Mediana
|
292,39
|
139,43
|
191,71
|
255,62
|
348,57
|
580,95
|
232,38
|
319,52
|
426,03
|
580,95
|
968,24
|
325,33
|
447,33
|
596,44
|
813,32
|
1.355,54
|
|
MĆnimo
|
67,80
|
23,24
|
176,61
|
211,46
|
295,12
|
453,14
|
38,73
|
294,35
|
352,44
|
491,87
|
755,23
|
54,22
|
412,08
|
493,42
|
688,61
|
1.057,32
|
|
MƔximo
|
783,99
|
174,28
|
209,14
|
290,47
|
441,52
|
1.859,02
|
290,47
|
348,57
|
484,12
|
735,86
|
3.098,37
|
406,66
|
487,99
|
677,77
|
1.030,21
|
4.337,72
|
|
Desv. EstƔndar
|
78,65
|
36,82
|
11,03
|
21,57
|
36,68
|
307,94
|
61,37
|
18,38
|
35,95
|
61,14
|
513,24
|
85,91
|
25,73
|
50,33
|
85,59
|
718,53
|
# 6. Visualización del Valor del Suelo
val <- rast("C:/Users/pcusu/Desktop/Tesis Di Tella 2025/vut_sin_calles_ni_ev.tif") / 1291.81
tm_shape(val) +
tm_raster(
title = "USD/m²",
palette = "viridis",
style = "quantile",
n = 5,
alpha = 0.85
) +
tm_layout(
main.title = "Valor de alquiler en USD/m² (CABA)",
main.title.size = 1.2,
legend.position = c("left", "bottom"),
legend.bg.color = "white",
legend.bg.alpha = 0.7
)

# 7.Analisis de acceso para ambos grupos (formales e informales)
# Para Informales
tabla_quintiles_informales <- get_quintile_table(eph_informales)
mapas_informales <- create_raster_groups(val, tabla_quintiles_informales, "Informales")
# Para Formales
tabla_quintiles_formales <- get_quintile_table(eph_formales)
mapas_formales <- create_raster_groups(val, tabla_quintiles_formales, "Formales")
# Paleta y leyenda
colores_quintiles <- rev(brewer.pal(5, "YlOrRd"))
pal <- c(colores_quintiles, "#4D4D4D")
nombres_leyenda <- c("Q1", "Q2", "Q3", "Q4", "Q5", "No access")
# 8.Función para calcular Gini y frecuencias adaptada a ambos grupos
calcular_gini_freq <- function(raster_group, group_prefix) {
freq_list <- map(1:nlyr(raster_group), ~{
freq_data <- freq(raster_group[[.x]])
data.frame(Group = names(raster_group)[.x], value = freq_data$value, count = freq_data$count)
})
df_a <- do.call(rbind, freq_list) %>%
filter(!is.na(value)) %>%
mutate(value = factor(value, levels = c("Q1", "Q2", "Q3", "Q4", "Q5", "No access"))) %>%
pivot_wider(names_from = value, values_from = count, values_fill = 0)
expected_cols <- c("Q1", "Q2", "Q3", "Q4", "Q5", "No access")
for (col in expected_cols) {
if (!col %in% names(df_a)) { df_a[[col]] <- 0 }
}
# Calcula las proporciones
df_gini_data <- df_a %>%
select(-Group) %>%
mutate(across(everything(), ~ . / rowSums(across(everything()))))
gini_coefs <- map_dbl(1:nrow(df_gini_data), ~ineq(as.numeric(df_gini_data[.x, ]), type="Gini"))
df_gini_data %>%
mutate(
Income_Dedication = c("0-30%", "31-50%", "50-70%"),
Gini = round(gini_coefs, 3),
Grupo = group_prefix
) %>%
select(Grupo, Income_Dedication, Q1, Q2, Q3, Q4, Q5, `No access`, Gini)
}
# Calcular para ambos grupos y unir
gini_informales <- calcular_gini_freq(mapas_informales, "Informales")
gini_formales <- calcular_gini_freq(mapas_formales, "Formales")
gini_comparativa <- bind_rows(gini_informales, gini_formales)
# 9. Tabla de Resultados Comparativa (Gini y Porcentajes de acceso)
gini_table_comparativa <- gini_comparativa %>%
mutate(across(c(Q1, Q2, Q3, Q4, Q5, `No access`), ~scales::percent(., accuracy=0.01))) %>%
rename(`Dedicación de Ingreso` = Income_Dedication)
kable(gini_table_comparativa,
caption = "Porcentaje de acceso al alquiler e indice Gini por Grupo de Trabajadores",
format = "html",
align = c('l', 'l', rep('r', 7))) %>%
kable_styling(bootstrap_options = c("striped", "hover")) %>%
group_rows(index = c("Informales" = 3, "Formales" = 3))
Porcentaje de acceso al alquiler e indice Gini por Grupo de Trabajadores
|
Grupo
|
Dedicación de Ingreso
|
Q1
|
Q2
|
Q3
|
Q4
|
Q5
|
No access
|
Gini
|
|
Informales
|
|
Informales
|
0-30%
|
0.00%
|
1.62%
|
0.76%
|
37.06%
|
59.18%
|
1.38%
|
0.675
|
|
Informales
|
31-50%
|
0.23%
|
12.35%
|
23.38%
|
62.47%
|
1.58%
|
0.00%
|
0.654
|
|
Informales
|
50-70%
|
1.62%
|
34.34%
|
57.50%
|
6.01%
|
0.54%
|
0.00%
|
0.655
|
|
Formales
|
|
Formales
|
0-30%
|
1.74%
|
8.82%
|
17.35%
|
57.44%
|
13.91%
|
0.74%
|
0.559
|
|
Formales
|
31-50%
|
21.29%
|
49.57%
|
26.34%
|
2.05%
|
0.74%
|
0.00%
|
0.573
|
|
Formales
|
50-70%
|
75.20%
|
22.74%
|
1.47%
|
0.59%
|
0.00%
|
0.00%
|
0.742
|
# 10. Visualización Comparativa de Mapas de Accesibilidad
# Crear listas de mapas para cada grupo
map_list_informales <- create_maps(mapas_informales, pal, nombres_leyenda)
map_list_formales <- create_maps(mapas_formales, pal, nombres_leyenda)
# Organizar todos los mapas en una sola grilla
mapas_combinados <- tmap_arrange(
map_list_informales$maps[[1]], map_list_formales$maps[[1]],
map_list_informales$maps[[2]], map_list_formales$maps[[2]],
map_list_informales$maps[[3]], map_list_formales$maps[[3]],
ncol = 2, nrow = 3
)
# Mostrar la grilla de mapas y luego la leyenda
print(mapas_combinados)

print(map_list_informales$legend)

# 11. Visualización Comparativa de Curvas de Lorenz
# Ejecución
lorenz_data_comparative <- create_lorenz_data(gini_comparativa)
lorenz_plot_comparative <- create_lorenz_plot_comparative(lorenz_data_comparative)
print(lorenz_plot_comparative)
