# Configura el entorno y carga las librerĆ­as necesarias
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(Hmisc)
library(dplyr)
library(tidyr)
library(purrr)
library(ineq)
library(terra)
library(ggplot2)
library(tmap)
library(eph)
library(tidyverse)
library(kableExtra)
library(scales)
library(spatstat.geom)
library(RColorBrewer)
library(viridis)
# 1. Funciones auxiliares
get_quintile_table <- function(df) {
  calculate_quintile_stats <- function(data, weights, income) {
    quintile_cuts <- wtd.quantile(income, weights = weights, probs = seq(0.2, 0.8, 0.2))
    q1_median <- weighted.median(income[income <= quintile_cuts[1]], w = weights[income <= quintile_cuts[1]], na.rm = TRUE)
    q2_median <- weighted.median(income[income > quintile_cuts[1] & income <= quintile_cuts[2]], w = weights[income > quintile_cuts[1] & income <= quintile_cuts[2]], na.rm = TRUE)
    q3_median <- weighted.median(income[income > quintile_cuts[2] & income <= quintile_cuts[3]], w = weights[income > quintile_cuts[2] & income <= quintile_cuts[3]], na.rm = TRUE)
    q4_median <- weighted.median(income[income > quintile_cuts[3] & income <= quintile_cuts[4]], w = weights[income > quintile_cuts[3] & income <= quintile_cuts[4]], na.rm = TRUE)
    q5_median <- weighted.median(income[income > quintile_cuts[4]], w = weights[income > quintile_cuts[4]], na.rm = TRUE)
    c(Q1 = q1_median/1291, Q2 = q2_median/1291, Q3 = q3_median/1291, Q4 = q4_median/1291, Q5 = q5_median/1291)
  }
  results <- calculate_quintile_stats(df, df$PONDIH, df$P47T)
  tibble(!!!results, n = sum(df$PONDIH))
}

make_categorical <- function(val, medians, umbral) {
  medians <- as.numeric(medians)
  q1 <- medians[1] * umbral
  q2 <- medians[2] * umbral
  q3 <- medians[3] * umbral
  q4 <- medians[4] * umbral
  q5 <- medians[5] * umbral
  val_transformed <- val * 0.9289 * 35 
  m <- matrix(c(-Inf, q1, 1, q1, q2, 2, q2, q3, 3, q3, q4, 4, q4, q5, 5, q5, Inf, 6), ncol=3, byrow=TRUE)
  result <- classify(val_transformed, m)
  return(result)
}

create_raster_groups <- function(val, tabla, group_name) {
  medians <- tabla %>% select(Q1, Q2, Q3, Q4, Q5)
  medians <- as.numeric(medians[1, ])
  thresholds <- c(0.3, 0.5, 0.7)
  name_suffix <- c("0-30%", "31-50%", "50-70%")
  result_list <- vector("list", length(thresholds))
  for(i in seq_along(thresholds)) {
    rast <- make_categorical(val, medians, thresholds[i])
    cats <- data.frame(ID = 1:6, category = c("Q1", "Q2", "Q3", "Q4", "Q5", "No access"))
    levels(rast) <- cats
    result_list[[i]] <- rast
  }
  out <- do.call(c, result_list)
  names(out) <- sprintf("%s_%s", group_name, name_suffix)
  return(out)
}

create_lorenz_data <- function(gini_df) {
  map_df(1:nrow(gini_df), function(i) {
    grupo <- gini_df$Grupo[i]
    income_dedication <- gini_df$Income_Dedication[i]
    values <- as.numeric(gini_df[i, setdiff(names(gini_df), c("Grupo", "Income_Dedication", "Gini"))])
    if(sum(values, na.rm = TRUE) > 0) {
      l_curve <- Lc(values)
      data.frame(
        Grupo = grupo,
        Income_Dedication = income_dedication,
        Cumulative_Population_Share = l_curve$p, 
        Cumulative_Accessibility_Share = l_curve$L
      )
    } else { NULL }
  })
}

create_lorenz_plot_comparative <- function(lorenz_data) {
  line_data <- distinct(lorenz_data, Income_Dedication)
  
  ggplot(lorenz_data, aes(x = Cumulative_Population_Share, y = Cumulative_Accessibility_Share, color = Grupo)) +
    geom_abline(data = line_data, aes(intercept = 0, slope = 1), linetype = "dashed", color = "grey", inherit.aes = FALSE) +
    geom_line(size = 1.2) + 
    facet_wrap(~ Income_Dedication) +
    labs(
      x = "Proporción acumulada de población", 
      y = "Proporción acumulada de accesibilidad",
      color = "Grupo de Trabajadores"
    ) +
    scale_color_manual(values = c("Formales" = "blue", "Informales" = "red")) +
    scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) +
    scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) +
    theme_minimal() + 
    theme(
      plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
      axis.title = element_text(size = 10),
      axis.text = element_text(size = 8),
      legend.position = "bottom",
      strip.text = element_text(size = 11, face = "bold"),
      panel.spacing = unit(1, "lines"),
      aspect.ratio = 1
    )
}

create_maps <- function(all_groups, pal, nombres_leyenda) {
  maps <- map(1:nlyr(all_groups), ~{
    tm_shape(all_groups[[.x]]) + 
      tm_raster(palette = pal, legend.show = FALSE, alpha = 0.8) + 
      tm_layout(main.title = names(all_groups)[.x], main.title.size = 1.2) + 
      tm_legend(show = FALSE)
  })
  names(maps) <- names(all_groups)
  
  legend <- tm_shape(all_groups[[1]]) +
    tm_raster(palette = pal, title = "Quintiles", labels = nombres_leyenda,
              legend.show = TRUE, legend.is.portrait = FALSE) +
    tm_layout(legend.only = TRUE)
    
  list(maps = maps, legend = legend)
}
# 2. Filtrado de microdatos

assign("has_internet_via_proxy", TRUE, environment(curl::has_internet))
eph_raw <- get_microdata(2024, 4, "individual")

eph_base <- eph_raw %>%
  mutate(
    clase = case_when(
      CAT_OCUP == 2 ~ "Cuentapropista",
      CAT_OCUP == 3 & PP07G1 == 1 & PP07G2 == 1 & PP07G3 == 1 & PP07G4 == 1 ~ "Asalariado formal",
      CAT_OCUP == 3 ~ "Asalariado informal",
      TRUE ~ "Otro"
    )
  ) %>%
  filter(
    CH06 >= 18, CH06 <= 65,
    AGLOMERADO == 32,
    P47T > 0
  )

# Grupo 1: Trabajadores Informales
eph_informales <- eph_base %>%
  filter(clase %in% c("Cuentapropista", "Asalariado informal")) %>%
  mutate(grupo_analisis = "Trabajadores Informales")

# Grupo 2: Trabajadores Formales
eph_formales <- eph_base %>%
  filter(clase == "Asalariado formal") %>%
  mutate(grupo_analisis = "Trabajadores Formales")
# 3. Tabla Comparativa de Quintiles

generar_tabla_conteo <- function(df, nombre_grupo) {
  quintile_breaks <- Hmisc::wtd.quantile(df$P47T, weights = df$PONDIH, probs = seq(0, 1, 0.2), na.rm = TRUE)
  df %>%
    mutate(quintil = cut(P47T, breaks = quintile_breaks, labels = paste0("Quintil ", 1:5), include.lowest = TRUE)) %>%
    group_by(quintil) %>%
    summarise(`Población Estimada` = sum(PONDIH), .groups = 'drop') %>%
    mutate(Grupo = nombre_grupo)
}

# Generar tablas para ambos grupos y unirlas
tabla_informales <- generar_tabla_conteo(eph_informales, "Informales")
tabla_formales <- generar_tabla_conteo(eph_formales, "Formales")
tabla_comparativa_quintiles <- bind_rows(tabla_informales, tabla_formales) %>%
  select(Grupo, quintil, `Población Estimada`)

kable(tabla_comparativa_quintiles,
      caption = "Distribución de la Población por Quintil de Ingreso (P47T) - Comparativa",
      digits = 0,
      format.args = list(big.mark = ".", decimal.mark = ",")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  group_rows(index = c("Informales" = 5, "Formales" = 5))
Distribución de la Población por Quintil de Ingreso (P47T) - Comparativa
Grupo quintil Población Estimada
Informales
Informales Quintil 1 104.516
Informales Quintil 2 103.213
Informales Quintil 3 97.447
Informales Quintil 4 125.778
Informales Quintil 5 72.528
Formales
Formales Quintil 1 188.993
Formales Quintil 2 170.815
Formales Quintil 3 163.945
Formales Quintil 4 172.394
Formales Quintil 5 171.605
# 4. Estadisticas descriptivas

val_r_const <- rast("C:/Users/pcusu/Desktop/Tesis Di Tella 2025/vut_sin_calles_ni_ev.tif") / 1291.81 * 0.9289 * 35
costo_usd_m2 <- values(val_r_const, na.rm = TRUE)
tabla_costo <- tibble(
  Estadƭstica = c("Media", "Mediana", "Mƭnimo", "MƔximo", "Desv. EstƔndar"),
  `Costo Acceso (USD/m²)` = c(
    mean(costo_usd_m2, na.rm = TRUE), median(costo_usd_m2, na.rm = TRUE),
    min(costo_usd_m2, na.rm = TRUE), max(costo_usd_m2, na.rm = TRUE),
    sd(costo_usd_m2, na.rm = TRUE)
  )
)

# Función para generar la tabla de estadísticas de ingreso
generar_tabla_ingreso <- function(eph_df) {
  quintile_breaks <- Hmisc::wtd.quantile(eph_df$P47T, weights = eph_df$PONDIH, probs = seq(0, 1, 0.2), na.rm = TRUE)
  eph_con_quintil <- eph_df %>%
    mutate(quintil = cut(P47T, breaks = quintile_breaks, labels = paste0("Q", 1:5), include.lowest = TRUE))
  
  umbrales <- c("30%" = 0.3, "50%" = 0.5, "70%" = 0.7)
  
  lista_stats_ingreso <- lapply(names(umbrales), function(nombre_umbral) {
    umbral_val <- umbrales[[nombre_umbral]]
    
    eph_con_quintil %>%
      mutate(ingreso_disponible = P47T * umbral_val) %>%
      group_by(quintil) %>%
      filter(!is.na(quintil)) %>%
      summarise(
        Media = as.numeric(Hmisc::wtd.mean(ingreso_disponible, weights = PONDIH, na.rm = TRUE)),
        Mediana = as.numeric(Hmisc::wtd.quantile(ingreso_disponible, weights = PONDIH, probs = 0.5, na.rm = TRUE)),
        MĆ­nimo = min(ingreso_disponible, na.rm = TRUE),
        MƔximo = max(ingreso_disponible, na.rm = TRUE),
        `Desv. EstƔndar` = as.numeric(sqrt(Hmisc::wtd.var(ingreso_disponible, weights = PONDIH, na.rm = TRUE))),
        .groups = 'drop'
      ) %>%
      mutate(across(where(is.numeric), ~ . / 1291)) %>%
      pivot_longer(cols = -quintil, names_to = "EstadĆ­stica") %>%
      pivot_wider(names_from = quintil, values_from = value) %>%
      setNames(c("EstadĆ­stica", paste0(names(.)[-1], "_", nombre_umbral)))
  })
  
  reduce(lista_stats_ingreso, full_join, by = "EstadĆ­stica")
}

# Generar y mostrar la tabla para informales
tabla_ingreso_informales <- generar_tabla_ingreso(eph_informales)
tabla_final_informales <- left_join(tabla_costo, tabla_ingreso_informales, by = "EstadĆ­stica")

kable(tabla_final_informales, 
      digits = 2, 
      caption = "EstadĆ­sticas Descriptivas (Trabajadores Informales)",
      col.names = c("Estadística", "Costo Acceso (USD/m²)", rep(paste0("Q", 1:5), 3)),
      format.args = list(big.mark = ".", decimal.mark = ",")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), font_size = 10) %>%
  add_header_above(c(" " = 2, "Ingreso Disponible al 30%" = 5, "Ingreso Disponible al 50%" = 5, "Ingreso Disponible al 70%" = 5),
                   background = "#D3D3D3", color = "black", bold = TRUE) %>%
  scroll_box(width = "100%")
EstadĆ­sticas Descriptivas (Trabajadores Informales)
Ingreso Disponible al 30%
Ingreso Disponible al 50%
Ingreso Disponible al 70%
Estadística Costo Acceso (USD/m²) Q1 Q2 Q3 Q4 Q5 Q1 Q2 Q3 Q4 Q5 Q1 Q2 Q3 Q4 Q5
Media 287,84 53,22 120,98 175,83 284,42 772,42 88,71 201,63 293,04 474,03 1.287,36 124,19 282,28 410,26 663,64 1.802,31
Mediana 292,39 46,48 116,19 162,66 278,85 487,99 77,46 193,65 271,11 464,76 813,32 108,44 271,11 379,55 650,66 1.138,65
MĆ­nimo 67,80 1,39 95,27 144,07 222,62 371,80 2,32 158,79 240,12 371,03 619,67 3,25 222,31 336,17 519,44 867,54
MƔximo 783,99 92,95 139,43 220,76 348,57 3.834,24 154,92 232,38 367,93 580,95 6.390,40 216,89 325,33 515,10 813,32 8.946,55
Desv. EstƔndar 78,65 27,13 13,02 22,19 50,30 624,06 45,21 21,70 36,98 83,84 1.040,10 63,30 30,39 51,78 117,37 1.456,14
# 5. EstadĆ­sticas Descriptivas: Trabajadores Formales

# Generar y mostrar la tabla para formales
tabla_ingreso_formales <- generar_tabla_ingreso(eph_formales)
tabla_final_formales <- left_join(tabla_costo, tabla_ingreso_formales, by = "EstadĆ­stica")

kable(tabla_final_formales, 
      digits = 2, 
      caption = "EstadĆ­sticas Descriptivas (Trabajadores Formales)",
      col.names = c("Estadística", "Costo Acceso (USD/m²)", rep(paste0("Q", 1:5), 3)),
      format.args = list(big.mark = ".", decimal.mark = ",")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), font_size = 10) %>%
  add_header_above(c(" " = 2, "Ingreso Disponible al 30%" = 5, "Ingreso Disponible al 50%" = 5, "Ingreso Disponible al 70%" = 5),
                   background = "#D3D3D3", color = "black", bold = TRUE) %>%
  scroll_box(width = "100%")
EstadĆ­sticas Descriptivas (Trabajadores Formales)
Ingreso Disponible al 30%
Ingreso Disponible al 50%
Ingreso Disponible al 70%
Estadística Costo Acceso (USD/m²) Q1 Q2 Q3 Q4 Q5 Q1 Q2 Q3 Q4 Q5 Q1 Q2 Q3 Q4 Q5
Media 287,84 128,16 195,92 251,76 365,38 674,54 213,60 326,53 419,59 608,97 1.124,24 299,04 457,14 587,43 852,55 1.573,93
Mediana 292,39 139,43 191,71 255,62 348,57 580,95 232,38 319,52 426,03 580,95 968,24 325,33 447,33 596,44 813,32 1.355,54
MĆ­nimo 67,80 23,24 176,61 211,46 295,12 453,14 38,73 294,35 352,44 491,87 755,23 54,22 412,08 493,42 688,61 1.057,32
MƔximo 783,99 174,28 209,14 290,47 441,52 1.859,02 290,47 348,57 484,12 735,86 3.098,37 406,66 487,99 677,77 1.030,21 4.337,72
Desv. EstƔndar 78,65 36,82 11,03 21,57 36,68 307,94 61,37 18,38 35,95 61,14 513,24 85,91 25,73 50,33 85,59 718,53
# 6. Visualización del Valor del Suelo

val <- rast("C:/Users/pcusu/Desktop/Tesis Di Tella 2025/vut_sin_calles_ni_ev.tif") / 1291.81

tm_shape(val) +
  tm_raster(
    title = "USD/m²", 
    palette = "viridis",      
    style = "quantile", 
    n = 5, 
    alpha = 0.85             
  ) +
  tm_layout(
    main.title = "Valor de alquiler en USD/m² (CABA)",
    main.title.size = 1.2,
    legend.position = c("left", "bottom"),
    legend.bg.color = "white",
    legend.bg.alpha = 0.7
  ) 

# 7.Analisis de acceso para ambos grupos (formales e informales)

# Para Informales
tabla_quintiles_informales <- get_quintile_table(eph_informales)
mapas_informales <- create_raster_groups(val, tabla_quintiles_informales, "Informales")

# Para Formales
tabla_quintiles_formales <- get_quintile_table(eph_formales)
mapas_formales <- create_raster_groups(val, tabla_quintiles_formales, "Formales")

# Paleta y leyenda
colores_quintiles <- rev(brewer.pal(5, "YlOrRd"))
pal <- c(colores_quintiles, "#4D4D4D")
nombres_leyenda <- c("Q1", "Q2", "Q3", "Q4", "Q5", "No access")
# 8.Función para calcular Gini y frecuencias adaptada a ambos grupos

calcular_gini_freq <- function(raster_group, group_prefix) {
  freq_list <- map(1:nlyr(raster_group), ~{
    freq_data <- freq(raster_group[[.x]])
    data.frame(Group = names(raster_group)[.x], value = freq_data$value, count = freq_data$count)
  })

  df_a <- do.call(rbind, freq_list) %>%
    filter(!is.na(value)) %>%
    mutate(value = factor(value, levels = c("Q1", "Q2", "Q3", "Q4", "Q5", "No access"))) %>%
    pivot_wider(names_from = value, values_from = count, values_fill = 0)
    
  expected_cols <- c("Q1", "Q2", "Q3", "Q4", "Q5", "No access")
  for (col in expected_cols) {
    if (!col %in% names(df_a)) { df_a[[col]] <- 0 }
  }
  
  # Calcula las proporciones
  df_gini_data <- df_a %>%
    select(-Group) %>%
    mutate(across(everything(), ~ . / rowSums(across(everything()))))
    
  gini_coefs <- map_dbl(1:nrow(df_gini_data), ~ineq(as.numeric(df_gini_data[.x, ]), type="Gini"))
  
  df_gini_data %>%
    mutate(
      Income_Dedication = c("0-30%", "31-50%", "50-70%"),
      Gini = round(gini_coefs, 3),
      Grupo = group_prefix
    ) %>%
    select(Grupo, Income_Dedication, Q1, Q2, Q3, Q4, Q5, `No access`, Gini)
}

# Calcular para ambos grupos y unir
gini_informales <- calcular_gini_freq(mapas_informales, "Informales")
gini_formales <- calcular_gini_freq(mapas_formales, "Formales")
gini_comparativa <- bind_rows(gini_informales, gini_formales)
# 9. Tabla de Resultados Comparativa (Gini y Porcentajes de acceso)

gini_table_comparativa <- gini_comparativa %>%
  mutate(across(c(Q1, Q2, Q3, Q4, Q5, `No access`), ~scales::percent(., accuracy=0.01))) %>%
  rename(`Dedicación de Ingreso` = Income_Dedication)

kable(gini_table_comparativa,
      caption = "Porcentaje de acceso al alquiler e indice Gini por Grupo de Trabajadores",
      format = "html",
      align = c('l', 'l', rep('r', 7))) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  group_rows(index = c("Informales" = 3, "Formales" = 3))
Porcentaje de acceso al alquiler e indice Gini por Grupo de Trabajadores
Grupo Dedicación de Ingreso Q1 Q2 Q3 Q4 Q5 No access Gini
Informales
Informales 0-30% 0.00% 1.62% 0.76% 37.06% 59.18% 1.38% 0.675
Informales 31-50% 0.23% 12.35% 23.38% 62.47% 1.58% 0.00% 0.654
Informales 50-70% 1.62% 34.34% 57.50% 6.01% 0.54% 0.00% 0.655
Formales
Formales 0-30% 1.74% 8.82% 17.35% 57.44% 13.91% 0.74% 0.559
Formales 31-50% 21.29% 49.57% 26.34% 2.05% 0.74% 0.00% 0.573
Formales 50-70% 75.20% 22.74% 1.47% 0.59% 0.00% 0.00% 0.742
# 10. Visualización Comparativa de Mapas de Accesibilidad

# Crear listas de mapas para cada grupo
map_list_informales <- create_maps(mapas_informales, pal, nombres_leyenda)
map_list_formales <- create_maps(mapas_formales, pal, nombres_leyenda)

# Organizar todos los mapas en una sola grilla
mapas_combinados <- tmap_arrange(
  map_list_informales$maps[[1]], map_list_formales$maps[[1]],
  map_list_informales$maps[[2]], map_list_formales$maps[[2]],
  map_list_informales$maps[[3]], map_list_formales$maps[[3]],
  ncol = 2, nrow = 3
)

# Mostrar la grilla de mapas y luego la leyenda
print(mapas_combinados)

print(map_list_informales$legend)

# 11. Visualización Comparativa de Curvas de Lorenz

# Ejecución
lorenz_data_comparative <- create_lorenz_data(gini_comparativa)
lorenz_plot_comparative <- create_lorenz_plot_comparative(lorenz_data_comparative)

print(lorenz_plot_comparative)