# Load required libraries
library(terra)
## Warning: package 'terra' was built under R version 4.3.3
## terra 1.7.78
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
library(eph)
## Warning: package 'eph' was built under R version 4.3.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(gglorenz)
## Warning: package 'gglorenz' was built under R version 4.3.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.3.3
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:terra':
## 
##     rotate
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:terra':
## 
##     intersect, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks terra::extract()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Set working directory to your project folder
setwd("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres")
# Load the rental value raster and display it
vut <- rast("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/vut.tif")
plot(vut)

eph <- read_excel("C:/Users/sixto/OneDrive/Documentos/EPH_usu_3_Trim_2024_xls/usu_individual_T324.xlsx")
## Warning: Expecting logical in CY2065 / R2065C103: got 'paises del exterior'
## Warning: Expecting logical in CY3899 / R3899C103: got 'la plata'
## Warning: Expecting logical in CY5387 / R5387C103: got 'todo el pais'
## Warning: Expecting logical in CY5389 / R5389C103: got 'todo el pais'
## Warning: Expecting logical in CY6239 / R6239C103: got 'cordoba'
## Warning: Expecting logical in CY7666 / R7666C103: got 'mar del plata'
## Warning: Expecting logical in CY9021 / R9021C103: got 'Latinoamerica'
## Warning: Expecting logical in CY10817 / R10817C103: got 'NO ESPECIFICA'
## Warning: Expecting logical in CY11707 / R11707C103: got 'greneral belgrano'
## Warning: Expecting logical in CY14741 / R14741C103: got 'en rutas argentinas'
## Warning: Expecting logical in CY18117 / R18117C103: got 'EN EL EXTERIOR VIA
## ZOOM Y EN UN LOCAL Q ALQUILA'
## Warning: Expecting logical in CY24897 / R24897C103: got 'NO ESPECIFICA'
## Warning: Expecting logical in CY28911 / R28911C103: got 'familiar'
## Warning: Expecting logical in CY30339 / R30339C103: got 'VUELOS NACIONALES E
## INTERNACIONALES'
## Warning: Expecting logical in CY31029 / R31029C103: got 'LA PLATA'
## Warning: Expecting logical in CY31640 / R31640C103: got 'todo el pais'
## Warning: Expecting logical in CY33636 / R33636C103: got 'todo el pais'
## Warning: Expecting logical in CY33810 / R33810C103: got 'viajes nacionales e
## internacionales'
## Warning: Expecting logical in CY40240 / R40240C103: got 'Viajar por todo el
## pais'
## Warning: Expecting logical in CY43174 / R43174C103: got 'otro lado'
## Warning: Expecting logical in CY43247 / R43247C103: got 'NO ESPECIFICA'
## Warning: Expecting logical in CY44640 / R44640C103: got 'Todos lados'
eph <- eph %>% 
  dplyr::filter(P47T > 0, AGLOMERADO == 32, CH12 < 9) %>% 
  mutate(genero = recode(CH04, `1` = "masculino", `2` = "femenino"))
eph = subset(eph, CH06 >= 18 & CH06 <=35  & AGLOMERADO == 32)
# Load the census shapefile ("manzanas") and project to EPSG:3857
manzanas <- st_read("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/manzanas_caba_censo_2010")
## Reading layer `manzanas_censo_2010' from data source 
##   `C:\Users\sixto\OneDrive\Documentos\Papers\accesibilidad_alquileres\manzanas_caba_censo_2010' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 13726 features and 1 field (with 1 geometry empty)
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 93844.14 ymin: 91670.86 xmax: 111752.4 ymax: 111284.4
## Projected CRS: Gauss Krugger BA
manzanas <- st_transform(manzanas, 3857)

# Load the green spaces layer and project it
ev <- st_read("https://cdn.buenosaires.gob.ar/datosabiertos/datasets/secretaria-de-desarrollo-urbano/espacios-verdes/espacio_verde_publico.geojson") 
## Reading layer `espacio_verde_publico' from data source 
##   `https://cdn.buenosaires.gob.ar/datosabiertos/datasets/secretaria-de-desarrollo-urbano/espacios-verdes/espacio_verde_publico.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 2144 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -58.53175 ymin: -34.70557 xmax: -58.33983 ymax: -34.52657
## Geodetic CRS:  WGS 84
ev <- st_transform(ev, 3857)

# Mask the rental raster using the 'manzanas' layer and crop it
val <- mask(vut, manzanas)
val <- trim(val)
val <- crop(vut, val, mask = TRUE)
plot(val)

# Exclude green spaces (using inverse masking)
val <- mask(val, ev, inverse = TRUE)
val <- trim(val)
plot(val)

# Optionally, save the processed raster
writeRaster(val, "vut_sin_calles_ni_ev.tif", overwrite = TRUE)
compute_decile_medians <- function(data, income_col = "P47T", n_deciles = 10) {
  data %>%
    filter(.data[[income_col]] > 0) %>%
    mutate(decile = ntile(.data[[income_col]], n_deciles)) %>%
    group_by(decile) %>%
    summarise(median_income = median(.data[[income_col]], na.rm = TRUE), .groups = "drop") %>%
    arrange(decile) %>%
    pull(median_income)
}

plot_ratio_rasters <- function(raster_list, title, break_val = 0.3,
                               palette = c("darkgreen", "yellow", "red")) {
  # Determine the maximum value among all rasters
  max_val <- max(sapply(raster_list, function(r) max(values(r), na.rm = TRUE)))
  # If the maximum value is <= 0.5, set top_break to 0.51; otherwise, keep max_val
  top_break <- ifelse(max_val <= 0.5, 0.51, max_val)
  # Fixed breaks: 0, 0.3, 0.5, and the maximum (top_break)
  breaks <- c(0, 0.3, 0.5, top_break)
  
  # Layout for 10 decile plots (modify the layout if number of rasters changes)
  par(mfrow = c(2, 5), oma = c(0, 0, 2, 0))
  for (i in seq_along(raster_list)) {
    plot(raster_list[[i]], col = palette, breaks = breaks,
         main = paste("Decile", i, "Ratio"))
  }
  mtext(title, outer = TRUE, cex = 1.2)
}
# Using all individuals from eph for the total analysis
medians_total <- compute_decile_medians(eph, income_col = "P47T")

# Create and store ratio rasters for each decile (1 to 10)
ratio_rasters_total <- list()
for(i in 1:10) {
  # Calculate ratio: rental value factor divided by median income per decile
  ratio <- val * 0.9289 * 35 / medians_total[[i]]
  names(ratio) <- paste0("Decile_", i)
  
  # Save the raster if desired
  writeRaster(ratio, paste0("Decil_total_", i, ".tif"), overwrite = TRUE)
  
  ratio_rasters_total[[i]] <- ratio
}

# Plot the total analysis ratio rasters
plot_ratio_rasters(ratio_rasters_total, title = "Accessibility Ratio - Total")

# Separate data by gender
eph_male <- subset(eph, genero == "masculino")
eph_female <- subset(eph, genero == "femenino")

# --- Male Analysis ---
medians_male <- compute_decile_medians(eph_male, income_col = "P47T")
ratio_rasters_male <- list()
for(i in 1:10) {
  ratio <- val * 0.9289 * 35 / medians_male[[i]]
  names(ratio) <- paste0("Decile_", i)
  writeRaster(ratio, paste0("Decil_male_", i, ".tif"), overwrite = TRUE)
  ratio_rasters_male[[i]] <- ratio
}

# --- Female Analysis ---
medians_female <- compute_decile_medians(eph_female, income_col = "P47T")
ratio_rasters_female <- list()
for(i in 1:10) {
  ratio <- val * 0.9289 * 35 / medians_female[[i]]
  names(ratio) <- paste0("Decile_", i)
  writeRaster(ratio, paste0("Decil_female_", i, ".tif"), overwrite = TRUE)
  ratio_rasters_female[[i]] <- ratio
}

# Plot side-by-side comparisons
par(mfrow = c(2,1), oma = c(0, 0, 2, 0))
plot_ratio_rasters(ratio_rasters_male, title = "Accessibility Ratio - Males")

plot_ratio_rasters(ratio_rasters_female, title = "Accessibility Ratio - Females")

# Define age brackets
age_brackets <- list("18-23" = c(18,23),
                     "24-29" = c(24,29),
                     "30-35" = c(30,35))

# Loop through each age bracket for analysis
for(group in names(age_brackets)) {
  age_range <- age_brackets[[group]]
  
  # Subset microdata to the current age bracket
  eph_age <- subset(eph, CH06 >= age_range[1] & CH06 <= age_range[2])
  
  # Calculate median incomes by decile
  medians_age <- compute_decile_medians(eph_age, income_col = "P47T")
  
  ratio_rasters_age <- list()
  for(i in 1:10) {
    ratio <- val * 0.9289 * 35 / medians_age[[i]]
    names(ratio) <- paste0("Decile_", i)
    # Save the raster, include age group in the filename
    writeRaster(ratio, paste0("Decil_age_", group, "_", i, ".tif"), overwrite = TRUE)
    ratio_rasters_age[[i]] <- ratio
  }
  
  # Plot the ratio rasters for the current age bracket
  plot_ratio_rasters(ratio_rasters_age, title = paste("Accessibility Ratio - Age Group", group))
}

# Further breakdown by both gender and age
genders <- c("masculino", "femenino")

for(g in genders) {
  # Subset data by gender
  eph_gender <- subset(eph, genero == g)
  
  for(group in names(age_brackets)) {
    age_range <- age_brackets[[group]]
    
    # Further subset the gender-specific data by the age bracket
    eph_gender_age <- subset(eph_gender, CH06 >= age_range[1] & CH06 <= age_range[2])
    
    # Skip analysis if there are few observations in the subgroup
    if(nrow(eph_gender_age) < 10) {
      cat("Not enough data for", g, "in age group", group, "\n")
      next
    }
    
    # Calculate median incomes by decile
    medians_gender_age <- compute_decile_medians(eph_gender_age, income_col = "P47T")
    
    ratio_rasters_gender_age <- list()
    for(i in 1:10) {
      ratio <- val * 0.9289 * 35 / medians_gender_age[[i]]
      names(ratio) <- paste0("Decile_", i)
      # Save each raster: gender and age group in filename
      filename <- paste0("Decil_", g, "_", group, "_", i, ".tif")
      writeRaster(ratio, filename, overwrite = TRUE)
      ratio_rasters_gender_age[[i]] <- ratio
    }
    
    # Plot the raster results for this gender and age group
    plot_title <- paste("Accessibility Ratio -", g, "Age", group)
    plot_ratio_rasters(ratio_rasters_gender_age, title = plot_title)
  }
}

# Compute median income per decile for varones
varones <- subset(eph, CH04 == 1)
medians <- compute_decile_medians(varones)
medians  # Display medians
##  [1]  200000  430000  500000  600000  800000 1000000 1100000 1237500 1575000
## [10] 2304250
# Create rasters for each decile for varones
for(i in 1:10) {
  a = val * 0.9289 * 35 / medians[[i]]
  names(a) <- paste0("Decile ", i)
  writeRaster(a, paste0("Decil_varones_", i, ".tif"), overwrite = TRUE)
}

# Stack the 10 rasters and plot them
varones_files <- paste0("Decil_varones_", 1:10, ".tif")
varones_stack <- rast(varones_files)
plot(varones_stack, 
     col = data.frame(from = c(0, 0.3, 0.5, 0.7), 
                      to = c(0.3, 0.5, 0.7, Inf),
                      color = c("green", "yellow", "orange", "red")))

# Compute accessible area below the threshold (umbral = 0.30)
umbral = 0.30
area_m2 <- global(varones_stack < umbral, fun = "sum", na.rm = TRUE) * prod(res(varones_stack))
area_hectares <- area_m2 / 10000
area_hectares
##                sum
## Decile 1      0.00
## Decile 2    293.87
## Decile 3    479.76
## Decile 4    495.84
## Decile 5    587.17
## Decile 6   2776.99
## Decile 7   4574.59
## Decile 8   7287.58
## Decile 9  18744.35
## Decile 10 22873.73
# Compute percentage of area accessed by each decile
area_total <- ifel(not.na(val), 1, 0)
area_total <- global(area_total, fun = "sum", na.rm = TRUE) * prod(res(varones_stack)) / 10000
accesibilidad_varones <- (((global(varones_stack < umbral, fun = "sum", na.rm = TRUE) * prod(res(varones_stack)) / 10000) / as.numeric(area_total)) * 100) %>% round(2)
accesibilidad_varones
##             sum
## Decile 1   0.00
## Decile 2   1.26
## Decile 3   2.05
## Decile 4   2.12
## Decile 5   2.51
## Decile 6  11.88
## Decile 7  19.58
## Decile 8  31.19
## Decile 9  80.21
## Decile 10 97.89
# Save the accessibility results
save(accesibilidad_varones, file = "accesibilidad_varones.rda")
# Subset microdata for mujeres
mujeres <- subset(eph, CH04 == 2)
medians <- compute_decile_medians(mujeres)
medians
##  [1]  150000  300000  400000  607500  710000  800000  900000 1250000 1350000
## [10] 1500000
# Create rasters for each decile for mujeres
for(i in 1:10) {
  a = val * 0.9289 * 35 / medians[[i]]
  names(a) <- paste0("Decile ", i)
  writeRaster(a, paste0("Decil_mujeres_", i, ".tif"), overwrite = TRUE)
}

# Stack the 10 rasters and plot them
mujeres_files <- paste0("Decil_mujeres_", 1:10, ".tif")
mujeres_stack <- rast(mujeres_files)
plot(mujeres_stack, 
     col = data.frame(from = c(0, 0.3, 0.5, 0.7),
                      to = c(0.3, 0.5, 0.7, Inf),
                      color = c("green", "yellow", "orange", "red")))

# Compute accessible area for mujeres
area_m2_mujeres <- global(mujeres_stack < umbral, fun = "sum", na.rm = TRUE) * prod(res(mujeres_stack))
area_hectares_mujeres <- area_m2_mujeres / 10000
area_hectares_mujeres
##                sum
## Decile 1      0.00
## Decile 2      0.11
## Decile 3    204.52
## Decile 4    495.84
## Decile 5    495.91
## Decile 6    587.17
## Decile 7   1633.16
## Decile 8   7588.07
## Decile 9  11173.39
## Decile 10 16667.87
# Compute percentage accessible area for mujeres
area_total <- ifel(not.na(val), 1, 0)
area_total <- global(area_total, fun = "sum", na.rm = TRUE) * prod(res(mujeres_stack)) / 10000
accesibilidad_mujeres <- (((global(mujeres_stack < umbral, fun = "sum", na.rm = TRUE) * prod(res(mujeres_stack)) / 10000) / as.numeric(area_total)) * 100) %>% round(2)
accesibilidad_mujeres
##             sum
## Decile 1   0.00
## Decile 2   0.00
## Decile 3   0.88
## Decile 4   2.12
## Decile 5   2.12
## Decile 6   2.51
## Decile 7   6.99
## Decile 8  32.47
## Decile 9  47.82
## Decile 10 71.33
# Save the accessibility results for mujeres
save(accesibilidad_mujeres, file = "accesibilidad_mujeres.rda")
# Define genders to loop over
genders <- c("masculino", "femenino")

for(g in genders) {
  # Subset data by gender
  eph_gender <- subset(eph, genero == g)
  
  for(group in names(age_brackets)) {
    age_range <- age_brackets[[group]]
    
    # Further subset the gender-specific data by the age bracket
    eph_gender_age <- subset(eph_gender, CH06 >= age_range[1] & CH06 <= age_range[2])
    
    # Skip analysis if there are few observations in the subgroup
    if(nrow(eph_gender_age) < 10) {
      cat("Not enough data for", g, "in age group", group, "\n")
      next
    }
    
    # Calculate median incomes by decile
    medians_gender_age <- compute_decile_medians(eph_gender_age, income_col = "P47T")
    
    ratio_rasters_gender_age <- list()
    for(i in 1:10) {
      ratio <- val * 0.9289 * 35 / medians_gender_age[[i]]
      names(ratio) <- paste0("Decile_", i)
      # Save each raster: gender and age group in filename
      filename <- paste0("Decil_", g, "_", group, "_", i, ".tif")
      writeRaster(ratio, filename, overwrite = TRUE)
      ratio_rasters_gender_age[[i]] <- ratio
    }
    
    # Define gender labels for the map title
    gender_title <- ifelse(g == "masculino", "Men", "Women")
    # Plot the raster results for this gender and age group with updated gender label
    plot_title <- paste("Accessibility Ratio -", gender_title, "Age", group)
    plot_ratio_rasters(ratio_rasters_gender_age, title = plot_title)
  }
}

library(terra)
library(dplyr)
library(moments)
library(eph)       # para get_microdata() si es necesario
library(DT)        # para datatables interactivas
## Warning: package 'DT' was built under R version 4.3.2
# umbral es el límite de accesibilidad para el raster de ratio
umbral <- 0.30

# Funcion que calcula el porcentaje de área accesible por decil
# para un subconjunto de microdatos (usa el raster global "val")
calcula_accesibilidad <- function(eph_subset) {
  medians <- compute_decile_medians(eph_subset, income_col = "P47T")
  accesibilidad_deciles <- numeric(10)
  for(i in 1:10) {
    ratio_raster <- val * 0.9289 * 35 / medians[[i]]
    # Área accesible (ha) donde el ratio < umbral:
    area_accessed <- as.numeric(global(ratio_raster < umbral, fun = "sum", na.rm = TRUE))[1] * prod(res(ratio_raster)) / 10000
    # Área total (ha) del área de estudio a partir del raster 'val':
    area_total <- as.numeric(global(ifel(!is.na(val), 1, 0), fun = "sum", na.rm = TRUE))[1] * prod(res(val)) / 10000
    accesibilidad_deciles[i] <- (area_accessed / area_total) * 100
  }
  return(accesibilidad_deciles)
}
# -----------------------------
# 1. Por Grupo Etario (con comparacion por género)
# -----------------------------
age_brackets <- list("18-23" = c(18, 23),
                     "24-29" = c(24, 29),
                     "30-35" = c(30, 35))
final_results <- list()

for(group in names(age_brackets)) {
  age_range <- age_brackets[[group]]
  accesibilidad <- list()  # para almacenar los porcentajes por género
  
  for(g in c("masculino", "femenino")) {
    eph_subset <- subset(eph, genero == g & CH06 >= age_range[1] & CH06 <= age_range[2])
    if(nrow(eph_subset) < 10) {
      cat("Not enough data for", g, "in age group", group, "\n")
      next
    }
    accesibilidad[[g]] <- calcula_accesibilidad(eph_subset)
  }
  
  if(all(c("masculino", "femenino") %in% names(accesibilidad))) {
    # Construir la tabla para el grupo etario actual
    tabla <- cbind(Masculino = accesibilidad[["masculino"]],
                   Femenino  = accesibilidad[["femenino"]])
    tabla <- as.data.frame(tabla)
    tabla$Decile <- 1:10
    tabla$Age_Group <- group  # indicador de grupo etario
    
    # Calcular porcentajes acumulados
    tabla$Cumulative_Masculino <- cumsum(tabla$Masculino)
    tabla$Cumulative_Femenino <- cumsum(tabla$Femenino)
    
    # Agregar diferencia por decil
    tabla$Diferencia <- tabla$Masculino - tabla$Femenino
    
    # Calcular estadísticas resumen
    media_diff <- mean(tabla$Diferencia, na.rm = TRUE)
    sd_diff <- sd(tabla$Diferencia, na.rm = TRUE)
    skewness_diff <- skewness(tabla$Diferencia, na.rm = TRUE)
    
    # Mostrar tabla interactiva con título
    caption_text <- paste0("Accesibilidad por Decil para el Grupo Etario: ", group, 
                           " (Masculino vs Femenino)")
    DT::datatable(tabla, caption = htmltools::tags$caption(
      style = 'text-align: center; color: black; font-size: 150%;',
      paste(caption_text)
    ))
    
    cat("-------------------------------------------------\n")
    cat("Grupo Etario:", group, "\n")
    print(tabla)
    cat("Media de la diferencia:", round(media_diff, 2), "\n")
    cat("Desviacion estándar:", round(sd_diff, 2), "\n")
    cat("Skewness:", round(skewness_diff, 2), "\n")
    cat("-------------------------------------------------\n\n")
    
    final_results[[group]] <- list(
      Tabla = tabla,
      Media_Diferencia = round(media_diff, 2),
      SD_Diferencia = round(sd_diff, 2),
      Skewness_Diferencia = round(skewness_diff, 2)
    )
  }
}
## -------------------------------------------------
## Grupo Etario: 18-23 
##    Masculino     Femenino Decile Age_Group Cumulative_Masculino
## 1   0.000000 0.000000e+00      1     18-23             0.000000
## 2   0.000000 0.000000e+00      2     18-23             0.000000
## 3   1.253310 0.000000e+00      3     18-23             1.253310
## 4   2.053089 0.000000e+00      4     18-23             3.306399
## 5   2.053089 4.707348e-04      5     18-23             5.359487
## 6   2.121901 5.473790e-01      6     18-23             7.481389
## 7   2.122201 2.053089e+00      7     18-23             9.603590
## 8  13.123916 2.122201e+00      8     18-23            22.727506
## 9  29.913701 3.210497e+00      9     18-23            52.641208
## 10 96.746238 3.562590e+01     10     18-23           149.387446
##    Cumulative_Femenino  Diferencia
## 1         0.000000e+00  0.00000000
## 2         0.000000e+00  0.00000000
## 3         0.000000e+00  1.25331012
## 4         0.000000e+00  2.05308862
## 5         4.707348e-04  2.05261788
## 6         5.478498e-01  1.57452246
## 7         2.600938e+00  0.06911243
## 8         4.723139e+00 11.00171519
## 9         7.933637e+00 26.70320425
## 10        4.355953e+01 61.12034037
## Media de la diferencia: 10.58 
## Desviacion estándar: 19.61 
## Skewness: 1.97 
## -------------------------------------------------
## 
## -------------------------------------------------
## Grupo Etario: 24-29 
##     Masculino   Femenino Decile Age_Group Cumulative_Masculino
## 1   0.5449826  0.0000000      1     24-29            0.5449826
## 2   1.9398127  0.8752245      2     24-29            2.4847953
## 3   2.0530886  2.0530886      3     24-29            4.5378839
## 4   2.1219015  2.1222011      4     24-29            6.6597854
## 5  11.8838723  2.4927978      5     24-29           18.5436577
## 6  27.3663413  2.5226252      6     24-29           45.9099989
## 7  46.5380876  7.0294406      7     24-29           92.4480865
## 8  87.8646269 37.3090742      8     24-29          180.3127134
## 9  93.7977689 57.4507911      9     24-29          274.1104823
## 10 97.9925727 80.5706761     10     24-29          372.1030550
##    Cumulative_Femenino    Diferencia
## 1            0.0000000  0.5449825657
## 2            0.8752245  1.0645882440
## 3            2.9283131  0.0000000000
## 4            5.0505141 -0.0002995585
## 5            7.5433119  9.3910745250
## 6           10.0659371 24.8437160322
## 7           17.0953777 39.5086469711
## 8           54.4044520 50.5555527026
## 9          111.8552430 36.3469777967
## 10         192.4259192 17.4218965308
## Media de la diferencia: 17.97 
## Desviacion estándar: 18.91 
## Skewness: 0.5 
## -------------------------------------------------
## 
## -------------------------------------------------
## Grupo Etario: 30-35 
##    Masculino  Femenino Decile Age_Group Cumulative_Masculino
## 1   0.000000  0.000000      1     30-35             0.000000
## 2   2.121901  2.053089      2     30-35             2.121901
## 3   2.291024  2.122201      3     30-35             4.412925
## 4   2.513211  2.512098      4     30-35             6.926136
## 5  11.883872  3.210497      5     30-35            18.810008
## 6  12.957704  7.038427      6     30-35            31.767712
## 7  24.856640 35.291804      7     30-35            56.624352
## 8  34.911878 40.998608      8     30-35            91.536230
## 9  62.452306 47.815491      9     30-35           153.988536
## 10 99.427715 71.328611     10     30-35           253.416251
##    Cumulative_Femenino    Diferencia
## 1             0.000000   0.000000000
## 2             2.053089   0.068812875
## 3             4.175290   0.168822632
## 4             6.687388   0.001112646
## 5             9.897885   8.673375066
## 6            16.936312   5.919276677
## 7            52.228116 -10.435164406
## 8            93.226725  -6.086729899
## 9           141.042216  14.636815231
## 10          212.370826  28.099104234
## Media de la diferencia: 4.1 
## Desviacion estándar: 11.01 
## Skewness: 0.94 
## -------------------------------------------------
# -----------------------------
# 2. Mujeres Solas (Edad 18-35)
# -----------------------------
eph_female <- subset(eph, genero == "femenino" & CH06 >= 18 & CH06 <= 35)
if(nrow(eph_female) < 10) {
  cat("Not enough data for mujeres in 18-35\n")
} else {
  women_access <- calcula_accesibilidad(eph_female)
  women_table <- data.frame(Decile = 1:10,
                            Accessibility_Women = round(women_access, 2),
                            Age_Group = "18-35")
  # Agregar columna de acumulados
  women_table$Cumulative_Accessibility_Women <- cumsum(women_table$Accessibility_Women)
  caption_text <- "Accesibilidad por Decil para Mujeres (Edad 18-35)"
  cat("----- Accesibilidad para Mujeres (18-35) -----\n")
  DT::datatable(women_table, caption = htmltools::tags$caption(
    style = 'text-align: center; color: black; font-size: 150%;',
    paste(caption_text)
  ))
  print(women_table)
}
## ----- Accesibilidad para Mujeres (18-35) -----
##    Decile Accessibility_Women Age_Group Cumulative_Accessibility_Women
## 1       1                0.00     18-35                           0.00
## 2       2                0.00     18-35                           0.00
## 3       3                0.88     18-35                           0.88
## 4       4                2.12     18-35                           3.00
## 5       5                2.12     18-35                           5.12
## 6       6                2.51     18-35                           7.63
## 7       7                6.99     18-35                          14.62
## 8       8               32.47     18-35                          47.09
## 9       9               47.82     18-35                          94.91
## 10     10               71.33     18-35                         166.24
# -----------------------------
# 3. Varones Solos (Edad 18-35)
# -----------------------------
eph_male <- subset(eph, genero == "masculino" & CH06 >= 18 & CH06 <= 35)
if(nrow(eph_male) < 10) {
  cat("Not enough data for varones in 18-35\n")
} else {
  men_access <- calcula_accesibilidad(eph_male)
  men_table <- data.frame(Decile = 1:10,
                          Accessibility_Men = round(men_access, 2),
                          Age_Group = "18-35")
  # Agregar columna de acumulados
  men_table$Cumulative_Accessibility_Men <- cumsum(men_table$Accessibility_Men)
  caption_text <- "Accesibilidad por Decil para Varones (Edad 18-35)"
  cat("----- Accesibilidad para Varones (18-35) -----\n")
  DT::datatable(men_table, caption = htmltools::tags$caption(
    style = 'text-align: center; color: black; font-size: 150%;',
    paste(caption_text)
  ))
  print(men_table)
}
## ----- Accesibilidad para Varones (18-35) -----
##    Decile Accessibility_Men Age_Group Cumulative_Accessibility_Men
## 1       1              0.00     18-35                         0.00
## 2       2              1.26     18-35                         1.26
## 3       3              2.05     18-35                         3.31
## 4       4              2.12     18-35                         5.43
## 5       5              2.51     18-35                         7.94
## 6       6             11.88     18-35                        19.82
## 7       7             19.58     18-35                        39.40
## 8       8             31.19     18-35                        70.59
## 9       9             80.21     18-35                       150.80
## 10     10             97.89     18-35                       248.69
# -----------------------------
# 4. Total (Ambos géneros, Edad 18-35)
# -----------------------------
eph_total <- subset(eph, CH06 >= 18 & CH06 <= 35)
if(nrow(eph_total) < 10) {
  cat("Not enough data for total (18-35)\n")
} else {
  total_access <- calcula_accesibilidad(eph_total)
  total_table <- data.frame(Decile = 1:10,
                            Accessibility_Total = round(total_access, 2),
                            Age_Group = "18-35")
  # Agregar columna de acumulados
  total_table$Cumulative_Accessibility_Total <- cumsum(total_table$Accessibility_Total)
  caption_text <- "Accesibilidad por Decil para Total (Ambos géneros, Edad 18-35)"
  cat("----- Accesibilidad para Total (18-35) -----\n")
  DT::datatable(total_table, caption = htmltools::tags$caption(
    style = 'text-align: center; color: black; font-size: 150%;',
    paste(caption_text)
  ))
  print(total_table)
}
## ----- Accesibilidad para Total (18-35) -----
##    Decile Accessibility_Total Age_Group Cumulative_Accessibility_Total
## 1       1                0.00     18-35                           0.00
## 2       2                0.55     18-35                           0.55
## 3       3                2.05     18-35                           2.60
## 4       4                2.12     18-35                           4.72
## 5       5                2.12     18-35                           6.84
## 6       6                2.56     18-35                           9.40
## 7       7               11.88     18-35                          21.28
## 8       8               28.81     18-35                          50.09
## 9       9               56.07     18-35                         106.16
## 10     10               94.94     18-35                         201.10
# -----------------------------
# 5. Tabla Comparativa: Varones vs Mujeres (Edad 18-35)
# -----------------------------
if(nrow(eph_male) >= 10 & nrow(eph_female) >= 10) {
  combined_table <- merge(men_table, women_table, by = c("Decile", "Age_Group"))
  # Las columnas resultantes son Accessibility_Men y Accessibility_Women
  combined_table$Cumulative_Accessibility_Men <- cumsum(combined_table$Accessibility_Men)
  combined_table$Cumulative_Accessibility_Women <- cumsum(combined_table$Accessibility_Women)
  combined_table$Difference <- combined_table$Accessibility_Men - combined_table$Accessibility_Women
  
  caption_text <- "Comparativa de Accesibilidad por Decil: Varones vs Mujeres (Edad 18-35)"
  cat("----- Tabla Comparativa: Varones vs Mujeres (Edad 18-35) -----\n")
  DT::datatable(combined_table, caption = htmltools::tags$caption(
    style = 'text-align: center; color: black; font-size: 150%;',
    paste(caption_text)
  ))
  print(combined_table)
} else {
  cat("Not enough data to create the combined table for 18-35.\n")
}
## ----- Tabla Comparativa: Varones vs Mujeres (Edad 18-35) -----
##    Decile Age_Group Accessibility_Men Cumulative_Accessibility_Men
## 1       1     18-35              0.00                         0.00
## 2      10     18-35             97.89                        97.89
## 3       2     18-35              1.26                        99.15
## 4       3     18-35              2.05                       101.20
## 5       4     18-35              2.12                       103.32
## 6       5     18-35              2.51                       105.83
## 7       6     18-35             11.88                       117.71
## 8       7     18-35             19.58                       137.29
## 9       8     18-35             31.19                       168.48
## 10      9     18-35             80.21                       248.69
##    Accessibility_Women Cumulative_Accessibility_Women Difference
## 1                 0.00                           0.00       0.00
## 2                71.33                          71.33      26.56
## 3                 0.00                          71.33       1.26
## 4                 0.88                          72.21       1.17
## 5                 2.12                          74.33       0.00
## 6                 2.12                          76.45       0.39
## 7                 2.51                          78.96       9.37
## 8                 6.99                          85.95      12.59
## 9                32.47                         118.42      -1.28
## 10               47.82                         166.24      32.39
# final_results contiene las tablas y estadísticas resumidas por grupo etario.
final_results
## $`18-23`
## $`18-23`$Tabla
##    Masculino     Femenino Decile Age_Group Cumulative_Masculino
## 1   0.000000 0.000000e+00      1     18-23             0.000000
## 2   0.000000 0.000000e+00      2     18-23             0.000000
## 3   1.253310 0.000000e+00      3     18-23             1.253310
## 4   2.053089 0.000000e+00      4     18-23             3.306399
## 5   2.053089 4.707348e-04      5     18-23             5.359487
## 6   2.121901 5.473790e-01      6     18-23             7.481389
## 7   2.122201 2.053089e+00      7     18-23             9.603590
## 8  13.123916 2.122201e+00      8     18-23            22.727506
## 9  29.913701 3.210497e+00      9     18-23            52.641208
## 10 96.746238 3.562590e+01     10     18-23           149.387446
##    Cumulative_Femenino  Diferencia
## 1         0.000000e+00  0.00000000
## 2         0.000000e+00  0.00000000
## 3         0.000000e+00  1.25331012
## 4         0.000000e+00  2.05308862
## 5         4.707348e-04  2.05261788
## 6         5.478498e-01  1.57452246
## 7         2.600938e+00  0.06911243
## 8         4.723139e+00 11.00171519
## 9         7.933637e+00 26.70320425
## 10        4.355953e+01 61.12034037
## 
## $`18-23`$Media_Diferencia
## [1] 10.58
## 
## $`18-23`$SD_Diferencia
## [1] 19.61
## 
## $`18-23`$Skewness_Diferencia
## [1] 1.97
## 
## 
## $`24-29`
## $`24-29`$Tabla
##     Masculino   Femenino Decile Age_Group Cumulative_Masculino
## 1   0.5449826  0.0000000      1     24-29            0.5449826
## 2   1.9398127  0.8752245      2     24-29            2.4847953
## 3   2.0530886  2.0530886      3     24-29            4.5378839
## 4   2.1219015  2.1222011      4     24-29            6.6597854
## 5  11.8838723  2.4927978      5     24-29           18.5436577
## 6  27.3663413  2.5226252      6     24-29           45.9099989
## 7  46.5380876  7.0294406      7     24-29           92.4480865
## 8  87.8646269 37.3090742      8     24-29          180.3127134
## 9  93.7977689 57.4507911      9     24-29          274.1104823
## 10 97.9925727 80.5706761     10     24-29          372.1030550
##    Cumulative_Femenino    Diferencia
## 1            0.0000000  0.5449825657
## 2            0.8752245  1.0645882440
## 3            2.9283131  0.0000000000
## 4            5.0505141 -0.0002995585
## 5            7.5433119  9.3910745250
## 6           10.0659371 24.8437160322
## 7           17.0953777 39.5086469711
## 8           54.4044520 50.5555527026
## 9          111.8552430 36.3469777967
## 10         192.4259192 17.4218965308
## 
## $`24-29`$Media_Diferencia
## [1] 17.97
## 
## $`24-29`$SD_Diferencia
## [1] 18.91
## 
## $`24-29`$Skewness_Diferencia
## [1] 0.5
## 
## 
## $`30-35`
## $`30-35`$Tabla
##    Masculino  Femenino Decile Age_Group Cumulative_Masculino
## 1   0.000000  0.000000      1     30-35             0.000000
## 2   2.121901  2.053089      2     30-35             2.121901
## 3   2.291024  2.122201      3     30-35             4.412925
## 4   2.513211  2.512098      4     30-35             6.926136
## 5  11.883872  3.210497      5     30-35            18.810008
## 6  12.957704  7.038427      6     30-35            31.767712
## 7  24.856640 35.291804      7     30-35            56.624352
## 8  34.911878 40.998608      8     30-35            91.536230
## 9  62.452306 47.815491      9     30-35           153.988536
## 10 99.427715 71.328611     10     30-35           253.416251
##    Cumulative_Femenino    Diferencia
## 1             0.000000   0.000000000
## 2             2.053089   0.068812875
## 3             4.175290   0.168822632
## 4             6.687388   0.001112646
## 5             9.897885   8.673375066
## 6            16.936312   5.919276677
## 7            52.228116 -10.435164406
## 8            93.226725  -6.086729899
## 9           141.042216  14.636815231
## 10          212.370826  28.099104234
## 
## $`30-35`$Media_Diferencia
## [1] 4.1
## 
## $`30-35`$SD_Diferencia
## [1] 11.01
## 
## $`30-35`$Skewness_Diferencia
## [1] 0.94
# -----------------------------
# Nuevo Chunk: Consolidacion de Tablas de Accesibilidad
# -----------------------------

# Crear un data frame base con los números de deciles
consolidated <- data.frame(Decile = 1:10)

# Agregar columnas para cada grupo etario (si existen en 'final_results')
age_groups <- c("18-23", "24-29", "30-35")
for(age in age_groups) {
  if(!is.null(final_results[[age]])) {
    tabla_age <- final_results[[age]]$Tabla
    tabla_age <- tabla_age[order(tabla_age$Decile), ]  # Asegurar el orden por decil
    consolidated[[paste0(age, "_Male")]]   <- tabla_age$Masculino
    consolidated[[paste0(age, "_Female")]] <- tabla_age$Femenino
  }
}

# Agregar datos para el total de jovenes (Edad 18-35)
# Se asume que 'men_table', 'women_table' y 'total_table' ya existen a partir de análisis previos.
men_table <- men_table[order(men_table$Decile), ]
women_table <- women_table[order(women_table$Decile), ]
total_table <- total_table[order(total_table$Decile), ]

consolidated[["18-35_Male"]]   <- men_table$Accessibility_Men
consolidated[["18-35_Female"]] <- women_table$Accessibility_Women
consolidated[["18-35_Total"]]  <- total_table$Accessibility_Total

# Crear la tabla acumulada (cumulative) a partir de la consolidada
cumulative <- consolidated
for(col in names(cumulative)[-1]) {
  cumulative[[col]] <- cumsum(cumulative[[col]])
}

# Mostrar las tablas consolidadas mediante DT
library(DT)

# Primera tabla: Porcentajes por decil
DT::datatable(consolidated, caption = htmltools::tags$caption(
  style = 'text-align: center; color: black; font-size: 150%;',
  "Accesibilidad por Decil - Consolidado"
))
# Segunda tabla: Porcentajes acumulados por decil
DT::datatable(cumulative, caption = htmltools::tags$caption(
  style = 'text-align: center; color: black; font-size: 150%;',
  "Accesibilidad Acumulada por Decil - Consolidado"
))
# -----------------------------
# Nuevo Chunk: Gráficos Individuales por Grupo (Plotly, renderizados por separado) con línea de igualdad
# -----------------------------
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Obtenemos la lista de grupos (columnas, excepto la columna 'Decile') de la tabla acumulada normalizada
group_list <- names(accumulated_norm)[-1]

# Generar e imprimir un gráfico interactivo por cada grupo
for(g in group_list) {
  # Creamos el data frame para cada grupo a partir del vector de accesibilidad acumulada,
  # considerando que la poblacion se distribuye en deciles (0.1, 0.2, …, 1.0)
  data_sub <- data.frame(
    Population = seq(0.1, 1, by = 0.1),
    Accessibility = accumulated_norm[[g]]
  )
  
  p <- plot_ly(data = data_sub, 
               x = ~Population, 
               y = ~Accessibility, 
               type = 'scatter', 
               mode = 'lines+markers', 
               line = list(width = 2),
               marker = list(size = 8),
               name = g) %>%
    # Agregamos la traza de la línea de igualdad (45°)
    add_trace(x = c(0, 1), y = c(0, 100),
              type = 'scatter', 
              mode = 'lines',
              line = list(dash = 'dash', color = 'gray', width = 2),
              name = "Igualdad Perfecta") %>%
    layout(title = paste("Curva de Lorenz -", g),
           xaxis = list(title = "Fraccion Acumulada de Deciles"),
           yaxis = list(title = "Accesibilidad Acumulada (%)"))
  
  # Renderizamos el gráfico; cada print() genera una caja de render independiente
  print(p)
}
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
# -----------------------------
# Nuevo Chunk: Gráfico Interactivo Consolidado con Curva de Igualdad para Comparar
# -----------------------------
library(plotly)

# Se asume que 'lorenz_df' ya contiene la informacion de las curvas de Lorenz consolidadas.
# En caso de que no se haya definido, se debe construir previamente a partir de 'accumulated_norm':
# lorenz_df contiene columnas: Group, Population (fraccion acumulada) y Accessibility (acumulada, normalizada).

# Crear gráfico interactivo con las curvas consolidadas
p_consolidated <- plot_ly()

# Agregar una traza para cada grupo (consolidado)
group_list <- unique(lorenz_df$Group)
for(g in group_list) {
  data_sub <- subset(lorenz_df, Group == g)
  p_consolidated <- p_consolidated %>%
    add_trace(data = data_sub, 
              x = ~Population, 
              y = ~Accessibility, 
              type = 'scatter', 
              mode = 'lines+markers',
              name = g,
              line = list(width = 2),
              marker = list(size = 8))
}

# Agregar la curva de igualdad (línea de Lorenz de igualdad perfecta)
# Se asume que Population varía de 0 a 1 y la accesibilidad perfecta es 100 al final.
p_consolidated <- p_consolidated %>%
  add_trace(x = c(0, 1), y = c(0, 100),
            type = 'scatter', 
            mode = 'lines',
            name = "Igualdad perfecta",
            line = list(dash = 'dash', color = 'gray', width = 2),
            showlegend = TRUE)

# Configurar el layout del gráfico
p_consolidated <- p_consolidated %>%
  layout(title = "Curva de Lorenz de Accesibilidad Consolidadas ",
         xaxis = list(title = "Fraccion Acumulada de Deciles (Poblacion)"),
         yaxis = list(title = "Accesibilidad Acumulada (%)"),
         legend = list(x = 0.1, y = 0.9))

# Mostrar el gráfico interactivo
p_consolidated
# Cálculo del Coeficiente de Gini para cada grupo
# Se usa la tabla 'consolidated' (valores no acumulados) ya que representan la "densidad" (o peso) de accesibilidad por decil.
gini_values_manual <- sapply(names(consolidated)[-1], function(col) {
  gini_manual(consolidated[[col]])
})
# Convert the manually calculated Gini coefficients (gini_values_manual) to a data frame,
# then sort them in descending order (most unequal on top) and display the result as a datatable.
gini_ordered <- sort(gini_values_manual, decreasing = TRUE)
gini_ordered_df <- data.frame(
  Group = names(gini_ordered),
  Gini  = as.numeric(gini_ordered)
)

# Display the ordered Gini coefficients as an interactive datatable
DT::datatable(gini_ordered_df, 
              caption = htmltools::tags$caption(
                style = 'text-align: center; color: black; font-size: 150%;',
                "Gini Coefficients Ordered by Inequality (Most Unequal on Top)"
              ))