# Load required libraries
library(terra)
## Warning: package 'terra' was built under R version 4.3.3
## terra 1.7.78
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
library(eph)
## Warning: package 'eph' was built under R version 4.3.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(gglorenz)
## Warning: package 'gglorenz' was built under R version 4.3.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.3.3
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:terra':
##
## rotate
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:terra':
##
## intersect, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks terra::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Set working directory to your project folder
setwd("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres")
# Load the rental value raster and display it
vut <- rast("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/vut.tif")
plot(vut)

eph <- read_excel("C:/Users/sixto/OneDrive/Documentos/EPH_usu_3_Trim_2024_xls/usu_individual_T324.xlsx")
## Warning: Expecting logical in CY2065 / R2065C103: got 'paises del exterior'
## Warning: Expecting logical in CY3899 / R3899C103: got 'la plata'
## Warning: Expecting logical in CY5387 / R5387C103: got 'todo el pais'
## Warning: Expecting logical in CY5389 / R5389C103: got 'todo el pais'
## Warning: Expecting logical in CY6239 / R6239C103: got 'cordoba'
## Warning: Expecting logical in CY7666 / R7666C103: got 'mar del plata'
## Warning: Expecting logical in CY9021 / R9021C103: got 'Latinoamerica'
## Warning: Expecting logical in CY10817 / R10817C103: got 'NO ESPECIFICA'
## Warning: Expecting logical in CY11707 / R11707C103: got 'greneral belgrano'
## Warning: Expecting logical in CY14741 / R14741C103: got 'en rutas argentinas'
## Warning: Expecting logical in CY18117 / R18117C103: got 'EN EL EXTERIOR VIA
## ZOOM Y EN UN LOCAL Q ALQUILA'
## Warning: Expecting logical in CY24897 / R24897C103: got 'NO ESPECIFICA'
## Warning: Expecting logical in CY28911 / R28911C103: got 'familiar'
## Warning: Expecting logical in CY30339 / R30339C103: got 'VUELOS NACIONALES E
## INTERNACIONALES'
## Warning: Expecting logical in CY31029 / R31029C103: got 'LA PLATA'
## Warning: Expecting logical in CY31640 / R31640C103: got 'todo el pais'
## Warning: Expecting logical in CY33636 / R33636C103: got 'todo el pais'
## Warning: Expecting logical in CY33810 / R33810C103: got 'viajes nacionales e
## internacionales'
## Warning: Expecting logical in CY40240 / R40240C103: got 'Viajar por todo el
## pais'
## Warning: Expecting logical in CY43174 / R43174C103: got 'otro lado'
## Warning: Expecting logical in CY43247 / R43247C103: got 'NO ESPECIFICA'
## Warning: Expecting logical in CY44640 / R44640C103: got 'Todos lados'
eph <- eph %>%
dplyr::filter(P47T > 0, AGLOMERADO == 32, CH12 < 9) %>%
mutate(genero = recode(CH04, `1` = "masculino", `2` = "femenino"))
eph = subset(eph, CH06 >= 18 & CH06 <=35 & AGLOMERADO == 32)
# Load the census shapefile ("manzanas") and project to EPSG:3857
manzanas <- st_read("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/manzanas_caba_censo_2010")
## Reading layer `manzanas_censo_2010' from data source
## `C:\Users\sixto\OneDrive\Documentos\Papers\accesibilidad_alquileres\manzanas_caba_censo_2010'
## using driver `ESRI Shapefile'
## Simple feature collection with 13726 features and 1 field (with 1 geometry empty)
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 93844.14 ymin: 91670.86 xmax: 111752.4 ymax: 111284.4
## Projected CRS: Gauss Krugger BA
manzanas <- st_transform(manzanas, 3857)
# Load the green spaces layer and project it
ev <- st_read("https://cdn.buenosaires.gob.ar/datosabiertos/datasets/secretaria-de-desarrollo-urbano/espacios-verdes/espacio_verde_publico.geojson")
## Reading layer `espacio_verde_publico' from data source
## `https://cdn.buenosaires.gob.ar/datosabiertos/datasets/secretaria-de-desarrollo-urbano/espacios-verdes/espacio_verde_publico.geojson'
## using driver `GeoJSON'
## Simple feature collection with 2144 features and 18 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -58.53175 ymin: -34.70557 xmax: -58.33983 ymax: -34.52657
## Geodetic CRS: WGS 84
ev <- st_transform(ev, 3857)
# Mask the rental raster using the 'manzanas' layer and crop it
val <- mask(vut, manzanas)
val <- trim(val)
val <- crop(vut, val, mask = TRUE)
plot(val)

# Exclude green spaces (using inverse masking)
val <- mask(val, ev, inverse = TRUE)
val <- trim(val)
plot(val)

# Optionally, save the processed raster
writeRaster(val, "vut_sin_calles_ni_ev.tif", overwrite = TRUE)
compute_decile_medians <- function(data, income_col = "P47T", n_deciles = 10) {
data %>%
filter(.data[[income_col]] > 0) %>%
mutate(decile = ntile(.data[[income_col]], n_deciles)) %>%
group_by(decile) %>%
summarise(median_income = median(.data[[income_col]], na.rm = TRUE), .groups = "drop") %>%
arrange(decile) %>%
pull(median_income)
}
plot_ratio_rasters <- function(raster_list, title, break_val = 0.3,
palette = c("darkgreen", "yellow", "red")) {
# Determine the maximum value among all rasters
max_val <- max(sapply(raster_list, function(r) max(values(r), na.rm = TRUE)))
# If the maximum value is <= 0.5, set top_break to 0.51; otherwise, keep max_val
top_break <- ifelse(max_val <= 0.5, 0.51, max_val)
# Fixed breaks: 0, 0.3, 0.5, and the maximum (top_break)
breaks <- c(0, 0.3, 0.5, top_break)
# Layout for 10 decile plots (modify the layout if number of rasters changes)
par(mfrow = c(2, 5), oma = c(0, 0, 2, 0))
for (i in seq_along(raster_list)) {
plot(raster_list[[i]], col = palette, breaks = breaks,
main = paste("Decile", i, "Ratio"))
}
mtext(title, outer = TRUE, cex = 1.2)
}
# Using all individuals from eph for the total analysis
medians_total <- compute_decile_medians(eph, income_col = "P47T")
# Create and store ratio rasters for each decile (1 to 10)
ratio_rasters_total <- list()
for(i in 1:10) {
# Calculate ratio: rental value factor divided by median income per decile
ratio <- val * 0.9289 * 35 / medians_total[[i]]
names(ratio) <- paste0("Decile_", i)
# Save the raster if desired
writeRaster(ratio, paste0("Decil_total_", i, ".tif"), overwrite = TRUE)
ratio_rasters_total[[i]] <- ratio
}
# Plot the total analysis ratio rasters
plot_ratio_rasters(ratio_rasters_total, title = "Accessibility Ratio - Total")

# Separate data by gender
eph_male <- subset(eph, genero == "masculino")
eph_female <- subset(eph, genero == "femenino")
# --- Male Analysis ---
medians_male <- compute_decile_medians(eph_male, income_col = "P47T")
ratio_rasters_male <- list()
for(i in 1:10) {
ratio <- val * 0.9289 * 35 / medians_male[[i]]
names(ratio) <- paste0("Decile_", i)
writeRaster(ratio, paste0("Decil_male_", i, ".tif"), overwrite = TRUE)
ratio_rasters_male[[i]] <- ratio
}
# --- Female Analysis ---
medians_female <- compute_decile_medians(eph_female, income_col = "P47T")
ratio_rasters_female <- list()
for(i in 1:10) {
ratio <- val * 0.9289 * 35 / medians_female[[i]]
names(ratio) <- paste0("Decile_", i)
writeRaster(ratio, paste0("Decil_female_", i, ".tif"), overwrite = TRUE)
ratio_rasters_female[[i]] <- ratio
}
# Plot side-by-side comparisons
par(mfrow = c(2,1), oma = c(0, 0, 2, 0))
plot_ratio_rasters(ratio_rasters_male, title = "Accessibility Ratio - Males")

plot_ratio_rasters(ratio_rasters_female, title = "Accessibility Ratio - Females")

# Define age brackets
age_brackets <- list("18-23" = c(18,23),
"24-29" = c(24,29),
"30-35" = c(30,35))
# Loop through each age bracket for analysis
for(group in names(age_brackets)) {
age_range <- age_brackets[[group]]
# Subset microdata to the current age bracket
eph_age <- subset(eph, CH06 >= age_range[1] & CH06 <= age_range[2])
# Calculate median incomes by decile
medians_age <- compute_decile_medians(eph_age, income_col = "P47T")
ratio_rasters_age <- list()
for(i in 1:10) {
ratio <- val * 0.9289 * 35 / medians_age[[i]]
names(ratio) <- paste0("Decile_", i)
# Save the raster, include age group in the filename
writeRaster(ratio, paste0("Decil_age_", group, "_", i, ".tif"), overwrite = TRUE)
ratio_rasters_age[[i]] <- ratio
}
# Plot the ratio rasters for the current age bracket
plot_ratio_rasters(ratio_rasters_age, title = paste("Accessibility Ratio - Age Group", group))
}



# Further breakdown by both gender and age
genders <- c("masculino", "femenino")
for(g in genders) {
# Subset data by gender
eph_gender <- subset(eph, genero == g)
for(group in names(age_brackets)) {
age_range <- age_brackets[[group]]
# Further subset the gender-specific data by the age bracket
eph_gender_age <- subset(eph_gender, CH06 >= age_range[1] & CH06 <= age_range[2])
# Skip analysis if there are few observations in the subgroup
if(nrow(eph_gender_age) < 10) {
cat("Not enough data for", g, "in age group", group, "\n")
next
}
# Calculate median incomes by decile
medians_gender_age <- compute_decile_medians(eph_gender_age, income_col = "P47T")
ratio_rasters_gender_age <- list()
for(i in 1:10) {
ratio <- val * 0.9289 * 35 / medians_gender_age[[i]]
names(ratio) <- paste0("Decile_", i)
# Save each raster: gender and age group in filename
filename <- paste0("Decil_", g, "_", group, "_", i, ".tif")
writeRaster(ratio, filename, overwrite = TRUE)
ratio_rasters_gender_age[[i]] <- ratio
}
# Plot the raster results for this gender and age group
plot_title <- paste("Accessibility Ratio -", g, "Age", group)
plot_ratio_rasters(ratio_rasters_gender_age, title = plot_title)
}
}






# Compute median income per decile for varones
varones <- subset(eph, CH04 == 1)
medians <- compute_decile_medians(varones)
medians # Display medians
## [1] 200000 430000 500000 600000 800000 1000000 1100000 1237500 1575000
## [10] 2304250
# Create rasters for each decile for varones
for(i in 1:10) {
a = val * 0.9289 * 35 / medians[[i]]
names(a) <- paste0("Decile ", i)
writeRaster(a, paste0("Decil_varones_", i, ".tif"), overwrite = TRUE)
}
# Stack the 10 rasters and plot them
varones_files <- paste0("Decil_varones_", 1:10, ".tif")
varones_stack <- rast(varones_files)
plot(varones_stack,
col = data.frame(from = c(0, 0.3, 0.5, 0.7),
to = c(0.3, 0.5, 0.7, Inf),
color = c("green", "yellow", "orange", "red")))

# Compute accessible area below the threshold (umbral = 0.30)
umbral = 0.30
area_m2 <- global(varones_stack < umbral, fun = "sum", na.rm = TRUE) * prod(res(varones_stack))
area_hectares <- area_m2 / 10000
area_hectares
## sum
## Decile 1 0.00
## Decile 2 293.87
## Decile 3 479.76
## Decile 4 495.84
## Decile 5 587.17
## Decile 6 2776.99
## Decile 7 4574.59
## Decile 8 7287.58
## Decile 9 18744.35
## Decile 10 22873.73
# Compute percentage of area accessed by each decile
area_total <- ifel(not.na(val), 1, 0)
area_total <- global(area_total, fun = "sum", na.rm = TRUE) * prod(res(varones_stack)) / 10000
accesibilidad_varones <- (((global(varones_stack < umbral, fun = "sum", na.rm = TRUE) * prod(res(varones_stack)) / 10000) / as.numeric(area_total)) * 100) %>% round(2)
accesibilidad_varones
## sum
## Decile 1 0.00
## Decile 2 1.26
## Decile 3 2.05
## Decile 4 2.12
## Decile 5 2.51
## Decile 6 11.88
## Decile 7 19.58
## Decile 8 31.19
## Decile 9 80.21
## Decile 10 97.89
# Save the accessibility results
save(accesibilidad_varones, file = "accesibilidad_varones.rda")
# Subset microdata for mujeres
mujeres <- subset(eph, CH04 == 2)
medians <- compute_decile_medians(mujeres)
medians
## [1] 150000 300000 400000 607500 710000 800000 900000 1250000 1350000
## [10] 1500000
# Create rasters for each decile for mujeres
for(i in 1:10) {
a = val * 0.9289 * 35 / medians[[i]]
names(a) <- paste0("Decile ", i)
writeRaster(a, paste0("Decil_mujeres_", i, ".tif"), overwrite = TRUE)
}
# Stack the 10 rasters and plot them
mujeres_files <- paste0("Decil_mujeres_", 1:10, ".tif")
mujeres_stack <- rast(mujeres_files)
plot(mujeres_stack,
col = data.frame(from = c(0, 0.3, 0.5, 0.7),
to = c(0.3, 0.5, 0.7, Inf),
color = c("green", "yellow", "orange", "red")))

# Compute accessible area for mujeres
area_m2_mujeres <- global(mujeres_stack < umbral, fun = "sum", na.rm = TRUE) * prod(res(mujeres_stack))
area_hectares_mujeres <- area_m2_mujeres / 10000
area_hectares_mujeres
## sum
## Decile 1 0.00
## Decile 2 0.11
## Decile 3 204.52
## Decile 4 495.84
## Decile 5 495.91
## Decile 6 587.17
## Decile 7 1633.16
## Decile 8 7588.07
## Decile 9 11173.39
## Decile 10 16667.87
# Compute percentage accessible area for mujeres
area_total <- ifel(not.na(val), 1, 0)
area_total <- global(area_total, fun = "sum", na.rm = TRUE) * prod(res(mujeres_stack)) / 10000
accesibilidad_mujeres <- (((global(mujeres_stack < umbral, fun = "sum", na.rm = TRUE) * prod(res(mujeres_stack)) / 10000) / as.numeric(area_total)) * 100) %>% round(2)
accesibilidad_mujeres
## sum
## Decile 1 0.00
## Decile 2 0.00
## Decile 3 0.88
## Decile 4 2.12
## Decile 5 2.12
## Decile 6 2.51
## Decile 7 6.99
## Decile 8 32.47
## Decile 9 47.82
## Decile 10 71.33
# Save the accessibility results for mujeres
save(accesibilidad_mujeres, file = "accesibilidad_mujeres.rda")
# Define genders to loop over
genders <- c("masculino", "femenino")
for(g in genders) {
# Subset data by gender
eph_gender <- subset(eph, genero == g)
for(group in names(age_brackets)) {
age_range <- age_brackets[[group]]
# Further subset the gender-specific data by the age bracket
eph_gender_age <- subset(eph_gender, CH06 >= age_range[1] & CH06 <= age_range[2])
# Skip analysis if there are few observations in the subgroup
if(nrow(eph_gender_age) < 10) {
cat("Not enough data for", g, "in age group", group, "\n")
next
}
# Calculate median incomes by decile
medians_gender_age <- compute_decile_medians(eph_gender_age, income_col = "P47T")
ratio_rasters_gender_age <- list()
for(i in 1:10) {
ratio <- val * 0.9289 * 35 / medians_gender_age[[i]]
names(ratio) <- paste0("Decile_", i)
# Save each raster: gender and age group in filename
filename <- paste0("Decil_", g, "_", group, "_", i, ".tif")
writeRaster(ratio, filename, overwrite = TRUE)
ratio_rasters_gender_age[[i]] <- ratio
}
# Define gender labels for the map title
gender_title <- ifelse(g == "masculino", "Men", "Women")
# Plot the raster results for this gender and age group with updated gender label
plot_title <- paste("Accessibility Ratio -", gender_title, "Age", group)
plot_ratio_rasters(ratio_rasters_gender_age, title = plot_title)
}
}






library(terra)
library(dplyr)
library(moments)
library(eph) # para get_microdata() si es necesario
library(DT) # para datatables interactivas
## Warning: package 'DT' was built under R version 4.3.2
# umbral es el límite de accesibilidad para el raster de ratio
umbral <- 0.30
# Funcion que calcula el porcentaje de área accesible por decil
# para un subconjunto de microdatos (usa el raster global "val")
calcula_accesibilidad <- function(eph_subset) {
medians <- compute_decile_medians(eph_subset, income_col = "P47T")
accesibilidad_deciles <- numeric(10)
for(i in 1:10) {
ratio_raster <- val * 0.9289 * 35 / medians[[i]]
# Área accesible (ha) donde el ratio < umbral:
area_accessed <- as.numeric(global(ratio_raster < umbral, fun = "sum", na.rm = TRUE))[1] * prod(res(ratio_raster)) / 10000
# Área total (ha) del área de estudio a partir del raster 'val':
area_total <- as.numeric(global(ifel(!is.na(val), 1, 0), fun = "sum", na.rm = TRUE))[1] * prod(res(val)) / 10000
accesibilidad_deciles[i] <- (area_accessed / area_total) * 100
}
return(accesibilidad_deciles)
}
# -----------------------------
# 1. Por Grupo Etario (con comparacion por género)
# -----------------------------
age_brackets <- list("18-23" = c(18, 23),
"24-29" = c(24, 29),
"30-35" = c(30, 35))
final_results <- list()
for(group in names(age_brackets)) {
age_range <- age_brackets[[group]]
accesibilidad <- list() # para almacenar los porcentajes por género
for(g in c("masculino", "femenino")) {
eph_subset <- subset(eph, genero == g & CH06 >= age_range[1] & CH06 <= age_range[2])
if(nrow(eph_subset) < 10) {
cat("Not enough data for", g, "in age group", group, "\n")
next
}
accesibilidad[[g]] <- calcula_accesibilidad(eph_subset)
}
if(all(c("masculino", "femenino") %in% names(accesibilidad))) {
# Construir la tabla para el grupo etario actual
tabla <- cbind(Masculino = accesibilidad[["masculino"]],
Femenino = accesibilidad[["femenino"]])
tabla <- as.data.frame(tabla)
tabla$Decile <- 1:10
tabla$Age_Group <- group # indicador de grupo etario
# Calcular porcentajes acumulados
tabla$Cumulative_Masculino <- cumsum(tabla$Masculino)
tabla$Cumulative_Femenino <- cumsum(tabla$Femenino)
# Agregar diferencia por decil
tabla$Diferencia <- tabla$Masculino - tabla$Femenino
# Calcular estadísticas resumen
media_diff <- mean(tabla$Diferencia, na.rm = TRUE)
sd_diff <- sd(tabla$Diferencia, na.rm = TRUE)
skewness_diff <- skewness(tabla$Diferencia, na.rm = TRUE)
# Mostrar tabla interactiva con título
caption_text <- paste0("Accesibilidad por Decil para el Grupo Etario: ", group,
" (Masculino vs Femenino)")
DT::datatable(tabla, caption = htmltools::tags$caption(
style = 'text-align: center; color: black; font-size: 150%;',
paste(caption_text)
))
cat("-------------------------------------------------\n")
cat("Grupo Etario:", group, "\n")
print(tabla)
cat("Media de la diferencia:", round(media_diff, 2), "\n")
cat("Desviacion estándar:", round(sd_diff, 2), "\n")
cat("Skewness:", round(skewness_diff, 2), "\n")
cat("-------------------------------------------------\n\n")
final_results[[group]] <- list(
Tabla = tabla,
Media_Diferencia = round(media_diff, 2),
SD_Diferencia = round(sd_diff, 2),
Skewness_Diferencia = round(skewness_diff, 2)
)
}
}
## -------------------------------------------------
## Grupo Etario: 18-23
## Masculino Femenino Decile Age_Group Cumulative_Masculino
## 1 0.000000 0.000000e+00 1 18-23 0.000000
## 2 0.000000 0.000000e+00 2 18-23 0.000000
## 3 1.253310 0.000000e+00 3 18-23 1.253310
## 4 2.053089 0.000000e+00 4 18-23 3.306399
## 5 2.053089 4.707348e-04 5 18-23 5.359487
## 6 2.121901 5.473790e-01 6 18-23 7.481389
## 7 2.122201 2.053089e+00 7 18-23 9.603590
## 8 13.123916 2.122201e+00 8 18-23 22.727506
## 9 29.913701 3.210497e+00 9 18-23 52.641208
## 10 96.746238 3.562590e+01 10 18-23 149.387446
## Cumulative_Femenino Diferencia
## 1 0.000000e+00 0.00000000
## 2 0.000000e+00 0.00000000
## 3 0.000000e+00 1.25331012
## 4 0.000000e+00 2.05308862
## 5 4.707348e-04 2.05261788
## 6 5.478498e-01 1.57452246
## 7 2.600938e+00 0.06911243
## 8 4.723139e+00 11.00171519
## 9 7.933637e+00 26.70320425
## 10 4.355953e+01 61.12034037
## Media de la diferencia: 10.58
## Desviacion estándar: 19.61
## Skewness: 1.97
## -------------------------------------------------
##
## -------------------------------------------------
## Grupo Etario: 24-29
## Masculino Femenino Decile Age_Group Cumulative_Masculino
## 1 0.5449826 0.0000000 1 24-29 0.5449826
## 2 1.9398127 0.8752245 2 24-29 2.4847953
## 3 2.0530886 2.0530886 3 24-29 4.5378839
## 4 2.1219015 2.1222011 4 24-29 6.6597854
## 5 11.8838723 2.4927978 5 24-29 18.5436577
## 6 27.3663413 2.5226252 6 24-29 45.9099989
## 7 46.5380876 7.0294406 7 24-29 92.4480865
## 8 87.8646269 37.3090742 8 24-29 180.3127134
## 9 93.7977689 57.4507911 9 24-29 274.1104823
## 10 97.9925727 80.5706761 10 24-29 372.1030550
## Cumulative_Femenino Diferencia
## 1 0.0000000 0.5449825657
## 2 0.8752245 1.0645882440
## 3 2.9283131 0.0000000000
## 4 5.0505141 -0.0002995585
## 5 7.5433119 9.3910745250
## 6 10.0659371 24.8437160322
## 7 17.0953777 39.5086469711
## 8 54.4044520 50.5555527026
## 9 111.8552430 36.3469777967
## 10 192.4259192 17.4218965308
## Media de la diferencia: 17.97
## Desviacion estándar: 18.91
## Skewness: 0.5
## -------------------------------------------------
##
## -------------------------------------------------
## Grupo Etario: 30-35
## Masculino Femenino Decile Age_Group Cumulative_Masculino
## 1 0.000000 0.000000 1 30-35 0.000000
## 2 2.121901 2.053089 2 30-35 2.121901
## 3 2.291024 2.122201 3 30-35 4.412925
## 4 2.513211 2.512098 4 30-35 6.926136
## 5 11.883872 3.210497 5 30-35 18.810008
## 6 12.957704 7.038427 6 30-35 31.767712
## 7 24.856640 35.291804 7 30-35 56.624352
## 8 34.911878 40.998608 8 30-35 91.536230
## 9 62.452306 47.815491 9 30-35 153.988536
## 10 99.427715 71.328611 10 30-35 253.416251
## Cumulative_Femenino Diferencia
## 1 0.000000 0.000000000
## 2 2.053089 0.068812875
## 3 4.175290 0.168822632
## 4 6.687388 0.001112646
## 5 9.897885 8.673375066
## 6 16.936312 5.919276677
## 7 52.228116 -10.435164406
## 8 93.226725 -6.086729899
## 9 141.042216 14.636815231
## 10 212.370826 28.099104234
## Media de la diferencia: 4.1
## Desviacion estándar: 11.01
## Skewness: 0.94
## -------------------------------------------------
# -----------------------------
# 2. Mujeres Solas (Edad 18-35)
# -----------------------------
eph_female <- subset(eph, genero == "femenino" & CH06 >= 18 & CH06 <= 35)
if(nrow(eph_female) < 10) {
cat("Not enough data for mujeres in 18-35\n")
} else {
women_access <- calcula_accesibilidad(eph_female)
women_table <- data.frame(Decile = 1:10,
Accessibility_Women = round(women_access, 2),
Age_Group = "18-35")
# Agregar columna de acumulados
women_table$Cumulative_Accessibility_Women <- cumsum(women_table$Accessibility_Women)
caption_text <- "Accesibilidad por Decil para Mujeres (Edad 18-35)"
cat("----- Accesibilidad para Mujeres (18-35) -----\n")
DT::datatable(women_table, caption = htmltools::tags$caption(
style = 'text-align: center; color: black; font-size: 150%;',
paste(caption_text)
))
print(women_table)
}
## ----- Accesibilidad para Mujeres (18-35) -----
## Decile Accessibility_Women Age_Group Cumulative_Accessibility_Women
## 1 1 0.00 18-35 0.00
## 2 2 0.00 18-35 0.00
## 3 3 0.88 18-35 0.88
## 4 4 2.12 18-35 3.00
## 5 5 2.12 18-35 5.12
## 6 6 2.51 18-35 7.63
## 7 7 6.99 18-35 14.62
## 8 8 32.47 18-35 47.09
## 9 9 47.82 18-35 94.91
## 10 10 71.33 18-35 166.24
# -----------------------------
# 3. Varones Solos (Edad 18-35)
# -----------------------------
eph_male <- subset(eph, genero == "masculino" & CH06 >= 18 & CH06 <= 35)
if(nrow(eph_male) < 10) {
cat("Not enough data for varones in 18-35\n")
} else {
men_access <- calcula_accesibilidad(eph_male)
men_table <- data.frame(Decile = 1:10,
Accessibility_Men = round(men_access, 2),
Age_Group = "18-35")
# Agregar columna de acumulados
men_table$Cumulative_Accessibility_Men <- cumsum(men_table$Accessibility_Men)
caption_text <- "Accesibilidad por Decil para Varones (Edad 18-35)"
cat("----- Accesibilidad para Varones (18-35) -----\n")
DT::datatable(men_table, caption = htmltools::tags$caption(
style = 'text-align: center; color: black; font-size: 150%;',
paste(caption_text)
))
print(men_table)
}
## ----- Accesibilidad para Varones (18-35) -----
## Decile Accessibility_Men Age_Group Cumulative_Accessibility_Men
## 1 1 0.00 18-35 0.00
## 2 2 1.26 18-35 1.26
## 3 3 2.05 18-35 3.31
## 4 4 2.12 18-35 5.43
## 5 5 2.51 18-35 7.94
## 6 6 11.88 18-35 19.82
## 7 7 19.58 18-35 39.40
## 8 8 31.19 18-35 70.59
## 9 9 80.21 18-35 150.80
## 10 10 97.89 18-35 248.69
# -----------------------------
# 4. Total (Ambos géneros, Edad 18-35)
# -----------------------------
eph_total <- subset(eph, CH06 >= 18 & CH06 <= 35)
if(nrow(eph_total) < 10) {
cat("Not enough data for total (18-35)\n")
} else {
total_access <- calcula_accesibilidad(eph_total)
total_table <- data.frame(Decile = 1:10,
Accessibility_Total = round(total_access, 2),
Age_Group = "18-35")
# Agregar columna de acumulados
total_table$Cumulative_Accessibility_Total <- cumsum(total_table$Accessibility_Total)
caption_text <- "Accesibilidad por Decil para Total (Ambos géneros, Edad 18-35)"
cat("----- Accesibilidad para Total (18-35) -----\n")
DT::datatable(total_table, caption = htmltools::tags$caption(
style = 'text-align: center; color: black; font-size: 150%;',
paste(caption_text)
))
print(total_table)
}
## ----- Accesibilidad para Total (18-35) -----
## Decile Accessibility_Total Age_Group Cumulative_Accessibility_Total
## 1 1 0.00 18-35 0.00
## 2 2 0.55 18-35 0.55
## 3 3 2.05 18-35 2.60
## 4 4 2.12 18-35 4.72
## 5 5 2.12 18-35 6.84
## 6 6 2.56 18-35 9.40
## 7 7 11.88 18-35 21.28
## 8 8 28.81 18-35 50.09
## 9 9 56.07 18-35 106.16
## 10 10 94.94 18-35 201.10
# -----------------------------
# 5. Tabla Comparativa: Varones vs Mujeres (Edad 18-35)
# -----------------------------
if(nrow(eph_male) >= 10 & nrow(eph_female) >= 10) {
combined_table <- merge(men_table, women_table, by = c("Decile", "Age_Group"))
# Las columnas resultantes son Accessibility_Men y Accessibility_Women
combined_table$Cumulative_Accessibility_Men <- cumsum(combined_table$Accessibility_Men)
combined_table$Cumulative_Accessibility_Women <- cumsum(combined_table$Accessibility_Women)
combined_table$Difference <- combined_table$Accessibility_Men - combined_table$Accessibility_Women
caption_text <- "Comparativa de Accesibilidad por Decil: Varones vs Mujeres (Edad 18-35)"
cat("----- Tabla Comparativa: Varones vs Mujeres (Edad 18-35) -----\n")
DT::datatable(combined_table, caption = htmltools::tags$caption(
style = 'text-align: center; color: black; font-size: 150%;',
paste(caption_text)
))
print(combined_table)
} else {
cat("Not enough data to create the combined table for 18-35.\n")
}
## ----- Tabla Comparativa: Varones vs Mujeres (Edad 18-35) -----
## Decile Age_Group Accessibility_Men Cumulative_Accessibility_Men
## 1 1 18-35 0.00 0.00
## 2 10 18-35 97.89 97.89
## 3 2 18-35 1.26 99.15
## 4 3 18-35 2.05 101.20
## 5 4 18-35 2.12 103.32
## 6 5 18-35 2.51 105.83
## 7 6 18-35 11.88 117.71
## 8 7 18-35 19.58 137.29
## 9 8 18-35 31.19 168.48
## 10 9 18-35 80.21 248.69
## Accessibility_Women Cumulative_Accessibility_Women Difference
## 1 0.00 0.00 0.00
## 2 71.33 71.33 26.56
## 3 0.00 71.33 1.26
## 4 0.88 72.21 1.17
## 5 2.12 74.33 0.00
## 6 2.12 76.45 0.39
## 7 2.51 78.96 9.37
## 8 6.99 85.95 12.59
## 9 32.47 118.42 -1.28
## 10 47.82 166.24 32.39
# final_results contiene las tablas y estadísticas resumidas por grupo etario.
final_results
## $`18-23`
## $`18-23`$Tabla
## Masculino Femenino Decile Age_Group Cumulative_Masculino
## 1 0.000000 0.000000e+00 1 18-23 0.000000
## 2 0.000000 0.000000e+00 2 18-23 0.000000
## 3 1.253310 0.000000e+00 3 18-23 1.253310
## 4 2.053089 0.000000e+00 4 18-23 3.306399
## 5 2.053089 4.707348e-04 5 18-23 5.359487
## 6 2.121901 5.473790e-01 6 18-23 7.481389
## 7 2.122201 2.053089e+00 7 18-23 9.603590
## 8 13.123916 2.122201e+00 8 18-23 22.727506
## 9 29.913701 3.210497e+00 9 18-23 52.641208
## 10 96.746238 3.562590e+01 10 18-23 149.387446
## Cumulative_Femenino Diferencia
## 1 0.000000e+00 0.00000000
## 2 0.000000e+00 0.00000000
## 3 0.000000e+00 1.25331012
## 4 0.000000e+00 2.05308862
## 5 4.707348e-04 2.05261788
## 6 5.478498e-01 1.57452246
## 7 2.600938e+00 0.06911243
## 8 4.723139e+00 11.00171519
## 9 7.933637e+00 26.70320425
## 10 4.355953e+01 61.12034037
##
## $`18-23`$Media_Diferencia
## [1] 10.58
##
## $`18-23`$SD_Diferencia
## [1] 19.61
##
## $`18-23`$Skewness_Diferencia
## [1] 1.97
##
##
## $`24-29`
## $`24-29`$Tabla
## Masculino Femenino Decile Age_Group Cumulative_Masculino
## 1 0.5449826 0.0000000 1 24-29 0.5449826
## 2 1.9398127 0.8752245 2 24-29 2.4847953
## 3 2.0530886 2.0530886 3 24-29 4.5378839
## 4 2.1219015 2.1222011 4 24-29 6.6597854
## 5 11.8838723 2.4927978 5 24-29 18.5436577
## 6 27.3663413 2.5226252 6 24-29 45.9099989
## 7 46.5380876 7.0294406 7 24-29 92.4480865
## 8 87.8646269 37.3090742 8 24-29 180.3127134
## 9 93.7977689 57.4507911 9 24-29 274.1104823
## 10 97.9925727 80.5706761 10 24-29 372.1030550
## Cumulative_Femenino Diferencia
## 1 0.0000000 0.5449825657
## 2 0.8752245 1.0645882440
## 3 2.9283131 0.0000000000
## 4 5.0505141 -0.0002995585
## 5 7.5433119 9.3910745250
## 6 10.0659371 24.8437160322
## 7 17.0953777 39.5086469711
## 8 54.4044520 50.5555527026
## 9 111.8552430 36.3469777967
## 10 192.4259192 17.4218965308
##
## $`24-29`$Media_Diferencia
## [1] 17.97
##
## $`24-29`$SD_Diferencia
## [1] 18.91
##
## $`24-29`$Skewness_Diferencia
## [1] 0.5
##
##
## $`30-35`
## $`30-35`$Tabla
## Masculino Femenino Decile Age_Group Cumulative_Masculino
## 1 0.000000 0.000000 1 30-35 0.000000
## 2 2.121901 2.053089 2 30-35 2.121901
## 3 2.291024 2.122201 3 30-35 4.412925
## 4 2.513211 2.512098 4 30-35 6.926136
## 5 11.883872 3.210497 5 30-35 18.810008
## 6 12.957704 7.038427 6 30-35 31.767712
## 7 24.856640 35.291804 7 30-35 56.624352
## 8 34.911878 40.998608 8 30-35 91.536230
## 9 62.452306 47.815491 9 30-35 153.988536
## 10 99.427715 71.328611 10 30-35 253.416251
## Cumulative_Femenino Diferencia
## 1 0.000000 0.000000000
## 2 2.053089 0.068812875
## 3 4.175290 0.168822632
## 4 6.687388 0.001112646
## 5 9.897885 8.673375066
## 6 16.936312 5.919276677
## 7 52.228116 -10.435164406
## 8 93.226725 -6.086729899
## 9 141.042216 14.636815231
## 10 212.370826 28.099104234
##
## $`30-35`$Media_Diferencia
## [1] 4.1
##
## $`30-35`$SD_Diferencia
## [1] 11.01
##
## $`30-35`$Skewness_Diferencia
## [1] 0.94
# -----------------------------
# Nuevo Chunk: Consolidacion de Tablas de Accesibilidad
# -----------------------------
# Crear un data frame base con los números de deciles
consolidated <- data.frame(Decile = 1:10)
# Agregar columnas para cada grupo etario (si existen en 'final_results')
age_groups <- c("18-23", "24-29", "30-35")
for(age in age_groups) {
if(!is.null(final_results[[age]])) {
tabla_age <- final_results[[age]]$Tabla
tabla_age <- tabla_age[order(tabla_age$Decile), ] # Asegurar el orden por decil
consolidated[[paste0(age, "_Male")]] <- tabla_age$Masculino
consolidated[[paste0(age, "_Female")]] <- tabla_age$Femenino
}
}
# Agregar datos para el total de jovenes (Edad 18-35)
# Se asume que 'men_table', 'women_table' y 'total_table' ya existen a partir de análisis previos.
men_table <- men_table[order(men_table$Decile), ]
women_table <- women_table[order(women_table$Decile), ]
total_table <- total_table[order(total_table$Decile), ]
consolidated[["18-35_Male"]] <- men_table$Accessibility_Men
consolidated[["18-35_Female"]] <- women_table$Accessibility_Women
consolidated[["18-35_Total"]] <- total_table$Accessibility_Total
# Crear la tabla acumulada (cumulative) a partir de la consolidada
cumulative <- consolidated
for(col in names(cumulative)[-1]) {
cumulative[[col]] <- cumsum(cumulative[[col]])
}
# Mostrar las tablas consolidadas mediante DT
library(DT)
# Primera tabla: Porcentajes por decil
DT::datatable(consolidated, caption = htmltools::tags$caption(
style = 'text-align: center; color: black; font-size: 150%;',
"Accesibilidad por Decil - Consolidado"
))
# Segunda tabla: Porcentajes acumulados por decil
DT::datatable(cumulative, caption = htmltools::tags$caption(
style = 'text-align: center; color: black; font-size: 150%;',
"Accesibilidad Acumulada por Decil - Consolidado"
))
# -----------------------------
# Nuevo Chunk: Gráficos Individuales por Grupo (Plotly, renderizados por separado) con línea de igualdad
# -----------------------------
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Obtenemos la lista de grupos (columnas, excepto la columna 'Decile') de la tabla acumulada normalizada
group_list <- names(accumulated_norm)[-1]
# Generar e imprimir un gráfico interactivo por cada grupo
for(g in group_list) {
# Creamos el data frame para cada grupo a partir del vector de accesibilidad acumulada,
# considerando que la poblacion se distribuye en deciles (0.1, 0.2, …, 1.0)
data_sub <- data.frame(
Population = seq(0.1, 1, by = 0.1),
Accessibility = accumulated_norm[[g]]
)
p <- plot_ly(data = data_sub,
x = ~Population,
y = ~Accessibility,
type = 'scatter',
mode = 'lines+markers',
line = list(width = 2),
marker = list(size = 8),
name = g) %>%
# Agregamos la traza de la línea de igualdad (45°)
add_trace(x = c(0, 1), y = c(0, 100),
type = 'scatter',
mode = 'lines',
line = list(dash = 'dash', color = 'gray', width = 2),
name = "Igualdad Perfecta") %>%
layout(title = paste("Curva de Lorenz -", g),
xaxis = list(title = "Fraccion Acumulada de Deciles"),
yaxis = list(title = "Accesibilidad Acumulada (%)"))
# Renderizamos el gráfico; cada print() genera una caja de render independiente
print(p)
}
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...
# -----------------------------
# Nuevo Chunk: Gráfico Interactivo Consolidado con Curva de Igualdad para Comparar
# -----------------------------
library(plotly)
# Se asume que 'lorenz_df' ya contiene la informacion de las curvas de Lorenz consolidadas.
# En caso de que no se haya definido, se debe construir previamente a partir de 'accumulated_norm':
# lorenz_df contiene columnas: Group, Population (fraccion acumulada) y Accessibility (acumulada, normalizada).
# Crear gráfico interactivo con las curvas consolidadas
p_consolidated <- plot_ly()
# Agregar una traza para cada grupo (consolidado)
group_list <- unique(lorenz_df$Group)
for(g in group_list) {
data_sub <- subset(lorenz_df, Group == g)
p_consolidated <- p_consolidated %>%
add_trace(data = data_sub,
x = ~Population,
y = ~Accessibility,
type = 'scatter',
mode = 'lines+markers',
name = g,
line = list(width = 2),
marker = list(size = 8))
}
# Agregar la curva de igualdad (línea de Lorenz de igualdad perfecta)
# Se asume que Population varía de 0 a 1 y la accesibilidad perfecta es 100 al final.
p_consolidated <- p_consolidated %>%
add_trace(x = c(0, 1), y = c(0, 100),
type = 'scatter',
mode = 'lines',
name = "Igualdad perfecta",
line = list(dash = 'dash', color = 'gray', width = 2),
showlegend = TRUE)
# Configurar el layout del gráfico
p_consolidated <- p_consolidated %>%
layout(title = "Curva de Lorenz de Accesibilidad Consolidadas ",
xaxis = list(title = "Fraccion Acumulada de Deciles (Poblacion)"),
yaxis = list(title = "Accesibilidad Acumulada (%)"),
legend = list(x = 0.1, y = 0.9))
# Mostrar el gráfico interactivo
p_consolidated
# Cálculo del Coeficiente de Gini para cada grupo
# Se usa la tabla 'consolidated' (valores no acumulados) ya que representan la "densidad" (o peso) de accesibilidad por decil.
gini_values_manual <- sapply(names(consolidated)[-1], function(col) {
gini_manual(consolidated[[col]])
})
# Convert the manually calculated Gini coefficients (gini_values_manual) to a data frame,
# then sort them in descending order (most unequal on top) and display the result as a datatable.
gini_ordered <- sort(gini_values_manual, decreasing = TRUE)
gini_ordered_df <- data.frame(
Group = names(gini_ordered),
Gini = as.numeric(gini_ordered)
)
# Display the ordered Gini coefficients as an interactive datatable
DT::datatable(gini_ordered_df,
caption = htmltools::tags$caption(
style = 'text-align: center; color: black; font-size: 150%;',
"Gini Coefficients Ordered by Inequality (Most Unequal on Top)"
))