# Load required libraries
library(eph)
## Warning: package 'eph' was built under R version 4.3.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(terra)
## Warning: package 'terra' was built under R version 4.3.3
## terra 1.7.78
library(ineq)
library(DT)
## Warning: package 'DT' was built under R version 4.3.2
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(moments) # For skewness calculations if needed
# ---------------------------
# Functions for Reproducible Analysis
# ---------------------------
# Load individual microdata: filter and add gender
load_eph_individual <- function(year, trimester) {
get_microdata(year = year, trimester = trimester, type = "individual") %>%
filter(CH12 < 9) %>%
mutate(genero = case_when(
CH04 == 1 ~ "masculino",
CH04 == 2 ~ "femenino"
))
}
# Load household microdata and compute household fields
load_eph_hogar <- function(year, trimester) {
get_microdata(year = year, trimester = trimester, type = "hogar") %>%
mutate(
hogar = case_when(
IX_TOT == 1 ~ "unipersonal",
IX_TOT == 2 & IX_MEN10 == 1 ~ "Monoparental",
IX_TOT == 3 & IX_MEN10 == 1 ~ "Pareja_hijx",
IX_TOT == 2 & IX_MEN10 == 0 ~ "Pareja_adultos_amigxs",
IX_TOT == 2 & IX_MEN10 == 2 ~ "Pareja_2hijxs",
TRUE ~ "Multipersonal"
),
Tenencia = case_when(
II7 == 1 ~ "Propietario vivienda y terreno",
II7 == 2 ~ "Propietario de la vivienda solamente",
II7 == 3 ~ "Inquilino / arrendatario de la vivienda",
II7 == 4 ~ "Ocupante por pago de impuestos / expensas",
II7 == 5 ~ "Ocupante en relación de dependencia",
II7 == 6 ~ "Ocupante gratuito (con permiso)",
II7 == 7 ~ "Ocupante de hecho (sin permiso)",
II7 == 8 ~ "Está en sucesión"
),
personas_cuarto = IX_TOT / IV2,
hacinamiento = case_when(
personas_cuarto <= 1 ~ "Sin_Hacinamiento",
personas_cuarto > 3 ~ "Hacinamiento",
TRUE ~ "Hacinamiento_Medio"
)
)
}
# Compute decile medians for a given income variable (default: P47T)
compute_decile_medians <- function(data, income_col = "P47T", n_deciles = 10) {
data %>%
mutate(decile = ntile(.data[[income_col]], n_deciles)) %>%
group_by(decile) %>%
summarise(median_income = median(.data[[income_col]], na.rm = TRUE),
.groups = "drop") %>%
arrange(decile) %>%
pull(median_income)
}
# Load a raster and set its CRS
load_raster <- function(file_path, crs_code = "EPSG:4326") {
r <- rast(file_path)
crs(r) <- crs_code
r
}
# Create ratio rasters for each decile:
# Multiply the original raster value by 0.9289 and then by the multiplier,
# then divide by the decile median income.
create_ratio_rasters <- function(raster_obj, medians, multiplier = 50) {
val <- values(raster_obj)[, 1] * 0.9289 * multiplier
lapply(seq_along(medians), function(i) {
r_new <- raster_obj
values(r_new) <- val / medians[i]
r_new
})
}
# Plot a list of rasters using a common color scale and break
plot_ratio_rasters <- function(raster_list, break_val = 0.35,
palette = c("blue", "white", "red")) {
max_val <- max(sapply(raster_list, function(r) max(values(r), na.rm = TRUE)))
breaks <- c(0, break_val, max_val)
par(mfrow = c(2, 5))
for (i in seq_along(raster_list)) {
plot(raster_list[[i]], col = palette, breaks = breaks,
main = paste("Decile", i, "Ratio"))
}
}
# Calculate area (using pixel area) where the ratio is <= threshold for each decile
calculate_area <- function(raster_list, threshold = 0.35) {
pixel_area <- prod(res(raster_list[[1]]))
sapply(raster_list, function(r) {
sum(values(r) <= threshold, na.rm = TRUE) * pixel_area
})
}
# Compute the Gini coefficient for a given area vector
calculate_gini <- function(area_vector) {
Gini(area_vector)
}
# Analyze a subgroup (e.g., by Age and Gender) and return decile medians, ratio rasters, area vector, and Gini coefficient
analyze_group <- function(data, raster_obj, income_col = "P47T",
threshold = 0.35, multiplier = 50) {
medians <- compute_decile_medians(data, income_col = income_col, n_deciles = 10)
ratio_rasters <- create_ratio_rasters(raster_obj, medians, multiplier = multiplier)
areas <- calculate_area(ratio_rasters, threshold = threshold)
gini_val <- calculate_gini(areas)
list(medians = medians, ratio_rasters = ratio_rasters, areas = areas, gini = gini_val)
}
# Helper function: Clean and plot ratio rasters (remove CRS warnings)
plot_clean_ratio_rasters <- function(ratio_rasters, title, break_val = 0.35,
palette = c("blue", "white", "red")) {
clean_rasters <- lapply(ratio_rasters, function(r) {
r_copy <- r
crs(r_copy) <- NA
r_copy
})
par(mfrow = c(2, 5), oma = c(0, 0, 2, 0))
plot_ratio_rasters(clean_rasters, break_val = break_val, palette = palette)
mtext(title, outer = TRUE, cex = 1.2)
}
# ---------------------------
# Main Analysis Script Setup
# ---------------------------
year <- 2024
trimester <- 3
raster_path <- "C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/vut.tif"
# Load microdata
eph <- load_eph_individual(year, trimester)
eph_hogar <- load_eph_hogar(year, trimester)
# (Optional) Household data setup for CABA
eph_CABA_hogar <- eph_hogar %>% filter(AGLOMERADO == 32)
eph_CABA_hogar_4pax <- eph_CABA_hogar %>% filter(IX_TOT == 4, ITF > 0)
# Load the raster and set its CRS
vut <- load_raster(raster_path, crs_code = "EPSG:4326")
# ---------------------------
# Analysis for Multiple Age Brackets
# ---------------------------
# Define age brackets with lower and upper bounds
age_brackets <- list(
"18-24" = c(18, 24),
"25-34" = c(25, 34),
"35-44" = c(35, 44),
"45-54" = c(45, 54),
"55-64" = c(55, 99),
"65+" = c(65, 99),
"18+" = c(18, 99)
)
# Create an empty list to store results for each age bracket
results_by_age <- list()
# Loop over each age bracket
for (bracket in names(age_brackets)) {
bounds <- age_brackets[[bracket]]
lower <- bounds[1]
upper <- bounds[2]
# Filter individuals in CABA with positive income within the age range
data_age <- eph %>%
filter(CH06 >= lower, CH06 <= upper, P47T > 0, AGLOMERADO == 32)
# Separate by gender
data_m <- data_age %>% filter(genero == "masculino")
data_f <- data_age %>% filter(genero == "femenino")
# Run analysis for each gender.
# (Using the same multiplier as in your original 18-30 analysis.
# You may adjust the multiplier if needed.)
res_m <- analyze_group(data_m, vut, income_col = "P47T", threshold = 0.35,
multiplier = 38 * 0.9289)
res_f <- analyze_group(data_f, vut, income_col = "P47T", threshold = 0.35,
multiplier = 38 * 0.9289)
# Store the results for the current age bracket
results_by_age[[bracket]] <- list(
male = res_m,
female = res_f,
data = data_age
)
# Print Gini coefficients for the current age bracket
cat("Gini Coefficient for", bracket, "Masculino:", round(res_m$gini, 3), "\n")
cat("Gini Coefficient for", bracket, "Femenino:", round(res_f$gini, 3), "\n\n")
# ---------------------------
# Interactive Datatable: Income Decile Summary for this Age Bracket
# ---------------------------
decile_income <- data_age %>%
mutate(decile = ntile(P47T, 10)) %>%
group_by(decile) %>%
summarise(
min_income = min(P47T, na.rm = TRUE),
median_income = median(P47T, na.rm = TRUE),
max_income = max(P47T, na.rm = TRUE),
.groups = 'drop'
) %>%
arrange(decile)
print(datatable(decile_income,
caption = paste('Income Deciles (', bracket, '): Min, Median, and Max Income', sep = ''),
options = list(pageLength = 10)))
# ---------------------------
# Interactive Datatable: Decile Comparison for Occupied Area by Gender
# ---------------------------
decile_table <- data.frame(
Decile = 1:10,
Male_Median = res_m$medians,
Female_Median = res_f$medians,
Male_Area = res_m$areas,
Female_Area = res_f$areas
)
print(datatable(decile_table,
caption = paste('Decile Summary: Median Income and Occupied Area for ', bracket, ' Age Group by Gender', sep = ''),
options = list(pageLength = 10)))
# ---------------------------
# Interactive Plot: Percentage Difference in Area (Male vs Female)
# ---------------------------
decile_table <- decile_table %>%
mutate(Perc_Male_Female = (Male_Area / Female_Area - 1) * 100)
fig <- plot_ly(
data = decile_table,
x = ~Decile,
y = ~Perc_Male_Female,
type = 'bar',
text = ~paste('Decile:', Decile, '<br>% Difference:', round(Perc_Male_Female, 2), '%'),
hoverinfo = 'text',
marker = list(
color = 'rgb(158,202,225)',
line = list(color = 'rgb(8,48,107)', width = 1.5)
)
) %>% layout(
title = paste('Percentage Difference in Area Occupied by Males vs. Females Across Deciles (', bracket, ')', sep = ''),
xaxis = list(title = 'Decile'),
yaxis = list(title = '% Difference (Male vs Female)'),
bargap = 0.2
)
print(fig)
# ---------------------------
# Map Areas: Ratio Rasters for Each Gender
# ---------------------------
plot_clean_ratio_rasters(res_m$ratio_rasters, paste("Map Areas (Ratio Rasters) for", bracket, "Men"))
plot_clean_ratio_rasters(res_f$ratio_rasters, paste("Map Areas (Ratio Rasters) for", bracket, "Women"))
# ---------------------------
# Additional Plots: Lorenz Curves for Occupied Area
# ---------------------------
lc_m <- Lc(as.numeric(res_m$areas))
plot(lc_m, main = paste("Lorenz Curve for", bracket, "Masculino"),
xlab = "Cumulative share of deciles", ylab = "Cumulative share of Area (<= 0.35)",
col = "blue", lwd = 2)
abline(0, 1, col = "red", lty = 2)
lc_f <- Lc(as.numeric(res_f$areas))
plot(lc_f, main = paste("Lorenz Curve for", bracket, "Femenino"),
xlab = "Cumulative share of deciles", ylab = "Cumulative share of Area (<= 0.35)",
col = "blue", lwd = 2)
abline(0, 1, col = "red", lty = 2)
}
## Gini Coefficient for 18-24 Masculino: 0.733
## Gini Coefficient for 18-24 Femenino: 0.719
## Warning: Ignoring 2 observations



## Gini Coefficient for 25-34 Masculino: 0.553
## Gini Coefficient for 25-34 Femenino: 0.551



## Gini Coefficient for 35-44 Masculino: 0.508
## Gini Coefficient for 35-44 Femenino: 0.585



## Gini Coefficient for 45-54 Masculino: 0.498
## Gini Coefficient for 45-54 Femenino: 0.581


## Gini Coefficient for 55-64 Masculino: 0.633
## Gini Coefficient for 55-64 Femenino: 0.754
## Warning: Ignoring 1 observations



## Gini Coefficient for 65+ Masculino: 0.695
## Gini Coefficient for 65+ Femenino: 0.779
## Warning: Ignoring 1 observations



## Gini Coefficient for 18+ Masculino: 0.591
## Gini Coefficient for 18+ Femenino: 0.671
## Warning: Ignoring 1 observations



