# Load required libraries
library(eph)
## Warning: package 'eph' was built under R version 4.3.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(terra)
## Warning: package 'terra' was built under R version 4.3.3
## terra 1.7.78
library(ineq)
library(DT)
## Warning: package 'DT' was built under R version 4.3.2
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(moments)  # For skewness calculations if needed
# ---------------------------
# Functions for Reproducible Analysis
# ---------------------------

# Load individual microdata: filter and add gender
load_eph_individual <- function(year, trimester) {
  get_microdata(year = year, trimester = trimester, type = "individual") %>%
    filter(CH12 < 9) %>%
    mutate(genero = case_when(
      CH04 == 1 ~ "masculino",
      CH04 == 2 ~ "femenino"
    ))
}

# Load household microdata and compute household fields
load_eph_hogar <- function(year, trimester) {
  get_microdata(year = year, trimester = trimester, type = "hogar") %>%
    mutate(
      hogar = case_when(
        IX_TOT == 1 ~ "unipersonal",
        IX_TOT == 2 & IX_MEN10 == 1 ~ "Monoparental",
        IX_TOT == 3 & IX_MEN10 == 1 ~ "Pareja_hijx",
        IX_TOT == 2 & IX_MEN10 == 0 ~ "Pareja_adultos_amigxs",
        IX_TOT == 2 & IX_MEN10 == 2 ~ "Pareja_2hijxs",
        TRUE ~ "Multipersonal"
      ),
      Tenencia = case_when(
        II7 == 1 ~ "Propietario vivienda y terreno",
        II7 == 2 ~ "Propietario de la vivienda solamente",
        II7 == 3 ~ "Inquilino / arrendatario de la vivienda",
        II7 == 4 ~ "Ocupante por pago de impuestos / expensas",
        II7 == 5 ~ "Ocupante en relación de dependencia",
        II7 == 6 ~ "Ocupante gratuito (con permiso)",
        II7 == 7 ~ "Ocupante de hecho (sin permiso)",
        II7 == 8 ~ "Está en sucesión"
      ),
      personas_cuarto = IX_TOT / IV2,
      hacinamiento = case_when(
        personas_cuarto <= 1 ~ "Sin_Hacinamiento",
        personas_cuarto > 3 ~ "Hacinamiento",
        TRUE ~ "Hacinamiento_Medio"
      )
    )
}

# Compute decile medians for a given income variable (default: P47T)
compute_decile_medians <- function(data, income_col = "P47T", n_deciles = 10) {
  data %>%
    mutate(decile = ntile(.data[[income_col]], n_deciles)) %>%
    group_by(decile) %>%
    summarise(median_income = median(.data[[income_col]], na.rm = TRUE),
              .groups = "drop") %>%
    arrange(decile) %>%
    pull(median_income)
}

# Load a raster and set its CRS
load_raster <- function(file_path, crs_code = "EPSG:4326") {
  r <- rast(file_path)
  crs(r) <- crs_code
  r
}

# Create ratio rasters for each decile:
# Multiply the original raster value by 0.9289 and then by the multiplier,
# then divide by the decile median income.
create_ratio_rasters <- function(raster_obj, medians, multiplier = 50) {
  val <- values(raster_obj)[, 1] * 0.9289 * multiplier
  lapply(seq_along(medians), function(i) {
    r_new <- raster_obj
    values(r_new) <- val / medians[i]
    r_new
  })
}

# Plot a list of rasters using a common color scale and break
plot_ratio_rasters <- function(raster_list, break_val = 0.35,
                               palette = c("blue", "white", "red")) {
  max_val <- max(sapply(raster_list, function(r) max(values(r), na.rm = TRUE)))
  breaks <- c(0, break_val, max_val)
  par(mfrow = c(2, 5))
  for (i in seq_along(raster_list)) {
    plot(raster_list[[i]], col = palette, breaks = breaks,
         main = paste("Decile", i, "Ratio"))
  }
}

# Calculate area (using pixel area) where the ratio is <= threshold for each decile
calculate_area <- function(raster_list, threshold = 0.35) {
  pixel_area <- prod(res(raster_list[[1]]))
  sapply(raster_list, function(r) {
    sum(values(r) <= threshold, na.rm = TRUE) * pixel_area
  })
}

# Compute the Gini coefficient for a given area vector
calculate_gini <- function(area_vector) {
  Gini(area_vector)
}

# Analyze a subgroup (e.g., by Age and Gender) and return decile medians, ratio rasters, area vector, and Gini coefficient
analyze_group <- function(data, raster_obj, income_col = "P47T", 
                          threshold = 0.35, multiplier = 50) {
  medians <- compute_decile_medians(data, income_col = income_col, n_deciles = 10)
  ratio_rasters <- create_ratio_rasters(raster_obj, medians, multiplier = multiplier)
  areas <- calculate_area(ratio_rasters, threshold = threshold)
  gini_val <- calculate_gini(areas)
  list(medians = medians, ratio_rasters = ratio_rasters, areas = areas, gini = gini_val)
}

# Helper function: Clean and plot ratio rasters (remove CRS warnings)
plot_clean_ratio_rasters <- function(ratio_rasters, title, break_val = 0.35, 
                                       palette = c("blue", "white", "red")) {
  clean_rasters <- lapply(ratio_rasters, function(r) {
    r_copy <- r
    crs(r_copy) <- NA
    r_copy
  })
  
  par(mfrow = c(2, 5), oma = c(0, 0, 2, 0))
  plot_ratio_rasters(clean_rasters, break_val = break_val, palette = palette)
  mtext(title, outer = TRUE, cex = 1.2)
}
# ---------------------------
# Main Analysis Script Setup
# ---------------------------
year <- 2024
trimester <- 3
raster_path <- "C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/vut.tif"

# Load microdata
eph <- load_eph_individual(year, trimester)
eph_hogar <- load_eph_hogar(year, trimester)

# (Optional) Household data setup for CABA
eph_CABA_hogar <- eph_hogar %>% filter(AGLOMERADO == 32)
eph_CABA_hogar_4pax <- eph_CABA_hogar %>% filter(IX_TOT == 4, ITF > 0)

# Load the raster and set its CRS
vut <- load_raster(raster_path, crs_code = "EPSG:4326")
# ---------------------------
# Analysis for Multiple Age Brackets
# ---------------------------
# Define age brackets with lower and upper bounds
age_brackets <- list(
  "18-24" = c(18, 24),
  "25-34" = c(25, 34),
  "35-44" = c(35, 44),
  "45-54" = c(45, 54),
  "55-64" = c(55, 99),
  "65+" = c(65, 99),
  "18+" = c(18, 99)
)

# Create an empty list to store results for each age bracket
results_by_age <- list()

# Loop over each age bracket
for (bracket in names(age_brackets)) {
  bounds <- age_brackets[[bracket]]
  lower <- bounds[1]
  upper <- bounds[2]
  
  # Filter individuals in CABA with positive income within the age range
  data_age <- eph %>% 
    filter(CH06 >= lower, CH06 <= upper, P47T > 0, AGLOMERADO == 32)
  
  # Separate by gender
  data_m <- data_age %>% filter(genero == "masculino")
  data_f <- data_age %>% filter(genero == "femenino")
  
  # Run analysis for each gender.
  # (Using the same multiplier as in your original 18-30 analysis.
  # You may adjust the multiplier if needed.)
  res_m <- analyze_group(data_m, vut, income_col = "P47T", threshold = 0.35, 
                         multiplier = 32 * 0.9289)
  res_f <- analyze_group(data_f, vut, income_col = "P47T", threshold = 0.35, 
                         multiplier = 32 * 0.9289)
  
  # Store the results for the current age bracket
  results_by_age[[bracket]] <- list(
    male = res_m,
    female = res_f,
    data = data_age
  )
  
  # Print Gini coefficients for the current age bracket
  cat("Gini Coefficient for", bracket, "Masculino:", round(res_m$gini, 3), "\n")
  cat("Gini Coefficient for", bracket, "Femenino:", round(res_f$gini, 3), "\n\n")
  
  # ---------------------------
  # Interactive Datatable: Income Decile Summary for this Age Bracket
  # ---------------------------
  decile_income <- data_age %>%
    mutate(decile = ntile(P47T, 10)) %>%
    group_by(decile) %>%
    summarise(
      min_income = min(P47T, na.rm = TRUE),
      median_income = median(P47T, na.rm = TRUE),
      max_income = max(P47T, na.rm = TRUE),
      .groups = 'drop'
    ) %>%
    arrange(decile)
  
  print(datatable(decile_income, 
                  caption = paste('Income Deciles (', bracket, '): Min, Median, and Max Income', sep = ''),
                  options = list(pageLength = 10)))
  
  # ---------------------------
  # Interactive Datatable: Decile Comparison for Occupied Area by Gender
  # ---------------------------
  decile_table <- data.frame(
    Decile = 1:10,
    Male_Median = res_m$medians,
    Female_Median = res_f$medians,
    Male_Area = res_m$areas,
    Female_Area = res_f$areas
  )
  
  print(datatable(decile_table, 
                  caption = paste('Decile Summary: Median Income and Occupied Area for ', bracket, ' Age Group by Gender', sep = ''),
                  options = list(pageLength = 10)))
  
  # ---------------------------
  # Interactive Plot: Percentage Difference in Area (Male vs Female)
  # ---------------------------
  decile_table <- decile_table %>% 
    mutate(Perc_Male_Female = (Male_Area / Female_Area - 1) * 100)
  
  fig <- plot_ly(
    data = decile_table,
    x = ~Decile,
    y = ~Perc_Male_Female,
    type = 'bar',
    text = ~paste('Decile:', Decile, '<br>% Difference:', round(Perc_Male_Female, 2), '%'),
    hoverinfo = 'text',
    marker = list(
      color = 'rgb(158,202,225)',
      line = list(color = 'rgb(8,48,107)', width = 1.5)
    )
  ) %>% layout(
    title = paste('Percentage Difference in Area Occupied by Males vs. Females Across Deciles (', bracket, ')', sep = ''),
    xaxis = list(title = 'Decile'),
    yaxis = list(title = '% Difference (Male vs Female)'),
    bargap = 0.2
  )
  
  print(fig)
  
  # ---------------------------
  # Map Areas: Ratio Rasters for Each Gender
  # ---------------------------
  plot_clean_ratio_rasters(res_m$ratio_rasters, paste("Map Areas (Ratio Rasters) for", bracket, "Men"))
  plot_clean_ratio_rasters(res_f$ratio_rasters, paste("Map Areas (Ratio Rasters) for", bracket, "Women"))
  
  # ---------------------------
  # Additional Plots: Lorenz Curves for Occupied Area
  # ---------------------------
  lc_m <- Lc(as.numeric(res_m$areas))
  plot(lc_m, main = paste("Lorenz Curve for", bracket, "Masculino"), 
       xlab = "Cumulative share of deciles", ylab = "Cumulative share of Area (<= 0.35)",
       col = "blue", lwd = 2)
  abline(0, 1, col = "red", lty = 2)
  
  lc_f <- Lc(as.numeric(res_f$areas))
  plot(lc_f, main = paste("Lorenz Curve for", bracket, "Femenino"), 
       xlab = "Cumulative share of deciles", ylab = "Cumulative share of Area (<= 0.35)",
       col = "blue", lwd = 2)
  abline(0, 1, col = "red", lty = 2)
}
## Gini Coefficient for 18-24 Masculino: 0.691 
## Gini Coefficient for 18-24 Femenino: 0.716
## Warning: Ignoring 2 observations

## Gini Coefficient for 25-34 Masculino: 0.463 
## Gini Coefficient for 25-34 Femenino: 0.462

## Gini Coefficient for 35-44 Masculino: 0.428 
## Gini Coefficient for 35-44 Femenino: 0.497

## Gini Coefficient for 45-54 Masculino: 0.429 
## Gini Coefficient for 45-54 Femenino: 0.503

## Gini Coefficient for 55-64 Masculino: 0.567 
## Gini Coefficient for 55-64 Femenino: 0.699

## Gini Coefficient for 65+ Masculino: 0.632 
## Gini Coefficient for 65+ Femenino: 0.718

## Gini Coefficient for 18+ Masculino: 0.511 
## Gini Coefficient for 18+ Femenino: 0.6