library(sf)
## Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
library(ggplot2)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr   1.1.3     ✔ stringr 1.5.0
## ✔ forcats 1.0.0     ✔ tibble  3.2.1
## ✔ purrr   1.0.2     ✔ tidyr   1.3.0
## ✔ readr   2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(mapview)
## Warning: package 'mapview' was built under R version 4.3.3
library(eph) 
## Warning: package 'eph' was built under R version 4.3.3
library(DT)
## Warning: package 'DT' was built under R version 4.3.2
library(spdep)
## Warning: package 'spdep' was built under R version 4.3.3
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.3.3
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
# Note: s2_options() builds an options object to pass to individual s2 calls;
# evaluating it here prints the defaults but does not change any global setting
s2::s2_options(model = "closed")  # "open" or "semi-open" are also valid models
## $model
## [1] 3
## 
## $snap
## list()
## attr(,"class")
## [1] "snap_identity"
## 
## $snap_radius
## [1] -1
## 
## $duplicate_edges
## [1] FALSE
## 
## $edge_type
## [1] 1
## 
## $validate
## [1] FALSE
## 
## $polyline_type
## [1] 1
## 
## $polyline_sibling_pairs
## [1] 2
## 
## $simplify_edge_chains
## [1] FALSE
## 
## $split_crossing_edges
## [1] FALSE
## 
## $idempotent
## [1] FALSE
## 
## $dimensions
## [1] 1 2 3
## 
## attr(,"class")
## [1] "s2_options"
# Read the shapefile
alquileres <- st_read("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/202412/alquileres_CABA_2024-12.shp")
## Reading layer `alquileres_CABA_2024-12' from data source 
##   `C:\Users\sixto\OneDrive\Documentos\Papers\accesibilidad_alquileres\202412\alquileres_CABA_2024-12.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3612 features and 8 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -58.53591 ymin: -34.75753 xmax: -58.36092 ymax: -34.49665
## Geodetic CRS:  WGS 84

We add the price per square meter (pm2).

alquileres = alquileres %>% mutate(pm2 = as.numeric(precio) / as.numeric(sp_cbrt)) %>% mutate(antig = as.numeric(antig))
## Warning: There was 1 warning in `stopifnot()`.
## ℹ In argument: `pm2 = as.numeric(precio)/as.numeric(sp_cbrt)`.
## Caused by warning:
## ! NAs introducidos por coerción
## Warning: There was 1 warning in `stopifnot()`.
## ℹ In argument: `antig = as.numeric(antig)`.
## Caused by warning:
## ! NAs introducidos por coerción
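
The coercion warnings above mean that some listings end up with missing values; a quick count (a sketch):

sum(is.na(alquileres$pm2))    # listings where price or surface failed to parse
sum(is.na(alquileres$antig))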
# Load barrios as context
barrios <- st_read("https://cdn.buenosaires.gob.ar/datosabiertos/datasets/ministerio-de-educacion/barrios/barrios.geojson")
## Reading layer `barrios' from data source 
##   `https://cdn.buenosaires.gob.ar/datosabiertos/datasets/ministerio-de-educacion/barrios/barrios.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 48 features and 6 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -58.53152 ymin: -34.70529 xmax: -58.33516 ymax: -34.52649
## Geodetic CRS:  WGS 84
alquileres = st_intersection(alquileres, barrios)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
mapview(filter(alquileres, pm2 <20000), zcol = "pm2")
mapview(filter(alquileres, antig <80), zcol = "antig")
summary(alquileres$antig)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.00   10.00   30.00   28.97   45.00  100.00     812

Now we are going to compute the distances.
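
That distance step is not carried out below, but a minimal sketch could look like this, taking a hypothetical landmark (the Obelisco) as reference:

# Distance in meters from each listing to a single reference point
obelisco <- st_sfc(st_point(c(-58.3816, -34.6037)), crs = 4326)
alquileres$dist_obelisco <- as.numeric(st_distance(alquileres, obelisco))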

First, though, I want to look at the censuses to see what changed between 2010 and 2022.

shapefiles_dir <- "C:/Users/sixto/OneDrive/Documentos/GCBA/Radios censales 2022"
viviendas_2022<- st_read(file.path(shapefiles_dir, "Radios_CABA_4._Viviendas_2022.shp"))
## Reading layer `Radios_CABA_4._Viviendas_2022' from data source 
##   `C:\Users\sixto\OneDrive\Documentos\GCBA\Radios censales 2022\Radios_CABA_4._Viviendas_2022.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3820 features and 51 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 93798.6 ymin: 91566.94 xmax: 111752.1 ymax: 111401.1
## Projected CRS: Argentina_GKBsAs
hogares_2022 <- st_read(file.path(shapefiles_dir, "Radios_CABA_5._Hogares_2022.shp"))
## Reading layer `Radios_CABA_5._Hogares_2022' from data source 
##   `C:\Users\sixto\OneDrive\Documentos\GCBA\Radios censales 2022\Radios_CABA_5._Hogares_2022.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3820 features and 174 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 93798.6 ymin: 91566.94 xmax: 111752.1 ymax: 111401.1
## Projected CRS: Argentina_GKBsAs
poblacion_2022 <- st_read(file.path(shapefiles_dir, "Radios_CABA_6._Poblacion_2022.shp"))
## Reading layer `Radios_CABA_6._Poblacion_2022' from data source 
##   `C:\Users\sixto\OneDrive\Documentos\GCBA\Radios censales 2022\Radios_CABA_6._Poblacion_2022.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3820 features and 379 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 93798.6 ymin: 91566.94 xmax: 111752.1 ymax: 111401.1
## Projected CRS: Argentina_GKBsAs
mean(hogares_2022$NBI_Z_SI)
## [1] 5.157259
sd(hogares_2022$NBI_Z_SI)
## [1] 6.630787
mapview(filter(hogares_2022,NBI_Z_SI >20 ), zcol = "NBI_Z_SI", lwd = 0)
censo_2010 = st_read("https://cdn.buenosaires.gob.ar/datosabiertos/datasets/direccion-general-de-estadisticas-y-censos/informacion-censal-por-radio/caba_radios_censales.geojson")
## Reading layer `informacion_censal_por_radio_wgs84' from data source 
##   `https://cdn.buenosaires.gob.ar/datosabiertos/datasets/direccion-general-de-estadisticas-y-censos/informacion-censal-por-radio/caba_radios_censales.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 3554 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -58.53152 ymin: -34.70529 xmax: -58.33514 ymax: -34.52755
## Geodetic CRS:  WGS 84
censo_2010 = censo_2010 %>% mutate(NBI_Z_SI = as.numeric(H_CON_NBI) /as.numeric(T_HOGAR) *100)
mapview(filter(hogares_2022,NBI_Z_SI >20 ), zcol = "NBI_Z_SI", lwd = 0)
mapview(filter(censo_2010,NBI_Z_SI >20 ), zcol = "NBI_Z_SI", lwd = 0)

Now we run Moran's I tests for CABA.

censo_2010 <- censo_2010 %>% filter(NBI_Z_SI >0)



# Create spatial weights matrix based on polygon contiguity
neighbors <- poly2nb(censo_2010)
weights <- nb2listw(neighbors, style = "W", zero.policy = TRUE)


# Calculate spatial lag for Moran's diagram
censo_2010$spatial_lag <- lag.listw(weights, censo_2010$NBI_Z_SI, zero.policy = TRUE)

# Create Moran scatterplot using ggplot2
ggplot(censo_2010, aes(x = NBI_Z_SI, y = spatial_lag)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Moran Scatterplot",
    x = "NBI_Z_SI",
    y = "Spatial Lag of NBI_Z_SI"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Calculate Moran's I
moran_test <- moran.test(censo_2010$NBI_Z_SI, weights, zero.policy = TRUE)
print(moran_test)
## 
##  Moran I test under randomisation
## 
## data:  censo_2010$NBI_Z_SI  
## weights: weights    
## 
## Moran I statistic standard deviate = 53.492, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.5413396639     -0.0003069368      0.0001025321
# Filter out rows where NBI_Z_SI is less than or equal to 0
hogares_2022 <- hogares_2022 %>% filter(NBI_Z_SI > 0)

# Check for non-finite values in NBI_Z_SI
if (any(!is.finite(hogares_2022$NBI_Z_SI))) {
  stop("Non-finite values found in NBI_Z_SI. Please clean the data.")
}

# Create spatial weights matrix based on polygon contiguity
neighbors <- poly2nb(hogares_2022)  # Contiguity-based neighbors
weights <- nb2listw(neighbors, style = "W", zero.policy = TRUE)  # Spatial weights

# Calculate spatial lag for Moran's diagram
hogares_2022$spatial_lag <- lag.listw(weights, hogares_2022$NBI_Z_SI, zero.policy = TRUE)

# Create Moran scatterplot using ggplot2
ggplot(hogares_2022, aes(x = NBI_Z_SI, y = spatial_lag)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Moran Scatterplot",
    x = "NBI_Z_SI",
    y = "Spatial Lag of NBI_Z_SI"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Calculate Moran's I
moran_test <- moran.test(hogares_2022$NBI_Z_SI, weights, zero.policy = TRUE)
print(moran_test)
## 
##  Moran I test under randomisation
## 
## data:  hogares_2022$NBI_Z_SI  
## weights: weights    
## 
## Moran I statistic standard deviate = 61.892, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.5647867903     -0.0002690342      0.0000833509
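
As a possible extension (not run here), the global statistic can be decomposed into local Moran's I values (LISA) to locate the clusters; a sketch reusing the weights built above:

# Column 1 of the result holds the local statistic Ii, column 5 its p-value
lisa <- localmoran(hogares_2022$NBI_Z_SI, weights, zero.policy = TRUE)
hogares_2022$local_I <- lisa[, 1]
hogares_2022$local_p <- lisa[, 5]
# Map only the radios with a significant local statistic
mapview(filter(hogares_2022, local_p < 0.05), zcol = "local_I", lwd = 0)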

Now let's compare the populations.

library(raster)
## Loading required package: sp
## 
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
## 
##     select
# Load the .tif file
VUT <- raster("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/vut.tif")

# Check the loaded raster
print(VUT)
## class      : RasterLayer 
## dimensions : 2525, 2285, 5769625  (nrow, ncol, ncell)
## resolution : 10, 10  (x, y)
## extent     : -6516190, -6493340, -4124477, -4099227  (xmin, xmax, ymin, ymax)
## crs        : +proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs 
## source     : vut.tif 
## names      : lyr1 
## values     : 2700, 34664.12  (min, max)
eph_data <- readxl::read_excel("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/usu_individual_T224.xlsx", na = ".")
eph <- dplyr::filter(eph_data, CH12 < 9)
eph = eph  %>% mutate(genero = case_when(CH04  == 1 ~ 'masculino', CH04  == 2 ~ "femenino"))

eph_AMBA = eph %>% filter(AGLOMERADO == 32  | AGLOMERADO == 33 )

eph_CABA = eph  %>% filter(AGLOMERADO == 32)
EPH_CABA_18_24 =  eph %>% filter(CH06 %in% (18:24),P47T !=-9, AGLOMERADO == 32 )
EPH_CABA_25_29 =  eph %>% filter(CH06 %in% (25:29) ,P47T!=-9, AGLOMERADO == 32)
EPH_CABA_18_29 =  eph %>% filter(CH06 %in% (18:29) ,P47T!=-9, AGLOMERADO == 32)


EPH_AMBA_18_24 =  eph_AMBA %>% filter(CH06 %in% (18:24),P47T !=-9 )
EPH_AMBA_25_29 =  eph_AMBA %>% filter(CH06 %in% (25:29) ,P47T!=-9)
EPH_AMBA_18_29 =  eph_AMBA %>% filter(CH06 %in% (18:29) ,P47T!=-9)

print("EPH_CABA_18_24")
## [1] "EPH_CABA_18_24"
summary(EPH_CABA_18_24$P47T) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0  158891  200000 3156000
print("EPH_CABA_25_29")
## [1] "EPH_CABA_25_29"
summary(EPH_CABA_25_29$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  305000  500000  609829  837500 2500000
print("EPH_CABA_18_29")
## [1] "EPH_CABA_18_29"
summary(EPH_CABA_18_29$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0  145000  318313  500000 3156000
print("EPH_AMBA_18_24")
## [1] "EPH_AMBA_18_24"
summary(EPH_AMBA_18_24$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0  138284  200000 3156000
print("EPH_AMBA_25_29")
## [1] "EPH_AMBA_25_29"
summary(EPH_AMBA_25_29$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   19500  300000  358866  520000 2500000
ggplot(EPH_CABA_18_24, aes(x=CH06, y=P47T, color=genero)) + 
    geom_point(size=5)  + labs(x = "Edad", y = "Ingresos ($)", color = "Genero", title = "", caption = "Fuente: EPH - 2° trimestre 2024")

ggplot(EPH_CABA_25_29, aes(x=CH06, y=P47T, color=genero)) + 
    geom_point(size=5) +
    theme_classic() + labs(x = "Edad", y = "Ingresos ($)", color = "Genero", title = "", caption = "Fuente: EPH - 2° trimestre 2024")  +
  ylim(0, 100000)
## Warning: Removed 59 rows containing missing values (`geom_point()`).

modelo_exp_multiple <- lm(P47T ~ CH06 + genero + CH12, data = eph_CABA)

summary(modelo_exp_multiple)
## 
## Call:
## lm(formula = P47T ~ CH06 + genero + CH12, data = eph_CABA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -838918 -376067 -123737  170400 6702721 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -274637.2    55189.1  -4.976 7.30e-07 ***
## CH06               2784.0      789.6   3.526 0.000436 ***
## generomasculino  144191.6    34105.9   4.228 2.52e-05 ***
## CH12              90893.7     8935.2  10.173  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 631100 on 1378 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1026, Adjusted R-squared:  0.1006 
## F-statistic:  52.5 on 3 and 1378 DF,  p-value: < 2.2e-16
p <- EPH_AMBA_25_29 %>%
  ggplot( aes(x=P47T, fill=genero)) +
    geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
    scale_fill_manual(values=c("#69b3a2", "#404080")) +
    theme_ipsum() +
    labs(fill="")
p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## not found in Windows font database

Now we rebuild the age groups keeping only positive incomes (P47T > 0):

EPH_CABA_18_24 =  eph %>% filter(CH06 %in% (18:24),P47T >0, AGLOMERADO == 32 )
EPH_CABA_25_29 =  eph %>% filter(CH06 %in% (25:29) ,P47T>0, AGLOMERADO == 32)
EPH_CABA_18_29 =  eph %>% filter(CH06 %in% (18:29) ,P47T>0, AGLOMERADO == 32)

EPH_AMBA_18_24 =  eph_AMBA %>% filter(CH06 %in% (18:24),P47T >0 )
EPH_AMBA_25_29 =  eph_AMBA %>% filter(CH06 %in% (25:29) ,P47T>0)
EPH_AMBA_18_29 =  eph_AMBA %>% filter(CH06 %in% (18:29) ,P47T>0)

summary(EPH_CABA_18_24$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15000  142500  248000  376630  487500 3156000
summary(EPH_CABA_25_29$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   40000  400000  580000  699803  900000 2500000
summary(EPH_AMBA_18_24$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10000  100000  225000  299101  400000 3156000
summary(EPH_AMBA_25_29$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10000  210000  400000  475970  600000 2500000
summary(EPH_AMBA_18_29$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10000  150000  300000  383394  500000 3156000
summary(EPH_CABA_18_29$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15000  200000  412000  548052  750000 3156000
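
These summaries are unweighted. Since the EPH is a survey, a design-consistent figure would use the expansion weights; a minimal sketch for one group, assuming the PONDERA column is present in the individual file as in the standard EPH layout:

# Survey-weighted mean income for 18-29 year olds in CABA (compare with the unweighted mean)
weighted.mean(EPH_CABA_18_29$P47T, w = EPH_CABA_18_29$PONDERA, na.rm = TRUE)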
# Load the DT package (interactive HTML tables)
library(DT)

# Summarize data and store the results in a data frame
summary_table <- data.frame(
  Group = c("EPH_CABA_18_24", "EPH_CABA_25_29", "EPH_CABA_18_29", "EPH_AMBA_18_24", "EPH_AMBA_25_29"),
  Min = c(
    min(EPH_CABA_18_24$P47T, na.rm = TRUE),
    min(EPH_CABA_25_29$P47T, na.rm = TRUE),
    min(EPH_CABA_18_29$P47T, na.rm = TRUE),
    min(EPH_AMBA_18_24$P47T, na.rm = TRUE),
    min(EPH_AMBA_25_29$P47T, na.rm = TRUE)
  ),
  Median = c(
    median(EPH_CABA_18_24$P47T, na.rm = TRUE),
    median(EPH_CABA_25_29$P47T, na.rm = TRUE),
    median(EPH_CABA_18_29$P47T, na.rm = TRUE),
    median(EPH_AMBA_18_24$P47T, na.rm = TRUE),
    median(EPH_AMBA_25_29$P47T, na.rm = TRUE)
  ),
  Mean = c(
    mean(EPH_CABA_18_24$P47T, na.rm = TRUE),
    mean(EPH_CABA_25_29$P47T, na.rm = TRUE),
    mean(EPH_CABA_18_29$P47T, na.rm = TRUE),
    mean(EPH_AMBA_18_24$P47T, na.rm = TRUE),
    mean(EPH_AMBA_25_29$P47T, na.rm = TRUE)
  ),
  Max = c(
    max(EPH_CABA_18_24$P47T, na.rm = TRUE),
    max(EPH_CABA_25_29$P47T, na.rm = TRUE),
    max(EPH_CABA_18_29$P47T, na.rm = TRUE),
    max(EPH_AMBA_18_24$P47T, na.rm = TRUE),
    max(EPH_AMBA_25_29$P47T, na.rm = TRUE)
  )
)
datatable(summary_table)
# Round the values in summary_table
summary_table$Min <- round(summary_table$Min, 2)
summary_table$Median <- round(summary_table$Median, 2)
summary_table$Mean <- round(summary_table$Mean, 2)
summary_table$Max <- round(summary_table$Max, 2)

# Print the rounded summary table
print(summary_table)
##            Group   Min Median     Mean     Max
## 1 EPH_CABA_18_24 15000 248000 376629.6 3156000
## 2 EPH_CABA_25_29 40000 580000 699803.3 2500000
## 3 EPH_CABA_18_29 15000 412000 548052.2 3156000
## 4 EPH_AMBA_18_24 10000 225000 299100.6 3156000
## 5 EPH_AMBA_25_29 10000 400000 475969.7 2500000
# Render the interactive table
datatable(summary_table)
eph_hogar <- readxl::read_excel("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/usu_hogar_T224.xlsx", na = ".")
## Warning: Expecting logical in K1570 / R1570C11: got 'casilla'
## Warning: Expecting logical in K2231 / R2231C11: got 'ph'
## Warning: Expecting logical in K2404 / R2404C11: got 'CASILLA'
## Warning: Expecting logical in K2573 / R2573C11: got 'casilla'
## Warning: Expecting logical in K3524 / R3524C11: got 'casilla'
## Warning: Expecting logical in K3690 / R3690C11: got 'DUPLEX'
## Warning: Expecting logical in K3875 / R3875C11: got 'CASILLA'
## Warning: Expecting logical in K5359 / R5359C11: got 'CASILLA'
## Warning: Expecting logical in K6030 / R6030C11: got 'CASILLA'
## Warning: Expecting logical in K7368 / R7368C11: got 'casilla'
## Warning: Expecting logical in K7448 / R7448C11: got 'CASILLA'
## Warning: Expecting logical in K8608 / R8608C11: got 'casilla'
## Warning: Expecting logical in K8793 / R8793C11: got 'lavadero'
## Warning: Expecting logical in K9510 / R9510C11: got 'casilla'
## Warning: Expecting logical in K11289 / R11289C11: got 'CASILLA'
## Warning: Expecting logical in K12469 / R12469C11: got 'DUPLEX'
## Warning: Expecting logical in K12765 / R12765C11: got 'CASILLA'
## Warning: Expecting logical in K12982 / R12982C11: got 'CASILLA'
## Warning: Expecting logical in K13088 / R13088C11: got 'PREFABRICADAS'
## Warning: Expecting logical in K13089 / R13089C11: got 'CASILLA'
## Warning: Expecting logical in K15009 / R15009C11: got 'casilla'

I split the households by composition and compute overcrowding.

# The catch-all TRUE branch must come last: case_when() uses the first matching condition
eph_hogar = eph_hogar %>% mutate(hogar = case_when(
  IX_TOT == 1 ~ "unipersonal",
  IX_TOT == 2 & IX_MEN10 == 1 ~ "Monoparental",
  IX_TOT == 3 & IX_MEN10 == 1 ~ "Pareja_hijx",
  IX_TOT == 2 & IX_MEN10 == 0 ~ "Pareja_adultos_amigxs",
  IX_TOT == 4 & IX_MEN10 == 2 ~ "Pareja_2hijxs",
  TRUE ~ "Multipersonal"
))


eph_hogar = eph_hogar %>% mutate(Tenencia = case_when(
  II7 == 1 ~ "Propietario vivienda y terreno",
  II7 == 2 ~ "Propietario de la vivienda solamente",
  II7 == 3 ~ "Inquilino / arrendatario de la vivienda",
  II7 == 4 ~ "Ocupante por pago de impuestos / expensas",
  II7 == 5 ~ "Ocupante en relación de dependencia",
  II7 == 6 ~ "Ocupante gratuito (con permiso)",
  II7 == 7 ~ "Ocupante de hecho (sin permiso)",
  II7 == 8 ~ "Está en sucesión"
))


eph_hogar = eph_hogar %>% mutate(personas_cuarto = IX_TOT / IV2)  # people per room (IV2 = number of rooms)

eph_hogar = eph_hogar %>% mutate(hacinamiento = case_when(
  personas_cuarto <= 1 ~ "Sin_Hacinamiento",
  personas_cuarto > 3 ~ "Hacinamiento",       # more than 3 people per room
  TRUE ~ "Hacinamiento_Medio"
))
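
A quick check of the categories just created (a sketch):

table(eph_hogar$hogar)
table(eph_hogar$hacinamiento)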

I subset to keep the households that contain individuals aged 18 to 29. I can do this because the individual-level and household-level databases are linked (via CODUSU).

eph_hogar_18_29= subset(eph_hogar, CODUSU %in% EPH_CABA_18_29$CODUSU)

I keep only the households that are not "Multipersonal", because we want to analyze independent households.

eph_hogar_18_29_independientes = eph_hogar_18_29 %>% filter(hogar != "Multipersonal")

Since some households combine adults over 35 with people under 29, we have to filter those out to keep the ones we are actually interested in. To do that, I take all the households that are a priori not multi-family and cross them with the individual-level EPH database, so that I keep only the individuals living in independent young households.

eph_hogar_18_29_independientes_individuos= subset(eph, CODUSU %in% eph_hogar_18_29_independientes$CODUSU)

eph_hogar_18_29_no_independientes_individuos= eph_hogar_18_29_independientes_individuos %>% filter(CH06 > 35)

What we must do now is keep the households that meet the required conditions, so we can analyze households as well as individuals. How? We take the independent-household database and remove from it the households that do not satisfy the independence condition.

eph_hogar_18_29_independientes = subset (eph_hogar_18_29_independientes,!(CODUSU %in% eph_hogar_18_29_no_independientes_individuos$CODUSU))

We summarize the information on this type of household, and then on all the households that do not meet this condition, using the most important household variables:

- II7: tenure regime (recoded above as "Tenencia")
- IPCF: per capita family income
- hacinamiento: overcrowding (computed above)
- IX_TOT: total number of household members

summary(eph_hogar_18_29_independientes$IPCF)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  250000  521250  634710  837500 3156000
summary(eph_hogar_18_29_independientes$IX_TOT)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.000   1.543   2.000   3.000
agrupado <- group_by(eph_hogar_18_29_independientes, Tenencia)
sumario <- summarise(agrupado, Cantidad = n())
sumario
## # A tibble: 5 × 2
##   Tenencia                                  Cantidad
##   <chr>                                        <int>
## 1 Inquilino / arrendatario de la vivienda         32
## 2 Ocupante gratuito (con permiso)                  3
## 3 Ocupante por pago de impuestos / expensas        4
## 4 Propietario de la vivienda solamente             2
## 5 Propietario vivienda y terreno                   5
agrupado <- group_by(eph_hogar_18_29_independientes, hogar)
sumario <- summarise(agrupado, Cantidad = n())
sumario
## # A tibble: 3 × 2
##   hogar                 Cantidad
##   <chr>                    <int>
## 1 Pareja_adultos_amigxs       19
## 2 Pareja_hijx                  3
## 3 unipersonal                 24
p <- eph_hogar_18_29_independientes %>%
  ggplot( aes(x=IPCF, fill= hogar)) +
    geom_density( color="#e9ecef", alpha=0.6, position = 'identity') +
    scale_fill_manual(values=c("#69b3a2", "#404080", "#81c33b")) +
    labs(fill="")
p

p2 <- eph_hogar_18_29_independientes %>%
  ggplot( aes(x=ITF, fill= hogar)) +
    geom_density( color="#e9ecef", alpha=0.6, position = 'identity') +
    scale_fill_manual(values=c("#69b3a2", "#404080", "#81c33b"))  +
    labs(fill="")

p2

eph_hogar_18_29_independientes_individuos= subset(eph_hogar_18_29_independientes_individuos, !(CODUSU %in% eph_hogar_18_29_no_independientes_individuos$CODUSU))

eph_hogar_18_29_independientes_individuos = eph_hogar_18_29_independientes_individuos%>% filter (P47T >1,  CH06 < 30)
summary(eph_hogar_18_29_independientes_individuos$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   40000  325000  507500  679750  812500 3156000

Now we analyze the households that contain the target population: multi-family households that include people aged 18 to 29, to understand their specific situation.

The first step is to filter for the households that contain our target population. We take the individuals of that age from the individual-level EPH; fortunately, we already did this above.

eph_hogar_18_29 =   subset(eph_hogar, CODUSU %in% EPH_CABA_18_29$CODUSU)

From this first filter I subtract the households that I already know do not belong; that is, we perform the inverse of the previous step.

eph_hogar_18_29_dependientes =  subset(eph_hogar_18_29, !(CODUSU %in% eph_hogar_18_29_independientes$CODUSU))

We want to know the individual incomes of young people living in households from which they could become independent, so we cross the databases again.

eph_individuos_18_29_dependientes = subset(eph, CODUSU %in% eph_hogar_18_29_dependientes$CODUSU) %>% filter(CH06 %in% (18:29),P47T> -1 )

eph_individuos_24_29_dependientes = subset(eph, CODUSU %in% eph_hogar_18_29_dependientes$CODUSU) %>% filter(CH06 %in% (24:29),P47T> -1 )
summary(eph_individuos_24_29_dependientes$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  310000  485000  554032  760000 1750000

We then analyze the different groups of young people aged 18 to 29: 1) those who have managed to become independent, 2) those who have not yet done so, and 3) everyone together.

eph_hogar_18_29_independientes_individuos = filter(eph_hogar_18_29_independientes_individuos,P47T> -1)

"Ingresos"
## [1] "Ingresos"
"Individuos independientes"
## [1] "Individuos independientes"
summary(eph_hogar_18_29_independientes_individuos$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   40000  325000  507500  679750  812500 3156000
"Individuos dependientes"
## [1] "Individuos dependientes"
summary(eph_individuos_18_29_dependientes$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   65000  300000  346667  505000 1750000
"Total individuos"
## [1] "Total individuos"
summary(EPH_CABA_18_29$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15000  200000  412000  548052  750000 3156000
"Edad"
## [1] "Edad"
"Individuos independientes"
## [1] "Individuos independientes"
summary(eph_hogar_18_29_independientes_individuos$CH06)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   24.00   26.00   25.32   28.00   29.00
"Individuos dependientes"
## [1] "Individuos dependientes"
summary(eph_individuos_18_29_dependientes$CH06)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   20.00   23.00   22.99   26.00   29.00
"Total individuos"
## [1] "Total individuos"
summary(EPH_CABA_18_29$CH06)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    18.0    21.0    25.0    24.4    27.0    29.0
ggplot()+ 
    geom_histogram(data = filter(eph_individuos_18_29_dependientes, P47T < 300000), aes(x=P47T) , fill="#FFFF00", alpha=1)  + labs(title = "Ingresos según condición de hogar joven", caption = "Verde = Hogares independientes, Amarillo = Hogares dependientes") + 
    geom_histogram (data = filter ( eph_hogar_18_29_independientes_individuos, P47T < 300000), aes(x=P47T), fill="#0A7029" , alpha=0.7)  + scale_x_continuous(labels = scales::comma)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot()+ 
    geom_histogram(data =eph_individuos_18_29_dependientes, aes(x=CH06) , fill="#FFFF00", alpha=0.8)  +
    geom_histogram(data =eph_hogar_18_29_independientes_individuos, aes(x=CH06), fill="#0A7029" , alpha=0.6) + labs(title = "Edad según condición de hogar joven", caption = "Verde = Hogares independientes, Amarillo = Hogares dependientes", x ="Edad")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

This gives us a fairly clear picture of the income gap involved in becoming independent. But what if we dig into young people who became independent as couples versus in single-person households?

eph_hogar_18_29_independientes_Unipersonal = eph_hogar_18_29_independientes %>% filter(hogar =="unipersonal")
eph_hogar_18_29_independientes_Pareja_amigxs = eph_hogar_18_29_independientes %>% filter(hogar =="Pareja_adultos_amigxs")

eph_individuos_18_29_independientes_Unipersonal =  subset(eph, CODUSU %in% eph_hogar_18_29_independientes_Unipersonal$CODUSU)
eph_individuos_18_29_independientes_Pareja_amigxs =  subset(eph, CODUSU %in% eph_hogar_18_29_independientes_Pareja_amigxs$CODUSU)

eph_individuos_18_29_independientes_Pareja_amigxs = eph_individuos_18_29_independientes_Pareja_amigxs %>% filter(P47T> -1)

summary(eph_individuos_18_29_independientes_Unipersonal$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  100000  250000  456000  698458  925000 3156000
summary(eph_individuos_18_29_independientes_Pareja_amigxs$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  357500  532500  710722  862500 2500000
summary(eph_individuos_18_29_dependientes$P47T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   65000  300000  346667  505000 1750000
nivel_educativo <- calculate_tabulates(base = eph_CABA,
                    x = 'CAT_OCUP', y = 'NIVEL_ED',
                    weights = 'PONDERA', add.totals = 'row',
                    add.percentage = 'col', 
                    digits = 1)
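
The tabulation is computed but never displayed; with the DT package already loaded it could be rendered as follows (a sketch):

datatable(nivel_educativo)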
eph_hogar  =  eph_hogar  %>% mutate(hogar = case_when(IX_TOT  == 1 ~ 'unipersonal', IX_TOT  == 2 & IX_MEN10 == 1 ~ 'Monoparental', IX_TOT  == 3 & IX_MEN10 == 1 ~ 'Pareja_hijx', IX_TOT  == 2 & IX_MEN10 == 0 ~ 'Pareja_adultos_amigxs' ,   IX_TOT  == 4 & IX_MEN10 == 2 ~ 'Pareja_2hijxs', TRUE  ~ "Multipersonal" ))
eph_CABA_hogar = eph_hogar  %>% filter(AGLOMERADO == 32)
result <- eph_CABA_hogar %>%
  group_by(hogar) %>%
  summarise(count = n())

# View the result
print(result) 
## # A tibble: 6 × 2
##   hogar                 count
##   <chr>                 <int>
## 1 Monoparental              6
## 2 Multipersonal           170
## 3 Pareja_2hijxs             9
## 4 Pareja_adultos_amigxs   193
## 5 Pareja_hijx              20
## 6 unipersonal             235
library(dplyr)

# Filter, group, and summarize for composition combinations
composition_combinations <- eph_CABA_hogar %>%
  filter(hogar == "Multipersonal") %>%  # Filter for multipersonal households
  group_by(IX_TOT, IX_MEN10) %>%        # Group by total people and children under 10
  summarise(count = n(), .groups = "drop") %>%  # Count occurrences
  arrange(desc(count))                  # Arrange by the most common combinations

# View the result
print(composition_combinations)
## # A tibble: 12 × 3
##    IX_TOT IX_MEN10 count
##     <dbl>    <dbl> <int>
##  1      3        0    70
##  2      4        0    46
##  3      4        1    22
##  4      5        0     9
##  5      5        1     6
##  6      5        2     4
##  7      6        0     4
##  8      6        2     3
##  9      6        1     2
## 10      7        0     2
## 11      3        2     1
## 12     10        1     1

Let's focus on the four-person households.

eph_CABA_hogar_4pax =  eph_CABA_hogar %>% filter(IX_TOT == 4)
# Create ITF deciles using ntile
eph_CABA_hogar_4pax <- eph_CABA_hogar_4pax %>%
  mutate(ITF_decile = ntile(ITF, 10))

# View the updated data frame
print(head(eph_CABA_hogar_4pax))
## # A tibble: 6 × 93
##   CODUSU    ANO4 TRIMESTRE NRO_HOGAR REALIZADA REGION MAS_500 AGLOMERADO PONDERA
##   <chr>    <dbl>     <dbl>     <dbl>     <dbl>  <dbl> <chr>        <dbl>   <dbl>
## 1 TQRMNOQ…  2024         2         1         1      1 S               32    4909
## 2 TQRMNOP…  2024         2         1         1      1 S               32    1612
## 3 TQRMNOP…  2024         2         1         1      1 S               32    1451
## 4 TQRMNOQ…  2024         2         1         1      1 S               32    1057
## 5 TQRMNOQ…  2024         2         1         1      1 S               32    2805
## 6 TQRMNOR…  2024         2         1         1      1 S               32    1802
## # ℹ 84 more variables: IV1 <dbl>, IV1_ESP <lgl>, IV2 <dbl>, IV3 <dbl>,
## #   IV3_ESP <chr>, IV4 <dbl>, IV5 <dbl>, IV6 <dbl>, IV7 <dbl>, IV7_ESP <chr>,
## #   IV8 <dbl>, IV9 <dbl>, IV10 <dbl>, IV11 <dbl>, IV12_1 <dbl>, IV12_2 <dbl>,
## #   IV12_3 <dbl>, II1 <dbl>, II2 <dbl>, II3 <dbl>, II3_1 <dbl>, II4_1 <dbl>,
## #   II4_2 <dbl>, II4_3 <dbl>, II5 <dbl>, II5_1 <dbl>, II6 <dbl>, II6_1 <dbl>,
## #   II7 <dbl>, II7_ESP <chr>, II8 <dbl>, II8_ESP <chr>, II9 <dbl>, V1 <dbl>,
## #   V2 <dbl>, V21 <dbl>, V22 <dbl>, V3 <dbl>, V4 <dbl>, V5 <dbl>, V6 <dbl>, …
library(dplyr)

# Create ITF deciles
eph_CABA_hogar_4pax <- eph_CABA_hogar_4pax %>%
  mutate(ITF_decile = ntile(ITF, 10))

# Summarize income for each decile
income_by_decile <- eph_CABA_hogar_4pax %>%
  group_by(ITF_decile) %>%
  summarise(
    min_income = min(ITF, na.rm = TRUE),
    max_income = max(ITF, na.rm = TRUE),
    mean_income = mean(ITF, na.rm = TRUE),
    .groups = 'drop'
  )

# View the result
print(income_by_decile)
## # A tibble: 10 × 4
##    ITF_decile min_income max_income mean_income
##         <int>      <dbl>      <dbl>       <dbl>
##  1          1          0          0          0 
##  2          2          0          0          0 
##  3          3          0          0          0 
##  4          4          0          0          0 
##  5          5          0     649000     255250 
##  6          6     692000     900000     836500 
##  7          7     900000    1185000    1040875 
##  8          8    1190000    1430000    1295143.
##  9          9    1460000    2250000    1864286.
## 10         10    2600000    6698000    4184714.
library(dplyr)

# Create ITF deciles
eph_CABA_hogar_4pax <- eph_CABA_hogar_4pax %>%
  mutate(ITF_decile = ntile(ITF, 10))

# Summarize income for each decile
income_by_decile <- eph_CABA_hogar_4pax %>%
  group_by(ITF_decile) %>%
  summarise(
    ITF_decile = first(ITF_decile),
    min_income = min(ITF, na.rm = TRUE),
    max_income = max(ITF, na.rm = TRUE),
    median_income = median(ITF, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  arrange(ITF_decile)  # Ensure rows are sorted by decile

# View the result
print(income_by_decile)
## # A tibble: 10 × 4
##    ITF_decile min_income max_income median_income
##         <int>      <dbl>      <dbl>         <dbl>
##  1          1          0          0             0
##  2          2          0          0             0
##  3          3          0          0             0
##  4          4          0          0             0
##  5          5          0     649000        250000
##  6          6     692000     900000        850000
##  7          7     900000    1185000       1014500
##  8          8    1190000    1430000       1300000
##  9          9    1460000    2250000       1925000
## 10         10    2600000    6698000       3350000
library(raster)
library(dplyr)

# Assuming VUT is your raster object and eph_CABA_hogar_4pax is your data frame with the ITF column

# Step 1: Calculate ITF deciles and median income for each decile
eph_CABA_hogar_4pax <- eph_CABA_hogar_4pax %>%
  mutate(ITF_decile = ntile(ITF, 10))

# Median income for each decile; 'asequibilidad' applies a 35%-of-income affordability cap
median_by_decile <- eph_CABA_hogar_4pax %>%
  group_by(ITF_decile) %>%
  summarise(median_income = median(ITF, na.rm = TRUE)) %>%
  ungroup() %>% mutate(asequibilidad = median_income * 0.35)

print(median_by_decile)
## # A tibble: 10 × 3
##    ITF_decile median_income asequibilidad
##         <int>         <dbl>         <dbl>
##  1          1             0             0
##  2          2             0             0
##  3          3             0             0
##  4          4             0             0
##  5          5        250000         87500
##  6          6        850000        297500
##  7          7       1014500        355075
##  8          8       1300000        455000
##  9          9       1925000        673750
## 10         10       3350000       1172500
library(terra)
## Warning: package 'terra' was built under R version 4.3.3
## terra 1.7.78
## 
## Attaching package: 'terra'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(dplyr)

# Step 1: Load the raster using terra
VUT <- rast("C:\\Users\\sixto\\OneDrive\\Documentos\\Papers\\accesibilidad_alquileres\\vut.tif")

# Check the current CRS
print(crs(VUT))
## [1] "PROJCRS[\"WGS 84 / Pseudo-Mercator\",\n    BASEGEOGCRS[\"WGS 84\",\n        DATUM[\"World Geodetic System 1984\",\n            ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n                LENGTHUNIT[\"metre\",1]]],\n        PRIMEM[\"Greenwich\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        ID[\"EPSG\",4326]],\n    CONVERSION[\"unnamed\",\n        METHOD[\"Popular Visualisation Pseudo Mercator\",\n            ID[\"EPSG\",1024]],\n        PARAMETER[\"Latitude of natural origin\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8801]],\n        PARAMETER[\"Longitude of natural origin\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8802]],\n        PARAMETER[\"False easting\",0,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8806]],\n        PARAMETER[\"False northing\",0,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8807]]],\n    CS[Cartesian,2],\n        AXIS[\"easting\",east,\n            ORDER[1],\n            LENGTHUNIT[\"metre\",1,\n                ID[\"EPSG\",9001]]],\n        AXIS[\"northing\",north,\n            ORDER[2],\n            LENGTHUNIT[\"metre\",1,\n                ID[\"EPSG\",9001]]]]"
# The raster already carries a CRS (WGS 84 / Pseudo-Mercator). If it were
# missing, assign it explicitly; the coordinates are in meters, so the right
# code is EPSG:3857, not EPSG:4326 (lon/lat)
crs(VUT) <- "EPSG:3857"

# Now you can compute the cell sizes using the projected raster
cell_areas <- cellSize(VUT)

# Continue with the analysis using VUT and cell_areas
plot(VUT$lyr1)
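
If lon/lat coordinates were ever needed (for example, to overlay the barrios layer), the raster could be reprojected with terra; a sketch with a hypothetical name:

# Hypothetical reprojection to lon/lat (not used below)
VUT_wgs84 <- project(VUT, "EPSG:4326")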

# Step 2: Extract raster data into a data.frame
raster_data <- as.data.frame(VUT, xy = TRUE)  # 'xy = TRUE' keeps coordinates
# Total value per pixel: lyr1 appears to be a per-m2 value; the factor of 50
# presumably represents a 50 m2 reference unit
raster_data = raster_data %>% mutate(Valor_Total = lyr1 * 50)
# Attach the median income of each decile to every row of 'raster_data'
raster_data <- raster_data %>%
  mutate(
    decil1 = median_by_decile$median_income[1],
    decil2 = median_by_decile$median_income[2],
    decil3 = median_by_decile$median_income[3],
    decil4 = median_by_decile$median_income[4],
    decil5 = median_by_decile$median_income[5],
    decil6 = median_by_decile$median_income[6],
    decil7 = median_by_decile$median_income[7],
    decil8 = median_by_decile$median_income[8],
    decil9 = median_by_decile$median_income[9],
    decil10 = median_by_decile$median_income[10]
  )
# Create new columns with the ratio Valor_Total / decilN for each decile
# (deciles with zero median income yield Inf)
raster_data <- raster_data %>%
  mutate(
    decil1_ratio = Valor_Total / decil1,
    decil2_ratio = Valor_Total / decil2,
    decil3_ratio = Valor_Total / decil3,
    decil4_ratio = Valor_Total / decil4,
    decil5_ratio = Valor_Total / decil5,
    decil6_ratio = Valor_Total / decil6,
    decil7_ratio = Valor_Total / decil7,
    decil8_ratio = Valor_Total / decil8,
    decil9_ratio = Valor_Total / decil9,
    decil10_ratio = Valor_Total / decil10
  )
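
The twenty column assignments above can be written more compactly; an equivalent sketch (same columns and values, including the Inf ratios where a decile's median income is zero):

# Compact equivalent of the decile and ratio columns
for (i in 1:10) {
  raster_data[[paste0("decil", i)]] <- median_by_decile$median_income[i]
  raster_data[[paste0("decil", i, "_ratio")]] <-
    raster_data$Valor_Total / median_by_decile$median_income[i]
}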
# Load the terra package
library(terra)

# Function to convert a data.frame column to a SpatRaster
create_raster_from_df <- function(df, col_name) {
  # Create a SpatRaster from the data frame, using x and y for coordinates
  raster_obj <- rast(df[, c("x", "y", col_name)])
  return(raster_obj)
}

# Create a SpatRaster for each decile ratio
raster_decil1_ratio <- create_raster_from_df(raster_data, "decil1_ratio")
raster_decil2_ratio <- create_raster_from_df(raster_data, "decil2_ratio")
raster_decil3_ratio <- create_raster_from_df(raster_data, "decil3_ratio")
raster_decil4_ratio <- create_raster_from_df(raster_data, "decil4_ratio")
raster_decil5_ratio <- create_raster_from_df(raster_data, "decil5_ratio")
raster_decil6_ratio <- create_raster_from_df(raster_data, "decil6_ratio")
raster_decil7_ratio <- create_raster_from_df(raster_data, "decil7_ratio")
raster_decil8_ratio <- create_raster_from_df(raster_data, "decil8_ratio")
raster_decil9_ratio <- create_raster_from_df(raster_data, "decil9_ratio")
raster_decil10_ratio <- create_raster_from_df(raster_data, "decil10_ratio")
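
The ten calls above could also be generated in one pass (a sketch; this yields a list rather than ten separate objects):

ratio_cols <- paste0("decil", 1:10, "_ratio")
raster_ratios <- lapply(ratio_cols, function(cl) create_raster_from_df(raster_data, cl))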
# Now you have rasters for each decile ratio; visualize them:
plot(raster_decil1_ratio)   # decil1_ratio
plot(raster_decil2_ratio)   # decil2_ratio
plot(raster_decil3_ratio)   # decil3_ratio
plot(raster_decil4_ratio)   # decil4_ratio
plot(raster_decil5_ratio)   # decil5_ratio
plot(raster_decil6_ratio)   # decil6_ratio
plot(raster_decil7_ratio)   # decil7_ratio
plot(raster_decil8_ratio)   # decil8_ratio
plot(raster_decil9_ratio)   # decil9_ratio
plot(raster_decil10_ratio)  # decil10_ratio

# Define breaks at the 0.35 affordability threshold
breaks <- c(0, 0.35, max(c(raster_decil1_ratio[], raster_decil2_ratio[], raster_decil3_ratio[], raster_decil4_ratio[], 
                           raster_decil5_ratio[], raster_decil6_ratio[], raster_decil7_ratio[], raster_decil8_ratio[], 
                           raster_decil9_ratio[], raster_decil10_ratio[]), na.rm = TRUE))
colors <- c("blue", "red")  # one color per interval: affordable (<= 0.35) vs. not

# Plot the rasters with consistent color scale and a break at 0.35
par(mfrow=c(2,5))  # To display 10 plots in a 2x5 grid

plot(raster_decil1_ratio, col=colors, breaks=breaks, main="Decil 1 Ratio")
plot(raster_decil2_ratio, col=colors, breaks=breaks, main="Decil 2 Ratio")
plot(raster_decil3_ratio, col=colors, breaks=breaks, main="Decil 3 Ratio")
plot(raster_decil4_ratio, col=colors, breaks=breaks, main="Decil 4 Ratio")
plot(raster_decil5_ratio, col=colors, breaks=breaks, main="Decil 5 Ratio")
plot(raster_decil6_ratio, col=colors, breaks=breaks, main="Decil 6 Ratio")
plot(raster_decil7_ratio, col=colors, breaks=breaks, main="Decil 7 Ratio")
plot(raster_decil8_ratio, col=colors, breaks=breaks, main="Decil 8 Ratio")
plot(raster_decil9_ratio, col=colors, breaks=breaks, main="Decil 9 Ratio")
plot(raster_decil10_ratio, col=colors, breaks=breaks, main="Decil 10 Ratio")

library(terra)

# Store all decile rasters in a list
raster_list <- list(raster_decil1_ratio, raster_decil2_ratio, raster_decil3_ratio, 
                    raster_decil4_ratio, raster_decil5_ratio, raster_decil6_ratio, 
                    raster_decil7_ratio, raster_decil8_ratio, raster_decil9_ratio, 
                    raster_decil10_ratio)

# Get the resolution (area of one pixel)
pixel_area <- prod(res(raster_list[[1]]))  # Assuming all rasters have the same resolution

# Initialize an empty list to store results
area_results <- vector("list", length(raster_list))

# Loop through each raster and calculate area
for (i in seq_along(raster_list)) {
  raster_values <- values(raster_list[[i]])  # Extract values
  raster_values <- raster_values[!is.na(raster_values)]  # Remove NA values
  
  # Calculate area in each category
  area_0_35 <- sum(raster_values <= 0.35) * pixel_area
  area_greater_35 <- sum(raster_values > 0.35) * pixel_area
  total_area <- area_0_35 + area_greater_35
  
  # Store results
  area_results[[i]] <- c(Decile = paste("Decile", i), 
                          Area_0_35 = area_0_35, 
                          Area_greater_35 = area_greater_35, 
                          Total_Area = total_area)
}

# Convert the list to a data frame and restore numeric columns (rbind on the
# named character vectors above coerces everything to character)
area_results <- as.data.frame(do.call(rbind, area_results))
area_results[c("Area_0_35", "Area_greater_35", "Total_Area")] <-
  lapply(area_results[c("Area_0_35", "Area_greater_35", "Total_Area")], as.numeric)

# Print the final summary table
print(area_results)
##       Decile Area_0_35 Area_greater_35 Total_Area
## 1   Decile 1         0       299585700  299585700
## 2   Decile 2         0       299585700  299585700
## 3   Decile 3         0       299585700  299585700
## 4   Decile 4         0       299585700  299585700
## 5   Decile 5         0       299585700  299585700
## 6   Decile 6   5680800       293904900  299585700
## 7   Decile 7   6621700       292964000  299585700
## 8   Decile 8  33173300       266412400  299585700
## 9   Decile 9 184013900       115571800  299585700
## 10 Decile 10 292257600         7328100  299585700
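
The same areas could be obtained directly with terra (a sketch): classify a ratio raster at the 0.35 threshold and multiply the cell counts by the pixel area.

# 1 = affordable (ratio <= 0.35), 0 = not; example with decile 10
aff <- classify(raster_list[[10]], rbind(c(-Inf, 0.35, 1), c(0.35, Inf, 0)))
freq(aff)$count * pixel_area  # area in m^2 per category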
# Ensure your deciles are in the correct order (if they are not already)
# Here we assume that Decile 1, 2, ..., 10 are in order.
area_results <- area_results[order(as.numeric(gsub("Decile ", "", area_results$Decile))), ]

# Calculate the cumulative area for the variable (Area_0_35) of interest
cumulative_area <- cumsum(area_results$Area_0_35)

# Compute the total area
total_area <- sum(as.numeric(area_results$Area_0_35))

# Calculate the cumulative share (frequency)
cumulative_share <- cumulative_area / total_area


# Create a data frame with the cumulative share in percentage format (up to 2 decimals)
cumulative_df <- data.frame(
  Decile = area_results$Decile,
  Cumulative_Area = cumulative_area,
  Cumulative_Share = paste0(round(cumulative_share * 100, 2), "%")
)

print(cumulative_df)
##       Decile Cumulative_Area Cumulative_Share
## 1   Decile 1               0               0%
## 2   Decile 2               0               0%
## 3   Decile 3               0               0%
## 4   Decile 4               0               0%
## 5   Decile 5               0               0%
## 6   Decile 6         5680800            1.09%
## 7   Decile 7        12302500            2.36%
## 8   Decile 8        45475800            8.72%
## 9   Decile 9       229489700           43.98%
## 10 Decile 10       521747300             100%
library(ineq)
lc <- Lc(area_results$Area_0_35)
plot(lc, 
     main = "Lorenz Curve for Area_0_35", 
     xlab = "Cumulative share of deciles", 
     ylab = "Cumulative share of Area_0_35", 
     col = "blue", lwd = 2)
abline(0, 1, col = "red", lty = 2)

gini_coefficient <- Gini(area_results$Area_0_35)
cat("Gini coefficient:", round(gini_coefficient, 3), "\n")
## Gini coefficient: 0.788