library(sf)
## Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ stringr 1.5.0
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(mapview)
## Warning: package 'mapview' was built under R version 4.3.3
library(eph)
## Warning: package 'eph' was built under R version 4.3.3
library(DT)
## Warning: package 'DT' was built under R version 4.3.2
library(spdep)
## Warning: package 'spdep' was built under R version 4.3.3
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.3.3
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
s2::s2_options(model = "closed") # Build an s2 options object ("open" and "semi-open" are alternative models); pass it to s2 functions that accept an options argument
## $model
## [1] 3
##
## $snap
## list()
## attr(,"class")
## [1] "snap_identity"
##
## $snap_radius
## [1] -1
##
## $duplicate_edges
## [1] FALSE
##
## $edge_type
## [1] 1
##
## $validate
## [1] FALSE
##
## $polyline_type
## [1] 1
##
## $polyline_sibling_pairs
## [1] 2
##
## $simplify_edge_chains
## [1] FALSE
##
## $split_crossing_edges
## [1] FALSE
##
## $idempotent
## [1] FALSE
##
## $dimensions
## [1] 1 2 3
##
## attr(,"class")
## [1] "s2_options"
# Read the shapefile
alquileres <- st_read("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/202412/alquileres_CABA_2024-12.shp")
## Reading layer `alquileres_CABA_2024-12' from data source
## `C:\Users\sixto\OneDrive\Documentos\Papers\accesibilidad_alquileres\202412\alquileres_CABA_2024-12.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 3612 features and 8 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -58.53591 ymin: -34.75753 xmax: -58.36092 ymax: -34.49665
## Geodetic CRS: WGS 84
We compute the price per square metre (pm2) and convert the building age (antig) to numeric:
alquileres = alquileres %>% mutate(pm2 = as.numeric(precio) / as.numeric(sp_cbrt)) %>% mutate(antig = as.numeric(antig))
## Warning: There was 1 warning in `stopifnot()`.
## ℹ In argument: `pm2 = as.numeric(precio)/as.numeric(sp_cbrt)`.
## Caused by warning:
## ! NAs introducidos por coerción
## Warning: There was 1 warning in `stopifnot()`.
## ℹ In argument: `antig = as.numeric(antig)`.
## Caused by warning:
## ! NAs introducidos por coerción
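To see which rows produced those NAs, here is a quick check (a sketch, assuming precio is stored as character in the shapefile):
# Hypothetical check: count listings whose precio is not parseable as a number
alquileres %>%
  st_drop_geometry() %>%
  filter(is.na(suppressWarnings(as.numeric(precio)))) %>%
  nrow()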
# Load barrios as context
barrios <- st_read("https://cdn.buenosaires.gob.ar/datosabiertos/datasets/ministerio-de-educacion/barrios/barrios.geojson")
## Reading layer `barrios' from data source
## `https://cdn.buenosaires.gob.ar/datosabiertos/datasets/ministerio-de-educacion/barrios/barrios.geojson'
## using driver `GeoJSON'
## Simple feature collection with 48 features and 6 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -58.53152 ymin: -34.70529 xmax: -58.33516 ymax: -34.52649
## Geodetic CRS: WGS 84
alquileres = st_intersection(alquileres, barrios)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
mapview(filter(alquileres, pm2 <20000), zcol = "pm2")
mapview(filter(alquileres, antig <80), zcol = "antig")
summary(alquileres$antig)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 10.00 30.00 28.97 45.00 100.00 812
Now we are going to compute the distances.
First, though, I want to look at the censuses to see what changed between 2010 and 2022.
shapefiles_dir <- "C:/Users/sixto/OneDrive/Documentos/GCBA/Radios censales 2022"
viviendas_2022<- st_read(file.path(shapefiles_dir, "Radios_CABA_4._Viviendas_2022.shp"))
## Reading layer `Radios_CABA_4._Viviendas_2022' from data source
## `C:\Users\sixto\OneDrive\Documentos\GCBA\Radios censales 2022\Radios_CABA_4._Viviendas_2022.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 3820 features and 51 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 93798.6 ymin: 91566.94 xmax: 111752.1 ymax: 111401.1
## Projected CRS: Argentina_GKBsAs
hogares_2022 <- st_read(file.path(shapefiles_dir, "Radios_CABA_5._Hogares_2022.shp"))
## Reading layer `Radios_CABA_5._Hogares_2022' from data source
## `C:\Users\sixto\OneDrive\Documentos\GCBA\Radios censales 2022\Radios_CABA_5._Hogares_2022.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 3820 features and 174 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 93798.6 ymin: 91566.94 xmax: 111752.1 ymax: 111401.1
## Projected CRS: Argentina_GKBsAs
poblacion_2022 <- st_read(file.path(shapefiles_dir, "Radios_CABA_6._Poblacion_2022.shp"))
## Reading layer `Radios_CABA_6._Poblacion_2022' from data source
## `C:\Users\sixto\OneDrive\Documentos\GCBA\Radios censales 2022\Radios_CABA_6._Poblacion_2022.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 3820 features and 379 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 93798.6 ymin: 91566.94 xmax: 111752.1 ymax: 111401.1
## Projected CRS: Argentina_GKBsAs
mean(hogares_2022$NBI_Z_SI)
## [1] 5.157259
sd(hogares_2022$NBI_Z_SI)
## [1] 6.630787
mapview(filter(hogares_2022,NBI_Z_SI >20 ), zcol = "NBI_Z_SI", lwd = 0)
censo_2010 = st_read("https://cdn.buenosaires.gob.ar/datosabiertos/datasets/direccion-general-de-estadisticas-y-censos/informacion-censal-por-radio/caba_radios_censales.geojson")
## Reading layer `informacion_censal_por_radio_wgs84' from data source
## `https://cdn.buenosaires.gob.ar/datosabiertos/datasets/direccion-general-de-estadisticas-y-censos/informacion-censal-por-radio/caba_radios_censales.geojson'
## using driver `GeoJSON'
## Simple feature collection with 3554 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -58.53152 ymin: -34.70529 xmax: -58.33514 ymax: -34.52755
## Geodetic CRS: WGS 84
censo_2010 = censo_2010 %>% mutate(NBI_Z_SI = as.numeric(H_CON_NBI) /as.numeric(T_HOGAR) *100)
mapview(filter(hogares_2022,NBI_Z_SI >20 ), zcol = "NBI_Z_SI", lwd = 0)
mapview(filter(censo_2010,NBI_Z_SI >20 ), zcol = "NBI_Z_SI", lwd = 0)
Now let's run the Moran's I tests for CABA.
censo_2010 <- censo_2010 %>% filter(NBI_Z_SI > 0) # keep only radios with some NBI households
# Create spatial weights matrix based on polygon contiguity
neighbors <- poly2nb(censo_2010)
weights <- nb2listw(neighbors, style = "W", zero.policy = TRUE)
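Before computing the lag it is worth inspecting the contiguity structure; a quick sketch using spdep's card():
# Distribution of neighbour counts; any zero-neighbour radios rely on zero.policy
table(card(neighbors))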
# Calculate spatial lag for Moran's diagram
censo_2010$spatial_lag <- lag.listw(weights, censo_2010$NBI_Z_SI, zero.policy = TRUE)
# Create Moran scatterplot using ggplot2
ggplot(censo_2010, aes(x = NBI_Z_SI, y = spatial_lag)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
labs(
title = "Moran Scatterplot",
x = "NBI_Z_SI",
y = "Spatial Lag of NBI_Z_SI"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Calculate Moran's I
moran_test <- moran.test(censo_2010$NBI_Z_SI, weights, zero.policy = TRUE)
print(moran_test)
##
## Moran I test under randomisation
##
## data: censo_2010$NBI_Z_SI
## weights: weights
##
## Moran I statistic standard deviate = 53.492, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.5413396639 -0.0003069368 0.0001025321
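As a robustness check, the same test can be run by permutation (a sketch, 999 Monte Carlo simulations):
set.seed(42)
moran_mc <- moran.mc(censo_2010$NBI_Z_SI, weights, nsim = 999, zero.policy = TRUE)
moran_mc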
# Filter out rows where NBI_Z_SI is less than or equal to 0
hogares_2022 <- hogares_2022 %>% filter(NBI_Z_SI > 0)
# Check for non-finite values in NBI_Z_SI
if (any(!is.finite(hogares_2022$NBI_Z_SI))) {
stop("Non-finite values found in NBI_Z_SI. Please clean the data.")
}
# Create spatial weights matrix based on polygon contiguity
neighbors <- poly2nb(hogares_2022) # Contiguity-based neighbors
weights <- nb2listw(neighbors, style = "W", zero.policy = TRUE) # Spatial weights
# Calculate spatial lag for Moran's diagram
hogares_2022$spatial_lag <- lag.listw(weights, hogares_2022$NBI_Z_SI, zero.policy = TRUE)
# Create Moran scatterplot using ggplot2
ggplot(hogares_2022, aes(x = NBI_Z_SI, y = spatial_lag)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE, color = "blue") +
labs(
title = "Moran Scatterplot",
x = "NBI_Z_SI",
y = "Spatial Lag of NBI_Z_SI"
) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Calculate Moran's I
moran_test <- moran.test(hogares_2022$NBI_Z_SI, weights, zero.policy = TRUE)
print(moran_test)
##
## Moran I test under randomisation
##
## data: hogares_2022$NBI_Z_SI
## weights: weights
##
## Moran I statistic standard deviate = 61.892, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.5647867903 -0.0002690342 0.0000833509
Now let's compare the populations.
library(raster)
## Loading required package: sp
##
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
##
## select
# Load the .tif file
VUT <- raster("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/vut.tif")
# Check the loaded raster
print(VUT)
## class : RasterLayer
## dimensions : 2525, 2285, 5769625 (nrow, ncol, ncell)
## resolution : 10, 10 (x, y)
## extent : -6516190, -6493340, -4124477, -4099227 (xmin, xmax, ymin, ymax)
## crs : +proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 +units=m +nadgrids=@null +wktext +no_defs
## source : vut.tif
## names : lyr1
## values : 2700, 34664.12 (min, max)
eph_data <- readxl::read_excel("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/usu_individual_T224.xlsx", na = ".")
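As an aside, the eph package loaded above can download the same microdata directly instead of reading a local xlsx; a sketch, assuming internet access:
# eph_data <- eph::get_microdata(year = 2024, trimester = 2, type = "individual")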
eph <- dplyr::filter(eph_data, CH12 < 9)
eph = eph %>% mutate(genero = case_when(CH04 == 1 ~ 'masculino', CH04 == 2 ~ "femenino"))
eph_AMBA = eph %>% filter(AGLOMERADO == 32 | AGLOMERADO == 33 )
eph_CABA = eph %>% filter(AGLOMERADO == 32)
EPH_CABA_18_24 = eph %>% filter(CH06 %in% (18:24),P47T !=-9, AGLOMERADO == 32 )
EPH_CABA_25_29 = eph %>% filter(CH06 %in% (25:29) ,P47T!=-9, AGLOMERADO == 32)
EPH_CABA_18_29 = eph %>% filter(CH06 %in% (18:29) ,P47T!=-9, AGLOMERADO == 32)
EPH_AMBA_18_24 = eph_AMBA %>% filter(CH06 %in% (18:24),P47T !=-9 )
EPH_AMBA_25_29 = eph_AMBA %>% filter(CH06 %in% (25:29) ,P47T!=-9)
EPH_AMBA_18_29 = eph_AMBA %>% filter(CH06 %in% (18:29) ,P47T!=-9)
print("EPH_CABA_18_24")
## [1] "EPH_CABA_18_24"
summary(EPH_CABA_18_24$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 158891 200000 3156000
print("EPH_CABA_25_29")
## [1] "EPH_CABA_25_29"
summary(EPH_CABA_25_29$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 305000 500000 609829 837500 2500000
print("EPH_CABA_18_29")
## [1] "EPH_CABA_18_29"
summary(EPH_CABA_18_29$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 145000 318313 500000 3156000
print("EPH_AMBA_18_24")
## [1] "EPH_AMBA_18_24"
summary(EPH_AMBA_18_24$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 0 0 138284 200000 3156000
print("EPH_AMBA_25_29")
## [1] "EPH_AMBA_25_29"
summary(EPH_AMBA_25_29$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 19500 300000 358866 520000 2500000
ggplot(EPH_CABA_18_24, aes(x=CH06, y=P47T, color=genero)) +
geom_point(size=5) + labs(x = "Edad", y = "Ingresos ($)", color = "Genero", title = "", caption = "Fuente: EPH - 2° trimestre 2024")
ggplot(EPH_CABA_25_29, aes(x=CH06, y=P47T, color=genero)) +
geom_point(size=5) +
theme_classic() + labs(x = "Edad", y = "Ingresos ($)", color = "Genero", title = "", caption = "Fuente: EPH - 2° trimestre 2024") +
ylim(0, 100000)
## Warning: Removed 59 rows containing missing values (`geom_point()`).
modelo_exp_multiple <- lm(P47T ~ CH06 + genero + CH12, data = eph_CABA)
summary(modelo_exp_multiple)
##
## Call:
## lm(formula = P47T ~ CH06 + genero + CH12, data = eph_CABA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -838918 -376067 -123737 170400 6702721
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -274637.2 55189.1 -4.976 7.30e-07 ***
## CH06 2784.0 789.6 3.526 0.000436 ***
## generomasculino 144191.6 34105.9 4.228 2.52e-05 ***
## CH12 90893.7 8935.2 10.173 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 631100 on 1378 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1026, Adjusted R-squared: 0.1006
## F-statistic: 52.5 on 3 and 1378 DF, p-value: < 2.2e-16
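Income is heavily right-skewed (note the residual range above), so a log specification is often preferable; a sketch, dropping zero incomes:
modelo_log <- lm(log(P47T) ~ CH06 + genero + CH12,
                 data = dplyr::filter(eph_CABA, P47T > 0))
summary(modelo_log)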
p <- EPH_AMBA_25_29 %>%
ggplot( aes(x=P47T, fill=genero)) +
geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
scale_fill_manual(values=c("#69b3a2", "#404080")) +
theme_ipsum() +
labs(fill="")
p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## not found in Windows font database
## (this warning repeats for each text element drawn with the missing font)
EPH_CABA_18_24 = eph %>% filter(CH06 %in% (18:24),P47T >0, AGLOMERADO == 32 )
EPH_CABA_25_29 = eph %>% filter(CH06 %in% (25:29) ,P47T>0, AGLOMERADO == 32)
EPH_CABA_18_29 = eph %>% filter(CH06 %in% (18:29) ,P47T>0, AGLOMERADO == 32)
EPH_AMBA_18_24 = eph_AMBA %>% filter(CH06 %in% (18:24),P47T >0 )
EPH_AMBA_25_29 = eph_AMBA %>% filter(CH06 %in% (25:29) ,P47T>0)
EPH_AMBA_18_29 = eph_AMBA %>% filter(CH06 %in% (18:29) ,P47T>0)
summary(EPH_CABA_18_24$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15000 142500 248000 376630 487500 3156000
summary(EPH_CABA_25_29$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 40000 400000 580000 699803 900000 2500000
summary(EPH_AMBA_18_24$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10000 100000 225000 299101 400000 3156000
summary(EPH_AMBA_25_29$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10000 210000 400000 475970 600000 2500000
summary(EPH_AMBA_18_29$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10000 150000 300000 383394 500000 3156000
summary(EPH_CABA_18_29$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15000 200000 412000 548052 750000 3156000
# Load DT for interactive HTML tables (already attached above)
library(DT)
# Summarize data and store results in a data.table
summary_table <- data.frame(
Group = c("EPH_CABA_18_24", "EPH_CABA_25_29", "EPH_CABA_18_29", "EPH_AMBA_18_24", "EPH_AMBA_25_29"),
Min = c(
min(EPH_CABA_18_24$P47T, na.rm = TRUE),
min(EPH_CABA_25_29$P47T, na.rm = TRUE),
min(EPH_CABA_18_29$P47T, na.rm = TRUE),
min(EPH_AMBA_18_24$P47T, na.rm = TRUE),
min(EPH_AMBA_25_29$P47T, na.rm = TRUE)
),
Median = c(
median(EPH_CABA_18_24$P47T, na.rm = TRUE),
median(EPH_CABA_25_29$P47T, na.rm = TRUE),
median(EPH_CABA_18_29$P47T, na.rm = TRUE),
median(EPH_AMBA_18_24$P47T, na.rm = TRUE),
median(EPH_AMBA_25_29$P47T, na.rm = TRUE)
),
Mean = c(
mean(EPH_CABA_18_24$P47T, na.rm = TRUE),
mean(EPH_CABA_25_29$P47T, na.rm = TRUE),
mean(EPH_CABA_18_29$P47T, na.rm = TRUE),
mean(EPH_AMBA_18_24$P47T, na.rm = TRUE),
mean(EPH_AMBA_25_29$P47T, na.rm = TRUE)
),
Max = c(
max(EPH_CABA_18_24$P47T, na.rm = TRUE),
max(EPH_CABA_25_29$P47T, na.rm = TRUE),
max(EPH_CABA_18_29$P47T, na.rm = TRUE),
max(EPH_AMBA_18_24$P47T, na.rm = TRUE),
max(EPH_AMBA_25_29$P47T, na.rm = TRUE)
)
)
datatable(summary_table)
# Round the values in summary_table
summary_table$Min <- round(summary_table$Min, 2)
summary_table$Median <- round(summary_table$Median, 2)
summary_table$Mean <- round(summary_table$Mean, 2)
summary_table$Max <- round(summary_table$Max, 2)
# Print the rounded summary table
print(summary_table)
## Group Min Median Mean Max
## 1 EPH_CABA_18_24 15000 248000 376629.6 3156000
## 2 EPH_CABA_25_29 40000 580000 699803.3 2500000
## 3 EPH_CABA_18_29 15000 412000 548052.2 3156000
## 4 EPH_AMBA_18_24 10000 225000 299100.6 3156000
## 5 EPH_AMBA_25_29 10000 400000 475969.7 2500000
# Render the rounded summary table interactively
datatable(summary_table)
eph_hogar <- readxl::read_excel("C:/Users/sixto/OneDrive/Documentos/Papers/accesibilidad_alquileres/usu_hogar_T224.xlsx", na = ".")
## Warning: Expecting logical in K1570 / R1570C11: got 'casilla'
## (readxl raised 20 similar type-coercion warnings for other cells in column K)
I split the households by composition and compute overcrowding:
# Note: the catch-all TRUE branch must come last, otherwise later branches are unreachable
eph_hogar = eph_hogar %>% mutate(hogar = case_when(IX_TOT == 1 ~ 'unipersonal', IX_TOT == 2 & IX_MEN10 == 1 ~ 'Monoparental', IX_TOT == 3 & IX_MEN10 == 1 ~ 'Pareja_hijx', IX_TOT == 2 & IX_MEN10 == 0 ~ 'Pareja_adultos_amigxs', IX_TOT == 2 & IX_MEN10 == 2 ~ 'Pareja_2hijxs', TRUE ~ "Multipersonal"))
eph_hogar = eph_hogar %>% mutate(Tenencia = case_when(
  II7 == 01 ~ "Propietario vivienda y terreno",
  II7 == 02 ~ "Propietario de la vivienda solamente",
  II7 == 03 ~ "Inquilino / arrendatario de la vivienda",
  II7 == 04 ~ "Ocupante por pago de impuestos / expensas",
  II7 == 05 ~ "Ocupante en relación de dependencia",
  II7 == 06 ~ "Ocupante gratuito (con permiso)",
  II7 == 07 ~ "Ocupante de hecho (sin permiso)",
  II7 == 08 ~ "Está en sucesión"))
eph_hogar = eph_hogar %>% mutate(personas_cuarto = IX_TOT / IV2) # people per room
eph_hogar = eph_hogar %>% mutate(hacinamiento = case_when(personas_cuarto <= 1 ~ 'Sin_Hacinamiento', personas_cuarto > 3 ~ 'Hacinamiento', TRUE ~ "Hacinamiento_Medio"))
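A quick sanity check of the resulting categories (a sketch):
eph_hogar %>% count(hogar, hacinamiento)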
I subset to keep the households that contain individuals aged 18 to 29. I can do this because the individual and household datasets are linked.
eph_hogar_18_29= subset(eph_hogar, CODUSU %in% EPH_CABA_18_29$CODUSU)
I filter out the 'Multipersonal' households, because we want independent households for the analysis.
eph_hogar_18_29_independientes = eph_hogar_18_29 %>% filter(hogar != "Multipersonal")
Since some households combine adults over 35 with people under 29, we have to filter those out to keep only the ones that really interest us. To do that, I take all the households that are a priori not multi-family and cross them with the EPH individual dataset, keeping only the individuals from independent young households.
eph_hogar_18_29_independientes_individuos= subset(eph, CODUSU %in% eph_hogar_18_29_independientes$CODUSU)
eph_hogar_18_29_no_independientes_individuos= eph_hogar_18_29_independientes_individuos %>% filter(CH06 > 35)
What we must do now is keep the households that meet the necessary conditions, so we can analyse households in addition to individuals. How do we do that? We take the independent-households dataset and remove from it the households that fail the independence condition.
eph_hogar_18_29_independientes = subset (eph_hogar_18_29_independientes,!(CODUSU %in% eph_hogar_18_29_no_independientes_individuos$CODUSU))
We summarise the information for these households, and then for all the households that do not meet this condition, using the key household variables:
II7 = tenure regime (recoded above as Tenencia); IPCF = per-capita family income; hacinamiento = overcrowding (computed above); IX_TOT = household size.
summary(eph_hogar_18_29_independientes$IPCF)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 250000 521250 634710 837500 3156000
summary(eph_hogar_18_29_independientes$IX_TOT)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 1.000 1.543 2.000 3.000
agrupado <- group_by(eph_hogar_18_29_independientes, Tenencia)
sumario <- summarise(agrupado, Cantidad = n())
sumario
## # A tibble: 5 × 2
## Tenencia Cantidad
## <chr> <int>
## 1 "Inquilino / arrendatario de la vivienda" 32
## 2 "Ocupante gratuito (con permiso)" 3
## 3 "Ocupante por pago de\n impuestos / expensas" 4
## 4 "Propietario de la vivienda solamente" 2
## 5 "Propietario vivienda y terreno" 5
agrupado <- group_by(eph_hogar_18_29_independientes, hogar)
sumario <- summarise(agrupado, Cantidad = n())
sumario
## # A tibble: 3 × 2
## hogar Cantidad
## <chr> <int>
## 1 Pareja_adultos_amigxs 19
## 2 Pareja_hijx 3
## 3 unipersonal 24
p <- eph_hogar_18_29_independientes %>%
ggplot( aes(x=IPCF, fill= hogar)) +
geom_density( color="#e9ecef", alpha=0.6, position = 'identity') +
scale_fill_manual(values=c("#69b3a2", "#404080", "#81c33b")) +
labs(fill="")
p
p2 <- eph_hogar_18_29_independientes %>%
ggplot( aes(x=ITF, fill= hogar)) +
geom_density( color="#e9ecef", alpha=0.6, position = 'identity') +
scale_fill_manual(values=c("#69b3a2", "#404080", "#81c33b")) +
labs(fill="")
p2
eph_hogar_18_29_independientes_individuos= subset(eph_hogar_18_29_independientes_individuos, !(CODUSU %in% eph_hogar_18_29_no_independientes_individuos$CODUSU))
eph_hogar_18_29_independientes_individuos = eph_hogar_18_29_independientes_individuos%>% filter (P47T >1, CH06 < 30)
summary(eph_hogar_18_29_independientes_individuos$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 40000 325000 507500 679750 812500 3156000
Now we will analyse the households that contain the target population: the multi-person households in which 18-to-29-year-olds live, to understand their specific situation.
The first step is to filter for the households that hold our target population. We take the individuals in the individual EPH who are in that age range; fortunately, we have already done this.
eph_hogar_18_29 = subset(eph_hogar, CODUSU %in% EPH_CABA_18_29$CODUSU)
From this first filter I remove the households that I already know do not belong; that is, we perform the inverse of the previous step.
eph_hogar_18_29_dependientes = subset(eph_hogar_18_29, !(CODUSU %in% eph_hogar_18_29_independientes$CODUSU))
We want to know the individual incomes of the young people living in households from which they could become independent, so we cross the datasets again.
eph_individuos_18_29_dependientes = subset(eph, CODUSU %in% eph_hogar_18_29_dependientes$CODUSU) %>% filter(CH06 %in% (18:29),P47T> -1 )
eph_individuos_24_29_dependientes = subset(eph, CODUSU %in% eph_hogar_18_29_dependientes$CODUSU) %>% filter(CH06 %in% (24:29),P47T> -1 )
summary(eph_individuos_24_29_dependientes$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 310000 485000 554032 760000 1750000
We then analyse the different groups of young people aged 18 to 29: 1) those who have managed to become independent, 2) those who have not yet managed it, and 3) everyone together.
eph_hogar_18_29_independientes_individuos = filter(eph_hogar_18_29_independientes_individuos,P47T> -1)
"Ingresos"
## [1] "Ingresos"
"Individuos independientes"
## [1] "Individuos independientes"
summary(eph_hogar_18_29_independientes_individuos$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 40000 325000 507500 679750 812500 3156000
"Individuos dependientes"
## [1] "Individuos dependientes"
summary(eph_individuos_18_29_dependientes$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 65000 300000 346667 505000 1750000
"Total individuos"
## [1] "Total individuos"
summary(EPH_CABA_18_29$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15000 200000 412000 548052 750000 3156000
"Edad"
## [1] "Edad"
"Individuos independientes"
## [1] "Individuos independientes"
summary(eph_hogar_18_29_independientes_individuos$CH06)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 24.00 26.00 25.32 28.00 29.00
"Individuos dependientes"
## [1] "Individuos dependientes"
summary(eph_individuos_18_29_dependientes$CH06)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 20.00 23.00 22.99 26.00 29.00
"Total individuos"
## [1] "Total individuos"
summary(EPH_CABA_18_29$CH06)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.0 21.0 25.0 24.4 27.0 29.0
ggplot()+
geom_histogram(data = filter(eph_individuos_18_29_dependientes, P47T < 300000), aes(x=P47T) , fill="#FFFF00", alpha=1) + labs(title = "Ingresos según condición de hogar joven", caption = "Verde = Hogares independientes, Amarillo = Hogares dependientes") +
geom_histogram (data = filter ( eph_hogar_18_29_independientes_individuos, P47T < 300000), aes(x=P47T), fill="#0A7029" , alpha=0.7) + scale_x_continuous(labels = scales::comma)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot()+
geom_histogram(data = eph_individuos_18_29_dependientes, aes(x=CH06) , fill="#FFFF00", alpha=0.8) +
geom_histogram(data = eph_hogar_18_29_independientes_individuos, aes(x=CH06), fill="#0A7029" , alpha=0.6) + labs(title = "Edad según condición de hogar joven", caption = "Verde = Hogares independientes, Amarillo = Hogares dependientes", x ="Edad")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
This gives us a fairly clear picture of the income difference needed to become independent. But what if we dig into the young people who became independent as couples versus in single-person households?
eph_hogar_18_29_independientes_Unipersonal = eph_hogar_18_29_independientes %>% filter(hogar =="unipersonal")
eph_hogar_18_29_independientes_Pareja_amigxs = eph_hogar_18_29_independientes %>% filter(hogar =="Pareja_adultos_amigxs")
eph_individuos_18_29_independientes_Unipersonal = subset(eph, CODUSU %in% eph_hogar_18_29_independientes_Unipersonal$CODUSU)
eph_individuos_18_29_independientes_Pareja_amigxs = subset(eph, CODUSU %in% eph_hogar_18_29_independientes_Pareja_amigxs$CODUSU)
eph_individuos_18_29_independientes_Pareja_amigxs = eph_individuos_18_29_independientes_Pareja_amigxs %>% filter(P47T> -1)
summary(eph_individuos_18_29_independientes_Unipersonal$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 100000 250000 456000 698458 925000 3156000
summary(eph_individuos_18_29_independientes_Pareja_amigxs$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 357500 532500 710722 862500 2500000
summary(eph_individuos_18_29_dependientes$P47T)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 65000 300000 346667 505000 1750000
nivel_educativo <- calculate_tabulates(base = eph_CABA,
x = 'CAT_OCUP', y = 'NIVEL_ED',
weights = 'PONDERA', add.totals = 'row',
add.percentage = 'col',
digits = 1)
eph_hogar = eph_hogar %>% mutate(hogar = case_when(IX_TOT == 1 ~ 'unipersonal', IX_TOT == 2 & IX_MEN10 == 1 ~ 'Monoparental', IX_TOT == 3 & IX_MEN10 == 1 ~ 'Pareja_hijx', IX_TOT == 2 & IX_MEN10 == 0 ~ 'Pareja_adultos_amigxs' , IX_TOT == 4 & IX_MEN10 == 2 ~ 'Pareja_2hijxs', TRUE ~ "Multipersonal" ))
eph_CABA_hogar = eph_hogar %>% filter(AGLOMERADO == 32)
result <- eph_CABA_hogar %>%
group_by(hogar) %>%
summarise(count = n())
# View the result
print(result)
## # A tibble: 6 × 2
## hogar count
## <chr> <int>
## 1 Monoparental 6
## 2 Multipersonal 170
## 3 Pareja_2hijxs 9
## 4 Pareja_adultos_amigxs 193
## 5 Pareja_hijx 20
## 6 unipersonal 235
library(dplyr)
# Filter, group, and summarize for composition combinations
composition_combinations <- eph_CABA_hogar %>%
filter(hogar == "Multipersonal") %>% # Filter for multipersonal households
group_by(IX_TOT, IX_MEN10) %>% # Group by total people and children under 10
summarise(count = n(), .groups = "drop") %>% # Count occurrences
arrange(desc(count)) # Arrange by the most common combinations
# View the result
print(composition_combinations)
## # A tibble: 12 × 3
## IX_TOT IX_MEN10 count
## <dbl> <dbl> <int>
## 1 3 0 70
## 2 4 0 46
## 3 4 1 22
## 4 5 0 9
## 5 5 1 6
## 6 5 2 4
## 7 6 0 4
## 8 6 2 3
## 9 6 1 2
## 10 7 0 2
## 11 3 2 1
## 12 10 1 1
Let's go with the four-person households.
eph_CABA_hogar_4pax = eph_CABA_hogar %>% filter(IX_TOT == 4)
# Create ITF deciles using ntile
eph_CABA_hogar_4pax <- eph_CABA_hogar_4pax %>%
mutate(ITF_decile = ntile(ITF, 10))
# View the updated data frame
print(head(eph_CABA_hogar_4pax))
## # A tibble: 6 × 93
## CODUSU ANO4 TRIMESTRE NRO_HOGAR REALIZADA REGION MAS_500 AGLOMERADO PONDERA
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 TQRMNOQ… 2024 2 1 1 1 S 32 4909
## 2 TQRMNOP… 2024 2 1 1 1 S 32 1612
## 3 TQRMNOP… 2024 2 1 1 1 S 32 1451
## 4 TQRMNOQ… 2024 2 1 1 1 S 32 1057
## 5 TQRMNOQ… 2024 2 1 1 1 S 32 2805
## 6 TQRMNOR… 2024 2 1 1 1 S 32 1802
## # ℹ 84 more variables: IV1 <dbl>, IV1_ESP <lgl>, IV2 <dbl>, IV3 <dbl>,
## # IV3_ESP <chr>, IV4 <dbl>, IV5 <dbl>, IV6 <dbl>, IV7 <dbl>, IV7_ESP <chr>,
## # IV8 <dbl>, IV9 <dbl>, IV10 <dbl>, IV11 <dbl>, IV12_1 <dbl>, IV12_2 <dbl>,
## # IV12_3 <dbl>, II1 <dbl>, II2 <dbl>, II3 <dbl>, II3_1 <dbl>, II4_1 <dbl>,
## # II4_2 <dbl>, II4_3 <dbl>, II5 <dbl>, II5_1 <dbl>, II6 <dbl>, II6_1 <dbl>,
## # II7 <dbl>, II7_ESP <chr>, II8 <dbl>, II8_ESP <chr>, II9 <dbl>, V1 <dbl>,
## # V2 <dbl>, V21 <dbl>, V22 <dbl>, V3 <dbl>, V4 <dbl>, V5 <dbl>, V6 <dbl>, …
library(dplyr)
# Create ITF deciles
eph_CABA_hogar_4pax <- eph_CABA_hogar_4pax %>%
mutate(ITF_decile = ntile(ITF, 10))
# Summarize income for each decile
income_by_decile <- eph_CABA_hogar_4pax %>%
group_by(ITF_decile) %>%
summarise(
min_income = min(ITF, na.rm = TRUE),
max_income = max(ITF, na.rm = TRUE),
mean_income = mean(ITF, na.rm = TRUE),
.groups = 'drop'
)
# View the result
print(income_by_decile)
## # A tibble: 10 × 4
## ITF_decile min_income max_income mean_income
## <int> <dbl> <dbl> <dbl>
## 1 1 0 0 0
## 2 2 0 0 0
## 3 3 0 0 0
## 4 4 0 0 0
## 5 5 0 649000 255250
## 6 6 692000 900000 836500
## 7 7 900000 1185000 1040875
## 8 8 1190000 1430000 1295143.
## 9 9 1460000 2250000 1864286.
## 10 10 2600000 6698000 4184714.
library(dplyr)
# Summarize income for each decile again, this time reporting the median
income_by_decile <- eph_CABA_hogar_4pax %>%
group_by(ITF_decile) %>%
summarise(
min_income = min(ITF, na.rm = TRUE),
max_income = max(ITF, na.rm = TRUE),
median_income = median(ITF, na.rm = TRUE),
.groups = 'drop'
) %>%
arrange(ITF_decile) # Ensure rows are sorted by decile
# View the result
print(income_by_decile)
## # A tibble: 10 × 4
## ITF_decile min_income max_income median_income
## <int> <dbl> <dbl> <dbl>
## 1 1 0 0 0
## 2 2 0 0 0
## 3 3 0 0 0
## 4 4 0 0 0
## 5 5 0 649000 250000
## 6 6 692000 900000 850000
## 7 7 900000 1185000 1014500
## 8 8 1190000 1430000 1300000
## 9 9 1460000 2250000 1925000
## 10 10 2600000 6698000 3350000
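Two caveats about these deciles: the zeros in deciles 1 to 4 most likely reflect households with unreported income, and ntile() ignores the survey weights (PONDERA). A weighted alternative, as a sketch (assuming the Hmisc package is installed):
# Weighted decile breakpoints for ITF using the survey weights
brks <- Hmisc::wtd.quantile(eph_CABA_hogar_4pax$ITF,
                            weights = eph_CABA_hogar_4pax$PONDERA,
                            probs = seq(0, 1, 0.1), na.rm = TRUE)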
library(raster)
library(dplyr)
# Median income for each ITF decile (the deciles were created above)
median_by_decile <- eph_CABA_hogar_4pax %>%
group_by(ITF_decile) %>%
summarise(median_income = median(ITF, na.rm = TRUE)) %>%
ungroup() %>% mutate(asequibilidad = median_income * 0.35)
print(median_by_decile)
## # A tibble: 10 × 3
## ITF_decile median_income asequibilidad
## <int> <dbl> <dbl>
## 1 1 0 0
## 2 2 0 0
## 3 3 0 0
## 4 4 0 0
## 5 5 250000 87500
## 6 6 850000 297500
## 7 7 1014500 355075
## 8 8 1300000 455000
## 9 9 1925000 673750
## 10 10 3350000 1172500
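The asequibilidad column applies the usual 35% affordability rule: a household is assumed to spend at most 35% of its income on rent. For example, for decile 7:
0.35 * 1014500 # maximum affordable monthly rent for decile 7, i.e. 355075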
library(terra)
## Warning: package 'terra' was built under R version 4.3.3
## terra 1.7.78
##
## Attaching package: 'terra'
## The following object is masked from 'package:tidyr':
##
## extract
library(dplyr)
# Step 1: Load the raster using terra
VUT <- rast("C:\\Users\\sixto\\OneDrive\\Documentos\\Papers\\accesibilidad_alquileres\\vut.tif")
# Check the current CRS
print(crs(VUT))
## [1] "PROJCRS[\"WGS 84 / Pseudo-Mercator\",\n BASEGEOGCRS[\"WGS 84\",\n DATUM[\"World Geodetic System 1984\",\n ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n LENGTHUNIT[\"metre\",1]]],\n PRIMEM[\"Greenwich\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433]],\n ID[\"EPSG\",4326]],\n CONVERSION[\"unnamed\",\n METHOD[\"Popular Visualisation Pseudo Mercator\",\n ID[\"EPSG\",1024]],\n PARAMETER[\"Latitude of natural origin\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8801]],\n PARAMETER[\"Longitude of natural origin\",0,\n ANGLEUNIT[\"degree\",0.0174532925199433],\n ID[\"EPSG\",8802]],\n PARAMETER[\"False easting\",0,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8806]],\n PARAMETER[\"False northing\",0,\n LENGTHUNIT[\"metre\",1],\n ID[\"EPSG\",8807]]],\n CS[Cartesian,2],\n AXIS[\"easting\",east,\n ORDER[1],\n LENGTHUNIT[\"metre\",1,\n ID[\"EPSG\",9001]]],\n AXIS[\"northing\",north,\n ORDER[2],\n LENGTHUNIT[\"metre\",1,\n ID[\"EPSG\",9001]]]]"
# The CRS is already present (Pseudo-Mercator); set the EPSG code explicitly
# rather than mislabelling the raster as lon/lat (EPSG:4326), which would
# trigger "[is.lonlat] coordinates are out of range" warnings downstream
crs(VUT) <- "EPSG:3857"
# Compute the cell sizes (in m2, since the CRS is projected in metres)
cell_areas <- cellSize(VUT)
plot(VUT$lyr1)
# Step 2: Extract raster data into a data.frame
raster_data <- as.data.frame(VUT, xy = TRUE) # 'xy = TRUE' keeps coordinates
# Scale the per-m2 value by 50 (presumably a 50 m2 reference unit)
raster_data = raster_data %>% mutate(Valor_Total = lyr1 * 50)
# Attach each decile's median income to every raster cell and compute the
# rent-to-income ratio. Deciles 1-4 have a median income of 0, so their
# ratios are Inf and fall into the "unaffordable" (> 0.35) bucket below.
for (i in 1:10) {
  decil_col <- paste0("decil", i)
  raster_data[[decil_col]] <- median_by_decile$median_income[i]
  raster_data[[paste0(decil_col, "_ratio")]] <- raster_data$Valor_Total / raster_data[[decil_col]]
}
# Load the terra package
library(terra)
# Function to convert a data.frame column to a SpatRaster
create_raster_from_df <- function(df, col_name) {
# Create a SpatRaster from the data frame, using x and y for coordinates
raster_obj <- rast(df[, c("x", "y", col_name)])
return(raster_obj)
}
# Create a SpatRaster for each decile ratio
raster_decil1_ratio <- create_raster_from_df(raster_data, "decil1_ratio")
raster_decil2_ratio <- create_raster_from_df(raster_data, "decil2_ratio")
raster_decil3_ratio <- create_raster_from_df(raster_data, "decil3_ratio")
raster_decil4_ratio <- create_raster_from_df(raster_data, "decil4_ratio")
raster_decil5_ratio <- create_raster_from_df(raster_data, "decil5_ratio")
raster_decil6_ratio <- create_raster_from_df(raster_data, "decil6_ratio")
raster_decil7_ratio <- create_raster_from_df(raster_data, "decil7_ratio")
raster_decil8_ratio <- create_raster_from_df(raster_data, "decil8_ratio")
raster_decil9_ratio <- create_raster_from_df(raster_data, "decil9_ratio")
raster_decil10_ratio <- create_raster_from_df(raster_data, "decil10_ratio")
# Visualize each decile-ratio raster
plot(raster_decil1_ratio)
plot(raster_decil2_ratio)
plot(raster_decil3_ratio)
plot(raster_decil4_ratio)
plot(raster_decil5_ratio)
plot(raster_decil6_ratio)
plot(raster_decil7_ratio)
plot(raster_decil8_ratio)
plot(raster_decil9_ratio)
plot(raster_decil10_ratio)
# Define a color palette and breaks for the values
breaks <- c(0, 0.35, max(c(raster_decil1_ratio[], raster_decil2_ratio[], raster_decil3_ratio[], raster_decil4_ratio[],
raster_decil5_ratio[], raster_decil6_ratio[], raster_decil7_ratio[], raster_decil8_ratio[],
raster_decil9_ratio[], raster_decil10_ratio[])))
colors <- c("blue", "white", "red")
# Plot the rasters with consistent color scale and a break at 0.35
par(mfrow=c(2,5)) # To display 10 plots in a 2x5 grid
plot(raster_decil1_ratio, col=colors, breaks=breaks, main="Decil 1 Ratio")
plot(raster_decil2_ratio, col=colors, breaks=breaks, main="Decil 2 Ratio")
plot(raster_decil3_ratio, col=colors, breaks=breaks, main="Decil 3 Ratio")
plot(raster_decil4_ratio, col=colors, breaks=breaks, main="Decil 4 Ratio")
plot(raster_decil5_ratio, col=colors, breaks=breaks, main="Decil 5 Ratio")
plot(raster_decil6_ratio, col=colors, breaks=breaks, main="Decil 6 Ratio")
plot(raster_decil7_ratio, col=colors, breaks=breaks, main="Decil 7 Ratio")
plot(raster_decil8_ratio, col=colors, breaks=breaks, main="Decil 8 Ratio")
plot(raster_decil9_ratio, col=colors, breaks=breaks, main="Decil 9 Ratio")
plot(raster_decil10_ratio, col=colors, breaks=breaks, main="Decil 10 Ratio")
library(terra)
# Store all decile rasters in a list
raster_list <- list(raster_decil1_ratio, raster_decil2_ratio, raster_decil3_ratio,
raster_decil4_ratio, raster_decil5_ratio, raster_decil6_ratio,
raster_decil7_ratio, raster_decil8_ratio, raster_decil9_ratio,
raster_decil10_ratio)
# Get the area of one pixel (10 m x 10 m = 100 m2; all rasters share this resolution)
pixel_area <- prod(res(raster_list[[1]]))
# Initialize an empty list to store results
area_results <- vector("list", length(raster_list))
# Loop through each raster and calculate area
for (i in seq_along(raster_list)) {
raster_values <- values(raster_list[[i]]) # Extract values
raster_values <- raster_values[!is.na(raster_values)] # Remove NA values
# Calculate area in each category
area_0_35 <- sum(raster_values <= 0.35) * pixel_area
area_greater_35 <- sum(raster_values > 0.35) * pixel_area
total_area <- area_0_35 + area_greater_35
  # Store results as a one-row data frame so the area columns stay numeric
  # (c() would coerce everything to character)
  area_results[[i]] <- data.frame(Decile = paste("Decile", i),
                                  Area_0_35 = area_0_35,
                                  Area_greater_35 = area_greater_35,
                                  Total_Area = total_area)
}
# Combine the per-decile rows into a single data frame
area_results <- do.call(rbind, area_results)
# Print the final summary table
print(area_results)
## Decile Area_0_35 Area_greater_35 Total_Area
## 1 Decile 1 0 299585700 299585700
## 2 Decile 2 0 299585700 299585700
## 3 Decile 3 0 299585700 299585700
## 4 Decile 4 0 299585700 299585700
## 5 Decile 5 0 299585700 299585700
## 6 Decile 6 5680800 293904900 299585700
## 7 Decile 7 6621700 292964000 299585700
## 8 Decile 8 33173300 266412400 299585700
## 9 Decile 9 184013900 115571800 299585700
## 10 Decile 10 292257600 7328100 299585700
# Ensure your deciles are in the correct order (if they are not already)
# Here we assume that Decile 1, 2, ..., 10 are in order.
area_results <- area_results[order(as.numeric(gsub("Decile ", "", area_results$Decile))), ]
# Calculate the cumulative area for the variable (Area_0_35) of interest
cumulative_area <- cumsum(area_results$Area_0_35)
# Compute the total area
total_area <- sum(area_results$Area_0_35)
# Calculate the cumulative share (frequency)
cumulative_share <- cumulative_area / total_area
# Create a data frame with the cumulative share in percentage format (up to 2 decimals)
cumulative_df <- data.frame(
Decile = area_results$Decile,
Cumulative_Area = cumulative_area,
Cumulative_Share = paste0(round(cumulative_share * 100, 2), "%")
)
print(cumulative_df)
## Decile Cumulative_Area Cumulative_Share
## 1 Decile 1 0 0%
## 2 Decile 2 0 0%
## 3 Decile 3 0 0%
## 4 Decile 4 0 0%
## 5 Decile 5 0 0%
## 6 Decile 6 5680800 1.09%
## 7 Decile 7 12302500 2.36%
## 8 Decile 8 45475800 8.72%
## 9 Decile 9 229489700 43.98%
## 10 Decile 10 521747300 100%
library(ineq)
lc <- Lc(area_results$Area_0_35)
plot(lc,
main = "Lorenz Curve for Area_0_35",
xlab = "Cumulative share of deciles",
ylab = "Cumulative share of Area_0_35",
col = "blue", lwd = 2)
abline(0, 1, col = "red", lty = 2)
gini_coefficient <- Gini(area_results$Area_0_35)
cat("Gini coefficient:", round(gini_coefficient, 3), "\n")
## Gini coefficient: 0.788
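As a cross-check, the same coefficient can be computed by hand from the standard discrete formula G = sum((2i - n - 1) * x_(i)) / (n^2 * mean(x)) over the sorted values:
x <- sort(area_results$Area_0_35)
n <- length(x)
sum((2 * seq_len(n) - n - 1) * x) / (n^2 * mean(x)) # ~ 0.788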