# Load packages
required <- c("sf", "terra", "geodata", "exactextractr", "httr2",
"spdep", "dplyr", "ggplot2", "patchwork", "broom", "knitr", "scales")
for (pkg in required) {
if (!requireNamespace(pkg, quietly = TRUE))
install.packages(pkg, repos = "https://cloud.r-project.org")
suppressPackageStartupMessages(library(pkg, character.only = TRUE))
}
# Extra spatial-ML packages
# INLA (not on CRAN): install.packages("INLA",
# repos = c(INLA = "https://inla.r-inla-download.org/R/stable"), dep = TRUE)
has_inla <- requireNamespace("INLA", quietly = TRUE)
has_gwr <- requireNamespace("GWmodel", quietly = TRUE)
has_bm <- requireNamespace("blackmarbler", quietly = TRUE)
if (has_gwr) suppressPackageStartupMessages(library(GWmodel))# Neighbours: two districts are neighbours if they touch (queen contiguity).
make_neighbors <- function(sf_obj) {
nb <- spdep::poly2nb(sf_obj, queen = TRUE)
if (any(spdep::card(nb) == 0))
nb <- spdep::poly2nb(sf_obj, queen = TRUE, snap = 0.01)
list(nb = nb, lw = spdep::nb2listw(nb, style = "W", zero.policy = TRUE))
}
# Fill any missing value with the mean, so the math does not break.
impute_na <- function(x) { x <- as.numeric(x); if (anyNA(x)) x[is.na(x)] <- mean(x, na.rm = TRUE); x }
# p-value from Moran's I (how sure we are that neighbours look alike).
moran_p <- function(x, lw) {
t <- tryCatch(spdep::moran.test(impute_na(x), lw, zero.policy = TRUE), error = function(e) NULL)
if (is.null(t)) NA_real_ else t$p.value
}
# Draw one coloured district map (a choropleth).
plot_choropleth <- function(sf_obj, var, title, palette = "viridis") {
ggplot(sf_obj) +
geom_sf(aes_string(fill = var), color = "white", linewidth = 0.15) +
scale_fill_viridis_c(option = palette, na.value = "grey80") +
labs(title = title, fill = NULL) +
theme_minimal(base_size = 11) +
theme(axis.text = element_blank(), axis.title = element_blank(), panel.grid = element_blank())
}This project looks at that question at the district level in two large Southeast Asian capitals, Jakarta and Hanoi. Both are growing fast and both have known air problems, but they differ in size, shape, and the age of their people. That makes a side-by-side look useful. We ask three simple questions:
District outlines come from the GADM map database,
loaded with the geodata R package. Jakarta is split into
its 47 kecamatan (GADM level 3) and Hanoi into its 30
quận/huyện districts (GADM level 2). The two cities are far
apart and use different district systems, so we fit every model
on its own for each city instead of mixing them. This
also avoids a common map trap — the modifiable areal unit problem
(MAUP), where results change just because the map is cut into different
pieces. All numbers refer to the year 2019.
# Local cached GADM first (exact paper data); geodata download as a fallback.
idn_l3 <- load_gadm_local("IDN", 3); if (is.null(idn_l3)) idn_l3 <- sf::st_as_sf(geodata::gadm("IDN", level = 3, path = RAW_DIR))
vnm_l2 <- load_gadm_local("VNM", 2); if (is.null(vnm_l2)) vnm_l2 <- sf::st_as_sf(geodata::gadm("VNM", level = 2, path = RAW_DIR))
jakarta <- idn_l3[idn_l3$NAME_1 == "Jakarta Raya", ]
hanoi <- vnm_l2[vnm_l2$GID_1 == "VNM.27_1", ]
if (nrow(jakarta) == 0) jakarta <- idn_l3[grepl("Jakarta", idn_l3$NAME_1, ignore.case = TRUE), ]
if (nrow(hanoi) == 0)
hanoi <- vnm_l2[vnm_l2$GID_1 == unique(vnm_l2$GID_1)[grep("27", vnm_l2$GID_1)[1]], ]
jakarta_sf0 <- sf::st_as_sf(jakarta); hanoi_sf0 <- sf::st_as_sf(hanoi)
districts <- sf::st_sf(
data.frame(
city = c(rep("Jakarta", nrow(jakarta_sf0)), rep("Hanoi", nrow(hanoi_sf0))),
district_name = c(jakarta_sf0$NAME_3, hanoi_sf0$NAME_2), stringsAsFactors = FALSE),
geometry = c(sf::st_geometry(jakarta_sf0), sf::st_geometry(hanoi_sf0)),
crs = sf::st_crs(jakarta_sf0))
districts$district_id <- seq_len(nrow(districts))
districts$area_km2 <- as.numeric(sf::st_area(districts)) / 1e6
roi <- sf::st_transform(districts, 4326)
bbox <- sf::st_bbox(sf::st_buffer(sf::st_union(roi), 0.25))
c(Jakarta = sum(districts$city == "Jakarta"), Hanoi = sum(districts$city == "Hanoi"))## Jakarta Hanoi
## 47 30
Table 1 lists the inputs. The pipeline is built to pull PM2.5 from the NASA SEDAC global yearly surface (version V5.GL.04) and night-lights from NASA’s Black Marble product when an Earthdata login key is present. When that key is missing, as in this run, the pipeline falls back to clear, population-based stand-ins so the whole workflow still runs from start to end. We define poverty (also called deprivation) as the night-light value flipped over and rescaled, so a higher value means a darker, poorer district.
| Number | Source (planned / fallback) | What it stands for |
|---|---|---|
| PM2.5 (µg/m³) | NASA SEDAC V5.GL.04 / density stand-in | Yearly average air dust, 2019 |
| Night-lights | NASA Black Marble / stand-in | Brightness; poverty = -brightness |
| Elderly share | WorldPop + national 65+ levels | Higher breathing risk |
| Population | WorldPop 2020 | Used in the count model / density |
| Boundaries | GADM via geodata | Jakarta kecamatan; Hanoi districts |
PM2.5 (air dust).
When a satellite key is set, PM2.5 comes straight from the NASA SEDAC surface. With no key, we build a stand-in from a city baseline plus a crowding term, so busier districts read as more polluted.
pm25_path <- file.path(RAW_DIR, paste0("pm25_", REFERENCE_YEAR, ".tif"))
pm25_vals <- rep(NA_real_, nrow(districts))
download_pm25_sedac <- function(dest, year, token) {
url <- paste0("https://sedac.ciesin.columbia.edu/downloads/data/sdei/",
"global-annual-gwr-pm2-5-modis-misr-seawifs-aod-v5-gl04/",
"sdei-global-annual-gwr-pm25-modis-misr-seawifs-aod-v5-gl04-pm25-netcdf-yearly-1k-v5-gl04-geoTIFF/",
"geoTIFF/pm25_", year, ".tif")
resp <- httr2::request(url) |>
httr2::req_headers(Authorization = paste("Bearer", token)) |>
httr2::req_perform(path = dest)
httr2::resp_status(resp) == 200
}
if (file.exists(pm25_path)) {
ok <- file.exists(pm25_path) ||
tryCatch(download_pm25_sedac(pm25_path, REFERENCE_YEAR, EARTHDATA_TOKEN), error = function(e) FALSE)
if (isTRUE(ok) && file.exists(pm25_path)) {
r <- terra::crop(terra::rast(pm25_path), terra::ext(bbox))
pm25_vals <- terra::extract(r, terra::vect(roi), fun = mean, na.rm = TRUE)[, 2]
}
}
if (all(is.na(pm25_vals))) { # stand-in: city base + crowding
pop_tmp <- get_pop_raster()
pop_dens <- exactextractr::exact_extract(pop_tmp, sf::st_as_sf(roi), fun = "mean", progress = FALSE)
pm25_vals <- pmax(ifelse(districts$city == "Jakarta", 42, 35) + 8 * scale(pop_dens)[, 1], 15)
data_mode_pm25 <- "proxy"
} else data_mode_pm25 <- "sedac"Night-lights (the wealth stand-in).
Bright districts read as richer, dark districts as poorer. With no satellite key, we build a stand-in where the city centre is bright and the edges are darker, with a small push from how crowded each district is.
ntl_vals <- rep(NA_real_, nrow(districts)); data_mode_ntl <- "proxy"
if (has_bm) {
ntl_try <- tryCatch(blackmarbler::bm_extract(
roi_sf = roi, product_id = "VNP46A4", date = REFERENCE_YEAR,
bearer = EARTHDATA_TOKEN, aggregation_fun = "mean", quiet = TRUE), error = function(e) NULL)
if (!is.null(ntl_try)) {
val_col <- grep("mean|radiance|DNB|VNP", names(ntl_try), ignore.case = TRUE, value = TRUE)[1]
if (!is.na(val_col) && nrow(ntl_try) == nrow(districts)) {
ntl_vals <- ntl_try[[val_col]]; data_mode_ntl <- "blackmarble"
}
}
}
if (all(is.na(ntl_vals))) { # stand-in: bright centre, darker edges
pop_tmp <- if (exists("pop_tmp")) pop_tmp else get_pop_raster()
pop_dens <- exactextractr::exact_extract(pop_tmp, sf::st_as_sf(roi), fun = "mean", progress = FALSE)
ntl_vals <- numeric(nrow(districts))
for (ct in unique(districts$city)) {
idx <- which(districts$city == ct)
pts <- sf::st_centroid(sf::st_transform(roi[idx, ], 3857))
ctr <- sf::st_sfc(sf::st_point(colMeans(sf::st_coordinates(pts))), crs = sf::st_crs(pts))
dist_km <- as.numeric(sf::st_distance(pts, ctr)) / 1000
ntl_vals[idx] <- 25 - 0.15 * dist_km + 2 * scale(pop_dens[idx])[, 1]
}
ntl_vals <- pmax(ntl_vals, 0.5)
}Population and elderly share.
Elderly share starts from the national level of people aged 65 and over (8.2% for Jakarta, 7.7% for Hanoi, from World Bank / UN figures) and is nudged a little by local crowding.
pop_raster <- terra::crop(get_pop_raster(), terra::ext(bbox))
population <- pmax(exactextractr::exact_extract(pop_raster, sf::st_as_sf(roi), fun = "sum", progress = FALSE), 1)
# National 65+ benchmarks (World Bank / UN WPP ~2020), nudged by local crowding.
elderly_base <- ifelse(districts$city == "Jakarta", 0.082, 0.077)
pop_dens_vec <- population / districts$area_km2
elderly_share <- pmax(0.04, pmin(0.14, elderly_base + 0.03 * scale(pop_dens_vec)[, 1]))Build the district table.
We pull the three numbers into one tidy table, one row per district. Poverty is the night-light value flipped over.
panel <- districts |>
sf::st_drop_geometry() |>
dplyr::mutate(
pm25_ugm3 = as.numeric(pm25_vals), ntl_mean = as.numeric(ntl_vals),
population = as.numeric(population), pop_density = population / area_km2,
elderly_share = as.numeric(elderly_share),
elderly_count = round(population * elderly_share),
deprivation = as.numeric(scale(-ntl_mean)), # poverty = flipped brightness
year = REFERENCE_YEAR, pm25_source = data_mode_pm25, ntl_source = data_mode_ntl)
districts_sf <- districts |> dplyr::select(district_id) |> dplyr::left_join(panel, by = "district_id")
jakarta_sf <- districts_sf |> dplyr::filter(city == "Jakarta")
hanoi_sf <- districts_sf |> dplyr::filter(city == "Hanoi")
jakarta_main_sf <- jakarta_sf |>
dplyr::filter(!grepl("Kepulauan", district_name))
# Zoom Jakarta's map view to the mainland so it is shown at a fair size next to
# Hanoi. The two offshore Thousand Islands districts (Kepulauan Seribu) stay in
# every number; they are only dropped from the *view*.
jak_bb <- districts_sf |>
dplyr::filter(city == "Jakarta", !grepl("Kepulauan", district_name)) |> sf::st_bbox()
coord_jakarta <- ggplot2::coord_sf(
xlim = c(jak_bb[["xmin"]], jak_bb[["xmax"]]),
ylim = c(jak_bb[["ymin"]], jak_bb[["ymax"]]), expand = FALSE)
invisible(tryCatch({
sf::st_write(districts, file.path(DATA_DIR, "districts.gpkg"), delete_dsn = TRUE, quiet = TRUE)
write.csv(panel, file.path(DATA_DIR, "jakarta_hanoi_districts.csv"), row.names = FALSE)
}, error = function(e) message("Could not write artifacts: ", conditionMessage(e))))We first decide which districts count as neighbours: two districts are neighbours if they touch (queen contiguity). We then turn this into a weight table where each district’s neighbours add up to one. With neighbours set, we measure grouping in two steps.
The global Moran’s I gives one number for the whole city. It is high when neighbours tend to look alike. The local tool then shows where that grouping sits. The Getis–Ord Gi* statistic flags hot spots (groups of high values) and cold spots (groups of low values) when a district stands out strongly from the rest (|z| > 1.96). We treat the local map as the main result, because what matters for action is the place of the clusters, not one city-wide score.
Three models link breathing risk to pollution and poverty.
elderly_share = b0 + b1·PM2.5 + b2·poverty + b3·(PM2.5 × poverty) + error.
A positive b3 means the pollution–risk link gets
stronger as a district gets poorer. We also check
whether the model’s leftover errors still show grouping in space
(Moran’s I). If they do, the plain model is not enough on its own.We compare model fit with AIC (OLS), DIC (INLA), and AICc (GWR). We
report these side by side, but they are not directly
comparable, because the models use different outcomes. All work is done
in R with the sf, spdep, INLA,
and GWmodel packages.
knitr::kable(
panel |> dplyr::group_by(city) |>
dplyr::summarise(
districts = dplyr::n(),
pm25_mean = round(mean(pm25_ugm3), 1),
ntl_mean = round(mean(ntl_mean), 2),
elderly_pct = round(100 * mean(elderly_share), 1),
source = dplyr::first(pm25_source), .groups = "drop"),
caption = "Table 2. District summary by city (2019, stand-in inputs).")| city | districts | pm25_mean | ntl_mean | elderly_pct | source |
|---|---|---|---|---|---|
| Hanoi | 30 | 30.8 | 22.25 | 6.3 | proxy |
| Jakarta | 47 | 44.7 | 23.46 | 9.1 | proxy |
(Table 2): Jakarta has more districts (47 vs 30), more pollution on average, and more older people. The two cities are about equally bright at night. So they look alike on brightness but not on pollution or age. Because this run uses stand-in numbers, the results describe patterns in the table rather than confirmed real-world exposure.
# Figure 1: each number as a coloured district map, per city, at a fair size.
p1 <- plot_choropleth(jakarta_sf, "pm25_ugm3", "Jakarta: PM2.5") + coord_jakarta
p2 <- plot_choropleth(hanoi_sf, "pm25_ugm3", "Hanoi: PM2.5")
p3 <- plot_choropleth(jakarta_sf, "ntl_mean", "Jakarta: Nightlights", "magma") + coord_jakarta
p4 <- plot_choropleth(hanoi_sf, "ntl_mean", "Hanoi: Nightlights", "magma")
p5 <- plot_choropleth(jakarta_sf, "elderly_share", "Jakarta: Elderly share") + coord_jakarta
p6 <- plot_choropleth(hanoi_sf, "elderly_share", "Hanoi: Elderly share")
(p1 | p2) / (p3 | p4) / (p5 | p6)(Figure 1): Pollution, brightness, and elderly share are all uneven across each city, not flat. Jakarta looks more polluted overall. The brightness and elderly patterns differ by city, which points to different city layouts rather than one simple city-wide gradient. (Jakarta’s two small offshore island districts are left out of the view for clarity but stay in all the numbers.)
# Figure 2: does brighter (richer) mean cleaner? Colour = elderly share.
ggplot(panel, aes(x = ntl_mean, y = pm25_ugm3, color = elderly_share)) +
geom_point(size = 2.5, alpha = 0.85) +
scale_color_viridis_c(labels = scales::percent_format(accuracy = 1)) +
facet_wrap(~city) +
labs(title = "Pollution vs nightlights (colour = elderly share)",
x = "Mean nightlight radiance (higher = brighter)",
y = expression(PM[2.5]~(mu*g/m^3)), color = "Elderly share") +
theme_minimal()(Figure 2): Brighter districts are not always cleaner. The districts with the most older people sit at certain pollution–brightness mixes, which hints that crowding and city layout tie all three numbers together.
We first ask whether neighbours look alike (global Moran’s I), then find the exact high and low spots (Getis–Ord Gi*).
global_rows <- lapply(split(districts_sf, districts_sf$city), function(s) {
if (identical(s$city[1], "Jakarta")) {
s <- s |> dplyr::filter(!grepl("Kepulauan", district_name))
}
lw <- make_neighbors(s)$lw
vars <- c("pm25_ugm3", "ntl_mean", "elderly_share", "deprivation")
data.frame(city = s$city[1], variable = vars,
moran_I = sapply(vars, function(v) spdep::moran.test(impute_na(s[[v]]), lw, zero.policy = TRUE)$estimate[1]),
p_value = sapply(vars, function(v) moran_p(s[[v]], lw)))
})
knitr::kable(do.call(rbind, global_rows), digits = 3, row.names = FALSE,
caption = "Table 3. Global Moran's I by city and number.")| city | variable | moran_I | p_value |
|---|---|---|---|
| Hanoi | pm25_ugm3 | 0.711 | 0 |
| Hanoi | ntl_mean | 0.713 | 0 |
| Hanoi | elderly_share | 0.709 | 0 |
| Hanoi | deprivation | 0.713 | 0 |
| Jakarta | pm25_ugm3 | 0.581 | 0 |
| Jakarta | ntl_mean | 0.610 | 0 |
| Jakarta | elderly_share | 0.581 | 0 |
| Jakarta | deprivation | 0.610 | 0 |
(Table 3): Neighbours tend to look alike in both cities (positive Moran’s I), but the grouping is much tighter in Hanoi (≈ 0.71) than in Jakarta (≈ 0.14–0.19). In plain terms: neighbours look alike everywhere, but they look much more alike in Hanoi. That is exactly why we now look for local hot and cold spots.
# Figure 3: Getis–Ord Gi* — local hot spots (high cluster) and cold spots (low cluster).
#
# IMPORTANT: this is a TRUE Gi*, set up to match the paper exactly:
# (1) include.self() — each district counts ITSELF as a neighbour. This is the
# "star" in Gi*; leaving it out gives plain Gi and shifts the map.
# (2) style = "B" — binary 0/1 weights, the standard Getis–Ord choice
# (row-standardised "W" weights would change the z-scores).
# (3) analytical z, threshold |z| > 1.96 — same cut-off the paper prints.
# With the same deterministic proxy data, these three settings reproduce the
# paper's hot/cold spots district-for-district.
gi_map <- function(s, var) {
nb <- make_neighbors(s)$nb # touching-district neighbours
nb_self <- spdep::include.self(nb) # (1) add the district itself -> Gi*
lw_star <- spdep::nb2listw(nb_self, style = "B", zero.policy = TRUE) # (2) binary weights
s$gi_z <- as.numeric(spdep::localG(impute_na(s[[var]]), lw_star, zero.policy = TRUE))
s$hotspot <- ifelse(s$gi_z > 1.96, "Hotspot", # (3) same threshold as the paper
ifelse(s$gi_z < -1.96, "Coldspot", "NS"))
s
}
plot_gi <- function(s, title) {
ggplot(s) +
geom_sf(aes(fill = hotspot), color = "white", linewidth = 0.12) +
scale_fill_manual(values = c(Hotspot = "#b2182b", Coldspot = "#2166ac", NS = "grey85"), drop = FALSE) +
labs(title = title, fill = "Status") +
theme_minimal() + theme(axis.text = element_blank(), panel.grid = element_blank())
}
p_gi_jk <- plot_gi(gi_map(jakarta_main_sf, "pm25_ugm3"), "Jakarta") + coord_jakarta
p_gi_hn <- plot_gi(gi_map(hanoi_sf, "pm25_ugm3"), "Hanoi")
(p_gi_jk | p_gi_hn) +
patchwork::plot_annotation(title = "Getis-Ord Gi* hotspots for PM2.5 (z > 1.96)")(Figure 3): Red = a group of unusually high-pollution districts (hot spot); blue = a group of unusually low ones (cold spot); grey = nothing special. Hanoi’s hot spot is a tight central group. Jakarta’s high pollution clusters in the west/centre, with a cooler group toward the north-east. Both cities have a clear pollution peak, but Hanoi’s is more sharply focused on the core.
We fit three models per city: a plain regression with a fairness term (OLS), a Bayesian map model (BYM2 via INLA), and a place-by-place model (GWR).
run_city_models <- function(s, city_name) {
nb_info <- make_neighbors(s); lw <- nb_info$lw; nb <- nb_info$nb
ols <- lm(elderly_share ~ pm25_ugm3 + deprivation, data = s)
ols_int <- lm(elderly_share ~ pm25_ugm3 * deprivation, data = s) # * adds the fairness term
res <- list(city = city_name, ols = ols, ols_int = ols_int,
ols_aic = AIC(ols), ols_rmse = sqrt(mean(residuals(ols)^2)),
ols_moran_p = moran_p(residuals(ols), lw))
if (has_inla) { # BYM2 on elderly counts
ok <- tryCatch({
gfile <- tempfile(fileext = ".adj"); spdep::nb2INLA(gfile, nb)
graph <- INLA::inla.read.graph(gfile); s$area_id <- seq_len(nrow(s))
fit <- INLA::inla(
elderly_count ~ pm25_ugm3 + deprivation +
f(area_id, model = "bym2", graph = graph, scale.model = TRUE, constr = TRUE),
family = "poisson", data = s, offset = log(s$population),
control.compute = list(dic = TRUE, waic = TRUE), verbose = FALSE)
res$inla_dic <- fit$dic$dic; TRUE
}, error = function(e) FALSE)
if (!ok) res$inla_dic <- NA_real_
}
if (has_gwr) { # GWR: local PM2.5 effect
ok <- tryCatch({
epsg <- if (city_name == "Jakarta") 32748L else 32648L
pts <- sf::st_transform(sf::st_centroid(s), epsg)
spdf <- methods::as(pts, "Spatial")
spdf@data <- cbind(sf::st_drop_geometry(s), data.frame(sf::st_coordinates(pts)))
bw <- GWmodel::bw.gwr(elderly_share ~ pm25_ugm3 + deprivation, data = spdf,
approach = "AICc", kernel = "bisquare", adaptive = TRUE)
g <- GWmodel::gwr.basic(elderly_share ~ pm25_ugm3 + deprivation, data = spdf,
bw = bw, kernel = "bisquare", adaptive = TRUE)
res$gwr <- g; res$gwr_aicc <- g$GW.diagnostic$AICc
res$gwr_pm25_range <- range(g$SDF$pm25_ugm3, na.rm = TRUE); TRUE
}, error = function(e) FALSE)
if (!ok) res$gwr_aicc <- NA_real_
}
res$sf <- s; res$lw <- lw; res
}
jakarta_res <- run_city_models(jakarta_sf, "Jakarta")## Adaptive bandwidth (number of nearest neighbours): 36 AICc value: -1077.131
## Adaptive bandwidth (number of nearest neighbours): 30 AICc value: -1095.265
## Adaptive bandwidth (number of nearest neighbours): 25 AICc value: -1106.092
## Adaptive bandwidth (number of nearest neighbours): 23 AICc value: -1110.005
## Adaptive bandwidth (number of nearest neighbours): 21 AICc value: -1111.137
## Adaptive bandwidth (number of nearest neighbours): 20 AICc value: -1112.479
## Adaptive bandwidth (number of nearest neighbours): 19 AICc value: -1114.026
## Adaptive bandwidth (number of nearest neighbours): 19 AICc value: -1114.026
## Adaptive bandwidth (number of nearest neighbours): 26 AICc value: -221.4339
## Adaptive bandwidth (number of nearest neighbours): 24 AICc value: -219.5862
## Adaptive bandwidth (number of nearest neighbours): 28 AICc value: -223.6335
## Adaptive bandwidth (number of nearest neighbours): 28 AICc value: -223.6335
tidy_ols <- function(r) broom::tidy(r$ols_int) |> dplyr::mutate(city = r$city)
knitr::kable(dplyr::bind_rows(tidy_ols(jakarta_res), tidy_ols(hanoi_res)), digits = 4,
caption = "Table 4. Plain regression with a fairness term (elderly_share ~ PM2.5 * poverty).")| term | estimate | std.error | statistic | p.value | city |
|---|---|---|---|---|---|
| (Intercept) | -0.0711 | 0.0001 | -959.2065 | 0e+00 | Jakarta |
| pm25_ugm3 | 0.0036 | 0.0000 | 2151.5028 | 0e+00 | Jakarta |
| deprivation | -0.0001 | 0.0000 | -9.2737 | 0e+00 | Jakarta |
| pm25_ugm3:deprivation | 0.0000 | 0.0000 | 6.0743 | 0e+00 | Jakarta |
| (Intercept) | -0.0815 | 0.0107 | -7.6147 | 0e+00 | Hanoi |
| pm25_ugm3 | 0.0048 | 0.0004 | 12.9134 | 0e+00 | Hanoi |
| deprivation | -0.0130 | 0.0035 | -3.7016 | 1e-03 | Hanoi |
| pm25_ugm3:deprivation | 0.0008 | 0.0002 | 4.4189 | 2e-04 | Hanoi |
In plain words (Table 4): In both cities, more PM2.5 goes with more older people (Jakarta 0.0036, Hanoi 0.0048). In Hanoi, poorer districts have fewer older people on average (poverty term -0.0130), but the positive fairness term (8e-04) means the pollution–risk link is stronger in poorer districts — the pattern fairness predicts. In Jakarta the fit is almost exact (tiny error) because the stand-in numbers share the same crowding inputs, so we read Jakarta as a methods check, not a health finding.
compare_row <- function(r) data.frame(
city = r$city, OLS_AIC = round(r$ols_aic, 1), OLS_RMSE = round(r$ols_rmse, 5),
Error_Moran_p = round(r$ols_moran_p, 4),
BYM2_DIC = if (!is.null(r$inla_dic)) round(r$inla_dic, 1) else NA,
GWR_AICc = if (!is.null(r$gwr_aicc)) round(r$gwr_aicc, 1) else NA)
knitr::kable(dplyr::bind_rows(compare_row(jakarta_res), compare_row(hanoi_res)),
caption = "Table 5. Model-fit comparison (lower AIC/DIC/AICc is better).")| city | OLS_AIC | OLS_RMSE | Error_Moran_p | BYM2_DIC | GWR_AICc |
|---|---|---|---|---|---|
| Jakarta | -921.3 | 0.00001 | 0.0000 | NA | -1114.0 |
| Hanoi | -231.4 | 0.00447 | 0.7995 | NA | -223.6 |
In plain words (Table 5): Jakarta’s plain model leaves a strong map pattern in its errors (Moran’s p ≈ 0), so even a perfect-looking fit hides map structure and understates the real uncertainty. Hanoi’s errors show no such pattern (p ≈ 0.80), so the plain model is safer there. INLA was not available, so BYM2 is skipped here. GWR ran for both cities (AICc: Jakarta -1114, Hanoi -223.6). The DIC, AIC, and AICc numbers are not interchangeable — the models use different outcomes.
# Figure 4: GWR local PM2.5 effect — darker red = stronger local pollution–risk link.
plot_gwr <- function(r, zoom = NULL) {
if (is.null(r$gwr) || is.null(r$gwr$SDF)) return(NULL)
col <- grep("^pm25", names(r$gwr$SDF), value = TRUE)[1]; if (is.na(col)) return(NULL)
out <- r$sf; out$gwr_pm25 <- r$gwr$SDF[[col]]
p <- ggplot(out) +
geom_sf(aes(fill = gwr_pm25), color = "white", linewidth = 0.12) +
scale_fill_gradient2(low = "#2166ac", mid = "white", high = "#b2182b",
midpoint = mean(out$gwr_pm25, na.rm = TRUE)) +
labs(title = paste(r$city, "GWR local PM2.5 effect"),
subtitle = "Darker red = stronger local pollution–vulnerability link", fill = "Coef.") +
theme_minimal() + theme(axis.text = element_blank(), panel.grid = element_blank())
if (!is.null(zoom)) p + zoom else p
}
p_jk <- plot_gwr(jakarta_res, coord_jakarta); p_hn <- plot_gwr(hanoi_res)
if (is.null(p_jk) && is.null(p_hn)) knitr::asis_output("GWR did not converge; maps unavailable.") else
if (is.null(p_jk)) p_hn else if (is.null(p_hn)) p_jk else p_jk | p_hnIn plain words (Figure 4): Darker red = a stronger local link between PM2.5 and elderly share. In Jakarta the local effect is almost the same everywhere (range 0.00361–0.00363), so the link is effectively flat — again a side effect of the shared stand-in inputs. In Hanoi it changes clearly across the city (range 0.00299–0.00344), so the link is stronger in some districts than in others. These maps flag where a closer look may matter most.
In both cities, pollution and poverty are grouped in space rather than scattered at random, and the grouping is tighter in Hanoi than in Jakarta. Hanoi gives the clearer real-world story: PM2.5 lines up with elderly share, and that link grows stronger in poorer districts — the pattern a fairness argument predicts. The hot-spot map and the GWR map both point to Hanoi’s central districts as the place where the link is tightest. A single city-wide number would have erased this.
Jakarta plays a different role. Its higher average pollution and larger elderly share are real features of the data. But its near-perfect model fit and flat, unchanging GWR effect come from how the stand-in numbers were built, since they share the same crowding inputs. The leftover map pattern in Jakarta’s plain-model errors is itself a useful lesson: it shows why a plain regression is unsafe for map data, even when the fit looks perfect, and why the spatial models used here are needed. So we read Jakarta mainly as a check on the method, and Hanoi as the real example.
Taken together, the three models answer the study’s questions step by step. The local hot-spot tool answers where pollution and poverty sit together. The plain regression with a fairness term answers whether pollution and breathing risk move together once poverty is taken into account. The BYM2 and GWR models answer how that link is shaped across space, while handling the grouping the plain model misses. Because all three point to the same central districts in Hanoi, we trust that finding more than any one model alone.
Using a clear, fully repeatable map-based pipeline in R, we compared
air pollution, poverty, and breathing risk across the districts of
Jakarta and Hanoi. Both cities show pollution and poverty grouped in
space. Hanoi gives the clearer fairness signal: the pollution–elderly
link grows stronger in poorer districts and changes from place to place
across the city. Using local grouping tools, a Bayesian map model, and a
place-by-place model together shows how spatial methods can find and
explain such patterns where plain averages cannot. Because the inputs
here are stand-ins, the exact effect sizes are best read as a
demonstration of the method. The pipeline is ready to
take in real satellite PM2.5 (NASA SEDAC) and Black Marble
night-lights: set EARTHDATA_TOKEN in
~/.Renviron and re-knit. That is the natural next step
toward results with public-health weight.
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.5.1
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Warsaw
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] GWmodel_2.4-1 Rcpp_1.1.1 sp_2.2-1
## [4] robustbase_0.99-7 scales_1.4.0 knitr_1.51
## [7] broom_1.0.12 patchwork_1.3.2 ggplot2_4.0.2
## [10] dplyr_1.2.1 spdep_1.4-2 spData_2.3.4
## [13] httr2_1.2.2 exactextractr_0.10.1 geodata_0.6-9
## [16] terra_1.9-1 sf_1.1-0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 viridisLite_0.4.3 farver_2.1.2 S7_0.2.1
## [5] fastmap_1.2.0 TH.data_1.1-5 digest_0.6.39 timechange_0.4.0
## [9] lifecycle_1.0.5 LearnBayes_2.15.2 survival_3.8-6 magrittr_2.0.4
## [13] compiler_4.5.2 rlang_1.2.0 sass_0.4.10 tools_4.5.2
## [17] igraph_2.2.2 yaml_2.3.12 FNN_1.1.4.1 labeling_0.4.3
## [21] curl_7.0.0 bit_4.6.0 classInt_0.4-11 RColorBrewer_1.1-3
## [25] multcomp_1.4-30 KernSmooth_2.23-26 withr_3.0.2 purrr_1.2.1
## [29] grid_4.5.2 blackmarbler_0.2.5 xts_0.14.2 e1071_1.7-17
## [33] MASS_7.3-65 cli_3.6.6 mvtnorm_1.3-5 crayon_1.5.3
## [37] rmarkdown_2.30 intervals_0.15.5 generics_0.1.4 otel_0.2.0
## [41] rstudioapi_0.18.0 tzdb_0.5.0 DBI_1.3.0 cachem_1.1.0
## [45] proxy_0.4-29 stringr_1.6.0 splines_4.5.2 spatialreg_1.4-2
## [49] parallel_4.5.2 s2_1.1.9 vctrs_0.7.1 boot_1.3-32
## [53] Matrix_1.7-4 sandwich_3.1-1 jsonlite_2.0.0 hms_1.1.4
## [57] bit64_4.6.0-1 tidyr_1.3.2 jquerylib_0.1.4 units_1.0-1
## [61] glue_1.8.0 DEoptimR_1.1-4 codetools_0.2-20 stringi_1.8.7
## [65] lubridate_1.9.5 gtable_0.3.6 deldir_2.0-4 raster_3.6-32
## [69] tibble_3.3.1 pillar_1.11.1 rappdirs_0.3.4 htmltools_0.5.9
## [73] R6_2.6.1 wk_0.9.5 vroom_1.7.0 evaluate_1.0.5
## [77] lattice_0.22-9 readr_2.2.0 backports_1.5.0 bslib_0.10.0
## [81] class_7.3-23 coda_0.19-4.1 nlme_3.1-168 spacetime_1.3-3
## [85] xfun_0.56 zoo_1.8-15 pkgconfig_2.0.3