# Setup
knitr::opts_chunk$set(cache = TRUE, autodep = TRUE, warning = FALSE, message = FALSE)
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 4.5.2
library(tigris)
## Warning: package 'tigris' was built under R version 4.5.2
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(sf)
## Warning: package 'sf' was built under R version 4.5.2
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(stringr)
library(purrr)
library(readr)
library(ggplot2)
library(classInt)
## Warning: package 'classInt' was built under R version 4.5.2
library(viridis)
## Warning: package 'viridis' was built under R version 4.5.2
## Loading required package: viridisLite
library(osmdata)
## Warning: package 'osmdata' was built under R version 4.5.2
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
library(spdep)
## Warning: package 'spdep' was built under R version 4.5.2
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.5.2
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(spatialreg)
## Warning: package 'spatialreg' was built under R version 4.5.2
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
## The following object is masked from 'package:readr':
##
## col_factor
## The following object is masked from 'package:purrr':
##
## discard
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(osmextract)
## Warning: package 'osmextract' was built under R version 4.5.2
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright.
## Check the package website, https://docs.ropensci.org/osmextract/, for more details.
options(tigris_use_cache = TRUE)
options(scipen = 999)
# Census API Key
tidycensus::census_api_key(Sys.getenv("CENSUS_API_KEY"), install = FALSE)
We use NYC census tracts as the neighborhood unit for a clean, reproducible run (no manual files). Proxies come from ACS 5-year estimates. Mosques are pulled from OpenStreetMap.
# Base settings
year <- 2022
state <- "NY"
nyc_counties <- c("New York","Kings","Queens","Bronx","Richmond")
# Census tract population (and geometry)
tract_pop <- get_acs(
geography = "tract",
variables = c(total_pop = "B01003_001"),
state = state,
county = nyc_counties,
year = year,
survey = "acs5",
geometry = TRUE,
output = "wide"
)
tract_pop <- tract_pop |>
rename(total_pop = total_popE) |>
st_transform(2263) |>
st_make_valid()
# ACS metadata
vars_b05006 <- load_variables(year, "acs5", cache = TRUE) |>
filter(str_detect(name, "^B05006"))
vars_b16001 <- load_variables(year, "acs5", cache = TRUE) |>
filter(str_detect(name, "^B16001"))
vars_b04006 <- load_variables(year, "acs5", cache = TRUE) |>
filter(str_detect(name, "^B04006"))
# Muslim-majority country list
mm_countries <- c(
"Afghanistan","Algeria","Azerbaijan","Bahrain","Bangladesh","Brunei","Burkina Faso",
"Chad","Comoros","Djibouti","Egypt","Gambia","Guinea","Guinea-Bissau","Indonesia",
"Iran","Iraq","Jordan","Kazakhstan","Kuwait","Kyrgyzstan","Lebanon","Libya","Malaysia",
"Maldives","Mali","Mauritania","Morocco","Niger","Nigeria","Oman","Pakistan","Palestine",
"Qatar","Saudi Arabia","Senegal","Sierra Leone","Somalia","Sudan","Syria","Tajikistan",
"Tunisia","Turkey","Turkmenistan","United Arab Emirates","Uzbekistan","Western Sahara","Yemen"
)
mm_regex <- regex(
paste0("\\b(", paste(mm_countries, collapse = "|"), ")\\b"),
ignore_case = TRUE
)
# ACS variable selections
pick_b05006 <- vars_b05006 |>
filter(str_detect(label, mm_regex))
pick_b16001 <- vars_b16001 |>
filter(
str_detect(
label,
regex("Urdu:|Bengali:|Arabic:|African languages:", ignore_case = TRUE)
)
)
pick_b04006 <- vars_b04006 |>
filter(str_detect(label, regex("Arab", ignore_case = TRUE)))
# Helper to pull ACS variables and compute shares
pull_table <- function(var_rows, label, batch_size = 40) {
v <- unique(var_rows$name)
if (length(v) == 0) {
message("No variables found for label = ", label)
return(tibble())
}
chunks <- split(v, ceiling(seq_along(v) / batch_size))
get_one <- function(vars_vec) {
tryCatch(
get_acs(
geography = "tract",
state = state,
county = nyc_counties,
year = year,
survey = "acs5",
variables = vars_vec,
geometry = FALSE,
cache_table = TRUE
),
error = function(e) {
message("Skipping a chunk that failed (", length(vars_vec),
" vars). Error: ", conditionMessage(e))
return(tibble())
}
)
}
raw <- map_dfr(chunks, get_one)
if (nrow(raw) == 0) return(tibble())
raw |>
left_join(
tract_pop |>
st_drop_geometry() |>
select(GEOID, total_pop),
by = "GEOID"
) |>
mutate(
share = estimate / pmax(total_pop, 1),
group = label
) |>
select(GEOID, variable, estimate, moe, total_pop, share, group)
}
# Pull proxies
acs_b05006 <- pull_table(pick_b05006, "birthplace")
acs_b04006 <- pull_table(pick_b04006, "ancestry")
acs_b16001 <- NULL # language excluded due to suppressed NA estimates
# Validate distributions
summary(acs_b05006$share)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.000000 0.000000 0.001365 0.000000 0.390987
summary(acs_b04006$share)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000000 0.000000 0.000000 0.002724 0.000000 0.565217
# Bind birthplace + ancestry proxies
acs_proxy <- dplyr::bind_rows(
purrr::keep(
list(acs_b05006, acs_b04006),
~ !is.null(.x) && nrow(.x) > 0
)
)
if (is.null(acs_proxy) || nrow(acs_proxy) == 0) {
stop("No ACS proxy rows available. Check earlier selection logic.")
}
# Pivot wide: one row per tract, one column per proxy variable
proxy_wide <- acs_proxy |>
tidyr::unite(var_id, group, variable, remove = FALSE) |>
dplyr::select(GEOID, var_id, share) |>
tidyr::pivot_wider(
names_from = var_id,
values_from = share,
values_fill = 0
)
# Z-score each proxy column
zcols <- setdiff(names(proxy_wide), "GEOID")
proxy_z <- proxy_wide
proxy_z[zcols] <- lapply(proxy_wide[zcols], function(x) as.numeric(scale(x)))
# Compute PCI as average of standardized proxies
proxy_z$PCI <- rowMeans(proxy_z[zcols], na.rm = TRUE)
# Join PCI back to tract geometry
pci <- tract_pop |>
dplyr::select(GEOID, geometry) |>
dplyr::left_join(
proxy_z |> dplyr::select(GEOID, PCI),
by = "GEOID"
)
summary(proxy_z$PCI)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.18582 -0.16087 -0.09050 0.00000 0.05829 3.71303
head(pci)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 1007383 ymin: 239639.8 xmax: 1026519 ymax: 268162.4
## Projected CRS: NAD83 / New York Long Island (ftUS)
## GEOID PCI geometry
## 1 36005013500 -0.18581591 POLYGON ((1010519 240631.4,...
## 2 36005009200 0.03438263 POLYGON ((1023619 242996.9,...
## 3 36005005400 -0.07722129 POLYGON ((1016383 241620.3,...
## 4 36005036501 -0.03178067 POLYGON ((1015150 247218.6,...
## 5 36005044902 -0.16844511 POLYGON ((1019811 267212.7,...
## 6 36005017500 0.04407802 POLYGON ((1007579 241875.2,...
nyc_bbox <- tract_pop |>
st_transform(4326) |>
st_bbox()
# Query 1: all places of worship
q_pow <- opq(bbox = nyc_bbox) |>
add_osm_feature(key = "amenity", value = "place_of_worship")
osm_pow_raw <- osmdata_sf(q_pow)
# Query 2: all buildings explicitly tagged as mosques
q_bldg_mosque <- opq(bbox = nyc_bbox) |>
add_osm_feature(key = "building", value = "mosque")
osm_bldg_raw <- osmdata_sf(q_bldg_mosque)
filter_mosque_sf <- function(x) {
if (is.null(x) || nrow(x) == 0) return(NULL)
x |>
mutate(
religion = tolower(as.character(religion)),
name = tolower(as.character(name)),
denom = tolower(as.character(denomination))
) |>
filter(
religion == "muslim" |
str_detect(name, "mosque|masjid|islamic") |
str_detect(denom, "muslim|sunni|shia")
)
}
pow_pts <- filter_mosque_sf(osm_pow_raw$osm_points)
pow_polys <- filter_mosque_sf(osm_pow_raw$osm_polygons)
pow_mpolys <- filter_mosque_sf(osm_pow_raw$osm_multipolygons)
# For building=mosque, accept ALL features without extra filtering
bldg_pts <- osm_bldg_raw$osm_points
bldg_polys <- osm_bldg_raw$osm_polygons
bldg_mpolys <- osm_bldg_raw$osm_multipolygons
poly_to_centroids <- function(x) {
if (is.null(x) || nrow(x) == 0) return(NULL)
st_centroid(x)
}
pow_poly_centroids <- poly_to_centroids(pow_polys)
pow_mpoly_centroids <- poly_to_centroids(pow_mpolys)
bldg_poly_centroids <- poly_to_centroids(bldg_polys)
bldg_mpoly_centroids <- poly_to_centroids(bldg_mpolys)
mosque_all <- list(
pow_pts,
pow_poly_centroids,
pow_mpoly_centroids,
bldg_pts,
bldg_poly_centroids,
bldg_mpoly_centroids
) |>
purrr::keep(~ !is.null(.x) && nrow(.x) > 0) |>
dplyr::bind_rows()
if (is.null(mosque_all) || nrow(mosque_all) == 0) {
stop("No mosque features returned from OSM. Check queries or bbox.")
}
mosque_all <- mosque_all |>
st_transform(2263) |>
distinct(geometry, .keep_all = TRUE)
mosque_tract <- st_join(
mosque_all,
tract_pop |> select(GEOID, geometry),
join = st_within,
left = FALSE
)
mosque_counts <- mosque_tract |>
st_drop_geometry() |>
count(GEOID, name = "mosque_count")
mosque_density <- tract_pop |>
st_drop_geometry() |>
select(GEOID, total_pop) |>
left_join(mosque_counts, by = "GEOID") |>
mutate(
mosque_count = ifelse(is.na(mosque_count), 0L, mosque_count),
mosque_density_per_10k = ifelse(
total_pop > 0,
mosque_count / (total_pop / 10000),
NA_real_
)
)
pci_mosques <- pci |>
left_join(
mosque_density |>
select(GEOID, mosque_count, mosque_density_per_10k),
by = "GEOID"
)
nrow(mosque_all)
## [1] 324
summary(pci_mosques$mosque_density_per_10k)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 0.0000 0.4313 0.0000 156.2500 85
head(unique(tolower(mosque_all$name)), 30)
## [1] "islamic society of bayridge"
## [2] "mosque of american mohammedan society"
## [3] "al-mahdi foundation"
## [4] "muslim community center of union county (mccuc)"
## [5] "islamic center of passaic county"
## [6] "staten island islamic masjid"
## [7] "muslim majlis of staten island"
## [8] "noor al-islam society"
## [9] "masjid al-ihsan"
## [10] "dar-ul-islah"
## [11] "masjid at-taqea"
## [12] "masjid mission center"
## [13] "united american muslim association"
## [14] "ibaadu'rrahman central mosque"
## [15] "masjid al taufiq"
## [16] "north bronx islamic center"
## [17] "masjid nur al islam"
## [18] "greenpoint islamic center"
## [19] "masjid al-salam islamic education and information center"
## [20] "masjid manhattan"
## [21] NA
## [22] "masjid ar-rahman"
## [23] "masjid dar-ul furqan"
## [24] "masjidul allah"
## [25] "masjid salam"
## [26] "islamic center at nyu"
## [27] "unionport jame masjid"
## [28] "masjid al imam al albani"
## [29] "sheepshead bay muslim center"
## [30] "baitul jannah"
pci_mosques <- pci_mosques |>
mutate(
mosque_density_per_10k = ifelse(
is.na(mosque_density_per_10k),
0,
mosque_density_per_10k
)
)
cor.test(
pci_mosques$PCI,
pci_mosques$mosque_density_per_10k,
method = "spearman"
)
##
## Spearman's rank correlation rho
##
## data: pci_mosques$PCI and pci_mosques$mosque_density_per_10k
## S = 1914541729, p-value = 0.00001972
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.08835235
ggplot(pci_mosques,
aes(x = PCI, y = mosque_density_per_10k)) +
geom_point(alpha = 0.4, size = 1) +
geom_smooth(method = "loess", se = FALSE, color = "red", linewidth = 1) +
scale_y_continuous("Mosque density per 10,000 residents") +
scale_x_continuous("Proxy Concentration Index (PCI)") +
theme_minimal()
# 1) Query amenities: restaurants and fast food
q_halal_amenity <- opq(bbox = nyc_bbox) |>
add_osm_feature(key = "amenity", value = c("restaurant", "fast_food"))
osm_halal_amenity <- osmdata_sf(q_halal_amenity)
amen_pts <- osm_halal_amenity$osm_points
amen_polys <- osm_halal_amenity$osm_polygons
# 2) Query shops: supermarkets, convenience, butchers
q_halal_shop <- opq(bbox = nyc_bbox) |>
add_osm_feature(key = "shop", value = c("supermarket", "convenience", "butcher"))
osm_halal_shop <- osmdata_sf(q_halal_shop)
shop_pts <- osm_halal_shop$osm_points
shop_polys <- osm_halal_shop$osm_polygons
# 3) Filtering function: look for halal cues in name or cuisine
filter_halal_sf <- function(x) {
if (is.null(x) || nrow(x) == 0) return(NULL)
x |>
dplyr::mutate(
name_lc = tolower(as.character(name)),
cuisine_lc = tolower(as.character(cuisine))
) |>
dplyr::filter(
stringr::str_detect(name_lc, "halal|islamic|muslim|zabiha|zabihah") |
stringr::str_detect(cuisine_lc, "halal")
)
}
amen_pts_f <- filter_halal_sf(amen_pts)
amen_polys_f <- filter_halal_sf(amen_polys)
shop_pts_f <- filter_halal_sf(shop_pts)
shop_polys_f <- filter_halal_sf(shop_polys)
# 4) Convert polygons to centroids
poly_to_centroids <- function(x) {
if (is.null(x) || nrow(x) == 0) return(NULL)
sf::st_centroid(x)
}
amen_poly_centroids <- poly_to_centroids(amen_polys_f)
shop_poly_centroids <- poly_to_centroids(shop_polys_f)
# 5) Combine all halal candidate locations as sf
halal_list <- list(
amen_pts_f,
amen_poly_centroids,
shop_pts_f,
shop_poly_centroids
) |>
purrr::keep(~ !is.null(.x) && nrow(.x) > 0)
if (length(halal_list) > 0) {
# Find common columns across all sf objects
common_cols <- Reduce(intersect, lapply(halal_list, names))
# Subset each to the common columns, then row-bind
halal_all <- do.call(
rbind,
lapply(halal_list, function(x) x[, common_cols, drop = FALSE])
)
# Transform to tract CRS
halal_all <- halal_all |>
sf::st_transform(2263)
# Drop duplicate geometries
geom_txt <- sf::st_as_text(sf::st_geometry(halal_all))
halal_all <- halal_all[!duplicated(geom_txt), ]
# 6) Assign each halal business to a tract
halal_tract <- sf::st_join(
halal_all,
tract_pop |> dplyr::select(GEOID, geometry),
join = sf::st_within,
left = FALSE
)
# 7) Count per tract
halal_counts <- halal_tract |>
sf::st_drop_geometry() |>
dplyr::count(GEOID, name = "halal_count")
} else {
message("No halal-related features returned from OSM using current filters.")
halal_counts <- dplyr::tibble(GEOID = character(), halal_count = integer())
}
# 8) Construct halal density per 10K residents for all tracts
halal_density <- tract_pop |>
sf::st_drop_geometry() |>
dplyr::select(GEOID, total_pop) |>
dplyr::left_join(halal_counts, by = "GEOID") |>
dplyr::mutate(
halal_count = dplyr::coalesce(halal_count, 0L),
halal_density_per_10k = dplyr::if_else(
total_pop > 0,
halal_count / (total_pop / 10000),
0
)
)
# 9) Join halal density onto PCI + mosque object
pci_halal <- pci_mosques |>
dplyr::left_join(
halal_density |>
dplyr::select(GEOID, halal_count, halal_density_per_10k),
by = "GEOID"
)
nrow(halal_all)
## [1] 145
summary(pci_halal$halal_density_per_10k)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2728 0.0000 263.1579
head(unique(tolower(halal_all$name)), 40)
## [1] "no pork halal kitchen" "halalbee's"
## [3] "halal dynasty" "alnoor halal deli"
## [5] "just halal" "shah's halal food"
## [7] "cajun halal platters" "halal chicks"
## [9] "halal bitez" "crafted halal"
## [11] "khalil halal chinese" "urban halal grill"
## [13] "halal munchies" "barakah's halal"
## [15] "medo halal food" "the halal guys"
## [17] "halal" "un halal"
## [19] "halal mix" "numidia halal food"
## [21] "hillside express halal food" "tanjawi halal sandwich shop"
## [23] "halal boyz" "mr. chang halal chinese"
## [25] "bakhter halal food cart" "halal city"
## [27] "halal bros" "express halal food"
## [29] "bellrose halal meat" "no 1 halal kitchen"
## [31] "halal xpress asian fusion" "mia halal food"
## [33] "halal grill eats" "hooda halal"
## [35] "halal cart" "sammy's halal food"
## [37] "halal food" "halal guys"
## [39] "sammy’s halal" "zinger halal express chicken burger"
cor.test(
pci_halal$PCI,
pci_halal$halal_density_per_10k,
method = "spearman"
)
##
## Spearman's rank correlation rho
##
## data: pci_halal$PCI and pci_halal$halal_density_per_10k
## S = 1956829080, p-value = 0.0009921
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.06821637
ggplot(pci) +
geom_sf(aes(fill = PCI), color = "grey85", size = 0.05) +
scale_fill_viridis_c(
option = "plasma",
direction = 1,
name = "PCI (relative)\nHigher = stronger proxy"
) +
labs(
title = "Proxy Concentration Index (PCI) Across NYC Census Tracts",
subtitle = "Higher PCI values indicate neighborhoods with stronger Muslim settlement proxies",
caption = "Source: 2018–2022 ACS (birthplace + ancestry proxies)"
) +
theme_minimal(base_size = 11) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
plot.title = element_text(face = "bold", size = 13),
legend.position = "right",
legend.title = element_text(size = 10),
legend.text = element_text(size = 9)
)
mosque_xy <- mosque_all |>
st_transform(2263)
ggplot() +
geom_sf(data = pci, aes(fill = PCI), color = "grey85", size = 0.05) +
geom_sf(
data = mosque_xy,
color = "deepskyblue3",
size = 1.2,
alpha = 0.8
) +
scale_fill_viridis_c(
option = "plasma",
direction = 1,
name = "PCI"
) +
labs(
title = "Mosque Locations Overlaid on the Proxy Concentration Index (PCI)",
subtitle = "Mosques tend to cluster in neighborhoods where PCI is higher",
caption = "Mosques from OpenStreetMap; PCI from ACS proxies"
) +
theme_minimal(base_size = 11) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
plot.title = element_text(face = "bold", size = 13),
legend.position = "right",
legend.title = element_text(size = 10),
legend.text = element_text(size = 9)
)
halal_xy <- halal_all |>
st_transform(2263)
ggplot() +
geom_sf(data = pci, aes(fill = PCI), color = "grey85", size = 0.05) +
geom_sf(
data = halal_xy,
color = "goldenrod2",
size = 1.1,
alpha = 0.7
) +
scale_fill_viridis_c(
option = "plasma",
direction = 1,
name = "PCI"
) +
labs(
title = "Halal Businesses Overlaid on the Proxy Concentration Index (PCI)",
subtitle = "Halal food and retail clusters appear in some—but not all—high-PCI neighborhoods",
caption = "Halal-related establishments from OpenStreetMap; PCI from ACS proxies"
) +
theme_minimal(base_size = 11) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
plot.title = element_text(face = "bold", size = 13),
legend.position = "right",
legend.title = element_text(size = 10),
legend.text = element_text(size = 9)
)