knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE)
DATA607-FinalProject
library(tidyverse)
library(readxl)
library(tidycensus)
library(leaflet)
library(sf)
library(tigris)
library(knitr)
Introduction
Muslim communities in the United States have experienced significant demographic growth over the past two decades, driven by immigration. This exploratory analysis seeks to quantify that growth by examining mosque counts, and how they changed, from 2000 to 2020 across the United States. Two datasets are used: the first, from ARDA, tracks mosque counts, and the second is U.S. Census data providing estimates of immigrants from Muslim-majority countries. Our primary question is: Do states with larger Muslim-origin immigrant populations experience faster mosque growth rates?
Load ARDA Data
# Download URLs for the three U.S. Religion Census workbooks (2000, 2010 and
# 2020) kept in the project's GitHub repository. Spaces in the file names are
# percent-encoded as %20; note that the extension case differs between files.
url_2000 <- "https://github.com/AslamF/DATA607-FinalProject/raw/main/US%20Religion%20Census%202000.XLSX"
url_2010 <- "https://github.com/AslamF/DATA607-FinalProject/raw/main/US%20Religion%20Census%202010%20Membership%20Study.XLSX"
url_2020 <- "https://github.com/AslamF/DATA607-FinalProject/raw/main/US%20Religion%20Census%202020.xlsx"
# Download an Excel workbook from `url` and read it into a tibble.
#
# The workbook is fetched in binary mode ("wb") into a temporary file, parsed
# with read_excel() using its default arguments, and the temporary file is
# removed again before the function returns.
#
# @param url A single, non-missing character string: the download URL.
# @return The tibble produced by read_excel().
read_github_xlsx <- function(url) {
  stopifnot(is.character(url), length(url) == 1L, !is.na(url))
  tmp <- tempfile(fileext = ".xlsx")
  # Clean up the download when the function exits, including on error, so
  # repeated calls do not leave workbooks behind in the session temp dir.
  on.exit(unlink(tmp), add = TRUE)
  download.file(url, tmp, mode = "wb", quiet = TRUE)
  read_excel(tmp)
}
# Download and read the raw workbook for each census year.
arda_2000_raw <- read_github_xlsx(url_2000)
arda_2010_raw <- read_github_xlsx(url_2010)
arda_2020_raw <- read_github_xlsx(url_2020)
Clean and Extract Muslim Data
# Standardize data from all 3 years into the same structure.
#
# The raw workbooks use different column names per year, so one helper does
# the rename + type coercion and is called once per year.
#
# @param raw Raw tibble as read from that year's workbook.
# @param state_col,mosque_col,adherent_col Names (strings) of the raw columns
#   that become `state`, `mosques` and `adherents`.
# @param census_year Value stored in the `year` column.
# @return A tibble with columns state (character), mosques (numeric),
#   adherents (numeric) and year (numeric), in that order.
standardize_arda <- function(raw, state_col, mosque_col, adherent_col,
                             census_year) {
  raw %>%
    select(state = all_of(state_col),
           mosques = all_of(mosque_col),
           adherents = all_of(adherent_col)) %>%
    mutate(year = census_year,
           mosques = as.numeric(mosques),
           adherents = as.numeric(adherents),
           state = as.character(state))
}

arda_2000 <- standardize_arda(
  arda_2000_raw, "STATENAM", "ISLAMCG", "ISLAMAD", 2000
)
arda_2010 <- standardize_arda(
  arda_2010_raw, "STNAME", "MSLMCNG", "MSLMADH", 2010
)
arda_2020 <- standardize_arda(
  arda_2020_raw, "STATNAM", "MSLMCNG_2020", "MSLMADH_2020", 2020
)

# Stack into long format (one row per state and year).
# Filter out rows with no mosque data: missing or zero mosque counts.
arda_long <- bind_rows(arda_2000, arda_2010, arda_2020) %>%
  filter(!is.na(mosques), mosques > 0)
head(arda_long)
# A tibble: 6 × 4
state mosques adherents year
<chr> <dbl> <dbl> <dbl>
1 Alabama 20 7670 2000
2 Alaska 3 1381 2000
3 Arizona 12 11857 2000
4 Arkansas 6 2044 2000
5 California 163 259762 2000
6 Colorado 12 14855 2000
Calculate Growth Rates
# Reshape to one row per state with a mosque-count column for every census
# year (mosques_2000, mosques_2010, mosques_2020), then compute the percentage
# change in mosque count from 2000 to 2020. States where that change cannot
# be computed (a count is missing for 2000 or 2020) are dropped.
arda_wide <- arda_long %>%
  pivot_wider(
    id_cols = state,
    names_from = year,
    values_from = mosques,
    names_prefix = "mosques_"
  ) %>%
  mutate(
    growth_pct = ((mosques_2020 - mosques_2000) / mosques_2000) * 100
  ) %>%
  drop_na(growth_pct)
head(arda_wide)
# A tibble: 6 × 5
state mosques_2000 mosques_2010 mosques_2020 growth_pct
<chr> <dbl> <dbl> <dbl> <dbl>
1 Alabama 20 31 37 85
2 Alaska 3 3 2 -33.3
3 Arizona 12 29 35 192.
4 Arkansas 6 13 12 100
5 California 163 246 308 89.0
6 Colorado 12 17 23 91.7
Load ACS Data
# Read the per-state immigrant estimates from the backup CSV stored in the
# project repository. Column types are stated explicitly so the join key and
# the numeric measure do not depend on readr's type guessing.
acs_muslim <- read_csv(
  "https://raw.githubusercontent.com/AslamF/DATA607-FinalProject/main/acs_muslim_backup.csv",
  col_types = cols(
    state = col_character(),
    muslim_immigrants = col_double()
  )
)
head(acs_muslim)
# A tibble: 6 × 2
state muslim_immigrants
<chr> <dbl>
1 Alabama 68517
2 Alaska 36995
3 Arizona 232576
4 Arkansas 38779
5 California 5079680
6 Colorado 169897
Join Data-sets
# Combined data-set: mosque growth joined to the immigration estimates.
# The match is on the exact state name, so a state is kept only when it
# appears in both tables and has a non-missing immigrant estimate.
combined <- inner_join(arda_wide, acs_muslim, by = "state") %>%
  drop_na(muslim_immigrants)
kable(head(combined, 10))
| state | mosques_2000 | mosques_2010 | mosques_2020 | growth_pct | muslim_immigrants |
|---|---|---|---|---|---|
| Alabama | 20 | 31 | 37 | 85.00000 | 68517 |
| Alaska | 3 | 3 | 2 | -33.33333 | 36995 |
| Arizona | 12 | 29 | 35 | 191.66667 | 232576 |
| Arkansas | 6 | 13 | 12 | 100.00000 | 38779 |
| California | 163 | 246 | 308 | 88.95706 | 5079680 |
| Colorado | 12 | 17 | 23 | 91.66667 | 169897 |
| Connecticut | 20 | 36 | 49 | 145.00000 | 148907 |
| Delaware | 3 | 5 | 9 | 200.00000 | 35581 |
| Florida | 37 | 118 | 157 | 324.32432 | 542950 |
| Georgia | 39 | 69 | 99 | 153.84615 | 402051 |
Bar Chart - Top 15 States by mosque
# Visualize which states have the most mosques in 2020.
# slice_max() selects the top 15 rows by mosque count; with_ties = FALSE
# keeps the chart at exactly 15 bars even when states tie for 15th place.
combined %>%
  slice_max(mosques_2020, n = 15, with_ties = FALSE) %>%
  ggplot(aes(x = reorder(state, mosques_2020), y = mosques_2020,
             fill = mosques_2020)) +
  geom_col() +
  coord_flip() +  # horizontal bars so state names stay readable
  scale_fill_gradient(low = "#a8d5a2", high = "#2d6a4f") +
  labs(title = "Top 15 States by Number of Mosques (2020)",
       x = "State", y = "Number of Mosques") +
  theme_minimal()
Bar Chart - Top 15 States by Growth Rate
# Which states experienced the fastest mosque growth from 2000-2020?
# slice_max() selects the top 15 rows by growth percentage; with_ties = FALSE
# keeps the chart at exactly 15 bars even when states share a growth rate.
combined %>%
  slice_max(growth_pct, n = 15, with_ties = FALSE) %>%
  ggplot(aes(x = reorder(state, growth_pct), y = growth_pct,
             fill = growth_pct)) +
  geom_col() +
  coord_flip() +  # horizontal bars so state names stay readable
  scale_fill_gradient(low = "#a8d5a2", high = "#1b4332") +
  labs(title = "Top 15 States by Mosque Growth Rate (2000–2020)",
       x = "State", y = "Growth (%)") +
  theme_minimal()
Correlation
# Pearson correlation test across states: is a larger Muslim-origin immigrant
# population associated with faster mosque growth (growth_pct, 2000-2020)?
cor.test(combined$muslim_immigrants, combined$growth_pct, method = "pearson")
Pearson's product-moment correlation
data: combined$muslim_immigrants and combined$growth_pct
t = -0.51051, df = 48, p-value = 0.612
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.3447819 0.2091390
sample estimates:
cor
-0.07348662
Linear Regression
# Simple linear regression: mosque growth rate predicted by the Muslim-origin
# immigrant population, one observation per state.
# Dependent variable   --> growth_pct (mosque growth 2000-2020, in percent)
# Independent variable --> muslim_immigrants
model <- lm(growth_pct ~ muslim_immigrants, data = combined)
summary(model)
Call:
lm(formula = growth_pct ~ muslim_immigrants, data = combined)
Residuals:
Min 1Q Median 3Q Max
-189.20 -68.61 -10.70 44.10 400.39
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.562e+02 1.651e+01 9.461 1.51e-12 ***
muslim_immigrants -1.037e-05 2.032e-05 -0.511 0.612
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 106.9 on 48 degrees of freedom
Multiple R-squared: 0.0054, Adjusted R-squared: -0.01532
F-statistic: 0.2606 on 1 and 48 DF, p-value: 0.612
Regression Plot
# Scatter plot with one point per state, overlaid with the fitted
# least-squares line and its confidence band (se = TRUE).
# Shows the direction and strength of the relationship.
ggplot(combined, aes(x = muslim_immigrants, y = growth_pct)) +
  geom_point(color = "#2d6a4f", size = 3, alpha = 0.7) +
  # The formula is stated explicitly rather than left to geom_smooth().
  geom_smooth(method = "lm", formula = y ~ x, color = "#1b4332", se = TRUE) +
  labs(title = "Muslim-Origin Immigrants vs. Mosque Growth Rate",
       x = "Muslim-Origin Immigrant Population",
       y = "Mosque Growth Rate 2000–2020 (%)") +
  theme_minimal()
Leaflet Map
# Interactive choropleth of mosque growth built with leaflet, using state
# boundaries from tigris joined to the combined data-set by state name.
options(tigris_use_cache = TRUE)  # reuse downloaded shapefiles between runs
states_geo <- states(cb = TRUE, resolution = "20m") %>%
  # Drop the territories listed here; they are not part of the analysis.
  filter(!STUSPS %in% c("PR", "GU", "VI", "MP", "AS")) %>%
  # leaflet expects WGS84 longitude/latitude (EPSG:4326), so re-project the
  # boundaries before handing them to the map.
  st_transform(crs = 4326) %>%
  left_join(combined, by = c("NAME" = "state"))

# Green colour palette indicating mosque growth percentage; states without a
# growth value are drawn in grey.
pal <- colorNumeric(palette = "Greens",
                    domain = states_geo$growth_pct,
                    na.color = "#cccccc")

# Interactive leaflet map.
leaflet(states_geo) %>%
  setView(lng = -96, lat = 37.8, zoom = 4) %>%
  addTiles() %>%
  addPolygons(
    fillColor = ~pal(growth_pct),
    fillOpacity = 0.8,
    color = "white",
    weight = 1,
    # States with no joined growth value get an explicit "no data" label
    # instead of the misleading "NA% growth".
    label = ~ifelse(
      is.na(growth_pct),
      paste0(NAME, ": no data"),
      paste0(NAME, ": ", round(growth_pct, 1), "% growth")
    ),
    highlightOptions = highlightOptions(weight = 2, color = "#333",
                                        bringToFront = TRUE)
  ) %>%
  addLegend(pal = pal, values = ~growth_pct,
            title = "Mosque Growth<br>2000–2020 (%)",
            position = "bottomright",
            labFormat = labelFormat(suffix = "%", digits = 0))
Conclusion
The data show that mosque counts increased in nearly every state between 2000 and 2020. The largest and most established Muslim populations are in California, New York and Texas. Muslim communities are also expanding into new geographic areas, as represented by the large percentage increases in smaller states. Immigration from Muslim-majority countries does not appear to play a meaningful role in driving growth rates. We found no statistically significant relationship in the regression analysis. Immigration is not the primary driver of where mosques are growing fastest. (The p-value, 0.612, is larger than 0.05.)