knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE)DATA607-FinalProject
library(tidyverse)
library(readxl)
library(tidycensus)
library(leaflet)
library(sf)
library(tigris)
library(knitr)Load ARDA Data
url_2000 <- "https://github.com/AslamF/DATA607-FinalProject/raw/main/US%20Religion%20Census%202000.XLSX"
url_2010 <- "https://github.com/AslamF/DATA607-FinalProject/raw/main/US%20Religion%20Census%202010%20Membership%20Study.XLSX"
url_2020 <- "https://github.com/AslamF/DATA607-FinalProject/raw/main/US%20Religion%20Census%202020.xlsx"
read_github_xlsx <- function(url) {
tmp <- tempfile(fileext = ".xlsx")
download.file(url, tmp, mode = "wb", quiet = TRUE)
read_excel(tmp)
}
arda_2000_raw <- read_github_xlsx(url_2000)
arda_2010_raw <- read_github_xlsx(url_2010)
arda_2020_raw <- read_github_xlsx(url_2020)Clean and Extract Muslim Data
# Standarize Data from all 3 years into same structure
arda_2000 <- arda_2000_raw %>%
select(state = STATENAM,
mosques = ISLAMCG,
adherents = ISLAMAD) %>%
mutate(year = 2000,
mosques = as.numeric(mosques),
adherents = as.numeric(adherents),
state = as.character(state))
arda_2010 <- arda_2010_raw %>%
select(state = STNAME,
mosques = MSLMCNG,
adherents = MSLMADH) %>%
mutate(year = 2010,
mosques = as.numeric(mosques),
adherents = as.numeric(adherents),
state = as.character(state))
arda_2020 <- arda_2020_raw %>%
select(state = STATNAM,
mosques = MSLMCNG_2020,
adherents = MSLMADH_2020) %>%
mutate(year = 2020,
mosques = as.numeric(mosques),
adherents = as.numeric(adherents),
state = as.character(state))
# Stack into long format
# Filter out rows with no mosque data
arda_long <- bind_rows(arda_2000, arda_2010, arda_2020) %>%
filter(!is.na(mosques), mosques > 0)
head(arda_long)# A tibble: 6 × 4
state mosques adherents year
<chr> <dbl> <dbl> <dbl>
1 Alabama 20 7670 2000
2 Alaska 3 1381 2000
3 Arizona 12 11857 2000
4 Arkansas 6 2044 2000
5 California 163 259762 2000
6 Colorado 12 14855 2000
Calculate Growth Rates
# Switch from long to wide format. Each row has a column for each year
# Calculate growth percentage 2000 --> 2020
arda_wide <- arda_long %>%
select(state, year, mosques) %>%
pivot_wider(names_from = year, values_from = mosques,
names_prefix = "mosques_") %>%
mutate(growth_pct = ((mosques_2020 - mosques_2000) / mosques_2000) * 100) %>%
filter(!is.na(growth_pct))
head(arda_wide)# A tibble: 6 × 5
state mosques_2000 mosques_2010 mosques_2020 growth_pct
<chr> <dbl> <dbl> <dbl> <dbl>
1 Alabama 20 31 37 85
2 Alaska 3 3 2 -33.3
3 Arizona 12 29 35 192.
4 Arkansas 6 13 12 100
5 California 163 246 308 89.0
6 Colorado 12 17 23 91.7
Load ACS Data
census_api_key("e6afdcc55c31fd2b74a4d3217d20058cf09c5f53", install = FALSE)
# Immigrant data broken down by country of birth at the state level
acs_raw <- get_acs(
geography = "state",
table = "B05006",
year = 2020,
survey = "acs5"
)
# muslim-origin immigrant population per state
muslim_vars <- c(
"B05006_047", # Pakistan
"B05006_054", # Bangladesh
"B05006_031", # Egypt
"B05006_045", # Iran
"B05006_062", # Somalia
"B05006_051", # Iraq
"B05006_052", # Jordan
"B05006_057", # Syria
"B05006_035", # Morocco
"B05006_060" # Yemen
)
# Filter to Muslim-majority country variables
acs_muslim <- acs_raw %>%
filter(variable %in% muslim_vars) %>%
group_by(NAME) %>%
summarise(muslim_immigrants = sum(estimate, na.rm = TRUE)) %>%
rename(state = NAME)
head(acs_muslim)# A tibble: 6 × 2
state muslim_immigrants
<chr> <dbl>
1 Alabama 68517
2 Alaska 36995
3 Arizona 232576
4 Arkansas 38779
5 California 5079680
6 Colorado 169897
Join Data-sets
# Join mosque growth and immigration data by state name
# This is our combined data-set
combined <- arda_wide %>%
left_join(acs_muslim, by = "state") %>%
filter(!is.na(muslim_immigrants))
kable(head(combined, 10))| state | mosques_2000 | mosques_2010 | mosques_2020 | growth_pct | muslim_immigrants |
|---|---|---|---|---|---|
| Alabama | 20 | 31 | 37 | 85.00000 | 68517 |
| Alaska | 3 | 3 | 2 | -33.33333 | 36995 |
| Arizona | 12 | 29 | 35 | 191.66667 | 232576 |
| Arkansas | 6 | 13 | 12 | 100.00000 | 38779 |
| California | 163 | 246 | 308 | 88.95706 | 5079680 |
| Colorado | 12 | 17 | 23 | 91.66667 | 169897 |
| Connecticut | 20 | 36 | 49 | 145.00000 | 148907 |
| Delaware | 3 | 5 | 9 | 200.00000 | 35581 |
| Florida | 37 | 118 | 157 | 324.32432 | 542950 |
| Georgia | 39 | 69 | 99 | 153.84615 | 402051 |
Bar Chart - Top 15 States by mosque
# Visualize which states have the most mosques in 2020
# slice_max selects the top 15 rows by mosque count
combined %>%
slice_max(mosques_2020, n = 15) %>%
ggplot(aes(x = reorder(state, mosques_2020), y = mosques_2020, fill = mosques_2020)) +
geom_col() +
coord_flip() +
scale_fill_gradient(low = "#a8d5a2", high = "#2d6a4f") +
labs(title = "Top 15 States by Number of Mosques (2020)",
x = "State", y = "Number of Mosques") +
theme_minimal()Bar Chart - Top 15 States by Growth Rate
# Which states expereinced the fastest mosque growth from 2000-2020
# Visual Implementation
combined %>%
slice_max(growth_pct, n = 15) %>%
ggplot(aes(x = reorder(state, growth_pct), y = growth_pct, fill = growth_pct)) +
geom_col() +
coord_flip() +
scale_fill_gradient(low = "#a8d5a2", high = "#1b4332") +
labs(title = "Top 15 States by Mosque Growth Rate (2000–2020)",
x = "State", y = "Growth (%)") +
theme_minimal()Correlation
# Testing whether states with more muslim immigrants also saw fast mosque growth
cor.test(combined$muslim_immigrants, combined$growth_pct, method = "pearson")
Pearson's product-moment correlation
data: combined$muslim_immigrants and combined$growth_pct
t = -0.51051, df = 48, p-value = 0.612
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.3447819 0.2091390
sample estimates:
cor
-0.07348662
Linear Regression
# Simple Linear-Regression: mosque growth rate predicated by Muslim-orign imigrant
# Dependent Variable --> Growth Percentage
# Independent Variable --> Muslim_Immigrants
model <- lm(growth_pct ~ muslim_immigrants, data = combined)
summary(model)
Call:
lm(formula = growth_pct ~ muslim_immigrants, data = combined)
Residuals:
Min 1Q Median 3Q Max
-189.20 -68.61 -10.70 44.10 400.39
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.562e+02 1.651e+01 9.461 1.51e-12 ***
muslim_immigrants -1.037e-05 2.032e-05 -0.511 0.612
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 106.9 on 48 degrees of freedom
Multiple R-squared: 0.0054, Adjusted R-squared: -0.01532
F-statistic: 0.2606 on 1 and 48 DF, p-value: 0.612
Regression Plot
# Scatter plot of each state as a point
# Shows the direction and strength of the relationship
ggplot(combined, aes(x = muslim_immigrants, y = growth_pct)) +
geom_point(color = "#2d6a4f", size = 3, alpha = 0.7) +
geom_smooth(method = "lm", color = "#1b4332", se = TRUE) +
labs(title = "Muslim-Origin Immigrants vs. Mosque Growth Rate",
x = "Muslim-Origin Immigrant Population",
y = "Mosque Growth Rate 2000–2020 (%)") +
theme_minimal()Leaflet Map
# Implementing a new visualization tool using Leaflet, using shapefiles from tigris
# Joining combined data-set to the spatial data by state
options(tigris_use_cache = TRUE)
states_geo <- states(cb = TRUE, resolution = "20m") %>%
filter(!STUSPS %in% c("PR", "GU", "VI", "MP", "AS")) %>%
left_join(combined, by = c("NAME" = "state"))
# Color Green palatte to indiciate mosque growth percentage
pal <- colorNumeric(palette = "Greens",
domain = states_geo$growth_pct,
na.color = "#cccccc")
# Interactive Leaflet Map
#
leaflet(states_geo) %>%
setView(lng=-96, lat=37.8, zoom=4) %>%
addTiles() %>%
addPolygons(
fillColor = ~pal(growth_pct),
fillOpacity = 0.8,
color = "white",
weight = 1,
label = ~paste0(NAME, ": ", round(growth_pct, 1), "% growth"),
highlightOptions = highlightOptions(weight = 2, color = "#333",
bringToFront = TRUE)
) %>%
addLegend(pal = pal, values = ~growth_pct,
title = "Mosque Growth<br>2000–2020 (%)",
position = "bottomright",
labFormat = labelFormat(suffix = "%", digits=0))