DATA607-FinalProject

knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE)
library(tidyverse)
library(readxl)
library(tidycensus)
library(leaflet)
library(sf)
library(tigris)
library(knitr)

Load ARDA Data

url_2000 <- "https://github.com/AslamF/DATA607-FinalProject/raw/main/US%20Religion%20Census%202000.XLSX"
url_2010 <- "https://github.com/AslamF/DATA607-FinalProject/raw/main/US%20Religion%20Census%202010%20Membership%20Study.XLSX"
url_2020 <- "https://github.com/AslamF/DATA607-FinalProject/raw/main/US%20Religion%20Census%202020.xlsx"

read_github_xlsx <- function(url) {
  tmp <- tempfile(fileext = ".xlsx")
  download.file(url, tmp, mode = "wb", quiet = TRUE)
  read_excel(tmp)
}

arda_2000_raw <- read_github_xlsx(url_2000)
arda_2010_raw <- read_github_xlsx(url_2010)
arda_2020_raw <- read_github_xlsx(url_2020)

Clean and Extract Muslim Data

# Standarize Data from all 3 years into same structure 

arda_2000 <- arda_2000_raw %>%
  select(state = STATENAM,
         mosques = ISLAMCG,
         adherents = ISLAMAD) %>%
  mutate(year = 2000,
         mosques = as.numeric(mosques),
         adherents = as.numeric(adherents),
         state = as.character(state))


arda_2010 <- arda_2010_raw %>%
  select(state = STNAME,
         mosques = MSLMCNG,
         adherents = MSLMADH) %>%
  mutate(year = 2010,
         mosques = as.numeric(mosques),
         adherents = as.numeric(adherents),
         state = as.character(state))

arda_2020 <- arda_2020_raw %>%
  select(state = STATNAM,
         mosques = MSLMCNG_2020,
         adherents = MSLMADH_2020) %>%
  mutate(year = 2020,
         mosques = as.numeric(mosques),
         adherents = as.numeric(adherents),
         state = as.character(state))

# Stack into long format
# Filter out rows with no mosque data 
arda_long <- bind_rows(arda_2000, arda_2010, arda_2020) %>%
  filter(!is.na(mosques), mosques > 0)

head(arda_long)
# A tibble: 6 × 4
  state      mosques adherents  year
  <chr>        <dbl>     <dbl> <dbl>
1 Alabama         20      7670  2000
2 Alaska           3      1381  2000
3 Arizona         12     11857  2000
4 Arkansas         6      2044  2000
5 California     163    259762  2000
6 Colorado        12     14855  2000

Calculate Growth Rates

# Switch from long to wide format. Each row has a column for each year 
# Calculate growth percentage 2000 --> 2020
arda_wide <- arda_long %>%
  select(state, year, mosques) %>%
  pivot_wider(names_from = year, values_from = mosques,
              names_prefix = "mosques_") %>%
  mutate(growth_pct = ((mosques_2020 - mosques_2000) / mosques_2000) * 100) %>%
  filter(!is.na(growth_pct))

head(arda_wide)
# A tibble: 6 × 5
  state      mosques_2000 mosques_2010 mosques_2020 growth_pct
  <chr>             <dbl>        <dbl>        <dbl>      <dbl>
1 Alabama              20           31           37       85  
2 Alaska                3            3            2      -33.3
3 Arizona              12           29           35      192. 
4 Arkansas              6           13           12      100  
5 California          163          246          308       89.0
6 Colorado             12           17           23       91.7

Load ACS Data

census_api_key("e6afdcc55c31fd2b74a4d3217d20058cf09c5f53", install = FALSE)

# Immigrant data broken down by country of birth at the state level 
acs_raw <- get_acs(
  geography = "state",
  table = "B05006",
  year = 2020,
  survey = "acs5"
)

# muslim-origin immigrant population per state 
muslim_vars <- c(
  "B05006_047",  # Pakistan
  "B05006_054",  # Bangladesh
  "B05006_031",  # Egypt
  "B05006_045",  # Iran
  "B05006_062",  # Somalia
  "B05006_051",  # Iraq
  "B05006_052",  # Jordan
  "B05006_057",  # Syria
  "B05006_035",  # Morocco
  "B05006_060"   # Yemen
)

# Filter to Muslim-majority country variables 
acs_muslim <- acs_raw %>%
  filter(variable %in% muslim_vars) %>%
  group_by(NAME) %>%
  summarise(muslim_immigrants = sum(estimate, na.rm = TRUE)) %>%
  rename(state = NAME)

head(acs_muslim)
# A tibble: 6 × 2
  state      muslim_immigrants
  <chr>                  <dbl>
1 Alabama                68517
2 Alaska                 36995
3 Arizona               232576
4 Arkansas               38779
5 California           5079680
6 Colorado              169897

Join Data-sets

# Join mosque growth and immigration data by state name 
# This is our combined data-set 
combined <- arda_wide %>%
  left_join(acs_muslim, by = "state") %>%
  filter(!is.na(muslim_immigrants))

kable(head(combined, 10))
state mosques_2000 mosques_2010 mosques_2020 growth_pct muslim_immigrants
Alabama 20 31 37 85.00000 68517
Alaska 3 3 2 -33.33333 36995
Arizona 12 29 35 191.66667 232576
Arkansas 6 13 12 100.00000 38779
California 163 246 308 88.95706 5079680
Colorado 12 17 23 91.66667 169897
Connecticut 20 36 49 145.00000 148907
Delaware 3 5 9 200.00000 35581
Florida 37 118 157 324.32432 542950
Georgia 39 69 99 153.84615 402051

Bar Chart - Top 15 States by mosque

# Visualize which states have the most mosques in 2020
# slice_max selects the top 15 rows by mosque count 

combined %>%
  slice_max(mosques_2020, n = 15) %>%
  ggplot(aes(x = reorder(state, mosques_2020), y = mosques_2020, fill = mosques_2020)) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "#a8d5a2", high = "#2d6a4f") +
  labs(title = "Top 15 States by Number of Mosques (2020)",
       x = "State", y = "Number of Mosques") +
  theme_minimal()

Bar Chart - Top 15 States by Growth Rate

# Which states expereinced the fastest mosque growth from 2000-2020
# Visual Implementation 
combined %>%
  slice_max(growth_pct, n = 15) %>%
  ggplot(aes(x = reorder(state, growth_pct), y = growth_pct, fill = growth_pct)) +
  geom_col() +
  coord_flip() +
  scale_fill_gradient(low = "#a8d5a2", high = "#1b4332") +
  labs(title = "Top 15 States by Mosque Growth Rate (2000–2020)",
       x = "State", y = "Growth (%)") +
  theme_minimal()

Correlation

# Testing whether states with more muslim immigrants also saw fast mosque growth 

cor.test(combined$muslim_immigrants, combined$growth_pct, method = "pearson")

    Pearson's product-moment correlation

data:  combined$muslim_immigrants and combined$growth_pct
t = -0.51051, df = 48, p-value = 0.612
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.3447819  0.2091390
sample estimates:
        cor 
-0.07348662 

Linear Regression

# Simple Linear-Regression: mosque growth rate predicated by Muslim-orign imigrant
# Dependent Variable --> Growth Percentage
# Independent Variable --> Muslim_Immigrants 

model <- lm(growth_pct ~ muslim_immigrants, data = combined)
summary(model)

Call:
lm(formula = growth_pct ~ muslim_immigrants, data = combined)

Residuals:
    Min      1Q  Median      3Q     Max 
-189.20  -68.61  -10.70   44.10  400.39 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)        1.562e+02  1.651e+01   9.461 1.51e-12 ***
muslim_immigrants -1.037e-05  2.032e-05  -0.511    0.612    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 106.9 on 48 degrees of freedom
Multiple R-squared:  0.0054,    Adjusted R-squared:  -0.01532 
F-statistic: 0.2606 on 1 and 48 DF,  p-value: 0.612

Regression Plot

# Scatter plot of each state as a point
# Shows the direction and strength of the relationship


ggplot(combined, aes(x = muslim_immigrants, y = growth_pct)) +
  geom_point(color = "#2d6a4f", size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", color = "#1b4332", se = TRUE) +
  labs(title = "Muslim-Origin Immigrants vs. Mosque Growth Rate",
       x = "Muslim-Origin Immigrant Population",
       y = "Mosque Growth Rate 2000–2020 (%)") +
  theme_minimal()

Leaflet Map

# Implementing a new visualization tool using Leaflet, using shapefiles from tigris
# Joining combined data-set to the spatial data by state 
options(tigris_use_cache = TRUE)
states_geo <- states(cb = TRUE, resolution = "20m") %>%
  filter(!STUSPS %in% c("PR", "GU", "VI", "MP", "AS")) %>%
  left_join(combined, by = c("NAME" = "state"))


# Color Green palatte to indiciate mosque growth percentage 
pal <- colorNumeric(palette = "Greens", 
                    domain = states_geo$growth_pct, 
                    na.color = "#cccccc")

# Interactive Leaflet Map
# 
leaflet(states_geo) %>%
  setView(lng=-96, lat=37.8, zoom=4) %>%
  addTiles() %>%
  addPolygons(
    fillColor = ~pal(growth_pct),
    fillOpacity = 0.8,
    color = "white",
    weight = 1,
    label = ~paste0(NAME, ": ", round(growth_pct, 1), "% growth"),
    highlightOptions = highlightOptions(weight = 2, color = "#333", 
                                        bringToFront = TRUE)
  ) %>%
  addLegend(pal = pal, values = ~growth_pct,
            title = "Mosque Growth<br>2000–2020 (%)",
            position = "bottomright",
            labFormat = labelFormat(suffix = "%", digits=0))