DATA607-FinalProject

knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE)
library(tidyverse)
library(readxl)
library(tidycensus)
library(leaflet)
library(sf)
library(tigris)
library(knitr)

Introduction

Muslim communities in the United States have experienced significant demographic growth over the past two decades, driven by immigration. This exploratory analysis seeks to quantify that growth by examining mosque counts, and their change from 2000 to 2020, across the United States. Two data-sets are used: one from ARDA, which tracks mosque growth, and one from the U.S. Census, which provides estimates of immigrants from Muslim-majority countries. Our primary question is: do states with larger Muslim-origin immigrant populations experience faster mosque growth?

Load ARDA Data

# Download links for the three workbooks (2000, 2010, 2020) stored in the
# project's GitHub repository. Each address is split in two only to keep the
# lines short; paste0() joins the pieces without a separator.
url_2000 <- paste0(
  "https://github.com/AslamF/DATA607-FinalProject/raw/main/",
  "US%20Religion%20Census%202000.XLSX"
)
url_2010 <- paste0(
  "https://github.com/AslamF/DATA607-FinalProject/raw/main/",
  "US%20Religion%20Census%202010%20Membership%20Study.XLSX"
)
url_2020 <- paste0(
  "https://github.com/AslamF/DATA607-FinalProject/raw/main/",
  "US%20Religion%20Census%202020.xlsx"
)

# Download an Excel workbook from `url` and read it with read_excel().
#
# The workbook is saved to a temporary file, read with read_excel()'s
# defaults, and the temporary copy is deleted when the function exits.
#
# url - character scalar; address of the workbook to download.
#
# Returns whatever read_excel() returns for the downloaded file.
# Stops with an error if download.file() reports a non-zero status.
read_github_xlsx <- function(url) {
  tmp <- tempfile(fileext = ".xlsx")
  # Clean up the temporary copy on every exit path, including errors.
  on.exit(unlink(tmp), add = TRUE)

  # mode = "wb" writes the file in binary mode so the workbook is not
  # corrupted by text-mode translation.
  status <- download.file(url, tmp, mode = "wb", quiet = TRUE)
  if (status != 0L) {
    stop("Download failed (status ", status, "): ", url, call. = FALSE)
  }

  read_excel(tmp)
}

# Fetch each census year's workbook into its own raw table.
arda_2000_raw <- url_2000 %>% read_github_xlsx()
arda_2010_raw <- url_2010 %>% read_github_xlsx()
arda_2020_raw <- url_2020 %>% read_github_xlsx()

Clean and Extract Muslim Data

# Standardize data from all 3 years into the same structure.
#
# The three workbooks use different column names for the same fields, so one
# helper performs the renaming and type coercion for every year. This keeps
# the three tables from drifting apart when the cleaning rules change.
#
# raw          - data frame read from one census workbook
# state_col    - (unquoted) column holding the state name
# mosque_col   - (unquoted) column used as the mosque count
# adherent_col - (unquoted) column used as the adherent count
# census_year  - value stored in the `year` column of the result
#
# Returns a data frame with the columns state (character), mosques (numeric),
# adherents (numeric) and year, in that order.
standardize_arda <- function(raw, state_col, mosque_col, adherent_col,
                             census_year) {
  raw %>%
    select(state = {{ state_col }},
           mosques = {{ mosque_col }},
           adherents = {{ adherent_col }}) %>%
    mutate(year = census_year,
           mosques = as.numeric(mosques),
           adherents = as.numeric(adherents),
           state = as.character(state))
}

arda_2000 <- standardize_arda(arda_2000_raw, STATENAM, ISLAMCG, ISLAMAD, 2000)

arda_2010 <- standardize_arda(arda_2010_raw, STNAME, MSLMCNG, MSLMADH, 2010)

arda_2020 <- standardize_arda(
  arda_2020_raw, STATNAM, MSLMCNG_2020, MSLMADH_2020, 2020
)

# Stack the three yearly tables into one long table, then keep only the rows
# that report a mosque count greater than zero (missing counts are dropped).
arda_long <- list(arda_2000, arda_2010, arda_2020) %>%
  bind_rows() %>%
  filter(!is.na(mosques) & mosques > 0)

arda_long %>% head()
# A tibble: 6 × 4
  state      mosques adherents  year
  <chr>        <dbl>     <dbl> <dbl>
1 Alabama         20      7670  2000
2 Alaska           3      1381  2000
3 Arizona         12     11857  2000
4 Arkansas         6      2044  2000
5 California     163    259762  2000
6 Colorado        12     14855  2000

Calculate Growth Rates

# Reshape from long to wide: one row per state, one mosque-count column per
# year (mosques_2000, mosques_2010, mosques_2020).
# Then compute the percentage change in mosque count from 2000 to 2020 and
# drop states for which that figure cannot be computed.
arda_wide <- arda_long %>%
  pivot_wider(id_cols = state,
              names_from = year,
              values_from = mosques,
              names_prefix = "mosques_") %>%
  mutate(
    growth_pct = ((mosques_2020 - mosques_2000) / mosques_2000) * 100
  ) %>%
  drop_na(growth_pct)

arda_wide %>% head()
# A tibble: 6 × 5
  state      mosques_2000 mosques_2010 mosques_2020 growth_pct
  <chr>             <dbl>        <dbl>        <dbl>      <dbl>
1 Alabama              20           31           37       85  
2 Alaska                3            3            2      -33.3
3 Arizona              12           29           35      192. 
4 Arkansas              6           13           12      100  
5 California          163          246          308       89.0
6 Colorado             12           17           23       91.7

Load ACS Data

# Read the per-state immigrant counts from the CSV backup kept in the
# project's GitHub repository.
acs_muslim <- paste0(
  "https://raw.githubusercontent.com/AslamF/DATA607-FinalProject/main/",
  "acs_muslim_backup.csv"
) %>%
  read_csv()

acs_muslim %>% head()
# A tibble: 6 × 2
  state      muslim_immigrants
  <chr>                  <dbl>
1 Alabama                68517
2 Alaska                 36995
3 Arizona               232576
4 Arkansas               38779
5 California           5079680
6 Colorado              169897

Join Data-sets

# Attach the immigrant counts to the mosque growth table by state name and
# keep only the states that have an immigrant count.
# This is our combined data-set.
combined <- left_join(arda_wide, acs_muslim, by = "state") %>%
  drop_na(muslim_immigrants)

combined %>%
  head(10) %>%
  kable()
state mosques_2000 mosques_2010 mosques_2020 growth_pct muslim_immigrants
Alabama 20 31 37 85.00000 68517
Alaska 3 3 2 -33.33333 36995
Arizona 12 29 35 191.66667 232576
Arkansas 6 13 12 100.00000 38779
California 163 246 308 88.95706 5079680
Colorado 12 17 23 91.66667 169897
Connecticut 20 36 49 145.00000 148907
Delaware 3 5 9 200.00000 35581
Florida 37 118 157 324.32432 542950
Georgia 39 69 99 153.84615 402051

Bar Chart - Top 15 States by Mosque Count

# Horizontal bar chart of the states with the most mosques in 2020.
# slice_max() keeps the 15 rows with the largest mosques_2020 values, and
# reorder() sorts the bars by that same count.
ggplot(
  data = slice_max(combined, mosques_2020, n = 15),
  mapping = aes(x = reorder(state, mosques_2020), y = mosques_2020)
) +
  geom_col(aes(fill = mosques_2020)) +
  scale_fill_gradient(low = "#a8d5a2", high = "#2d6a4f") +
  coord_flip() +
  theme_minimal() +
  labs(title = "Top 15 States by Number of Mosques (2020)",
       x = "State", y = "Number of Mosques")

Bar Chart - Top 15 States by Growth Rate

# Which states experienced the fastest mosque growth from 2000 to 2020?
# Horizontal bar chart of the 15 rows with the largest growth_pct values,
# with the bars sorted by growth.
ggplot(
  data = slice_max(combined, growth_pct, n = 15),
  mapping = aes(x = reorder(state, growth_pct), y = growth_pct)
) +
  geom_col(aes(fill = growth_pct)) +
  scale_fill_gradient(low = "#a8d5a2", high = "#1b4332") +
  coord_flip() +
  theme_minimal() +
  labs(title = "Top 15 States by Mosque Growth Rate (2000–2020)",
       x = "State", y = "Growth (%)")

Correlation

# Test whether states with more Muslim-origin immigrants also saw faster
# mosque growth: Pearson correlation between the immigrant count and the
# 2000-2020 growth percentage, using cor.test()'s default alternative.

cor.test(combined$muslim_immigrants, combined$growth_pct, method = "pearson")

    Pearson's product-moment correlation

data:  combined$muslim_immigrants and combined$growth_pct
t = -0.51051, df = 48, p-value = 0.612
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.3447819  0.2091390
sample estimates:
        cor 
-0.07348662 

Linear Regression

# Simple linear regression: mosque growth rate predicted by the Muslim-origin
# immigrant count.
# Dependent variable   --> growth_pct (mosque growth percentage)
# Independent variable --> muslim_immigrants

model <- lm(growth_pct ~ muslim_immigrants, data = combined)
summary(model)

Call:
lm(formula = growth_pct ~ muslim_immigrants, data = combined)

Residuals:
    Min      1Q  Median      3Q     Max 
-189.20  -68.61  -10.70   44.10  400.39 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)        1.562e+02  1.651e+01   9.461 1.51e-12 ***
muslim_immigrants -1.037e-05  2.032e-05  -0.511    0.612    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 106.9 on 48 degrees of freedom
Multiple R-squared:  0.0054,    Adjusted R-squared:  -0.01532 
F-statistic: 0.2606 on 1 and 48 DF,  p-value: 0.612

Regression Plot

# Scatter plot with one point per row of `combined` (one per state), overlaid
# with a fitted linear trend line and its standard-error band (se = TRUE).
# Shows the direction and strength of the relationship.
combined %>%
  ggplot(aes(x = muslim_immigrants, y = growth_pct)) +
  geom_point(size = 3, alpha = 0.7, color = "#2d6a4f") +
  geom_smooth(method = "lm", se = TRUE, color = "#1b4332") +
  theme_minimal() +
  labs(title = "Muslim-Origin Immigrants vs. Mosque Growth Rate",
       x = "Muslim-Origin Immigrant Population",
       y = "Mosque Growth Rate 2000–2020 (%)")

Leaflet Map

# Spatial data for the Leaflet map: state shapes come from tigris (cached
# between runs), the listed territories are removed, and the combined
# data-set is attached to each shape by state name.
options(tigris_use_cache = TRUE)
states_geo <- states(cb = TRUE, resolution = "20m") %>%
  filter(!(STUSPS %in% c("PR", "GU", "VI", "MP", "AS"))) %>%
  left_join(combined, by = c(NAME = "state"))


# Green colour palette indicating mosque growth percentage; shapes without a
# growth value are drawn in grey (#cccccc).
pal <- colorNumeric("Greens", states_geo$growth_pct, na.color = "#cccccc")

# Interactive Leaflet map: each state polygon is filled by its mosque growth
# percentage and shows that figure in its hover label.
# Polygons with no growth value (not present in `combined`) are labelled
# "no data" instead of "NA% growth".
leaflet(states_geo) %>%
  # Initial view: lng -96, lat 37.8, zoom level 4.
  setView(lng = -96, lat = 37.8, zoom = 4) %>%
  addTiles() %>%
  addPolygons(
    fillColor = ~pal(growth_pct),
    fillOpacity = 0.8,
    color = "white",
    weight = 1,
    label = ~ifelse(
      is.na(growth_pct),
      paste0(NAME, ": no data"),
      paste0(NAME, ": ", round(growth_pct, 1), "% growth")
    ),
    highlightOptions = highlightOptions(weight = 2, color = "#333",
                                        bringToFront = TRUE)
  ) %>%
  addLegend(pal = pal, values = ~growth_pct,
            title = "Mosque Growth<br>2000–2020 (%)",
            position = "bottomright",
            labFormat = labelFormat(suffix = "%", digits = 0))

Conclusion

The data show that mosque counts increased in nearly every state between 2000 and 2020. The largest and most established Muslim populations are in California, New York and Texas. Muslim communities are also expanding into new geographic areas, as represented by the large percentage increases in smaller states. Immigration from Muslim-majority countries does not play a meaningful role in driving growth rates. We found no statistically significant relationship in the regression analysis (the p-value, 0.612, is larger than 0.05). Immigration is not the primary driver of where mosques are growing fastest.