MReyes_Lab3

# Creating the FBI data histogram with a mean line
fbi_histogram <- ggplot(hate_crimes %>% 
         #Remove missing or non-finite values to prevent plotting errors
                               filter(!is.na(hatecrimes_fbi) & is.finite(hatecrimes_fbi)),
                               aes(x = hatecrimes_fbi)) +
  geom_histogram(fill = "skyblue", bins = 30, alpha = 1) +
  # Add a vertical dashed line representing the national mean
  # This helps visually assess states fall above or below average
  geom_vline(aes(xintercept = mean(hatecrimes_fbi, na.rm = TRUE)), #
             color = "red", linetype = "dashed", size = 1) +
  # Label axes for interpretability in the final report
  labs(x = "Average Annual Hate Crimes per 100k", y = "Frequency") +
 # Apply a clean theme to improve readability and presentation quality
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Overlaying data on map of United States
# Load geographic boundary data for U.S. states
# This dataset contains the coordinates needed to draw each state polygon
states_map <- map_data("state")

# Prepare the hate crimes data for spatial visualization
hate_crimes_map_data <- hate_crimes %>%
  
  # Convert state names to lowercase so they match the format
  # used in the map_data dataset (necessary for a successful join)
  mutate(state = tolower(state)) %>%
  
  # Merge hate crime data with geographic coordinates
  # inner_join keeps only states present in BOTH datasets,
  # preventing blank or unmatched regions on the map
  inner_join(states_map, by = c("state" = "region"))

# Create a choropleth map (color-coded map) of hate crime rates
hate_crimes_map <- ggplot() +
  
  # Draw each state as a polygon and fill based on hate crime rate
  geom_polygon(
    data = hate_crimes_map_data,
    aes(
      x = long, 
      y = lat, 
      group = group,     # Ensures each state boundary is drawn correctly
      fill = hatecrimes_fbi
    ),
    color = "white"      # White borders improve state separation
  ) +
  
  # Fix the aspect ratio so the U.S. map is not distorted
  coord_fixed(1.3) +
  
  # Use the viridis color scale:
  # - perceptually uniform (differences reflect real data differences)
  # - colorblind-friendly
  # - prints well in grayscale
  scale_fill_viridis(
    option = "magma",
    name = "Hate Crimes\nper 100k",
    guide = guide_colorbar(direction = "vertical")
  ) +
  
  # Remove axes, ticks, and background elements
  # This keeps attention on the spatial pattern
  theme_void() +
  
  # Position legend and center the title for a cleaner layout
  theme(
    legend.position = "right",
    plot.title = element_text(hjust = 0.5)
  )

# Combine the histogram and map into a single figure
combined_plot <- fbi_histogram + hate_crimes_map +
  
  # Arrange plots in two columns for easy comparison
  plot_layout(ncol = 2) +
  
  # Add a caption describing the time period
  labs(caption = "Notes: Years 2010 through 2015") +
  
  # Style the caption for readability
  theme(plot.caption = element_text(hjust = 0, face = "italic"))

print(combined_plot)

# Reorder the columns so that the DV is first
hate_crimes_reordered <- hate_crimes %>% 
  select(hatecrimes_fbi, everything())

# Use datasummary_skim() on the reordered data
datasummary_skim(hate_crimes_reordered, type="numeric", fmt=2, histogram=FALSE)

## Warning: The `histogram` argument is deprecated. Use `fun_numeric` instead.

	Unique	Missing Pct.	Mean	SD	Min	Median	Max
hatecrimes_fbi	51	2	2.37	1.71	0.27	1.99	10.95
median_house_inc	51	0	55223.61	9208.48	35521.00	54916.00	76165.00
share_unemp_seas	32	0	4.96	1.07	2.80	5.10	7.30
share_pop_metro	31	0	75.02	18.16	31.00	79.00	100.00
share_pop_hs	40	0	86.91	3.41	79.90	87.40	91.80
share_non_citizen	13	6	5.46	3.11	1.00	4.50	13.00
share_white_poverty	12	0	9.18	2.47	4.00	9.00	17.00
gini_index	39	0	45.38	2.09	41.90	45.40	53.20
share_non_white	34	0	31.57	16.49	6.00	28.00	81.00
share_vote_trump	33	0	49.00	11.87	4.00	49.00	70.00
hate_crimes_per_100k_splc	48	8	0.30	0.25	0.07	0.23	1.52
avg_hatecrimes_per_100k_fbi	51	2	2.37	1.71	0.27	1.99	10.95
hatecrimes_splc	48	8	0.30	0.25	0.07	0.23	1.52

var_table <- matrix(
    c(  
      "median_house_inc",    "Negative", "Higher income states are associated with lower hate crime rates.",
      "share_unemp_seas",    "Positive", "Higher unemployment is associated to higher hate crime rates.",
      "share_pop_metro",     "Negative", "Metropolitan areas will reflect fewer hate crimes",
      "share_pop_hs",        "Negative", "A higher share of the population with a high school diploma is associated with lower hate crime rates.",
      "share_non_citizen",   "Positive", "Areas with higher non-citizen populations are associated with more hate crimes.",
      "share_white_poverty", "Positive", "A greater poor white population will be associated with more hate crimes.",
      "gini_index",          "Positive", "Higher income inequality is associated with higher hate crime rates.",
      "share_non_white",     "Positive", "Areas with diverse populations will experience more hate crimes.",
      "share_vote_trump",    "Positive", "States with higher support for Trump are associated with higher hate crime rates."
    ), 
    ncol = 3, 
    byrow = TRUE
  )

# Convert the matrix to a data frame for kable
table_df <- as.data.frame(var_table)

# Create the table with knitr::kable and shade the first row
kable(table_df, "html", booktabs = T, col.names = c("Variable Name", "Hypothesized Direction", "Hypothesis Formulation")) %>%
  kable_styling(latex_options = "striped", stripe_color = "gray!30") %>%
  row_spec(0, background = "gray!30")

Variable Name	Hypothesized Direction	Hypothesis Formulation
median_house_inc	Negative	Higher income states are associated with lower hate crime rates.
share_unemp_seas	Positive	Higher unemployment is associated to higher hate crime rates.
share_pop_metro	Negative	Metropolitan areas will reflect fewer hate crimes
share_pop_hs	Negative	A higher share of the population with a high school diploma is associated with lower hate crime rates.
share_non_citizen	Positive	Areas with higher non-citizen populations are associated with more hate crimes.
share_white_poverty	Positive	A greater poor white population will be associated with more hate crimes.
gini_index	Positive	Higher income inequality is associated with higher hate crime rates.
share_non_white	Positive	Areas with diverse populations will experience more hate crimes.
share_vote_trump	Positive	States with higher support for Trump are associated with higher hate crime rates.

# Education 
model_edu <- lm(hatecrimes_fbi ~ share_pop_hs, data = hate_crimes)

# Diversity
model_div <- lm(hatecrimes_fbi ~ share_non_white, data = hate_crimes)
model_div2 <- lm(hatecrimes_fbi ~ share_non_citizen, data = hate_crimes)

# Geographic Distribution 
model_geo <- lm(hatecrimes_fbi ~ share_pop_metro, data = hate_crimes)

# Economic 
model_eco <- lm(hatecrimes_fbi ~ median_house_inc, data = hate_crimes)
model_eco2 <- lm(hatecrimes_fbi ~ share_unemp_seas, data = hate_crimes)
model_eco3 <- lm(hatecrimes_fbi ~ share_white_poverty, data = hate_crimes)

# Income Inequality 
model_ine <- lm(hatecrimes_fbi ~ gini_index, data = hate_crimes)

# Political Leanings 
model_pol <- lm(hatecrimes_fbi ~ share_vote_trump, data = hate_crimes)

model_list <- list(
  "Education" = model_edu,
  "Diversity" = model_div,
  "Geographic Distribution" = model_geo,
  "Economic Indicators" = model_eco,
  "Income Inequality" = model_ine,
  "Political Leanings" = model_pol
)

modelsummary(model_list,
             estimate = "{estimate}{stars}",
             statistic = NULL,
             gof_omit = "IC|Log|alg|pss|F|RMSE",
             notes = "Notes: + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001",
             output="huxtable")

	Education	Diversity	Geographic Distribution	Economic Indicators	Income Inequality	Political Leanings
(Intercept)	-4.809	2.077***	0.897	-0.956	-13.334**	6.026***
share_pop_hs	0.083
share_non_white		0.010
share_pop_metro			0.020
median_house_inc				0.000*
gini_index					0.346**
share_vote_trump						-0.074***
Num.Obs.	50	50	50	50	50	50
R2	0.027	0.007	0.044	0.101	0.177	0.255
R2 Adj.	0.007	-0.014	0.024	0.083	0.160	0.239
Notes: + p < 0.1, * p < 0.05, p < 0.01, * p < 0.001

selected_vars <- hate_crimes %>% 
  select(hatecrimes_fbi,
         gini_index, 
         median_house_inc, 
         share_unemp_seas, 
         share_pop_metro, 
         share_pop_hs, 
         share_non_white, 
         share_non_citizen, 
         share_white_poverty,
         share_vote_trump)

cor_matrix <- cor(selected_vars, use = "complete.obs")

corrplot(cor_matrix, method = "circle", diag = FALSE, type = "lower",
                       order = "hclust",      # Orders variables based on hierarchical clustering, or change it to "original" to keep your custom order 
                       tl.srt = 45,           # Set text label angle for the top labels
                       tl.offset = 0.5,       # Increase offset to move text labels closer to the grid
                       tl.cex = 0.6,          # Font size of text labels
                       tl.col = "black",      # Label color
                       number.cex = 0.8,      # Adjust size of numbers inside circles
                       addCoef.col = "black", # Color of the correlation coefficients
                       cl.pos = 'n',          # Removes legend
                       is.corr = TRUE)        # Ensure it interprets values as correlations for formatting

model_fbi <- lm(hatecrimes_fbi ~ median_house_inc + 
                                             share_unemp_seas + 
                                             share_pop_metro + 
                                             share_pop_hs + 
                                             share_non_citizen + 
                                             share_white_poverty + 
                                             gini_index + 
                                             share_non_white + 
                                             share_vote_trump, 
                data = hate_crimes)

modelsummary(list("avg annual per 100,000" = model_fbi),
             estimate = "{estimate}{stars}",
             statistic = NULL,
             gof_omit = "IC|Log|alg|pss|F|RMSE",
             notes = "Notes: + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001",
             output="huxtable")

	avg annual per 100,000
(Intercept)	-42.347*
median_house_inc	0.000
share_unemp_seas	0.285
share_pop_metro	-0.026
share_pop_hs	0.212
share_non_citizen	0.198
share_white_poverty	0.063
gini_index	0.570**
share_non_white	-0.037
share_vote_trump	-0.022
Num.Obs.	47
R2	0.498
R2 Adj.	0.376
Notes: + p < 0.1, * p < 0.05, p < 0.01, * p < 0.001

# ----------------------------------------
# Step 1: Define model labels
# 'Multivariate' refers to the full model including all independent variables.
# 'Bivariate' refers to models with a single predictor (one DV vs. one IV).
# This vector is for labeling purposes in the plot legend.
model_names <- c("Multivariate", rep("Bivariate", 9))

# ----------------------------------------
# Step 2: Define colors for the plot
# Green for the multivariate model, pink for bivariate models
colors <- c("#1B9E77", rep("#E7298A", 9))

# ----------------------------------------
# Step 3: Extract coefficients and confidence intervals
# 'tidy()' from the broom package converts model objects to a tidy data frame
tidy_fbi <- tidy(model_fbi, conf.int = TRUE) %>% mutate(Model = "Multivariate")
tidy_edu <- tidy(model_edu, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_div <- tidy(model_div, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_div2 <- tidy(model_div2, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_geo <- tidy(model_geo, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_eco <- tidy(model_eco, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_eco2 <- tidy(model_eco2, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_eco3 <- tidy(model_eco3, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_ine <- tidy(model_ine, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_pol <- tidy(model_pol, conf.int = TRUE) %>% mutate(Model = "Bivariate")

# ----------------------------------------
# Step 4: Combine all tidied models into a single dataframe
all_models_df <- bind_rows(
  tidy_fbi, tidy_edu, tidy_div, tidy_div2, tidy_geo, 
  tidy_eco, tidy_eco2, tidy_eco3, tidy_ine, tidy_pol
) %>%
  filter(term != "(Intercept)") %>%  # Remove intercepts from plot
  mutate(
    adjustment = if_else(Model == "Multivariate", 0, -0.02 * estimate),
    estimate_adj = estimate + adjustment,  # Slightly offset bivariate estimates for visibility
    ymin = conf.low + adjustment,          # Adjust lower bound
    ymax = conf.high + adjustment          # Adjust upper bound
  )

# ----------------------------------------
# Step 5: Plot coefficients with confidence intervals
ggplot(all_models_df, aes(x = term, y = estimate_adj, color = Model)) +
  geom_point(position = position_dodge(width = 0.5)) +  # dodge points to avoid overlap
  geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.2, position = position_dodge(width = 0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 0.5) +  # Reference line at 0
  scale_color_manual(values = c("Multivariate" = "#1B9E77", "Bivariate" = "#E7298A")) +
  theme_minimal() +
  theme(legend.position = "right") +
  labs(
    x = "Variable", 
    y = "Coefficient Estimate", 
    color = "Model Type",
    title = "Comparison of Bivariate vs. Multivariate Coefficient Estimates"
  ) +
  coord_flip()  # Flip coordinates so variables appear on the y-axis for readability

# ----------------------------------------
# Annotations:
# - Green dots = coefficients from the full multivariate model
# - Pink dots = coefficients from each bivariate model
# - Error bars = 95% confidence intervals
# - Dashed horizontal line = no effect (0)
# - Positive estimate: variable is associated with higher hate crime rate
# - Negative estimate: variable is associated with lower hate crime rate

# Multivariate regression model using SPLC data
model_splc <- lm(hatecrimes_splc ~ median_house_inc + 
                                         share_unemp_seas + 
                                         share_pop_metro + 
                                         share_pop_hs + 
                                         share_non_citizen + 
                                         share_white_poverty + 
                                         gini_index + 
                                         share_non_white + 
                                         share_vote_trump, 
                 data = hate_crimes)


modelsummary(list("FBI\n(avg annual per 100,000)" = model_fbi, "SPLC\n(hate incidents per 100,000 in 10 day period )"= model_splc),
             estimate = "{estimate}{stars}",
             statistic = NULL,
             gof_omit = "IC|Log|alg|pss|F|RMSE",
             notes = "Notes: + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001",
             output="huxtable")

	FBI (avg annual per 100,000)	SPLC (hate incidents per 100,000 in 10 day period )
(Intercept)	-42.347*	-4.305
median_house_inc	0.000	-0.000
share_unemp_seas	0.285	0.049
share_pop_metro	-0.026	-0.003
share_pop_hs	0.212	0.036+
share_non_citizen	0.198	0.009
share_white_poverty	0.063	0.005
gini_index	0.570**	0.050*
share_non_white	-0.037	-0.003
share_vote_trump	-0.022	-0.013*
Num.Obs.	47	45
R2	0.498	0.596
R2 Adj.	0.376	0.492
Notes: + p < 0.1, * p < 0.05, p < 0.01, * p < 0.001

splc_map_data <- hate_crimes %>%
  mutate(state = tolower(state)) %>%
  inner_join(states_map, by = c("state" = "region"))

splc_map <- ggplot() +
  geom_polygon(
    data = splc_map_data,
    aes(
      x = long,
      y = lat,
      group = group,
      fill = hatecrimes_splc
    ),
    color = "white"
  ) +
  coord_fixed(1.3) +
  scale_fill_viridis(option = "plasma",
                     name = "SPLC Hate Incidents\nper 100k (10 days)",
                     guide = guide_colorbar(direction = "vertical")) +
  theme_void() +
  theme(
    legend.position = "right",
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(title = "SPLC-Reported Hate Incidents Across States")

print(splc_map)

MReyes_Lab3

Matthew Reyes

2026-02-17