# Creating the FBI data histogram with a mean line
fbi_histogram <- ggplot(hate_crimes %>%
#Remove missing or non-finite values to prevent plotting errors
filter(!is.na(hatecrimes_fbi) & is.finite(hatecrimes_fbi)),
aes(x = hatecrimes_fbi)) +
geom_histogram(fill = "skyblue", bins = 30, alpha = 1) +
# Add a vertical dashed line representing the national mean
# This helps visually assess states fall above or below average
geom_vline(aes(xintercept = mean(hatecrimes_fbi, na.rm = TRUE)), #
color = "red", linetype = "dashed", size = 1) +
# Label axes for interpretability in the final report
labs(x = "Average Annual Hate Crimes per 100k", y = "Frequency") +
# Apply a clean theme to improve readability and presentation quality
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Overlaying data on map of United States
# Load geographic boundary data for U.S. states
# This dataset contains the coordinates needed to draw each state polygon
states_map <- map_data("state")
# Prepare the hate crimes data for spatial visualization
hate_crimes_map_data <- hate_crimes %>%
# Convert state names to lowercase so they match the format
# used in the map_data dataset (necessary for a successful join)
mutate(state = tolower(state)) %>%
# Merge hate crime data with geographic coordinates
# inner_join keeps only states present in BOTH datasets,
# preventing blank or unmatched regions on the map
inner_join(states_map, by = c("state" = "region"))
# Create a choropleth map (color-coded map) of hate crime rates
hate_crimes_map <- ggplot() +
# Draw each state as a polygon and fill based on hate crime rate
geom_polygon(
data = hate_crimes_map_data,
aes(
x = long,
y = lat,
group = group, # Ensures each state boundary is drawn correctly
fill = hatecrimes_fbi
),
color = "white" # White borders improve state separation
) +
# Fix the aspect ratio so the U.S. map is not distorted
coord_fixed(1.3) +
# Use the viridis color scale:
# - perceptually uniform (differences reflect real data differences)
# - colorblind-friendly
# - prints well in grayscale
scale_fill_viridis(
option = "magma",
name = "Hate Crimes\nper 100k",
guide = guide_colorbar(direction = "vertical")
) +
# Remove axes, ticks, and background elements
# This keeps attention on the spatial pattern
theme_void() +
# Position legend and center the title for a cleaner layout
theme(
legend.position = "right",
plot.title = element_text(hjust = 0.5)
)
# Combine the histogram and map into a single figure
combined_plot <- fbi_histogram + hate_crimes_map +
# Arrange plots in two columns for easy comparison
plot_layout(ncol = 2) +
# Add a caption describing the time period
labs(caption = "Notes: Years 2010 through 2015") +
# Style the caption for readability
theme(plot.caption = element_text(hjust = 0, face = "italic"))
print(combined_plot)

# Reorder the columns so that the DV is first
hate_crimes_reordered <- hate_crimes %>%
select(hatecrimes_fbi, everything())
# Use datasummary_skim() on the reordered data
datasummary_skim(hate_crimes_reordered, type="numeric", fmt=2, histogram=FALSE)
## Warning: The `histogram` argument is deprecated. Use `fun_numeric` instead.
| |
Unique |
Missing Pct. |
Mean |
SD |
Min |
Median |
Max |
| hatecrimes_fbi |
51 |
2 |
2.37 |
1.71 |
0.27 |
1.99 |
10.95 |
| median_house_inc |
51 |
0 |
55223.61 |
9208.48 |
35521.00 |
54916.00 |
76165.00 |
| share_unemp_seas |
32 |
0 |
4.96 |
1.07 |
2.80 |
5.10 |
7.30 |
| share_pop_metro |
31 |
0 |
75.02 |
18.16 |
31.00 |
79.00 |
100.00 |
| share_pop_hs |
40 |
0 |
86.91 |
3.41 |
79.90 |
87.40 |
91.80 |
| share_non_citizen |
13 |
6 |
5.46 |
3.11 |
1.00 |
4.50 |
13.00 |
| share_white_poverty |
12 |
0 |
9.18 |
2.47 |
4.00 |
9.00 |
17.00 |
| gini_index |
39 |
0 |
45.38 |
2.09 |
41.90 |
45.40 |
53.20 |
| share_non_white |
34 |
0 |
31.57 |
16.49 |
6.00 |
28.00 |
81.00 |
| share_vote_trump |
33 |
0 |
49.00 |
11.87 |
4.00 |
49.00 |
70.00 |
| hate_crimes_per_100k_splc |
48 |
8 |
0.30 |
0.25 |
0.07 |
0.23 |
1.52 |
| avg_hatecrimes_per_100k_fbi |
51 |
2 |
2.37 |
1.71 |
0.27 |
1.99 |
10.95 |
| hatecrimes_splc |
48 |
8 |
0.30 |
0.25 |
0.07 |
0.23 |
1.52 |
var_table <- matrix(
c(
"median_house_inc", "Negative", "Higher income states are associated with lower hate crime rates.",
"share_unemp_seas", "Positive", "Higher unemployment is associated to higher hate crime rates.",
"share_pop_metro", "Negative", "Metropolitan areas will reflect fewer hate crimes",
"share_pop_hs", "Negative", "A higher share of the population with a high school diploma is associated with lower hate crime rates.",
"share_non_citizen", "Positive", "Areas with higher non-citizen populations are associated with more hate crimes.",
"share_white_poverty", "Positive", "A greater poor white population will be associated with more hate crimes.",
"gini_index", "Positive", "Higher income inequality is associated with higher hate crime rates.",
"share_non_white", "Positive", "Areas with diverse populations will experience more hate crimes.",
"share_vote_trump", "Positive", "States with higher support for Trump are associated with higher hate crime rates."
),
ncol = 3,
byrow = TRUE
)
# Convert the matrix to a data frame for kable
table_df <- as.data.frame(var_table)
# Create the table with knitr::kable and shade the first row
kable(table_df, "html", booktabs = T, col.names = c("Variable Name", "Hypothesized Direction", "Hypothesis Formulation")) %>%
kable_styling(latex_options = "striped", stripe_color = "gray!30") %>%
row_spec(0, background = "gray!30")
|
Variable Name
|
Hypothesized Direction
|
Hypothesis Formulation
|
|
median_house_inc
|
Negative
|
Higher income states are associated with lower hate crime rates.
|
|
share_unemp_seas
|
Positive
|
Higher unemployment is associated to higher hate crime rates.
|
|
share_pop_metro
|
Negative
|
Metropolitan areas will reflect fewer hate crimes
|
|
share_pop_hs
|
Negative
|
A higher share of the population with a high school diploma is
associated with lower hate crime rates.
|
|
share_non_citizen
|
Positive
|
Areas with higher non-citizen populations are associated with more hate
crimes.
|
|
share_white_poverty
|
Positive
|
A greater poor white population will be associated with more hate
crimes.
|
|
gini_index
|
Positive
|
Higher income inequality is associated with higher hate crime rates.
|
|
share_non_white
|
Positive
|
Areas with diverse populations will experience more hate crimes.
|
|
share_vote_trump
|
Positive
|
States with higher support for Trump are associated with higher hate
crime rates.
|
# Education
model_edu <- lm(hatecrimes_fbi ~ share_pop_hs, data = hate_crimes)
# Diversity
model_div <- lm(hatecrimes_fbi ~ share_non_white, data = hate_crimes)
model_div2 <- lm(hatecrimes_fbi ~ share_non_citizen, data = hate_crimes)
# Geographic Distribution
model_geo <- lm(hatecrimes_fbi ~ share_pop_metro, data = hate_crimes)
# Economic
model_eco <- lm(hatecrimes_fbi ~ median_house_inc, data = hate_crimes)
model_eco2 <- lm(hatecrimes_fbi ~ share_unemp_seas, data = hate_crimes)
model_eco3 <- lm(hatecrimes_fbi ~ share_white_poverty, data = hate_crimes)
# Income Inequality
model_ine <- lm(hatecrimes_fbi ~ gini_index, data = hate_crimes)
# Political Leanings
model_pol <- lm(hatecrimes_fbi ~ share_vote_trump, data = hate_crimes)
model_list <- list(
"Education" = model_edu,
"Diversity" = model_div,
"Geographic Distribution" = model_geo,
"Economic Indicators" = model_eco,
"Income Inequality" = model_ine,
"Political Leanings" = model_pol
)
modelsummary(model_list,
estimate = "{estimate}{stars}",
statistic = NULL,
gof_omit = "IC|Log|alg|pss|F|RMSE",
notes = "Notes: + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001",
output="huxtable")
| (Intercept) | -4.809 | 2.077*** | 0.897 | -0.956 | -13.334** | 6.026*** |
| share_pop_hs | 0.083 | | | | | |
| share_non_white | | 0.010 | | | | |
| share_pop_metro | | | 0.020 | | | |
| median_house_inc | | | | 0.000* | | |
| gini_index | | | | | 0.346** | |
| share_vote_trump | | | | | | -0.074*** |
| Num.Obs. | 50 | 50 | 50 | 50 | 50 | 50 |
| R2 | 0.027 | 0.007 | 0.044 | 0.101 | 0.177 | 0.255 |
| R2 Adj. | 0.007 | -0.014 | 0.024 | 0.083 | 0.160 | 0.239 |
| Notes: + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
selected_vars <- hate_crimes %>%
select(hatecrimes_fbi,
gini_index,
median_house_inc,
share_unemp_seas,
share_pop_metro,
share_pop_hs,
share_non_white,
share_non_citizen,
share_white_poverty,
share_vote_trump)
cor_matrix <- cor(selected_vars, use = "complete.obs")
corrplot(cor_matrix, method = "circle", diag = FALSE, type = "lower",
order = "hclust", # Orders variables based on hierarchical clustering, or change it to "original" to keep your custom order
tl.srt = 45, # Set text label angle for the top labels
tl.offset = 0.5, # Increase offset to move text labels closer to the grid
tl.cex = 0.6, # Font size of text labels
tl.col = "black", # Label color
number.cex = 0.8, # Adjust size of numbers inside circles
addCoef.col = "black", # Color of the correlation coefficients
cl.pos = 'n', # Removes legend
is.corr = TRUE) # Ensure it interprets values as correlations for formatting

model_fbi <- lm(hatecrimes_fbi ~ median_house_inc +
share_unemp_seas +
share_pop_metro +
share_pop_hs +
share_non_citizen +
share_white_poverty +
gini_index +
share_non_white +
share_vote_trump,
data = hate_crimes)
modelsummary(list("avg annual per 100,000" = model_fbi),
estimate = "{estimate}{stars}",
statistic = NULL,
gof_omit = "IC|Log|alg|pss|F|RMSE",
notes = "Notes: + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001",
output="huxtable")
| (Intercept) | -42.347* |
| median_house_inc | 0.000 |
| share_unemp_seas | 0.285 |
| share_pop_metro | -0.026 |
| share_pop_hs | 0.212 |
| share_non_citizen | 0.198 |
| share_white_poverty | 0.063 |
| gini_index | 0.570** |
| share_non_white | -0.037 |
| share_vote_trump | -0.022 |
| Num.Obs. | 47 |
| R2 | 0.498 |
| R2 Adj. | 0.376 |
| Notes: + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
# ----------------------------------------
# Step 1: Define model labels
# 'Multivariate' refers to the full model including all independent variables.
# 'Bivariate' refers to models with a single predictor (one DV vs. one IV).
# This vector is for labeling purposes in the plot legend.
model_names <- c("Multivariate", rep("Bivariate", 9))
# ----------------------------------------
# Step 2: Define colors for the plot
# Green for the multivariate model, pink for bivariate models
colors <- c("#1B9E77", rep("#E7298A", 9))
# ----------------------------------------
# Step 3: Extract coefficients and confidence intervals
# 'tidy()' from the broom package converts model objects to a tidy data frame
tidy_fbi <- tidy(model_fbi, conf.int = TRUE) %>% mutate(Model = "Multivariate")
tidy_edu <- tidy(model_edu, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_div <- tidy(model_div, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_div2 <- tidy(model_div2, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_geo <- tidy(model_geo, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_eco <- tidy(model_eco, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_eco2 <- tidy(model_eco2, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_eco3 <- tidy(model_eco3, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_ine <- tidy(model_ine, conf.int = TRUE) %>% mutate(Model = "Bivariate")
tidy_pol <- tidy(model_pol, conf.int = TRUE) %>% mutate(Model = "Bivariate")
# ----------------------------------------
# Step 4: Combine all tidied models into a single dataframe
all_models_df <- bind_rows(
tidy_fbi, tidy_edu, tidy_div, tidy_div2, tidy_geo,
tidy_eco, tidy_eco2, tidy_eco3, tidy_ine, tidy_pol
) %>%
filter(term != "(Intercept)") %>% # Remove intercepts from plot
mutate(
adjustment = if_else(Model == "Multivariate", 0, -0.02 * estimate),
estimate_adj = estimate + adjustment, # Slightly offset bivariate estimates for visibility
ymin = conf.low + adjustment, # Adjust lower bound
ymax = conf.high + adjustment # Adjust upper bound
)
# ----------------------------------------
# Step 5: Plot coefficients with confidence intervals
ggplot(all_models_df, aes(x = term, y = estimate_adj, color = Model)) +
geom_point(position = position_dodge(width = 0.5)) + # dodge points to avoid overlap
geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.2, position = position_dodge(width = 0.5)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "black", size = 0.5) + # Reference line at 0
scale_color_manual(values = c("Multivariate" = "#1B9E77", "Bivariate" = "#E7298A")) +
theme_minimal() +
theme(legend.position = "right") +
labs(
x = "Variable",
y = "Coefficient Estimate",
color = "Model Type",
title = "Comparison of Bivariate vs. Multivariate Coefficient Estimates"
) +
coord_flip() # Flip coordinates so variables appear on the y-axis for readability

# ----------------------------------------
# Annotations:
# - Green dots = coefficients from the full multivariate model
# - Pink dots = coefficients from each bivariate model
# - Error bars = 95% confidence intervals
# - Dashed horizontal line = no effect (0)
# - Positive estimate: variable is associated with higher hate crime rate
# - Negative estimate: variable is associated with lower hate crime rate
# Multivariate regression model using SPLC data
model_splc <- lm(hatecrimes_splc ~ median_house_inc +
share_unemp_seas +
share_pop_metro +
share_pop_hs +
share_non_citizen +
share_white_poverty +
gini_index +
share_non_white +
share_vote_trump,
data = hate_crimes)
modelsummary(list("FBI\n(avg annual per 100,000)" = model_fbi, "SPLC\n(hate incidents per 100,000 in 10 day period )"= model_splc),
estimate = "{estimate}{stars}",
statistic = NULL,
gof_omit = "IC|Log|alg|pss|F|RMSE",
notes = "Notes: + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001",
output="huxtable")
| (Intercept) | -42.347* | -4.305 |
| median_house_inc | 0.000 | -0.000 |
| share_unemp_seas | 0.285 | 0.049 |
| share_pop_metro | -0.026 | -0.003 |
| share_pop_hs | 0.212 | 0.036+ |
| share_non_citizen | 0.198 | 0.009 |
| share_white_poverty | 0.063 | 0.005 |
| gini_index | 0.570** | 0.050* |
| share_non_white | -0.037 | -0.003 |
| share_vote_trump | -0.022 | -0.013* |
| Num.Obs. | 47 | 45 |
| R2 | 0.498 | 0.596 |
| R2 Adj. | 0.376 | 0.492 |
| Notes: + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
splc_map_data <- hate_crimes %>%
mutate(state = tolower(state)) %>%
inner_join(states_map, by = c("state" = "region"))
splc_map <- ggplot() +
geom_polygon(
data = splc_map_data,
aes(
x = long,
y = lat,
group = group,
fill = hatecrimes_splc
),
color = "white"
) +
coord_fixed(1.3) +
scale_fill_viridis(option = "plasma",
name = "SPLC Hate Incidents\nper 100k (10 days)",
guide = guide_colorbar(direction = "vertical")) +
theme_void() +
theme(
legend.position = "right",
plot.title = element_text(hjust = 0.5)
) +
labs(title = "SPLC-Reported Hate Incidents Across States")
print(splc_map)
