# Start from a clean session: drop every object in the current environment,
# then run garbage collection (gc() also prints the memory-usage table below).
# NOTE(review): rm(list = ls()) wipes the caller's workspace when this script
# is sourced interactively; consider running in a fresh R session instead.
rm(list=ls())
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 592055 31.7 1356874 72.5 NA 669422 35.8
## Vcells 1114928 8.6 8388608 64.0 16384 1851672 14.2
# Project folder; the relative paths used later for reading the spreadsheet
# and writing the HTML table are resolved against this working directory.
# NOTE(review): this absolute path is machine-specific, so the script will
# not run elsewhere without editing it.
directory <- "/Users/ruthiemaurer/Desktop/DATA 712"
setwd(directory)
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(haven)
# Fix the RNG seed (no random step is visible in this section of the script).
set.seed(123123)
# Load the hate-crime spreadsheet from the working directory as a tibble.
DATA <- read_xlsx("hate_crime copy 2.xlsx")
# Preview the first rows and column types.
print(DATA)
## # A tibble: 253,776 × 9
## data_year pug_agency_name state_abbr state_name incident_date offender_race
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1991 Pine Bluff AR Arkansas 1991-07-04 Black or Afr…
## 2 1991 Pine Bluff AR Arkansas 1991-12-24 Black or Afr…
## 3 1991 North Little Rock AR Arkansas 1991-07-10 Black or Afr…
## 4 1991 North Little Rock AR Arkansas 1991-10-06 Black or Afr…
## 5 1991 Sevier AR Arkansas 1991-10-14 White
## 6 1991 Rogers AR Arkansas 1991-08-31 White
## 7 1991 Hope AR Arkansas 1991-09-19 Black or Afr…
## 8 1991 Pine Bluff AR Arkansas 1991-12-23 Black or Afr…
## 9 1991 Pine Bluff AR Arkansas 1991-07-27 Black or Afr…
## 10 1991 Little Rock AR Arkansas 1991-11-14 Black or Afr…
## # ℹ 253,766 more rows
## # ℹ 3 more variables: offender_ethnicity <chr>, offense_name <chr>,
## # bias_desc <chr>
# Give the raw columns human-readable labels.
# The mapping is explicit (new = old) rather than positional, so a change in
# the spreadsheet's column order cannot silently mislabel a variable, and a
# missing source column raises an error instead of going unnoticed.
DATA <- DATA %>%
  rename(
    Year = data_year,
    `Pug Agency Name` = pug_agency_name,
    State = state_abbr,
    `State Name` = state_name,
    `Incident Date` = incident_date,
    `Offender Race` = offender_race,
    `Offender Ethnicity` = offender_ethnicity,
    Offense = offense_name,
    `Reason/Bias` = bias_desc
  )
# Show the resulting column names to confirm the rename.
colnames(DATA)
## [1] "Year" "Pug Agency Name" "State"
## [4] "State Name" "Incident Date" "Offender Race"
## [7] "Offender Ethnicity" "Offense" "Reason/Bias"
print(DATA)
## # A tibble: 253,776 × 9
## Year `Pug Agency Name` State `State Name` `Incident Date` `Offender Race`
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1991 Pine Bluff AR Arkansas 1991-07-04 Black or African …
## 2 1991 Pine Bluff AR Arkansas 1991-12-24 Black or African …
## 3 1991 North Little Rock AR Arkansas 1991-07-10 Black or African …
## 4 1991 North Little Rock AR Arkansas 1991-10-06 Black or African …
## 5 1991 Sevier AR Arkansas 1991-10-14 White
## 6 1991 Rogers AR Arkansas 1991-08-31 White
## 7 1991 Hope AR Arkansas 1991-09-19 Black or African …
## 8 1991 Pine Bluff AR Arkansas 1991-12-23 Black or African …
## 9 1991 Pine Bluff AR Arkansas 1991-07-27 Black or African …
## 10 1991 Little Rock AR Arkansas 1991-11-14 Black or African …
## # ℹ 253,766 more rows
## # ℹ 3 more variables: `Offender Ethnicity` <chr>, Offense <chr>,
## # `Reason/Bias` <chr>
library(nnet) # For multinomial logistic regression
## Warning: package 'nnet' was built under R version 4.3.3
# Offense labels treated as violent for the binary outcome.
violent_crimes <- c(
  "Aggravated Assault",
  "Murder and Nonnegligent Manslaughter",
  "Rape",
  "Robbery"
)
# Build the outcome and convert the offender descriptors to factors.
# violent_crime is 1 when the Offense text contains any of the labels above
# (regex alternation, substring match) and 0 otherwise.
DATA <- mutate(
  DATA,
  violent_crime = as.numeric(
    grepl(paste(violent_crimes, collapse = "|"), Offense)
  ),
  `Offender Race` = as.factor(`Offender Race`),
  `Offender Ethnicity` = as.factor(`Offender Ethnicity`)
)
# Nested logistic regressions for the violent_crime indicator.
# Model 1: baseline with Year as the only predictor.
model_1 <- glm(
  violent_crime ~ Year,
  family = binomial(link = "logit"),
  data = DATA
)
# Model 2: Model 1 plus Offender Race (same family and data, refit).
model_2 <- update(model_1, . ~ . + `Offender Race`)
# Model 3: Model 2 plus Offender Ethnicity.
model_3 <- update(model_2, . ~ . + `Offender Ethnicity`)
# Tabulate AIC, BIC and log-likelihood for the three fitted models.
model_comparison <- local({
  fits <- list(model_1, model_2, model_3)
  data.frame(
    Model = paste("Model", seq_along(fits)),
    AIC = vapply(fits, AIC, numeric(1)),
    BIC = vapply(fits, BIC, numeric(1)),
    LogLikelihood = vapply(
      fits,
      function(fit) as.numeric(logLik(fit)),
      numeric(1)
    )
  )
})
print(model_comparison)
## Model AIC BIC LogLikelihood
## 1 Model 1 199453.1 199473.9 -99724.53
## 2 Model 2 179915.4 180009.4 -89948.68
## 3 Model 3 179412.7 179548.5 -89693.34
library(texreg)
## Warning: package 'texreg' was built under R version 4.3.3
## Version: 1.39.4
## Date: 2024-07-23
## Author: Philip Leifeld (University of Manchester)
##
## Consider submitting praise using the praise or praise_interactive functions.
## Please cite the JSS article in your publications -- see citation("texreg").
##
## Attaching package: 'texreg'
## The following object is masked from 'package:tidyr':
##
## extract
# Render the three models side by side as an HTML regression table
# (doctype = FALSE omits the document type declaration from the output).
htmlreg(list(model_1, model_2, model_3), doctype = FALSE)
## <table class="texreg" style="margin: 10px auto;border-collapse: collapse;border-spacing: 0px;caption-side: bottom;color: #000000;border-top: 2px solid #000000;">
## <caption>Statistical models</caption>
## <thead>
## <tr>
## <th style="padding-left: 5px;padding-right: 5px;"> </th>
## <th style="padding-left: 5px;padding-right: 5px;">Model 1</th>
## <th style="padding-left: 5px;padding-right: 5px;">Model 2</th>
## <th style="padding-left: 5px;padding-right: 5px;">Model 3</th>
## </tr>
## </thead>
## <tbody>
## <tr style="border-top: 1px solid #000000;">
## <td style="padding-left: 5px;padding-right: 5px;">(Intercept)</td>
## <td style="padding-left: 5px;padding-right: 5px;">7.70<sup>***</sup></td>
## <td style="padding-left: 5px;padding-right: 5px;">14.55<sup>***</sup></td>
## <td style="padding-left: 5px;padding-right: 5px;">26.93<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(1.20)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(1.26)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(1.68)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">Year</td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.00<sup>***</sup></td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.01<sup>***</sup></td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.01<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.00)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.00)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.00)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Race`Asian</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.12</td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.12</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.08)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.08)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Race`Black or African American</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">0.27<sup>***</sup></td>
## <td style="padding-left: 5px;padding-right: 5px;">0.27<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.06)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.06)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Race`Multiple</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">0.34<sup>***</sup></td>
## <td style="padding-left: 5px;padding-right: 5px;">0.25<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.07)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.07)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Race`Native Hawaiian or Other Pacific Islander</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.18</td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.20</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.21)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.21)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Race`Not Specified</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">-1.64<sup>***</sup></td>
## <td style="padding-left: 5px;padding-right: 5px;">-1.53<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.08)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.08)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Race`Unknown</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">-2.04<sup>***</sup></td>
## <td style="padding-left: 5px;padding-right: 5px;">-2.05<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.07)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.07)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Race`White</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.23<sup>***</sup></td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.25<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.06)</td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.06)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Ethnicity`Multiple</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.48<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.08)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Ethnicity`Not Hispanic or Latino</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.83<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.04)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Ethnicity`Not Specified</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.93<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.04)</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">`Offender Ethnicity`Unknown</td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">-0.81<sup>***</sup></td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;"> </td>
## <td style="padding-left: 5px;padding-right: 5px;">(0.04)</td>
## </tr>
## <tr style="border-top: 1px solid #000000;">
## <td style="padding-left: 5px;padding-right: 5px;">AIC</td>
## <td style="padding-left: 5px;padding-right: 5px;">199453.06</td>
## <td style="padding-left: 5px;padding-right: 5px;">179915.36</td>
## <td style="padding-left: 5px;padding-right: 5px;">179412.68</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">BIC</td>
## <td style="padding-left: 5px;padding-right: 5px;">199473.95</td>
## <td style="padding-left: 5px;padding-right: 5px;">180009.36</td>
## <td style="padding-left: 5px;padding-right: 5px;">179548.45</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">Log Likelihood</td>
## <td style="padding-left: 5px;padding-right: 5px;">-99724.53</td>
## <td style="padding-left: 5px;padding-right: 5px;">-89948.68</td>
## <td style="padding-left: 5px;padding-right: 5px;">-89693.34</td>
## </tr>
## <tr>
## <td style="padding-left: 5px;padding-right: 5px;">Deviance</td>
## <td style="padding-left: 5px;padding-right: 5px;">199449.06</td>
## <td style="padding-left: 5px;padding-right: 5px;">179897.36</td>
## <td style="padding-left: 5px;padding-right: 5px;">179386.68</td>
## </tr>
## <tr style="border-bottom: 2px solid #000000;">
## <td style="padding-left: 5px;padding-right: 5px;">Num. obs.</td>
## <td style="padding-left: 5px;padding-right: 5px;">253776</td>
## <td style="padding-left: 5px;padding-right: 5px;">253776</td>
## <td style="padding-left: 5px;padding-right: 5px;">253776</td>
## </tr>
## </tbody>
## <tfoot>
## <tr>
## <td style="font-size: 0.8em;" colspan="4"><sup>***</sup>p < 0.001; <sup>**</sup>p < 0.01; <sup>*</sup>p < 0.05</td>
## </tr>
## </tfoot>
## </table>
# Build the same table again and send it to regression_table_hate.html in the
# working directory.
# NOTE(review): this relies on print() forwarding `file` to the underlying
# writer; htmlreg() also accepts `file` directly -- confirm which is intended.
print(htmlreg(list(model_1, model_2, model_3), doctype = FALSE), file = "regression_table_hate.html")
# Analysis-of-deviance (chi-square) test: does adding Offender Race to the
# Year-only model reduce the deviance?
anova(model_1, model_2, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: violent_crime ~ Year
## Model 2: violent_crime ~ Year + `Offender Race`
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 253774 199449
## 2 253767 179897 7 19552 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Analysis-of-deviance (chi-square) test: does adding Offender Ethnicity to
# the Year + Offender Race model reduce the deviance?
anova(model_2, model_3, test = "Chisq")
## Analysis of Deviance Table
##
## Model 1: violent_crime ~ Year + `Offender Race`
## Model 2: violent_crime ~ Year + `Offender Race` + `Offender Ethnicity`
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 253767 179897
## 2 253763 179387 4 510.69 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
In this analysis, using the FBI Hate
Crime Dataset, I examined factors influencing whether a hate crime
was violent or non-violent by using a series of logistic regression
models. The dependent variable, violent_crime, was coded as
1 for violent crimes (aggravated assault, murder and nonnegligent
manslaughter, rape, and robbery) and 0 for non-violent crimes. Each
successive model added variables in order to assess the impact of the
additional predictors on model fit and predictive accuracy.
The first model (model_1) included only the year of the
incident as an independent variable. The second model
(model_2) incorporated the offender’s race as a categorical
predictor. The third model (model_3) further expanded on
Model 2 by including the offender’s ethnicity. The Akaike Information
Criterion (AIC) and Bayesian Information Criterion (BIC) were used to
evaluate model fit, with lower values indicating a better fit.
The results showed Model 1 had an AIC of 199453.06 and a BIC of 199473.95, serving as the baseline model. Introducing offender race in Model 2 reduced the AIC to 179915.36 and BIC to 180009.36, suggesting an improvement in model fit. The likelihood ratio test comparing Model 1 and Model 2 yielded a Chi-square statistic of 19552 with a p-value of < 2.2e-16, indicating that adding offender race significantly enhances the predictive power of the model.
Further refining the model, Model 3, which included offender ethnicity in addition to race, resulted in an AIC of 179412.68 and a BIC of 179548.45. The likelihood ratio test between Model 2 and Model 3 produced a Chi-square statistic of 510.69 with a p-value of < 2.2e-16, demonstrating that offender ethnicity also contributes significantly to predicting violent hate crimes.
As noted above, the AIC and BIC values decreased from Model 1 to Model 2 (by roughly 19,500 points each), indicating that including offender race significantly improved the model. The reduction between Model 2 and Model 3 was much smaller (about 503 points in AIC and 461 points in BIC), suggesting that adding offender ethnicity improved the model far less than adding offender race did, although both criteria still favor Model 3.
Additionally, the likelihood ratio test (LRT) compared these models: The LRT for Model 1 vs. Model 2 produced a Chi-square statistic of 19552 and a p-value of < 2.2e-16, confirming that adding offender race significantly improves the model. The LRT for Model 2 vs. Model 3 resulted in a Chi-square statistic of 510.69 and a p-value of < 2.2e-16. Because this p-value is far below 0.05, adding offender ethnicity does significantly enhance the model beyond what offender race already provides; had the p-value been greater than 0.05, it would have suggested no significant improvement. Note that with 253,776 observations even modest effects reach statistical significance, so the size of the improvement matters as well as its significance.
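Model 2 achieves a substantial reduction in AIC and BIC compared to Model 1, and Model 3 provides a further, statistically significant but much smaller improvement over Model 2. By AIC, BIC, and the likelihood ratio test, Model 3 is therefore the best-fitting model, while Model 2 captures most of the improvement with fewer parameters and remains a reasonable, more parsimonious choice. Overall, this analysis suggests that offender race is a significant factor in predicting whether a hate crime is violent or non-violent, while offender ethnicity has a statistically significant but less substantial impact.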