R Notebook

# Load the hate-crimes dataset into a data frame. The path is relative, so it
# is resolved against the current working directory.
data <- read.csv("hatecrimes_data.csv")

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Keep only the four columns that the summary table, correlation matrix and
# regression are built from
variables <- select(
  data,
  hate_crimes_per_100k_splc,
  inequality,
  median_household_income,
  share_non_white
)

Summary Table

# Column means, ignoring missing values. na.rm is supplied through an
# anonymous function because passing extra arguments via the `...` of
# across() is deprecated as of dplyr 1.1.0.
mean1 <- variables %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(everything(), mean, na.rm = TRUE)`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))

# Standard deviation, minimum and maximum of every column, ignoring missing
# values. Each statistic is wrapped in an anonymous function because passing
# na.rm through the `...` of across() is deprecated as of dplyr 1.1.0.
sd1 <- variables %>%
  summarise(across(everything(), function(x) sd(x, na.rm = TRUE)))
min1 <- variables %>%
  summarise(across(everything(), function(x) min(x, na.rm = TRUE)))
max1 <- variables %>%
  summarise(across(everything(), function(x) max(x, na.rm = TRUE)))

# Stack the four one-row summaries into a single table, one row per statistic
table <- rbind(mean1, sd1, min1, max1)

# Label each row with the statistic it holds
rownames(table) <- c("Mean", "Standard Deviation", "Minimum", "Maximum")

# Usually we want the variables in the rows and the summary measures in the
# columns, so transpose the table (note that t() returns a matrix)
table <- t(table)

# Remove scientific notation (this changes a session-wide option)
options(scipen = 999)

# Convert back to a data frame and round every numeric column to two decimals
# so the table looks nicer. across() + where() replaces the superseded
# mutate_if().
table <- table %>%
  as.data.frame() %>%
  mutate(across(where(is.numeric), function(x) round(x, digits = 2)))

#show the table using kableExtra
library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

# Render the summary table with kableExtra, using the "striped" option and
# without stretching it to full width. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
table %>%
  kbl() %>%
  kable_classic_2("striped", full_width = FALSE)

	Mean	Standard Deviation	Minimum	Maximum
hate_crimes_per_100k_splc	0.30	0.25	0.07	1.52
inequality	0.45	0.02	0.42	0.53
median_household_income	55223.61	9208.48	35521.00	76165.00
share_non_white	0.32	0.16	0.06	0.81

Histograms/Plots

# Histogram 1: distribution of hate_crimes_per_100k_splc in 10 bins
library(ggplot2)
ggplot(data = data, mapping = aes(x = hate_crimes_per_100k_splc)) +
  geom_histogram(color = "gray", fill = "cornflowerblue", bins = 10) +
  ggtitle("Hate Crimes per 100k People") +
  theme_minimal()

## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).

# Histogram 2: distribution of inequality in 10 bins
ggplot(data = data, mapping = aes(x = inequality)) +
  geom_histogram(color = "gray", fill = "tomato", bins = 10) +
  ggtitle("Inequality") +
  theme_minimal()

# Plot 3: scatter of hate crimes against inequality with a linear fit
# NOTE(review): nothing in this chunk appears to use ggthemes (theme_minimal()
# comes from ggplot2) - confirm before removing the library() call.
library(ggthemes)
ggplot(data, aes(x = inequality, y = hate_crimes_per_100k_splc)) +
  # Colour is mapped only in the point layer, so the smoother below fits a
  # single line through all points rather than one line per state
  geom_point(aes(color = state)) +
  geom_smooth(method = "lm") +
  labs(
    title = "Hate Crimes vs Inequality",
    y = "Hate Crimes per 100k",
    x = "Inequality"
  ) +
  theme_minimal() +
  # Hide the colour legend. guides(color = FALSE) is deprecated as of
  # ggplot2 3.3.4; "none" is the supported value.
  guides(color = "none") +
  # Centre the plot title
  theme(plot.title = element_text(hjust = 0.5))

## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite values (`stat_smooth()`).

## Warning: Removed 4 rows containing missing values (`geom_point()`).

Make a correlation plot

library(corrplot)

## corrplot 0.92 loaded

# Pairwise Pearson correlations between the selected variables, computed only
# on rows that have no missing values
cor_data <- cor(x = variables, method = "pearson", use = "complete.obs")

# Draw the correlation matrix
corrplot(corr = cor_data)

# Tip: click the gray button with the arrow to open the plot in a new window, where you can resize it to make it easier to read

The regression model

# Fit a linear model of hate_crimes_per_100k_splc on inequality,
# median_household_income and share_non_white, then print its summary
lm(
  formula = hate_crimes_per_100k_splc ~ inequality + median_household_income +
    share_non_white,
  data = variables
) %>%
  summary()

## 
## Call:
## lm(formula = hate_crimes_per_100k_splc ~ inequality + median_household_income + 
##     share_non_white, data = variables)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35476 -0.10791 -0.05605  0.07765  0.69690 
## 
## Coefficients:
##                             Estimate   Std. Error t value Pr(>|t|)    
## (Intercept)             -3.183440886  0.874954059  -3.638  0.00073 ***
## inequality               6.547087807  1.895658282   3.454  0.00125 ** 
## median_household_income  0.000011722  0.000003503   3.347  0.00171 ** 
## share_non_white         -0.435826999  0.258514525  -1.686  0.09906 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2163 on 43 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.315,  Adjusted R-squared:  0.2672 
## F-statistic:  6.59 on 3 and 43 DF,  p-value: 0.0009177