library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(tmap)
## Warning: package 'tmap' was built under R version 4.3.3
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
# Load the raw records from the CSV export.
race <- read.csv(file = "FE_raw.csv")
# Restrict the analysis to rows whose State is Georgia ("GA").
GeorgiaData <- filter(race, State == "GA")
# Convert the Georgia rows to point features built from the
# Longitude/Latitude columns (EPSG:4326) so the death locations
# can be mapped and inspected for clusters.
GeorgiaData_sf <- GeorgiaData %>%
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326)
# Render the tmap maps below in interactive "view" mode.
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# Dot map of every Georgia record, one small red dot per death.
tm_shape(shp = GeorgiaData_sf) +
  tm_dots(size = 0.01, col = "red") +
  tm_layout(title = "Location of Deaths")
# Two records have wrong coordinates. Replace their points with the
# coordinates of the county each record belongs to.
#
# Only the geometry is overwritten (via `st_geometry<-`), so the other
# columns of the record are never touched. which() turns the comparison
# into row positions, so an NA Unique.ID cannot end up in the subscript and
# a missing ID simply results in no change instead of an error.
st_geometry(GeorgiaData_sf)[which(GeorgiaData_sf$Unique.ID == 30666)] <-
  st_sfc(st_point(c(-85.042955, 34.845934)), crs = 4326)
st_geometry(GeorgiaData_sf)[which(GeorgiaData_sf$Unique.ID == 30384)] <-
  st_sfc(st_point(c(-83.3789, 34.9027)), crs = 4326)
# Redraw the dot map now that the two coordinates have been corrected.
tm_shape(shp = GeorgiaData_sf) +
  tm_dots(size = 0.01, col = "red") +
  tm_layout(title = "Location of Deaths")
# We see more deaths near metro Atlanta, which makes sense
# since more people live there.
# Next, we compare whether one race accounts for more deaths than the others.
# A common belief is that black people
# are more likely to be killed by police. Let's check this against the data.
# Drop records whose race is unspecified, then collapse the raw race
# labels into short category names.
# filter() also discards rows where Race is NA, because the comparison
# evaluates to NA for them.
GeorgiaRace <- GeorgiaData_sf %>%
  filter(Race != "Race unspecified") %>%
  mutate(Race = case_when(
    Race %in% c(
      "European-American/White",
      "European-American/European-American/White"
    ) ~ "White",
    Race %in% c(
      "African-American/Black",
      "African-American/Black African-American/Black Not imputed"
    ) ~ "Black",
    Race == "Hispanic/Latino" ~ "Latino",
    Race == "Asian/Pacific Islander" ~ "Asian",
    # Without a fallback, case_when() returns NA for every label that is
    # not listed above, silently re-introducing missing values right after
    # they were filtered out. Keep those rows under an explicit category.
    TRUE ~ "Other"
  ))
# Dot map of the deaths, with each dot colored by the simplified Race value.
tm_shape(shp = GeorgiaRace) +
  tm_dots(size = 0.01, col = "Race") +
  tm_layout(title = "Location of Deaths")
# We see some patterns in Georgia, particularly around Atlanta.
# More white people are killed in North Atlanta,
# whereas more black people are killed in South Atlanta.
# This might simply be caused by more white people
# living in North Atlanta and more black people
# living in South Atlanta.
# Let's add age and see what happens to the data
# Jittered point plot of age at death, one column of points per race.
# Age is converted with as.numeric() inside the mapping; points are spread
# horizontally only (height = 0), so the plotted ages are not distorted.
ggplot(
  data = GeorgiaRace,
  mapping = aes(x = Race, y = as.numeric(Age), color = Race)
) +
  geom_jitter(size = 0.5, alpha = 1, width = 0.2, height = 0) +
  scale_y_continuous(breaks = seq(from = 0, to = 100, by = 10)) +
  labs(title = "Death by Race", x = "Race", y = "Age") +
  theme_minimal()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Now we see a pattern similar to the common belief.
# More young black people are killed compared to
# Asian, Latino, and white people.
# The age range is also wider for black people,
# running from around 0 to over 90.
# Last comparison: are men or women more likely to be killed?
# Start from the race-labelled records (GeorgiaRace) and keep only the
# rows whose Gender is not NA.
GeorgiaGender <- filter(GeorgiaRace, !is.na(Gender))
# As it turns out, Gender has no NA values, so no rows are removed here.
# Map the records, coloring each dot by its Gender value.
tm_shape(shp = GeorgiaGender) +
  tm_dots(size = 0.01, col = "Gender") +
  tm_layout(title = "Location of Deaths based on Gender")
# The Gender column also contains a "Transgender" category.
# Exclude those records so the comparison below covers men and women only.
GeorgiaGender <- filter(GeorgiaGender, Gender != "Transgender")
# Horizontal bar chart of the number of deaths per gender,
# drawn as one panel per race category.
ggplot(data = GeorgiaGender, mapping = aes(x = Gender)) +
  geom_bar(fill = "lightblue") +
  facet_wrap(~ Race) +
  coord_flip() +
  labs(title = "Death by Gender and Race", x = "Gender", y = "Total Deaths") +
  theme_minimal()

# Looks like men are much more likely to be killed
# than women, and white and black men in particular.
# Let's sample a few of the death descriptions
# to see if anything interesting stands out.
# Keep the male records whose race was coded as White or Black, then print
# a random sample of their incident descriptions.
GeorgiaWB <- GeorgiaGender %>%
  filter(Gender == "Male", Race %in% c("White", "Black"))
# Cap the sample size at the number of available rows: sample() without
# replacement raises an error when asked for more items than exist.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned. The draw is random, so the printed descriptions change from
# run to run.
print(sample(
  GeorgiaWB$Brief.description,
  size = min(10, nrow(GeorgiaWB)),
  replace = FALSE
))
## [1] "D'Wayne Burke, a former cop, allegedly broke into the evidence room of his old station at 2am. A night shift police officer saw what appeared to be a burglary in progress and called two deputies to help clear the building. The officer and one of the deputies encountered Burke in a file room, masked and armed with an ax. Burke advanced on the officer who shot and killed Burke."
## [2] "Gavin Williams was shot and killed by deputies in central Georgia after he allegedly attacked an elderly couple, killing one person, and after a standoff brandished a knife, according to police."
## [3] "Barnett died 21 days after being shot by two off-duty local police more than 12 times in the parking lot of a nightclub. His family admitted that he'd been armed, but the gun hadn't been fired, and he was only trying to defend himself in a confusing group fight."
## [4] "Beckworth was being sought by the Chatham county Sheriff's Fugitive Squad for failure to appear in court on a charge of Aggravated Battery, Violation of Parole and felony probation violations. When deputies arrived at his residence, he fled from the front yard on foot and then turned and aimed a handgun at the deputy pursuing him. The deputy fired one round at Beckworth but missed. Beckworth then killed himself."
## [5] "Shot during a manhunt while wanted on charges of killing his ex-wife and a male companion on Trauger's boat and burning the boat while it was docked near St. Mary's. Friends of the couple said Trauger had been abusive and was angry he had lost his custom-built boat to her. Source: Police, media reports"
## [6] "Ashburn police attempted to stop the driver of a car that was believed to have been stolen. The woman driving the car instead fled north on Interstate 75 with the officers in pursuit. Police said as the chase continued, it was joined by officers with the Turner County Sheriff's Office, Crisp County Sheriff's Office and the Georgia State Patrol. After a male passenger inside her vehicle fatally shot himself, the driver stopped the car. The Crisp County Sheriff's Office said that before shooting himself, the passenger, Gerry, had fired at least three shots at law enforcement."
## [7] "Glynn County Police Lt. Robert \"Cory\" Sasser fatally shot his estranged wife, Katie Lovett Sasser, 34, and her friend Johnny Edward Hall Jr., 39, before killing himself with a gunshot to the chest in the driveway of his Glynn County home as police closed in, police said."
## [8] "Darren Billy Wilson, 47, was shot after deputies responded to a call on Mansfield Road in White, according to the Georgia Bureau of Investigation. When the two deputies arrived, Wilson reportedly charged one of the deputies while wielding a large stick and yelling threats. The second deputy shot Wilson, killing him. Neither deputy was injured."
## [9] "Three officers responded to a report of a burglary in progress. According to police, when the officers arrived they called for the man to come out of the house. They then went inside, and he rushed toward them before Rome police Sgt. Carla Pearson shot and killed him."
## [10] "Zachary Lumpkin, 25, was driving a red Ford Ranger, fleeing from a Whitfield County Sheriff's car with blue lights and siren activated about 7:45 p.m. Also in the vehicle was Shadow Stanley, 20. Deputy Christopher Hicks hit the truck's right rear with the left front of his vehicle spinning the truck clockwise until it went airborne, with the driver's side of the vehicle striking a utility pole and then overturning, killing Lumpkin and Stanley."
# We see a lot of crime-related incidents, but we also see
# a lot of "reportedly" and "allegedly". People are innocent until
# proven guilty, so they should be given the benefit of the doubt.
# In any case, we can conclude that white and black men are probably
# more likely to be killed by police, some due to crime and some due to other reasons.