This dataset was sourced from Kaggle, a repository of public data, and spans from 2015 to 2022. The goal of this analysis is to investigate which geographic areas, neighborhoods, and demographic groups have been most affected by police-related gun violence. Additionally, I aim to examine whether there are differences in the number of incidents involving males versus females, whether the incidence of shootings has increased over time, and whether the demographic profile of those affected has shifted between 2015 and 2022
library(tidyverse)
library(tidyr)
library(leaflet)
library(sf)
library(tigris)
library(lubridate)
library(scales)
library(ggplot2)
library(dplyr)
library(highcharter)
setwd("C:/Users/eyong/Downloads/police shooting")
police <- read_csv("US Police shootings in from 2015-22.csv")
data(police)
police_clean <- police %>%
# Convert date to proper format and extract year
mutate(
date = as.Date(date),
year = year(date),
manner_of_death = factor(manner_of_death, levels = c("shot", "shot and Tasered")),
signs_of_mental_illness = as.logical(signs_of_mental_illness),
armed = factor(armed)
) %>%
# Select relevant columns
select(date, year, manner_of_death, city, state, flee, threat_level, body_camera,
signs_of_mental_illness, armed, longitude, latitude, age, race, gender) %>%
# Remove any rows with NA in critical columns
filter(
!is.na(longitude), !is.na(latitude), !is.na(manner_of_death), !is.na(armed),
!is.na(age), !is.na(race), !is.na(signs_of_mental_illness), !is.na(gender),
!is.na(flee), !is.na(threat_level)
)
Incidents by year I want to examin how many police shooting where recored for years 2015 to 2022 ## creating a count for each year
yearly_incidents <- police_clean %>%
group_by(year) %>%
summarise(count = n())
highchart() %>%
hc_chart(type = "line") %>%
hc_title(text = "Police Shooting Incidents by Year (2015-2022)") %>%
hc_xAxis(categories = yearly_incidents$year) %>%
hc_yAxis(title = list(text = "Number of Incidents")) %>%
hc_series(
list(
name = "Incidents",
data = yearly_incidents$count,
color = "blue",
marker = list(symbol = "circle", radius = 5, lineColor = "red", lineWidth = 2)
)
) %>%
hc_tooltip(
headerFormat = '<b>{point.key}</b><br>',
pointFormat = '{series.name}: {point.y}'
)
we can see police shootings incidents where very high in the year 2015 which was 913 if we consider the data is accurate and there are no missing values and its at an all time low in 2022 in the united state
ggplot(police_clean, aes(x = age)) +
geom_histogram(binwidth = 5, fill = "red", color = "black") +
theme_minimal() +
labs(title = "Age Distribution of Shooting Victims",
x = "Age",
y = "Count")
manner_death_yearly <- police_clean %>%
group_by(year, manner_of_death) %>%
summarise(count = n(), .groups = "drop")
ggplot(manner_death_yearly,
aes(x = year, y = count, fill = manner_of_death)) +
geom_bar(stat = "identity", position = "stack") +
theme_minimal() +
scale_fill_brewer(palette = "Set2") +
labs(title = "Manner of Death Distribution by Year",
x = "Year",
y = "Count",
fill = "Manner of Death")
## Exsplanation it looks like alot of people got shot and tased but most
people got shot I dont really know why they will tased after that I cant
really exsplain
The bar graph shows the ages who got shoot the most and as we can see a very big majority are in thier 20s and 30ths may be because they are aggrevise at that age I wouldnt not really know about that now ## do they have a mental Illness
mental_illness_yearly <- police_clean %>%
group_by(year, signs_of_mental_illness) %>%
summarise(count = n(), .groups = "drop") %>%
mutate(percentage = count / sum(count) * 100)
ggplot(mental_illness_yearly,
aes(x = year, y = count, fill = signs_of_mental_illness)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
scale_fill_manual(values = c("FALSE" = "lightblue", "TRUE" = "darkred")) +
labs(title = "Mental Illness Status in Police Shootings by Year",
x = "Year",
y = "Count",
fill = "Signs of Mental Illness")
## Exsplantion from what I you will see most atimes the people who get
shot don’t have any mental illness
armed_status <- police_clean %>%
group_by(armed) %>%
summarise(count = n()) %>%
arrange(desc(count)) %>%
head(10)
ggplot(armed_status, aes(x = reorder(armed, count), y = count)) +
geom_bar(stat = "identity", fill = "steelblue") +
coord_flip() +
theme_minimal() +
labs(title = "Distribution of Armed Status",
x = "Armed Status",
y = "Count")
## Exsplanation it show that yes most people where usually armed which a
gun before been shot so we may say the police may have shot they out of
fear for their life
state_incidents <- police_clean %>%
group_by(state) %>%
summarise(count = n()) %>%
arrange(desc(count)) %>%
head(10)
ggplot(state_incidents, aes(x = reorder(state, count), y = count)) +
geom_bar(stat = "identity", fill = "darkblue") +
coord_flip() +
theme_minimal() +
labs(title = "Top 10 States by Number of Police Shooting Incidents",
x = "State",
y = "Number of Incidents")
## Explantion it shows that more people got shot in carlifonia and texas
more than any other state this could maybe because of thier gun carring
policy
us_shoot <- police_clean %>%
filter(manner_of_death == "shot and Tasered") %>%
mutate(armed = as.numeric(armed),
estimated_shots = round(armed * 10),
measure = factor(manner_of_death),
age_group = case_when(
age <= 18 ~ "Under 18",
age > 18 & age <= 35 ~ "18-35",
age > 35 & age <= 50 ~ "36-50",
age > 50 ~ "51+",
TRUE ~ "Unknown"
))
examine <- police_clean |>
select(date, year, manner_of_death, city, state, flee, threat_level, body_camera,
signs_of_mental_illness, armed, longitude, latitude, age, race, gender)
head(examine)
## # A tibble: 6 × 15
## date year manner_of_death city state flee threat_level body_camera
## <date> <dbl> <fct> <chr> <chr> <chr> <chr> <lgl>
## 1 2015-01-02 2015 shot Shelton WA Not … attack FALSE
## 2 2015-01-02 2015 shot Aloha OR Not … attack FALSE
## 3 2015-01-03 2015 shot and Tasered Wichita KS Not … other FALSE
## 4 2015-01-04 2015 shot San Fr… CA Not … attack FALSE
## 5 2015-01-04 2015 shot Evans CO Not … attack FALSE
## 6 2015-01-04 2015 shot Guthrie OK Not … attack FALSE
## # ℹ 7 more variables: signs_of_mental_illness <lgl>, armed <fct>,
## # longitude <dbl>, latitude <dbl>, age <dbl>, race <chr>, gender <chr>
tx <- examine |>
filter(state == "TX")
head(tx)
## # A tibble: 6 × 15
## date year manner_of_death city state flee threat_level body_camera
## <date> <dbl> <fct> <chr> <chr> <chr> <chr> <lgl>
## 1 2015-01-07 2015 shot Freeport TX Not … attack FALSE
## 2 2015-01-09 2015 shot El Paso TX Not … attack FALSE
## 3 2015-01-13 2015 shot Jourdan… TX Not … other FALSE
## 4 2015-01-14 2015 shot Lake Ja… TX Not … attack FALSE
## 5 2015-01-16 2015 shot Mabank TX Car attack FALSE
## 6 2015-01-17 2015 shot Fort Wo… TX Not … attack FALSE
## # ℹ 7 more variables: signs_of_mental_illness <lgl>, armed <fct>,
## # longitude <dbl>, latitude <dbl>, age <dbl>, race <chr>, gender <chr>
examine2 <- police_clean |>
select(date, year, manner_of_death, city, state, flee, threat_level, body_camera,
signs_of_mental_illness, armed, longitude, latitude, age, race, gender)
head(examine)
## # A tibble: 6 × 15
## date year manner_of_death city state flee threat_level body_camera
## <date> <dbl> <fct> <chr> <chr> <chr> <chr> <lgl>
## 1 2015-01-02 2015 shot Shelton WA Not … attack FALSE
## 2 2015-01-02 2015 shot Aloha OR Not … attack FALSE
## 3 2015-01-03 2015 shot and Tasered Wichita KS Not … other FALSE
## 4 2015-01-04 2015 shot San Fr… CA Not … attack FALSE
## 5 2015-01-04 2015 shot Evans CO Not … attack FALSE
## 6 2015-01-04 2015 shot Guthrie OK Not … attack FALSE
## # ℹ 7 more variables: signs_of_mental_illness <lgl>, armed <fct>,
## # longitude <dbl>, latitude <dbl>, age <dbl>, race <chr>, gender <chr>
ca <- examine |>
filter(state == "CA")
head(ca)
## # A tibble: 6 × 15
## date year manner_of_death city state flee threat_level body_camera
## <date> <dbl> <fct> <chr> <chr> <chr> <chr> <lgl>
## 1 2015-01-04 2015 shot San Fr… CA Not … attack FALSE
## 2 2015-01-06 2015 shot and Tasered Stockt… CA Not … attack FALSE
## 3 2015-01-11 2015 shot South … CA Not … attack FALSE
## 4 2015-01-15 2015 shot Fairfi… CA Not … other FALSE
## 5 2015-01-16 2015 shot and Tasered Fremont CA Not … other FALSE
## 6 2015-01-17 2015 shot Los An… CA Not … attack FALSE
## # ℹ 7 more variables: signs_of_mental_illness <lgl>, armed <fct>,
## # longitude <dbl>, latitude <dbl>, age <dbl>, race <chr>, gender <chr>
regression_data <- police_clean %>%
group_by(year) %>%
summarise(
incidents = n(),
avg_age = mean(age, na.rm = TRUE),
mental_illness_pct = mean(signs_of_mental_illness, na.rm = TRUE) * 100
)
model_incidents <- lm(incidents ~ year, data = regression_data)
summary(model_incidents)
##
## Call:
## lm(formula = incidents ~ year, data = regression_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -202.08 -73.55 -39.01 92.63 204.96
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 201427.94 45491.51 4.428 0.00443 **
## year -99.48 22.54 -4.414 0.00450 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 146.1 on 6 degrees of freedom
## Multiple R-squared: 0.7645, Adjusted R-squared: 0.7253
## F-statistic: 19.48 on 1 and 6 DF, p-value: 0.004501
ggplot(regression_data, aes(x = year, y = incidents)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
theme_minimal() +
labs(title = "Linear Regression: Incidents Over Time",
x = "Year",
y = "Number of Incidents")
## `geom_smooth()` using formula = 'y ~ x'
tx3 <- tx %>%
filter(manner_of_death == "shot and Tasered") %>%
mutate(
# Convert 'armed' to numeric (if it's not already)
armed = as.numeric(armed),
# Estimate shots fired (this can be adjusted based on real data or estimation rules)
estimated_shots = round(armed * 10),
# Factor for 'manner_of_death'
measure = factor(manner_of_death),
# Categorize age groups
age_group = case_when(
age <= 18 ~ "Under 18",
age > 18 & age <= 35 ~ "18-35",
age > 35 & age <= 50 ~ "36-50",
age > 50 ~ "51+",
TRUE ~ "Unknown"
),
# Add fleeing status (Assuming 'flee' is a binary variable, if not, modify accordingly)
flee_status = case_when(
flee == 0 ~ "Not Fleeing",
flee == 1 ~ "Car",
flee == 2 ~ "Foot",
flee == 3 ~ "Other",
TRUE ~ "Unknown"
),
# Mental illness signs (Assuming binary variable, if more categories, modify accordingly)
mental_illness = case_when(
signs_of_mental_illness == 1 ~ "Yes",
signs_of_mental_illness == 0 ~ "No",
TRUE ~ "Unknown"
),
# Threat level (Assuming it's a categorical variable, can be 'low', 'medium', 'high', etc.)
threat_level = case_when(
threat_level == 1 ~ "attack",
threat_level == 2 ~ "other",
threat_level == 3 ~ "undetermined",
TRUE ~ "Unknown"
),
# City name
city = as.character(city),
# Gender (assuming binary or categorical variable, adjust as necessary)
gender = factor(gender, levels = c("M", "F")),
# Race (assuming race is coded, can modify levels as per your dataset)
race = factor(race, levels = c("W", "B", "H", "A", "O")),
# Date (if you have a date field, format it properly)
date = as.Date(date, format="%Y-%m-%d")
)
pal <- colorNumeric(
palette = "viridis",
domain = tx3$estimated_shots
)
# Create the leaflet map
leaflet(tx3) %>%
addProviderTiles(providers$Esri.WorldStreetMap) %>%
addCircleMarkers(
~longitude, ~latitude,
radius = ~sqrt(estimated_shots) * 0.25,
color = ~pal(estimated_shots),
fillOpacity = 0.7,
popup = ~paste0(
"<strong>Age Group: </strong>", age_group, "<br>",
"<strong>Manner of Death: </strong>", manner_of_death, "<br>",
"<strong>Estimated Shots: </strong>", round(estimated_shots, 1), "<br>",
"<strong>Armed Status: </strong>", ifelse(armed == 1, "Armed", "Unarmed"), "<br>",
"<strong>Fleeing: </strong>", flee_status, "<br>",
"<strong>Mental Illness: </strong>", mental_illness, "<br>",
"<strong>Threat Level: </strong>", threat_level, "<br>",
"<strong>City: </strong>", city, "<br>",
"<strong>Gender: </strong>", gender, "<br>",
"<strong>Race: </strong>", race, "<br>",
"<strong>Date: </strong>", format(date, "%B %d, %Y")
)
) %>%
addLegend(
position = "bottomright",
pal = pal,
values = ~estimated_shots,
title = "Estimated Shots",
opacity = 1
)
ca3 <- ca %>%
filter(manner_of_death == "shot and Tasered") %>%
mutate(
# Convert 'armed' to numeric (if it's not already)
armed = as.numeric(armed),
# Estimate shots fired (this can be adjusted based on real data or estimation rules)
estimated_shots = round(armed * 10),
# Factor for 'manner_of_death'
measure = factor(manner_of_death),
# Categorize age groups
age_group = case_when(
age <= 18 ~ "Under 18",
age > 18 & age <= 35 ~ "18-35",
age > 35 & age <= 50 ~ "36-50",
age > 50 ~ "51+",
TRUE ~ "Unknown"
),
# Add fleeing status (Assuming 'flee' is a binary variable, if not, modify accordingly)
flee_status = case_when(
flee == 0 ~ "Not Fleeing",
flee == 1 ~ "Car",
flee == 2 ~ "Foot",
flee == 3 ~ "Other",
TRUE ~ "Unknown"
),
# Mental illness signs (Assuming binary variable, if more categories, modify accordingly)
mental_illness = case_when(
signs_of_mental_illness == 1 ~ "Yes",
signs_of_mental_illness == 0 ~ "No",
TRUE ~ "Unknown"
),
# Threat level (Assuming it's a categorical variable, can be 'low', 'medium', 'high', etc.)
threat_level = case_when(
threat_level == 1 ~ "attack",
threat_level == 2 ~ "other",
threat_level == 3 ~ "undetermined",
TRUE ~ "Unknown"
),
# City name
city = as.character(city),
# Gender (assuming binary or categorical variable, adjust as necessary)
gender = factor(gender, levels = c("M", "F")),
# Race (assuming race is coded, can modify levels as per your dataset)
race = factor(race, levels = c("W", "B", "H", "A", "O")),
# Date (if you have a date field, format it properly)
date = as.Date(date, format="%Y-%m-%d")
)
pal <- colorNumeric(
palette = "viridis",
domain = ca3$estimated_shots
)
# Create the leaflet map
leaflet(ca3) %>%
addProviderTiles(providers$Esri.WorldStreetMap) %>%
addCircleMarkers(
~longitude, ~latitude,
radius = ~sqrt(estimated_shots) * 0.25,
color = ~pal(estimated_shots),
fillOpacity = 0.7,
popup = ~paste0(
"<strong>Age Group: </strong>", age_group, "<br>",
"<strong>Manner of Death: </strong>", manner_of_death, "<br>",
"<strong>Estimated Shots: </strong>", round(estimated_shots, 1), "<br>",
"<strong>Armed Status: </strong>", ifelse(armed == 1, "Armed", "Unarmed"), "<br>",
"<strong>Fleeing: </strong>", flee_status, "<br>",
"<strong>Mental Illness: </strong>", mental_illness, "<br>",
"<strong>Threat Level: </strong>", threat_level, "<br>",
"<strong>City: </strong>", city, "<br>",
"<strong>Gender: </strong>", gender, "<br>",
"<strong>Race: </strong>", race, "<br>",
"<strong>Date: </strong>", format(date, "%B %d, %Y")
)
) %>%
addLegend(
position = "bottomright",
pal = pal,
values = ~estimated_shots,
title = "Estimated Shots",
opacity = 1
)
pal <- colorNumeric(
palette = "viridis",
domain = ca3$estimated_shots
)
# Create the leaflet map
leaflet(ca3) %>%
addProviderTiles(providers$Esri.WorldStreetMap) %>%
addCircleMarkers(
~longitude, ~latitude,
radius = ~sqrt(estimated_shots) * 0.25,
color = ~pal(estimated_shots),
fillOpacity = 0.7,
popup = ~paste0(
"<strong>Age Group: </strong>", age_group, "<br>",
"<strong>Manner of Death: </strong>", manner_of_death, "<br>",
"<strong>Estimated Shots: </strong>", round(estimated_shots, 1), "<br>",
"<strong>Armed Status: </strong>", ifelse(armed == 1, "Armed", "Unarmed"), "<br>",
"<strong>Fleeing: </strong>", flee_status, "<br>",
"<strong>Mental Illness: </strong>", mental_illness, "<br>",
"<strong>Threat Level: </strong>", threat_level, "<br>",
"<strong>City: </strong>", city, "<br>",
"<strong>Gender: </strong>", gender, "<br>",
"<strong>Race: </strong>", race, "<br>",
"<strong>Date: </strong>", format(date, "%B %d, %Y")
)
) %>%
addLegend(
position = "bottomright",
pal = pal,
values = ~estimated_shots,
title = "Estimated Shots",
opacity = 1
)