Indocution

This dataset was sourced from Kaggle, a repository of public data, and spans from 2015 to 2022. The goal of this analysis is to investigate which geographic areas, neighborhoods, and demographic groups have been most affected by police-related gun violence. Additionally, I aim to examine whether there are differences in the number of incidents involving males versus females, whether the incidence of shootings has increased over time, and whether the demographic profile of those affected has shifted between 2015 and 2022

loading libraries

library(tidyverse)
library(tidyr)
library(leaflet)
library(sf)
library(tigris)
library(lubridate)
library(scales)
library(ggplot2)
library(dplyr)
library(highcharter)
setwd("C:/Users/eyong/Downloads/police shooting")
police <- read_csv("US Police shootings in from 2015-22.csv")
data(police)

selcting the columns and cleaning the Na’s

police_clean <- police %>%
  # Convert date to proper format and extract year
  mutate(
    date = as.Date(date),
    year = year(date),
    manner_of_death = factor(manner_of_death, levels = c("shot", "shot and Tasered")),
    signs_of_mental_illness = as.logical(signs_of_mental_illness),
    armed = factor(armed)
  ) %>%
  # Select relevant columns
  select(date, year, manner_of_death, city, state, flee, threat_level, body_camera,
         signs_of_mental_illness, armed, longitude, latitude, age, race, gender) %>%
  # Remove any rows with NA in critical columns
  filter(
    !is.na(longitude), !is.na(latitude), !is.na(manner_of_death), !is.na(armed),
    !is.na(age), !is.na(race), !is.na(signs_of_mental_illness), !is.na(gender),
    !is.na(flee), !is.na(threat_level)
  )

1. Time Analysis

Incidents by year I want to examin how many police shooting where recored for years 2015 to 2022 ## creating a count for each year

yearly_incidents <- police_clean %>%
  group_by(year) %>%
  summarise(count = n())

Create an interactive Highcharts plot

highchart() %>%
  hc_chart(type = "line") %>%
  hc_title(text = "Police Shooting Incidents by Year (2015-2022)") %>%
  hc_xAxis(categories = yearly_incidents$year) %>%
  hc_yAxis(title = list(text = "Number of Incidents")) %>%
  hc_series(
    list(
      name = "Incidents",
      data = yearly_incidents$count,
      color = "blue",
      marker = list(symbol = "circle", radius = 5, lineColor = "red", lineWidth = 2)
    )
  ) %>%
  hc_tooltip(
    headerFormat = '<b>{point.key}</b><br>',
    pointFormat = '{series.name}: {point.y}'
  )

Explantion

we can see police shootings incidents where very high in the year 2015 which was 913 if we consider the data is accurate and there are no missing values and its at an all time low in 2022 in the united state

2. Age Distribution Analysis

ggplot(police_clean, aes(x = age)) +
  geom_histogram(binwidth = 5, fill = "red", color = "black") +
  theme_minimal() +
  labs(title = "Age Distribution of Shooting Victims",
       x = "Age",
       y = "Count")

Manner of Death Analysis

manner_death_yearly <- police_clean %>%
  group_by(year, manner_of_death) %>%
  summarise(count = n(), .groups = "drop")

ggplot(manner_death_yearly, 
       aes(x = year, y = count, fill = manner_of_death)) +
  geom_bar(stat = "identity", position = "stack") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Manner of Death Distribution by Year",
       x = "Year",
       y = "Count",
       fill = "Manner of Death")

## Exsplanation it looks like alot of people got shot and tased but most people got shot I dont really know why they will tased after that I cant really exsplain

exsplanation

The bar graph shows the ages who got shoot the most and as we can see a very big majority are in thier 20s and 30ths may be because they are aggrevise at that age I wouldnt not really know about that now ## do they have a mental Illness

mental_illness_yearly <- police_clean %>%
  group_by(year, signs_of_mental_illness) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(percentage = count / sum(count) * 100)

Bar graph

ggplot(mental_illness_yearly, 
       aes(x = year, y = count, fill = signs_of_mental_illness)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  scale_fill_manual(values = c("FALSE" = "lightblue", "TRUE" = "darkred")) +
  labs(title = "Mental Illness Status in Police Shootings by Year",
       x = "Year",
       y = "Count",
       fill = "Signs of Mental Illness")

## Exsplantion from what I you will see most atimes the people who get shot don’t have any mental illness

where they Armed

armed_status <- police_clean %>%
  group_by(armed) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(10)  

Bar Plot showing thier status

ggplot(armed_status, aes(x = reorder(armed, count), y = count)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  coord_flip() +
  theme_minimal() +
  labs(title = "Distribution of Armed Status",
       x = "Armed Status",
       y = "Count")

## Exsplanation it show that yes most people where usually armed which a gun before been shot so we may say the police may have shot they out of fear for their life

how many people get shot pair state

state_incidents <- police_clean %>%
  group_by(state) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(10)

plot

ggplot(state_incidents, aes(x = reorder(state, count), y = count)) +
  geom_bar(stat = "identity", fill = "darkblue") +
  coord_flip() +
  theme_minimal() +
  labs(title = "Top 10 States by Number of Police Shooting Incidents",
       x = "State",
       y = "Number of Incidents")

## Explantion it shows that more people got shot in carlifonia and texas more than any other state this could maybe because of thier gun carring policy

examine how many people get shot the age group and if they where armed or not

us_shoot <- police_clean %>%
  filter(manner_of_death == "shot and Tasered") %>%
  mutate(armed = as.numeric(armed),
    
    estimated_shots = round(armed * 10),  
    measure = factor(manner_of_death),  
    age_group = case_when(
      age <= 18 ~ "Under 18",
      age > 18 & age <= 35 ~ "18-35",
      age > 35 & age <= 50 ~ "36-50",
      age > 50 ~ "51+",
      TRUE ~ "Unknown"
    ))

Creating a map for the whole US for people who got shot and if they where armed or unarmed

examing how many people got shot in texas

examine <- police_clean |>
  select(date, year, manner_of_death, city, state, flee, threat_level, body_camera,
         signs_of_mental_illness, armed, longitude, latitude, age, race, gender)
head(examine)
## # A tibble: 6 × 15
##   date        year manner_of_death  city    state flee  threat_level body_camera
##   <date>     <dbl> <fct>            <chr>   <chr> <chr> <chr>        <lgl>      
## 1 2015-01-02  2015 shot             Shelton WA    Not … attack       FALSE      
## 2 2015-01-02  2015 shot             Aloha   OR    Not … attack       FALSE      
## 3 2015-01-03  2015 shot and Tasered Wichita KS    Not … other        FALSE      
## 4 2015-01-04  2015 shot             San Fr… CA    Not … attack       FALSE      
## 5 2015-01-04  2015 shot             Evans   CO    Not … attack       FALSE      
## 6 2015-01-04  2015 shot             Guthrie OK    Not … attack       FALSE      
## # ℹ 7 more variables: signs_of_mental_illness <lgl>, armed <fct>,
## #   longitude <dbl>, latitude <dbl>, age <dbl>, race <chr>, gender <chr>
tx <- examine |>
  filter(state == "TX")
head(tx)
## # A tibble: 6 × 15
##   date        year manner_of_death city     state flee  threat_level body_camera
##   <date>     <dbl> <fct>           <chr>    <chr> <chr> <chr>        <lgl>      
## 1 2015-01-07  2015 shot            Freeport TX    Not … attack       FALSE      
## 2 2015-01-09  2015 shot            El Paso  TX    Not … attack       FALSE      
## 3 2015-01-13  2015 shot            Jourdan… TX    Not … other        FALSE      
## 4 2015-01-14  2015 shot            Lake Ja… TX    Not … attack       FALSE      
## 5 2015-01-16  2015 shot            Mabank   TX    Car   attack       FALSE      
## 6 2015-01-17  2015 shot            Fort Wo… TX    Not … attack       FALSE      
## # ℹ 7 more variables: signs_of_mental_illness <lgl>, armed <fct>,
## #   longitude <dbl>, latitude <dbl>, age <dbl>, race <chr>, gender <chr>

examing the how many people got shot in califonia

examine2 <- police_clean |>
  select(date, year, manner_of_death, city, state, flee, threat_level, body_camera,
         signs_of_mental_illness, armed, longitude, latitude, age, race, gender)
head(examine)
## # A tibble: 6 × 15
##   date        year manner_of_death  city    state flee  threat_level body_camera
##   <date>     <dbl> <fct>            <chr>   <chr> <chr> <chr>        <lgl>      
## 1 2015-01-02  2015 shot             Shelton WA    Not … attack       FALSE      
## 2 2015-01-02  2015 shot             Aloha   OR    Not … attack       FALSE      
## 3 2015-01-03  2015 shot and Tasered Wichita KS    Not … other        FALSE      
## 4 2015-01-04  2015 shot             San Fr… CA    Not … attack       FALSE      
## 5 2015-01-04  2015 shot             Evans   CO    Not … attack       FALSE      
## 6 2015-01-04  2015 shot             Guthrie OK    Not … attack       FALSE      
## # ℹ 7 more variables: signs_of_mental_illness <lgl>, armed <fct>,
## #   longitude <dbl>, latitude <dbl>, age <dbl>, race <chr>, gender <chr>
ca <- examine |>
  filter(state == "CA")
head(ca)
## # A tibble: 6 × 15
##   date        year manner_of_death  city    state flee  threat_level body_camera
##   <date>     <dbl> <fct>            <chr>   <chr> <chr> <chr>        <lgl>      
## 1 2015-01-04  2015 shot             San Fr… CA    Not … attack       FALSE      
## 2 2015-01-06  2015 shot and Tasered Stockt… CA    Not … attack       FALSE      
## 3 2015-01-11  2015 shot             South … CA    Not … attack       FALSE      
## 4 2015-01-15  2015 shot             Fairfi… CA    Not … other        FALSE      
## 5 2015-01-16  2015 shot and Tasered Fremont CA    Not … other        FALSE      
## 6 2015-01-17  2015 shot             Los An… CA    Not … attack       FALSE      
## # ℹ 7 more variables: signs_of_mental_illness <lgl>, armed <fct>,
## #   longitude <dbl>, latitude <dbl>, age <dbl>, race <chr>, gender <chr>

getting the data for the Linear Regression Analysis

regression_data <- police_clean %>%
  group_by(year) %>%
  summarise(
    incidents = n(),
    avg_age = mean(age, na.rm = TRUE),
    mental_illness_pct = mean(signs_of_mental_illness, na.rm = TRUE) * 100
  )

Simple linear regression for incidents over time

model_incidents <- lm(incidents ~ year, data = regression_data)
summary(model_incidents)
## 
## Call:
## lm(formula = incidents ~ year, data = regression_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -202.08  -73.55  -39.01   92.63  204.96 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 201427.94   45491.51   4.428  0.00443 **
## year           -99.48      22.54  -4.414  0.00450 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 146.1 on 6 degrees of freedom
## Multiple R-squared:  0.7645, Adjusted R-squared:  0.7253 
## F-statistic: 19.48 on 1 and 6 DF,  p-value: 0.004501

Plot regression line

ggplot(regression_data, aes(x = year, y = incidents)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  theme_minimal() +
  labs(title = "Linear Regression: Incidents Over Time",
       x = "Year",
       y = "Number of Incidents")
## `geom_smooth()` using formula = 'y ~ x'

examing shots in Texas

tx3 <- tx %>%
  filter(manner_of_death == "shot and Tasered") %>%
  mutate(
    # Convert 'armed' to numeric (if it's not already)
    armed = as.numeric(armed),
    
    # Estimate shots fired (this can be adjusted based on real data or estimation rules)
    estimated_shots = round(armed * 10), 
    
    # Factor for 'manner_of_death'
    measure = factor(manner_of_death),
    
    # Categorize age groups
    age_group = case_when(
      age <= 18 ~ "Under 18",
      age > 18 & age <= 35 ~ "18-35",
      age > 35 & age <= 50 ~ "36-50",
      age > 50 ~ "51+",
      TRUE ~ "Unknown"
    ),
    
    # Add fleeing status (Assuming 'flee' is a binary variable, if not, modify accordingly)
 flee_status = case_when(
  flee == 0 ~ "Not Fleeing",
  flee == 1 ~ "Car",
  flee == 2 ~ "Foot",
  flee == 3 ~ "Other",
  TRUE ~ "Unknown"
),

    # Mental illness signs (Assuming binary variable, if more categories, modify accordingly)
    mental_illness = case_when(
      signs_of_mental_illness == 1 ~ "Yes",
      signs_of_mental_illness == 0 ~ "No",
      TRUE ~ "Unknown"
    ),
    
    # Threat level (Assuming it's a categorical variable, can be 'low', 'medium', 'high', etc.)
    threat_level = case_when(
      threat_level == 1 ~ "attack",
      threat_level == 2 ~ "other",
      threat_level == 3 ~ "undetermined",
      TRUE ~ "Unknown"
    ),
    
    # City name
    city = as.character(city),
    
    # Gender (assuming binary or categorical variable, adjust as necessary)
    gender = factor(gender, levels = c("M", "F")),
    
    # Race (assuming race is coded, can modify levels as per your dataset)
    race = factor(race, levels = c("W", "B", "H", "A", "O")),
    
    # Date (if you have a date field, format it properly)
    date = as.Date(date, format="%Y-%m-%d")
  )

Map for texas

pal <- colorNumeric(
  palette = "viridis",  
  domain = tx3$estimated_shots  
)

# Create the leaflet map
leaflet(tx3) %>%
  addProviderTiles(providers$Esri.WorldStreetMap) %>%  
  addCircleMarkers(
    ~longitude, ~latitude,  
    radius = ~sqrt(estimated_shots) * 0.25,  
    color = ~pal(estimated_shots),  
    fillOpacity = 0.7,  
    popup = ~paste0(
      "<strong>Age Group: </strong>", age_group, "<br>",  
      "<strong>Manner of Death: </strong>", manner_of_death, "<br>",  
      "<strong>Estimated Shots: </strong>", round(estimated_shots, 1), "<br>",  
      "<strong>Armed Status: </strong>", ifelse(armed == 1, "Armed", "Unarmed"), "<br>",  
      "<strong>Fleeing: </strong>", flee_status, "<br>",  
      "<strong>Mental Illness: </strong>", mental_illness, "<br>",  
      "<strong>Threat Level: </strong>", threat_level, "<br>",  
      "<strong>City: </strong>", city, "<br>",  
      "<strong>Gender: </strong>", gender, "<br>",  
      "<strong>Race: </strong>", race, "<br>",  
      "<strong>Date: </strong>", format(date, "%B %d, %Y")
    )
  ) %>%
  addLegend(
    position = "bottomright",  
    pal = pal,  
    values = ~estimated_shots,  
    title = "Estimated Shots",  
    opacity = 1
  )

examing Califonia

ca3 <- ca %>%
  filter(manner_of_death == "shot and Tasered") %>%
  mutate(
    # Convert 'armed' to numeric (if it's not already)
    armed = as.numeric(armed),
    
    # Estimate shots fired (this can be adjusted based on real data or estimation rules)
    estimated_shots = round(armed * 10), 
    
    # Factor for 'manner_of_death'
    measure = factor(manner_of_death),
    
    # Categorize age groups
    age_group = case_when(
      age <= 18 ~ "Under 18",
      age > 18 & age <= 35 ~ "18-35",
      age > 35 & age <= 50 ~ "36-50",
      age > 50 ~ "51+",
      TRUE ~ "Unknown"
    ),
    
    # Add fleeing status (Assuming 'flee' is a binary variable, if not, modify accordingly)
 flee_status = case_when(
  flee == 0 ~ "Not Fleeing",
  flee == 1 ~ "Car",
  flee == 2 ~ "Foot",
  flee == 3 ~ "Other",
  TRUE ~ "Unknown"
),

    # Mental illness signs (Assuming binary variable, if more categories, modify accordingly)
    mental_illness = case_when(
      signs_of_mental_illness == 1 ~ "Yes",
      signs_of_mental_illness == 0 ~ "No",
      TRUE ~ "Unknown"
    ),
    
    # Threat level (Assuming it's a categorical variable, can be 'low', 'medium', 'high', etc.)
    threat_level = case_when(
      threat_level == 1 ~ "attack",
      threat_level == 2 ~ "other",
      threat_level == 3 ~ "undetermined",
      TRUE ~ "Unknown"
    ),
    
    # City name
    city = as.character(city),
    
    # Gender (assuming binary or categorical variable, adjust as necessary)
    gender = factor(gender, levels = c("M", "F")),
    
    # Race (assuming race is coded, can modify levels as per your dataset)
    race = factor(race, levels = c("W", "B", "H", "A", "O")),
    
    # Date (if you have a date field, format it properly)
    date = as.Date(date, format="%Y-%m-%d")
  )

pal <- colorNumeric(
  palette = "viridis",  
  domain = ca3$estimated_shots  
)

# Create the leaflet map
leaflet(ca3) %>%
  addProviderTiles(providers$Esri.WorldStreetMap) %>%  
  addCircleMarkers(
    ~longitude, ~latitude,  
    radius = ~sqrt(estimated_shots) * 0.25,  
    color = ~pal(estimated_shots),  
    fillOpacity = 0.7,  
    popup = ~paste0(
      "<strong>Age Group: </strong>", age_group, "<br>",  
      "<strong>Manner of Death: </strong>", manner_of_death, "<br>",  
      "<strong>Estimated Shots: </strong>", round(estimated_shots, 1), "<br>",  
      "<strong>Armed Status: </strong>", ifelse(armed == 1, "Armed", "Unarmed"), "<br>",  
      "<strong>Fleeing: </strong>", flee_status, "<br>",  
      "<strong>Mental Illness: </strong>", mental_illness, "<br>",  
      "<strong>Threat Level: </strong>", threat_level, "<br>",  
      "<strong>City: </strong>", city, "<br>",  
      "<strong>Gender: </strong>", gender, "<br>",  
      "<strong>Race: </strong>", race, "<br>",  
      "<strong>Date: </strong>", format(date, "%B %d, %Y")
    )
  ) %>%
  addLegend(
    position = "bottomright",  
    pal = pal,  
    values = ~estimated_shots,  
    title = "Estimated Shots",  
    opacity = 1
  )

pal <- colorNumeric(
  palette = "viridis",  
  domain = ca3$estimated_shots  
)

# Create the leaflet map
leaflet(ca3) %>%
  addProviderTiles(providers$Esri.WorldStreetMap) %>%  
  addCircleMarkers(
    ~longitude, ~latitude,  
    radius = ~sqrt(estimated_shots) * 0.25,  
    color = ~pal(estimated_shots),  
    fillOpacity = 0.7,  
    popup = ~paste0(
      "<strong>Age Group: </strong>", age_group, "<br>",  
      "<strong>Manner of Death: </strong>", manner_of_death, "<br>",  
      "<strong>Estimated Shots: </strong>", round(estimated_shots, 1), "<br>",  
      "<strong>Armed Status: </strong>", ifelse(armed == 1, "Armed", "Unarmed"), "<br>",  
      "<strong>Fleeing: </strong>", flee_status, "<br>",  
      "<strong>Mental Illness: </strong>", mental_illness, "<br>",  
      "<strong>Threat Level: </strong>", threat_level, "<br>",  
      "<strong>City: </strong>", city, "<br>",  
      "<strong>Gender: </strong>", gender, "<br>",  
      "<strong>Race: </strong>", race, "<br>",  
      "<strong>Date: </strong>", format(date, "%B %d, %Y")
    )
  ) %>%
  addLegend(
    position = "bottomright",  
    pal = pal,  
    values = ~estimated_shots,  
    title = "Estimated Shots",  
    opacity = 1
  )