library(tidyverse)
# Read the use-of-force export through an explicit path instead of calling
# setwd(), so the script does not change the session's working directory.
# NOTE(review): if later code relied on the working directory being
# ~/Downloads, build those paths from data_dir as well.
data_dir <- path.expand("~/Downloads")
data <- read_csv(
  file.path(data_dir, "POL-_Use_of_Force_Details_20260320.csv")
)
# NOTE(review): the recorded output reports parsing issues for this file;
# inspect them with problems(data) before trusting the guessed column types.
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 4228 Columns: 118
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (20): ReportGUID, OfficerGUID, Event Date/Time, CR # or Event #, Event C...
## dbl  (5): Subj Age, Subj Height, Subj Weight, Ofc Age, Ofc CEW # of Cartridges
## lgl (93): Reason - Counteract, Reason - Claim Injury, Reason - Protective In...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Collapse the export to one row per report: for each distinct ReportGUID the
# first row encountered is kept, together with all of its other columns.
data_unique <- distinct(data, ReportGUID, .keep_all = TRUE)

# Tally reports per event class, most frequent first, and express each tally
# as a percentage of all de-duplicated reports.
event_counts <- mutate(
  count(data_unique, `Event Class Description`, sort = TRUE),
  percent = prop.table(n) * 100
)

# Show the ten most frequent event classes (event_counts is already sorted
# in descending order of n).
head(event_counts, 10)
## # A tibble: 10 × 3
##    `Event Class Description`              n percent
##    <chr>                              <int>   <dbl>
##  1 MENTAL TRANSPORT                     374   20.5 
##  2 DEER DEAD/INJURED ROADWAY            186   10.2 
##  3 POL INFORMATION                      154    8.45
##  4 AUTO THEFT - PASSENGER VEHICLE       102    5.60
##  5 ASSAULT & BATTERY - POLICE OFFICER    81    4.44
##  6 LARCENY SHOPLIFTING OVER $200         54    2.96
##  7 ASSAULT & BATTERY - CITIZEN           51    2.80
##  8 LARCENY SHOPLIFTING $50 - $199        42    2.30
##  9 SIMPLE ASSAULT - PO                   42    2.30
## 10 DEER OTHER                            39    2.14
# Bucket each report's event class into a coarse category.
#
# Patterns are matched case-insensitively as substrings of
# `Event Class Description`, and the first matching rule wins, so a class that
# matches both lists is labelled "Violent". Descriptions that match neither
# list (including missing ones) fall through to "Other".
#
# "asslt" is listed alongside "assault" because the data abbreviates some
# classes (e.g. "AGG ASSLT CUT/STAB CITIZEN"), which "assault" alone misses.
violent_pattern <- regex(
  "assault|asslt|robbery|weapon|homicide",
  ignore_case = TRUE
)
service_pattern <- regex(
  "mental|transport|information|suspicious|disorderly|domestic|theft|auto",
  ignore_case = TRUE
)

data_grouped <- data_unique %>%
  mutate(category = case_when(
    str_detect(`Event Class Description`, violent_pattern) ~ "Violent",
    str_detect(`Event Class Description`, service_pattern) ~
      "Non-Violent/Service",
    TRUE ~ "Other"
  ))

# Share of reports in each category.
# NOTE(review): the recorded output below predates the "asslt" addition;
# re-run to refresh the counts.
data_grouped %>%
  count(category) %>%
  mutate(percent = n / sum(n) * 100)
## # A tibble: 3 × 3
##   category                n percent
##   <chr>               <int>   <dbl>
## 1 Non-Violent/Service   690    37.8
## 2 Other                 825    45.3
## 3 Violent               308    16.9
# List the fifteen most frequent event classes with their raw counts, to see
# which descriptions the category patterns need to cover.
head(count(data_unique, `Event Class Description`, sort = TRUE), 15)
## # A tibble: 15 × 2
##    `Event Class Description`              n
##    <chr>                              <int>
##  1 MENTAL TRANSPORT                     374
##  2 DEER DEAD/INJURED ROADWAY            186
##  3 POL INFORMATION                      154
##  4 AUTO THEFT - PASSENGER VEHICLE       102
##  5 ASSAULT & BATTERY - POLICE OFFICER    81
##  6 LARCENY SHOPLIFTING OVER $200         54
##  7 ASSAULT & BATTERY - CITIZEN           51
##  8 LARCENY SHOPLIFTING $50 - $199        42
##  9 SIMPLE ASSAULT - PO                   42
## 10 DEER OTHER                            39
## 11 TRESPASSING                           39
## 12 DISORDERLY CONDUCT                    38
## 13 DRIVING UNDER THE INFLUENCE           36
## 14 AGG ASSLT CUT/STAB CITIZEN            33
## 15 LARCENY SHOPLIFTING UNDER $50         31