library(readr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lubridate)
library(writexl)
file_path <- "../data/Police_Department_Incident_Reports__Historical_2003_to_May_2018_20250331.csv"
crimes_03_to_18 <- read_csv(file_path)
## Rows: 2129525 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): IncidntNum, Incident Code, Category, Descript, DayOfWeek, Date, P...
## dbl (24): PdId, X, Y, SF Find Neighborhoods 2 2, Current Police Districts 2...
## time (1): Time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
unique(crimes_03_to_18$Category) #listing unique values in Category column
## [1] "ROBBERY" "VEHICLE THEFT"
## [3] "ARSON" "ASSAULT"
## [5] "TRESPASS" "BURGLARY"
## [7] "LARCENY/THEFT" "WARRANTS"
## [9] "OTHER OFFENSES" "DRUG/NARCOTIC"
## [11] "SUSPICIOUS OCC" "LIQUOR LAWS"
## [13] "VANDALISM" "WEAPON LAWS"
## [15] "NON-CRIMINAL" "MISSING PERSON"
## [17] "FRAUD" "SEX OFFENSES, FORCIBLE"
## [19] "SECONDARY CODES" "DISORDERLY CONDUCT"
## [21] "RECOVERED VEHICLE" "KIDNAPPING"
## [23] "FORGERY/COUNTERFEITING" "PROSTITUTION"
## [25] "DRUNKENNESS" "BAD CHECKS"
## [27] "DRIVING UNDER THE INFLUENCE" "LOITERING"
## [29] "STOLEN PROPERTY" "SUICIDE"
## [31] "BRIBERY" "EXTORTION"
## [33] "EMBEZZLEMENT" "GAMBLING"
## [35] "PORNOGRAPHY/OBSCENE MAT" "SEX OFFENSES, NON FORCIBLE"
## [37] "TREA"
#filter for vehicle thefts
vehicle_theft_03_to_17 <- crimes_03_to_18 %>%
mutate(Date = mdy(Date)) %>% # Convert "MM/DD/YYYY" to Date format
filter(Category == "VEHICLE THEFT" & Date < as.Date("2018-01-01")) #filter for reported crimes before 2018
#group by year and count reported vehicle thefts
vehicle_theft_by_year <- vehicle_theft_03_to_17 %>%
mutate(Year = as.numeric(substr(Date, 1, 4))) %>% # Extract first 4 characters as Year
group_by(Year) %>%
summarise(count = n()) %>%
arrange(Year) # Sort by Year
#checking in Excel why it doesn't match Chronicle story
vehicle_theft_03_to_17_export <- crimes_03_to_18 %>%
filter(Category == "VEHICLE THEFT")
write_xlsx(vehicle_theft_03_to_17_export, "../data/vehicle_theft_03_to_17_export.xlsx")