After importing the dataset, it was necessary to clean the data by removing columns that provided little relevant information and by dropping rows that contained missing or empty values. In addition, I renamed some columns to improve readability and consistency. The final dataset contains essential variables such as arrest date, location, demographics, and charge descriptions.
# Import the raw arrest records, drop the columns that are not used in the
# analysis, and keep only rows in which every remaining column holds a
# non-missing, non-empty value.
#
# The row filter is evaluated column-wise with if_all() rather than with
# apply(., 1, ...): apply() first coerces the whole data frame to a character
# matrix and then calls an R function once per row, which is slow on a large
# table. The rows that are kept are the same.
#
# NOTE(review): the filter covers every retained column, so a row with a blank
# Cross.Street is removed as well -- confirm that this is intended.
arrests_df <- read.csv("Arrests.csv") %>%
  select(-c(
    "Report.ID", "Report.Type", "Reporting.District", "Charge.Group.Code",
    "Charge.Description", "Address", "LAT", "LON",
    "Location", "Booking.Date", "Booking.Location", "Booking.Location.Code",
    "Area.ID", "Arrest.Type.Code", "Booking.Time", "Time",
    "Disposition.Description", "Charge"
  )) %>%
  filter(
    # A value counts as present only if it is neither NA nor an empty string.
    if_all(everything(), function(column) !is.na(column) & column != "")
  )
# Give the retained columns consistent, readable names. In the lookup vector
# each element name is the new column name and each value is the existing
# column name created by the CSV import.
arrests_df <- rename(
  arrests_df,
  all_of(c(
    Arrest_Date = "Arrest.Date",
    Area = "Area.Name",
    Cross_Street = "Cross.Street",
    Gender = "Sex.Code",
    Race = "Descent.Code",
    Charge_Description = "Charge.Group.Description"
  ))
)
# Remove every occurrence of the literal text " 12:00:00 AM" from the arrest
# date values. The pattern is matched as fixed text; it contains no regex
# metacharacters, so the result is the same as a regex match.
arrests_df[["Arrest_Date"]] <- gsub(
  " 12:00:00 AM",
  "",
  arrests_df[["Arrest_Date"]],
  fixed = TRUE
)