# Community area lookup file, read as-is
raw <- read_csv(file = "chicago-community-areas.csv", show_col_types = FALSE)

# Crime records: keep only the fields the analysis uses and discard every
# record that has a missing value in any of them
crimes <- read_csv(file = "Chicago_Crimes.csv", show_col_types = FALSE) %>%
  select(
    `Primary Type`,
    Date,
    Description,
    Arrest,
    `Community Area`,
    Latitude,
    Longitude
  ) %>%
  drop_na()
# Count incidents per (crime type, arrest outcome), restricted to the ten
# crime types that occur most often overall
crime_counts <- crimes %>%
  semi_join(
    # The ten most frequent crime types
    crimes %>%
      count(`Primary Type`, sort = TRUE) %>%
      slice_head(n = 10),
    by = "Primary Type"
  ) %>%
  count(`Primary Type`, Arrest, name = "n")
# Stacked horizontal bar chart: one bar per crime type, split by whether the
# incident led to an arrest
ggplot(
  crime_counts,
  aes(
    x = n,
    # Rank the bars by each type's total count. reorder() defaults to the
    # mean of n per type, which would mis-rank a type that has rows for only
    # one Arrest value relative to the stacked bar lengths.
    y = reorder(`Primary Type`, n, FUN = sum),
    fill = Arrest
  )
) +
  geom_col() +
  # Thousands separators on the count axis
  scale_x_continuous(labels = comma) +
  scale_fill_manual(
    values = c("TRUE" = "#d2e4f0", "FALSE" = "#0073ae"),
    labels = c("TRUE" = "Arrest", "FALSE" = "No Arrest")
  ) +
  labs(
    title = "Top 10 Crime Types in Chicago (with Arrest Breakdown)",
    x = "Number of Crimes",
    y = "Crime Type",
    fill = "Outcome"
  ) +
  theme_minimal()

# Narcotics incidents that ended in an arrest, with the calendar year parsed
# from the month/day/year timestamp stored in Date
narcotics <- crimes %>%
  filter(`Primary Type` == "NARCOTICS" & Arrest == TRUE) %>%
  mutate(Year = Date %>% mdy_hms() %>% year())

# The three offence descriptions seen most often among those arrests
top_3_drugs <- narcotics %>%
  count(Description, sort = TRUE) %>%
  pull(Description) %>%
  head(3)

# Yearly arrest totals for each of those three descriptions
narcotics_summary <- narcotics %>%
  filter(Description %in% top_3_drugs) %>%
  count(Year, Description, name = "n_arrests")
# Line chart with point markers: arrests per year, one series per description
# NOTE(review): the palette is keyed by three hard-coded descriptions; confirm
# they match the values computed in top_3_drugs
ggplot(
  data = narcotics_summary,
  mapping = aes(x = Year, y = n_arrests, color = Description)
) +
  geom_point(size = 3) +
  geom_line() +
  scale_color_manual(
    values = c(
      "POSS: CANNABIS 30GMS OR LESS" = "#0073ae",
      "POSS: CRACK" = "#d2e4f0",
      "POSS: HEROIN(WHITE)" = "black"
    )
  ) +
  labs(
    title = "Narcotics Arrests Over Time (Top 3 Drug Types)",
    x = "Year",
    y = "Number of Arrests",
    color = "Drug Type"
  ) +
  theme_minimal()

# Aggregate to the 50 community areas with the most crimes: one row per area
# holding its crime count and the mean latitude/longitude of its incidents
area_summary <- crimes %>%
  group_by(`Community Area`) %>%
  summarise(
    n_crimes = n(),
    avg_lat = mean(Latitude, na.rm = TRUE),
    avg_lng = mean(Longitude, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(n_crimes)) %>%
  # Sorting alone keeps every area; truncate to the 50 highest counts
  slice_head(n = 50)
# Reshape the community-area lookup so that each row describes one area.
# Transpose: t() swaps rows and columns of `raw` and returns a matrix, which
# as_tibble() converts back to a tibble. .name_repair = "minimal" leaves the
# column names unrepaired because they are replaced in the next step.
# NOTE(review): going through a matrix forces all values to one common type
# (presumably character) - confirm this is acceptable for later use.
community_names <- raw %>%
as_tibble() %>%
t() %>%
as_tibble(.name_repair = "minimal")
# Clean data
community_names <- community_names %>%
# Use the values of the first row (the first column of `raw`) as column names
`colnames<-`(.[1, ]) %>%
# Drop the row that was just promoted to column names
slice(-1) %>%
# Number the remaining rows 1, 2, ... and use that as the join key.
# NOTE(review): assumes the rows are in community area number order starting
# at 1 - confirm against the CSV.
mutate(`Community Area` = row_number()) %>%
# Keep the name and the key; requires that one promoted column name is "name"
select(name, `Community Area`)
# Attach the area name to each summary row; an area with no match in the
# lookup keeps its row and gets an NA name
area_summary <- left_join(
  area_summary,
  community_names,
  by = "Community Area"
)
# Interactive map: one circle per community area, sized by its crime count,
# plus a single marker labelled "CRA Office"
leaflet(data = area_summary) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircles(
    lat = ~avg_lat,
    lng = ~avg_lng,
    # Circle radius grows linearly with the number of crimes
    radius = ~0.04 * n_crimes,
    stroke = FALSE,
    color = "#0073ae",
    fillOpacity = 0.7,
    popup = ~paste0(
      "<b>Community Area:</b> ", name,
      "<br><b>Crimes:</b> ", n_crimes
    )
  ) %>%
  addMarkers(
    lat = 41.8815863,
    lng = -87.636141,
    popup = "CRA Office"
  )