# Read the crime records, keep only the columns used by the analysis below,
# and discard every row that has a missing value in any of those columns
crimes <- read_csv(file = "Chicago_Crimes.csv", show_col_types = FALSE) %>%
  select(
    all_of(c(
      "Primary Type", "Date", "Description", "Arrest",
      "Community Area", "Latitude", "Longitude"
    ))
  ) %>%
  drop_na()

# Read the community-area lookup file; it is reshaped further down
raw <- read_csv(file = "chicago-community-areas.csv", show_col_types = FALSE)
# Count crimes per (type, arrest outcome), restricted to the 10 crime types
# with the most records
crime_counts <- crimes %>%
  semi_join(
    crimes %>%
      count(`Primary Type`, sort = TRUE) %>%
      slice_head(n = 10),
    by = "Primary Type"
  ) %>%
  count(`Primary Type`, Arrest)

# Plot a horizontal stacked bar chart of the top crime types, split by
# arrest outcome. crime_counts holds one row per (type, Arrest) pair, so the
# bars are ordered by the per-type total (FUN = sum). reorder()'s default
# summary (mean) would rank a type that has only one Arrest value by its
# full total while ranking the others by half of theirs.
ggplot(
  crime_counts,
  aes(x = n, y = reorder(`Primary Type`, n, FUN = sum), fill = Arrest)
) +
  geom_col() +
  scale_x_continuous(labels = comma) +
  scale_fill_manual(
    values = c("TRUE" = "#d2e4f0", "FALSE" = "#0073ae"),
    labels = c("TRUE" = "Arrest", "FALSE" = "No Arrest")
  ) +
  labs(
    title = "Top 10 Crime Types in Chicago (with Arrest Breakdown)",
    x = "Number of Crimes",
    y = "Crime Type",
    fill = "Outcome"
  ) +
  theme_minimal()

# Keep narcotics offences that ended in an arrest, and derive the calendar
# year from Date (parsed as month/day/year hour:minute:second)
narcotics <- crimes %>%
  filter(`Primary Type` == "NARCOTICS") %>%
  filter(Arrest == TRUE) %>%
  mutate(Year = Date %>% mdy_hms() %>% year())

# Names of the 3 narcotics descriptions with the most arrests,
# most frequent first
top_3_drugs <- narcotics %>%
  count(Description) %>%
  arrange(desc(n)) %>%
  head(3) %>%
  pull(Description)

# Number of arrests per (Year, Description) for the top 3 descriptions
narcotics_summary <- narcotics %>%
  filter(Description %in% top_3_drugs) %>%
  count(Year, Description, name = "n_arrests")

# Plot yearly arrest counts for the selected drug types as points joined by
# lines. The palette is deliberately unnamed: ggplot2 then hands out the
# colors in the order of the scale's values (alphabetical for a character
# column), so every Description present in narcotics_summary gets a color.
# A palette keyed on fixed description strings leaves a series uncolored
# whenever the data-driven top three differ from those strings.
ggplot(narcotics_summary, aes(x = Year, y = n_arrests, color = Description)) +
  geom_point(size = 3) +
  geom_line() +
  scale_color_manual(values = c("#0073ae", "#d2e4f0", "black")) +
  labs(
    title = "Narcotics Arrests Over Time (Top 3 Drug Types)",
    x = "Year",
    y = "Number of Arrests",
    color = "Drug Type"
  ) +
  theme_minimal()

# Aggregate to the 50 community areas with the most crimes.
# For each area: the number of crimes and the mean latitude/longitude of its
# records (used later as the circle position on the map). Rows are sorted by
# crime count, largest first, and only the first 50 are kept so the result
# matches the stated top-50 scope.
area_summary <- crimes %>%
  group_by(`Community Area`) %>%
  summarise(
    n_crimes = n(),
    avg_lat   = mean(Latitude,  na.rm = TRUE),
    avg_lng   = mean(Longitude, na.rm = TRUE),
    .groups   = "drop"
  ) %>%
  arrange(desc(n_crimes)) %>%
  slice_head(n = 50)

# Transpose the lookup table so that the columns of `raw` become rows.
# t() returns a matrix, so the result is converted back to a tibble;
# .name_repair = "minimal" leaves the column names unrepaired because they
# are replaced in the next step.
# NOTE(review): presumably the CSV stores one community area per column with
# the field labels in its first column - confirm against the file.
community_names <- raw %>%
  as_tibble() %>%
  t() %>%
  as_tibble(.name_repair = "minimal")

# Clean data: promote the first row to column names, drop that row, number
# the remaining rows, and keep only the name and the number.
community_names <- community_names %>%
  # use the values of the first row as the column names
  `colnames<-`(.[1, ]) %>%
  # remove the row that was just used as the header
  slice(-1) %>%
  # the area number is taken from the row position; assumes the rows are in
  # community-area order starting at 1 - TODO confirm
  mutate(`Community Area` = row_number()) %>%
  # requires that one of the promoted header values is "name"
  select(name, `Community Area`)

# Attach the community name to each aggregated area; an area without a match
# in community_names keeps its row and gets NA for name
area_summary <- left_join(
  area_summary,
  community_names,
  by = "Community Area"
)
# Interactive map: one filled circle per community area, centered on the
# area's mean crime coordinates and sized in proportion to its crime count,
# plus a single marker for the office location
leaflet(area_summary) %>%
  addProviderTiles("CartoDB.Positron") %>%
  addCircles(
    lat = ~avg_lat,
    lng = ~avg_lng,
    radius = ~0.04 * n_crimes,
    stroke = FALSE,
    color = "#0073ae",
    fillOpacity = 0.7,
    # HTML shown when a circle is clicked
    popup = ~paste0(
      "<b>Community Area:</b> ", name,
      "<br><b>Crimes:</b> ", n_crimes
    )
  ) %>%
  addMarkers(
    lat = 41.8815863,
    lng = -87.636141,
    popup = "CRA Office"
  )