# Location of the crime CSV that fread() loads below, relative to the working
# directory. file.path() builds the path with a single separator instead of
# the doubled "//" in the hand-written literal.
file = file.path("R_datafiles", "bpd_victims.csv")
library(data.table)
library(lubridate)
library(dplyr)
library(DescTools)
library(ggplot2)
library(scales)
library(RColorBrewer)
library(ggthemes)
library(ggrepel)
library(plotly)
library(leaflet)

# Load the raw crime records (fread() returns a data.table).
df = fread(file)

# Parse CrimeDate (month/day/year text) once, then derive the calendar fields
# used for grouping in the sections below.
x = mdy(df$CrimeDate)

df$year = year(x)
df$month = months(x)                # full month name (abbreviate defaults to FALSE)
df$yday = yday(x)                   # day of the year
df$day = day(x)                     # day of the month
df$weekdays = weekdays(x)           # full weekday name (abbreviate defaults to FALSE)
df$weekend = wday(x) %in% c(1, 7)   # TRUE when wday() reports the first or last day of the week
df$qtr = quarters(x)

# Parse CrimeTime as hours:minutes:seconds and keep the hour and minute parts.
xy = hms(df$CrimeTime)
df$hour = hour(xy)
df$minute = minute(xy)

# Number of crime records per neighborhood, most frequent first.
# Note: this counts the unfiltered `df`, i.e. every year present in the file.
crimecount = data.frame(count(df, Neighborhood))
crimecount = crimecount[order(crimecount$n, decreasing = TRUE), ]

# Renumber the rows 1..n after sorting. seq_len() stays valid for an empty
# table, where 1:nrow() would produce c(1, 0) and fail.
rownames(crimecount) = seq_len(nrow(crimecount))

# Number of crime records per weapon type, most frequent first.
weapontype = data.frame(count(df, Weapon))
weapontype = weapontype[order(weapontype$n, decreasing = TRUE), ]

# Rows to fold into a single "Unknown" category: any weapon label containing
# "OTHER", plus the missing (NA) weapon row.
otherrow = which(weapontype$Weapon %like any% c("%OTHER%"))
narow = which(is.na(weapontype$Weapon))
otherrow_narow = c(otherrow, narow)

unknowntotal = sum(weapontype[otherrow_narow, "n"])

# Select the rows to keep positively. Negative indexing with an empty index
# (`weapontype[-integer(0), ]`) returns zero rows, which would wipe the table
# whenever the data has no OTHER/NA weapon rows.
keeprow = setdiff(seq_len(nrow(weapontype)), otherrow_narow)

# Put the combined "Unknown" row first (the weapon chart relies on it being
# row 1). Binding a typed one-row data frame keeps `n` numeric instead of
# coercing the whole column to character.
weapontype2 = rbind(
  data.frame(Weapon = "Unknown", n = unknowntotal, stringsAsFactors = FALSE),
  weapontype[keeprow, ]
)
weapontype2$n = as.numeric(weapontype2$n)

rownames(weapontype2) = seq_len(nrow(weapontype2))

Introduction

This dataset contains data on major crimes committed in Baltimore City between 2014 and November 2020. The data is publicly available from the Baltimore City Police Department.

Crime Data

There are 15 different types of crimes listed in this database, for a total of 321,414 records over the 7 years covered. Each record includes the weapon used (if known) as well as the time, location, and police district responsible for investigating.

Findings

Yearly Crimes

Total number of major crimes reported each year

# Restrict the analysis to 2014-2020. Earlier years (1963-2013) are dropped
# because they contain only a few scattered records.
df_yearfilter = df %>%
  filter(year >= 2014, year < 2021)

# Homicide records only.
df_homicide = df_yearfilter %>%
  filter(Description == "HOMICIDE")

# Homicide records from 2020 only.
df_homicide_2020 = df_homicide %>%
  filter(year == 2020)

# Histogram of crimes per year with the count printed above each bar.
# Bars and labels both use binwidth = 1 so they always share the same bins
# (the bars previously used bins = 7, which only lines up with the labels when
# the data spans exactly seven years). after_stat() replaces the deprecated
# `..count..` notation.
p1 = ggplot(df_yearfilter, aes(x = year)) +
  geom_histogram(binwidth = 1, color = "black", fill = "lightblue") +
  labs(title = "Crimes per Year (to November 2020)", x = "Year", y = "Count of Crimes") +
  scale_y_continuous(labels = comma) +
  stat_bin(binwidth = 1, geom = "text", color = "black",
           aes(label = after_stat(scales::comma(count))), vjust = -.48) +
  theme_clean()

# One labelled axis break per year in the filtered data.
x_axis_labels = min(df_yearfilter$year):max(df_yearfilter$year)
p1 = p1 + scale_x_continuous(labels = x_axis_labels, breaks = x_axis_labels)
p1

Neighborhoods

Top 10 neighborhoods with the most crimes committed between 2014 and 2020

# Bar chart of the ten neighborhoods with the most crime records, ordered from
# highest to lowest and shaded by count.
ggplot(crimecount[1:10, ], aes(x = reorder(Neighborhood, -n), y = n, fill = n)) +
  geom_col(colour = "black") +
  scale_fill_gradient(low = "lightblue", high = "blue") +
  labs(title = "Baltimore Crimes by Neighborhood (Top 10)", x = "Neighborhood", y = "Crime Count", fill = "Count") +
  # theme_clean() is a complete theme and replaces any theme() settings added
  # before it, so the title centring has to come afterwards to take effect.
  theme_clean() +
  theme(plot.title = element_text(hjust = .5))

Weapons

Type of weapon used in connection with crimes committed

# Bar chart of weapon types by count. Row 1 of weapontype2 is the combined
# "Unknown" category, so rows 2-5 are the known weapon types.
ggplot(weapontype2[2:5, ], aes(x = reorder(Weapon, n), y = n, fill = n)) +
  geom_col(colour = "black") +
  scale_fill_gradient(low = "lightblue", high = "blue") +
  labs(title = "Weapon Types Used (When Known)", x = "Weapon", y = "Count", fill = "Count") +
  # theme_clean() is a complete theme and replaces any theme() settings added
  # before it, so the title centring has to come afterwards to take effect.
  theme_clean() +
  theme(plot.title = element_text(hjust = .5))

Crime Type

Breakdown of the types of crimes committed, displayed per year

# Records per crime type across 2014-2020, most frequent first.
df_desc = count(df_yearfilter, Description, sort = TRUE)

# Records per crime type and year: one row per (Description, year) pair.
new_df = df_yearfilter %>%
  count(Description, year) %>%
  data.frame()

# Overall total per crime type, used for the label at the end of each bar.
agg_total = new_df %>%
  count(Description, wt = n, name = "tot") %>%
  data.frame()

# Treat year as a category so each year gets its own discrete fill colour.
new_df$year = as.factor(new_df$year)

# Horizontal stacked bars: one bar per crime type (ordered by total count),
# one segment per year, with the overall total printed past the end of the bar.
ggplot(new_df, aes(x = reorder(Description, n, sum), y = n, fill = year)) +
  # reverse = TRUE stacks the years so that 2020 ends up on the right-hand side
  geom_col(position = position_stack(reverse = TRUE)) +
  geom_text(
    data = agg_total,
    aes(x = Description, y = tot, label = scales::comma(tot), fill = NULL),
    hjust = -.1,
    size = 3.8
  ) +
  coord_flip() +
  scale_fill_brewer(palette = "Set2", guide = guide_legend(reverse = TRUE)) +
  labs(title = "Crime Count by Type", x = "", y = "Crime Count", fill = "Year") +
  theme_clean() +
  theme(plot.title = element_text(hjust = .5))

Crime Time

Total number of crimes committed at each hour of the day. Visual starts at midnight and ends at 11 PM.

# Crime records per hour of day for 2014-2020; rows with a missing hour are
# dropped. The result is a plain, ungrouped data frame.
hours_df = df_yearfilter %>%
  filter(!is.na(hour)) %>%
  count(hour) %>%
  data.frame()

hours_df$hour = as.integer(hours_df$hour)

x_axis_labels_line = 0:23

# The busiest and the quietest hour. hours_df must be ungrouped at this point:
# when it was still grouped by hour, min(n) and max(n) were evaluated inside
# each one-row group, so every hour matched and every point was highlighted.
hi_lo = hours_df %>%
  filter(n == min(n) | n == max(n))

# Line chart of crimes by hour; the two extreme hours get a black outline and
# a value label.
ggplot(hours_df, aes(x = hour, y = n)) +
  geom_line(color = 'lightblue', size = 1) +
  geom_point(shape = 21, size = 3, color = 'blue', fill = 'white') +
  labs(x = "Hour", y = "Crime Count", title = "Crimes by Hour") +
  scale_y_continuous(labels = comma) +
  theme_clean() +
  theme(plot.title = element_text(hjust = .5)) +
  scale_x_continuous(labels = x_axis_labels_line, breaks = x_axis_labels_line, minor_breaks = NULL) +
  geom_point(data = hi_lo, aes(x = hour, y = n), shape = 21, size = 3, fill = 'white', color = 'black') +
  # Label only the minimum and maximum; all other points get an empty label.
  geom_label_repel(aes(label = ifelse(n == max(n) | n == min(n), scales::comma(n), "")),
                   box.padding = 1.8, point.padding = 1.5, size = 3,
                   color = 'Grey50', segment.color = 'lightblue')

Districts

Percentage of total crimes committed in each police district

# Crime records per police district, highest first, keeping the first nine
# rows. `order(-n)` is resolved inside the data.table returned by count().
district = count(df_yearfilter, District)[order(-n), ][1:9, ]

# Records per district within each year, plus each district's share of that
# year's total as a percentage rounded to one decimal.
district_df = df_yearfilter %>%
  count(year, District) %>%
  group_by(year) %>%
  mutate(percent_of_total = round(100 * n / sum(n), 1)) %>%
  ungroup() %>%
  data.frame()

#district_df[district_df$year == 2019,]

# One pie per year (4 columns x 2 rows): a filled stacked bar bent onto polar
# coordinates, with the percentage printed outside each slice.
Trellis_pie = ggplot(data = district_df, aes(x = "", y = n, fill = District)) +
  geom_col(position = "fill") +
  geom_text(
    aes(x = 1.8, label = paste0(percent_of_total, "%")),
    size = 3,
    position = position_fill(vjust = .5)
  ) +
  coord_polar(theta = "y", start = 0) +
  facet_wrap(~year, ncol = 4, nrow = 2) +
  scale_fill_brewer(palette = "Spectral") +
  labs(fill = "Districts", x = NULL, y = NULL, title = "Crimes by District") +
  theme_clean() +
  theme(
    plot.title = element_text(hjust = .5),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )

Trellis_pie

Homicides by District

This nested pie chart displays the homicide numbers across each district for the years 2016 to 2019. When hovering over each slice of the pie, additional information is shown about the underlying data.

# Homicides per district within each year, plus each district's share of that
# year's homicides as a percentage rounded to one decimal.
homicide_district_df = df_homicide %>%
  count(year, District) %>%
  group_by(year) %>%
  mutate(percent_of_total = round(100 * n / sum(n), 1)) %>%
  ungroup() %>%
  data.frame()

# Add one ring (a pie trace) for a single year to the plotly figure `p`.
#   p         - plotly object to extend
#   ring_year - year whose rows of homicide_district_df are drawn
#   span      - c(lower, upper) fraction of the plot area used on both axes;
#               a narrower span nests the ring inside the previous ones.
#               c(0, 1) is the full plot area, which is what a pie trace uses
#               when no domain is given.
# Returns the plotly object with the extra trace.
add_homicide_ring = function(p, ring_year, span = c(0, 1)) {
  ring_data = homicide_district_df[homicide_district_df$year == ring_year, ]
  add_trace(
    p,
    data = ring_data,
    labels = ~District,
    values = ~n,  # resolved against ring_data, no need to re-subset the full frame
    type = "pie",
    textposition = "inside",
    hovertemplate = paste0(
      "Year: ", ring_year,
      "<br>District: %{label}<br>Percent: %{percent}<br>Homicide Count: %{value}<extra></extra>"
    ),
    domain = list(x = span, y = span)
  )
}

# Nested donut: 2019 is the outermost ring, 2016 the innermost.
fig = plot_ly(hole = 0.8) %>%
  layout(title = "Homicides by District (2016 - 2019)") %>%
  add_homicide_ring(2019) %>%
  add_homicide_ring(2018, c(.11, .89)) %>%
  add_homicide_ring(2017, c(.20, .80)) %>%
  add_homicide_ring(2016, c(.27, .73))

fig

Homicide Map (2020)

Map of Baltimore City showing the locations of murders committed in 2020 (through November), colored by murder weapon (firearm, knife, or other).

# Add one circle layer to leaflet map `m` for the 2020 homicides committed
# with a given weapon.
#   m            - leaflet map to extend
#   weapon_code  - value of the Weapon column to select
#   circle_color - stroke/fill colour of the circles
#   label        - popup text shown when a circle is clicked
# Returns the leaflet map with the extra layer.
add_weapon_circles = function(m, weapon_code, circle_color, label) {
  # Subset once and reuse it for both coordinates; rows whose Weapon is NA
  # are dropped by subset().
  hits = subset(df_homicide_2020, Weapon == weapon_code)
  addCircles(
    m,
    lng = hits$Longitude,
    lat = hits$Latitude,
    opacity = 1,  # opacity is a 0-1 fraction; the previous value of 10 was out of range
    color = circle_color,
    popup = label
  )
}

# Map centred on Baltimore. Only homicides whose weapon is FIREARM, KNIFE or
# OTHER are drawn; records with any other weapon value are not shown.
map = leaflet() %>%
  addTiles() %>%
  setView(lng = -76.617345, lat = 39.2991918, zoom = 11.5) %>%
  add_weapon_circles("FIREARM", "red", "Firearm") %>%
  add_weapon_circles("KNIFE", "blue", "Knife") %>%
  add_weapon_circles("OTHER", "green", "Other")
map

Data Source

Sourced from Baltimore City Police Department:
https://data.baltimorecity.gov/datasets/baltimore::part-1-crime-data-/explore?location=39.300286%2C-76.618650%2C13.51