# Relative path to the crime-records CSV; it is read with fread() once the packages below are loaded.
file = "R_datafiles//bpd_victims.csv"
library(data.table)
library(lubridate)
library(dplyr)
library(DescTools)
library(ggplot2)
library(scales)
library(RColorBrewer)
library(ggthemes)
library(ggrepel)
library(plotly)
library(leaflet)
# Load the raw crime records from the CSV path stored in `file`.
df = fread(file)

# Break the crime date into calendar parts (year, month, day of year, day, weekday, weekend flag, quarter).
x = mdy(df$CrimeDate)
df$year = year(x)
df$month = months(x, abbreviate = FALSE)
df$yday = yday(x)
df$day = day(x)
df$weekdays = weekdays(x, abbreviate = FALSE)
# With lubridate's default week start, wday() returns 1 for Sunday and 7 for Saturday.
df$weekend = wday(x) %in% c(1, 7)
df$qtr = quarters(x)

# Break the crime time into hour and minute.
xy = hms(df$CrimeTime)
df$hour = hour(xy)
df$minute = minute(xy)

# Record count per neighborhood, largest first, with row names renumbered to follow the sorted order.
crimecount = data.frame(count(df, Neighborhood))
crimecount = crimecount[order(crimecount$n, decreasing = TRUE), ]
rownames(crimecount) = seq_len(nrow(crimecount))

# Record count per weapon type, largest first.
weapontype = data.frame(count(df, Weapon))
weapontype = weapontype[order(weapontype$n, decreasing = TRUE), ]

# Combine the "OTHER" and missing (NA) weapon rows into a single "Unknown" row.
otherrow = which(weapontype$Weapon %like any% c("%OTHER%"))
narow = which(is.na(weapontype$Weapon))
otherrow_narow = c(otherrow, narow)
unknowntotal = sum(weapontype[otherrow_narow, "n"])
# Use a logical mask rather than negative indices: weapontype[-integer(0), ] would
# return zero rows (dropping every weapon) if no OTHER/NA row were present.
is_unknown = seq_len(nrow(weapontype)) %in% otherrow_narow
# Bind a one-row data frame (not a character vector) so n is not coerced to text.
# The "Unknown" row is placed first, ahead of the known weapon types.
weapontype2 = rbind(
  data.frame(Weapon = "Unknown", n = unknowntotal),
  weapontype[!is_unknown, ]
)
weapontype2$n = as.numeric(weapontype2$n)
rownames(weapontype2) = seq_len(nrow(weapontype2))
This dataset contains data on major crimes committed in Baltimore City from 2014 through November 2020. It is publicly available data provided by the Baltimore City Police Department.
There are 15 different types of crimes listed in this database, for a total of 321,414 records over the 7 years covered. Each record lists the weapon used (if known) as well as the time, location, and police district responsible for investigating.
Total number of major crimes reported each year
# Keep 2014 through 2020 only; per the original note, the earlier years (1963 to 2013) hold very little data.
df_yearfilter = df %>%
  filter(year >= 2014, year < 2021)
# Homicide records within the filtered years.
df_homicide = df_yearfilter %>%
  filter(Description == "HOMICIDE")
# Homicide records for 2020 only.
df_homicide_2020 = df_homicide %>%
  filter(year == 2020)

# One axis tick (and label) for every year in the filtered data.
x_axis_labels = min(df_yearfilter$year):max(df_yearfilter$year)

# Histogram of records per year, with the count printed above each bar.
p1 = ggplot(df_yearfilter, aes(x = year)) +
  geom_histogram(bins = 7, color = "black", fill = "lightblue") +
  stat_bin(
    aes(label = scales::comma(..count..)),
    binwidth = 1,
    geom = "text",
    color = "black",
    vjust = -.48
  ) +
  scale_x_continuous(labels = x_axis_labels, breaks = x_axis_labels) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Crimes per Year (to November 2020)",
    x = "Year",
    y = "Count of Crimes"
  ) +
  theme_clean()
p1
Top 10 neighborhoods with the most crimes committed between 2014 and 2020
# Bar chart of the ten neighborhoods with the highest record counts, tallest bar first.
# head() replaces [1:10, ] so that fewer than ten neighborhoods does not yield NA rows.
ggplot(head(crimecount, 10), aes(x = reorder(Neighborhood, -n), y = n, fill = n)) +
  geom_bar(colour = "black", stat = "identity") +
  scale_fill_gradient(low = "lightblue", high = "blue") +
  labs(title = "Baltimore Crimes by Neighborhood (Top 10)", x = "Neighborhood", y = "Crime Count", fill = "Count") +
  # theme_clean() is a complete theme and replaces any earlier theme() settings,
  # so the title centering has to be added after it to take effect.
  theme_clean() +
  theme(plot.title = element_text(hjust = .5))
Type of weapon used in connection with crimes committed
# Bar chart of the known weapon types, smallest count first.
# Row 1 of weapontype2 is the combined "Unknown" row, so it is dropped; head() then keeps
# at most four known types without producing NA rows when fewer are available.
ggplot(head(weapontype2[-1, ], 4), aes(x = reorder(Weapon, n), y = n, fill = n)) +
  geom_bar(colour = "black", stat = "identity") +
  scale_fill_gradient(low = "lightblue", high = "blue") +
  labs(title = "Weapon Types Used (When Known)", x = "Weapon", y = "Count", fill = "Count") +
  # theme_clean() is a complete theme and replaces any earlier theme() settings,
  # so the title centering has to be added after it to take effect.
  theme_clean() +
  theme(plot.title = element_text(hjust = .5))
Breakdown of the types of crimes committed, displayed per year
# Overall record count per crime type, largest first.
df_desc = df_yearfilter %>% count(Description)
df_desc = df_desc[order(df_desc$n, decreasing = TRUE), ]

# Record count for every (crime type, year) pair.
new_df = df_yearfilter %>%
  group_by(Description, year) %>%
  summarise(n = n(), .groups = "keep") %>%
  data.frame()

# Total per crime type across all years; used for the label at the end of each bar.
agg_total = new_df %>%
  group_by(Description) %>%
  summarise(tot = sum(n), .groups = "keep") %>%
  data.frame()

# Year becomes a factor so it maps to a discrete fill palette.
new_df$year = as.factor(new_df$year)

# Horizontal stacked bars: one bar per crime type, one segment per year.
ggplot(new_df, aes(x = reorder(Description, n, sum), y = n, fill = year)) +
  # Reversed stacking puts 2020 at the right-hand end of each bar.
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE)) +
  # fill is unset for the labels because agg_total has no year column.
  geom_text(
    data = agg_total,
    aes(x = Description, y = tot, label = scales::comma(tot), fill = NULL),
    hjust = -.1,
    size = 3.8
  ) +
  coord_flip() +
  scale_fill_brewer(palette = "Set2", guide = guide_legend(reverse = TRUE)) +
  labs(title = "Crime Count by Type", x = "", y = "Crime Count", fill = "Year") +
  theme_clean() +
  theme(plot.title = element_text(hjust = .5))
Total number of crimes committed at each hour of the day. Visual starts at midnight and ends at 11 PM.
# Record count for each hour of the day; rows with a missing (NA) hour are dropped.
# The result is a plain, ungrouped data frame.
hours_df = df_yearfilter %>%
  filter(!is.na(hour)) %>%
  group_by(hour) %>%
  summarise(n = n(), .groups = "drop") %>%
  data.frame()
hours_df$hour = as.integer(hours_df$hour)
x_axis_labels_line = 0:23

# The hours with the lowest and highest counts.
# hours_df must be ungrouped here: when it was still grouped by hour, min(n) and max(n)
# were evaluated inside each one-row group, so every hour passed the filter.
hi_lo = hours_df %>%
  filter(n == min(n) | n == max(n))

# Line chart of counts by hour, with the extreme hours outlined in black and labelled.
ggplot(hours_df, aes(x = hour, y = n)) +
  geom_line(color = 'lightblue', size = 1) +
  geom_point(shape = 21, size = 3, color = 'blue', fill = 'white') +
  labs(x = "Hour", y = "Crime Count", title = "Crimes by Hour") +
  scale_y_continuous(labels = comma) +
  theme_clean() +
  theme(plot.title = element_text(hjust = .5)) +
  scale_x_continuous(labels = x_axis_labels_line, breaks = x_axis_labels_line, minor_breaks = NULL) +
  geom_point(data = hi_lo, aes(x = hour, y = n), shape = 21, size = 3, fill = 'white', color = 'black') +
  # Only the minimum and maximum get a visible label; all other points get an empty string.
  geom_label_repel(aes(label = ifelse(n == max(n) | n == min(n), scales::comma(n), "")),
                   box.padding = 1.8, point.padding = 1.5, size = 3,
                   color = 'Grey50', segment.color = 'lightblue')
Percentage of total crimes committed in each police district
# Record count per police district, largest first, cut to the first nine rows.
district = count(df_yearfilter, District)
district = district[order(-n), ][1:9, ]

# Count per (year, district), plus each district's share of that year's total as a percentage.
district_df = df_yearfilter %>%
  group_by(year, District) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(percent_of_total = round(100 * n / sum(n), 1)) %>%
  ungroup() %>%
  data.frame()

# One pie per year: a filled stacked bar wrapped into polar coordinates, faceted by year.
Trellis_pie = ggplot(data = district_df, aes(x = "", y = n, fill = District)) +
  geom_bar(stat = "identity", position = "fill") +
  # Percentage labels sit outside the slices (x = 1.8) at the middle of each segment.
  geom_text(
    aes(x = 1.8, label = paste0(percent_of_total, "%")),
    size = 3,
    position = position_fill(vjust = .5)
  ) +
  coord_polar(theta = "y", start = 0) +
  facet_wrap(~year, ncol = 4, nrow = 2) +
  scale_fill_brewer(palette = "Spectral") +
  labs(fill = "Districts", x = NULL, y = NULL, title = "Crimes by District") +
  theme_clean() +
  theme(
    plot.title = element_text(hjust = .5),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  )
Trellis_pie
This nested pie chart displays the homicide numbers across each district for the years 2016 to 2019, with 2019 as the outermost ring and 2016 as the innermost. When hovering over a slice of the pie, additional information is shown about the underlying data.
# Homicide count per (year, district), plus each district's share of that year's homicides.
homicide_district_df = df_homicide %>%
  group_by(year, District) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(percent_of_total = round(100 * n / sum(n), 1)) %>%
  ungroup() %>%
  data.frame()

# Rows of homicide_district_df for a single year; each ring of the chart uses one such slice.
homicides_in = function(yr) {
  homicide_district_df[homicide_district_df$year == yr, ]
}

# Nested donut chart: the 2019 trace has no domain and fills the plot area, and each
# earlier year is given a progressively smaller domain so it sits inside the previous ring.
fig = plot_ly(hole = 0.8) %>%
  layout(title = "Homicides by District (2016 - 2019)") %>%
  add_trace(
    data = homicides_in(2019),
    labels = ~District,
    values = ~n,
    type = "pie",
    textposition = "inside",
    hovertemplate = "Year: 2019<br>District: %{label}<br>Percent: %{percent}<br>Homicide Count: %{value}<extra></extra>"
  ) %>%
  add_trace(
    data = homicides_in(2018),
    labels = ~District,
    values = ~n,
    type = "pie",
    textposition = "inside",
    hovertemplate = "Year: 2018<br>District: %{label}<br>Percent: %{percent}<br>Homicide Count: %{value}<extra></extra>",
    domain = list(x = c(.11, .89), y = c(.11, .89))
  ) %>%
  add_trace(
    data = homicides_in(2017),
    labels = ~District,
    values = ~n,
    type = "pie",
    textposition = "inside",
    hovertemplate = "Year: 2017<br>District: %{label}<br>Percent: %{percent}<br>Homicide Count: %{value}<extra></extra>",
    domain = list(x = c(.20, .80), y = c(.20, .80))
  ) %>%
  add_trace(
    data = homicides_in(2016),
    labels = ~District,
    values = ~n,
    type = "pie",
    textposition = "inside",
    hovertemplate = "Year: 2016<br>District: %{label}<br>Percent: %{percent}<br>Homicide Count: %{value}<extra></extra>",
    domain = list(x = c(.27, .73), y = c(.27, .73))
  )
fig
Map of Baltimore City showing the locations of all murders committed in 2020, as well as the murder weapon.
# 2020 homicide records split by weapon; each subset is computed once and reused
# for both its longitude and latitude.
firearm_2020 = subset(df_homicide_2020, Weapon == "FIREARM")
knife_2020 = subset(df_homicide_2020, Weapon == "KNIFE")
other_2020 = subset(df_homicide_2020, Weapon == "OTHER")

# Tile map with one circle layer per weapon: red = firearm, blue = knife, green = other.
# The view is set from the fixed coordinates and zoom level below.
map = leaflet() %>%
  addTiles() %>%
  setView(lng = -76.617345, lat = 39.2991918, zoom = 11.5) %>%
  addCircles(
    lng = firearm_2020$Longitude,
    lat = firearm_2020$Latitude,
    opacity = 1, # stroke opacity is a 0-1 fraction; the previous value of 10 was out of range
    color = "red",
    popup = "Firearm") %>%
  addCircles(
    lng = knife_2020$Longitude,
    lat = knife_2020$Latitude,
    opacity = 1,
    color = "blue",
    popup = "Knife") %>%
  addCircles(
    lng = other_2020$Longitude,
    lat = other_2020$Latitude,
    opacity = 1,
    color = "green",
    popup = "Other")
map
Sourced from Baltimore City Police Department:
“https://data.baltimorecity.gov/datasets/baltimore::part-1-crime-data-/explore?location=39.300286%2C-76.618650%2C13.51”