The following graphs present an analysis of Baltimore City crime data spanning from 2016 to 2024. The data reveals significant trends and patterns in crime occurrences, types, and demographics.
# R libraries used:
library(httr)
library(data.table)
library(ggplot2)
library(lubridate)
library(dplyr)
library(plyr)
library(scales)
library(ggthemes)
library(RColorBrewer)
library(ggrepel)
library(leaflet)
library(leaflet.extras)
library(plotly)
# Downloading and reading data
# NOTE(review): setwd() ties the script to one machine's directory layout. It
# is kept so the CSV still lands where it did before, but a project-relative
# path would be more portable.
setwd("~/Master Degree/Data Visualization Decs Makin (DS-736-W01, GB-736-W01)/R")
fileURL <- "https://www.dropbox.com/scl/fo/va3maecms7cvdnq3ho4b9/ALyMQyLDHbMksLbhuYmX24Y/Baltimore%20Crime/Part1_Crime_Beta_5960161298247612570.csv?rlkey=wfotyhfgb6kutv8jgh028vwxu&dl=1"
# Name the local copy once so the download target and the file handed to
# fread() cannot drift apart.
filename <- "Baltimore_City_Crime.csv"
response <- GET(url = fileURL, write_disk(filename, overwrite = TRUE))
# Stop on an HTTP error instead of trying to parse an error page as CSV.
stop_for_status(response)
# fill = TRUE lets fread() pad rows that have fewer fields than the others.
df <- fread(filename, fill = TRUE)
The visualizations presented here focus on crime events in Baltimore City from 2016 to 2024.
# Filtering and modifying main dataframe
# Years that make up the analysis window.
yearFilter <- seq(from = 2016, to = 2024, by = 1)
# Overwrite each timestamp with its year (parsed as month/day/year h:m:s).
df$CrimeDateTime <- year(mdy_hms(df$CrimeDateTime))
# Keep only the rows whose year is in the window, as a plain data.frame.
newdf <- data.frame(filter(df, CrimeDateTime %in% yearFilter))
This histogram visualizes Baltimore City crime trends from 2016 to 2024. Here’s a summary:
Possible Explanations for the 2020 Dip: It’s worth noting that 2020 was the year of the initial COVID-19 pandemic and lockdowns, which may have influenced the significant decrease in crime.
# Histogram of crime counts per year in `newdf`; every bar is labelled with
# its count and two fixed red annotations are drawn on top.

# Largest per-year count; it sets the upper end of the y-axis breaks.
# table() is used instead of an unqualified count(), whose meaning depends on
# whether plyr or dplyr was attached last.
max_year_count <- max(table(newdf$CrimeDateTime))

P1 <- ggplot(newdf, aes(x = CrimeDateTime)) +
  # One bin per distinct year present in the data.
  geom_histogram(
    bins = length(unique(newdf$CrimeDateTime)),
    color = "darkblue",
    fill = "lightblue"
  ) +
  labs(
    title = "Histogram Baltimore City Crimes Trends (2016-2024)",
    x = "Year",
    y = "Count of Crimes"
  ) +
  # `labels` is spelled out in full rather than relying on partial matching.
  scale_y_continuous(
    labels = comma,
    breaks = seq(0, max_year_count, by = 10000)
  ) +
  # Print each bin's count just above its bar.
  stat_bin(
    binwidth = 1, geom = "text", color = "black",
    aes(label = comma(after_stat(count))),
    vjust = -0.4,
    size = 3
  ) +
  scale_x_continuous(
    labels = yearFilter,
    breaks = yearFilter
  ) +
  theme_light() +
  # NOTE(review): the annotation coordinates below are hard-coded; confirm
  # they still sit on the lowest/highest bars whenever the data is refreshed.
  annotate(
    "text",
    x = 2020, y = 40000,
    label = "Lowest Crime",
    color = "red",
    size = 2,
    fontface = "bold"
  ) +
  annotate(
    "text",
    x = 2017, y = 56000,
    label = "Highest Crime",
    color = "red",
    size = 2,
    fontface = "bold"
  )
P1
This stacked bar chart displays Baltimore City crime counts by crime type and year from 2016 to 2024.
Most Frequent Crimes:
Trends Over Time:
# Incident counts per crime type and year. Three long crime labels are
# shortened first.
stackdf <- newdf %>%
  select(Description, CrimeDateTime) %>%
  mutate(Description = case_when(
    Description %in% "LARCENY FROM AUTO" ~ "LARCENY AUTO",
    Description %in% "ROBBERY - CARJACKING" ~ "ROBBERY - CARJACK",
    Description %in% "ROBBERY - COMMERCIAL" ~ "ROBBERY - COMM",
    TRUE ~ Description
  )) %>%
  group_by(Description, CrimeDateTime) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "keep") %>%
  data.frame()

# Year becomes a factor so it can be used as a discrete fill.
stackdf$CrimeDateTime <- as.factor(stackdf$CrimeDateTime)

# Total incidents per crime type across all years.
agg_tot <- data.frame(
  dplyr::summarise(
    group_by(stackdf, Description),
    tot = sum(n),
    .groups = "keep"
  )
)

# Upper axis limit: the largest total rounded up to a multiple of 2000, plus
# 10000 of headroom.
largest_total <- max(agg_tot[["tot"]])
max_y <- round_any(largest_total, accuracy = 2000, f = ceiling) + 10000
# Horizontal stacked bar chart: one bar per crime type (ordered by total
# count), stacked by year, with the overall total printed at the end of
# each bar.
P2 <- ggplot(
  stackdf,
  aes(
    x = reorder(Description, n, sum),
    y = n,
    fill = CrimeDateTime
  )
) +
  geom_bar(
    stat = "identity",
    position = position_stack(reverse = TRUE)
  ) +
  coord_flip() +
  labs(
    title = "Baltimore City Crimes by Crime Type and Years (2016-2024)",
    x = "",
    y = "Crime Count",
    fill = "Year"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(size = 8)
  ) +
  # The legend is reversed to match the reversed stacking order.
  scale_fill_brewer(
    palette = "Paired",
    guide = guide_legend(reverse = TRUE)
  ) +
  # Totals come from `agg_tot`, which has no year column, so the inherited
  # fill mapping is cleared for this layer. (A stray empty argument after
  # `label` has been removed.)
  geom_text(
    data = agg_tot,
    aes(
      x = Description,
      y = tot,
      label = sprintf("%.2fk", tot / 1000),
      fill = NULL
    ),
    hjust = -0.1, size = 3
  ) +
  scale_y_continuous(
    breaks = seq(0, max_y, by = 10000),
    limits = c(0, max_y),
    labels = label_number(suffix = "k", scale = 1e-3)
  )
P2
This line chart illustrates trends in violent crimes in Baltimore from 2016 to 2024. Here are some key observations:
Overall Trend:
Specific Crime Trends:
# Yearly counts for the four violent-crime categories.
vnewdf <- newdf
# Fold every description that starts with "ROBBERY" into a single label.
vnewdf$Description[grepl("^ROBBERY", vnewdf$Description)] <- "ROBBERY"

# plyr is attached after dplyr at the top of the script; presumably it is
# detached here so unqualified dplyr verbs further down are no longer masked.
# The guard keeps a re-run of this section from failing once plyr is gone.
if ("package:plyr" %in% search()) {
  detach("package:plyr", unload = TRUE)
}

# Incidents per (crime type, year).
vstackdf <- vnewdf %>%
  select(Description, CrimeDateTime) %>%
  group_by(Description, CrimeDateTime) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "keep") %>%
  data.frame()

ViolentCrimes <- c("HOMICIDE", "RAPE", "AGG. ASSAULT", "ROBBERY")
# Keep only the violent-crime rows.
violentdf <- vstackdf %>%
  filter(Description %in% ViolentCrimes) %>%
  data.frame()
# Line chart of yearly counts, one line per violent-crime category, with each
# category's highest and lowest yearly value labelled.
x_axis_labels <- seq(2016, 2024)

# Rows holding each category's maximum / minimum yearly count.
peak_rows <- violentdf %>%
  group_by(Description) %>%
  filter(n == max(n))
trough_rows <- violentdf %>%
  group_by(Description) %>%
  filter(n == min(n))

P3 <- ggplot(violentdf, aes(x = CrimeDateTime, y = n, group = Description)) +
  # `linewidth` replaces `size` for line thickness (ggplot2 >= 3.4.0).
  geom_line(aes(color = Description), linewidth = 2) +
  labs(
    title = "Baltimore's Violent Crime Trends: A Nine-Year Overview (2016-2024)",
    x = "Year",
    y = "Crime Count"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = .5)) +
  scale_x_continuous(
    labels = x_axis_labels,
    breaks = x_axis_labels,
    minor_breaks = NULL
  ) +
  geom_point(
    shape = 21,
    size = 3,
    color = "black",
    fill = "white"
  ) +
  scale_color_brewer(palette = "Paired", name = "Violent Crime") +
  # `labels` is spelled out in full rather than relying on partial matching.
  scale_y_continuous(
    labels = comma,
    breaks = seq(0, 7000, by = 500)
  ) +
  # Maximum values are printed above their points.
  geom_text(
    data = peak_rows,
    aes(label = n),
    vjust = -1.5,
    color = "black",
    size = 2
  ) +
  # Minimum values are printed below their points.
  geom_text(
    data = trough_rows,
    aes(label = n),
    vjust = 2.5,
    color = "black",
    size = 2
  )
P3
This set of pie charts provides a visual breakdown of the types of weapons used in Baltimore City crimes from 2016 to 2024.
Key Observations:
# Per-year weapon counts and percentages, limited to the ten most frequent
# weapon categories in each year.

# Weapon values treated as "not classified" and dropped.
weaponfilter <- c(NA, "Y", "", "UNKNOWN", "OTHER")

weapon_counts <- newdf %>%
  select(CrimeDateTime, Weapon) %>%
  filter(!Weapon %in% weaponfilter) %>%
  # Merge truncated and full-length spellings into one label per category.
  # dplyr:: is spelled out so the result does not depend on plyr having been
  # detached earlier in the script.
  dplyr::mutate(Weapon = case_when(
    Weapon %in% c("KNIFE_CUTTING_INSTRUMENT", "KNIFE_CUTTIN") ~ "KNIFE",
    Weapon %in% c("PERSONAL_WEA") ~ "PERSONAL_WEAPONS",
    Weapon %in% c(
      "HANDGUN", "RIFLE", "SHOTGUN", "AUTOMATIC_HA",
      "AUTOMATIC_FI", "AUTOMATIC_RI", "OTHER_FIREARM", "OTHER_FIREAR",
      "AUTOMATIC_FIREARM",
      "AUTOMATIC_SH"
    ) ~ "FIREARM",
    Weapon %in% c("FIRE_INCENDI", "FIRE_INCENDIARY_DEVICE") ~ "FIRE",
    Weapon %in% c("MOTOR_VEHICLE_VESSEL", "MOTOR_VEHICL") ~ "MOTOR_VEHICLE",
    Weapon %in% c(
      "DRUGS_NARCOTICS_SLEEPING_PILLS",
      "DRUGS_NARCOT"
    ) ~ "DRUGS_NARCOTICS",
    TRUE ~ Weapon
  )) %>%
  group_by(CrimeDateTime, Weapon) %>%
  dplyr::summarise(Total_Incidents = dplyr::n(), .groups = "drop") %>%
  dplyr::arrange(CrimeDateTime, dplyr::desc(Total_Incidents)) %>%
  group_by(CrimeDateTime) %>%
  # Share of each weapon within its year, computed before trimming to the
  # top ten so the percentages refer to all retained weapons of that year.
  dplyr::mutate(
    Percent = round(100 * Total_Incidents / sum(Total_Incidents), 1)
  ) %>%
  slice_head(n = 10) %>%
  # Drop the grouping so later code does not inherit it by accident.
  ungroup()

# Year becomes a factor for faceting.
weapon_counts$CrimeDateTime <- as.factor(weapon_counts$CrimeDateTime)
# One pie per year (3 x 3 grid) showing the weapon mix. Slices whose Percent
# is above 5 are labelled with that percentage.
P4 <- ggplot(
  data = weapon_counts,
  aes(x = "", y = Total_Incidents, fill = Weapon)
) +
  # A filled bar wrapped into polar coordinates gives the pie.
  geom_bar(stat = "identity", position = "fill") +
  coord_polar(theta = "y", start = 0) +
  labs(
    fill = "Weapons",
    x = NULL,
    y = NULL,
    title = "Tracking Weapon Usage in Baltimore City Crimes Over Time",
    # Caption spelling corrected ("clasified" -> "classified").
    caption = "Weapons that were not classified or Unknown have been removed"
  ) +
  theme_light() +
  scale_fill_brewer(palette = "Paired") +
  theme(
    plot.title = element_text(hjust = .5),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  facet_wrap(~CrimeDateTime, ncol = 3, nrow = 3) +
  # x = 1.7 places the labels away from the centre; small slices get an
  # empty label.
  geom_text(
    aes(
      x = 1.7,
      label = ifelse(Percent > 5, paste0(Percent, "%"), "")
    ),
    size = 2,
    position = position_fill(vjust = .5)
  )
P4
This pie chart presents a breakdown of crimes in Baltimore City by the race and gender of the individuals involved.
Key Observations:
# Incident totals by gender; blank, missing and "U" values are reported as
# "Unknown", and the single-letter codes are spelled out.
Gendf <- newdf %>%
  select(Gender) %>%
  mutate(Gender = case_when(
    Gender %in% c("", NA, "U") ~ "Unknown",
    Gender %in% "M" ~ "Male",
    Gender %in% "F" ~ "Female",
    TRUE ~ Gender
  )) %>%
  group_by(Gender) %>%
  dplyr::summarise(Total_Incidents = n(), .groups = "keep") %>%
  data.frame()

# Incident totals by race; blank, missing and "UNKNOWN" values are reported
# as "Unknown", and the upper-case codes are replaced with short labels.
Racedf <- newdf %>%
  select(Race) %>%
  mutate(Race = case_when(
    Race %in% c("", "UNKNOWN", NA) ~ "Unknown",
    Race %in% "BLACK_OR_AFRICAN_AMERICAN" ~ "Black",
    Race %in% "NATIVE_HAWAIIAN_OR_OTHER_PACIFIC_ISLANDER" ~ "Pacific Islander",
    Race %in% "AMERICAN_INDIAN_OR_ALASKA_NATIVE" ~ "Native American",
    Race %in% "WHITE" ~ "White",
    Race %in% "ASIAN" ~ "Asian",
    TRUE ~ Race
  )) %>%
  group_by(Race) %>%
  dplyr::summarise(Total_Incidents = n(), .groups = "keep") %>%
  data.frame()
# Nested pies: race totals form the outer ring (hole = 0.7) and gender
# totals fill a smaller pie drawn inside it.
race_ring_colors <- c(
  "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b"
)
gender_pie_colors <- c("#e377c2", "#7f7f7f", "#bcbd22")
# The inner pie is confined to the middle of the plotting area.
gender_pie_domain <- list(x = c(0.16, 0.84), y = c(0.16, 0.84))

P5 <- plot_ly(Racedf) %>%
  # Outer ring: race, labelled outside the slices.
  add_trace(
    labels = ~Race,
    values = ~Total_Incidents,
    type = "pie",
    hole = 0.7,
    textinfo = "label+percent",
    textposition = "outside",
    insidetextorientation = "radial",
    marker = list(colors = race_ring_colors),
    hoverinfo = "label+percent"
  ) %>%
  # Inner pie: gender, labelled inside the slices.
  add_trace(
    data = Gendf,
    labels = ~Gender,
    values = ~Total_Incidents,
    type = "pie",
    textinfo = "label+percent",
    textposition = "inside",
    insidetextorientation = "radial",
    domain = gender_pie_domain,
    marker = list(colors = gender_pie_colors),
    hoverinfo = "label+percent"
  ) %>%
  layout(title = "Crimes by Race and Gender", showlegend = FALSE)
P5
This heatmap visualizes the concentration of violent crimes in Baltimore City. The map uses color intensity to represent the density of crimes, with warmer colors (red, orange) indicating higher concentrations and cooler colors (blue, green) indicating lower concentrations.
Key Observations:
# Violent-crime incidents with usable coordinates, counted per
# (crime type, neighborhood, year, latitude, longitude).
violnewdf <- newdf
# Fold every description that starts with "ROBBERY" into a single label.
# This works on this section's own copy so the map does not depend on
# objects built for earlier figures.
violnewdf$Description[grepl("^ROBBERY", violnewdf$Description)] <- "ROBBERY"

# All four categories are kept so the data matches the map's own caption
# ("Homicide, Rape, Agg. Assault and Robbery").
crimeFilter <- c("HOMICIDE", "RAPE", "AGG. ASSAULT", "ROBBERY")
# Latitude values that mark a missing location.
LocationFilter <- c(NA, 0)

violnewdf <- violnewdf %>%
  filter(Description %in% crimeFilter) %>%
  # Drop rows without a usable position; a missing longitude cannot be
  # plotted either.
  filter(
    !Latitude %in% LocationFilter,
    Latitude >= 0,
    !is.na(Longitude)
  ) %>%
  select(Description, CrimeDateTime, Latitude, Longitude, Neighborhood) %>%
  group_by(Description, Neighborhood, CrimeDateTime, Latitude, Longitude) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
  data.frame()
# Interactive map: a heat layer weighted by incident count, clustered markers
# with a details popup, and two text boxes (title and definition note).
m1 <- leaflet() %>%
  addTiles() %>%
  # Heat layer; each point is weighted by its incident count.
  addHeatmap(
    lng = violnewdf[["Longitude"]],
    lat = violnewdf[["Latitude"]],
    data = violnewdf,
    intensity = violnewdf[["n"]],
    blur = 20,
    max = 10,
    radius = 15
  ) %>%
  # One marker per row, grouped into clusters; the popup is built row by row
  # from the columns of `violnewdf`.
  addMarkers(
    lng = violnewdf[["Longitude"]],
    lat = violnewdf[["Latitude"]],
    popup = with(
      violnewdf,
      paste(
        "Description:", Description, "<br>",
        "Neighborhood:", Neighborhood, "<br>",
        "Date:", CrimeDateTime, "<br>",
        "Count:", n
      )
    ),
    clusterOptions = markerClusterOptions()
  ) %>%
  # Initial view of the map.
  setView(lng = -76.6122, lat = 39.2904, zoom = 12) %>%
  addControl(
    html = "<h4 style='font-size: 12px; padding: 3px 8px; margin: 3px;'>Violent Crime Heatmap in Baltimore City</h4>",
    position = "topleft"
  ) %>%
  addControl(
    html = "<p style='font-size: 10px; padding: 3px 8px; margin: 3px;'>Violent Crimes Defined as: Homicide, Rape, Agg. Assault and Robbery</p>",
    position = "bottomleft"
  )
m1