This analysis attempts to visualize and contextualize spatial and temporal patterns in point data of crimes in St. Louis, Missouri. The analysis was conducted using a variety of packages in the R scripting language.
The St. Louis crime dataset contains 557 records capturing all reported homicides, arsons, and DUIs. Each record also lists the year and month of the incident along with a handful of other fields. The data span August 2013 to August 2014.
This section of the report walks through results derived from analysis in R.
library(leaflet)
library(tidyverse)
library(sf)
library(tidyr)
# Base directories (with trailing slash) for the tabular crime data and the
# GIS layers used in this report.
file_dir_crime <- "C:/temp/PSU_operational/GEOG586/Lesson2/Geog586_Les2_Project/crime/"
file_dir_gis <- "C:/temp/PSU_operational/GEOG586/Lesson2/Geog586_Les2_Project/gis/"

# Load the crime records. The path is assembled from file_dir_crime so the
# data location is defined in exactly one place; paste0() is used because the
# directory string already ends in "/".
stl_crime <- read.csv(
  paste0(file_dir_crime, "crimeStLouis20132014b.csv"),
  header = TRUE,
  sep = ","
)

# Quick per-column overview of the loaded table.
summary(stl_crime)
## recno crimetype xL yL
## Min. : 235 Length:557 Min. :-90.33 Min. :38.54
## 1st Qu.:14284 Class :character 1st Qu.:-90.26 1st Qu.:38.60
## Median :24229 Mode :character Median :-90.24 Median :38.65
## Mean :26208 Mean :-90.24 Mean :38.64
## 3rd Qu.:39623 3rd Qu.:-90.22 3rd Qu.:38.67
## Max. :52145 Max. :-90.18 Max. :38.76
## year month count codemonth
## Min. :2013 Min. : 2.000 Min. :1 Length:557
## 1st Qu.:2013 1st Qu.: 4.000 1st Qu.:1 Class :character
## Median :2014 Median : 8.000 Median :1 Mode :character
## Mean :2014 Mean : 6.996 Mean :1
## 3rd Qu.:2014 3rd Qu.: 9.000 3rd Qu.:1
## Max. :2014 Max. :12.000 Max. :1
## crimet district Neighborho
## Min. : 10000 Min. :0.000 Min. : 0.00
## 1st Qu.: 10000 1st Qu.:2.000 1st Qu.:18.00
## Median : 83000 Median :4.000 Median :50.00
## Mean :106830 Mean :4.129 Mean :42.53
## 3rd Qu.:211000 3rd Qu.:6.000 3rd Qu.:64.00
## Max. :212000 Max. :9.000 Max. :83.00
# Distinct crime categories present in the data, in order of first appearance.
recorded_crimes <- unique(stl_crime[["crimetype"]])
recorded_crimes
## [1] "homicide" "arson" "dui"
# Stamp every record with the first day of its month so that events can be
# binned on a monthly Date axis.
stl_crime <- mutate(
  stl_crime,
  date = as.Date(paste0(year, "-", month, "-01"))
)

# Total number of events per month, regardless of crime type.
monthly_counts <- stl_crime %>%
  group_by(date) %>%
  summarise(count = n(), .groups = "drop")

# Add a zero-count row for every month in the observed range that has no
# events, then convert the dates to a factor labelled "YYYY-MM" (levels kept
# in chronological row order) for a discrete x axis.
complete_monthly_counts <- monthly_counts %>%
  complete(date = seq.Date(min(date), max(date), by = "month")) %>%
  replace_na(list(count = 0)) %>%
  mutate(date = factor(date, levels = date, labels = format(date, "%Y-%m")))

# Figure 1: bar chart of all crimes per month.
ggplot(data = complete_monthly_counts, mapping = aes(x = date, y = count)) +
  geom_col() +
  labs(
    title = "Figure 1: Number of crimes, per month",
    x = "Month",
    y = "Crimes"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Number of events per month and crime type.
crime_counts <- stl_crime %>%
  group_by(date, crimetype) %>%
  summarize(count = n()) %>%
  ungroup()

# Expand to every month x crime-type combination, filling absent
# combinations with zero, and label months as "YYYY-MM". The factor levels
# sort alphabetically, which for this format is also chronological.
complete_monthly_counts <- crime_counts %>%
  complete(
    date = seq.Date(min(date), max(date), by = "month"),
    crimetype
  ) %>%
  replace_na(list(count = 0)) %>%
  mutate(date = as.factor(format(date, "%Y-%m")))

# Figure 2: one line per crime type across the study period.
ggplot(
  data = complete_monthly_counts,
  mapping = aes(x = date, y = count, color = crimetype, group = crimetype)
) +
  geom_line() +
  labs(
    title = "Figure 2: Number of crimes, per month-year by crime type",
    x = "Month",
    y = "Count of Events",
    color = "Crimes"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Number of events per calendar month (years pooled) and crime type.
# August counts are divided by two.
# NOTE(review): presumably because the study period contains two Augusts
# (2013 and 2014) while other months occur once - confirm against the data.
monthly_counts <- stl_crime %>%
  mutate(month = format(date, "%m")) %>%
  group_by(month, crimetype) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(
    count = count / if_else(month == "08", 2, 1),
    # Two-digit month codes become abbreviated month names in calendar order.
    month = factor(month, levels = sprintf("%02d", 1:12), labels = month.abb)
  )

# Figure 3: seasonal profile, one line per crime type.
ggplot(
  data = monthly_counts,
  mapping = aes(x = month, y = count, color = crimetype, group = crimetype)
) +
  geom_line() +
  labs(
    title = "Figure 3: Number of crimes, per month by crime type",
    x = "Month",
    y = "Crimes",
    color = "Crime Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Read the city boundary shapefile, reproject it to EPSG:4326 (the same CRS
# assigned to the crime points), and repair any invalid geometry.
boundary_path <- paste0(file_dir_gis, "stl_boundary_ll.shp")
stl_boundary <- st_make_valid(
  st_transform(st_read(boundary_path), crs = 4326)
)
## Reading layer `stl_boundary_ll' from data source
## `C:\temp\PSU_operational\GEOG586\Lesson2\Geog586_Les2_Project\gis\stl_boundary_ll.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 4 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -90.32052 ymin: 38.532 xmax: -90.16631 ymax: 38.77435
## Geodetic CRS: WGS 84
# Convert the crime table to point features using the xL/yL columns as
# coordinates in EPSG:4326.
geocoded_data_sf <- st_as_sf(stl_crime, coords = c("xL", "yL"), crs = 4326)

# One layer per crime type so each can be styled and toggled independently.
homicide <- filter(geocoded_data_sf, crimetype == "homicide")
arson <- filter(geocoded_data_sf, crimetype == "arson")
dui <- filter(geocoded_data_sf, crimetype == "dui")

# Popup template shared by all three marker layers. It refers to bare column
# names so leaflet evaluates it against the `data` of the layer being added.
# Referencing geocoded_data_sf$... here would pull values from the unfiltered
# table and attach the wrong record's attributes to each marker.
crime_popup <- ~paste("Month: ", month, "<br>",
                      "Year: ", year, "<br>",
                      "District: ", district, "<br>",
                      "Neighborhood: ", Neighborho)

# Interactive map: city outline plus one toggleable marker layer per crime
# type, with a legend explaining the colours.
leaflet() %>%
  addTiles() %>%
  addPolygons(data = stl_boundary, color = "black", fillColor = "transparent") %>%
  addCircleMarkers(data = homicide, group = "Homicide",
                   radius = 5, color = "red", fillOpacity = 0.5,
                   popup = crime_popup) %>%
  addCircleMarkers(data = arson, group = "Arson",
                   radius = 5, color = "blue", fillOpacity = 0.5,
                   popup = crime_popup) %>%
  addCircleMarkers(data = dui, group = "DUI",
                   radius = 5, color = "green", fillOpacity = 0.5,
                   popup = crime_popup) %>%
  # Add layer control
  addLayersControl(overlayGroups = c("Homicide", "Arson", "DUI"),
                   options = layersControlOptions(collapsed = FALSE)) %>%
  addLegend(position = "bottomright", # Adjust position as needed
            colors = c("black", "red", "blue", "green"),
            labels = c("City boundary", "Homicide", "Arson", "DUI"),
            opacity = 1)
Beyond what was covered in this brief report, additional insights could be gleaned by:
# Number of crime records per neighborhood code.
neighborhood_counts <- stl_crime %>%
  group_by(Neighborho) %>%
  summarise(count = n(), .groups = "drop")

# Average number of crimes per neighborhood, using a fixed divisor of 88.
# NOTE(review): 88 is presumably the total number of neighborhood units in
# the city, including those with no recorded crimes - confirm the source.
avg_crime <- sum(neighborhood_counts[["count"]]) / 88
avg_crime
## [1] 6.329545
This analysis showed distinct spatial and temporal patterns in St. Louis’ crime data for the included years.