library(tidyverse)
library(readr)
library(lubridate)
library(data.table)
library(dplyr)
library(leaflet)
library(geojsonio)
library(jsonlite)
library(raster)
library(sf)
library(geojsonsf)
library(ggplot2)
library(viridis) # colour blind friendly palette, works in B&W also
library(ggExtra) # because remembering ggplot theme options is beyond me
library(tidyr)
Leaflet map displays each SDPD beat and the total number of calls for service from the public for each year from 2016 to 2022. Data are directly loaded from the San Diego Open Data portal into the RMD doc where it is cleaned and the map generated.
# import multple datasets and join together before eda
pd_2022 <- read_csv("https://seshat.datasd.org/pd/pd_calls_for_service_2022_datasd.csv",
col_types = cols(incident_num = col_character(),
date_time = col_datetime(format = "%Y-%m-%d %H:%M:%S"),
day_of_week = col_integer(), address_number_primary = col_integer(),
address_dir_primary = col_character(),
address_road_primary = col_character(),
address_sfx_primary = col_character(),
address_dir_intersecting = col_character(),
address_road_intersecting = col_character(),
address_sfx_intersecting = col_character(),
call_type = col_character(), disposition = col_character(),
beat = col_integer(), priority = col_integer()))
pd_2021 <- read_csv("https://seshat.datasd.org/pd/pd_calls_for_service_2021_datasd.csv",
col_types = cols(incident_num = col_character(),
date_time = col_datetime(format = "%Y-%m-%d %H:%M:%S"),
day_of_week = col_integer(), address_number_primary = col_integer(),
address_dir_primary = col_character(),
address_road_primary = col_character(),
address_sfx_primary = col_character(),
address_dir_intersecting = col_character(),
address_road_intersecting = col_character(),
address_sfx_intersecting = col_character(),
call_type = col_character(), disposition = col_character(),
beat = col_integer(), priority = col_integer()))
pd_2020 <- read_csv("https://seshat.datasd.org/pd/pd_calls_for_service_2020_datasd.csv",
col_types = cols(incident_num = col_character(),
date_time = col_datetime(format = "%Y-%m-%d %H:%M:%S"),
day_of_week = col_integer(), address_number_primary = col_integer(),
address_dir_primary = col_character(),
address_road_primary = col_character(),
address_sfx_primary = col_character(),
address_dir_intersecting = col_character(),
address_road_intersecting = col_character(),
address_sfx_intersecting = col_character(),
call_type = col_character(), disposition = col_character(),
beat = col_integer(), priority = col_integer()))
pd_2019 <- read_csv("https://seshat.datasd.org/pd/pd_calls_for_service_2019_datasd.csv",
col_types = cols(incident_num = col_character(),
date_time = col_datetime(format = "%Y-%m-%d %H:%M:%S"),
day_of_week = col_integer(), address_number_primary = col_integer(),
address_dir_primary = col_character(),
address_road_primary = col_character(),
address_sfx_primary = col_character(),
address_dir_intersecting = col_character(),
address_road_intersecting = col_character(),
address_sfx_intersecting = col_character(),
call_type = col_character(), disposition = col_character(),
beat = col_integer(), priority = col_integer()))
pd_2018 <- read_csv("https://seshat.datasd.org/pd/pd_calls_for_service_2018_datasd.csv",
col_types = cols(incident_num = col_character(),
date_time = col_datetime(format = "%Y-%m-%d %H:%M:%S"),
day_of_week = col_integer(), address_number_primary = col_integer(),
address_dir_primary = col_character(),
address_road_primary = col_character(),
address_sfx_primary = col_character(),
address_dir_intersecting = col_character(),
address_road_intersecting = col_character(),
address_sfx_intersecting = col_character(),
call_type = col_character(), disposition = col_character(),
beat = col_integer(), priority = col_integer()))
pd_2017 <- read_csv("https://seshat.datasd.org/pd/pd_calls_for_service_2017_datasd_v1.csv",
col_types = cols(incident_num = col_character(),
date_time = col_datetime(format = "%Y-%m-%d %H:%M:%S"),
day_of_week = col_integer(), address_number_primary = col_integer(),
address_dir_primary = col_character(),
address_road_primary = col_character(),
address_sfx_primary = col_character(),
address_dir_intersecting = col_character(),
address_road_intersecting = col_character(),
address_sfx_intersecting = col_character(),
call_type = col_character(), disposition = col_character(),
beat = col_integer(), priority = col_integer()))
pd_2016 <- read_csv("https://seshat.datasd.org/pd/pd_calls_for_service_2016_datasd_v1.csv",
col_types = cols(incident_num = col_character(),
date_time = col_datetime(format = "%Y-%m-%d %H:%M:%S"),
day_of_week = col_integer(), address_number_primary = col_integer(),
address_dir_primary = col_character(),
address_road_primary = col_character(),
address_sfx_primary = col_character(),
address_dir_intersecting = col_character(),
address_road_intersecting = col_character(),
address_sfx_intersecting = col_character(),
call_type = col_character(), disposition = col_character(),
beat = col_integer(), priority = col_integer()))
pd_2015 <- read_csv("https://seshat.datasd.org/pd/pd_calls_for_service_2015_datasd_v1.csv",
col_types = cols(incident_num = col_character(),
date_time = col_datetime(format = "%Y-%m-%d %H:%M:%S"),
day_of_week = col_integer(), address_number_primary = col_integer(),
address_dir_primary = col_character(),
address_road_primary = col_character(),
address_sfx_primary = col_character(),
address_dir_intersecting = col_character(),
address_road_intersecting = col_character(),
address_sfx_intersecting = col_character(),
call_type = col_character(), disposition = col_character(),
beat = col_integer(), priority = col_integer()))
# load in geojson data as spacial data frame
geojson <- geojson_sf("https://seshat.datasd.org/sde/pd/pd_beats_datasd.geojson")
# working with the geojson beats data, let's aggregate crimes in each beat
# for the map display
crime_beat_sums_2022 <- pd_2022 %>%
group_by(beat) %>%
count(name = 'ct_2022') %>%
ungroup(beat) %>%
arrange(beat, desc(ct_2022), by_group = TRUE)
crime_beat_sums_2021 <- pd_2021 %>%
group_by(beat) %>%
count(name = 'ct_2021') %>%
ungroup(beat) %>%
arrange(beat, desc(ct_2021), by_group = TRUE)
crime_beat_sums_2020 <- pd_2020 %>%
group_by(beat) %>%
count(name = 'ct_2020') %>%
ungroup(beat) %>%
arrange(beat, desc(ct_2020), by_group = TRUE)
crime_beat_sums_2019 <- pd_2019 %>%
group_by(beat) %>%
count(name = 'ct_2019') %>%
ungroup(beat) %>%
arrange(beat, desc(ct_2019), by_group = TRUE)
crime_beat_sums_2018 <- pd_2018 %>%
group_by(beat) %>%
count(name = 'ct_2018') %>%
ungroup(beat) %>%
arrange(beat, desc(ct_2018), by_group = TRUE)
crime_beat_sums_2017 <- pd_2017 %>%
group_by(beat) %>%
count(name = 'ct_2017') %>%
ungroup(beat) %>%
arrange(beat, desc(ct_2017), by_group = TRUE)
crime_beat_sums_2016 <- pd_2016 %>%
group_by(beat) %>%
count(name = 'ct_2016') %>%
ungroup(beat) %>%
arrange(beat, desc(ct_2016), by_group = TRUE)
crime_beat_sums_2015 <- pd_2015 %>%
group_by(beat) %>%
count(name = 'ct_2015') %>%
ungroup(beat) %>%
arrange(beat, desc(ct_2015), by_group = TRUE)
# now merge the aggregated call data with the spacial data frame
m <- sp::merge(geojson,crime_beat_sums_2022, by = 'beat')
m <- sp::merge(m,crime_beat_sums_2021, by = 'beat')
m <- sp::merge(m,crime_beat_sums_2020, by = 'beat')
m <- sp::merge(m,crime_beat_sums_2019, by = 'beat')
m <- sp::merge(m,crime_beat_sums_2018, by = 'beat')
m <- sp::merge(m,crime_beat_sums_2017, by = 'beat')
m <- sp::merge(m,crime_beat_sums_2016, by = 'beat')
#m <- sp::merge(m,crime_beat_sums_2015, by = 'beat')
# create variable with lable information pulled from geojson data
mytext <- paste(
"Beat: ", m$name, "<br/>",
"Total Calls 2022: ", m$ct_2022,
"<br/> Total Calls 2021: ", m$ct_2021,
"<br/> Total Calls 2020: ", m$ct_2020,
"<br/> Total Calls 2019: ", m$ct_2019,
"<br/> Total Calls 2018: ", m$ct_2018 ,
"<br/> Total Calls 2017: ", m$ct_2017 ,
"<br/> Total Calls 2016: ", m$ct_2016 ,
sep = " "
) %>%
lapply(htmltools::HTML)
# finally we create the map using leaflet
sd_crime_map <- leaflet(m) %>%
addTiles() %>%
addPolygons(stroke = TRUE,
smoothFactor = 0.3,
label = mytext,
highlightOptions = highlightOptions(color = "white", weight = 2,
bringToFront = TRUE))
sd_crime_map