This short R “notebook” walks through some of the major characteristics present in Washington, DC’s Sex Offender Registry. The analysis includes a short overview of the dataset and two map graphics: one showing the distribution of sex offenders across the city, and a density map showing where sex offenders are clustered. Finally, the project concludes with a comparison analysis that determines which zipcode has the most sex offenders living in it.
#These commands import the packages needed for this analysis.
library(tidyverse) #Data conditioning and plotting
library(here) #Data directory management
library(janitor) #Data conditioning
library(rgdal) #Work with shapefiles
Shapefiles can be difficult to work with in ggplot. As a result, the shapefile data are read into the analysis using the rgdal package. When imported this way, the data arrive as a Large SpatialPolygonsDataFrame, rather than as the data-frame-based object that a different library such as sf would produce.
# Importing the registry table ----
# here() builds the file path from the project's data/ folder.
so_data <- here("data", "Sex_Offender_Registry.csv") %>%
  read_csv()

# Importing Zipcode Shapefile using RGDAL ----
# Only the path to the .shp file is supplied to readOGR().
shp <- here("data", "Zip_Codes", "Zip_Codes.shp") %>%
  readOGR()
## OGR data source with driver: ESRI Shapefile
## Source: "C:\PSU\Geog588\R_Projects\RYouWithMe\Data\Zip_Codes\Zip_Codes.shp", layer: "Zip_Codes"
## with 172 features
## It has 15 fields
# Conditioning and Filtering data ----
# Standardise the column names, keep only the HOME address records, retain
# the name and location columns, and store zipcode as a factor so it can be
# used as a discrete variable.
clean_so <- so_data %>%
  clean_names() %>%
  filter(type == "HOME") %>%
  select(firstname, lastname, type, zipcode, latitude, longitude) %>%
  mutate(zipcode = as.factor(zipcode))
# PLOT 1 ----
# Mapping each home location, colored by zipcode, over the zipcode outlines.
clean_so %>%
  # NOTE(review): "200322131" looks like a malformed zipcode (presumably a
  # ZIP+4 entered without its hyphen) -- confirm against the source data.
  filter(zipcode != "200322131") %>%
  ggplot(aes(x = longitude, y = latitude, color = zipcode)) +
  geom_point() +
  geom_polygon(
    data = shp,
    aes(x = long, y = lat, group = group),
    color = "grey",
    fill = NA
  ) +
  # theme() styles this plot only. theme_update() instead rewrites the
  # session-wide default theme and returns the *previous* theme, so adding
  # it to a plot applies the old settings rather than the ones listed.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0),
    legend.position = "right",
    # hjust here replaces the deprecated legend.title.align setting.
    legend.title = element_text(face = "bold", hjust = 0.5)
  ) +
  labs(
    title = "Sex Offender Home Locations by Zipcode",
    subtitle = "Washington, DC",
    caption = "Sex Offender Registry: https://arcg.is/0rDa0O0 \nZipcode Data: https://arcg.is/0eLiiS",
    x = "Longitude",
    y = "Latitude"
  )
# PLOT 2 ----
# Graphing the total number of sex offenders by zipcode
# count() tallies the rows per zipcode directly, so no helper column of 1s
# is needed.
byZipcode <- clean_so %>%
  count(zipcode, name = "num_offenders")

byZipcode %>%
  # NOTE(review): "200322131" looks like a malformed zipcode -- confirm.
  filter(zipcode != "200322131") %>%
  ggplot(aes(x = zipcode, y = num_offenders, fill = num_offenders)) +
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = num_offenders),
    hjust = -0.25,
    color = "black",
    size = 3.5
  ) +
  scale_fill_gradient(low = "blue", high = "red") +
  # Flip so the zipcodes run down the vertical axis.
  coord_flip() +
  # theme() styles this plot only; theme_update() would change the global
  # default theme and hand back the previous one instead.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  ) +
  labs(
    title = "Sum of Sex Offenders by Zipcode",
    subtitle = "Washington, DC",
    caption = "Sex Offender Registry: https://arcg.is/0rDa0O0 \nZipcode Data: https://arcg.is/0eLiiS",
    x = "Zipcode",
    y = "Number of Sex Offenders"
  )
# PLOT 3 ----
# Filtering out zipcodes and focusing on the top zipcodes
# %in% tests each row against the whole set. The previous `==` comparison
# recycled the four zipcodes element-wise down the column, so a row was kept
# only when it happened to line up with the matching position.
# NOTE(review): unlike the other plots, this one does not filter to
# type == "HOME", so every address type is counted -- confirm this is intended.
Zip_Year <- so_data %>%
  clean_names() %>%
  filter(zipcode %in% c("20019", "20020", "20032", "20002")) %>%
  select(zipcode, registrationdate) %>%
  # separate() splits on non-alphanumeric characters by default; this assumes
  # registrationdate begins year-month-day -- TODO confirm against the data.
  separate(registrationdate, c("year", "month", "day"), remove = FALSE) %>%
  # Grouping the data by year and zipcode; count() returns an ungrouped
  # tibble with one row per year/zipcode pair.
  count(year, zipcode, name = "totalnum")

# Plotting
Zip_Year %>%
  ggplot(aes(x = year, y = totalnum)) +
  geom_col() +
  facet_wrap(~zipcode) +
  coord_flip() +
  # theme() styles this plot only; theme_update() would change the global
  # default theme and hand back the previous one instead.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  ) +
  labs(
    title = "Comparing Sex Offender Registrations Across the Highest Zipcodes",
    subtitle = "Washington, DC",
    caption = "Sex Offender Registry: https://arcg.is/0rDa0O0 \nZipcode Data: https://arcg.is/0eLiiS",
    x = "Year",
    y = "Number of Sex Offenders Registered"
  )
# PLOT 4 ----
# Density map of home locations drawn over the zipcode outlines.
plot4 <- clean_so %>%
  # NOTE(review): "200322131" looks like a malformed zipcode -- confirm.
  filter(zipcode != "200322131") %>%
  ggplot(aes(x = longitude, y = latitude)) +
  geom_density_2d_filled(alpha = 0.4) +
  scale_fill_brewer("Density", type = "seq", palette = 14) +
  geom_point(size = 0.25, alpha = 0.2) +
  geom_polygon(
    data = shp,
    aes(x = long, y = lat, group = group),
    color = "black",
    fill = NA,
    linewidth = 0.15
  ) +
  # theme() styles this plot only; theme_update() would change the global
  # default theme and hand back the previous one instead.
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0)
  ) +
  labs(
    title = "Density of Registered Sex Offenders",
    subtitle = "Washington, DC",
    caption = "Sex Offender Registry: https://arcg.is/0rDa0O0 \nZipcode Data: https://arcg.is/0eLiiS"
  )

plot4

# Pass the plot explicitly so the saved file does not depend on which plot
# happened to be displayed last.
ggsave(here("data", "Density of Registered Sex Offenders.jpeg"), plot = plot4)