Explore US state crime estimates from the US DOJ, FBI Uniform Crime Reporting (UCR) Program using the Crime Data Explorer (CDE) web interface. Plot property crime and violent crime for the nation and by state. Use motion charts to allow dynamic visualization of variables changing over time.

Data Sources

Link to the FBI Crime Data Explorer (CDE):

Data are not entirely comparable

  • Changes in reporting from year to year, etc., make comparisons problematic.

Caution against ranking

The United States Department of Justice says:

“Many entities—news media, tourism agencies, and other groups with an interest in crime in our Nation—use figures from the Uniform Crime Reporting (UCR) Program to compile rankings of cities and counties. These rankings, however, are merely a quick choice made by the data user; they provide no insight into the many variables that mold the crime in a particular town, city, county, state, region, or other jurisdiction. Consequently, these rankings lead to simplistic and/or incomplete analyses that often create misleading perceptions adversely affecting cities and counties, along with their residents.”

From: http://www.ucrdatatool.gov/ranking.cfm

Configure document rendering

Set knitr options for nicer rendering of this document.

# Attach knitr and set the default chunk options for the whole document:
# do not reformat code (tidy), do not cache chunk results (cache), and keep
# rendering when a chunk raises an error (error).
library(knitr)
opts_chunk$set(error = TRUE, cache = FALSE, tidy = FALSE)

Load packages

Load the R packages you will need, installing any that you are missing.

# Load packages
# Check for pacman without attaching it, and install it only when it is
# missing. The install uses an HTTPS mirror so the download is not fetched
# over an unencrypted connection.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman", repos = "https://cloud.r-project.org")
}

# Attach pacman (fails loudly if the install above did not succeed), then let
# p_load() attach the packages used below, installing any that are missing.
suppressPackageStartupMessages(library(pacman))
pacman::p_load(readr, stringr, tidyr, ggplot2, googleVis, httr, dplyr)

Data Management

Prepare the data folder

Create the data folder if it does not already exist.

# Build the path of the data folder and make sure the folder exists.
datadir <- file.path("data", "doj_ucr")
if (!dir.exists(datadir)) {
  # recursive = TRUE also creates the parent "data" folder when needed.
  dir.create(datadir, recursive = TRUE, showWarnings = FALSE)
}

Load the crime data

If the data file has already been downloaded into the data folder, read it. Otherwise, download the data file from the DOJ first and then read it.

# Load the crime data into a single data frame.
# The CSV is cached in the data folder; it is downloaded only when missing.
datafile <- file.path(datadir, "CrimeStatebyState.csv")
if (!file.exists(datafile)) {
  # Download the data file.
  csv.url <- paste("http://s3-us-gov-west-1.amazonaws.com",
                   "cg-d4b776d0-d898-4153-90c8-8336f86bdfec",
                   "estimated_crimes_1995_2018.csv", sep = "/")

  # Download to a temporary file first, so that a failed request can never
  # leave an error response behind as the cached data file.
  tmpfile <- tempfile(fileext = ".csv")
  res <- GET(csv.url, write_disk(tmpfile, overwrite = TRUE))

  # Stop on an HTTP error status (4xx/5xx) instead of parsing the error body.
  stop_for_status(res, task = "download the crime data file")

  if (!file.copy(tmpfile, datafile, overwrite = TRUE)) {
    stop("Could not copy the downloaded data to ", datafile, call. = FALSE)
  }
  unlink(tmpfile)
}

# Read the data file into a data frame.
us.crime <- read_csv(datafile)

Find the timespan in years

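First add full state names to the data, labelling the national rows “United States-Total”. Then find the minimum and maximum years of the dataset timespan, for the states and for the national totals. Use these for plot titles.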

# Lookup table of full state names keyed by postal abbreviation.
states <- tibble(state = state.name, state_abbr = state.abb)

# Add a full-name "state" column. Rows with no abbreviation are labelled as
# the national total, and "DC" (absent from the built-in state vectors) is
# named explicitly.
us.crime <- us.crime %>%
  left_join(states, by = c('state_abbr')) %>%
  mutate(state = case_when(
    is.na(state_abbr) ~ "United States-Total",
    state_abbr == "DC" ~ "District of Columbia",
    TRUE ~ state
  ))

# First and last year available for each state (national rows excluded).
min.max.years <- us.crime %>%
  filter(state != "United States-Total") %>%
  group_by(state) %>%
  summarise(min.year = min(year), max.year = max(year))

# Overall year span across all states.
min.states.year <- min(min.max.years$min.year)
max.states.year <- max(min.max.years$max.year)

# Year span of the national totals.
us.total.years <- us.crime[us.crime$state == "United States-Total", "year"]
max.us.year <- max(us.total.years)
min.us.year <- min(us.total.years)

Crime rate in the US

Prepare the data.

# Keep the year plus the two major crime groups (violent and property) for
# the national-total rows only.
crime.groups <- c("year", "violent_crime", "property_crime")
is.us.total <- us.crime$state == "United States-Total"
us.crime.groups <- us.crime[is.us.total, crime.groups]

# Strip the "_crime" suffix and capitalize: Year, Violent, Property.
short.names <- gsub("_crime", "", names(us.crime.groups), fixed = TRUE)
names(us.crime.groups) <- str_to_title(short.names)

# Reshape to "long" format (one row per Year and crime Type) for plotting.
us.crime.groups.long <- us.crime.groups %>%
  gather(key = "Type", value = "Rate", -Year)

Make a stacked bar plot.

# Stacked bar plot of violent versus property crime in the US, one bar per
# year, with the year span in the title.
# NOTE(review): the title says "Rate per 100,000 People"; confirm that the
# plotted columns are rates and not raw counts.
us.crime.title <- paste0("US Crime Rate per 100,000 People, ",
                         min.us.year, "-", max.us.year)
ggplot(us.crime.groups.long, aes(x = Year, y = Rate, fill = Type)) +
  geom_col() +
  ggtitle(us.crime.title)

Property crime rate in the US

Prepare the data.

# Subset the property crimes.
# all_of() makes it explicit that `property` is a character vector of column
# names rather than a column of the data.
property <- c("year", "property_crime", "burglary", 
              "larceny", "motor_vehicle_theft", 
              "state", "population")
us.property.crime.by.state <- us.crime %>% 
  select(all_of(property)) %>% 
  as_tibble()

# Rename crime names (capitalize them).
property <- c("Year", "Property", "Burglary", "Larceny", "Motor_Vehicle_Theft",
             "State", "Population")
names(us.property.crime.by.state) <- property

# Subset to only see US totals not by-state values.
us.property.crime <- us.property.crime.by.state %>%
  filter(State == "United States-Total")

# Transform data structure to "long" form for plotting.
# Use the US-totals subset here: reshaping the by-state data instead would
# make the national plot stack every state on top of the national total.
us.property.crime.long <- gather(us.property.crime, Type, Rate, 
                                 Burglary, Larceny, Motor_Vehicle_Theft)

Make a stacked bar plot.

# Stacked bar plot of US property crime, split by crime type, one bar per
# year, with the year span in the title.
# NOTE(review): the title says "Rate per 100,000 People"; confirm that the
# plotted columns are rates and not raw counts.
us.property.title <- paste0("US Property Crime Rate per 100,000 People, ",
                            min.us.year, "-", max.us.year)
ggplot(us.property.crime.long, aes(x = Year, y = Rate, fill = Type)) +
  geom_col() +
  ggtitle(us.property.title)

Violent crime rate in the US

Prepare the data.

# Subset the violent crimes.
# all_of() makes it explicit that `violent` is a character vector of column
# names rather than a column of the data.
violent <- c("year", "homicide", "rape_legacy", "rape_revised", "robbery", 
             "aggravated_assault", "violent_crime", "state", "population")
us.violent.crime.by.state <- us.crime %>% 
  select(all_of(violent)) %>% 
  as_tibble()

# Rename and capitalize variable names.
violent <- c("Year", "Murder", "Rape_Legacy", "Rape_Revised", "Robbery", 
             "Assault", "Violent", "State", "Population")
names(us.violent.crime.by.state) <- violent

# Combine Rape variables into a single variable, preferring Rape_Revised.
us.violent.crime.by.state <- us.violent.crime.by.state %>% 
  mutate(Rape = ifelse(is.na(Rape_Revised), Rape_Legacy, Rape_Revised))

# Subset to only see US totals not by-state values.
us.violent.crime <- us.violent.crime.by.state %>% 
  filter(State == "United States-Total")

# Transform data structure to "long" form for plotting.
# Use the US-totals subset here: reshaping the by-state data instead would
# make the national plot stack every state on top of the national total.
us.violent.crime.long <- us.violent.crime %>% 
  gather(Type, Rate, Murder, Rape, Robbery, Assault)

Make a stacked bar plot.

# Stacked bar plot of US violent crime, split by crime type, one bar per
# year, with the year span in the title.
# NOTE(review): the title says "Rate per 100,000 People"; confirm that the
# plotted columns are rates and not raw counts.
us.violent.title <- paste0("US Violent Crime Rate per 100,000 People, ",
                           min.us.year, "-", max.us.year)
ggplot(us.violent.crime.long, aes(x = Year, y = Rate, fill = Type)) +
  geom_col() +
  ggtitle(us.violent.title)

US property crime by state

Make a “Motion Chart” of property crime in the US by state and crime.

# Set the googleVis plot tag option to 'chart'. The previous value of the
# option is kept in `op` so that it can be restored later.
op <- options(list(gvis.plot.tag = "chart"))

Subset the data and create a plot title variable.

# Drop the national-total rows so only states remain, and keep a single row
# for each State/Year combination.
us.property.crime.by.state <- as_tibble(
  distinct(
    filter(us.property.crime.by.state, State != "United States-Total"),
    State, Year, .keep_all = TRUE
  )
)

# Chart title, including the span of years covered by the state data.
plot.title <- paste0("US Property Crime Rate by State per 100,000 People, ",
                     min.states.year, "-", max.states.year)

US Property Crime Rate by State per 100,000 People

# Build the motion chart of property crime by state: one bubble per state,
# animated over Year, positioned by motor vehicle theft (x) and burglary (y),
# colored by total property crime and sized by population.
# NOTE(review): confirm that motion charts are still supported by the
# installed googleVis version.
property.motion.chart <- gvisMotionChart(
  data = us.property.crime.by.state,
  idvar = "State",
  timevar = "Year",
  xvar = "Motor_Vehicle_Theft",
  yvar = "Burglary",
  colorvar = "Property",
  sizevar = "Population",
  options = list(title = plot.title)
)
plot(property.motion.chart)

US violent crime by state

Make a “Motion Chart” of violent crime in the US by state and crime.

First, subset the data and create a plot title variable.

# Drop the national-total rows so only states remain, and keep a single row
# for each State/Year combination.
us.violent.crime.by.state <- as_tibble(
  distinct(
    filter(us.violent.crime.by.state, State != "United States-Total"),
    State, Year, .keep_all = TRUE
  )
)

# Chart title, including the span of years covered by the state data.
plot.title <- paste0("US Violent Crime Rate by State per 100,000 People, ",
                     min.states.year, "-", max.states.year)

US Violent Crime Rate by State per 100,000 People

# Build the motion chart of violent crime by state: one bubble per state,
# animated over Year, positioned by rape (x) and murder (y), colored by total
# violent crime and sized by population.
# NOTE(review): confirm that motion charts are still supported by the
# installed googleVis version.
violent.motion.chart <- gvisMotionChart(
  data = us.violent.crime.by.state,
  idvar = "State",
  timevar = "Year",
  xvar = "Rape",
  yvar = "Murder",
  colorvar = "Violent",
  sizevar = "Population",
  options = list(title = plot.title)
)
plot(violent.motion.chart)