Jennifer Ganeles
2/10/19

Violent and Property Crime Rates Across U.S. Regions (2014)

This week’s homework explores reported crime rates for all 50 U.S. states (plus District of Columbia) using the 2014 UCR Crime Dataset from Social Explorer. Data was used to see how states, categorized by region, compare in terms of both violent and property crime rates. Violent crimes include reported murders, rapes, robberies, and aggravated assaults. Property crimes include reported burglaries, larcenies, and motor vehicle thefts. Regions were divided geographically into five groups: (1) West, (2) Midwest, (3) Northeast, (4) Southwest, and (5) Southeast.

Importing the Data

# Read the Social Explorer CSV export into a tibble named `crimedata`.
# NOTE(review): the path is absolute and machine-specific; the script will
# only run where this exact file location exists.
library(readr)
crimedata <- read_csv(
  file = "/Users/Jennifer/Downloads/R12018303_SL040.csv"
)

Data Preview

# Preview the first six rows of the imported data.
head(crimedata, n = 6L)

Renaming Variables

library(dplyr)
# Replace the coded column names used later in the analysis with readable
# ones (new_name = old_name); all other columns keep their original names.
crimedata1 <- crimedata %>%
  rename(
    State           = Geo_Name,
    Total_Pop       = SE_T001_001,
    Total_VP_Crimes = SE_T002_001,
    Total_V_Crimes  = SE_T002_002,
    Total_P_Crimes  = SE_T002_003
  )
# List the column names to confirm the renames took effect.
names(crimedata1)
 [1] "Geo_FIPS"        "State"           "Geo_QName"       "Total_Pop"       "Total_VP_Crimes"
 [6] "Total_V_Crimes"  "Total_P_Crimes"  "SE_T003_001"     "SE_T003_002"     "SE_T003_003"    
[11] "SE_T004_001"     "SE_T004_002"     "SE_T004_003"     "SE_T004_004"     "SE_T004_005"    
[16] "SE_T006_001"     "SE_T006_002"     "SE_T006_003"     "SE_T006_004"    

Recoding Existing Variables

State was recoded into five categories under new variable Region:


1. West: Wyoming, Washington, Utah, Oregon, Nevada, Montana, Idaho, Hawaii, Colorado, California, Alaska
2. Midwest: Wisconsin, South Dakota, Ohio, North Dakota, Nebraska, Missouri, Minnesota, Michigan, Kansas, Iowa, Indiana, Illinois
3. Northeast: Vermont, Rhode Island, Pennsylvania, New York, New Jersey, New Hampshire, Massachusetts, Maine, Connecticut
4. Southwest: Texas, Oklahoma, New Mexico, Arizona
5. Southeast: West Virginia, Virginia, Tennessee, South Carolina, Mississippi, Maryland, North Carolina, Louisiana, Kentucky, Georgia, Florida, Delaware, D.C., Arkansas, Alabama

# Append a numeric Region column derived from State:
# 1 = West, 2 = Midwest, 3 = Northeast, 4 = Southwest, 5 = Southeast.
crimedata2 <- crimedata1 %>%
  mutate(
    Region = recode(
      State,
      # 1: West
      "Washington" = 1, "Montana" = 1, "Oregon" = 1, "Idaho" = 1,
      "California" = 1, "Nevada" = 1, "Utah" = 1, "Colorado" = 1,
      "Wyoming" = 1, "Alaska" = 1, "Hawaii" = 1,
      # 2: Midwest
      "North Dakota" = 2, "South Dakota" = 2, "Nebraska" = 2, "Kansas" = 2,
      "Missouri" = 2, "Iowa" = 2, "Minnesota" = 2, "Wisconsin" = 2,
      "Michigan" = 2, "Illinois" = 2, "Ohio" = 2, "Indiana" = 2,
      # 3: Northeast
      "Maine" = 3, "New Hampshire" = 3, "Massachusetts" = 3, "Vermont" = 3,
      "New York" = 3, "New Jersey" = 3, "Pennsylvania" = 3,
      "Rhode Island" = 3, "Connecticut" = 3,
      # 4: Southwest
      "Arizona" = 4, "New Mexico" = 4, "Texas" = 4, "Oklahoma" = 4,
      # 5: Southeast
      "Arkansas" = 5, "Louisiana" = 5, "Mississippi" = 5, "Alabama" = 5,
      "Georgia" = 5, "South Carolina" = 5, "North Carolina" = 5,
      "Virginia" = 5, "Maryland" = 5, "West Virginia" = 5, "Florida" = 5,
      "Tennessee" = 5, "Kentucky" = 5, "Delaware" = 5,
      "District of Columbia" = 5
    )
  )
# Count how many rows fall in each region code.
table(crimedata2$Region)

 1  2  3  4  5 
11 12  9  4 15 

Keeping Variables

I selected six pertinent variables, decreasing the total number of variables from 20 to 6.

# Keep only the six columns needed for the regional comparison, in this order.
crimedata3 <- crimedata2 %>%
  select(
    Region,
    State,
    Total_Pop,
    Total_VP_Crimes,
    Total_P_Crimes,
    Total_V_Crimes
  )
# Dimensions (rows, columns) before the selection.
dim(crimedata2)
[1] 51 20
# Dimensions (rows, columns) after the selection.
print(dim(crimedata3))
[1] 51  6

Generating Summary Variables

# Group the state-level rows by region code.
by_region <- crimedata3 %>%
  group_by(Region)
# Collapse to one row per region by summing population and crime counts
# over the states in that region.
crimedata4 <- by_region %>%
  summarize(
    Region_Pop       = sum(Total_Pop),
    Region_VP_Crimes = sum(Total_VP_Crimes),
    Region_V_Crimes  = sum(Total_V_Crimes),
    Region_P_Crimes  = sum(Total_P_Crimes)
  )
print(crimedata4)

Generating More Variables

I created three new variables, Region_VP_Rate, Region_V_Rate, and Region_P_Rate which indicate violent and property crime rates for each region.

Region_VP_Rate: Combined violent and property crimes as a percentage of each region's population
Region_V_Rate: Violent crimes as a percentage of each region's population
Region_P_Rate: Property crimes as a percentage of each region's population

# Express each regional crime total as a percentage of regional population.
crimedata5 <- crimedata4 %>%
  mutate(
    Region_VP_Rate = Region_VP_Crimes / Region_Pop * 100,
    Region_V_Rate  = Region_V_Crimes / Region_Pop * 100,
    Region_P_Rate  = Region_P_Crimes / Region_Pop * 100
  )
# Show only the three new rate columns (columns 6 through 8 of crimedata5).
print(select(crimedata5, Region_VP_Rate, Region_V_Rate, Region_P_Rate))

Chaining All Operations Together Using Magrittr

library(magrittr)
# Same steps as above in one pipeline: rename, keep the needed columns,
# assign region codes, sum by region, then compute percentage rates.
crime_data <- crimedata %>%
  # Readable names for the coded columns (new_name = old_name).
  rename(
    State           = Geo_Name,
    Total_Pop       = SE_T001_001,
    Total_VP_Crimes = SE_T002_001,
    Total_V_Crimes  = SE_T002_002,
    Total_P_Crimes  = SE_T002_003
  ) %>%
  select(
    State,
    Total_Pop,
    Total_VP_Crimes,
    Total_P_Crimes,
    Total_V_Crimes
  ) %>%
  # Region codes: 1 = West, 2 = Midwest, 3 = Northeast, 4 = Southwest,
  # 5 = Southeast.
  mutate(
    Region = recode(
      State,
      # 1: West
      "Washington" = 1, "Montana" = 1, "Oregon" = 1, "Idaho" = 1,
      "California" = 1, "Nevada" = 1, "Utah" = 1, "Colorado" = 1,
      "Wyoming" = 1, "Alaska" = 1, "Hawaii" = 1,
      # 2: Midwest
      "North Dakota" = 2, "South Dakota" = 2, "Nebraska" = 2, "Kansas" = 2,
      "Missouri" = 2, "Iowa" = 2, "Minnesota" = 2, "Wisconsin" = 2,
      "Michigan" = 2, "Illinois" = 2, "Ohio" = 2, "Indiana" = 2,
      # 3: Northeast
      "Maine" = 3, "New Hampshire" = 3, "Massachusetts" = 3, "Vermont" = 3,
      "New York" = 3, "New Jersey" = 3, "Pennsylvania" = 3,
      "Rhode Island" = 3, "Connecticut" = 3,
      # 4: Southwest
      "Arizona" = 4, "New Mexico" = 4, "Texas" = 4, "Oklahoma" = 4,
      # 5: Southeast
      "Arkansas" = 5, "Louisiana" = 5, "Mississippi" = 5, "Alabama" = 5,
      "Georgia" = 5, "South Carolina" = 5, "North Carolina" = 5,
      "Virginia" = 5, "Maryland" = 5, "West Virginia" = 5, "Florida" = 5,
      "Tennessee" = 5, "Kentucky" = 5, "Delaware" = 5,
      "District of Columbia" = 5
    )
  ) %>%
  group_by(Region) %>%
  # One row per region: summed population and crime counts.
  summarize(
    Region_Pop       = sum(Total_Pop),
    Region_VP_Crimes = sum(Total_VP_Crimes),
    Region_V_Crimes  = sum(Total_V_Crimes),
    Region_P_Crimes  = sum(Total_P_Crimes)
  ) %>%
  # Crime totals as a percentage of regional population.
  mutate(
    Region_VP_Rate = Region_VP_Crimes / Region_Pop * 100,
    Region_V_Rate  = Region_V_Crimes / Region_Pop * 100,
    Region_P_Rate  = Region_P_Crimes / Region_Pop * 100
  )
print(crime_data)

Reshaping from Wide to Long Format

library(tidyr)
# Keep the region code plus the three percentage rates, giving the rates the
# short names that become the CrimeType labels after reshaping.
RegionData <- crime_data %>%
  rename(
    Combined = Region_VP_Rate,
    Property = Region_P_Rate,
    Violent  = Region_V_Rate
  ) %>%
  select(Region, Combined, Property, Violent)
# Stack the three rate columns into CrimeType / CrimeRate pairs, leaving
# Region as the identifier column.
# The input is RegionData, built just above. The earlier call passed
# `CrimeData`, an object that none of the steps shown here creates, so the
# reshape failed with an "object not found" error.
RegionData_LongFormat <- gather(
  RegionData,
  key = "CrimeType",
  value = "CrimeRate",
  -Region
)
head(RegionData_LongFormat)

Visualizations

The data shows that the percentage of reported crimes per population is highest in the Southwest region (3.47%), followed by the Southeast (3.25%), West (2.99%), Midwest (2.62%), and Northeast (2.13%). This order holds true for both violent and property crimes.

library(ggplot2)
# Side-by-side bars of crime rate per region code, one bar per crime type.
ggplot(
  data = RegionData_LongFormat,
  mapping = aes(x = Region, y = CrimeRate, fill = CrimeType)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Percent of Violent and Property Crimes Across U.S. Regions (2014)",
    y = "Percentage of Reported Crimes"
  )

