Violent and Property Crime Rates Across U.S. Regions (2014)
This week’s homework explores reported crime rates for all 50 U.S. states (plus District of Columbia) using the 2014 UCR Crime Dataset from Social Explorer. Data was used to see how states, categorized by region, compare in terms of both violent and property crime rates. Violent crimes include reported murders, rapes, robberies, and aggravated assaults. Property crimes include reported burglaries, larcenies, and motor vehicle thefts. Regions were divided geographically into five groups: (1) West, (2) Midwest, (3) Northeast, (4) Southwest, and (5) Southeast.
Importing the Data
# Read the 2014 UCR crime export into a tibble.
# NOTE(review): the path below is absolute and specific to one machine;
# it must exist locally for the rest of the script to run.
library(readr)
crimedata <- read_csv(file = "/Users/Jennifer/Downloads/R12018303_SL040.csv")
Data Preview
# Peek at the first six rows of the raw import.
head(crimedata, n = 6L)
Renaming Variables
library(dplyr)
# Replace the coded column names with descriptive ones. rename() takes
# new_name = old_name pairs and leaves every other column untouched.
crimedata1 <- rename(
  .data = crimedata,
  State = Geo_Name,
  Total_Pop = SE_T001_001,
  Total_VP_Crimes = SE_T002_001,
  Total_V_Crimes = SE_T002_002,
  Total_P_Crimes = SE_T002_003
)
# List all column names to confirm the renames took effect.
names(crimedata1)
[1] "Geo_FIPS" "State" "Geo_QName" "Total_Pop" "Total_VP_Crimes"
[6] "Total_V_Crimes" "Total_P_Crimes" "SE_T003_001" "SE_T003_002" "SE_T003_003"
[11] "SE_T004_001" "SE_T004_002" "SE_T004_003" "SE_T004_004" "SE_T004_005"
[16] "SE_T006_001" "SE_T006_002" "SE_T006_003" "SE_T006_004"
Recoding Existing Variables
State was recoded into five categories under new variable Region:
1. West: Wyoming, Washington, Utah, Oregon, Nevada, Montana, Idaho, Hawaii, Colorado, California, Alaska
2. Midwest: Wisconsin, South Dakota, Ohio, North Dakota, Nebraska, Missouri, Minnesota, Michigan, Kansas, Iowa, Indiana, Illinois
3. Northeast: Vermont, Rhode Island, Pennsylvania, New York, New Jersey, New Hampshire, Massachusetts, Maine, Connecticut
4. Southwest: Texas, Oklahoma, New Mexico, Arizona
5. Southeast: West Virginia, Virginia, Tennessee, South Carolina, Mississippi, Maryland, North Carolina, Louisiana, Kentucky, Georgia, Florida, Delaware, D.C., Arkansas, Alabama
# Add a numeric Region code derived from each state's name:
#   1 = West, 2 = Midwest, 3 = Northeast, 4 = Southwest, 5 = Southeast.
# The new column is appended after the existing ones.
crimedata2 <- mutate(
  .data = crimedata1,
  Region = recode(
    State,
    # 1: West
    "Washington" = 1, "Montana" = 1, "Oregon" = 1, "Idaho" = 1,
    "California" = 1, "Nevada" = 1, "Utah" = 1, "Colorado" = 1,
    "Wyoming" = 1, "Alaska" = 1, "Hawaii" = 1,
    # 2: Midwest
    "North Dakota" = 2, "South Dakota" = 2, "Nebraska" = 2, "Kansas" = 2,
    "Missouri" = 2, "Iowa" = 2, "Minnesota" = 2, "Wisconsin" = 2,
    "Michigan" = 2, "Illinois" = 2, "Ohio" = 2, "Indiana" = 2,
    # 3: Northeast
    "Maine" = 3, "New Hampshire" = 3, "Massachusetts" = 3, "Vermont" = 3,
    "New York" = 3, "New Jersey" = 3, "Pennsylvania" = 3,
    "Rhode Island" = 3, "Connecticut" = 3,
    # 4: Southwest
    "Arizona" = 4, "New Mexico" = 4, "Texas" = 4, "Oklahoma" = 4,
    # 5: Southeast
    "Arkansas" = 5, "Louisiana" = 5, "Mississippi" = 5, "Alabama" = 5,
    "Georgia" = 5, "South Carolina" = 5, "North Carolina" = 5,
    "Virginia" = 5, "Maryland" = 5, "West Virginia" = 5, "Florida" = 5,
    "Tennessee" = 5, "Kentucky" = 5, "Delaware" = 5,
    "District of Columbia" = 5
  )
)
# Tally how many rows were assigned to each region code.
table(crimedata2$Region)
1 2 3 4 5
11 12 9 4 15
Keeping Variables
I selected six pertinent variables, decreasing the total number of variables from 20 to 6.
# Keep only the region code, state name, population and the three crime
# counts, in this column order.
crimedata3 <- select(
  .data = crimedata2,
  Region,
  State,
  Total_Pop,
  Total_VP_Crimes,
  Total_P_Crimes,
  Total_V_Crimes
)
# Dimensions (rows, columns) before the selection, for comparison.
dim(crimedata2)
[1] 51 20
# Dimensions (rows, columns) after the selection.
dim(crimedata3)
[1] 51 6
Generating Summary Variables
# Group the state-level rows by region code, then collapse each group to a
# single row holding the regional population and crime-count totals.
by_region <- group_by(.data = crimedata3, Region)
crimedata4 <- summarise(
  .data = by_region,
  Region_Pop = sum(Total_Pop),
  Region_VP_Crimes = sum(Total_VP_Crimes),
  Region_V_Crimes = sum(Total_V_Crimes),
  Region_P_Crimes = sum(Total_P_Crimes)
)
print(crimedata4)
Generating More Variables
I created three new variables, Region_VP_Rate, Region_V_Rate, and Region_P_Rate which indicate violent and property crime rates for each region.
Region_VP_Rate: Combined violent and property crimes as a percentage of each region's population
Region_V_Rate: Violent crimes as a percentage of each region's population
Region_P_Rate: Property crimes as a percentage of each region's population
# Express each regional crime total as a percentage of the regional
# population (count / population * 100).
crimedata5 <- mutate(
  .data = crimedata4,
  Region_VP_Rate = Region_VP_Crimes / Region_Pop * 100,
  Region_V_Rate = Region_V_Crimes / Region_Pop * 100,
  Region_P_Rate = Region_P_Crimes / Region_Pop * 100
)
# Show only the three rate columns just added (columns 6 through 8).
print(crimedata5[, c("Region_VP_Rate", "Region_V_Rate", "Region_P_Rate")])
Chaining All Operations Together Using Magrittr
library(magrittr)
# Same workflow as the step-by-step version, written as one pipeline:
# rename -> keep needed columns -> code regions -> total by region -> rates.
crime_data <- crimedata %>%
  # Descriptive names for the coded columns (new_name = old_name).
  rename(
    State = Geo_Name,
    Total_Pop = SE_T001_001,
    Total_VP_Crimes = SE_T002_001,
    Total_V_Crimes = SE_T002_002,
    Total_P_Crimes = SE_T002_003
  ) %>%
  select(
    State,
    Total_Pop,
    Total_VP_Crimes,
    Total_P_Crimes,
    Total_V_Crimes
  ) %>%
  # Region codes: 1 = West, 2 = Midwest, 3 = Northeast, 4 = Southwest,
  # 5 = Southeast.
  mutate(
    Region = recode(
      State,
      # 1: West
      "Washington" = 1, "Montana" = 1, "Oregon" = 1, "Idaho" = 1,
      "California" = 1, "Nevada" = 1, "Utah" = 1, "Colorado" = 1,
      "Wyoming" = 1, "Alaska" = 1, "Hawaii" = 1,
      # 2: Midwest
      "North Dakota" = 2, "South Dakota" = 2, "Nebraska" = 2, "Kansas" = 2,
      "Missouri" = 2, "Iowa" = 2, "Minnesota" = 2, "Wisconsin" = 2,
      "Michigan" = 2, "Illinois" = 2, "Ohio" = 2, "Indiana" = 2,
      # 3: Northeast
      "Maine" = 3, "New Hampshire" = 3, "Massachusetts" = 3, "Vermont" = 3,
      "New York" = 3, "New Jersey" = 3, "Pennsylvania" = 3,
      "Rhode Island" = 3, "Connecticut" = 3,
      # 4: Southwest
      "Arizona" = 4, "New Mexico" = 4, "Texas" = 4, "Oklahoma" = 4,
      # 5: Southeast
      "Arkansas" = 5, "Louisiana" = 5, "Mississippi" = 5, "Alabama" = 5,
      "Georgia" = 5, "South Carolina" = 5, "North Carolina" = 5,
      "Virginia" = 5, "Maryland" = 5, "West Virginia" = 5, "Florida" = 5,
      "Tennessee" = 5, "Kentucky" = 5, "Delaware" = 5,
      "District of Columbia" = 5
    )
  ) %>%
  group_by(Region) %>%
  # One row per region with population and crime-count totals.
  summarise(
    Region_Pop = sum(Total_Pop),
    Region_VP_Crimes = sum(Total_VP_Crimes),
    Region_V_Crimes = sum(Total_V_Crimes),
    Region_P_Crimes = sum(Total_P_Crimes)
  ) %>%
  # Crime totals as a percentage of the regional population.
  mutate(
    Region_VP_Rate = Region_VP_Crimes / Region_Pop * 100,
    Region_V_Rate = Region_V_Crimes / Region_Pop * 100,
    Region_P_Rate = Region_P_Crimes / Region_Pop * 100
  )
print(crime_data)
Visualizations
The data shows that the percentage of reported crimes per population is highest in the Southwest region (3.47%), followed by the Southeast (3.25%), West (2.99%), Midwest (2.62%), and Northeast (2.13%). This order holds true for both violent and property crimes.
library(ggplot2)
# Side-by-side (dodged) bars: one bar per crime type within each region.
# NOTE(review): RegionData_LongFormat is not created anywhere in the code
# shown in this report; it presumably is a long-format reshaping of the
# regional rates with columns Region, CrimeRate and CrimeType. Confirm it
# is defined before this plot is run.
ggplot(
  data = RegionData_LongFormat,
  mapping = aes(x = Region, y = CrimeRate, fill = CrimeType)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Percent of Violent and Property Crimes Across U.S. Regions (2014)",
    y = "Percentage of Reported Crimes"
  )

