Synopsis

This is an exploratory data analysis of the crime rates in the USA in the year 2013. The data source is the FBI Uniform Crime Reports web site.

library(plotly)
library(plyr)
library(reshape2)

1 - Read the data set

# Load the 2013 state-level crime table.
# The file location is built with file.path() instead of calling setwd(), so
# reading the data does not change the working directory of the session.
data_dir <- "C:/Documents and Settings/Pierluigi/Documenti"
crime_csv <- file.path(data_dir, "OpenData", "CrimeData_2013.csv")

# skip = 9 ignores the first 9 lines of the file and nrows = 51 reads at most
# 51 data rows; as.is = 1 keeps the first column (State) as character.
crimedf <- read.csv(crime_csv, as.is = 1, nrows = 51, skip = 9)

# Shorten the column names: drop "rate", the character right before it and
# everything that follows it.
names(crimedf) <- gsub("(.)rate(.)*", "", x = names(crimedf))
names(crimedf)[4:5] <- c("Murder", "Rape")
# Lower-case the State names - presumably so that they match the "region"
# names of map_data("state"), which is joined to this table further below.
crimedf[, 1] <- tolower(crimedf[, 1])

2 - Analysis of violent crime rates (VCR)

The very high crime rates in District of Columbia (DofC) reflect a tendency which is typical of the large metropolitan areas in the USA.
In the following analysis we focus on the 50 States and omit DofC.

# Violent crime table: columns 1-5 and 7-8 of crimedf, with the rows sorted
# from the highest to the lowest Robbery rate.
by_robbery <- order(-crimedf$Robbery)
vcr_cols <- c(1:5, 7:8)
vcrdf <- crimedf[by_robbery, vcr_cols]
print(vcrdf)
##                   State Population Violent.Crime Murder Rape Robbery
## 9  district of columbia     649111        1296.4   15.9 45.8   628.9
## 29               nevada    2791494         605.0    5.8 39.0   185.7
## 21             maryland    5938737         475.4    6.5 19.9   169.2
## 5            california   38431393         402.6    4.5 19.4   139.6
## 33             new york   19695680         393.8    3.3 13.1   138.3
## 14             illinois   12890552         403.1    5.6 30.2   137.6
## 31           new jersey    8911502         288.9    4.5  9.7   135.6
## 8              delaware     925240         500.7    4.4 30.0   133.3
## 11              georgia    9994759         375.4    5.6 20.2   127.1
## 36                 ohio   11572005         291.4    4.1 31.1   125.2
## 44                texas   26505637         410.3    4.3 28.7   120.0
## 19            louisiana    4629284         521.2   10.7 27.0   119.7
## 10              florida   19600311         469.3    5.0 24.3   118.4
## 39         pennsylvania   12781296         335.1    4.8 21.3   115.3
## 43            tennessee    6497269         585.8    5.2 29.3   112.9
## 15              indiana    6570713         359.6    5.4 24.7   108.3
## 23             michigan    9898193         452.2    6.3 45.5   102.0
## 3               arizona    6634997         415.6    5.4 35.3   100.3
## 22        massachusetts    6708874         406.4    2.1 25.7    99.9
## 7           connecticut    3599341         262.2    2.5 18.0    98.7
## 1               alabama    4833996         431.0    7.2 30.0    96.1
## 34       north carolina    9848917         341.0    4.7 18.2    94.2
## 26             missouri    6044917         433.7    6.1 27.8    90.8
## 32           new mexico    2086895         622.5    5.9 54.4    87.8
## 2                alaska     737259         638.7    4.6 89.1    84.5
## 50            wisconsin    5742953         280.7    2.8 22.8    84.2
## 48           washington    6973742         290.0    2.4 29.7    83.6
## 41       south carolina    4771929         508.5    6.4 36.4    83.1
## 25          mississippi    2992206         277.5    7.3 24.3    80.6
## 37             oklahoma    3853118         446.1    5.1 44.5    78.5
## 4              arkansas    2958765         463.2    5.3 38.4    76.4
## 18             kentucky    4399583         210.9    3.9 21.6    73.8
## 24            minnesota    5422060         234.4    2.1 26.8    67.8
## 12               hawaii    1408987         254.4    2.3 26.0    66.3
## 40         rhode island    1053354         257.3    2.9 31.6    64.8
## 38               oregon    3928068         242.8    2.1 25.5    60.9
## 6              colorado    5272086         305.4    3.3 41.7    59.5
## 28             nebraska    1868969         264.8    3.0 33.2    55.4
## 47             virginia    8270345         197.8    3.9 17.8    55.3
## 30        new hampshire    1322616         223.2    1.6 39.5    48.9
## 17               kansas    2895801         342.8    4.0 34.6    46.2
## 45                 utah    2902787         228.9    1.8 31.9    43.0
## 49        west virginia    1853595         305.2    3.3 19.8    35.2
## 16                 iowa    3092341         273.0    1.3 28.2    30.3
## 20                maine    1328702         132.5    1.8 27.5    25.2
## 35         north dakota     723857         273.4    2.2 39.8    22.2
## 27              montana    1014864         288.1    2.3 37.6    21.4
## 42         south dakota     845510         323.2    2.1 53.7    18.5
## 13                idaho    1612843         215.2    1.8 32.0    13.6
## 51              wyoming     583223         207.8    2.9 24.7    12.7
## 46              vermont     626855         123.6    1.6 18.0    12.0
##    Aggravated.assault
## 9               590.8
## 29              360.4
## 21              271.5
## 5               231.7
## 33              234.2
## 14              218.5
## 31              135.5
## 8               319.9
## 11              213.7
## 36              124.2
## 44              246.6
## 19              352.8
## 10              311.5
## 39              185.3
## 43              429.3
## 15              211.8
## 23              275.3
## 3               262.1
## 22              270.1
## 7               136.3
## 1               285.2
## 34              216.5
## 26              298.7
## 32              453.8
## 2               424.1
## 50              161.7
## 48              166.7
## 41              372.2
## 25              155.4
## 37              302.4
## 4               331.1
## 18               95.8
## 24              127.4
## 12              149.8
## 40              146.9
## 38              142.5
## 6               186.4
## 28              161.1
## 47              110.2
## 30              113.9
## 17              247.8
## 45              131.7
## 49              228.9
## 16              204.8
## 20               68.3
## 35              199.9
## 27              218.5
## 42              242.7
## 13              158.0
## 51              157.2
## 46               88.4

3 - Histogram of VCR

# One histogram for each of columns 3 to 5 of vcrdf.
# Rows 2:51 leave out row 1, which is DofC in the Robbery-ordered table.
hist_cols <- names(vcrdf)[3:5]
for (i in seq_along(hist_cols)) {
  hist(
    x = vcrdf[2:51, hist_cols[i]],
    xlab = hist_cols[i],
    col = i + 1,
    border = "purple",
    main = ""
  )
}

4 - Barplot of VCR

b <- 15 # number of top-ranked States to plot (DofC, in row 1, is skipped)
# Rows 2 to b + 1 of the Robbery-ordered table, first six columns only.
top_vcr <- vcrdf[2:(b + 1), 1:6]
# Long format: one row per State and crime type (columns 4 to 6), with the
# value stored in the "rate" column.
vcrm <- melt(
  data = top_vcr,
  id.vars = "State",
  measure.vars = 4:6,
  value.name = "rate"
)
# Horizontal stacked bars, one bar per State, one colour per crime type.
vv <- ggplot(data = vcrm, aes(x = reorder(State, rate), y = rate)) +
  geom_bar(aes(fill = variable), position = "stack", stat = "identity")
vv + coord_flip() + theme_bw() +
  labs(x = "State", y = "Crime rate per 100,000 population - Year 2013")

5 - Relation between violent crime rates

# Keep the 50 States only: row 1 of the Robbery-ordered table is DofC.
vcrdf <- vcrdf[2:51, ]
# Population quartiles used as class breaks. include.lowest = TRUE closes the
# first interval on the left; without it the least populated State falls
# outside every interval and gets PopLevel = NA.
qnt <- quantile(vcrdf$Population, probs = seq(0, 1, 0.25))
vcrdf$PopLevel <- cut(vcrdf$Population, breaks = qnt, include.lowest = TRUE)
# Rape rate against Robbery rate; point size = Murder rate, colour =
# population quartile.
p1 <- ggplot(data = vcrdf, aes(x = Robbery, y = Rape))
p1 <- p1 + geom_point(aes(size = Murder, colour = PopLevel))
p1 + theme_bw()

6 - Map of violent crime rates

usmap <- map_data("state")
# The State name column is renamed to "region", the key used for the join
# with the map data below.
names(vcrdf)[1] <- "region"
# define 5 groups for each crime type, using the quintiles as class breaks
quintiles <- seq(0, 1, 0.2)
qnt1 <- quantile(vcrdf$Murder, probs = quintiles)
qnt2 <- quantile(vcrdf$Rape, probs = quintiles)
qnt3 <- quantile(vcrdf$Robbery, probs = quintiles)
vcrdf$MurderLevel <- cut(vcrdf$Murder, breaks = qnt1, include.lowest = TRUE)
vcrdf$RapeLevel <- cut(vcrdf$Rape, breaks = qnt2, include.lowest = TRUE)
vcrdf$RobberyLevel <- cut(vcrdf$Robbery, breaks = qnt3, include.lowest = TRUE)
mapdf <- join(x = usmap, y = vcrdf, by = "region", type = "inner")
# Top 15 States by Robbery rate. DofC was already removed from vcrdf in the
# previous section, so the ranking starts at position 1; starting at 2 would
# leave out the State with the highest Robbery rate.
vcrdf[order(vcrdf$Robbery, decreasing = TRUE)[1:15], 1:6]
##          region Population Violent.Crime Murder Rape Robbery
## 29       nevada    2791494         605.0    5.8 39.0   185.7
## 21     maryland    5938737         475.4    6.5 19.9   169.2
## 5    california   38431393         402.6    4.5 19.4   139.6
## 33     new york   19695680         393.8    3.3 13.1   138.3
## 14     illinois   12890552         403.1    5.6 30.2   137.6
## 31   new jersey    8911502         288.9    4.5  9.7   135.6
## 8      delaware     925240         500.7    4.4 30.0   133.3
## 11      georgia    9994759         375.4    5.6 20.2   127.1
## 36         ohio   11572005         291.4    4.1 31.1   125.2
## 44        texas   26505637         410.3    4.3 28.7   120.0
## 19    louisiana    4629284         521.2   10.7 27.0   119.7
## 10      florida   19600311         469.3    5.0 24.3   118.4
## 39 pennsylvania   12781296         335.1    4.8 21.3   115.3
## 43    tennessee    6497269         585.8    5.2 29.3   112.9
## 15      indiana    6570713         359.6    5.4 24.7   108.3

Plot the map of the Robbery rate

# Map of the Robbery classes: polygons filled by RobberyLevel, red outlines,
# YlGnBu palette and a fixed 1.25 aspect ratio.
mp <- ggplot(data = mapdf, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group, fill = RobberyLevel), colour = "red") +
  scale_fill_brewer(palette = "YlGnBu") +
  coord_fixed(ratio = 1.25) +
  theme_bw() +
  labs(
    title = "Robbery rate per State - Year 2013",
    x = "",
    y = "",
    fill = "Robbery level"
  )
mp

7 - Analysis of property crime rates (PCR)

# Property crime table: columns 1-2 and 9-12 of crimedf, with the rows sorted
# from the highest to the lowest Property.crime rate.
by_property <- order(-crimedf$Property.crime)
pcr_cols <- c(1:2, 9:12)
pcrdf <- crimedf[by_property, pcr_cols]
print(pcrdf)
##                   State Population Property.crime Burglary Larceny.theft
## 9  district of columbia     649111         4790.7    510.9        3781.6
## 32           new mexico    2086895         3783.5   1046.0        2450.8
## 48           washington    6973742         3715.9    838.7        2469.6
## 41       south carolina    4771929         3630.8    857.7        2509.0
## 4              arkansas    2958765         3598.7   1026.4        2380.4
## 19            louisiana    4629284         3578.9    890.3        2490.7
## 11              georgia    9994759         3399.9    835.0        2293.1
## 3               arizona    6634997         3365.4    727.8        2381.9
## 1               alabama    4833996         3347.9    877.3        2252.0
## 37             oklahoma    3853118         3280.9    869.7        2118.4
## 44                texas   26505637         3253.2    720.8        2284.2
## 12               hawaii    1408987         3212.7    552.0        2337.0
## 43            tennessee    6497269         3196.5    790.1        2223.1
## 38               oregon    3928068         3184.3    529.7        2402.3
## 26             missouri    6044917         3137.4    641.8        2225.2
## 8              delaware     925240         3134.4    680.8        2307.2
## 34       north carolina    9848917         3103.2    915.7        2041.3
## 10              florida   19600311         3097.8    708.7        2210.9
## 45                 utah    2902787         3043.1    472.9        2312.1
## 17               kansas    2895801         2959.6    603.5        2125.7
## 36                 ohio   11572005         2924.4    787.2        1968.5
## 2                alaska     737259         2877.0    395.7        2251.4
## 15              indiana    6570713         2853.1    650.7        1986.6
## 29               nevada    2791494         2836.4    825.6        1652.6
## 25          mississippi    2992206         2725.3    834.5        1744.3
## 21             maryland    5938737         2659.1    538.0        1895.2
## 6              colorado    5272086         2655.0    475.6        1941.8
## 5            california   38431393         2651.2    603.8        1617.3
## 28             nebraska    1868969         2632.3    478.6        1915.0
## 27              montana    1014864         2581.1    406.2        1989.6
## 40         rhode island    1053354         2443.7    533.6        1697.5
## 24            minnesota    5422060         2419.6    418.9        1853.8
## 18             kentucky    4399583         2374.0    598.5        1637.6
## 23             michigan    9898193         2325.0    568.3        1513.3
## 14             illinois   12890552         2296.6    458.4        1676.1
## 20                maine    1328702         2292.0    487.7        1735.8
## 46              vermont     626855         2214.5    534.7        1626.1
## 30        new hampshire    1322616         2213.6    376.5        1765.8
## 16                 iowa    3092341         2198.2    514.8        1545.9
## 51              wyoming     583223         2196.2    335.4        1761.8
## 50            wisconsin    5742953         2188.7    424.1        1635.8
## 49        west virginia    1853595         2154.1    534.5        1513.5
## 35         north dakota     723857         2121.4    411.5        1511.6
## 47             virginia    8270345         2074.4    323.0        1647.0
## 39         pennsylvania   12781296         2059.1    406.8        1544.5
## 22        massachusetts    6708874         2046.2    457.8        1452.4
## 7           connecticut    3599341         1980.2    359.3        1447.5
## 42         south dakota     845510         1924.8    400.1        1413.9
## 31           new jersey    8911502         1881.1    402.7        1324.2
## 13                idaho    1612843         1874.3    415.0        1364.0
## 33             new york   19695680         1820.7    286.6        1455.5
##    Motor.vehicle.theft
## 9                498.2
## 32               286.7
## 48               407.6
## 41               264.1
## 4                191.9
## 19               198.0
## 11               271.7
## 3                255.7
## 1                218.5
## 37               292.8
## 44               248.2
## 12               323.7
## 43               183.2
## 38               252.3
## 26               270.4
## 8                146.4
## 34               146.3
## 10               178.1
## 45               258.1
## 17               230.4
## 36               168.7
## 2                229.9
## 15               215.9
## 29               358.2
## 25               146.6
## 21               225.9
## 6                237.6
## 5                430.1
## 28               238.7
## 27               185.3
## 40               212.6
## 24               146.9
## 18               137.9
## 23               243.3
## 14               162.1
## 20                68.6
## 46                53.8
## 30                71.4
## 16               137.6
## 51                99.1
## 50               128.7
## 49               106.1
## 35               198.2
## 47               104.3
## 39               107.7
## 22               136.0
## 7                173.4
## 42               110.7
## 31               154.3
## 13                95.4
## 33                78.6

8 - Histogram of PCR

# One histogram for each of columns 3 to 5 of pcrdf.
# Rows 2:51 leave out row 1, which is DofC in the Property.crime ordering.
hist_cols <- names(pcrdf)[3:5]
for (i in seq_along(hist_cols)) {
  hist(
    x = pcrdf[2:51, hist_cols[i]],
    xlab = hist_cols[i],
    col = i + 1,
    border = "purple",
    main = ""
  )
}

9 - Barplot of PCR

b <- 15 # number of top-ranked States to plot (DofC, in row 1, is skipped)
# Rows 2 to b + 1 of the Property.crime-ordered table, all columns.
top_pcr <- pcrdf[2:(b + 1), ]
# Long format: one row per State and crime type (columns 4 to 6), with the
# value stored in the "rate" column.
pcrm <- melt(
  data = top_pcr,
  id.vars = "State",
  measure.vars = 4:6,
  value.name = "rate"
)
# Horizontal stacked bars, one bar per State, one colour per crime type.
vv <- ggplot(data = pcrm, aes(x = reorder(State, rate), y = rate)) +
  geom_bar(aes(fill = variable), position = "stack", stat = "identity")
vv + coord_flip() + theme_bw() +
  labs(x = "State", y = "Crime rate per 100,000 population - Year 2013")

10 - Map of property crime rates

# State outlines; the State name column of pcrdf becomes "region", the key
# used for the join below.
usmap <- map_data("state")
names(pcrdf)[1] <- "region"
# Split each rate into 5 groups using its quintiles as class breaks
# (include.lowest = TRUE keeps the minimum value inside the first group).
fifths <- seq(0, 1, 0.2)
qnt1 <- quantile(pcrdf[["Burglary"]], probs = fifths)
qnt2 <- quantile(pcrdf[["Larceny.theft"]], probs = fifths)
qnt3 <- quantile(pcrdf[["Property.crime"]], probs = fifths)
pcrdf[["BurglaryLevel"]] <- cut(
  pcrdf[["Burglary"]], breaks = qnt1, include.lowest = TRUE
)
pcrdf[["TheftLevel"]] <- cut(
  pcrdf[["Larceny.theft"]], breaks = qnt2, include.lowest = TRUE
)
pcrdf[["Property.CrimeLevel"]] <- cut(
  pcrdf[["Property.crime"]], breaks = qnt3, include.lowest = TRUE
)
mapdf <- join(x = usmap, y = pcrdf, by = "region", type = "inner")
# Ranks 2 to 16 by Property.crime rate: pcrdf still contains DofC, which
# ranks first, so this lists the 15 top States.
top_rows <- order(pcrdf[["Property.crime"]], decreasing = TRUE)[2:16]
pcrdf[top_rows, 1:6]
##            region Population Property.crime Burglary Larceny.theft
## 32     new mexico    2086895         3783.5   1046.0        2450.8
## 48     washington    6973742         3715.9    838.7        2469.6
## 41 south carolina    4771929         3630.8    857.7        2509.0
## 4        arkansas    2958765         3598.7   1026.4        2380.4
## 19      louisiana    4629284         3578.9    890.3        2490.7
## 11        georgia    9994759         3399.9    835.0        2293.1
## 3         arizona    6634997         3365.4    727.8        2381.9
## 1         alabama    4833996         3347.9    877.3        2252.0
## 37       oklahoma    3853118         3280.9    869.7        2118.4
## 44          texas   26505637         3253.2    720.8        2284.2
## 12         hawaii    1408987         3212.7    552.0        2337.0
## 43      tennessee    6497269         3196.5    790.1        2223.1
## 38         oregon    3928068         3184.3    529.7        2402.3
## 26       missouri    6044917         3137.4    641.8        2225.2
## 8        delaware     925240         3134.4    680.8        2307.2
##    Motor.vehicle.theft
## 32               286.7
## 48               407.6
## 41               264.1
## 4                191.9
## 19               198.0
## 11               271.7
## 3                255.7
## 1                218.5
## 37               292.8
## 44               248.2
## 12               323.7
## 43               183.2
## 38               252.3
## 26               270.4
## 8                146.4

Plot the map of the total Property Crime rate

# Map of the Property crime classes: polygons filled by Property.CrimeLevel,
# red outlines, YlGnBu palette and a fixed 1.25 aspect ratio.
mp <- ggplot(data = mapdf, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group, fill = Property.CrimeLevel), colour = "red") +
  scale_fill_brewer(palette = "YlGnBu") +
  coord_fixed(ratio = 1.25) +
  theme_bw() +
  labs(
    title = "Property Crime rate per State - Year 2013",
    x = "",
    y = "",
    fill = "Property Crime level"
  )
mp

Plot the map of the Burglary rate

# Map of the Burglary classes: polygons filled by BurglaryLevel, red
# outlines, YlGnBu palette and a fixed 1.25 aspect ratio.
mp <- ggplot(data = mapdf, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group, fill = BurglaryLevel), colour = "red") +
  scale_fill_brewer(palette = "YlGnBu") +
  coord_fixed(ratio = 1.25) +
  theme_bw() +
  labs(
    title = "Burglary rate per State - Year 2013",
    x = "",
    y = "",
    fill = "Burglary level"
  )
mp

11 - Relation between Property and Violent Crime rates

# Violent crime rate against Property crime rate, point size = Population.
# DofC is excluded by name rather than by its row position in the input file,
# so the filter keeps working if the rows of the CSV are reordered.
# %in% is used instead of != so that a missing State name cannot yield NA rows.
states_df <- crimedf[!(crimedf$State %in% "district of columbia"), ]
p2 <- ggplot(data = states_df, aes(x = Property.crime, y = Violent.Crime))
p2 <- p2 + geom_point(aes(size = Population))
p2