This is an exploratory data analysis of the crime rates in the USA in the year 2013. The data source is the FBI Uniform Crime Reports web site.
library(plotly)
library(plyr)
library(reshape2)
# Load the 2013 crime table and tidy its column headers.
# NOTE(review): setwd() ties this script to a single machine; consider building
# the full path with file.path() once nothing else relies on the working dir.
setwd("C:/Documents and Settings/Pierluigi/Documenti")
crimedf <- read.csv(
  file = "./OpenData/CrimeData_2013.csv",
  skip = 9,    # ignore the first 9 lines of the file
  nrows = 51,  # read at most 51 data rows
  as.is = 1    # do not convert the first column to a factor
)
# Drop the "rate" suffix from the headers: the pattern removes the character
# preceding "rate", the word itself and everything that follows it.
names(crimedf) <- gsub(pattern = "(.)rate(.)*", replacement = "",
                       x = names(crimedf))
# Give columns 4 and 5 short labels.
names(crimedf)[c(4, 5)] <- c("Murder", "Rape")
# Lower-case the first column (it is later renamed "region" and used as the
# join key against the map data).
crimedf[[1]] <- tolower(crimedf[[1]])
The very high crime rates in the District of Columbia (DofC) reflect a tendency that is typical of large metropolitan areas in the USA.
In the following analysis we focus on the 50 States and omit DofC.
# Violent-crime view of the data: rows ranked by Robbery (highest first),
# keeping columns 1-5 and 7-8 of crimedf.
robbery_rank <- order(-crimedf$Robbery)
violent_cols <- c(1:5, 7:8)
vcrdf <- crimedf[robbery_rank, violent_cols]
print(vcrdf)
## State Population Violent.Crime Murder Rape Robbery
## 9 district of columbia 649111 1296.4 15.9 45.8 628.9
## 29 nevada 2791494 605.0 5.8 39.0 185.7
## 21 maryland 5938737 475.4 6.5 19.9 169.2
## 5 california 38431393 402.6 4.5 19.4 139.6
## 33 new york 19695680 393.8 3.3 13.1 138.3
## 14 illinois 12890552 403.1 5.6 30.2 137.6
## 31 new jersey 8911502 288.9 4.5 9.7 135.6
## 8 delaware 925240 500.7 4.4 30.0 133.3
## 11 georgia 9994759 375.4 5.6 20.2 127.1
## 36 ohio 11572005 291.4 4.1 31.1 125.2
## 44 texas 26505637 410.3 4.3 28.7 120.0
## 19 louisiana 4629284 521.2 10.7 27.0 119.7
## 10 florida 19600311 469.3 5.0 24.3 118.4
## 39 pennsylvania 12781296 335.1 4.8 21.3 115.3
## 43 tennessee 6497269 585.8 5.2 29.3 112.9
## 15 indiana 6570713 359.6 5.4 24.7 108.3
## 23 michigan 9898193 452.2 6.3 45.5 102.0
## 3 arizona 6634997 415.6 5.4 35.3 100.3
## 22 massachusetts 6708874 406.4 2.1 25.7 99.9
## 7 connecticut 3599341 262.2 2.5 18.0 98.7
## 1 alabama 4833996 431.0 7.2 30.0 96.1
## 34 north carolina 9848917 341.0 4.7 18.2 94.2
## 26 missouri 6044917 433.7 6.1 27.8 90.8
## 32 new mexico 2086895 622.5 5.9 54.4 87.8
## 2 alaska 737259 638.7 4.6 89.1 84.5
## 50 wisconsin 5742953 280.7 2.8 22.8 84.2
## 48 washington 6973742 290.0 2.4 29.7 83.6
## 41 south carolina 4771929 508.5 6.4 36.4 83.1
## 25 mississippi 2992206 277.5 7.3 24.3 80.6
## 37 oklahoma 3853118 446.1 5.1 44.5 78.5
## 4 arkansas 2958765 463.2 5.3 38.4 76.4
## 18 kentucky 4399583 210.9 3.9 21.6 73.8
## 24 minnesota 5422060 234.4 2.1 26.8 67.8
## 12 hawaii 1408987 254.4 2.3 26.0 66.3
## 40 rhode island 1053354 257.3 2.9 31.6 64.8
## 38 oregon 3928068 242.8 2.1 25.5 60.9
## 6 colorado 5272086 305.4 3.3 41.7 59.5
## 28 nebraska 1868969 264.8 3.0 33.2 55.4
## 47 virginia 8270345 197.8 3.9 17.8 55.3
## 30 new hampshire 1322616 223.2 1.6 39.5 48.9
## 17 kansas 2895801 342.8 4.0 34.6 46.2
## 45 utah 2902787 228.9 1.8 31.9 43.0
## 49 west virginia 1853595 305.2 3.3 19.8 35.2
## 16 iowa 3092341 273.0 1.3 28.2 30.3
## 20 maine 1328702 132.5 1.8 27.5 25.2
## 35 north dakota 723857 273.4 2.2 39.8 22.2
## 27 montana 1014864 288.1 2.3 37.6 21.4
## 42 south dakota 845510 323.2 2.1 53.7 18.5
## 13 idaho 1612843 215.2 1.8 32.0 13.6
## 51 wyoming 583223 207.8 2.9 24.7 12.7
## 46 vermont 626855 123.6 1.6 18.0 12.0
## Aggravated.assault
## 9 590.8
## 29 360.4
## 21 271.5
## 5 231.7
## 33 234.2
## 14 218.5
## 31 135.5
## 8 319.9
## 11 213.7
## 36 124.2
## 44 246.6
## 19 352.8
## 10 311.5
## 39 185.3
## 43 429.3
## 15 211.8
## 23 275.3
## 3 262.1
## 22 270.1
## 7 136.3
## 1 285.2
## 34 216.5
## 26 298.7
## 32 453.8
## 2 424.1
## 50 161.7
## 48 166.7
## 41 372.2
## 25 155.4
## 37 302.4
## 4 331.1
## 18 95.8
## 24 127.4
## 12 149.8
## 40 146.9
## 38 142.5
## 6 186.4
## 28 161.1
## 47 110.2
## 30 113.9
## 17 247.8
## 45 131.7
## 49 228.9
## 16 204.8
## 20 68.3
## 35 199.9
## 27 218.5
## 42 242.7
## 13 158.0
## 51 157.2
## 46 88.4
# One histogram per column 3-5 of vcrdf (Violent.Crime, Murder, Rape).
# Rows 2:51 skip the first row, which is DofC in the Robbery ranking.
for (col_idx in 3:5) {
  hist(
    x = vcrdf[2:51, col_idx],
    xlab = names(vcrdf)[col_idx],
    col = col_idx - 1,  # a different fill colour (2, 3, 4) for each histogram
    border = "purple",
    main = ""
  )
}
# Stacked bar chart of the b States with the highest Robbery rate.
# Row 1 of vcrdf is DofC, hence the window 2:(b + 1).
b <- 15
vcrm <- melt(
  data = vcrdf[2:(b + 1), 1:6],
  id.vars = "State",
  measure.vars = 4:6,  # Murder, Rape, Robbery
  value.name = "rate"
)
vv <- ggplot(data = vcrm, aes(x = reorder(State, rate), y = rate)) +
  geom_bar(aes(fill = variable), position = "stack", stat = "identity")
# Horizontal bars make the State names readable.
vv +
  coord_flip() +
  theme_bw() +
  labs(x = "State", y = "Crime rate per 100,000 population - Year 2013")
# From here on keep only the 50 States: row 1 of the Robbery ranking is DofC.
vcrdf <- vcrdf[2:51, ]
# Split the States into four population bands at the quartiles.
qnt <- quantile(vcrdf$Population, probs = seq(0, 1, 0.25))
# include.lowest = TRUE closes the first interval on the left; without it the
# least-populated State equals the lowest break, falls outside every interval
# and gets PopLevel = NA.
vcrdf$PopLevel <- cut(vcrdf$Population, breaks = qnt, include.lowest = TRUE)
# Rape vs Robbery, with point size showing Murder and colour the population band.
p1 <- ggplot(data = vcrdf, aes(x = Robbery, y = Rape))
p1 <- p1 + geom_point(aes(size = Murder, colour = PopLevel))
p1 + theme_bw()
# Polygon data for the State outlines.
usmap <- map_data("state")
# The first column becomes the join key shared with usmap.
colnames(vcrdf)[1] <- "region"
## define 5 groups for each crime type (breaks at the quintiles)
quintile_probs <- seq(0, 1, 0.2)
qnt1 <- quantile(vcrdf$Murder, probs = quintile_probs)
qnt2 <- quantile(vcrdf$Rape, probs = quintile_probs)
qnt3 <- quantile(vcrdf$Robbery, probs = quintile_probs)
vcrdf[["MurderLevel"]] <- cut(vcrdf$Murder, breaks = qnt1,
                              include.lowest = TRUE)
vcrdf[["RapeLevel"]] <- cut(vcrdf$Rape, breaks = qnt2,
                            include.lowest = TRUE)
vcrdf[["RobberyLevel"]] <- cut(vcrdf$Robbery, breaks = qnt3,
                               include.lowest = TRUE)
# Attach the crime figures to every polygon vertex; the inner join keeps only
# regions present in both tables.
mapdf <- join(x = usmap, y = vcrdf, by = "region", type = "inner")
# Top 15 States by Robbery rate. DofC has already been removed from vcrdf, so
# the ranking must start at position 1; starting at 2 would drop the State
# with the highest Robbery rate instead of DofC.
vcrdf[order(vcrdf$Robbery, decreasing = TRUE)[1:15], 1:6]
## region Population Violent.Crime Murder Rape Robbery
## 29 nevada 2791494 605.0 5.8 39.0 185.7
## 21 maryland 5938737 475.4 6.5 19.9 169.2
## 5 california 38431393 402.6 4.5 19.4 139.6
## 33 new york 19695680 393.8 3.3 13.1 138.3
## 14 illinois 12890552 403.1 5.6 30.2 137.6
## 31 new jersey 8911502 288.9 4.5 9.7 135.6
## 8 delaware 925240 500.7 4.4 30.0 133.3
## 11 georgia 9994759 375.4 5.6 20.2 127.1
## 36 ohio 11572005 291.4 4.1 31.1 125.2
## 44 texas 26505637 410.3 4.3 28.7 120.0
## 19 louisiana 4629284 521.2 10.7 27.0 119.7
## 10 florida 19600311 469.3 5.0 24.3 118.4
## 39 pennsylvania 12781296 335.1 4.8 21.3 115.3
## 43 tennessee 6497269 585.8 5.2 29.3 112.9
## 15 indiana 6570713 359.6 5.4 24.7 108.3
# Choropleth of the Robbery quintile per State.
mp <- ggplot(data = mapdf, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group, fill = RobberyLevel), colour = "red") +
  scale_fill_brewer(palette = "YlGnBu") +
  coord_fixed(ratio = 1.25) +  # fixed aspect ratio so the map is not stretched
  theme_bw() +
  labs(
    title = "Robbery rate per State - Year 2013",
    x = "",
    y = "",
    fill = "Robbery level"
  )
mp
# Property-crime view of the data: rows ranked by Property.crime (highest
# first), keeping columns 1-2 and 9-12 of crimedf.
property_rank <- order(-crimedf$Property.crime)
property_cols <- c(1:2, 9:12)
pcrdf <- crimedf[property_rank, property_cols]
print(pcrdf)
## State Population Property.crime Burglary Larceny.theft
## 9 district of columbia 649111 4790.7 510.9 3781.6
## 32 new mexico 2086895 3783.5 1046.0 2450.8
## 48 washington 6973742 3715.9 838.7 2469.6
## 41 south carolina 4771929 3630.8 857.7 2509.0
## 4 arkansas 2958765 3598.7 1026.4 2380.4
## 19 louisiana 4629284 3578.9 890.3 2490.7
## 11 georgia 9994759 3399.9 835.0 2293.1
## 3 arizona 6634997 3365.4 727.8 2381.9
## 1 alabama 4833996 3347.9 877.3 2252.0
## 37 oklahoma 3853118 3280.9 869.7 2118.4
## 44 texas 26505637 3253.2 720.8 2284.2
## 12 hawaii 1408987 3212.7 552.0 2337.0
## 43 tennessee 6497269 3196.5 790.1 2223.1
## 38 oregon 3928068 3184.3 529.7 2402.3
## 26 missouri 6044917 3137.4 641.8 2225.2
## 8 delaware 925240 3134.4 680.8 2307.2
## 34 north carolina 9848917 3103.2 915.7 2041.3
## 10 florida 19600311 3097.8 708.7 2210.9
## 45 utah 2902787 3043.1 472.9 2312.1
## 17 kansas 2895801 2959.6 603.5 2125.7
## 36 ohio 11572005 2924.4 787.2 1968.5
## 2 alaska 737259 2877.0 395.7 2251.4
## 15 indiana 6570713 2853.1 650.7 1986.6
## 29 nevada 2791494 2836.4 825.6 1652.6
## 25 mississippi 2992206 2725.3 834.5 1744.3
## 21 maryland 5938737 2659.1 538.0 1895.2
## 6 colorado 5272086 2655.0 475.6 1941.8
## 5 california 38431393 2651.2 603.8 1617.3
## 28 nebraska 1868969 2632.3 478.6 1915.0
## 27 montana 1014864 2581.1 406.2 1989.6
## 40 rhode island 1053354 2443.7 533.6 1697.5
## 24 minnesota 5422060 2419.6 418.9 1853.8
## 18 kentucky 4399583 2374.0 598.5 1637.6
## 23 michigan 9898193 2325.0 568.3 1513.3
## 14 illinois 12890552 2296.6 458.4 1676.1
## 20 maine 1328702 2292.0 487.7 1735.8
## 46 vermont 626855 2214.5 534.7 1626.1
## 30 new hampshire 1322616 2213.6 376.5 1765.8
## 16 iowa 3092341 2198.2 514.8 1545.9
## 51 wyoming 583223 2196.2 335.4 1761.8
## 50 wisconsin 5742953 2188.7 424.1 1635.8
## 49 west virginia 1853595 2154.1 534.5 1513.5
## 35 north dakota 723857 2121.4 411.5 1511.6
## 47 virginia 8270345 2074.4 323.0 1647.0
## 39 pennsylvania 12781296 2059.1 406.8 1544.5
## 22 massachusetts 6708874 2046.2 457.8 1452.4
## 7 connecticut 3599341 1980.2 359.3 1447.5
## 42 south dakota 845510 1924.8 400.1 1413.9
## 31 new jersey 8911502 1881.1 402.7 1324.2
## 13 idaho 1612843 1874.3 415.0 1364.0
## 33 new york 19695680 1820.7 286.6 1455.5
## Motor.vehicle.theft
## 9 498.2
## 32 286.7
## 48 407.6
## 41 264.1
## 4 191.9
## 19 198.0
## 11 271.7
## 3 255.7
## 1 218.5
## 37 292.8
## 44 248.2
## 12 323.7
## 43 183.2
## 38 252.3
## 26 270.4
## 8 146.4
## 34 146.3
## 10 178.1
## 45 258.1
## 17 230.4
## 36 168.7
## 2 229.9
## 15 215.9
## 29 358.2
## 25 146.6
## 21 225.9
## 6 237.6
## 5 430.1
## 28 238.7
## 27 185.3
## 40 212.6
## 24 146.9
## 18 137.9
## 23 243.3
## 14 162.1
## 20 68.6
## 46 53.8
## 30 71.4
## 16 137.6
## 51 99.1
## 50 128.7
## 49 106.1
## 35 198.2
## 47 104.3
## 39 107.7
## 22 136.0
## 7 173.4
## 42 110.7
## 31 154.3
## 13 95.4
## 33 78.6
# One histogram per column 3-5 of pcrdf (Property.crime, Burglary,
# Larceny.theft). Rows 2:51 skip the first row, which is DofC in the
# Property.crime ranking.
for (col_idx in 3:5) {
  hist(
    x = pcrdf[2:51, col_idx],
    xlab = names(pcrdf)[col_idx],
    col = col_idx - 1,  # a different fill colour (2, 3, 4) for each histogram
    border = "purple",
    main = ""
  )
}
# Stacked bar chart of the b States with the highest Property.crime rate.
# Row 1 of pcrdf is DofC, hence the window 2:(b + 1).
b <- 15
pcrm <- melt(
  data = pcrdf[2:(b + 1), ],
  id.vars = "State",
  measure.vars = 4:6,  # Burglary, Larceny.theft, Motor.vehicle.theft
  value.name = "rate"
)
vv <- ggplot(data = pcrm, aes(x = reorder(State, rate), y = rate)) +
  geom_bar(aes(fill = variable), position = "stack", stat = "identity")
# Horizontal bars make the State names readable.
vv +
  coord_flip() +
  theme_bw() +
  labs(x = "State", y = "Crime rate per 100,000 population - Year 2013")
# Polygon data for the State outlines.
usmap <- map_data("state")
# The first column becomes the join key shared with usmap.
colnames(pcrdf)[1] <- "region"
## define 5 groups for each crime type (breaks at the quintiles)
# NOTE(review): pcrdf still contains the DofC row at this point, so DofC takes
# part in the quantile breaks below - confirm this is intended.
quintile_probs <- seq(0, 1, 0.2)
qnt1 <- quantile(pcrdf$Burglary, probs = quintile_probs)
qnt2 <- quantile(pcrdf$Larceny.theft, probs = quintile_probs)
qnt3 <- quantile(pcrdf$Property.crime, probs = quintile_probs)
pcrdf[["BurglaryLevel"]] <- cut(pcrdf$Burglary, breaks = qnt1,
                                include.lowest = TRUE)
pcrdf[["TheftLevel"]] <- cut(pcrdf$Larceny.theft, breaks = qnt2,
                             include.lowest = TRUE)
pcrdf[["Property.CrimeLevel"]] <- cut(pcrdf$Property.crime, breaks = qnt3,
                                      include.lowest = TRUE)
# Attach the crime figures to every polygon vertex; the inner join keeps only
# regions present in both tables.
mapdf <- join(x = usmap, y = pcrdf, by = "region", type = "inner")
# Top 15 States by Property.crime: pcrdf still holds DofC in first place, so
# positions 2-16 of the ranking are shown.
property_order <- order(pcrdf[, 3], decreasing = TRUE)
pcrdf[property_order[2:16], 1:6]
## region Population Property.crime Burglary Larceny.theft
## 32 new mexico 2086895 3783.5 1046.0 2450.8
## 48 washington 6973742 3715.9 838.7 2469.6
## 41 south carolina 4771929 3630.8 857.7 2509.0
## 4 arkansas 2958765 3598.7 1026.4 2380.4
## 19 louisiana 4629284 3578.9 890.3 2490.7
## 11 georgia 9994759 3399.9 835.0 2293.1
## 3 arizona 6634997 3365.4 727.8 2381.9
## 1 alabama 4833996 3347.9 877.3 2252.0
## 37 oklahoma 3853118 3280.9 869.7 2118.4
## 44 texas 26505637 3253.2 720.8 2284.2
## 12 hawaii 1408987 3212.7 552.0 2337.0
## 43 tennessee 6497269 3196.5 790.1 2223.1
## 38 oregon 3928068 3184.3 529.7 2402.3
## 26 missouri 6044917 3137.4 641.8 2225.2
## 8 delaware 925240 3134.4 680.8 2307.2
## Motor.vehicle.theft
## 32 286.7
## 48 407.6
## 41 264.1
## 4 191.9
## 19 198.0
## 11 271.7
## 3 255.7
## 1 218.5
## 37 292.8
## 44 248.2
## 12 323.7
## 43 183.2
## 38 252.3
## 26 270.4
## 8 146.4
# Choropleth of the Property.crime quintile per State.
mp <- ggplot(data = mapdf, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group, fill = Property.CrimeLevel),
               colour = "red") +
  scale_fill_brewer(palette = "YlGnBu") +
  coord_fixed(ratio = 1.25) +  # fixed aspect ratio so the map is not stretched
  theme_bw() +
  labs(
    title = "Property Crime rate per State - Year 2013",
    x = "",
    y = "",
    fill = "Property Crime level"
  )
mp
# Choropleth of the Burglary quintile per State.
mp <- ggplot(data = mapdf, aes(x = long, y = lat)) +
  geom_polygon(aes(group = group, fill = BurglaryLevel), colour = "red") +
  scale_fill_brewer(palette = "YlGnBu") +
  coord_fixed(ratio = 1.25) +  # fixed aspect ratio so the map is not stretched
  theme_bw() +
  labs(
    title = "Burglary rate per State - Year 2013",
    x = "",
    y = "",
    fill = "Burglary level"
  )
mp
# Violent vs property crime rate, with point size showing Population.
# Row 9 of crimedf is dropped; in the printed tables that row number belongs
# to the District of Columbia.
p2 <- ggplot(
  data = crimedf[-9, ],
  aes(x = Property.crime, y = Violent.Crime)
) +
  geom_point(aes(size = Population))
p2