Study the crime pattern in the United States over the period of 1994 - 2013
Find the following:
This data is aquired from the following site https://ucr.fbi.gov/crime-in-the-u.s/2013/crime-in-the-u.s.-2013/tables/1tabledatadecoverviewpdf/table_1_crime_in_the_united_states_by_volume_and_rate_per_100000_inhabitants_1994-2013.xls
Loading the required libraries
# Loading the required libraries
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
fbiData <- read_excel("crime_in_the_united_states__1994-2013.xls")
head(fbiData)
## # A tibble: 6 x 24
## `Table 1` X__1
## <chr> <chr>
## 1 Crime in the United States <NA>
## 2 by Volume and Rate per 100,000 Inhabitants, 1994<U+0096>2013 <NA>
## 3 Year Population
## 4 1994 260327021
## 5 1995 262803276
## 6 1996 265228572
## # ... with 22 more variables: X__2 <chr>, X__3 <chr>, X__4 <chr>,
## # X__5 <chr>, X__6 <chr>, X__7 <chr>, X__8 <chr>, X__9 <chr>,
## # X__10 <chr>, X__11 <chr>, X__12 <chr>, X__13 <chr>, X__14 <chr>,
## # X__15 <chr>, X__16 <chr>, X__17 <chr>, X__18 <chr>, X__19 <chr>,
## # X__20 <lgl>, X__21 <lgl>, X__22 <lgl>, X__23 <lgl>
class(fbiData)
## [1] "tbl_df" "tbl" "data.frame"
# Removing unwanted rows
fbiData <- fbiData[3:23, 1:20]
#View(fbiData)
colNames <- fbiData[1,]
colNames
## # A tibble: 1 x 20
## `Table 1` X__1 X__2 X__3
## <chr> <chr> <chr> <chr>
## 1 Year Population ViolentCrime ViolentCrimeRate
## # ... with 16 more variables: X__4 <chr>, X__5 <chr>, X__6 <chr>,
## # X__7 <chr>, X__8 <chr>, X__9 <chr>, X__10 <chr>, X__11 <chr>,
## # X__12 <chr>, X__13 <chr>, X__14 <chr>, X__15 <chr>, X__16 <chr>,
## # X__17 <chr>, X__18 <chr>, X__19 <chr>
colnames(fbiData) <- colNames
crimeData <- fbiData[-1,]
#View(crimeData)
crimeData[8,1] <- "2001"
crimeData[19,1] <- "2012"
crimeData[] %>% sapply(as.numeric) -> crimeData[]
tail(crimeData)
## # A tibble: 6 x 20
## Year Population ViolentCrime ViolentCrimeRate
## <dbl> <dbl> <dbl> <dbl>
## 1 2008 304059724 1394461 458.6
## 2 2009 307006550 1325896 431.9
## 3 2010 309330219 1251248 404.5
## 4 2011 311587816 1206005 387.1
## 5 2012 313873685 1217057 387.8
## 6 2013 316128839 1163146 367.9
## # ... with 16 more variables: MurderAndNonnegligentManslaughter <dbl>,
## # MurderAndNonnegligentManslaughterRate <dbl>, Rape <dbl>,
## # RapeRate <dbl>, Robbery <dbl>, RobberyRate <dbl>,
## # AggravatedAssault <dbl>, AggravatedAssaultRate <dbl>,
## # PropertyCrime <dbl>, PropertyCrimeRate <dbl>, Burglary <dbl>,
## # BurglaryRate <dbl>, LarcenyTheft <dbl>, LarcenyTheftRate <dbl>,
## # MotorVehicleTheft <dbl>, MotorVehicleTheftRate <dbl>
populationGrowth <-
ggplot(crimeData, aes(Year, Population, fill = Population)) +
geom_bar(stat = "identity") +
ggtitle("Population growth between 1994 - 2013") +
xlab("Years") + ylab("Population") +
theme(
plot.title = element_text(color="blue", size=14, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
#axis.text.y = element_blank(),
#axis.ticks.y = element_blank(),
axis.title.x = element_text(color="blue", size=14, face="bold"),
axis.title.y = element_text(color="blue", size=14, face="bold")
)
ggplotly(populationGrowth)
From the above graph we notice that the population is growing over the years.
str(crimeData)
## Classes 'tbl_df', 'tbl' and 'data.frame': 20 obs. of 20 variables:
## $ Year : num 1994 1995 1996 1997 1998 ...
## $ Population : num 2.60e+08 2.63e+08 2.65e+08 2.68e+08 2.70e+08 ...
## $ ViolentCrime : num 1857670 1798792 1688540 1636096 1533887 ...
## $ ViolentCrimeRate : num 714 684 637 611 568 ...
## $ MurderAndNonnegligentManslaughter : num 23326 21606 19645 18208 16974 ...
## $ MurderAndNonnegligentManslaughterRate: num 9 8.2 7.4 6.8 6.3 5.7 5.5 5.6 5.6 5.7 ...
## $ Rape : num 102216 97470 96252 96153 93144 ...
## $ RapeRate : num 39.3 37.1 36.3 35.9 34.5 32.8 32 31.8 33.1 32.3 ...
## $ Robbery : num 618949 580509 535594 498534 447186 ...
## $ RobberyRate : num 238 221 202 186 166 ...
## $ AggravatedAssault : num 1113179 1099207 1037049 1023201 976583 ...
## $ AggravatedAssaultRate : num 428 418 391 382 361 ...
## $ PropertyCrime : num 12131873 12063935 11805323 11558475 10951827 ...
## $ PropertyCrimeRate : num 4660 4590 4451 4316 4052 ...
## $ Burglary : num 2712774 2593784 2506400 2460526 2332735 ...
## $ BurglaryRate : num 1042 987 945 919 863 ...
## $ LarcenyTheft : num 7879812 7997710 7904685 7743760 7376311 ...
## $ LarcenyTheftRate : num 3027 3043 2980 2892 2730 ...
## $ MotorVehicleTheft : num 1539287 1472441 1394238 1354189 1242781 ...
## $ MotorVehicleTheftRate : num 591 560 526 506 460 ...
crimeData %>% mutate(overallRate = (
ViolentCrimeRate + MurderAndNonnegligentManslaughterRate +
RapeRate + RobberyRate + AggravatedAssaultRate +
PropertyCrimeRate + BurglaryRate + LarcenyTheftRate +
MotorVehicleTheftRate) / 9
) -> crimeData
overallRateGraph <-
ggplot(crimeData, aes(Year, overallRate)) +
geom_line(color="green") +
ggtitle("Overall crime rate per 100,000 people in US") +
xlab("Years") + ylab("CrimeRate per 100,000 people") +
theme(
plot.title = element_text(color="blue", size=14, face="bold.italic"),
axis.text.x = element_text(angle=60, hjust=1),
axis.title.x = element_text(color="blue", size=10, face="bold"),
axis.title.y = element_text(color="blue", size=10, face="bold")
) + geom_point(color=crimeData$overallRate)
ggplotly(overallRateGraph)
Notice that the overall crime rate per 100,000 people has been decreasing over the years
model1 <- lm(overallRate~Year, data = crimeData)
summary(model1)
##
## Call:
## lm(formula = overallRate ~ Year, data = crimeData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -76.891 -15.712 2.996 22.145 55.047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50458.21 2824.79 17.86 6.71e-13 ***
## Year -24.73 1.41 -17.54 9.15e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36.36 on 18 degrees of freedom
## Multiple R-squared: 0.9447, Adjusted R-squared: 0.9417
## F-statistic: 307.7 on 1 and 18 DF, p-value: 9.148e-13
# Notice that r2 is close to 1, so it looks like good fit
lmPlot <- ggplot(crimeData, aes(x = Year, y = overallRate)) +
geom_point(color="#006400") +
stat_smooth(method = "lm", col = "red") +
ggtitle("Linear model of Overall crime rate per 100,000 people in US") +
xlab("Years") + ylab("CrimeRate per 100,000 people")
lmPlot
# Predicting the values
p1 <- predict(model1, data.frame(Year=c(2014:2016)))
predictedDf <- data.frame(Year=2014:2016, overallRate=p1)
predictedDf
## Year overallRate
## 1 2014 647.2604
## 2 2015 622.5280
## 3 2016 597.7957
The following are the conclusion notes:
Notice that the overall crime rate per 100,000 people has been decreasing over the years
We have predicted that overall crime rate is going to decrease to 597.7957 in the year 2016
You can access this Html from http://rpubs.com/dalonlobo/fbicrimerate