Frame

Study the crime pattern in the United States over the period of 1994 - 2013

Find the following:

  1. Comment on the pattern of overall crime rate per 100,000 people
  2. Predict the overall crime rate per 100,000 people in the year 2016

Aquire

# Loading the required libraries
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)

Refine and Transform

fbiData <- read_excel("crime_in_the_united_states__1994-2013.xls")
head(fbiData)
## # A tibble: 6 x 24
##                                               `Table 1`       X__1
##                                                   <chr>      <chr>
## 1                            Crime in the United States       <NA>
## 2 by Volume and Rate per 100,000 Inhabitants, 1994<U+0096>2013       <NA>
## 3                                                  Year Population
## 4                                                  1994  260327021
## 5                                                  1995  262803276
## 6                                                  1996  265228572
## # ... with 22 more variables: X__2 <chr>, X__3 <chr>, X__4 <chr>,
## #   X__5 <chr>, X__6 <chr>, X__7 <chr>, X__8 <chr>, X__9 <chr>,
## #   X__10 <chr>, X__11 <chr>, X__12 <chr>, X__13 <chr>, X__14 <chr>,
## #   X__15 <chr>, X__16 <chr>, X__17 <chr>, X__18 <chr>, X__19 <chr>,
## #   X__20 <lgl>, X__21 <lgl>, X__22 <lgl>, X__23 <lgl>
class(fbiData)
## [1] "tbl_df"     "tbl"        "data.frame"
# Removing unwanted rows
fbiData <- fbiData[3:23, 1:20]
#View(fbiData)

colNames <- fbiData[1,]
colNames
## # A tibble: 1 x 20
##   `Table 1`       X__1         X__2             X__3
##       <chr>      <chr>        <chr>            <chr>
## 1      Year Population ViolentCrime ViolentCrimeRate
## # ... with 16 more variables: X__4 <chr>, X__5 <chr>, X__6 <chr>,
## #   X__7 <chr>, X__8 <chr>, X__9 <chr>, X__10 <chr>, X__11 <chr>,
## #   X__12 <chr>, X__13 <chr>, X__14 <chr>, X__15 <chr>, X__16 <chr>,
## #   X__17 <chr>, X__18 <chr>, X__19 <chr>
colnames(fbiData) <- colNames

crimeData <- fbiData[-1,]
#View(crimeData)

crimeData[8,1] <- "2001"
crimeData[19,1] <- "2012"

crimeData[] %>% sapply(as.numeric) -> crimeData[]
tail(crimeData)
## # A tibble: 6 x 20
##    Year Population ViolentCrime ViolentCrimeRate
##   <dbl>      <dbl>        <dbl>            <dbl>
## 1  2008  304059724      1394461            458.6
## 2  2009  307006550      1325896            431.9
## 3  2010  309330219      1251248            404.5
## 4  2011  311587816      1206005            387.1
## 5  2012  313873685      1217057            387.8
## 6  2013  316128839      1163146            367.9
## # ... with 16 more variables: MurderAndNonnegligentManslaughter <dbl>,
## #   MurderAndNonnegligentManslaughterRate <dbl>, Rape <dbl>,
## #   RapeRate <dbl>, Robbery <dbl>, RobberyRate <dbl>,
## #   AggravatedAssault <dbl>, AggravatedAssaultRate <dbl>,
## #   PropertyCrime <dbl>, PropertyCrimeRate <dbl>, Burglary <dbl>,
## #   BurglaryRate <dbl>, LarcenyTheft <dbl>, LarcenyTheftRate <dbl>,
## #   MotorVehicleTheft <dbl>, MotorVehicleTheftRate <dbl>

Explore

populationGrowth <-
  ggplot(crimeData, aes(Year, Population, fill = Population)) + 
  geom_bar(stat = "identity") +
  ggtitle("Population growth between 1994 - 2013") +
  xlab("Years") + ylab("Population") +
  theme(
    plot.title = element_text(color="blue", size=14, face="bold.italic"),
    axis.text.x = element_text(angle=60, hjust=1),
    #axis.text.y = element_blank(),
    #axis.ticks.y = element_blank(),
    axis.title.x = element_text(color="blue", size=14, face="bold"),
    axis.title.y = element_text(color="blue", size=14, face="bold")
  )

ggplotly(populationGrowth)

From the above graph we notice that the population is growing over the years.

str(crimeData)
## Classes 'tbl_df', 'tbl' and 'data.frame':    20 obs. of  20 variables:
##  $ Year                                 : num  1994 1995 1996 1997 1998 ...
##  $ Population                           : num  2.60e+08 2.63e+08 2.65e+08 2.68e+08 2.70e+08 ...
##  $ ViolentCrime                         : num  1857670 1798792 1688540 1636096 1533887 ...
##  $ ViolentCrimeRate                     : num  714 684 637 611 568 ...
##  $ MurderAndNonnegligentManslaughter    : num  23326 21606 19645 18208 16974 ...
##  $ MurderAndNonnegligentManslaughterRate: num  9 8.2 7.4 6.8 6.3 5.7 5.5 5.6 5.6 5.7 ...
##  $ Rape                                 : num  102216 97470 96252 96153 93144 ...
##  $ RapeRate                             : num  39.3 37.1 36.3 35.9 34.5 32.8 32 31.8 33.1 32.3 ...
##  $ Robbery                              : num  618949 580509 535594 498534 447186 ...
##  $ RobberyRate                          : num  238 221 202 186 166 ...
##  $ AggravatedAssault                    : num  1113179 1099207 1037049 1023201 976583 ...
##  $ AggravatedAssaultRate                : num  428 418 391 382 361 ...
##  $ PropertyCrime                        : num  12131873 12063935 11805323 11558475 10951827 ...
##  $ PropertyCrimeRate                    : num  4660 4590 4451 4316 4052 ...
##  $ Burglary                             : num  2712774 2593784 2506400 2460526 2332735 ...
##  $ BurglaryRate                         : num  1042 987 945 919 863 ...
##  $ LarcenyTheft                         : num  7879812 7997710 7904685 7743760 7376311 ...
##  $ LarcenyTheftRate                     : num  3027 3043 2980 2892 2730 ...
##  $ MotorVehicleTheft                    : num  1539287 1472441 1394238 1354189 1242781 ...
##  $ MotorVehicleTheftRate                : num  591 560 526 506 460 ...
crimeData %>% mutate(overallRate = (
  ViolentCrimeRate + MurderAndNonnegligentManslaughterRate +
  RapeRate + RobberyRate + AggravatedAssaultRate +
  PropertyCrimeRate + BurglaryRate + LarcenyTheftRate +
  MotorVehicleTheftRate) / 9
) -> crimeData
overallRateGraph <-
  ggplot(crimeData, aes(Year, overallRate)) + 
  geom_line(color="green") +
  ggtitle("Overall crime rate per 100,000 people in US") +
  xlab("Years") + ylab("CrimeRate per 100,000 people") +
  theme(
    plot.title = element_text(color="blue", size=14, face="bold.italic"),
    axis.text.x = element_text(angle=60, hjust=1),
    axis.title.x = element_text(color="blue", size=10, face="bold"),
    axis.title.y = element_text(color="blue", size=10, face="bold")
  ) + geom_point(color=crimeData$overallRate)

ggplotly(overallRateGraph)

Notice that the overall crime rate per 100,000 people has been decreasing over the years

Model

model1 <- lm(overallRate~Year, data = crimeData)
summary(model1)
## 
## Call:
## lm(formula = overallRate ~ Year, data = crimeData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -76.891 -15.712   2.996  22.145  55.047 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 50458.21    2824.79   17.86 6.71e-13 ***
## Year          -24.73       1.41  -17.54 9.15e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 36.36 on 18 degrees of freedom
## Multiple R-squared:  0.9447, Adjusted R-squared:  0.9417 
## F-statistic: 307.7 on 1 and 18 DF,  p-value: 9.148e-13
# Notice that r2 is close to 1, so it looks like good fit

lmPlot <- ggplot(crimeData, aes(x = Year, y = overallRate)) + 
  geom_point(color="#006400") +
  stat_smooth(method = "lm", col = "red") +
  ggtitle("Linear model of Overall crime rate per 100,000 people in US") +
  xlab("Years") + ylab("CrimeRate per 100,000 people") 

lmPlot

# Predicting the values
p1 <- predict(model1, data.frame(Year=c(2014:2016)))
predictedDf <- data.frame(Year=2014:2016, overallRate=p1)
predictedDf
##   Year overallRate
## 1 2014    647.2604
## 2 2015    622.5280
## 3 2016    597.7957

Communicate

The following are the conclusion notes:

  1. Notice that the overall crime rate per 100,000 people has been decreasing over the years

  2. We have predicted that overall crime rate is going to decrease to 597.7957 in the year 2016

You can access this Html from http://rpubs.com/dalonlobo/fbicrimerate