Loading packages

suppressMessages(library(data.table))
suppressMessages(library(tidyverse))
suppressMessages(library(ggmap))
suppressMessages(library(maps))
suppressMessages(library(mapdata))
suppressMessages(library(lubridate))
suppressMessages(library(ggrepel))
suppressMessages(library(varhandle))

Loading data:

# Load the yearly Chicago crime extracts (2015-2018) as plain data frames.
# The data directory is defined once so the script can be pointed at another
# copy of the data by editing a single line instead of four.
crime.data.dir <- "/Users/codethedral/Google Drive/MScA/MSCA_31006 Time Series Analysis & Forecasting/Chicago Crime TS/DATA"

# fread(data.table = FALSE) returns a data.frame directly, so no separate
# as.data.frame() conversion is needed. na.strings is kept at "NA".
crime.chi.15 <- fread(file.path(crime.data.dir, "Crimes_-_2015.csv"),
                      na.strings = "NA", data.table = FALSE)
crime.chi.16 <- fread(file.path(crime.data.dir, "Crimes_-_2016.csv"),
                      na.strings = "NA", data.table = FALSE)
crime.chi.17 <- fread(file.path(crime.data.dir, "Crimes_-_2017.csv"),
                      na.strings = "NA", data.table = FALSE)
crime.chi.18 <- fread(file.path(crime.data.dir, "Crimes_-_2018.csv"),
                      na.strings = "NA", data.table = FALSE)

Combine all data sets:

# Stack the four yearly extracts row-wise into a single data frame, then
# print a compact column-by-column overview of the result.
crime.chi <- do.call(
  rbind,
  list(crime.chi.15, crime.chi.16, crime.chi.17, crime.chi.18)
)
crime.chi %>% glimpse()
## Observations: 973,228
## Variables: 22
## $ ID                     <int> 10000092, 10000094, 10000095, 10000096,...
## $ `Case Number`          <chr> "HY189866", "HY190059", "HY190052", "HY...
## $ Date                   <chr> "03/18/2015 07:44:00 PM", "03/18/2015 1...
## $ Block                  <chr> "047XX W OHIO ST", "066XX S MARSHFIELD ...
## $ IUCR                   <chr> "041A", "4625", "0486", "0460", "031A",...
## $ `Primary Type`         <chr> "BATTERY", "OTHER OFFENSE", "BATTERY", ...
## $ Description            <chr> "AGGRAVATED: HANDGUN", "PAROLE VIOLATIO...
## $ `Location Description` <chr> "STREET", "STREET", "APARTMENT", "APART...
## $ Arrest                 <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE...
## $ Domestic               <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE...
## $ Beat                   <int> 1111, 725, 222, 225, 1113, 223, 733, 21...
## $ District               <int> 11, 7, 2, 2, 11, 2, 7, 2, 9, 5, 5, 6, 4...
## $ Ward                   <int> 28, 15, 4, 3, 28, 4, 17, 3, 11, 6, 9, 1...
## $ `Community Area`       <int> 25, 67, 39, 40, 25, 39, 68, 38, 59, 49,...
## $ `FBI Code`             <chr> "04B", "26", "08B", "08B", "03", "08B",...
## $ `X Coordinate`         <int> 1144606, 1166468, 1185075, 1178033, 114...
## $ `Y Coordinate`         <int> 1903566, 1860715, 1875622, 1870804, 189...
## $ Year                   <int> 2015, 2015, 2015, 2015, 2015, 2015, 201...
## $ `Updated On`           <chr> "02/10/2018 03:50:01 PM", "02/10/2018 0...
## $ Latitude               <dbl> 41.89140, 41.77337, 41.81386, 41.80080,...
## $ Longitude              <dbl> -87.74438, -87.66532, -87.59664, -87.62...
## $ Location               <chr> "(41.891398861, -87.744384567)", "(41.7...

Select relevant variables

# Keep only the six columns of interest: the timestamp, the block address,
# the crime description, the arrest flag and the coordinates.
crime.chi.selected <- crime.chi %>%
  select(Date, Block, Description, Arrest, Latitude, Longitude)
crime.chi.selected %>% glimpse()
## Observations: 973,228
## Variables: 6
## $ Date        <chr> "03/18/2015 07:44:00 PM", "03/18/2015 11:00:00 PM"...
## $ Block       <chr> "047XX W OHIO ST", "066XX S MARSHFIELD AVE", "044X...
## $ Description <chr> "AGGRAVATED: HANDGUN", "PAROLE VIOLATION", "DOMEST...
## $ Arrest      <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FA...
## $ Latitude    <dbl> 41.89140, 41.77337, 41.81386, 41.80080, 41.87806, ...
## $ Longitude   <dbl> -87.74438, -87.66532, -87.59664, -87.62262, -87.74...

Data cleaning:

# Split the "MM/DD/YYYY HH:MM:SS AM/PM" timestamp into a calendar date
# (Date2) and a time of day (Time2).

# Date2: everything before the first space, parsed as month/day/year into
# a Date. sub() is vectorised, so no per-row strsplit()/sapply() is needed.
crime.chi.selected$Date2 <- mdy(
  sub(" .*$", "", as.character(crime.chi.selected$Date))
)

# Time2: the full timestamp is parsed together with its AM/PM marker and
# written back as a 24-hour "HH:MM:SS" string. Taking only the second
# space-separated token would discard the marker, making 07:44 AM and
# 07:44 PM indistinguishable.
crime.chi.selected$Time2 <- format(
  mdy_hms(as.character(crime.chi.selected$Date)),
  "%H:%M:%S"
)

Change dates as required:

# Select only 2017.
# Both bounds are inclusive; the upper bound must be 31 December, otherwise
# incidents on the last day of the year are silently dropped.
crime.selected.years <- filter(
  crime.chi.selected,
  Date2 >= as_date("2017-01-01"),
  Date2 <= as_date("2017-12-31")
)

Data cleaning:

# Derive the calendar components of Date2 as separate columns.
crime.selected.years <- crime.selected.years %>%
  mutate(
    year = year(Date2),
    month = month(Date2),
    day = day(Date2)
  )

# Turn the logical Arrest flag into a factor with human-readable labels.
crime.selected.years <- crime.selected.years %>%
  mutate(
    Arrest = recode(
      as.factor(Arrest),
      "TRUE" = "Arrested",
      "FALSE" = "Not Arrested"
    )
  )

# Convert all character variables into factors.
# vapply(is.character) always yields exactly one logical per column, whereas
# comparing class() to a string breaks on columns with more than one class.
# Columns are selected list-style (df[cols]) so the selection stays a data
# frame even when only one column is character; df[, cols] would collapse to
# a bare vector and lapply() would then iterate over its elements.
character_vars <- vapply(crime.selected.years, is.character, logical(1))
crime.selected.years[character_vars] <- lapply(
  crime.selected.years[character_vars],
  as.factor
)
glimpse(crime.selected.years)
## Observations: 267,048
## Variables: 11
## $ Date        <fct> 09/21/2017 12:15:00 AM, 10/12/2017 07:14:00 PM, 10...
## $ Block       <fct> 072XX N CALIFORNIA AVE, 055XX W GRAND AVE, 043XX S...
## $ Description <fct> COUNTERFEIT CHECK, TO CITY OF CHICAGO PROPERTY, SE...
## $ Arrest      <fct> Arrested, Arrested, Arrested, Arrested, Arrested, ...
## $ Latitude    <dbl> 42.01229, 41.91871, 41.81467, 41.93858, 41.89581, ...
## $ Longitude   <dbl> -87.69971, -87.76551, -87.69073, -87.76583, -87.68...
## $ Date2       <date> 2017-09-21, 2017-10-12, 2017-10-30, 2017-09-29, 2...
## $ Time2       <fct> 12:15:00, 07:14:00, 11:52:00, 06:45:00, 06:20:00, ...
## $ year        <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 20...
## $ month       <dbl> 9, 10, 10, 9, 12, 6, 6, 9, 11, 12, 12, 11, 12, 10,...
## $ day         <int> 21, 12, 30, 29, 1, 8, 25, 6, 28, 11, 12, 22, 8, 8,...

Top 15 crimes in 2017

# Restrict to rows whose year component is 2017.
year.2017 <- filter(crime.selected.years, year == 2017)

# Count incidents per crime description and keep the rows with the 15
# largest counts. top_n() is given no weighting column, so it ranks by the
# last column of its input (total).
group <- top_n(
  year.2017 %>%
    group_by(Description) %>%
    summarise(total = n()) %>%
    distinct(),
  n = 15
)
## Selecting by total
# Horizontal bar chart of the selected crime descriptions, ordered by count,
# with each bar labelled by its total.
ggplot(group, aes(x = reorder(Description, total), y = total)) +
  geom_col(fill = "sky blue") +
  geom_label_repel(aes(label = total), size = 2) +
  coord_flip() +
  labs(
    title = "Chicago Top 15 Crimes in 2017",
    x = "Crime Description",
    y = "Total"
  )

Comparison of the most frequent crimes, split by whether or not they ended in an arrest.

# Count incidents per (arrest outcome, description) pair, keep the 15
# largest totals (top_n() ranks by the last column, total), and drop any
# rows that contain missing values.
crime.arrest <- na.omit(
  year.2017 %>%
    group_by(Arrest, Description) %>%
    summarise(total = n()) %>%
    top_n(n = 15)
)
## Selecting by total
# Horizontal bar chart of crime counts, one panel per arrest outcome, with
# each bar labelled by its total.
# labs() names aesthetics, not screen positions: x is the crime description
# and y is the count, even though coord_flip() draws x on the vertical axis.
crime.arrest %>%
  ggplot(aes(x = reorder(Description, total), y = total)) +
  geom_col(fill = "sky blue") +
  geom_text(aes(label = total), color = "black", hjust = -0.1, size = 2) +
  coord_flip() +
  facet_wrap(~ Arrest) +
  labs(
    x = "Crime Description",
    y = "Total"
  )

Mapping the crime areas:

# Fetch a base map for the location "Chicago" at zoom level 12 via ggmap.
chi.map <- qmap("Chicago", zoom = 12)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Chicago&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Chicago&sensor=false
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
#Select variables for mapping