Loading packages
suppressMessages(library(data.table))
suppressMessages(library(tidyverse))
suppressMessages(library(ggmap))
suppressMessages(library(maps))
suppressMessages(library(mapdata))
suppressMessages(library(lubridate))
suppressMessages(library(ggrepel))
suppressMessages(library(varhandle))
Loading data:
# Directory that holds the yearly crime CSV exports. It is defined once so the
# script can be pointed at another location by editing a single line.
crime.data.dir <- "/Users/codethedral/Google Drive/MScA/MSCA_31006 Time Series Analysis & Forecasting/Chicago Crime TS/DATA"

# Read the export for one calendar year as a plain data.frame.
# `year` is the four-digit year that appears in the file name
# ("Crimes_-_<year>.csv"); only the literal string "NA" is read as missing.
read.crime.year <- function(year) {
  csv.path <- file.path(crime.data.dir, paste0("Crimes_-_", year, ".csv"))
  as.data.frame(fread(csv.path, na.strings = "NA"))
}

crime.chi.15 <- read.crime.year(2015)
crime.chi.16 <- read.crime.year(2016)
crime.chi.17 <- read.crime.year(2017)
crime.chi.18 <- read.crime.year(2018)
Combine all data sets:
# Stack the four yearly tables row-wise into a single data frame, then print
# a compact overview of its columns.
crime.chi <- do.call(
  rbind,
  list(crime.chi.15, crime.chi.16, crime.chi.17, crime.chi.18)
)
glimpse(crime.chi)
## Observations: 973,228
## Variables: 22
## $ ID <int> 10000092, 10000094, 10000095, 10000096,...
## $ `Case Number` <chr> "HY189866", "HY190059", "HY190052", "HY...
## $ Date <chr> "03/18/2015 07:44:00 PM", "03/18/2015 1...
## $ Block <chr> "047XX W OHIO ST", "066XX S MARSHFIELD ...
## $ IUCR <chr> "041A", "4625", "0486", "0460", "031A",...
## $ `Primary Type` <chr> "BATTERY", "OTHER OFFENSE", "BATTERY", ...
## $ Description <chr> "AGGRAVATED: HANDGUN", "PAROLE VIOLATIO...
## $ `Location Description` <chr> "STREET", "STREET", "APARTMENT", "APART...
## $ Arrest <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE...
## $ Domestic <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE...
## $ Beat <int> 1111, 725, 222, 225, 1113, 223, 733, 21...
## $ District <int> 11, 7, 2, 2, 11, 2, 7, 2, 9, 5, 5, 6, 4...
## $ Ward <int> 28, 15, 4, 3, 28, 4, 17, 3, 11, 6, 9, 1...
## $ `Community Area` <int> 25, 67, 39, 40, 25, 39, 68, 38, 59, 49,...
## $ `FBI Code` <chr> "04B", "26", "08B", "08B", "03", "08B",...
## $ `X Coordinate` <int> 1144606, 1166468, 1185075, 1178033, 114...
## $ `Y Coordinate` <int> 1903566, 1860715, 1875622, 1870804, 189...
## $ Year <int> 2015, 2015, 2015, 2015, 2015, 2015, 201...
## $ `Updated On` <chr> "02/10/2018 03:50:01 PM", "02/10/2018 0...
## $ Latitude <dbl> 41.89140, 41.77337, 41.81386, 41.80080,...
## $ Longitude <dbl> -87.74438, -87.66532, -87.59664, -87.62...
## $ Location <chr> "(41.891398861, -87.744384567)", "(41.7...
Select relevant variables
# Keep only the six columns used in the rest of the analysis and print an
# overview of the reduced table.
crime.chi.selected <- crime.chi %>%
  select("Date", "Block", "Description", "Arrest", "Latitude", "Longitude")
glimpse(crime.chi.selected)
## Observations: 973,228
## Variables: 6
## $ Date <chr> "03/18/2015 07:44:00 PM", "03/18/2015 11:00:00 PM"...
## $ Block <chr> "047XX W OHIO ST", "066XX S MARSHFIELD AVE", "044X...
## $ Description <chr> "AGGRAVATED: HANDGUN", "PAROLE VIOLATION", "DOMEST...
## $ Arrest <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FA...
## $ Latitude <dbl> 41.89140, 41.77337, 41.81386, 41.80080, 41.87806, ...
## $ Longitude <dbl> -87.74438, -87.66532, -87.59664, -87.62262, -87.74...
Data cleaning:
# Split date and time
# Date values look like "03/18/2015 07:44:00 PM". Tokenise on spaces once and
# reuse the pieces for both new columns instead of splitting twice.
date.parts <- strsplit(as.character(crime.chi.selected$Date), " ")
# vapply() always returns a character vector (sapply() would return list() on
# empty input); a missing token comes back as NA.
crime.chi.selected$Date2 <- vapply(date.parts, "[", FUN.VALUE = character(1), 1)
# NOTE(review): only the second token is kept, so the AM/PM marker (third
# token) is not part of Time2. Left as-is because later code may rely on it.
crime.chi.selected$Time2 <- vapply(date.parts, "[", FUN.VALUE = character(1), 2)
# The token list is as long as the table; release it once both columns exist.
rm(date.parts)
# Convert Date2 into date type (month/day/year order)
crime.chi.selected$Date2 <- mdy(crime.chi.selected$Date2)
Change dates as required:
# Select only 2017
# Both bounds are inclusive, so the upper bound is December 31 in order to
# cover the whole calendar year. Edit the two dates to analyse another period.
crime.selected.years <- filter(
  crime.chi.selected,
  Date2 >= as_date("2017-01-01"),
  Date2 <= as_date("2017-12-31")
)
Data cleaning:
# Separate date into year, month and day
crime.selected.years$year <- year(crime.selected.years$Date2)
crime.selected.years$month <- month(crime.selected.years$Date2)
crime.selected.years$day <- day(crime.selected.years$Date2)

# Recode the Arrest flag into readable labels
crime.selected.years$Arrest <- recode(
  as.factor(crime.selected.years$Arrest),
  "TRUE" = "Arrested",
  "FALSE" = "Not Arrested"
)

# Convert all character variables into factors.
# is.character() is evaluated per column instead of comparing class() with a
# string, which misbehaves for columns that carry more than one class.
character_vars <- vapply(crime.selected.years, is.character, logical(1))
# List-style indexing (no comma) keeps a data frame even when only a single
# column matches, so lapply() always iterates over columns.
crime.selected.years[character_vars] <- lapply(
  crime.selected.years[character_vars],
  as.factor
)
glimpse(crime.selected.years)
## Observations: 267,048
## Variables: 11
## $ Date <fct> 09/21/2017 12:15:00 AM, 10/12/2017 07:14:00 PM, 10...
## $ Block <fct> 072XX N CALIFORNIA AVE, 055XX W GRAND AVE, 043XX S...
## $ Description <fct> COUNTERFEIT CHECK, TO CITY OF CHICAGO PROPERTY, SE...
## $ Arrest <fct> Arrested, Arrested, Arrested, Arrested, Arrested, ...
## $ Latitude <dbl> 42.01229, 41.91871, 41.81467, 41.93858, 41.89581, ...
## $ Longitude <dbl> -87.69971, -87.76551, -87.69073, -87.76583, -87.68...
## $ Date2 <date> 2017-09-21, 2017-10-12, 2017-10-30, 2017-09-29, 2...
## $ Time2 <fct> 12:15:00, 07:14:00, 11:52:00, 06:45:00, 06:20:00, ...
## $ year <dbl> 2017, 2017, 2017, 2017, 2017, 2017, 2017, 2017, 20...
## $ month <dbl> 9, 10, 10, 9, 12, 6, 6, 9, 11, 12, 12, 11, 12, 10,...
## $ day <int> 21, 12, 30, 29, 1, 8, 25, 6, 28, 11, 12, 22, 8, 8,...
Top 15 crimes in 2017
# Rows whose derived `year` column equals 2017.
year.2017 <- crime.selected.years %>%
  filter(year == 2017)

# Number of incidents per crime description, reduced to the 15 largest counts.
# summarise() already yields one row per description, so no de-duplication is
# needed. The ranking column is named explicitly so the result does not depend
# on `total` happening to be the last column.
group <- year.2017 %>%
  group_by(Description) %>%
  summarise(total = n()) %>%
  top_n(15, total)
## Selecting by total
# Horizontal bar chart of the counts in `group`. Bars are ordered by total and
# each bar carries a repelled label showing its count.
ggplot(group, aes(x = reorder(Description, total), y = total)) +
  geom_col(fill = "sky blue") +
  geom_label_repel(aes(label = total), size = 2) +
  # Flip so the descriptions run down the vertical axis.
  coord_flip() +
  labs(
    title = "Chicago Top 15 Crimes in 2017",
    x = "Crime Description",
    y = "Total"
  )
Comparison of crimes that end in an arrest or not.
# Number of incidents per (Arrest, Description) pair, reduced to the 15 most
# frequent descriptions within each arrest outcome.
crime.arrest <- year.2017 %>%
  # Remove rows with a missing key before ranking so that a missing value
  # cannot occupy one of the 15 slots and then be dropped afterwards.
  filter(!is.na(Arrest), !is.na(Description)) %>%
  group_by(Arrest, Description) %>%
  summarise(total = n()) %>%
  # summarise() removes only the last grouping level, so the table is still
  # grouped by Arrest here and top_n() ranks within each outcome. The ranking
  # column is named explicitly rather than inferred from column order.
  top_n(15, total)
## Selecting by total
# Horizontal bar charts of the counts in `crime.arrest`, one panel per arrest
# outcome, with the count printed next to each bar.
crime.arrest %>%
  ggplot(aes(x = reorder(Description, total), y = total)) +
  geom_col(fill = "sky blue") +
  geom_text(aes(label = total), color = "black", hjust = -0.1, size = 2) +
  coord_flip() +
  facet_wrap(~ Arrest) +
  # labs() names aesthetics, not screen positions: `x` is the description
  # aesthetic and `y` is the count, even though coord_flip() draws them on the
  # vertical and horizontal axes respectively.
  labs(
    x = "Crime Description",
    y = "Total"
  )
Mapping the crime areas:
# Fetch a map for the location "Chicago" at zoom level 12 (this performs a
# network request to the map provider).
chi.map <- qmap("Chicago", zoom = 12)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Chicago&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Chicago&sensor=false
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
#Select variables for mapping