crime = read.csv('/Users/ruiqianli/Desktop/EBAC_DABP/Day_2/data/crime\ case/crime.csv')
head(crime,4)
##         ID Case.Number             Date                  Block IUCR
## 1 10000092    HY189866 18/03/2015 19:44        047XX W OHIO ST 041A
## 2 10000094    HY190059 18/03/2015 23:00 066XX S MARSHFIELD AVE 4625
## 3 10000095    HY190052 18/03/2015 22:45  044XX S LAKE PARK AVE  486
## 4 10000096    HY190054 18/03/2015 22:30   051XX S MICHIGAN AVE  460
##    Primary.Type             Description Location.Description Arrest Domestic
## 1       BATTERY     AGGRAVATED: HANDGUN               STREET  FALSE    FALSE
## 2 OTHER OFFENSE        PAROLE VIOLATION               STREET   TRUE    FALSE
## 3       BATTERY DOMESTIC BATTERY SIMPLE            APARTMENT  FALSE     TRUE
## 4       BATTERY                  SIMPLE            APARTMENT  FALSE    FALSE
##   Beat District Ward Community.Area FBI.Code X.Coordinate Y.Coordinate Year
## 1 1111       11   28             25      04B      1144606      1903566 2015
## 2  725        7   15             67       26      1166468      1860715 2015
## 3  222        2    4             39      08B      1185075      1875622 2015
## 4  225        2    3             40      08B      1178033      1870804 2015
##         Updated.On Latitude Longitude                      Location
## 1 02/10/2018 15:50 41.89140 -87.74438 (41.891398861, -87.744384567)
## 2 02/10/2018 15:50 41.77337 -87.66532 (41.773371528, -87.665319468)
## 3 02/10/2018 15:50 41.81386 -87.59664  (41.81386068, -87.596642837)
## 4 02/10/2018 15:50 41.80080 -87.62262 (41.800802415, -87.622619343)

#Task1 Select the specified column in Figure 1

crime.figure1=subset(crime,select=c("ID","Date","Primary.Type","Location.Description","Arrest","Domestic","District","Year"))
head(crime.figure1)
##         ID             Date  Primary.Type Location.Description Arrest Domestic
## 1 10000092 18/03/2015 19:44       BATTERY               STREET  FALSE    FALSE
## 2 10000094 18/03/2015 23:00 OTHER OFFENSE               STREET   TRUE    FALSE
## 3 10000095 18/03/2015 22:45       BATTERY            APARTMENT  FALSE     TRUE
## 4 10000096 18/03/2015 22:30       BATTERY            APARTMENT  FALSE    FALSE
## 5 10000097 18/03/2015 21:00        ROBERY             SIDEWALK  FALSE    FALSE
## 6 10000098 18/03/2015 22:00       BATTERY            APARTMENT  FALSE    FALSE
##   District Year
## 1       11 2015
## 2        7 2015
## 3        2 2015
## 4        2 2015
## 5       11 2015
## 6        2 2015

Transform several column names

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
crime.figure2=rename(crime.figure1, Location=Location.Description, CrimeType=Primary.Type)
head(crime.figure2,4)
##         ID             Date     CrimeType  Location Arrest Domestic District
## 1 10000092 18/03/2015 19:44       BATTERY    STREET  FALSE    FALSE       11
## 2 10000094 18/03/2015 23:00 OTHER OFFENSE    STREET   TRUE    FALSE        7
## 3 10000095 18/03/2015 22:45       BATTERY APARTMENT  FALSE     TRUE        2
## 4 10000096 18/03/2015 22:30       BATTERY APARTMENT  FALSE    FALSE        2
##   Year
## 1 2015
## 2 2015
## 3 2015
## 4 2015

Create more columns

pacman::p_load(tidyverse, lubridate)
crime.figure2$Date = ymd_hms(crime.figure2$Date)
crime.figure2$Date2 = date(crime.figure2$Date)
crime.figure2$time = hms(format(crime.figure2$Date, format='%T') )
crime.figure2$weekday = weekdays((crime.figure2$Date2))
crime.figure2$week = week((crime.figure2$Date2))
crime.figure2$weekend = as.factor(ifelse(crime.figure2$weekday %in% c("Saturday", "Sunday"), "we
ekend", "weekday"))
head(crime.figure2)
##         ID                Date     CrimeType  Location Arrest Domestic District
## 1 10000092 2018-03-20 15:19:44       BATTERY    STREET  FALSE    FALSE       11
## 2 10000094 2018-03-20 15:23:00 OTHER OFFENSE    STREET   TRUE    FALSE        7
## 3 10000095 2018-03-20 15:22:45       BATTERY APARTMENT  FALSE     TRUE        2
## 4 10000096 2018-03-20 15:22:30       BATTERY APARTMENT  FALSE    FALSE        2
## 5 10000097 2018-03-20 15:21:00        ROBERY  SIDEWALK  FALSE    FALSE       11
## 6 10000098 2018-03-20 15:22:00       BATTERY APARTMENT  FALSE    FALSE        2
##   Year      Date2        time weekday week weekend
## 1 2015 2018-03-20 15H 19M 44S Tuesday   12 weekday
## 2 2015 2018-03-20  15H 23M 0S Tuesday   12 weekday
## 3 2015 2018-03-20 15H 22M 45S Tuesday   12 weekday
## 4 2015 2018-03-20 15H 22M 30S Tuesday   12 weekday
## 5 2015 2018-03-20  15H 21M 0S Tuesday   12 weekday
## 6 2015 2018-03-20  15H 22M 0S Tuesday   12 weekday

Create breaks and labels for the breaks

breaks = hour(hm("00:00", "6:00", "12:00", "18:00", "23:59"))
breaks
## [1]  0  6 12 18 23
labels = c("Night", "Morning", "Afternoon", "Evening")
crime.figure2$Time_of_day = cut(x=hour(crime.figure2$time), breaks = breaks, labels = labels, include.lowest=TRUE)
head(crime.figure2,4)
##         ID                Date     CrimeType  Location Arrest Domestic District
## 1 10000092 2018-03-20 15:19:44       BATTERY    STREET  FALSE    FALSE       11
## 2 10000094 2018-03-20 15:23:00 OTHER OFFENSE    STREET   TRUE    FALSE        7
## 3 10000095 2018-03-20 15:22:45       BATTERY APARTMENT  FALSE     TRUE        2
## 4 10000096 2018-03-20 15:22:30       BATTERY APARTMENT  FALSE    FALSE        2
##   Year      Date2        time weekday week weekend Time_of_day
## 1 2015 2018-03-20 15H 19M 44S Tuesday   12 weekday   Afternoon
## 2 2015 2018-03-20  15H 23M 0S Tuesday   12 weekday   Afternoon
## 3 2015 2018-03-20 15H 22M 45S Tuesday   12 weekday   Afternoon
## 4 2015 2018-03-20 15H 22M 30S Tuesday   12 weekday   Afternoon

Delete a column and change the order of a column

crime.figure2=crime.figure2[,-2]
head(crime.figure2,4)
##         ID     CrimeType  Location Arrest Domestic District Year      Date2
## 1 10000092       BATTERY    STREET  FALSE    FALSE       11 2015 2018-03-20
## 2 10000094 OTHER OFFENSE    STREET   TRUE    FALSE        7 2015 2018-03-20
## 3 10000095       BATTERY APARTMENT  FALSE     TRUE        2 2015 2018-03-20
## 4 10000096       BATTERY APARTMENT  FALSE    FALSE        2 2015 2018-03-20
##          time weekday week weekend Time_of_day
## 1 15H 19M 44S Tuesday   12 weekday   Afternoon
## 2  15H 23M 0S Tuesday   12 weekday   Afternoon
## 3 15H 22M 45S Tuesday   12 weekday   Afternoon
## 4 15H 22M 30S Tuesday   12 weekday   Afternoon
crime.figure2.final = crime.figure2[,c('ID','Date2','Time_of_day','weekday','weekend','week','CrimeType','Location','District','Arrest','Domestic')] 
crime.figure2.final$CrimeType[crime.figure2.final$CrimeType=='OTHER OFFENSE']='oth'
head(crime.figure2.final,4)
##         ID      Date2 Time_of_day weekday weekend week CrimeType  Location
## 1 10000092 2018-03-20   Afternoon Tuesday weekday   12   BATTERY    STREET
## 2 10000094 2018-03-20   Afternoon Tuesday weekday   12       oth    STREET
## 3 10000095 2018-03-20   Afternoon Tuesday weekday   12   BATTERY APARTMENT
## 4 10000096 2018-03-20   Afternoon Tuesday weekday   12   BATTERY APARTMENT
##   District Arrest Domestic
## 1       11  FALSE    FALSE
## 2        7   TRUE    FALSE
## 3        2  FALSE     TRUE
## 4        2  FALSE    FALSE

#Task2 To study the crime pattern over time the data needs to be prepared in a time series format. Take the data from task-1 and further prepare it to in a format that has crime incident counts aggregated yearly by week for every district as shown in fig-3

df_tseries = crime.figure2 %>%
group_by(Year,week, District)%>%
summarise(count = n())
## `summarise()` has grouped output by 'Year', 'week'. You can override using the `.groups` argument.
head(df_tseries,4)
## # A tibble: 4 x 4
## # Groups:   Year, week [1]
##    Year  week District count
##   <int> <dbl>    <int> <int>
## 1  2011     3        1   869
## 2  2011     3        2  1194
## 3  2011     3        3  1400
## 4  2011     3        4  1598