library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.1
library(foreign)
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.6.1
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 3.6.1
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
##
## extract
library(tidyr)
library(plyr)
## Warning: package 'plyr' was built under R version 3.6.1
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:ggpubr':
##
## mutate
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(scales)
## Warning: package 'scales' was built under R version 3.6.1
library(zoo)
## Warning: package 'zoo' was built under R version 3.6.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.1
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:plyr':
##
## here
## The following object is masked from 'package:base':
##
## date
# Load the crime incident records. stringsAsFactors = TRUE is spelled out
# because the summaries and level-based filters below expect factor columns,
# and read.csv() stopped creating factors by default in R 4.0.
crime <- read.csv("crime.csv", stringsAsFactors = TRUE)
# View() opens a data viewer window, which is only useful in an interactive
# session; skip it when the script is run in batch or knitted.
if (interactive()) {
  View(crime)
}
# Drop columns 15-17 by position; the summary below lists the 14 that remain.
crime <- crime[-c(15, 16, 17)]
# Remove every row that contains an NA value
crime <- na.omit(crime)
# Summary of the cleaned dataset
summary(crime)
## INCIDENT_NUMBER OFFENSE_CODE
## I162030584: 13 Min. : 111
## I152080623: 11 1st Qu.: 802
## I172013170: 10 Median :2907
## I172096394: 10 Mean :2291
## I182065208: 10 3rd Qu.:3201
## I162001871: 9 Max. :3831
## (Other) :298760
## OFFENSE_CODE_GROUP
## Motor Vehicle Accident Response: 30558
## Larceny : 25256
## Medical Assistance : 22491
## Investigate Person : 18063
## Other : 17157
## Simple Assault : 15002
## (Other) :170296
## OFFENSE_DESCRIPTION DISTRICT
## INVESTIGATE PERSON : 18067 B2 :46561
## SICK/INJURED/MEDICAL - PERSON : 17923 C11 :40991
## VANDALISM : 14624 D4 :38530
## M/V - LEAVING SCENE - PROPERTY DAMAGE: 14447 B3 :33939
## ASSAULT SIMPLE - BATTERY : 14011 A1 :32274
## VERBAL DISPUTE : 12972 C6 :22050
## (Other) :206779 (Other):84478
## REPORTING_AREA SHOOTING OCCURRED_ON_DATE YEAR
## Min. : 0.0 :297833 2017-06-01 00:00:00: 28 Min. :2015
## 1st Qu.:177.0 Y: 990 2015-07-01 00:00:00: 26 1st Qu.:2016
## Median :344.0 2016-08-01 00:00:00: 24 Median :2017
## Mean :383.2 2015-06-18 05:00:00: 22 Mean :2017
## 3rd Qu.:544.0 2017-01-01 00:00:00: 21 3rd Qu.:2017
## Max. :962.0 2017-08-01 00:00:00: 21 Max. :2018
## (Other) :298681
## MONTH DAY_OF_WEEK HOUR UCR_PART
## Min. : 1.000 Friday :45445 Min. : 0.00 : 90
## 1st Qu.: 4.000 Monday :42799 1st Qu.: 9.00 Other : 1171
## Median : 7.000 Saturday :41893 Median :14.00 Part One : 59319
## Mean : 6.612 Sunday :37912 Mean :13.12 Part Three:146858
## 3rd Qu.: 9.000 Thursday :43668 3rd Qu.:18.00 Part Two : 91385
## Max. :12.000 Tuesday :43327 Max. :23.00
## Wednesday:43779
## STREET
## WASHINGTON ST : 14144
## BLUE HILL AVE : 7103
## BOYLSTON ST : 6972
## DORCHESTER AVE: 5035
## TREMONT ST : 4668
## HARRISON AVE : 4440
## (Other) :256461
# A single incident number can be recorded for more than one offense:
# the counts below show how many rows share each INCIDENT_NUMBER.
summary(crime$INCIDENT_NUMBER)
## I162030584 I152080623 I172013170 I172096394 I182065208
## 13 11 10 10 10
## I162001871 I162071327 I162098170 I172054429 I172056883
## 9 9 9 9 9
## I130041200-00 I152076465 I152105431 I162056703 I162064331
## 8 8 8 8 8
## I162078338 I162082917 I162087224 I172053616 I172069723
## 8 8 8 8 8
## I152055687 I152057379 I152061219 I152066520 I152067057
## 7 7 7 7 7
## I152071480 I152072690 I152081203 I152091216 I152095733
## 7 7 7 7 7
## I152096998 I152098022 I162003538 I162010747 I162018523
## 7 7 7 7 7
## I162045680 I162050011 I162054378 I162063389 I162066003
## 7 7 7 7 7
## I162067346 I162068784 I162083089 I162091005 I162095648
## 7 7 7 7 7
## I172018004 I172018939 I172034576 I172035545 I172039629
## 7 7 7 7 7
## I172049351 I172049837 I172051375 I172073130 I172077319
## 7 7 7 7 7
## I172090526 I182000755 I182004372 I182033082 I182048995
## 7 7 7 7 7
## I182056728 I152053080 I152054996 I152055981 I152058024
## 7 6 6 6 6
## I152063289 I152064262 I152064440 I152069051 I152069565
## 6 6 6 6 6
## I152070360 I152070497 I152073806 I152078014 I152078189
## 6 6 6 6 6
## I152081150 I152081170 I152083209 I152090153 I152094958
## 6 6 6 6 6
## I152095325 I152095592 I152101251 I152104401 I152105025
## 6 6 6 6 6
## I152105059 I162000603 I162001102 I162001325 I162002070
## 6 6 6 6 6
## I162002959 I162003106 I162004797 I162004842 I162007562
## 6 6 6 6 6
## I162007649 I162014789 I162015180 I162019520 (Other)
## 6 6 6 6 298129
# Keep the incidents that involved a shooting in a separate data set
summary(crime$SHOOTING)
## Y
## 297833 990
crime$SHOOTING <- factor(crime$SHOOTING)
levels(crime$SHOOTING)
## [1] "" "Y"
# Use the bare column name inside filter(): it is evaluated against the piped
# data, whereas crime$SHOOTING would read the global object regardless of
# what is being filtered.
shooting_crime <- crime %>% filter(SHOOTING == "Y")
# Remove rows with uninformative categories: a blank or "Other" UCR_PART and
# a blank DISTRICT.
summary(crime$UCR_PART)
## Other Part One Part Three Part Two
## 90 1171 59319 146858 91385
# Bare column names are evaluated against the piped data; crime$UCR_PART would
# read the global object instead of the data frame being filtered.
crime <- crime %>% filter(UCR_PART != "", UCR_PART != "Other")
summary(crime$DISTRICT)
## A1 A15 A7 B2 B3 C11 C6 D14 D4 E13 E18
## 27 32208 6134 12992 46321 33763 40786 21931 19004 38398 16565 16636
## E5
## 12797
crime <- crime %>% filter(DISTRICT != "")
# Row counts per offense description after the filters above
summary(crime$OFFENSE_DESCRIPTION)
## INVESTIGATE PERSON
## 18063
## SICK/INJURED/MEDICAL - PERSON
## 17923
## VANDALISM
## 14623
## M/V - LEAVING SCENE - PROPERTY DAMAGE
## 14446
## ASSAULT SIMPLE - BATTERY
## 14011
## VERBAL DISPUTE
## 12972
## TOWED MOTOR VEHICLE
## 10744
## INVESTIGATE PROPERTY
## 10601
## LARCENY THEFT FROM BUILDING
## 8951
## THREATS TO DO BODILY HARM
## 8832
## PROPERTY - LOST
## 8507
## LARCENY THEFT FROM MV - NON-ACCESSORY
## 8393
## LARCENY SHOPLIFTING
## 7848
## WARRANT ARREST
## 7681
## LARCENY ALL OTHERS
## 5685
## M/V ACCIDENT - PROPERTY Â DAMAGE
## 5314
## ASSAULT - AGGRAVATED - BATTERY
## 4596
## FRAUD - FALSE PRETENSE / SCHEME
## 4319
## MISSING PERSON - LOCATED
## 4308
## HARASSMENT
## 3955
## M/V ACCIDENT - PERSONAL INJURY
## 3784
## MISSING PERSON
## 3724
## AUTO THEFT
## 3456
## PROPERTY - FOUND
## 3433
## TRESPASSING
## 3127
## FRAUD - CREDIT CARD / ATM FRAUD
## 3101
## ROBBERY - STREET
## 2772
## ASSAULT - AGGRAVATED
## 2732
## BURGLARY - RESIDENTIAL - FORCE
## 2579
## VAL - VIOLATION OF AUTO LAW - OTHER
## 2491
## SERVICE TO OTHER PD INSIDE OF MA.
## 2359
## BURGLARY - RESIDENTIAL - NO FORCE
## 2341
## DRUGS - POSS CLASS B - COCAINE, ETC.
## 2188
## M/V ACCIDENT - OTHER
## 2188
## LARCENY THEFT OF BICYCLE
## 2147
## VAL - OPERATING AFTER REV/SUSP.
## 2134
## DRUGS - POSS CLASS B - INTENT TO MFR DIST DISP
## 1897
## LARCENY THEFT OF MV PARTS & ACCESSORIES
## 1885
## DRUGS - SALE / MANUFACTURING
## 1822
## LICENSE PREMISE VIOLATION
## 1658
## VAL - OPERATING WITHOUT LICENSE
## 1619
## FORGERY / COUNTERFEITING
## 1430
## SUDDEN DEATH
## 1338
## VIOL. OF RESTRAINING ORDER W NO ARREST
## 1324
## M/V ACCIDENT INVOLVING PEDESTRIAN - INJURY
## 1311
## SICK/INJURED/MEDICAL - POLICE
## 1300
## DISORDERLY CONDUCT
## 1284
## DRUGS - POSS CLASS A - HEROIN, ETC.
## 1272
## FIRE REPORT - HOUSE, BUILDING, ETC.
## 1254
## DRUGS - POSS CLASS A - INTENT TO MFR DIST DISP
## 1212
## DRUGS - OTHER
## 1200
## DEATH INVESTIGATION
## 1178
## DRUGS - SICK ASSIST - HEROIN
## 1133
## FRAUD - IMPERSONATION
## 1043
## M/V - LEAVING SCENE - PERSONAL INJURY
## 1024
## LANDLORD - TENANT SERVICE
## 965
## BALLISTICS EVIDENCE/FOUND
## 938
## SEARCH WARRANT
## 937
## ASSAULT - SIMPLE
## 918
## STOLEN PROPERTY - BUYING / RECEIVING / POSSESSING
## 912
## BURGLARY - COMMERICAL - FORCE
## 906
## VAL - OPERATING UNREG/UNINS Â CAR
## 878
## PROPERTY - ACCIDENTAL DAMAGE
## 864
## PROPERTY - MISSING
## 846
## WEAPON - FIREARM - CARRYING / POSSESSING, ETC
## 811
## AUTO THEFT - MOTORCYCLE / SCOOTER
## 791
## DISTURBING THE PEACE
## 777
## DRUGS - POSS CLASS D
## 775
## LIQUOR - DRINKING IN PUBLIC
## 725
## DRUGS - POSS CLASS D - INTENT TO MFR DIST DISP
## 720
## FIREARM/WEAPON - FOUND OR CONFISCATED
## 674
## BURGLARY - RESIDENTIAL - ATTEMPT
## 657
## M/V ACCIDENT - INVOLVING Â BICYCLE - INJURY
## 655
## M/V ACCIDENT - POLICE VEHICLE
## 618
## NOISY PARTY/RADIO-NO ARREST
## 610
## ROBBERY - OTHER
## 608
## OTHER OFFENSE
## 597
## M/V ACCIDENT - OTHER CITY VEHICLE
## 587
## MISSING PERSON - NOT REPORTED - LOCATED
## 577
## WEAPON - OTHER - CARRYING / POSSESSING, ETC
## 553
## ROBBERY - COMMERCIAL
## 549
## DRUGS - POSS CLASS E
## 517
## FIRE REPORT - CAR, BRUSH, ETC.
## 475
## M/V PLATES - LOST
## 473
## VAL - OPERATING W/O AUTHORIZATION LAWFUL
## 436
## PROPERTY - STOLEN THEN RECOVERED
## 435
## LARCENY PICK-POCKET
## 410
## OPERATING UNDER THE INFLUENCE ALCOHOL
## 403
## EVADING FARE
## 392
## DRUGS - POSS CLASS C
## 391
## AUTO THEFT - LEASED/RENTED VEHICLE
## 384
## ANIMAL CONTROL - DOG BITES - ETC.
## 356
## SUICIDE / SUICIDE ATTEMPT
## 342
## FRAUD - WIRE
## 328
## DRUGS - SICK ASSIST - OTHER HARMFUL DRUG
## 323
## VIOLATION - CITY ORDINANCE
## 321
## M/V ACCIDENT - INVOLVING BICYCLE - NO INJURY
## 316
## M/V ACCIDENT - INVOLVING PEDESTRIAN - NO INJURY
## 311
## BURGLARY - COMMERICAL - NO FORCE
## 304
## (Other)
## 8658
# Re-create the factor so that any levels left without rows by the filtering
# above are dropped.
crime$OFFENSE_DESCRIPTION <- factor(crime$OFFENSE_DESCRIPTION)
# Derive two helper columns:
#   DAY_NIGHT - "DAY" when HOUR is 6..18 inclusive, otherwise "NIGHT"
#   WEEK_END  - "Yes" for Saturday/Sunday, otherwise "No"
# dplyr::mutate is named explicitly because plyr is attached after dplyr and
# masks mutate(); bare column names are evaluated against the piped data.
crime <- crime %>%
  dplyr::mutate(
    DAY_NIGHT = ifelse(HOUR >= 6 & HOUR <= 18, "DAY", "NIGHT"),
    # %in% tests membership in a set of values; `==` against a vector of
    # length > 1 would recycle element-wise instead
    WEEK_END = ifelse(DAY_OF_WEEK %in% c("Saturday", "Sunday"), "Yes", "No")
  )
# Frequency of each offense code group
# Print large axis values in full instead of scientific notation
options(scipen = 999)
#1
# geom_bar() counts the rows in each group. Mapping y to the total row count
# and stacking it with geom_col() would inflate every bar by a factor of
# nrow(crime).
ggplot(crime, aes(x = OFFENSE_CODE_GROUP)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Frequency of offense code groups",
    x = "Offense Code Group",
    y = "Frequency"
  )

# Distribution of offense codes, split by UCR_PART
#2
# OFFENSE_CODE is numeric, so it is binned with geom_histogram(); geom_bar()
# has no `binwidth` parameter and ignores it with a warning.
ggplot(data = crime, aes(x = OFFENSE_CODE, fill = UCR_PART)) +
  geom_histogram(binwidth = 70, position = "dodge") +
  ggtitle("Frequency Of Offense code") +
  ylab("Frequency")

# Pie charts for year and month
# Share of crimes that occurred in each year
crime$YEAR <- factor(crime$YEAR)
levels(crime$YEAR)
## [1] "2015" "2016" "2017" "2018"
crimes_in_year <- data.frame(table(crime$YEAR))
names(crimes_in_year) <- c("year", "Freq")
#3
# A pie chart is a single stacked bar drawn in polar coordinates. The labels
# use position_stack(vjust = 0.5) so each percentage sits in the middle of its
# own slice; hand-computed cumulative offsets run in the opposite direction to
# the stacking order and end up on the wrong slices.
ggplot(crimes_in_year, aes(x = "", y = Freq, fill = year)) +
  geom_col(width = 1, color = "white") +
  coord_polar("y", start = 0) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank()
  ) +
  geom_text(
    aes(label = scales::percent(Freq / sum(Freq))),
    position = position_stack(vjust = 0.5),
    size = 5
  ) +
  labs(title = "Crime Percentage In Each Year")

# Share of crimes that occurred in each month
crime$MONTH <- factor(crime$MONTH)
levels(crime$MONTH)
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"
crimes_in_month <- data.frame(table(crime$MONTH))
names(crimes_in_month) <- c("month", "Freq")
# Look the name up from the month number (rather than assuming all twelve rows
# are present in order) and keep calendar order as the factor levels, so the
# slices and legend are not sorted alphabetically.
crimes_in_month$month_name <- factor(
  month.name[as.integer(as.character(crimes_in_month$month))],
  levels = month.name
)
#4
# position_stack(vjust = 0.5) centres each label in its own slice; manual
# cumulative offsets do not follow the stacking order.
ggplot(crimes_in_month, aes(x = "", y = Freq, fill = month_name)) +
  geom_col(width = 1, color = "white") +
  coord_polar("y", start = 0) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank()
  ) +
  geom_text(
    aes(label = scales::percent(Freq / sum(Freq))),
    position = position_stack(vjust = 0.5),
    size = 5
  ) +
  labs(title = "Crime Percentage In Each month")

# Number of crimes per district
#5
# factor() rebuilds the levels from the values actually present, so the blank
# district level whose rows were filtered out earlier does not show up as an
# empty bar labelled 0.
crimes_in_district <- data.frame(table(factor(crime$DISTRICT)))
names(crimes_in_district) <- c("district", "Freq")
ggplot(crimes_in_district, aes(x = district, y = Freq)) +
  geom_col(fill = "#0073C2FF") +
  # Print the count just above each bar
  geom_text(aes(label = Freq), vjust = -0.3) +
  theme_pubclean()

# Number of crimes per hour of the day, drawn as a lollipop chart
#6
crimes_in_hour <- setNames(
  data.frame(table(crime$HOUR)),
  c("hour", "Freq")
)
ggplot(data = crimes_in_hour, mapping = aes(x = hour, y = Freq)) +
  # Grey stem from zero up to the count
  geom_linerange(
    mapping = aes(ymin = 0, ymax = Freq),
    size = 1.5,
    color = "lightgray"
  ) +
  # Coloured dot at the count, one colour per hour
  geom_point(mapping = aes(color = hour), size = 3) +
  ggpubr::color_palette() +
  theme_pubclean()

# Crimes per hour, one panel per day of the week
#7
# Cross-tabulate HOUR by DAY_OF_WEEK; the resulting columns are
# Var1 (hour), Var2 (day of week) and Freq (count).
t <- data.frame(
  table(crime$HOUR, crime$DAY_OF_WEEK)
)
ggplot(data = t, mapping = aes(x = Var1, y = Freq, fill = Var2)) +
  geom_col(color = "white", position = "dodge") +
  facet_wrap(~Var2) +
  labs(
    title = "HOUR Vs DAY_OF_WEEK",
    x = "Hours",
    y = "Frequency"
  )

#UCR_PART
#8
library(ggridges)
## Warning: package 'ggridges' was built under R version 3.6.1
##
## Attaching package: 'ggridges'
## The following object is masked from 'package:ggplot2':
##
## scale_discrete_manual
# Re-create the factor so the filtered-out UCR_PART levels are dropped
crime$UCR_PART <- factor(crime$UCR_PART)
ucr_part <- data.frame(table(crime$UCR_PART))
names(ucr_part) <- c("part", "Freq")
# One bar per UCR part with its frequency as the height. Binning the three
# frequency values themselves (x = Freq with stat = "bin") does not show
# frequency per part, which is what the axis labels describe.
ggplot(ucr_part, aes(x = part, y = Freq, fill = part)) +
  geom_col(color = "white") +
  scale_fill_manual(values = c("#00AFBB", "#E7B800", "#D9210F")) +
  labs(title = "UCR_PART Frequency", x = "UCR_PART", y = "Frequency")

# Split the OCCURRED_ON_DATE timestamp at the space into DATE and TIME
# columns, then convert DATE to the Date class.
crime <- crime %>%
  separate(OCCURRED_ON_DATE, into = c("DATE", "TIME"), sep = " ")
crime[["DATE"]] <- as.Date(crime[["DATE"]])
# Calendar heat map: prepare per-row calendar coordinates and daily counts
#9
calender <- crime
# View() opens a data viewer window; only do that in an interactive session
if (interactive()) {
  View(calender)
}
# Day of week as a number, 0 = Sunday .. 6 = Saturday
calender$weekday <- as.POSIXlt(calender$DATE)$wday
# Ordered factor whose levels run Sat .. Sun (reversed), so Sunday is the last
# level
calender$weekdayf <- factor(
  calender$weekday,
  levels = rev(0:6),
  labels = rev(c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")),
  ordered = TRUE
)
calender$monthf <- factor(
  month(calender$DATE),
  levels = as.character(1:12),
  labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
  ordered = TRUE
)
calender$yearmonth <- factor(as.yearmon(calender$DATE))
# %U numbers the weeks of the year starting on Sunday, which matches the
# Sunday-first weekday coding above. %W starts weeks on Monday and would put
# each Sunday in the same week as the six days before it.
calender$week <- as.numeric(format(calender$DATE, "%U"))
# Week index within each month: 1 for the first week that month touches
calender <- ddply(calender, .(yearmonth), transform,
                  monthweek = 1 + week - min(week))
# Number of recorded crimes per calendar date
temp <- data.frame(table(crime$DATE))
names(temp) <- c("DATE", "CRIME_FREQ")
temp$DATE <- as.Date(temp$DATE)
# Attach the daily count to every row; the join key is stated explicitly
calender <- calender %>% inner_join(temp, by = "DATE")
## Joining, by = "DATE"
# Calendar heat map: one tile per day, coloured by that day's crime count,
# with months as columns of panels and years as rows.
# calender holds one row per incident, so it is reduced to one row per date
# first; otherwise every incident draws an identical tile on top of the
# others. Columns are referenced by bare name so aes() and the facet formula
# are evaluated against the data being plotted.
ggplot(
  dplyr::distinct(calender, DATE, monthweek, weekdayf, monthf, CRIME_FREQ),
  aes(monthweek, weekdayf, fill = CRIME_FREQ)
) +
  geom_tile(colour = "white") +
  facet_grid(year(DATE) ~ monthf) +
  scale_fill_gradient(low = "red", high = "green") +
  xlab("Week of Month") + ylab("") +
  ggtitle("Time-Series Calendar Heatmap") + labs(fill = "Crime Frequency")
