library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.1
library(foreign)
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.6.1
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 3.6.1
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(tidyr)
library(plyr)
## Warning: package 'plyr' was built under R version 3.6.1
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:ggpubr':
## 
##     mutate
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(scales)
## Warning: package 'scales' was built under R version 3.6.1
library(zoo)
## Warning: package 'zoo' was built under R version 3.6.1
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.1
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:plyr':
## 
##     here
## The following object is masked from 'package:base':
## 
##     date
# Load the raw crime records. Factor conversion is requested explicitly
# because the summary() calls that follow report per-level counts, which
# requires factor columns (this stopped being the read.csv() default in R 4.0).
crime <- read.csv("crime.csv", stringsAsFactors = TRUE)
# View() opens a data viewer, so only call it from an interactive session.
if (interactive()) {
  View(crime)
}
# Drop columns 15 to 17 by position.
# NOTE(review): presumably the coordinate/location columns - confirm against
# the CSV header.
crime <- crime[-c(15, 16, 17)]
# Remove every row that contains at least one NA value
crime <- na.omit(crime)
# Summary of the cleaned dataset
summary(crime)
##    INCIDENT_NUMBER    OFFENSE_CODE 
##  I162030584:    13   Min.   : 111  
##  I152080623:    11   1st Qu.: 802  
##  I172013170:    10   Median :2907  
##  I172096394:    10   Mean   :2291  
##  I182065208:    10   3rd Qu.:3201  
##  I162001871:     9   Max.   :3831  
##  (Other)   :298760                 
##                        OFFENSE_CODE_GROUP
##  Motor Vehicle Accident Response: 30558  
##  Larceny                        : 25256  
##  Medical Assistance             : 22491  
##  Investigate Person             : 18063  
##  Other                          : 17157  
##  Simple Assault                 : 15002  
##  (Other)                        :170296  
##                             OFFENSE_DESCRIPTION    DISTRICT    
##  INVESTIGATE PERSON                   : 18067   B2     :46561  
##  SICK/INJURED/MEDICAL - PERSON        : 17923   C11    :40991  
##  VANDALISM                            : 14624   D4     :38530  
##  M/V - LEAVING SCENE - PROPERTY DAMAGE: 14447   B3     :33939  
##  ASSAULT SIMPLE - BATTERY             : 14011   A1     :32274  
##  VERBAL DISPUTE                       : 12972   C6     :22050  
##  (Other)                              :206779   (Other):84478  
##  REPORTING_AREA  SHOOTING              OCCURRED_ON_DATE       YEAR     
##  Min.   :  0.0    :297833   2017-06-01 00:00:00:    28   Min.   :2015  
##  1st Qu.:177.0   Y:   990   2015-07-01 00:00:00:    26   1st Qu.:2016  
##  Median :344.0              2016-08-01 00:00:00:    24   Median :2017  
##  Mean   :383.2              2015-06-18 05:00:00:    22   Mean   :2017  
##  3rd Qu.:544.0              2017-01-01 00:00:00:    21   3rd Qu.:2017  
##  Max.   :962.0              2017-08-01 00:00:00:    21   Max.   :2018  
##                             (Other)            :298681                 
##      MONTH           DAY_OF_WEEK         HOUR             UCR_PART     
##  Min.   : 1.000   Friday   :45445   Min.   : 0.00             :    90  
##  1st Qu.: 4.000   Monday   :42799   1st Qu.: 9.00   Other     :  1171  
##  Median : 7.000   Saturday :41893   Median :14.00   Part One  : 59319  
##  Mean   : 6.612   Sunday   :37912   Mean   :13.12   Part Three:146858  
##  3rd Qu.: 9.000   Thursday :43668   3rd Qu.:18.00   Part Two  : 91385  
##  Max.   :12.000   Tuesday  :43327   Max.   :23.00                      
##                   Wednesday:43779                                      
##             STREET      
##  WASHINGTON ST : 14144  
##  BLUE HILL AVE :  7103  
##  BOYLSTON ST   :  6972  
##  DORCHESTER AVE:  5035  
##  TREMONT ST    :  4668  
##  HARRISON AVE  :  4440  
##  (Other)       :256461
# A single incident id can be recorded for more than one offense
summary(crime[["INCIDENT_NUMBER"]])
##    I162030584    I152080623    I172013170    I172096394    I182065208 
##            13            11            10            10            10 
##    I162001871    I162071327    I162098170    I172054429    I172056883 
##             9             9             9             9             9 
## I130041200-00    I152076465    I152105431    I162056703    I162064331 
##             8             8             8             8             8 
##    I162078338    I162082917    I162087224    I172053616    I172069723 
##             8             8             8             8             8 
##    I152055687    I152057379    I152061219    I152066520    I152067057 
##             7             7             7             7             7 
##    I152071480    I152072690    I152081203    I152091216    I152095733 
##             7             7             7             7             7 
##    I152096998    I152098022    I162003538    I162010747    I162018523 
##             7             7             7             7             7 
##    I162045680    I162050011    I162054378    I162063389    I162066003 
##             7             7             7             7             7 
##    I162067346    I162068784    I162083089    I162091005    I162095648 
##             7             7             7             7             7 
##    I172018004    I172018939    I172034576    I172035545    I172039629 
##             7             7             7             7             7 
##    I172049351    I172049837    I172051375    I172073130    I172077319 
##             7             7             7             7             7 
##    I172090526    I182000755    I182004372    I182033082    I182048995 
##             7             7             7             7             7 
##    I182056728    I152053080    I152054996    I152055981    I152058024 
##             7             6             6             6             6 
##    I152063289    I152064262    I152064440    I152069051    I152069565 
##             6             6             6             6             6 
##    I152070360    I152070497    I152073806    I152078014    I152078189 
##             6             6             6             6             6 
##    I152081150    I152081170    I152083209    I152090153    I152094958 
##             6             6             6             6             6 
##    I152095325    I152095592    I152101251    I152104401    I152105025 
##             6             6             6             6             6 
##    I152105059    I162000603    I162001102    I162001325    I162002070 
##             6             6             6             6             6 
##    I162002959    I162003106    I162004797    I162004842    I162007562 
##             6             6             6             6             6 
##    I162007649    I162014789    I162015180    I162019520       (Other) 
##             6             6             6             6        298129
# Store the incidents that involved a shooting in a separate dataset
summary(crime$SHOOTING)
##             Y 
## 297833    990
crime$SHOOTING <- factor(crime$SHOOTING)
levels(crime$SHOOTING)
## [1] ""  "Y"
# Refer to the column by its bare name so filter() evaluates it against the
# piped data frame instead of the global `crime` object.
shooting_crime <- crime %>% filter(SHOOTING == "Y")

# Remove the rows whose UCR part is blank or "Other"
summary(crime$UCR_PART)
##                 Other   Part One Part Three   Part Two 
##         90       1171      59319     146858      91385
# Column names are used bare so filter() evaluates them against the piped
# data frame; both conditions must hold for a row to be kept.
crime <- crime %>% filter(UCR_PART != "", UCR_PART != "Other")
summary(crime$DISTRICT)
##          A1   A15    A7    B2    B3   C11    C6   D14    D4   E13   E18 
##    27 32208  6134 12992 46321 33763 40786 21931 19004 38398 16565 16636 
##    E5 
## 12797
# Remove the rows with a blank district
crime <- crime %>% filter(DISTRICT != "")
summary(crime$OFFENSE_DESCRIPTION)
##                                INVESTIGATE PERSON 
##                                             18063 
##                     SICK/INJURED/MEDICAL - PERSON 
##                                             17923 
##                                         VANDALISM 
##                                             14623 
##             M/V - LEAVING SCENE - PROPERTY DAMAGE 
##                                             14446 
##                          ASSAULT SIMPLE - BATTERY 
##                                             14011 
##                                    VERBAL DISPUTE 
##                                             12972 
##                               TOWED MOTOR VEHICLE 
##                                             10744 
##                              INVESTIGATE PROPERTY 
##                                             10601 
##                       LARCENY THEFT FROM BUILDING 
##                                              8951 
##                         THREATS TO DO BODILY HARM 
##                                              8832 
##                                   PROPERTY - LOST 
##                                              8507 
##             LARCENY THEFT FROM MV - NON-ACCESSORY 
##                                              8393 
##                               LARCENY SHOPLIFTING 
##                                              7848 
##                                    WARRANT ARREST 
##                                              7681 
##                                LARCENY ALL OTHERS 
##                                              5685 
##                   M/V ACCIDENT - PROPERTY  DAMAGE 
##                                              5314 
##                    ASSAULT - AGGRAVATED - BATTERY 
##                                              4596 
##                   FRAUD - FALSE PRETENSE / SCHEME 
##                                              4319 
##                          MISSING PERSON - LOCATED 
##                                              4308 
##                                        HARASSMENT 
##                                              3955 
##                    M/V ACCIDENT - PERSONAL INJURY 
##                                              3784 
##                                    MISSING PERSON 
##                                              3724 
##                                        AUTO THEFT 
##                                              3456 
##                                  PROPERTY - FOUND 
##                                              3433 
##                                       TRESPASSING 
##                                              3127 
##                   FRAUD - CREDIT CARD / ATM FRAUD 
##                                              3101 
##                                  ROBBERY - STREET 
##                                              2772 
##                              ASSAULT - AGGRAVATED 
##                                              2732 
##                    BURGLARY - RESIDENTIAL - FORCE 
##                                              2579 
##               VAL - VIOLATION OF AUTO LAW - OTHER 
##                                              2491 
##                 SERVICE TO OTHER PD INSIDE OF MA. 
##                                              2359 
##                 BURGLARY - RESIDENTIAL - NO FORCE 
##                                              2341 
##              DRUGS - POSS CLASS B - COCAINE, ETC. 
##                                              2188 
##                              M/V ACCIDENT - OTHER 
##                                              2188 
##                          LARCENY THEFT OF BICYCLE 
##                                              2147 
##                   VAL - OPERATING AFTER REV/SUSP. 
##                                              2134 
##    DRUGS - POSS CLASS B - INTENT TO MFR DIST DISP 
##                                              1897 
##           LARCENY THEFT OF MV PARTS & ACCESSORIES 
##                                              1885 
##                      DRUGS - SALE / MANUFACTURING 
##                                              1822 
##                         LICENSE PREMISE VIOLATION 
##                                              1658 
##                   VAL - OPERATING WITHOUT LICENSE 
##                                              1619 
##                          FORGERY / COUNTERFEITING 
##                                              1430 
##                                      SUDDEN DEATH 
##                                              1338 
##            VIOL. OF RESTRAINING ORDER W NO ARREST 
##                                              1324 
##        M/V ACCIDENT INVOLVING PEDESTRIAN - INJURY 
##                                              1311 
##                     SICK/INJURED/MEDICAL - POLICE 
##                                              1300 
##                                DISORDERLY CONDUCT 
##                                              1284 
##              DRUGS - POSS CLASS A - HEROIN, ETC.  
##                                              1272 
##               FIRE REPORT - HOUSE, BUILDING, ETC. 
##                                              1254 
##    DRUGS - POSS CLASS A - INTENT TO MFR DIST DISP 
##                                              1212 
##                                     DRUGS - OTHER 
##                                              1200 
##                               DEATH INVESTIGATION 
##                                              1178 
##                      DRUGS - SICK ASSIST - HEROIN 
##                                              1133 
##                             FRAUD - IMPERSONATION 
##                                              1043 
##             M/V - LEAVING SCENE - PERSONAL INJURY 
##                                              1024 
##                         LANDLORD - TENANT SERVICE 
##                                               965 
##                         BALLISTICS EVIDENCE/FOUND 
##                                               938 
##                                    SEARCH WARRANT 
##                                               937 
##                                  ASSAULT - SIMPLE 
##                                               918 
## STOLEN PROPERTY - BUYING / RECEIVING / POSSESSING 
##                                               912 
##                     BURGLARY - COMMERICAL - FORCE 
##                                               906 
##                  VAL - OPERATING UNREG/UNINS  CAR 
##                                               878 
##                      PROPERTY - ACCIDENTAL DAMAGE 
##                                               864 
##                                PROPERTY - MISSING 
##                                               846 
##     WEAPON - FIREARM - CARRYING / POSSESSING, ETC 
##                                               811 
##                 AUTO THEFT - MOTORCYCLE / SCOOTER 
##                                               791 
##                              DISTURBING THE PEACE 
##                                               777 
##                              DRUGS - POSS CLASS D 
##                                               775 
##                       LIQUOR - DRINKING IN PUBLIC 
##                                               725 
##    DRUGS - POSS CLASS D - INTENT TO MFR DIST DISP 
##                                               720 
##             FIREARM/WEAPON - FOUND OR CONFISCATED 
##                                               674 
##                  BURGLARY - RESIDENTIAL - ATTEMPT 
##                                               657 
##        M/V ACCIDENT - INVOLVING  BICYCLE - INJURY 
##                                               655 
##                     M/V ACCIDENT - POLICE VEHICLE 
##                                               618 
##                       NOISY PARTY/RADIO-NO ARREST 
##                                               610 
##                                   ROBBERY - OTHER 
##                                               608 
##                                     OTHER OFFENSE 
##                                               597 
##                 M/V ACCIDENT - OTHER CITY VEHICLE 
##                                               587 
##           MISSING PERSON - NOT REPORTED - LOCATED 
##                                               577 
##       WEAPON - OTHER - CARRYING / POSSESSING, ETC 
##                                               553 
##                              ROBBERY - COMMERCIAL 
##                                               549 
##                              DRUGS - POSS CLASS E 
##                                               517 
##                    FIRE REPORT - CAR, BRUSH, ETC. 
##                                               475 
##                                 M/V PLATES - LOST 
##                                               473 
##          VAL - OPERATING W/O AUTHORIZATION LAWFUL 
##                                               436 
##                  PROPERTY - STOLEN THEN RECOVERED 
##                                               435 
##                               LARCENY PICK-POCKET 
##                                               410 
##             OPERATING UNDER THE INFLUENCE ALCOHOL 
##                                               403 
##                                      EVADING FARE 
##                                               392 
##                              DRUGS - POSS CLASS C 
##                                               391 
##                AUTO THEFT - LEASED/RENTED VEHICLE 
##                                               384 
##                 ANIMAL CONTROL - DOG BITES - ETC. 
##                                               356 
##                         SUICIDE / SUICIDE ATTEMPT 
##                                               342 
##                                      FRAUD - WIRE 
##                                               328 
##          DRUGS - SICK ASSIST - OTHER HARMFUL DRUG 
##                                               323 
##                        VIOLATION - CITY ORDINANCE 
##                                               321 
##      M/V ACCIDENT - INVOLVING BICYCLE - NO INJURY 
##                                               316 
##   M/V ACCIDENT - INVOLVING PEDESTRIAN - NO INJURY 
##                                               311 
##                  BURGLARY - COMMERICAL - NO FORCE 
##                                               304 
##                                           (Other) 
##                                              8658
# Rebuild the factor so that levels left without rows by the filters above
# are dropped.
crime$OFFENSE_DESCRIPTION <- factor(crime$OFFENSE_DESCRIPTION)

# Add derived columns:
#   DAY_NIGHT - "DAY" for hours 6 to 18 (inclusive), otherwise "NIGHT"
#   WEEK_END  - "Yes" for Saturday/Sunday, otherwise "No"
# dplyr::mutate is named explicitly because plyr is attached after dplyr in
# this script and masks mutate(). Columns are referenced by bare name so they
# are evaluated against the piped data frame.
crime <- crime %>%
  dplyr::mutate(
    DAY_NIGHT = ifelse(HOUR >= 6 & HOUR <= 18, "DAY", "NIGHT"),
    # %in% is required to test each value against a set of more than one day
    WEEK_END = ifelse(DAY_OF_WEEK %in% c("Saturday", "Sunday"), "Yes", "No")
  )

# Frequency of each offense code group
# Print axis numbers in fixed notation rather than scientific notation.
options(scipen = 999)
#1
# geom_bar() counts the rows in each group. Mapping y to the total row count
# with geom_col() would stack that constant once per row and inflate every
# bar by a factor of nrow(crime).
ggplot(crime, aes(x = OFFENSE_CODE_GROUP)) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Frequency of offense code groups",
    x = "Offense Code Group",
    y = "Frequency"
  )

# Distribution of offense codes, split by UCR_PART
#2
# OFFENSE_CODE is numeric, so it is binned (width 70) with geom_histogram();
# geom_bar() no longer accepts a `binwidth` argument.
ggplot(data = crime, aes(x = OFFENSE_CODE, fill = UCR_PART)) +
  geom_histogram(binwidth = 70, position = "dodge") +
  ggtitle("Frequency Of Offense code") +
  ylab("Frequency")

# Pie charts for year and month

# Crimes that occurred in each year
crime$YEAR <- factor(crime$YEAR)
levels(crime$YEAR)
## [1] "2015" "2016" "2017" "2018"
crimes_in_year <- data.frame(table(crime$YEAR))
names(crimes_in_year) <- c("year", "Freq")
#3
# A single stacked bar bent into polar coordinates gives the pie. The labels
# are placed with position_stack(vjust = 0.5) so each percentage sits in the
# middle of its own slice; hand-computed cumulative offsets follow the data
# order, which is not the order in which ggplot2 stacks the slices.
ggplot(crimes_in_year, aes(x = "", y = Freq, fill = year)) +
  geom_bar(width = 1, stat = "identity", color = "white") +
  coord_polar("y", start = 0) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank()
  ) +
  geom_text(
    aes(label = scales::percent(Freq / sum(Freq))),
    position = position_stack(vjust = 0.5),
    size = 5
  ) +
  labs(title = "Crime Percentage In Each Year")

# Crimes that occurred in each month
crime$MONTH <- factor(crime$MONTH)
levels(crime$MONTH)
##  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "11" "12"
crimes_in_month <- data.frame(table(crime$MONTH))
names(crimes_in_month) <- c("month", "Freq")
# Look the month name up from the month number, so the mapping stays correct
# even if a month is missing from the data. The levels are set in calendar
# order so the slices and legend are not sorted alphabetically.
crimes_in_month$month_name <- factor(
  month.name[as.integer(as.character(crimes_in_month$month))],
  levels = month.name
)
#4
# Labels use position_stack(vjust = 0.5) so each percentage is centred on the
# slice it belongs to, following the same stacking order as the bars.
ggplot(crimes_in_month, aes(x = "", y = Freq, fill = month_name)) +
  geom_bar(width = 1, stat = "identity", color = "white") +
  coord_polar("y", start = 0) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank()
  ) +
  geom_text(
    aes(label = scales::percent(Freq / sum(Freq))),
    position = position_stack(vjust = 0.5),
    size = 5
  ) +
  labs(title = "Crime Percentage In Each month")

# Number of crimes per district
#5
# factor() rebuilds the levels from the values that are still present, so a
# level that no longer has any rows (the blank district removed earlier) does
# not show up as an empty bar labelled 0.
crimes_in_district <- data.frame(table(factor(crime$DISTRICT)))
names(crimes_in_district) <- c("district", "Freq")
ggplot(crimes_in_district, aes(x = district, y = Freq)) +
  geom_bar(fill = "#0073C2FF", stat = "identity") +
  # Print the count just above each bar
  geom_text(aes(label = Freq), vjust = -0.3) +
  theme_pubclean()

# Number of crimes per hour of the day, drawn as a lollipop chart
#6
crimes_in_hour <- setNames(
  as.data.frame(table(crime$HOUR)),
  c("hour", "Freq")
)
ggplot(crimes_in_hour, aes(x = hour, y = Freq)) +
  # Stem of each lollipop: a grey line from zero up to the count
  geom_linerange(
    aes(x = hour, ymin = 0, ymax = Freq),
    color = "lightgray",
    size = 1.5
  ) +
  # Head of each lollipop, coloured by hour
  geom_point(aes(color = hour), size = 3) +
  ggpubr::color_palette() +
  theme_pubclean()

# Crimes per hour, one panel per day of the week
#7
# Cross-tabulate hour (Var1) against day of week (Var2); Freq holds the count.
t <- as.data.frame(table(crime$HOUR, crime$DAY_OF_WEEK))
ggplot(t, aes(x = Var1, y = Freq, fill = Var2)) +
  geom_col(position = "dodge", color = "white") +
  facet_wrap(~Var2) +
  labs(title = "HOUR Vs DAY_OF_WEEK", x = "Hours", y = "Frequency")

#UCR_PART
#8
library(ggridges)
## Warning: package 'ggridges' was built under R version 3.6.1
## 
## Attaching package: 'ggridges'
## The following object is masked from 'package:ggplot2':
## 
##     scale_discrete_manual
# Rebuild the factor so that levels removed by the earlier filters are dropped
crime$UCR_PART <- factor(crime$UCR_PART)
ucr_part <- data.frame(table(crime$UCR_PART))
names(ucr_part) <- c("part", "Freq")
# One bar per UCR part with its count as the height, so the axes match their
# labels. Binning the Freq column itself would only produce a histogram of the
# count values rather than a count per part.
ggplot(ucr_part, aes(x = part, y = Freq, fill = part)) +
  geom_col(color = "white") +
  scale_fill_manual(values = c("#00AFBB", "#E7B800", "#D9210F")) +
  labs(title = "UCR_PART Frequency", x = "UCR_PART", y = "Frequency")

# Split the timestamp at the space into a DATE column and a TIME column,
# then convert DATE to the Date class
crime <- crime %>%
  separate(OCCURRED_ON_DATE, into = c("DATE", "TIME"), sep = " ")
crime[["DATE"]] <- as.Date(crime[["DATE"]])
# Calendar heat map of the number of crimes per day
#9
calender <- crime
# View() opens a data viewer, so only call it from an interactive session.
if (interactive()) {
  View(calender)
}
# Day of week as a number, 0 = Sunday ... 6 = Saturday
calender$weekday <- as.POSIXlt(calender$DATE)$wday
# Levels are reversed (Sat ... Sun) so that Sunday ends up at the top of the
# y axis.
calender$weekdayf <- factor(
  calender$weekday,
  levels = rev(0:6),
  labels = rev(c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")),
  ordered = TRUE
)
calender$monthf <- factor(
  month(calender$DATE),
  levels = as.character(1:12),
  labels = c(
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  ),
  ordered = TRUE
)
calender$yearmonth <- factor(as.yearmon(calender$DATE))
# Week of the year (weeks starting on Monday)
calender$week <- as.numeric(format(calender$DATE, "%W"))
# Week within the month: offset from the first week seen in that year-month
calender <- ddply(
  calender, .(yearmonth), transform,
  monthweek = 1 + week - min(week)
)
# Number of crimes recorded on each date
temp <- data.frame(table(crime$DATE))
names(temp) <- c("DATE", "CRIME_FREQ")
temp$DATE <- as.Date(temp$DATE)
# The join key is stated explicitly instead of being guessed from the
# shared column names.
calender <- calender %>% inner_join(temp, by = "DATE")
# Every plotted column depends only on DATE, so keep one row per day for the
# plot: one tile is drawn per day instead of one identical, overlapping tile
# per incident. `calender` itself is left at one row per incident.
calender_daily <- calender[
  !duplicated(calender$DATE),
  c("DATE", "weekdayf", "monthf", "monthweek", "CRIME_FREQ")
]
calender_daily$year <- year(calender_daily$DATE)
# Columns are referenced by bare name so that aes() and facet_grid() take
# them from the plot data rather than from a global object.
ggplot(calender_daily, aes(monthweek, weekdayf, fill = CRIME_FREQ)) +
  geom_tile(colour = "white") +
  facet_grid(year ~ monthf) +
  scale_fill_gradient(low = "red", high = "green") +
  xlab("Week of Month") + ylab("") +
  ggtitle("Time-Series Calendar Heatmap") + labs(fill = "Crime Frequency")