Departemen Kepolisian Cambridge mengumumkan bahwa angka kejahatan di Cambridge menurun selama enam tahun berturut-turut hingga mencapai tingkat yang belum pernah tercatat sejak tahun 1961. Hasil ini dirilis setelah Unit Analisis Kejahatan Departemen Kepolisian Cambridge baru-baru ini merampungkan penghitungan angka-angka tersebut.
library(ggplot2)
library(GGally)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.0.5
library(ggpubr)
library(leaflet)
library(lubridate)
library(dplyr)
library(scales)
# Load the incident records from crime.csv in the working directory, then
# print a compact per-column preview (type plus the first few values).
crime <- read.csv(file = "crime.csv")
glimpse(crime)
## Rows: 327,820
## Columns: 17
## $ INCIDENT_NUMBER <chr> "I182080058", "I182080053", "I182080052", "I182...
## $ OFFENSE_CODE <int> 2403, 3201, 2647, 413, 3122, 1402, 3803, 3301, ...
## $ OFFENSE_CODE_GROUP <chr> "Disorderly Conduct", "Property Lost", "Other",...
## $ OFFENSE_DESCRIPTION <chr> "DISTURBING THE PEACE", "PROPERTY - LOST", "THR...
## $ DISTRICT <chr> "E18", "D14", "B2", "A1", "A7", "C11", "", "B2"...
## $ REPORTING_AREA <int> 495, 795, 329, 92, 36, 351, NA, 603, 543, 621, ...
## $ SHOOTING <chr> "", "", "", "", "", "", "", "", "", "", "", "",...
## $ OCCURRED_ON_DATE <chr> "2018-10-03 20:13:00", "2018-08-30 20:00:00", "...
## $ YEAR <int> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,...
## $ MONTH <int> 10, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, ...
## $ DAY_OF_WEEK <chr> "Wednesday", "Thursday", "Wednesday", "Wednesda...
## $ HOUR <int> 20, 20, 19, 20, 20, 20, 20, 19, 19, 20, 19, 20,...
## $ UCR_PART <chr> "Part Two", "Part Three", "Part Two", "Part One...
## $ STREET <chr> "ARLINGTON ST", "ALLSTON ST", "DEVON ST", "CAMB...
## $ Lat <dbl> 42.26261, 42.35211, 42.30813, 42.35945, 42.3752...
## $ Long <dbl> -71.12119, -71.13531, -71.07693, -71.05965, -71...
## $ Location <chr> "(42.26260773, -71.12118637)", "(42.35211146, -...
Pertama kita harus mencoba mengubah beberapa tipe data.
# Coerce the raw columns to the types used by the rest of the analysis.

# Incident numbers are identifiers, so keep them as plain character strings.
crime$INCIDENT_NUMBER <- as.character(crime$INCIDENT_NUMBER)

# Parse "YYYY-MM-DD HH:MM:SS" strings into date-times in US Eastern time.
crime$OCCURRED_ON_DATE <- ymd_hms(crime$OCCURRED_ON_DATE,
                                  tz = "America/New_York")

# Codes and reporting areas are categorical labels, not quantities.
crime$OFFENSE_CODE <- as.factor(crime$OFFENSE_CODE)
crime$REPORTING_AREA <- as.factor(crime$REPORTING_AREA)

# Turn month numbers into abbreviated names. month.abb is already in calendar
# order ("Jan" ... "Dec"), so it doubles as the level order.
crime$MONTH <- factor(month.abb[crime$MONTH], levels = month.abb)

# Recompute the weekday from the parsed timestamp as an abbreviated, labelled
# factor with Monday as the first day of the week.
crime$DAY_OF_WEEK <- wday(crime$OCCURRED_ON_DATE,
                          label = TRUE,
                          abbr = TRUE,
                          week_start = 1)
Selanjutnya mari kita lihat summary datanya
# Per-column overview of the data frame after the type conversions.
summary(object = crime)
## INCIDENT_NUMBER OFFENSE_CODE OFFENSE_CODE_GROUP OFFENSE_DESCRIPTION
## Length:327820 3006 : 19360 Length:327820 Length:327820
## Class :character 3115 : 19180 Class :character Class :character
## Mode :character 3831 : 16730 Mode :character Mode :character
## 1402 : 15542
## 802 : 15199
## 3301 : 13478
## (Other):228331
## DISTRICT REPORTING_AREA SHOOTING
## Length:327820 111 : 2432 Length:327820
## Class :character 186 : 2080 Class :character
## Mode :character 329 : 1937 Mode :character
## 117 : 1888
## 143 : 1822
## (Other):296741
## NA's : 20920
## OCCURRED_ON_DATE YEAR MONTH DAY_OF_WEEK
## Min. :2015-06-15 00:00:00 Min. :2015 Aug : 35137 Fri :49758
## 1st Qu.:2016-04-20 09:43:00 1st Qu.:2016 Jul : 34640 Wed :48151
## Median :2017-02-14 15:45:00 Median :2017 Sep : 34023 Thu :47872
## Mean :2017-02-10 06:41:16 Mean :2017 Jun : 30622 Tue :47726
## 3rd Qu.:2017-11-30 18:20:00 3rd Qu.:2017 Oct : 26437 Mon :46970
## Max. :2018-10-03 20:49:00 Max. :2018 May : 26242 (Other):87340
## NA's :3 (Other):140719 NA's : 3
## HOUR UCR_PART STREET Lat
## Min. : 0.00 Length:327820 Length:327820 Min. :-1.00
## 1st Qu.: 9.00 Class :character Class :character 1st Qu.:42.30
## Median :14.00 Mode :character Mode :character Median :42.33
## Mean :13.11 Mean :42.21
## 3rd Qu.:18.00 3rd Qu.:42.35
## Max. :23.00 Max. :42.40
## NA's :20632
## Long Location
## Min. :-71.18 Length:327820
## 1st Qu.:-71.10 Class :character
## Median :-71.08 Mode :character
## Mean :-70.91
## 3rd Qu.:-71.06
## Max. : -1.00
## NA's :20632
Setelah itu, mari kita cek apakah terdapat missing value.
# is.na() on a data frame gives a logical matrix with one entry per cell;
# flatten it and count missing (TRUE) versus present (FALSE) cells.
table(as.vector(is.na(crime)))
##
## FALSE TRUE
## 5510750 62190
Karena terdapat missing value, kita harus membersihkannya.
# Remove every row that has an NA in any column.
crime <- crime[complete.cases(crime), ]

# Keep rows that have a district code and whose coordinates are not the
# (-1, -1) pair (presumably a placeholder for an unknown location).
crime <- crime[crime$DISTRICT != "" & (crime$Lat != -1 | crime$Long != -1), ]

# An empty SHOOTING field becomes "N"; the column is then stored as a factor.
crime$SHOOTING <- factor(replace(crime$SHOOTING, crime$SHOOTING == "", "N"))

# Discard factor levels that no longer occur after the filtering above.
crime <- droplevels(crime)
Tingkat kejahatan (crimes) berdasarkan waktu.
# Map an hour of the day to the label of the six-hour period it falls in.
#
# Args:
#   x: Numeric hour(s); a single value or a vector.
#
# Returns:
#   A character vector the same length as x holding one of "12AM to 6AM"
#   (x < 6), "6AM to 12PM" (6 <= x < 12), "12PM to 6PM" (12 <= x < 18) or
#   "6PM to 12AM" (x >= 18). NA hours yield NA_character_.
event <- function(x) {
  period_labels <- c("12AM to 6AM", "6AM to 12PM", "12PM to 6PM", "6PM to 12AM")
  # findInterval() counts how many of the cut points 6, 12, 18 are <= x,
  # giving 0..3; shift by one to index into the labels.
  period_labels[findInterval(x, c(6, 12, 18)) + 1L]
}
# Label every incident with its six-hour period via event(), and store the
# result as an ordered factor whose levels follow the clock rather than the
# alphabet. vapply() pins the result to one character string per row, so the
# column type stays the same even when crime has no rows.
crime$TIME_OCCURED <- factor(
  vapply(crime$HOUR, event, character(1)),
  levels = c("12AM to 6AM",
             "6AM to 12PM",
             "12PM to 6PM",
             "6PM to 12AM"),
  ordered = TRUE
)
# Bar chart of the number of incidents in each six-hour period, with the
# count printed above every bar.
ggplot(crime, aes(x = TIME_OCCURED)) +
  geom_bar(fill = "royalblue2", colour = "mediumblue") +
  # after_stat(count) is the current way to read the computed bar height;
  # the ..count.. notation is deprecated.
  geom_text(
    aes(label = comma(after_stat(count))),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = comma) +
  labs(
    x = NULL, y = NULL,
    title = "Number of Crimes in Cambridge by Time Occured",
    subtitle = "During Year of 2015 - 2018"
  ) +
  theme_igray()
Tingkat kejahatan (crimes) berdasarkan hari.
# Bar chart of the number of incidents per weekday, with the count printed
# above every bar.
ggplot(crime, aes(x = DAY_OF_WEEK)) +
  geom_bar(fill = "royalblue2", colour = "mediumblue") +
  # after_stat(count) is the current way to read the computed bar height;
  # the ..count.. notation is deprecated.
  geom_text(
    aes(label = comma(after_stat(count))),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = comma) +
  labs(
    x = NULL, y = NULL,
    title = "Number of Crimes in Cambridge by Day of Week",
    subtitle = "During Year of 2015 - 2018"
  ) +
  theme_igray()
Tingkat kejahatan (crimes) berdasarkan bulan.
# Bar chart of the number of incidents per calendar month, with the count
# printed above every bar.
ggplot(crime, aes(x = MONTH)) +
  geom_bar(fill = "royalblue2", colour = "mediumblue") +
  # after_stat(count) is the current way to read the computed bar height;
  # the ..count.. notation is deprecated.
  geom_text(
    aes(label = comma(after_stat(count))),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = comma) +
  labs(
    x = NULL, y = NULL,
    title = "Number of Crimes in Cambridge by Month",
    subtitle = "During Year of 2015 - 2018"
  ) +
  theme_igray()
Tingkat kejahatan (crimes) berdasarkan tahun.
# Bar chart of the number of incidents per year, with the count printed
# above every bar.
ggplot(crime, aes(x = YEAR)) +
  geom_bar(fill = "royalblue2", colour = "mediumblue") +
  # after_stat(count) is the current way to read the computed bar height;
  # the ..count.. notation is deprecated.
  geom_text(
    aes(label = comma(after_stat(count))),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = comma) +
  labs(
    x = NULL, y = NULL,
    title = "Number of Crimes in Cambridge",
    subtitle = "During Year of 2015 - 2018"
  ) +
  theme_igray()
# Horizontal bar chart of the ten most frequent offense groups, with each
# count written at the base of its bar.
crime %>%
  filter(!is.na(OFFENSE_CODE_GROUP)) %>%
  group_by(OFFENSE_CODE_GROUP) %>%
  # summarise() turns every named argument into an output column, so only the
  # row count is requested here (n() has no NA handling to configure).
  summarise(count = n()) %>%
  ungroup() %>%
  arrange(desc(count)) %>%
  head(10) %>%
  # Order the factor levels by count so the bars are drawn in sorted order.
  mutate(OFFENSE_CODE_GROUP = reorder(OFFENSE_CODE_GROUP, count)) %>%
  ggplot(aes(x = OFFENSE_CODE_GROUP, y = count)) +
  # geom_col() draws bars whose height is the y value itself.
  geom_col(color = "white", fill = "burlywood4") +
  geom_text(
    aes(x = OFFENSE_CODE_GROUP, y = 1, label = paste0(" ", count)),
    hjust = 0, vjust = .5, size = 4, color = 'black', fontface = 'bold'
  ) +
  labs(x = "crime", y = "count",
       title = "Top crime in Cambridge Neighboorhood 2015 - 2018") +
  coord_flip()