Departemen Kepolisian Cambridge telah mengumumkan bahwa angka kejahatan di Cambridge menurun selama enam tahun berturut-turut hingga mencapai tingkat yang belum pernah tercatat lagi sejak tahun 1961. Hasil ini dirilis setelah Unit Analisis Kejahatan Departemen Kepolisian Cambridge baru-baru ini selesai merampungkan penghitungan angka-angka tersebut.

Import Library

library(ggplot2)
library(GGally)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.0.5
library(ggpubr)
library(leaflet)
library(lubridate)
library(dplyr)
library(scales)

Import Data

# Load the incident records from "crime.csv" (path is relative to the
# current working directory).
crime <- read.csv(file = "crime.csv")

Exploratory Data Analysis

# Print a compact column-by-column overview (name, type, first values).
crime %>% glimpse()
## Rows: 327,820
## Columns: 17
## $ INCIDENT_NUMBER     <chr> "I182080058", "I182080053", "I182080052", "I182...
## $ OFFENSE_CODE        <int> 2403, 3201, 2647, 413, 3122, 1402, 3803, 3301, ...
## $ OFFENSE_CODE_GROUP  <chr> "Disorderly Conduct", "Property Lost", "Other",...
## $ OFFENSE_DESCRIPTION <chr> "DISTURBING THE PEACE", "PROPERTY - LOST", "THR...
## $ DISTRICT            <chr> "E18", "D14", "B2", "A1", "A7", "C11", "", "B2"...
## $ REPORTING_AREA      <int> 495, 795, 329, 92, 36, 351, NA, 603, 543, 621, ...
## $ SHOOTING            <chr> "", "", "", "", "", "", "", "", "", "", "", "",...
## $ OCCURRED_ON_DATE    <chr> "2018-10-03 20:13:00", "2018-08-30 20:00:00", "...
## $ YEAR                <int> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018,...
## $ MONTH               <int> 10, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, ...
## $ DAY_OF_WEEK         <chr> "Wednesday", "Thursday", "Wednesday", "Wednesda...
## $ HOUR                <int> 20, 20, 19, 20, 20, 20, 20, 19, 19, 20, 19, 20,...
## $ UCR_PART            <chr> "Part Two", "Part Three", "Part Two", "Part One...
## $ STREET              <chr> "ARLINGTON ST", "ALLSTON ST", "DEVON ST", "CAMB...
## $ Lat                 <dbl> 42.26261, 42.35211, 42.30813, 42.35945, 42.3752...
## $ Long                <dbl> -71.12119, -71.13531, -71.07693, -71.05965, -71...
## $ Location            <chr> "(42.26260773, -71.12118637)", "(42.35211146, -...

Pertama kita harus mencoba mengubah beberapa tipe data.

# Normalise column types before analysis.

# Incident numbers are identifiers, so they are kept as character strings.
crime$INCIDENT_NUMBER <- as.character(crime$INCIDENT_NUMBER)

# Parse the "YYYY-MM-DD HH:MM:SS" strings into date-times in the
# America/New_York time zone.
crime$OCCURRED_ON_DATE <- ymd_hms(
  crime$OCCURRED_ON_DATE,
  tz = "America/New_York"
)

# Codes and reporting areas are categorical labels, not quantities.
crime$OFFENSE_CODE <- as.factor(crime$OFFENSE_CODE)
crime$REPORTING_AREA <- as.factor(crime$REPORTING_AREA)

# Turn month numbers (1-12) into abbreviated names; `levels = month.abb`
# keeps the factor in calendar order (Jan..Dec) instead of alphabetical.
crime$MONTH <- factor(month.abb[crime$MONTH], levels = month.abb)

# Recompute the weekday from the parsed timestamp as an abbreviated,
# labelled factor with the week starting on Monday (week_start = 1).
crime$DAY_OF_WEEK <- wday(
  crime$OCCURRED_ON_DATE,
  label = TRUE,
  abbr = TRUE,
  week_start = 1
)

Selanjutnya, mari kita lihat ringkasan (summary) datanya.

# Per-column summary: quantiles for numeric/date columns, level counts for
# factors, and length/class for character columns.
summary(object = crime)
##  INCIDENT_NUMBER     OFFENSE_CODE    OFFENSE_CODE_GROUP OFFENSE_DESCRIPTION
##  Length:327820      3006   : 19360   Length:327820      Length:327820      
##  Class :character   3115   : 19180   Class :character   Class :character   
##  Mode  :character   3831   : 16730   Mode  :character   Mode  :character   
##                     1402   : 15542                                         
##                     802    : 15199                                         
##                     3301   : 13478                                         
##                     (Other):228331                                         
##    DISTRICT         REPORTING_AREA     SHOOTING        
##  Length:327820      111    :  2432   Length:327820     
##  Class :character   186    :  2080   Class :character  
##  Mode  :character   329    :  1937   Mode  :character  
##                     117    :  1888                     
##                     143    :  1822                     
##                     (Other):296741                     
##                     NA's   : 20920                     
##  OCCURRED_ON_DATE                   YEAR          MONTH         DAY_OF_WEEK   
##  Min.   :2015-06-15 00:00:00   Min.   :2015   Aug    : 35137   Fri    :49758  
##  1st Qu.:2016-04-20 09:43:00   1st Qu.:2016   Jul    : 34640   Wed    :48151  
##  Median :2017-02-14 15:45:00   Median :2017   Sep    : 34023   Thu    :47872  
##  Mean   :2017-02-10 06:41:16   Mean   :2017   Jun    : 30622   Tue    :47726  
##  3rd Qu.:2017-11-30 18:20:00   3rd Qu.:2017   Oct    : 26437   Mon    :46970  
##  Max.   :2018-10-03 20:49:00   Max.   :2018   May    : 26242   (Other):87340  
##  NA's   :3                                    (Other):140719   NA's   :    3  
##       HOUR         UCR_PART            STREET               Lat       
##  Min.   : 0.00   Length:327820      Length:327820      Min.   :-1.00  
##  1st Qu.: 9.00   Class :character   Class :character   1st Qu.:42.30  
##  Median :14.00   Mode  :character   Mode  :character   Median :42.33  
##  Mean   :13.11                                         Mean   :42.21  
##  3rd Qu.:18.00                                         3rd Qu.:42.35  
##  Max.   :23.00                                         Max.   :42.40  
##                                                        NA's   :20632  
##       Long          Location        
##  Min.   :-71.18   Length:327820     
##  1st Qu.:-71.10   Class :character  
##  Median :-71.08   Mode  :character  
##  Mean   :-70.91                     
##  3rd Qu.:-71.06                     
##  Max.   : -1.00                     
##  NA's   :20632

Setelah itu, mari kita cek apakah terdapat missing value.

# Count missing vs. present cells over the whole data frame:
# FALSE = value present, TRUE = value is NA.
table(is.na(crime))
## 
##   FALSE    TRUE 
## 5510750   62190

Karena terdapat missing value, kita harus membersihkannya.

# Keep only rows that (1) have no NA in any column, (2) have a non-empty
# DISTRICT, and (3) do not carry the (-1, -1) placeholder coordinates.
crime <- crime[
  complete.cases(crime) &
    crime$DISTRICT != "" &
    (crime$Lat != -1 | crime$Long != -1),
]

# An empty SHOOTING field is recoded to "N"; the column then becomes a factor.
crime$SHOOTING <- as.factor(sub("^$", "N", crime$SHOOTING))

# Discard factor levels that no longer occur after the row filtering.
crime <- droplevels(crime)

Visualization

Berdasarkan Waktu / Jam

Tingkat kejahatan (crimes) berdasarkan waktu.

# Map an hour of day to a six-hour time-of-day label.
#
# x: numeric vector of hours (any length, including a single value).
# Returns a character vector the same length as `x`:
#   x < 6        -> "12AM to 6AM"
#   6 <= x < 12  -> "6AM to 12PM"
#   12 <= x < 18 -> "12PM to 6PM"
#   x >= 18      -> "6PM to 12AM"
# NA hours yield NA instead of raising an error.
event <- function(x) {
  bucket_labels <- c("12AM to 6AM", "6AM to 12PM", "12PM to 6PM", "6PM to 12AM")
  # findInterval() returns 0..3 for the left-closed intervals split at
  # 6, 12 and 18; shift by one to index into the label vector.
  bucket_labels[findInterval(x, c(6, 12, 18)) + 1L]
}

# Label every incident with its time-of-day bucket and store it as an
# ordered factor so the buckets plot in chronological order.
# vapply() guarantees a character vector even when there are zero rows,
# where sapply() would return an empty list.
crime$TIME_OCCURED <- factor(
  vapply(crime$HOUR, event, character(1)),
  levels = c(
    "12AM to 6AM",
    "6AM to 12PM",
    "12PM to 6PM",
    "6PM to 12AM"
  ),
  ordered = TRUE
)

# Bar chart: number of incidents per time-of-day bucket, with the count
# printed above each bar.
ggplot(crime, aes(x = TIME_OCCURED)) +
  geom_bar(fill = "royalblue2", colour = "mediumblue") +
  theme_igray() +
  labs(
    x = NULL,
    y = NULL,
    title = "Number of Crimes in Cambridge by Time Occurred",
    subtitle = "During Year of 2015 - 2018"
  ) +
  # after_stat(count) replaces the deprecated ..count.. notation.
  geom_text(
    aes(label = comma(after_stat(count))),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = comma)

Berdasarkan Hari

Tingkat kejahatan (crimes) berdasarkan hari.

# Bar chart: number of incidents per day of the week, with the count
# printed above each bar.
ggplot(crime, aes(x = DAY_OF_WEEK)) +
  geom_bar(fill = "royalblue2", colour = "mediumblue") +
  theme_igray() +
  labs(
    x = NULL,
    y = NULL,
    title = "Number of Crimes in Cambridge by Day of Week",
    subtitle = "During Year of 2015 - 2018"
  ) +
  # after_stat(count) replaces the deprecated ..count.. notation.
  geom_text(
    aes(label = comma(after_stat(count))),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = comma)

Berdasarkan Bulan

Tingkat kejahatan (crimes) berdasarkan bulan.

# Bar chart: number of incidents per calendar month, with the count
# printed above each bar.
ggplot(crime, aes(x = MONTH)) +
  geom_bar(fill = "royalblue2", colour = "mediumblue") +
  theme_igray() +
  labs(
    x = NULL,
    y = NULL,
    title = "Number of Crimes in Cambridge by Month",
    subtitle = "During Year of 2015 - 2018"
  ) +
  # after_stat(count) replaces the deprecated ..count.. notation.
  geom_text(
    aes(label = comma(after_stat(count))),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = comma)

Berdasarkan Tahun

Tingkat kejahatan (crimes) berdasarkan tahun.

# Bar chart: number of incidents per year, with the count printed above
# each bar.
ggplot(crime, aes(x = YEAR)) +
  geom_bar(fill = "royalblue2", colour = "mediumblue") +
  theme_igray() +
  labs(
    x = NULL,
    y = NULL,
    title = "Number of Crimes in Cambridge",
    subtitle = "During Year of 2015 - 2018"
  ) +
  # after_stat(count) replaces the deprecated ..count.. notation.
  geom_text(
    aes(label = comma(after_stat(count))),
    stat = "count",
    position = position_dodge(0.9),
    vjust = -0.5
  ) +
  scale_y_continuous(labels = comma)

10 Jenis Kejahatan Tertinggi

# Horizontal bar chart of the ten most frequent offense code groups.
crime %>%
  filter(!is.na(OFFENSE_CODE_GROUP)) %>%
  # One row per group with its incident total in `count`. The original
  # summarise(count = n(), na.rm = TRUE) also created a stray `na.rm` column.
  count(OFFENSE_CODE_GROUP, name = "count") %>%
  arrange(desc(count)) %>%
  head(10) %>%
  # Order the factor levels by count so the bars are sorted in the plot.
  mutate(OFFENSE_CODE_GROUP = reorder(OFFENSE_CODE_GROUP, count)) %>%
  ggplot(aes(x = OFFENSE_CODE_GROUP, y = count)) +
  geom_col(colour = "white", fill = "burlywood4") +
  # Print each count at the base of its bar (y = 1), left-aligned.
  geom_text(
    aes(x = OFFENSE_CODE_GROUP, y = 1, label = paste0("  ", count)),
    hjust = 0, vjust = 0.5, size = 4, colour = "black", fontface = "bold"
  ) +
  labs(
    x = "crime",
    y = "count",
    title = "Top crime in Cambridge Neighborhood 2015 - 2018"
  ) +
  # Flip so the group names read horizontally along the vertical axis.
  coord_flip()