Reading Data & Loading Packages

library(readxl)
# Load the complaints workbook; read_excel() returns it as a tibble.
# NOTE(review): this absolute path only resolves on the original author's
# machine -- point it at your local copy of newyork_data.xlsx before running.
newyork_data <- read_excel("C:/Users/samfatto/Desktop/DataViz/newyork_data.xlsx")

# View() invokes a spreadsheet-style viewer, which is only meaningful when a
# user is attached to the session, so skip it in batch runs and when knitting.
if (interactive()) {
  View(newyork_data)
}

library(readr)
library(readxl)
library(ggthemes)
library(ggplot2)
library(dplyr)
library(dygraphs)

Data Reduction, Restructuring and Summary

# Drop the DateStamp column; it is not needed for the analysis below.
newyork_data <- within(newyork_data, rm(DateStamp))

# Restructure column types for the visualisations. Columns are grouped by their
# target type and converted one group at a time; local() keeps the helper
# vectors out of the global environment.
newyork_data <- local({
  converted <- newyork_data

  integer_cols <- c(
    "UniqueComplaintId",
    "CloseYear",
    "ReceivedYear",
    "Complaint.Contains.Stop...Frisk.Allegations",
    "IncidentYear"
  )
  factor_cols <- c(
    "Borough.of.Occurrence",
    "Complaint.Has.Video.Evidence",
    "Complaint.Filed.Mode",
    "Complaint.Filed.Place",
    "IncidentLocation",
    "EncounterOutcome",
    "Reason.For.Initial.Contact",
    "Allegation.FADO.Type",
    "Allegation.Description"
  )

  converted[integer_cols] <- lapply(converted[integer_cols], as.integer)
  converted[factor_cols] <- lapply(converted[factor_cols], as.factor)
  converted[["Is.Full.Investigation"]] <-
    as.logical(converted[["Is.Full.Investigation"]])

  converted
})

# Print the structure of the restructured data (column names, types and
# leading values); the captured console output follows as `##` lines.
str(newyork_data)
## Classes 'tbl_df', 'tbl' and 'data.frame':    204397 obs. of  15 variables:
##  $ UniqueComplaintId                          : int  11 18 18 18 18 18 18 18 18 18 ...
##  $ CloseYear                                  : int  2006 2006 2006 2006 2006 2006 2006 2006 2006 2006 ...
##  $ ReceivedYear                               : int  2005 2004 2004 2004 2004 2004 2004 2004 2004 2004 ...
##  $ Borough.of.Occurrence                      : Factor w/ 7 levels "Bronx","Brooklyn",..: 3 2 2 2 2 2 2 2 2 2 ...
##  $ Is.Full.Investigation                      : logi  FALSE TRUE TRUE TRUE TRUE TRUE ...
##  $ Complaint.Has.Video.Evidence               : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Complaint.Filed.Mode                       : Factor w/ 7 levels "Call Processing System",..: 6 7 7 7 7 7 7 7 7 7 ...
##  $ Complaint.Filed.Place                      : Factor w/ 14 levels "CCRB","Comm. to Combat Police Corruption",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Complaint.Contains.Stop...Frisk.Allegations: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ IncidentLocation                           : Factor w/ 16 levels "Apartment/house",..: 15 15 15 15 15 15 15 15 15 15 ...
##  $ IncidentYear                               : int  2005 2004 2004 2004 2004 2004 2004 2004 2004 2004 ...
##  $ EncounterOutcome                           : Factor w/ 4 levels "Arrest","No Arrest or Summons",..: 2 1 1 1 1 1 1 1 1 1 ...
##  $ Reason.For.Initial.Contact                 : Factor w/ 50 levels "Aided case","Arrest/Complainant",..: 24 33 33 33 33 33 33 33 33 33 ...
##  $ Allegation.FADO.Type                       : Factor w/ 5 levels "Abuse of Authority",..: 1 1 2 2 2 3 3 3 3 3 ...
##  $ Allegation.Description                     : Factor w/ 57 levels "Action","Animal",..: 49 36 57 57 57 28 28 28 28 28 ...
# Report the number of rows and columns in the data.
dim(newyork_data)
## [1] 204397     15
# Per-column summary: quantiles for the integer columns and level counts for
# the factor columns.
summary(newyork_data)
##  UniqueComplaintId   CloseYear     ReceivedYear    Borough.of.Occurrence
##  Min.   :    1     Min.   :2006   Min.   :1999   Bronx        :49442    
##  1st Qu.:17356     1st Qu.:2008   1st Qu.:2007   Brooklyn     :72215    
##  Median :34794     Median :2010   Median :2009   Manhattan    :42104    
##  Mean   :34778     Mean   :2010   Mean   :2010   NA           :  483    
##  3rd Qu.:52204     3rd Qu.:2013   3rd Qu.:2012   Outside NYC  :  170    
##  Max.   :69492     Max.   :2016   Max.   :2016   Queens       :30883    
##                                                  Staten Island: 9100    
##  Is.Full.Investigation Complaint.Has.Video.Evidence
##  Mode :logical         0:195530                    
##  FALSE:107084          1:  8867                    
##  TRUE :97313                                       
##  NA's :0                                           
##                                                    
##                                                    
##                                                    
##              Complaint.Filed.Mode       Complaint.Filed.Place
##  Call Processing System: 42447    CCRB             :130877   
##  E-mail                :   799    IAB              : 69214   
##  Fax                   :   356    Precinct         :  3548   
##  In-person             :  9586    Other City agency:   295   
##  Mail                  :  3424    Mayor's Office   :   157   
##  On-line website       : 14197    Other            :   110   
##  Phone                 :133588    (Other)          :   196   
##  Complaint.Contains.Stop...Frisk.Allegations             IncidentLocation 
##  Min.   :0.0000                              Street/highway      :123274  
##  1st Qu.:0.0000                              Apartment/house     : 34720  
##  Median :0.0000                              Residential building: 12421  
##  Mean   :0.4136                              Police building     :  8968  
##  3rd Qu.:1.0000                              Subway station/train:  6077  
##  Max.   :1.0000                              Commercial building :  5243  
##                                              (Other)             : 13694  
##   IncidentYear              EncounterOutcome
##  Min.   :1999   Arrest              :89139  
##  1st Qu.:2007   No Arrest or Summons:82964  
##  Median :2009   Other/NA            : 1050  
##  Mean   :2010   Summons             :31244  
##  3rd Qu.:2012                               
##  Max.   :2016                               
##                                             
##                                 Reason.For.Initial.Contact
##  PD suspected C/V of violation/crime - street:60107       
##  Other                                       :39030       
##  PD suspected C/V of violation/crime - bldg  :16067       
##  PD suspected C/V of violation/crime - auto  :12953       
##  Moving violation                            : 8843       
##  Report-dispute                              : 8818       
##  (Other)                                     :58579       
##          Allegation.FADO.Type
##  Abuse of Authority:102173   
##  Discourtesy       : 34452   
##  Force             : 61761   
##  NA                :     3   
##  Offensive Language:  6008   
##                              
##                              
##                            Allegation.Description
##  Physical force                       :44116     
##  Word                                 :31704     
##  Stop                                 :12944     
##  Search (of person)                   :12250     
##  Refusal to provide name/shield number:10359     
##  Threat of arrest                     : 9947     
##  (Other)                              :83077
# Show the first rows of the data as a quick sanity check.
head(newyork_data)
## # A tibble: 6 × 15
##   UniqueComplaintId CloseYear ReceivedYear Borough.of.Occurrence
##               <int>     <int>        <int>                <fctr>
## 1                11      2006         2005             Manhattan
## 2                18      2006         2004              Brooklyn
## 3                18      2006         2004              Brooklyn
## 4                18      2006         2004              Brooklyn
## 5                18      2006         2004              Brooklyn
## 6                18      2006         2004              Brooklyn
## # ... with 11 more variables: Is.Full.Investigation <lgl>,
## #   Complaint.Has.Video.Evidence <fctr>, Complaint.Filed.Mode <fctr>,
## #   Complaint.Filed.Place <fctr>,
## #   Complaint.Contains.Stop...Frisk.Allegations <int>,
## #   IncidentLocation <fctr>, IncidentYear <int>, EncounterOutcome <fctr>,
## #   Reason.For.Initial.Contact <fctr>, Allegation.FADO.Type <fctr>,
## #   Allegation.Description <fctr>
# Per-borough subsets of the data, used by the borough-level plots.
# `%in%` is used rather than `==` because `==` returns NA for a missing
# borough, and an NA in a logical row index makes `[` add an all-NA row to the
# subset. `%in%` never returns NA, so only genuine matches are kept.
manhattan <- newyork_data[newyork_data$Borough.of.Occurrence %in% "Manhattan", ]
brooklyn <- newyork_data[newyork_data$Borough.of.Occurrence %in% "Brooklyn", ]
bronx <- newyork_data[newyork_data$Borough.of.Occurrence %in% "Bronx", ]
queens <- newyork_data[newyork_data$Borough.of.Occurrence %in% "Queens", ]

Data wrangling & extraction

# ny1: number of rows per complaint filing mode and incident year.
# summarise() only peels off the last grouping variable, so the result would
# otherwise stay grouped by Complaint.Filed.Mode; ungroup() returns a plain
# table, consistent with ny2 below.
ny1 <- newyork_data %>%
  group_by(Complaint.Filed.Mode, IncidentYear) %>%
  summarise(Count = n()) %>%
  ungroup()

# ny2: number of rows per encounter outcome and borough of occurrence.
ny2 <- newyork_data %>%
  group_by(EncounterOutcome, Borough.of.Occurrence) %>%
  summarise(Count = n()) %>%
  ungroup()

# ny3: rows counted per allegation (FADO) type and piped straight into ggplot().
# NOTE(review): unlike ny1/ny2 this stores a ggplot object, not a summary
# table, and no geom layer is added here, so printing ny3 as-is draws empty
# axes. Add a layer (e.g. geom_col()) wherever it is displayed.
ny3 <- newyork_data %>%
  group_by(Allegation.FADO.Type) %>%
  summarise(Count = n()) %>%
  ggplot(aes(x = Allegation.FADO.Type, y = Count))

VISUAL 1

# Bar chart of the number of rows recorded for each borough of occurrence.
# Bars are shaded by their own count (light grey = few, black = many).
ggplot(newyork_data, aes(x = Borough.of.Occurrence)) +
  geom_bar(colour = "black", aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "lightgrey", high = "black") +
  # The x axis shows boroughs, not incident years, so it is labelled as such.
  xlab("Borough of Occurrence") +
  ggtitle("Histogram of Incident Counts per Borough of Occurrence in the City of New York") +
  theme_bw()

This graph is a histogram that displays the count of incidents per borough of occurrence in the City of New York. Brooklyn and the Bronx have the highest incident counts.

VISUAL 2

# Histogram of incident year using 2-year-wide bins, drawn as one panel per
# borough of occurrence.
newyork_data %>%
  ggplot(mapping = aes(x = IncidentYear)) +
  geom_histogram(binwidth = 2) +
  facet_wrap(~ Borough.of.Occurrence) +
  labs(
    x = "Incident Year Counts per Borough of Occurence",
    title = "Histograms of Incident Year per Borough of Occurence in the NY area"
  ) +
  theme_bw()

This graph displays multiple histograms of incident year, one per borough of occurrence in the City of New York. The facet_wrap function produces a separate histogram of incident-year counts for each borough.

VISUAL 3

# Bar chart of encounter outcomes for the Manhattan subset; each bar is filled
# according to its own count on a light-blue-to-blue gradient.
manhattan %>%
  ggplot(mapping = aes(x = EncounterOutcome)) +
  geom_bar(mapping = aes(fill = ..count..), colour = "blue") +
  scale_fill_gradient(name = "Count", low = "lightblue", high = "blue") +
  labs(
    x = "Encounter Outcome",
    title = "Histogram of Encounter Outcome in Manhattan, NY"
  ) +
  theme_bw()

This graph is a histogram that displays the count of each encounter outcome in Manhattan, New York. The "Arrest" and "No Arrest or Summons" categories have the highest counts.

VISUAL 4

# Bar chart of encounter outcomes for the Brooklyn subset; each bar is filled
# according to its own count on a light-grey-to-red gradient.
brooklyn %>%
  ggplot(mapping = aes(x = EncounterOutcome)) +
  geom_bar(mapping = aes(fill = ..count..), colour = "red") +
  scale_fill_gradient(name = "Count", low = "lightgrey", high = "red") +
  labs(
    x = "Encounter Outcome",
    title = "Histogram of Encounter Outcome in Brooklyn, NY"
  ) +
  theme_bw()

This graph is a histogram that displays the count of each encounter outcome in Brooklyn, New York. The "Arrest" and "No Arrest or Summons" categories again have the highest counts.

VISUAL 5

# Bar chart of encounter outcomes for the Bronx subset; each bar is filled
# according to its own count on a white-to-purple gradient.
bronx %>%
  ggplot(mapping = aes(x = EncounterOutcome)) +
  geom_bar(mapping = aes(fill = ..count..), colour = "purple") +
  scale_fill_gradient(name = "Count", low = "white", high = "purple") +
  labs(
    x = "Encounter Outcome",
    title = "Histogram of Encounter Outcome in Bronx, NY"
  ) +
  theme_bw()

This graph is a histogram that displays the count of each encounter outcome in the Bronx, New York. The "Arrest" and "No Arrest or Summons" categories again have the highest counts.

VISUAL 6

# Scatterplot of close year (y) against incident year (x), with a linear fit
# from geom_smooth(method = "lm") overlaid.
# The axis labels were previously swapped relative to the mapped variables; x is
# IncidentYear and y is CloseYear, so each label now names its own axis.
# NOTE(review): the title mentions "Received", but ReceivedYear is not plotted
# here -- confirm whether the title or the x variable is what was intended.
ggplot(newyork_data, aes(x = IncidentYear, y = CloseYear)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggtitle("Scatterplot of correlation between Incident Year Closed & Received") +
  xlab("Incident Year") +
  ylab("Incident Closed Year") +
  theme_bw()

This graph is a scatterplot that displays the relationship between the incident year and the year the complaint was closed for the City of New York.

VISUAL 7

# Line graph of close year (y) against incident year (x), with the default
# geom_smooth() trend overlaid.
# The axis labels were previously swapped relative to the mapped variables; x is
# IncidentYear and y is CloseYear, so each label now names its own axis.
# NOTE(review): the title mentions "Received", but ReceivedYear is not plotted
# here -- confirm whether the title or the x variable is what was intended.
ggplot(newyork_data, aes(x = IncidentYear, y = CloseYear)) +
  geom_line() +
  ggtitle("Line graph of Correlation between Incidents Closed & Received") +
  xlab("Incident Year") +
  ylab("Incident Closed Year") +
  theme_bw() +
  geom_smooth()

This graph is a line graph that displays the relationship between the incident year and the year the complaint was closed for the City of New York.

VISUAL 8

# Stacked histograms of incident year, filled by allegation (FADO) type, with
# one panel per borough of occurrence.
# A single histogram layer is drawn: the previous version added both
# geom_histogram() and stat_bin(bins = 300), which plotted two histograms on
# top of each other. IncidentYear is an integer year, so one bin per year
# (binwidth = 1) is used.
# The axis labels now match the plotted variables: x is the incident year and
# y is the number of rows in each bin.
ggplot(newyork_data, aes(x = IncidentYear, fill = Allegation.FADO.Type)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(~Borough.of.Occurrence) +
  ggtitle("Histograms of Incident Year per Allegation Type based on Borough") +
  xlab("Incident Year") +
  ylab("Count") +
  theme_bw()

This graph displays multiple histograms of incident year, coloured by allegation type, one per borough of occurrence in the City of New York. The facet_wrap function produces a separate histogram of incident-year counts for each borough.

VISUAL 9

# Stacked bar chart of row counts per incident year for the whole data set,
# with each bar split by the Is.Full.Investigation flag.
newyork_data %>%
  ggplot(mapping = aes(x = IncidentYear, fill = Is.Full.Investigation)) +
  geom_bar() +
  scale_fill_discrete(name = "Completely Investigated?") +
  labs(
    x = "Incident Year",
    y = "Count",
    title = "Histogram of Counts of Incidents based on Investigation Status In New York City"
  ) +
  theme_bw()

This graph is a histogram that displays incident counts per year in the City of New York, split by whether the complaint received a full investigation.

VISUAL 10

# Make sure the investigation flag in the Manhattan subset is logical before it
# is used as the fill aesthetic.
manhattan[["Is.Full.Investigation"]] <-
  as.logical(manhattan[["Is.Full.Investigation"]])

# Stacked bar chart of row counts per incident year for Manhattan, with each
# bar split by the Is.Full.Investigation flag.
manhattan %>%
  ggplot(mapping = aes(x = IncidentYear, fill = Is.Full.Investigation)) +
  geom_bar() +
  scale_fill_discrete(name = "Completely Investigated?") +
  labs(
    x = "Incident Year",
    y = "Count",
    title = "Histogram of Counts of Incidents based on Investigation Status In Manhattan, NY"
  ) +
  theme_bw()