NYPD Motor Vehicle Collisions data were downloaded from http://www1.nyc.gov/site/nypd/stats/traffic-data/traffic-data-collision.page, saved as a CSV file, and merged by columns.

Set up the environment

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
library(ggplot2)
library(readr)

Read the CSV file

# Load the merged collision CSV as a tibble. check.names = FALSE keeps the
# column headers exactly as written in the file, and stringsAsFactors = FALSE
# keeps text columns as character vectors.
# as_tibble() replaces tbl_df(), which is deprecated in dplyr; the resulting
# tibble is the same.
# NOTE(review): the path below is absolute and machine-specific; it must be
# adjusted to run this report anywhere else.
Collision <- as_tibble(
  read.csv(
    "C:/Users/tbao/Desktop/CUNY MSDS notes/607/week 6/project 2/NY PD data/NYPD_Motor_Vehicle_Collisions.csv",
    stringsAsFactors = FALSE,
    check.names = FALSE
  )
)
head(Collision)
## # A tibble: 6 x 15
##   GeoCode Boro_Location GeoCodeLabel Number_of_Motor~ Vehicles_or_Mot~
##     <int> <chr>         <chr>                   <int>            <int>
## 1       1 Manhattan 1s~ 1st Precinct              288              561
## 2       5 Manhattan 5t~ 5th Precinct              203              376
## 3       6 Manhattan 6t~ 6th Precinct              130              249
## 4       7 Manhattan 7t~ 7th Precinct              102              195
## 5       9 Manhattan 9t~ 9th Precinct              126              241
## 6      10 Manhattan 10~ 10th Precin~              246              479
## # ... with 10 more variables: Injury_or_Fatal_Collisions <int>,
## #   MotoristsInjured <int>, MotoristsKilled <int>, PassengInjured <int>,
## #   PassengKilled <int>, CyclistsInjured <int>, CyclistsKilled <int>,
## #   PedestrInjured <int>, PedestrKilled <int>, Bicycle <int>
# Inspect the last rows of the table as well (the rows printed below are empty).
Collision %>% tail()
## # A tibble: 6 x 15
##   GeoCode Boro_Location GeoCodeLabel Number_of_Motor~ Vehicles_or_Mot~
##     <int> <chr>         <chr>                   <int>            <int>
## 1      NA ""            ""                         NA               NA
## 2      NA ""            ""                         NA               NA
## 3      NA ""            ""                         NA               NA
## 4      NA ""            ""                         NA               NA
## 5      NA ""            ""                         NA               NA
## 6      NA ""            ""                         NA               NA
## # ... with 10 more variables: Injury_or_Fatal_Collisions <int>,
## #   MotoristsInjured <int>, MotoristsKilled <int>, PassengInjured <int>,
## #   PassengKilled <int>, CyclistsInjured <int>, CyclistsKilled <int>,
## #   PedestrInjured <int>, PedestrKilled <int>, Bicycle <int>

Tidy data

# Reshape from wide to long: the three precinct identifier columns are kept
# as-is and every remaining count column becomes one (Injury_type, Freq) pair
# per row. Excluding the identifier columns by name, instead of selecting the
# positional range 4:15, keeps the reshape correct if columns are added to or
# reordered in the CSV; for the current 15-column table the result is the same.
injury_type <- Collision %>%
  gather(
    key = "Injury_type", value = "Freq",
    -GeoCode, -Boro_Location, -GeoCodeLabel
  )
head(injury_type)
## # A tibble: 6 x 5
##   GeoCode Boro_Location       GeoCodeLabel  Injury_type               Freq
##     <int> <chr>               <chr>         <chr>                    <int>
## 1       1 Manhattan 1st Prec~ 1st Precinct  Number_of_Motor_Vehicle~   288
## 2       5 Manhattan 5th Prec~ 5th Precinct  Number_of_Motor_Vehicle~   203
## 3       6 Manhattan 6th Prec~ 6th Precinct  Number_of_Motor_Vehicle~   130
## 4       7 Manhattan 7th Prec~ 7th Precinct  Number_of_Motor_Vehicle~   102
## 5       9 Manhattan 9th Prec~ 9th Precinct  Number_of_Motor_Vehicle~   126
## 6      10 Manhattan 10th Pre~ 10th Precinct Number_of_Motor_Vehicle~   246
# Check the dimensions (rows, columns) of the long table.
injury_type %>% dim()
## [1] 1020    5
# Look at the final rows of the long table.
injury_type %>% tail()
## # A tibble: 6 x 5
##   GeoCode Boro_Location GeoCodeLabel Injury_type  Freq
##     <int> <chr>         <chr>        <chr>       <int>
## 1      NA ""            ""           Bicycle        NA
## 2      NA ""            ""           Bicycle        NA
## 3      NA ""            ""           Bicycle        NA
## 4      NA ""            ""           Bicycle        NA
## 5      NA ""            ""           Bicycle        NA
## 6      NA ""            ""           Bicycle        NA

Data analysis: precinct with the highest count for each injury type

# For every Injury_type keep the single row with the largest Freq.
# The whole table is sorted by Freq, highest first (NA values sort last), so
# the first row encountered within each Injury_type group is that group's
# maximum. The result stays grouped by Injury_type and ordered by Freq.
injury_type_freq_most <- injury_type %>%
  arrange(desc(Freq)) %>%
  group_by(Injury_type) %>%
  filter(row_number() == 1L)
injury_type_freq_most
## # A tibble: 12 x 5
## # Groups:   Injury_type [12]
##    GeoCode Boro_Location       GeoCodeLabel  Injury_type              Freq
##      <int> <chr>               <chr>         <chr>                   <int>
##  1     109 Queens 109th Preci~ 109th Precin~ Vehicles_or_Motorists_~  1248
##  2     109 Queens 109th Preci~ 109th Precin~ Number_of_Motor_Vehicl~   618
##  3     105 Queens 105th Preci~ 105th Precin~ Injury_or_Fatal_Collis~   154
##  4     105 Queens 105th Preci~ 105th Precin~ MotoristsInjured          141
##  5     105 Queens 105th Preci~ 105th Precin~ PassengInjured             89
##  6      75 Brooklyn 75th Prec~ 75th Precinct PedestrInjured             26
##  7      90 Brooklyn 90th Prec~ 90th Precinct Bicycle                    22
##  8      90 Brooklyn 90th Prec~ 90th Precinct CyclistsInjured            16
##  9      47 Bronx 47th Precinct 47th Precinct MotoristsKilled             2
## 10     112 Queens 112th Preci~ 112th Precin~ PedestrKilled               2
## 11      45 Bronx 45th Precinct 45th Precinct PassengKilled               1
## 12      66 Brooklyn 66th Prec~ 66th Precinct CyclistsKilled              1

The table above shows, for each injury type, the precinct with the highest frequency, sorted by frequency in descending order.

Precinct with the lowest count for each injury type

# For every Injury_type keep the single row with the smallest Freq.
# The whole table is sorted by Freq, lowest first (NA values sort last), so
# the first row encountered within each Injury_type group is that group's
# minimum. The result stays grouped by Injury_type and ordered by Freq.
injury_type_freq_least <- injury_type %>%
  arrange(Freq) %>%
  group_by(Injury_type) %>%
  filter(row_number() == 1L)
injury_type_freq_least
## # A tibble: 12 x 5
## # Groups:   Injury_type [12]
##    GeoCode Boro_Location        GeoCodeLabel     Injury_type          Freq
##      <int> <chr>                <chr>            <chr>               <int>
##  1      22 Manhattan Central P~ Central Park Pr~ MotoristsInjured        0
##  2       1 Manhattan 1st Preci~ 1st Precinct     MotoristsKilled         0
##  3      22 Manhattan Central P~ Central Park Pr~ PassengInjured          0
##  4       1 Manhattan 1st Preci~ 1st Precinct     PassengKilled           0
##  5      22 Manhattan Central P~ Central Park Pr~ CyclistsInjured         0
##  6       1 Manhattan 1st Preci~ 1st Precinct     CyclistsKilled          0
##  7       5 Manhattan 5th Preci~ 5th Precinct     PedestrKilled           0
##  8      22 Manhattan Central P~ Central Park Pr~ Injury_or_Fatal_Co~     1
##  9      22 Manhattan Central P~ Central Park Pr~ PedestrInjured          1
## 10      30 Manhattan 30th Prec~ 30th Precinct    Bicycle                 1
## 11      22 Manhattan Central P~ Central Park Pr~ Number_of_Motor_Ve~     5
## 12      22 Manhattan Central P~ Central Park Pr~ Vehicles_or_Motori~     6

Data visualization

# Scatter plot of every count by injury type: point colour encodes the type
# and point size encodes the count.
# The long table contains rows whose Freq is NA (they come from the empty
# trailing rows of the CSV), so na.rm = TRUE drops them explicitly instead of
# relying on ggplot2's default of removing them with a warning. The plotted
# points are unchanged.
e <- ggplot(injury_type, aes(x = Injury_type, y = Freq)) +
  geom_point(aes(colour = Injury_type, size = Freq), na.rm = TRUE)
e