NYPD Motor Vehicle Collisions data were downloaded from the NYPD website (http://www1.nyc.gov/site/nypd/stats/traffic-data/traffic-data-collision.page). The data were saved as a CSV file and merged by columns.
Set environment
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(ggplot2)
library(readr)
Read the CSV file
# Load the merged NYPD collision CSV and wrap it in a tibble for compact
# printing.
#   - stringsAsFactors = FALSE keeps text columns as character vectors.
#   - check.names = FALSE keeps the column headers exactly as written in the
#     file instead of letting read.csv() rewrite them.
# as_tibble() is used instead of tbl_df(), which is deprecated in dplyr.
# NOTE(review): the path is specific to one machine, and the tail() output
# suggests the CSV ends with blank rows (NA / "" in every column) -- confirm
# whether those rows should be dropped at the source.
Collision <- as_tibble(
  read.csv(
    "C:/Users/tbao/Desktop/CUNY MSDS notes/607/week 6/project 2/NY PD data/NYPD_Motor_Vehicle_Collisions.csv",
    stringsAsFactors = FALSE,
    check.names = FALSE
  )
)
# Preview the first rows to check the column layout.
head(Collision)
## # A tibble: 6 x 15
## GeoCode Boro_Location GeoCodeLabel Number_of_Motor~ Vehicles_or_Mot~
## <int> <chr> <chr> <int> <int>
## 1 1 Manhattan 1s~ 1st Precinct 288 561
## 2 5 Manhattan 5t~ 5th Precinct 203 376
## 3 6 Manhattan 6t~ 6th Precinct 130 249
## 4 7 Manhattan 7t~ 7th Precinct 102 195
## 5 9 Manhattan 9t~ 9th Precinct 126 241
## 6 10 Manhattan 10~ 10th Precin~ 246 479
## # ... with 10 more variables: Injury_or_Fatal_Collisions <int>,
## # MotoristsInjured <int>, MotoristsKilled <int>, PassengInjured <int>,
## # PassengKilled <int>, CyclistsInjured <int>, CyclistsKilled <int>,
## # PedestrInjured <int>, PedestrKilled <int>, Bicycle <int>
# Preview the last rows of the raw table.
Collision %>% tail()
## # A tibble: 6 x 15
## GeoCode Boro_Location GeoCodeLabel Number_of_Motor~ Vehicles_or_Mot~
## <int> <chr> <chr> <int> <int>
## 1 NA "" "" NA NA
## 2 NA "" "" NA NA
## 3 NA "" "" NA NA
## 4 NA "" "" NA NA
## 5 NA "" "" NA NA
## 6 NA "" "" NA NA
## # ... with 10 more variables: Injury_or_Fatal_Collisions <int>,
## # MotoristsInjured <int>, MotoristsKilled <int>, PassengInjured <int>,
## # PassengKilled <int>, CyclistsInjured <int>, CyclistsKilled <int>,
## # PedestrInjured <int>, PedestrKilled <int>, Bicycle <int>
Tidy data
# Reshape from wide to long: columns 4 through 15 are stacked into a key
# column (Injury_type, holding the former column name) and a value column
# (Freq, holding the count). The first three columns are kept as identifiers.
injury_type <- gather(
  data = Collision,
  key = "Injury_type",
  value = "Freq",
  4:15
)
# Preview the first rows of the long table.
head(injury_type)
## # A tibble: 6 x 5
## GeoCode Boro_Location GeoCodeLabel Injury_type Freq
## <int> <chr> <chr> <chr> <int>
## 1 1 Manhattan 1st Prec~ 1st Precinct Number_of_Motor_Vehicle~ 288
## 2 5 Manhattan 5th Prec~ 5th Precinct Number_of_Motor_Vehicle~ 203
## 3 6 Manhattan 6th Prec~ 6th Precinct Number_of_Motor_Vehicle~ 130
## 4 7 Manhattan 7th Prec~ 7th Precinct Number_of_Motor_Vehicle~ 102
## 5 9 Manhattan 9th Prec~ 9th Precinct Number_of_Motor_Vehicle~ 126
## 6 10 Manhattan 10th Pre~ 10th Precinct Number_of_Motor_Vehicle~ 246
# Number of rows and columns in the long table.
injury_type %>% dim()
## [1] 1020 5
# Preview the last rows of the long table.
injury_type %>% tail()
## # A tibble: 6 x 5
## GeoCode Boro_Location GeoCodeLabel Injury_type Freq
## <int> <chr> <chr> <chr> <int>
## 1 NA "" "" Bicycle NA
## 2 NA "" "" Bicycle NA
## 3 NA "" "" Bicycle NA
## 4 NA "" "" Bicycle NA
## 5 NA "" "" Bicycle NA
## 6 NA "" "" Bicycle NA
Data analysis: precinct with the highest count for each injury type
# For each injury type, keep the single row with the largest Freq.
# The table is first sorted by Freq in descending order (arrange() places NA
# last), then grouped by Injury_type so that row_number() restarts in every
# group; keeping row 1 therefore keeps the top row of each group.
# The result remains grouped by Injury_type.
injury_type_freq_most <- injury_type %>%
  arrange(desc(Freq)) %>%
  group_by(Injury_type) %>%
  filter(row_number() == 1L)
injury_type_freq_most
## # A tibble: 12 x 5
## # Groups: Injury_type [12]
## GeoCode Boro_Location GeoCodeLabel Injury_type Freq
## <int> <chr> <chr> <chr> <int>
## 1 109 Queens 109th Preci~ 109th Precin~ Vehicles_or_Motorists_~ 1248
## 2 109 Queens 109th Preci~ 109th Precin~ Number_of_Motor_Vehicl~ 618
## 3 105 Queens 105th Preci~ 105th Precin~ Injury_or_Fatal_Collis~ 154
## 4 105 Queens 105th Preci~ 105th Precin~ MotoristsInjured 141
## 5 105 Queens 105th Preci~ 105th Precin~ PassengInjured 89
## 6 75 Brooklyn 75th Prec~ 75th Precinct PedestrInjured 26
## 7 90 Brooklyn 90th Prec~ 90th Precinct Bicycle 22
## 8 90 Brooklyn 90th Prec~ 90th Precinct CyclistsInjured 16
## 9 47 Bronx 47th Precinct 47th Precinct MotoristsKilled 2
## 10 112 Queens 112th Preci~ 112th Precin~ PedestrKilled 2
## 11 45 Bronx 45th Precinct 45th Precinct PassengKilled 1
## 12 66 Brooklyn 66th Prec~ 66th Precinct CyclistsKilled 1
The table above shows, for each injury type, the precinct with the highest frequency, listed in descending order of frequency.
Precinct with the lowest count for each injury type
# For each injury type, keep the single row with the smallest Freq.
# The table is first sorted by Freq in ascending order (arrange() places NA
# last), then grouped by Injury_type so that row_number() restarts in every
# group; keeping row 1 therefore keeps the bottom-ranked row of each group.
# The result remains grouped by Injury_type.
injury_type_freq_least <- injury_type %>%
  arrange(Freq) %>%
  group_by(Injury_type) %>%
  filter(row_number() == 1L)
injury_type_freq_least
## # A tibble: 12 x 5
## # Groups: Injury_type [12]
## GeoCode Boro_Location GeoCodeLabel Injury_type Freq
## <int> <chr> <chr> <chr> <int>
## 1 22 Manhattan Central P~ Central Park Pr~ MotoristsInjured 0
## 2 1 Manhattan 1st Preci~ 1st Precinct MotoristsKilled 0
## 3 22 Manhattan Central P~ Central Park Pr~ PassengInjured 0
## 4 1 Manhattan 1st Preci~ 1st Precinct PassengKilled 0
## 5 22 Manhattan Central P~ Central Park Pr~ CyclistsInjured 0
## 6 1 Manhattan 1st Preci~ 1st Precinct CyclistsKilled 0
## 7 5 Manhattan 5th Preci~ 5th Precinct PedestrKilled 0
## 8 22 Manhattan Central P~ Central Park Pr~ Injury_or_Fatal_Co~ 1
## 9 22 Manhattan Central P~ Central Park Pr~ PedestrInjured 1
## 10 30 Manhattan 30th Prec~ 30th Precinct Bicycle 1
## 11 22 Manhattan Central P~ Central Park Pr~ Number_of_Motor_Ve~ 5
## 12 22 Manhattan Central P~ Central Park Pr~ Vehicles_or_Motori~ 6
Data visualization
# Scatter plot of Freq against Injury_type: one point per row of the long
# table, with colour mapped to the injury type and point size mapped to Freq.
e <- ggplot(
  data = injury_type,
  mapping = aes(x = Injury_type, y = Freq)
) +
  geom_point(mapping = aes(colour = Injury_type, size = Freq))
# Print the plot object to render it.
e
## Warning: Removed 96 rows containing missing values (geom_point).