getwd()
## [1] "D:/Project2 Week6 Due Oct7 GH DataToUse"
setwd('D:/Project2 Week6 Due Oct7 GH DataToUse')
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
NYPDCollision <- tbl_df(read.csv("NYPDAug2017_Motor_Vehicle_Collisions.csv", stringsAsFactors = FALSE, check.names = FALSE))
head(NYPDCollision)
## # A tibble: 6 x 15
## GeoCode Boro_Location GeoCodeLabel Number_of_Motor~ Vehicles_or_Mot~
## <int> <chr> <chr> <int> <int>
## 1 1 Manhattan 1s~ 1st Precinct 288 561
## 2 5 Manhattan 5t~ 5th Precinct 203 376
## 3 6 Manhattan 6t~ 6th Precinct 130 249
## 4 7 Manhattan 7t~ 7th Precinct 102 195
## 5 9 Manhattan 9t~ 9th Precinct 126 241
## 6 10 Manhattan 10~ 10th Precin~ 246 479
## # ... with 10 more variables: Injury_or_Fatal_Collisions <int>,
## # MotoristsInjured <int>, MotoristsKilled <int>, PassengInjured <int>,
## # PassengKilled <int>, CyclistsInjured <int>, CyclistsKilled <int>,
## # PedestrInjured <int>, PedestrKilled <int>, Bicycle <int>
tail(NYPDCollision)
## # A tibble: 6 x 15
## GeoCode Boro_Location GeoCodeLabel Number_of_Motor~ Vehicles_or_Mot~
## <int> <chr> <chr> <int> <int>
## 1 114 Queens 114th~ 114th Preci~ 412 795
## 2 115 Queens 115th~ 115th Preci~ 310 611
## 3 120 S.Island 120~ 120th Preci~ 219 442
## 4 121 S.Island 121~ 121st Preci~ 326 661
## 5 122 S.Island 122~ 122nd Preci~ 352 708
## 6 123 S.Island 123~ 123rd Preci~ 147 288
## # ... with 10 more variables: Injury_or_Fatal_Collisions <int>,
## # MotoristsInjured <int>, MotoristsKilled <int>, PassengInjured <int>,
## # PassengKilled <int>, CyclistsInjured <int>, CyclistsKilled <int>,
## # PedestrInjured <int>, PedestrKilled <int>, Bicycle <int>
NYPDCollision.ByInjuryType2 <- NYPDCollision %>%
gather(InJury_type,NofCases, c("Number_of_Motor_Vehicle_Collisions", "Vehicles_or_Motorists_Involved", "Injury_or_Fatal_Collisions","MotoristsInjured","MotoristsKilled","PassengInjured","CyclistsInjured",
"CyclistsKilled","PedestrInjured","PedestrKilled","Bicycle" ))
head(NYPDCollision.ByInjuryType2)
## # A tibble: 6 x 6
## GeoCode Boro_Location GeoCodeLabel PassengKilled InJury_type NofCases
## <int> <chr> <chr> <int> <chr> <int>
## 1 1 Manhattan 1st~ 1st Precinct 0 Number_of_Mo~ 288
## 2 5 Manhattan 5th~ 5th Precinct 0 Number_of_Mo~ 203
## 3 6 Manhattan 6th~ 6th Precinct 0 Number_of_Mo~ 130
## 4 7 Manhattan 7th~ 7th Precinct 0 Number_of_Mo~ 102
## 5 9 Manhattan 9th~ 9th Precinct 0 Number_of_Mo~ 126
## 6 10 Manhattan 10t~ 10th Precin~ 0 Number_of_Mo~ 246
tail(NYPDCollision.ByInjuryType2)
## # A tibble: 6 x 6
## GeoCode Boro_Location GeoCodeLabel PassengKilled InJury_type NofCases
## <int> <chr> <chr> <int> <chr> <int>
## 1 114 Queens 114th Pr~ 114th Preci~ 0 Bicycle 16
## 2 115 Queens 115th Pr~ 115th Preci~ 0 Bicycle 9
## 3 120 S.Island 120th ~ 120th Preci~ 0 Bicycle 2
## 4 121 S.Island 121st ~ 121st Preci~ 0 Bicycle 4
## 5 122 S.Island 122nd ~ 122nd Preci~ 0 Bicycle 4
## 6 123 S.Island 123rd ~ 123rd Preci~ 0 Bicycle 1
Find the Min N of cases, and the Maximum N of cases, by Injury Type
NYPDCollision.ByInjuryType_Max <- NYPDCollision.ByInjuryType2 %>%
group_by(InJury_type) %>%
arrange(-NofCases) %>%
filter(row_number()==1)
## Warning: package 'bindrcpp' was built under R version 3.3.3
NYPDCollision.ByInjuryType_Min <- NYPDCollision.ByInjuryType2 %>%
group_by(InJury_type) %>% arrange(NofCases) %>%
filter(row_number()==1)
(NYPDCollision.ByInjuryType_Max)
## # A tibble: 11 x 6
## # Groups: InJury_type [11]
## GeoCode Boro_Location GeoCodeLabel PassengKilled InJury_type NofCases
## <int> <chr> <chr> <int> <chr> <int>
## 1 109 Queens 109th ~ 109th Preci~ 0 Vehicles_or~ 1248
## 2 109 Queens 109th ~ 109th Preci~ 0 Number_of_M~ 618
## 3 105 Queens 105th ~ 105th Preci~ 0 Injury_or_F~ 154
## 4 105 Queens 105th ~ 105th Preci~ 0 MotoristsIn~ 141
## 5 105 Queens 105th ~ 105th Preci~ 0 PassengInju~ 89
## 6 75 Brooklyn 75th~ 75th Precin~ 0 PedestrInju~ 26
## 7 90 Brooklyn 90th~ 90th Precin~ 0 Bicycle 22
## 8 90 Brooklyn 90th~ 90th Precin~ 0 CyclistsInj~ 16
## 9 47 Bronx 47th Pr~ 47th Precin~ 0 MotoristsKi~ 2
## 10 112 Queens 112th ~ 112th Preci~ 0 PedestrKill~ 2
## 11 66 Brooklyn 66th~ 66th Precin~ 0 CyclistsKil~ 1
(NYPDCollision.ByInjuryType_Min)
## # A tibble: 11 x 6
## # Groups: InJury_type [11]
## GeoCode Boro_Location GeoCodeLabel PassengKilled InJury_type NofCases
## <int> <chr> <chr> <int> <chr> <int>
## 1 22 Manhattan Cen~ Central Park~ 0 MotoristsI~ 0
## 2 1 Manhattan 1st~ 1st Precinct 0 MotoristsK~ 0
## 3 22 Manhattan Cen~ Central Park~ 0 PassengInj~ 0
## 4 22 Manhattan Cen~ Central Park~ 0 CyclistsIn~ 0
## 5 1 Manhattan 1st~ 1st Precinct 0 CyclistsKi~ 0
## 6 5 Manhattan 5th~ 5th Precinct 0 PedestrKil~ 0
## 7 22 Manhattan Cen~ Central Park~ 0 Injury_or_~ 1
## 8 22 Manhattan Cen~ Central Park~ 0 PedestrInj~ 1
## 9 30 Manhattan 30t~ 30th Precinct 0 Bicycle 1
## 10 22 Manhattan Cen~ Central Park~ 0 Number_of_~ 5
## 11 22 Manhattan Cen~ Central Park~ 0 Vehicles_o~ 6
To Examine the highest collion rate by location of Boro
NYPDCollision.ByBoro_Max2 <- NYPDCollision.ByInjuryType2 %>%
group_by(Boro_Location) %>%
arrange(-NofCases) %>%
filter(row_number()==1)
(NYPDCollision.ByBoro_Max2)
## # A tibble: 77 x 6
## # Groups: Boro_Location [77]
## GeoCode Boro_Location GeoCodeLabel PassengKilled InJury_type NofCases
## <int> <chr> <chr> <int> <chr> <int>
## 1 109 Queens 109th ~ 109th Preci~ 0 Vehicles_or~ 1248
## 2 105 Queens 105th ~ 105th Preci~ 0 Vehicles_or~ 1209
## 3 75 Brooklyn 75th~ 75th Precin~ 0 Vehicles_or~ 1025
## 4 108 Queens 108th ~ 108th Preci~ 0 Vehicles_or~ 820
## 5 114 Queens 114th ~ 114th Preci~ 0 Vehicles_or~ 795
## 6 19 Manhattan 19t~ 19th Precin~ 0 Vehicles_or~ 792
## 7 106 Queens 106th ~ 106th Preci~ 0 Vehicles_or~ 781
## 8 67 Brooklyn 67th~ 67th Precin~ 0 Vehicles_or~ 740
## 9 107 Queens 107th ~ 107th Preci~ 0 Vehicles_or~ 735
## 10 113 Queens 113th ~ 113th Preci~ 0 Vehicles_or~ 718
## # ... with 67 more rows
write a csv file for data visulization
#write.csv(NYPDCollision.ByBoro_Max2, 'NYPDCollisionByBoro_Max.csv')
conclusion from above analysis: the maximum number of cases (1248 cases) occured in the Queens 109th Precint
Find the Max N of cases, by the location and the Injury Type
NYPDCollision.ByBoroInjuryType_Max3 <- NYPDCollision.ByInjuryType2 %>%
group_by(Boro_Location, InJury_type) %>%
arrange(-NofCases) %>%
filter(row_number()==1)
(NYPDCollision.ByBoroInjuryType_Max3)
## # A tibble: 847 x 6
## # Groups: Boro_Location, InJury_type [847]
## GeoCode Boro_Location GeoCodeLabel PassengKilled InJury_type NofCases
## <int> <chr> <chr> <int> <chr> <int>
## 1 109 Queens 109th ~ 109th Preci~ 0 Vehicles_or~ 1248
## 2 105 Queens 105th ~ 105th Preci~ 0 Vehicles_or~ 1209
## 3 75 Brooklyn 75th~ 75th Precin~ 0 Vehicles_or~ 1025
## 4 108 Queens 108th ~ 108th Preci~ 0 Vehicles_or~ 820
## 5 114 Queens 114th ~ 114th Preci~ 0 Vehicles_or~ 795
## 6 19 Manhattan 19t~ 19th Precin~ 0 Vehicles_or~ 792
## 7 106 Queens 106th ~ 106th Preci~ 0 Vehicles_or~ 781
## 8 67 Brooklyn 67th~ 67th Precin~ 0 Vehicles_or~ 740
## 9 107 Queens 107th ~ 107th Preci~ 0 Vehicles_or~ 735
## 10 113 Queens 113th ~ 113th Preci~ 0 Vehicles_or~ 718
## # ... with 837 more rows
write a csv file for data visulization
#write.csv(NYPDCollision.ByBoroInjuryType_Max3, 'NYPDCollisionByBoroInjuryType_Max.csv')
conclusion from above analysis: the maximum number of cases (1248 cases) occured in the Queens 109th Precint, with the most injury type as ‘Vehicles_or_Motorists_Involved’