getwd()
## [1] "D:/Project2 Week6 Due Oct7 GH DataToUse"
setwd('D:/Project2 Week6 Due Oct7 GH DataToUse')
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
NYPDCollision <- tbl_df(read.csv("NYPDAug2017_Motor_Vehicle_Collisions.csv", stringsAsFactors = FALSE, check.names = FALSE))

head(NYPDCollision)
## # A tibble: 6 x 15
##   GeoCode Boro_Location GeoCodeLabel Number_of_Motor~ Vehicles_or_Mot~
##     <int> <chr>         <chr>                   <int>            <int>
## 1       1 Manhattan 1s~ 1st Precinct              288              561
## 2       5 Manhattan 5t~ 5th Precinct              203              376
## 3       6 Manhattan 6t~ 6th Precinct              130              249
## 4       7 Manhattan 7t~ 7th Precinct              102              195
## 5       9 Manhattan 9t~ 9th Precinct              126              241
## 6      10 Manhattan 10~ 10th Precin~              246              479
## # ... with 10 more variables: Injury_or_Fatal_Collisions <int>,
## #   MotoristsInjured <int>, MotoristsKilled <int>, PassengInjured <int>,
## #   PassengKilled <int>, CyclistsInjured <int>, CyclistsKilled <int>,
## #   PedestrInjured <int>, PedestrKilled <int>, Bicycle <int>
tail(NYPDCollision)
## # A tibble: 6 x 15
##   GeoCode Boro_Location GeoCodeLabel Number_of_Motor~ Vehicles_or_Mot~
##     <int> <chr>         <chr>                   <int>            <int>
## 1     114 Queens 114th~ 114th Preci~              412              795
## 2     115 Queens 115th~ 115th Preci~              310              611
## 3     120 S.Island 120~ 120th Preci~              219              442
## 4     121 S.Island 121~ 121st Preci~              326              661
## 5     122 S.Island 122~ 122nd Preci~              352              708
## 6     123 S.Island 123~ 123rd Preci~              147              288
## # ... with 10 more variables: Injury_or_Fatal_Collisions <int>,
## #   MotoristsInjured <int>, MotoristsKilled <int>, PassengInjured <int>,
## #   PassengKilled <int>, CyclistsInjured <int>, CyclistsKilled <int>,
## #   PedestrInjured <int>, PedestrKilled <int>, Bicycle <int>
NYPDCollision.ByInjuryType2 <- NYPDCollision %>% 
gather(InJury_type,NofCases, c("Number_of_Motor_Vehicle_Collisions", "Vehicles_or_Motorists_Involved", "Injury_or_Fatal_Collisions","MotoristsInjured","MotoristsKilled","PassengInjured","CyclistsInjured",
"CyclistsKilled","PedestrInjured","PedestrKilled","Bicycle" ))

head(NYPDCollision.ByInjuryType2)
## # A tibble: 6 x 6
##   GeoCode Boro_Location  GeoCodeLabel PassengKilled InJury_type   NofCases
##     <int> <chr>          <chr>                <int> <chr>            <int>
## 1       1 Manhattan 1st~ 1st Precinct             0 Number_of_Mo~      288
## 2       5 Manhattan 5th~ 5th Precinct             0 Number_of_Mo~      203
## 3       6 Manhattan 6th~ 6th Precinct             0 Number_of_Mo~      130
## 4       7 Manhattan 7th~ 7th Precinct             0 Number_of_Mo~      102
## 5       9 Manhattan 9th~ 9th Precinct             0 Number_of_Mo~      126
## 6      10 Manhattan 10t~ 10th Precin~             0 Number_of_Mo~      246
tail(NYPDCollision.ByInjuryType2)
## # A tibble: 6 x 6
##   GeoCode Boro_Location    GeoCodeLabel PassengKilled InJury_type NofCases
##     <int> <chr>            <chr>                <int> <chr>          <int>
## 1     114 Queens 114th Pr~ 114th Preci~             0 Bicycle           16
## 2     115 Queens 115th Pr~ 115th Preci~             0 Bicycle            9
## 3     120 S.Island 120th ~ 120th Preci~             0 Bicycle            2
## 4     121 S.Island 121st ~ 121st Preci~             0 Bicycle            4
## 5     122 S.Island 122nd ~ 122nd Preci~             0 Bicycle            4
## 6     123 S.Island 123rd ~ 123rd Preci~             0 Bicycle            1

Find the Min N of cases, and the Maximum N of cases, by Injury Type

NYPDCollision.ByInjuryType_Max <- NYPDCollision.ByInjuryType2 %>% 
  group_by(InJury_type) %>% 
  arrange(-NofCases) %>%
  filter(row_number()==1)
## Warning: package 'bindrcpp' was built under R version 3.3.3
NYPDCollision.ByInjuryType_Min <- NYPDCollision.ByInjuryType2 %>% 
  group_by(InJury_type) %>% arrange(NofCases) %>%
  filter(row_number()==1)

(NYPDCollision.ByInjuryType_Max)
## # A tibble: 11 x 6
## # Groups:   InJury_type [11]
##    GeoCode Boro_Location  GeoCodeLabel PassengKilled InJury_type  NofCases
##      <int> <chr>          <chr>                <int> <chr>           <int>
##  1     109 Queens 109th ~ 109th Preci~             0 Vehicles_or~     1248
##  2     109 Queens 109th ~ 109th Preci~             0 Number_of_M~      618
##  3     105 Queens 105th ~ 105th Preci~             0 Injury_or_F~      154
##  4     105 Queens 105th ~ 105th Preci~             0 MotoristsIn~      141
##  5     105 Queens 105th ~ 105th Preci~             0 PassengInju~       89
##  6      75 Brooklyn 75th~ 75th Precin~             0 PedestrInju~       26
##  7      90 Brooklyn 90th~ 90th Precin~             0 Bicycle            22
##  8      90 Brooklyn 90th~ 90th Precin~             0 CyclistsInj~       16
##  9      47 Bronx 47th Pr~ 47th Precin~             0 MotoristsKi~        2
## 10     112 Queens 112th ~ 112th Preci~             0 PedestrKill~        2
## 11      66 Brooklyn 66th~ 66th Precin~             0 CyclistsKil~        1
(NYPDCollision.ByInjuryType_Min)
## # A tibble: 11 x 6
## # Groups:   InJury_type [11]
##    GeoCode Boro_Location  GeoCodeLabel  PassengKilled InJury_type NofCases
##      <int> <chr>          <chr>                 <int> <chr>          <int>
##  1      22 Manhattan Cen~ Central Park~             0 MotoristsI~        0
##  2       1 Manhattan 1st~ 1st Precinct              0 MotoristsK~        0
##  3      22 Manhattan Cen~ Central Park~             0 PassengInj~        0
##  4      22 Manhattan Cen~ Central Park~             0 CyclistsIn~        0
##  5       1 Manhattan 1st~ 1st Precinct              0 CyclistsKi~        0
##  6       5 Manhattan 5th~ 5th Precinct              0 PedestrKil~        0
##  7      22 Manhattan Cen~ Central Park~             0 Injury_or_~        1
##  8      22 Manhattan Cen~ Central Park~             0 PedestrInj~        1
##  9      30 Manhattan 30t~ 30th Precinct             0 Bicycle            1
## 10      22 Manhattan Cen~ Central Park~             0 Number_of_~        5
## 11      22 Manhattan Cen~ Central Park~             0 Vehicles_o~        6

To Examine the highest collion rate by location of Boro

NYPDCollision.ByBoro_Max2 <- NYPDCollision.ByInjuryType2 %>% 
  group_by(Boro_Location) %>% 
  arrange(-NofCases) %>%
  filter(row_number()==1)

(NYPDCollision.ByBoro_Max2)
## # A tibble: 77 x 6
## # Groups:   Boro_Location [77]
##    GeoCode Boro_Location  GeoCodeLabel PassengKilled InJury_type  NofCases
##      <int> <chr>          <chr>                <int> <chr>           <int>
##  1     109 Queens 109th ~ 109th Preci~             0 Vehicles_or~     1248
##  2     105 Queens 105th ~ 105th Preci~             0 Vehicles_or~     1209
##  3      75 Brooklyn 75th~ 75th Precin~             0 Vehicles_or~     1025
##  4     108 Queens 108th ~ 108th Preci~             0 Vehicles_or~      820
##  5     114 Queens 114th ~ 114th Preci~             0 Vehicles_or~      795
##  6      19 Manhattan 19t~ 19th Precin~             0 Vehicles_or~      792
##  7     106 Queens 106th ~ 106th Preci~             0 Vehicles_or~      781
##  8      67 Brooklyn 67th~ 67th Precin~             0 Vehicles_or~      740
##  9     107 Queens 107th ~ 107th Preci~             0 Vehicles_or~      735
## 10     113 Queens 113th ~ 113th Preci~             0 Vehicles_or~      718
## # ... with 67 more rows

write a csv file for data visulization

#write.csv(NYPDCollision.ByBoro_Max2, 'NYPDCollisionByBoro_Max.csv')

conclusion from above analysis: the maximum number of cases (1248 cases) occured in the Queens 109th Precint

Find the Max N of cases, by the location and the Injury Type

NYPDCollision.ByBoroInjuryType_Max3 <- NYPDCollision.ByInjuryType2 %>% 
  group_by(Boro_Location, InJury_type) %>% 
  arrange(-NofCases) %>%
  filter(row_number()==1)

(NYPDCollision.ByBoroInjuryType_Max3)
## # A tibble: 847 x 6
## # Groups:   Boro_Location, InJury_type [847]
##    GeoCode Boro_Location  GeoCodeLabel PassengKilled InJury_type  NofCases
##      <int> <chr>          <chr>                <int> <chr>           <int>
##  1     109 Queens 109th ~ 109th Preci~             0 Vehicles_or~     1248
##  2     105 Queens 105th ~ 105th Preci~             0 Vehicles_or~     1209
##  3      75 Brooklyn 75th~ 75th Precin~             0 Vehicles_or~     1025
##  4     108 Queens 108th ~ 108th Preci~             0 Vehicles_or~      820
##  5     114 Queens 114th ~ 114th Preci~             0 Vehicles_or~      795
##  6      19 Manhattan 19t~ 19th Precin~             0 Vehicles_or~      792
##  7     106 Queens 106th ~ 106th Preci~             0 Vehicles_or~      781
##  8      67 Brooklyn 67th~ 67th Precin~             0 Vehicles_or~      740
##  9     107 Queens 107th ~ 107th Preci~             0 Vehicles_or~      735
## 10     113 Queens 113th ~ 113th Preci~             0 Vehicles_or~      718
## # ... with 837 more rows

write a csv file for data visulization

#write.csv(NYPDCollision.ByBoroInjuryType_Max3, 'NYPDCollisionByBoroInjuryType_Max.csv')

conclusion from above analysis: the maximum number of cases (1248 cases) occured in the Queens 109th Precint, with the most injury type as ‘Vehicles_or_Motorists_Involved’