# Setup ----
# Install the tidyverse only when it is missing, so that re-running the script
# does not reinstall the package every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

# Load the NYC motor-vehicle collision records.
# NOTE(review): the path is specific to one machine; adjust before running
# elsewhere.
dat <- read_csv(
  "C:/Users/MY PC/Desktop/Coursera/Motor_Vehicle_Collisions_-_Crashes.csv"
)
#> Rows: 2263787 Columns: 29
#> ── Column specification ──────────────────────────────────────────────────────
#> Delimiter: ","
#> chr  (16): CRASH DATE, BOROUGH, LOCATION, ON STREET NAME, CROSS STREET NAM…
#> dbl  (12): ZIP CODE, LATITUDE, LONGITUDE, NUMBER OF PERSONS INJURED, NUMBE…
#> time  (1): CRASH TIME
#>
#> ℹ Use spec() to retrieve the full column specification for this data.
#> ℹ Specify the column types or set show_col_types = FALSE to quiet this message.

dat
#> # A tibble: 2,263,787 × 29
#>   CRASH DATE  CRASH TIME  BOROUGH  ZIP CODE  LATITUDE  LONGITUDE  LOCATION

### Figure 1: Total Crashes by Borough
## Description: This bar chart shows the total number of crashes recorded in each NYC borough.

# Figure 1 data: number of crash records per borough. Rows with a missing
# BOROUGH are dropped so that NA does not show up as its own bar.
# (An earlier `select(BOROUGH)` version of this assignment was overwritten
# immediately and has been removed as dead code.)
fig_dat1 <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  count(BOROUGH, name = "Total_Crashes")

fig_dat1
#> # A tibble: 5 × 2
#>   BOROUGH       Total_Crashes
#> 1 BRONX                233440
#> 2 BROOKLYN             505449
#> 3 MANHATTAN            347644
#> 4 QUEENS               421339
#> 5 STATEN ISLAND         65862

# ggplot2 is already attached by library(tidyverse); the explicit call is kept
# so this section can also be run on its own.
library(ggplot2)

# Horizontal bar chart with boroughs ordered by crash count.
ggplot(
  fig_dat1,
  aes(x = reorder(BOROUGH, Total_Crashes), y = Total_Crashes)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Total Motor Vehicle Crashes by Borough",
    x = "Borough",
    y = "Number of Crashes"
  )
# Visualization: http://127.0.0.1:37555/graphics/81820f10-fd93-4127-a000-050b8ae4c96a.png

### Figure 2: Daily Crash Trend
## Description: This line chart shows how the daily number of crashes changes over time.

# Figure 2 data: number of crash records per calendar day.
# `CRASH DATE` contains a space, so it must be wrapped in backticks.
# The column is read as character (MM/DD/YYYY), which is why the printed rows
# are ordered as text rather than chronologically.
fig_dat2 <- dat %>%
  filter(!is.na(`CRASH DATE`)) %>%
  count(`CRASH DATE`, name = "Total_Crashes")

fig_dat2
#> # A tibble: 5,071 × 2
#>    `CRASH DATE` Total_Crashes
#>  1 01/01/2013             381
#>  2 01/01/2014             399
#>  3 01/01/2015             395
#>  4 01/01/2016             393
#>  5 01/01/2017             485
#>  6 01/01/2018             453
#>  7 01/01/2019             430
#>  8 01/01/2020             346
#>  9 01/01/2021             257
#> 10 01/01/2022             283

# Line chart of crashes per day.
# The dates are stored as "MM/DD/YYYY" text, so the format has to be given
# explicitly: without it as.Date() tries year-first formats and silently
# produces wrong dates for strings such as "01/01/2013".
ggplot(
  fig_dat2,
  aes(
    x = as.Date(`CRASH DATE`, format = "%m/%d/%Y"),
    y = Total_Crashes
  )
) +
  geom_line() +
  labs(
    title = "Daily Motor Vehicle Crashes",
    x = "Date",
    y = "Number of Crashes"
  )
# Visualization: http://127.0.0.1:37555/graphics/e6d73b28-670d-4d5a-959f-8511a7b876b5.png

### Figure 3: Top Contributing Factors
## Description: This chart identifies the most common causes of collisions.

# Figure 3 data: the ten most frequent values of the first vehicle's
# contributing factor. The column name contains spaces and needs backticks.
# count(sort = TRUE) orders by descending frequency, so slice_head() keeps the
# top ten.
fig_dat3 <- dat %>%
  filter(!is.na(`CONTRIBUTING FACTOR VEHICLE 1`)) %>%
  count(`CONTRIBUTING FACTOR VEHICLE 1`, sort = TRUE) %>%
  slice_head(n = 10)

fig_dat3
#> # A tibble: 10 × 2
#>    `CONTRIBUTING FACTOR VEHICLE 1`      n
#>  1 Unspecified                     755164
#>  2 Driver Inattention/Distraction  460889
#>  3 Failure to Yield Right-of-Way   135716
#>  4 Following Too Closely           121418
#>  5 Backing Unsafely                 82433
#>  6 Other Vehicular                  71201
#>  7 Passing or Lane Usage Improper   65317
#>  8 Passing Too Closely              57840
#>  9 Turning Improperly               55609
#> 10 Fatigued/Drowsy                  47595

# Horizontal bar chart of the top contributing factors, ordered by count.
# The original call was missing the closing parenthesis of labs().
ggplot(
  fig_dat3,
  aes(x = reorder(`CONTRIBUTING FACTOR VEHICLE 1`, n), y = n)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Contributing Factors",
    x = "Factor",
    y = "Crash Count"
  )
# Visualization: http://127.0.0.1:37555/graphics/f9abe3d2-7c83-4cf4-a0aa-4e5d1f35d815.png

### Figure 4: Injuries by Borough
## Description: Compares total injuries across boroughs.

# Figure 4 data: total persons injured per borough.
# NOTE(review): the transcript only showed the printed result, not the code
# that built fig_dat4. The definition below is reconstructed from the output
# columns (BOROUGH, Injuries) and mirrors the fatalities summary used for
# Figure 5 — confirm it matches the original.
fig_dat4 <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  group_by(BOROUGH) %>%
  summarise(
    Injuries = sum(`NUMBER OF PERSONS INJURED`, na.rm = TRUE)
  )

fig_dat4
#> # A tibble: 5 × 2
#>   BOROUGH   Injuries
#> 1 BRONX        82184
#> 2 BROOKLYN    179455
#> 3 MANHATTAN    80743
#> 4 QUEENS      135700
#> (the fifth row was cut off in the pasted output)

# Horizontal bar chart of total injuries, boroughs ordered by injury count.
ggplot(
  fig_dat4,
  aes(x = reorder(BOROUGH, Injuries), y = Injuries)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Total Injuries by Borough",
    x = "Borough",
    y = "Total Injuries"
  )
# Visualization: http://127.0.0.1:37555/graphics/61932711-5eeb-46d1-ae42-674faf3ba030.png

### Figure 5: Fatalities by Borough
## Description: Shows which boroughs have the highest number of fatalities.

# Figure 5 data: total persons killed per borough. The heading used to share a
# line with this code, which turned the whole pipeline into a comment.
# na.rm = TRUE so that records with a missing count do not make the sum NA.
fig_dat5 <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  group_by(BOROUGH) %>%
  summarise(
    Fatalities = sum(`NUMBER OF PERSONS KILLED`, na.rm = TRUE)
  )

fig_dat5
#> # A tibble: 5 × 2
#>   BOROUGH       Fatalities
#> 1 BRONX                325
#> 2 BROOKLYN             740
#> 3 MANHATTAN            396
#> 4 QUEENS               607
#> 5 STATEN ISLAND        108

# Horizontal bar chart of fatalities, boroughs ordered by fatality count.
ggplot(
  fig_dat5,
  aes(x = reorder(BOROUGH, Fatalities), y = Fatalities)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Traffic Fatalities by Borough",
    x = "Borough",
    y = "Fatalities"
  )
# Visualization: http://127.0.0.1:37555/graphics/5e88538f-a071-4648-8478-f8267a7ab1fa.png

### Figure 6: Interactive Crash Map
## Description: A map showing where the largest numbers of collisions occur.

# Figure 6 data: keep only the records that have both coordinates, since a
# crash without a latitude/longitude cannot be placed on a map.
# NOTE(review): this section only prepares the data; no map-drawing code is
# present here even though the heading announces an interactive map.
fig_dat6 <- dat %>%
  filter(
    !is.na(LATITUDE),
    !is.na(LONGITUDE)
  )

fig_dat6
#> # A tibble: 2,023,011 × 29
#>   CRASH DATE  CRASH TIME  BOROUGH  ZIP CODE  LATITUDE  LONGITUDE  LOCATION

### Figure 7: Vehicle Types Involved
## Description: The types of vehicles most often involved in crashes.

# Figure 7 data: most frequent values of the first vehicle's type code.
# NOTE(review): vehicle_data was used but never defined in the transcript. The
# definition below is reconstructed from the columns the plot needs
# (`VEHICLE TYPE CODE 1` and n) and follows the same top-10 pattern as the
# contributing-factor figure — confirm the intended cut-off.
vehicle_data <- dat %>%
  filter(!is.na(`VEHICLE TYPE CODE 1`)) %>%
  count(`VEHICLE TYPE CODE 1`, sort = TRUE) %>%
  slice_head(n = 10)

# Horizontal bar chart of vehicle types, ordered by crash count.
ggplot(
  vehicle_data,
  aes(x = reorder(`VEHICLE TYPE CODE 1`, n), y = n)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top Vehicle Types Involved",
    x = "Vehicle Type",
    y = "Crash Count"
  )
# Visualization: http://127.0.0.1:9350/graphics/b2c1b337-4cbb-4f9e-9cb4-d0e67b5d3099.png

### Figure 8: Pedestrian vs Cyclist vs Motorist Injuries
## Description: The number of injuries for each type of road user affected.

library(treemapify)

# Figure 8 data: one row per road-user category with its total injuries.
# The `dat$` accessors were garbled in the transcript (`dat\(` / `dat\)`), and
# the column names contain spaces, so they need backticks.
injury_data <- tibble(
  Category = c(
    "Pedestrians",
    "Cyclists",
    "Motorists"
  ),
  Injuries = c(
    sum(dat$`NUMBER OF PEDESTRIANS INJURED`, na.rm = TRUE),
    sum(dat$`NUMBER OF CYCLIST INJURED`, na.rm = TRUE),
    sum(dat$`NUMBER OF MOTORIST INJURED`, na.rm = TRUE)
  )
)

# Treemap: tile area is proportional to injuries; each tile is labelled with
# its category and count.
ggplot(
  injury_data,
  aes(
    area = Injuries,
    fill = Category,
    label = paste(Category, Injuries)
  )
) +
  geom_treemap() +
  geom_treemap_text(
    colour = "white",
    place = "centre"
  ) +
  labs(
    title = "Distribution of Injuries by Road User Type"
  )
# Visualization: http://127.0.0.1:9350/graphics/7d61ad4a-2db0-4e27-857a-07547c20caea.png

### Figure 9: Injuries vs Fatalities by Borough
## Description: Highlights differences in crash severity between boroughs.

# Figure 9 data: total injuries and total fatalities per borough.
# Column names with spaces need backticks; na.rm = TRUE keeps records with a
# missing count from turning a borough's total into NA.
severity_data <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  group_by(BOROUGH) %>%
  summarise(
    Injuries = sum(`NUMBER OF PERSONS INJURED`, na.rm = TRUE),
    Fatalities = sum(`NUMBER OF PERSONS KILLED`, na.rm = TRUE)
  )

# Dumbbell chart: for each borough, one point at the fatality count, one at
# the injury count, and a segment joining the two.
ggplot(severity_data) +
  geom_segment(
    aes(
      y = BOROUGH,
      yend = BOROUGH,
      x = Fatalities,
      xend = Injuries
    )
  ) +
  geom_point(aes(x = Fatalities, y = BOROUGH), size = 3) +
  geom_point(aes(x = Injuries, y = BOROUGH), size = 3) +
  labs(
    title = "Injuries vs Fatalities by Borough",
    x = "Count",
    y = "Borough"
  )
# Visualization: http://127.0.0.1:9350/graphics/40abadb8-96c9-4fca-9313-63600651a13d.png