# Setup ----
# Install tidyverse only when it is missing, so re-running the script does
# not reinstall the package on every execution.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

# Load the motor-vehicle collision records.
# NOTE(review): the path was reconstructed from a line-wrapped paste
# ("MY" / "PC" were split across lines) -- confirm it matches the real folder.
dat <- read_csv(
  "C:/Users/MY PC/Desktop/Coursera/Motor_Vehicle_Collisions_-_Crashes.csv"
)
#> Rows: 2263787 Columns: 29
#> ── Column specification ──────────────────────────────────────────────────────
#> Delimiter: ","
#> chr  (16): CRASH DATE, BOROUGH, LOCATION, ON STREET NAME, CROSS STREET NAM…
#> dbl  (12): ZIP CODE, LATITUDE, LONGITUDE, NUMBER OF PERSONS INJURED, NUMBE…
#> time  (1): CRASH TIME
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this
#>   message.

# Print the tibble to inspect its dimensions and leading columns.
dat
#> # A tibble: 2,263,787 × 29
#>   `CRASH DATE` `CRASH TIME` BOROUGH `ZIP CODE` LATITUDE LONGITUDE LOCATION
### Figure 1: Total Crashes by Borough
## Description: This bar chart shows the total number of crashes occurring in each NYC borough.
# Count crashes per borough; rows with no recorded borough are excluded.
# (An earlier `select(BOROUGH)` assignment was dropped: it was overwritten
# immediately by this one and never used.)
fig_dat1 <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  count(BOROUGH, name = "Total_Crashes")

fig_dat1
#> # A tibble: 5 × 2
#>   BOROUGH Total_Crashes
### Figure 2: Daily Crash Trend
## Description: This line chart shows how the number of crashes changes over time.
# Count crashes per calendar date. `CRASH DATE` contains a space, so it must
# be wrapped in backticks to be read as a single column name.
fig_dat2 <- dat %>%
  filter(!is.na(`CRASH DATE`)) %>%
  count(`CRASH DATE`, name = "Total_Crashes")

fig_dat2
#> # A tibble: 5,071 × 2
#>   `CRASH DATE` Total_Crashes

# Line chart of the daily totals.
# NOTE(review): `CRASH DATE` is read as character (see the column
# specification printed by read_csv), and as.Date() without `format` only
# tries ISO-style layouts. Confirm how the dates are stored and pass
# `format =` explicitly if they are not year-first.
ggplot(fig_dat2, aes(x = as.Date(`CRASH DATE`), y = Total_Crashes)) +
  geom_line() +
  labs(
    title = "Daily Motor Vehicle Crashes",
    x = "Date",
    y = "Number of Crashes"
  )
#> Visualization http://127.0.0.1:37555/graphics/e6d73b28-670d-4d5a-959f-8511a7b876b5.png
### Figure 3: Top Contributing Factors
## Description: This chart identifies the most common causes of collisions.
# Ten most frequent primary contributing factors. count(sort = TRUE) orders
# the rows by descending frequency, so slice_head() keeps the top ten.
fig_dat3 <- dat %>%
  filter(!is.na(`CONTRIBUTING FACTOR VEHICLE 1`)) %>%
  count(`CONTRIBUTING FACTOR VEHICLE 1`, sort = TRUE) %>%
  slice_head(n = 10)

fig_dat3
#> # A tibble: 10 × 2
#>   `CONTRIBUTING FACTOR VEHICLE 1` n

# Horizontal bar chart; reorder() sorts the bars by crash count.
ggplot(
  fig_dat3,
  aes(x = reorder(`CONTRIBUTING FACTOR VEHICLE 1`, n), y = n)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Contributing Factors",
    x = "Factor",
    y = "Crash Count"
  )
#> Visualization http://127.0.0.1:37555/graphics/f9abe3d2-7c83-4cf4-a0aa-4e5d1f35d815.png
### Figure 4: Injuries by Borough
## Description: Compares total injuries across boroughs.
# Total persons injured per borough.
# NOTE(review): the statement defining `fig_dat4` was missing from the
# transcript. This definition is reconstructed from the printed columns
# (BOROUGH, Injuries) and the matching per-borough summaries used for the
# fatalities and severity figures -- confirm it matches the original.
fig_dat4 <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  group_by(BOROUGH) %>%
  summarise(
    Injuries = sum(`NUMBER OF PERSONS INJURED`, na.rm = TRUE)
  )

fig_dat4
#> # A tibble: 5 × 2
#>   BOROUGH   Injuries
#> 1 BRONX        82184
#> 2 BROOKLYN    179455
#> 3 MANHATTAN    80743
#> 4 QUEENS      135700

# Horizontal bar chart; reorder() sorts the boroughs by injury total.
ggplot(fig_dat4, aes(x = reorder(BOROUGH, Injuries), y = Injuries)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Total Injuries by Borough",
    x = "Borough",
    y = "Total Injuries"
  )
#> Visualization http://127.0.0.1:37555/graphics/61932711-5eeb-46d1-ae42-674faf3ba030.png
### Figure 5: Fatalities by Borough
## Description: Shows which boroughs have the highest fatalities.

# Total persons killed per borough; rows with no recorded borough are
# excluded. `NUMBER OF PERSONS KILLED` needs backticks because the column
# name contains spaces.
fig_dat5 <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  group_by(BOROUGH) %>%
  summarise(
    Fatalities = sum(`NUMBER OF PERSONS KILLED`, na.rm = TRUE)
  )

fig_dat5
#> # A tibble: 5 × 2
#>   BOROUGH Fatalities

# Horizontal bar chart; reorder() sorts the boroughs by fatality total.
ggplot(fig_dat5, aes(x = reorder(BOROUGH, Fatalities), y = Fatalities)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Traffic Fatalities by Borough",
    x = "Borough",
    y = "Fatalities"
  )
#> Visualization http://127.0.0.1:37555/graphics/5e88538f-a071-4648-8478-f8267a7ab1fa.png
### Figure 6: Interactive Crash Map
## Description: A map that shows where the largest numbers of collisions occur.
# Keep only the crashes that have both coordinates recorded, so that every
# remaining row can be placed on a map.
fig_dat6 <- dat %>%
  filter(
    !is.na(LATITUDE),
    !is.na(LONGITUDE)
  )

fig_dat6
#> # A tibble: 2,023,011 × 29
#>   `CRASH DATE` `CRASH TIME` BOROUGH `ZIP CODE` LATITUDE LONGITUDE LOCATION
### Figure 7: Vehicle Types Involved
## Description: The types of vehicles most often involved in crashes.
# Horizontal bar chart of crash counts per vehicle type; reorder() sorts the
# bars by count.
# NOTE(review): `vehicle_data` is not defined in the visible part of the
# script. The plot requires it to have the columns `VEHICLE TYPE CODE 1` and
# `n` -- confirm it is created before this point.
ggplot(
  vehicle_data,
  aes(x = reorder(`VEHICLE TYPE CODE 1`, n), y = n)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top Vehicle Types Involved",
    x = "Vehicle Type",
    y = "Crash Count"
  )
#> Visualization http://127.0.0.1:9350/graphics/b2c1b337-4cbb-4f9e-9cb4-d0e67b5d3099.png
### Figure 8: Pedestrian vs Cyclist vs Motorist Injuries
## Description: The number of injuries for each type of road user affected.
# treemapify supplies geom_treemap() and geom_treemap_text() for ggplot2.
library(treemapify)

# One row per road-user category with its total injury count across the
# whole data set. The column names contain spaces, so they are accessed with
# `$` plus backticks.
injury_data <- tibble(
  Category = c("Pedestrians", "Cyclists", "Motorists"),
  Injuries = c(
    sum(dat$`NUMBER OF PEDESTRIANS INJURED`, na.rm = TRUE),
    sum(dat$`NUMBER OF CYCLIST INJURED`, na.rm = TRUE),
    sum(dat$`NUMBER OF MOTORIST INJURED`, na.rm = TRUE)
  )
)

# Treemap: tile area is proportional to the injury count, and each tile is
# labelled with its category name and count.
ggplot(
  injury_data,
  aes(
    area = Injuries,
    fill = Category,
    label = paste(Category, Injuries)
  )
) +
  geom_treemap() +
  geom_treemap_text(
    colour = "white",
    place = "centre"
  ) +
  labs(
    title = "Distribution of Injuries by Road User Type"
  )
#> Visualization http://127.0.0.1:9350/graphics/7d61ad4a-2db0-4e27-857a-07547c20caea.png
### Figure 9: Injuries vs Fatalities by Borough
## Description: Highlights differences in crash severity between boroughs.
# Per-borough totals of injuries and fatalities; rows with no recorded
# borough are excluded. Both summed columns have spaces in their names and
# therefore need backticks.
severity_data <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  group_by(BOROUGH) %>%
  summarise(
    Injuries = sum(`NUMBER OF PERSONS INJURED`, na.rm = TRUE),
    Fatalities = sum(`NUMBER OF PERSONS KILLED`, na.rm = TRUE)
  )

# Dumbbell-style chart: for each borough, a segment joins the fatality count
# to the injury count, with a point drawn at each end.
ggplot(severity_data) +
  geom_segment(
    aes(
      y = BOROUGH,
      yend = BOROUGH,
      x = Fatalities,
      xend = Injuries
    )
  ) +
  geom_point(aes(x = Fatalities, y = BOROUGH), size = 3) +
  geom_point(aes(x = Injuries, y = BOROUGH), size = 3) +
  labs(
    title = "Injuries vs Fatalities by Borough",
    x = "Count",
    y = "Borough"
  )
#> Visualization http://127.0.0.1:9350/graphics/40abadb8-96c9-4fca-9313-63600651a13d.png