# Load the motor vehicle collisions CSV export into `dat`; every figure below
# is derived from this table.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs on a computer where this exact file location exists.
dat <- read_csv(
  file = "C:/Users/MY PC/Desktop/Coursera/Motor_Vehicle_Collisions_-_Crashes.csv"
)
## Rows: 2263787 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (16): CRASH DATE, BOROUGH, LOCATION, ON STREET NAME, CROSS STREET NAME,...
## dbl (12): ZIP CODE, LATITUDE, LONGITUDE, NUMBER OF PERSONS INJURED, NUMBER ...
## time (1): CRASH TIME
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Figure 1: Total Crashes by Borough
Description: This bar chart shows the total number of recorded crashes in each NYC borough; records with no borough are excluded.
# Data for the borough bar chart: one row per borough with the number of crash
# records, after dropping rows whose BOROUGH is missing.
fig_dat1 <- count(
  filter(dat, !is.na(BOROUGH)),
  BOROUGH,
  name = "Total_Crashes"
)

# Horizontal bar chart; reorder() sorts the bars by crash count rather than
# alphabetically, and coord_flip() turns the columns into horizontal bars.
fig_dat1 %>%
  ggplot(
    mapping = aes(x = reorder(BOROUGH, Total_Crashes), y = Total_Crashes)
  ) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Total Motor Vehicle Crashes by Borough",
    x = "Borough",
    y = "Number of Crashes"
  ) +
  theme_minimal()
## Figure 2: Daily Crash Trend
Description: This line chart shows how the number of crashes per day changes over time.
# Data for the daily trend: number of crash records per date. The dates are
# counted as the text values read from the file and only then parsed
# (month/day/four-digit year) into Date objects, so the x axis of the plot is
# a real time scale.
fig_dat2 <- dat %>%
  filter(!is.na(`CRASH DATE`)) %>%
  count(`CRASH DATE`, name = "Total_Crashes") %>%
  mutate(`CRASH DATE` = as.Date(`CRASH DATE`, format = "%m/%d/%Y"))

# Line chart of crashes per day.
ggplot(
  data = fig_dat2,
  mapping = aes(x = `CRASH DATE`, y = Total_Crashes)
) +
  geom_line(colour = "steelblue") +
  labs(
    title = "Daily Motor Vehicle Crashes",
    x = "Date",
    y = "Number of Crashes"
  ) +
  theme_minimal()
## Figure 3: Top Contributing Factors
Description: This bar chart shows the ten most common causes of collisions, based on the contributing factor recorded for vehicle 1.
# Data for the contributing-factor chart: frequency of each non-missing
# vehicle-1 contributing factor, most frequent first, limited to the first
# ten rows.
fig_dat3 <- dat %>%
  filter(!is.na(`CONTRIBUTING FACTOR VEHICLE 1`)) %>%
  count(`CONTRIBUTING FACTOR VEHICLE 1`) %>%
  arrange(desc(n)) %>%
  slice_head(n = 10)

# Horizontal bar chart with the factors ordered by their crash count.
fig_dat3 %>%
  ggplot(
    mapping = aes(x = reorder(`CONTRIBUTING FACTOR VEHICLE 1`, n), y = n)
  ) +
  geom_col(fill = "darkorange") +
  coord_flip() +
  labs(
    title = "Top 10 Contributing Factors",
    x = "Factor",
    y = "Crash Count"
  ) +
  theme_minimal()
## Figure 4: Injuries by Borough
# Data for the injuries chart: total persons injured per borough. Rows with a
# missing BOROUGH are dropped, and missing injury counts are ignored by the
# sum (na.rm = TRUE).
fig_dat4 <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  group_by(BOROUGH) %>%
  summarise(Injuries = sum(`NUMBER OF PERSONS INJURED`, na.rm = TRUE))

# Horizontal bar chart with boroughs ordered by their injury total.
ggplot(
  data = fig_dat4,
  mapping = aes(x = reorder(BOROUGH, Injuries), y = Injuries)
) +
  geom_col(fill = "firebrick") +
  coord_flip() +
  labs(
    title = "Total Injuries by Borough",
    x = "Borough",
    y = "Total Injuries"
  ) +
  theme_minimal()
## Figure 5: Fatalities by Borough
# Data for the fatalities chart: total persons killed per borough. Rows with a
# missing BOROUGH are dropped, and missing fatality counts are ignored by the
# sum (na.rm = TRUE).
fig_dat5 <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  group_by(BOROUGH) %>%
  summarise(Fatalities = sum(`NUMBER OF PERSONS KILLED`, na.rm = TRUE))

# Horizontal bar chart with boroughs ordered by their fatality total.
ggplot(
  data = fig_dat5,
  mapping = aes(x = reorder(BOROUGH, Fatalities), y = Fatalities)
) +
  geom_col(fill = "black") +
  coord_flip() +
  labs(
    title = "Traffic Fatalities by Borough",
    x = "Borough",
    y = "Fatalities"
  ) +
  theme_minimal()
## Figure 6: Interactive Crash Map
# Data for the crash map: keep only records that carry both coordinates, since
# a row without a latitude or longitude cannot be placed on the map.
fig_dat6 <- dat %>%
  filter(!is.na(LATITUDE), !is.na(LONGITUDE))

# Interactive map of a random sample of at most 10,000 located crashes, drawn
# as small circle markers on the default tile layer.
# slice_sample() replaces the superseded sample_n(): it caps the sample at the
# number of available rows instead of raising an error when fewer than 10,000
# rows remain after filtering.
# NOTE(review): the sample is drawn without a fixed seed, so the map differs
# between runs; call set.seed() beforehand if reproducibility matters.
fig_dat6 %>%
  slice_sample(n = 10000) %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~LONGITUDE,
    lat = ~LATITUDE,
    radius = 1
  )
## Figure 7: Top Vehicle Types Involved
# Data for the vehicle-type chart: frequency of each non-missing vehicle-1
# type code, most frequent first, limited to the first ten rows.
vehicle_data <- dat %>%
  filter(!is.na(`VEHICLE TYPE CODE 1`)) %>%
  count(`VEHICLE TYPE CODE 1`) %>%
  arrange(desc(n)) %>%
  slice_head(n = 10)

# Horizontal bar chart with the vehicle types ordered by their crash count.
vehicle_data %>%
  ggplot(mapping = aes(x = reorder(`VEHICLE TYPE CODE 1`, n), y = n)) +
  geom_col(fill = "purple") +
  coord_flip() +
  labs(
    title = "Top Vehicle Types Involved",
    x = "Vehicle Type",
    y = "Crash Count"
  ) +
  theme_minimal()
## Figure 8: Pedestrian vs Cyclist vs Motorist Injuries
# Data for the treemap: total injuries over the whole data set for each road
# user category. Missing counts are ignored by the sums (na.rm = TRUE).
injury_data <- tibble(
  Category = c("Pedestrians", "Cyclists", "Motorists"),
  Injuries = c(
    sum(dat$`NUMBER OF PEDESTRIANS INJURED`, na.rm = TRUE),
    sum(dat$`NUMBER OF CYCLIST INJURED`, na.rm = TRUE),
    sum(dat$`NUMBER OF MOTORIST INJURED`, na.rm = TRUE)
  )
)

# Treemap in which each tile's area is proportional to the injury total.
# The label formats the total explicitly: pasting the raw double relies on
# as.character(), which can switch to scientific notation (e.g. "1e+05") and
# never adds thousands separators. trim = TRUE stops format() from padding the
# shorter numbers to a common width.
ggplot(
  injury_data,
  aes(
    area = Injuries,
    fill = Category,
    label = paste(
      Category,
      format(Injuries, big.mark = ",", scientific = FALSE, trim = TRUE)
    )
  )
) +
  geom_treemap() +
  geom_treemap_text(
    colour = "white",
    place = "centre"
  ) +
  labs(
    title = "Distribution of Injuries by Road User Type"
  )
## Figure 9: Injuries vs Fatalities by Borough
# Data for the severity comparison: total persons injured and total persons
# killed per borough. Rows with a missing BOROUGH are dropped, and missing
# counts are ignored by the sums (na.rm = TRUE).
severity_data <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  group_by(BOROUGH) %>%
  summarise(
    Injuries = sum(`NUMBER OF PERSONS INJURED`, na.rm = TRUE),
    Fatalities = sum(`NUMBER OF PERSONS KILLED`, na.rm = TRUE)
  )

# Dumbbell chart: one segment per borough running from the fatality total to
# the injury total, with a point at each end.
# The two end points are mapped to a colour scale so the legend tells the
# reader which point is which; drawn as identical points they could not be
# told apart. The colours match the separate fatalities (black) and injuries
# (firebrick) bar charts.
ggplot(severity_data, aes(y = BOROUGH)) +
  geom_segment(
    aes(
      yend = BOROUGH,
      x = Fatalities,
      xend = Injuries
    )
  ) +
  geom_point(
    aes(x = Fatalities, colour = "Fatalities"),
    size = 3
  ) +
  geom_point(
    aes(x = Injuries, colour = "Injuries"),
    size = 3
  ) +
  scale_colour_manual(
    values = c(Fatalities = "black", Injuries = "firebrick")
  ) +
  labs(
    title = "Injuries vs Fatalities by Borough",
    x = "Count",
    y = "Borough",
    colour = NULL  # the legend entries are self-explanatory; omit the title
  ) +
  theme_minimal()