# Load the motor-vehicle collision records into `dat`; every figure below
# is derived from this one table. The path is machine-specific.
dat <- read_csv(
  file = "C:/Users/MY PC/Desktop/Coursera/Motor_Vehicle_Collisions_-_Crashes.csv"
)
## Rows: 2263787 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (16): CRASH DATE, BOROUGH, LOCATION, ON STREET NAME, CROSS STREET NAME,...
## dbl  (12): ZIP CODE, LATITUDE, LONGITUDE, NUMBER OF PERSONS INJURED, NUMBER ...
## time  (1): CRASH TIME
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

## Figure 1: Total Crashes by Borough

Description: This bar chart shows the total number of crashes occurring in each NYC borough.

# Figure 1 data: one row per borough holding its number of crash records.
# Records with a missing BOROUGH are excluded before counting.
fig_dat1 <- count(
  filter(dat, !is.na(BOROUGH)),
  BOROUGH,
  name = "Total_Crashes"
)

# Horizontal bar chart; reorder() sorts the boroughs by crash count so the
# bars appear in ranked order after the axes are flipped.
ggplot(fig_dat1, aes(x = reorder(BOROUGH, Total_Crashes), y = Total_Crashes)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Total Motor Vehicle Crashes by Borough",
    x = "Borough",
    y = "Number of Crashes"
  ) +
  coord_flip() +
  theme_minimal()

## Figure 2: Daily Crash Trend

Description: This line chart shows the number of crashes recorded on each date, tracing how the daily crash count changes over time.

# Figure 2 data: number of crash records per CRASH DATE value.
# The column is read as text, so it is counted first and then parsed
# from month/day/year strings into Date values for a continuous time axis.
fig_dat2 <- dat %>%
  filter(!is.na(`CRASH DATE`)) %>%
  count(`CRASH DATE`, name = "Total_Crashes") %>%
  mutate(`CRASH DATE` = as.Date(`CRASH DATE`, format = "%m/%d/%Y"))

# Line chart of crashes per day.
ggplot(fig_dat2, aes(x = `CRASH DATE`, y = Total_Crashes)) +
  geom_line(color = "steelblue") +
  labs(
    title = "Daily Motor Vehicle Crashes",
    x = "Date",
    y = "Number of Crashes"
  ) +
  theme_minimal()

## Figure 3: Top Contributing Factors

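Description: This bar chart shows the ten most frequently recorded values of `CONTRIBUTING FACTOR VEHICLE 1`, i.e. the most common causes attributed to collisions.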

# Figure 3 data: the ten most frequent values of
# `CONTRIBUTING FACTOR VEHICLE 1`, ignoring records where it is missing.
# count(sort = TRUE) orders by descending frequency, so the first ten rows
# are the top ten.
factor_counts <- dat %>%
  filter(!is.na(`CONTRIBUTING FACTOR VEHICLE 1`)) %>%
  count(`CONTRIBUTING FACTOR VEHICLE 1`, sort = TRUE)
fig_dat3 <- slice_head(factor_counts, n = 10)

# Horizontal bar chart, bars ranked by crash count.
ggplot(fig_dat3, aes(x = reorder(`CONTRIBUTING FACTOR VEHICLE 1`, n), y = n)) +
  geom_col(fill = "darkorange") +
  labs(title = "Top 10 Contributing Factors", x = "Factor", y = "Crash Count") +
  coord_flip() +
  theme_minimal()

## Figure 4: Injuries by Borough

# Figure 4 data: total of `NUMBER OF PERSONS INJURED` per borough.
# Records without a borough are dropped; missing injury counts are skipped
# in the sum rather than turning the whole total into NA.
known_borough <- filter(dat, !is.na(BOROUGH))
fig_dat4 <- known_borough %>%
  group_by(BOROUGH) %>%
  summarise(Injuries = sum(`NUMBER OF PERSONS INJURED`, na.rm = TRUE))

# Horizontal bar chart, boroughs ranked by injury total.
ggplot(fig_dat4, aes(x = reorder(BOROUGH, Injuries), y = Injuries)) +
  geom_col(fill = "firebrick") +
  labs(title = "Total Injuries by Borough", x = "Borough", y = "Total Injuries") +
  coord_flip() +
  theme_minimal()

## Figure 5: Fatalities by Borough

# Figure 5 data: total of `NUMBER OF PERSONS KILLED` per borough.
# Records without a borough are dropped; missing fatality counts are
# skipped in the sum.
fig_dat5 <- summarise(
  group_by(filter(dat, !is.na(BOROUGH)), BOROUGH),
  Fatalities = sum(`NUMBER OF PERSONS KILLED`, na.rm = TRUE)
)

# Horizontal bar chart, boroughs ranked by fatality total.
ggplot(fig_dat5, aes(x = reorder(BOROUGH, Fatalities), y = Fatalities)) +
  geom_col(fill = "black") +
  labs(
    title = "Traffic Fatalities by Borough",
    x = "Borough",
    y = "Fatalities"
  ) +
  coord_flip() +
  theme_minimal()

## Figure 6: Interactive Crash Map

# Figure 6 data: crash records that have both a latitude and a longitude.
# NOTE(review): only missing coordinates are removed here; placeholder
# values (e.g. 0, 0) would still be plotted -- confirm whether the data
# contains any.
fig_dat6 <- dat %>%
  filter(!is.na(LATITUDE), !is.na(LONGITUDE))

# Interactive map of a random subset of crashes. Plotting every record
# would be too heavy for the browser, so at most 10,000 rows are drawn.
# slice_sample() replaces the superseded sample_n(): it returns all
# available rows instead of erroring when fewer than 10,000 remain.
fig_dat6 %>%
  slice_sample(n = 10000) %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    lng = ~LONGITUDE,
    lat = ~LATITUDE,
    radius = 1
  )

## Figure 7: Vehicle Types Involved

# Figure 7 data: the ten most frequent values of `VEHICLE TYPE CODE 1`,
# ignoring records where it is missing. Values are counted exactly as
# stored, so differently spelled or cased labels are separate categories.
vehicle_counts <- count(
  filter(dat, !is.na(`VEHICLE TYPE CODE 1`)),
  `VEHICLE TYPE CODE 1`,
  sort = TRUE
)
vehicle_data <- slice_head(vehicle_counts, n = 10)

# Horizontal bar chart, bars ranked by crash count.
ggplot(vehicle_data, aes(x = reorder(`VEHICLE TYPE CODE 1`, n), y = n)) +
  geom_col(fill = "purple") +
  labs(
    title = "Top Vehicle Types Involved",
    x = "Vehicle Type",
    y = "Crash Count"
  ) +
  coord_flip() +
  theme_minimal()

## Figure 8: Pedestrian vs Cyclist vs Motorist Injuries

# Figure 8 data: overall injury totals for the three road-user columns,
# summed across every record in `dat` (missing counts are skipped).
injury_data <- tibble(
  Category = c("Pedestrians", "Cyclists", "Motorists"),
  Injuries = c(
    sum(dat$`NUMBER OF PEDESTRIANS INJURED`, na.rm = TRUE),
    sum(dat$`NUMBER OF CYCLIST INJURED`, na.rm = TRUE),
    sum(dat$`NUMBER OF MOTORIST INJURED`, na.rm = TRUE)
  )
)

# Treemap: tile area is proportional to the injury total.
# The label is built with formatC() rather than pasting the raw double:
# paste() coerces via as.character(), which can render round totals in
# scientific notation (e.g. "7e+05"); formatC() always prints a plain
# integer with thousands separators.
ggplot(
  injury_data,
  aes(
    area = Injuries,
    fill = Category,
    label = paste(
      Category,
      formatC(Injuries, format = "d", big.mark = ",")
    )
  )
) +
  geom_treemap() +
  geom_treemap_text(
    colour = "white",
    place = "centre"
  ) +
  labs(
    title = "Distribution of Injuries by Road User Type"
  )

## Figure 9: Injuries vs Fatalities by Borough

# Figure 9 data: per-borough totals of persons injured and persons killed.
# Records without a borough are dropped; missing counts are skipped in
# the sums.
severity_data <- dat %>%
  filter(!is.na(BOROUGH)) %>%
  group_by(BOROUGH) %>%
  summarise(
    Injuries = sum(
      `NUMBER OF PERSONS INJURED`,
      na.rm = TRUE
    ),
    Fatalities = sum(
      `NUMBER OF PERSONS KILLED`,
      na.rm = TRUE
    )
  )

# Dumbbell chart: for each borough a segment joins the fatality total to
# the injury total. The two end points are mapped to a colour legend so
# the reader can tell which end is which; the colours match the
# stand-alone injuries (firebrick) and fatalities (black) bar charts.
ggplot(severity_data, aes(y = BOROUGH)) +
  geom_segment(
    aes(
      x = Fatalities,
      xend = Injuries,
      yend = BOROUGH
    ),
    colour = "grey60"
  ) +
  geom_point(
    aes(x = Fatalities, colour = "Fatalities"),
    size = 3
  ) +
  geom_point(
    aes(x = Injuries, colour = "Injuries"),
    size = 3
  ) +
  scale_colour_manual(
    name = NULL,
    values = c(Fatalities = "black", Injuries = "firebrick")
  ) +
  labs(
    title = "Injuries vs Fatalities by Borough",
    x = "Count",
    y = "Borough"
  ) +
  theme_minimal()