Project 2

Vir Shah

Agenda

  • Crash Summaries
  • Distribution Tables (Crashes Per Month, Weekday, and Hour)
  • Most Common PCF Violations, Types of Collisions, and Parties at Fault
  • Pedestrian Accidents, Injuries, and Deaths
  • Bicyclist Accidents, Injuries, and Deaths

Crash Summaries

Show code
library(tidyverse)
# Read the crash file into a tibble.
dat <- read_csv("crashes_2023.csv")

# Headline totals over every row of `dat`: the row count plus the summed
# NUMBER_INJURED and NUMBER_KILLED columns, returned as a one-row tibble.
tibble(
  crash_total  = nrow(dat),
  victim_total = sum(pull(dat, NUMBER_INJURED)),
  fatal_total  = sum(pull(dat, NUMBER_KILLED))
)
# A tibble: 1 × 3
  crash_total victim_total fatal_total
        <int>        <dbl>       <dbl>
1      161852       224642        3560
Show code
# Keep the first row for each CASE_ID (all columns retained) so that the
# tables built from `crashes` count each case once.
crashes <- distinct(dat, CASE_ID, .keep_all = TRUE)

Distribution Table: Crashes Per Month

Show code
# Crashes per month: one row per MONTH value with its count and its
# percentage share of all rows in `crashes`, ordered by MONTH.
crashes |>
  group_by(MONTH) |>
  summarise(count = n(), .groups = "drop") |>
  mutate(pct = 100 * count / sum(count)) |>
  arrange(MONTH)
# A tibble: 12 × 3
   MONTH count   pct
   <dbl> <int> <dbl>
 1     1 12766  7.89
 2     2 11793  7.29
 3     3 13120  8.11
 4     4 12987  8.02
 5     5 13418  8.29
 6     6 13256  8.19
 7     7 13634  8.42
 8     8 14715  9.09
 9     9 14153  8.74
10    10 14974  9.25
11    11 13636  8.42
12    12 13400  8.28

Distribution Table: Crashes Per Day of Week

Show code
# Crashes per day of week: one row per DAY_OF_WEEK value with its count and
# its percentage share of all rows in `crashes`, ordered by DAY_OF_WEEK.
crashes |>
  group_by(DAY_OF_WEEK) |>
  tally(name = "count") |>
  mutate(pct = 100 * count / sum(count)) |>
  arrange(DAY_OF_WEEK)
# A tibble: 7 × 3
  DAY_OF_WEEK count   pct
        <dbl> <int> <dbl>
1           1 21647  13.4
2           2 23504  14.5
3           3 23845  14.7
4           4 23934  14.8
5           5 25478  15.7
6           6 22996  14.2
7           7 20448  12.6

Distribution Table: Crashes Per Hour

Show code
# Crashes per hour of day: one row per HOUR value with its count and its
# percentage share of all rows in `crashes`, ordered by HOUR.
#
# A tibble with more than 20 rows prints only its first 10 by default, which
# hid 14 of the 24 hours in the report. Print every row explicitly so the
# whole distribution is visible.
crashes |>
  count(HOUR, name = "count") |>
  mutate(pct = 100 * count / sum(count)) |>
  arrange(HOUR) |>
  print(n = Inf)
# A tibble: 24 × 3
    HOUR count   pct
   <dbl> <int> <dbl>
 1     0  3552  2.19
 2     1  3193  1.97
 3     2  3031  1.87
 4     3  2124  1.31
 5     4  2215  1.37
 6     5  3279  2.03
 7     6  4468  2.76
 8     7  7211  4.46
 9     8  8420  5.20
10     9  6472  4.00
# ℹ 14 more rows

Top 10 PCF Violations (Most to Least Common)

Show code
# Ten most frequent PCF_VIOL_CATEGORY values in `crashes`, largest first.
# `sort = TRUE` orders the counts in descending order.
crashes |>
  count(PCF_VIOL_CATEGORY, name = "count", sort = TRUE) |>
  slice_head(n = 10)
# A tibble: 10 × 2
   PCF_VIOL_CATEGORY       count
   <chr>                   <int>
 1 unsafe speed            48018
 2 improper turning        26804
 3 automobile right of way 23465
 4 traffic signs           15197
 5 DUI                     14635
 6 unsafe lane change       6622
 7 unknown                  5114
 8 wrong side of road       3932
 9 pedestrian right of way  3910
10 pedestrian violation     3788

Types of Collision (Most to Least Common)

Show code
# Every TYPE_OF_COLLISION value in `crashes` with its count, largest first.
crashes |>
  count(TYPE_OF_COLLISION, name = "count", sort = TRUE)
# A tibble: 9 × 2
  TYPE_OF_COLLISION  count
  <chr>              <int>
1 rear end           47294
2 broadside          43212
3 hit object         21753
4 sideswipe          18185
5 head-on            10535
6 vehicle/pedestrian 10438
7 overturned          6186
8 other               3592
9 unknown              657

Top 8 Parties at Fault (Most to Least Common)

Show code
# Eight most frequent STWD_VEHTYPE_AT_FAULT values, largest first.
#
# Computed from `crashes` (one row per CASE_ID), like the other overall
# tables in this report, rather than from the raw `dat`, so a case can never
# be counted more than once.
crashes |>
  count(STWD_VEHTYPE_AT_FAULT, name = "count") |>
  arrange(desc(count)) |>
  slice_head(n = 8)
# A tibble: 8 × 2
  STWD_VEHTYPE_AT_FAULT                count
  <chr>                                <int>
1 Passenger Car/Station Wagon         107023
2 Not Stated                           18653
3 Pickup or Panel Truck                15787
4 Motorcycle/Scooter                    6470
5 Bicycle                               4288
6 Pedestrian                            3865
7 Truck or Truck Tractor                1684
8 Truck or Truck Tractor with Trailer   1298

Pedestrian Accidents, Injuries, and Deaths

Show code
# Rows flagged PEDESTRIAN_ACCIDENT == "yes", reduced to the first row per
# CASE_ID with all columns kept.
ped_crashes <- distinct(
  filter(dat, PEDESTRIAN_ACCIDENT == "yes"),
  CASE_ID,
  .keep_all = TRUE
)

# One-row summary: number of pedestrian crashes plus the summed
# COUNT_PED_INJURED and COUNT_PED_KILLED columns.
ped_counts <- summarise(
  ped_crashes,
  n_ped_accidents = n(),
  n_ped_injured   = sum(COUNT_PED_INJURED),
  n_ped_killed    = sum(COUNT_PED_KILLED)
)

ped_counts
# A tibble: 1 × 3
  n_ped_accidents n_ped_injured n_ped_killed
            <int>         <dbl>        <dbl>
1           11490         11022          974

Top 5 PCF Violations for Pedestrian Accidents

Show code
# Five most frequent PCF_VIOL_CATEGORY values among pedestrian crashes,
# largest first.
ped_crashes |>
  count(PCF_VIOL_CATEGORY, name = "count", sort = TRUE) |>
  slice_head(n = 5)
# A tibble: 5 × 2
  PCF_VIOL_CATEGORY       count
  <chr>                   <int>
1 pedestrian right of way  3679
2 pedestrian violation     3654
3 unsafe speed              922
4 unknown                   853
5 improper turning          653

Top 5 Types of Collision for Pedestrian Accidents

Show code
# Five most frequent TYPE_OF_COLLISION values among pedestrian crashes,
# largest first.
ped_crashes |>
  count(TYPE_OF_COLLISION, name = "count", sort = TRUE) |>
  slice_head(n = 5)
# A tibble: 5 × 2
  TYPE_OF_COLLISION  count
  <chr>              <int>
1 vehicle/pedestrian  9897
2 head-on              401
3 broadside            338
4 sideswipe            321
5 rear end             254

Top 5 Parties at Fault for Pedestrian Accidents

Show code
# Five most frequent STWD_VEHTYPE_AT_FAULT values among pedestrian crashes,
# largest first.
ped_crashes |>
  count(STWD_VEHTYPE_AT_FAULT, name = "count", sort = TRUE) |>
  slice_head(n = 5)
# A tibble: 5 × 2
  STWD_VEHTYPE_AT_FAULT       count
  <chr>                       <int>
1 Passenger Car/Station Wagon  4603
2 Pedestrian                   3862
3 Not Stated                   2040
4 Pickup or Panel Truck         689
5 Truck or Truck Tractor         77

Bicyclist Accidents, Injuries, and Deaths

Show code
# Rows flagged BICYCLE_ACCIDENT == "yes", reduced to the first row per
# CASE_ID with all columns kept.
bike_crashes <- distinct(
  filter(dat, BICYCLE_ACCIDENT == "yes"),
  CASE_ID,
  .keep_all = TRUE
)

# One-row summary: number of bicycle crashes plus the summed
# COUNT_BICYCLIST_INJURED and COUNT_BICYCLIST_KILLED columns.
bike_counts <- summarise(
  bike_crashes,
  n_bike_accidents = n(),
  n_bike_injured   = sum(COUNT_BICYCLIST_INJURED),
  n_bike_killed    = sum(COUNT_BICYCLIST_KILLED)
)

bike_counts
# A tibble: 1 × 3
  n_bike_accidents n_bike_injured n_bike_killed
             <int>          <dbl>         <dbl>
1             8744           8683           134

Top 5 PCF Violations for Bicycle Accidents

Show code
# Five most frequent PCF_VIOL_CATEGORY values among bicycle crashes,
# largest first.
bike_crashes |>
  count(PCF_VIOL_CATEGORY, name = "count", sort = TRUE) |>
  slice_head(n = 5)
# A tibble: 5 × 2
  PCF_VIOL_CATEGORY       count
  <chr>                   <int>
1 automobile right of way  1673
2 wrong side of road       1577
3 improper turning         1325
4 traffic signs            1081
5 unsafe speed             1000

Top 5 Types of Collision for Bicycle Accidents

Show code
# Five most frequent TYPE_OF_COLLISION values among bicycle crashes,
# largest first.
bike_crashes |>
  count(TYPE_OF_COLLISION, name = "count", sort = TRUE) |>
  slice_head(n = 5)
# A tibble: 5 × 2
  TYPE_OF_COLLISION count
  <chr>             <int>
1 broadside          3896
2 other              1789
3 sideswipe           955
4 head-on             558
5 rear end            524

Top 5 Parties at Fault for Bicycle Accidents

Show code
# Five most frequent STWD_VEHTYPE_AT_FAULT values among bicycle crashes,
# largest first.
bike_crashes |>
  count(STWD_VEHTYPE_AT_FAULT, name = "count", sort = TRUE) |>
  slice_head(n = 5)
# A tibble: 5 × 2
  STWD_VEHTYPE_AT_FAULT       count
  <chr>                       <int>
1 Bicycle                      4283
2 Passenger Car/Station Wagon  2399
3 Not Stated                   1372
4 Pickup or Panel Truck         301
5 Other Vehicle                 261

Research Question

Which group was at fault in a higher proportion of the crashes it was involved in: pedestrians or bicyclists?

Show code
# Share of pedestrian crashes whose at-fault party type is "Pedestrian".
# Rows with a missing party type are dropped by `na.rm = TRUE`; every other
# value, including "Not Stated", stays in the denominator.
ped_crashes |>
  mutate(ped_at_fault = STWD_VEHTYPE_AT_FAULT == "Pedestrian") |>
  summarise(prop_ped_at_fault = mean(ped_at_fault, na.rm = TRUE))
# A tibble: 1 × 1
  prop_ped_at_fault
              <dbl>
1             0.336
Show code
# Share of bicycle crashes whose at-fault party type is "Bicycle".
# Rows with a missing party type are dropped by `na.rm = TRUE`; every other
# value, including "Not Stated", stays in the denominator.
bike_crashes |>
  mutate(bike_at_fault = STWD_VEHTYPE_AT_FAULT == "Bicycle") |>
  summarise(prop_bike_at_fault = mean(bike_at_fault, na.rm = TRUE))
# A tibble: 1 × 1
  prop_bike_at_fault
               <dbl>
1              0.490

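Pedestrians were the at-fault party in 33.6% of the crashes they were involved in, while bicyclists were the at-fault party in 49.0% of theirs. Bicyclists are therefore more likely than pedestrians to be at fault for a crash they are involved in. Note that both denominators include crashes whose at-fault party is recorded as "Not Stated" (2,040 pedestrian crashes and 1,372 bicycle crashes), so the true at-fault shares may be somewhat higher for both groups.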