# Packages used by this analysis, in the order they should be attached
# (later packages mask same-named functions from earlier ones).
# unique() guards against a package being listed twice.
pkgs <- unique(c(
  "readr", "data.table", "dplyr", "tidyr", "DT", "reshape2", "tm", "stringr",
  "gsubfn", "lubridate", "ggplot2", "gridExtra", "highcharter", "plotly",
  "ggrepel", "leaflet", "leaflet.extras", "ggmap", "RColorBrewer",
  "viridisLite", "countrycode", "zipcode"
))

# Look up the installed-package index once and install everything that is
# missing in a single call, rather than re-scanning the library per package.
missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Attach every package. A package that cannot be attached does not abort the
# script (same best-effort behaviour as before), but the failures are
# collected and reported together instead of being silently dropped.
attached <- vapply(
  pkgs,
  function(pkg) {
    tryCatch(
      {
        library(pkg, character.only = TRUE)
        TRUE
      },
      error = function(e) FALSE
    )
  },
  logical(1)
)
if (!all(attached)) {
  warning(
    "Could not attach package(s): ",
    paste(pkgs[!attached], collapse = ", "),
    call. = FALSE
  )
}

# Remove the bootstrap helpers from the global environment.
rm(pkgs, missing_pkgs, attached)

Introduction

This analysis provides statistics on accidents in Victoria, Australia, from 2006 to the current date.

The motivation for this kernel is to illustrate visualization capabilities.

Original dataset

  • The ACCIDENT database was downloaded from the Department of Transport open data portal on the Victoria State Government website [1]. The data is provided by VicRoads for educational and research purposes. The ACCIDENT database contains 12 tables.
# Load data
# setwd("Desktop/RMIT Sem 2/Data Preprocessing/Ass3")

# Read the main ACCIDENT table. The two description columns are parsed
# directly as factors with explicit levels; any value in the file that is not
# one of the listed levels is reported by readr as a parsing problem and
# becomes NA.
df <- read_csv(
  "ACCIDENT.csv",
  col_types = cols(
    `Accident Type Desc` = col_factor(
      levels = c(
        "Collision with vehicle",
        "Struck Pedestrian",
        "Struck animal",
        "Collision with a fixed object",
        "collision with some other object",
        "Vehicle overturned (no collision)",
        "Fall from or in moving vehicle",
        "No collision and no object struck",
        # Previously "Other df", which looks like the residue of renaming a
        # variable to `df`. NOTE(review): confirm against
        # unique(`Accident Type Desc`) in ACCIDENT.csv.
        "Other accident"
      )
    ),
    # Ordered factor: levels are listed from "Day" through the "Dark ..."
    # conditions to "Unknown".
    `Light Condition Desc` = col_factor(
      levels = c(
        "Day",
        "Dusk/Dawn",
        "Dark Street lights on",
        "Dark Street lights off",
        "Dark Street lights unknown",
        "Dark No street lights",
        "Unknown"
      ),
      ordered = TRUE
    )
  )
)

# Recode the severity codes "1".."4" as an ordered factor.
# NOTE(review): the labels run Low -> Very High for codes 1 -> 4. The Crash
# Stats User Guide appears to define code 1 as the most severe outcome, which
# would make this ordering inverted -- confirm before interpreting plots.
df$SEVERITY <- factor(
  df$SEVERITY,
  levels = c("1", "2", "3", "4"),
  labels = c("Low", "Medium", "High", "Very High"),
  ordered = TRUE
)

# Derive calendar features from the accident date and time. The date string
# is parsed once into a temporary column, which is dropped again at the end of
# the same mutate() call. MONTH is stored as an (unordered) factor of
# abbreviated month names in calendar order.
df <- df %>%
  mutate(
    .accident_date = dmy(ACCIDENTDATE),
    DAY = day(.accident_date),
    MONTH = factor(month.abb[month(.accident_date)], levels = month.abb),
    YEAR = year(.accident_date),
    HOUR = hour(hms(ACCIDENTTIME)),
    .accident_date = NULL
  )

# Order weekdays Monday -> Sunday so that plots and tables follow the week.
df$`Day Week Description` <- factor(
  df$`Day Week Description`,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  ),
  ordered = TRUE
)

# Translate the speed-zone codes into readable labels. The lookup vector maps
# each code (name) to its label (value); codes that are not listed become NA.
df$SPEED_ZONE <- local({
  zone_labels <- c(
    "040" = "40 km/hr",
    "050" = "50 km/hr",
    "060" = "60 km/hr",
    "075" = "75 km/hr",
    "080" = "80 km/hr",
    "090" = "90 km/hr",
    "100" = "100 km/hr",
    "110" = "110 km/hr",
    "777" = "Other speed limit",
    "888" = "Camping grounds, off road",
    "999" = "Not known"
  )
  factor(
    df$SPEED_ZONE,
    levels = names(zone_labels),
    labels = unname(zone_labels)
  )
})

# Convert the DCA description column to a factor with a fixed set of levels.
# The level strings must match the data exactly, so spelling quirks such as
# "FOOTHPATH", "VEHENTERING" and "MANOUEVRES" are kept as they are.
# Fix: the "FAR SIDE. PED HIT BY VEHICLE FROM THE LEFT" level used to contain
# a line break inside the string literal, so it could never match a
# single-line description and those rows were turned into NA.
df$`DCA Description` <- factor(
  df$`DCA Description`,
  levels = c(
    "RIGHT NEAR (INTERSECTIONS ONLY)",
    "FELL IN/FROM VEHICLE",
    "REAR END(VEHICLES IN SAME LANE)",
    "RIGHT THROUGH",
    "LEFT NEAR (INTERSECTIONS ONLY)",
    "LEFT OFF CARRIAGEWAY INTO OBJECT/PARKED VEHICLE",
    "U TURN",
    "ANY MANOEUVRE INVOLVING PED NOT INCLUDED IN DCAs 100-108.",
    "RIGHT OFF CARRIAGEWAY INTO OBJECT/PARKED VEHICLE",
    "CROSS TRAFFIC(INTERSECTIONS ONLY)",
    "PED WALKING WITH TRAFFIC",
    "LEFT REAR",
    "VEHICLE COLLIDES WITH VEHICLE PARKED ON LEFT OF ROAD",
    "RIGHT TURN SIDESWIPE",
    "TEMPORARY ROADWORKS",
    "LANE CHANGE LEFT (NOT OVERTAKING)",
    "OFF CARRIAGEWAY TO LEFT",
    "OFF CARRIAGEWAY TO RIGHT",
    "VEHICLE OFF FOOTPATH STRIKES VEH ON CARRIAGEWAY",
    "LEAVING PARKING",
    "RIGHT FAR (INTERSECTIONS ONLY)",
    "PED NEAR SIDE. PED HIT BY VEHICLE FROM THE RIGHT.",
    "OFF CARRIAGEWAY ON RIGHT BEND",
    "OFF RIGHT BEND INTO OBJECT/PARKED VEHICLE",
    "OFF END OF ROAD/T-INTERSECTION.",
    "OUT OF CONTROL ON CARRIAGEWAY (ON STRAIGHT)",
    "STRUCK OBJECT ON CARRIAGEWAY",
    "LANE SIDE SWIPE (VEHICLES IN PARALLEL LANES)",
    "OTHER ACCIDENTS-OFF STRAIGHT NOT INCLUDED IN DCAs 170-175",
    "OUT OF CONTROL ON CARRIAGEWAY (ON BEND)",
    "HEAD ON (NOT OVERTAKING)",
    "REVERSING INTO FIXED OBJECT/PARKED VEHICLE",
    "VEHICLE STRIKES ANOTHER VEH WHILE EMERGING FROM DRIVEWAY",
    "TWO RIGHT TURNING (INTERSECTIONS ONLY)",
    "LANE CHANGE RIGHT (NOT OVERTAKING)",
    "OFF LEFT BEND INTO OBJECT/PARKED VEHICLE",
    "OFF CARRIAGEWAY ON LEFT BEND",
    "PULLING OUT (OVERTAKING)",
    "ENTERING PARKING",
    "LEFT TURN SIDESWIPE",
    "RIGHT REAR.",
    "OTHER ACCIDENTS ON CURVE NOT INCLUDED IN DCAs 180-184",
    "OUT OF CONTROL (OVERTAKING)",
    "STRUCK ANIMAL",
    "FAR SIDE. PED HIT BY VEHICLE FROM THE LEFT",
    "PED EMERGES FROM IN FRONT OF PARKED OR STATIONARY VEHICLE",
    "HEAD ON(OVERTAKING)",
    "VEHICLE STRIKES DOOR OF PARKED/STATIONARY VEHICLE",
    "VEH STRIKES PED ON FOOTPATH/MEDIAN/TRAFFIC ISLAND.",
    "PED PLAYING/LYING/WORKING/STANDING ON CARRIAGEWAY.",
    "OTHER OPPOSING MANOEUVRES NOT INCLUDED IN DCAs 120-125.",
    "LEFT FAR (INTERSECTIONS ONLY)",
    "OTHER SAME DIRECTION-MANOUEVRES NOT INCLUDED IN DCAs 130-137",
    "PED ON FOOTHPATH STRUCK BY VEHENTERING/LEAVING DRIVEWAY.",
    "PED STRUCK WALKING TO/FROM OR BOARDING/ALIGHTING VEHICLE.",
    "PERMANENT OBSTRUCTION ON CARRIAGEWAY",
    "RIGHT/LEFT. ONE VEH TURNING RIGHT THE OTHER LEFT.",
    "U TURN INTO FIXED OBJECT/PARKED VEHICLE",
    "PED WALKING AGAINST TRAFFIC.",
    "REVERSING IN STREAM OF TRAFFIC",
    "CUTTING IN (OVERTAKING)",
    "LOAD OR MISSILE STRUCK VEHICLE",
    "PARKED VEHICLES ONLY",
    "OTHER ACCIDENTS NOT CLASSIFIABLE ELSEWHERE",
    "OTHER ADJACENT (INTERSECTIONS ONLY)",
    "OTHER OVERTAKING MANOEUVRES NOT INCLUDED IN DCAs 150-154",
    "ACCIDENT OR BROKEN DOWN",
    "OTHER ON PATH",
    "OTHER MANOEUVRING NOT INCLUDED IN DCAs 140-148",
    "UNKNOWN-NO DETAILS ON MANOEUVRES OF ROAD-USERS IN ACCIDENT",
    "PULLING OUT -REAR END",
    "RIGHT/LEFT FAR (INTERSECTIONS ONLY)",
    "STRUCK TRAIN",
    "LEFT/RIGHT FAR (INTERSECTIONS ONLY)",
    "PARKED CAR RUN AWAY",
    "TWO LEFT TURNING (INTERSECTIONS ONLY)",
    "LEFT THROUGH",
    "STRUCK RAILWAY CROSSING FURNITURE",
    "RIGHT/RIGHT BOTH VEHs FROM OPPOSITE DIRECTIONS TURNING RIGHT",
    "LEFT/LEFT. BOTH VEHs FROM OPPOSITE DIRECTIONS TURNING LEFT.",
    "DOUBLE PARKED"
  )
)

# Load the PERSON table and turn its categorical columns into factors.
# Values that are not among the listed levels become NA.
P <- read_csv("ACCIDENT/PERSON.csv")

# Sex: codes "F"/"M" relabelled as "Female"/"Male".
P$SEX <- factor(
  P$SEX,
  levels = c("F", "M"),
  labels = c("Female", "Male")
)

# Age bands as an ordered factor, youngest first, with "unknown" last.
P$`Age Group` <- factor(
  P$`Age Group`,
  levels = c(
    "0-4", "5-12", "13-15", "16-17", "17-21", "22-25", "26-29",
    "30-39", "40-49", "50-59", "60-64", "64-69", "70+", "unknown"
  ),
  ordered = TRUE
)

# Injury level as an ordered factor, from "Not injured" up to "Fatality".
P$`Inj Level Desc` <- factor(
  P$`Inj Level Desc`,
  levels = c("Not injured", "Other injury", "Serious injury", "Fatality"),
  ordered = TRUE
)

# Road user type as an ordered factor in the listed order.
P$`Road User Type Desc` <- factor(
  P$`Road User Type Desc`,
  levels = c(
    "Pedestrians", "Drivers", "Passengers", "Motorcyclists",
    "Pillion Passengers", "Bicyclists", "Unknown"
  ),
  ordered = TRUE
)


# Load the ACCIDENT_LOCATION table (the original chained `L <- L <-`
# assignment was redundant and has been reduced to a single assignment).
L <- read_csv("ACCIDENT/ACCIDENT_LOCATION.csv")

L <- L %>%
  # Bucket the route number into a road-class label. cut() uses right-closed
  # intervals by default, so the buckets are (1999, 3000], (3000, 4000],
  # (4000, 5000], (5000, 6000], (6000, 8000] and (8000, Inf]; values of 1999
  # or below become NA.
  # NOTE(review): a value sitting exactly on a break (3000, 4000, ...) falls
  # into the lower bucket -- confirm against the Crash Stats User Guide that
  # this is the intended boundary.
  mutate(
    ROAD_ROUTE_1_Desc = cut(
      ROAD_ROUTE_1,
      breaks = c(1999, 3000, 4000, 5000, 6000, 8000, Inf),
      labels = c(
        "Freeways or Highways", "Forest Rds", "Tourist Rds",
        "Main Rds", "Freeway ramps", "Unclassified Roads"
      )
    )
  ) %>%
  # Combine road name and road type into one "Road_new" column; unite()
  # removes the two input columns. With the default na.rm = FALSE a missing
  # part is pasted as the text "NA" -- NOTE(review): check whether that matters
  # for the road rankings.
  unite(Road_new, ROAD_NAME, ROAD_TYPE, sep = " ")
 

# Preview the first ten accident records as an interactive DT table with
# horizontal scrolling enabled and a page length of five rows.
datatable(
  head(df, 10),
  style = "bootstrap",
  class = "table-condensed",
  extensions = "Responsive",
  options = list(dom = "tp", scrollX = TRUE, pageLength = 5)
)

Visualizations:

Accidents by Time

By Year

By Month

By day of the week

Victims of the accidents

The good thing about this dataset is that most columns (including main source, secondary source, injury type, body part, event, etc.) contain unique values without typos. This, of course, makes it much quicker and easier to visualize the data.

Injury Level

Age groups

Gender

Accident type and the atmosphere

Accident Type

Accident causes

Light Condition

Locations of the accidents and speed zone

Most dangerous road types

20 Most dangerous roads

Speed Zones

References

[1]Vicroadsopendatastorehouse.vicroads.vic.gov.au, 2019. [Online]. Available: https://vicroadsopendatastorehouse.vicroads.vic.gov.au/opendata/Road_Safety/ACCIDENT.zip.

[2]“Crash Stats User Guide”, Data.vicroads.vic.gov.au, 2019. [Online]. Available: http://data.vicroads.vic.gov.au/metadata/crashstats_user_guide_and_appendices.pdf.