Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.
Objective
The original data visualisation aims to compare how accident-prone the roads of the various Australian states are at different hours of the day. It tries to help citizens of each state identify which times of day are the most dangerous.
The visualisation chosen had the following three main issues:
Reference
The following code was used to fix the issues identified in the original.
library(readr)
library(dplyr)
library(ggplot2)
library(tidyr)
# Read the fatalities CSV, give the space-containing columns syntactic
# names in one rename() call, then keep only the columns used below.
crafatal <- read_csv("S:/shubh/STUDY MATERIAL/books/RMIT/SEM2/Data Visualisation/Assignment2/Data/CSVs/ardd_fatalities.csv") %>%
  rename(
    CrashID = `Crash ID`,
    CrashType = `Crash Type`,
    RoadUser = `Road User`,
    AgeGroup = `Age Group`,
    WeekDayvsWeekEnd = `Day of week`,
    DayVsNight = `Time of day`
  ) %>%
  select(CrashID, State, Time, AgeGroup, WeekDayvsWeekEnd)
# Inspect the distinct values held by each categorical column
unique(crafatal$State)
## [1] "NSW" "WA" "SA" "Qld" "Vic" "NT" "Tas" "ACT"
unique(crafatal$AgeGroup)
## [1] "26_to_39" "0_to_16" "40_to_64" "75_or_older" "17_to_25"
## [6] "65_to_74" "Unknown"
unique(crafatal$WeekDayvsWeekEnd)
## [1] "Weekend" "Weekday"
# Look for rows carrying the -9 null code in each retained column
filter(crafatal, State == -9)
## # A tibble: 0 x 5
## # ... with 5 variables: CrashID <dbl>, State <chr>, Time <time>,
## # AgeGroup <chr>, WeekDayvsWeekEnd <chr>
filter(crafatal, Time == -9)
## # A tibble: 0 x 5
## # ... with 5 variables: CrashID <dbl>, State <chr>, Time <time>,
## # AgeGroup <chr>, WeekDayvsWeekEnd <chr>
filter(crafatal, AgeGroup == -9)
## # A tibble: 0 x 5
## # ... with 5 variables: CrashID <dbl>, State <chr>, Time <time>,
## # AgeGroup <chr>, WeekDayvsWeekEnd <chr>
filter(crafatal, WeekDayvsWeekEnd == -9)
## # A tibble: 0 x 5
## # ... with 5 variables: CrashID <dbl>, State <chr>, Time <time>,
## # AgeGroup <chr>, WeekDayvsWeekEnd <chr>
# Look for rows where State or Time is missing (NA)
filter(crafatal, is.na(State))
## # A tibble: 0 x 5
## # ... with 5 variables: CrashID <dbl>, State <chr>, Time <time>,
## # AgeGroup <chr>, WeekDayvsWeekEnd <chr>
filter(crafatal, is.na(Time))
## # A tibble: 40 x 5
## CrashID State Time AgeGroup WeekDayvsWeekEnd
## <dbl> <chr> <time> <chr> <chr>
## 1 20127002 NT NA 26_to_39 Weekday
## 2 20087036 NT NA 40_to_64 Weekday
## 3 20087041 NT NA 17_to_25 Weekday
## 4 20077047 NT NA 17_to_25 Weekday
## 5 20077047 NT NA Unknown Weekday
## 6 20057045 NT NA 40_to_64 Weekday
## 7 20057045 NT NA 40_to_64 Weekday
## 8 20057036 NT NA 65_to_74 Weekend
## 9 20052184 Vic NA 17_to_25 Weekend
## 10 20057028 NT NA 26_to_39 Weekday
## # ... with 30 more rows
# Drop the rows whose Time is missing
crafatal <- crafatal %>% filter(!is.na(Time))
# Break Time into hour / minute / sec parts on the ":" separator
crafatal <- separate(
  crafatal,
  Time,
  into = c("hour", "minute", "sec"),
  sep = ":"
)
# The split leaves hour as text; convert it to integer so it can be
# compared numerically when the time-of-day bands are assigned
typeof(crafatal$hour)
## [1] "character"
crafatal <- crafatal %>% mutate(hour = as.integer(hour))
typeof(crafatal$hour)
## [1] "integer"
# Label every row with the time-of-day band its hour falls in.
# Hours outside 2-20 (i.e. 21-23, 0 and 1) fall through to the Night band;
# a missing hour stays missing rather than being labelled Night.
crafatal <- crafatal %>%
  mutate(
    TimeOfDay = case_when(
      is.na(hour) ~ NA_character_,
      hour >= 2 & hour <= 6 ~ "Dawn (2AM - 6AM)",
      hour >= 7 & hour <= 11 ~ "Morning Peak (7AM - 11AM)",
      hour >= 12 & hour <= 15 ~ "Daylight (12PM - 3PM)",
      hour >= 16 & hour <= 20 ~ "Evening Peak (4PM - 8PM)",
      TRUE ~ "Night (9PM - 1AM)"
    )
  )
# Upper-case the three mixed-case state codes (all other values pass
# through unchanged), then store State as a factor
crafatal <- crafatal %>%
  mutate(
    State = factor(
      case_when(
        State == "Qld" ~ "QLD",
        State == "Tas" ~ "TAS",
        State == "Vic" ~ "VIC",
        TRUE ~ State
      )
    )
  )
levels(crafatal$State)
## [1] "ACT" "NSW" "NT" "QLD" "SA" "TAS" "VIC" "WA"
# Convert the grouping columns to factors with explicit level order.
# TimeOfDay and AgeGroup are ordered factors; WeekDayvsWeekEnd is not.
crafatal <- crafatal %>%
  mutate(
    TimeOfDay = factor(
      TimeOfDay,
      levels = c(
        "Dawn (2AM - 6AM)",
        "Morning Peak (7AM - 11AM)",
        "Daylight (12PM - 3PM)",
        "Evening Peak (4PM - 8PM)",
        "Night (9PM - 1AM)"
      ),
      ordered = TRUE
    ),
    WeekDayvsWeekEnd = factor(
      WeekDayvsWeekEnd,
      levels = c("Weekend", "Weekday")
    ),
    AgeGroup = factor(
      AgeGroup,
      levels = c(
        "0_to_16",
        "17_to_25",
        "26_to_39",
        "40_to_64",
        "65_to_74",
        "75_or_older",
        "Unknown"
      ),
      ordered = TRUE
    )
  )
levels(crafatal$TimeOfDay)
## [1] "Dawn (2AM - 6AM)" "Morning Peak (7AM - 11AM)"
## [3] "Daylight (12PM - 3PM)" "Evening Peak (4PM - 8PM)"
## [5] "Night (9PM - 1AM)"
levels(crafatal$WeekDayvsWeekEnd)
## [1] "Weekend" "Weekday"
levels(crafatal$AgeGroup)
## [1] "0_to_16" "17_to_25" "26_to_39" "40_to_64" "65_to_74"
## [6] "75_or_older" "Unknown"
# Bar chart of row counts per State, filled by AgeGroup, with one facet
# row per WeekDayvsWeekEnd level and one facet column per TimeOfDay level.
# theme_bw() is added before theme() so the custom text settings below
# are applied on top of it.
p1 <- ggplot(crafatal, aes(x = State, fill = AgeGroup)) +
  geom_bar() +
  facet_grid(WeekDayvsWeekEnd ~ TimeOfDay) +
  labs(
    title = "The most dangerous hours to be on roads of Australia.",
    x = "State",
    y = "Number of fatalities"
  ) +
  theme_bw() +
  theme(
    title = element_text(size = 16, face = "bold", colour = "black"),
    strip.text = element_text(size = 12, face = "bold.italic"),
    axis.title = element_text(size = 14, face = "bold"),
    axis.text = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.line = element_line(colour = "black")
  )
Data Reference
The following plot fixes the main issues in the original visualisation.