---
title: "When the Lights Go Out: Road Safety in Victoria"
output:
flexdashboard::flex_dashboard:
orientation: rows
vertical_layout: fill
theme: cosmo
source_code: embed
navbar:
- { text: "Data Source", href: "https://www.crimestatistics.vic.gov.au", icon: "fa-database" }
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
library(tidyverse)
library(janitor)
library(lubridate)
library(scales)
library(flexdashboard)
library(stringr)
```
```{r load_data, include=FALSE}
# Load the raw crash extract and normalise column names to snake_case.
path <- "victorian_road_crash_data.csv"
crash <- read_csv(path, show_col_types = FALSE) |>
  clean_names()

# Derive the analysis columns (severity, light_cond, speed_zone_num,
# speed_band, accident_date, year) that the charts below read.
crash_clean <- crash |>
  mutate(
    # Collapse the severity text into a small label set. Matching is
    # case-insensitive and the first matching rule wins; NA or unmatched
    # text falls through to "Unknown".
    severity = case_when(
      str_detect(str_to_lower(severity), "fatal") ~ "Fatal",
      str_detect(str_to_lower(severity), "serious") ~ "Serious injury",
      str_detect(str_to_lower(severity), "other") ~ "Other injury",
      str_detect(str_to_lower(severity), "non") ~ "Non-injury",
      TRUE ~ "Unknown"
    ),
    # Same approach for the light condition text.
    light_cond = case_when(
      str_detect(str_to_lower(light_condition), "day") ~ "Daylight",
      str_detect(str_to_lower(light_condition), "dusk|dawn|twilight") ~ "Dusk/Dawn",
      str_detect(str_to_lower(light_condition), "dark") ~ "Dark",
      TRUE ~ "Unknown"
    ),
    # Numeric speed limit; values outside the listed zones (including NA)
    # end up in "Other/Unknown".
    speed_zone_num = readr::parse_number(as.character(speed_zone)),
    speed_band = case_when(
      speed_zone_num <= 60 ~ "≤60 km/h",
      speed_zone_num %in% c(70, 75, 80) ~ "70–80 km/h",
      speed_zone_num %in% c(90, 100, 110) ~ "90–110 km/h",
      TRUE ~ "Other/Unknown"
    ),
    # Parse the crash date, trying day-month-year first and year-month-day
    # second, element by element. `%||%` cannot be used for this: it only
    # falls back when the left-hand side is NULL, and dmy() always returns a
    # vector, so the ymd() fallback would never run. A column that read_csv()
    # has already parsed as Date is kept as-is.
    accident_date = if (is.Date(accident_date)) {
      accident_date
    } else {
      coalesce(
        suppressWarnings(dmy(accident_date)),
        suppressWarnings(ymd(accident_date))
      )
    },
    year = year(accident_date)
  )
```
```{r kpi_prep, echo=FALSE}
# KPIs: share of severe (Fatal or Serious injury) crashes under Daylight and
# Dark, spread into one column per light condition, plus the Dark - Daylight gap.
kpi <- crash_clean |>
  filter(light_cond %in% c("Daylight", "Dark")) |>
  mutate(severe = severity %in% c("Fatal", "Serious injury")) |>
  group_by(light_cond) |>
  summarise(severe_share = mean(severe, na.rm = TRUE), .groups = "drop") |>
  tidyr::pivot_wider(names_from = light_cond, values_from = severe_share) |>
  mutate(
    # A missing share is treated as zero before taking the difference.
    Daylight = replace(Daylight, is.na(Daylight), 0),
    Dark = replace(Dark, is.na(Dark), 0),
    Gap = Dark - Daylight
  )
```
# Overview
## Row {data-height="35"}
### Key metrics
```{r fig.width=10, fig.height=8}
# Headline figures are taken from `kpi` (computed in the kpi_prep chunk) so
# the box always reflects the loaded data instead of hard-coded numbers.
# The gap is a difference between two shares, so it is reported in
# percentage points (pp). The icon name carries the Font Awesome "fa-" prefix.
valueBox(
  value = sprintf(
    "%s vs %s",
    scales::percent(kpi$Dark, accuracy = 1),
    scales::percent(kpi$Daylight, accuracy = 1)
  ),
  caption = sprintf(
    "Severe crashes: Night vs Day (%+.0f pp at night)",
    100 * kpi$Gap
  ),
  icon = "fa-moon",
  color = "danger"
)
```
## Row {data-height="65"}
```{r fig.width=8, fig.height=10}
# Severity mix within each light condition (Daylight vs Dark only).
# `pct` is each severity's share of the crashes in its light condition.
dn_tbl <- crash_clean |>
  filter(light_cond %in% c("Daylight", "Dark")) |>
  group_by(light_cond, severity) |>
  summarise(n = n(), .groups = "drop_last") |>
  mutate(pct = n / sum(n)) |>
  ungroup() |>
  mutate(
    # Fixed level order controls the stacking and legend order.
    severity = factor(
      severity,
      levels = c("Fatal", "Serious injury", "Other injury", "Non-injury", "Unknown")
    )
  )

# 100% stacked bars, one per light condition, with the share printed
# in the middle of each segment.
ggplot(data = dn_tbl, mapping = aes(x = light_cond, y = pct, fill = severity)) +
  geom_col(width = 0.7) +
  geom_text(
    mapping = aes(label = scales::percent(pct, accuracy = 1)),
    position = position_stack(vjust = 0.5),
    size = 4,
    color = "white"
  ) +
  scale_fill_brewer(palette = "Set2", name = "Severity") +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Crash Severity by Light Condition",
    x = NULL,
    y = "Share within Light Condition",
    caption = "Each bar sums to 100% within Daylight or Dark"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "right",
    plot.caption = element_text(size = 10, color = "grey40")
  )
```
```{r}
# Re-derive the numeric speed limit and the speed band on crash_clean.
# This overwrites the speed_band built in the load_data chunk, replacing its
# Unicode labels with plain-ASCII ones ("<=60 km/h", "70-80 km/h", ...).
crash_clean <- mutate(
  crash_clean,
  speed_zone_num = readr::parse_number(as.character(speed_zone)),
  speed_band = case_when(
    speed_zone_num <= 60 ~ "<=60 km/h",
    is.element(speed_zone_num, c(70, 75, 80)) ~ "70-80 km/h",
    is.element(speed_zone_num, c(90, 100, 110)) ~ "90-110 km/h",
    # Anything else, including a missing speed limit, is lumped together.
    TRUE ~ "Other/Unknown"
  )
)
```
```{r fig.width=8, fig.height=10}
# Severe-crash share for every speed band x light condition cell.
# The share is computed with a grouped summarise rather than by pivoting the
# TRUE/FALSE counts into columns: pivot_wider() only creates the columns that
# occur in the data, so the old `TRUE` / `FALSE` arithmetic failed whenever
# the filtered data contained no severe (or no non-severe) crashes at all.
heat_tbl <- crash_clean |>
  filter(
    light_cond %in% c("Daylight", "Dark"),
    !is.na(speed_band),
    speed_band != "Other/Unknown"
  ) |>
  mutate(severe = severity %in% c("Fatal", "Serious injury")) |>
  group_by(speed_band, light_cond) |>
  summarise(
    total = n(),
    severe_share = mean(severe),
    .groups = "drop"
  )

# Heatmap: darker red = larger share of severe crashes in that cell.
ggplot(heat_tbl, aes(x = light_cond, y = speed_band, fill = severe_share)) +
  geom_tile(color = "white", linewidth = 0.6) +
  geom_text(
    aes(label = scales::percent(severe_share, accuracy = 0.1)),
    size = 5,
    fontface = "bold"
  ) +
  scale_fill_gradient(
    low = "#ffffff",
    high = "#d7301f",
    labels = scales::percent,
    name = "Severe share"
  ) +
  labs(
    title = "Severe Crash Share by Light Condition and Speed Band",
    x = "Light Condition",
    y = "Speed Band",
    caption = "Severe = Fatal or Serious injury (cell = share within speed band x light condition)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    legend.position = "right",
    plot.caption = element_text(size = 10, color = "grey40")
  )
```
```{r , echo=FALSE}
# Work out the crash year, preferring the parsed date and falling back to
# characters 2-5 of the accident number, then keep only rows whose year
# lies in 2000-2100.
crash_clean <- crash_clean |>
  mutate(
    # Try both day-first and year-first readings of the date column.
    accident_date_dmy = suppressWarnings(lubridate::dmy(accident_date)),
    accident_date_ymd = suppressWarnings(lubridate::ymd(accident_date))
  ) |>
  mutate(
    year_from_date = coalesce(
      lubridate::year(accident_date_dmy),
      lubridate::year(accident_date_ymd)
    ),
    year_from_accno = suppressWarnings(as.integer(substr(accident_no, 2, 5))),
    year = coalesce(year_from_date, year_from_accno)
  ) |>
  filter(!is.na(year), between(year, 2000, 2100))
```
```{r fig.width=10.5, fig.height=10}
# Yearly share of severe (Fatal or Serious injury) crashes, Day vs Night only.
trend_tbl <- crash_clean |>
  filter(light_cond %in% c("Daylight", "Dark")) |>
  mutate(severe = severity %in% c("Fatal", "Serious injury")) |>
  group_by(year, light_cond) |>
  summarise(severe_share = mean(severe, na.rm = TRUE), .groups = "drop")

# One line per light condition across the years.
ggplot(
  data = trend_tbl,
  mapping = aes(x = year, y = severe_share, color = light_cond, group = light_cond)
) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 2) +
  scale_color_manual(values = c("Dark" = "red3", "Daylight" = "skyblue3")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(
    title = "Trend of Severe Crashes Over Time",
    x = "Year",
    y = "Severe Crash Share (%)",
    color = "Light Condition"
  ) +
  theme_minimal(base_size = 13)
```
# More Insights
## Row
```{r fig.width=13, fig.height=10}
# Severity mix within each posted speed limit (bars scaled to 100%).
# Severity is given the same explicit level order and Set2 palette as the
# "Crash Severity by Light Condition" chart, so stacking order and colours
# mean the same thing on both pages instead of defaulting to alphabetical
# order and the default hue palette here.
crash_clean |>
  mutate(
    severity = factor(
      severity,
      levels = c("Fatal", "Serious injury", "Other injury", "Non-injury", "Unknown")
    )
  ) |>
  ggplot(aes(x = as.factor(speed_zone_num), fill = severity)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Crash Severity by Speed Limit",
    x = "Speed Limit (km/h)",
    y = "Percentage within Speed Zone",
    fill = "Severity"
  ) +
  theme_minimal(base_size = 13)
```
```{r fig.width=13, fig.height=10}
# Share of severe (Fatal or Serious injury) crashes by hour of day and
# light condition. Rows whose time cannot be parsed are dropped.
time_tbl <- crash_clean |>
  filter(light_cond %in% c("Daylight", "Dark", "Dusk/Dawn")) |>
  mutate(
    accident_time = parse_time(as.character(accident_time), na = c("", "NA")),
    hour = hour(accident_time),
    severe = severity %in% c("Fatal", "Serious injury")
  ) |>
  filter(!is.na(hour)) |>
  group_by(hour, light_cond) |>
  summarise(severe_share = mean(severe, na.rm = TRUE), .groups = "drop")

# One line per light condition across the 24 hours, ticks every 3 hours.
ggplot(
  data = time_tbl,
  mapping = aes(x = hour, y = severe_share, color = light_cond, group = light_cond)
) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 2) +
  scale_color_manual(
    values = c("Dark" = "red3", "Daylight" = "skyblue3", "Dusk/Dawn" = "orange3")
  ) +
  scale_x_continuous(breaks = seq(0, 23, 3)) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    title = "Hourly Pattern of Severe Crashes",
    x = "Hour of Day",
    y = "Severe Crash Share (%)",
    color = "Light Condition",
    caption = "Night-time (Dark) crashes show higher severe share between 6 PM and 2 AM."
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.caption = element_text(size = 10, color = "grey40"))
```