overview

Row

Key metrics

43% vs 36%

Row

more insights

Row

---
title: "When the Lights Go Out: Road Safety in Victoria"
output:
  flexdashboard::flex_dashboard:
    orientation: rows
    vertical_layout: fill
    theme: cosmo
    source_code: embed
    # flexdashboard navbar entries take their visible label from `title`
    # (not `text`); with `text` the link renders as an icon with no label.
    navbar:
      - { title: "Data Source", href: "https://www.crimestatistics.vic.gov.au", icon: "fa-database" }
---

```{r setup, include=FALSE}
# Global chunk defaults: keep package messages and warnings out of the
# rendered dashboard.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
# Packages used by the chunks below.
library(tidyverse)
library(janitor)
library(lubridate)
library(scales)
library(flexdashboard)
library(stringr)
```

```{r load_data, include=FALSE}
# Read the raw crash extract and derive the analysis columns (severity bucket,
# light-condition bucket, speed band, accident date/year) used by the charts.
path <- "victorian_road_crash_data.csv"

# clean_names() converts the CSV headers to snake_case so the columns can be
# referenced as `light_condition`, `speed_zone`, `accident_date`, ...
crash <- read_csv(path, show_col_types = FALSE) |>
  clean_names()

crash_clean <- crash |>
  mutate(
    # Collapse the free-text severity labels into fixed buckets. Matching is
    # case-insensitive; anything unmatched (including NA) becomes "Unknown".
    severity = case_when(
      str_detect(str_to_lower(severity), "fatal") ~ "Fatal",
      str_detect(str_to_lower(severity), "serious") ~ "Serious injury",
      str_detect(str_to_lower(severity), "other") ~ "Other injury",
      str_detect(str_to_lower(severity), "non") ~ "Non-injury",
      TRUE ~ "Unknown"
    ),

    # Collapse light-condition labels into Daylight / Dusk-Dawn / Dark.
    light_cond = case_when(
      str_detect(str_to_lower(light_condition), "day") ~ "Daylight",
      str_detect(str_to_lower(light_condition), "dusk|dawn|twilight") ~ "Dusk/Dawn",
      str_detect(str_to_lower(light_condition), "dark") ~ "Dark",
      TRUE ~ "Unknown"
    ),

    # Numeric speed limit plus a coarse band. A missing or unlisted value
    # falls through to "Other/Unknown".
    speed_zone_num = readr::parse_number(as.character(speed_zone)),
    speed_band = case_when(
      speed_zone_num <= 60 ~ "≤60 km/h",
      speed_zone_num %in% c(70, 75, 80) ~ "70–80 km/h",
      speed_zone_num %in% c(90, 100, 110) ~ "90–110 km/h",
      TRUE ~ "Other/Unknown"
    ),

    # Parse the accident date, trying day-month-year first and falling back to
    # year-month-day per row. coalesce() is required here: `%||%` only falls
    # back when its left side is NULL, and dmy() returns a vector of NA (never
    # NULL) on failure, so the ymd() branch was never reached.
    accident_date = coalesce(
      suppressWarnings(dmy(accident_date)),
      suppressWarnings(ymd(accident_date))
    ),
    year = year(accident_date)
  )
```

```{r kpi_prep, echo=FALSE}
# KPI table: one row holding the share of severe (Fatal or Serious injury)
# crashes in Daylight and in Dark, plus the Dark-minus-Daylight gap.
kpi <- crash_clean |>
  filter(light_cond %in% c("Daylight", "Dark")) |>
  mutate(severe = severity %in% c("Fatal", "Serious injury")) |>
  group_by(light_cond) |>
  summarise(severe_share = mean(severe, na.rm = TRUE), .groups = "drop") |>
  tidyr::pivot_wider(names_from = light_cond, values_from = severe_share) |>
  # Treat a missing share as zero before taking the difference.
  mutate(across(c(Daylight, Dark), \(share) ifelse(is.na(share), 0, share))) |>
  mutate(Gap = Dark - Daylight)
```

# Overview

## Row {data-height="35"}

### Key metrics

```{r fig.width=10, fig.height=8}
# Headline KPI box. The figures come from the `kpi` table (severe-crash share
# in Dark vs Daylight and their gap) rather than hard-coded text, so the box
# cannot drift from the data. The gap is a difference of two shares, so it is
# reported in percentage points. flexdashboard icons need a library prefix
# ("fa-" for Font Awesome); a bare "moon" renders no icon.
valueBox(
  value = sprintf(
    "%s vs %s",
    scales::percent(kpi$Dark, accuracy = 1),
    scales::percent(kpi$Daylight, accuracy = 1)
  ),
  caption = sprintf(
    "Severe crashes: Night vs Day (%+.0f percentage points at night)",
    100 * kpi$Gap
  ),
  icon = "fa-moon",
  color = "danger"
)
```

## Row {data-height="65"}

```{r fig.width=8, fig.height=10}
# Severity mix within each light condition (Daylight vs Dark), shown as
# stacked bars whose segments are shares of that light condition's crashes.
dn_tbl <- crash_clean |>
  filter(light_cond %in% c("Daylight", "Dark")) |>
  count(light_cond, severity) |>
  group_by(light_cond) |>
  mutate(
    pct = prop.table(n),
    # Fixed level order so the stack and legend run from most to least severe.
    severity = factor(
      severity,
      levels = c("Fatal", "Serious injury", "Other injury", "Non-injury", "Unknown")
    )
  ) |>
  ungroup()

ggplot(data = dn_tbl, mapping = aes(x = light_cond, y = pct, fill = severity)) +
  geom_col(width = 0.7) +
  # Percentage label centred inside each stacked segment.
  geom_text(
    mapping = aes(label = scales::percent(pct, accuracy = 1)),
    position = position_stack(vjust = 0.5),
    colour = "white",
    size = 4
  ) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(name = "Severity", palette = "Set2") +
  labs(
    title = "Crash Severity by Light Condition",
    caption = "Each bar sums to 100% within Daylight or Dark",
    x = NULL,
    y = "Share within Light Condition"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    legend.position = "right",
    plot.caption = element_text(size = 10, color = "grey40")
  )
```

```{r}
# Recompute the numeric speed limit and its band using plain-ASCII band
# labels; these replace any speed_zone_num / speed_band columns already on
# crash_clean.
crash_clean <- mutate(
  crash_clean,
  speed_zone_num = readr::parse_number(as.character(speed_zone)),
  speed_band = case_when(
    speed_zone_num <= 60 ~ "<=60 km/h",
    speed_zone_num %in% c(70, 75, 80) ~ "70-80 km/h",
    speed_zone_num %in% c(90, 100, 110) ~ "90-110 km/h",
    # Missing or unlisted limits.
    TRUE ~ "Other/Unknown"
  )
)
```

```{r fig.width=8, fig.height=10}
# Share of severe (Fatal or Serious injury) crashes for every
# speed band x light condition cell, drawn as a heatmap.
#
# The share is computed directly with mean() instead of count() +
# pivot_wider(): the pivot route refers to columns named `TRUE` and `FALSE`,
# which do not exist (and error) when the filtered data contains no severe or
# no non-severe crashes at all.
heat_tbl <- crash_clean |>
  filter(
    light_cond %in% c("Daylight", "Dark"),
    !is.na(speed_band),
    speed_band != "Other/Unknown"
  ) |>
  mutate(
    severe = severity %in% c("Fatal", "Serious injury"),
    # Order the bands by their lowest speed limit so the y-axis runs from slow
    # to fast; plain character labels are sorted by locale collation, which
    # can place "<=60 km/h" after the numeric bands.
    speed_band = forcats::fct_reorder(speed_band, speed_zone_num, .fun = min)
  ) |>
  group_by(speed_band, light_cond) |>
  summarise(
    total = n(),
    severe_share = mean(severe),
    .groups = "drop"
  )

ggplot(heat_tbl, aes(x = light_cond, y = speed_band, fill = severe_share)) +
  geom_tile(color = "white", linewidth = 0.6) +
  # Print the share inside each tile.
  geom_text(aes(label = scales::percent(severe_share, accuracy = 0.1)),
            size = 5, fontface = "bold") +
  scale_fill_gradient(low = "#ffffff", high = "#d7301f",
                      labels = scales::percent, name = "Severe share") +
  labs(
    title   = "Severe Crash Share by Light Condition and Speed Band",
    x       = "Light Condition",
    y       = "Speed Band",
    caption = "Severe = Fatal or Serious injury (cell = share within speed band x light condition)"
  ) +
  theme_minimal(base_size = 14) +
  theme(legend.position = "right",
        plot.caption = element_text(size = 10, color = "grey40"))
```

```{r , echo=FALSE}
# Derive the crash year: take it from the accident date when that parses
# (day-month-year first, then year-month-day); otherwise fall back to
# characters 2-5 of the accident number. Rows whose year is missing or
# outside 2000-2100 are dropped.
crash_clean <- crash_clean |>
  mutate(
    accident_date_dmy = suppressWarnings(lubridate::dmy(accident_date)),
    accident_date_ymd = suppressWarnings(lubridate::ymd(accident_date))
  ) |>
  mutate(
    year_from_date  = lubridate::year(coalesce(accident_date_dmy, accident_date_ymd)),
    year_from_accno = suppressWarnings(as.integer(substr(accident_no, 2, 5))),
    year            = coalesce(year_from_date, year_from_accno)
  ) |>
  filter(!is.na(year), year >= 2000, year <= 2100)
```

```{r fig.width=10.5, fig.height=10}
# Yearly share of severe (Fatal or Serious injury) crashes, one line per
# light condition (Daylight vs Dark only).
trend_tbl <- crash_clean |>
  filter(light_cond %in% c("Daylight", "Dark")) |>
  mutate(severe = severity %in% c("Fatal", "Serious injury")) |>
  group_by(year, light_cond) |>
  summarise(severe_share = mean(severe, na.rm = TRUE), .groups = "drop")

ggplot(
  data = trend_tbl,
  mapping = aes(x = year, y = severe_share, color = light_cond, group = light_cond)
) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 2) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_color_manual(values = c("Dark" = "red3", "Daylight" = "skyblue3")) +
  labs(
    title = "Trend of Severe Crashes Over Time",
    color = "Light Condition",
    x = "Year",
    y = "Severe Crash Share (%)"
  ) +
  theme_minimal(base_size = 13)
```

# More insights

## Row

```{r fig.width=13, fig.height=10}
# Severity mix for each posted speed limit; position = "fill" rescales every
# bar to 100% so the limits can be compared regardless of crash volume.
crash_clean |>
  ggplot(mapping = aes(x = as.factor(speed_zone_num), fill = severity)) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = percent_format()) +
  labs(
    title = "Crash Severity by Speed Limit",
    fill = "Severity",
    x = "Speed Limit (km/h)",
    y = "Percentage within Speed Zone"
  ) +
  theme_minimal(base_size = 13)
```

```{r fig.width=13, fig.height=10}
# Share of severe (Fatal or Serious injury) crashes by hour of day, one line
# per light condition. Rows whose accident time cannot be parsed are dropped.
time_tbl <- crash_clean |>
  filter(light_cond %in% c("Daylight", "Dark", "Dusk/Dawn")) |>
  mutate(
    accident_time = parse_time(as.character(accident_time), na = c("", "NA")),
    hour = lubridate::hour(accident_time),
    severe = severity %in% c("Fatal", "Serious injury")
  ) |>
  filter(!is.na(hour)) |>
  group_by(hour, light_cond) |>
  summarise(severe_share = mean(severe, na.rm = TRUE), .groups = "drop")

ggplot(
  data = time_tbl,
  mapping = aes(x = hour, y = severe_share, color = light_cond, group = light_cond)
) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 2) +
  # Tick every three hours across the day.
  scale_x_continuous(breaks = seq(0, 23, 3)) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  scale_color_manual(
    values = c("Dark" = "red3", "Daylight" = "skyblue3", "Dusk/Dawn" = "orange3")
  ) +
  labs(
    title = "Hourly Pattern of Severe Crashes",
    caption = "Night-time (Dark) crashes show higher severe share between 6 PM and 2 AM.",
    color = "Light Condition",
    x = "Hour of Day",
    y = "Severe Crash Share (%)"
  ) +
  theme_minimal(base_size = 13) +
  theme(plot.caption = element_text(size = 10, color = "grey40"))
```