Crime in Victoria 2014–2024: Where the Trend Breaks

---
title: "Crime in Victoria 2014–2024: Where the Trend Breaks"
author: "Shainy Fernandes"
output:
  flexdashboard::flex_dashboard:
    storyboard: true
    orientation: rows
    theme: cosmo
    social: menu
    source_code: embed
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(message = FALSE, warning = FALSE)

suppressPackageStartupMessages({
  library(tidyverse)   # includes dplyr, ggplot2, readr, magrittr (%>%)
  library(janitor)
  library(plotly)
  library(scales)
  library(flexdashboard)
  library(readxl)
  library(forcats)
  library(stringr)
})

# ---------------- Helpers (base-R safe) ----------------

# Find one file in data/ by stem; tolerate ".xlsx.xlsx"; prefer .xlsx then .csv
find_data_file <- function(stem){
  if (!dir.exists("data")) return(NULL)
  files <- list.files("data", full.names = TRUE)
  if (!length(files)) return(NULL)
  pat <- paste0(stem, ".*\\.(xlsx|csv)(\\.(xlsx|csv))?$")
  hits <- files[grepl(pat, tolower(files))]
  if (!length(hits)) return(NULL)
  # prefer excel over csv
  xhits <- hits[grepl("\\.xlsx(\\.|$)|\\.xlsx$", tolower(hits))]
  chits <- hits[grepl("\\.csv(\\.|$)|\\.csv$", tolower(hits))]
  hits2 <- unique(c(xhits, chits))
  if (!length(hits2)) return(NULL)
  hits2[1]
}

# Read sheet whose name contains "Table 02" (CSA layout); fallback to first
read_table02 <- function(path){
  stopifnot(!is.null(path))
  if (grepl("\\.csv(\\.|$)|\\.csv$", tolower(path))) {
    df <- readr::read_csv(path, show_col_types = FALSE) %>% clean_names()
  } else {
    sh  <- readxl::excel_sheets(path)
    sh2 <- sh[grepl("table\\s*0*2", tolower(sh))]
    target <- if (length(sh2)) sh2[1] else sh[1]
    df <- readxl::read_excel(path, sheet = target) %>% clean_names()
  }
  df
}

# First column-name that matches any regex in `patterns`
pick_col <- function(df, patterns){
  nms <- names(df)
  for (p in patterns){
    hit <- which(grepl(p, nms, ignore.case = TRUE))
    if (length(hit)) return(nms[hit[1]])
  }
  NULL
}

# ---------------- Load + Prep (hidden) ----------------

counts_path <- find_data_file("csa_lga_offences_counts")
rates_path  <- find_data_file("csa_lga_offences_rates")

counts_t02 <- tryCatch(read_table02(counts_path), error = function(e) NULL)
rates_t02  <- tryCatch(read_table02(rates_path),  error = function(e) NULL)

data_ready <- !is.null(counts_t02) && !is.null(rates_t02)

if (data_ready){

  # locate counts column (offences/incidents)
  c_cnt <- pick_col(
    counts_t02,
    c("^offences?_recorded$", "^offences?$", "recorded.*offence",
      "incident(s)?(_recorded)?$", "\\bcount(s)?\\b", "number.*offence", "incidents?")
  )
  if (is.null(c_cnt)) {
    nms  <- names(counts_t02)
    cand <- which(!grepl("rate", nms, ignore.case=TRUE) &
                    grepl("offence|incident|count|number", nms, ignore.case=TRUE))
    if (length(cand)) c_cnt <- nms[max(cand)]
  }

  # locate rate column
  r_rate <- pick_col(
    rates_t02,
    c("rate_per_100000", "rate_per_100_000", "rate_per_100k", "^rate.?per.?100")
  )

  # If either critical column missing, stop early (clean message in Slide 1)
  if (is.null(c_cnt) || is.null(r_rate)) {
    data_ready <- FALSE
  } else {

    # harmonise counts (minimal keys robust to CSA variants)
    canon_counts <- tibble(
      year          = as.integer(gsub("[^0-9]", "", as.character(counts_t02$year))),
      lga           = str_squish(as.character(counts_t02$local_government_area)),
      offence_group = dplyr::coalesce(
                        as.character(counts_t02$offence_subdivision),
                        as.character(counts_t02$offence_subgroup),
                        as.character(counts_t02$offence_division)
                      ),
      offences      = suppressWarnings(as.numeric(counts_t02[[c_cnt]]))
    ) %>%
      filter(!is.na(lga), !is.na(year), !is.na(offences), !grepl("^total", tolower(lga)))

    # harmonise rates
    canon_rates <- tibble(
      year          = as.integer(gsub("[^0-9]", "", as.character(rates_t02$year))),
      lga           = str_squish(as.character(rates_t02$local_government_area)),
      offence_group = dplyr::coalesce(
                        as.character(rates_t02$offence_subdivision),
                        as.character(rates_t02$offence_subgroup),
                        as.character(rates_t02$offence_division)
                      ),
      rate_per_100k = suppressWarnings(as.numeric(rates_t02[[r_rate]]))
    ) %>%
      filter(!is.na(lga), !is.na(year), !is.na(rate_per_100k), !grepl("^total", tolower(lga)))

    # join on lga + year + offence_group; fallback to lga+year if needed
    crime <- suppressWarnings(
      inner_join(canon_counts, canon_rates, by = c("year","lga","offence_group"))
    )
    if (nrow(crime) == 0L) {
      crime <- inner_join(
        select(canon_counts, year,lga,offences),
        select(canon_rates,  year,lga,rate_per_100k),
        by = c("year","lga")
      ) %>% mutate(offence_group = "All offences")
    }

    # clamp to 2014..2024
    min_year <- max(2014L, min(crime$year, na.rm=TRUE))
    max_year <- min(2024L, max(crime$year, na.rm=TRUE))
    crime <- crime %>% filter(year >= min_year, year <= max_year)

    # statewide summary
    statewide <- crime %>%
      group_by(year) %>%
      summarise(
        total_offences = sum(offences, na.rm=TRUE),
        avg_rate       = weighted.mean(rate_per_100k, w = offences, na.rm=TRUE),
        .groups = "drop"
      )

    baseline_year <- min_year
    latest_year   <- max_year

    # LGA-year totals
    lga_year_total <- crime %>%
      group_by(lga, year) %>%
      summarise(
        rate     = weighted.mean(rate_per_100k, w = offences, na.rm=TRUE),
        offences = sum(offences, na.rm=TRUE),
        .groups  = "drop"
      )

    # LGA change since baseline
    lga_change <- lga_year_total %>%
      group_by(lga) %>%
      summarise(
        base       = dplyr::first(rate[year==baseline_year], default = NA_real_),
        latest     = dplyr::first(rate[year==latest_year],   default = NA_real_),
        pct_change = (latest - base)/base,
        .groups    = "drop"
      ) %>%
      filter(is.finite(pct_change))

    # top/bottom movers
    top10_up   <- lga_change %>% slice_max(pct_change, n = 10, with_ties = FALSE)
    top10_down <- lga_change %>% slice_min(pct_change, n = 10, with_ties = FALSE)

    # default LGA for deep dive: highest latest rate
    default_lga <- lga_year_total %>%
      filter(year==latest_year) %>%
      slice_max(rate, n=1) %>%
      pull(lga)
  }
}
```

### Slide 1 {.storyboard}
# Victoria at a glance

```{r}
if (!data_ready) {
  cat("📂 Add these in `data/` then Knit:\n",
      " - csa_lga_offences_counts.xlsx (Table 02)\n",
      " - csa_lga_offences_rates.xlsx  (Table 02)\n",
      "\nIf your sheet names don’t contain 'Table 02', open the files and rename the relevant sheet to include 'Table 02'.")
} else {
  sw_base   <- dplyr::filter(statewide, year==baseline_year)
  sw_latest <- dplyr::filter(statewide, year==latest_year)

  valueBox(scales::comma(sw_latest$total_offences),
           caption = paste0("Total offences (", latest_year, ")"))

  valueBox(scales::comma(sw_latest$avg_rate, accuracy=0.1),
           caption = paste0("Avg rate per 100k (", latest_year, ")"))

  valueBox(scales::percent((sw_latest$avg_rate - sw_base$avg_rate)/sw_base$avg_rate, accuracy=0.1),
           caption = paste0((latest_year - baseline_year), "-yr change vs ", baseline_year))
}
```


### Slide 2 {.storyboard}
# Statewide trend (rate per 100,000) 

```{r}
if (data_ready) {
  plot_ly(statewide, x=~year, y=~avg_rate, type="scatter", mode="lines+markers",
          hovertemplate="Year %{x}<br>Rate %{y:.1f}<extra></extra>") %>%
    layout(yaxis=list(title="Rate per 100,000"), xaxis=list(dtick=1))
} else {
  cat("Waiting for data…")
}
```

### Slide 3 {.storyboard}
# Offence groups driving the trend (statewide)
```{r}
if (data_ready) {
  grp <- crime %>%
    group_by(year, offence_group) %>%
    summarise(rate = stats::weighted.mean(rate_per_100k, w=offences, na.rm=TRUE),
              .groups="drop")

  plot_ly(grp, x=~year, y=~rate, color=~offence_group, type="scatter", mode="lines",
          hovertemplate="Year %{x}<br>%{fullData.name}: %{y:.1f}<extra></extra>") %>%
    layout(yaxis=list(title="Rate per 100,000"),
           xaxis=list(dtick=1),
           legend=list(orientation="h"))
} else {
  cat("Waiting for data…")
}
```

### Slide 4 {.storyboard}
# LGAs with biggest shifts (baseline → latest)
```{r}
if (data_ready) {
  p_up <- top10_up %>%
    mutate(lga = forcats::fct_reorder(lga, pct_change)) %>%
    ggplot(aes(lga, pct_change)) +
    geom_col() + coord_flip() +
    scale_y_continuous(labels=scales::percent) +
    labs(x=NULL, y="% change", title="Largest increases")

  p_down <- top10_down %>%
    mutate(lga = forcats::fct_reorder(lga, pct_change)) %>%
    ggplot(aes(lga, pct_change)) +
    geom_col() + coord_flip() +
    scale_y_continuous(labels=scales::percent) +
    labs(x=NULL, y="% change", title="Largest decreases")

  p_up; p_down
} else {
  cat("Waiting for data…")
}
```

### Slide 5 {.storyboard}
# Deep dive: a single LGA (auto-chosen)
```{r}
if (data_ready) {
  sel_lga <- default_lga
  lga_ts  <- dplyr::filter(lga_year_total, lga == sel_lga)

  plot_ly(lga_ts, x=~year, y=~rate, type="scatter", mode="lines+markers",
          hovertemplate=paste0(sel_lga, "<br>%{x}: %{y:.1f}<extra></extra>")) %>%
    layout(yaxis=list(title="Rate per 100,000"),
           xaxis=list(dtick=1),
           title=paste0(sel_lga, " — overall rate"))
} else {
  cat("Waiting for data…")
}
```

### Slide 6 {.storyboard}
# Caveats & notes
- Recorded offences depend on reporting, policing and definitions.  
- Rates for small LGAs can swing a lot (small denominators).  
- Focus on multi-year patterns; single-year jumps can reflect reporting/policing.

### Slide 7 {.storyboard}
# Sources
- Crime Statistics Agency (Victoria) — *Latest crime data by area*.  
- Figures use **Table 02** from both workbooks (counts and rates).