---
title: "Crime in Victoria 2014–2024: Where the Trend Breaks"
author: "Shainy Fernandes"
output:
flexdashboard::flex_dashboard:
storyboard: true
orientation: rows
theme: cosmo
social: menu
source_code: embed
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(message = FALSE, warning = FALSE)
suppressPackageStartupMessages({
library(tidyverse) # includes dplyr, ggplot2, readr, magrittr (%>%)
library(janitor)
library(plotly)
library(scales)
library(flexdashboard)
library(readxl)
library(forcats)
library(stringr)
})
# ---------------- Helpers (base-R safe) ----------------
# Find one file in data/ by stem; tolerate ".xlsx.xlsx"; prefer .xlsx then .csv
find_data_file <- function(stem){
if (!dir.exists("data")) return(NULL)
files <- list.files("data", full.names = TRUE)
if (!length(files)) return(NULL)
pat <- paste0(stem, ".*\\.(xlsx|csv)(\\.(xlsx|csv))?$")
hits <- files[grepl(pat, tolower(files))]
if (!length(hits)) return(NULL)
# prefer excel over csv
xhits <- hits[grepl("\\.xlsx(\\.|$)|\\.xlsx$", tolower(hits))]
chits <- hits[grepl("\\.csv(\\.|$)|\\.csv$", tolower(hits))]
hits2 <- unique(c(xhits, chits))
if (!length(hits2)) return(NULL)
hits2[1]
}
# Read sheet whose name contains "Table 02" (CSA layout); fallback to first
read_table02 <- function(path){
stopifnot(!is.null(path))
if (grepl("\\.csv(\\.|$)|\\.csv$", tolower(path))) {
df <- readr::read_csv(path, show_col_types = FALSE) %>% clean_names()
} else {
sh <- readxl::excel_sheets(path)
sh2 <- sh[grepl("table\\s*0*2", tolower(sh))]
target <- if (length(sh2)) sh2[1] else sh[1]
df <- readxl::read_excel(path, sheet = target) %>% clean_names()
}
df
}
# First column-name that matches any regex in `patterns`
pick_col <- function(df, patterns){
nms <- names(df)
for (p in patterns){
hit <- which(grepl(p, nms, ignore.case = TRUE))
if (length(hit)) return(nms[hit[1]])
}
NULL
}
# ---------------- Load + Prep (hidden) ----------------
counts_path <- find_data_file("csa_lga_offences_counts")
rates_path <- find_data_file("csa_lga_offences_rates")
counts_t02 <- tryCatch(read_table02(counts_path), error = function(e) NULL)
rates_t02 <- tryCatch(read_table02(rates_path), error = function(e) NULL)
data_ready <- !is.null(counts_t02) && !is.null(rates_t02)
if (data_ready){
# locate counts column (offences/incidents)
c_cnt <- pick_col(
counts_t02,
c("^offences?_recorded$", "^offences?$", "recorded.*offence",
"incident(s)?(_recorded)?$", "\\bcount(s)?\\b", "number.*offence", "incidents?")
)
if (is.null(c_cnt)) {
nms <- names(counts_t02)
cand <- which(!grepl("rate", nms, ignore.case=TRUE) &
grepl("offence|incident|count|number", nms, ignore.case=TRUE))
if (length(cand)) c_cnt <- nms[max(cand)]
}
# locate rate column
r_rate <- pick_col(
rates_t02,
c("rate_per_100000", "rate_per_100_000", "rate_per_100k", "^rate.?per.?100")
)
# If either critical column missing, stop early (clean message in Slide 1)
if (is.null(c_cnt) || is.null(r_rate)) {
data_ready <- FALSE
} else {
# harmonise counts (minimal keys robust to CSA variants)
canon_counts <- tibble(
year = as.integer(gsub("[^0-9]", "", as.character(counts_t02$year))),
lga = str_squish(as.character(counts_t02$local_government_area)),
offence_group = dplyr::coalesce(
as.character(counts_t02$offence_subdivision),
as.character(counts_t02$offence_subgroup),
as.character(counts_t02$offence_division)
),
offences = suppressWarnings(as.numeric(counts_t02[[c_cnt]]))
) %>%
filter(!is.na(lga), !is.na(year), !is.na(offences), !grepl("^total", tolower(lga)))
# harmonise rates
canon_rates <- tibble(
year = as.integer(gsub("[^0-9]", "", as.character(rates_t02$year))),
lga = str_squish(as.character(rates_t02$local_government_area)),
offence_group = dplyr::coalesce(
as.character(rates_t02$offence_subdivision),
as.character(rates_t02$offence_subgroup),
as.character(rates_t02$offence_division)
),
rate_per_100k = suppressWarnings(as.numeric(rates_t02[[r_rate]]))
) %>%
filter(!is.na(lga), !is.na(year), !is.na(rate_per_100k), !grepl("^total", tolower(lga)))
# join on lga + year + offence_group; fallback to lga+year if needed
crime <- suppressWarnings(
inner_join(canon_counts, canon_rates, by = c("year","lga","offence_group"))
)
if (nrow(crime) == 0L) {
crime <- inner_join(
select(canon_counts, year,lga,offences),
select(canon_rates, year,lga,rate_per_100k),
by = c("year","lga")
) %>% mutate(offence_group = "All offences")
}
# clamp to 2014..2024
min_year <- max(2014L, min(crime$year, na.rm=TRUE))
max_year <- min(2024L, max(crime$year, na.rm=TRUE))
crime <- crime %>% filter(year >= min_year, year <= max_year)
# statewide summary
statewide <- crime %>%
group_by(year) %>%
summarise(
total_offences = sum(offences, na.rm=TRUE),
avg_rate = weighted.mean(rate_per_100k, w = offences, na.rm=TRUE),
.groups = "drop"
)
baseline_year <- min_year
latest_year <- max_year
# LGA-year totals
lga_year_total <- crime %>%
group_by(lga, year) %>%
summarise(
rate = weighted.mean(rate_per_100k, w = offences, na.rm=TRUE),
offences = sum(offences, na.rm=TRUE),
.groups = "drop"
)
# LGA change since baseline
lga_change <- lga_year_total %>%
group_by(lga) %>%
summarise(
base = dplyr::first(rate[year==baseline_year], default = NA_real_),
latest = dplyr::first(rate[year==latest_year], default = NA_real_),
pct_change = (latest - base)/base,
.groups = "drop"
) %>%
filter(is.finite(pct_change))
# top/bottom movers
top10_up <- lga_change %>% slice_max(pct_change, n = 10, with_ties = FALSE)
top10_down <- lga_change %>% slice_min(pct_change, n = 10, with_ties = FALSE)
# default LGA for deep dive: highest latest rate
default_lga <- lga_year_total %>%
filter(year==latest_year) %>%
slice_max(rate, n=1) %>%
pull(lga)
}
}
```
### Slide 1 {.storyboard}
# Victoria at a glance
```{r}
if (!data_ready) {
cat("📂 Add these in `data/` then Knit:\n",
" - csa_lga_offences_counts.xlsx (Table 02)\n",
" - csa_lga_offences_rates.xlsx (Table 02)\n",
"\nIf your sheet names don’t contain 'Table 02', open the files and rename the relevant sheet to include 'Table 02'.")
} else {
sw_base <- dplyr::filter(statewide, year==baseline_year)
sw_latest <- dplyr::filter(statewide, year==latest_year)
valueBox(scales::comma(sw_latest$total_offences),
caption = paste0("Total offences (", latest_year, ")"))
valueBox(scales::comma(sw_latest$avg_rate, accuracy=0.1),
caption = paste0("Avg rate per 100k (", latest_year, ")"))
valueBox(scales::percent((sw_latest$avg_rate - sw_base$avg_rate)/sw_base$avg_rate, accuracy=0.1),
caption = paste0((latest_year - baseline_year), "-yr change vs ", baseline_year))
}
```
### Slide 2 {.storyboard}
# Statewide trend (rate per 100,000)
```{r}
if (data_ready) {
plot_ly(statewide, x=~year, y=~avg_rate, type="scatter", mode="lines+markers",
hovertemplate="Year %{x}<br>Rate %{y:.1f}<extra></extra>") %>%
layout(yaxis=list(title="Rate per 100,000"), xaxis=list(dtick=1))
} else {
cat("Waiting for data…")
}
```
### Slide 3 {.storyboard}
# Offence groups driving the trend (statewide)
```{r}
if (data_ready) {
grp <- crime %>%
group_by(year, offence_group) %>%
summarise(rate = stats::weighted.mean(rate_per_100k, w=offences, na.rm=TRUE),
.groups="drop")
plot_ly(grp, x=~year, y=~rate, color=~offence_group, type="scatter", mode="lines",
hovertemplate="Year %{x}<br>%{fullData.name}: %{y:.1f}<extra></extra>") %>%
layout(yaxis=list(title="Rate per 100,000"),
xaxis=list(dtick=1),
legend=list(orientation="h"))
} else {
cat("Waiting for data…")
}
```
### Slide 4 {.storyboard}
# LGAs with biggest shifts (baseline → latest)
```{r}
if (data_ready) {
p_up <- top10_up %>%
mutate(lga = forcats::fct_reorder(lga, pct_change)) %>%
ggplot(aes(lga, pct_change)) +
geom_col() + coord_flip() +
scale_y_continuous(labels=scales::percent) +
labs(x=NULL, y="% change", title="Largest increases")
p_down <- top10_down %>%
mutate(lga = forcats::fct_reorder(lga, pct_change)) %>%
ggplot(aes(lga, pct_change)) +
geom_col() + coord_flip() +
scale_y_continuous(labels=scales::percent) +
labs(x=NULL, y="% change", title="Largest decreases")
p_up; p_down
} else {
cat("Waiting for data…")
}
```
### Slide 5 {.storyboard}
# Deep dive: a single LGA (auto-chosen)
```{r}
if (data_ready) {
sel_lga <- default_lga
lga_ts <- dplyr::filter(lga_year_total, lga == sel_lga)
plot_ly(lga_ts, x=~year, y=~rate, type="scatter", mode="lines+markers",
hovertemplate=paste0(sel_lga, "<br>%{x}: %{y:.1f}<extra></extra>")) %>%
layout(yaxis=list(title="Rate per 100,000"),
xaxis=list(dtick=1),
title=paste0(sel_lga, " — overall rate"))
} else {
cat("Waiting for data…")
}
```
### Slide 6 {.storyboard}
# Caveats & notes
- Recorded offences depend on reporting, policing and definitions.
- Rates for small LGAs can swing a lot (small denominators).
- Focus on multi-year patterns; single-year jumps can reflect reporting/policing.
### Slide 7 {.storyboard}
# Sources
- Crime Statistics Agency (Victoria) — *Latest crime data by area*.
- Figures use **Table 02** from both workbooks (counts and rates).