title: “Crime Rates vs Population — Victorian LGAs” author: “Mohamed Najih Anvar (S4045859)” date: “2025-10-29” output: revealjs::revealjs_presentation: self_contained: true transition: fade slide_level: 2 center: true —
Do larger Local Government Areas (LGAs) always have higher crime rates? Using official Victorian Recorded Offences (CSA, year ending June 2025) and Estimated Resident Population (ABS, 2024), I compare crime rates (per 100,000) across LGAs to test this assumption. The visual story shows that several mid-sized LGAs rank higher by rate than some large metropolitan areas, suggesting that population size alone does not explain crime risk.
Tip: Knit to HTML for interactive tooltips. If knitting to PDF, this document automatically shows static ggplots.
library(tidyverse)
library(readxl)
library(scales)
library(plotly)
library(stringr)
# IMPORTANT on Windows: use forward slashes (/) or double backslashes (\)
pop_path <- "C:/Users/nprad/Downloads/ABS_LGA_ERP_2001_2024.xlsx"
crime_path <- "C:/Users/nprad/Downloads/Data_Tables_LGA_Recorded_Offences_Year_Ending_June_2025.xlsx"
# If either file doesn't exist, let me choose it interactively
if (!file.exists(pop_path)) {
message("Select the ABS population file...")
pop_path <- file.choose()
}
if (!file.exists(crime_path)) {
message("Select the CSA offences file...")
crime_path <- file.choose()
}
stopifnot(file.exists(pop_path), file.exists(crime_path))
# -------- ABS: population by LGA (Table 1) --------
# In this workbook, the header row with years is the 6th row -> skip = 5
abs_raw <- suppressMessages(read_excel(pop_path, sheet = "Table 1", skip = 5))
# Column 2 is LGA name after import (first two headers are usually blank "...1", "...2")
lga_col <- names(abs_raw)[2]
# Find the "2024" column (sometimes imported as X2024)
year_cols <- names(abs_raw)[str_detect(names(abs_raw), "^(X)?\\d{4}$")]
if (length(year_cols) == 0) stop("ABS: Year-like columns not found after skip=5. Inspect names(read_excel(..., skip=5)).")
abs_year_col <- if ("2024" %in% year_cols) "2024" else if ("X2024" %in% year_cols) "X2024" else tail(year_cols, 1)
# Remove the 'units' row (often has 'no.'), totals and unincorporated
pop_data <- abs_raw %>%
filter(!tolower(as.character(.data[[abs_year_col]])) %in% c("no.", "no")) %>%
filter(!is.na(.data[[lga_col]]),
!str_detect(.data[[lga_col]], regex("Total|Unincorp", ignore_case = TRUE))) %>%
transmute(
lga = str_squish(as.character(.data[[lga_col]])),
population_2024 = suppressWarnings(as.numeric(.data[[abs_year_col]]))
)
# -------- CSA: recorded offences by LGA (Table 01) --------
csa_raw <- read_excel(crime_path, sheet = "Table 01")
lga2_col <- names(csa_raw)[str_detect(names(csa_raw), "^Local Government Area$")][1]
year_col <- names(csa_raw)[str_detect(names(csa_raw), "^Year$")][1]
count_col <- names(csa_raw)[str_detect(names(csa_raw), "^Offence Count$")][1]
rate_col <- names(csa_raw)[str_detect(names(csa_raw), "^Rate per 100,000 population$")][1]
if (any(is.na(c(lga2_col, year_col, count_col, rate_col)))) {
stop("CSA: Expected columns not found. Open the sheet to confirm headers.")
}
crime_year <- suppressWarnings(max(as.numeric(csa_raw[[year_col]]), na.rm = TRUE))
crime_data <- csa_raw %>%
filter(.data[[year_col]] == crime_year,
!is.na(.data[[lga2_col]]),
!str_detect(.data[[lga2_col]], regex("Total|Unincorporated Vic", ignore_case = TRUE))) %>%
transmute(
lga = str_squish(as.character(.data[[lga2_col]])),
offence_count = suppressWarnings(as.numeric(.data[[count_col]])),
crime_rate = suppressWarnings(as.numeric(.data[[rate_col]]))
)
# Align to VIC LGAs by intersecting names
pop_data <- pop_data %>% filter(lga %in% unique(crime_data$lga))
# Merge
merged_data <- pop_data %>%
left_join(crime_data, by = "lga") %>%
filter(!is.na(offence_count), !is.na(population_2024), population_2024 > 0) %>%
mutate(crime_rate = replace_na(crime_rate, 0)) %>%
arrange(desc(crime_rate))
# Quick checks
list(
abs_rows = nrow(pop_data),
csa_rows = nrow(crime_data),
merged_rows = nrow(merged_data),
sample_lgas = head(merged_data$lga, 5)
)
## $abs_rows
## [1] 75
##
## $csa_rows
## [1] 80
##
## $merged_rows
## [1] 75
##
## $sample_lgas
## [1] "Melbourne" "Yarra" "Greater Shepparton"
## [4] "Mildura" "Ararat"
top_10 <- merged_data %>% slice_head(n = 10)
p1 <- ggplot(top_10, aes(x = reorder(lga, -crime_rate), y = crime_rate)) +
geom_bar(stat = "identity") +
labs(title = "Top-10 Victorian LGAs by recorded offence rate",
subtitle = paste0("Year ending June ", crime_year, " (per 100,000 population)"),
x = "LGA", y = "Crime rate (per 100,000)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
if (knitr::is_html_output()) {
p1 <- plotly::ggplotly(
p1 + aes(text = paste0("LGA: ", lga,
"<br>Rate: ", round(crime_rate, 1),
"<br>Offences: ", scales::comma(offence_count))),
tooltip = "text"
)
}
p1
p2 <- ggplot(merged_data, aes(x = population_2024, y = crime_rate)) +
geom_point(alpha = 0.85) +
labs(title = "Population vs recorded offence rate by LGA",
subtitle = paste0("ERP 2024 (ABS) vs rate per 100,000 (CSA ", crime_year, ")"),
x = "Population (ERP 2024)", y = "Crime rate (per 100,000)") +
scale_x_continuous(labels = scales::label_number(scale_cut = scales::cut_short_scale())) +
theme_minimal()
if (knitr::is_html_output()) {
p2 <- plotly::ggplotly(
p2 + aes(text = paste0("LGA: ", lga,
"<br>Population: ", scales::comma(population_2024),
"<br>Rate: ", round(crime_rate, 1),
"<br>Offences: ", scales::comma(offence_count))),
tooltip = "text"
)
}
p2
Population size alone does not explain crime risk. Focus on rate outliers for targeted, localised prevention and resourcing.
Australian Bureau of Statistics. (2024). Regional population, 2023-24: Estimated resident population by Local Government Area (ERP by LGA) [Data set]. https://www.abs.gov.au/ Crime Statistics Agency Victoria. (2025). Recorded offences by Local Government Area - Year ending June 2025 [Data set]. https://www.crimestatistics.vic.gov.au/