title: “Crime Rates vs Population — Victorian LGAs” author: “Mohamed Najih Anvar (S4045859)” date: “2025-10-29” output: revealjs::revealjs_presentation: self_contained: true transition: fade slide_level: 2 center: true —

Abstract

Do larger Local Government Areas (LGAs) always have higher crime rates? Using official Victorian Recorded Offences (CSA, year ending June 2025) and Estimated Resident Population (ABS, 2024), I compare crime rates (per 100,000) across LGAs to test this assumption. The visual story shows that several mid-sized LGAs rank higher by rate than some large metropolitan areas, suggesting that population size alone does not explain crime risk.

Data & Method (1-minute read)

Tip: Knit to HTML for interactive tooltips. If knitting to PDF, this document automatically shows static ggplots.

Load libraries

library(tidyverse)
library(readxl)
library(scales)
library(plotly)
library(stringr)

File paths (edit if stored elsewhere)

# IMPORTANT on Windows: use forward slashes (/) or double backslashes (\)
pop_path   <- "C:/Users/nprad/Downloads/ABS_LGA_ERP_2001_2024.xlsx"
crime_path <- "C:/Users/nprad/Downloads/Data_Tables_LGA_Recorded_Offences_Year_Ending_June_2025.xlsx"

# If either file doesn't exist, let me choose it interactively
if (!file.exists(pop_path)) {
  message("Select the ABS population file...")
  pop_path <- file.choose()
}
if (!file.exists(crime_path)) {
  message("Select the CSA offences file...")
  crime_path <- file.choose()
}

stopifnot(file.exists(pop_path), file.exists(crime_path))

Read & prepare data

# -------- ABS: population by LGA (Table 1) --------
# In this workbook, the header row with years is the 6th row -> skip = 5
abs_raw <- suppressMessages(read_excel(pop_path, sheet = "Table 1", skip = 5))

# Column 2 is LGA name after import (first two headers are usually blank "...1", "...2")
lga_col <- names(abs_raw)[2]

# Find the "2024" column (sometimes imported as X2024)
year_cols <- names(abs_raw)[str_detect(names(abs_raw), "^(X)?\\d{4}$")]
if (length(year_cols) == 0) stop("ABS: Year-like columns not found after skip=5. Inspect names(read_excel(..., skip=5)).")

abs_year_col <- if ("2024" %in% year_cols) "2024" else if ("X2024" %in% year_cols) "X2024" else tail(year_cols, 1)

# Remove the 'units' row (often has 'no.'), totals and unincorporated
pop_data <- abs_raw %>%
  filter(!tolower(as.character(.data[[abs_year_col]])) %in% c("no.", "no")) %>%
  filter(!is.na(.data[[lga_col]]),
         !str_detect(.data[[lga_col]], regex("Total|Unincorp", ignore_case = TRUE))) %>%
  transmute(
    lga = str_squish(as.character(.data[[lga_col]])),
    population_2024 = suppressWarnings(as.numeric(.data[[abs_year_col]]))
  )

# -------- CSA: recorded offences by LGA (Table 01) --------
csa_raw <- read_excel(crime_path, sheet = "Table 01")

lga2_col  <- names(csa_raw)[str_detect(names(csa_raw), "^Local Government Area$")][1]
year_col  <- names(csa_raw)[str_detect(names(csa_raw), "^Year$")][1]
count_col <- names(csa_raw)[str_detect(names(csa_raw), "^Offence Count$")][1]
rate_col  <- names(csa_raw)[str_detect(names(csa_raw), "^Rate per 100,000 population$")][1]

if (any(is.na(c(lga2_col, year_col, count_col, rate_col)))) {
  stop("CSA: Expected columns not found. Open the sheet to confirm headers.")
}

crime_year <- suppressWarnings(max(as.numeric(csa_raw[[year_col]]), na.rm = TRUE))

crime_data <- csa_raw %>%
  filter(.data[[year_col]] == crime_year,
         !is.na(.data[[lga2_col]]),
         !str_detect(.data[[lga2_col]], regex("Total|Unincorporated Vic", ignore_case = TRUE))) %>%
  transmute(
    lga = str_squish(as.character(.data[[lga2_col]])),
    offence_count = suppressWarnings(as.numeric(.data[[count_col]])),
    crime_rate    = suppressWarnings(as.numeric(.data[[rate_col]]))
  )

# Align to VIC LGAs by intersecting names
pop_data <- pop_data %>% filter(lga %in% unique(crime_data$lga))

# Merge
merged_data <- pop_data %>%
  left_join(crime_data, by = "lga") %>%
  filter(!is.na(offence_count), !is.na(population_2024), population_2024 > 0) %>%
  mutate(crime_rate = replace_na(crime_rate, 0)) %>%
  arrange(desc(crime_rate))

# Quick checks
list(
  abs_rows = nrow(pop_data),
  csa_rows = nrow(crime_data),
  merged_rows = nrow(merged_data),
  sample_lgas = head(merged_data$lga, 5)
)
## $abs_rows
## [1] 75
## 
## $csa_rows
## [1] 80
## 
## $merged_rows
## [1] 75
## 
## $sample_lgas
## [1] "Melbourne"          "Yarra"              "Greater Shepparton"
## [4] "Mildura"            "Ararat"

Findings (at a glance)

Visual 1 - Top-10 LGAs by crime rate

top_10 <- merged_data %>% slice_head(n = 10)

p1 <- ggplot(top_10, aes(x = reorder(lga, -crime_rate), y = crime_rate)) +
  geom_bar(stat = "identity") +
  labs(title = "Top-10 Victorian LGAs by recorded offence rate",
       subtitle = paste0("Year ending June ", crime_year, " (per 100,000 population)"),
       x = "LGA", y = "Crime rate (per 100,000)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

if (knitr::is_html_output()) {
  p1 <- plotly::ggplotly(
    p1 + aes(text = paste0("LGA: ", lga,
                           "<br>Rate: ", round(crime_rate, 1),
                           "<br>Offences: ", scales::comma(offence_count))),
    tooltip = "text"
  )
}
p1

Visual 2 - Population vs crime rate

p2 <- ggplot(merged_data, aes(x = population_2024, y = crime_rate)) +
  geom_point(alpha = 0.85) +
  labs(title = "Population vs recorded offence rate by LGA",
       subtitle = paste0("ERP 2024 (ABS) vs rate per 100,000 (CSA ", crime_year, ")"),
       x = "Population (ERP 2024)", y = "Crime rate (per 100,000)") +
  scale_x_continuous(labels = scales::label_number(scale_cut = scales::cut_short_scale())) +
  theme_minimal()

if (knitr::is_html_output()) {
  p2 <- plotly::ggplotly(
    p2 + aes(text = paste0("LGA: ", lga,
                           "<br>Population: ", scales::comma(population_2024),
                           "<br>Rate: ", round(crime_rate, 1),
                           "<br>Offences: ", scales::comma(offence_count))),
    tooltip = "text"
  )
}
p2

Limitations

Conclusion

Population size alone does not explain crime risk. Focus on rate outliers for targeted, localised prevention and resourcing.

References (APA 7th)

Australian Bureau of Statistics. (2024). Regional population, 2023-24: Estimated resident population by Local Government Area (ERP by LGA) [Data set]. https://www.abs.gov.au/ Crime Statistics Agency Victoria. (2025). Recorded offences by Local Government Area - Year ending June 2025 [Data set]. https://www.crimestatistics.vic.gov.au/