library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(scales)

# Hard-coded summary table used by the bar chart below: one row per offence
# subgroup, holding its offence count, its percentage share, and the broader
# subdivision the subgroup is coloured by.
# NOTE(review): `Percentage` is presumably the share of all recorded offences
# (the full total is not present in this script) -- confirm against the source.
filtered_data <- data.frame(
  Offence.Subgroup = c(
    "B42 Steal from a motor vehicle",
    "B49 Other theft",
    "E21 Breach family violence order",
    "B21 Criminal damage",
    "B53 Obtain benefit by deception"
  ),
  Total_Offences = c(
    557647,
    459704,
    442503,
    372954,
    286594
  ),
  Percentage = c(
    10.9,
    9,
    8.6,
    7.3,
    5.6
  ),
  Offence.Subdivision = c(
    "Theft-related offences",
    "Theft-related offences",
    "Family violence-related breaches",
    "Property damage offences",
    "Deceptive behaviour (e.g., fraud)"
  )
)

# Bar chart of the offence subgroups in `filtered_data`, ordered from the
# largest to the smallest `Total_Offences` and filled by `Offence.Subdivision`.
# Each bar carries a "count (percentage)" label, and three free-text
# annotations call out the main talking points. The expression is left
# unassigned so the plot is printed when the script runs.
ggplot(
  filtered_data,
  aes(
    # Negating the count makes reorder() sort the bars in descending order.
    x = reorder(Offence.Subgroup, -Total_Offences),
    y = Total_Offences,
    fill = Offence.Subdivision
  )
) +
  geom_col(width = 0.6, colour = "black") +
  # Value label just above each bar. sprintf("%.1f") keeps exactly one decimal
  # for every bar (e.g. "9.0%"), whereas pasting round(x, 1) would print "9%".
  geom_text(
    aes(
      label = sprintf(
        "%s (%.1f%%)",
        scales::comma(Total_Offences),
        Percentage
      )
    ),
    vjust = -0.5, size = 4.5, fontface = "bold", colour = "black"
  ) +
  # Annotation x positions are in discrete-axis units (bar 1 is centred on
  # x = 1, bar 2 on x = 2, ...); colours match the fill of the bar referred to.
  # NOTE(review): 10.9% is the `Percentage` of the B42 subgroup alone; the two
  # theft-related subgroups in the data sum to 19.9% -- confirm the wording.
  annotate(
    "text", x = 1.35, y = 525000,
    label = "Theft-related offences dominate\nwith 10.9% of total recorded crimes",
    size = 4.5, hjust = 0, fontface = "italic", colour = "#619CFF"
  ) +
  annotate(
    "text", x = 3.35, y = 425000,
    label = "Family violence-related breaches\nare a major concern",
    size = 4.5, hjust = 0, fontface = "italic", colour = "#F564E3"
  ) +
  annotate(
    "text", x = 2.5, y = 150000,
    label = "Policy interventions could prioritise\nfamily violence breaches and theft.",
    size = 4.2, hjust = 0.5, colour = "darkblue", fontface = "italic"
  ) +
  # Fixed colour per subdivision; `breaks` sets the legend order. Legend labels
  # default to the break values, so no separate `labels` vector is needed.
  scale_fill_manual(
    values = c(
      "Theft-related offences" = "#619CFF",
      "Family violence-related breaches" = "#F564E3",
      "Property damage offences" = "#F8766D",
      "Deceptive behaviour (e.g., fraud)" = "#00BA38"
    ),
    breaks = c(
      "Theft-related offences",
      "Family violence-related breaches",
      "Property damage offences",
      "Deceptive behaviour (e.g., fraud)"
    )
  ) +
  scale_y_continuous(
    labels = scales::comma,
    breaks = seq(0, 600000, 100000)
  ) +
  # Stretch the y range to the last requested break: without this the axis
  # stops short of 600,000, so that gridline is dropped and the label above the
  # tallest bar has very little headroom. expand_limits() only widens the
  # range; it never censors data the way hard `limits` could.
  expand_limits(y = 600000) +
  labs(
    title = "Key Offences in LGAs: Insights and Focus Areas (2024)",
    subtitle = "Top 5 offence subgroups by total recorded offences, highlighting key areas of concern for intervention",
    x = "Top Offence Subgroups (by Total Offences)",
    y = "Total Offences",
    fill = "Offence Subdivision (Key Categories)"
  ) +
  # theme_minimal() must come before theme() so the overrides below win.
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
    plot.subtitle = element_text(hjust = 0.5, size = 12),
    axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0.5, size = 11),
    axis.text.y = element_text(size = 12),
    axis.title.x = element_text(margin = margin(t = 15)),
    legend.position = "bottom",
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10),
    # Keep only the horizontal major gridlines.
    panel.grid.major.y = element_line(colour = "grey80"),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )