library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
# Ten offence-count records (Year 2024, year ending "June") across four LGAs.
# The back-ticked names contain spaces; data.frame() converts them to
# syntactic names (e.g. `Year.ending`) because check.names defaults to TRUE.
# Rows are laid out as three records each for Alpine, Ararat and Ballarat,
# followed by a single Bendigo record.
data <- data.frame(
  Year = rep(2024, 10),
  `Year ending` = rep("June", 10),
  `Local Government Area` = rep(
    c("Alpine", "Ararat", "Ballarat", "Bendigo"),
    times = c(3, 3, 3, 1)
  ),
  `Location Division` = rep(
    c("1 Residential", "2 Community", "1 Residential", "2 Community"),
    times = c(3, 3, 3, 1)
  ),
  `Location Subdivision` = c(
    rep("Private Dwellings", 3),
    rep("Justice Facilities", 3),
    "Private Dwellings", "Justice Facilities",
    "Private Dwellings", "Justice Facilities"
  ),
  `Location Group` = c(
    "Flat/Unit/Apartment", "House", "Caravan/Mobile Home",
    "Court", "Other Justice Facility", "Other Justice (nec)",
    "Flat/Unit/Apartment", "Court", "Caravan/Mobile Home",
    "Court"
  ),
  `Offence Count` = c(
    6, 227, 3,
    48, 2, 1,
    200, 50, 5,
    60
  )
)

# Normalise the column names with janitor::clean_names(); the rest of the
# script refers to the columns by their snake_case names
# (e.g. `local_government_area`, `offence_count`).
data <- clean_names(data)

# Total offences for every LGA / location-subdivision pair.
# The grouping is dropped before `percentage` is computed, so each value is
# that pair's share of the grand total over ALL pairs (not the share within
# its own LGA), rounded to one decimal place.
summarised_data <- data %>%
  group_by(local_government_area, location_subdivision) %>%
  summarise(total_offence_count = sum(offence_count), .groups = "drop") %>%
  mutate(percentage = round(prop.table(total_offence_count) * 100, 1))

# Total offences per LGA, keeping the five LGAs with the largest totals.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties at
# the cutoff, so more than five rows are possible when totals are tied.
# The result is sorted from the largest to the smallest total, and that row
# order is what defines the LGA ranking used when filtering the summary.
top_5_lgas <- summarised_data %>%
  group_by(local_government_area) %>%
  summarise(total = sum(total_offence_count), .groups = "drop") %>%
  slice_max(total, n = 5) %>%
  arrange(desc(total))

# Keep only the summary rows that belong to the top-ranked LGAs.
# The LGA column is converted to a factor whose levels follow the ranking in
# `top_5_lgas`: ggplot2 orders a character axis alphabetically, so sorting the
# rows alone would not put the bars in "ranked by total offences" order.
# Sorting by the factor then arranges the rows in that same ranked order.
filtered_data <- summarised_data %>%
  filter(local_government_area %in% top_5_lgas$local_government_area) %>%
  mutate(
    local_government_area = factor(
      local_government_area,
      levels = top_5_lgas$local_government_area
    )
  ) %>%
  arrange(local_government_area)

# Stacked bar chart: one bar per LGA in `filtered_data`, split and coloured by
# location subdivision, with a label inside every segment and a bold total
# above every bar.
ggplot(
  data = filtered_data,
  mapping = aes(
    x = local_government_area,
    y = total_offence_count,
    fill = location_subdivision
  )
) +
  # Bar heights are the pre-computed totals, so the values are used as-is.
  geom_col(width = 0.6, colour = "black") +
  # Label from the `percentage` column, centred vertically in each segment.
  geom_text(
    mapping = aes(
      label = paste0(percentage, "%"),
      group = location_subdivision
    ),
    position = position_stack(vjust = 0.5),
    size = 4.5
  ) +
  # Per-LGA total drawn 6.5 units above the bar top. This layer has its own
  # data set, so the plot-level aesthetics are deliberately not inherited.
  geom_text(
    data = top_5_lgas,
    mapping = aes(
      x = local_government_area,
      y = total + 6.5,
      label = paste0("Total: ", total)
    ),
    inherit.aes = FALSE,
    fontface = "bold",
    size = 5
  ) +
  scale_fill_manual(
    name = "Location Subdivision (Offence Categories)",
    values = c("Private Dwellings" = "#FFA500", "Justice Facilities" = "#77DD77")
  ) +
  labs(
    x = "Local Government Area (Ranked by Total Offences)",
    y = "Total Offence Count",
    title = "Top 5 LGAs by Total Offences: Private Dwellings vs Justice Facilities (2024)",
    subtitle = "Comparing total offence counts across the top 5 Local Government Areas, with offences grouped by location."
  ) +
  theme_minimal() +
  theme(
    # Titles
    plot.title = element_text(
      hjust = 0.5, face = "bold", size = 18,
      margin = margin(t = 20, b = 10)
    ),
    plot.subtitle = element_text(
      hjust = 0.5, size = 14,
      margin = margin(t = 5, b = 15)
    ),
    # Legend: horizontal strip centred above the panel
    legend.position = "top",
    legend.direction = "horizontal",
    legend.justification = "center",
    legend.box.spacing = unit(0.5, "cm"),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 11),
    # Axes
    axis.title.x = element_text(size = 14, face = "bold", margin = margin(t = 10)),
    axis.title.y = element_text(size = 14, face = "bold", margin = margin(r = 10)),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    # Grid and outer margin
    panel.grid.major.y = element_line(colour = "grey80"),
    panel.grid.minor = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )