R Markdown

# Family Violence in Victoria - Data Visualization Script
# Corrected script for your actual data structure

# Load required libraries
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(viridis)
## Loading required package: viridisLite
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
library(htmlwidgets)

# Your actual data (replace this with your data loading code)
# Incident table with one row per LGA and financial year: 15 LGAs x 5 years =
# 75 rows. FinancialYear, Incidents and Rate supply all 75 values; LGA and
# PoliceRegion supply only 15 values each and rely on data.frame() recycling
# them once per year.
incidents_data <- data.frame(
  # 15 LGA names, recycled for every financial year
  LGA = c("Brimbank", "Melton", "Hume", "Merri-bek", "Greater Shepparton", 
          "Casey", "Greater Geelong", "Greater Dandenong", "Wyndham", "Whittlesea",
          "Latrobe", "Frankston", "Mornington Peninsula", "Ballarat", "Mildura"),
  # `each = 15` makes the rows year-major: all 15 LGAs for 2019-20 come first,
  # then all 15 for 2020-21, and so on
  FinancialYear = rep(c("2019-20", "2020-21", "2021-22", "2022-23", "2023-24"), each = 15),
  # One region per LGA, in the same order as the LGA vector (15 values, recycled)
  PoliceRegion = c(rep("North West Metro", 2), "Western", "North West Metro", "Eastern",
                   "Southern Metro", "Western", "Southern Metro", "North West Metro", "Western",
                   "Eastern", "Southern Metro", "Southern Metro", "Eastern", "Western"),
  # Incident counts: each line below holds the 15 LGA values of one financial year
  Incidents = c(1509, 3963, 2104, 4427, 4708, 323, 2688, 4473, 2802, 2337, 4788, 2321, 3420, 2906, 604,
                4509, 1306, 306, 1707, 4777, 4459, 3495, 3238, 4972, 3313, 3572, 2766, 3011, 1517, 821,
                4819, 4521, 3484, 3998, 221, 2441, 3816, 1160, 1659, 1235, 800, 2131, 2127, 1907, 847,
                780, 1242, 2383, 1403, 4303, 325, 2267, 4015, 697, 2849, 1112, 725, 3791, 4486, 1935,
                3359, 565, 1981, 1444, 4092, 2298, 4069, 4081, 3992, 2255, 3797, 3183, 3580, 103, 2429),
  # Incident rates, same layout as Incidents; the plots below label this
  # column as a rate per 100,000 population
  Rate = c(1270.4, 1829.4, 2644.7, 1731.3, 889.0, 1352.7, 2838.2, 1961.8, 3258.7, 860.0, 2022.1, 3947.3, 3625.7, 3602.6, 1112.7,
           957.4, 2785.9, 1702.3, 2798.7, 1621.3, 1156.9, 3238.0, 827.6, 2133.7, 2290.3, 2600.0, 1664.9, 2210.1, 3840.7, 2190.2,
           3616.2, 3700.5, 2630.6, 1937.4, 1014.8, 3773.5, 1554.3, 712.5, 3817.0, 3022.1, 998.0, 2422.5, 3839.3, 2549.2, 1915.8,
           2767.6, 1619.4, 1577.0, 1269.2, 1793.2, 3944.8, 1039.7, 818.7, 996.7, 2915.0, 2667.4, 3619.9, 2855.5, 3079.8, 2324.0,
           2809.4, 3376.3, 3252.0, 3929.4, 2038.0, 1591.0, 1933.2, 536.6, 1143.5, 3449.6, 1309.1, 1336.8, 768.4, 1360.0, 3062.5)
)

# Abuse-type table in long format: 15 LGAs x 5 financial years x 6 abuse
# types = 450 rows. Rows are ordered LGA first, then financial year, then
# abuse type, so every run of six Count values is one LGA/year combination in
# the order Verbal, Emotional, Physical, Sexual, Economic, Other.
abuse_data <- data.frame(
  # Each LGA name is repeated for its 30 rows (5 years x 6 abuse types)
  LGA = rep(c("Brimbank", "Melton", "Hume", "Merri-bek", "Greater Shepparton", 
              "Casey", "Greater Geelong", "Greater Dandenong", "Wyndham", "Whittlesea",
              "Latrobe", "Frankston", "Mornington Peninsula", "Ballarat", "Mildura"), each = 30),
  # Inner rep(): each year covers 6 consecutive rows; outer rep(): that
  # 30-row pattern is repeated once per LGA
  FinancialYear = rep(rep(c("2019-20", "2020-21", "2021-22", "2022-23", "2023-24"), each = 6), 15),
  # The six abuse types cycle 75 times (15 LGAs x 5 years)
  AbuseType = rep(c("Verbal", "Emotional", "Physical", "Sexual", "Economic", "Other"), 75),
  # 450 counts, one per row; the zero entries are dropped by the filter that
  # follows this definition
  Count = c(1703, 821, 1580, 1929, 0, 0, 1020, 982, 68, 1560, 0, 0, 806, 1142, 1569, 457, 0, 0, 531, 1412, 1472, 652, 0, 0,
            267, 1836, 1279, 1944, 0, 0, 810, 1256, 988, 1191, 0, 0, 1165, 885, 355, 1534, 0, 0, 473, 1107, 66, 777, 0, 0,
            917, 164, 932, 1550, 0, 0, 475, 559, 1010, 1098, 0, 0, 1029, 824, 810, 1832, 0, 0, 740, 436, 956, 411, 0, 0,
            1317, 1672, 1441, 600, 0, 0, 781, 348, 158, 235, 0, 0, 743, 1617, 742, 460, 0, 0, 1091, 1116, 1615, 1955, 0, 0,
            1494, 1342, 1680, 628, 0, 0, 481, 385, 514, 1466, 0, 0, 855, 1284, 740, 1582, 0, 0, 568, 658, 1721, 256, 0, 0,
            1278, 1463, 1715, 517, 0, 0, 408, 828, 627, 578, 0, 0, 1734, 1940, 337, 247, 0, 0, 1506, 1936, 1423, 280, 0, 0,
            1353, 1467, 252, 1983, 0, 0, 1255, 552, 116, 0, 0, 0, 776, 482, 1999, 0, 0, 0, 1083, 1206, 118, 0, 0, 0, 1756, 572, 710, 0, 0, 0,
            1184, 1086, 1834, 0, 0, 0, 1688, 1581, 1254, 0, 0, 0, 659, 378, 608, 0, 0, 0, 1431, 839, 1489, 0, 0, 0, 567, 970, 1676, 0, 0, 0,
            1209, 1743, 663, 0, 0, 0, 989, 1855, 1011, 0, 0, 0, 567, 1770, 1410, 0, 0, 0, 1151, 1365, 1301, 0, 0, 0, 1831, 1903, 1306, 0, 0, 0,
            1809, 1057, 1957, 0, 0, 0, 585, 1174, 859, 0, 0, 0, 677, 706, 283, 0, 0, 0, 1972, 727, 1076, 0, 0, 0, 1259, 89, 489, 0, 0, 0,
            1878, 1030, 999, 0, 0, 0, 960, 1749, 772, 0, 0, 0, 843, 62, 1968, 0, 0, 0, 1335, 191, 807, 0, 0, 0, 347, 370, 497, 0, 0, 0,
            1167, 1552, 1265, 0, 0, 0, 516, 1484, 316, 0, 0, 0, 1927, 1945, 1937, 0, 0, 0, 1223, 960, 1054, 0, 0, 0, 1054, 195, 368, 0, 0, 0,
            835, 1315, 1263, 0, 0, 0, 1766, 1529, 1973, 0, 0, 0, 760, 317, 1354, 0, 0, 0, 612, 823, 867, 0, 0, 0, 383, 489, 681, 0, 0, 0,
            386, 163, 1679, 0, 0, 0, 990, 822, 330, 0, 0, 0, 543, 177, 426, 0, 0, 0, 472, 490, 1799, 0, 0, 0, 1365, 157, 651, 0, 0, 0,
            143, 1357, 758, 0, 0, 0, 1417, 631, 1579, 0, 0, 0, 736, 246, 427, 0, 0, 0, 847, 190, 85, 0, 0, 0, 1651, 1767, 843, 0, 0, 0,
            1842, 1521, 992, 0, 0, 0, 601, 1642, 873, 0, 0, 0, 1924, 1965, 718, 0, 0, 0, 1470, 252, 1740, 0, 0, 0, 1388, 243, 937, 0, 0, 0,
            153, 1608, 1091, 0, 0, 0)
)

# Keep only the abuse records that carry a positive count
abuse_data <- filter(abuse_data, Count > 0)

# Make sure the folder that receives every exported figure exists
if (isFALSE(dir.exists("visualizations"))) {
  dir.create("visualizations")
}

# Visualization 1: Overall Trends
# Sums incidents over all LGAs for each financial year and draws the yearly
# totals as a labelled line chart, which is printed and saved as a PNG.
cat("Creating Visualization 1: Overall Trends...\n")
## Creating Visualization 1: Overall Trends...
trend_data <- incidents_data %>%
  group_by(FinancialYear) %>%
  summarise(TotalIncidents = sum(Incidents))

# `group = 1` joins the points into a single line even though the x axis is
# discrete (FinancialYear is a character column).
trend_plot <- ggplot(trend_data, aes(x = FinancialYear, y = TotalIncidents, group = 1)) +
  # Line thickness uses `linewidth`; `size` for lines was deprecated in
  # ggplot2 3.4.0 and raises a warning.
  geom_line(color = "#E74C3C", linewidth = 1.5) +
  geom_point(color = "#E74C3C", size = 3) +
  # Print each yearly total just above its point
  geom_text(aes(label = comma(TotalIncidents)), vjust = -1, size = 3) +
  labs(
    title = "Family Violence Incidents in Victoria (2019-2024)",
    subtitle = "Overall trend across all Local Government Areas",
    x = "Financial Year",
    y = "Number of Incidents",
    caption = "Data Source: Victoria Police"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_y_continuous(labels = comma)

print(trend_plot)

ggsave("visualizations/overall_trends.png", plot = trend_plot, width = 10, height = 6, dpi = 300)

# Visualization 2: Top LGAs by Incident Rate (2023-24)
# Horizontal bar chart of the ten LGAs with the highest rate in 2023-24,
# printed and then written to a PNG.
cat("Creating Visualization 2: Top LGAs by Incident Rate...\n")
## Creating Visualization 2: Top LGAs by Incident Rate...
top_lgas_data <- incidents_data %>%
  filter(FinancialYear == "2023-24") %>%
  arrange(-Rate) %>%
  slice_head(n = 10)

top_lgas_plot <- ggplot(
  data = top_lgas_data,
  mapping = aes(x = Rate, y = reorder(LGA, Rate))
) +
  geom_col(alpha = 0.8, fill = "#3498DB") +
  # Rounded rate printed just past the end of each bar
  geom_text(mapping = aes(label = round(Rate)), size = 3, hjust = -0.2) +
  # Widen the x axis by 10% so the labels are not clipped
  expand_limits(x = max(top_lgas_data[["Rate"]]) * 1.1) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold")) +
  labs(
    title = "Top 10 LGAs by Family Violence Incident Rate (2023-24)",
    subtitle = "Rate per 100,000 population",
    x = "Rate per 100,000",
    y = "Local Government Area",
    caption = "Higher rates indicate more incidents relative to population size"
  )

print(top_lgas_plot)

ggsave(
  filename = "visualizations/top_lgas_rate.png",
  plot = top_lgas_plot,
  width = 10,
  height = 6,
  dpi = 300
)

# Visualization 3: Abuse Type Distribution
# Pie chart of each abuse type's share of the 2023-24 recorded counts.
cat("Creating Visualization 3: Abuse Type Distribution...\n")
## Creating Visualization 3: Abuse Type Distribution...
# Weighted count = sum of Count per abuse type, then each type's share in percent
abuse_distribution_data <- abuse_data %>%
  filter(FinancialYear == "2023-24") %>%
  count(AbuseType, wt = Count, name = "TotalCount") %>%
  mutate(Percentage = TotalCount / sum(TotalCount) * 100)

# A single stacked bar wrapped into polar coordinates gives the pie
abuse_distribution <- ggplot(
  data = abuse_distribution_data,
  mapping = aes(x = "", y = TotalCount, fill = AbuseType)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  # Percentage label centred inside each slice
  geom_text(
    mapping = aes(label = paste0(round(Percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    size = 3
  ) +
  scale_fill_viridis_d() +
  labs(
    title = "Distribution of Family Violence Abuse Types (2023-24)",
    subtitle = "Percentage breakdown across Victoria",
    fill = "Abuse Type",
    caption = "Data: Victoria Police Family Violence Incidents"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "right"
  )

print(abuse_distribution)

ggsave(
  filename = "visualizations/abuse_distribution.png",
  plot = abuse_distribution,
  width = 10,
  height = 6,
  dpi = 300
)

# Visualization 4: Abuse Type Trends Over Time
# Totals the recorded counts per financial year and abuse type across all
# LGAs and draws one line per abuse type; the chart is printed and saved.
cat("Creating Visualization 4: Abuse Type Trends...\n")
## Creating Visualization 4: Abuse Type Trends...
# `.groups = "drop"` returns an ungrouped result and silences the
# "`summarise()` has grouped output" message.
abuse_trends_data <- abuse_data %>%
  group_by(FinancialYear, AbuseType) %>%
  summarise(TotalCount = sum(Count), .groups = "drop")

# `group = AbuseType` is required to connect points across the discrete x axis
abuse_trends_plot <- ggplot(
  abuse_trends_data,
  aes(x = FinancialYear, y = TotalCount, color = AbuseType, group = AbuseType)
) +
  # Line thickness uses `linewidth`; `size` for lines was deprecated in
  # ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  labs(
    title = "Family Violence Abuse Type Trends (2019-2024)",
    subtitle = "Evolution of different abuse types over time",
    x = "Financial Year",
    y = "Number of Incidents",
    color = "Abuse Type",
    caption = "Note: Multiple abuse types may be recorded per incident"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  ) +
  scale_y_continuous(labels = comma) +
  scale_color_viridis_d()

print(abuse_trends_plot)

ggsave("visualizations/abuse_trends.png", plot = abuse_trends_plot, width = 12, height = 6, dpi = 300)

# Visualization 5: Police Region Comparison
# Bar chart of the 2023-24 average rate per police region, with bars coloured
# by the region's total incident count.
cat("Creating Visualization 5: Police Region Comparison...\n")
## Creating Visualization 5: Police Region Comparison...
# AvgRate is the plain (unweighted) mean of the LGA rates within a region
region_comparison_data <- summarise(
  group_by(filter(incidents_data, FinancialYear == "2023-24"), PoliceRegion),
  AvgRate = mean(Rate),
  TotalIncidents = sum(Incidents)
)

region_comparison_plot <- ggplot(
  data = region_comparison_data,
  mapping = aes(x = AvgRate, y = reorder(PoliceRegion, AvgRate))
) +
  geom_col(mapping = aes(fill = TotalIncidents), alpha = 0.8) +
  # Rounded average rate printed just past the end of each bar
  geom_text(
    mapping = aes(label = paste0(round(AvgRate), "/100k")),
    size = 3,
    hjust = -0.2
  ) +
  # Widen the x axis by 10% so the labels are not clipped
  expand_limits(x = max(region_comparison_data[["AvgRate"]]) * 1.1) +
  scale_fill_viridis_c(labels = comma) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold")) +
  labs(
    title = "Family Violence Rates by Police Region (2023-24)",
    subtitle = "Comparison across Victoria's police regions",
    x = "Average Rate per 100,000 Population",
    y = "Police Region",
    fill = "Total Incidents",
    caption = "Color intensity represents total number of incidents"
  )

print(region_comparison_plot)

ggsave(
  filename = "visualizations/region_comparison.png",
  plot = region_comparison_plot,
  width = 10,
  height = 6,
  dpi = 300
)

# Visualization 6: Heatmap of Top LGAs Over Time
# Tile chart of the rate in every financial year for the ten LGAs that had
# the highest rate in 2023-24; the chart is printed and saved as a PNG.
cat("Creating Visualization 6: LGA Heatmap...\n")
## Creating Visualization 6: LGA Heatmap...
# Names of the ten LGAs with the highest 2023-24 rate
top_10_lgas <- incidents_data %>%
  filter(FinancialYear == "2023-24") %>%
  arrange(desc(Rate)) %>%
  head(10) %>%
  pull(LGA)

# All years for those LGAs
heatmap_data <- incidents_data %>%
  filter(LGA %in% top_10_lgas)

# Rows are ordered by each LGA's mean rate over the years (reorder() default)
heatmap_plot <- ggplot(heatmap_data, aes(x = FinancialYear, y = reorder(LGA, Rate), fill = Rate)) +
  # Tile borders use `linewidth`; `size` for lines was deprecated in
  # ggplot2 3.4.0.
  geom_tile(color = "white", linewidth = 0.5) +
  # Label colour follows the tile: white on the darker (higher-rate) half of
  # the observed range, black on the lighter half, so every label is legible.
  geom_text(
    aes(label = round(Rate), color = Rate > mean(range(Rate))),
    fontface = "bold", size = 3
  ) +
  labs(
    title = "Family Violence Incident Rates Heatmap",
    subtitle = "Top 10 LGAs with highest rates (2019-2024)",
    x = "Financial Year",
    y = "Local Government Area",
    fill = "Rate per 100k",
    caption = "Darker colors indicate higher incident rates"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  # The default viridis direction maps high values to the lightest colour;
  # reversing it makes the caption ("darker = higher") true.
  scale_fill_viridis_c(direction = -1) +
  scale_color_manual(values = c("TRUE" = "white", "FALSE" = "black"), guide = "none")

print(heatmap_plot)

ggsave("visualizations/lga_heatmap.png", plot = heatmap_plot, width = 10, height = 6, dpi = 300)

# Create interactive versions with plotly
cat("Creating interactive visualizations...\n")
## Creating interactive visualizations...
# Interactive counterpart of the overall trend chart: the same yearly totals
# drawn as a line with markers, with a custom hover label per point.
interactive_trend <- layout(
  plot_ly(
    data = trend_data,
    x = ~FinancialYear,
    y = ~TotalIncidents,
    type = "scatter",
    mode = "lines+markers",
    line = list(color = "#E74C3C", width = 4),
    marker = list(color = "#E74C3C", size = 8),
    # Only this text is shown on hover; "<br>" breaks the line in the tooltip
    text = ~paste("Year:", FinancialYear, "<br>Incidents:", comma(TotalIncidents)),
    hoverinfo = "text"
  ),
  title = "Family Violence Incidents in Victoria (2019-2024)",
  xaxis = list(title = "Financial Year"),
  yaxis = list(title = "Number of Incidents")
)

# Write the widget to an HTML file in the output folder
htmlwidgets::saveWidget(
  widget = interactive_trend,
  file = "visualizations/interactive_trend.html"
)

# Print summary statistics
# Console report: headline figures, the per-year table, then a listing of the
# files written by the sections above.
cat(
  "\n=== SUMMARY STATISTICS ===\n",
  "Time Period: 2019-20 to 2023-24\n",
  sep = ""
)
## 
## === SUMMARY STATISTICS ===
## Time Period: 2019-20 to 2023-24
cat("Number of LGAs:", n_distinct(incidents_data$LGA), "\n")
## Number of LGAs: 15
# Total of all incidents recorded for the latest financial year
total_incidents_2024 <- with(
  incidents_data,
  sum(Incidents[FinancialYear %in% "2023-24"])
)

cat("Total Incidents (2023-24):", comma(total_incidents_2024), "\n")
## Total Incidents (2023-24): 41,228
# Incident total and mean LGA rate for every financial year
summary_stats <- summarise(
  group_by(incidents_data, FinancialYear),
  TotalIncidents = sum(Incidents),
  AvgRate = mean(Rate)
)

print(summary_stats)
## # A tibble: 5 × 3
##   FinancialYear TotalIncidents AvgRate
##   <chr>                  <dbl>   <dbl>
## 1 2019-20                43373   2196.
## 2 2020-21                43769   2135.
## 3 2021-22                35166   2500.
## 4 2022-23                32313   2219.
## 5 2023-24                41228   2126.
# Listing of the output files, in the order they were created
cat(
  "\n=== VISUALIZATION FILES CREATED ===\n",
  "1. visualizations/overall_trends.png - Overall incident trends\n",
  "2. visualizations/top_lgas_rate.png - Top LGAs by incident rate\n",
  "3. visualizations/abuse_distribution.png - Abuse type distribution\n",
  "4. visualizations/abuse_trends.png - Abuse type trends over time\n",
  "5. visualizations/region_comparison.png - Police region comparison\n",
  "6. visualizations/lga_heatmap.png - LGA heatmap over time\n",
  "7. visualizations/interactive_trend.html - Interactive trend plot\n",
  sep = ""
)
## 
## === VISUALIZATION FILES CREATED ===
## 1. visualizations/overall_trends.png - Overall incident trends
## 2. visualizations/top_lgas_rate.png - Top LGAs by incident rate
## 3. visualizations/abuse_distribution.png - Abuse type distribution
## 4. visualizations/abuse_trends.png - Abuse type trends over time
## 5. visualizations/region_comparison.png - Police region comparison
## 6. visualizations/lga_heatmap.png - LGA heatmap over time
## 7. visualizations/interactive_trend.html - Interactive trend plot
# Usage hints and the closing status line
cat(
  "\nTo use these visualizations:\n",
  "1. All PNG files can be embedded in websites or reports\n",
  "2. The HTML file can be uploaded to any web server\n",
  "3. Files are saved in the 'visualizations' folder\n",
  "\nScript completed successfully!\n",
  sep = ""
)
## 
## To use these visualizations:
## 1. All PNG files can be embedded in websites or reports
## 2. The HTML file can be uploaded to any web server
## 3. Files are saved in the 'visualizations' folder
## 
## Script completed successfully!