# R Markdown
# Family Violence in Victoria - Data Visualization Script
# Corrected script for your actual data structure
# Load required libraries
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(viridis)
## Loading required package: viridisLite
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
library(htmlwidgets)
# Inline incident data (replace this with your data loading code).
# One row per LGA per financial year: rows are ordered year by year, and
# within each year the LGAs follow the order of `lga_names`.
incidents_data <- local({
  lga_names <- c(
    "Brimbank", "Melton", "Hume", "Merri-bek", "Greater Shepparton",
    "Casey", "Greater Geelong", "Greater Dandenong", "Wyndham", "Whittlesea",
    "Latrobe", "Frankston", "Mornington Peninsula", "Ballarat", "Mildura"
  )
  # Police region of each entry in `lga_names`, position by position.
  police_regions <- c(
    "North West Metro", "North West Metro", "Western", "North West Metro",
    "Eastern", "Southern Metro", "Western", "Southern Metro",
    "North West Metro", "Western", "Eastern", "Southern Metro",
    "Southern Metro", "Eastern", "Western"
  )
  financial_years <- c("2019-20", "2020-21", "2021-22", "2022-23", "2023-24")

  data.frame(
    # The per-LGA vectors are repeated once per year so they line up with the
    # year-major layout of the value vectors below.
    LGA = rep(lga_names, times = length(financial_years)),
    FinancialYear = rep(financial_years, each = length(lga_names)),
    PoliceRegion = rep(police_regions, times = length(financial_years)),
    Incidents = c(
      # 2019-20
      1509, 3963, 2104, 4427, 4708, 323, 2688, 4473,
      2802, 2337, 4788, 2321, 3420, 2906, 604,
      # 2020-21
      4509, 1306, 306, 1707, 4777, 4459, 3495, 3238,
      4972, 3313, 3572, 2766, 3011, 1517, 821,
      # 2021-22
      4819, 4521, 3484, 3998, 221, 2441, 3816, 1160,
      1659, 1235, 800, 2131, 2127, 1907, 847,
      # 2022-23
      780, 1242, 2383, 1403, 4303, 325, 2267, 4015,
      697, 2849, 1112, 725, 3791, 4486, 1935,
      # 2023-24
      3359, 565, 1981, 1444, 4092, 2298, 4069, 4081,
      3992, 2255, 3797, 3183, 3580, 103, 2429
    ),
    Rate = c(
      # 2019-20
      1270.4, 1829.4, 2644.7, 1731.3, 889.0, 1352.7, 2838.2, 1961.8,
      3258.7, 860.0, 2022.1, 3947.3, 3625.7, 3602.6, 1112.7,
      # 2020-21
      957.4, 2785.9, 1702.3, 2798.7, 1621.3, 1156.9, 3238.0, 827.6,
      2133.7, 2290.3, 2600.0, 1664.9, 2210.1, 3840.7, 2190.2,
      # 2021-22
      3616.2, 3700.5, 2630.6, 1937.4, 1014.8, 3773.5, 1554.3, 712.5,
      3817.0, 3022.1, 998.0, 2422.5, 3839.3, 2549.2, 1915.8,
      # 2022-23
      2767.6, 1619.4, 1577.0, 1269.2, 1793.2, 3944.8, 1039.7, 818.7,
      996.7, 2915.0, 2667.4, 3619.9, 2855.5, 3079.8, 2324.0,
      # 2023-24
      2809.4, 3376.3, 3252.0, 3929.4, 2038.0, 1591.0, 1933.2, 536.6,
      1143.5, 3449.6, 1309.1, 1336.8, 768.4, 1360.0, 3062.5
    )
  )
})
# Inline abuse-type counts: one row per LGA, financial year and abuse type.
# Rows are LGA-major: each LGA has five consecutive years, and each year has
# the six abuse types in the order of `abuse_types`.
abuse_data <- local({
  lga_names <- c(
    "Brimbank", "Melton", "Hume", "Merri-bek", "Greater Shepparton",
    "Casey", "Greater Geelong", "Greater Dandenong", "Wyndham", "Whittlesea",
    "Latrobe", "Frankston", "Mornington Peninsula", "Ballarat", "Mildura"
  )
  financial_years <- c("2019-20", "2020-21", "2021-22", "2022-23", "2023-24")
  abuse_types <- c(
    "Verbal", "Emotional", "Physical", "Sexual", "Economic", "Other"
  )

  # Each line below is one LGA-year; columns follow `abuse_types`.
  counts <- c(
    # Brimbank
    1703, 821, 1580, 1929, 0, 0,
    1020, 982, 68, 1560, 0, 0,
    806, 1142, 1569, 457, 0, 0,
    531, 1412, 1472, 652, 0, 0,
    267, 1836, 1279, 1944, 0, 0,
    # Melton
    810, 1256, 988, 1191, 0, 0,
    1165, 885, 355, 1534, 0, 0,
    473, 1107, 66, 777, 0, 0,
    917, 164, 932, 1550, 0, 0,
    475, 559, 1010, 1098, 0, 0,
    # Hume
    1029, 824, 810, 1832, 0, 0,
    740, 436, 956, 411, 0, 0,
    1317, 1672, 1441, 600, 0, 0,
    781, 348, 158, 235, 0, 0,
    743, 1617, 742, 460, 0, 0,
    # Merri-bek
    1091, 1116, 1615, 1955, 0, 0,
    1494, 1342, 1680, 628, 0, 0,
    481, 385, 514, 1466, 0, 0,
    855, 1284, 740, 1582, 0, 0,
    568, 658, 1721, 256, 0, 0,
    # Greater Shepparton
    1278, 1463, 1715, 517, 0, 0,
    408, 828, 627, 578, 0, 0,
    1734, 1940, 337, 247, 0, 0,
    1506, 1936, 1423, 280, 0, 0,
    1353, 1467, 252, 1983, 0, 0,
    # Casey
    1255, 552, 116, 0, 0, 0,
    776, 482, 1999, 0, 0, 0,
    1083, 1206, 118, 0, 0, 0,
    1756, 572, 710, 0, 0, 0,
    1184, 1086, 1834, 0, 0, 0,
    # Greater Geelong
    1688, 1581, 1254, 0, 0, 0,
    659, 378, 608, 0, 0, 0,
    1431, 839, 1489, 0, 0, 0,
    567, 970, 1676, 0, 0, 0,
    1209, 1743, 663, 0, 0, 0,
    # Greater Dandenong
    989, 1855, 1011, 0, 0, 0,
    567, 1770, 1410, 0, 0, 0,
    1151, 1365, 1301, 0, 0, 0,
    1831, 1903, 1306, 0, 0, 0,
    1809, 1057, 1957, 0, 0, 0,
    # Wyndham
    585, 1174, 859, 0, 0, 0,
    677, 706, 283, 0, 0, 0,
    1972, 727, 1076, 0, 0, 0,
    1259, 89, 489, 0, 0, 0,
    1878, 1030, 999, 0, 0, 0,
    # Whittlesea
    960, 1749, 772, 0, 0, 0,
    843, 62, 1968, 0, 0, 0,
    1335, 191, 807, 0, 0, 0,
    347, 370, 497, 0, 0, 0,
    1167, 1552, 1265, 0, 0, 0,
    # Latrobe
    516, 1484, 316, 0, 0, 0,
    1927, 1945, 1937, 0, 0, 0,
    1223, 960, 1054, 0, 0, 0,
    1054, 195, 368, 0, 0, 0,
    835, 1315, 1263, 0, 0, 0,
    # Frankston
    1766, 1529, 1973, 0, 0, 0,
    760, 317, 1354, 0, 0, 0,
    612, 823, 867, 0, 0, 0,
    383, 489, 681, 0, 0, 0,
    386, 163, 1679, 0, 0, 0,
    # Mornington Peninsula
    990, 822, 330, 0, 0, 0,
    543, 177, 426, 0, 0, 0,
    472, 490, 1799, 0, 0, 0,
    1365, 157, 651, 0, 0, 0,
    143, 1357, 758, 0, 0, 0,
    # Ballarat
    1417, 631, 1579, 0, 0, 0,
    736, 246, 427, 0, 0, 0,
    847, 190, 85, 0, 0, 0,
    1651, 1767, 843, 0, 0, 0,
    1842, 1521, 992, 0, 0, 0,
    # Mildura
    601, 1642, 873, 0, 0, 0,
    1924, 1965, 718, 0, 0, 0,
    1470, 252, 1740, 0, 0, 0,
    1388, 243, 937, 0, 0, 0,
    153, 1608, 1091, 0, 0, 0
  )

  rows_per_lga <- length(financial_years) * length(abuse_types)
  data.frame(
    LGA = rep(lga_names, each = rows_per_lga),
    FinancialYear = rep(
      rep(financial_years, each = length(abuse_types)),
      times = length(lga_names)
    ),
    AbuseType = rep(
      abuse_types,
      times = length(lga_names) * length(financial_years)
    ),
    Count = counts
  )
})
# Keep only rows with a positive count so empty LGA/year/type combinations
# do not appear in the later summaries and charts.
abuse_data <- filter(abuse_data, Count > 0)
# Ensure the folder that receives the exported figures exists.
if (isFALSE(dir.exists("visualizations"))) {
  dir.create("visualizations")
}
# Visualization 1: Overall Trends
# Sums `Incidents` over every LGA in `incidents_data` for each financial year,
# draws the yearly totals as a labelled line chart, prints it and saves it as
# a PNG in the "visualizations" folder.
cat("Creating Visualization 1: Overall Trends...\n")
## Creating Visualization 1: Overall Trends...
trend_data <- incidents_data %>%
  group_by(FinancialYear) %>%
  summarise(TotalIncidents = sum(Incidents))
# `group = 1` puts all years in a single group so the line connects the points
# even though FinancialYear is a discrete (character) axis.
trend_plot <- ggplot(
  trend_data,
  aes(x = FinancialYear, y = TotalIncidents, group = 1)
) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0 and raised a warning here.
  geom_line(color = "#E74C3C", linewidth = 1.5) +
  geom_point(color = "#E74C3C", size = 3) +
  geom_text(aes(label = comma(TotalIncidents)), vjust = -1, size = 3) +
  labs(
    title = "Family Violence Incidents in Victoria (2019-2024)",
    subtitle = "Overall trend across all Local Government Areas",
    x = "Financial Year",
    y = "Number of Incidents",
    caption = "Data Source: Victoria Police"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_y_continuous(labels = comma)
print(trend_plot)

ggsave(
  "visualizations/overall_trends.png",
  plot = trend_plot, width = 10, height = 6, dpi = 300
)
# Visualization 2: Top LGAs by Incident Rate (2023-24)
# Horizontal bar chart of the ten 2023-24 rows with the highest `Rate`,
# printed and then saved as a PNG.
cat("Creating Visualization 2: Top LGAs by Incident Rate...\n")
## Creating Visualization 2: Top LGAs by Incident Rate...
# Restrict to 2023-24, sort by rate (highest first) and keep the first ten.
top_lgas_data <- head(
  arrange(filter(incidents_data, FinancialYear == "2023-24"), desc(Rate)),
  10
)
top_lgas_plot <- ggplot(top_lgas_data, aes(x = Rate, y = reorder(LGA, Rate))) +
  geom_col(fill = "#3498DB", alpha = 0.8) +
  geom_text(aes(label = round(Rate)), hjust = -0.2, size = 3) +
  # Widen the x axis by 10% so the value labels right of the bars fit.
  expand_limits(x = max(top_lgas_data$Rate) * 1.1) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold")) +
  labs(
    title = "Top 10 LGAs by Family Violence Incident Rate (2023-24)",
    subtitle = "Rate per 100,000 population",
    x = "Rate per 100,000",
    y = "Local Government Area",
    caption = "Higher rates indicate more incidents relative to population size"
  )
print(top_lgas_plot)

ggsave(
  filename = "visualizations/top_lgas_rate.png",
  plot = top_lgas_plot,
  width = 10,
  height = 6,
  dpi = 300
)
# Visualization 3: Abuse Type Distribution
# Pie chart of the 2023-24 share of each abuse type in `abuse_data`,
# printed and then saved as a PNG.
cat("Creating Visualization 3: Abuse Type Distribution...\n")
## Creating Visualization 3: Abuse Type Distribution...
# Total count per abuse type for 2023-24, plus each type's percentage share.
abuse_distribution_data <- mutate(
  summarise(
    group_by(filter(abuse_data, FinancialYear == "2023-24"), AbuseType),
    TotalCount = sum(Count)
  ),
  Percentage = TotalCount / sum(TotalCount) * 100
)
# A single stacked bar wrapped onto polar coordinates gives the pie.
abuse_distribution <- ggplot(
  abuse_distribution_data,
  aes(x = "", y = TotalCount, fill = AbuseType)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  geom_text(
    aes(label = paste0(round(Percentage, 1), "%")),
    position = position_stack(vjust = 0.5),
    size = 3
  ) +
  scale_fill_viridis_d() +
  labs(
    title = "Distribution of Family Violence Abuse Types (2023-24)",
    subtitle = "Percentage breakdown across Victoria",
    fill = "Abuse Type",
    caption = "Data: Victoria Police Family Violence Incidents"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    legend.position = "right"
  )
print(abuse_distribution)

ggsave(
  filename = "visualizations/abuse_distribution.png",
  plot = abuse_distribution,
  width = 10,
  height = 6,
  dpi = 300
)
# Visualization 4: Abuse Type Trends Over Time
# Totals `Count` per financial year and abuse type, draws one line per abuse
# type, prints the chart and saves it as a PNG in the "visualizations" folder.
cat("Creating Visualization 4: Abuse Type Trends...\n")
## Creating Visualization 4: Abuse Type Trends...
abuse_trends_data <- abuse_data %>%
  group_by(FinancialYear, AbuseType) %>%
  # `.groups = "drop"` returns an ungrouped result; without it the output
  # stays grouped by FinancialYear and summarise() prints a message about it.
  summarise(TotalCount = sum(Count), .groups = "drop")
abuse_trends_plot <- ggplot(
  abuse_trends_data,
  aes(x = FinancialYear, y = TotalCount, color = AbuseType, group = AbuseType)
) +
  # Line thickness is set with `linewidth`; using `size` for lines has been
  # deprecated since ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  labs(
    title = "Family Violence Abuse Type Trends (2019-2024)",
    subtitle = "Evolution of different abuse types over time",
    x = "Financial Year",
    y = "Number of Incidents",
    color = "Abuse Type",
    caption = "Note: Multiple abuse types may be recorded per incident"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  ) +
  scale_y_continuous(labels = comma) +
  scale_color_viridis_d()
print(abuse_trends_plot)

ggsave(
  "visualizations/abuse_trends.png",
  plot = abuse_trends_plot, width = 12, height = 6, dpi = 300
)
# Visualization 5: Police Region Comparison
# Horizontal bar chart of the mean 2023-24 `Rate` per police region, with the
# bars coloured by the region's total incidents; printed and saved as a PNG.
cat("Creating Visualization 5: Police Region Comparison...\n")
## Creating Visualization 5: Police Region Comparison...
# Per-region mean rate and incident total, 2023-24 rows only.
region_comparison_data <- summarise(
  group_by(filter(incidents_data, FinancialYear == "2023-24"), PoliceRegion),
  AvgRate = mean(Rate),
  TotalIncidents = sum(Incidents)
)
region_comparison_plot <- ggplot(
  region_comparison_data,
  aes(x = AvgRate, y = reorder(PoliceRegion, AvgRate))
) +
  geom_col(aes(fill = TotalIncidents), alpha = 0.8) +
  geom_text(
    aes(label = paste0(round(AvgRate), "/100k")),
    hjust = -0.2,
    size = 3
  ) +
  # Widen the x axis by 10% so the labels right of the bars fit.
  expand_limits(x = max(region_comparison_data$AvgRate) * 1.1) +
  scale_fill_viridis_c(labels = comma) +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold")) +
  labs(
    title = "Family Violence Rates by Police Region (2023-24)",
    subtitle = "Comparison across Victoria's police regions",
    x = "Average Rate per 100,000 Population",
    y = "Police Region",
    fill = "Total Incidents",
    caption = "Color intensity represents total number of incidents"
  )
print(region_comparison_plot)

ggsave(
  filename = "visualizations/region_comparison.png",
  plot = region_comparison_plot,
  width = 10,
  height = 6,
  dpi = 300
)
# Visualization 6: Heatmap of Top LGAs Over Time
# Selects the ten LGAs with the highest 2023-24 `Rate`, then draws their rate
# for every financial year as a labelled heatmap, prints it and saves a PNG.
cat("Creating Visualization 6: LGA Heatmap...\n")
## Creating Visualization 6: LGA Heatmap...
top_10_lgas <- incidents_data %>%
  filter(FinancialYear == "2023-24") %>%
  arrange(desc(Rate)) %>%
  head(10) %>%
  pull(LGA)
heatmap_data <- incidents_data %>%
  filter(LGA %in% top_10_lgas)
# reorder() with its default summary orders the LGAs on the y axis by their
# mean rate across the plotted years.
heatmap_plot <- ggplot(
  heatmap_data,
  aes(x = FinancialYear, y = reorder(LGA, Rate), fill = Rate)
) +
  # Tile border thickness is set with `linewidth`; `size` has been deprecated
  # for this purpose since ggplot2 3.4.0.
  geom_tile(color = "white", linewidth = 0.5) +
  geom_text(
    aes(label = round(Rate)),
    color = "white", fontface = "bold", size = 3
  ) +
  labs(
    title = "Family Violence Incident Rates Heatmap",
    subtitle = "Top 10 LGAs with highest rates (2019-2024)",
    x = "Financial Year",
    y = "Local Government Area",
    fill = "Rate per 100k",
    caption = "Darker colors indicate higher incident rates"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  # The default viridis direction maps higher values to lighter colours,
  # which contradicted the caption. Reversing it makes darker mean higher and
  # puts the white labels on dark tiles for the highest rates.
  scale_fill_viridis_c(direction = -1)
print(heatmap_plot)

ggsave(
  "visualizations/lga_heatmap.png",
  plot = heatmap_plot, width = 10, height = 6, dpi = 300
)
# Create interactive versions with plotly
cat("Creating interactive visualizations...\n")
## Creating interactive visualizations...
# Interactive trend plot: yearly incident totals from `trend_data` as a
# line-and-marker chart whose hover text shows the year and formatted total.
interactive_trend <- layout(
  plot_ly(
    data = trend_data,
    x = ~FinancialYear,
    y = ~TotalIncidents,
    type = "scatter",
    mode = "lines+markers",
    line = list(color = "#E74C3C", width = 4),
    marker = list(color = "#E74C3C", size = 8),
    text = ~paste(
      "Year:", FinancialYear,
      "<br>Incidents:", comma(TotalIncidents)
    ),
    hoverinfo = "text"
  ),
  title = "Family Violence Incidents in Victoria (2019-2024)",
  xaxis = list(title = "Financial Year"),
  yaxis = list(title = "Number of Incidents")
)
# Write the widget to an HTML file next to the static figures.
htmlwidgets::saveWidget(
  widget = interactive_trend,
  file = "visualizations/interactive_trend.html"
)
# Print summary statistics
# Reports the covered period, the number of distinct LGAs, the 2023-24
# incident total and a per-year table of total incidents and mean rate.
cat(
  "\n=== SUMMARY STATISTICS ===\n",
  "Time Period: 2019-20 to 2023-24\n",
  sep = ""
)
##
## === SUMMARY STATISTICS ===
## Time Period: 2019-20 to 2023-24
cat("Number of LGAs:", n_distinct(incidents_data$LGA), "\n")
## Number of LGAs: 15
# `%in%` selects exactly the 2023-24 rows.
total_incidents_2024 <- sum(
  incidents_data$Incidents[incidents_data$FinancialYear %in% "2023-24"]
)
cat("Total Incidents (2023-24):", comma(total_incidents_2024), "\n")
## Total Incidents (2023-24): 41,228
summary_stats <- summarise(
  group_by(incidents_data, FinancialYear),
  TotalIncidents = sum(Incidents),
  AvgRate = mean(Rate)
)
print(summary_stats)
## # A tibble: 5 × 3
## FinancialYear TotalIncidents AvgRate
## <chr> <dbl> <dbl>
## 1 2019-20 43373 2196.
## 2 2020-21 43769 2135.
## 3 2021-22 35166 2500.
## 4 2022-23 32313 2219.
## 5 2023-24 41228 2126.
# List every file the script wrote, one line per file.
cat(
  "\n=== VISUALIZATION FILES CREATED ===\n",
  "1. visualizations/overall_trends.png - Overall incident trends\n",
  "2. visualizations/top_lgas_rate.png - Top LGAs by incident rate\n",
  "3. visualizations/abuse_distribution.png - Abuse type distribution\n",
  "4. visualizations/abuse_trends.png - Abuse type trends over time\n",
  "5. visualizations/region_comparison.png - Police region comparison\n",
  "6. visualizations/lga_heatmap.png - LGA heatmap over time\n",
  "7. visualizations/interactive_trend.html - Interactive trend plot\n",
  sep = ""
)
##
## === VISUALIZATION FILES CREATED ===
## 1. visualizations/overall_trends.png - Overall incident trends
## 2. visualizations/top_lgas_rate.png - Top LGAs by incident rate
## 3. visualizations/abuse_distribution.png - Abuse type distribution
## 4. visualizations/abuse_trends.png - Abuse type trends over time
## 5. visualizations/region_comparison.png - Police region comparison
## 6. visualizations/lga_heatmap.png - LGA heatmap over time
## 7. visualizations/interactive_trend.html - Interactive trend plot
# Short usage notes for the generated files.
cat(
  "\nTo use these visualizations:\n",
  "1. All PNG files can be embedded in websites or reports\n",
  "2. The HTML file can be uploaded to any web server\n",
  "3. Files are saved in the 'visualizations' folder\n",
  sep = ""
)
##
## To use these visualizations:
## 1. All PNG files can be embedded in websites or reports
## 2. The HTML file can be uploaded to any web server
## 3. Files are saved in the 'visualizations' folder
cat("\nScript completed successfully!\n")
##
## Script completed successfully!