# Load required libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Print the current working directory; the relative CSV path used below is
# resolved against it.
getwd()

# Read the raw family-incident records from disk.
family_incidents_data <- read.csv("family-incidents-data.csv")

# Remove commas (presumably thousands separators) from the incident counts and
# coerce the result to numeric so the values can be summed.
family_incidents_data$Family.Incident.Count <- as.numeric(
  gsub(",", "", family_incidents_data$Family.Incident.Count)
)

# Total the incident counts within each Year, ignoring missing values.
incident_summary <- summarise(
  group_by(family_incidents_data, Year),
  Total_Incidents = sum(Family.Incident.Count, na.rm = TRUE)
)
# Bar chart of the yearly totals: one bar per Year (treated as a discrete
# category), with comma-formatted labels on the y axis.
ggplot(incident_summary, aes(x = as.factor(Year), y = Total_Incidents)) +
  geom_col(fill = "steelblue") +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Total Family Incident Count by Year",
    x = "Year",
    y = "Total Family Incident Count"
  ) +
  theme_minimal()
