Ge Zhang
2024-10-29
This is an R Markdown presentation. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document.
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## [1] "/Users/gezhang/Desktop/Semester2/visulization/A3/Data_Tables_Family_Incidents_Visualisation_Year_Ending_June_2024 2"
# Read the family-incident records from disk.
family_incidents_data <- read.csv("family-incidents-data.csv")

# Strip any thousands separators before converting the count column to
# numeric; as.numeric() would otherwise turn values such as "1,234" into NA.
family_incidents_data[["Family.Incident.Count"]] <- as.numeric(
  gsub(",", "", family_incidents_data[["Family.Incident.Count"]])
)

# Collapse to one row per Year holding the summed incident count. Missing
# counts are skipped (na.rm = TRUE) instead of making a year's total NA.
incident_summary <- summarise(
  group_by(family_incidents_data, Year),
  Total_Incidents = sum(Family.Incident.Count, na.rm = TRUE)
)

# Bar chart of the yearly totals. Year is mapped as a factor so every year
# gets its own discrete bar; geom_col() takes bar heights straight from the
# data, and the y axis is printed with thousands separators.
ggplot(incident_summary, aes(x = as.factor(Year), y = Total_Incidents)) +
  geom_col(fill = "steelblue") +
  scale_y_continuous(labels = scales::comma) +
  labs(
    x = "Year",
    y = "Total Family Incident Count",
    title = "Total Family Incident Count by Year"
  ) +
  theme_minimal()
# Load required libraries
library(ggplot2)

# Line chart of the yearly incident totals, with a red marker on each year.
# Uses `incident_summary` (one row per Year with Total_Incidents) built by the
# preceding summary step.
#
# Year is mapped as a factor, so `group = 1` is required: without it every
# factor level would form its own single-point group and no line would be
# drawn between years.
#
# Line thickness is set with `linewidth`; using `size` for lines has been
# deprecated since ggplot2 3.4.0 and triggers a lifecycle warning.
ggplot(data = incident_summary, aes(x = as.factor(Year), y = Total_Incidents, group = 1)) +
  geom_line(color = "steelblue", linewidth = 1) +
  geom_point(color = "red", size = 2) +
  labs(title = "Total Family Incident Count Trend by Year",
       x = "Year",
       y = "Total Family Incident Count") +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal()
library(ggplot2)
library(dplyr)

# Load the AFM rate table.
AFM_data <- read.csv("AFM-data.csv")

# Strip any thousands separators before converting the rate to numeric, and
# treat year, age group and sex as categorical variables.
AFM_data$Rate.per.100.000.population <- as.numeric(gsub(",", "", AFM_data$Rate.per.100.000.population))
AFM_data$Year <- as.factor(AFM_data$Year)
AFM_data$AFM.Age.Group <- as.factor(AFM_data$AFM.Age.Group)
AFM_data$AFM.Sex <- as.factor(AFM_data$AFM.Sex)

# Keep only the rows for the two sexes being compared; any other value of
# AFM.Sex is dropped.
filtered_data <- AFM_data %>%
  filter(AFM.Sex %in% c("Females", "Males"))

# Side-by-side bars of the rate per age group, coloured by sex.
# The legend title is intentionally blank (the "Females"/"Males" keys are
# self-explanatory), so no fill label is set in labs().
# NOTE(review): the data appears to hold one row per year for every
# age-group/sex pair, in which case the dodged bars for different years are
# drawn on top of each other and only the tallest is visible. Consider
# filtering to a single year or averaging across years first. Confirm intent.
# NOTE(review): AFM.Age.Group also seems to contain an aggregate "Total"
# level, which is plotted alongside the real age bands. Confirm it should stay.
ggplot(filtered_data, aes(x = AFM.Age.Group, y = Rate.per.100.000.population, fill = AFM.Sex)) +
  geom_col(position = "dodge") +
  labs(title = "Family violence victimization rate by gender and age group",
       x = "Age group",
       y = "Rate per 100,000 population") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.title = element_blank())
# Read the data
AFM_data <- read.csv("AFM-data.csv")

# Convert the rate column to numeric, stripping any thousands separators.
AFM_data$Rate.per.100.000.population <- as.numeric(gsub(",", "", AFM_data$Rate.per.100.000.population))

# Sum the rate per sex, restricted to Females/Males.
# The data contains an aggregate "Total" age group next to the individual age
# bands; it is excluded here so that each rate is not counted twice in the
# sum. `%in%` is used for the exclusion so rows with a missing age group are
# kept, exactly as they were before.
# NOTE(review): the summed quantity is a rate per 100,000 population added up
# over age groups (and, presumably, years), not a head count, even though the
# column and the chart title call it a victim count. Confirm the intended
# measure.
victim_summary <- AFM_data %>%
  filter(AFM.Sex %in% c("Females", "Males"),
         !AFM.Age.Group %in% "Total") %>%
  group_by(AFM.Sex) %>%
  summarise(Total_Victims = sum(Rate.per.100.000.population, na.rm = TRUE))

# Pie chart: a single stacked bar of width 1 wrapped into polar coordinates.
# Each slice is labelled with its percentage share, placed at the middle of
# the slice via position_stack(vjust = 0.5).
ggplot(victim_summary, aes(x = "", y = Total_Victims, fill = AFM.Sex)) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) +
  geom_text(aes(label = paste0(round(Total_Victims / sum(Total_Victims) * 100, 1), "%")),
            position = position_stack(vjust = 0.5)) +
  labs(title = "Gender-wise Victim Count Proportion", fill = "Gender") +
  theme_void()
## 888
##
## The downloaded binary packages are in
## /var/folders/9k/ggkns9sx5v7fl6bm32fwrb0r0000gn/T//RtmpnFTxNU/downloaded_packages
library(ggplot2)
library(reshape2)

# Pull the AFM rate table in from disk.
AFM_data <- read.csv("AFM-data.csv")

# Rate becomes numeric (thousands separators removed first); year and age
# group become factors so they behave as discrete heatmap axes.
AFM_data[["Rate.per.100.000.population"]] <- as.numeric(
  gsub(",", "", AFM_data[["Rate.per.100.000.population"]])
)
AFM_data[["Year"]] <- as.factor(AFM_data[["Year"]])
AFM_data[["AFM.Age.Group"]] <- as.factor(AFM_data[["AFM.Age.Group"]])

# Wide table: one row per age group, one column per year. Each cell is the
# mean of every rate recorded for that age-group/year pair.
# NOTE(review): the mean runs over all rows of a pair, i.e. over every value
# of AFM.Sex present in the data. Confirm that averaging across those rows
# is the intended statistic.
heatmap_data <- dcast(
  data = AFM_data,
  formula = AFM.Age.Group ~ Year,
  fun.aggregate = mean,
  value.var = "Rate.per.100.000.population"
)

# Back to long form (AFM.Age.Group, Year, Rate), one row per heatmap tile.
heatmap_long <- melt(
  heatmap_data,
  id.vars = "AFM.Age.Group",
  value.name = "Rate",
  variable.name = "Year"
)

# Dump the tile values for inspection.
print(heatmap_long)
## AFM.Age.Group Year Rate
## 1 00-04 years 2020 118.5333
## 2 05-09 years 2020 283.8333
## 3 10-14 years 2020 638.4333
## 4 15-17 years 2020 1347.1333
## 5 18-19 years 2020 1812.1667
## 6 20-24 years 2020 1899.2333
## 7 25-29 years 2020 2075.8000
## 8 30-34 years 2020 2064.7000
## 9 35-39 years 2020 2241.0000
## 10 40-44 years 2020 2223.4000
## 11 45-49 years 2020 2029.6333
## 12 50-54 years 2020 1582.8333
## 13 55-59 years 2020 1018.6333
## 14 60-64 years 2020 754.8667
## 15 65-69 years 2020 629.3667
## 16 70-74 years 2020 483.8333
## 17 75 years and over 2020 333.2000
## 18 Total 2020 1328.1667
## 19 00-04 years 2021 129.3000
## 20 05-09 years 2021 280.1667
## 21 10-14 years 2021 727.0667
## 22 15-17 years 2021 1520.1667
## 23 18-19 years 2021 1981.9333
## 24 20-24 years 2021 2151.5667
## 25 25-29 years 2021 2260.4667
## 26 30-34 years 2021 2284.5000
## 27 35-39 years 2021 2350.1000
## 28 40-44 years 2021 2325.9000
## 29 45-49 years 2021 2172.4667
## 30 50-54 years 2021 1639.3667
## 31 55-59 years 2021 1127.0333
## 32 60-64 years 2021 770.8333
## 33 65-69 years 2021 632.2000
## 34 70-74 years 2021 490.6667
## 35 75 years and over 2021 373.5333
## 36 Total 2021 1420.9000
## 37 00-04 years 2022 100.2667
## 38 05-09 years 2022 255.9333
## 39 10-14 years 2022 718.4000
## 40 15-17 years 2022 1405.4667
## 41 18-19 years 2022 1853.1667
## 42 20-24 years 2022 1929.5333
## 43 25-29 years 2022 2150.3333
## 44 30-34 years 2022 2209.2333
## 45 35-39 years 2022 2238.0000
## 46 40-44 years 2022 2225.0000
## 47 45-49 years 2022 2045.7667
## 48 50-54 years 2022 1652.4667
## 49 55-59 years 2022 1165.5000
## 50 60-64 years 2022 762.2000
## 51 65-69 years 2022 616.0333
## 52 70-74 years 2022 509.2667
## 53 75 years and over 2022 360.2667
## 54 Total 2022 1359.4667
## 55 00-04 years 2023 115.6000
## 56 05-09 years 2023 319.4000
## 57 10-14 years 2023 789.2667
## 58 15-17 years 2023 1483.2000
## 59 18-19 years 2023 1602.4000
## 60 20-24 years 2023 1795.7333
## 61 25-29 years 2023 1991.0667
## 62 30-34 years 2023 2164.2667
## 63 35-39 years 2023 2272.7000
## 64 40-44 years 2023 2265.6000
## 65 45-49 years 2023 2032.5000
## 66 50-54 years 2023 1652.6667
## 67 55-59 years 2023 1178.5333
## 68 60-64 years 2023 785.0333
## 69 65-69 years 2023 599.3333
## 70 70-74 years 2023 569.5333
## 71 75 years and over 2023 406.8333
## 72 Total 2023 1360.5000
## 73 00-04 years 2024 102.2000
## 74 05-09 years 2024 366.0000
## 75 10-14 years 2024 786.0000
## 76 15-17 years 2024 1550.2000
## 77 18-19 years 2024 1687.9333
## 78 20-24 years 2024 1749.4000
## 79 25-29 years 2024 2005.2333
## 80 30-34 years 2024 2209.7000
## 81 35-39 years 2024 2440.2333
## 82 40-44 years 2024 2324.9000
## 83 45-49 years 2024 2127.1667
## 84 50-54 years 2024 1687.1333
## 85 55-59 years 2024 1234.3667
## 86 60-64 years 2024 903.8667
## 87 65-69 years 2024 675.3000
## 88 70-74 years 2024 605.7000
## 89 75 years and over 2024 420.6667
## 90 Total 2024 1412.1667
# Heatmap of the mean rate per age group (rows) and year (columns), drawn from
# the long-format `heatmap_long` table built in the preceding step.
# Tiles are separated by white borders and shaded from light to dark red as
# the rate increases; tiles with a missing rate are drawn in light grey.
ggplot(heatmap_long, aes(x = Year, y = AFM.Age.Group, fill = Rate)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "#FEE0D2", high = "#DE2D26", na.value = "#F7F7F7") +
  labs(title = "Heatmap of family violence victimization rates across different age groups",
       x = "year",
       y = "age group",
       fill = "Rate.per.100.000.population") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# The library() call below was fused onto the end of the theme() line, which
# is a syntax error in R; it must be a statement of its own.
library(ggplot2)
# Read the AFM rate table from disk.
AFM_data <- read.csv("AFM-data.csv")

# Rate becomes numeric (thousands separators removed first); year and age
# group become factors.
# NOTE(review): Year is converted here but not used by the plot below.
AFM_data[["Rate.per.100.000.population"]] <- as.numeric(
  gsub(",", "", AFM_data[["Rate.per.100.000.population"]])
)
AFM_data[["Year"]] <- as.factor(AFM_data[["Year"]])
AFM_data[["AFM.Age.Group"]] <- as.factor(AFM_data[["AFM.Age.Group"]])

# One box per age group summarising the spread of the recorded rates.
# theme() must come after theme_minimal(), otherwise the complete theme would
# reset the rotated x-axis labels.
# NOTE(review): the title says "Victim Counts" while the plotted variable and
# the y-axis label are a rate per 100,000 population. Confirm the wording.
ggplot(AFM_data, aes(AFM.Age.Group, Rate.per.100.000.population)) +
  geom_boxplot(color = "#1f2f56", fill = "#69b3a2") +
  labs(
    x = "Age Group",
    y = "Rate per 100,000 population",
    title = "Boxplot of Family Violence Victim Counts by Age Group"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))