Q1: Number of crashes under different road conditions
# Collapse every road condition other than dry, wet, ice or snow into "OTHER".
# This overwrites ROADCONDITIONSPRIMARY in place; no new column is created.
# The column is coerced to character first because ifelse() drops attributes:
# on a factor it would return the integer level codes instead of the labels.
traffic$ROADCONDITIONSPRIMARY <- as.character(traffic$ROADCONDITIONSPRIMARY)
traffic$ROADCONDITIONSPRIMARY <- ifelse(
  traffic$ROADCONDITIONSPRIMARY %in%
    c("01 - DRY", "02 - WET", "04 - ICE", "03 - SNOW"),
  traffic$ROADCONDITIONSPRIMARY,
  "OTHER"
)
# Count the number of crashes in each (collapsed) road condition.
crash_counts <- traffic %>%
  group_by(ROADCONDITIONSPRIMARY) %>%
  summarize(num_crashes = n())
# Each condition's share of all crashes, in percent. The chart below plots the
# raw counts; this column is kept alongside them for reference.
crash_counts$percent_crashes <-
  (crash_counts$num_crashes / sum(crash_counts$num_crashes)) * 100
# Pie chart: a single stacked bar of counts wrapped into polar coordinates.
ggplot(
  crash_counts,
  aes(x = "", y = num_crashes, fill = ROADCONDITIONSPRIMARY)
) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) +
  labs(title = "Number of Crashes by Road Condition") +
  theme_void()
Q2: The number of crashes for male vs. female drivers, and fatal crashes as a
percentage of each gender's total crashes.
# Count all crashes for every GENDER value present in the data.
crash_counts <- traffic %>%
  group_by(GENDER) %>%
  summarize(num_crashes = n())
# Count the male/female crashes whose INJURIES code is treated as fatal, and
# express each count as a percentage of that same gender's total crashes.
# The denominator is looked up per gender; dividing by the total across all
# genders would understate both rates.
# NOTE(review): "2 - SUSPECTED SERIOUS INJURY" is counted as fatal here even
# though the plot labels the figure "Fatal" -- confirm this is intended.
fatal_counts <- traffic %>%
  filter(
    GENDER %in% c("M - MALE", "F - FEMALE"),
    INJURIES %in% c("1 - FATAL", "5 - FATAL", "2 - SUSPECTED SERIOUS INJURY")
  ) %>%
  group_by(GENDER) %>%
  summarize(num_fatal = n()) %>%
  mutate(
    percent_fatal = round(
      num_fatal /
        crash_counts$num_crashes[match(GENDER, crash_counts$GENDER)] * 100,
      2
    )
  )
# Inner join on GENDER: only genders present in both tables (i.e. those kept by
# the filter above and having at least one matching crash) reach the plot.
crash_summary <- merge(crash_counts, fatal_counts, by = "GENDER")
# Bar plot of total crashes by gender, annotated with the fatal percentage.
ggplot(crash_summary, aes(x = GENDER, y = num_crashes, fill = GENDER)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = paste0(round(percent_fatal, 2), "% (Fatal)")),
    position = position_dodge(width = 1), vjust = -0.5, size = 3,
    fontface = "bold"
  ) +
  labs(
    title = "Number of Crashes by Gender", y = "Number of Crashes",
    fill = "Gender"
  ) +
  # Colours are named by level so they do not depend on factor/sort order.
  scale_fill_manual(values = c("F - FEMALE" = "pink", "M - MALE" = "blue")) +
  theme_minimal()