# Libraries

library(ggplot2)
library(dplyr)

# Data frame
# Synthetic crash counts: 110 rows covering the years 2010-2020. The counts
# are drawn with sample(), so they differ on every run unless a seed is set
# beforehand.
car_crashes <- data.frame(
  # Repeat the years 10 times to get exactly 110 rows
  year = rep(2010:2020, times = 10),
  # Repeat the regions until 110 rows
  region = rep(c("North", "South", "East", "West"), length.out = 110),
  # Random crash data
  crashes = sample(50:500, 110, replace = TRUE)
)

# Location data (regions and demographic info)

# Per-region lookup table: one row per region with its population and an
# income-level label.
location_data <- data.frame(
  region = c("North", "South", "East", "West"),
  population = c(1200000, 850000, 1100000, 1500000),
  income_level = c("High", "Medium", "High", "Low")
)

# Aggregate car crashes by region

# Collapse the crash records to one row per region, holding the mean of the
# `crashes` column in `avg_crashes`.
crashes_by_region <- summarise(
  group_by(car_crashes, region),
  avg_crashes = mean(crashes)
)

# Merge with location data for population and income

# Attach the population and income_level columns to each region's average.
# A left join keeps every row of crashes_by_region, matched on `region`.
plot_data_1 <- crashes_by_region %>%
  left_join(location_data, by = "region")

# Create the plot

# Point chart of the average crash count per region, with points coloured by
# the region's income level.
plot <- ggplot(
  plot_data_1,
  aes(x = region, y = avg_crashes, color = income_level)
) +
  geom_point(size = 4) +
  labs(
    title = "Average Number of Crashes by Region",
    subtitle = "Data from 2010 to 2020",
    x = "Region",
    y = "Average Crashes",
    caption = "Source: Car Crashes Dataset"
  ) +
  theme_minimal() +
  # Rotate the x-axis labels by 45 degrees and right-align them
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Evaluate the object on its own line so it is displayed when run
# interactively.
plot