pacman::p_load(pacman, tidyverse, ggplot2, gridExtra)
# Load the city-level dataset; the relative path is resolved against the
# current working directory. `<-` is used for assignment to match the rest
# of the script.
pollution_df <- read_csv("Cities1.csv")
## Rows: 3963 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): City, Region, Country
## dbl (2): AirQuality, WaterPollution
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Mean water pollution score per country; missing scores are ignored.
average_water_pollution <- summarise(
  group_by(pollution_df, Country),
  avg_water_pollution = mean(WaterPollution, na.rm = TRUE)
)

# Overall average, taken as the mean of the per-country means (each country
# counts once here, regardless of how many of its cities are in the data).
overall_avg_water_pol <- average_water_pollution %>%
  pull(avg_water_pollution) %>%
  mean(na.rm = TRUE)
# Mean water quality score per country, where a city's quality is computed
# as 100 minus its water pollution score; missing scores are ignored.
average_water_quality <- summarise(
  group_by(pollution_df, Country),
  avg_water_quality = mean(100 - WaterPollution, na.rm = TRUE)
)

# Overall average, taken as the mean of the per-country means (each country
# counts once here, regardless of how many of its cities are in the data).
overall_avg_water_qual <- average_water_quality %>%
  pull(avg_water_quality) %>%
  mean(na.rm = TRUE)
# Plot for water pollution: horizontal bar chart of the 42 countries with the
# highest average water pollution score, ordered by score, with a red
# reference line marking the overall average.
avg_water_pol <- average_water_pollution %>%
  arrange(desc(avg_water_pollution)) %>%
  head(42) %>%
  ggplot(
    aes(x = reorder(Country, avg_water_pollution), y = avg_water_pollution)
  ) +
  geom_col(fill = "#3333FF", color = "#000000", linewidth = 0.75) +
  labs(
    title = "Top 42 Countries: Worst Water Pollution",
    x = NULL,
    y = "Avg. Water Pollution Score"
  ) +
  geom_hline(
    yintercept = overall_avg_water_pol,
    linewidth = 1.5,
    color = "#CC0033"
  ) +
  # annotate() draws the label exactly once. geom_text(aes(...)) with
  # constant aesthetics would redraw it for every row of the plotted data,
  # stacking identical copies on top of each other.
  # The label sits at the third category position, 33 score units past the
  # reference line (hand-tuned offset).
  annotate(
    "text",
    x = 3,
    y = overall_avg_water_pol + 33,
    label = paste("Overall Average\n", round(overall_avg_water_pol, 2)),
    color = "#000000"
  ) +
  theme(plot.title = element_text(face = "bold")) +
  # Flip so that country names are listed along the vertical axis.
  coord_flip()
# Plot for water quality: horizontal bar chart of the 42 countries with the
# highest average water quality score, ordered by score, with a red
# reference line marking the overall average.
avg_water_qual <- average_water_quality %>%
  arrange(desc(avg_water_quality)) %>%
  head(42) %>%
  ggplot(
    aes(x = reorder(Country, avg_water_quality), y = avg_water_quality)
  ) +
  geom_col(fill = "#3333FF", color = "#000000", linewidth = 0.75) +
  labs(
    title = "Top 42 Countries: Best Water Quality",
    x = NULL,
    y = "Avg. Water Quality Score"
  ) +
  geom_hline(
    yintercept = overall_avg_water_qual,
    linewidth = 1.5,
    color = "#CC0033"
  ) +
  # annotate() draws the label exactly once. geom_text(aes(...)) with
  # constant aesthetics would redraw it for every row of the plotted data,
  # stacking identical copies on top of each other.
  # The label sits at the third category position, 42 score units past the
  # reference line (hand-tuned offset).
  annotate(
    "text",
    x = 3,
    y = overall_avg_water_qual + 42,
    label = paste("Overall Average\n", round(overall_avg_water_qual, 2)),
    color = "#000000"
  ) +
  theme(plot.title = element_text(face = "bold")) +
  # Flip so that country names are listed along the vertical axis.
  coord_flip()
# Render each chart on its own, pollution first and quality second.
water_plots <- list(avg_water_pol, avg_water_qual)
for (water_plot in water_plots) {
  print(water_plot)
}

# Arrange plots together: both charts side by side in a two-column layout.
grid.arrange(grobs = water_plots, ncol = 2)
