pacman::p_load(pacman,tidyverse,ggplot2,gridExtra)

# Load the city-level air/water readings from "Cities1.csv" in the working
# directory.
# Column specification readr reported for this file on a previous run
# (3963 rows, 5 columns, "," delimiter):
#   chr (3): City, Region, Country
#   dbl (2): AirQuality, WaterPollution
# show_col_types = FALSE silences that message now that it is recorded here.
pollution_pf <- read_csv("Cities1.csv", show_col_types = FALSE)
# Build the pollution-oriented table: drop Region, then replace AirQuality
# with its complement AirPollution = 100 - AirQuality (presumably scores are
# on a 0-100 scale -- TODO confirm). WaterPollution is kept unchanged.
# select() is written as dplyr::select so that a function of the same name
# from another attached package cannot mask it.
pollution_pf_cleaned <- pollution_pf %>%
  dplyr::select(-Region) %>%
  mutate(
    AirPollution = 100 - AirQuality,
    AirQuality = NULL
  )

# Per-country means of both pollution scores (one row per Country).
# NOTE(review): mean() is called without na.rm, so a single missing score
# makes that country's average NA -- confirm the input has no gaps.
average_pollution <- summarise(
  group_by(pollution_pf_cleaned, Country),
  avg_air_pollution = mean(AirPollution),
  avg_water_pollution = mean(WaterPollution)
)

# Overall reference levels: the unweighted mean of the country averages, so
# each country counts once regardless of how many cities it contributes.
overall_avg_air_pol <- mean(average_pollution[["avg_air_pollution"]])
overall_avg_water_pol <- mean(average_pollution[["avg_water_pollution"]])
# Horizontal bar chart of the 25 countries with the highest average air
# pollution score (highest bar at the top after coord_flip), with the overall
# average drawn as a red reference line and labelled in white.
avg_air_pol <- average_pollution %>%
  arrange(desc(avg_air_pollution)) %>%
  head(25) %>%
  ggplot(aes(x = reorder(Country, avg_air_pollution), y = avg_air_pollution)) +
  geom_col(fill = "#66CCFF", color = "#000000", linewidth = .75) +
  labs(
    title = "Top 25 Countries: Worst Air Pollution",
    x = NULL,
    y = "Avg. Air Pollution Score"
  ) +
  geom_hline(
    yintercept = overall_avg_air_pol,
    linewidth = 1.5,
    color = "#CC0033"
  ) +
  # annotate() draws the label exactly once. geom_text() with constant
  # aesthetics inherits the 25-row plot data and would stack 25 identical
  # labels on top of each other, which renders the text heavy and jagged.
  # x = 8 is the eighth bar position on the discrete axis.
  annotate(
    "text",
    x = 8,
    y = overall_avg_air_pol + 15,
    label = paste("Overall Average\n", round(overall_avg_air_pol, 2)),
    color = "#FFFFFF"
  ) +
  theme(plot.title = element_text(face = "bold")) +
  coord_flip()

# Horizontal bar chart of the 25 countries with the highest average water
# pollution score (highest bar at the top after coord_flip), with the overall
# average drawn as a red reference line and labelled in white.
avg_water_pol <- average_pollution %>%
  arrange(desc(avg_water_pollution)) %>%
  head(25) %>%
  ggplot(
    aes(x = reorder(Country, avg_water_pollution), y = avg_water_pollution)
  ) +
  geom_col(fill = "#3333FF", color = "#000000", linewidth = .75) +
  labs(
    title = "Top 25 Countries: Worst Water Pollution",
    x = NULL,
    y = "Avg. Water Pollution Score"
  ) +
  geom_hline(
    yintercept = overall_avg_water_pol,
    linewidth = 1.5,
    color = "#CC0033"
  ) +
  # annotate() draws the label exactly once. geom_text() with constant
  # aesthetics inherits the 25-row plot data and would stack 25 identical
  # labels on top of each other, which renders the text heavy and jagged.
  # x = 8 is the eighth bar position on the discrete axis.
  annotate(
    "text",
    x = 8,
    y = overall_avg_water_pol + 16,
    label = paste("Overall Average\n", round(overall_avg_water_pol, 2)),
    color = "#FFFFFF"
  ) +
  theme(plot.title = element_text(face = "bold")) +
  coord_flip()

# Build the quality-oriented table: drop Region, then replace WaterPollution
# with its complement WaterQuality = 100 - WaterPollution (presumably scores
# are on a 0-100 scale -- TODO confirm). AirQuality is kept unchanged.
# select() is written as dplyr::select so that a function of the same name
# from another attached package cannot mask it.
quality_scores <- pollution_pf %>%
  dplyr::select(-Region) %>%
  mutate(
    WaterQuality = 100 - WaterPollution,
    WaterPollution = NULL
  )

# Which countries are the best for air and water quality?
# Per-country means of both quality scores (one row per Country).
# NOTE(review): mean() is called without na.rm, so a single missing score
# makes that country's average NA -- confirm the input has no gaps.
average_quality <- summarise(
  group_by(quality_scores, Country),
  avg_air_quality = mean(AirQuality),
  avg_water_quality = mean(WaterQuality)
)

# Overall average of each quality score: the unweighted mean of the country
# averages, so each country counts once regardless of its number of cities.
overall_avg_air_qual <- mean(average_quality[["avg_air_quality"]])
overall_avg_water_qual <- mean(average_quality[["avg_water_quality"]])

# Top 25 best countries for air and water quality

# Horizontal bar chart of the 25 countries with the highest average air
# quality score (highest bar at the top after coord_flip), with the overall
# average drawn as a red reference line and labelled in white.
avg_air_qual <- average_quality %>%
  arrange(desc(avg_air_quality)) %>%
  head(25) %>%
  ggplot(aes(x = reorder(Country, avg_air_quality), y = avg_air_quality)) +
  geom_col(fill = "#66CCFF", color = "#000000", linewidth = .75) +
  labs(
    title = "Top 25 Countries: Best Air Quality",
    x = NULL,
    y = "Avg. Air Quality Score"
  ) +
  geom_hline(
    yintercept = overall_avg_air_qual,
    linewidth = 1.5,
    color = "#CC0033"
  ) +
  # annotate() draws the label exactly once. geom_text() with constant
  # aesthetics inherits the 25-row plot data and would stack 25 identical
  # labels on top of each other, which renders the text heavy and jagged.
  # x = 8 is the eighth bar position on the discrete axis.
  annotate(
    "text",
    x = 8,
    y = overall_avg_air_qual + 15,
    label = paste("Overall Average\n", round(overall_avg_air_qual, 2)),
    color = "#FFFFFF"
  ) +
  theme(plot.title = element_text(face = "bold")) +
  coord_flip()

# Horizontal bar chart of the 25 countries with the highest average water
# quality score (highest bar at the top after coord_flip), with the overall
# average drawn as a red reference line and labelled in white.
avg_water_qual <- average_quality %>%
  arrange(desc(avg_water_quality)) %>%
  head(25) %>%
  ggplot(
    aes(x = reorder(Country, avg_water_quality), y = avg_water_quality)
  ) +
  geom_col(fill = "#3333FF", color = "#000000", linewidth = .75) +
  labs(
    title = "Top 25 Countries: Best Water Quality",
    x = NULL,
    y = "Avg. Water Quality Score"
  ) +
  geom_hline(
    yintercept = overall_avg_water_qual,
    linewidth = 1.5,
    color = "#CC0033"
  ) +
  # annotate() draws the label exactly once. geom_text() with constant
  # aesthetics inherits the 25-row plot data and would stack 25 identical
  # labels on top of each other, which renders the text heavy and jagged.
  # x = 8 is the eighth bar position on the discrete axis.
  annotate(
    "text",
    x = 8,
    y = overall_avg_water_qual + 16,
    label = paste("Overall Average\n", round(overall_avg_water_qual, 2)),
    color = "#FFFFFF"
  ) +
  theme(plot.title = element_text(face = "bold")) +
  coord_flip()

# Draw the four charts on one page as a 2 x 2 grid: quality charts on the top
# row, pollution charts on the bottom row (air on the left, water on the
# right).
grid.arrange(
  grobs = list(avg_air_qual, avg_water_qual, avg_air_pol, avg_water_pol),
  nrow = 2,
  ncol = 2
)