The code analyses air pollution and air quality data and calculates the average value for each location from 2014 to 2019.

pacman::p_load(pacman, tidyverse, ggplot2, gridExtra)

# Read WHO_PM.csv (expected in the current working directory) into `WP`.
WP <- read_csv("WHO_PM.csv")
## Rows: 9450 Columns: 34
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (14): IndicatorCode, Indicator, ValueType, ParentLocationCode, ParentLo...
## dbl   (4): Period, FactValueNumeric, FactValueNumericLow, FactValueNumericHigh
## lgl  (15): IsLatestYear, Dim2 type, Dim2, Dim2ValueCode, Dim3 type, Dim3, Di...
## dttm  (1): DateModified
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-location mean of FactValueNumeric.
# Rows with a missing FactValueNumeric are dropped first, so a location whose
# values are all missing does not appear in the result at all.
average_air_pollution <- WP %>%
  filter(!is.na(FactValueNumeric)) %>%
  group_by(Location) %>%
  summarise(avg_air_pollution = mean(FactValueNumeric, na.rm = TRUE))

# Mean of the per-location means (each location weighs equally).
overall_avg_air_pol <- mean(average_air_pollution$avg_air_pollution, na.rm = TRUE)

# The "air quality" view is built from exactly the same measure; only the
# column name differs. Derive it from the table above instead of re-running an
# identical pipeline, so the two can never drift apart.
average_air_quality <- average_air_pollution %>%
  rename(avg_air_quality = avg_air_pollution)

# Same value as overall_avg_air_pol, kept under its own name for the
# air-quality plot.
overall_avg_air_qual <- mean(average_air_quality$avg_air_quality, na.rm = TRUE)

# Plot for air pollution: horizontal bars for the 42 locations with the
# highest per-location average, with a reference line at the overall average.
avg_air_pol <- average_air_pollution %>%
  arrange(desc(avg_air_pollution)) %>%
  head(42) %>%
  ggplot(aes(x = reorder(Location, avg_air_pollution), y = avg_air_pollution)) +
  geom_col(fill = "#3333FF", color = "#000000", linewidth = 0.75) +
  labs(
    title = "Top 42 Locations: Highest Fine Particulate Matter PM(2.5)",
    x = NULL,
    y = "Avg. Air PM Count"
  ) +
  geom_hline(yintercept = overall_avg_air_pol, linewidth = 1.5, color = "#CC0033") +
  # annotate() draws the label exactly once. geom_text() with constant
  # aesthetics inherits the plot data and redraws the same label for every
  # row, which makes the text look heavy and jagged.
  annotate(
    "text",
    x = 3,
    y = overall_avg_air_pol + 33,
    label = paste("Overall Avg.\n", round(overall_avg_air_pol, 2)),
    color = "#000000"
  ) +
  theme(plot.title = element_text(face = "bold")) +
  # Flip so the location names are readable along the vertical axis.
  coord_flip()

# Plot for air quality: horizontal bars for the 42 locations with the lowest
# per-location average, with a reference line at the overall average.
avg_air_qual <- average_air_quality %>%
  arrange(avg_air_quality) %>%
  head(42) %>%
  ggplot(aes(x = reorder(Location, -avg_air_quality), y = avg_air_quality)) +
  geom_col(fill = "#33CC33", color = "#000000", linewidth = 0.75) +
  labs(
    title = "Top 42 Locations: Lowest Fine Particulate Matter PM(2.5)",
    x = NULL,
    y = "Avg. Air PM Count"
  ) +
  geom_hline(yintercept = overall_avg_air_qual, linewidth = 1.5, color = "#CC0033") +
  # annotate() draws the label exactly once. geom_text() with constant
  # aesthetics inherits the plot data and redraws the same label for every
  # row, which makes the text look heavy and jagged.
  # The label is shifted left of the reference line (y - 4) so it does not
  # sit on top of it.
  annotate(
    "text",
    x = 3,
    y = overall_avg_air_qual - 4,
    label = paste("Overall Avg.\n", round(overall_avg_air_qual, 2)),
    color = "#000000"
  ) +
  theme(plot.title = element_text(face = "bold")) +
  # Flip so the location names are readable along the vertical axis.
  coord_flip()


# Collect both charts so they can be rendered individually and then together.
pm_plots <- list(avg_air_pol, avg_air_qual)

# Render each chart on its own, in the same order as before.
for (pm_plot in pm_plots) {
  print(pm_plot)
}

# Arrange plots side by side
grid.arrange(grobs = pm_plots, ncol = 2)