The code analyses air pollution and air quality data and calculates
average values for each location from 2014 to 2019.
pacman::p_load(pacman, tidyverse, ggplot2, gridExtra)
# Load the WHO particulate-matter table from the working directory.
# read_csv() guesses the column types and prints the column specification.
WP <- read_csv("WHO_PM.csv")
## Rows: 9450 Columns: 34
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): IndicatorCode, Indicator, ValueType, ParentLocationCode, ParentLo...
## dbl (4): Period, FactValueNumeric, FactValueNumericLow, FactValueNumericHigh
## lgl (15): IsLatestYear, Dim2 type, Dim2, Dim2ValueCode, Dim3 type, Dim3, Di...
## dttm (1): DateModified
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Extract air pollution information: the mean of FactValueNumeric for each
# Location, computed over the rows that actually carry a value.
# NOTE(review): the description at the top of the file mentions 2014 to 2019,
# but no filter on Period is applied here -- confirm the CSV only covers
# those years.
average_air_pollution <- WP %>%
  filter(!is.na(FactValueNumeric)) %>%
  group_by(Location) %>%
  summarise(avg_air_pollution = mean(FactValueNumeric, na.rm = TRUE))
overall_avg_air_pol <- mean(
  average_air_pollution$avg_air_pollution,
  na.rm = TRUE
)

# Extract air quality information. This is the same per-location mean of
# FactValueNumeric as above, so it is derived from that result (with the
# column renamed) rather than recomputed from WP; the two tables therefore
# cannot drift apart.
average_air_quality <- average_air_pollution %>%
  rename(avg_air_quality = avg_air_pollution)
overall_avg_air_qual <- overall_avg_air_pol
# Plot for air pollution: horizontal bar chart (via coord_flip) of the 42
# locations with the highest per-location mean, with the overall mean drawn
# as a red reference line.
avg_air_pol <- average_air_pollution %>%
  arrange(desc(avg_air_pollution)) %>%
  head(42) %>%
  ggplot(aes(x = reorder(Location, avg_air_pollution), y = avg_air_pollution)) +
  geom_col(fill = "#3333FF", color = "#000000", linewidth = 0.75) +
  labs(
    title = "Top 42 Locations: Highest Fine Particulate Matter PM(2.5)",
    x = NULL,
    y = "Avg. Air PM Count"
  ) +
  geom_hline(
    yintercept = overall_avg_air_pol,
    linewidth = 1.5,
    color = "#CC0033"
  ) +
  # annotate() draws the label exactly once. Mapping constants through
  # geom_text(aes(...)) would draw one copy per data row, stacking 42
  # identical labels on top of each other.
  # x = 3 is a position on the discrete location axis; y is offset from the
  # reference line so the text does not sit on top of it.
  annotate(
    "text",
    x = 3,
    y = overall_avg_air_pol + 33,
    label = paste("Overall Avg.\n", round(overall_avg_air_pol, 2)),
    color = "#000000"
  ) +
  theme(plot.title = element_text(face = "bold")) +
  coord_flip()
# Plot for air quality: horizontal bar chart (via coord_flip) of the 42
# locations with the lowest per-location mean, with the overall mean drawn
# as a red reference line.
avg_air_qual <- average_air_quality %>%
  arrange(avg_air_quality) %>%
  head(42) %>%
  ggplot(aes(x = reorder(Location, -avg_air_quality), y = avg_air_quality)) +
  geom_col(fill = "#33CC33", color = "#000000", linewidth = 0.75) +
  labs(
    title = "Top 42 Locations: Lowest Fine Particulate Matter PM(2.5)",
    x = NULL,
    y = "Avg. Air PM Count"
  ) +
  geom_hline(
    yintercept = overall_avg_air_qual,
    linewidth = 1.5,
    color = "#CC0033"
  ) +
  # annotate() draws the label exactly once. Mapping constants through
  # geom_text(aes(...)) would draw one copy per data row, stacking 42
  # identical labels on top of each other.
  # x = 3 is a position on the discrete location axis; y is shifted below the
  # overall mean to adjust the label position relative to the reference line.
  annotate(
    "text",
    x = 3,
    y = overall_avg_air_qual - 4,
    label = paste("Overall Avg.\n", round(overall_avg_air_qual, 2)),
    color = "#000000"
  ) +
  theme(plot.title = element_text(face = "bold")) +
  coord_flip()
# Render each chart on its own, in the order pollution then quality.
for (single_plot in list(avg_air_pol, avg_air_qual)) {
  print(single_plot)
}

# Then draw both charts together in a single row of two columns.
grid.arrange(grobs = list(avg_air_pol, avg_air_qual), ncol = 2)
