This R notebook explores the world-wide Ozone levels found in the World Air Quality Data 2024 (Updated) dataset on kaggle.com.
The focus is on performing spatial analysis with the data. I am by no means an air quality expert. However, it was interesting to find ways of creating spatial data from the base input text file and visualizing it in charts and maps.
Air Pollutants | Air | CDC - https://www.cdc.gov/air/pollutants.htm
National Ambient Air Quality Standards (NAAQS) for ozone - https://www.epa.gov/naaqs/ozone-o3-air-quality-standards
OpenDataSoft - World Air Quality - https://public.opendatasoft.com/explore/dataset/openaq/information/?disjunctive.city&disjunctive.location&disjunctive.measurements_parameter&sort=measurements_lastupdated
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(EnvStats)
##
## Attaching package: 'EnvStats'
##
## The following objects are masked from 'package:stats':
##
## predict, predict.lm
##
## The following object is masked from 'package:base':
##
## print.default
library(ggplot2)
options(scipen = 999)
aq.data <- read.csv(file = "world_air_quality.csv", header = TRUE, sep = ";")
head(aq.data, 20)
## Country.Code City Location
## 1 JP 北九州市小倉北区大門一丁目6-48
## 2 JP 北九州市若松区本町三丁目13-1
## 3 JP 北九州市門司区大里原町12-12
## 4 JP 千歳市若草4-13
## 5 JP 千葉市稲毛区宮野木町996-9
## 6 JP 千葉市花見川区花見川4-1
## 7 JP 南埼玉郡宮代町学園台4-1
## 8 JP 厚木市中町1-8-11
## 9 JP 取手市大字寺田5139
## 10 JP 取手市大字寺田5139
## 11 JP 古河市下大野谷地川2456
## 12 JP 名古屋市中区大須2丁目404番地先
## 13 JP 名古屋市中川区元中野町2-11
## 14 JP 名古屋市南区松下町2-1
## 15 JP 名古屋市港区港陽1-1-65
## 16 JP 君津市久保2-11
## 17 JP 君津市俵田1110
## 18 JP 呉市西畑町33地先
## 19 JP 周南市周陽3丁目1-1
## 20 JP 和光市諏訪3-20
## Coordinates Pollutant Source.Name Unit Value
## 1 33.880833, 130.873056 NO japan-soramame ppm 0.002
## 2 33.898056, 130.81 NO2 japan-soramame ppm 0.005
## 3 33.895833, 130.935833 NOX japan-soramame ppm 0.013
## 4 42.786944, 141.605 NO2 japan-soramame ppm 0.004
## 5 35.653889, 140.097778 NOX japan-soramame ppm 0.003
## 6 35.693889, 140.098333 SO2 japan-soramame ppm 0.001
## 7 36.027222, 139.716111 PM2.5 japan-soramame µg/m³ 2.000
## 8 35.443333, 139.368056 NO2 japan-soramame ppm 0.006
## 9 35.908333, 140.052778 NOX japan-soramame ppm 0.002
## 10 35.908333, 140.052778 NO japan-soramame ppm 0.000
## 11 36.175833000000004, 139.75888899999998 SO2 japan-soramame ppm 0.001
## 12 35.1625, 136.901111 SO2 japan-soramame ppm 0.001
## 13 35.134444, 136.883333 NO japan-soramame ppm 0.001
## 14 35.073056, 136.916667 PM2.5 japan-soramame µg/m³ 3.000
## 15 35.099722, 136.890278 NO2 japan-soramame ppm 0.002
## 16 35.331667, 139.901944 SO2 japan-soramame ppm 0.002
## 17 35.319444, 140.056667 PM2.5 japan-soramame µg/m³ 5.000
## 18 34.236389, 132.583889 NO2 japan-soramame ppm 0.002
## 19 34.041667, 131.830278 NOX japan-soramame ppm 0.018
## 20 35.774722, 139.621389 NO2 japan-soramame ppm 0.002
## Last.Updated Country.Label
## 1 2024-03-10T13:30:00+05:30 Japan
## 2 2024-03-10T13:30:00+05:30 Japan
## 3 2024-03-10T13:30:00+05:30 Japan
## 4 2024-03-10T13:30:00+05:30 Japan
## 5 2024-03-10T13:30:00+05:30 Japan
## 6 2024-03-10T13:30:00+05:30 Japan
## 7 2024-03-10T13:30:00+05:30 Japan
## 8 2024-03-10T13:30:00+05:30 Japan
## 9 2024-03-10T13:30:00+05:30 Japan
## 10 2024-03-10T13:30:00+05:30 Japan
## 11 2024-03-10T13:30:00+05:30 Japan
## 12 2024-03-10T13:30:00+05:30 Japan
## 13 2024-03-10T13:30:00+05:30 Japan
## 14 2024-03-10T13:30:00+05:30 Japan
## 15 2024-03-10T13:30:00+05:30 Japan
## 16 2024-03-10T13:30:00+05:30 Japan
## 17 2024-03-10T13:30:00+05:30 Japan
## 18 2024-03-10T13:30:00+05:30 Japan
## 19 2024-03-10T13:30:00+05:30 Japan
## 20 2024-03-10T13:30:00+05:30 Japan
aq.data %>% group_by(Pollutant, Unit) %>%
summarise(Count = n(), .groups = "keep") %>%
arrange(Pollutant)
## # A tibble: 20 × 3
## # Groups: Pollutant, Unit [20]
## Pollutant Unit Count
## <chr> <chr> <int>
## 1 BC µg/m³ 135
## 2 CO ppm 1275
## 3 CO µg/m³ 3799
## 4 NO ppm 1984
## 5 NO µg/m³ 1879
## 6 NO2 ppm 2857
## 7 NO2 µg/m³ 7085
## 8 NOX ppm 1896
## 9 NOX µg/m³ 119
## 10 O3 ppm 2827
## 11 O3 µg/m³ 5523
## 12 PM1 µg/m³ 124
## 13 PM10 µg/m³ 8050
## 14 PM2.5 ppm 6
## 15 PM2.5 µg/m³ 9541
## 16 RELATIVEHUMIDITY % 66
## 17 SO2 ppm 2214
## 18 SO2 µg/m³ 4693
## 19 TEMPERATURE c 117
## 20 UM003 particles/cm³ 65
data.frame(
Country = unique(aq.data$Country.Label)
) %>%
arrange(Country) %>%
head(10)
## Country
## 1
## 2 Afghanistan
## 3 Algeria
## 4 Andorra
## 5 Antigua and Barbuda
## 6 Argentina
## 7 Armenia
## 8 Australia
## 9 Austria
## 10 Azerbaijan
data.frame(
Last.Updated = unique(aq.data$Last.Updated)
) %>%
head(10)
## Last.Updated
## 1 2024-03-10T13:30:00+05:30
## 2 2024-01-16T09:30:00+05:30
## 3 2024-03-07T12:30:00+05:30
## 4 2023-08-28T09:30:00+05:30
## 5 2023-12-28T09:30:00+05:30
## 6 2023-11-07T04:30:00+05:30
## 7 2023-10-16T13:30:00+05:30
## 8 2024-02-22T11:30:00+05:30
## 9 2023-06-13T05:30:00+05:30
## 10 2024-03-11T16:30:00+05:30
aq.data[aq.data$Country.Label > "", ] %>%
group_by(Country.Label, Pollutant) %>%
summarise(Count = n(), .groups = "keep") %>%
arrange(Country.Label) %>%
group_by(Country.Label) %>%
summarise(Pollutants = paste(unique(Pollutant), collapse = ", ")) %>%
arrange(Country.Label)
## # A tibble: 118 × 2
## Country.Label Pollutants
## <chr> <chr>
## 1 Afghanistan PM2.5
## 2 Algeria PM2.5
## 3 Andorra CO, NO, NO2, O3, PM10, PM2.5, SO2
## 4 Antigua and Barbuda PM2.5
## 5 Argentina CO, NO, NO2, NOX, O3, PM10, PM2.5, SO2, TEMPERATURE
## 6 Armenia PM2.5
## 7 Australia CO, NO2, O3, PM10, PM2.5, SO2, TEMPERATURE
## 8 Austria CO, NO, NO2, O3, PM10, PM2.5, SO2, TEMPERATURE
## 9 Azerbaijan PM2.5
## 10 Bahrain PM2.5
## # ℹ 108 more rows
aq.data[aq.data$Pollutant == "O3" & aq.data$Country.Label != "",
c("Country.Label", "Pollutant", "Unit", "Value")] %>%
arrange(Country.Label) %>%
head(10)
## Country.Label Pollutant Unit Value
## 1 Andorra O3 µg/m³ 50.500
## 2 Andorra O3 µg/m³ 108.700
## 3 Andorra O3 µg/m³ 45.800
## 4 Argentina O3 µg/m³ 24.000
## 5 Argentina O3 µg/m³ 28.000
## 6 Australia O3 ppm 0.037
## 7 Australia O3 ppm 0.018
## 8 Australia O3 ppm 0.013
## 9 Australia O3 ppm 0.019
## 10 Australia O3 ppm 0.020
# Recompute ppm to ug/m3 to normalize the observations
aq.data.norm <- aq.data[aq.data$Country.Label != "", ] %>%
mutate(Value = Value * ifelse(Unit == "ppm", 1000, 1),
Unit = ifelse(Unit == "ppm", "µg/m³", Unit),
Value = ifelse(Value < 0, 0, Value))
head(aq.data.norm, 10)
## Country.Code City Location Coordinates
## 1 JP 北九州市小倉北区大門一丁目6-48 33.880833, 130.873056
## 2 JP 北九州市若松区本町三丁目13-1 33.898056, 130.81
## 3 JP 北九州市門司区大里原町12-12 33.895833, 130.935833
## 4 JP 千歳市若草4-13 42.786944, 141.605
## 5 JP 千葉市稲毛区宮野木町996-9 35.653889, 140.097778
## 6 JP 千葉市花見川区花見川4-1 35.693889, 140.098333
## 7 JP 南埼玉郡宮代町学園台4-1 36.027222, 139.716111
## 8 JP 厚木市中町1-8-11 35.443333, 139.368056
## 9 JP 取手市大字寺田5139 35.908333, 140.052778
## 10 JP 取手市大字寺田5139 35.908333, 140.052778
## Pollutant Source.Name Unit Value Last.Updated Country.Label
## 1 NO japan-soramame µg/m³ 2 2024-03-10T13:30:00+05:30 Japan
## 2 NO2 japan-soramame µg/m³ 5 2024-03-10T13:30:00+05:30 Japan
## 3 NOX japan-soramame µg/m³ 13 2024-03-10T13:30:00+05:30 Japan
## 4 NO2 japan-soramame µg/m³ 4 2024-03-10T13:30:00+05:30 Japan
## 5 NOX japan-soramame µg/m³ 3 2024-03-10T13:30:00+05:30 Japan
## 6 SO2 japan-soramame µg/m³ 1 2024-03-10T13:30:00+05:30 Japan
## 7 PM2.5 japan-soramame µg/m³ 2 2024-03-10T13:30:00+05:30 Japan
## 8 NO2 japan-soramame µg/m³ 6 2024-03-10T13:30:00+05:30 Japan
## 9 NOX japan-soramame µg/m³ 2 2024-03-10T13:30:00+05:30 Japan
## 10 NO japan-soramame µg/m³ 0 2024-03-10T13:30:00+05:30 Japan
aq.data.norm %>% group_by(Pollutant, Unit) %>%
summarise(Count = n(),
Min.Value = min(Value),
Avg.Value = mean(Value),
Max.Value = max(Value),
.groups = "keep") %>%
arrange(Pollutant)
## # A tibble: 13 × 6
## # Groups: Pollutant, Unit [13]
## Pollutant Unit Count Min.Value Avg.Value Max.Value
## <chr> <chr> <int> <dbl> <dbl> <dbl>
## 1 BC µg/m³ 135 0 0.700 5.42
## 2 CO µg/m³ 5059 0 7390. 3198676
## 3 NO µg/m³ 3863 0 6.27 1134.
## 4 NO2 µg/m³ 9927 0 43.4 72289.
## 5 NOX µg/m³ 2015 0 9.76 1136.
## 6 O3 µg/m³ 8332 0 64.2 13000
## 7 PM1 µg/m³ 124 0 22.9 170.
## 8 PM10 µg/m³ 8035 0 38.6 1985
## 9 PM2.5 µg/m³ 9510 0 22.3 10400
## 10 RELATIVEHUMIDITY % 66 11.2 54.7 86.9
## 11 SO2 µg/m³ 6892 0 21.1 49181.
## 12 TEMPERATURE c 117 0 20.8 38.9
## 13 UM003 particles/cm³ 65 84.8 5879. 24641.
# Summarize Ozone by Country
aq.data.norm[aq.data.norm$Pollutant == "O3", ] %>%
group_by(Country.Label) %>%
summarise(Avg.Value = round(mean(Value), 2),
Unit = first(Unit),
Count = n()) %>%
arrange(desc(Avg.Value))
## # A tibble: 72 × 4
## Country.Label Avg.Value Unit Count
## <chr> <dbl> <chr> <int>
## 1 Australia 195. µg/m³ 74
## 2 China 131. µg/m³ 1854
## 3 Portugal 99.3 µg/m³ 41
## 4 Malta 85.8 µg/m³ 4
## 5 Gibraltar 84.2 µg/m³ 1
## 6 Lithuania 80.8 µg/m³ 13
## 7 Slovenia 74.0 µg/m³ 2
## 8 Romania 71.6 µg/m³ 36
## 9 Finland 70.7 µg/m³ 18
## 10 Luxembourg 69.8 µg/m³ 4
## # ℹ 62 more rows
chart.data <- aq.data.norm[aq.data.norm$Pollutant == "O3", ]
plot(
chart.data$Value ~ as.factor(chart.data$Country.Code),
ylab = "Ozone", xlab = "Country", col = "royalblue",
ylim = c(0, 400),
main = "Ozone Distribution by Country"
)
grid(nx = NULL, ny = NULL)
chart.data <- aq.data.norm[aq.data.norm$Pollutant == "O3" &
aq.data.norm$Country.Label == "France", ] %>%
mutate(Last.Updated = as.Date(Last.Updated)) %>%
group_by(Country.Label, Last.Updated) %>%
summarise(Value = round(mean(Value), 2),
.groups = "keep") %>%
arrange(Last.Updated)
barplot(
chart.data$Value, names.arg = chart.data$Last.Updated,
col = "royalblue", ylim = c(0, 200),
main = paste("Ozone Pollutant for", first(chart.data$Country.Label))
)
grid(nx = NULL, ny = NULL)
chart.data[, c("Country.Label", "Last.Updated", "Value")]
## # A tibble: 75 × 3
## # Groups: Country.Label, Last.Updated [75]
## Country.Label Last.Updated Value
## <chr> <date> <dbl>
## 1 France 2016-11-29 35.5
## 2 France 2016-12-02 3.75
## 3 France 2016-12-05 14.0
## 4 France 2017-03-07 49.6
## 5 France 2017-04-12 4.6
## 6 France 2017-05-18 76
## 7 France 2017-05-31 8.03
## 8 France 2017-06-29 60.4
## 9 France 2017-06-30 53.0
## 10 France 2017-10-11 39.1
## # ℹ 65 more rows
aq.data.norm[is.na(aq.data.norm$Coordinates) == FALSE, ] %>%
group_by(Coordinates) %>%
summarise(Country.Label = first(Country.Label)) %>%
head(10)
## # A tibble: 10 × 2
## Coordinates Country.Label
## <chr> <chr>
## 1 "" United States
## 2 "-0.09548, -78.44988" Ecuador
## 3 "-0.11072, -78.49953" Ecuador
## 4 "-0.21484, -78.40326" Ecuador
## 5 "-0.2215, -78.51413" Ecuador
## 6 "-0.25192, -78.51713" Ecuador
## 7 "-0.29693, -78.45555" Ecuador
## 8 "-0.33428, -78.5534" Ecuador
## 9 "-1.23437, 36.81732" Kenya
## 10 "-1.26614, 36.66314" Kenya
# Find locations where the CO-to-O3 ratio is above 50%
loc.data <- aq.data.norm[aq.data.norm$Coordinates != "", ] %>%
group_by(Coordinates) %>%
summarise(
Country.Label = first(Country.Label),
Location = first(Location),
Count = n(),
CO.Count = sum(ifelse(Pollutant == "CO", 1, 0)),
O3.Count = sum(ifelse(Pollutant == "O3", 1, 0)),
CO = ifelse(CO.Count > 0,
sum(ifelse(Pollutant == "CO", Value, 0)) / CO.Count, 0),
O3 = ifelse(O3.Count > 0,
sum(ifelse(Pollutant == "O3", Value, 0)) / O3.Count, 0)
)
head(arrange(loc.data, desc(CO)), 10)
## # A tibble: 10 × 8
## Coordinates Country.Label Location Count CO.Count O3.Count CO O3
## <chr> <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 41.474300144863… Turkey TR 6 1 1 3.20e6 4.28
## 2 41.06596, 28.99… Turkey İstanbu… 3 1 0 2.73e6 0
## 3 36.714140379538… Turkey Hatay -… 5 1 1 2.08e6 141.
## 4 40.354999946317… Turkey Seyyar … 6 1 1 1.98e6 3.93
## 5 41.114296050854… Turkey İstanbu… 5 1 0 1.84e6 0
## 6 41.724462848366… Turkey Kırklar… 6 1 1 1.60e6 58.5
## 7 40.8578714, 29.… Turkey İstanbu… 6 1 1 1.28e6 12.0
## 8 41.02705, 29.02… Turkey İstanbu… 3 1 0 1.24e6 0
## 9 31.735555555556… Mexico Canales… 2 1 1 1.24e6 28.3
## 10 41.063062, 29.0… Turkey İstanbu… 6 1 1 1.08e6 8.09
co.values <- loc.data$CO
o3.values <- loc.data$O3
plot(
o3.values, co.values,
col = "royalblue",
main = "Carbon Monoxide vs Ozone \n Pollutant Distribution",
xlab = "O3", ylab = "CO",
xlim = c(0, 2000), ylim = c(0, 5000)
)
grid(nx = NULL, ny = NULL)
chart.data <- aq.data.norm[aq.data.norm$Pollutant == "O3" &
aq.data.norm$Country.Label == "Mexico", ] %>%
mutate(Last.Updated = as.Date(Last.Updated)) %>%
group_by(Country.Label, Last.Updated) %>%
summarise(
Value = round(mean(Value), 2),
.groups = "keep"
) %>%
arrange(Last.Updated)
t.test(chart.data$Value, mu = 50, s = 10)
##
## One Sample t-test
##
## data: chart.data$Value
## t = -2.0951, df = 68, p-value = 0.03989
## alternative hypothesis: true mean is not equal to 50
## 95 percent confidence interval:
## 19.75990 49.26329
## sample estimates:
## mean of x
## 34.51159
varTest(
chart.data$Value,
alternative = "greater",
conf.level = 0.95,
sigma.squared = (sd(chart.data$Value) ^ 2)
)
## $statistic
## Chi-Squared
## 68
##
## $parameters
## df
## 68
##
## $p.value
## [1] 0.4771905
##
## $estimate
## variance
## 3770.884
##
## $null.value
## variance
## 3770.884
##
## $alternative
## [1] "greater"
##
## $method
## [1] "Chi-Squared Test on Variance"
##
## $data.name
## [1] "chart.data$Value"
##
## $conf.int
## LCL UCL
## 2905.605 Inf
## attr(,"conf.level")
## [1] 0.95
##
## attr(,"class")
## [1] "htestEnvStats"
library(tidyterra)
##
## Attaching package: 'tidyterra'
## The following object is masked from 'package:stats':
##
## filter
library(terra)
## terra 1.8.93
##
## Attaching package: 'terra'
## The following object is masked from 'package:tidyr':
##
## extract
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
countries <- vect("world-ash-ms.geojson") %>%
subset(
.$iso_a3 != "ATA",
select = c("continent", "admin", "iso_a2", "geometry")
)
countries[countries$continent == "Seven seas (open ocean)", c("continent")] <-
"Seven Seas"
countries$iso_a2[countries$admin == "France"] <- "FR"
head(countries, 10)
## continent admin iso_a2
## 1 North America Costa Rica CR
## 2 North America Nicaragua NI
## 3 North America Saint Martin MF
## 4 North America Sint Maarten SX
## 5 North America Haiti HT
## 6 North America Dominican Republic DO
## 7 North America El Salvador SV
## 8 North America Guatemala GT
## 9 North America Cuba CU
## 10 North America Honduras HN
# Convert "Coordinates" attribute into Latitude/Longitude vector
coords <-
aq.data.norm[aq.data.norm$Coordinates != "", ] %>%
separate_wider_delim(Coordinates, delim = ",",
names = c("Latitude", "Longitude"))
longitude <- as.numeric(coords$Longitude)
latitude <- as.numeric(coords$Latitude)
coords <- cbind(longitude, latitude)
head(coords)
## longitude latitude
## [1,] 130.8731 33.88083
## [2,] 130.8100 33.89806
## [3,] 130.9358 33.89583
## [4,] 141.6050 42.78694
## [5,] 140.0978 35.65389
## [6,] 140.0983 35.69389
# Create a tidyterra SpatVector data frame
spat.data <- aq.data.norm[aq.data.norm$Coordinates != "", ]
spat.data$geometry <- paste(
'POINT(', longitude, ' ', latitude, ')',
sep = ""
)
aq.data.spatial <- as_spatvector(
spat.data,
geom = "geometry",
crs = "EPSG:4326"
)
head(aq.data.spatial, 5)
## Country.Code City Location Coordinates
## 1 JP 北九州市小倉北区大門一丁目6-48 33.880833, 130.873056
## 2 JP 北九州市若松区本町三丁目13-1 33.898056, 130.81
## 3 JP 北九州市門司区大里原町12-12 33.895833, 130.935833
## 4 JP 千歳市若草4-13 42.786944, 141.605
## 5 JP 千葉市稲毛区宮野木町996-9 35.653889, 140.097778
## Pollutant Source.Name Unit Value Last.Updated Country.Label
## 1 NO japan-soramame µg/m³ 2 2024-03-10T13:30:00+05:30 Japan
## 2 NO2 japan-soramame µg/m³ 5 2024-03-10T13:30:00+05:30 Japan
## 3 NOX japan-soramame µg/m³ 13 2024-03-10T13:30:00+05:30 Japan
## 4 NO2 japan-soramame µg/m³ 4 2024-03-10T13:30:00+05:30 Japan
## 5 NOX japan-soramame µg/m³ 3 2024-03-10T13:30:00+05:30 Japan
map.data <- aq.data.spatial[
aq.data.spatial$Pollutant == "O3" & aq.data.spatial$Value > 0,
] %>%
arrange(Value)
quantile(map.data$Value)
## 0% 25% 50% 75% 100%
## 0.008539926 29.455000000 49.974028000 78.652500000 13000.000000000
# Map of Ozone (O3) readings
ggplot() +
geom_spatvector(
data = countries,
fill = "lightgray"
) +
geom_spatvector(
data = map.data,
mapping = aes(col = Value),
size = 0.75
) +
scale_color_binned(
palette = "viridis",
breaks = c(0, 30, 50, 80, Inf),
labels = c("0", "30", "50", "80", "Extreme")
) +
labs(
title = "Ozone Levels by Location"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5)
)
data.frame(map.data[map.data$Value > 50, ]) %>%
arrange(desc(Value)) %>%
head(25)
## Country.Code City Location
## 1 AU Canberra Civic
## 2 TR Hatay Hatay - Millet Bahçesi
## 3 PT Braga PT01042
## 4 CL Los Vientos
## 5 IN Karve Road, Pune - MPCB
## 6 TH Khlong Chan, Bang kapi
## 7 TR Ankara Ankara - Sincan
## 8 MX Primaria Julián Gascón Mercado
## 9 TR Karabük Kardemir 2
## 10 IN Rabindra Bharati University, Kolkata - WBSPCB
## 11 CN 三瓦窑
## 12 CN 华阳
## 13 CN 东风化工厂
## 14 CN 人民公园
## 15 CN 滨江
## 16 CN 三金集团
## 17 CL Huasco EME F
## 18 CN 消防大队
## 19 CN 莆田园
## 20 CN 和睦小学
## 21 CN 科学城
## 22 CN 浙江农大
## 23 CN 兰炼宾馆
## 24 CN 金胜
## 25 CN 城厢镇
## Coordinates Pollutant Source.Name Unit
## 1 -35.285399, 149.131536 O3 Australia - ACT µg/m³
## 2 36.2113739258692, 36.1587317640839 O3 Turkiye µg/m³
## 3 41.569444, -8.456944 O3 EEA Portugal µg/m³
## 4 -32.833639, -70.99693 O3 Chile - SINCA µg/m³
## 5 18.5011743, 73.8165527 O3 caaqm µg/m³
## 6 13.78152, 100.636548 O3 Thailand µg/m³
## 7 39.953072, 32.581754 O3 Turkiye µg/m³
## 8 21.509805555556, -104.91018055555999 O3 Sinaica Mexico µg/m³
## 9 41.193515, 32.627684 O3 Turkey µg/m³
## 10 22.627875, 88.3804 O3 CPCB µg/m³
## 11 30.5767, 104.0594 O3 ChinaAQIData µg/m³
## 12 30.522500000000004, 104.0578 O3 ChinaAQIData µg/m³
## 13 36.8406, 118.0253 O3 ChinaAQIData µg/m³
## 14 36.81029999999999, 118.0414 O3 ChinaAQIData µg/m³
## 15 30.2111, 120.20720000000001 O3 ChinaAQIData µg/m³
## 16 36.805, 117.8661 O3 ChinaAQIData µg/m³
## 17 -28.466423001884, -71.221554279327 O3 Chile - SINCA µg/m³
## 18 30.286400000000004, 120.1556 O3 ChinaAQIData µg/m³
## 19 36.8194, 118.30920000000002 O3 ChinaAQIData µg/m³
## 20 30.311900000000005, 120.11969999999998 O3 ChinaAQIData µg/m³
## 21 30.4072, 104.07889999999999 O3 ChinaAQIData µg/m³
## 22 30.2692, 120.1903 O3 ChinaAQIData µg/m³
## 23 36.10309999999999, 103.63099999999999 O3 ChinaAQIData µg/m³
## 24 37.78199999999999, 112.4701 O3 ChinaAQIData µg/m³
## 25 30.181899999999995, 120.26970000000001 O3 ChinaAQIData µg/m³
## Value Last.Updated Country.Label
## 1 13000.000 2024-01-30T03:30:00+05:30 Australia
## 2 2004.076 2023-03-23T19:30:00+05:30 Turkey
## 3 1996.000 2023-06-15T10:30:00+05:30 Portugal
## 4 983.100 2016-02-16T23:30:00+05:30 Chile
## 5 760.000 2022-10-10T16:45:00+05:30 India
## 6 667.000 2020-07-16T12:15:00+05:30 Thailand
## 7 504.000 2018-03-23T13:30:00+05:30 Turkey
## 8 500.000 2021-09-22T22:30:00+05:30 Mexico
## 9 479.000 2017-12-02T17:30:00+05:30 Turkey
## 10 433.310 2017-10-20T05:00:00+05:30 India
## 11 333.000 2021-08-09T16:30:00+05:30 China
## 12 329.000 2021-08-09T16:30:00+05:30 China
## 13 324.000 2021-08-09T16:30:00+05:30 China
## 14 323.000 2021-08-09T16:30:00+05:30 China
## 15 308.000 2021-08-09T16:30:00+05:30 China
## 16 303.000 2021-08-09T16:30:00+05:30 China
## 17 302.740 2021-08-21T02:30:00+05:30 Chile
## 18 302.000 2021-08-09T16:30:00+05:30 China
## 19 295.000 2021-08-09T16:30:00+05:30 China
## 20 288.000 2021-08-09T16:30:00+05:30 China
## 21 287.000 2021-08-09T16:30:00+05:30 China
## 22 280.000 2021-08-09T16:30:00+05:30 China
## 23 274.000 2021-08-09T16:30:00+05:30 China
## 24 266.000 2021-08-09T16:30:00+05:30 China
## 25 264.000 2021-08-09T16:30:00+05:30 China
plot(
map.data$Value ~ as.factor(map.data$Country.Code),
ylim = c(0, 300), col = "royalblue",
ylab = "Ozone", xlab = "Country",
main = "Distribution of Ozone Pollutant by Country"
)
grid(nx = NULL, ny = NULL)
# Visualize the CO-to-O3 ratio
loc.data$Pollutant_Ratio <- ifelse(loc.data$O3 > 0, loc.data$CO / loc.data$O3, 0)
map.data <-
subset(
loc.data, O3 > 0
) %>%
merge(
aq.data.spatial[aq.data.spatial$Pollutant == "O3", c("Coordinates")], .,
by.x = "Coordinates",
by.y = "Coordinates",
all.x = FALSE
) %>%
arrange(Pollutant_Ratio)
head(map.data)
## Coordinates Country.Label Location Count CO.Count
## 1 21.919917, -102.276083 Mexico NTE 6 0
## 2 45.930299999999995, 8.5655 Italy NET.IT224A 2 0
## 3 43.310832999999995, -0.391111 France NET-FR058A 6 0
## 4 43.096943, -0.03888900000000001 France NET-FR068A 5 0
## 5 45.88999999999999, 0.9 France NET-FR058A 5 0
## 6 43.3987241550087, 6.07610576710319 France FR03067 2 0
## O3.Count CO O3 Pollutant_Ratio
## 1 1 0 81.0 0
## 2 1 0 41.0 0
## 3 1 0 26.4 0
## 4 1 0 86.4 0
## 5 1 0 7.2 0
## 6 1 0 62.5 0
ggplot() +
geom_spatvector(
data = countries,
fill = "lightgray"
) +
geom_spatvector(
data = map.data,
mapping = aes(col = Pollutant_Ratio),
size = 0.75
) +
scale_color_binned(
"CO/O3",
palette = "viridis",
breaks = c(0, .2, .5, .7, 1)
) +
labs(
title = "Carbon Monoxide to Ozone Ratio by Location"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5)
)
# Visualize Average Ozone levels per country
o3.data <-
data.frame(
aq.data.spatial[aq.data.spatial$Pollutant == "O3", ]
) %>%
group_by(
Country.Code,
Coordinates
) %>%
summarise(
Max.Value = max(Value),
Country.Label = first(Country.Label),
.groups = "keep"
) %>%
group_by(Country.Code) %>%
summarise(
AvgValue = mean(Max.Value),
Country.Label = first(Country.Label)
)
map.data <- merge(
countries[, c("admin", "iso_a2")],
o3.data,
by.x = "iso_a2",
by.y = "Country.Code"
)
head(map.data)
## iso_a2 admin AvgValue Country.Label
## 1 GT Guatemala 0.00000 Guatemala
## 2 US United States of America 43.91099 United States
## 3 CA Canada 30.06589 Canada
## 4 MX Mexico 29.88481 Mexico
## 5 TT Trinidad and Tobago 15.78576 Trinidad and Tobago
## 6 PR Puerto Rico 21.33333 Puerto Rico
ggplot() +
geom_spatvector(
data = countries,
fill = "lightgray"
) +
geom_spatvector(
data = map.data,
mapping = aes(fill = AvgValue)
) +
scale_fill_viridis_c(
"Avg.O3"
) +
labs(
title = "Average Ozone Levels by Country"
) +
theme_bw() +
theme(
plot.title = element_text(hjust = 0.5)
)