BATCH 21

Air Quality Dataset

Loading libraries and data

library(ggplot2)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ lubridate 1.9.5     ✔ tibble    3.3.1
## ✔ purrr     1.2.1     ✔ tidyr     1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.5.3
## corrplot 0.95 loaded
# ---- Load and prepare the air-quality data ----
# Produces four objects used by the plots below:
#   data        - cleaned readings plus Datetime and Month columns
#   pollutants  - long-format CO / NO2 / NOx values keyed by Datetime
#   corr_data   - the numeric columns fed into the correlation
#   corr_matrix - pairwise correlations of corr_data

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
data_path <- "C:/Users/Vigne/Downloads/air+quality/AirQualityUCI.csv"

# The file is semicolon-separated and uses a decimal comma.
data <- read.csv(data_path, sep = ";", dec = ",")

# Keep only the first 13 columns.
# NOTE(review): presumably the remaining columns are unused or empty - confirm.
data <- data[, 1:13]

# Drop rows with no date before parsing. Feeding them to dmy_hms() is
# presumably what produced the earlier "114 failed to parse" warning; removing
# them first means any warning that remains points at a genuinely bad value.
data <- data[!is.na(data$Date) & nzchar(trimws(data$Date)), ]

# The UCI Air Quality description tags missing readings with -200. Convert the
# sentinel to NA so it cannot distort the plots or the correlations.
# %in% is used instead of == because it never yields NA in the index.
numeric_cols <- vapply(data, is.numeric, logical(1))
data[numeric_cols] <- lapply(
  data[numeric_cols],
  function(col) replace(col, col %in% -200, NA)
)

data$Datetime <- lubridate::dmy_hms(paste(data$Date, data$Time))

# Only rows without a usable timestamp are dropped. na.omit() on the whole
# frame would discard every row in which any single sensor reading is missing.
data <- data[!is.na(data$Datetime), ]

data$Month <- lubridate::month(data$Datetime, label = TRUE)

# Long format: one row per (Datetime, pollutant name) pair.
pollutants <- data %>%
  dplyr::select(Datetime, CO.GT., NO2.GT., NOx.GT.) %>%
  tidyr::pivot_longer(-Datetime)

corr_data <- data %>%
  dplyr::select(CO.GT., NO2.GT., NOx.GT., PT08.S1.CO., PT08.S2.NMHC.)

# "complete.obs" restricts the correlation to rows where all five columns
# are present.
corr_matrix <- cor(corr_data, use = "complete.obs")

Overview

Time Series

# Line chart of CO.GT. against Datetime.
ggplot(data = data, mapping = aes(x = Datetime, y = CO.GT.)) +
  geom_line()

Multiple Pollutants

# Plot every series in `pollutants` against time, one coloured line per `name`.
# A geom layer is required: ggplot() with only aes() draws an empty panel.
ggplot2::ggplot(pollutants, aes(Datetime, value, color = name)) +
  ggplot2::geom_line()

Correlation

# Draw the correlation matrix as a grid of coloured cells.
corrplot(
  corr_matrix,
  method = "color"
)

Insights