Introduction

This report analyzes air quality data using time-series plots, seasonal trends, and correlations.

Load Libraries

library(ggplot2)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ lubridate 1.9.5     ✔ tibble    3.3.1
## ✔ purrr     1.2.1     ✔ tidyr     1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.5.3
## corrplot 0.95 loaded
library(magrittr)
## Warning: package 'magrittr' was built under R version 4.5.3
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
# Read the semicolon-separated export (decimal mark is a comma) and keep only
# the first 13 columns.
data <- read.csv(
  file = "C:/Users/Vigne/Downloads/air+quality/AirQualityUCI.csv",
  sep = ";",
  dec = ","
)[, seq_len(13)]

# Build a single timestamp column from the separate Date and Time columns
# (parsed as day-month-year, hour-minute-second).
data[["Datetime"]] <- lubridate::dmy_hms(
  paste(data[["Date"]], data[["Time"]])
)
## Warning: 114 failed to parse.
# Drop every row that contains an NA. At this point that includes the rows
# whose Date/Time could not be parsed into a Datetime above.
data <- na.omit(data)

# The UCI Air Quality file marks missing sensor readings with the sentinel
# value -200 rather than NA. Convert it to NA in all numeric columns *after*
# na.omit() (so otherwise valid rows are kept); this stops the sentinel from
# distorting the plots and the correlation matrix. cor(use = "complete.obs")
# and ggplot2 then skip the gaps.
data <- data %>%
  mutate(across(where(is.numeric), \(x) na_if(x, -200)))

# Month of each observation as a labelled factor; used by the seasonal boxplot.
data$Month <- lubridate::month(data$Datetime, label = TRUE)

# `pollutants`, `corr_data`, and `corr_matrix` are built in the plotting
# sections that use them, so they are not precomputed here.

Time Series

# Line plot of the CO.GT. column against the timestamp.
ggplot(data = data, mapping = aes(x = Datetime, y = CO.GT.)) +
  geom_line()

Multi-line Plot

# Reshape the three pollutant columns to long form: one row per
# (Datetime, name, value), where `name` holds the original column name.
pollutants <- pivot_longer(
  select(data, Datetime, CO.GT., NO2.GT., NOx.GT.),
  cols = -Datetime
)

# One line per pollutant, distinguished by colour.
ggplot(
  data = pollutants,
  mapping = aes(x = Datetime, y = value, colour = name)
) +
  geom_line()

Seasonal Trend

# Distribution of CO.GT. per month, one box per Month level.
ggplot(data = data, mapping = aes(x = Month, y = CO.GT.)) +
  geom_boxplot()

Correlation Heatmap

# Columns that enter the correlation analysis.
corr_data <- select(
  data,
  CO.GT., NO2.GT., NOx.GT., PT08.S1.CO., PT08.S2.NMHC.
)

# Pairwise correlations, computed on rows with no missing values.
corr_matrix <- cor(x = corr_data, use = "complete.obs")

# Render the matrix as a colour-coded heatmap.
corrplot(corr = corr_matrix, method = "color")