This report analyzes the UCI Air Quality dataset using time-series plots of pollutant concentrations, monthly box plots of seasonal trends, and a correlation heatmap.
library(ggplot2)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.3
## Warning: package 'readr' was built under R version 4.5.3
## Warning: package 'forcats' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.5 ✔ tibble 3.3.1
## ✔ purrr 1.2.1 ✔ tidyr 1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.5.3
## corrplot 0.95 loaded
library(magrittr)
## Warning: package 'magrittr' was built under R version 4.5.3
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
# Load the Air Quality export: semicolon-separated fields, decimal commas.
# NOTE(review): this is an absolute, machine-specific path; point `data_path`
# at a project-relative location before sharing the report.
data_path <- "C:/Users/Vigne/Downloads/air+quality/AirQualityUCI.csv"
data <- read.csv(data_path, sep = ";", dec = ",")

# Keep only the first 13 columns of the file.
data <- data[, 1:13]

# The UCI dataset description tags missing readings with the sentinel -200.
# Convert it to NA in every numeric column so it is not plotted or correlated
# as if it were a real measurement. `%in%` is used because it never yields NA.
is_measurement <- vapply(data, is.numeric, logical(1))
data[is_measurement] <- lapply(
  data[is_measurement],
  function(column) replace(column, column %in% -200, NA)
)

# Build a single timestamp from the separate Date and Time columns.
data$Datetime <- lubridate::dmy_hms(paste(data$Date, data$Time))
## Warning: 114 failed to parse.

# Drop only the rows whose timestamp could not be parsed (presumably blank
# trailing rows in the export -- TODO confirm). A blanket na.omit() here would
# also discard every row with a single missing sensor reading.
data <- data[!is.na(data$Datetime), ]

# Ordered month label used to group the seasonal box plot.
data$Month <- lubridate::month(data$Datetime, label = TRUE)

# `pollutants`, `corr_data` and `corr_matrix` are built right before the plots
# that consume them further down, so they are not computed a second time here.
# CO.GT. over time as a single line.
ggplot(data, aes(x = Datetime, y = CO.GT.)) +
  geom_line()

# Reshape the three pollutant columns to long format: one row per timestamp
# and pollutant, with the column name in `name` and the reading in `value`.
pollutants <- data %>%
  select(all_of(c("Datetime", "CO.GT.", "NO2.GT.", "NOx.GT."))) %>%
  pivot_longer(cols = -Datetime, names_to = "name", values_to = "value")

# All three pollutants on one time axis, one coloured line each.
ggplot(pollutants, aes(x = Datetime, y = value, colour = name)) +
  geom_line()

# Distribution of CO.GT. per calendar month.
ggplot(data, aes(x = Month, y = CO.GT.)) +
  geom_boxplot()
## Correlation Heatmap
# Pairwise correlations between three pollutant columns and two PT08 sensor
# columns, computed on the rows where all five values are present.
corr_data <- select(
  data,
  all_of(c("CO.GT.", "NO2.GT.", "NOx.GT.", "PT08.S1.CO.", "PT08.S2.NMHC."))
)
corr_matrix <- cor(corr_data, use = "complete.obs")

# Draw the matrix as colour-filled cells.
corrplot(corr_matrix, method = "color")