# Load libraries
library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(viridis)
## Loading required package: viridisLite
library(ggrepel)
## Loading required package: ggplot2
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:rvest':
##
## guess_encoding
library(ggthemes)
library(ggplot2)
# Read the pollution records from the CSV file (path is relative, and the
# first line of the file holds the column names), then preview the top rows.
Delhi_Pollution <- read.csv(file = "delhi pollution.csv", header = TRUE)
head(Delhi_Pollution, n = 6L)
## Date Month Year Holidays_Count Days PM2.5 PM10 NO2 SO2 CO Ozone AQI
## 1 1 1 2021 0 5 408.80 442.42 160.61 12.95 2.77 43.19 462
## 2 2 1 2021 0 6 404.04 561.95 52.85 5.18 2.60 16.43 482
## 3 3 1 2021 1 7 225.07 239.04 170.95 10.93 1.40 44.29 263
## 4 4 1 2021 0 1 89.55 132.08 153.98 10.42 1.01 49.19 207
## 5 5 1 2021 0 2 54.06 55.54 122.66 9.70 0.64 48.88 149
## 6 6 1 2021 0 3 155.59 180.14 142.71 10.29 1.18 44.47 252
# Record the class of every column of Delhi_Pollution.
# vapply() guarantees exactly one string per column (the first class), whereas
# sapply() would silently return a list if any column carried more than one
# class. The result is a one-column data frame named `class`, with one row per
# input column (row names are the column names).
cls <- data.frame(
  class = vapply(Delhi_Pollution, function(col) class(col)[1L], character(1))
)
cls
## class
## Date integer
## Month integer
## Year integer
## Holidays_Count integer
## Days integer
## PM2.5 numeric
## PM10 numeric
## NO2 numeric
## SO2 numeric
## CO numeric
## Ozone numeric
## AQI integer
# Print per-column summary statistics (min, quartiles, median, mean, max).
summary(Delhi_Pollution)
## Date Month Year Holidays_Count
## Min. : 1.00 Min. : 1.000 Min. :2021 Min. :0.0000
## 1st Qu.: 8.00 1st Qu.: 4.000 1st Qu.:2022 1st Qu.:0.0000
## Median :16.00 Median : 7.000 Median :2023 Median :0.0000
## Mean :15.73 Mean : 6.523 Mean :2023 Mean :0.1896
## 3rd Qu.:23.00 3rd Qu.:10.000 3rd Qu.:2024 3rd Qu.:0.0000
## Max. :31.00 Max. :12.000 Max. :2024 Max. :1.0000
## Days PM2.5 PM10 NO2
## Min. :1.000 Min. : 0.05 Min. : 9.69 Min. : 2.16
## 1st Qu.:2.000 1st Qu.: 41.28 1st Qu.: 115.11 1st Qu.: 17.28
## Median :4.000 Median : 72.06 Median : 199.80 Median : 30.49
## Mean :4.001 Mean : 90.77 Mean : 218.22 Mean : 37.18
## 3rd Qu.:6.000 3rd Qu.: 118.50 3rd Qu.: 297.75 3rd Qu.: 45.01
## Max. :7.000 Max. :1000.00 Max. :1000.00 Max. :433.98
## SO2 CO Ozone AQI
## Min. : 1.21 Min. :0.270 Min. : 2.70 Min. : 19.0
## 1st Qu.: 7.71 1st Qu.:0.610 1st Qu.: 24.10 1st Qu.:108.0
## Median : 15.43 Median :0.850 Median : 32.47 Median :189.0
## Mean : 20.10 Mean :1.026 Mean : 36.34 Mean :202.2
## 3rd Qu.: 26.62 3rd Qu.:1.240 3rd Qu.: 45.73 3rd Qu.:284.0
## Max. :113.40 Max. :4.700 Max. :115.87 Max. :500.0
# Count the missing values across every cell of the data frame.
sum(is.na(Delhi_Pollution))
## [1] 0
# Keep only the 2023 rows.
df_2023 <- Delhi_Pollution[Delhi_Pollution$Year == 2023, ]
# Replace the day-of-month integer in `Date` with a full calendar Date built
# from the Year, Month and day-of-month columns. Using the Year column (rather
# than a repeated "2023" literal) keeps the date in step with the filter above
# and yields a zero-length result, not a stray "2023--" string, when the
# subset is empty. The explicit format avoids as.Date()'s format guessing.
df_2023$Date <- as.Date(
  paste(df_2023$Year, df_2023$Month, df_2023$Date, sep = "-"),
  format = "%Y-%m-%d"
)
# Line plot of PM2.5 against calendar date for the 2023 subset.
ggplot(data = df_2023, mapping = aes(x = Date, y = PM2.5)) +
  geom_line() +
  # One tick per month, labelled with the abbreviated month name
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  ggtitle("PM2.5 Levels in 2023") +
  xlab("Month") +
  ylab("PM2.5 Concentration (µg/m³)")
