# Load libraries
library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(viridis)
## Loading required package: viridisLite
library(ggrepel)
## Loading required package: ggplot2
library(readr)
## 
## Attaching package: 'readr'
## The following object is masked from 'package:rvest':
## 
##     guess_encoding
library(ggthemes)
library(ggplot2)
# Import data: read the Delhi air-quality CSV (path is relative to the
# working directory) and preview its first six rows.
Delhi_Pollution <- read.csv(file = "delhi pollution.csv")
head(Delhi_Pollution, n = 6L)
##   Date Month Year Holidays_Count Days  PM2.5   PM10    NO2   SO2   CO Ozone AQI
## 1    1     1 2021              0    5 408.80 442.42 160.61 12.95 2.77 43.19 462
## 2    2     1 2021              0    6 404.04 561.95  52.85  5.18 2.60 16.43 482
## 3    3     1 2021              1    7 225.07 239.04 170.95 10.93 1.40 44.29 263
## 4    4     1 2021              0    1  89.55 132.08 153.98 10.42 1.01 49.19 207
## 5    5     1 2021              0    2  54.06  55.54 122.66  9.70 0.64 48.88 149
## 6    6     1 2021              0    3 155.59 180.14 142.71 10.29 1.18 44.47 252
# Record the primary class of every column, one row per column.
# vapply() guarantees a character vector: sapply() would silently return a
# list if any column carried more than one class (e.g. a POSIXct column).
# The explicit `class` column name replaces the deparsed call that
# as.data.frame() would otherwise use as the header.
cls <- data.frame(
  class = vapply(Delhi_Pollution, function(x) class(x)[1L], character(1))
)
cls
##                  class
## Date           integer
## Month          integer
## Year           integer
## Holidays_Count integer
## Days           integer
## PM2.5          numeric
## PM10           numeric
## NO2            numeric
## SO2            numeric
## CO             numeric
## Ozone          numeric
## AQI            integer
# Per-column summary statistics (min, quartiles, mean, max) for the raw data.
summary(object = Delhi_Pollution)
##       Date           Month             Year      Holidays_Count  
##  Min.   : 1.00   Min.   : 1.000   Min.   :2021   Min.   :0.0000  
##  1st Qu.: 8.00   1st Qu.: 4.000   1st Qu.:2022   1st Qu.:0.0000  
##  Median :16.00   Median : 7.000   Median :2023   Median :0.0000  
##  Mean   :15.73   Mean   : 6.523   Mean   :2023   Mean   :0.1896  
##  3rd Qu.:23.00   3rd Qu.:10.000   3rd Qu.:2024   3rd Qu.:0.0000  
##  Max.   :31.00   Max.   :12.000   Max.   :2024   Max.   :1.0000  
##       Days           PM2.5              PM10              NO2        
##  Min.   :1.000   Min.   :   0.05   Min.   :   9.69   Min.   :  2.16  
##  1st Qu.:2.000   1st Qu.:  41.28   1st Qu.: 115.11   1st Qu.: 17.28  
##  Median :4.000   Median :  72.06   Median : 199.80   Median : 30.49  
##  Mean   :4.001   Mean   :  90.77   Mean   : 218.22   Mean   : 37.18  
##  3rd Qu.:6.000   3rd Qu.: 118.50   3rd Qu.: 297.75   3rd Qu.: 45.01  
##  Max.   :7.000   Max.   :1000.00   Max.   :1000.00   Max.   :433.98  
##       SO2               CO            Ozone             AQI       
##  Min.   :  1.21   Min.   :0.270   Min.   :  2.70   Min.   : 19.0  
##  1st Qu.:  7.71   1st Qu.:0.610   1st Qu.: 24.10   1st Qu.:108.0  
##  Median : 15.43   Median :0.850   Median : 32.47   Median :189.0  
##  Mean   : 20.10   Mean   :1.026   Mean   : 36.34   Mean   :202.2  
##  3rd Qu.: 26.62   3rd Qu.:1.240   3rd Qu.: 45.73   3rd Qu.:284.0  
##  Max.   :113.40   Max.   :4.700   Max.   :115.87   Max.   :500.0
# Total number of missing cells across the whole data frame.
sum(is.na(x = Delhi_Pollution))
## [1] 0
# Plot the daily PM2.5 series for one calendar year.
# The year is defined once so the row filter, the date construction and the
# plot title cannot drift apart. `df_2023` keeps its original name so any
# later code that refers to it still works.
plot_year <- 2023

# Keep only the rows recorded in the selected year.
df_2023 <- Delhi_Pollution[Delhi_Pollution$Year == plot_year, ]

# Replace the day-of-month column with a real Date assembled from the Year,
# Month and Date columns. Supplying `format` explicitly means a row that
# cannot be parsed becomes NA rather than risking the "not in a standard
# unambiguous format" error, and an empty subset yields an empty Date vector.
df_2023$Date <- as.Date(
  paste(df_2023$Year, df_2023$Month, df_2023$Date, sep = "-"),
  format = "%Y-%m-%d"
)

ggplot(df_2023, aes(x = Date, y = PM2.5)) +
  geom_line() +
  # One axis break per month, labelled with the abbreviated month name.
  scale_x_date(date_labels = "%b", date_breaks = "1 month") +
  labs(
    title = paste("PM2.5 Levels in", plot_year),
    x = "Month",
    y = "PM2.5 Concentration (µg/m³)"
  )