Pollution levels in India shave 5.2 years off the life expectancy of an average Indian, and they hit people living in the Indo-Gangetic plains most acutely, according to an assessment by the University of Chicago reported by Hindustan Times on 29 June 2020.
The average PM 2.5 concentration in the National Capital Territory of Delhi was 106 micrograms per cubic metre, which can lead to loss of 9.4 life years compared to if Delhi had met the WHO guidelines for air quality.
Over 20 years, India’s annual PM 2.5 levels have increased by 42%, taking 1.8 years off the life of the average resident. A quarter of India’s population is exposed to air pollution concentrations not recorded in any other country, with 248 million people in North India on track to lose more than 8 years of life expectancy if the same pollution levels persist.
Through this project, I have analyzed the PM 2.5 level for the month of March for the years 2020, 2019, 2018 & 2017.
Because the nationwide lockdown was imposed during this month, I have analyzed the PM 2.5 levels for March in particular, hoping that the level might have reduced this year in comparison to the last 3 years.
# Default figure size applied to every plot chunk in this report.
knitr::opts_chunk$set(fig.width = 12, fig.height = 8)
# Machine-specific project folder; the relative "./Data/..." paths used when
# loading the workbooks resolve against it.
setwd("C:\\Users\\MARK\\Desktop\\Marwin Documents\\Coursera Courses\\AQI Project")
It lists all the files in the working directory.
# Show the contents of the current working directory.
list.files()
## [1] "AQIAnalysis.html" "AQIAnalysis.md" "AQIAnalysis.Rmd"
## [4] "AQIAnalysis_files" "CleanMarch.R" "Data"
## [7] "DatAn.R" "rsconnect"
library(xlsx)
## Warning: package 'xlsx' was built under R version 3.6.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.6.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Load the March air-quality readings, one data frame per year
# (Dat1 = 2020, Dat2 = 2019, Dat3 = 2018, Dat4 = 2017).
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
# NOTE(review): the 2020 workbook is read from its third sheet while the other
# years use the first sheet -- confirm this is intentional.
Dat1 <- read.xlsx("./Data/AQIData2020.xlsx", sheetIndex = 3, header = TRUE)
Dat2 <- read.xlsx("./Data/AQIData2019.xlsx", sheetIndex = 1, header = TRUE)
Dat3 <- read.xlsx("./Data/AQIData2018.xlsx", sheetIndex = 1, header = TRUE)
Dat4 <- read.xlsx("./Data/AQIData2017.xlsx", sheetIndex = 1, header = TRUE)
# Preview the first rows of the 2020 data.
head(Dat1)
## PM.2.5 PM.10 O3 NO2 SO2 CO
## 1 135 128 23 33 NA 19
## 2 126 209 20 33 5 22
## 3 192 154 6 38 3 31
## 4 158 129 58 29 7 17
## 5 152 64 16 21 4 18
## 6 94 38 18 19 3 16
This built-in function gives a six-number summary of each column of the data: minimum, first quartile, median, mean, third quartile and maximum, together with a count of missing values (NA's) where there are any.
# Per-column summary statistics for the March 2020 readings.
summary(Dat1)
## PM.2.5 PM.10 O3 NO2 SO2
## Min. : 65 Min. : 33.0 Min. : 6.00 Min. : 6.0 Min. :1.000
## 1st Qu.:116 1st Qu.: 62.5 1st Qu.:13.00 1st Qu.:14.0 1st Qu.:3.000
## Median :138 Median :102.0 Median :18.00 Median :21.0 Median :4.000
## Mean :137 Mean :106.0 Mean :21.81 Mean :20.1 Mean :4.233
## 3rd Qu.:157 3rd Qu.:133.5 3rd Qu.:24.50 3rd Qu.:28.5 3rd Qu.:5.000
## Max. :240 Max. :234.0 Max. :69.00 Max. :38.0 Max. :9.000
## NA's :1
## CO
## Min. : 6.00
## 1st Qu.:12.00
## Median :17.00
## Mean :15.97
## 3rd Qu.:18.00
## Max. :31.00
##
# Per-column summary statistics for the March 2019 readings.
summary(Dat2)
## PM.2.5 PM.10 O3 NO2
## Min. : 88.0 Min. : 79.0 Min. : 3.00 Min. :14.00
## 1st Qu.:134.5 1st Qu.: 99.0 1st Qu.: 9.00 1st Qu.:18.00
## Median :151.0 Median :112.0 Median :11.00 Median :20.00
## Mean :146.1 Mean :113.2 Mean :14.33 Mean :21.72
## 3rd Qu.:159.0 3rd Qu.:130.5 3rd Qu.:13.00 3rd Qu.:25.00
## Max. :182.0 Max. :142.0 Max. :76.00 Max. :40.00
## NA's :1 NA's :2
## SO2 CO
## Min. :3.000 Min. : 5.000
## 1st Qu.:4.000 1st Qu.: 7.000
## Median :4.000 Median : 8.000
## Mean :4.194 Mean : 8.129
## 3rd Qu.:5.000 3rd Qu.: 9.000
## Max. :6.000 Max. :12.000
##
# Per-column summary statistics for the March 2018 readings.
summary(Dat3)
## PM.2.5 PM.10 O3 NO2
## Min. :115.0 Min. : 81.0 Min. :12.00 Min. :16.00
## 1st Qu.:143.5 1st Qu.:101.0 1st Qu.:20.00 1st Qu.:19.00
## Median :151.0 Median :117.0 Median :21.00 Median :22.00
## Mean :155.6 Mean :121.3 Mean :21.81 Mean :23.65
## 3rd Qu.:161.5 3rd Qu.:138.0 3rd Qu.:22.00 3rd Qu.:29.00
## Max. :207.0 Max. :176.0 Max. :35.00 Max. :40.00
## SO2 CO
## Min. : 5.000 Min. : 0.00
## 1st Qu.: 7.000 1st Qu.: 2.50
## Median : 9.000 Median : 7.00
## Mean : 9.903 Mean : 6.29
## 3rd Qu.:12.500 3rd Qu.: 9.00
## Max. :19.000 Max. :14.00
# Per-column summary statistics for the March 2017 readings.
summary(Dat4)
## PM.2.5 PM.10 O3 NO2
## Min. : 94.0 Min. : 54.0 Min. :10.00 Min. :15.00
## 1st Qu.:137.0 1st Qu.: 95.0 1st Qu.:22.00 1st Qu.:20.50
## Median :155.0 Median :107.0 Median :24.00 Median :23.00
## Mean :151.7 Mean :115.2 Mean :24.42 Mean :23.77
## 3rd Qu.:163.5 3rd Qu.:121.0 3rd Qu.:28.00 3rd Qu.:26.00
## Max. :194.0 Max. :385.0 Max. :37.00 Max. :38.00
## SO2 CO
## Min. : 3.000 Min. : 0.0
## 1st Qu.: 4.500 1st Qu.: 9.5
## Median : 6.000 Median :10.0
## Mean : 5.903 Mean :11.1
## 3rd Qu.: 7.000 3rd Qu.:13.0
## Max. :10.000 Max. :18.0
This function will help us to restructure the data i.e. add new columns, rename columns, etc.
# Replace the dotted particulate column names (PM.2.5, PM.10) with PM25 and
# PM10 in every year's data frame, then list the resulting column names.
Dat1 <- Dat1 %>% rename(PM25 = PM.2.5, PM10 = PM.10)
names(Dat1)
## [1] "PM25" "PM10" "O3" "NO2" "SO2" "CO"
Dat2 <- Dat2 %>% rename(PM25 = PM.2.5, PM10 = PM.10)
names(Dat2)
## [1] "PM25" "PM10" "O3" "NO2" "SO2" "CO"
Dat3 <- Dat3 %>% rename(PM25 = PM.2.5, PM10 = PM.10)
names(Dat3)
## [1] "PM25" "PM10" "O3" "NO2" "SO2" "CO"
Dat4 <- Dat4 %>% rename(PM25 = PM.2.5, PM10 = PM.10)
names(Dat4)
## [1] "PM25" "PM10" "O3" "NO2" "SO2" "CO"
This column gives each day's deviation of the PM 2.5 level from that month's mean, i.e. how far the day lies from the central tendency.
# Add each day's deviation of PM 2.5 from the March 2020 mean
# (missing readings are ignored when computing the mean).
Dat1 <- Dat1 %>%
  mutate(PM25DTrend = PM25 - mean(PM25, na.rm = TRUE))
head(Dat1)
## PM25 PM10 O3 NO2 SO2 CO PM25DTrend
## 1 135 128 23 33 NA 19 -1.967742
## 2 126 209 20 33 5 22 -10.967742
## 3 192 154 6 38 3 31 55.032258
## 4 158 129 58 29 7 17 21.032258
## 5 152 64 16 21 4 18 15.032258
## 6 94 38 18 19 3 16 -42.967742
# Add each day's deviation of PM 2.5 from the March 2019 mean
# (missing readings are ignored when computing the mean).
Dat2 <- Dat2 %>%
  mutate(PM25DTrend = PM25 - mean(PM25, na.rm = TRUE))
head(Dat2)
## PM25 PM10 O3 NO2 SO2 CO PM25DTrend
## 1 182 110 8 20 4 9 35.903226
## 2 159 112 3 29 4 9 12.903226
## 3 168 79 5 20 3 8 21.903226
## 4 135 117 18 14 3 9 -11.096774
## 5 176 93 76 15 3 7 29.903226
## 6 153 85 8 16 3 9 6.903226
# Add each day's deviation of PM 2.5 from the March 2018 mean
# (missing readings are ignored when computing the mean).
Dat3 <- Dat3 %>%
  mutate(PM25DTrend = PM25 - mean(PM25, na.rm = TRUE))
head(Dat3)
## PM25 PM10 O3 NO2 SO2 CO PM25DTrend
## 1 199 133 22 29 7 12 43.354839
## 2 168 161 22 23 8 14 12.354839
## 3 192 107 19 22 7 9 36.354839
## 4 165 102 18 20 6 8 9.354839
## 5 143 87 21 19 5 7 -12.645161
## 6 136 99 21 16 5 7 -19.645161
# Add each day's deviation of PM 2.5 from the March 2017 mean
# (missing readings are ignored when computing the mean).
Dat4 <- Dat4 %>%
  mutate(PM25DTrend = PM25 - mean(PM25, na.rm = TRUE))
head(Dat4)
## PM25 PM10 O3 NO2 SO2 CO PM25DTrend
## 1 174 135 16 38 9 14 22.322581
## 2 179 113 24 29 8 6 27.322581
## 3 174 94 20 26 10 9 22.322581
## 4 134 89 23 21 5 10 -17.677419
## 5 144 107 23 20 7 10 -7.677419
## 6 161 110 25 22 7 13 9.322581
This step assigns each day a category (factor level) based on its PM 2.5 level.
# Classify daily PM 2.5 readings into air-quality categories.
#
# The cut points are the upper limits of the bands used throughout this
# analysis (12, 35.4, 55.4, 150.4, 250.4). Each band is closed on its upper
# limit, so every reading falls into exactly one category. The previous
# if/else chain left gaps between bands (for example a reading of exactly 12,
# or anything between 150.4 and 150.5, got no label) and stopped with an
# error on a missing reading.
#
# pm25:    numeric vector of daily PM 2.5 readings.
# returns: character vector of the same length as `pm25`, with NA wherever
#          the reading itself is NA.
classify_pm25 <- function(pm25) {
  band_upper_limits <- c(12, 35.4, 55.4, 150.4, 250.4)
  band_labels <- c(
    "Good", "Moderate", "UnhealthySG",
    "Unhealthy", "VeryUnhealthy", "Hazardous"
  )
  bands <- cut(
    pm25,
    breaks = c(-Inf, band_upper_limits, Inf),
    labels = band_labels,
    right = TRUE
  )
  # Return plain strings; the caller converts them to a factor so that only
  # the categories actually observed become levels.
  as.character(bands)
}

# One category vector per year (2020, 2019, 2018, 2017).
PM25Lvl1 <- classify_pm25(Dat1$PM25)
PM25Lvl2 <- classify_pm25(Dat2$PM25)
PM25Lvl3 <- classify_pm25(Dat3$PM25)
PM25Lvl4 <- classify_pm25(Dat4$PM25)
# Attach the March 2020 day categories to the data as a factor column,
# then print the full table.
Dat1 <- Dat1 %>%
  mutate(PM25Lvl1 = as.factor(PM25Lvl1))
print(Dat1)
## PM25 PM10 O3 NO2 SO2 CO PM25DTrend PM25Lvl1
## 1 135 128 23 33 NA 19 -1.967742 Unhealthy
## 2 126 209 20 33 5 22 -10.967742 Unhealthy
## 3 192 154 6 38 3 31 55.032258 VeryUnhealthy
## 4 158 129 58 29 7 17 21.032258 VeryUnhealthy
## 5 152 64 16 21 4 18 15.032258 VeryUnhealthy
## 6 94 38 18 19 3 16 -42.967742 Unhealthy
## 7 65 68 13 20 3 14 -71.967742 Unhealthy
## 8 106 137 11 25 5 21 -30.967742 Unhealthy
## 9 172 121 12 22 3 18 35.032258 VeryUnhealthy
## 10 139 234 7 22 1 17 2.032258 Unhealthy
## 11 240 83 17 14 4 14 103.032258 VeryUnhealthy
## 12 133 120 13 19 5 17 -3.967742 Unhealthy
## 13 144 118 15 22 3 12 7.032258 Unhealthy
## 14 127 65 26 19 4 18 -9.967742 Unhealthy
## 15 115 98 16 21 4 18 -21.967742 Unhealthy
## 16 144 145 13 29 2 15 7.032258 Unhealthy
## 17 155 130 14 28 3 16 18.032258 VeryUnhealthy
## 18 154 150 8 30 1 24 17.032258 VeryUnhealthy
## 19 135 171 9 30 4 17 -1.967742 Unhealthy
## 20 180 122 20 21 9 11 43.032258 VeryUnhealthy
## 21 168 170 17 32 6 20 31.032258 VeryUnhealthy
## 22 171 101 69 18 6 18 34.032258 VeryUnhealthy
## 23 156 80 42 15 7 13 19.032258 VeryUnhealthy
## 24 138 102 37 14 8 18 1.032258 Unhealthy
## 25 175 58 22 9 6 10 38.032258 VeryUnhealthy
## 26 117 55 18 7 3 12 -19.967742 Unhealthy
## 27 120 33 18 6 5 11 -16.967742 Unhealthy
## 28 79 35 18 6 3 10 -57.967742 Unhealthy
## 29 75 54 28 6 3 10 -61.967742 Unhealthy
## 30 88 54 35 8 3 12 -48.967742 Unhealthy
## 31 93 61 37 7 4 6 -43.967742 Unhealthy
# Attach the March 2019 day categories to the data as a factor column,
# then print the full table.
Dat2 <- Dat2 %>%
  mutate(PM25Lvl2 = as.factor(PM25Lvl2))
print(Dat2)
## PM25 PM10 O3 NO2 SO2 CO PM25DTrend PM25Lvl2
## 1 182 110 8 20 4 9 35.90322581 VeryUnhealthy
## 2 159 112 3 29 4 9 12.90322581 VeryUnhealthy
## 3 168 79 5 20 3 8 21.90322581 VeryUnhealthy
## 4 135 117 18 14 3 9 -11.09677419 Unhealthy
## 5 176 93 76 15 3 7 29.90322581 VeryUnhealthy
## 6 153 85 8 16 3 9 6.90322581 VeryUnhealthy
## 7 122 121 52 19 4 9 -24.09677419 Unhealthy
## 8 168 113 NA 21 4 9 21.90322581 VeryUnhealthy
## 9 159 104 5 19 4 7 12.90322581 VeryUnhealthy
## 10 141 99 11 18 4 7 -5.09677419 Unhealthy
## 11 139 129 11 27 5 8 -7.09677419 Unhealthy
## 12 146 95 12 19 4 7 -0.09677419 Unhealthy
## 13 113 140 10 31 6 8 -33.09677419 Unhealthy
## 14 154 142 9 25 4 10 7.90322581 VeryUnhealthy
## 15 181 102 13 16 4 7 34.90322581 VeryUnhealthy
## 16 139 97 8 19 4 6 -7.09677419 Unhealthy
## 17 141 117 8 21 4 9 -5.09677419 Unhealthy
## 18 151 120 9 21 4 8 4.90322581 VeryUnhealthy
## 19 157 132 10 22 4 11 10.90322581 VeryUnhealthy
## 20 155 134 10 NA 4 9 8.90322581 VeryUnhealthy
## 21 144 134 11 NA 5 8 -2.09677419 Unhealthy
## 22 175 92 12 40 5 6 28.90322581 VeryUnhealthy
## 23 88 95 13 15 4 6 -58.09677419 Unhealthy
## 24 111 99 12 18 5 6 -35.09677419 Unhealthy
## 25 115 101 11 19 5 7 -31.09677419 Unhealthy
## 26 126 107 13 21 4 7 -20.09677419 Unhealthy
## 27 134 111 14 23 4 8 -12.09677419 Unhealthy
## 28 126 142 17 29 5 12 -20.09677419 Unhealthy
## 29 155 132 14 29 4 11 8.90322581 VeryUnhealthy
## 30 157 135 15 26 5 10 10.90322581 VeryUnhealthy
## 31 159 121 12 18 5 5 12.90322581 VeryUnhealthy
# Attach the March 2018 day categories to the data as a factor column,
# then print the full table.
Dat3 <- Dat3 %>%
  mutate(PM25Lvl3 = as.factor(PM25Lvl3))
print(Dat3)
## PM25 PM10 O3 NO2 SO2 CO PM25DTrend PM25Lvl3
## 1 199 133 22 29 7 12 43.3548387 VeryUnhealthy
## 2 168 161 22 23 8 14 12.3548387 VeryUnhealthy
## 3 192 107 19 22 7 9 36.3548387 VeryUnhealthy
## 4 165 102 18 20 6 8 9.3548387 VeryUnhealthy
## 5 143 87 21 19 5 7 -12.6451613 Unhealthy
## 6 136 99 21 16 5 7 -19.6451613 Unhealthy
## 7 143 97 22 19 9 8 -12.6451613 Unhealthy
## 8 150 140 35 40 17 12 -5.6451613 Unhealthy
## 9 181 168 27 32 19 1 25.3548387 VeryUnhealthy
## 10 207 101 20 19 13 1 51.3548387 VeryUnhealthy
## 11 139 101 24 19 11 0 -16.6451613 Unhealthy
## 12 146 114 21 22 10 0 -9.6451613 Unhealthy
## 13 156 110 12 27 13 0 0.3548387 VeryUnhealthy
## 14 150 161 21 31 13 0 -5.6451613 Unhealthy
## 15 151 158 18 25 10 3 -4.6451613 VeryUnhealthy
## 16 145 138 32 21 13 9 -10.6451613 Unhealthy
## 17 159 117 20 19 11 7 3.3548387 VeryUnhealthy
## 18 163 81 19 18 7 0 7.3548387 VeryUnhealthy
## 19 115 93 24 25 10 5 -40.6451613 Unhealthy
## 20 139 123 28 32 12 13 -16.6451613 Unhealthy
## 21 160 176 21 29 16 10 4.3548387 VeryUnhealthy
## 22 193 113 20 21 8 8 37.3548387 VeryUnhealthy
## 23 151 85 18 16 6 7 -4.6451613 VeryUnhealthy
## 24 130 101 20 17 7 7 -25.6451613 Unhealthy
## 25 136 124 20 17 7 9 -19.6451613 Unhealthy
## 26 156 107 20 18 6 6 0.3548387 VeryUnhealthy
## 27 144 129 22 29 9 2 -11.6451613 Unhealthy
## 28 155 138 26 35 14 3 -0.6451613 VeryUnhealthy
## 29 154 117 22 19 9 6 -1.6451613 VeryUnhealthy
## 30 149 122 21 24 9 10 -6.6451613 Unhealthy
## 31 150 157 20 30 10 11 -5.6451613 Unhealthy
# Attach the March 2017 day categories to the data as a factor column,
# then print the full table.
Dat4 <- Dat4 %>%
  mutate(PM25Lvl4 = as.factor(PM25Lvl4))
print(Dat4)
## PM25 PM10 O3 NO2 SO2 CO PM25DTrend PM25Lvl4
## 1 174 135 16 38 9 14 22.3225806 VeryUnhealthy
## 2 179 113 24 29 8 6 27.3225806 VeryUnhealthy
## 3 174 94 20 26 10 9 22.3225806 VeryUnhealthy
## 4 134 89 23 21 5 10 -17.6774194 Unhealthy
## 5 144 107 23 20 7 10 -7.6774194 Unhealthy
## 6 161 110 25 22 7 13 9.3225806 VeryUnhealthy
## 7 161 119 31 38 9 18 9.3225806 VeryUnhealthy
## 8 165 116 19 24 7 10 13.3225806 VeryUnhealthy
## 9 142 74 14 25 5 10 -9.6774194 Unhealthy
## 10 136 61 10 24 4 9 -15.6774194 Unhealthy
## 11 116 54 21 21 6 9 -35.6774194 Unhealthy
## 12 94 70 20 19 4 10 -57.6774194 Unhealthy
## 13 129 80 24 15 4 10 -22.6774194 Unhealthy
## 14 144 78 28 21 4 13 -7.6774194 Unhealthy
## 15 137 107 24 26 5 12 -14.6774194 Unhealthy
## 16 162 117 30 28 7 13 10.3225806 VeryUnhealthy
## 17 180 105 21 21 6 11 28.3225806 VeryUnhealthy
## 18 172 100 28 25 7 16 20.3225806 VeryUnhealthy
## 19 155 107 28 27 8 16 3.3225806 VeryUnhealthy
## 20 161 123 29 23 5 13 9.3225806 VeryUnhealthy
## 21 175 117 29 24 4 9 23.3225806 VeryUnhealthy
## 22 156 125 27 28 4 14 4.3225806 VeryUnhealthy
## 23 161 175 37 29 6 18 9.3225806 VeryUnhealthy
## 24 194 123 24 23 6 12 42.3225806 VeryUnhealthy
## 25 157 96 25 21 3 10 5.3225806 VeryUnhealthy
## 26 137 140 24 19 5 10 -14.6774194 Unhealthy
## 27 151 107 27 17 5 8 -0.6774194 VeryUnhealthy
## 28 145 130 25 20 7 0 -6.6774194 Unhealthy
## 29 140 114 24 18 3 9 -11.6774194 Unhealthy
## 30 133 99 26 20 6 10 -18.6774194 Unhealthy
## 31 133 385 31 25 7 12 -18.6774194 Unhealthy
This table helps us to know about the number of days with “Good” and “Bad” PM 2.5 Level.
# Count the March 2020 days in each PM 2.5 category.
# The column is named PM25Lvl1; `Dat1$PM25Lvl` only resolved through `$`
# partial matching, which stops working if another column shares the prefix
# or the data is converted to a tibble.
t1 <- table(Dat1$PM25Lvl1)
print("Categorical Days in March '20")
## [1] "Categorical Days in March '20"
t1
##
## Unhealthy VeryUnhealthy
## 19 12
# Count the March 2019 days in each PM 2.5 category.
# The column is named PM25Lvl2; `Dat2$PM25Lvl` only resolved through `$`
# partial matching, which stops working if another column shares the prefix
# or the data is converted to a tibble.
t2 <- table(Dat2$PM25Lvl2)
print("Categorical Days in March '19")
## [1] "Categorical Days in March '19"
t2
##
## Unhealthy VeryUnhealthy
## 15 16
# Count the March 2018 days in each PM 2.5 category.
# The column is named PM25Lvl3; `Dat3$PM25Lvl` only resolved through `$`
# partial matching, which stops working if another column shares the prefix
# or the data is converted to a tibble.
t3 <- table(Dat3$PM25Lvl3)
print("Categorical Days in March '18")
## [1] "Categorical Days in March '18"
t3
##
## Unhealthy VeryUnhealthy
## 15 16
# Count the March 2017 days in each PM 2.5 category.
# The column is named PM25Lvl4; `Dat4$PM25Lvl` only resolved through `$`
# partial matching, which stops working if another column shares the prefix
# or the data is converted to a tibble.
t4 <- table(Dat4$PM25Lvl4)
print("Categorical Days in March '17")
## [1] "Categorical Days in March '17"
t4
##
## Unhealthy VeryUnhealthy
## 14 17
This segment helps us to plot PM 2.5 level data with labels representing the PM 2.5 level on that day.
# Day-of-month index for the 2020 data. Kept as a global for any later code
# that refers to `Days`; the bar charts below build their own index per data
# set so a month with a different number of rows still plots correctly.
Days <- seq_along(Dat1$PM25)

# Build a bar chart of daily PM 2.5 for one month.
#
# dat:     data frame with a numeric `PM25` column and a category column.
# lvl_col: name (string) of the category column used to fill the bars; it is
#          also used as the legend title.
# title:   plot title.
# returns: a ggplot object with one bar per row of `dat`, each bar labelled
#          with its PM 2.5 value.
plot_pm25_bars <- function(dat, lvl_col, title) {
  day_index <- seq_len(nrow(dat))
  ggplot(dat, aes(x = day_index, y = PM25, fill = .data[[lvl_col]])) +
    geom_col() +
    geom_text(aes(label = PM25), vjust = -0.2, size = 2.5) +
    # Axis and legend titles are set explicitly so they read the same as when
    # the columns were mapped by bare name.
    labs(x = "Days", y = "PM 2.5 Lvl", fill = lvl_col) +
    ggtitle(title)
}

p1 <- plot_pm25_bars(Dat1, "PM25Lvl1", "March 2020")
p2 <- plot_pm25_bars(Dat2, "PM25Lvl2", "March 2019")
p3 <- plot_pm25_bars(Dat3, "PM25Lvl3", "March 2018")
p4 <- plot_pm25_bars(Dat4, "PM25Lvl4", "March 2017")

# Show each month on its own, then all four together in one grid.
print(p1)
print(p2)
print(p3)
print(p4)
grid.arrange(p1, p2, p3, p4)
These plots mark the maximum, minimum and mean PM 2.5 level of each month, which helps us to analyze the data on the basis of “Good” & “Bad” days.
# Monthly mean, maximum and minimum PM 2.5 for each year. Missing readings
# are ignored, matching how the monthly mean is computed for PM25DTrend.
m1 <- mean(Dat1$PM25, na.rm = TRUE)
m2 <- mean(Dat2$PM25, na.rm = TRUE)
m3 <- mean(Dat3$PM25, na.rm = TRUE)
m4 <- mean(Dat4$PM25, na.rm = TRUE)
max1 <- max(Dat1$PM25, na.rm = TRUE)
max2 <- max(Dat2$PM25, na.rm = TRUE)
max3 <- max(Dat3$PM25, na.rm = TRUE)
max4 <- max(Dat4$PM25, na.rm = TRUE)
min1 <- min(Dat1$PM25, na.rm = TRUE)
min2 <- min(Dat2$PM25, na.rm = TRUE)
min3 <- min(Dat3$PM25, na.rm = TRUE)
min4 <- min(Dat4$PM25, na.rm = TRUE)

# Build a scatter plot of daily PM 2.5 for one month with the maximum,
# minimum and mean highlighted.
#
# dat:               data frame with a numeric `PM25` column and a category
#                    column.
# lvl_col:           name (string) of the category column used to colour the
#                    points; also used as the legend title.
# title:             plot title.
# mean_label_offset: vertical distance between the mean line and the
#                    "Mean Lvl" caption drawn above it.
# returns:           a ggplot object.
plot_pm25_points <- function(dat, lvl_col, title, mean_label_offset = 5) {
  pm25 <- dat$PM25
  day_index <- seq_len(nrow(dat))
  pm25_max <- max(pm25, na.rm = TRUE)
  pm25_min <- min(pm25, na.rm = TRUE)
  pm25_mean <- mean(pm25, na.rm = TRUE)
  # which() keeps every day that ties for the extreme value.
  max_day <- which(pm25 == pm25_max)
  min_day <- which(pm25 == pm25_min)

  # Fixed labels are drawn with annotate(): geom_text() with constant
  # coordinates inherits the plot data and draws each label once per row,
  # stacking identical copies on top of each other.
  ggplot(dat, aes(x = day_index, y = PM25)) +
    geom_point(aes(col = .data[[lvl_col]])) +
    labs(x = "Days", y = "PM 2.5 Lvl", colour = lvl_col) +
    ggtitle(title) +
    annotate("point", x = max_day, y = pm25_max, col = "red") +
    annotate("text", x = max_day, y = pm25_max + 3,
             label = signif(pm25_max, digits = 6), size = 3) +
    annotate("point", x = min_day, y = pm25_min, col = "green") +
    annotate("text", x = min_day, y = pm25_min + 3,
             label = signif(pm25_min, digits = 6), size = 3) +
    annotate("text", x = max_day + 3, y = pm25_max + 3,
             label = "Max. PM2.5", size = 3) +
    annotate("text", x = min_day + 3, y = pm25_min + 3,
             label = "Min. PM2.5", size = 3) +
    geom_hline(yintercept = pm25_mean, col = "yellow",
               linetype = "dashed", size = 1) +
    annotate("text", x = 30, y = pm25_mean + mean_label_offset,
             label = "Mean Lvl", size = 3) +
    annotate("text", x = 30, y = pm25_mean - 5,
             label = signif(pm25_mean, digits = 6), size = 3)
}

# The 2020 plot keeps its larger gap between the mean line and its caption.
d1 <- plot_pm25_points(Dat1, "PM25Lvl1", "March 2020", mean_label_offset = 10)
d2 <- plot_pm25_points(Dat2, "PM25Lvl2", "March 2019")
d3 <- plot_pm25_points(Dat3, "PM25Lvl3", "March 2018")
d4 <- plot_pm25_points(Dat4, "PM25Lvl4", "March 2017")

# Show each month on its own, then all four together in one grid.
print(d1)
print(d2)
print(d3)
print(d4)
grid.arrange(d1, d2, d3, d4)
Based on the above two types of plots, we can see that the month of March was cleanest this year in comparison to the last 3 years. The mean PM 2.5 level recorded was the lowest this year, and the number of days with a “Very Unhealthy” PM 2.5 level was also lower than in the month of March in the last 3 years.
I have used the data from this Website for this analysis cum project.