library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(viridis)
## Loading required package: viridisLite
library(ggrepel)
## Loading required package: ggplot2
library(readr)
## 
## Attaching package: 'readr'
## The following object is masked from 'package:rvest':
## 
##     guess_encoding
library(ggthemes)
library(ggplot2)

Rainfall Analysis

The historical Kerala rainfall data is read, and the focus is narrowed down to the period from 2000 to 2017.

# Load the historical Kerala rainfall table from CSV.
df_rain <- read.csv(file = "Kerala-Rainfall-Historical.csv")
# Keep only the rows whose YEAR lies in 2000-2017 (both ends included).
df_filtered <- filter(df_rain, YEAR >= 2000, YEAR <= 2017)
# Preview the first rows to confirm the filter was applied.
head(df_filtered)
##   SUBDIVISION YEAR  JAN  FEB  MAR   APR   MAY   JUN   JUL   AUG   SEP   OCT
## 1      Kerala 2000 11.7 57.8 21.5  96.3 124.5 633.8 343.2 566.5 195.8 214.2
## 2      Kerala 2001 16.5 28.3  7.0 238.0 238.6 715.3 598.5 361.3 216.8 319.6
## 3      Kerala 2002  4.7  8.7 35.7 117.3 330.8 503.1 318.7 438.2  99.0 511.7
## 4      Kerala 2003  0.7 50.9 82.1 134.4  91.0 566.7 532.0 350.3  93.6 407.0
## 5      Kerala 2004  2.4  8.1 37.9 113.2 610.9 673.4 385.4 417.9 192.8 320.6
## 6      Kerala 2005 19.8  7.0 25.3 205.9 134.8 619.2 832.7 291.0 414.7 240.1
##     NOV  DEC ANNUAL   JF   MAM   JJAS   OND
## 1  78.1 69.1 2412.6 69.5 242.3 1739.4 361.5
## 2 181.0 10.1 2931.1 44.7 483.7 1892.0 510.7
## 3 137.5  2.1 2507.4 13.3 483.7 1359.0 651.3
## 4  76.4  9.7 2394.9 51.6 307.5 1542.6 493.1
## 5 120.7  2.7 2886.1 10.5 762.0 1669.5 444.0
## 6 184.3 56.4 3031.1 26.8 366.0 2157.6 480.7
# Print the complete filtered data frame (every row, not only the head).
df_filtered
##    SUBDIVISION YEAR  JAN  FEB   MAR   APR   MAY    JUN   JUL   AUG   SEP   OCT
## 1       Kerala 2000 11.7 57.8  21.5  96.3 124.5  633.8 343.2 566.5 195.8 214.2
## 2       Kerala 2001 16.5 28.3   7.0 238.0 238.6  715.3 598.5 361.3 216.8 319.6
## 3       Kerala 2002  4.7  8.7  35.7 117.3 330.8  503.1 318.7 438.2  99.0 511.7
## 4       Kerala 2003  0.7 50.9  82.1 134.4  91.0  566.7 532.0 350.3  93.6 407.0
## 5       Kerala 2004  2.4  8.1  37.9 113.2 610.9  673.4 385.4 417.9 192.8 320.6
## 6       Kerala 2005 19.8  7.0  25.3 205.9 134.8  619.2 832.7 291.0 414.7 240.1
## 7       Kerala 2006  8.1  0.5  90.7  65.3 521.2  482.4 804.0 432.6 474.8 376.4
## 8       Kerala 2007  0.5  5.6   7.3 138.5 192.7  705.9 966.3 489.6 526.7 357.2
## 9       Kerala 2008  0.8 30.3 217.2 108.4  81.2  469.9 505.1 349.0 347.0 343.4
## 10      Kerala 2009  3.3  1.5  62.6  69.0 191.6  438.2 924.9 269.3 326.5 205.2
## 11      Kerala 2010 18.6  1.0  31.4 138.9 190.6  667.5 629.0 356.0 275.6 441.4
## 12      Kerala 2011 20.5 45.7  24.1 165.2 124.2  788.5 536.8 492.7 391.2 227.2
## 13      Kerala 2012  7.4 11.0  21.0 171.1  95.3  430.3 362.6 501.6 241.1 187.5
## 14      Kerala 2013  3.9 40.1  49.9  49.3 119.3 1042.7 830.2 369.7 318.6 259.9
## 15      Kerala 2014  4.6 10.3  17.9  95.7 251.0  454.4 677.8 733.9 298.8 355.5
## 16      Kerala 2015  3.1  5.8  50.1 214.1 201.8  563.6 406.0 252.2 292.9 308.1
## 17      Kerala 2016  3.0 16.4  22.4  33.3 258.4  595.7 441.5 231.0  84.1 105.1
## 18      Kerala 2017 12.7  0.3  87.8  52.8 213.3  579.8 378.5 462.6 435.5 228.0
##      NOV  DEC ANNUAL   JF   MAM   JJAS   OND
## 1   78.1 69.1 2412.6 69.5 242.3 1739.4 361.5
## 2  181.0 10.1 2931.1 44.7 483.7 1892.0 510.7
## 3  137.5  2.1 2507.4 13.3 483.7 1359.0 651.3
## 4   76.4  9.7 2394.9 51.6 307.5 1542.6 493.1
## 5  120.7  2.7 2886.1 10.5 762.0 1669.5 444.0
## 6  184.3 56.4 3031.1 26.8 366.0 2157.6 480.7
## 7  162.8  1.8 3420.6  8.6 677.2 2193.8 541.0
## 8   87.4 11.9 3489.6  6.1 338.4 2688.5 456.5
## 9   55.4 17.0 2524.5 31.1 406.7 1670.9 415.7
## 10 274.4 44.2 2810.6  4.8 323.1 1958.9 523.8
## 11 335.1 46.8 3131.8 19.6 360.9 1928.0 823.3
## 12 169.7 49.5 3035.1 66.2 313.5 2209.1 446.3
## 13 112.9  9.4 2151.1 18.3 287.4 1535.6 309.8
## 14 154.9 17.0 3255.4 43.9 218.5 2561.2 431.8
## 15  99.5 47.2 3046.4 14.9 364.5 2164.8 502.1
## 16 223.6 79.4 2600.6  8.9 465.9 1514.7 611.1
## 17  57.9 22.0 1870.9 19.3 314.2 1352.3 185.0
## 18 152.1 61.4 2664.9 13.0 353.9 1856.5 441.5
# Per-column summary statistics for the filtered rainfall data.
df_filtered %>%
  summary()
##  SUBDIVISION             YEAR           JAN              FEB       
##  Length:18          Min.   :2000   Min.   : 0.500   Min.   : 0.30  
##  Class :character   1st Qu.:2004   1st Qu.: 3.025   1st Qu.: 5.65  
##  Mode  :character   Median :2008   Median : 4.650   Median : 9.50  
##                     Mean   :2008   Mean   : 7.906   Mean   :18.29  
##                     3rd Qu.:2013   3rd Qu.:12.450   3rd Qu.:29.80  
##                     Max.   :2017   Max.   :20.500   Max.   :57.80  
##       MAR              APR              MAY             JUN        
##  Min.   :  7.00   Min.   : 33.30   Min.   : 81.2   Min.   : 430.3  
##  1st Qu.: 21.73   1st Qu.: 75.67   1st Qu.:124.3   1st Qu.: 487.6  
##  Median : 33.55   Median :115.25   Median :192.2   Median : 587.8  
##  Mean   : 49.55   Mean   :122.59   Mean   :220.6   Mean   : 607.2  
##  3rd Qu.: 59.48   3rd Qu.:158.62   3rd Qu.:247.9   3rd Qu.: 671.9  
##  Max.   :217.20   Max.   :238.00   Max.   :610.9   Max.   :1042.7  
##       JUL             AUG             SEP             OCT       
##  Min.   :318.7   Min.   :231.0   Min.   : 84.1   Min.   :105.1  
##  1st Qu.:390.6   1st Qu.:349.3   1st Qu.:201.1   1st Qu.:227.4  
##  Median :534.4   Median :393.8   Median :295.9   Median :313.9  
##  Mean   :581.8   Mean   :409.2   Mean   :290.3   Mean   :300.4  
##  3rd Qu.:772.5   3rd Qu.:482.9   3rd Qu.:380.1   3rd Qu.:356.8  
##  Max.   :966.3   Max.   :733.9   Max.   :526.7   Max.   :511.7  
##       NOV              DEC            ANNUAL           JF       
##  Min.   : 55.40   Min.   : 1.80   Min.   :1871   Min.   : 4.80  
##  1st Qu.: 90.42   1st Qu.: 9.80   1st Qu.:2512   1st Qu.:11.12  
##  Median :144.80   Median :19.50   Median :2848   Median :18.80  
##  Mean   :147.98   Mean   :30.98   Mean   :2787   Mean   :26.17  
##  3rd Qu.:178.18   3rd Qu.:48.92   3rd Qu.:3044   3rd Qu.:40.70  
##  Max.   :335.10   Max.   :79.40   Max.   :3490   Max.   :69.50  
##       MAM             JJAS           OND       
##  Min.   :218.5   Min.   :1352   Min.   :185.0  
##  1st Qu.:313.7   1st Qu.:1574   1st Qu.:434.2  
##  Median :357.4   Median :1874   Median :468.6  
##  Mean   :392.7   Mean   :1889   Mean   :479.4  
##  3rd Qu.:451.1   3rd Qu.:2163   3rd Qu.:520.5  
##  Max.   :762.0   Max.   :2688   Max.   :823.3
# Visualization: annual rainfall bar plot, one bar per year.
# - names.arg labels each bar with its year; without it the "Year" axis
#   carries no tick labels at all.
# - xlim is left to barplot(): with the default bar width (1) and spacing
#   (0.2) the 18 bars extend to about x = 21.6, so the former
#   xlim = c(0, 20) did not span the last bars.
# - The annual totals are of the order of 1,900-3,500, which matches
#   millimetres rather than centimetres, hence the corrected unit label.
# - las = 2 / cex.names keep all 18 year labels readable.
barplot(df_filtered$ANNUAL,
        names.arg = df_filtered$YEAR,
        main = "Annual Rainfall in Kerala (2000-2017)",
        xlab = "Year", ylab = "Rainfall (mm)",
        ylim = c(0, 3500),
        las = 2, cex.names = 0.8)

Pollution Analysis

The Delhi pollution data is imported, its column types and summary statistics are inspected, and the missing values are counted.

# Read the Delhi air-quality measurements from CSV.
Delhi_Pollution <- read.csv(file = "delhi pollution.csv")
# Show the top rows of the table as a quick sanity check.
Delhi_Pollution %>%
  head()
##   Date Month Year Holidays_Count Days  PM2.5   PM10    NO2   SO2   CO Ozone AQI
## 1    1     1 2021              0    5 408.80 442.42 160.61 12.95 2.77 43.19 462
## 2    2     1 2021              0    6 404.04 561.95  52.85  5.18 2.60 16.43 482
## 3    3     1 2021              1    7 225.07 239.04 170.95 10.93 1.40 44.29 263
## 4    4     1 2021              0    1  89.55 132.08 153.98 10.42 1.01 49.19 207
## 5    5     1 2021              0    2  54.06  55.54 122.66  9.70 0.64 48.88 149
## 6    6     1 2021              0    3 155.59 180.14 142.71 10.29 1.18 44.47 252
# Check the class/data type of each column.
# sapply() applies class() to every column of Delhi_Pollution, and
# as.data.frame() turns the result into a one-column table whose row names
# are the pollution column names.
# The single column's header is the text of the sapply(...) call itself (see
# the printed output), so rewording this expression changes that header.
cls <- as.data.frame(sapply(Delhi_Pollution,FUN = class))
# Print the class table.
cls
##                sapply(Delhi_Pollution, FUN = class)
## Date                                        integer
## Month                                       integer
## Year                                        integer
## Holidays_Count                              integer
## Days                                        integer
## PM2.5                                       numeric
## PM10                                        numeric
## NO2                                         numeric
## SO2                                         numeric
## CO                                          numeric
## Ozone                                       numeric
## AQI                                         integer
# Per-column summary statistics for the pollution data.
summary(object = Delhi_Pollution)
##       Date           Month             Year      Holidays_Count  
##  Min.   : 1.00   Min.   : 1.000   Min.   :2021   Min.   :0.0000  
##  1st Qu.: 8.00   1st Qu.: 4.000   1st Qu.:2022   1st Qu.:0.0000  
##  Median :16.00   Median : 7.000   Median :2023   Median :0.0000  
##  Mean   :15.73   Mean   : 6.523   Mean   :2023   Mean   :0.1896  
##  3rd Qu.:23.00   3rd Qu.:10.000   3rd Qu.:2024   3rd Qu.:0.0000  
##  Max.   :31.00   Max.   :12.000   Max.   :2024   Max.   :1.0000  
##       Days           PM2.5              PM10              NO2        
##  Min.   :1.000   Min.   :   0.05   Min.   :   9.69   Min.   :  2.16  
##  1st Qu.:2.000   1st Qu.:  41.28   1st Qu.: 115.11   1st Qu.: 17.28  
##  Median :4.000   Median :  72.06   Median : 199.80   Median : 30.49  
##  Mean   :4.001   Mean   :  90.77   Mean   : 218.22   Mean   : 37.18  
##  3rd Qu.:6.000   3rd Qu.: 118.50   3rd Qu.: 297.75   3rd Qu.: 45.01  
##  Max.   :7.000   Max.   :1000.00   Max.   :1000.00   Max.   :433.98  
##       SO2               CO            Ozone             AQI       
##  Min.   :  1.21   Min.   :0.270   Min.   :  2.70   Min.   : 19.0  
##  1st Qu.:  7.71   1st Qu.:0.610   1st Qu.: 24.10   1st Qu.:108.0  
##  Median : 15.43   Median :0.850   Median : 32.47   Median :189.0  
##  Mean   : 20.10   Mean   :1.026   Mean   : 36.34   Mean   :202.2  
##  3rd Qu.: 26.62   3rd Qu.:1.240   3rd Qu.: 45.73   3rd Qu.:284.0  
##  Max.   :113.40   Max.   :4.700   Max.   :115.87   Max.   :500.0
# Data check: total number of missing (NA) cells across the whole table.
Delhi_Pollution %>%
  is.na() %>%
  sum()
## [1] 0
# Data manipulation: keep only the 2023 rows of the pollution data.
# which() drops any NA comparison results, so a missing Year can never
# introduce an all-NA row into the subset.
df_2023 <- Delhi_Pollution[which(Delhi_Pollution$Year == 2023), ]
# Visualization: side-by-side box plots of PM2.5 and PM10 for 2023.
# Label fixes: "Pollutents" -> "Pollutants", and the stray leading space in
# the y-axis label is removed.
# NOTE(review): ylim = c(0, 550) hides any reading above 550; the full data
# set peaks at 1000 for both pollutants - confirm nothing in 2023 is cut off.
boxplot(df_2023$PM2.5, df_2023$PM10,
        main = "Comparison of PM-2.5 and PM-10 in 2023",
        names = c("PM 2.5", "PM 10"),
        xlab = "Pollutants", ylab = "Concentration (µg/m³)",
        xlim = c(0, 3), ylim = c(0, 550))

Population Analysis

The population data is imported from “populaion.csv”, and its structure is inspected.

# Import the population data.
populaion <- read.csv("populaion.csv")
# Open the spreadsheet-style viewer only in an interactive session:
# View() needs a data viewer / GUI and would otherwise stop a
# non-interactive run (Rscript, knitting) of this script.
if (interactive()) {
  View(populaion)
}
# Display the first few rows of the data
head(populaion)
##   Year   Population X..Increase.in.Population Population.Density
## 1 1950    357021100                     0.00%             108.61
## 2 1951 36,49,22,360                     2.21%             111.01
## 3 1952 37,29,97,188                     2.21%             113.47
## 4 1953 38,12,27,705                     2.21%             115.97
## 5 1954 38,97,31,406                     2.23%             118.56
## 6 1955 39,85,77,992                     2.27%             121.25
##   X..Increase.in.Population.Density Urban.Population
## 1                             0.00%             Null
## 2                             2.21%             Null
## 3                             2.21%             Null
## 4                             2.21%             Null
## 5                             2.23%             Null
## 6                             2.27%             Null
##   Urban.Population...of.Total.Population X..Increase.in.Urban.Population
## 1                                   Null                            Null
## 2                                   Null                            Null
## 3                                   Null                            Null
## 4                                   Null                            Null
## 5                                   Null                            Null
## 6                                   Null                            Null
##   Rural.Population Rural.Population...of.Total.Population
## 1             Null                                   Null
## 2             Null                                   Null
## 3             Null                                   Null
## 4             Null                                   Null
## 5             Null                                   Null
## 6             Null                                   Null
##   X..Increase.in.Rural.Population Life.Expectancy
## 1                            Null           35.21
## 2                            Null           35.80
## 3                            Null           36.39
## 4                            Null           36.98
## 5                            Null           37.57
## 6                            Null           38.16
##   X..Increase.in.Life.Expectancy Birth.Rate X..Change.in.Birth.Rate Death.Rate
## 1                          0.00%     44.175                   0.00%     28.161
## 2                          1.68%     43.970                  -0.46%     27.584
## 3                          1.65%     43.764                  -0.47%     27.008
## 4                          1.62%     43.558                  -0.47%     26.432
## 5                          1.60%     43.352                  -0.47%     25.856
## 6                          1.57%     43.146                  -0.48%     25.280
##   X..Change.in.Death.Rate Infant.Mortality.Rate
## 1                   0.00%               189.629
## 2                  -2.05%               186.737
## 3                  -2.09%               183.846
## 4                  -2.13%               180.954
## 5                  -2.18%               178.062
## 6                  -2.23%               175.171
##   X..Change.in.Infant.Mortality.Rate Fertility.Rate X..Change.in.Fertility.Rate
## 1                              0.00%          5.907                       0.00%
## 2                             -1.53%          5.906                      -0.02%
## 3                             -1.55%          5.904                      -0.03%
## 4                             -1.57%          5.903                      -0.02%
## 5                             -1.60%          5.902                      -0.02%
## 6                             -1.62%          5.900                      -0.03%
##   Net.Migration.Rate X..Change.in.Net.Migration.Rate
## 1             -0.043                           0.00%
## 2             -0.047                           9.30%
## 3             -0.050                           6.38%
## 4             -0.054                           8.00%
## 5             -0.058                           7.41%
## 6             -0.061                           5.17%
# Check the class/data type of each column.
# sapply() applies class() to every column of populaion, and as.data.frame()
# turns the result into a one-column table whose row names are the population
# column names. This reassigns the `cls` variable.
# The single column's header is the text of the sapply(...) call itself (see
# the printed output), so rewording this expression changes that header.
cls <- as.data.frame(sapply(populaion,FUN = class))
# Print the class table.
cls
##                                        sapply(populaion, FUN = class)
## Year                                                          integer
## Population                                                  character
## X..Increase.in.Population                                   character
## Population.Density                                            numeric
## X..Increase.in.Population.Density                           character
## Urban.Population                                            character
## Urban.Population...of.Total.Population                      character
## X..Increase.in.Urban.Population                             character
## Rural.Population                                            character
## Rural.Population...of.Total.Population                      character
## X..Increase.in.Rural.Population                             character
## Life.Expectancy                                               numeric
## X..Increase.in.Life.Expectancy                              character
## Birth.Rate                                                    numeric
## X..Change.in.Birth.Rate                                     character
## Death.Rate                                                    numeric
## X..Change.in.Death.Rate                                     character
## Infant.Mortality.Rate                                         numeric
## X..Change.in.Infant.Mortality.Rate                          character
## Fertility.Rate                                                numeric
## X..Change.in.Fertility.Rate                                 character
## Net.Migration.Rate                                            numeric
## X..Change.in.Net.Migration.Rate                             character
# Per-column summary statistics for the population data.
populaion %>%
  summary()
##       Year       Population        X..Increase.in.Population Population.Density
##  Min.   :1950   Length:73          Length:73                 Min.   :108.6     
##  1st Qu.:1968   Class :character   Class :character          1st Qu.:162.3     
##  Median :1986   Mode  :character   Mode  :character          Median :242.7     
##  Mean   :1986                                                Mean   :254.8     
##  3rd Qu.:2004                                                3rd Qu.:345.7     
##  Max.   :2022                                                Max.   :431.1     
##  X..Increase.in.Population.Density Urban.Population  
##  Length:73                         Length:73         
##  Class :character                  Class :character  
##  Mode  :character                  Mode  :character  
##                                                      
##                                                      
##                                                      
##  Urban.Population...of.Total.Population X..Increase.in.Urban.Population
##  Length:73                              Length:73                      
##  Class :character                       Class :character               
##  Mode  :character                       Mode  :character               
##                                                                        
##                                                                        
##                                                                        
##  Rural.Population   Rural.Population...of.Total.Population
##  Length:73          Length:73                             
##  Class :character   Class :character                      
##  Mode  :character   Mode  :character                      
##                                                           
##                                                           
##                                                           
##  X..Increase.in.Rural.Population Life.Expectancy X..Increase.in.Life.Expectancy
##  Length:73                       Min.   :35.21   Length:73                     
##  Class :character                1st Qu.:46.10   Class :character              
##  Mode  :character                Median :55.98   Mode  :character              
##                                  Mean   :54.86                                 
##                                  3rd Qu.:63.91                                 
##                                  Max.   :70.19                                 
##    Birth.Rate    X..Change.in.Birth.Rate   Death.Rate    
##  Min.   :17.16   Length:73               Min.   : 7.237  
##  1st Qu.:24.75   Class :character        1st Qu.: 8.261  
##  Median :34.02   Mode  :character        Median :12.008  
##  Mean   :32.18                           Mean   :13.869  
##  3rd Qu.:39.77                           3rd Qu.:18.368  
##  Max.   :44.17                           Max.   :28.161  
##  X..Change.in.Death.Rate Infant.Mortality.Rate
##  Length:73               Min.   : 27.70       
##  Class :character        1st Qu.: 57.85       
##  Mode  :character        Median : 98.21       
##                          Mean   :101.89       
##                          3rd Qu.:145.81       
##                          Max.   :189.63       
##  X..Change.in.Infant.Mortality.Rate Fertility.Rate  X..Change.in.Fertility.Rate
##  Length:73                          Min.   :2.159   Length:73                  
##  Class :character                   1st Qu.:3.071   Class :character           
##  Mode  :character                   Median :4.432   Mode  :character           
##                                     Mean   :4.292                              
##                                     3rd Qu.:5.723                              
##                                     Max.   :5.907                              
##  Net.Migration.Rate X..Change.in.Net.Migration.Rate
##  Min.   :-0.44600   Length:73                      
##  1st Qu.:-0.34300   Class :character               
##  Median :-0.06800   Mode  :character               
##  Mean   :-0.06993                                  
##  3rd Qu.: 0.03900                                  
##  Max.   : 0.71500
# Filter the population data to 2010-2022 and keep only Year and Population,
# storing the result in a new data frame called 'df_filtered_pop'.
df_filtered_pop <- populaion %>%
  filter(Year >= 2010 & Year <= 2022) %>%
  select(Year, Population)
# Population is read as character, and some rows of the file use comma digit
# grouping (e.g. "36,49,22,360"). Strip the commas before converting so such
# values become numbers instead of NA; comma-free values are unaffected.
df_filtered_pop$Population <- as.numeric(
  gsub(",", "", df_filtered_pop$Population, fixed = TRUE)
)
# Print the filtered table to verify the conversion.
df_filtered_pop
##    Year Population
## 1  2010 1240613620
## 2  2011 1257621191
## 3  2012 1274487215
## 4  2013 1291132063
## 5  2014 1307246509
## 6  2015 1322866505
## 7  2016 1338636340
## 8  2017 1354195680
## 9  2018 1369003306
## 10 2019 1383112050
## 11 2020 1396387127
## 12 2021 1407563842
## 13 2022 1417173173
# Visualization: population trend as a line chart of Population against Year.
# The title states 2010-2022, the range df_filtered_pop is actually filtered
# to (it previously claimed 2000-2022).
ggplot(data = df_filtered_pop, aes(x = Year, y = Population)) +
  geom_line(color = "darkblue", linewidth = 1.2) +
  labs(
    title = "Population Trend (2010-2022)",
    x = "Year",
    y = "Total Population"
  )