Project Details

A small Exploratory Analysis Project based on COVID-19 data for different Indian States.

# Default figure dimensions applied to every chunk in this report.
knitr::opts_chunk$set(
  fig.width = 12,
  fig.height = 8
)

Loading required packages

library(covid19.analytics)
## Warning: package 'covid19.analytics' was built under R version 3.6.3
library(ggplot2)

Fetching Data

# Fetch the "aggregated" case data and keep only the rows whose
# Country_Region is India.
aggdat <- covid19.data(case = "aggregated")
## Data being read from JHU/CCSE repository
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/08-03-2021.csv
data <- aggdat[aggdat$Country_Region == "India", ]
# Only open the data viewer in an interactive session; View() is a GUI side
# effect that has no place in a non-interactive run (knitting, Rscript).
if (interactive()) {
  View(data)
}

# Fetch the "ts-confirmed" time series and keep only the rows whose
# Country.Region is India.
tsdat <- covid19.data(case = "ts-confirmed")
## Data being read from JHU/CCSE repository
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv
## Data retrieved on 2021-08-04 15:37:51 || Range of dates on data: 2020-01-22--2021-08-03 | Nbr of records: 279
## --------------------------------------------------------------------------------
tsdata <- tsdat[tsdat$Country.Region == "India", ]
# Only open the data viewer in an interactive session; View() is a GUI side
# effect that has no place in a non-interactive run (knitting, Rscript).
if (interactive()) {
  View(tsdata)
}

# Fetch the "ts-deaths" time series and keep only the rows whose
# Country.Region is India.
tsdat2 <- covid19.data(case = "ts-deaths")
## Data being read from JHU/CCSE repository
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv
## Data retrieved on 2021-08-04 15:37:54 || Range of dates on data: 2020-01-22--2021-08-03 | Nbr of records: 279
## --------------------------------------------------------------------------------
tsdata2 <- tsdat2[tsdat2$Country.Region == "India", ]
# Only open the data viewer in an interactive session; View() is a GUI side
# effect that has no place in a non-interactive run (knitting, Rscript).
if (interactive()) {
  View(tsdata2)
}

data is the aggregated data, i.e. it contains the confirmed, recovered, and other variables, while tsdata is the time-series data of confirmed cases and tsdata2 is the time-series data of deaths.

Important Plots

Confirmed Cases Per Day

# Daily new confirmed cases: successive differences of the values in row 1 of
# tsdata, taken over columns 5..ncol(tsdata). This is exactly what the former
# index loop computed (tsdata[1, i + 5] - tsdata[1, i + 4]), but diff() also
# yields an empty vector instead of failing when there is only one such column.
# NOTE(review): presumably columns 1-4 hold region metadata and the cumulative
# daily counts start at column 5 -- confirm against the covid19.data() output.
CTperday <- diff(as.numeric(unlist(tsdata[1, 5:ncol(tsdata)])))

# Day index (1, 2, ...) paired with the per-day increments.
ct <- data.frame(Days = seq_along(CTperday), NC = CTperday)

# Bar chart of the per-day increments; bar fill follows the bar's own value.
ggplot(ct, aes(x = Days, y = NC, fill = NC)) +
  geom_bar(stat = "identity") +
  xlab("Days") +
  ylab("No. of Cases Per Day")

Deaths Per Day

# Daily new deaths: successive differences of the values in row 1 of tsdata2,
# taken over columns 5..ncol(tsdata2). This is the same computation as the
# former index loop (tsdata2[1, i + 5] - tsdata2[1, i + 4]).
# NOTE(review): presumably columns 1-4 hold region metadata and the cumulative
# daily counts start at column 5 -- confirm against the covid19.data() output.
DTperday <- diff(as.numeric(unlist(tsdata2[1, 5:ncol(tsdata2)])))

# The day index is derived from DTperday itself. It was previously sized from
# tsdata (the confirmed-cases table), which would make data.frame() fail
# whenever the two tables had a different number of columns.
dt <- data.frame(Days = seq_along(DTperday), ND = DTperday)

# Bar chart of the per-day increments; bar fill follows the bar's own value.
ggplot(dt, aes(x = Days, y = ND, fill = ND)) +
  geom_bar(stat = "identity") +
  xlab("Days") +
  ylab("No. of Deaths Per Day")

Confirmed Cases Per Day vs Deaths Per Day

  # Overlay of deaths per day (red bars) and confirmed cases per day (blue
  # line) on a shared x axis. Deaths are multiplied by a fixed factor so both
  # series fit on the primary axis; the secondary axis divides by the same
  # factor so its tick labels read as deaths.
  death_axis_scale <- 40

  ggplot(dt, aes(x = Days, y = death_axis_scale * ND)) +
    geom_bar(stat = "identity", col = "red") +
    xlab("Days") +
    ylab("No. of Cases Per Day") +
    # The line layer gets its own data frame instead of reaching into ct with
    # `$` inside aes(), so x and y are guaranteed to come from the same table.
    geom_line(data = ct, aes(x = Days, y = NC), size = 1, col = "blue") +
    scale_y_continuous(
      sec.axis = sec_axis(~ . / death_axis_scale, name = "No. of Deaths Per Day ")
    )

Some Important Figures

# Totals over all rows of `data`.
tc <- sum(data$Confirmed)
td <- sum(data$Deaths)
tr <- sum(data$Recovered)
ta <- sum(data$Active)

# Each total as a percentage of confirmed cases, to 8 significant digits.
rr <- signif((tr / tc) * 100, digits = 8)
dr <- signif((td / tc) * 100, digits = 8)
# NOTE(review): `ar` is computed but is not included in df1 below.
ar <- signif((ta / tc) * 100, digits = 8)

# One-row summary table of the totals and the recovery/death rates.
df1 <- data.frame(
  Total_Confirmed_cases = tc,
  Total_Deaths = td,
  Total_Recovered_Cases = tr,
  Total_Active_Cases = ta,
  Recovery_Rate = rr,
  Death_Rate = dr
)
df1
##   Total_Confirmed_cases Total_Deaths Total_Recovered_Cases Total_Active_Cases
## 1              31769132       425757              30933022             410353
##   Recovery_Rate Death_Rate
## 1      97.36817   1.340159

States most affected by COVID-19

# Ten rows of `data` with the largest Active counts, largest first.
# `decreasing` is spelled TRUE rather than T, because T is an ordinary
# variable that can be reassigned.
print("Top 10 States/UT with most active cases: ")
## [1] "Top 10 States/UT with most active cases: "
head(
  data[order(data$Active, decreasing = TRUE), c("Combined_Key", "Active")],
  n = 10
)
##              Combined_Key Active
## 266         Kerala, India 173736
## 270    Maharashtra, India  77729
## 265      Karnataka, India  24305
## 280     Tamil Nadu, India  20217
## 251 Andhra Pradesh, India  20170
## 275         Odisha, India  12676
## 273        Mizoram, India  12663
## 253          Assam, India  12440
## 286    West Bengal, India  10767
## 271        Manipur, India   9490
# Ten rows of `data` with the largest Deaths counts, largest first.
print("Top 10 States/UT with most deaths: ")
## [1] "Top 10 States/UT with most deaths: "
head(
  data[order(data$Deaths, decreasing = TRUE), c("Combined_Key", "Deaths")],
  n = 10
)
##              Combined_Key Deaths
## 270    Maharashtra, India 133215
## 265      Karnataka, India  36650
## 280     Tamil Nadu, India  34159
## 258          Delhi, India  25058
## 284  Uttar Pradesh, India  22765
## 286    West Bengal, India  18170
## 266         Kerala, India  17103
## 277         Punjab, India  16299
## 256   Chhattisgarh, India  13530
## 251 Andhra Pradesh, India  13428

States with best recovery rate & death rate

# Recovery and death rates per row of `data`, as a percentage of confirmed
# cases, to 8 significant digits. The arithmetic is done on whole columns, so
# the result always has one entry per row; the former loop assumed exactly 37
# rows and grew its result vectors element by element.
rarr <- signif((data$Recovered / data$Confirmed) * 100, digits = 8)
darr <- signif((data$Deaths / data$Confirmed) * 100, digits = 8)

df2 <- data.frame(
  State_UT = data$Province_State,
  Recovery_Rate = rarr,
  Death_Rate = darr
)
# NOTE(review): row 34 is dropped by position. Presumably it is a placeholder
# entry rather than a real state/UT -- confirm, and consider filtering by name
# so the exclusion survives changes in row order.
df2 <- df2[-34, ]

# Ten rows of df2 with the highest Recovery_Rate, highest first.
# `decreasing` is spelled TRUE rather than T, because T is an ordinary
# variable that can be reassigned.
print("Top 10 States/UT with best recovery rate: ")
## [1] "Top 10 States/UT with best recovery rate: "
head(df2[order(df2$Recovery_Rate, decreasing = TRUE), ], n = 10)
##                                    State_UT Recovery_Rate Death_Rate
## 8  Dadra and Nagar Haveli and Daman and Diu      99.83100 0.03755516
## 29                                Rajasthan      99.03808 0.93885490
## 11                                  Gujarat      98.75118 1.22142360
## 12                                  Haryana      98.65594 1.25210720
## 20                           Madhya Pradesh      98.65485 1.32760010
## 6                                Chandigarh      98.63633 1.30880340
## 18                                   Ladakh      98.63491 1.01644980
## 19                              Lakshadweep      98.63094 0.48894974
## 35                            Uttar Pradesh      98.62826 1.33240700
## 5                                     Bihar      98.61678 1.33024910
# Ten rows of df2 with the lowest Death_Rate, lowest first (order() sorts
# ascending by default).
print("Top 10 States/UT with least death rate: ")
## [1] "Top 10 States/UT with least death rate: "
head(df2[order(df2$Death_Rate), ], n = 10)
##                                    State_UT Recovery_Rate Death_Rate
## 8  Dadra and Nagar Haveli and Daman and Diu      99.83100 0.03755516
## 24                                  Mizoram      68.73110 0.38052493
## 3                         Arunachal Pradesh      92.66427 0.47868423
## 19                              Lakshadweep      98.63094 0.48894974
## 17                                   Kerala      94.46707 0.49586144
## 32                                Telangana      98.05322 0.58938519
## 26                                   Odisha      98.08557 0.62210333
## 2                            Andhra Pradesh      98.29586 0.68108710
## 4                                     Assam      96.88342 0.93196989
## 29                                Rajasthan      99.03808 0.93885490

State-Wise Plots

Total Confirmed Cases

# Drop row 34 by position before plotting; the plots that follow in this
# section read the trimmed `data`.
# NOTE(review): presumably row 34 is a placeholder entry rather than a real
# state/UT -- confirm. Re-running this statement drops a further row each time.
data <- data[-34, ]

# One bar per remaining row, labelled with its Confirmed count. The x position
# is the row index, derived from the column itself instead of a hard-coded
# 1:36, so the plot keeps working if the number of rows changes.
ggplot(
  data,
  aes(x = seq_along(Confirmed), y = Confirmed, fill = Province_State)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Confirmed), vjust = -0.2, size = 2.5) +
  xlab("States/UT") +
  ylab("Total Confirmed Cases")

Total Active Cases

# One bar per row of `data`, labelled with its Active count. The x position is
# the row index, derived from the column itself instead of a hard-coded 1:36,
# so the plot keeps working if the number of rows changes.
ggplot(
  data,
  aes(x = seq_along(Active), y = Active, fill = Province_State)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Active), vjust = -0.2, size = 2.5) +
  xlab("States/UT") +
  ylab("Total Active Cases")

Total Deaths

# One bar per row of `data`, labelled with its Deaths count. The x position is
# the row index, derived from the column itself instead of a hard-coded 1:36,
# so the plot keeps working if the number of rows changes.
ggplot(
  data,
  aes(x = seq_along(Deaths), y = Deaths, fill = Province_State)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Deaths), vjust = -0.2, size = 2.5) +
  xlab("States/UT") +
  ylab("Total Deaths")

Recovery Rate

# One bar per row of df2 showing Recovery_Rate rounded to 2 significant
# digits, with the same rounded value as the bar label. The x position is the
# row index, derived from the column itself instead of a hard-coded 1:36.
ggplot(
  df2,
  aes(
    x = seq_along(Recovery_Rate),
    y = signif(Recovery_Rate, digits = 2),
    fill = State_UT
  )
) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = signif(Recovery_Rate, digits = 2)),
    vjust = -0.2,
    size = 2.5
  ) +
  xlab("States/UT") +
  ylab("Recovery Rate")

Death Rate

# One bar per row of df2 showing Death_Rate rounded to 2 significant digits,
# with the same rounded value as the bar label. The x position is the row
# index, derived from the column itself instead of a hard-coded 1:36.
ggplot(
  df2,
  aes(
    x = seq_along(Death_Rate),
    y = signif(Death_Rate, digits = 2),
    fill = State_UT
  )
) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = signif(Death_Rate, digits = 2)),
    vjust = -0.2,
    size = 2.5
  ) +
  xlab("States/UT") +
  ylab("Death Rate")