A small exploratory analysis project based on COVID-19 data for the different Indian states and union territories.
# Set the default figure width and height for all knitr chunks in this report.
knitr::opts_chunk$set(fig.width = 12, fig.height = 8)
library(covid19.analytics)
## Warning: package 'covid19.analytics' was built under R version 3.6.3
library(ggplot2)
# Download the aggregated data set via covid19.analytics.
aggdat <- covid19.data(case = "aggregated")
## Data being read from JHU/CCSE repository
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/08-03-2021.csv
# Keep only the rows whose Country_Region is India, then open them for
# inspection in the data viewer.
data <- aggdat[aggdat$Country_Region == "India", , drop = FALSE]
View(data)
# Download the global time series of confirmed cases.
tsdat <- covid19.data(case = "ts-confirmed")
## Data being read from JHU/CCSE repository
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv
## Data retrieved on 2021-08-04 15:37:51 || Range of dates on data: 2020-01-22--2021-08-03 | Nbr of records: 279
## --------------------------------------------------------------------------------
# Keep only the India row(s) of the confirmed-cases series and inspect them.
tsdata <- tsdat[tsdat$Country.Region == "India", , drop = FALSE]
View(tsdata)
# Download the global time series of deaths.
tsdat2 <- covid19.data(case = "ts-deaths")
## Data being read from JHU/CCSE repository
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv
## Data retrieved on 2021-08-04 15:37:54 || Range of dates on data: 2020-01-22--2021-08-03 | Nbr of records: 279
## --------------------------------------------------------------------------------
# Keep only the India row(s) of the deaths series and inspect them.
tsdata2 <- tsdat2[tsdat2$Country.Region == "India", , drop = FALSE]
View(tsdata2)
data is the aggregated data, i.e. it contains the confirmed, recovered, active and death counts (among other variables), while tsdata is the time series of confirmed cases and tsdata2 is the time series of deaths.
# Daily new confirmed cases for India, derived from the first row of tsdata.
# NOTE(review): assumes columns 1-4 of tsdata are metadata and the cumulative
# per-date counts start at column 5, as the original i+4 / i+5 offsets imply;
# confirm against the covid19.analytics time-series layout.
cum_cases <- unlist(tsdata[1, 5:ncol(tsdata)], use.names = FALSE)

# diff() gives the day-over-day increase in one vectorised step. It replaces
# the manual index loop, whose 1:(ncol - 5) range would iterate over c(1, 0)
# if there were no second date column.
CTperday <- as.numeric(diff(cum_cases))

# Days counts the differences in order, so day 1 is the change between the
# first and second date columns.
ct <- data.frame(Days = seq_along(CTperday), NC = CTperday)

ggplot(ct, aes(x = Days, y = NC, fill = NC)) +
  geom_bar(stat = "Identity") +
  xlab("Days") +
  ylab("No. of Cases Per Day")
# Daily new deaths for India, derived from the first row of tsdata2.
# NOTE(review): assumes columns 1-4 of tsdata2 are metadata and the cumulative
# per-date counts start at column 5, as the original i+4 / i+5 offsets imply;
# confirm against the covid19.analytics time-series layout.
cum_deaths <- unlist(tsdata2[1, 5:ncol(tsdata2)], use.names = FALSE)

# diff() gives the day-over-day increase in one vectorised step, replacing
# the manual index loop.
DTperday <- as.numeric(diff(cum_deaths))

# The day index is sized from the deaths vector itself. It was previously
# taken from ncol(tsdata), the confirmed-cases table, which only worked
# because both tables happened to have the same number of columns.
dt <- data.frame(Days = seq_along(DTperday), ND = DTperday)

ggplot(dt, aes(x = Days, y = ND, fill = ND)) +
  geom_bar(stat = "Identity") +
  xlab("Days") +
  ylab("No. of Deaths Per Day")
# Overlay daily deaths (red bars, from dt) and daily cases (blue line, from ct)
# on a single chart with two y-axes.
# Deaths are multiplied by this factor so the bars are visible on the case
# scale; the secondary axis divides by the same factor to show death counts.
death_scale <- 40

ggplot(dt, aes(x = Days, y = death_scale * ND)) +
  geom_bar(stat = "Identity", col = "red") +
  xlab("Days") +
  ylab("No. of Cases Per Day") +
  # The line layer gets its own data frame instead of reaching into ct with
  # `$` inside aes(), which bypasses ggplot2's data handling.
  geom_line(data = ct, aes(x = Days, y = NC), size = 1, col = "blue") +
  scale_y_continuous(
    sec.axis = sec_axis(~ . / death_scale, name = "No. of Deaths Per Day ")
  )
# Nationwide totals, summed over every row of the India snapshot.
tc <- sum(data$Confirmed)
td <- sum(data$Deaths)
tr <- sum(data$Recovered)
ta <- sum(data$Active)

# Each rate is a percentage of total confirmed cases, built from the totals
# computed above.
rr <- signif((tr / tc) * 100, digits = 8)
dr <- signif((td / tc) * 100, digits = 8)
ar <- signif((ta / tc) * 100, digits = 8)

# One-row summary table; printed by evaluating it on the last line.
df1 <- data.frame(
  Total_Confirmed_cases = tc,
  Total_Deaths = td,
  Total_Recovered_Cases = tr,
  Total_Active_Cases = ta,
  Recovery_Rate = rr,
  Death_Rate = dr
)
df1
## Total_Confirmed_cases Total_Deaths Total_Recovered_Cases Total_Active_Cases
## 1 31769132 425757 30933022 410353
## Recovery_Rate Death_Rate
## 1 97.36817 1.340159
# Ten rows of data with the highest Active counts, largest first.
print("Top 10 States/UT with most active cases: ")
## [1] "Top 10 States/UT with most active cases: "
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
head(
  data[order(data$Active, decreasing = TRUE), c("Combined_Key", "Active")],
  n = 10
)
## Combined_Key Active
## 266 Kerala, India 173736
## 270 Maharashtra, India 77729
## 265 Karnataka, India 24305
## 280 Tamil Nadu, India 20217
## 251 Andhra Pradesh, India 20170
## 275 Odisha, India 12676
## 273 Mizoram, India 12663
## 253 Assam, India 12440
## 286 West Bengal, India 10767
## 271 Manipur, India 9490
# Ten rows of data with the highest Deaths counts, largest first.
print("Top 10 States/UT with most deaths: ")
## [1] "Top 10 States/UT with most deaths: "
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
head(
  data[order(data$Deaths, decreasing = TRUE), c("Combined_Key", "Deaths")],
  n = 10
)
## Combined_Key Deaths
## 270 Maharashtra, India 133215
## 265 Karnataka, India 36650
## 280 Tamil Nadu, India 34159
## 258 Delhi, India 25058
## 284 Uttar Pradesh, India 22765
## 286 West Bengal, India 18170
## 266 Kerala, India 17103
## 277 Punjab, India 16299
## 256 Chhattisgarh, India 13530
## 251 Andhra Pradesh, India 13428
# Per-row recovery and death rates as a percentage of confirmed cases.
# Computed on whole columns, so the vectors always match the number of rows
# in data instead of relying on a hard-coded 1:37 loop that grew them one
# element at a time.
rarr <- signif((data$Recovered / data$Confirmed) * 100, digits = 8)
darr <- signif((data$Deaths / data$Confirmed) * 100, digits = 8)

df2 <- data.frame(
  State_UT = data$Province_State,
  Recovery_Rate = rarr,
  Death_Rate = darr
)

# NOTE(review): row 34 is dropped by position. It is presumably a placeholder
# entry rather than a real state/UT -- confirm, because a positional index
# silently removes the wrong row if the row order of data ever changes.
df2 <- df2[-34, ]
# Ten rows of df2 with the highest Recovery_Rate, largest first.
print("Top 10 States/UT with best recovery rate: ")
## [1] "Top 10 States/UT with best recovery rate: "
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
head(df2[order(df2$Recovery_Rate, decreasing = TRUE), ], n = 10)
## State_UT Recovery_Rate Death_Rate
## 8 Dadra and Nagar Haveli and Daman and Diu 99.83100 0.03755516
## 29 Rajasthan 99.03808 0.93885490
## 11 Gujarat 98.75118 1.22142360
## 12 Haryana 98.65594 1.25210720
## 20 Madhya Pradesh 98.65485 1.32760010
## 6 Chandigarh 98.63633 1.30880340
## 18 Ladakh 98.63491 1.01644980
## 19 Lakshadweep 98.63094 0.48894974
## 35 Uttar Pradesh 98.62826 1.33240700
## 5 Bihar 98.61678 1.33024910
# Ten rows of df2 with the lowest Death_Rate, smallest first.
print("Top 10 States/UT with least death rate: ")
## [1] "Top 10 States/UT with least death rate: "
head(df2[order(df2$Death_Rate, decreasing = FALSE), ], n = 10)
## State_UT Recovery_Rate Death_Rate
## 8 Dadra and Nagar Haveli and Daman and Diu 99.83100 0.03755516
## 24 Mizoram 68.73110 0.38052493
## 3 Arunachal Pradesh 92.66427 0.47868423
## 19 Lakshadweep 98.63094 0.48894974
## 17 Kerala 94.46707 0.49586144
## 32 Telangana 98.05322 0.58938519
## 26 Odisha 98.08557 0.62210333
## 2 Andhra Pradesh 98.29586 0.68108710
## 4 Assam 96.88342 0.93196989
## 29 Rajasthan 99.03808 0.93885490
# NOTE(review): row 34 is dropped by position, mirroring the same removal
# applied to df2. It is presumably a placeholder entry rather than a real
# state/UT -- confirm. Re-running this line removes a further row each time.
data <- data[-34, ]

# Bar chart of total confirmed cases, one bar per remaining row.
# The x position is derived from the column length rather than a hard-coded
# 1:36, so the plot still works if the number of rows changes.
ggplot(data, aes(x = seq_along(Confirmed), y = Confirmed, fill = Province_State)) +
  geom_bar(stat = "Identity") +
  geom_text(aes(label = Confirmed), vjust = -0.2, size = 2.5) +
  xlab("States/UT") +
  ylab("Total Confirmed Cases")
# Bar chart of total active cases, one bar per row of data.
# The x position is derived from the column length rather than a hard-coded
# 1:36, so the plot still works if the number of rows changes.
ggplot(data, aes(x = seq_along(Active), y = Active, fill = Province_State)) +
  geom_bar(stat = "Identity") +
  geom_text(aes(label = Active), vjust = -0.2, size = 2.5) +
  xlab("States/UT") +
  ylab("Total Active Cases")
# Bar chart of total deaths, one bar per row of data.
# The x position is derived from the column length rather than a hard-coded
# 1:36, so the plot still works if the number of rows changes.
ggplot(data, aes(x = seq_along(Deaths), y = Deaths, fill = Province_State)) +
  geom_bar(stat = "Identity") +
  geom_text(aes(label = Deaths), vjust = -0.2, size = 2.5) +
  xlab("States/UT") +
  ylab("Total Deaths")
# Bar chart of recovery rate (rounded to 2 significant digits), one bar per
# row of df2.
# The x position is derived from the column length rather than a hard-coded
# 1:36, so the plot still works if the number of rows changes.
ggplot(
  df2,
  aes(
    x = seq_along(State_UT),
    y = signif(Recovery_Rate, digits = 2),
    fill = State_UT
  )
) +
  geom_bar(stat = "Identity") +
  geom_text(aes(label = signif(Recovery_Rate, digits = 2)), vjust = -0.2, size = 2.5) +
  xlab("States/UT") +
  ylab("Recovery Rate")
# Bar chart of death rate (rounded to 2 significant digits), one bar per row
# of df2.
# The x position is derived from the column length rather than a hard-coded
# 1:36, so the plot still works if the number of rows changes.
ggplot(
  df2,
  aes(
    x = seq_along(State_UT),
    y = signif(Death_Rate, digits = 2),
    fill = State_UT
  )
) +
  geom_bar(stat = "Identity") +
  geom_text(aes(label = signif(Death_Rate, digits = 2)), vjust = -0.2, size = 2.5) +
  xlab("States/UT") +
  ylab("Death Rate")