Loading the COVID data: Number of positive cases in each country on each day from 22 January to 16 May 2020

library(readxl)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rsconnect)
## Warning: package 'rsconnect' was built under R version 3.6.3
library(tidyverse)
## -- Attaching packages ------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts --------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the per-country confirmed-case workbook and normalise its headers.
# Earlier load attempts, kept for reference (not executed):
#Countries_Confirmed <- read_excel(Countries-Confirmed_Harvard.xlsx)
#setwd("COVID Indices/Countries-Confirmed_Harvard.xlsx")
# Read the workbook from an absolute path on the author's machine.
Countries_Confirmed <- read_excel("C:/Users/ramya.emandi/Desktop/Econ Policy/COVID Indices/Countries-Confirmed_Harvard.xlsx")
# NOTE(review): rownames() is applied to the CNTRY_NAME column (a plain
# vector), for which it returns NULL, so this line presumably does not label
# the rows by country -- confirm the intent.
rownames(Countries_Confirmed) <- c(rownames(Countries_Confirmed$CNTRY_NAME))
# Re-parse every column header as a date using the "%m%d" pattern and use the
# result as the new headers. NOTE(review): headers that do not match the
# pattern become NA -- verify against the workbook's actual header text.
colnames(Countries_Confirmed) <- c(as.Date(colnames(Countries_Confirmed), format = "%m%d"))
# The first header was re-parsed too; restore the country-name label.
colnames(Countries_Confirmed)[1] <- "CNTRY_NAME"
# Open the table in the data viewer for a visual check.
View(Countries_Confirmed)

# Set the default knitr chunk option so chunk source is echoed in the output.
knitr::opts_chunk$set(echo = TRUE)

Setting the data for calculations

# Per-country inputs for the growth-rate calculation.

# Count, for every row, the cells that differ from zero. The comparison runs
# over the whole table, CNTRY_NAME column included.
# NOTE(review): presumably the "- 1" below offsets that name column -- confirm.
power <- rowSums(Countries_Confirmed != 0, na.rm = FALSE)
powerA <- power - 1
# Earlier variant, kept for reference (not executed):
#power <- apply(Countries_Confirmed, 1, function(y) sum(y!=0))

# First strictly positive count in columns 2 to 117 of each row; NA when the
# row has none.
# NOTE(review): this window stops at column 117 while the CAGR step reads
# column 124 -- confirm that the two ranges are meant to differ.
nonzero <- apply(
  X = Countries_Confirmed[, 2:117],
  MARGIN = 1,
  FUN = function(daily_counts) {
    positive_counts <- daily_counts[daily_counts > 0]
    positive_counts[1]
  }
)
# Rows without a positive count get 0 instead of NA.
nonzeroA <- replace_na(nonzero, replace = 0)

CAGR calculations

# Compound growth rate of confirmed cases per country, exported as index.csv.
# Inspect the column used as the end-of-period case count.
View(Countries_Confirmed[,124])
# Growth ratio: value in column 124 divided by the first positive count.
# nonzeroA is 0 for rows without a positive count, so the ratio is NaN (0/0)
# or Inf (x/0) there; NaN rows are filtered after the merge step.
AGR <- Countries_Confirmed[,124]/ nonzeroA
#View(AGR)
# Compound rate: ratio raised to 1/powerA, minus 1.
CAGR <- (AGR ^ (1/powerA)) - 1
# Express as a percentage rounded to two decimals.
CAGRpc <- round(CAGR * 100, digits = 2)
# Pair each country name with its rate. The merge step later refers to the
# first column of INDEX as "Countries_Confirmed$CNTRY_NAME".
# NOTE(review): that name presumably comes from how this cbind() argument is
# written -- verify before rewording the call.
INDEX <- cbind(Countries_Confirmed$CNTRY_NAME, CAGRpc)
# Abandoned alternative that stored the rate on the source table:
#Countries_Confirmed$CAGRpc <- paste(Countries_Confirmed$CNTRY_NAME,CAGRpc)
#View(Countries_Confirmed$CAGRpc)
#View(CAGRpc)
View(INDEX)

# Write the index to "index.csv" (relative path, so the current working
# directory).
write.csv(INDEX, file = "index.csv")

Loading HDR independent variables

# Load the HDR 2018 independent variables from an absolute path on the
# author's machine.
HDR <- read_excel("C:/Users/ramya.emandi/Desktop/Interesting Data/HDR2018/HDR2018_IndVar.xlsx")
# Leftover header clean-up for an object named GII (not executed):
#names(GII) <- GII[1,]
#colnames(GII)[1] <- "GII Rank 2018"
#GII <- GII[-1,]
# Open the table in the data viewer for a visual check.
View(HDR)
# Check that Pop_Den and Strict were read as numeric columns; both are used
# as regressors without further conversion.
is.numeric(HDR$Pop_Den)
## [1] TRUE
is.numeric(HDR$Strict)
## [1] TRUE

Merge the datasets, COVID positive cases and the HDR parameters

# Join the HDR indicators onto the CAGR index by country name. all.x = TRUE
# keeps every INDEX row, including countries with no HDR match.
DepInd <- merge(
  x = INDEX,
  y = HDR,
  by.x = "Countries_Confirmed$CNTRY_NAME",
  by.y = "Country",
  all.x = TRUE
)
# The second column holds the growth rate; give it a usable name.
names(DepInd)[2] <- "CAGR"
#View(DepInd)

# Keep only the rows whose CAGR is not NaN.
nan <- is.nan(DepInd[, 2])
CleanDepInd <- DepInd[!nan, ]
View(CleanDepInd)

# Coerce the HDR indicator columns to numeric by way of character. Entries
# that do not parse as numbers become NA, which raises the coercion warnings
# recorded below.
for (hdr_col in c(
  "GII", "HDI", "IHDI", "MPI", "HealthExp", "LEI", "GNIperCapita"
)) {
  CleanDepInd[[hdr_col]] <- as.numeric(as.character(CleanDepInd[[hdr_col]]))
}
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion

Linear Regression to HDR 2018 (on a trial basis)

# Trial fit: ordinary least squares of CAGR on all HDR indicators plus
# population density (Pop_Den) and Strict. The recorded summary below reports
# MPI as not estimable (singularities) and 75 rows dropped for missing values.
TrailIndex <- lm(formula = CAGR ~ GII + HDI + IHDI + MPI + HealthExp + LEI + GNIperCapita + Pop_Den + Strict,  data = CleanDepInd)
# Print coefficients, residual summary and fit statistics.
print(summary(TrailIndex))
## 
## Call:
## lm(formula = CAGR ~ GII + HDI + IHDI + MPI + HealthExp + LEI + 
##     GNIperCapita + Pop_Den + Strict, data = CleanDepInd)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.911 -1.339  0.186  1.670  7.352 
## 
## Coefficients: (1 not defined because of singularities)
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -1.082e+00  4.797e+00  -0.226  0.82206   
## GII           3.945e+00  4.140e+00   0.953  0.34297   
## HDI           7.833e+00  9.482e+00   0.826  0.41076   
## IHDI         -9.272e+00  8.578e+00  -1.081  0.28240   
## MPI                  NA         NA      NA       NA   
## HealthExp     9.306e-02  1.090e-01   0.854  0.39512   
## LEI           4.177e+00  5.623e+00   0.743  0.45940   
## GNIperCapita  7.318e-05  3.261e-05   2.244  0.02706 * 
## Pop_Den      -6.145e-04  3.859e-04  -1.592  0.11453   
## Strict        5.278e-02  1.764e-02   2.992  0.00351 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.494 on 98 degrees of freedom
##   (75 observations deleted due to missingness)
## Multiple R-squared:  0.1723, Adjusted R-squared:  0.1047 
## F-statistic: 2.549 on 8 and 98 DF,  p-value: 0.01443
# Abandoned variant: drop MPI because of its NAs (not executed).
#TrailIndex <- lm(formula = CAGR ~ GII + HDI + IHDI + HealthExp + LEI + GNIperCapita,  data = CleanDepInd)
#print(summary(TrailIndex))
# Trial and error: refit with only Pop_Den and Strict as regressors. This
# overwrites the TrailIndex model fitted earlier.
TrailIndex <- lm(formula = CAGR ~ Pop_Den + Strict,  data = CleanDepInd)
# Print coefficients, residual summary and fit statistics.
print(summary(TrailIndex))
## 
## Call:
## lm(formula = CAGR ~ Pop_Den + Strict, data = CleanDepInd)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.924 -1.509  0.101  1.871  7.817 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.5100259  1.2672604   4.348 2.81e-05 ***
## Pop_Den     -0.0001196  0.0003273  -0.365  0.71544    
## Strict       0.0535721  0.0161765   3.312  0.00121 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.688 on 126 degrees of freedom
##   (53 observations deleted due to missingness)
## Multiple R-squared:  0.08027,    Adjusted R-squared:  0.06567 
## F-statistic: 5.498 on 2 and 126 DF,  p-value: 0.005137
# Scatter plot of CAGR (y axis) against Strict (x axis) for the cleaned rows.
plot(CleanDepInd$Strict, CleanDepInd$CAGR)

author: “Ramya Emandi”