Loading the COVID data: Number of positive cases in each country on each day from 22 January to 16 May 2020

Setting the data for calculations

# Per row of Countries_Confirmed, count the cells that differ from zero.
# That count minus one is the root later applied to the growth ratio.
# NOTE(review): the comparison covers every column, not only 2:117, so a
# non-numeric first column would normally add one to each count - confirm
# that this is intended.
power <- rowSums(Countries_Confirmed != 0)
powerA <- power - 1
# Earlier row-wise variant, kept for reference:
#power <- apply(Countries_Confirmed, 1, function(y) sum(y!=0))

# First strictly positive value found in columns 2 to 117 of each row.
# A row without any positive value gives NA.
nonzero <- apply(
  Countries_Confirmed[, 2:117],
  1,
  function(daily_counts) {
    positive_counts <- daily_counts[daily_counts > 0]
    positive_counts[1]
  }
)
# Replace those NAs with 0.
nonzeroA <- replace_na(nonzero, 0)

CAGR calculations

# Overall growth ratio: the value in column 117 divided by the first positive
# value of the same row (nonzeroA).
#View(Countries_Confirmed[,117])
AGR <- Countries_Confirmed[, 117] / nonzeroA
#View(AGR)

# Take the powerA-th root of the ratio and subtract one to get a per-period
# rate, then express it as a percentage rounded to two decimals.
CAGR <- AGR^(1 / powerA) - 1
CAGRpc <- round(100 * CAGR, 2)

# Put the country names next to their CAGR percentages.
# Keep the first argument written as the full `$` expression: the merge step
# further down keys on a column named "Countries_Confirmed$CNTRY_NAME", which
# presumably comes from this expression - verify before refactoring.
INDEX <- cbind(Countries_Confirmed$CNTRY_NAME, CAGRpc)
#Countries_Confirmed$CAGRpc <- paste(Countries_Confirmed$CNTRY_NAME,CAGRpc)
#View(Countries_Confirmed$CAGRpc)
#View(CAGRpc)
View(INDEX)

# Save the country/CAGR table to index.csv in the working directory.
write.csv(x = INDEX, file = "index.csv")

Loading HDR independent variables

# Read the HDR 2018 independent-variable workbook from a fixed local path.
HDR <- read_excel(
  path = "C:/Users/ramya.emandi/Desktop/Interesting Data/HDR2018/HDR2018_IndVar.xlsx"
)
# Disabled header clean-up steps (they refer to an object named GII):
#names(GII) <- GII[1,]
#colnames(GII)[1] <- "GII Rank 2018"
#GII <- GII[-1,]
View(HDR)

Merge the datasets, COVID positive cases and the HDR parameters

# Join the HDR indicators onto the CAGR table by country name. all.x = TRUE
# keeps every row of INDEX even when HDR has no matching "Country".
DepInd <- merge(
  x = INDEX,
  y = HDR,
  by.x = "Countries_Confirmed$CNTRY_NAME",
  by.y = "Country",
  all.x = TRUE
)
colnames(DepInd)[2] <- "CAGR"
#View(DepInd)

# Drop the rows whose CAGR (column 2) is NaN.
nan <- is.nan(DepInd[, 2])
CleanDepInd <- DepInd[!nan, ]
View(CleanDepInd)

# Convert a column to numeric by way of character; entries that cannot be
# parsed as numbers become NA (R reports this with a coercion warning).
as_numeric_via_character <- function(values) {
  as.numeric(as.character(values))
}

# Make every indicator used in the regressions numeric. The "## Warning" lines
# are the messages recorded when this was run.
CleanDepInd$GII <- as_numeric_via_character(CleanDepInd$GII)
## Warning: NAs introduced by coercion
CleanDepInd$HDI <- as_numeric_via_character(CleanDepInd$HDI)
CleanDepInd$IHDI <- as_numeric_via_character(CleanDepInd$IHDI)
## Warning: NAs introduced by coercion
CleanDepInd$MPI <- as_numeric_via_character(CleanDepInd$MPI)
## Warning: NAs introduced by coercion
CleanDepInd$HealthExp <- as_numeric_via_character(CleanDepInd$HealthExp)
## Warning: NAs introduced by coercion
CleanDepInd$LEI <- as_numeric_via_character(CleanDepInd$LEI)
CleanDepInd$GNIperCapita <- as_numeric_via_character(CleanDepInd$GNIperCapita)

Linear regression on the HDR 2018 indicators (on a trial basis)

# Trial model: regress CAGR on all seven HDR indicators at once and print the
# fit summary.
TrailIndex <- lm(
  formula = CAGR ~ GII + HDI + IHDI + MPI + HealthExp + LEI + GNIperCapita,
  data = CleanDepInd
)
print(summary(TrailIndex))
## 
## Call:
## lm(formula = CAGR ~ GII + HDI + IHDI + MPI + HealthExp + LEI + 
##     GNIperCapita, data = CleanDepInd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6607 -1.3989  0.1399  1.7911 20.1346 
## 
## Coefficients: (1 not defined because of singularities)
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -1.814e+00  6.183e+00  -0.293    0.770
## GII           7.865e+00  5.104e+00   1.541    0.126
## HDI           1.141e+00  1.195e+01   0.095    0.924
## IHDI          4.103e+00  1.036e+01   0.396    0.693
## MPI                  NA         NA      NA       NA
## HealthExp     9.455e-02  1.395e-01   0.678    0.499
## LEI           6.431e+00  7.192e+00   0.894    0.373
## GNIperCapita  1.549e-05  3.589e-05   0.432    0.667
## 
## Residual standard error: 3.492 on 115 degrees of freedom
##   (60 observations deleted due to missingness)
## Multiple R-squared:  0.0484, Adjusted R-squared:  -0.001247 
## F-statistic: 0.9749 on 6 and 115 DF,  p-value: 0.4455
# Refit without MPI, whose coefficient came back NA in the previous fit.
TrailIndex <- lm(
  formula = CAGR ~ GII + HDI + IHDI + HealthExp + LEI + GNIperCapita,
  data = CleanDepInd
)
print(summary(TrailIndex))
## 
## Call:
## lm(formula = CAGR ~ GII + HDI + IHDI + HealthExp + LEI + GNIperCapita, 
##     data = CleanDepInd)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6607 -1.3989  0.1399  1.7911 20.1346 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -1.814e+00  6.183e+00  -0.293    0.770
## GII           7.865e+00  5.104e+00   1.541    0.126
## HDI           1.141e+00  1.195e+01   0.095    0.924
## IHDI          4.103e+00  1.036e+01   0.396    0.693
## HealthExp     9.455e-02  1.395e-01   0.678    0.499
## LEI           6.431e+00  7.192e+00   0.894    0.373
## GNIperCapita  1.549e-05  3.589e-05   0.432    0.667
## 
## Residual standard error: 3.492 on 115 degrees of freedom
##   (60 observations deleted due to missingness)
## Multiple R-squared:  0.0484, Adjusted R-squared:  -0.001247 
## F-statistic: 0.9749 on 6 and 115 DF,  p-value: 0.4455

Visualisations

# One scatter plot per HDR indicator: indicator on the x axis, CAGR on the y
# axis. The argument expressions are kept as written.
plot(x = CleanDepInd$GII, y = CleanDepInd$CAGR)

plot(x = CleanDepInd$HDI, y = CleanDepInd$CAGR)

plot(x = CleanDepInd$IHDI, y = CleanDepInd$CAGR)

plot(x = CleanDepInd$MPI, y = CleanDepInd$CAGR)

plot(x = CleanDepInd$HealthExp, y = CleanDepInd$CAGR)

plot(x = CleanDepInd$LEI, y = CleanDepInd$CAGR)

plot(x = CleanDepInd$GNIperCapita, y = CleanDepInd$CAGR)

Trial (the plots suggest that HealthExp and GNIperCapita might be significant)

# Check whether HealthExp and GNIperCapita alone have a significant effect on
# CAGR: fit the two-predictor model and print its summary.
Viz <- lm(
  formula = CAGR ~ HealthExp + GNIperCapita,
  data = CleanDepInd
)
print(summary(Viz))
## 
## Call:
## lm(formula = CAGR ~ HealthExp + GNIperCapita, data = CleanDepInd)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.1191  -2.0175   0.0518   2.2421  20.1790 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.858e+00  7.908e-01  11.201   <2e-16 ***
## HealthExp    1.479e-01  1.125e-01   1.314    0.191    
## GNIperCapita 1.953e-05  1.539e-05   1.269    0.206    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.512 on 147 degrees of freedom
##   (32 observations deleted due to missingness)
## Multiple R-squared:  0.0299, Adjusted R-squared:  0.0167 
## F-statistic: 2.265 on 2 and 147 DF,  p-value: 0.1074
# Draw the standard plots R produces for the fitted linear model Viz.
plot(Viz)

author: “Ramya Emandi”