Loading the COVID data: Number of positive cases in each country on each day from 22 January to 16 May 2020
Setting the data for calculations
# Per-row count of cells that differ from 0, taken across *all* columns of
# Countries_Confirmed; powerA is that count minus one and is used as the
# root in the CAGR step.
# NOTE(review): if the frame also holds the country-name column, that cell is
# presumably counted too, whereas the first-case lookup below reads only
# columns 2:117 -- confirm that "- 1" gives the intended number of growth
# periods.
# Row-wise equivalent kept for reference:
#   apply(Countries_Confirmed, 1, function(y) sum(y != 0))
power <- rowSums(Countries_Confirmed != 0)
powerA <- power - 1

# First strictly positive entry of each row within columns 2:117. A row with
# no positive entry yields NA, which is then replaced by 0.
nonzero <- apply(
  Countries_Confirmed[, 2:117],
  1,
  function(daily) {
    positive <- daily[daily > 0]
    positive[1]
  }
)
nonzeroA <- replace_na(nonzero, 0)
CAGR calculations
# Overall growth ratio per row: the value in column 117 divided by the first
# positive count (nonzeroA). Rows whose nonzeroA is 0 divide by zero and give
# NaN (0 / 0) or Inf.
AGR <- Countries_Confirmed[, 117] / nonzeroA

# Compound growth rate: the powerA-th root of the ratio, minus 1, then
# expressed as a percentage rounded to two decimals.
CAGR <- (AGR^(1 / powerA)) - 1
CAGRpc <- round(CAGR * 100, digits = 2)

# Pair each country name with its growth rate. The cbind() call is kept
# exactly as written: the merge step further down joins on a column named
# "Countries_Confirmed$CNTRY_NAME", which presumably is derived from this
# call's first argument -- TODO confirm before renaming anything here.
INDEX <- cbind(Countries_Confirmed$CNTRY_NAME, CAGRpc)

# View() opens a data viewer and is only meaningful in an interactive
# session; skip it otherwise (Rscript, knitting, batch jobs) so that a
# missing viewer cannot stop the run before the CSV is written.
if (interactive()) {
  View(INDEX)
}
write.csv(INDEX, file = "index.csv")
Loading HDR independent variables
# Read the HDR 2018 independent-variable workbook into HDR.
# NOTE(review): this is an absolute path on one user's machine, so the script
# only runs where that exact file exists.
HDR <- read_excel("C:/Users/ramya.emandi/Desktop/Interesting Data/HDR2018/HDR2018_IndVar.xlsx")
# Disabled header clean-up for an object named GII, kept for reference:
#names(GII) <- GII[1,]
#colnames(GII)[1] <- "GII Rank 2018"
#GII <- GII[-1,]

# View() opens a data viewer and is only meaningful in an interactive
# session; skip it otherwise so a non-interactive run does not stop here.
if (interactive()) {
  View(HDR)
}
Merge the datasets, COVID positive cases and the HDR parameters
# Left join of INDEX with HDR: rows are matched where INDEX's
# "Countries_Confirmed$CNTRY_NAME" column equals HDR's "Country" column, and
# all.x = TRUE keeps INDEX rows that have no match in HDR.
DepInd <- merge(
  x = INDEX,
  y = HDR,
  by.x = "Countries_Confirmed$CNTRY_NAME",
  by.y = "Country",
  all.x = TRUE
)

# Column 2 is presumably the growth-rate column carried over from INDEX
# (merge() places the key column first) -- rename it so it can be used in
# model formulas.
colnames(DepInd)[2] <- "CAGR"

# Drop the rows whose growth rate is NaN.
nan <- is.nan(DepInd[, 2])
CleanDepInd <- DepInd[!nan, ]

# View() opens a data viewer and is only meaningful in an interactive
# session; skip it otherwise so a non-interactive run does not stop here.
if (interactive()) {
  View(CleanDepInd)
}
# Convert the HDR indicator columns to numeric, going through character
# first. Entries that cannot be parsed as numbers become NA; the recorded run
# reported "NAs introduced by coercion" for GII, IHDI, MPI and HealthExp.
hdr_indicator_cols <- c(
  "GII", "HDI", "IHDI", "MPI", "HealthExp", "LEI", "GNIperCapita"
)
CleanDepInd[hdr_indicator_cols] <- lapply(
  CleanDepInd[hdr_indicator_cols],
  function(values) as.numeric(as.character(values))
)
Linear regression on the HDR 2018 indicators (on a trial basis)
# Exploratory ("trial") ordinary least squares fit of CAGR on all seven HDR
# indicators. lm() drops rows with missing values in any model variable by
# default, so only complete rows of CleanDepInd are used.
TrailIndex <- lm(formula = CAGR ~ GII + HDI + IHDI + MPI + HealthExp + LEI + GNIperCapita, data = CleanDepInd)
# Print coefficient estimates, standard errors, p-values and fit statistics.
print(summary(TrailIndex))
##
## Call:
## lm(formula = CAGR ~ GII + HDI + IHDI + MPI + HealthExp + LEI +
## GNIperCapita, data = CleanDepInd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6607 -1.3989 0.1399 1.7911 20.1346
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.814e+00 6.183e+00 -0.293 0.770
## GII 7.865e+00 5.104e+00 1.541 0.126
## HDI 1.141e+00 1.195e+01 0.095 0.924
## IHDI 4.103e+00 1.036e+01 0.396 0.693
## MPI NA NA NA NA
## HealthExp 9.455e-02 1.395e-01 0.678 0.499
## LEI 6.431e+00 7.192e+00 0.894 0.373
## GNIperCapita 1.549e-05 3.589e-05 0.432 0.667
##
## Residual standard error: 3.492 on 115 degrees of freedom
## (60 observations deleted due to missingness)
## Multiple R-squared: 0.0484, Adjusted R-squared: -0.001247
## F-statistic: 0.9749 on 6 and 115 DF, p-value: 0.4455
# Refit without MPI: in the previous fit its coefficient was not estimated
# (reported as NA, "1 not defined because of singularities"). This overwrites
# the earlier TrailIndex object.
TrailIndex <- lm(formula = CAGR ~ GII + HDI + IHDI + HealthExp + LEI + GNIperCapita, data = CleanDepInd)
# Print the summary of the reduced model.
print(summary(TrailIndex))
##
## Call:
## lm(formula = CAGR ~ GII + HDI + IHDI + HealthExp + LEI + GNIperCapita,
## data = CleanDepInd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6607 -1.3989 0.1399 1.7911 20.1346
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.814e+00 6.183e+00 -0.293 0.770
## GII 7.865e+00 5.104e+00 1.541 0.126
## HDI 1.141e+00 1.195e+01 0.095 0.924
## IHDI 4.103e+00 1.036e+01 0.396 0.693
## HealthExp 9.455e-02 1.395e-01 0.678 0.499
## LEI 6.431e+00 7.192e+00 0.894 0.373
## GNIperCapita 1.549e-05 3.589e-05 0.432 0.667
##
## Residual standard error: 3.492 on 115 degrees of freedom
## (60 observations deleted due to missingness)
## Multiple R-squared: 0.0484, Adjusted R-squared: -0.001247
## F-statistic: 0.9749 on 6 and 115 DF, p-value: 0.4455
Visualisations
# One scatter plot of CAGR against each HDR indicator, drawn in the order
# listed. The axis labels are spelled out so that they match the labels
# plot() derives by itself from a call such as
# plot(CleanDepInd$GII, CleanDepInd$CAGR).
for (indicator in c("GII", "HDI", "IHDI", "MPI",
                    "HealthExp", "LEI", "GNIperCapita")) {
  plot(
    CleanDepInd[[indicator]],
    CleanDepInd$CAGR,
    xlab = paste0("CleanDepInd$", indicator),
    ylab = "CleanDepInd$CAGR"
  )
}
Trial (the scatter plots suggest that HealthExp and GNIperCapita might be significant)
# Check whether HealthExp and GNIperCapita have a significant effect on CAGR
# when they are the only predictors: fit a two-predictor linear model on
# CleanDepInd.
Viz <- lm(formula = CAGR ~ HealthExp + GNIperCapita, data = CleanDepInd)
# Print coefficient estimates, standard errors, p-values and fit statistics.
print(summary(Viz))
##
## Call:
## lm(formula = CAGR ~ HealthExp + GNIperCapita, data = CleanDepInd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1191 -2.0175 0.0518 2.2421 20.1790
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.858e+00 7.908e-01 11.201 <2e-16 ***
## HealthExp 1.479e-01 1.125e-01 1.314 0.191
## GNIperCapita 1.953e-05 1.539e-05 1.269 0.206
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.512 on 147 degrees of freedom
## (32 observations deleted due to missingness)
## Multiple R-squared: 0.0299, Adjusted R-squared: 0.0167
## F-statistic: 2.265 on 2 and 147 DF, p-value: 0.1074
# Draw the diagnostic plots for the fitted two-predictor model Viz
# (plot() on an lm object).
plot(Viz)