Loading the COVID data: Number of positive cases in each country on each day from 22 January to 16 May 2020
library(readxl)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rsconnect)
## Warning: package 'rsconnect' was built under R version 3.6.3
library(tidyverse)
## -- Attaching packages ------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts --------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the per-country daily confirmed-case workbook.
# NOTE(review): the absolute Windows path below ties the script to one machine.
#Countries_Confirmed <- read_excel(Countries-Confirmed_Harvard.xlsx)
#setwd("COVID Indices/Countries-Confirmed_Harvard.xlsx")
Countries_Confirmed <- read_excel("C:/Users/ramya.emandi/Desktop/Econ Policy/COVID Indices/Countries-Confirmed_Harvard.xlsx")
# NOTE(review): rownames() of a single column (a dimensionless vector) is NULL,
# so this assigns NULL row names rather than the country names - confirm intent.
rownames(Countries_Confirmed) <- c(rownames(Countries_Confirmed$CNTRY_NAME))
# Re-parse every column header as a month-day date; as.Date() returns NA for
# any header that does not match "%m%d".
# NOTE(review): the workbook's header format is not visible here - verify what
# the resulting column names look like. Later steps index columns by position.
colnames(Countries_Confirmed) <- c(as.Date(colnames(Countries_Confirmed), format = "%m%d"))
# Restore a usable name for the first column after the header conversion above.
colnames(Countries_Confirmed)[1] <- "CNTRY_NAME"
# Open the table in the data viewer for a visual check.
View(Countries_Confirmed)
# knitr chunk option: echo source code in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
Setting the data for calculations
# Per-country count of cells that differ from zero.
# Note: the comparison runs over every column of Countries_Confirmed, not only
# the daily-count columns 2:117 used further down.
power <- rowSums(Countries_Confirmed != 0)
# Exponent denominator used by the CAGR step: one less than the count above.
powerA <- power - 1
# Earlier variant, left disabled:
#power <- apply(Countries_Confirmed, 1, function(y) sum(y!=0))
# First strictly positive value in columns 2:117 for each row; NA when a row
# has no positive value.
nonzero <- apply(
  Countries_Confirmed[, 2:117],
  MARGIN = 1,
  FUN = function(daily_counts) {
    positive_counts <- daily_counts[daily_counts > 0]
    positive_counts[1]
  }
)
# Rows without any positive value get 0 in place of NA.
nonzeroA <- replace_na(nonzero, 0)
CAGR calculations
# Inspect column 124, which the next statement uses as the end value of the
# growth ratio.
# NOTE(review): column 124 lies outside the 2:117 range scanned for the first
# positive count - presumably a latest/total column; confirm against the workbook.
View(Countries_Confirmed[,124])
# Growth ratio per country: column 124 divided by the first positive count.
# nonzeroA is 0 for countries with no positive count, so those rows divide by
# zero here (NaN for 0/0).
AGR <- Countries_Confirmed[,124]/ nonzeroA
#View(AGR)
# Compound growth rate per period: ratio^(1 / powerA) - 1.
CAGR <- (AGR ^ (1/powerA)) - 1
# Express as a percentage, rounded to two decimals.
CAGRpc <- round(CAGR * 100, digits = 2)
# Pair country names with the percentages.
# The merge step further down refers to this table's first column by the name
# "Countries_Confirmed$CNTRY_NAME", i.e. the text of the first argument below,
# so changing that expression requires changing the merge key as well.
INDEX <- cbind(Countries_Confirmed$CNTRY_NAME, CAGRpc)
#Countries_Confirmed$CAGRpc <- paste(Countries_Confirmed$CNTRY_NAME,CAGRpc)
#View(Countries_Confirmed$CAGRpc)
#View(CAGRpc)
View(INDEX)
# Export the index; the relative path resolves against the current working
# directory.
write.csv(INDEX, file = "index.csv")
Loading HDR independent variables
# Read the HDR 2018 independent-variable workbook.
HDR <- read_excel(
  path = "C:/Users/ramya.emandi/Desktop/Interesting Data/HDR2018/HDR2018_IndVar.xlsx"
)
# Disabled header clean-up from an earlier GII-only version:
#names(GII) <- GII[1,]
#colnames(GII)[1] <- "GII Rank 2018"
#GII <- GII[-1,]
# Open the table in the data viewer for a visual check.
View(HDR)
# Confirm that the two predictors used without coercion later are numeric.
is.numeric(HDR[["Pop_Den"]])
## [1] TRUE
is.numeric(HDR[["Strict"]])
## [1] TRUE
Merge the datasets: COVID positive cases and the HDR parameters
# Left-join the CAGR index onto the HDR indicators by country name.
# The key on the INDEX side is the column name that cbind() derived when INDEX
# was built; all.x = TRUE keeps countries that have no HDR match.
DepInd <- merge(
  x = INDEX,
  y = HDR,
  by.x = "Countries_Confirmed$CNTRY_NAME",
  by.y = "Country",
  all.x = TRUE
)
# merge() puts the key first, so column 2 is the CAGR percentage from INDEX.
colnames(DepInd)[2] <- "CAGR"
#View(DepInd)
# Drop countries whose CAGR is NaN (0 / 0 in the growth-ratio step).
nan <- is.nan(DepInd[, 2])
CleanDepInd <- DepInd[!nan, ]
View(CleanDepInd)
# HDR indicators that must be numeric for the regression. Listing them once
# replaces seven copy-pasted statements and uses exact column names instead of
# `$`, which partially matches names on a data.frame.
hdr_numeric_cols <- c(
  "GII", "HDI", "IHDI", "MPI", "HealthExp", "LEI", "GNIperCapita"
)
# Fail early with a clear message if the HDR workbook lacks any of them.
stopifnot(all(hdr_numeric_cols %in% names(CleanDepInd)))
# Go through character so that factor columns convert by label rather than by
# internal level code; non-numeric entries become NA with a warning.
CleanDepInd[hdr_numeric_cols] <- lapply(
  CleanDepInd[hdr_numeric_cols],
  function(column) as.numeric(as.character(column))
)
# Recorded output from the original per-column run (emitted for GII, IHDI, MPI
# and HealthExp):
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
Linear regression on the HDR 2018 indicators (on a trial basis)
# Fit a linear model of CAGR on all nine candidate predictors.
# The recorded summary below shows that MPI received no estimate (dropped
# because of singularities) and that 75 rows were excluded for missing values.
TrailIndex <- lm(formula = CAGR ~ GII + HDI + IHDI + MPI + HealthExp + LEI + GNIperCapita + Pop_Den + Strict, data = CleanDepInd)
print(summary(TrailIndex))
##
## Call:
## lm(formula = CAGR ~ GII + HDI + IHDI + MPI + HealthExp + LEI +
## GNIperCapita + Pop_Den + Strict, data = CleanDepInd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.911 -1.339 0.186 1.670 7.352
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.082e+00 4.797e+00 -0.226 0.82206
## GII 3.945e+00 4.140e+00 0.953 0.34297
## HDI 7.833e+00 9.482e+00 0.826 0.41076
## IHDI -9.272e+00 8.578e+00 -1.081 0.28240
## MPI NA NA NA NA
## HealthExp 9.306e-02 1.090e-01 0.854 0.39512
## LEI 4.177e+00 5.623e+00 0.743 0.45940
## GNIperCapita 7.318e-05 3.261e-05 2.244 0.02706 *
## Pop_Den -6.145e-04 3.859e-04 -1.592 0.11453
## Strict 5.278e-02 1.764e-02 2.992 0.00351 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.494 on 98 degrees of freedom
## (75 observations deleted due to missingness)
## Multiple R-squared: 0.1723, Adjusted R-squared: 0.1047
## F-statistic: 2.549 on 8 and 98 DF, p-value: 0.01443
#eliminating MPI due to NAs
#TrailIndex <- lm(formula = CAGR ~ GII + HDI + IHDI + HealthExp + LEI + GNIperCapita, data = CleanDepInd)
#print(summary(TrailIndex))
# Trial and error: refit using only Pop_Den and Strict. This overwrites the
# TrailIndex object from the full model above.
TrailIndex <- lm(formula = CAGR ~ Pop_Den + Strict, data = CleanDepInd)
print(summary(TrailIndex))
##
## Call:
## lm(formula = CAGR ~ Pop_Den + Strict, data = CleanDepInd)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.924 -1.509 0.101 1.871 7.817
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.5100259 1.2672604 4.348 2.81e-05 ***
## Pop_Den -0.0001196 0.0003273 -0.365 0.71544
## Strict 0.0535721 0.0161765 3.312 0.00121 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.688 on 126 degrees of freedom
## (53 observations deleted due to missingness)
## Multiple R-squared: 0.08027, Adjusted R-squared: 0.06567
## F-statistic: 5.498 on 2 and 126 DF, p-value: 0.005137
# Scatter plot of CAGR (y axis) against Strict (x axis).
plot(CleanDepInd$Strict, CleanDepInd$CAGR)