library(stringr)
library(dplyr)
library(covid19.analytics)
library(lubridate)
library(leaps)
library(zoo)
library(glmnet)
library(caTools)
library(olsrr)
# Raise the print limit so long outputs (e.g. full coefficient tables)
# are not truncated on the console.
options(max.print = 999999)

# Load the aggregated case data through covid19.analytics::covid19.data().
ag <- covid19.data(case = "aggregated")
## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Disabled alternative: split Last_Update into numeric year/month/day columns.
#ag$LU_year <- substr(ag$Last_Update, 1, 4)
#ag$LU_year <- as.numeric(ag$LU_year)
#ag$LU_month <- substr(ag$Last_Update, 6, 7)
#ag$LU_month <- as.numeric(ag$LU_month)
#ag$LU_day <- substr(ag$Last_Update, 9, 10)
#ag$LU_day <- as.numeric(ag$LU_day)

# Keep only the first 10 characters of the timestamp (the date part) and
# parse them as a year-month-day date.
ag$Last_Update <- ymd(substr(ag$Last_Update, 1, 10))

# Drop the columns that are not used as predictors further down.
ag <- subset(
  ag,
  select = -c(Active, Recovered, FIPS, Admin2, Province_State, Lat, Long_,
              Combined_Key)
)

# Remove every row that still contains a missing value, then renumber the
# rows consecutively. seq_len() is used instead of 1:nrow(ag) so that an
# empty result (zero rows) does not produce the invalid row names c(1, 0).
ag <- na.omit(ag)
rownames(ag) <- seq_len(nrow(ag))
# ---- Train/test split and Gaussian GLM on Deaths ----
set.seed(1) # fixed seed so the random split is reproducible

# Random 50/50 split of the rows. The sizes are derived from the data instead
# of being hard-coded, so the split stays valid when the number of rows in
# `ag` changes.
n_obs <- nrow(ag)
train <- sample(n_obs, n_obs %/% 2)
train.dat <- ag[train, ]
test.dat <- ag[-train, ]

# NOTE(review): the model is fit on the full data set `ag`, not on
# `train.dat` -- confirm this is intended.
model1 <- glm(Deaths ~ ., data = ag)
results <- summary(model1)

# Keep only the coefficients whose p-value is below 0.05. The rows are
# selected on the coefficient matrix by its real column name, rather than on
# the name that data.frame() mangles it into ("Pr...t.."). which() drops any
# NA comparison instead of producing NA rows.
coef_table <- results$coefficients
signif_rows <- which(coef_table[, "Pr(>|t|)"] < 0.05)
pvals <- data.frame(coef_table[signif_rows, , drop = FALSE])
print(rownames(pvals))
## [1] "Country_RegionArgentina" "Country_RegionAustria"
## [3] "Country_RegionBelarus" "Country_RegionBelgium"
## [5] "Country_RegionBrazil" "Country_RegionBulgaria"
## [7] "Country_RegionCuba" "Country_RegionCzechia"
## [9] "Country_RegionEcuador" "Country_RegionEgypt"
## [11] "Country_RegionHungary" "Country_RegionIndia"
## [13] "Country_RegionIndonesia" "Country_RegionIran"
## [15] "Country_RegionIraq" "Country_RegionIreland"
## [17] "Country_RegionIsrael" "Country_RegionJordan"
## [19] "Country_RegionKorea, South" "Country_RegionKuwait"
## [21] "Country_RegionMongolia" "Country_RegionNetherlands"
## [23] "Country_RegionNorway" "Country_RegionPoland"
## [25] "Country_RegionQatar" "Country_RegionRomania"
## [27] "Country_RegionSerbia" "Country_RegionSingapore"
## [29] "Country_RegionSlovakia" "Country_RegionSouth Africa"
## [31] "Country_RegionSwitzerland" "Country_RegionThailand"
## [33] "Country_RegionTunisia" "Country_RegionTurkey"
## [35] "Country_RegionUnited Arab Emirates" "Country_RegionUnited Kingdom"
## [37] "Country_RegionVanuatu" "Country_RegionYemen"
## [39] "Confirmed" "Incident_Rate"
## [41] "Case_Fatality_Ratio"
# ---- Ridge regression (alpha = 0) ----
# Lines starting with `##` appear to be console output pasted from an earlier
# run; the numbers will differ when the data or the split changes.

# Design matrix of all predictors; [, -1] drops the intercept column.
x <- model.matrix(Deaths ~ ., ag)[, -1]
# x[1:3, ]
y <- ag$Deaths # response vector

set.seed(1) # fixed seed so the split and the CV folds are reproducible

# Random 50/50 split. The sizes come from nrow(x) rather than hard-coded
# constants, so every row is eligible for either half and the indices can
# never exceed the number of rows.
n_obs <- nrow(x)
train <- sample(n_obs, n_obs %/% 2)
test <- -train

# Fit the ridge path on the training rows, then cross-validate to choose
# the penalty.
ridge.mod <- glmnet(x[train, ], y[train], alpha = 0)
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 0)
plot(cv.out)

names(cv.out)
## [1] "lambda" "cvm" "cvsd" "cvup" "cvlo"
## [6] "nzero" "call" "name" "glmnet.fit" "lambda.min"
## [11] "lambda.1se" "index"

# lambda.min as reported by cv.glmnet.
bestlam <- cv.out$lambda.min
bestlam
## [1] 533.5884

# Refit at the selected lambda and compute the mean squared error on the
# held-out rows.
ridge.mod <- glmnet(x[train, ], y[train], alpha = 0, lambda = bestlam)
ridge.pred <- predict(ridge.mod, s = bestlam, newx = x[test, ])
y.test <- y[test] # response vector in the test data
mean((ridge.pred - y.test)^2)
## [1] 12058122

# Refit on all rows and extract the coefficients at the selected lambda.
ridge.out <- glmnet(x, y, alpha = 0)
ridge_results <- predict(ridge.out, type = "coefficients", s = bestlam) # [1:20, ]
ridge_results
## 197 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 4.333235e+02
## Country_RegionAlbania -6.473607e+02
## Country_RegionAlgeria 1.428245e+03
## Country_RegionAndorra -4.195786e+02
## Country_RegionAngola -5.666872e+02
## Country_RegionAntigua and Barbuda -1.235069e+03
## Country_RegionArgentina 3.494963e+04
## Country_RegionArmenia 1.545424e+03
## Country_RegionAustralia -9.365311e+02
## Country_RegionAustria -4.793257e+03
## Country_RegionAzerbaijan -1.391276e+03
## Country_RegionBahamas -1.011100e+03
## Country_RegionBahrain -2.759183e+03
## Country_RegionBangladesh 3.708282e+03
## Country_RegionBarbados -7.062082e+02
## Country_RegionBelarus -4.669908e+03
## Country_RegionBelgium -2.405100e+03
## Country_RegionBelize -8.005562e+02
## Country_RegionBenin -7.977208e+02
## Country_RegionBhutan -4.652817e+02
## Country_RegionBolivia 8.892049e+03
## Country_RegionBosnia and Herzegovina 6.224604e+03
## Country_RegionBotswana -1.081047e+03
## Country_RegionBrazil 8.958854e+03
## Country_RegionBrunei -6.792721e+02
## Country_RegionBulgaria 1.570539e+04
## Country_RegionBurkina Faso -9.464592e+02
## Country_RegionBurma 8.977676e+03
## Country_RegionBurundi -6.979051e+02
## Country_RegionCabo Verde -7.981369e+02
## Country_RegionCambodia -9.682058e+01
## Country_RegionCameroon -7.309919e+02
## Country_RegionCanada -4.963204e+02
## Country_RegionCentral African Republic -7.488758e+02
## Country_RegionChad -1.375407e+03
## Country_RegionChile -1.100305e+02
## Country_RegionChina -5.421366e+02
## Country_RegionColombia 3.072886e+02
## Country_RegionComoros -1.429785e+03
## Country_RegionCongo (Brazzaville) -9.518025e+02
## Country_RegionCongo (Kinshasa) -7.972348e+02
## Country_RegionCosta Rica -1.514858e+03
## Country_RegionCote d'Ivoire -9.556805e+02
## Country_RegionCroatia 1.160945e+03
## Country_RegionCuba -5.773725e+03
## Country_RegionCyprus -1.671597e+03
## Country_RegionCzechia 3.744677e+02
## Country_RegionDenmark -1.838348e+03
## Country_RegionDjibouti -8.551705e+02
## Country_RegionDominica -5.538017e+02
## Country_RegionDominican Republic -2.274801e+03
## Country_RegionEcuador 2.121117e+04
## Country_RegionEgypt 1.194332e+04
## Country_RegionEl Salvador 4.465234e+02
## Country_RegionEquatorial Guinea -8.387202e+02
## Country_RegionEritrea -7.170113e+02
## Country_RegionEstonia -1.762069e+03
## Country_RegionEswatini -7.060255e+02
## Country_RegionEthiopia 2.786395e+02
## Country_RegionFiji -8.222379e+02
## Country_RegionFinland -1.929726e+03
## Country_RegionFrance -3.757528e+02
## Country_RegionGabon -8.691476e+02
## Country_RegionGambia -1.362373e+03
## Country_RegionGeorgia -7.377633e+02
## Country_RegionGermany -1.459496e+02
## Country_RegionGhana -1.329338e+03
## Country_RegionGreece 3.325742e+03
## Country_RegionGrenada -1.357402e+03
## Country_RegionGuatemala 5.205753e+03
## Country_RegionGuinea -8.736889e+02
## Country_RegionGuinea-Bissau -1.120949e+03
## Country_RegionGuyana -7.999836e+02
## Country_RegionHaiti -1.032381e+03
## Country_RegionHoly See -3.524291e+02
## Country_RegionHonduras 3.249609e+03
## Country_RegionHungary 1.615600e+04
## Country_RegionIceland -6.019506e+02
## Country_RegionIndia -1.514834e+03
## Country_RegionIndonesia 7.386848e+04
## Country_RegionIran 3.684671e+04
## Country_RegionIraq -6.540821e+03
## Country_RegionIreland -3.335865e+03
## Country_RegionIsrael -1.080078e+04
## Country_RegionItaly 1.487526e+03
## Country_RegionJamaica -2.467967e+02
## Country_RegionJapan -8.722357e+02
## Country_RegionJordan -2.981327e+03
## Country_RegionKazakhstan 1.473601e+03
## Country_RegionKenya 4.005262e+02
## Country_RegionKiribati -3.995272e+02
## Country_RegionKorea, South -3.493541e+03
## Country_RegionKosovo -3.020623e+02
## Country_RegionKuwait -3.709780e+03
## Country_RegionKyrgyzstan -7.767652e+02
## Country_RegionLaos -1.368420e+03
## Country_RegionLatvia -2.593289e+02
## Country_RegionLebanon -1.695119e+03
## Country_RegionLesotho -1.087534e+03
## Country_RegionLiberia -1.866659e+03
## Country_RegionLibya -7.605352e+02
## Country_RegionLiechtenstein -6.454094e+02
## Country_RegionLithuania -7.336414e+02
## Country_RegionLuxembourg -9.249087e+02
## Country_RegionMadagascar -8.308410e+02
## Country_RegionMalawi -3.531557e+02
## Country_RegionMalaysia -1.122321e+03
## Country_RegionMaldives -1.247438e+03
## Country_RegionMali -1.219741e+03
## Country_RegionMalta -7.672785e+02
## Country_RegionMarshall Islands -3.994692e+02
## Country_RegionMauritania -8.562764e+02
## Country_RegionMauritius -8.136533e+02
## Country_RegionMexico 3.663135e+03
## Country_RegionMicronesia -3.995100e+02
## Country_RegionMoldova 2.498567e+03
## Country_RegionMonaco -5.831115e+02
## Country_RegionMongolia -3.671874e+03
## Country_RegionMontenegro -5.074043e+02
## Country_RegionMorocco 2.268023e+01
## Country_RegionMozambique -1.066270e+03
## Country_RegionNamibia 2.983504e+02
## Country_RegionNepal -1.198522e+03
## Country_RegionNetherlands -1.645368e+03
## Country_RegionNew Zealand -5.198587e+02
## Country_RegionNicaragua -8.441993e+02
## Country_RegionNiger -1.504824e+03
## Country_RegionNigeria -9.914381e+02
## Country_RegionNorth Macedonia 2.709183e+03
## Country_RegionNorway -3.368669e+03
## Country_RegionOman -1.051308e+03
## Country_RegionPakistan 2.879990e+02
## Country_RegionPalau -3.989706e+02
## Country_RegionPanama -3.731601e+02
## Country_RegionPapua New Guinea -8.828806e+02
## Country_RegionParaguay 7.457179e+03
## Country_RegionPeru 3.128330e+03
## Country_RegionPhilippines 6.720179e+03
## Country_RegionPoland 2.875230e+04
## Country_RegionPortugal 6.610868e+02
## Country_RegionQatar -3.060562e+03
## Country_RegionRomania 2.740320e+04
## Country_RegionRussia 2.282341e+02
## Country_RegionRwanda -9.431622e+02
## Country_RegionSaint Kitts and Nevis -6.735095e+02
## Country_RegionSaint Lucia -9.411675e+02
## Country_RegionSaint Vincent and the Grenadines -7.850272e+02
## Country_RegionSamoa -3.994829e+02
## Country_RegionSan Marino -6.338087e+02
## Country_RegionSao Tome and Principe -8.777078e+02
## Country_RegionSaudi Arabia -1.233515e+02
## Country_RegionSenegal -5.040994e+02
## Country_RegionSerbia -6.382893e+03
## Country_RegionSeychelles -4.363042e+02
## Country_RegionSierra Leone -1.007430e+03
## Country_RegionSingapore -3.327357e+03
## Country_RegionSlovakia -3.286084e+03
## Country_RegionSlovenia -1.385667e+03
## Country_RegionSolomon Islands -3.996848e+02
## Country_RegionSomalia -1.422874e+03
## Country_RegionSouth Africa 4.066300e+04
## Country_RegionSouth Sudan -7.951064e+02
## Country_RegionSpain -2.212706e+02
## Country_RegionSri Lanka 4.515850e+03
## Country_RegionSudan -4.829913e+02
## Country_RegionSuriname -6.508961e+02
## Country_RegionSweden -7.440733e+02
## Country_RegionSwitzerland -4.250408e+03
## Country_RegionSyria -4.057114e+02
## Country_RegionTaiwan* -1.550299e+03
## Country_RegionTajikistan -7.553057e+02
## Country_RegionTanzania -1.012439e+03
## Country_RegionThailand -1.003858e+04
## Country_RegionTimor-Leste -7.370471e+02
## Country_RegionTogo -8.328101e+02
## Country_RegionTonga -3.994442e+02
## Country_RegionTrinidad and Tobago -2.381305e+02
## Country_RegionTunisia 1.225071e+04
## Country_RegionTurkey -4.793532e+04
## Country_RegionUganda 3.646120e+01
## Country_RegionUkraine 2.330752e+02
## Country_RegionUnited Arab Emirates -8.310415e+03
## Country_RegionUnited Kingdom -1.037434e+03
## Country_RegionUruguay -4.622441e+02
## Country_RegionUS -7.904586e+02
## Country_RegionUzbekistan -1.921464e+03
## Country_RegionVanuatu -5.996960e+03
## Country_RegionVenezuela -1.802946e+03
## Country_RegionVietnam 5.644356e+03
## Country_RegionWest Bank and Gaza -2.355497e+03
## Country_RegionYemen -5.276911e+03
## Country_RegionZambia -4.019867e+02
## Country_RegionZimbabwe 9.386968e+02
## Last_Update .
## Confirmed 1.451517e-02
## Incident_Rate -1.557711e-02
## Case_Fatality_Ratio 3.660769e+02
# ---- Lasso regression (alpha = 1) ----
# Reuses x, y, train, test and y.test created earlier in the script.
set.seed(1) # makes the cross-validation folds reproducible

# Lasso path on the training rows, followed by cross-validation over lambda.
lasso.mod <- glmnet(x = x[train, ], y = y[train], alpha = 1)
cv.out <- cv.glmnet(x = x[train, ], y = y[train], alpha = 1)
plot(cv.out)

# lambda.min as reported by cv.glmnet.
bestlam <- cv.out$lambda.min
bestlam
## [1] 38.5294

# Refit at the chosen lambda and compute the held-out mean squared error.
lasso.mod <- glmnet(
  x = x[train, ],
  y = y[train],
  alpha = 1,
  lambda = bestlam
)
lasso.pred <- predict(lasso.mod, newx = x[test, ], s = bestlam)
mean((lasso.pred - y.test)^2)
## [1] 16412313

# Refit on every row, plot the coefficient paths, and extract the
# coefficients at the chosen lambda.
out <- glmnet(x = x, y = y, alpha = 1)
plot(out)

lasso.coef <- predict(out, s = bestlam, type = "coefficients") # [1:20, ]
lasso.coef
## 197 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) -5.136699e+02
## Country_RegionAlbania .
## Country_RegionAlgeria .
## Country_RegionAndorra .
## Country_RegionAngola .
## Country_RegionAntigua and Barbuda .
## Country_RegionArgentina 2.387649e+04
## Country_RegionArmenia .
## Country_RegionAustralia .
## Country_RegionAustria -4.781777e+03
## Country_RegionAzerbaijan .
## Country_RegionBahamas .
## Country_RegionBahrain -3.541984e+02
## Country_RegionBangladesh .
## Country_RegionBarbados .
## Country_RegionBelarus -3.346529e+03
## Country_RegionBelgium -1.369889e+03
## Country_RegionBelize .
## Country_RegionBenin .
## Country_RegionBhutan .
## Country_RegionBolivia 6.760108e+03
## Country_RegionBosnia and Herzegovina 4.426781e+03
## Country_RegionBotswana .
## Country_RegionBrazil 8.165064e+03
## Country_RegionBrunei .
## Country_RegionBulgaria 1.376098e+04
## Country_RegionBurkina Faso .
## Country_RegionBurma 6.904217e+03
## Country_RegionBurundi .
## Country_RegionCabo Verde .
## Country_RegionCambodia .
## Country_RegionCameroon .
## Country_RegionCanada .
## Country_RegionCentral African Republic .
## Country_RegionChad .
## Country_RegionChile .
## Country_RegionChina .
## Country_RegionColombia 3.564949e+02
## Country_RegionComoros .
## Country_RegionCongo (Brazzaville) .
## Country_RegionCongo (Kinshasa) .
## Country_RegionCosta Rica .
## Country_RegionCote d'Ivoire .
## Country_RegionCroatia .
## Country_RegionCuba -5.255610e+03
## Country_RegionCyprus .
## Country_RegionCzechia -1.675455e+03
## Country_RegionDenmark -1.030694e+02
## Country_RegionDjibouti .
## Country_RegionDominica .
## Country_RegionDominican Republic -1.380268e+02
## Country_RegionEcuador 2.008584e+04
## Country_RegionEgypt 1.041450e+04
## Country_RegionEl Salvador .
## Country_RegionEquatorial Guinea .
## Country_RegionEritrea .
## Country_RegionEstonia .
## Country_RegionEswatini .
## Country_RegionEthiopia .
## Country_RegionFiji .
## Country_RegionFinland .
## Country_RegionFrance -4.269407e+02
## Country_RegionGabon .
## Country_RegionGambia .
## Country_RegionGeorgia .
## Country_RegionGermany .
## Country_RegionGhana .
## Country_RegionGreece .
## Country_RegionGrenada .
## Country_RegionGuatemala 2.617689e+03
## Country_RegionGuinea .
## Country_RegionGuinea-Bissau .
## Country_RegionGuyana .
## Country_RegionHaiti .
## Country_RegionHoly See .
## Country_RegionHonduras 1.049177e+03
## Country_RegionHungary 1.322760e+04
## Country_RegionIceland .
## Country_RegionIndia -2.637425e+03
## Country_RegionIndonesia 6.878844e+04
## Country_RegionIran 2.407968e+04
## Country_RegionIraq -8.743658e+03
## Country_RegionIreland -1.761529e+03
## Country_RegionIsrael -1.163671e+04
## Country_RegionItaly 1.331919e+03
## Country_RegionJamaica .
## Country_RegionJapan .
## Country_RegionJordan -2.291115e+03
## Country_RegionKazakhstan .
## Country_RegionKenya .
## Country_RegionKiribati .
## Country_RegionKorea, South -1.639960e+03
## Country_RegionKosovo .
## Country_RegionKuwait -1.701644e+03
## Country_RegionKyrgyzstan .
## Country_RegionLaos .
## Country_RegionLatvia .
## Country_RegionLebanon -1.722427e+02
## Country_RegionLesotho .
## Country_RegionLiberia .
## Country_RegionLibya .
## Country_RegionLiechtenstein .
## Country_RegionLithuania .
## Country_RegionLuxembourg .
## Country_RegionMadagascar .
## Country_RegionMalawi .
## Country_RegionMalaysia -1.313159e+02
## Country_RegionMaldives .
## Country_RegionMali .
## Country_RegionMalta .
## Country_RegionMarshall Islands .
## Country_RegionMauritania .
## Country_RegionMauritius .
## Country_RegionMexico 3.817885e+03
## Country_RegionMicronesia .
## Country_RegionMoldova 2.582534e+02
## Country_RegionMonaco .
## Country_RegionMongolia -1.591026e+03
## Country_RegionMontenegro .
## Country_RegionMorocco .
## Country_RegionMozambique .
## Country_RegionNamibia .
## Country_RegionNepal -2.305701e+01
## Country_RegionNetherlands -7.189464e+02
## Country_RegionNew Zealand .
## Country_RegionNicaragua .
## Country_RegionNiger .
## Country_RegionNigeria .
## Country_RegionNorth Macedonia 7.900907e+02
## Country_RegionNorway -1.031800e+03
## Country_RegionOman .
## Country_RegionPakistan .
## Country_RegionPalau .
## Country_RegionPanama .
## Country_RegionPapua New Guinea .
## Country_RegionParaguay 5.387540e+03
## Country_RegionPeru 3.259252e+03
## Country_RegionPhilippines .
## Country_RegionPoland 2.099439e+04
## Country_RegionPortugal .
## Country_RegionQatar -5.776525e+02
## Country_RegionRomania 2.399968e+04
## Country_RegionRussia 5.086168e+02
## Country_RegionRwanda .
## Country_RegionSaint Kitts and Nevis .
## Country_RegionSaint Lucia .
## Country_RegionSaint Vincent and the Grenadines .
## Country_RegionSamoa .
## Country_RegionSan Marino .
## Country_RegionSao Tome and Principe .
## Country_RegionSaudi Arabia .
## Country_RegionSenegal .
## Country_RegionSerbia -6.650095e+03
## Country_RegionSeychelles .
## Country_RegionSierra Leone .
## Country_RegionSingapore -9.242806e+02
## Country_RegionSlovakia -3.251202e+03
## Country_RegionSlovenia .
## Country_RegionSolomon Islands .
## Country_RegionSomalia .
## Country_RegionSouth Africa 3.548345e+04
## Country_RegionSouth Sudan .
## Country_RegionSpain .
## Country_RegionSri Lanka 1.990001e+03
## Country_RegionSudan .
## Country_RegionSuriname .
## Country_RegionSweden .
## Country_RegionSwitzerland -3.885847e+03
## Country_RegionSyria .
## Country_RegionTaiwan* .
## Country_RegionTajikistan .
## Country_RegionTanzania .
## Country_RegionThailand -1.269009e+04
## Country_RegionTimor-Leste .
## Country_RegionTogo .
## Country_RegionTonga .
## Country_RegionTrinidad and Tobago .
## Country_RegionTunisia 1.000826e+04
## Country_RegionTurkey -6.999108e+04
## Country_RegionUganda .
## Country_RegionUkraine 2.782663e+02
## Country_RegionUnited Arab Emirates -7.468770e+03
## Country_RegionUnited Kingdom -1.291750e+03
## Country_RegionUruguay .
## Country_RegionUS .
## Country_RegionUzbekistan .
## Country_RegionVanuatu -3.967662e+03
## Country_RegionVenezuela .
## Country_RegionVietnam 1.433926e+03
## Country_RegionWest Bank and Gaza -3.653453e+02
## Country_RegionYemen -3.340372e+03
## Country_RegionZambia .
## Country_RegionZimbabwe .
## Last_Update .
## Confirmed 1.687055e-02
## Incident_Rate -1.364757e-02
## Case_Fatality_Ratio 4.130920e+02
# library(knitr)
# kable(ag, caption= "AG")