library(stringr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(covid19.analytics)
## Warning: package 'covid19.analytics' was built under R version 4.0.5
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.0.5
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(leaps)
## Warning: package 'leaps' was built under R version 4.0.5
library(zoo)
## Warning: package 'zoo' was built under R version 4.0.5
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.5
## Loading required package: Matrix
## Loaded glmnet 4.1-2
library(caTools)
## Warning: package 'caTools' was built under R version 4.0.5
library(olsrr)
## Warning: package 'olsrr' was built under R version 4.0.5
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
# Raise the console print limit so large objects (e.g. full coefficient
# tables) are printed without truncation.
options(max.print = 999999)

# Download the Our World in Data COVID-19 table, remove the columns that are
# not used in the analysis, and keep only the ten countries of interest.
# Character columns (including iso_code) are read as factors.
dataset <- read.csv(
  "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv",
  stringsAsFactors = TRUE
) %>%
  select(-c(
    # identifiers other than iso_code, and the observation date
    continent, location, date,
    # hospital / ICU series
    weekly_icu_admissions, weekly_icu_admissions_per_million,
    weekly_hosp_admissions, weekly_hosp_admissions_per_million,
    icu_patients, icu_patients_per_million,
    hosp_patients, hosp_patients_per_million,
    # boosters
    total_boosters, total_boosters_per_hundred,
    # excess mortality
    excess_mortality, excess_mortality_cumulative,
    excess_mortality_cumulative_absolute,
    excess_mortality_cumulative_per_million,
    # population-scaled case / death / test series
    total_cases_per_million, new_cases_per_million,
    new_cases_smoothed_per_million,
    total_deaths_per_million, new_deaths_per_million,
    new_deaths_smoothed_per_million,
    total_tests_per_thousand, new_tests_per_thousand,
    new_tests_smoothed_per_thousand,
    # population-scaled vaccination series
    total_vaccinations_per_hundred, people_vaccinated_per_hundred,
    people_fully_vaccinated_per_hundred,
    new_vaccinations_smoothed_per_million,
    new_people_vaccinated_smoothed_per_hundred,
    # other
    handwashing_facilities
  )) %>%
  filter(iso_code %in% c(
    "USA", "IND", "BRA", "RUS", "GBR", "FRA", "TUR", "ITA", "COL", "DEU"
  ))
# ag <- covid19.data(case = 'aggregated')
#ag$LU_year <- substr(ag$Last_Update, 1, 4)
#ag$LU_year <- as.numeric(ag$LU_year)
#ag$LU_month <- substr(ag$Last_Update, 6, 7)
#ag$LU_month <- as.numeric(ag$LU_month)
#ag$LU_day <- substr(ag$Last_Update, 9, 10)
#ag$LU_day <- as.numeric(ag$LU_day)
#ag$Last_Update <- substr(ag$Last_Update, 1, 10)
#ag$Last_Update <- ymd(ag$Last_Update)
#dataset <- subset(dataset = -c(icu_patients, hosp_patients, hosp_patients_per_millions,
# total_boosters, total_boosters_per_hundred, continent, location,
# weekly_icu_admissions, weekly_icu_admissions_per_million,
# weekly_hosp_admissions, weekly_hosp_admissions_per_million))
# Keep complete cases only, then drop factor levels that no longer occur in
# the data (e.g. iso_code values of countries that were filtered out or lost
# all their rows to na.omit). Without this, model.matrix() creates an all-zero
# dummy column for every unused level.
dataset <- droplevels(na.omit(dataset))
# Renumber the rows 1..n; seq_len() is also valid when no rows remain.
rownames(dataset) <- seq_len(nrow(dataset))
# write.csv(ag, "C:/Users/Sam/Documents/MATH_624/Module_14/ag.csv")
# ag <- read.csv("C:/Users/Sam/Documents/MATH_624/Module_14/ag.csv", header = TRUE, sep = ",")
# ag$Active <- as.character(ag$Active)
# ag$Recovered <- as.character(ag$Recovered)
# ag <- ag %>% replace_na(list(Recovered = 'None', Active = 'None'))
set.seed(1) # fixed seed so the random split is reproducible
# Randomly assign half of the rows (rounded down) to the training set. The
# sizes are derived from the data because the source table changes over time;
# with 1835 rows this is exactly sample(1835, 917).
train <- sample(nrow(dataset), nrow(dataset) %/% 2)
train.dat <- dataset[train, ]
test.dat <- dataset[-train, ]
# Gaussian GLM (glm()'s default family) of total_deaths on every remaining
# column, used to screen predictors by p-value.
# NOTE(review): the model is fitted on the full data set rather than on
# train.dat -- confirm that screening on all rows is intended.
model1 <- glm(total_deaths ~ ., data = dataset)
results <- summary(model1)

# Coefficient table as a data frame (data.frame() rewrites the column name
# "Pr(>|t|)" to "Pr...t..").
pvals <- data.frame(results$coefficients)

# Keep the terms significant at the 5% level. Base subsetting always keeps the
# row names (the term labels), and which() discards any NA comparison instead
# of producing NA rows.
significant <- which(results$coefficients[, "Pr(>|t|)"] < 0.05)
pvals <- pvals[significant, , drop = FALSE]
print(rownames(pvals))
## [1] "(Intercept)" "iso_codeGBR"
## [3] "iso_codeIND" "iso_codeITA"
## [5] "iso_codeRUS" "iso_codeTUR"
## [7] "iso_codeUSA" "total_cases"
## [9] "new_cases_smoothed" "new_deaths_smoothed"
## [11] "reproduction_rate" "total_tests"
## [13] "new_tests_smoothed" "tests_per_case"
## [15] "total_vaccinations" "people_vaccinated"
## [17] "people_fully_vaccinated" "new_vaccinations_smoothed"
# Design matrix for glmnet, restricted to the predictors that the GLM screen
# reported as significant. model.matrix() expands iso_code into dummy columns;
# the leading "(Intercept)" column is removed with [, -1] because glmnet fits
# its own intercept.
x <- model.matrix(
  total_deaths ~ iso_code + total_cases + new_cases_smoothed +
    new_deaths_smoothed + reproduction_rate + total_tests +
    new_tests_smoothed + tests_per_case + total_vaccinations +
    people_vaccinated + people_fully_vaccinated + new_vaccinations_smoothed,
  data = dataset
)[, -1]

# Response vector, row-aligned with x.
y <- dataset[["total_deaths"]]
set.seed(1) # fixed seed so the random split is reproducible
# Row indices of the training half (rounded down), sized from the design
# matrix instead of a hard-coded row count; with 1835 rows this is exactly
# sample(1835, 917).
train <- sample(nrow(x), nrow(x) %/% 2)
# Negative indices select every row that is not in the training set.
test <- -train
# Ridge regression (alpha = 0) on the training rows: fit the model over
# glmnet's default lambda sequence, then cross-validate to choose lambda.
ridge.mod <- glmnet(x = x[train, ], y = y[train], alpha = 0)
cv.out <- cv.glmnet(x = x[train, ], y = y[train], alpha = 0)

# Cross-validation curve.
plot(cv.out)

# Components stored in the cv.glmnet result.
names(cv.out)
## [1] "lambda" "cvm" "cvsd" "cvup" "cvlo"
## [6] "nzero" "call" "name" "glmnet.fit" "lambda.min"
## [11] "lambda.1se" "index"
# Lambda with the smallest mean cross-validated error.
bestlam <- cv.out[["lambda.min"]]
bestlam
## [1] 19830.74
# Refit ridge on the training rows at that single lambda and predict the
# held-out rows.
ridge.mod <- glmnet(x = x[train, ], y = y[train], alpha = 0, lambda = bestlam)
ridge.pred <- predict(ridge.mod, newx = x[test, ], s = bestlam)
# Observed response on the held-out rows.
y.test <- y[test]
# Test-set mean squared error.
mean((ridge.pred - y.test)^2)
## [1] 517031790
# Refit the ridge path on all observations and report the coefficients at the
# cross-validated lambda.
ridge.out <- glmnet(x = x, y = y, alpha = 0)
ridge_results <- predict(ridge.out, s = bestlam, type = "coefficients")
ridge_results
## 248 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 8.819964e+04
## iso_codeAFG .
## iso_codeAGO .
## iso_codeAIA .
## iso_codeALB .
## iso_codeAND .
## iso_codeARE .
## iso_codeARG .
## iso_codeARM .
## iso_codeATG .
## iso_codeAUS .
## iso_codeAUT .
## iso_codeAZE .
## iso_codeBDI .
## iso_codeBEL .
## iso_codeBEN .
## iso_codeBES .
## iso_codeBFA .
## iso_codeBGD .
## iso_codeBGR .
## iso_codeBHR .
## iso_codeBHS .
## iso_codeBIH .
## iso_codeBLR .
## iso_codeBLZ .
## iso_codeBMU .
## iso_codeBOL .
## iso_codeBRA .
## iso_codeBRB .
## iso_codeBRN .
## iso_codeBTN .
## iso_codeBWA .
## iso_codeCAF .
## iso_codeCAN .
## iso_codeCHE .
## iso_codeCHL .
## iso_codeCHN .
## iso_codeCIV .
## iso_codeCMR .
## iso_codeCOD .
## iso_codeCOG .
## iso_codeCOK .
## iso_codeCOL -4.160092e+03
## iso_codeCOM .
## iso_codeCPV .
## iso_codeCRI .
## iso_codeCUB .
## iso_codeCUW .
## iso_codeCYM .
## iso_codeCYP .
## iso_codeCZE .
## iso_codeDEU .
## iso_codeDJI .
## iso_codeDMA .
## iso_codeDNK .
## iso_codeDOM .
## iso_codeDZA .
## iso_codeECU .
## iso_codeEGY .
## iso_codeERI .
## iso_codeESP .
## iso_codeEST .
## iso_codeETH .
## iso_codeFIN .
## iso_codeFJI .
## iso_codeFLK .
## iso_codeFRA .
## iso_codeFRO .
## iso_codeFSM .
## iso_codeGAB .
## iso_codeGBR -4.803875e+04
## iso_codeGEO .
## iso_codeGGY .
## iso_codeGHA .
## iso_codeGIB .
## iso_codeGIN .
## iso_codeGMB .
## iso_codeGNB .
## iso_codeGNQ .
## iso_codeGRC .
## iso_codeGRD .
## iso_codeGRL .
## iso_codeGTM .
## iso_codeGUY .
## iso_codeHKG .
## iso_codeHND .
## iso_codeHRV .
## iso_codeHTI .
## iso_codeHUN .
## iso_codeIDN .
## iso_codeIMN .
## iso_codeIND -3.724126e+04
## iso_codeIRL .
## iso_codeIRN .
## iso_codeIRQ .
## iso_codeISL .
## iso_codeISR .
## iso_codeITA -8.566365e+03
## iso_codeJAM .
## iso_codeJEY .
## iso_codeJOR .
## iso_codeJPN .
## iso_codeKAZ .
## iso_codeKEN .
## iso_codeKGZ .
## iso_codeKHM .
## iso_codeKIR .
## iso_codeKNA .
## iso_codeKOR .
## iso_codeKWT .
## iso_codeLAO .
## iso_codeLBN .
## iso_codeLBR .
## iso_codeLBY .
## iso_codeLCA .
## iso_codeLIE .
## iso_codeLKA .
## iso_codeLSO .
## iso_codeLTU .
## iso_codeLUX .
## iso_codeLVA .
## iso_codeMAC .
## iso_codeMAR .
## iso_codeMCO .
## iso_codeMDA .
## iso_codeMDG .
## iso_codeMDV .
## iso_codeMEX .
## iso_codeMHL .
## iso_codeMKD .
## iso_codeMLI .
## iso_codeMLT .
## iso_codeMMR .
## iso_codeMNE .
## iso_codeMNG .
## iso_codeMOZ .
## iso_codeMRT .
## iso_codeMSR .
## iso_codeMUS .
## iso_codeMWI .
## iso_codeMYS .
## iso_codeNAM .
## iso_codeNCL .
## iso_codeNER .
## iso_codeNGA .
## iso_codeNIC .
## iso_codeNIU .
## iso_codeNLD .
## iso_codeNOR .
## iso_codeNPL .
## iso_codeNRU .
## iso_codeNZL .
## iso_codeOMN .
## iso_codeOWID_AFR .
## iso_codeOWID_ASI .
## iso_codeOWID_CYN .
## iso_codeOWID_EUN .
## iso_codeOWID_EUR .
## iso_codeOWID_HIC .
## iso_codeOWID_INT .
## iso_codeOWID_KOS .
## iso_codeOWID_LIC .
## iso_codeOWID_LMC .
## iso_codeOWID_NAM .
## iso_codeOWID_OCE .
## iso_codeOWID_SAM .
## iso_codeOWID_UMC .
## iso_codeOWID_WRL .
## iso_codePAK .
## iso_codePAN .
## iso_codePCN .
## iso_codePER .
## iso_codePHL .
## iso_codePLW .
## iso_codePNG .
## iso_codePOL .
## iso_codePRT .
## iso_codePRY .
## iso_codePSE .
## iso_codePYF .
## iso_codeQAT .
## iso_codeROU .
## iso_codeRUS -2.444284e+04
## iso_codeRWA .
## iso_codeSAU .
## iso_codeSDN .
## iso_codeSEN .
## iso_codeSGP .
## iso_codeSHN .
## iso_codeSLB .
## iso_codeSLE .
## iso_codeSLV .
## iso_codeSMR .
## iso_codeSOM .
## iso_codeSRB .
## iso_codeSSD .
## iso_codeSTP .
## iso_codeSUR .
## iso_codeSVK .
## iso_codeSVN .
## iso_codeSWE .
## iso_codeSWZ .
## iso_codeSXM .
## iso_codeSYC .
## iso_codeSYR .
## iso_codeTCA .
## iso_codeTCD .
## iso_codeTGO .
## iso_codeTHA .
## iso_codeTJK .
## iso_codeTKL .
## iso_codeTKM .
## iso_codeTLS .
## iso_codeTON .
## iso_codeTTO .
## iso_codeTUN .
## iso_codeTUR -7.522161e+04
## iso_codeTUV .
## iso_codeTWN .
## iso_codeTZA .
## iso_codeUGA .
## iso_codeUKR .
## iso_codeURY .
## iso_codeUSA 1.582848e+05
## iso_codeUZB .
## iso_codeVAT .
## iso_codeVCT .
## iso_codeVEN .
## iso_codeVGB .
## iso_codeVNM .
## iso_codeVUT .
## iso_codeWLF .
## iso_codeWSM .
## iso_codeYEM .
## iso_codeZAF .
## iso_codeZMB .
## iso_codeZWE .
## total_cases 5.209306e-03
## new_cases_smoothed -1.917844e-01
## new_deaths_smoothed 4.674523e+00
## reproduction_rate -1.113954e+04
## total_tests 2.230228e-04
## new_tests_smoothed 2.767698e-02
## tests_per_case 3.030740e+01
## total_vaccinations 2.270409e-05
## people_vaccinated 1.124624e-06
## people_fully_vaccinated 2.227355e-04
## new_vaccinations_smoothed 7.686551e-04
# Lasso (alpha = 1) on the training rows. The seed is reset so the random
# cross-validation folds are reproducible.
set.seed(1)
lasso.mod <- glmnet(x = x[train, ], y = y[train], alpha = 1)
cv.out <- cv.glmnet(x = x[train, ], y = y[train], alpha = 1)

# Cross-validation curve.
plot(cv.out)

# Lambda with the smallest mean cross-validated error for the lasso.
bestlam <- cv.out[["lambda.min"]]
bestlam
## [1] 268.3194
# Refit the lasso on the training rows at that single lambda, predict the
# held-out rows, and report the test-set mean squared error.
lasso.mod <- glmnet(x = x[train, ], y = y[train], alpha = 1, lambda = bestlam)
lasso.pred <- predict(lasso.mod, newx = x[test, ], s = bestlam)
mean((lasso.pred - y.test)^2)
## [1] 120698795
# Refit the lasso path on all observations and plot the coefficient paths.
out <- glmnet(x = x, y = y, alpha = 1)
plot(out)

# Coefficients at the cross-validated lambda; entries printed as "." are
# exactly zero in the sparse matrix.
lasso.coef <- predict(out, s = bestlam, type = "coefficients")
lasso.coef
## 248 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 5.305678e+04
## iso_codeAFG .
## iso_codeAGO .
## iso_codeAIA .
## iso_codeALB .
## iso_codeAND .
## iso_codeARE .
## iso_codeARG .
## iso_codeARM .
## iso_codeATG .
## iso_codeAUS .
## iso_codeAUT .
## iso_codeAZE .
## iso_codeBDI .
## iso_codeBEL .
## iso_codeBEN .
## iso_codeBES .
## iso_codeBFA .
## iso_codeBGD .
## iso_codeBGR .
## iso_codeBHR .
## iso_codeBHS .
## iso_codeBIH .
## iso_codeBLR .
## iso_codeBLZ .
## iso_codeBMU .
## iso_codeBOL .
## iso_codeBRA .
## iso_codeBRB .
## iso_codeBRN .
## iso_codeBTN .
## iso_codeBWA .
## iso_codeCAF .
## iso_codeCAN .
## iso_codeCHE .
## iso_codeCHL .
## iso_codeCHN .
## iso_codeCIV .
## iso_codeCMR .
## iso_codeCOD .
## iso_codeCOG .
## iso_codeCOK .
## iso_codeCOL -1.535873e+03
## iso_codeCOM .
## iso_codeCPV .
## iso_codeCRI .
## iso_codeCUB .
## iso_codeCUW .
## iso_codeCYM .
## iso_codeCYP .
## iso_codeCZE .
## iso_codeDEU .
## iso_codeDJI .
## iso_codeDMA .
## iso_codeDNK .
## iso_codeDOM .
## iso_codeDZA .
## iso_codeECU .
## iso_codeEGY .
## iso_codeERI .
## iso_codeESP .
## iso_codeEST .
## iso_codeETH .
## iso_codeFIN .
## iso_codeFJI .
## iso_codeFLK .
## iso_codeFRA .
## iso_codeFRO .
## iso_codeFSM .
## iso_codeGAB .
## iso_codeGBR .
## iso_codeGEO .
## iso_codeGGY .
## iso_codeGHA .
## iso_codeGIB .
## iso_codeGIN .
## iso_codeGMB .
## iso_codeGNB .
## iso_codeGNQ .
## iso_codeGRC .
## iso_codeGRD .
## iso_codeGRL .
## iso_codeGTM .
## iso_codeGUY .
## iso_codeHKG .
## iso_codeHND .
## iso_codeHRV .
## iso_codeHTI .
## iso_codeHUN .
## iso_codeIDN .
## iso_codeIMN .
## iso_codeIND -3.495507e+04
## iso_codeIRL .
## iso_codeIRN .
## iso_codeIRQ .
## iso_codeISL .
## iso_codeISR .
## iso_codeITA 1.150132e+04
## iso_codeJAM .
## iso_codeJEY .
## iso_codeJOR .
## iso_codeJPN .
## iso_codeKAZ .
## iso_codeKEN .
## iso_codeKGZ .
## iso_codeKHM .
## iso_codeKIR .
## iso_codeKNA .
## iso_codeKOR .
## iso_codeKWT .
## iso_codeLAO .
## iso_codeLBN .
## iso_codeLBR .
## iso_codeLBY .
## iso_codeLCA .
## iso_codeLIE .
## iso_codeLKA .
## iso_codeLSO .
## iso_codeLTU .
## iso_codeLUX .
## iso_codeLVA .
## iso_codeMAC .
## iso_codeMAR .
## iso_codeMCO .
## iso_codeMDA .
## iso_codeMDG .
## iso_codeMDV .
## iso_codeMEX .
## iso_codeMHL .
## iso_codeMKD .
## iso_codeMLI .
## iso_codeMLT .
## iso_codeMMR .
## iso_codeMNE .
## iso_codeMNG .
## iso_codeMOZ .
## iso_codeMRT .
## iso_codeMSR .
## iso_codeMUS .
## iso_codeMWI .
## iso_codeMYS .
## iso_codeNAM .
## iso_codeNCL .
## iso_codeNER .
## iso_codeNGA .
## iso_codeNIC .
## iso_codeNIU .
## iso_codeNLD .
## iso_codeNOR .
## iso_codeNPL .
## iso_codeNRU .
## iso_codeNZL .
## iso_codeOMN .
## iso_codeOWID_AFR .
## iso_codeOWID_ASI .
## iso_codeOWID_CYN .
## iso_codeOWID_EUN .
## iso_codeOWID_EUR .
## iso_codeOWID_HIC .
## iso_codeOWID_INT .
## iso_codeOWID_KOS .
## iso_codeOWID_LIC .
## iso_codeOWID_LMC .
## iso_codeOWID_NAM .
## iso_codeOWID_OCE .
## iso_codeOWID_SAM .
## iso_codeOWID_UMC .
## iso_codeOWID_WRL .
## iso_codePAK .
## iso_codePAN .
## iso_codePCN .
## iso_codePER .
## iso_codePHL .
## iso_codePLW .
## iso_codePNG .
## iso_codePOL .
## iso_codePRT .
## iso_codePRY .
## iso_codePSE .
## iso_codePYF .
## iso_codeQAT .
## iso_codeROU .
## iso_codeRUS 3.213278e+03
## iso_codeRWA .
## iso_codeSAU .
## iso_codeSDN .
## iso_codeSEN .
## iso_codeSGP .
## iso_codeSHN .
## iso_codeSLB .
## iso_codeSLE .
## iso_codeSLV .
## iso_codeSMR .
## iso_codeSOM .
## iso_codeSRB .
## iso_codeSSD .
## iso_codeSTP .
## iso_codeSUR .
## iso_codeSVK .
## iso_codeSVN .
## iso_codeSWE .
## iso_codeSWZ .
## iso_codeSXM .
## iso_codeSYC .
## iso_codeSYR .
## iso_codeTCA .
## iso_codeTCD .
## iso_codeTGO .
## iso_codeTHA .
## iso_codeTJK .
## iso_codeTKL .
## iso_codeTKM .
## iso_codeTLS .
## iso_codeTON .
## iso_codeTTO .
## iso_codeTUN .
## iso_codeTUR -7.261097e+04
## iso_codeTUV .
## iso_codeTWN .
## iso_codeTZA .
## iso_codeUGA .
## iso_codeUKR .
## iso_codeURY .
## iso_codeUSA 1.057361e+05
## iso_codeUZB .
## iso_codeVAT .
## iso_codeVCT .
## iso_codeVEN .
## iso_codeVGB .
## iso_codeVNM .
## iso_codeVUT .
## iso_codeWLF .
## iso_codeWSM .
## iso_codeYEM .
## iso_codeZAF .
## iso_codeZMB .
## iso_codeZWE .
## total_cases 1.314559e-02
## new_cases_smoothed -1.523977e-01
## new_deaths_smoothed -4.939630e+00
## reproduction_rate 1.526322e+03
## total_tests -2.888769e-07
## new_tests_smoothed -4.662682e-03
## tests_per_case 5.017302e+01
## total_vaccinations .
## people_vaccinated .
## people_fully_vaccinated .
## new_vaccinations_smoothed .