library(stringr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(covid19.analytics)
## Warning: package 'covid19.analytics' was built under R version 4.0.5
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.0.5
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(leaps)
## Warning: package 'leaps' was built under R version 4.0.5
library(zoo)
## Warning: package 'zoo' was built under R version 4.0.5
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.5
## Loading required package: Matrix
## Loaded glmnet 4.1-2
library(caTools)
## Warning: package 'caTools' was built under R version 4.0.5
library(olsrr)
## Warning: package 'olsrr' was built under R version 4.0.5
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
# Raise the console print limit so long outputs are not truncated.
options(max.print = 999999)

# Download the Our World in Data COVID-19 table, drop the columns listed in
# select(), and keep only the rows whose iso_code is one of the ten countries
# listed in filter(). Strings are read as factors, so iso_code is a factor.
# Each column to drop is listed exactly once.
dataset <- read.csv(
  "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv",
  stringsAsFactors = TRUE
) %>%
  select(-c(
    continent, location,
    weekly_icu_admissions, weekly_icu_admissions_per_million,
    weekly_hosp_admissions, weekly_hosp_admissions_per_million,
    icu_patients, icu_patients_per_million,
    hosp_patients, hosp_patients_per_million,
    total_boosters, total_boosters_per_hundred,
    handwashing_facilities,
    total_deaths_per_million, new_deaths_per_million,
    new_deaths_smoothed_per_million,
    total_cases_per_million, new_cases_per_million,
    new_cases_smoothed_per_million,
    total_tests_per_thousand, new_tests_per_thousand,
    new_tests_smoothed_per_thousand,
    total_vaccinations_per_hundred, people_vaccinated_per_hundred,
    people_fully_vaccinated_per_hundred,
    new_vaccinations_smoothed_per_million,
    new_people_vaccinated_smoothed_per_hundred,
    excess_mortality, excess_mortality_cumulative,
    excess_mortality_cumulative_absolute,
    excess_mortality_cumulative_per_million,
    date
  )) %>%
  filter(iso_code %in% c(
    "USA", "IND", "BRA", "RUS", "GBR", "FRA", "TUR", "ITA", "COL", "DEU"
  ))
# ag <- covid19.data(case = 'aggregated')


#ag$LU_year <- substr(ag$Last_Update, 1, 4)
#ag$LU_year <- as.numeric(ag$LU_year)
#ag$LU_month <- substr(ag$Last_Update, 6, 7)
#ag$LU_month <- as.numeric(ag$LU_month)
#ag$LU_day <- substr(ag$Last_Update, 9, 10)
#ag$LU_day <- as.numeric(ag$LU_day)
#ag$Last_Update <- substr(ag$Last_Update, 1, 10)
#ag$Last_Update <- ymd(ag$Last_Update)

#dataset <- subset(dataset = -c(icu_patients, hosp_patients, hosp_patients_per_millions,
#                               total_boosters, total_boosters_per_hundred, continent, location,
#                               weekly_icu_admissions, weekly_icu_admissions_per_million,
#                               weekly_hosp_admissions, weekly_hosp_admissions_per_million))
# Keep only the rows that have no missing value in any remaining column, then
# discard factor levels that no longer occur in the data. iso_code is read as a
# factor with one level per code in the CSV; without droplevels() every unused
# level becomes an all-zero dummy column when model.matrix() expands the factor.
dataset <- droplevels(na.omit(dataset))
# Reset the row names to the default 1..n sequence (also safe for zero rows,
# where 1:nrow(dataset) would yield c(1, 0)).
rownames(dataset) <- NULL
# write.csv(ag, "C:/Users/Sam/Documents/MATH_624/Module_14/ag.csv")
# ag <- read.csv("C:/Users/Sam/Documents/MATH_624/Module_14/ag.csv", header = TRUE, sep = ",")

# ag$Active <- as.character(ag$Active)
# ag$Recovered <- as.character(ag$Recovered)
# ag <- ag %>% replace_na(list(Recovered = 'None', Active = 'None'))
# Fix the RNG seed so the random train/test split below is reproducible.
set.seed(1)
# A split needs at least two rows; fail early instead of indexing with an
# empty sample.
stopifnot(nrow(dataset) >= 2)
# Randomly pick half of the rows (rounded down) for training. The sizes are
# derived from the data rather than hard-coded because the row count changes
# whenever the downloaded table changes.
train <- sample(nrow(dataset), floor(nrow(dataset) / 2))
train.dat <- dataset[train, ]
# All remaining rows form the test set.
test.dat <- dataset[-train, ]

# Fit total_deaths against every other column of the full dataset (glm with
# its default family) and keep the summary for inspection.
model1 <- glm(formula = total_deaths ~ ., data = dataset)
results <- summary(model1)
# Turn the coefficient table into a data frame (data.frame() rewrites the
# p-value column name to "Pr...t..") and keep the terms with p < 0.05.
pvals <- results$coefficients %>%
  data.frame() %>%
  filter(Pr...t.. < 0.05)
print(rownames(pvals))
##  [1] "(Intercept)"               "iso_codeGBR"              
##  [3] "iso_codeIND"               "iso_codeITA"              
##  [5] "iso_codeRUS"               "iso_codeTUR"              
##  [7] "iso_codeUSA"               "total_cases"              
##  [9] "new_cases_smoothed"        "new_deaths_smoothed"      
## [11] "reproduction_rate"         "total_tests"              
## [13] "new_tests_smoothed"        "tests_per_case"           
## [15] "total_vaccinations"        "people_vaccinated"        
## [17] "people_fully_vaccinated"   "new_vaccinations_smoothed"
# Build the predictor matrix for glmnet from the listed terms; factor columns
# are expanded into dummy columns by model.matrix().
x <- model.matrix(
  reformulate(
    c(
      "iso_code", "total_cases", "new_cases_smoothed", "new_deaths_smoothed",
      "reproduction_rate", "total_tests", "new_tests_smoothed",
      "tests_per_case", "total_vaccinations", "people_vaccinated",
      "people_fully_vaccinated", "new_vaccinations_smoothed"
    ),
    response = "total_deaths"
  ),
  data = dataset
)[, -1]
# The [, -1] above removes the first column, i.e. the intercept.
# x[1:3, ]
# Response vector, in the same row order as x.
y <- dataset[["total_deaths"]]
# Fix the RNG seed so the random train/test split below is reproducible.
set.seed(1)
# A split needs at least two rows; fail early instead of indexing with an
# empty sample.
stopifnot(nrow(x) >= 2)
# Randomly pick half of the rows of x (rounded down) for training. The sizes
# are derived from the data rather than hard-coded because the row count
# changes whenever the downloaded table changes.
train <- sample(nrow(x), floor(nrow(x) / 2))
# Negative indices select every row that is not in the training sample.
test <- -train
# Ridge regression (alpha = 0) on the training rows; no lambda is supplied, so
# glmnet chooses the penalty sequence itself.
ridge.mod <- glmnet(x = x[train, ], y = y[train], alpha = 0)
# Cross-validate the same ridge fit to choose the penalty, then plot the
# cross-validation result.
cv.out <- cv.glmnet(x = x[train, ], y = y[train], alpha = 0)
plot(cv.out)

# List the components stored in the cross-validation result.
names(cv.out)
##  [1] "lambda"     "cvm"        "cvsd"       "cvup"       "cvlo"      
##  [6] "nzero"      "call"       "name"       "glmnet.fit" "lambda.min"
## [11] "lambda.1se" "index"
# Take the lambda.min penalty from the cross-validation result and print it.
bestlam <- cv.out[["lambda.min"]]
bestlam
## [1] 19830.74
# Refit ridge on the training rows at that single penalty value.
ridge.mod <- glmnet(x = x[train, ], y = y[train], alpha = 0, lambda = bestlam)
# Predict the held-out rows and report the test mean squared error.
ridge.pred <- predict(ridge.mod, newx = x[test, ], s = bestlam)
y.test <- y[test] # observed response for the test rows
mean((ridge.pred - y.test)^2)
## [1] 517031790
# Refit ridge on all rows and extract the coefficients at the selected penalty.
ridge.out <- glmnet(x = x, y = y, alpha = 0)
ridge_results <- predict(ridge.out, s = bestlam, type = "coefficients") # [1:20,]
ridge_results
## 248 x 1 sparse Matrix of class "dgCMatrix"
##                                      s1
## (Intercept)                8.819964e+04
## iso_codeAFG                .           
## iso_codeAGO                .           
## iso_codeAIA                .           
## iso_codeALB                .           
## iso_codeAND                .           
## iso_codeARE                .           
## iso_codeARG                .           
## iso_codeARM                .           
## iso_codeATG                .           
## iso_codeAUS                .           
## iso_codeAUT                .           
## iso_codeAZE                .           
## iso_codeBDI                .           
## iso_codeBEL                .           
## iso_codeBEN                .           
## iso_codeBES                .           
## iso_codeBFA                .           
## iso_codeBGD                .           
## iso_codeBGR                .           
## iso_codeBHR                .           
## iso_codeBHS                .           
## iso_codeBIH                .           
## iso_codeBLR                .           
## iso_codeBLZ                .           
## iso_codeBMU                .           
## iso_codeBOL                .           
## iso_codeBRA                .           
## iso_codeBRB                .           
## iso_codeBRN                .           
## iso_codeBTN                .           
## iso_codeBWA                .           
## iso_codeCAF                .           
## iso_codeCAN                .           
## iso_codeCHE                .           
## iso_codeCHL                .           
## iso_codeCHN                .           
## iso_codeCIV                .           
## iso_codeCMR                .           
## iso_codeCOD                .           
## iso_codeCOG                .           
## iso_codeCOK                .           
## iso_codeCOL               -4.160092e+03
## iso_codeCOM                .           
## iso_codeCPV                .           
## iso_codeCRI                .           
## iso_codeCUB                .           
## iso_codeCUW                .           
## iso_codeCYM                .           
## iso_codeCYP                .           
## iso_codeCZE                .           
## iso_codeDEU                .           
## iso_codeDJI                .           
## iso_codeDMA                .           
## iso_codeDNK                .           
## iso_codeDOM                .           
## iso_codeDZA                .           
## iso_codeECU                .           
## iso_codeEGY                .           
## iso_codeERI                .           
## iso_codeESP                .           
## iso_codeEST                .           
## iso_codeETH                .           
## iso_codeFIN                .           
## iso_codeFJI                .           
## iso_codeFLK                .           
## iso_codeFRA                .           
## iso_codeFRO                .           
## iso_codeFSM                .           
## iso_codeGAB                .           
## iso_codeGBR               -4.803875e+04
## iso_codeGEO                .           
## iso_codeGGY                .           
## iso_codeGHA                .           
## iso_codeGIB                .           
## iso_codeGIN                .           
## iso_codeGMB                .           
## iso_codeGNB                .           
## iso_codeGNQ                .           
## iso_codeGRC                .           
## iso_codeGRD                .           
## iso_codeGRL                .           
## iso_codeGTM                .           
## iso_codeGUY                .           
## iso_codeHKG                .           
## iso_codeHND                .           
## iso_codeHRV                .           
## iso_codeHTI                .           
## iso_codeHUN                .           
## iso_codeIDN                .           
## iso_codeIMN                .           
## iso_codeIND               -3.724126e+04
## iso_codeIRL                .           
## iso_codeIRN                .           
## iso_codeIRQ                .           
## iso_codeISL                .           
## iso_codeISR                .           
## iso_codeITA               -8.566365e+03
## iso_codeJAM                .           
## iso_codeJEY                .           
## iso_codeJOR                .           
## iso_codeJPN                .           
## iso_codeKAZ                .           
## iso_codeKEN                .           
## iso_codeKGZ                .           
## iso_codeKHM                .           
## iso_codeKIR                .           
## iso_codeKNA                .           
## iso_codeKOR                .           
## iso_codeKWT                .           
## iso_codeLAO                .           
## iso_codeLBN                .           
## iso_codeLBR                .           
## iso_codeLBY                .           
## iso_codeLCA                .           
## iso_codeLIE                .           
## iso_codeLKA                .           
## iso_codeLSO                .           
## iso_codeLTU                .           
## iso_codeLUX                .           
## iso_codeLVA                .           
## iso_codeMAC                .           
## iso_codeMAR                .           
## iso_codeMCO                .           
## iso_codeMDA                .           
## iso_codeMDG                .           
## iso_codeMDV                .           
## iso_codeMEX                .           
## iso_codeMHL                .           
## iso_codeMKD                .           
## iso_codeMLI                .           
## iso_codeMLT                .           
## iso_codeMMR                .           
## iso_codeMNE                .           
## iso_codeMNG                .           
## iso_codeMOZ                .           
## iso_codeMRT                .           
## iso_codeMSR                .           
## iso_codeMUS                .           
## iso_codeMWI                .           
## iso_codeMYS                .           
## iso_codeNAM                .           
## iso_codeNCL                .           
## iso_codeNER                .           
## iso_codeNGA                .           
## iso_codeNIC                .           
## iso_codeNIU                .           
## iso_codeNLD                .           
## iso_codeNOR                .           
## iso_codeNPL                .           
## iso_codeNRU                .           
## iso_codeNZL                .           
## iso_codeOMN                .           
## iso_codeOWID_AFR           .           
## iso_codeOWID_ASI           .           
## iso_codeOWID_CYN           .           
## iso_codeOWID_EUN           .           
## iso_codeOWID_EUR           .           
## iso_codeOWID_HIC           .           
## iso_codeOWID_INT           .           
## iso_codeOWID_KOS           .           
## iso_codeOWID_LIC           .           
## iso_codeOWID_LMC           .           
## iso_codeOWID_NAM           .           
## iso_codeOWID_OCE           .           
## iso_codeOWID_SAM           .           
## iso_codeOWID_UMC           .           
## iso_codeOWID_WRL           .           
## iso_codePAK                .           
## iso_codePAN                .           
## iso_codePCN                .           
## iso_codePER                .           
## iso_codePHL                .           
## iso_codePLW                .           
## iso_codePNG                .           
## iso_codePOL                .           
## iso_codePRT                .           
## iso_codePRY                .           
## iso_codePSE                .           
## iso_codePYF                .           
## iso_codeQAT                .           
## iso_codeROU                .           
## iso_codeRUS               -2.444284e+04
## iso_codeRWA                .           
## iso_codeSAU                .           
## iso_codeSDN                .           
## iso_codeSEN                .           
## iso_codeSGP                .           
## iso_codeSHN                .           
## iso_codeSLB                .           
## iso_codeSLE                .           
## iso_codeSLV                .           
## iso_codeSMR                .           
## iso_codeSOM                .           
## iso_codeSRB                .           
## iso_codeSSD                .           
## iso_codeSTP                .           
## iso_codeSUR                .           
## iso_codeSVK                .           
## iso_codeSVN                .           
## iso_codeSWE                .           
## iso_codeSWZ                .           
## iso_codeSXM                .           
## iso_codeSYC                .           
## iso_codeSYR                .           
## iso_codeTCA                .           
## iso_codeTCD                .           
## iso_codeTGO                .           
## iso_codeTHA                .           
## iso_codeTJK                .           
## iso_codeTKL                .           
## iso_codeTKM                .           
## iso_codeTLS                .           
## iso_codeTON                .           
## iso_codeTTO                .           
## iso_codeTUN                .           
## iso_codeTUR               -7.522161e+04
## iso_codeTUV                .           
## iso_codeTWN                .           
## iso_codeTZA                .           
## iso_codeUGA                .           
## iso_codeUKR                .           
## iso_codeURY                .           
## iso_codeUSA                1.582848e+05
## iso_codeUZB                .           
## iso_codeVAT                .           
## iso_codeVCT                .           
## iso_codeVEN                .           
## iso_codeVGB                .           
## iso_codeVNM                .           
## iso_codeVUT                .           
## iso_codeWLF                .           
## iso_codeWSM                .           
## iso_codeYEM                .           
## iso_codeZAF                .           
## iso_codeZMB                .           
## iso_codeZWE                .           
## total_cases                5.209306e-03
## new_cases_smoothed        -1.917844e-01
## new_deaths_smoothed        4.674523e+00
## reproduction_rate         -1.113954e+04
## total_tests                2.230228e-04
## new_tests_smoothed         2.767698e-02
## tests_per_case             3.030740e+01
## total_vaccinations         2.270409e-05
## people_vaccinated          1.124624e-06
## people_fully_vaccinated    2.227355e-04
## new_vaccinations_smoothed  7.686551e-04
# Reset the seed so the cross-validation below is reproducible.
set.seed(1)
# Lasso (alpha = 1) on the training rows; no lambda is supplied, so glmnet
# chooses the penalty sequence itself.
lasso.mod <- glmnet(x = x[train, ], y = y[train], alpha = 1)
# Cross-validate the same lasso fit to choose the penalty, then plot the
# cross-validation result.
cv.out <- cv.glmnet(x = x[train, ], y = y[train], alpha = 1)
plot(cv.out)

# Take the lambda.min penalty from the lasso cross-validation and print it.
bestlam <- cv.out[["lambda.min"]]
bestlam
## [1] 268.3194
# Refit the lasso on the training rows at that single penalty value.
lasso.mod <- glmnet(x = x[train, ], y = y[train], alpha = 1, lambda = bestlam)
# Predict the held-out rows and report the test mean squared error
# (y.test is the test response assigned earlier in the script).
lasso.pred <- predict(lasso.mod, newx = x[test, ], s = bestlam)
mean((lasso.pred - y.test)^2)
## [1] 120698795
# Refit the lasso on all rows and plot the fitted object.
out <- glmnet(x = x, y = y, alpha = 1)
plot(out)

# Coefficients at the selected penalty; entries printed as "." are zero.
lasso.coef <- predict(out, s = bestlam, type = "coefficients") # [1:20,]
lasso.coef
## 248 x 1 sparse Matrix of class "dgCMatrix"
##                                      s1
## (Intercept)                5.305678e+04
## iso_codeAFG                .           
## iso_codeAGO                .           
## iso_codeAIA                .           
## iso_codeALB                .           
## iso_codeAND                .           
## iso_codeARE                .           
## iso_codeARG                .           
## iso_codeARM                .           
## iso_codeATG                .           
## iso_codeAUS                .           
## iso_codeAUT                .           
## iso_codeAZE                .           
## iso_codeBDI                .           
## iso_codeBEL                .           
## iso_codeBEN                .           
## iso_codeBES                .           
## iso_codeBFA                .           
## iso_codeBGD                .           
## iso_codeBGR                .           
## iso_codeBHR                .           
## iso_codeBHS                .           
## iso_codeBIH                .           
## iso_codeBLR                .           
## iso_codeBLZ                .           
## iso_codeBMU                .           
## iso_codeBOL                .           
## iso_codeBRA                .           
## iso_codeBRB                .           
## iso_codeBRN                .           
## iso_codeBTN                .           
## iso_codeBWA                .           
## iso_codeCAF                .           
## iso_codeCAN                .           
## iso_codeCHE                .           
## iso_codeCHL                .           
## iso_codeCHN                .           
## iso_codeCIV                .           
## iso_codeCMR                .           
## iso_codeCOD                .           
## iso_codeCOG                .           
## iso_codeCOK                .           
## iso_codeCOL               -1.535873e+03
## iso_codeCOM                .           
## iso_codeCPV                .           
## iso_codeCRI                .           
## iso_codeCUB                .           
## iso_codeCUW                .           
## iso_codeCYM                .           
## iso_codeCYP                .           
## iso_codeCZE                .           
## iso_codeDEU                .           
## iso_codeDJI                .           
## iso_codeDMA                .           
## iso_codeDNK                .           
## iso_codeDOM                .           
## iso_codeDZA                .           
## iso_codeECU                .           
## iso_codeEGY                .           
## iso_codeERI                .           
## iso_codeESP                .           
## iso_codeEST                .           
## iso_codeETH                .           
## iso_codeFIN                .           
## iso_codeFJI                .           
## iso_codeFLK                .           
## iso_codeFRA                .           
## iso_codeFRO                .           
## iso_codeFSM                .           
## iso_codeGAB                .           
## iso_codeGBR                .           
## iso_codeGEO                .           
## iso_codeGGY                .           
## iso_codeGHA                .           
## iso_codeGIB                .           
## iso_codeGIN                .           
## iso_codeGMB                .           
## iso_codeGNB                .           
## iso_codeGNQ                .           
## iso_codeGRC                .           
## iso_codeGRD                .           
## iso_codeGRL                .           
## iso_codeGTM                .           
## iso_codeGUY                .           
## iso_codeHKG                .           
## iso_codeHND                .           
## iso_codeHRV                .           
## iso_codeHTI                .           
## iso_codeHUN                .           
## iso_codeIDN                .           
## iso_codeIMN                .           
## iso_codeIND               -3.495507e+04
## iso_codeIRL                .           
## iso_codeIRN                .           
## iso_codeIRQ                .           
## iso_codeISL                .           
## iso_codeISR                .           
## iso_codeITA                1.150132e+04
## iso_codeJAM                .           
## iso_codeJEY                .           
## iso_codeJOR                .           
## iso_codeJPN                .           
## iso_codeKAZ                .           
## iso_codeKEN                .           
## iso_codeKGZ                .           
## iso_codeKHM                .           
## iso_codeKIR                .           
## iso_codeKNA                .           
## iso_codeKOR                .           
## iso_codeKWT                .           
## iso_codeLAO                .           
## iso_codeLBN                .           
## iso_codeLBR                .           
## iso_codeLBY                .           
## iso_codeLCA                .           
## iso_codeLIE                .           
## iso_codeLKA                .           
## iso_codeLSO                .           
## iso_codeLTU                .           
## iso_codeLUX                .           
## iso_codeLVA                .           
## iso_codeMAC                .           
## iso_codeMAR                .           
## iso_codeMCO                .           
## iso_codeMDA                .           
## iso_codeMDG                .           
## iso_codeMDV                .           
## iso_codeMEX                .           
## iso_codeMHL                .           
## iso_codeMKD                .           
## iso_codeMLI                .           
## iso_codeMLT                .           
## iso_codeMMR                .           
## iso_codeMNE                .           
## iso_codeMNG                .           
## iso_codeMOZ                .           
## iso_codeMRT                .           
## iso_codeMSR                .           
## iso_codeMUS                .           
## iso_codeMWI                .           
## iso_codeMYS                .           
## iso_codeNAM                .           
## iso_codeNCL                .           
## iso_codeNER                .           
## iso_codeNGA                .           
## iso_codeNIC                .           
## iso_codeNIU                .           
## iso_codeNLD                .           
## iso_codeNOR                .           
## iso_codeNPL                .           
## iso_codeNRU                .           
## iso_codeNZL                .           
## iso_codeOMN                .           
## iso_codeOWID_AFR           .           
## iso_codeOWID_ASI           .           
## iso_codeOWID_CYN           .           
## iso_codeOWID_EUN           .           
## iso_codeOWID_EUR           .           
## iso_codeOWID_HIC           .           
## iso_codeOWID_INT           .           
## iso_codeOWID_KOS           .           
## iso_codeOWID_LIC           .           
## iso_codeOWID_LMC           .           
## iso_codeOWID_NAM           .           
## iso_codeOWID_OCE           .           
## iso_codeOWID_SAM           .           
## iso_codeOWID_UMC           .           
## iso_codeOWID_WRL           .           
## iso_codePAK                .           
## iso_codePAN                .           
## iso_codePCN                .           
## iso_codePER                .           
## iso_codePHL                .           
## iso_codePLW                .           
## iso_codePNG                .           
## iso_codePOL                .           
## iso_codePRT                .           
## iso_codePRY                .           
## iso_codePSE                .           
## iso_codePYF                .           
## iso_codeQAT                .           
## iso_codeROU                .           
## iso_codeRUS                3.213278e+03
## iso_codeRWA                .           
## iso_codeSAU                .           
## iso_codeSDN                .           
## iso_codeSEN                .           
## iso_codeSGP                .           
## iso_codeSHN                .           
## iso_codeSLB                .           
## iso_codeSLE                .           
## iso_codeSLV                .           
## iso_codeSMR                .           
## iso_codeSOM                .           
## iso_codeSRB                .           
## iso_codeSSD                .           
## iso_codeSTP                .           
## iso_codeSUR                .           
## iso_codeSVK                .           
## iso_codeSVN                .           
## iso_codeSWE                .           
## iso_codeSWZ                .           
## iso_codeSXM                .           
## iso_codeSYC                .           
## iso_codeSYR                .           
## iso_codeTCA                .           
## iso_codeTCD                .           
## iso_codeTGO                .           
## iso_codeTHA                .           
## iso_codeTJK                .           
## iso_codeTKL                .           
## iso_codeTKM                .           
## iso_codeTLS                .           
## iso_codeTON                .           
## iso_codeTTO                .           
## iso_codeTUN                .           
## iso_codeTUR               -7.261097e+04
## iso_codeTUV                .           
## iso_codeTWN                .           
## iso_codeTZA                .           
## iso_codeUGA                .           
## iso_codeUKR                .           
## iso_codeURY                .           
## iso_codeUSA                1.057361e+05
## iso_codeUZB                .           
## iso_codeVAT                .           
## iso_codeVCT                .           
## iso_codeVEN                .           
## iso_codeVGB                .           
## iso_codeVNM                .           
## iso_codeVUT                .           
## iso_codeWLF                .           
## iso_codeWSM                .           
## iso_codeYEM                .           
## iso_codeZAF                .           
## iso_codeZMB                .           
## iso_codeZWE                .           
## total_cases                1.314559e-02
## new_cases_smoothed        -1.523977e-01
## new_deaths_smoothed       -4.939630e+00
## reproduction_rate          1.526322e+03
## total_tests               -2.888769e-07
## new_tests_smoothed        -4.662682e-03
## tests_per_case             5.017302e+01
## total_vaccinations         .           
## people_vaccinated          .           
## people_fully_vaccinated    .           
## new_vaccinations_smoothed  .