Assignment

# Read XLSX file from Github
github_link <- "https://github.com/candrewxs/Vaccination/blob/main/israeli_vaccination_data_analysis_start.xlsx?raw=true"
library(httr)
temp_file <- tempfile(fileext = ".xlsx")
req <- GET(github_link, 
           # authenticate using GITHUB_PATH
           authenticate(Sys.getenv("GITHUB_PATH"), ""),
           # write result to disk
           write_disk(path = temp_file))
vac <- readxl::read_excel(temp_file, sheet = NULL, range = "A1:F6", col_names = T, trim_ws = TRUE)
## New names:
## * `` -> ...3
## * `` -> ...5
unlink(temp_file)

Look at the data

vac
## # A tibble: 5 x 6
##   Age   `Population %`          ...3                  `Severe Cases` ...5  Efficacy
##   <chr> <chr>                   <chr>                 <chr>          <chr> <chr>   
## 1 <NA>  "Not Vax\r\n%"          "Fully Vax\r\n%"      "Not Vax\r\np~ "Ful~ vs. sev~
## 2 <50   "1116834"               "3501118"             "43"           "11"  <NA>    
## 3 <NA>  "0.23300000000000001"   "0.73"                 <NA>           <NA> <NA>    
## 4 >50   "186078"                "2133516"             "171"          "290" <NA>    
## 5 <NA>  "7.9000000000000001E-2" "0.90400000000000003"  <NA>           <NA> <NA>

Clean the data

# Replace headers (column names) with more meaningful names
headers <- c("Age", "Not_Vacc", "Full_Vacc", "Sev_Not_Vacc_per100k", "Sev_Full_Vacc_per100k", "Efficacy vs. Severe" )
colnames(vac) <- headers
vac
## # A tibble: 5 x 6
##   Age   Not_Vacc  Full_Vacc  Sev_Not_Vacc_per~ Sev_Full_Vacc_p~ `Efficacy vs. S~
##   <chr> <chr>     <chr>      <chr>             <chr>            <chr>           
## 1 <NA>  "Not Vax~ "Fully Va~ "Not Vax\r\nper ~ "Fully Vax\r\np~ vs. severe dise~
## 2 <50   "1116834" "3501118"  "43"              "11"             <NA>            
## 3 <NA>  "0.23300~ "0.73"      <NA>              <NA>            <NA>            
## 4 >50   "186078"  "2133516"  "171"             "290"            <NA>            
## 5 <NA>  "7.90000~ "0.904000~  <NA>              <NA>            <NA>

Extract rows - removed rows 2 to 5

vac <- vac %>% slice(2:5)
vac
## # A tibble: 4 x 6
##   Age   Not_Vacc   Full_Vacc  Sev_Not_Vacc_pe~ Sev_Full_Vacc_p~ `Efficacy vs. S~
##   <chr> <chr>      <chr>      <chr>            <chr>            <chr>           
## 1 <50   1116834    3501118    43               11               <NA>            
## 2 <NA>  0.2330000~ 0.73       <NA>             <NA>             <NA>            
## 3 >50   186078     2133516    171              290              <NA>            
## 4 <NA>  7.9000000~ 0.9040000~ <NA>             <NA>             <NA>

Subset the israel_vac_data dataset to obtain only the population vaccinated rows

pop_vac <- vac[c(1,3),]
pop_vac
## # A tibble: 2 x 6
##   Age   Not_Vacc Full_Vacc Sev_Not_Vacc_per~ Sev_Full_Vacc_pe~ `Efficacy vs. Se~
##   <chr> <chr>    <chr>     <chr>             <chr>             <chr>            
## 1 <50   1116834  3501118   43                11                <NA>             
## 2 >50   186078   2133516   171               290               <NA>

Subset the israel_vac_data dataset to obtain only the percent vaccinated rows

pct_vac <- vac[c(2,4),]
pct_vac_headers <- c("Age", "Not_Vacc_pct", "Full_Vacc_pct", "Sev_Not_Vacc_per100k_pct", "Sev_Full_Vacc_per100k_pct", "Efficacy vs. Severe")
colnames(pct_vac) <- pct_vac_headers
pct_vacc <- pct_vac %>% select(Not_Vacc_pct:Sev_Full_Vacc_per100k_pct)
pct_vacc
## # A tibble: 2 x 4
##   Not_Vacc_pct          Full_Vacc_pct       Sev_Not_Vacc_per1~ Sev_Full_Vacc_pe~
##   <chr>                 <chr>               <chr>              <chr>            
## 1 0.23300000000000001   0.73                <NA>               <NA>             
## 2 7.9000000000000001E-2 0.90400000000000003 <NA>               <NA>

Combine the two dataframes (pop_vac and pct_vacc) to get a dataframe of the israel vaccination data

israel_vaccination_data <- cbind(pop_vac, pct_vacc)
israel_vaccination_data
##   Age Not_Vacc Full_Vacc Sev_Not_Vacc_per100k Sev_Full_Vacc_per100k
## 1 <50  1116834   3501118                   43                    11
## 2 >50   186078   2133516                  171                   290
##   Efficacy vs. Severe          Not_Vacc_pct       Full_Vacc_pct
## 1                <NA>   0.23300000000000001                0.73
## 2                <NA> 7.9000000000000001E-2 0.90400000000000003
##   Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k_pct
## 1                     <NA>                      <NA>
## 2                     <NA>                      <NA>

Israel Vaccination rate

isr <- israel_vaccination_data %>% select(Age, Not_Vacc, Not_Vacc_pct, Full_Vacc, Full_Vacc_pct, Sev_Not_Vacc_per100k, Sev_Not_Vacc_per100k_pct, Sev_Full_Vacc_per100k, Sev_Full_Vacc_per100k_pct, `Efficacy vs. Severe`)
isr
##   Age Not_Vacc          Not_Vacc_pct Full_Vacc       Full_Vacc_pct
## 1 <50  1116834   0.23300000000000001   3501118                0.73
## 2 >50   186078 7.9000000000000001E-2   2133516 0.90400000000000003
##   Sev_Not_Vacc_per100k Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k
## 1                   43                     <NA>                    11
## 2                  171                     <NA>                   290
##   Sev_Full_Vacc_per100k_pct Efficacy vs. Severe
## 1                      <NA>                <NA>
## 2                      <NA>                <NA>

Remove the non-numeric symbols

isr$Not_Vacc_pct <- isr$Not_Vacc_pct %>% str_remove_all(pattern = "%")
isr$Full_Vacc_pct <- isr$Full_Vacc_pct %>% str_remove_all(pattern = "%")
isr$Not_Vacc <- isr$Not_Vacc %>% str_remove_all(pattern = ",")
isr$Full_Vacc <- isr$Full_Vacc %>% str_remove_all(pattern = ",")
isr
##   Age Not_Vacc          Not_Vacc_pct Full_Vacc       Full_Vacc_pct
## 1 <50  1116834   0.23300000000000001   3501118                0.73
## 2 >50   186078 7.9000000000000001E-2   2133516 0.90400000000000003
##   Sev_Not_Vacc_per100k Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k
## 1                   43                     <NA>                    11
## 2                  171                     <NA>                   290
##   Sev_Full_Vacc_per100k_pct Efficacy vs. Severe
## 1                      <NA>                <NA>
## 2                      <NA>                <NA>

Convert the columns to numeric

isr_age <- isr %>% select(Age)
dat <- isr %>% select(-Age)
dat_df <- unlist(sapply(dat, as.numeric)) #convert all the columns except the Age column to numeric
dat_daf <- as.data.frame(dat_df)
israel_Vax <- cbind(isr_age, dat_daf)
israel_Vax
##   Age Not_Vacc Not_Vacc_pct Full_Vacc Full_Vacc_pct Sev_Not_Vacc_per100k
## 1 <50  1116834        0.233   3501118         0.730                   43
## 2 >50   186078        0.079   2133516         0.904                  171
##   Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k Sev_Full_Vacc_per100k_pct
## 1                       NA                    11                        NA
## 2                       NA                   290                        NA
##   Efficacy vs. Severe
## 1                  NA
## 2                  NA

Compute the Sev_Not_Vacc_per100k_pct and Sev_Full_Vacc_per100k_pct

israel_Vax <- israel_Vax %>% mutate(Sev_Not_Vacc_per100k_pct = round((Sev_Not_Vacc_per100k/Not_Vacc)*100000,1), 
                             Sev_Full_Vacc_per100k_pct = round((Sev_Full_Vacc_per100k/Full_Vacc)*100000,1))
israel_Vax
##   Age Not_Vacc Not_Vacc_pct Full_Vacc Full_Vacc_pct Sev_Not_Vacc_per100k
## 1 <50  1116834        0.233   3501118         0.730                   43
## 2 >50   186078        0.079   2133516         0.904                  171
##   Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k Sev_Full_Vacc_per100k_pct
## 1                      3.9                    11                       0.3
## 2                     91.9                   290                      13.6
##   Efficacy vs. Severe
## 1                  NA
## 2                  NA

Compute the Efficacy vs. Severe Efficacy vs. Severe = 1 - (Sev_Full_Vacc_per100k_pct/Sev_Not_Vacc_per100k_pct)

israel_Vaxx <- israel_Vax %>% mutate(`Efficacy vs. Severe` = round((1 - (Sev_Full_Vacc_per100k_pct/Sev_Not_Vacc_per100k_pct)),3)*100)

israel_Vaxx
##   Age Not_Vacc Not_Vacc_pct Full_Vacc Full_Vacc_pct Sev_Not_Vacc_per100k
## 1 <50  1116834        0.233   3501118         0.730                   43
## 2 >50   186078        0.079   2133516         0.904                  171
##   Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k Sev_Full_Vacc_per100k_pct
## 1                      3.9                    11                       0.3
## 2                     91.9                   290                      13.6
##   Efficacy vs. Severe
## 1                92.3
## 2                85.2

Question 1: Do you have enough information to calculate the total population? What does this total population represent?

Solution 1:

Compute population:

ques1 <- israel_Vaxx %>% select(Age, Not_Vacc, Full_Vacc)
ques1 <- ques1 %>% mutate(Population = Not_Vacc + Full_Vacc)
ques1_pop_pct <- israel_Vaxx %>% transmute(Pop_pct = Not_Vacc_pct + Full_Vacc_pct)
ques1 <- cbind(ques1, ques1_pop_pct)
#compute estimated population per age group
est_pop <- ques1 %>% transmute(Est_population = Population/(Pop_pct/100))
ques1 <- cbind(ques1, est_pop)
ques1
##   Age Not_Vacc Full_Vacc Population Pop_pct Est_population
## 1 <50  1116834   3501118    4617952   0.963      479538110
## 2 >50   186078   2133516    2319594   0.983      235970905

Compute the total estimated population:

Est_total_population <- round(sum(ques1$Est_population), 0)
paste0("The estimated total population from the given data is ", Est_total_population)
## [1] "The estimated total population from the given data is 715509015"

Question 2: Calculate the Efficacy vs. Disease; Explain your results:

Solution 2: From the values computed above:

israel_efficacy_severe <- israel_Vaxx %>% select(Age, Not_Vacc, Full_Vacc, `Efficacy vs. Severe`)
israel_efficacy_severe
##   Age Not_Vacc Full_Vacc Efficacy vs. Severe
## 1 <50  1116834   3501118                92.3
## 2 >50   186078   2133516                85.2

This means that the Efficacy vs. Severe is higher for those below 50 (92.3%) compared to those above 50 (85.2%) which implies that the vaccine is more effective for those 50 and below.

Question 3: From your calculation of efficacy vs. disease, are you able to compare the rate of severe cases in unvaccinated to vaccinated individuals?

Solution 3: Yes I am able to compare the rate of severe cases in unvaccinated individuals to vaccinated individuals. The Efficacy vs. Severe basically represents the percent reduction in severe infection in the vaccinated group relative to the unvaccinated. From the severe rates, we see that those who are unvaccinated are more likely to have severe cases (hospitalized) compared to those who are vaccinated for both age groups.

Source:

GitHub

RPubs