# Read XLSX file from Github
github_link <- "https://github.com/candrewxs/Vaccination/blob/main/israeli_vaccination_data_analysis_start.xlsx?raw=true"
library(httr)
temp_file <- tempfile(fileext = ".xlsx")
req <- GET(github_link,
# authenticate using GITHUB_PATH
authenticate(Sys.getenv("GITHUB_PATH"), ""),
# write result to disk
write_disk(path = temp_file))
vac <- readxl::read_excel(temp_file, sheet = NULL, range = "A1:F6", col_names = T, trim_ws = TRUE)
## New names:
## * `` -> ...3
## * `` -> ...5
unlink(temp_file)
Look at the data
vac
## # A tibble: 5 x 6
## Age `Population %` ...3 `Severe Cases` ...5 Efficacy
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 <NA> "Not Vax\r\n%" "Fully Vax\r\n%" "Not Vax\r\np~ "Ful~ vs. sev~
## 2 <50 "1116834" "3501118" "43" "11" <NA>
## 3 <NA> "0.23300000000000001" "0.73" <NA> <NA> <NA>
## 4 >50 "186078" "2133516" "171" "290" <NA>
## 5 <NA> "7.9000000000000001E-2" "0.90400000000000003" <NA> <NA> <NA>
# Replace headers (column names) with more meaningful names
headers <- c("Age", "Not_Vacc", "Full_Vacc", "Sev_Not_Vacc_per100k", "Sev_Full_Vacc_per100k", "Efficacy vs. Severe" )
colnames(vac) <- headers
vac
## # A tibble: 5 x 6
## Age Not_Vacc Full_Vacc Sev_Not_Vacc_per~ Sev_Full_Vacc_p~ `Efficacy vs. S~
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 <NA> "Not Vax~ "Fully Va~ "Not Vax\r\nper ~ "Fully Vax\r\np~ vs. severe dise~
## 2 <50 "1116834" "3501118" "43" "11" <NA>
## 3 <NA> "0.23300~ "0.73" <NA> <NA> <NA>
## 4 >50 "186078" "2133516" "171" "290" <NA>
## 5 <NA> "7.90000~ "0.904000~ <NA> <NA> <NA>
Extract rows - removed rows 2 to 5
vac <- vac %>% slice(2:5)
vac
## # A tibble: 4 x 6
## Age Not_Vacc Full_Vacc Sev_Not_Vacc_pe~ Sev_Full_Vacc_p~ `Efficacy vs. S~
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 <50 1116834 3501118 43 11 <NA>
## 2 <NA> 0.2330000~ 0.73 <NA> <NA> <NA>
## 3 >50 186078 2133516 171 290 <NA>
## 4 <NA> 7.9000000~ 0.9040000~ <NA> <NA> <NA>
Subset the israel_vac_data dataset to obtain only the population vaccinated rows
pop_vac <- vac[c(1,3),]
pop_vac
## # A tibble: 2 x 6
## Age Not_Vacc Full_Vacc Sev_Not_Vacc_per~ Sev_Full_Vacc_pe~ `Efficacy vs. Se~
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 <50 1116834 3501118 43 11 <NA>
## 2 >50 186078 2133516 171 290 <NA>
Subset the israel_vac_data dataset to obtain only the percent vaccinated rows
pct_vac <- vac[c(2,4),]
pct_vac_headers <- c("Age", "Not_Vacc_pct", "Full_Vacc_pct", "Sev_Not_Vacc_per100k_pct", "Sev_Full_Vacc_per100k_pct", "Efficacy vs. Severe")
colnames(pct_vac) <- pct_vac_headers
pct_vacc <- pct_vac %>% select(Not_Vacc_pct:Sev_Full_Vacc_per100k_pct)
pct_vacc
## # A tibble: 2 x 4
## Not_Vacc_pct Full_Vacc_pct Sev_Not_Vacc_per1~ Sev_Full_Vacc_pe~
## <chr> <chr> <chr> <chr>
## 1 0.23300000000000001 0.73 <NA> <NA>
## 2 7.9000000000000001E-2 0.90400000000000003 <NA> <NA>
Combine the two dataframes (pop_vac and pct_vacc) to get a dataframe of the israel vaccination data
israel_vaccination_data <- cbind(pop_vac, pct_vacc)
israel_vaccination_data
## Age Not_Vacc Full_Vacc Sev_Not_Vacc_per100k Sev_Full_Vacc_per100k
## 1 <50 1116834 3501118 43 11
## 2 >50 186078 2133516 171 290
## Efficacy vs. Severe Not_Vacc_pct Full_Vacc_pct
## 1 <NA> 0.23300000000000001 0.73
## 2 <NA> 7.9000000000000001E-2 0.90400000000000003
## Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k_pct
## 1 <NA> <NA>
## 2 <NA> <NA>
Israel Vaccination rate
isr <- israel_vaccination_data %>% select(Age, Not_Vacc, Not_Vacc_pct, Full_Vacc, Full_Vacc_pct, Sev_Not_Vacc_per100k, Sev_Not_Vacc_per100k_pct, Sev_Full_Vacc_per100k, Sev_Full_Vacc_per100k_pct, `Efficacy vs. Severe`)
isr
## Age Not_Vacc Not_Vacc_pct Full_Vacc Full_Vacc_pct
## 1 <50 1116834 0.23300000000000001 3501118 0.73
## 2 >50 186078 7.9000000000000001E-2 2133516 0.90400000000000003
## Sev_Not_Vacc_per100k Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k
## 1 43 <NA> 11
## 2 171 <NA> 290
## Sev_Full_Vacc_per100k_pct Efficacy vs. Severe
## 1 <NA> <NA>
## 2 <NA> <NA>
Remove the non-numeric symbols
isr$Not_Vacc_pct <- isr$Not_Vacc_pct %>% str_remove_all(pattern = "%")
isr$Full_Vacc_pct <- isr$Full_Vacc_pct %>% str_remove_all(pattern = "%")
isr$Not_Vacc <- isr$Not_Vacc %>% str_remove_all(pattern = ",")
isr$Full_Vacc <- isr$Full_Vacc %>% str_remove_all(pattern = ",")
isr
## Age Not_Vacc Not_Vacc_pct Full_Vacc Full_Vacc_pct
## 1 <50 1116834 0.23300000000000001 3501118 0.73
## 2 >50 186078 7.9000000000000001E-2 2133516 0.90400000000000003
## Sev_Not_Vacc_per100k Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k
## 1 43 <NA> 11
## 2 171 <NA> 290
## Sev_Full_Vacc_per100k_pct Efficacy vs. Severe
## 1 <NA> <NA>
## 2 <NA> <NA>
Convert the columns to numeric
isr_age <- isr %>% select(Age)
dat <- isr %>% select(-Age)
dat_df <- unlist(sapply(dat, as.numeric)) #convert all the columns except the Age column to numeric
dat_daf <- as.data.frame(dat_df)
israel_Vax <- cbind(isr_age, dat_daf)
israel_Vax
## Age Not_Vacc Not_Vacc_pct Full_Vacc Full_Vacc_pct Sev_Not_Vacc_per100k
## 1 <50 1116834 0.233 3501118 0.730 43
## 2 >50 186078 0.079 2133516 0.904 171
## Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k Sev_Full_Vacc_per100k_pct
## 1 NA 11 NA
## 2 NA 290 NA
## Efficacy vs. Severe
## 1 NA
## 2 NA
Compute the Sev_Not_Vacc_per100k_pct and Sev_Full_Vacc_per100k_pct
israel_Vax <- israel_Vax %>% mutate(Sev_Not_Vacc_per100k_pct = round((Sev_Not_Vacc_per100k/Not_Vacc)*100000,1),
Sev_Full_Vacc_per100k_pct = round((Sev_Full_Vacc_per100k/Full_Vacc)*100000,1))
israel_Vax
## Age Not_Vacc Not_Vacc_pct Full_Vacc Full_Vacc_pct Sev_Not_Vacc_per100k
## 1 <50 1116834 0.233 3501118 0.730 43
## 2 >50 186078 0.079 2133516 0.904 171
## Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k Sev_Full_Vacc_per100k_pct
## 1 3.9 11 0.3
## 2 91.9 290 13.6
## Efficacy vs. Severe
## 1 NA
## 2 NA
Compute the Efficacy vs. Severe Efficacy vs. Severe = 1 - (Sev_Full_Vacc_per100k_pct/Sev_Not_Vacc_per100k_pct)
israel_Vaxx <- israel_Vax %>% mutate(`Efficacy vs. Severe` = round((1 - (Sev_Full_Vacc_per100k_pct/Sev_Not_Vacc_per100k_pct)),3)*100)
israel_Vaxx
## Age Not_Vacc Not_Vacc_pct Full_Vacc Full_Vacc_pct Sev_Not_Vacc_per100k
## 1 <50 1116834 0.233 3501118 0.730 43
## 2 >50 186078 0.079 2133516 0.904 171
## Sev_Not_Vacc_per100k_pct Sev_Full_Vacc_per100k Sev_Full_Vacc_per100k_pct
## 1 3.9 11 0.3
## 2 91.9 290 13.6
## Efficacy vs. Severe
## 1 92.3
## 2 85.2
Solution 1:
Compute population:
ques1 <- israel_Vaxx %>% select(Age, Not_Vacc, Full_Vacc)
ques1 <- ques1 %>% mutate(Population = Not_Vacc + Full_Vacc)
ques1_pop_pct <- israel_Vaxx %>% transmute(Pop_pct = Not_Vacc_pct + Full_Vacc_pct)
ques1 <- cbind(ques1, ques1_pop_pct)
#compute estimated population per age group
est_pop <- ques1 %>% transmute(Est_population = Population/(Pop_pct/100))
ques1 <- cbind(ques1, est_pop)
ques1
## Age Not_Vacc Full_Vacc Population Pop_pct Est_population
## 1 <50 1116834 3501118 4617952 0.963 479538110
## 2 >50 186078 2133516 2319594 0.983 235970905
Compute the total estimated population:
Est_total_population <- round(sum(ques1$Est_population), 0)
paste0("The estimated total population from the given data is ", Est_total_population)
## [1] "The estimated total population from the given data is 715509015"
Solution 2: From the values computed above:
israel_efficacy_severe <- israel_Vaxx %>% select(Age, Not_Vacc, Full_Vacc, `Efficacy vs. Severe`)
israel_efficacy_severe
## Age Not_Vacc Full_Vacc Efficacy vs. Severe
## 1 <50 1116834 3501118 92.3
## 2 >50 186078 2133516 85.2
This means that the Efficacy vs. Severe is higher for those below 50 (92.3%) compared to those above 50 (85.2%) which implies that the vaccine is more effective for those 50 and below.
Solution 3: Yes I am able to compare the rate of severe cases in unvaccinated individuals to vaccinated individuals. The Efficacy vs. Severe basically represents the percent reduction in severe infection in the vaccinated group relative to the unvaccinated. From the severe rates, we see that those who are unvaccinated are more likely to have severe cases (hospitalized) compared to those who are vaccinated for both age groups.
Source: