library(dplyr)
library(downloader)
library(stringr)
library(htmlTable)
library(tidyverse)
library(flextable)
# Import the data from the csv file
vaccination_data <- read.csv("https://raw.githubusercontent.com/baruab/msdsrepo/main/DATA-607/israeli_vaccination_data.csv")
vaccination_data %>% regulartable() %>% autofit()
Age | Population.Not.Vax.. | Population.Fully.Vax.. | Severe.Cases.Not.Vax.per.100K | Severe.Cases.Fully.Vax.per.100K | Efficacy.vs.Severe.Disease |
<50 | 1,116,834 | 3,501,118 | 43 | 11 | |
| 23.3% | 73.0% | |||
>50 | 186,078 | 2,133,516 | 171 | 290 | |
7.9% | 90.4% |
# Extract Odd rows from dataframe
odd_rows <- seq_len(nrow(vaccination_data)) %% 2
# Drop even rows
data_odd_rows <- vaccination_data[odd_rows == 1, ]
# Now Drop odd rows, create a dataframe of even rows
data_even_rows <- vaccination_data[odd_rows == 0, ]
subset_even_rows <- subset(data_even_rows, select= c(2, 3))
# Rename even row column names
colnames(subset_even_rows) <- c("%_Not_Vax","%_Fully_Vax")
# Add numeric columns for the Vaccination percentage values
num_subset_even_rows <- subset_even_rows %>%
mutate(Percent_NotVax = str_extract_all(subset_even_rows$`%_Not_Vax`, "^[:digit:]+.[:digit:]+")) %>%
unnest() %>%
mutate(Percent_NotVax = as.numeric(Percent_NotVax)) %>%
mutate(Percent_FullyVax = str_extract_all(subset_even_rows$`%_Fully_Vax`, "^[:digit:]+.[:digit:]+")) %>%
unnest() %>%
mutate(Percent_FullyVax = as.numeric(Percent_FullyVax))
# Rename odd row column names
colnames(data_odd_rows) <- c("Age" ,"Population_Not_Vax","Population_Fully_Vax", "Severe_Not_Vax_Cases", "Severe_Fully_Vax_Cases", "Efficacy_vs_Severe_Disease")
# Add numeric columns for the Vaccination population values
num_data_odd_rows <- data_odd_rows %>%
mutate(Population_NotVax = as.numeric(gsub(",","", data_odd_rows$Population_Not_Vax ))) %>%
unnest() %>%
mutate(Population_FullyVax = as.numeric(gsub(",","", data_odd_rows$Population_Fully_Vax ))) %>%
unnest() %>%
# Compute Efficacy vs Severe Disease format(round(sum/count, 2), nsmall = 2)
mutate(Efficacy_vs_Severe_Disease = format(round(1 - ( data_odd_rows$Severe_Fully_Vax_Cases / data_odd_rows$Severe_Not_Vax_Cases),2), nsmall = 2 )) %>%
unnest()
num_data_odd_rows$Population_NotVax
## [1] 1116834 186078
num_data_odd_rows$Population_FullyVax
## [1] 3501118 2133516
# Combine the two data frames with the new numeric columns created
data_rows <- cbind(subset(num_data_odd_rows, select = c(1,4,5,6,7,8)), subset(num_subset_even_rows, select= c(3, 4)))
# Add new columns totalling the percentage and population by row
new_data_rows <- mutate(data_rows, total_percentage = data_rows$Percent_NotVax + data_rows$Percent_FullyVax ) %>%
mutate(data_rows, total_population = data_rows$Population_NotVax + data_rows$Population_FullyVax )
new_data_rows[nrow(new_data_rows)+1, ] <- c('Total', sum(new_data_rows$Severe_Not_Vax_Cases), sum(new_data_rows$Severe_Fully_Vax_Cases),NA, sum(new_data_rows$Population_NotVax), sum(new_data_rows$Population_FullyVax),NA, NA, NA, sum(new_data_rows$total_population) )
new_data_rows %>% regulartable() %>% autofit()
Age | Severe_Not_Vax_Cases | Severe_Fully_Vax_Cases | Efficacy_vs_Severe_Disease | Population_NotVax | Population_FullyVax | Percent_NotVax | Percent_FullyVax | total_percentage | total_population |
<50 | 43 | 11 | 0.74 | 1116834 | 3501118 | 23.3 | 73 | 96.3 | 4617952 |
>50 | 171 | 290 | -0.70 | 186078 | 2133516 | 7.9 | 90.4 | 98.3 | 2319594 |
Total | 214 | 301 | 1302912 | 5634634 | 6937546 |
# Short Column names
colnames(new_data_rows) <- c("Age" ,"NotVax_Cases","FullyVax_Cases", "Effi_Ratio", "NotVax_Popu", "FullyVax_Popu", "NotVax_Perc", "FullyVax_Perc", "Total_Perc", "Total_Popu")
col_order <- c("Age", "NotVax_Popu", "FullyVax_Popu", "Total_Popu", "NotVax_Perc", "FullyVax_Perc", "Total_Perc","NotVax_Cases","FullyVax_Cases", "Effi_Ratio")
final_data_rows <- new_data_rows[, col_order]
htmlTable(final_data_rows)
| Age | NotVax_Popu | FullyVax_Popu | Total_Popu | NotVax_Perc | FullyVax_Perc | Total_Perc | NotVax_Cases | FullyVax_Cases | Effi_Ratio | |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | <50 | 1116834 | 3501118 | 4617952 | 23.3 | 73 | 96.3 | 43 | 11 | 0.74 |
| 2 |
50 |
186078 | 2133516 | 2319594 | 7.9 | 90.4 | 98.3 | 171 | 290 | -0.70 |
| 3 | Total | 1302912 | 5634634 | 6937546 | 214 | 301 |
Ref: https://www.usnews.com/news/health-news/articles/2021-02-04/all-israelis-over-16-are-eligible-for-coronavirus-vaccine Age Structure Ref: https://www.statista.com/statistics/526596/age-structure-in-israel/
efficacy_data_rows <- subset(final_data_rows, select= c("Age","NotVax_Popu", "FullyVax_Popu", "Total_Popu",
"NotVax_Cases","FullyVax_Cases", "Effi_Ratio" ))
# Add Efficacy ratio values
more_efficacy_ratio <- efficacy_data_rows %>%
mutate(Severecase_rate_NotVax = format(round( as.numeric(efficacy_data_rows$NotVax_Cases) / as.numeric(efficacy_data_rows$NotVax_Popu),8), nsmall =8 )) %>%
unnest() %>%
mutate( Severecase_rate_FullVax = format(round( as.numeric(efficacy_data_rows$FullyVax_Cases) / as.numeric(efficacy_data_rows$FullyVax_Popu),8), nsmall = 8 )) %>%
unnest()
htmlTable(more_efficacy_ratio)
| Age | NotVax_Popu | FullyVax_Popu | Total_Popu | NotVax_Cases | FullyVax_Cases | Effi_Ratio | Severecase_rate_NotVax | Severecase_rate_FullVax | |
|---|---|---|---|---|---|---|---|---|---|
| 1 | <50 | 1116834 | 3501118 | 4617952 | 43 | 11 | 0.74 | 0.00003850 | 0.00000314 |
| 2 |
50 |
186078 | 2133516 | 2319594 | 171 | 290 | -0.70 | 0.00091897 | 0.00013593 |
| 3 | Total | 1302912 | 5634634 | 6937546 | 214 | 301 | 0.00016425 | 0.00005342 |