Loading necessary packages

library(readr)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(stringr)

Import the CSV file into R

# Read the raw vaccination table; readr auto-names the two columns whose
# headers are blank as `...3` and `...5`.
Israeli_Vaccine <- read_csv(file = "Israeli_Vaccine.csv")
## New names:
## Rows: 6 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): Age, Population %, ...3, Severe Cases, ...5 lgl (1): Efficacy vs. severe
## disease
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...3`
## • `` -> `...5`
# Show the imported tibble to inspect its raw layout
print(Israeli_Vaccine)
## # A tibble: 6 × 6
##   Age   `Population %` ...3      `Severe Cases` ...5      Efficacy\nvs. severe…¹
##   <chr> <chr>          <chr>     <chr>          <chr>     <lgl>                 
## 1 <NA>  Not Vax        Fully Vax Not Vax        Fully Vax NA                    
## 2 <NA>  %              %         per 100K       per 100K  NA                    
## 3 < 50  1,116,834      3,501,118 43             11        NA                    
## 4 <NA>  23.3%          73.0%     <NA>           <NA>      NA                    
## 5 > 50  186,078        2,133,516 171            290       NA                    
## 6 <NA>  7.9%           90.4%     <NA>           <NA>      NA                    
## # ℹ abbreviated name: ¹​`Efficacy\nvs. severe disease`

Fill the ‘NA’ values in the Age column with the previous non-missing value

# Carry each age label down into the blank row beneath it.
# The blank cells are already NA after import, so no na_if() step is needed:
# na_if(<tibble>, NA) only replaced NA with NA, and data-frame input to
# na_if() is rejected by dplyr >= 1.1.0.
Israeli_Vaccine <- fill(Israeli_Vaccine, Age, .direction = "down")
Israeli_Vaccine
## # A tibble: 6 × 6
##   Age   `Population %` ...3      `Severe Cases` ...5      Efficacy\nvs. severe…¹
##   <chr> <chr>          <chr>     <chr>          <chr>     <lgl>                 
## 1 <NA>  Not Vax        Fully Vax Not Vax        Fully Vax NA                    
## 2 <NA>  %              %         per 100K       per 100K  NA                    
## 3 < 50  1,116,834      3,501,118 43             11        NA                    
## 4 < 50  23.3%          73.0%     <NA>           <NA>      NA                    
## 5 > 50  186,078        2,133,516 171            290       NA                    
## 6 > 50  7.9%           90.4%     <NA>           <NA>      NA                    
## # ℹ abbreviated name: ¹​`Efficacy\nvs. severe disease`

Separate and rename two different tables

# Rows 4 and 6 hold the percentage figures for each age group
tb1 <- Israeli_Vaccine %>%
  slice(4, 6) %>%
  rename(
    `Not Vax %` = "Population %",
    `Fully Vax %` = "...3"
  )

# Rows 3 and 5 hold the population and severe-case figures for each age group
tb2 <- Israeli_Vaccine %>%
  slice(3, 5) %>%
  rename(
    `Population Not Vax` = "Population %",
    `Population Fully Vax` = "...3",
    `Severe Cases Not Vax` = "Severe Cases",
    `Severe Cases Fully Vax` = "...5"
  )
print(tb1)
## # A tibble: 2 × 6
##   Age   `Not Vax %` `Fully Vax %` `Severe Cases` ...5  Efficacy\nvs. severe di…¹
##   <chr> <chr>       <chr>         <chr>          <chr> <lgl>                    
## 1 < 50  23.3%       73.0%         <NA>           <NA>  NA                       
## 2 > 50  7.9%        90.4%         <NA>           <NA>  NA                       
## # ℹ abbreviated name: ¹​`Efficacy\nvs. severe disease`
# Show the table of population and severe-case figures
print(tb2)
## # A tibble: 2 × 6
##   Age   `Population Not Vax` `Population Fully Vax` `Severe Cases Not Vax`
##   <chr> <chr>                <chr>                  <chr>                 
## 1 < 50  1,116,834            3,501,118              43                    
## 2 > 50  186,078              2,133,516              171                   
## # ℹ 2 more variables: `Severe Cases Fully Vax` <chr>,
## #   `Efficacy\nvs. severe disease` <lgl>

Combine & relocate columns from two separate tables

# Append the two percentage columns of tb1 to tb2 (rows are matched by
# position), then move each percentage next to its population column.
Vaccination_df <- tb2 %>%
  bind_cols(select(tb1, `Not Vax %`, `Fully Vax %`)) %>%
  relocate(`Not Vax %`, .after = `Population Not Vax`) %>%
  relocate(`Fully Vax %`, .after = `Population Fully Vax`)
print(Vaccination_df)
## # A tibble: 2 × 8
##   Age   `Population Not Vax` `Not Vax %` `Population Fully Vax` `Fully Vax %`
##   <chr> <chr>                <chr>       <chr>                  <chr>        
## 1 < 50  1,116,834            23.3%       3,501,118              73.0%        
## 2 > 50  186,078              7.9%        2,133,516              90.4%        
## # ℹ 3 more variables: `Severe Cases Not Vax` <chr>,
## #   `Severe Cases Fully Vax` <chr>, `Efficacy\nvs. severe disease` <lgl>

(1) Do you have enough information to calculate the total population? What does this total population represent?

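Answer: Yes, but only partly. The information is enough to calculate the combined number of unvaccinated and fully vaccinated people in each age group (< 50 and > 50). Because the two percentages do not add up to 100% (23.3% + 73.0% = 96.3% for < 50; 7.9% + 90.4% = 98.3% for > 50), this total does not cover the entire population of either age group.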

(2) Calculate the Efficacy vs. Disease; Explain your results

“Efficacy vs. Disease” is the effectiveness of a vaccine in preventing a particular disease among vaccinated individuals compared with unvaccinated individuals. It is calculated as \[ \text{Vaccine Efficacy} = 1 - \frac{\mathrm{ARV}}{\mathrm{ARU}} \] where \(\mathrm{ARV}\) is the disease attack rate in the vaccinated group and \(\mathrm{ARU}\) is the disease attack rate in the unvaccinated (control) group.

# Strip thousands separators and percent signs, then convert the population,
# percentage and severe-case columns from character to numeric
Vaccination_df <- Vaccination_df %>%
  mutate(
    across(
      c(`Population Not Vax`, `Population Fully Vax`),
      ~ as.numeric(str_remove_all(.x, ","))
    ),
    across(
      c(`Not Vax %`, `Fully Vax %`),
      ~ as.numeric(str_remove_all(.x, "%"))
    ),
    across(
      c(`Severe Cases Not Vax`, `Severe Cases Fully Vax`),
      as.numeric
    )
  )
Vaccination_df
## # A tibble: 2 × 8
##   Age   `Population Not Vax` `Not Vax %` `Population Fully Vax` `Fully Vax %`
##   <chr>                <dbl>       <dbl>                  <dbl>         <dbl>
## 1 < 50               1116834        23.3                3501118          73  
## 2 > 50                186078         7.9                2133516          90.4
## # ℹ 3 more variables: `Severe Cases Not Vax` <dbl>,
## #   `Severe Cases Fully Vax` <dbl>, `Efficacy\nvs. severe disease` <lgl>
# Vaccine efficacy = 1 - ARV / ARU, where ARV is severe cases divided by
# population in the fully vaccinated group and ARU is the same quantity in the
# unvaccinated group.
# The target column name contains a line break; it is written with the "\n"
# escape rather than a literal newline inside backticks, so the name does not
# depend on the source file's line endings or indentation.
Vaccination_df[["Efficacy\nvs. severe disease"]] <- with(
  Vaccination_df,
  1 - ((`Severe Cases Fully Vax` / `Population Fully Vax`) /
    (`Severe Cases Not Vax` / `Population Not Vax`))
)
Vaccination_df
## # A tibble: 2 × 8
##   Age   `Population Not Vax` `Not Vax %` `Population Fully Vax` `Fully Vax %`
##   <chr>                <dbl>       <dbl>                  <dbl>         <dbl>
## 1 < 50               1116834        23.3                3501118          73  
## 2 > 50                186078         7.9                2133516          90.4
## # ℹ 3 more variables: `Severe Cases Not Vax` <dbl>,
## #   `Severe Cases Fully Vax` <dbl>, `Efficacy\nvs. severe disease` <dbl>

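The table shows that the vaccine's efficacy against severe disease is 91.8% for the Israeli population in the age group under 50 and 85.2% for the age group over 50; that is, full vaccination reduces the rate of severe disease by these percentages relative to the unvaccinated.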

(3) From your calculation of efficacy vs. disease, are you able to compare the rate of severe cases in unvaccinated individuals to that in vaccinated individuals?

# Calculate Rate of Severe Cases Not Vax
# A rate is severe cases divided by the population of the same group (here
# expressed per 100,000 people), not one case count divided by the other.
Vaccination_df <- Vaccination_df %>%
  mutate(
    `Rate of Severe Cases Not Vax` =
      `Severe Cases Not Vax` / `Population Not Vax` * 1e5
  )
Vaccination_df
## # A tibble: 2 × 9
##   Age   `Population Not Vax` `Not Vax %` `Population Fully Vax` `Fully Vax %`
##   <chr>                <dbl>       <dbl>                  <dbl>         <dbl>
## 1 < 50               1116834        23.3                3501118          73  
## 2 > 50                186078         7.9                2133516          90.4
## # ℹ 4 more variables: `Severe Cases Not Vax` <dbl>,
## #   `Severe Cases Fully Vax` <dbl>, `Efficacy\nvs. severe disease` <dbl>,
## #   `Rate of Severe Cases Not Vax` <dbl>
# Calculate Rate of Severe Cases Fully Vax (per 100,000 fully vaccinated people)
Vaccination_df <- Vaccination_df %>%
  mutate(
    `Rate of Severe Cases Fully Vax` =
      `Severe Cases Fully Vax` / `Population Fully Vax` * 1e5
  )
Vaccination_df
## # A tibble: 2 × 10
##   Age   `Population Not Vax` `Not Vax %` `Population Fully Vax` `Fully Vax %`
##   <chr>                <dbl>       <dbl>                  <dbl>         <dbl>
## 1 < 50               1116834        23.3                3501118          73  
## 2 > 50                186078         7.9                2133516          90.4
## # ℹ 5 more variables: `Severe Cases Not Vax` <dbl>,
## #   `Severe Cases Fully Vax` <dbl>, `Efficacy\nvs. severe disease` <dbl>,
## #   `Rate of Severe Cases Not Vax` <dbl>,
## #   `Rate of Severe Cases Fully Vax` <dbl>
# Ratio of the unvaccinated rate to the fully vaccinated rate
# (equivalent to 1 / (1 - efficacy))
Vaccination_df <- Vaccination_df %>%
  mutate(
    Ratio = `Rate of Severe Cases Not Vax` / `Rate of Severe Cases Fully Vax`
  )
Vaccination_df
## # A tibble: 2 × 11
##   Age   `Population Not Vax` `Not Vax %` `Population Fully Vax` `Fully Vax %`
##   <chr>                <dbl>       <dbl>                  <dbl>         <dbl>
## 1 < 50               1116834        23.3                3501118          73  
## 2 > 50                186078         7.9                2133516          90.4
## # ℹ 6 more variables: `Severe Cases Not Vax` <dbl>,
## #   `Severe Cases Fully Vax` <dbl>, `Efficacy\nvs. severe disease` <dbl>,
## #   `Rate of Severe Cases Not Vax` <dbl>,
## #   `Rate of Severe Cases Fully Vax` <dbl>, Ratio <dbl>

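Yes. The ratio of the rate of severe cases in unvaccinated individuals to that in fully vaccinated individuals, with each rate computed as severe cases divided by the population of that group, is about 12.25 for the age group under 50 and about 6.76 for the age group over 50. This is consistent with the efficacy figures above, since the ratio equals 1 / (1 − efficacy).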