library(readr)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
# Read the raw assignment spreadsheet (CSV export) directly from GitHub.
# Some header cells are blank, so readr assigns those columns names
# such as ...3 and ...5.
vaccination_url <- "https://raw.githubusercontent.com/Eperez54/Dat-607/main/Week%205/israeli_vaccination_data_analysis_start.csv"
vaccination_Data <- read_csv(vaccination_url)
## New names:
## * `` -> ...3
## * `` -> ...5
## Rows: 19 Columns: 6
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (6): Age, Population %, ...3, Severe Cases, ...5, Efficacy
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the raw import to see which rows actually carry data.
print(vaccination_Data)
## # A tibble: 19 x 6
## Age `Population %` ...3 `Severe Cases` ...5 Efficacy
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 <NA> "Not Vax\n%" "Ful~ "Not Vax\nper~ "Ful~ vs. sev~
## 2 <50 "1,116,834" "3,5~ "43" "11" <NA>
## 3 <NA> "23.3%" "73.~ <NA> <NA> <NA>
## 4 >50 "186,078" "2,1~ "171" "290" <NA>
## 5 <NA> "7.9%" "90.~ <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA> <NA>
## 7 <NA> <NA> <NA> <NA> <NA> <NA>
## 8 <NA> <NA> <NA> <NA> <NA> <NA>
## 9 Definitions <NA> <NA> <NA> <NA> <NA>
## 10 <NA> "Severe Cases~ <NA> <NA> <NA> <NA>
## 11 <NA> "Efficacy vs.~ <NA> <NA> <NA> <NA>
## 12 <NA> <NA> <NA> <NA> <NA> <NA>
## 13 (1) Do you have enough in~ <NA> <NA> <NA> <NA> <NA>
## 14 (2) Calculate the Efficac~ <NA> <NA> <NA> <NA> <NA>
## 15 (3) From your calculation~ <NA> <NA> <NA> <NA> <NA>
## 16 <NA> <NA> <NA> <NA> <NA> <NA>
## 17 <NA> <NA> <NA> <NA> <NA> <NA>
## 18 <NA> <NA> <NA> <NA> <NA> <NA>
## 19 <NA> <NA> <NA> <NA> <NA> <NA>
# Keep only rows 1-5: they hold the table itself. The remaining rows are
# blank lines, the definitions and the assignment questions.
vaccination_Data <- vaccination_Data %>% slice(1:5)
vaccination_Data %>% head()
## # A tibble: 5 x 6
## Age `Population %` ...3 `Severe Cases` ...5 Efficacy
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 <NA> "Not Vax\n%" "Fully Vax\n%" "Not Vax\nper 100K\n\n\np" "Full~ vs. sev~
## 2 <50 "1,116,834" "3,501,118" "43" "11" <NA>
## 3 <NA> "23.3%" "73.0%" <NA> <NA> <NA>
## 4 >50 "186,078" "2,133,516" "171" "290" <NA>
## 5 <NA> "7.9%" "90.4%" <NA> <NA> <NA>
Here I noticed that rows 3 and 5 hold the same kind of data, and that rows 2 and 4 are likewise alike, so I grouped each pair into its own table in order to combine them later. Once they were grouped, I renamed the columns to match the names in the original CSV file.
# Rows 3 and 5 carry the percentage figures for the two age groups.
Row1 <- vaccination_Data %>%
  slice(3, 5) %>%
  rename(
    "Not Vax %" = "Population %",
    "Fully Vax %" = "...3",
    "Efficacy vs Severe Disease" = "Efficacy"
  )

# Rows 2 and 4 carry the population and severe-case figures.
Row2 <- vaccination_Data %>%
  slice(2, 4) %>%
  rename(
    "Population Not Vax" = "Population %",
    "Population Fully Vax" = "...3",
    "Severe Cases Not Vax Per 100K" = "Severe Cases",
    "Severe Cases Fully Vax Per 100K" = "...5",
    "Efficacy vs Severe Disease" = "Efficacy"
  )
Here I arrange my data to look similar to the original file. I used the relocate function from dplyr to move "Fully Vax %" next to "Population Fully Vax" and "Not Vax %" next to "Population Not Vax".
# Attach the two percentage columns to the population/severe-case table,
# then move each percentage so it sits directly after its matching
# population column.
pct_cols <- select(Row1, c("Not Vax %", "Fully Vax %"))
vaxdf <- bind_cols(Row2, pct_cols) %>%
  relocate("Fully Vax %", .after = "Population Fully Vax") %>%
  relocate("Not Vax %", .after = "Population Not Vax")
vaxdf
## # A tibble: 2 x 8
## Age `Population Not Vax` `Not Vax %` `Population Fully Vax` `Fully Vax %`
## <chr> <chr> <chr> <chr> <chr>
## 1 <50 1,116,834 23.3% 3,501,118 73.0%
## 2 >50 186,078 7.9% 2,133,516 90.4%
## # ... with 3 more variables: `Severe Cases Not Vax Per 100K` <chr>,
## # `Severe Cases Fully Vax Per 100K` <chr>, `Efficacy vs Severe Disease` <chr>
In its current form, the data gives us enough information to answer this question. This assumes that, following the CDC guidelines, a fully vaccinated person must have had both shots, and in turn that those who have not had both shots are not considered vaccinated.
Removing the commas and percent signs, and converting the columns to numeric, so that I can do arithmetic operations.
# Strip the thousands separators and percent signs, then convert every
# measurement column from character to numeric.
vaxdf <- vaxdf %>%
  mutate(
    `Population Not Vax` =
      as.numeric(str_remove_all(`Population Not Vax`, ",")),
    `Population Fully Vax` =
      as.numeric(str_remove_all(`Population Fully Vax`, ",")),
    `Not Vax %` = as.numeric(str_remove_all(`Not Vax %`, "%")),
    `Fully Vax %` = as.numeric(str_remove_all(`Fully Vax %`, "%")),
    `Severe Cases Not Vax Per 100K` =
      as.numeric(`Severe Cases Not Vax Per 100K`),
    `Severe Cases Fully Vax Per 100K` =
      as.numeric(`Severe Cases Fully Vax Per 100K`)
  )
Here we calculate the total population.
# Per-age-group total = unvaccinated + fully vaccinated population,
# placed right after the Age column.
vaxdf <- vaxdf %>%
  mutate(
    `Total Population` = `Population Not Vax` + `Population Fully Vax`,
    .after = "Age"
  )
# Sum of the per-group totals across both age groups.
vaxdf %>%
  summarise(`Sum Total Population` = sum(`Total Population`))
## # A tibble: 1 x 1
## `Sum Total Population`
## <dbl>
## 1 6937546
# Keep the summed total in a variable and report it as a sentence.
population <- sum(vaxdf[["Total Population"]])
total_population_msg <- paste0("Total Population is ", population)
print(total_population_msg)
## [1] "Total Population is 6937546"
# Efficacy vs. severe disease =
#   1 - (severe-case rate, fully vaccinated / severe-case rate, unvaccinated)
# where each rate is that group's severe cases divided by that group's
# population.
# The denominator has to use the *unvaccinated* severe cases. If the fully
# vaccinated cases appear in both rates they cancel out, and the result
# degenerates to 1 - (Population Not Vax / Population Fully Vax).
# NOTE(review): the severe-case columns are labelled "Per 100K" but are
# divided by population here, i.e. treated as raw counts - confirm against
# the data source.
vaxdf <- vaxdf %>%
  mutate(
    `Efficacy vs Severe Disease` = 1 -
      (`Severe Cases Fully Vax Per 100K` / `Population Fully Vax`) /
      (`Severe Cases Not Vax Per 100K` / `Population Not Vax`)
  )
vaxdf
## # A tibble: 2 x 9
## Age `Total Population` `Population Not Vax` `Not Vax %` `Population Fully ~`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 <50 4617952 1116834 23.3 3501118
## 2 >50 2319594 186078 7.9 2133516
## # ... with 4 more variables: `Fully Vax %` <dbl>,
## # `Severe Cases Not Vax Per 100K` <dbl>,
## # `Severe Cases Fully Vax Per 100K` <dbl>, `Efficacy vs Severe Disease` <dbl>
By calculating the efficacy, I noticed that it is high in the <50 age group, whereas in the >50 age group it is significantly lower.
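The severe-case figures below are derived as follows: for the unvaccinated, the unvaccinated severe cases are divided by the fully vaccinated severe cases; for the fully vaccinated, the complement of the efficacy (1 - efficacy) is used.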
# "Rate of Severe Cases Not Vax": unvaccinated severe cases divided by the
# fully vaccinated severe cases.
# "Rate of Severe Cases Fully Vax": complement of the efficacy column.
# NOTE(review): the first value is a ratio of the two severe-case columns
# rather than cases per population - confirm this is the intended "rate".
vaxdf <- vaxdf %>%
  mutate(
    `Rate of Severe Cases Not Vax` =
      `Severe Cases Not Vax Per 100K` / `Severe Cases Fully Vax Per 100K`,
    `Rate of Severe Cases Fully Vax` = 1 - `Efficacy vs Severe Disease`
  )
vaxdf
## # A tibble: 2 x 11
## Age `Total Population` `Population Not Vax` `Not Vax %` `Population Fully ~`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 <50 4617952 1116834 23.3 3501118
## 2 >50 2319594 186078 7.9 2133516
## # ... with 6 more variables: `Fully Vax %` <dbl>,
## # `Severe Cases Not Vax Per 100K` <dbl>,
## # `Severe Cases Fully Vax Per 100K` <dbl>,
## # `Efficacy vs Severe Disease` <dbl>, `Rate of Severe Cases Not Vax` <dbl>,
## # `Rate of Severe Cases Fully Vax` <dbl>
# Ratio of the unvaccinated figure to the fully vaccinated figure.
vaxdf <- vaxdf %>%
  mutate(
    Ratio = `Rate of Severe Cases Not Vax` / `Rate of Severe Cases Fully Vax`
  )
vaxdf
## # A tibble: 2 x 12
## Age `Total Population` `Population Not Vax` `Not Vax %` `Population Fully ~`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 <50 4617952 1116834 23.3 3501118
## 2 >50 2319594 186078 7.9 2133516
## # ... with 7 more variables: `Fully Vax %` <dbl>,
## # `Severe Cases Not Vax Per 100K` <dbl>,
## # `Severe Cases Fully Vax Per 100K` <dbl>,
## # `Efficacy vs Severe Disease` <dbl>, `Rate of Severe Cases Not Vax` <dbl>,
## # `Rate of Severe Cases Fully Vax` <dbl>, Ratio <dbl>
Individuals aged 50 and under who are unvaccinated have a higher rate of severe cases than individuals in the same age group who are fully vaccinated. Likewise, among individuals over 50, the rate of severe cases is higher in the unvaccinated than in the fully vaccinated.