R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.2     v stringr 1.4.0
## v tidyr   1.1.3     v forcats 0.5.1
## v readr   1.4.0
## Warning: package 'ggplot2' was built under R version 4.1.2
## Warning: package 'stringr' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Creating a data frame:

# Build a zero-row data frame so every column's name and type is fixed
# before any rows are added: one character label plus six double columns.
vaccination_df <- data.frame(
  age = character(0),
  unvaccinated_pop = numeric(0),
  unvaccinated_pop_per_100K = numeric(0),
  vaccinated_pop = numeric(0),
  vaccinated_pop_per_100K = numeric(0),
  unvaccinated_severe_cases = numeric(0),
  vaccinated_severe_cases = numeric(0)
)

# Echo the empty frame to confirm the column layout.
vaccination_df
## [1] age                       unvaccinated_pop         
## [3] unvaccinated_pop_per_100K vaccinated_pop           
## [5] vaccinated_pop_per_100K   unvaccinated_severe_cases
## [7] vaccinated_severe_cases  
## <0 rows> (or 0-length row.names)

Adding values into the dataframe

# Populate the frame with a single add_row() call. Each argument is a
# length-3 vector whose elements are, in order, the "under 50", "over 50"
# and "total" rows.
# NOTE(review): the *_pop_per_100K values are fractions in the first two rows
# (0.233, 0.730, ...) but look like percentages in the "total" row
# (18.2, 78.7) -- confirm the intended unit before using these columns.
vaccination_df <- add_row(
  vaccination_df,
  age = c("under 50", "over 50", "total"),
  unvaccinated_pop = c(1116834, 186078, 1302912),
  unvaccinated_pop_per_100K = c(0.233, 0.079, 18.2),
  vaccinated_pop = c(3501118, 2133516, 5634634),
  vaccinated_pop_per_100K = c(0.730, 0.904, 78.7),
  unvaccinated_severe_cases = c(43, 171, 214),
  vaccinated_severe_cases = c(11, 290, 301)
)

vaccination_df
##        age unvaccinated_pop unvaccinated_pop_per_100K vaccinated_pop
## 1 under 50          1116834                     0.233        3501118
## 2  over 50           186078                     0.079        2133516
## 3    total          1302912                    18.200        5634634
##   vaccinated_pop_per_100K unvaccinated_severe_cases vaccinated_severe_cases
## 1                   0.730                        43                      11
## 2                   0.904                       171                     290
## 3                  78.700                       214                     301
vaccination_df 
##        age unvaccinated_pop unvaccinated_pop_per_100K vaccinated_pop
## 1 under 50          1116834                     0.233        3501118
## 2  over 50           186078                     0.079        2133516
## 3    total          1302912                    18.200        5634634
##   vaccinated_pop_per_100K unvaccinated_severe_cases vaccinated_severe_cases
## 1                   0.730                        43                      11
## 2                   0.904                       171                     290
## 3                  78.700                       214                     301

Creating a CSV file

# Persist the data frame as CSV; row.names = FALSE keeps the row index out of
# the file.
# NOTE(review): absolute, machine-specific path -- the script will only run
# where this directory exists.
write.csv(
  vaccination_df,
  file = "C:/Users/Uzma/CUNY-SPS-Assignments/data_607/week5/vaccination_data_analysis.csv",
  row.names = FALSE
)

Reading from a CSV file

# Read the vaccination CSV with readr's read_csv().
# NOTE(review): absolute, machine-specific path -- the script will only run
# where this file exists.
vaccine_df <- read_csv(
  "C:/Users/Uzma/CUNY-SPS-Assignments/data_607/week5/vaccination_data_analysis.csv"
)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   age = col_character(),
##   unvaccinated_pop = col_double(),
##   unvaccinated_pop_per_100K = col_double(),
##   vaccinated_pop = col_double(),
##   vaccinated_pop_per_100K = col_double(),
##   unvaccinated_severe_cases = col_double(),
##   vaccinated_severe_cases = col_double()
## )
glimpse(vaccine_df)
## Rows: 3
## Columns: 7
## $ age                       <chr> "under 50", "over 50", "total"
## $ unvaccinated_pop          <dbl> 1116834, 186078, 1302912
## $ unvaccinated_pop_per_100K <dbl> 0.233, 0.079, 18.200
## $ vaccinated_pop            <dbl> 3501118, 2133516, 5634634
## $ vaccinated_pop_per_100K   <dbl> 0.730, 0.904, 78.700
## $ unvaccinated_severe_cases <dbl> 43, 171, 214
## $ vaccinated_severe_cases   <dbl> 11, 290, 301
vaccine_df
## # A tibble: 3 x 7
##   age     unvaccinated_pop unvaccinated_pop_p~ vaccinated_pop vaccinated_pop_pe~
##   <chr>              <dbl>               <dbl>          <dbl>              <dbl>
## 1 under ~          1116834               0.233        3501118              0.73 
## 2 over 50           186078               0.079        2133516              0.904
## 3 total            1302912              18.2          5634634             78.7  
## # ... with 2 more variables: unvaccinated_severe_cases <dbl>,
## #   vaccinated_severe_cases <dbl>
# Efficacy vs. severe disease = 1 - (fully vaxed severe cases per 100K / not vaxed severe cases per 100K)

# (1) Do you have enough information to calculate the total population?  What does this total population represent?                     
# (2) Calculate the Efficacy vs. Disease; Explain your results.                     
# (3) From your calculation of efficacy vs. disease, are you able to compare the rate of severe cases in unvaccinated 
# individuals to that in vaccinated individuals?                        
                        
# Population    7,155,090   Population includes Israelis age 12 and up; also includes partially vaxed individuals               
                        
#   <50: 3.9 / 0.3  13.0    { How should these numbers be interpreted?
#   >50: 91.9 / 13.6    6.8             

Calculating the total population

# Keep only the age label and the two population counts.
total_population <- vaccine_df %>%
  select(age, unvaccinated_pop, vaccinated_pop)

total_population
## # A tibble: 3 x 3
##   age      unvaccinated_pop vaccinated_pop
##   <chr>               <dbl>          <dbl>
## 1 under 50          1116834        3501118
## 2 over 50            186078        2133516
## 3 total             1302912        5634634
# (1) Do you have enough information to calculate the total population?  What does this total population represent? 

# Yes. The total population represents all vaccinated and unvaccinated individuals, both under and over age 50.

# Add each age group's total as the sum of its unvaccinated and vaccinated
# populations.
total_population <- total_population %>%
  mutate(total_pop = unvaccinated_pop + vaccinated_pop)

total_population
## # A tibble: 3 x 4
##   age      unvaccinated_pop vaccinated_pop total_pop
##   <chr>               <dbl>          <dbl>     <dbl>
## 1 under 50          1116834        3501118   4617952
## 2 over 50            186078        2133516   2319594
## 3 total             1302912        5634634   6937546
# (2) Calculate the Efficacy vs. Disease; Explain your results.                     
# (3) From your calculation of efficacy vs. disease, are you able to compare the rate of severe cases in unvaccinated 
# individuals to that in vaccinated individuals?                        

# Derive severe-case rates per 100K for each vaccination status, then
# efficacy = 1 - (vaccinated rate / unvaccinated rate).
vaccine_df <- vaccine_df %>%
  mutate(
    unvaccinated_severe_cases_per_100k =
      1e5 * unvaccinated_severe_cases / unvaccinated_pop,
    vaccinated_severe_cases_per_100k =
      1e5 * vaccinated_severe_cases / vaccinated_pop,
    efficacy_vs_diseases =
      1 - vaccinated_severe_cases_per_100k / unvaccinated_severe_cases_per_100k
  )

vaccine_df
## # A tibble: 3 x 10
##   age     unvaccinated_pop unvaccinated_pop_p~ vaccinated_pop vaccinated_pop_pe~
##   <chr>              <dbl>               <dbl>          <dbl>              <dbl>
## 1 under ~          1116834               0.233        3501118              0.73 
## 2 over 50           186078               0.079        2133516              0.904
## 3 total            1302912              18.2          5634634             78.7  
## # ... with 5 more variables: unvaccinated_severe_cases <dbl>,
## #   vaccinated_severe_cases <dbl>, unvaccinated_severe_cases_per_100k <dbl>,
## #   vaccinated_severe_cases_per_100k <dbl>, efficacy_vs_diseases <dbl>

Analysis:

# Efficacy vs. severe disease is one minus the ratio of the severe-case rate in the vaccinated population to the severe-case rate in the unvaccinated population. For the under-50 age group, the fully vaccinated population is about 91.8% less likely to develop severe disease than the unvaccinated population.

# For the over-50 age group, efficacy falls to 85.2%.

# For the total population, efficacy drops to 67.5%.

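# The reason is that severe cases among the unvaccinated over-50 population occur at a rate of about 92 per 100K, far higher than in the under-50 group, while the over-50 group is also the more heavily vaccinated one. Pooling the two age groups therefore pulls the overall efficacy below that of either group (an instance of Simpson's paradox).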