Q1

# Q1: keep only the rows where new_cases is below 200 and total_icu is below
# 100. Multiple conditions passed to filter() are combined with a logical AND.
covid_low <- data %>%
  filter(new_cases < 200, total_icu < 100)
print(covid_low)
## # A tibble: 134 x 10
##    date       new_cases new_test active_cases new_deaths total_icu new_vax  year
##    <date>         <dbl>    <dbl>        <dbl>      <dbl>     <dbl>   <dbl> <dbl>
##  1 2020-08-02       196     1870         3151          3        76      NA  2020
##  2 2020-08-30       168     4481         2948          1        66      NA  2020
##  3 2020-10-01       199     5214         2834          0        58      NA  2020
##  4 2020-10-03       175     5200         2821          1        58      NA  2020
##  5 2020-10-04       159     4344         2807          0        58      NA  2020
##  6 2020-10-05       194     4751         2812          0        59      NA  2020
##  7 2020-10-10       178     5143         2792          1        54      NA  2020
##  8 2020-10-14       198     6173         2799          0        57      NA  2020
##  9 2020-10-16       189     6082         2764          0        54      NA  2020
## 10 2020-10-30       193     9971         2762          1        39      NA  2020
## # ... with 124 more rows, and 2 more variables: month <dbl>, day <dbl>

Q2

# Q2: add a running total of new_cases. cumsum() accumulates in the current
# row order, so total_cases follows whatever order `data` is already in.
data <- data %>%
  mutate(total_cases = cumsum(new_cases))
print(data)
## # A tibble: 564 x 11
##    date       new_cases new_test active_cases new_deaths total_icu new_vax  year
##    <date>         <dbl>    <dbl>        <dbl>      <dbl>     <dbl>   <dbl> <dbl>
##  1 2020-02-29         1        0            1          0        NA      NA  2020
##  2 2020-03-01         2        0            3          0        NA      NA  2020
##  3 2020-03-02         4        0            7          0        NA      NA  2020
##  4 2020-03-03         1        0            8          0        NA      NA  2020
##  5 2020-03-04         0        0            8          0        NA      NA  2020
##  6 2020-03-05         0        0            8          0        NA      NA  2020
##  7 2020-03-06         3        0           11          0        NA      NA  2020
##  8 2020-03-07         1        0           12          0        NA      NA  2020
##  9 2020-03-08         3        0           15          0        NA      NA  2020
## 10 2020-03-09         3        0           18          0        NA      NA  2020
## # ... with 554 more rows, and 3 more variables: month <dbl>, day <dbl>,
## #   total_cases <dbl>

Q3

# Q3: for each year, find the month with the highest average of new cases.
#
# Step 1: mean of new_cases per (year, month), ignoring NAs. This previously
# averaged `new_test`, which did not match the `avgCase` name or the written
# answer about cases. The printed table and the answer that follow were
# produced by the `new_test` version and must be regenerated after this fix.
byMonth <- group_by(data, year, month)
byMonth <- summarize(byMonth, avgCase = mean(new_cases, na.rm = TRUE))

# Step 2: summarize() drops the innermost grouping level (month), so byMonth
# is still grouped by year here and this yields one row per year.
# `maxMonth` keeps its original meaning (the largest monthly average);
# `peakMonth` reports which month that value belongs to.
summarize(
  byMonth,
  maxMonth = max(avgCase),
  peakMonth = month[which.max(avgCase)]
)
## # A tibble: 2 x 2
##    year maxMonth
##   <dbl>    <dbl>
## 1  2020    9711.
## 2  2021   25880.

Ans: In 2020, November had the highest average number of cases. In 2021, September has had the highest average number of cases so far.

Q4 (ii)

# Q4 (ii): compare the average of new_vax on Sundays versus Mondays.
#
# Tag every row with its abbreviated weekday label. `TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
data <- mutate(data, day_of_week = wday(date, label = TRUE))

# Keep only Sunday and Monday rows.
# NOTE(review): the labels "Sun"/"Mon" depend on the session locale - confirm
# this is run under an English locale.
byDay <- filter(data, day_of_week %in% c("Sun", "Mon"))

# Mean of new_vax per weekday, ignoring days with no vaccination figure.
summarize(
  group_by(byDay, day_of_week),
  avgVacc = mean(new_vax, na.rm = TRUE)
)
## # A tibble: 2 x 2
##   day_of_week avgVacc
##   <ord>         <dbl>
## 1 Sun          19768.
## 2 Mon          24881.

Ans: On average, more vaccines are administered on Monday than on Sunday.

Q5

# Q5: restrict to rows dated on or before 2021-04-30.
reqData <- data %>%
  filter(date <= "2021-04-30")

# Mean of new_cases per (year, month), ignoring NAs, plus a `date` column
# built from year and month so the months can be placed on a time axis
# (the remaining components take make_datetime()'s defaults).
avgCaseMonth <- reqData %>%
  group_by(year, month) %>%
  summarise(avgCase = mean(new_cases, na.rm = TRUE)) %>%
  mutate(date = make_datetime(year, month))

# Line chart of the monthly average of new cases over time.
ggplot(avgCaseMonth, aes(x = date, y = avgCase)) +
  geom_line() +
  labs(x = "Date", y = "Average Cases By Month")

# Monthly means of new_vax and new_cases (NAs ignored) over the same
# filtered period held in reqData, with a `date` column built from
# year and month for plotting.
avgVaxMonth <- summarise(
  group_by(reqData, year, month),
  avgVax = mean(new_vax, na.rm = TRUE),
  avgCase = mean(new_cases, na.rm = TRUE)
)
avgVaxMonth <- mutate(avgVaxMonth, date = make_datetime(year, month))

# Average cases drawn as a line and average vaccinations as points, sharing
# one y axis. Every layer, including labs(), must be chained with `+`:
# without the `+` after geom_point() the labels were never attached to the
# plot and labs() was printed as a bare object instead.
ggplot() +
  geom_line(data = avgVaxMonth, mapping = aes(x = date, y = avgCase)) +
  geom_point(data = avgVaxMonth, mapping = aes(x = date, y = avgVax)) +
  labs(x = "Date", y = "Average Cases and Vaccine By Month")

Ans: Based on the data before the start of April, cases first increased, then dropped, and then started increasing again. This could justify the lockdown that was imposed. Exploring the data further, another reason for the lockdown could be that the average cases per month increased even though more people started getting vaccinated in March and April. This may indicate that people stopped following protocols once they were vaccinated, which required the government to impose a lockdown.