Load packages and data

library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)
library(zoo)
library(fable)

Questions

Exercise 1

covid_data <- readxl::read_excel("//Users//colinadams//Documents//GCSU//Fall 2022//Forecasting//Forecasts//Forecast 2//ForecastingDataCovidGCSU.xlsx")

covid_ts <- covid_data %>%
  mutate(Week = yearweek(Date)) %>%
  as_tsibble(index = Week) %>%
  select(Week, Employee_Cases, Student_Cases)
head(covid_ts)
## # A tsibble: 6 x 3 [1W]
##       Week Employee_Cases Student_Cases
##     <week>          <dbl>         <dbl>
## 1 2021 W52              0             1
## 2 2022 W01             26            39
## 3 2022 W02             18            63
## 4 2022 W03             20            78
## 5 2022 W04             18            43
## 6 2022 W05              9            16
covid_plot <- covid_data %>%
  pivot_longer(-Date) %>%
  ggplot(aes(x = Date, y = value, color = name)) +
  geom_line() +
  facet_grid(name ~., scales = "free_y") +
  labs(y = "Covid Cases")
covid_plot

Exercise 2

covid_ts_log <- covid_ts %>%
  mutate(Employee_Cases = log(Employee_Cases), Student_Cases = log(Student_Cases))

covid_ts_log_plot <- covid_ts_log %>%
  ggplot(aes(x = Week, y= value, color = name)) +
  geom_line(aes(y = Employee_Cases), color = "red") +
  geom_line(aes(y = Student_Cases), color = "blue") +
  labs(x = "Date", y = "% Change in Covid Cases") +
  ggtitle("% Change in Cases for Employees(Red) and Students(Blue) in 2022 at GCSU")
covid_ts_log_plot

covid_ma <- covid_data %>%
  mutate(Employee_Cases = rollmean(Employee_Cases, k = 5, fill = NA)) %>%
  mutate(Student_Cases = rollmean(Student_Cases, k = 5, fill = NA)) 

covid_plot_ma <- covid_ma %>%
  pivot_longer(-Date) %>%
  ggplot(aes(x = Date, y = value, color = name)) +
  geom_line() +
  facet_grid(name ~., scales = "free_y") +
  labs(y = "Moving Average Covid Cases")
covid_plot_ma
## Warning: Removed 8 row(s) containing missing values (geom_path).

covid_dcmp_emp <- covid_ts %>%
  model(stl = STL(Employee_Cases))
components(covid_dcmp_emp) %>% autoplot()

covid_dcmp_stu <- covid_ts %>%
  model(stl = STL(Student_Cases))
components(covid_dcmp_stu) %>% autoplot()

Exercise 3

After plotting the data, I am able to see two spikes in the Covid-19 rates this year. These spikes occurred for both the employees and the students. One spike was in January and the other was in August. These spikes line up perfectly with the beginning of each semester for GCSU. Both spikes saw a percentage change in Covid-19 above 3%. The magnitude of spikes was larger in both cases for the students. This is likely due to less worry about Covid-19 as well as a more social lifestyle for the students on average.

Exercise 4

employee_cases <- covid_data %>%
  select(-Student_Cases) 

employee_cases_ts <- employee_cases %>%
  mutate(Week = yearweek(Date)) %>%
  as_tsibble(index = Week) %>%
  select(Week, Employee_Cases)

student_cases <- covid_data %>%
  select(-Employee_Cases)
student_cases_ts <- student_cases %>%
  mutate(Week = yearweek(Date)) %>%
  as_tsibble(index = Week) %>%
  select(Week, Student_Cases)

ggplot(employee_cases_ts)+
  geom_line(aes(Week, Employee_Cases))

employee_cases_ts_mean <- employee_cases_ts %>%
  model(MEAN(Employee_Cases)) %>%
  forecast(h = 1) %>%
  autoplot(employee_cases_ts)
employee_cases_ts_mean

employee_cases_ts_naive <- employee_cases_ts %>%
  model(NAIVE(Employee_Cases)) %>%
  forecast(h = "1 week") %>%
  autoplot(employee_cases_ts, Level = NULL)
## Warning: Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
employee_cases_ts_naive

ggplot(student_cases_ts) +
  geom_line(aes(Week, Student_Cases))

student_cases_ts_mean <- student_cases_ts %>%
  model(MEAN(Student_Cases)) %>%
  forecast(h = 1) %>%
  autoplot(student_cases_ts)
student_cases_ts_mean

student_cases_ts_naive <- student_cases_ts %>%
  model(NAIVE(Student_Cases)) %>%
  forecast(h = "1 week") %>%
  autoplot(student_cases_ts, Level = NULL)
## Warning: Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
student_cases_ts_naive

Based off the mean averages, I forecast that the student cases next week will be 12.6 and the employee cases will be 6.32. Using the mean method, the 80% prediction interval for student cases is (-13.4, 42.8). Using the mean method, the 80% prediction interval for employee cases is (-1.64, 14.96).