Load packages and data

library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)
library(zoo)
library(fable)
# install and load any package necessary

Questions

Exercise 1

Covid.Data <- readxl::read_excel("C:\\Users\\Zacha\\Downloads\\DataForecast#2.xlsx")
Covid.TS <- Covid.Data %>% 
  mutate(Week= yearweek(Date)) %>%
  as_tsibble(index = Week) %>%
  select(Week, Employee_Cases, Student_Cases)
  
head(Covid.TS)
## # A tsibble: 6 x 3 [1W]
##       Week Employee_Cases Student_Cases
##     <week>          <dbl>         <dbl>
## 1 2021 W52              0             1
## 2 2022 W01             26            39
## 3 2022 W02             18            63
## 4 2022 W03             20            78
## 5 2022 W04             18            43
## 6 2022 W05              9            16
Covid.Plot <- Covid.Data %>%
  pivot_longer(-Date) %>%
  ggplot(aes(x = Date, y = value, color = name)) +
  geom_line() +
  facet_grid(name ~., scales = "free_y") +
  labs(y = "Covid Cases")
Covid.Plot

Covid.Log <- Covid.TS %>%
  mutate(Employee_Cases = log(Employee_Cases), Student_Cases = log(Student_Cases))

Covid.Log.Plot <- Covid.Log %>%
  ggplot(aes(x = Week, y= value, color = name)) +
  geom_line(aes(y = Employee_Cases), color = "Orange") +
  geom_line(aes(y = Student_Cases), color = "Blue") +
  labs(x = "Date", y = "% Change in Covid Cases") +
  ggtitle("% Change in Cases for Employees(Orange) and Students(Blue) in 2022 at GCSU")
Covid.Log.Plot

Exercise 2

Covid.Move <- Covid.Data %>%
  mutate(Employee_Cases = rollmean(Employee_Cases, k = 5, fill = NA)) %>%
  mutate(Student_Cases = rollmean(Student_Cases, k = 5, fill = NA)) 

Covid.Move.Plot <- Covid.Move %>%
  pivot_longer(-Date) %>%
  ggplot(aes(x = Date, y = value, color = name)) +
  geom_line() +
  facet_grid(name ~., scales = "free_y") +
  labs(y = "Moving Average Covid Cases")
Covid.Move.Plot
## Warning: Removed 8 row(s) containing missing values (geom_path).

Covid.Decomp <- Covid.TS %>%
  model(stl = STL(Employee_Cases))
components(Covid.Decomp) %>% autoplot()

Exercise 3

The spikes in the data make a lot of sense once you consider when the school year starts each semester. We observe a spike in Covid Cases in January when spring semester begins and in August when Fall semester begins. The spikes in percent change for students were larger than that for employees which is interesting. It is likely due to students having the propensity to party in large groups.

Exercise 4

Employee.Cases <- Covid.Data %>%
  select(-Student_Cases) 

Employee.TS <- Employee.Cases %>%
  mutate(Week = yearweek(Date)) %>%
  as_tsibble(index = Week) %>%
  select(Week, Employee_Cases)

Student.Cases <- Covid.Data %>%
  select(-Employee_Cases)
Student.TS <- Student.Cases %>%
  mutate(Week = yearweek(Date)) %>%
  as_tsibble(index = Week) %>%
  select(Week, Student_Cases)

ggplot(Employee.TS)+
  geom_line(aes(Week, Employee_Cases))

Employee.Mean <- Employee.TS %>%
  model(MEAN(Employee_Cases)) %>%
  forecast(h = 1) %>%
  autoplot(Employee.TS)
Employee.Mean

Employee.Naive <- Employee.TS %>%
  model(NAIVE(Employee_Cases)) %>%
  forecast(h = "1 week") %>%
  autoplot(Employee.TS, Level = NULL)
## Warning: Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
Employee.Naive

ggplot(Student.TS) +    
  geom_line(aes(Week, Student_Cases))

Student.Mean <- Student.TS %>%
  model(MEAN(Student_Cases)) %>%
  forecast(h = 1) %>%
  autoplot(Student.TS)

Student.Mean

Student.Naive <- Student.TS %>%
  model(NAIVE(Student_Cases)) %>%
  forecast(h = "1 week") %>%
  autoplot(Student.TS, Level = NULL)
## Warning: Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
Student.Naive

Going off of the mean averages my forecast for next week for student cases will be 12.6 and the employee cases will be 6.32.