09-19-2022library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)
library(zoo)
library(fable)
# install and load any package necessary
Covid.Data <- readxl::read_excel("C:\\Users\\Zacha\\Downloads\\DataForecast#2.xlsx")
Covid.TS <- Covid.Data %>%
mutate(Week= yearweek(Date)) %>%
as_tsibble(index = Week) %>%
select(Week, Employee_Cases, Student_Cases)
head(Covid.TS)
## # A tsibble: 6 x 3 [1W]
## Week Employee_Cases Student_Cases
## <week> <dbl> <dbl>
## 1 2021 W52 0 1
## 2 2022 W01 26 39
## 3 2022 W02 18 63
## 4 2022 W03 20 78
## 5 2022 W04 18 43
## 6 2022 W05 9 16
Covid.Plot <- Covid.Data %>%
pivot_longer(-Date) %>%
ggplot(aes(x = Date, y = value, color = name)) +
geom_line() +
facet_grid(name ~., scales = "free_y") +
labs(y = "Covid Cases")
Covid.Plot
Covid.Log <- Covid.TS %>%
mutate(Employee_Cases = log(Employee_Cases), Student_Cases = log(Student_Cases))
Covid.Log.Plot <- Covid.Log %>%
ggplot(aes(x = Week, y= value, color = name)) +
geom_line(aes(y = Employee_Cases), color = "Orange") +
geom_line(aes(y = Student_Cases), color = "Blue") +
labs(x = "Date", y = "% Change in Covid Cases") +
ggtitle("% Change in Cases for Employees(Orange) and Students(Blue) in 2022 at GCSU")
Covid.Log.Plot
Covid.Move <- Covid.Data %>%
mutate(Employee_Cases = rollmean(Employee_Cases, k = 5, fill = NA)) %>%
mutate(Student_Cases = rollmean(Student_Cases, k = 5, fill = NA))
Covid.Move.Plot <- Covid.Move %>%
pivot_longer(-Date) %>%
ggplot(aes(x = Date, y = value, color = name)) +
geom_line() +
facet_grid(name ~., scales = "free_y") +
labs(y = "Moving Average Covid Cases")
Covid.Move.Plot
## Warning: Removed 8 row(s) containing missing values (geom_path).
Covid.Decomp <- Covid.TS %>%
model(stl = STL(Employee_Cases))
components(Covid.Decomp) %>% autoplot()
The spikes in the data make a lot of sense once you consider when the school year starts each semester. We observe a spike in Covid Cases in January when spring semester begins and in August when Fall semester begins. The spikes in percent change for students were larger than that for employees which is interesting. It is likely due to students having the propensity to party in large groups.
Employee.Cases <- Covid.Data %>%
select(-Student_Cases)
Employee.TS <- Employee.Cases %>%
mutate(Week = yearweek(Date)) %>%
as_tsibble(index = Week) %>%
select(Week, Employee_Cases)
Student.Cases <- Covid.Data %>%
select(-Employee_Cases)
Student.TS <- Student.Cases %>%
mutate(Week = yearweek(Date)) %>%
as_tsibble(index = Week) %>%
select(Week, Student_Cases)
ggplot(Employee.TS)+
geom_line(aes(Week, Employee_Cases))
Employee.Mean <- Employee.TS %>%
model(MEAN(Employee_Cases)) %>%
forecast(h = 1) %>%
autoplot(Employee.TS)
Employee.Mean
Employee.Naive <- Employee.TS %>%
model(NAIVE(Employee_Cases)) %>%
forecast(h = "1 week") %>%
autoplot(Employee.TS, Level = NULL)
## Warning: Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
Employee.Naive
ggplot(Student.TS) +
geom_line(aes(Week, Student_Cases))
Student.Mean <- Student.TS %>%
model(MEAN(Student_Cases)) %>%
forecast(h = 1) %>%
autoplot(Student.TS)
Student.Mean
Student.Naive <- Student.TS %>%
model(NAIVE(Student_Cases)) %>%
forecast(h = "1 week") %>%
autoplot(Student.TS, Level = NULL)
## Warning: Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
Student.Naive
Going off of the mean averages my forecast for next week for student cases will be 12.6 and the employee cases will be 6.32.