library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)
library(zoo)
library(fable)
covid_data <- readxl::read_excel("//Users//colinadams//Documents//GCSU//Fall 2022//Forecasting//Forecasts//Forecast 2//ForecastingDataCovidGCSU.xlsx")
covid_ts <- covid_data %>%
mutate(Week = yearweek(Date)) %>%
as_tsibble(index = Week) %>%
select(Week, Employee_Cases, Student_Cases)
head(covid_ts)
## # A tsibble: 6 x 3 [1W]
## Week Employee_Cases Student_Cases
## <week> <dbl> <dbl>
## 1 2021 W52 0 1
## 2 2022 W01 26 39
## 3 2022 W02 18 63
## 4 2022 W03 20 78
## 5 2022 W04 18 43
## 6 2022 W05 9 16
covid_plot <- covid_data %>%
pivot_longer(-Date) %>%
ggplot(aes(x = Date, y = value, color = name)) +
geom_line() +
facet_grid(name ~., scales = "free_y") +
labs(y = "Covid Cases")
covid_plot
covid_ts_log <- covid_ts %>%
mutate(Employee_Cases = log(Employee_Cases), Student_Cases = log(Student_Cases))
covid_ts_log_plot <- covid_ts_log %>%
ggplot(aes(x = Week, y= value, color = name)) +
geom_line(aes(y = Employee_Cases), color = "red") +
geom_line(aes(y = Student_Cases), color = "blue") +
labs(x = "Date", y = "% Change in Covid Cases") +
ggtitle("% Change in Cases for Employees(Red) and Students(Blue) in 2022 at GCSU")
covid_ts_log_plot
covid_ma <- covid_data %>%
mutate(Employee_Cases = rollmean(Employee_Cases, k = 5, fill = NA)) %>%
mutate(Student_Cases = rollmean(Student_Cases, k = 5, fill = NA))
covid_plot_ma <- covid_ma %>%
pivot_longer(-Date) %>%
ggplot(aes(x = Date, y = value, color = name)) +
geom_line() +
facet_grid(name ~., scales = "free_y") +
labs(y = "Moving Average Covid Cases")
covid_plot_ma
## Warning: Removed 8 row(s) containing missing values (geom_path).
covid_dcmp_emp <- covid_ts %>%
model(stl = STL(Employee_Cases))
components(covid_dcmp_emp) %>% autoplot()
covid_dcmp_stu <- covid_ts %>%
model(stl = STL(Student_Cases))
components(covid_dcmp_stu) %>% autoplot()
After plotting the data, I am able to see two spikes in the Covid-19 rates this year. These spikes occurred for both the employees and the students. One spike was in January and the other was in August. These spikes line up perfectly with the beginning of each semester for GCSU. Both spikes saw a percentage change in Covid-19 above 3%. The magnitude of spikes was larger in both cases for the students. This is likely due to less worry about Covid-19 as well as a more social lifestyle for the students on average.
employee_cases <- covid_data %>%
select(-Student_Cases)
employee_cases_ts <- employee_cases %>%
mutate(Week = yearweek(Date)) %>%
as_tsibble(index = Week) %>%
select(Week, Employee_Cases)
student_cases <- covid_data %>%
select(-Employee_Cases)
student_cases_ts <- student_cases %>%
mutate(Week = yearweek(Date)) %>%
as_tsibble(index = Week) %>%
select(Week, Student_Cases)
ggplot(employee_cases_ts)+
geom_line(aes(Week, Employee_Cases))
employee_cases_ts_mean <- employee_cases_ts %>%
model(MEAN(Employee_Cases)) %>%
forecast(h = 1) %>%
autoplot(employee_cases_ts)
employee_cases_ts_mean
employee_cases_ts_naive <- employee_cases_ts %>%
model(NAIVE(Employee_Cases)) %>%
forecast(h = "1 week") %>%
autoplot(employee_cases_ts, Level = NULL)
## Warning: Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
employee_cases_ts_naive
ggplot(student_cases_ts) +
geom_line(aes(Week, Student_Cases))
student_cases_ts_mean <- student_cases_ts %>%
model(MEAN(Student_Cases)) %>%
forecast(h = 1) %>%
autoplot(student_cases_ts)
student_cases_ts_mean
student_cases_ts_naive <- student_cases_ts %>%
model(NAIVE(Student_Cases)) %>%
forecast(h = "1 week") %>%
autoplot(student_cases_ts, Level = NULL)
## Warning: Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
student_cases_ts_naive
Based off the mean averages, I forecast that the student cases next week will be 12.6 and the employee cases will be 6.32. Using the mean method, the 80% prediction interval for student cases is (-13.4, 42.8). Using the mean method, the 80% prediction interval for employee cases is (-1.64, 14.96).