library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)
library(zoo)
library(fable)
library(readxl)
Covid_GCSU <- read_excel("~/Downloads/Covid GCSU.xlsx",
col_types = c("date", "numeric", "numeric"))
covidts <- Covid_GCSU %>%
mutate(Week = yearweek(Date)) %>%
as_tsibble(index = Week) %>%
select(Week, Employee_Cases, Student_Cases)
head(covidts)
## # A tsibble: 6 x 3 [1W]
## Week Employee_Cases Student_Cases
## <week> <dbl> <dbl>
## 1 2021 W52 0 1
## 2 2022 W01 26 39
## 3 2022 W02 18 63
## 4 2022 W03 20 78
## 5 2022 W04 18 43
## 6 2022 W05 9 16
covid_plot <- Covid_GCSU %>%
pivot_longer(-Date) %>%
ggplot(aes(x = Date, y = value, color = name)) +
geom_line() +
facet_grid(name ~., scales = "free_y") +
labs(y = "Covid Cases")
covid_plot
covid_ts_log <- covidts %>%
mutate(Employee_Cases = log(Employee_Cases), Student_Cases = log(Student_Cases))
covidtslogpl <- covid_ts_log %>%
ggplot(aes(x = Week, y= value, color = name)) +
geom_line(aes(y = Employee_Cases), color = "red") +
geom_line(aes(y = Student_Cases), color = "blue") +
labs(x = "Date", y = "% Change in Covid Cases") +
ggtitle("% Change in Cases for Employees(Red) and Students(Blue) in 2022 at GCSU")
covidtslogpl
covidma <- Covid_GCSU %>%
mutate(Employee_Cases = rollmean(Employee_Cases, k = 5, fill = NA)) %>%
mutate(Student_Cases = rollmean(Student_Cases, k = 5, fill = NA))
covidmaplot <- covidma %>%
pivot_longer(-Date) %>%
ggplot(aes(x = Date, y = value, color = name)) +
geom_line() +
facet_grid(name ~., scales = "free_y") +
labs(y = "Moving Average Covid Cases")
covidmaplot
## Warning: Removed 8 row(s) containing missing values (geom_path).
coviddcmpemp <- covidts %>%
model(stl = STL(Employee_Cases))
components(coviddcmpemp) %>% autoplot()
coviddcmpstu <- covidts %>%
model(stl = STL(Student_Cases))
components(coviddcmpstu) %>% autoplot()
After plotting my data, I noticed two peaks in January and August. Both of these spikes came during the time when semesters started. The size of both spikes were higher in Student cases. I think this is due to the student population being so much higher and the more social lifestyle. Both spikes saw a larger than 3% increase in cases.
employeecases <- Covid_GCSU %>%
select(-Student_Cases)
employeecasests <- employeecases %>%
mutate(Week = yearweek(Date)) %>%
as_tsibble(index = Week) %>%
select(Week, Employee_Cases)
studentcases <- Covid_GCSU %>%
select(-Employee_Cases)
studentcasests <- studentcases %>%
mutate(Week = yearweek(Date)) %>%
as_tsibble(index = Week) %>%
select(Week, Student_Cases)
ggplot(employeecasests)+
geom_line(aes(Week, Employee_Cases))
employeecasestsm <- employeecasests %>%
model(MEAN(Employee_Cases)) %>%
forecast(h = 1) %>%
autoplot(employeecasests)
employeecasestsm
employeecasestsn <- employeecasests %>%
model(NAIVE(Employee_Cases)) %>%
forecast(h = "1 week") %>%
autoplot(employeecasests, Level = NULL)
## Warning: Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
employeecasestsn
ggplot(studentcasests) +
geom_line(aes(Week, Student_Cases))
studentcasestsm <- studentcasests %>%
model(MEAN(Student_Cases)) %>%
forecast(h = 1) %>%
autoplot(studentcasests)
studentcasestsm
studentcasestsn <- studentcasests %>%
model(NAIVE(Student_Cases)) %>%
forecast(h = "1 week") %>%
autoplot(studentcasests, Level = NULL)
## Warning: Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
## Ignoring unknown parameters: Level
studentcasestsn
Based off the mean averages, I think that the student cases next week will be 12.6 and the employee cases will be 6.32. Using the mean method, the 80% prediction interval for student cases is (-13.4, 42.8). Using the mean method, the 80% prediction interval for employee cases is (-1.64, 14.96).