Forecast #2

rm(list=ls())
library(tsibble)

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(tidyverse)

## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ lubridate   1.8.0     ✔ feasts      0.2.2
## ✔ tsibbledata 0.4.0     ✔ fable       0.3.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()     masks base::date()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ tsibble::intersect()  masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ tsibble::setdiff()    masks base::setdiff()
## ✖ tsibble::union()      masks base::union()

library(openxlsx)
library(ggplot2)
library(tidyr)
library(timeSeries)

## Loading required package: timeDate

library(readr)
library(dplyr)
library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## 
## Attaching package: 'forecast'
## 
## The following objects are masked from 'package:fabletools':
## 
##     accuracy, forecast

library(TTR)

#Step 1: Collect the data and plot it
COVID <- readr::read_csv("C:\\Users\\lvm12\\OneDrive\\Desktop\\Forecast Covid Cases GCSU.csv")

## Rows: 36 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (2): Employee_Cases, Student_Cases
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

head(COVID)

## # A tibble: 6 × 3
##   Date      Employee_Cases Student_Cases
##   <chr>              <dbl>         <dbl>
## 1 9/4/2022               9            24
## 2 8/28/2022             21            49
## 3 8/22/2022              4            88
## 4 8/14/2022             10            22
## 5 8/7/2022               5             0
## 6 7/31/2022             12             1

Covid <- ggplot(COVID, aes(x = Date, group = 1)) +
  geom_line(aes(y = Employee_Cases), color = "blue") +
  geom_line(aes(y = Student_Cases), color = "red", linetype ="twodash") +
  theme(axis.text.x=element_text(angle=60, hjust=1)) +
  ylab("COVID Cases") +
  ggtitle("COVID Cases at GCSU (Red=Students, Blue=Employees)")
Covid

#Step 2: Use techniques from class such as decomposition and moving averages to observe the data
#Taking the log of both employee and student cases to change them to percentage changes
COVIDlog <- COVID
COVIDlog$logEmployee_Cases = log(COVIDlog$Employee_Cases)
COVIDlog$logStudent_Cases = log(COVIDlog$Student_Cases)
head(COVIDlog)

## # A tibble: 6 × 5
##   Date      Employee_Cases Student_Cases logEmployee_Cases logStudent_Cases
##   <chr>              <dbl>         <dbl>             <dbl>            <dbl>
## 1 9/4/2022               9            24              2.20             3.18
## 2 8/28/2022             21            49              3.04             3.89
## 3 8/22/2022              4            88              1.39             4.48
## 4 8/14/2022             10            22              2.30             3.09
## 5 8/7/2022               5             0              1.61          -Inf   
## 6 7/31/2022             12             1              2.48             0

Covidlog <- ggplot(COVIDlog, aes(x = Date, group = 1)) +
  geom_line(aes(y = logEmployee_Cases), color = "blue") +
  geom_line(aes(y = logStudent_Cases), color = "red", linetype ="twodash") +
  theme(axis.text.x=element_text(angle=60, hjust=1)) +
  ylab("COVID Cases - Log") +
  ggtitle("COVID Cases at GCSU as % Point Change (Red=Students, Blue=Employees)")
Covidlog

#Taking the moving average of both employee and student cases to find the average of COVID cases over the months
COVIDEmp <- COVID %>%
  mutate(`5-MA Employee` = slider::slide_dbl(Employee_Cases, mean,
                .before = 2, .after = 2, .complete = TRUE))
COVIDStu <- COVID %>%
  mutate(`5-MA Student` = slider::slide_dbl(Student_Cases, mean,
                .before = 2, .after = 2, .complete = TRUE))
COVIDEmp

## # A tibble: 36 × 4
##    Date      Employee_Cases Student_Cases `5-MA Employee`
##    <chr>              <dbl>         <dbl>           <dbl>
##  1 9/4/2022               9            24            NA  
##  2 8/28/2022             21            49            NA  
##  3 8/22/2022              4            88             9.8
##  4 8/14/2022             10            22            10.4
##  5 8/7/2022               5             0             7.6
##  6 7/31/2022             12             1             8.8
##  7 7/24/2022              7             1             8.2
##  8 7/18/2022             10             4             8.6
##  9 7/10/2022              7             4             7.8
## 10 7/3/2022               7             1             7  
## # … with 26 more rows
## # ℹ Use `print(n = ...)` to see more rows

COVIDStu

## # A tibble: 36 × 4
##    Date      Employee_Cases Student_Cases `5-MA Student`
##    <chr>              <dbl>         <dbl>          <dbl>
##  1 9/4/2022               9            24           NA  
##  2 8/28/2022             21            49           NA  
##  3 8/22/2022              4            88           36.6
##  4 8/14/2022             10            22           32  
##  5 8/7/2022               5             0           22.4
##  6 7/31/2022             12             1            5.6
##  7 7/24/2022              7             1            2  
##  8 7/18/2022             10             4            2.2
##  9 7/10/2022              7             4            2.4
## 10 7/3/2022               7             1            2.4
## # … with 26 more rows
## # ℹ Use `print(n = ...)` to see more rows

#Step 3: Interpret the results from step 2
#When taking the log of both the employee and student cases, you can see the percent change over time of the amount of cases. The employee cases tend to be more steady and spike at nearly the same time or slightly after the student cases spike. Student cases tend to spike significantly at the beginning of the semester and then level out (herd immunity) as the semester progresses, and repeats by the next semester. Taking the log shows the percentage change, which allows the employee cases and student cases to be more comparable to each other as the amount of students and employees differs drastically, which there being a significantly larger amount of students.

#The moving after helps to show the direction of the trend of cases by taking the average of the prior 2 cases and the after 2 cases, which helps to smooth out the data and show a smoother trend of the data. Taking the moving average of both student and employee cases helps emphasis the seasonal trend of the data that for each semester, there is a massive spike at the beginning that levels out as the semester progresses

#Step 4: Forecast the cases of COVID between 09/19 and 09/25
summary(COVID)

##      Date           Employee_Cases   Student_Cases 
##  Length:36          Min.   : 0.000   Min.   : 0.0  
##  Class :character   1st Qu.: 2.000   1st Qu.: 1.0  
##  Mode  :character   Median : 4.000   Median : 2.0  
##                     Mean   : 6.361   Mean   :13.0  
##                     3rd Qu.: 9.000   3rd Qu.: 8.5  
##                     Max.   :26.000   Max.   :88.0

EmpMean <- mean(COVID$Employee_Cases, na.rm = TRUE)
StuMean <- mean(COVID$Student_Cases, na.rm = TRUE)
EmpMean

## [1] 6.361111

StuMean

## [1] 13

#Based on the data collected of COVID cases among students and employees between 1/2/2022 and 9/4/2022, 9/4/2022 is about 4 weeks into school for Fall 2022 and we need to forecast the amount of cases between 9/19 and 9/25 which is about 6 weeks into school. Back in Spring 2022, our 6 week bench mark was around 2/13/2022 which had 4 employee cases and 4 student cases, the week prior had 9 employee cases and 16 student cases, and week 4 had 18 employee cases and 43 student cases. Comparing our Fall 2022 week 4 (9 employee cases and 24 student cases) to the Spring 2022 week 4 cases, the Fall 2022 is half the amount of the spring. If we take the Spring week 6 data (4 employee cases and 4 student cases) and cut it in half, I would guess that we will have 2 employee cases and 2 student cases in the fall for 9/19 to 9/25.

#The mean of Employee cases is 6.36 and the mean for student cases is 13, but COVID cases follow a spike and fall trend, so from the data we collected we are currently (as of 9/4/2022) in a spike that will soon level out which backs up our guess of 2 employee cases and 2 student cases by 9/19-9/25.

#If we take the average of the most recent spike (8/14-9/4) the mean is significantly higher than 13 (student) and 6.36 (employee) cases, so adding in a month where the case is only 2 for both students and employees will help lower the mean to get closer to matching the overall mean.

Forecast #2

Lauren Moskowitz

2022-09-19