rm(list=ls())
library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ lubridate 1.8.0 ✔ feasts 0.2.2
## ✔ tsibbledata 0.4.0 ✔ fable 0.3.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(openxlsx)
library(ggplot2)
library(tidyr)
library(timeSeries)
## Loading required package: timeDate
library(readr)
library(dplyr)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'forecast'
##
## The following objects are masked from 'package:fabletools':
##
## accuracy, forecast
library(TTR)
#Step 1: Collect the data and plot it
COVID <- readr::read_csv("C:\\Users\\lvm12\\OneDrive\\Desktop\\Forecast Covid Cases GCSU.csv")
## Rows: 36 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (2): Employee_Cases, Student_Cases
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(COVID)
## # A tibble: 6 × 3
## Date Employee_Cases Student_Cases
## <chr> <dbl> <dbl>
## 1 9/4/2022 9 24
## 2 8/28/2022 21 49
## 3 8/22/2022 4 88
## 4 8/14/2022 10 22
## 5 8/7/2022 5 0
## 6 7/31/2022 12 1
Covid <- ggplot(COVID, aes(x = Date, group = 1)) +
geom_line(aes(y = Employee_Cases), color = "blue") +
geom_line(aes(y = Student_Cases), color = "red", linetype ="twodash") +
theme(axis.text.x=element_text(angle=60, hjust=1)) +
ylab("COVID Cases") +
ggtitle("COVID Cases at GCSU (Red=Students, Blue=Employees)")
Covid

#Step 2: Use techniques from class such as decomposition and moving averages to observe the data
#Taking the log of both employee and student cases to change them to percentage changes
COVIDlog <- COVID
COVIDlog$logEmployee_Cases = log(COVIDlog$Employee_Cases)
COVIDlog$logStudent_Cases = log(COVIDlog$Student_Cases)
head(COVIDlog)
## # A tibble: 6 × 5
## Date Employee_Cases Student_Cases logEmployee_Cases logStudent_Cases
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 9/4/2022 9 24 2.20 3.18
## 2 8/28/2022 21 49 3.04 3.89
## 3 8/22/2022 4 88 1.39 4.48
## 4 8/14/2022 10 22 2.30 3.09
## 5 8/7/2022 5 0 1.61 -Inf
## 6 7/31/2022 12 1 2.48 0
Covidlog <- ggplot(COVIDlog, aes(x = Date, group = 1)) +
geom_line(aes(y = logEmployee_Cases), color = "blue") +
geom_line(aes(y = logStudent_Cases), color = "red", linetype ="twodash") +
theme(axis.text.x=element_text(angle=60, hjust=1)) +
ylab("COVID Cases - Log") +
ggtitle("COVID Cases at GCSU as % Point Change (Red=Students, Blue=Employees)")
Covidlog

#Taking the moving average of both employee and student cases to find the average of COVID cases over the months
COVIDEmp <- COVID %>%
mutate(`5-MA Employee` = slider::slide_dbl(Employee_Cases, mean,
.before = 2, .after = 2, .complete = TRUE))
COVIDStu <- COVID %>%
mutate(`5-MA Student` = slider::slide_dbl(Student_Cases, mean,
.before = 2, .after = 2, .complete = TRUE))
COVIDEmp
## # A tibble: 36 × 4
## Date Employee_Cases Student_Cases `5-MA Employee`
## <chr> <dbl> <dbl> <dbl>
## 1 9/4/2022 9 24 NA
## 2 8/28/2022 21 49 NA
## 3 8/22/2022 4 88 9.8
## 4 8/14/2022 10 22 10.4
## 5 8/7/2022 5 0 7.6
## 6 7/31/2022 12 1 8.8
## 7 7/24/2022 7 1 8.2
## 8 7/18/2022 10 4 8.6
## 9 7/10/2022 7 4 7.8
## 10 7/3/2022 7 1 7
## # … with 26 more rows
## # ℹ Use `print(n = ...)` to see more rows
COVIDStu
## # A tibble: 36 × 4
## Date Employee_Cases Student_Cases `5-MA Student`
## <chr> <dbl> <dbl> <dbl>
## 1 9/4/2022 9 24 NA
## 2 8/28/2022 21 49 NA
## 3 8/22/2022 4 88 36.6
## 4 8/14/2022 10 22 32
## 5 8/7/2022 5 0 22.4
## 6 7/31/2022 12 1 5.6
## 7 7/24/2022 7 1 2
## 8 7/18/2022 10 4 2.2
## 9 7/10/2022 7 4 2.4
## 10 7/3/2022 7 1 2.4
## # … with 26 more rows
## # ℹ Use `print(n = ...)` to see more rows
#Step 3: Interpret the results from step 2
#When taking the log of both the employee and student cases, you can see the percent change over time of the amount of cases. The employee cases tend to be more steady and spike at nearly the same time or slightly after the student cases spike. Student cases tend to spike significantly at the beginning of the semester and then level out (herd immunity) as the semester progresses, and repeats by the next semester. Taking the log shows the percentage change, which allows the employee cases and student cases to be more comparable to each other as the amount of students and employees differs drastically, which there being a significantly larger amount of students.
#The moving after helps to show the direction of the trend of cases by taking the average of the prior 2 cases and the after 2 cases, which helps to smooth out the data and show a smoother trend of the data. Taking the moving average of both student and employee cases helps emphasis the seasonal trend of the data that for each semester, there is a massive spike at the beginning that levels out as the semester progresses
#Step 4: Forecast the cases of COVID between 09/19 and 09/25
summary(COVID)
## Date Employee_Cases Student_Cases
## Length:36 Min. : 0.000 Min. : 0.0
## Class :character 1st Qu.: 2.000 1st Qu.: 1.0
## Mode :character Median : 4.000 Median : 2.0
## Mean : 6.361 Mean :13.0
## 3rd Qu.: 9.000 3rd Qu.: 8.5
## Max. :26.000 Max. :88.0
EmpMean <- mean(COVID$Employee_Cases, na.rm = TRUE)
StuMean <- mean(COVID$Student_Cases, na.rm = TRUE)
EmpMean
## [1] 6.361111
StuMean
## [1] 13
#Based on the data collected of COVID cases among students and employees between 1/2/2022 and 9/4/2022, 9/4/2022 is about 4 weeks into school for Fall 2022 and we need to forecast the amount of cases between 9/19 and 9/25 which is about 6 weeks into school. Back in Spring 2022, our 6 week bench mark was around 2/13/2022 which had 4 employee cases and 4 student cases, the week prior had 9 employee cases and 16 student cases, and week 4 had 18 employee cases and 43 student cases. Comparing our Fall 2022 week 4 (9 employee cases and 24 student cases) to the Spring 2022 week 4 cases, the Fall 2022 is half the amount of the spring. If we take the Spring week 6 data (4 employee cases and 4 student cases) and cut it in half, I would guess that we will have 2 employee cases and 2 student cases in the fall for 9/19 to 9/25.
#The mean of Employee cases is 6.36 and the mean for student cases is 13, but COVID cases follow a spike and fall trend, so from the data we collected we are currently (as of 9/4/2022) in a spike that will soon level out which backs up our guess of 2 employee cases and 2 student cases by 9/19-9/25.
#If we take the average of the most recent spike (8/14-9/4) the mean is significantly higher than 13 (student) and 6.36 (employee) cases, so adding in a month where the case is only 2 for both students and employees will help lower the mean to get closer to matching the overall mean.