My Packages and Data

library(ggplot2)
library(dplyr)
library(tidyquant)
library(tsibble)
library(lava)
library(feasts)
library(readxl)
library(tidyr)
library(lubridate)
library(fpp3)
library(forecast)
library(slider)

# Read the weekly GCSU COVID workbook; later steps use its Date,
# Employee_Cases, and Student_Cases columns.
cases <- read_excel(
  path = "C:/Users/14047/Downloads/Forecasting Data Covid GCSU.xlsx"
)

Questions

Problem 1

# Reshape to long format: every column except Date is stacked, with the
# original column name stored in `Employee_student` and its value in `count`.
cases1 <- pivot_longer(
  cases,
  cols = !Date,
  names_to = "Employee_student",
  values_to = "count"
)
cases1
## # A tibble: 72 × 3
##    Date                Employee_student count
##    <dttm>              <chr>            <dbl>
##  1 2022-09-04 00:00:00 Employee_Cases       9
##  2 2022-09-04 00:00:00 Student_Cases       24
##  3 2022-08-28 00:00:00 Employee_Cases      21
##  4 2022-08-28 00:00:00 Student_Cases       49
##  5 2022-08-21 00:00:00 Employee_Cases       4
##  6 2022-08-21 00:00:00 Student_Cases       88
##  7 2022-08-14 00:00:00 Employee_Cases      10
##  8 2022-08-14 00:00:00 Student_Cases       22
##  9 2022-08-07 00:00:00 Employee_Cases       5
## 10 2022-08-07 00:00:00 Student_Cases        0
## # … with 62 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Line chart of case counts over time, one coloured line per group.
ggplot(cases1) +
  geom_line(aes(x = Date, y = count, colour = Employee_student))

Problem 2

# Making an STL model
  
# Build a weekly tsibble holding the total (employee + student) case count.
# The case counts are measurements, not series identifiers, so they are not
# declared as the tsibble key: this is a single series indexed by year-week.
table1 <- cases %>%
  mutate(
    Date = yearweek(Date),
    Total_Cases = Employee_Cases + Student_Cases
  ) %>%
  dplyr::select(Date, Total_Cases) %>%
  as_tsibble(index = Date)
table1
## # A tsibble: 36 x 2 [1W]
##        Date Total_Cases
##      <week>       <dbl>
##  1 2021 W52           1
##  2 2022 W01          65
##  3 2022 W02          81
##  4 2022 W03          98
##  5 2022 W04          61
##  6 2022 W05          25
##  7 2022 W06           8
##  8 2022 W07           6
##  9 2022 W08           2
## 10 2022 W09           2
## # … with 26 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Fit an STL decomposition to the weekly totals; the fitted model is stored
# in the `stl` column of the resulting mable.
maple <- model(table1, stl = STL(Total_Cases))
maple
## # A mable: 1 x 1
##       stl
##   <model>
## 1   <STL>
# Extract the decomposition components from the fitted STL model.
dcmp <- maple %>%
  components()
dcmp
## # A dable: 36 x 6 [1W]
## # Key:     .model [1]
## # :        Total_Cases = trend + remainder
##    .model     Date Total_Cases trend remainder season_adjust
##    <chr>    <week>       <dbl> <dbl>     <dbl>         <dbl>
##  1 stl    2021 W52           1 62.2     -61.2              1
##  2 stl    2022 W01          65 59.0       5.98            65
##  3 stl    2022 W02          81 55.9      25.1             81
##  4 stl    2022 W03          98 51.0      47.0             98
##  5 stl    2022 W04          61 43.7      17.3             61
##  6 stl    2022 W05          25 34.5      -9.54            25
##  7 stl    2022 W06           8 24.4     -16.4              8
##  8 stl    2022 W07           6 15        -9.00             6
##  9 stl    2022 W08           2  7.44     -5.44             2
## 10 stl    2022 W09           2  3.77     -1.77             2
## # … with 26 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Now plotting the model with trend as the component

# Overlay the STL trend component (orange) on the observed weekly totals.
# labs() matches labels by argument name, so the x-axis label is passed
# explicitly as `x =`.
table1 %>%
  autoplot(Total_Cases) +
  autolayer(dcmp, trend, color = "#D55E00") +
  labs(
    x = "Weeks in 2022",
    y = "Total Cases",
    title = "GCSU COVID Cases for 2022"
  )

# Looking at each component individually 

# Plot each decomposition component in its own panel.
autoplot(dcmp)

# Looking at seasonally adj component (Keep this graph in mind for the conclusion).

# Overlay the seasonally adjusted series (orange) on the observed weekly
# totals. labs() matches labels by argument name, so the x-axis label is
# passed explicitly as `x =`.
table1 %>%
  autoplot(Total_Cases) +
  autolayer(dcmp, season_adjust, color = "#D55E00") +
  labs(
    x = "Weeks in 2022",
    y = "Total Cases",
    title = "GCSU COVID Cases for 2022"
  )

# ^^ We will discuss these observations in Problem 3 below.

Problem 3

# ^^ As you can see, the red line (the seasonally adjusted series) lies exactly on top of the black
# line (actual total cases), indicating that the decomposition found no seasonal component at all.
# This is because we do not have enough time periods of data to estimate the seasonality. If we
# were to include a couple more years of data, a seasonal pattern would likely become clear. Based
# on the little data that we have, I suspect that there would be an increase in COVID cases at the
# beginning of each semester, which makes sense since everyone at GCSU has more exposure when
# people come back from their different locations.
# On the other hand, the trend shows a smoother version of the total cases, making it obvious that
# there is a gradual increase in cases at the beginning of each semester and then a gradual decline
# as the semester continues into the Summer break.
# The remainder seems to follow the actual total cases, which is an issue. Because it follows
# basically the same pattern as the actual data, we do not have a good measure of any white noise
# the data may contain. This is most likely because we do not have enough data to pull out the
# seasonality it would contain, so the seasonal variation is left in the remainder together with
# the white noise.
# In conclusion, we need more observations to fully analyze and understand the fluctuations in the data.

Problem 4

# Since there was a spike at the beginning of the semester, and cases
# now seem to follow a decreasing trend like the one in the Spring
# semester, my point forecast for the number of COVID
# cases from 09/19 to 09/25 is 15 for students and 5 for employees. My
# 80% prediction interval is from 0 to 35, since spikes are still possible;
# however, my impression is that cases will continue to
# decline.