Forecast #2

library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ dplyr       1.0.9     ✔ tsibbledata 0.4.0
## ✔ tidyr       1.2.0     ✔ feasts      0.2.2
## ✔ lubridate   1.8.0     ✔ fable       0.3.1
## ✔ ggplot2     3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(readxl)
library(ggplot2)
library(tsibble)
library(lubridate)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ readr   2.1.2     ✔ stringr 1.4.1
## ✔ purrr   0.3.4     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union()         masks lubridate::union(), base::union()
#installing packages
mydata <- read_excel("C:/Users/natha/Desktop/Forecasting in R/Covid_22.xlsx")
head(mydata)
## # A tibble: 6 × 6
##   Month   Week                Period Student_Cases Employee_Cases Total_Cases
##   <chr>   <dttm>               <dbl>         <dbl>          <dbl>       <dbl>
## 1 January 2021-12-27 00:00:00      1             1              0           1
## 2 January 2022-01-03 00:00:00      2            39             26          65
## 3 January 2022-01-10 00:00:00      3            63             18          81
## 4 January 2022-01-17 00:00:00      4            78             20          98
## 5 January 2022-01-24 00:00:00      5            43             18          61
## 6 Febuary 2022-01-31 00:00:00      6            16              9          25
#plot of the raw data
mydata%>%
  ggplot(aes(y = Total_Cases, x = Week))+ geom_line()+ labs(x = "Week", y = "Total Cases", title = "GCSU Covid Cases by Week")

#Formatting data
mydata<-mydata%>%
  mutate(Date = yearweek(Week))%>%
  select(-Month)%>%
  select(-Period)
mydata<-mydata%>%
  select(-Week)
head(mydata)
## # A tibble: 6 × 4
##   Student_Cases Employee_Cases Total_Cases     Date
##           <dbl>          <dbl>       <dbl>   <week>
## 1             1              0           1 2021 W52
## 2            39             26          65 2022 W01
## 3            63             18          81 2022 W02
## 4            78             20          98 2022 W03
## 5            43             18          61 2022 W04
## 6            16              9          25 2022 W05
covidMA<- mydata%>%
  mutate(ma = slider::slide_dbl(Total_Cases, mean,
                                    .before = 2, .after = 1, .complete = TRUE))
head(covidMA)
## # A tibble: 6 × 5
##   Student_Cases Employee_Cases Total_Cases     Date    ma
##           <dbl>          <dbl>       <dbl>   <week> <dbl>
## 1             1              0           1 2021 W52  NA  
## 2            39             26          65 2022 W01  NA  
## 3            63             18          81 2022 W02  61.2
## 4            78             20          98 2022 W03  76.2
## 5            43             18          61 2022 W04  66.2
## 6            16              9          25 2022 W05  48
covidMA%>%
  ggplot(aes(y = Total_Cases, x = Date))+ geom_line(aes(y = ma))+ labs(x = "Week", y = "Total Cases", title = "GCSU Covid Cases by Week") 
## Warning: Removed 3 row(s) containing missing values (geom_path).

#The graph showing the moving average is the trend of the data. The graph shows a decreasing trend in the first weeks of 2022, then a upward trend before returning to a downward trend in the first weeks of September.

#use STL decomp 
mydata<-mydata%>%
  as_tsibble(index = Date)

mydata%>%
  model(STL(Total_Cases ~ trend(window=5) + season(window="periodic"), robust = TRUE))%>% 
  components() %>% autoplot() +
  labs(title = "STL decomposition: GCSU Weekly Covid Cases")

The STL decomposition shows the trend of the data with a remainder graph. The trend shown matches the same downward then upward trend seen previously in the moving average graph. The data analyized did not have a seasonal trend due to the short time frame observed. The remainder for this STL decomposition is not white noise since there is an obvious upward then downward trend. The lack of white noise in the remainder means that any modeling attempted from this will result in an inaccurate forecast.

Point Forcast and 80% PI

#My pointforcast for the week of 9/19/22 is 10 with a 80% PI of 0 and 20.