library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.9 ✔ tsibbledata 0.4.0
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(readxl)
library(ggplot2)
library(tsibble)
library(lubridate)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ readr 2.1.2 ✔ stringr 1.4.1
## ✔ purrr 0.3.4 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union() masks lubridate::union(), base::union()
#installing packages
mydata <- read_excel("C:/Users/natha/Desktop/Forecasting in R/Covid_22.xlsx")
head(mydata)
## # A tibble: 6 × 6
## Month Week Period Student_Cases Employee_Cases Total_Cases
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl>
## 1 January 2021-12-27 00:00:00 1 1 0 1
## 2 January 2022-01-03 00:00:00 2 39 26 65
## 3 January 2022-01-10 00:00:00 3 63 18 81
## 4 January 2022-01-17 00:00:00 4 78 20 98
## 5 January 2022-01-24 00:00:00 5 43 18 61
## 6 Febuary 2022-01-31 00:00:00 6 16 9 25
#plot of the raw data
mydata%>%
ggplot(aes(y = Total_Cases, x = Week))+ geom_line()+ labs(x = "Week", y = "Total Cases", title = "GCSU Covid Cases by Week")
#Formatting data
mydata<-mydata%>%
mutate(Date = yearweek(Week))%>%
select(-Month)%>%
select(-Period)
mydata<-mydata%>%
select(-Week)
head(mydata)
## # A tibble: 6 × 4
## Student_Cases Employee_Cases Total_Cases Date
## <dbl> <dbl> <dbl> <week>
## 1 1 0 1 2021 W52
## 2 39 26 65 2022 W01
## 3 63 18 81 2022 W02
## 4 78 20 98 2022 W03
## 5 43 18 61 2022 W04
## 6 16 9 25 2022 W05
covidMA<- mydata%>%
mutate(ma = slider::slide_dbl(Total_Cases, mean,
.before = 2, .after = 1, .complete = TRUE))
head(covidMA)
## # A tibble: 6 × 5
## Student_Cases Employee_Cases Total_Cases Date ma
## <dbl> <dbl> <dbl> <week> <dbl>
## 1 1 0 1 2021 W52 NA
## 2 39 26 65 2022 W01 NA
## 3 63 18 81 2022 W02 61.2
## 4 78 20 98 2022 W03 76.2
## 5 43 18 61 2022 W04 66.2
## 6 16 9 25 2022 W05 48
covidMA%>%
ggplot(aes(y = Total_Cases, x = Date))+ geom_line(aes(y = ma))+ labs(x = "Week", y = "Total Cases", title = "GCSU Covid Cases by Week")
## Warning: Removed 3 row(s) containing missing values (geom_path).
#The graph showing the moving average is the trend of the data. The graph shows a decreasing trend in the first weeks of 2022, then a upward trend before returning to a downward trend in the first weeks of September.
#use STL decomp
mydata<-mydata%>%
as_tsibble(index = Date)
mydata%>%
model(STL(Total_Cases ~ trend(window=5) + season(window="periodic"), robust = TRUE))%>%
components() %>% autoplot() +
labs(title = "STL decomposition: GCSU Weekly Covid Cases")
#My pointforcast for the week of 9/19/22 is 10 with a 80% PI of 0 and 20.