# Read the COVID case workbook from the local Downloads folder.
covid <- readxl::read_excel("//Users//mattmullis//Downloads//covid gcsu.xlsx")

# Line chart of Cases against Date, with one coloured line per Case_Type.
ggplot(covid, aes(x = Date, y = Cases, color = Case_Type)) +
  geom_line()

# Convert the case data to a tsibble, using the row_num column as the index.
covid_ts <- as_tsibble(covid, index = row_num)

# Keep the first 37 rows and the total_cases column.
# row_number() starts at 1, so `<= 37L` selects exactly rows 1..37 without
# having to enumerate every row position by hand.
covid_ts2 <- covid_ts %>%
  filter(row_number() <= 37L) %>%
  select(total_cases)
# Fit an STL decomposition of total_cases (trend window of 7, periodic
# seasonal window, robust fitting enabled), extract its components, and
# plot them.
covid_ts2 %>%
  model(
    STL(
      total_cases ~ trend(window = 7) + season(window = "periodic"),
      robust = TRUE
    )
  ) %>%
  components() %>%
  autoplot()

# Moving average of order 3 over the series; the result is printed below.
# NOTE(review): ma() is presumably forecast::ma -- confirm which package
# provides it.
ma(covid_ts2, 3)
## Time Series:
## Start = 1
## End = 37
## Frequency = 1
## [1] NA 37.666667 65.000000 64.666667 43.000000 16.666667 8.666667
## [8] 11.666667 11.000000 11.000000 9.666667 7.333333 6.000000 4.000000
## [15] 4.000000 3.666667 3.000000 3.666667 5.000000 5.666667 5.666667
## [22] 3.333333 3.000000 2.000000 2.000000 1.666667 1.333333 2.000000
## [29] 3.333333 5.333333 13.000000 31.333333 61.333333 80.000000 81.333333
## [36] 49.000000 NA
# This looks like the data doesn't have a strong trend, because the remainder looks so consistent
# with the data. There are two periods where the cases are large: one at the beginning, where the trend
# is decreasing, and one near the end, where it is increasing. In the middle, it stays constant.
# The moving average is very consistent with the data, and I'm sure that if I graphed it, it would look
# nearly identical.
# For my forecast, I will use the naive method, which is best if the data follows a random walk. While
# this data doesn't do that, I still feel that the naive method could be effective because sometimes it
# feels like COVID can just come back randomly. I believe that there will be 10 cases next week: 5 employees
# and 5 students. My 80% prediction interval is 2-40. One may say that my upper limit is high, but it's
# hard to choose an upper value because you don't know who got COVID in the last week (if they were in
# contact with a lot of people).