# Read the Excel workbook "covid gcsu.xlsx" into `covid`.
# NOTE(review): the path is absolute and machine-specific -- confirm it
# exists before running this on another computer.
covid <- readxl::read_excel(
  "//Users//mattmullis//Downloads//covid gcsu.xlsx"
)

# Line chart of Cases against Date, with one colored line per Case_Type.
# NOTE(review): assumes ggplot2 is already attached; no library() call is
# visible in this part of the script.
ggplot(covid, mapping = aes(x = Date, y = Cases, color = Case_Type)) +
  geom_line()

# Convert the case data to a tsibble indexed by the `row_num` column.
# NOTE(review): as_tsibble(), filter(), select(), model(), STL() and
# components() are assumed to come from already-attached packages
# (presumably tsibble, dplyr, fabletools and feasts) -- confirm.
covid_ts <- as_tsibble(covid, index = row_num)

# Keep the first 37 rows and select total_cases. Comparing row_number()
# against 37 picks exactly the rows that the hand-typed list of positions
# 1, 2, ..., 37 used to pick, without the risk of a typo in the list.
covid_ts2 <- covid_ts %>%
  filter(row_number() <= 37) %>%
  select(total_cases)

# Fit a robust STL model to total_cases (trend window of 7, periodic
# seasonal window), extract its components and plot them.
covid_ts2 %>%
  model(
    STL(
      total_cases ~ trend(window = 7) + season(window = "periodic"),
      robust = TRUE
    )
  ) %>%
  components() %>%
  autoplot()

# Pass the trimmed series through ma() with argument 3 and plot the result,
# labelling the x axis "row number".
# NOTE(review): ma() is presumably forecast::ma(), in which case 3 is the
# moving-average order -- confirm which package supplies it.
covid_ts2 %>%
ma(3) %>%
autoplot(xlab= "row number")

# The data do not appear to have a strong trend, because the remainder
# component looks so consistent with the data. Of course, the series is not
# white noise. There are two periods where the cases are large: one at the
# beginning, where the trend is decreasing, and one near the end, where it is
# increasing. In the middle, it stays constant. The moving average is very
# consistent with the data in the graph, but looks a little smoother, as a
# moving average should.
# For my forecast, I will use the naive method, which is best if the data
# follow a random walk. While these data don't do that, I still feel that the
# naive method could be effective, because sometimes it feels like COVID can
# just come back randomly. I believe that there will be 10 cases next week:
# 5 employees and 5 students. My 80% prediction interval is 2-40. One may say
# that my upper limit is high, but it's hard to choose an upper value because
# you don't know who got COVID in the last week (if they were in contact with
# a lot of people).