# Forecast #3
library(readxl)
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.7 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.10 ✔ tsibbledata 0.4.0
## ✔ tidyr 1.2.0 ✔ feasts 0.2.2
## ✔ lubridate 1.8.0 ✔ fable 0.3.1
## ✔ ggplot2 3.3.6
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
# Load the raw gas-price spreadsheet from the local Downloads folder.
gassss <- readxl::read_excel(
  "//Users//mattmullis//Downloads//gasdatareal.xlsx"
)
#a
# Keep the first 665 rows and number them in ascending Date order; `row` is
# the index of the tsibble built below.
# NOTE(review): presumably rows after 665 are omitted on purpose - confirm.
gasomit <- mutate(
  slice(gassss, 1:665),
  row = row_number(Date)
)
# Convert to a tsibble indexed by the row counter rather than by Date.
gasts <- as_tsibble(gasomit, index = row)
#b
# Line plot of Price against the row index.
ggplot(gasts, aes(x = row, y = Price)) +
  geom_line()

# Fit an STL decomposition of Price and plot all of its components.
dcmp <- model(gasts, stl = STL(Price))
autoplot(components(dcmp))

# Redraw the original Price series in gray and overlay the STL trend
# component on top of it.
autoplot(as_tsibble(components(dcmp)), Price, color = "gray") +
  geom_line(aes(y = trend), colour = "#D55E00")

# Estimate a Box-Cox lambda for Price using the Guerrero feature.
lambda <- gasts %>%
  features(Price, features = guerrero) %>%
  pull(lambda_guerrero)
# Plot the Box-Cox-transformed Price series. The lambda was estimated from
# gasts$Price, so it must be applied to that same series (not to the
# aus_production Gas series from tsibbledata).
gasts %>%
  autoplot(box_cox(Price, lambda))

# Gas prices reached a local maximum in 2012, when the US was recovering from a recession. It looks like gas prices went down a lot in 2014; the internet says this was due to the plummet in Brent crude prices around that time. Gas prices remained volatile afterward. Recently, gas prices plummeted due to COVID, then skyrocketed as a result of increased demand once the COVID lockdowns ended.
#c
# Mean-method benchmark: forecast 30 steps ahead, then plot the forecast
# with the observed series layered on top.
gasm <- gasts %>%
  model(MEAN(Price)) %>%
  forecast(h = 30)
autoplot(gasm) +
  autolayer(gasts)
## Plot variable not specified, automatically selected `.vars = Price`

# Random-walk-with-drift benchmark: forecast 30 steps ahead, then plot the
# forecast with the observed series layered on top.
gasd <- gasts %>%
  model(RW(Price ~ drift())) %>%
  forecast(h = 30)
autoplot(gasd) +
  autolayer(gasts)
## Plot variable not specified, automatically selected `.vars = Price`

# Seasonal-naive benchmark with a lag of 4 observations: forecast 30 steps
# ahead, then plot the forecast with the observed series layered on top.
gassn <- gasts %>%
  model(SNAIVE(Price ~ lag(4))) %>%
  forecast(h = 30)
autoplot(gassn) +
  autolayer(gasts)
## Plot variable not specified, automatically selected `.vars = Price`

# Naive benchmark: forecast 30 steps ahead, then plot the forecast with the
# observed series layered on top.
gasn <- gasts %>%
  model(NAIVE(Price)) %>%
  forecast(h = 30)
autoplot(gasn) +
  autolayer(gasts)
## Plot variable not specified, automatically selected `.vars = Price`

# I feel like the mean method is the most accurate. It is probably the least-bad method. It is very hard to predict gas prices, and I feel like this one displays the most reasonable prediction because it is right in the middle of all of the data. Naive is unreliable because gas prices are extremely high right now and likely to fall again. SNAIVE is unreliable because the data isn't very seasonal (other than summer price rises, which can't really be seen in the graph). I wouldn't use random walk with drift because it has the price of gas increasing even more.
# Build the stretched (expanding-window) version of gasts for time-series
# cross-validation: initial window of 5 rows, growing by 1 row per fold.
gaststr <- stretch_tsibble(gasts, .init = 5, .step = 1)
# Fit the drift model on every fold, forecast 30 steps ahead, and score the
# forecasts against the observed values in gasts.
gaststr %>%
  model(RW(Price ~ drift())) %>%
  forecast(h = 30) %>%
  accuracy(gasts)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 30 observations are missing between 666 and 695
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 RW(Price ~ drift()) Test -0.000816 0.396 0.287 -0.805 9.73 7.28 7.11 0.959
# Cross-validated accuracy of the mean method: fit on every fold of gaststr,
# forecast 30 steps ahead, and score against the observed values in gasts.
gaststr %>%
  model(MEAN(Price)) %>%
  forecast(h = 30) %>%
  accuracy(gasts)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 30 observations are missing between 666 and 695
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Price) Test -0.106 0.654 0.528 -7.88 18.9 13.4 11.7 0.982
# Cross-validated accuracy of the naive method: fit on every fold of gaststr,
# forecast 30 steps ahead, and score against the observed values in gasts.
gaststr %>%
  model(NAIVE(Price)) %>%
  forecast(h = 30) %>%
  accuracy(gasts)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 30 observations are missing between 666 and 695
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NAIVE(Price) Test 0.0360 0.379 0.270 0.205 9.15 6.84 6.81 0.959
# Cross-validated accuracy of the seasonal-naive method (lag 4): fit on every
# fold of gaststr, forecast 30 steps ahead, and score against gasts.
gaststr %>%
  model(SNAIVE(Price ~ lag(4))) %>%
  forecast(h = 30) %>%
  accuracy(gasts)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 30 observations are missing between 666 and 695
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Price ~ lag(4)) Test 0.0398 0.396 0.288 0.235 9.77 7.30 7.12 0.943
# To my surprise, they all look like very good methods to test forecast accuracy. Naive is better, by about 0.02 points, which I'm not sure is significant.
# Fit the naive model on the full series, forecast the next 10 steps, and
# print the forecasts together with their prediction intervals.
hilo(
  forecast(
    model(gasts, NAIVE(Price)),
    h = 10
  )
)
## # A tsibble: 10 x 6 [1]
## # Key: .model [1]
## .model row Price .mean `80%`
## <chr> <dbl> <dist> <dbl> <hilo>
## 1 NAIVE(Price) 666 N(3.8, 0.0031) 3.83 [3.760658, 3.903342]80
## 2 NAIVE(Price) 667 N(3.8, 0.0062) 3.83 [3.731107, 3.932893]80
## 3 NAIVE(Price) 668 N(3.8, 0.0093) 3.83 [3.708432, 3.955568]80
## 4 NAIVE(Price) 669 N(3.8, 0.012) 3.83 [3.689316, 3.974684]80
## 5 NAIVE(Price) 670 N(3.8, 0.015) 3.83 [3.672474, 3.991526]80
## 6 NAIVE(Price) 671 N(3.8, 0.019) 3.83 [3.657248, 4.006752]80
## 7 NAIVE(Price) 672 N(3.8, 0.022) 3.83 [3.643246, 4.020754]80
## 8 NAIVE(Price) 673 N(3.8, 0.025) 3.83 [3.630214, 4.033786]80
## 9 NAIVE(Price) 674 N(3.8, 0.028) 3.83 [3.617974, 4.046026]80
## 10 NAIVE(Price) 675 N(3.8, 0.031) 3.83 [3.606396, 4.057604]80
## # … with 1 more variable: `95%` <hilo>
# Based on the fact that Naive is the best, my forecast for the next period is that the price of gas will be 3.832. My 80% prediction interval is about 3.76 to 3.90.