library("dplyr")
library("ggplot2")
library("tidyr")
library("tibble")
library("tsibble")
library("ggfortify")
library("tidyverse")
library("fpp3")
library("moments")
library("zoo")
library("fable")
library("brew")
library("sos")
library("slider")
library("seasonal")
library("x13binary")
library("USgas")
# install and load any package necessary
#a
AusPop <- global_economy %>%
filter(Country == "Australia") %>%
mutate(Population = Population/1000000)
AusPop %>%
model(RW(Population ~ drift())) %>%
forecast(h = "5 years") %>%
autoplot(AusPop, level = NULL) +
geom_line(data = slice(AusPop, range(cumsum(!is.na(Population)))),
aes(y=Population), linetype = "dashed", color = "#0072B2") +
labs(title = "Austalian Population in Millions")
#b
bricks <- aus_production %>%
filter_index("1970 Q1" ~ "2004 Q4") %>%
select(Bricks)
autoplot(bricks)
## Plot variable not specified, automatically selected `.vars = Bricks`
bricks %>%
model(SNAIVE(Bricks ~ lag("5 year"))) %>%
forecast(h = "5 years") %>%
autoplot(bricks, level = NULL) +
labs(title = "Brick Production")
#c
NSWLambs <- aus_livestock %>%
filter(Animal == "Lambs" & State == "New South Wales") %>%
mutate(Count = Count/1000)
autoplot(NSWLambs)
## Plot variable not specified, automatically selected `.vars = Count`
NSWLambs %>%
model(SNAIVE(Count ~ lag("5 year"))) %>%
forecast(h = "5 years") %>%
autoplot(NSWLambs, level = NULL) +
labs(title = "Count of Lambs in New South Wales in Thousands")
#d
hh_budget %>%
autoplot(Wealth)
hh_budget %>%
  model(RW(Wealth ~ drift())) %>%
  forecast(h = "5 years") %>%
  autoplot(hh_budget, level = NULL) +
  geom_line(data = hh_budget %>%
              group_by_key() %>%
              slice(range(cumsum(!is.na(Wealth)))) %>%
              ungroup(),
            aes(y = Wealth, group = Country),
            linetype = "dashed", color = "#0072B2") +
  labs(title = "Household Wealth (drift forecast)")
#e
TFS <- aus_retail %>%
select(Month, State, Industry, Turnover) %>%
filter(Industry == "Takeaway food services") %>%
summarise(total_turnover = sum(Turnover))
autoplot(TFS)
## Plot variable not specified, automatically selected `.vars = total_turnover`
TFS_state <- aus_retail %>%
  filter(Industry == "Takeaway food services")
TFS2 <- TFS_state %>%
  model(RW(Turnover ~ drift())) %>%
  forecast(h = 15) %>%
  autoplot(TFS_state) +
  labs(title = "Australian Takeaway Food Turnover") +
  facet_wrap(~State, scales = "free")
TFS2
#a
Faceb <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(trading_day = row_number()) %>%
update_tsibble(index = trading_day, regular = TRUE)
autoplot(Faceb, Close)
#b/c
Faceb %>%
model(Drift = RW(Close ~ drift())) %>%
forecast(h=100) %>%
autoplot(Faceb, level = NULL) +
geom_line(data = slice(Faceb, range(cumsum(!is.na(Close)))),
aes(y=Close), linetype = "dashed", color = "#0072B2") +
labs(title = "Face Book Closing Stock Price", y= "$")
#d
Faceb %>%
  model(NAIVE(Close)) %>%
  forecast(h = 100) %>%
  autoplot(Faceb, level = NULL) +
  labs(title = "Facebook Closing Price Forecast")
Faceb %>%
model(SNAIVE(Close ~ lag(50))) %>%
forecast(h = 100) %>%
autoplot(Faceb, level = NULL) +
labs(title = "Face Book Closing Price Forecast")
Faceb %>%
  model(MEAN(Close)) %>%
  forecast(h = 50) %>%
  autoplot(Faceb, level = NULL) +
  labs(title = "Facebook Closing Price Forecast")
# Predicting a stock price is incredibly hard to do. There is no seasonality, the naive forecast looks poor, and the mean looks even worse. The drift method is the most applicable to the FB stock: it extends a random walk with a trend, which is likely the best simple way to approach a stock price.
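# A sketch to back this up with numbers (assumption: the last 100 trading days are held out as a test set,
# and the three benchmark methods are scored on it):
fb_train <- Faceb %>%
  filter(trading_day <= max(trading_day) - 100)
fb_train %>%
  model(
    Mean = MEAN(Close),
    Naive = NAIVE(Close),
    Drift = RW(Close ~ drift())
  ) %>%
  forecast(h = 100) %>%
  accuracy(Faceb)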
aus_livestock
## # A tsibble: 29,364 x 4 [1M]
## # Key: Animal, State [54]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory 2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory 2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory 2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory 1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory 2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory 1800
## 7 1977 Jan Bulls, bullocks and steers Australian Capital Territory 1800
## 8 1977 Feb Bulls, bullocks and steers Australian Capital Territory 1900
## 9 1977 Mar Bulls, bullocks and steers Australian Capital Territory 2700
## 10 1977 Apr Bulls, bullocks and steers Australian Capital Territory 2300
## # ... with 29,354 more rows
## # i Use `print(n = ...)` to see more rows
autoplot(aus_livestock)
## Plot variable not specified, automatically selected `.vars = Count`
vic_lps <- aus_livestock %>%
  filter(State == "Victoria", Animal %in% c("Lambs", "Pigs", "Sheep"))
vic_lps %>%
  model(SNAIVE(Count ~ lag("5 year"))) %>%
  forecast(h = "5 years") %>%
  autoplot(vic_lps, level = NULL) +
  labs(title = "Forecast of Lambs, Pigs, and Sheep in Victoria")
vic_cattle <- aus_livestock %>%
  filter(State == "Victoria",
         Animal %in% c("Calves", "Cattle (excl. calves)", "Cows and heifers", "Bulls, bullocks and steers")) 
vic_cattle %>%
  model(SNAIVE(Count ~ lag("5 year"))) %>%
  forecast(h = "5 years") %>%
  autoplot(vic_cattle, level = NULL) +
  labs(title = "Forecast of Calves, Cattle, Cows and heifers, and Bulls, bullocks and steers in Victoria")
# I believe the seasonal naive model works very well for these series because they demonstrate a high degree of seasonality.
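# A quick residual check for one of these fits (a sketch, using Victorian lambs as an example series):
aus_livestock %>%
  filter(State == "Victoria", Animal == "Lambs") %>%
  model(SNAIVE(Count ~ lag("5 year"))) %>%
  gg_tsresiduals()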
True. The innovation residuals from a forecasting method should ideally be normally distributed. It is not strictly necessary, but it is certainly desirable.
False. For the forecasts to be good, the residuals need to be uncorrelated with a mean of zero, not just "small".
False. MAPE works reasonably well for some data sets, but it is undefined when the data contain zeros and can be dominated by values close to zero (see the small numeric sketch after these answers), so it is not always the best measure of accuracy.
False. Making the model more complicated will not necessarily make the forecasts better. It is better to re-evaluate the forecasting approach and check the model's assumptions instead.
True, depending on which measure of accuracy we are using. If we are comparing by RMSE, I would feel confident using the model that yields the lowest RMSE on the test set.
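A small numeric sketch of the MAPE point above (toy numbers, not from any data set): a single near-zero actual value dominates the whole score.
actual_vals <- c(100, 120, 0.5)
fc_vals <- c(110, 115, 2)
mean(abs((actual_vals - fc_vals) / actual_vals)) * 100  # roughly 105%, driven almost entirely by the 0.5 observation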
set.seed(19190)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
#a
myseries_train <- myseries %>%
filter(year(Month) < 2011)
#b
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, color = "orange")
#c
fit <- myseries_train %>%
model(SNAIVE(Turnover))
#d
fit %>% gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
# The residuals display some features of a normal distribution, but the tails (especially the left one) extend further than we would like. The ACF plot also shows significant spikes, so the residuals do not appear to be uncorrelated.
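# A follow-up check (sketch): a Ljung-Box test on the innovation residuals. lag = 24
# (two seasonal periods for monthly data) is an assumed choice; .innov assumes a recent
# fabletools version (use .resid with older versions).
augment(fit) %>%
  features(.innov, ljung_box, lag = 24)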
#e
fc <- fit %>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
fc %>% autoplot(myseries)
#f
fit %>% accuracy()
## # A tibble: 1 x 12
## State Indus~1 .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Australi~ Clothi~ SNAIV~ Trai~ 0.547 1.75 1.23 5.04 11.5 1 1 0.769
## # ... with abbreviated variable name 1: Industry
fc %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Indus~1 .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Tu~ Aust~ Clothi~ Test 2.37 3.20 2.51 10.3 11.1 2.04 1.82 0.592
## # ... with abbreviated variable name 1: Industry
#a
aa <- aus_livestock
autoplot(aus_livestock)
## Plot variable not specified, automatically selected `.vars = Count`
#b
Austrain <- aus_livestock %>%
filter(State == "New South Wales" & Animal == "Pigs") %>%
slice(1:486)
autoplot(Austrain)
## Plot variable not specified, automatically selected `.vars = Count`
aus_fit <- Austrain %>%
model(
Mean = MEAN(Count),
Naive = NAIVE(Count),
Seasonal_naive = SNAIVE(Count),
Drift = RW(Count ~ drift()))
aus_fc <- aus_fit %>%
forecast(h=10)
aus_SNAIVE <- Austrain %>%
model(SNAIVE(Count))
aus_fc %>%
  autoplot(Austrain, level = NULL) +
  labs(y = "Count", title = "Benchmark Forecasts for Pigs in New South Wales") +
  guides(color = guide_legend(title = "Forecast"))
# The seasonal naive method definitely fits best and accounts for the seasonality. The mean is the worst in my opinion; the drift and naive methods do not pick up on the seasonality of the data set.
accuracy(aus_fit)
## # A tibble: 4 x 12
## Animal State .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <fct> <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Pigs New Sout~ Mean Trai~ 2.42e-12 25389. 21496. -5.59 20.8 2.03 1.75
## 2 Pigs New Sout~ Naive Trai~ -3.98e+ 1 15324. 12171. -1.02 11.2 1.15 1.05
## 3 Pigs New Sout~ Seaso~ Trai~ -8.23e+ 2 14530. 10600 -1.83 10.1 1 1
## 4 Pigs New Sout~ Drift Trai~ 3.01e-12 15324. 12173. -0.985 11.2 1.15 1.05
## # ... with 1 more variable: ACF1 <dbl>
## # i Use `colnames()` to see all variable names
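# For comparison (a sketch, not in the original output): score the same forecasts on the
# held-out months of the NSW pig series rather than on the training data.
nsw_pigs <- aus_livestock %>%
  filter(State == "New South Wales", Animal == "Pigs")
aus_fc %>%
  accuracy(nsw_pigs)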
aus_SNAIVE %>% gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
#They do not appear to resemble white noise.
proaus <- aus_production %>%
select(Quarter, Bricks) %>%
slice(1:198)
autoplot(proaus)
## Plot variable not specified, automatically selected `.vars = Bricks`
proaus %>%
model(classical_decomposition(Bricks ~ season(4), type = "multiplicative")) %>%
components() %>%
autoplot() +
labs(title = "Multiplicative Decomp Bricks")
## Warning: Removed 2 row(s) containing missing values (geom_path).
proaus %>%
model(classical_decomposition(Bricks ~ season(4), type = "additive")) %>%
components() %>%
autoplot() +
labs(title = "Multiplicative Decomp Bricks")
## Warning: Removed 2 row(s) containing missing values (geom_path).
proaus2 <- proaus %>%
model(STL(Bricks ~ trend(window=5) + season(window="periodic"), robust = TRUE)) %>%
components() %>% autoplot() +
labs(title = "STL decomposition of bricks (periodic)")
proaus2
stl_brick_fixed_st <- stl(as.ts(proaus), s.window = "periodic", robust = TRUE)  # stats::stl() needs a ts, not a tsibble
regproaus <- proaus %>%
model(STL(Bricks ~ season(window=9), robust=TRUE)) %>%
components() %>% autoplot() +
labs(title = "STL decomposition of bricks")
regproaus
stl_brick_changing_st <- stl(as.ts(proaus), s.window = 5, robust = TRUE)
#b
dcmp <- proaus %>%
model(stl = STL(Bricks))
Compdcmp <- components(dcmp)
Compdcmp %>%
as_tsibble() %>%
autoplot(Bricks, colour="gray") +
geom_line(aes(y=trend), colour = "#D55E00") +
labs( y = "Bricks", title = "Bricks Trend" )
components(dcmp) %>%
as_tsibble() %>%
autoplot(Bricks, colour = "gray") +
geom_line(aes(y=season_adjust), colour = "#0072B2") +
labs(y = "Bricks", title = "Bricks Seasonally adjusted")
#c
austrend <- Compdcmp %>%
select(-c(.model, Bricks, trend, season_year, remainder))
austrend %>%
model(NAIVE(season_adjust)) %>%
forecast(h = "5 years") %>%
autoplot(austrend) +
labs(title = "Seasonally adjusted naive forecast", y = "Bricks")
#d
auswall <- proaus %>%
model(stlf = decomposition_model(STL(Bricks ~ trend(window = 4), robust = TRUE), NAIVE(season_adjust)))
auswall %>%
forecast() %>%
autoplot(proaus) +
labs(title = "Decomposition with decomposition_model()")
#e
gg_tsresiduals(auswall)
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
gg_tsresiduals(dcmp)
# I would not say that either set of residuals looks uncorrelated.
#f
AUSbrick2year <- aus_production %>%
filter(year(Quarter) > 2003) %>%
select(-c(Beer, Tobacco, Cement, Electricity, Gas))
AUSBrickomit <- na.omit(AUSbrick2year)
AUSBrick2year_snaive <- AUSBrickomit %>%
model(SNAIVE(Bricks ~ lag("1 year"))) %>%
forecast(h = "2 years") %>%
autoplot(AUSBrickomit) +
labs(title = "Forecast with SNAIVE since 2004")
AUSBrick2year_snaive
AUSBrickdcmp2 <- AUSBrickomit %>%
model(stlf = decomposition_model(STL(Bricks ~ trend(window = 4), robust = TRUE), NAIVE(season_adjust)))
AUSBrickdcmp2 %>%
forecast() %>%
autoplot(AUSBrickomit) +
labs(title = "Forecast with decomposition_model() since 2004")
#The decomposition looks better than the seasonal NAIVE.
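# The visual comparison could also be backed by test-set accuracy on the longer proaus series.
# A minimal sketch, assuming the last two years (8 quarters) are held out as a test set:
brick_train <- proaus %>%
  slice(1:(n() - 8))
brick_train %>%
  model(
    snaive = SNAIVE(Bricks),
    stl_naive = decomposition_model(
      STL(Bricks ~ trend(window = 5), robust = TRUE),
      NAIVE(season_adjust))
  ) %>%
  forecast(h = "2 years") %>%
  accuracy(proaus)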
tourism
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ... with 24,310 more rows
## # i Use `print(n = ...)` to see more rows
#a
gc_tourism <- tourism %>%
filter(Region == "Gold Coast") %>%
summarise(Trips = sum(Trips))
gc_train_1 <- gc_tourism %>%
filter(year(Quarter) <= 2016)
gc_train_2 <- gc_tourism %>%
filter(year(Quarter) <= 2015)
gc_train_3 <- gc_tourism %>%
filter(year(Quarter) <= 2014)
gc_fc_1 <- gc_train_1 %>%
model(SNAIVE(Trips ~ lag("year"))) %>%
forecast(h = "1 years")
gc_fc_1 %>%
autoplot(gc_train_1, level = NULL) +
labs(title = "SNAIVE forecast")
gc_fc_2 <- gc_train_2 %>%
model(SNAIVE(Trips ~ lag("year"))) %>%
forecast(h = "1 years")
gc_fc_2 %>%
autoplot(gc_train_2, level = NULL) +
labs(title = "SNAIVE forecast")
gc_fc_3 <- gc_train_3 %>%
model(SNAIVE(Trips ~ lag("year"))) %>%
forecast(h = "1 years")
gc_fc_3 %>%
autoplot(gc_train_3, level = NULL) +
labs(title = "SNAIVE forecast")
gc_fc_1 %>%
accuracy(gc_tourism)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(Trips ~ lag(\"~ Test 75.1 167. 154. 6.36 15.1 2.66 2.36 -0.410
gc_fc_2 %>%
accuracy(gc_tourism)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(Trips ~ lag(\"~ Test 12.0 43.1 39.5 1.14 4.32 0.670 0.599 -0.792
gc_fc_3 %>%
accuracy(gc_tourism)
## # A tibble: 1 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(Trips ~ lag(\"y~ Test 35.8 91.4 83.9 3.56 9.07 1.46 1.30 0.239
# gc_fc_2 had by far the best MAPE (4.32), compared with gc_fc_1 (15.1) and gc_fc_3 (9.07). I found online that a forecast with a MAPE lower than 5% is considered "acceptably accurate".
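# An alternative sketch (not required by the exercise): the same comparison via time series
# cross-validation rather than three hand-made training sets. .init = 36 and .step = 4 are assumed values.
gc_cv <- gc_tourism %>%
  stretch_tsibble(.init = 36, .step = 4)
gc_cv %>%
  model(SNAIVE(Trips ~ lag("year"))) %>%
  forecast(h = 4) %>%
  accuracy(gc_tourism)
# (The last folds forecast past the end of the data, so accuracy() will warn about missing out-of-sample values.)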
apple <- gafa_stock %>%
filter(Symbol == "AAPL") %>%
mutate(trading_day = row_number()) %>%
update_tsibble(index = trading_day, regular = TRUE)
apple_stretch <- apple %>%
stretch_tsibble(.init = 10, .step = 1) %>%
relocate(Date, Symbol, .id)
crossvali <- apple_stretch %>%
model(RW(Close ~ drift())) %>%
forecast(h=1) %>%
accuracy(apple)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 1259
crossvalimean <- apple_stretch %>%
model(MEAN(Close)) %>%
forecast(h=1) %>%
accuracy(apple)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 1259
crossvali
## # A tibble: 1 x 11
## .model Symbol .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 RW(Close ~ ~ AAPL Test -0.0158 2.10 1.41 -0.0139 1.06 1.00 1.00 0.0330
crossvalimean
## # A tibble: 1 x 11
## .model Symbol .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Close) AAPL Test 26.9 37.6 28.5 16.9 18.6 20.2 17.9 0.996
# The drift model has a significantly lower RMSE than the mean model, so I would say the drift is more accurate. A mean model does not really make sense for stock prices.
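# For reference (a sketch): the residual (training) accuracy of the drift model on the full
# series, to set the cross-validated test RMSE above in context.
apple %>%
  model(RW(Close ~ drift())) %>%
  accuracy()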
#a
prices2 <- prices %>%
na.omit()
prices2 %>%
autoplot(wheat)
#b
price_fit <- prices2 %>%
model(RW(log(wheat) ~ drift()))
price_fc <- price_fit %>%
forecast(h=50)
price_fc %>% autoplot(prices2) +
labs(title = "RW Forecast of wheat with Drift")
#c
wheatbs <- price_fit %>% forecast(h=50, bootstrap = TRUE, times=500)
wheatbs
## # A fable: 50 x 4 [1Y]
## # Key: .model [1]
## .model year wheat .mean
## <chr> <dbl> <dist> <dbl>
## 1 RW(log(wheat) ~ drift()) 1994 t(sample[500]) 112.
## 2 RW(log(wheat) ~ drift()) 1995 t(sample[500]) 112.
## 3 RW(log(wheat) ~ drift()) 1996 t(sample[500]) 112.
## 4 RW(log(wheat) ~ drift()) 1997 t(sample[500]) 111.
## 5 RW(log(wheat) ~ drift()) 1998 t(sample[500]) 111.
## 6 RW(log(wheat) ~ drift()) 1999 t(sample[500]) 111.
## 7 RW(log(wheat) ~ drift()) 2000 t(sample[500]) 111.
## 8 RW(log(wheat) ~ drift()) 2001 t(sample[500]) 111.
## 9 RW(log(wheat) ~ drift()) 2002 t(sample[500]) 112.
## 10 RW(log(wheat) ~ drift()) 2003 t(sample[500]) 111.
## # ... with 40 more rows
## # i Use `print(n = ...)` to see more rows
autoplot(wheatbs, prices2)
#d
bootstrap <- price_fit %>% generate(h = 50, times = 500, bootstrap = TRUE)
bootstrap
## # A tsibble: 25,000 x 4 [1Y]
## # Key: .model, .rep [500]
## .model year .rep .sim
## <chr> <dbl> <chr> <dbl>
## 1 RW(log(wheat) ~ drift()) 1994 1 117.
## 2 RW(log(wheat) ~ drift()) 1995 1 123.
## 3 RW(log(wheat) ~ drift()) 1996 1 126.
## 4 RW(log(wheat) ~ drift()) 1997 1 121.
## 5 RW(log(wheat) ~ drift()) 1998 1 123.
## 6 RW(log(wheat) ~ drift()) 1999 1 117.
## 7 RW(log(wheat) ~ drift()) 2000 1 114.
## 8 RW(log(wheat) ~ drift()) 2001 1 106.
## 9 RW(log(wheat) ~ drift()) 2002 1 112.
## 10 RW(log(wheat) ~ drift()) 2003 1 127.
## # ... with 24,990 more rows
## # i Use `print(n = ...)` to see more rows
prices2 %>%
ggplot(aes(x = year)) +
geom_line(aes(y = wheat)) +
geom_line(aes(y = .sim, color = as.factor(.rep)),
data = bootstrap) +
guides(color = "none")
#e) When the residuals are uncorrelated and have constant variance (bootstrapping does not additionally require them to be normally distributed).
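# A sketch comparing the two kinds of 95% prediction intervals side by side
# (normal-based intervals from part b vs bootstrapped intervals from part c):
price_fc %>% hilo(95)
wheatbs %>% hilo(95)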