library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)
library(zoo)
library(fable)
library(readxl)
library(seasonal)
library(caTools)
AUSpop <- global_economy %>%
select(Country, Year, Population) %>%
filter(Country == "Australia")
AUSpoppl <- AUSpop %>%
autoplot(Population/1e3) +
labs(y = "Population (Thousands)" )
AUSpoppl
AUSgepl <- global_economy %>%
filter(Country == "Australia") %>%
model(RW(Population ~ drift())) %>%
forecast(h = 15) %>%
autoplot(global_economy) +
labs(title = "Australia Population")
AUSgepl
#drift
#1 pt 2
AUSbricks <- aus_production %>%
select(Quarter, Bricks)
autoplot(AUSbricks)
## Plot variable not specified, automatically selected `.vars = Bricks`
## Warning: Removed 20 row(s) containing missing values (geom_path).
AUSbrickpl <- aus_production %>%
filter(!is.na(Bricks)) %>%
model(SNAIVE(Bricks)) %>%
forecast(h = 15) %>%
autoplot(aus_production) +
labs(title = "Australian Bricks")
AUSbrickpl
## Warning: Removed 20 row(s) containing missing values (geom_path).
#SNAIVE
#1 pt 3
AUSlamb <- aus_livestock %>%
filter(Animal == "Lambs") %>%
filter(State == "New South Wales")
autoplot(AUSlamb)
## Plot variable not specified, automatically selected `.vars = Count`
AUSlambpl <- aus_livestock %>%
filter(State == "New South Wales", Animal == "Lambs") %>%
model(NAIVE(Count)) %>%
forecast(h = 15) %>%
autoplot(aus_livestock) +
labs(title = "Lambs in New South Wales")
AUSlambpl
#NAIVE
#1 pt 4
HHAUS <- hh_budget %>%
select(Country, Year, Wealth) %>%
filter(Country == "Australia")
autoplot(HHAUS)
## Plot variable not specified, automatically selected `.vars = Wealth`
HHCAD <- hh_budget %>%
select(Country, Year, Wealth) %>%
filter(Country == "Canada")
autoplot(HHCAD)
## Plot variable not specified, automatically selected `.vars = Wealth`
HHUSA <- hh_budget %>%
select(Country, Year, Wealth) %>%
filter(Country == "USA")
autoplot(HHUSA)
## Plot variable not specified, automatically selected `.vars = Wealth`
HHJAP <- hh_budget %>%
select(Country, Year, Wealth) %>%
filter(Country == "Japan")
autoplot(HHJAP)
## Plot variable not specified, automatically selected `.vars = Wealth`
HHwealpl <- hh_budget %>%
model(RW(Wealth ~ drift())) %>%
forecast(h = 15) %>%
autoplot(hh_budget) +
labs(title = "Household Wealth")
HHwealpl
#Drift
#1 pt 5
FoodTO <- aus_retail %>%
select(Month, State, Industry, Turnover) %>%
filter(Industry == "Takeaway food services")
autoplot(FoodTO)
## Plot variable not specified, automatically selected `.vars = Turnover`
AUStakeoutpl <- aus_retail %>%
filter(Industry == "Takeaway food services") %>%
model(RW(Turnover ~ drift())) %>%
forecast(h = 15) %>%
autoplot(aus_retail) +
labs(title = "Australian Takeaway Food Turnover") +
facet_wrap(~State, scales = "free")
AUStakeoutpl
#Drift
FBClose <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
FBClosepl <- FBClose %>%
autoplot(Close) +
labs(title= "Close Prices of Facebook", x = "Days", y = "$$$")
FBClosepl
#2 pt 2
FBdrift <- FBClose %>%
model(RW(Close ~ drift())) %>%
forecast(h = 150) %>%
autoplot(FBClose) +
labs(title = "Close Prices of Facebook", x = "Days", y = "$$$")
FBdrift
#2 pt 3
FBdriftwline <- FBClose %>%
model(RW(Close ~ drift())) %>%
forecast(h = 150) %>%
autoplot(FBClose) +
geom_line(data = slice(FBClose, range(cumsum(!is.na(Close)))), aes(y=Close), linetype = 'dashed') +
labs(title = "Close Prices of Facebook", x = "Days", y = "$$$")
FBdriftwline
#2 pt 4
FBall <- FBClose %>%
model(Mean = MEAN(Close),
Naive = NAIVE(Close),
Drift = NAIVE(Close ~ drift())) %>%
forecast(h = 150) %>%
autoplot(FBClose) +
labs(title = "Close Prices of Facebook", y = "$$$")
FBall
Drift works the best of the methods. All three of the methods are very similar, but I feel like the drift best captures the upward trend.
I think this is a good benchmark for this series. The data is very
seasonal so this model is a very good choice.
1 True (Classical assumption) 2 False (This isn’t always correct. If the residuals are all very small positive numbers, the mean of the residuals wouldn’t be zero. The mean of the residuals needs to be zero.) 3 False (MAPE doesn’t work well with datasets with a relatively small amount of data) 4 False (Making the model more complicated only makes it harder for you, we will use the classical assumptions to find the issue.) 5 True (The test set is what you use to determine accuracy)
set.seed(69420)
AUSre <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
#5 pt 2
AUSretrain <- AUSre %>%
filter(year(Month) < 2011)
autoplot(AUSre, Turnover) +
autolayer(AUSretrain, Turnover, colour = "blue")
#5 pt 3
AUSfit <- AUSretrain %>%
model(SNAIVE(Turnover))
AUSfit
## # A mable: 1 x 3
## # Key: State, Industry [1]
## State Industry SNAIVE…¹
## <chr> <chr> <model>
## 1 Australian Capital Territory Pharmaceutical, cosmetic and toiletry g… <SNAIVE>
## # … with abbreviated variable name ¹`SNAIVE(Turnover)`
#5 pt 4
AUSfit %>% gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
#5 pt 5
AUStestdata <- AUSfit %>%
forecast(new_data = anti_join(AUSre, AUSretrain))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
AUStestdata %>% autoplot(AUSre)
#5 pt 6
AUStestcheck <- AUStestdata %>% accuracy(AUSre)
AUStestcheck
## # A tibble: 1 × 12
## .model State Indus…¹ .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Tu… Aust… Pharma… Test 0.633 2.91 2.28 1.48 14.6 1.59 1.48 0.884
## # … with abbreviated variable name ¹Industry
AUGpigs <- aus_livestock %>%
filter(Animal == "Pigs", State == "New South Wales")
autoplot(AUGpigs)
## Plot variable not specified, automatically selected `.vars = Count`
#6 pt 2
Trainpigs <- AUGpigs %>%
slice(1:486)
Trainpigs
## # A tsibble: 486 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1972 Jul Pigs New South Wales 97400
## 2 1972 Aug Pigs New South Wales 114700
## 3 1972 Sep Pigs New South Wales 109900
## 4 1972 Oct Pigs New South Wales 108300
## 5 1972 Nov Pigs New South Wales 122200
## 6 1972 Dec Pigs New South Wales 106900
## 7 1973 Jan Pigs New South Wales 96600
## 8 1973 Feb Pigs New South Wales 96700
## 9 1973 Mar Pigs New South Wales 121200
## 10 1973 Apr Pigs New South Wales 99300
## # … with 476 more rows
Testpigs <- AUGpigs %>%
slice(487:558)
Testpigs
## # A tsibble: 72 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 2013 Jan Pigs New South Wales 71300
## 2 2013 Feb Pigs New South Wales 69700
## 3 2013 Mar Pigs New South Wales 79900
## 4 2013 Apr Pigs New South Wales 74300
## 5 2013 May Pigs New South Wales 87200
## 6 2013 Jun Pigs New South Wales 71200
## 7 2013 Jul Pigs New South Wales 86200
## 8 2013 Aug Pigs New South Wales 78000
## 9 2013 Sep Pigs New South Wales 71200
## 10 2013 Oct Pigs New South Wales 78200
## # … with 62 more rows
#6 pt 3
Fitpig <- Trainpigs %>%
model( Mean = MEAN(Count), `Naïve` = NAIVE(Count), `Seasonal naïve` = SNAIVE(Count), Drift = RW(Count ~ drift()))
Pigforecast <- Fitpig %>%
forecast(h = 72)
Piggyfc <- Pigforecast %>%
autoplot((AUGpigs),level = NULL) +
labs(y = "Count", title = "Forecasts Pigs") +
guides(colour = guide_legend(title = "Forecast"))
Piggyfc
Accpigs <- accuracy(Pigforecast, AUGpigs)
Accpigs
## # A tibble: 4 × 12
## .model Animal State .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <fct> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift Pigs New … Test -4685. 8091. 6967. -7.36 10.1 0.657 0.557
## 2 Mean Pigs New … Test -39360. 39894. 39360. -55.9 55.9 3.71 2.75
## 3 Naïve Pigs New … Test -6138. 8941. 7840. -9.39 11.4 0.740 0.615
## 4 Seasonal na… Pigs New … Test -5838. 10111. 8174. -8.81 11.9 0.771 0.696
## # … with 1 more variable: ACF1 <dbl>
#6 pt 4
MPigs <- Trainpigs %>%
model( Mean = MEAN(Count))
NPigs <- Trainpigs %>%
model( Naive = NAIVE(Count))
SNPigs <- Trainpigs %>%
model(`Seasonal naïve` = SNAIVE(Count))
DPigs <- Trainpigs %>%
model( Drift = RW(Count ~ drift()))
MPigs %>%
gg_tsresiduals() +
labs( title = "Mean Method Residuals")
NPigs %>%
gg_tsresiduals() +
labs( title = "Naive Model Residuals")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
SNPigs %>%
gg_tsresiduals() +
labs( title = "Seasonal Naive Residuals")
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
DPigs %>%
gg_tsresiduals() +
labs( title = "Drift Method Residuals")
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
The seasonal naive method is the best method for this data. My residuals look very close to white noise. It seems like there may be something causing the residuals to be slightly off from white noise.
Bteam <- aus_production %>%
select(Quarter, Bricks) %>%
slice(1:198)
autoplot(Bteam)
## Plot variable not specified, automatically selected `.vars = Bricks`
Bteam %>%
model(STL(box_cox(Bricks,0) ~ season(window=9), robust=TRUE)) %>%
components() %>% autoplot() +
labs(title = "STL decomposition of bricks multiplictive")
Bteam %>%
model(STL(box_cox(Bricks,0) ~ season(window="periodic"), robust=TRUE)) %>%
components() %>% autoplot() +
labs(title = "STL decomposition of bricks multiplictive (periodic)")
Bteam %>%
model(STL(box_cox(Bricks,1) ~ season(window=9), robust=TRUE)) %>%
components() %>% autoplot() +
labs(title = "STL decomposition of bricks additive")
Bteam %>%
model(STL(box_cox(Bricks,1) ~ season(window="periodic"), robust=TRUE)) %>%
components() %>% autoplot() +
labs(title = "STL decomposition of bricks additive (periodic")
fixedBteam <- Bteam %>%
model(STL(Bricks ~ trend(window=5) + season(window="periodic"), robust = TRUE)) %>%
components() %>% autoplot() +
labs(title = "STL decomposition of bricks (periodic)")
fixedBteam
stl_brick_fixed_st <- stl(Bteam, s.window = "periodic", robust = TRUE)
RegBteam <- Bteam %>%
model(STL(Bricks ~ season(window=9), robust=TRUE)) %>%
components() %>% autoplot() +
labs(title = "STL decomposition of bricks")
RegBteam
stl_brick_changing_st <- stl(Bteam, s.window = 5, robust = TRUE)
#7 pt 2
dcmp <- Bteam %>%
model(stl = STL(Bricks))
Compdcmp <- components(dcmp)
Compdcmp %>%
as_tsibble() %>%
autoplot(Bricks, colour="gray") +
geom_line(aes(y=trend), colour = "#D55E00") +
labs( y = "Bricks", title = "Bricks Trend" )
components(dcmp) %>%
as_tsibble() %>%
autoplot(Bricks, colour = "gray") +
geom_line(aes(y=season_adjust), colour = "#0072B2") +
labs(y = "Bricks", title = "Bricks Seasonally adjusted")
#7 pt 3
AUStrend <- Compdcmp %>%
select(-c(.model, Bricks, trend, season_year, remainder))
AUStrend %>%
model(NAIVE(season_adjust)) %>%
forecast(h = "5 years") %>%
autoplot(AUStrend) +
labs(title = "Seasonally adjusted naive forecast", y = "Bricks")
#7 pt 4
AUSwall <- Bteam %>%
model(stlf = decomposition_model(STL(Bricks ~ trend(window = 4), robust = TRUE), NAIVE(season_adjust)))
AUSwall %>%
forecast() %>%
autoplot(Bteam) +
labs(title = "Decomposition with decomposition_model()")
#7 pt 5
gg_tsresiduals(AUSwall)
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
gg_tsresiduals(dcmp)
#7 pt 6
AUSbrick2year <- aus_production %>%
filter(year(Quarter) > 2003) %>%
select(-c(Beer, Tobacco, Cement, Electricity, Gas))
AUSBrickomit <- na.omit(AUSbrick2year)
AUSBrick2year_snaive <- AUSBrickomit %>%
model(SNAIVE(Bricks ~ lag("1 year"))) %>%
forecast(h = "2 years") %>%
autoplot(AUSBrickomit) +
labs(title = "Forecast with SNAIVE since 2004")
AUSBrick2year_snaive
AUSBrickdcmp2 <- AUSBrickomit %>%
model(stlf = decomposition_model(STL(Bricks ~ trend(window = 4), robust = TRUE), NAIVE(season_adjust)))
AUSBrickdcmp2 %>%
forecast() %>%
autoplot(AUSBrickomit) +
labs(title = "Forecast with decomposition_model() since 2004")
The one using the decomposition model () was the best for this data. This is true because the Decomposition model accounts for periods in the future being much different from the current periods. The SNAIVE model has slight bias because the periods are very similar from those in the past. The decomposition model uses trend as well.
tourism
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # … with 24,310 more rows
GCtourism <- tourism %>%
filter(Region == "Gold Coast") %>%
summarise(Trips = sum(Trips))
gctrain1 <- GCtourism %>%
filter(year(Quarter) <= 2016)
gctrain2 <- GCtourism %>%
filter(year(Quarter) <= 2015)
gctrain3 <- GCtourism %>%
filter(year(Quarter) <= 2014)
gcfc1 <- gctrain1 %>%
model(SNAIVE(Trips ~ lag("year"))) %>%
forecast(h = "1 years")
gcfc1 %>%
autoplot(gctrain1, level = NULL) +
labs(title = "SNAIVE forecast")
gcfc2 <- gctrain2 %>%
model(SNAIVE(Trips ~ lag("year"))) %>%
forecast(h = "1 years")
gcfc2 %>%
autoplot(gctrain2, level = NULL) +
labs(title = "SNAIVE forecast")
gcfc3 <- gctrain3 %>%
model(SNAIVE(Trips ~ lag("year"))) %>%
forecast(h = "1 years")
gcfc3 %>%
autoplot(gctrain3, level = NULL) +
labs(title = "SNAIVE forecast")
gcfc1 %>%
accuracy(GCtourism)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(Trips ~ lag(\"… Test 75.1 167. 154. 6.36 15.1 2.66 2.36 -0.410
gcfc2 %>%
accuracy(GCtourism)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(Trips ~ lag(\"… Test 12.0 43.1 39.5 1.14 4.32 0.670 0.599 -0.792
gcfc3 %>%
accuracy(GCtourism)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "SNAIVE(Trips ~ lag(\"y… Test 35.8 91.4 83.9 3.56 9.07 1.46 1.30 0.239
gcfc2 by far had the best MAPE with 4.32, compared to gcfc1 (15.1) and gcfc3 (9.07). A MAPE lower than 5% is “acceptably accurate”.
Steve <- gafa_stock %>%
filter(Symbol == "AAPL") %>%
mutate(trading_day = row_number()) %>%
update_tsibble(index = trading_day, regular = TRUE)
appstretch <- Steve %>%
stretch_tsibble(.init = 10, .step = 1) %>%
relocate(Date, Symbol, .id)
tval <- appstretch %>%
model(RW(Close ~ drift())) %>%
forecast(h=1) %>%
accuracy(Steve)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 1259
tvalmean <- appstretch %>%
model(MEAN(Close)) %>%
forecast(h=1) %>%
accuracy(Steve)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 1259
tval
## # A tibble: 1 × 11
## .model Symbol .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 RW(Close ~ … AAPL Test -0.0158 2.10 1.41 -0.0139 1.06 1.00 1.00 0.0330
tvalmean
## # A tibble: 1 × 11
## .model Symbol .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Close) AAPL Test 26.9 37.6 28.5 16.9 18.6 20.2 17.9 0.996
prices2 <- prices %>%
na.omit()
prices2 %>%
autoplot(wheat)
price_fit <- prices2 %>%
model(RW(log(wheat) ~ drift()))
price_fc <- price_fit %>%
forecast(h=50)
price_fc %>% autoplot(prices2) +
labs(title = "RW Forecast of wheat with Drift")
wheatbs <- price_fit %>% forecast(h=50, bootstrap = TRUE, times=500)
wheatbs
## # A fable: 50 x 4 [1Y]
## # Key: .model [1]
## .model year wheat .mean
## <chr> <dbl> <dist> <dbl>
## 1 RW(log(wheat) ~ drift()) 1994 t(sample[500]) 113.
## 2 RW(log(wheat) ~ drift()) 1995 t(sample[500]) 111.
## 3 RW(log(wheat) ~ drift()) 1996 t(sample[500]) 110.
## 4 RW(log(wheat) ~ drift()) 1997 t(sample[500]) 110.
## 5 RW(log(wheat) ~ drift()) 1998 t(sample[500]) 109.
## 6 RW(log(wheat) ~ drift()) 1999 t(sample[500]) 108.
## 7 RW(log(wheat) ~ drift()) 2000 t(sample[500]) 107.
## 8 RW(log(wheat) ~ drift()) 2001 t(sample[500]) 106.
## 9 RW(log(wheat) ~ drift()) 2002 t(sample[500]) 105.
## 10 RW(log(wheat) ~ drift()) 2003 t(sample[500]) 105.
## # … with 40 more rows
autoplot(wheatbs, prices2)
bootstrap <- price_fit %>% generate (h=50,times=500, bootstrap=TRUE)
bootstrap
## # A tsibble: 25,000 x 4 [1Y]
## # Key: .model, .rep [500]
## .model year .rep .sim
## <chr> <dbl> <chr> <dbl>
## 1 RW(log(wheat) ~ drift()) 1994 1 80.1
## 2 RW(log(wheat) ~ drift()) 1995 1 74.0
## 3 RW(log(wheat) ~ drift()) 1996 1 76.9
## 4 RW(log(wheat) ~ drift()) 1997 1 75.0
## 5 RW(log(wheat) ~ drift()) 1998 1 75.5
## 6 RW(log(wheat) ~ drift()) 1999 1 53.4
## 7 RW(log(wheat) ~ drift()) 2000 1 46.4
## 8 RW(log(wheat) ~ drift()) 2001 1 45.6
## 9 RW(log(wheat) ~ drift()) 2002 1 80.9
## 10 RW(log(wheat) ~ drift()) 2003 1 86.3
## # … with 24,990 more rows
prices2 %>%
ggplot(aes(x = year)) +
geom_line(aes(y = wheat)) +
geom_line(aes(y = .sim, color = as.factor(.rep)),
data = bootstrap) +
guides(color = "none")
When a normal distribution is an unreasonable assumption, and when residuals are uncorrelated with constant variance.