library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.4 v tsibble 1.0.1
## v dplyr 1.0.7 v tsibbledata 0.3.0
## v tidyr 1.1.3 v feasts 0.2.2
## v lubridate 1.7.10 v fable 0.3.1
## v ggplot2 3.3.5
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v readr 2.0.1 v stringr 1.4.0
## v purrr 0.3.4 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks lubridate::intersect(), base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks lubridate::setdiff(), base::setdiff()
## x tsibble::union() masks lubridate::union(), base::union()
library(dplyr)
library(latex2exp)
library(ggplot2)
library(fabletools)
Australian Population (global_economy)
global_economy %>%
filter(Country=="Australia") %>%
autoplot(Population)
Australia<-global_economy %>%
filter(Country=="Australia")
#fit
Aus_fit <- Australia %>%
model(
Drift= NAIVE(Population~drift())
)
Aus_fit
## # A mable: 1 x 2
## # Key: Country [1]
## Country Drift
## <fct> <model>
## 1 Australia <RW w/ drift>
# Generate forecasts for 14 periods
Aus_fit %>% forecast(h = 14)
## # A fable: 14 x 5 [1Y]
## # Key: Country, .model [1]
## Country .model Year Population .mean
## <fct> <chr> <dbl> <dist> <dbl>
## 1 Australia Drift 2018 N(2.5e+07, 6e+09) 24850204.
## 2 Australia Drift 2019 N(2.5e+07, 1.2e+10) 25101475.
## 3 Australia Drift 2020 N(2.5e+07, 1.9e+10) 25352746.
## 4 Australia Drift 2021 N(2.6e+07, 2.5e+10) 25604018.
## 5 Australia Drift 2022 N(2.6e+07, 3.2e+10) 25855289.
## 6 Australia Drift 2023 N(2.6e+07, 3.9e+10) 26106560.
## 7 Australia Drift 2024 N(2.6e+07, 4.7e+10) 26357831.
## 8 Australia Drift 2025 N(2.7e+07, 5.4e+10) 26609102.
## 9 Australia Drift 2026 N(2.7e+07, 6.2e+10) 26860373.
## 10 Australia Drift 2027 N(2.7e+07, 7e+10) 27111645.
## 11 Australia Drift 2028 N(2.7e+07, 7.8e+10) 27362916.
## 12 Australia Drift 2029 N(2.8e+07, 8.6e+10) 27614187.
## 13 Australia Drift 2030 N(2.8e+07, 9.5e+10) 27865458.
## 14 Australia Drift 2031 N(2.8e+07, 1e+11) 28116729.
# Plot forecasts against actual values
Aus_fit %>%
forecast(h = 14)%>%
autoplot(Australia)
DISCUSSION: Australian population forecast is strictly increasing, the drift method appropriately forecasts most effectively.
Bricks (aus_production)
aus_production %>%
autoplot(Bricks)
## Warning: Removed 20 row(s) containing missing values (geom_path).
Brick1<-aus_production %>%
filter(!is.na(Bricks))
#fit
Brick1_fit <- Brick1 %>%
model(
SNAIVE(Bricks ~ lag("year") )
)
Brick1_fit
## # A mable: 1 x 1
## `SNAIVE(Bricks ~ lag("year"))`
## <model>
## 1 <SNAIVE>
# Generate forecasts for 4 periods 1 year
Brick1_fit %>% forecast(h = 4)
## # A fable: 4 x 4 [1Q]
## # Key: .model [1]
## .model Quarter Bricks .mean
## <chr> <qtr> <dist> <dbl>
## 1 "SNAIVE(Bricks ~ lag(\"year\"))" 2005 Q3 N(428, 2336) 428
## 2 "SNAIVE(Bricks ~ lag(\"year\"))" 2005 Q4 N(397, 2336) 397
## 3 "SNAIVE(Bricks ~ lag(\"year\"))" 2006 Q1 N(355, 2336) 355
## 4 "SNAIVE(Bricks ~ lag(\"year\"))" 2006 Q2 N(435, 2336) 435
# Plot forecasts against actual values
Brick1_fit %>%
forecast(h = 4)%>%
autoplot(Brick1)
The seasonal model captures the seasonality and is appropriate. However, a second seasonalilty of 10 years can also be seen.
NSW Lambs (aus_livestock)
aus_livestock %>%
distinct(State)
## # A tibble: 8 x 1
## State
## <fct>
## 1 Australian Capital Territory
## 2 New South Wales
## 3 Northern Territory
## 4 Queensland
## 5 South Australia
## 6 Tasmania
## 7 Victoria
## 8 Western Australia
aus_livestock %>%
distinct(Animal)
## # A tibble: 7 x 1
## Animal
## <fct>
## 1 Bulls, bullocks and steers
## 2 Calves
## 3 Cattle (excl. calves)
## 4 Cows and heifers
## 5 Lambs
## 6 Pigs
## 7 Sheep
aus_livestock %>%
filter(State=="New South Wales" & Animal=="Lambs") %>%
autoplot(Count)
Lamb1<-aus_livestock %>%
filter(State=="New South Wales" & Animal=="Lambs")
#fit
Lamb1fit <- Lamb1 %>%
model(
Mean = MEAN(Count),
`Seasonal Naïve` = SNAIVE(Count),
`Naïve` = NAIVE(Count),
Drift= NAIVE(Count~drift())
)
Lamb1fit
## # A mable: 1 x 6
## # Key: Animal, State [1]
## Animal State Mean `Seasonal Naïve` Naïve Drift
## <fct> <fct> <model> <model> <model> <model>
## 1 Lambs New South Wales <MEAN> <SNAIVE> <NAIVE> <RW w/ drift>
# Generate forecasts for 14 periods
Lamb1fit %>% forecast(h = 14)
## # A fable: 56 x 6 [1M]
## # Key: Animal, State, .model [4]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Lambs New South Wales Mean 2019 Jan N(4e+05, 5.3e+09) 404667.
## 2 Lambs New South Wales Mean 2019 Feb N(4e+05, 5.3e+09) 404667.
## 3 Lambs New South Wales Mean 2019 Mar N(4e+05, 5.3e+09) 404667.
## 4 Lambs New South Wales Mean 2019 Apr N(4e+05, 5.3e+09) 404667.
## 5 Lambs New South Wales Mean 2019 May N(4e+05, 5.3e+09) 404667.
## 6 Lambs New South Wales Mean 2019 Jun N(4e+05, 5.3e+09) 404667.
## 7 Lambs New South Wales Mean 2019 Jul N(4e+05, 5.3e+09) 404667.
## 8 Lambs New South Wales Mean 2019 Aug N(4e+05, 5.3e+09) 404667.
## 9 Lambs New South Wales Mean 2019 Sep N(4e+05, 5.3e+09) 404667.
## 10 Lambs New South Wales Mean 2019 Oct N(4e+05, 5.3e+09) 404667.
## # ... with 46 more rows
# Plot forecasts against actual values
Lamb1fit %>%
forecast(h = 14)%>%
autoplot(Lamb1)
Because of the downward then steady trend, I believe the naive would be appropriate with arguably mean model as a contender.
Household wealth (hh_budget).
#lets look at Australian household wealth
hh_budget %>%
filter(Country=="Australia") %>%
autoplot(Wealth)
Australia1<-hh_budget %>%
filter(Country=="Australia")
#fit
AusW_fit <- Australia1 %>%
model(
Mean = MEAN(Wealth),
`Naïve` = NAIVE(Wealth),
`Seasonal Naïve` = SNAIVE(Wealth),
Drift= NAIVE(Wealth~drift())
)
## Warning: 1 error encountered for Seasonal Naïve
## [1] Non-seasonal model specification provided, use RW() or provide a different lag specification.
AusW_fit
## # A mable: 1 x 5
## # Key: Country [1]
## Country Mean Naïve `Seasonal Naïve` Drift
## <chr> <model> <model> <model> <model>
## 1 Australia <MEAN> <NAIVE> <NULL model> <RW w/ drift>
# Generate forecasts for 14 periods
AusW_fit %>% forecast(h = 14)
## # A fable: 56 x 5 [1Y]
## # Key: Country, .model [4]
## Country .model Year Wealth .mean
## <chr> <chr> <dbl> <dist> <dbl>
## 1 Australia Mean 2017 N(359, 1018) 359.
## 2 Australia Mean 2018 N(359, 1018) 359.
## 3 Australia Mean 2019 N(359, 1018) 359.
## 4 Australia Mean 2020 N(359, 1018) 359.
## 5 Australia Mean 2021 N(359, 1018) 359.
## 6 Australia Mean 2022 N(359, 1018) 359.
## 7 Australia Mean 2023 N(359, 1018) 359.
## 8 Australia Mean 2024 N(359, 1018) 359.
## 9 Australia Mean 2025 N(359, 1018) 359.
## 10 Australia Mean 2026 N(359, 1018) 359.
## # ... with 46 more rows
# Plot forecasts against actual values
AusW_fit %>%
forecast(h = 14)%>%
autoplot(Australia1)
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 14 row(s) containing missing values (geom_path).
There is a pattern of increasing, decreasing and increasing. The uncertainty leads to a Naive model being sound.
Australian takeaway food turnover (aus_retail).
#lets look at Australian Capital Territory
aus_retail%>%
distinct(State)
## # A tibble: 8 x 1
## State
## <chr>
## 1 Australian Capital Territory
## 2 New South Wales
## 3 Northern Territory
## 4 Queensland
## 5 South Australia
## 6 Tasmania
## 7 Victoria
## 8 Western Australia
aus_retail%>%
distinct(Industry)
## # A tibble: 20 x 1
## Industry
## <chr>
## 1 Cafes, restaurants and catering services
## 2 Cafes, restaurants and takeaway food services
## 3 Clothing retailing
## 4 Clothing, footwear and personal accessory retailing
## 5 Department stores
## 6 Electrical and electronic goods retailing
## 7 Food retailing
## 8 Footwear and other personal accessory retailing
## 9 Furniture, floor coverings, houseware and textile goods retailing
## 10 Hardware, building and garden supplies retailing
## 11 Household goods retailing
## 12 Liquor retailing
## 13 Newspaper and book retailing
## 14 Other recreational goods retailing
## 15 Other retailing
## 16 Other retailing n.e.c.
## 17 Other specialised food retailing
## 18 Pharmaceutical, cosmetic and toiletry goods retailing
## 19 Supermarket and grocery stores
## 20 Takeaway food services
aus_retail %>%
filter(State=="Australian Capital Territory" & Industry=="Takeaway food services") %>%
autoplot(Turnover)
Food<-aus_retail %>%
filter(State=="Australian Capital Territory" & Industry=="Takeaway food services")
#fit
Food_fit <- Food %>%
model(
`Naïve` = NAIVE(Turnover),
`Seasonal Naïve` = SNAIVE(Turnover),
Drift= NAIVE(Turnover~drift())
)
Food_fit
## # A mable: 1 x 5
## # Key: State, Industry [1]
## State Industry Naïve `Seasonal Naïve` Drift
## <chr> <chr> <model> <model> <model>
## 1 Australian Capital Territory Takeaway ~ <NAIVE> <SNAIVE> <RW w/ drift>
# Generate forecasts for 14 periods
Food_fit %>% forecast(h = 14)
## # A fable: 42 x 6 [1M]
## # Key: State, Industry, .model [3]
## State Industry .model Month Turnover .mean
## <chr> <chr> <chr> <mth> <dist> <dbl>
## 1 Australian Capital Territory Takeaway food~ Naïve 2019 Jan N(29, 0.91) 29.1
## 2 Australian Capital Territory Takeaway food~ Naïve 2019 Feb N(29, 1.8) 29.1
## 3 Australian Capital Territory Takeaway food~ Naïve 2019 Mar N(29, 2.7) 29.1
## 4 Australian Capital Territory Takeaway food~ Naïve 2019 Apr N(29, 3.6) 29.1
## 5 Australian Capital Territory Takeaway food~ Naïve 2019 May N(29, 4.5) 29.1
## 6 Australian Capital Territory Takeaway food~ Naïve 2019 Jun N(29, 5.4) 29.1
## 7 Australian Capital Territory Takeaway food~ Naïve 2019 Jul N(29, 6.3) 29.1
## 8 Australian Capital Territory Takeaway food~ Naïve 2019 Aug N(29, 7.2) 29.1
## 9 Australian Capital Territory Takeaway food~ Naïve 2019 Sep N(29, 8.1) 29.1
## 10 Australian Capital Territory Takeaway food~ Naïve 2019 Oct N(29, 9.1) 29.1
## # ... with 32 more rows
# Plot forecasts against actual values
Food_fit %>%
forecast(h = 14)%>%
autoplot(Food)
From visual inspection, the drift model captures the increasing trend. However, the seasonal model captures the seasonality and the naive model conservatively captures the both the seasonality and upward trend which cancel each other out.
Any of these three models depict characteristics of the time series in arguable ways.
FB<-gafa_stock %>%
filter(Symbol=="FB")
autoplot(FB,Close)+labs(y="$", title="Facebook CLOSE")
# Re-index based on trading days
google_stock <- gafa_stock %>%
filter(Symbol == "FB", year(Date) >= 2015) %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
# Filter the year of interest
google_2015 <- google_stock %>% filter(year(Date) == 2015)
# Fit the models
google_fit <- google_2015 %>%
model(
Mean = MEAN(Close),
`Naïve` = NAIVE(Close),
Drift = NAIVE(Close ~ drift())
)
# Produce forecasts for the trading days in January 2016
google_jan_2016 <- google_stock %>%
filter(yearmonth(Date) == yearmonth("2016 Jan"))
google_fc <- google_fit %>%
forecast(new_data = google_jan_2016)
# Plot the forecasts
google_fc %>%
autoplot(google_2015, level = NULL) +
autolayer(google_jan_2016, Close, colour = "black") +
labs(y = "$US",
title = "Google daily closing stock prices",
subtitle = "(Jan 2015 - Jan 2016)") +
guides(colour = guide_legend(title = "Forecast"))
Using and modifying the code in the text, accounted for a new time index based on trading days,used the data 2015 to forecast Jan 2016. In addition, three models are shown: drift, mean and naive.
DISCUSSION:
This implies the slope of the line between the first and last point is equivalent to the slope of the line between the forecast first point( last point of the dataset used) and the any forecast point (all forecast points are on the same line.
Obtain the points and compare the slopes.
dim(google_2015)
## [1] 252 9
google_2015[1,6]
## # A tibble: 1 x 1
## Close
## <dbl>
## 1 78.4
google_2015[252,6]
## # A tibble: 1 x 1
## Close
## <dbl>
## 1 105.
#slope
(google_2015[252,6]-google_2015[1,6])/(252-1)
## Close
## 1 0.1044223
#above is the slope of the line used to forecast
# now compute the slope of the line drift forecast
#determine the last forecast point
dim(google_jan_2016)
## [1] 19 9
#2nd slope
#Here is point 1 in forecast
google_fc[39,]
## # A fable: 1 x 11 [1]
## # Key: Symbol, .model [1]
## Symbol .model day Close .mean Date Open High Low Adj_Close
## <chr> <chr> <int> <dist> <dbl> <date> <dbl> <dbl> <dbl> <dbl>
## 1 FB Drift 253 N(105, 2.1) 105. 2016-01-04 102. 102. 99.8 102.
## # ... with 1 more variable: Volume <dbl>
#Here is point 2 in forecaset
google_fc[40,]
## # A fable: 1 x 11 [1]
## # Key: Symbol, .model [1]
## Symbol .model day Close .mean Date Open High Low Adj_Close
## <chr> <chr> <int> <dist> <dbl> <date> <dbl> <dbl> <dbl> <dbl>
## 1 FB Drift 254 N(105, 4.3) 105. 2016-01-05 103. 104. 102. 103.
## # ... with 1 more variable: Volume <dbl>
#slope is delta(y)/delta(x
# change in x is 1 year
(google_fc[40,5]-google_fc[39,5])/1
## .mean
## 1 0.1044223
# check the line between the last point of series and 1 point of forecast
google_fc[39,5]-google_2015[252,6]
## .mean
## 1 0.1044223
DISCUSSION:
Above the mean model and Naive model are used as forecasts.
The naive model is better because the drift model captures the upward trend and not the downward seemingly randowm variability seen throughout.
# Extract data of interest
recent_production <- aus_production %>%
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production %>% model(SNAIVE(Beer))
#residual
aug<-recent_production %>% model(SNAIVE(Beer)) %>%
augment()
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Look a some forecasts
fit %>% forecast() %>% autoplot(recent_production)
aug %>% features(.innov, box_pierce, lag=8, dof=0)
## # A tibble: 1 x 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 29.7 0.000234
What do you conclude?
DISCUSSION: The forecast plot is visually acceptable.
Next, a review of the residual plots.
The residual plots seem to indicate the forecast accounts for all information. The time plot shows the same variability across time and are constant. The histogram is symmetric and normal looking with a mean of 0.
The acf plot does imply an investigation for lag 4 is warranted, the model may need adjustment.
The box pierce test is signficance, different than white noise.
# Extract data of interest
Aus_production <- global_economy %>%
filter(Country=="Australia")
# Define and estimate a model
fit2 <-Aus_production %>% model(NAIVE(Exports))
# Look at the residuals
fit2 %>% gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
#residual
aug2<-Aus_production %>% model(NAIVE(Exports)) %>%
augment()
# Look a some forecasts
fit2 %>% forecast() %>% autoplot(Aus_production)
#
#SNAIVE
# Define and estimate a model
fit3 <-Aus_production %>% model(SNAIVE(Exports~lag(4)))
# Look at the residuals
fit3 %>% gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Look a some forecasts
fit3 %>% forecast() %>% autoplot(Aus_production)
aug2 %>% features(.innov, box_pierce, lag=8, dof=0)
## # A tibble: 1 x 4
## Country .model bp_stat bp_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 Australia NAIVE(Exports) 13.8 0.0858
aug2 %>% features(.innov, ljung_box, lag=8, dof=0)
## # A tibble: 1 x 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 Australia NAIVE(Exports) 15.5 0.0508
Based on the residual diagnostic plots, the NAIVE model is superior. The acf plot is displays non-significant correlations and the residual histogram appears symmetric and normally distributed. Both the box-pierce and ljung_box test are not significant at the .05 level indicating the residuals are not distinguishable from white noise.
Meanwhile, the SNAIVE residual diagnostic plots show a couple of significant autocorrelation at lag1,4. The histogram is left skewed, not normal. The residual over time does show more variability than the NAIVE.
The NAIVE model is more appropriate.
set.seed(1111111)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries,.vars=Turnover)
myseries_train <- myseries %>%
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
fit4 <- myseries_train %>%
model(SNAIVE(Turnover~lag(4)))
fit4 %>% gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
fit4 %>% forecast() %>% autoplot(myseries_train)
Do the residuals appear to be uncorrelated and normally distributed?
DISCUSSION:
There are problems with all three residual diagnostic plots.
In the first plot, the residuals over time depict an increasing variability.
The acf plot of residuals display several significant autocorrelations.
The histogram of residuals shows a left skew.
fc <- fit4 %>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
fc %>% autoplot(myseries)
f. Compare the accuracy of your forecasts against the actual values.
fit4 %>% accuracy()
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Weste~ Newspape~ SNAIV~ Trai~ 0.244 5.56 3.52 -0.569 14.2 0.859 0.981 0.449
fc %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(~ West~ Newspape~ Test 4.51 9.52 7.77 9.18 21.3 1.90 1.68 0.528
DISCUSSION:
The accuracy measures from the training set smaller.
The training set is using the most recent data as opposed to the whole set in this case.
More recent data more produced more accurate forecast.