library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.3
## ✔ dplyr 1.1.2 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.2 ✔ fable 0.3.3
## ✔ ggplot2 3.4.2 ✔ fabletools 0.3.4
## Warning: package 'tsibble' was built under R version 4.3.2
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
head(global_economy)
## # A tsibble: 6 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
## 2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
## 3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
## 4 Afghanistan AFG 1963 751111191. NA NA 16.9 9.17 9533954
## 5 Afghanistan AFG 1964 800000044. NA NA 18.1 8.89 9731361
## 6 Afghanistan AFG 1965 1006666638. NA NA 21.4 11.3 9938414
use whatever of naive(y),snaive(y) or Rnaive(y)
## Make sure to select one column so that the autoplot only selects the Population column
Auspop <- global_economy %>%
filter(Country == "Australia") %>%
select(Population)
Auspop %>%
autoplot() +
labs(title = "Australia Population")
## Plot variable not specified, automatically selected `.vars = Population`
## Using the naive forecast to forecast the next 5 years..
Auspop %>%
model(Naive = NAIVE(Population)) %>%
forecast(h = 5) %>%
autoplot(Auspop) + labs(title = "Australia Population")
## Use the other forecasting methods to see how it does..
Aus_train <- Auspop %>%
filter_index("2000" ~ "2017")
Aus_fit <- Aus_train %>%
model(
Naive = NAIVE(Population),
Drift = RW(Population ~ drift()
))
Aus_fc <- Aus_fit %>%
forecast(h = "5 years")
Aus_fc %>%
autoplot(Aus_train,level = NULL) +
autolayer(
filter_index(Auspop,"2000" ~.),
color = "black"
) +
labs(title = "Australia Population") +
guides(color = guide_legend(title = "Forecast")
)
## Plot variable not specified, automatically selected `.vars = Population`
Bricks1 <- aus_production %>%
select(Bricks)
## There is empty values in this chart..
Bricks1 %>%
autoplot() + labs(title = "Australia Brick Productions")
## Plot variable not specified, automatically selected `.vars = Bricks`
## Warning: Removed 20 rows containing missing values (`geom_line()`).
## Filter out na values
Bricks1 <- Bricks1 %>%
filter(!is.na(Bricks))
## Now we forecast between 1992 to 2009
Bricks_train <- Bricks1 %>%
filter_index("1992 Q1" ~ "2009 Q4")
Bricks_fit <- Bricks_train %>%
model(
Naive = NAIVE(Bricks),
`Seasonal Naive` = SNAIVE(Bricks),
Drift = RW(Bricks~drift())
)
## Forecast for the next 4 quarters
Bricks_fc <- Bricks_fit %>%
forecast(h = 18)
Bricks_fc %>%
autoplot(Bricks_train,level = NULL) +
autolayer(
filter_index(Bricks1,"1992 Q1" ~.),
color = "black"
) +
labs(title = "Bricks Production") +
guides(color = guide_legend(title = "Forecast")
)
## Plot variable not specified, automatically selected `.vars = Bricks`
NSWlamb <- aus_livestock %>%
filter(Animal == "Lambs",State == "New South Wales") %>%
select(Count)
NSWlamb %>%
autoplot() + labs(title = "New South Wales Lambs")
## Plot variable not specified, automatically selected `.vars = Count`
NSW_train <- NSWlamb %>%
filter_index("2000 Jan"~.)
NSW_fit <- NSW_train %>%
model(
Naive = NAIVE(Count),
`Seasonal Naive` = SNAIVE(Count),
Drift = RW(Count~drift())
)
## Forecast for the next 12 months
NSW_fc <- NSW_fit %>%
forecast(h = 12)
NSW_fc %>%
autoplot(NSW_train,level = NULL) +
autolayer(
filter_index(NSW_train,"2000 Jan" ~.),
color = "black"
) +
labs(title = "Lambs slaughter") +
guides(color = guide_legend(title = "Forecast")
)
## Plot variable not specified, automatically selected `.vars = Count`
hhwealth <- hh_budget %>%
select(Wealth)
hhwealth %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = Wealth`
This plot seems to display a positive trend of wealth during the early
2010s followed by a decrease in 2008 and then climbs up afterwards.
hh_fit <- hhwealth %>%
model(
Naive = NAIVE(Wealth),
`Seasonal Naive` = SNAIVE(Wealth),
Drift = RW(Wealth~drift())
)
## Warning: 4 errors (1 unique) encountered for Seasonal Naive
## [4] Non-seasonal model specification provided, use RW() or provide a different lag specification.
## Forecast for the next 12 months
hh_fc <- hh_fit %>%
forecast(h = 12)
hh_fc %>%
autoplot(hhwealth,level = NULL) +
labs(title = "HH wealth") +
guides(color = guide_legend(title = "Forecast")
)
## Warning: Removed 48 rows containing missing values (`()`).
Takeaway <- aus_retail %>%
filter(Industry == "Takeaway food services",State == "Australian Capital Territory") %>%
select(Turnover)
Takeaway %>%
model(
naive = NAIVE(Turnover),
`seasonal_naive` = SNAIVE(Turnover),
Drift = RW(Turnover~drift())) %>%
forecast(h = 15) %>%
autoplot(Takeaway,level = NULL) + labs(title = "Food Takeaway in Australia")
fb_stock <- gafa_stock %>%
filter(Symbol == "FB")
ggplot(fb_stock,aes(x=Date)) +
geom_line(aes(y=Close)) +
labs(title = "Facebook Stock Closing Price")
Since the fb time-series are all irregular I am going to use the textbook example to fix it the irregular time series..
## Use example 5.2 from the textbook to re index it.
## Set up a new time index based on the trading day rather than calendar days.
fb_stock <- fb_stock %>%
mutate(day = row_number()) %>%
update_tsibble(index = day,regular = TRUE) %>%
select(Close)
## Model the data..
fb_stock_fit <- fb_stock %>%
model(
drift = RW(Close~drift())
)
fb_plot <- fb_stock_fit %>%
forecast(h = 50) %>%
autoplot(fb_stock) + labs(title = "Facebook stocks Forecast")
fb_plot
## Forecast the data by 50 days and then autoplot the fb_stock with it
ggplot(fb_stock,aes(x = day)) +
geom_line(aes(y = Close)) + geom_segment(aes(x = min(day),y = first(Close),
xend = max(day),yend = last(Close)), color = "red",linetype = "dashed") +
labs(title = "Facebook Stock Closing Prices")
fb_stock %>%
model(
Mean = MEAN(Close),
`Naïve` = NAIVE(Close),
Drift = NAIVE(Close ~ drift())
) %>%
## Forecast for a 100 days
forecast(h = 100) %>%
autoplot(fb_stock,level = NULL) + labs(title = "Facebook stocks forecast") +
guides(colour = guide_legend(title = "Forecast"))
It is really difficult to tell but I would assume that the drift method is the best overall since the time series seems to have a positive trend overall in the long-run.
Applying the code from the textbook
# Extract data of interest We can see some seasonal patterns for each quarter
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
recent_production %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = Beer`
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values (`geom_line()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).
# Look at some forecasts
fit |> forecast() |> autoplot(recent_production)
It seems the residuals doesn’t look like white noise the residuals seems to be centered around 0 but displays high variance while the histogram shows a bi modal normal distribution, and the acf doesn’t display any signs of correlation. Thus, the residuals seems normally distributed and not correlated.
We can use the two tests to see if the residuals are white noise or not.
## Use 8 for lag because the data is seasonal data and there are 4 quarters per year and we multiply it by 2
fit %>%
augment() %>%
features(.innov,box_pierce,lag = 8)
## # A tibble: 1 × 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 29.7 0.000234
fit %>%
augment() %>%
features(.innov,ljung_box, lag = 8)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 32.3 0.0000834
Besides on both tests, since the p-value is less than 0.05 we can reject the hypothesis of white noise thus the residuals are not white noise..
Aus_exports <- global_economy %>%
filter(Country == "Australia") %>%
select(Exports)
## Use the naive method.
Aus_fit <- Aus_exports %>%
model(NAIVE(Exports))
## Forecast for the next 15 years..
Aus_mod <- Aus_fit %>%
forecast(h = 15)
Aus_mod %>%
autoplot(Aus_exports) +
labs(title = "Australian Exports")
## Check the residuals
Aus_fit %>%
gg_tsresiduals()
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
The residuals look normally distributed but the lag plot shows some correlation at certain points, and the variance is centered around 0 but has high variance.
## THe plot doesn't seems seasonal so I will use 10 according to the authors
Aus_fit %>%
augment() %>%
features(.innov,box_pierce,lag = 10)
## # A tibble: 1 × 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 NAIVE(Exports) 14.6 0.148
Aus_fit %>%
augment() %>%
features(.innov,ljung_box,lag = 10)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 NAIVE(Exports) 16.4 0.0896
The p-value are not significant for both tests for the Australia Exports thus it seems that we fail to reject the null of white noise and conclude that the residuals are white noise….
## This is quarterly data of bricks so lag = 8?
Brickss <- aus_production %>%
select(Bricks) %>%
filter(!is.na(Bricks))
## Data appears seasonal
Brickss %>%
autoplot() +
labs(title = "Clay Bricks Production")
## Plot variable not specified, automatically selected `.vars = Bricks`
Brick_fit <- Brickss %>%
model(SNAIVE(Bricks))
Brick_fit %>%
gg_tsresiduals()
## Warning: Removed 4 rows containing missing values (`geom_line()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).
The innovation residuals had low variation at first but then had higher variation during 1980, the ACF had a strong value at the start and the residuals are left-tail skewed, perhaps the residuals are white-noise?
Brick_fit %>%
augment() %>%
features(.innov,box_pierce,lag = 10)
## # A tibble: 1 × 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Bricks) 292. 0
Brick_fit %>%
augment() %>%
features(.innov,ljung_box,lag = 10)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Bricks) 301. 0
The p-value is zero but i feel like I messed somewhere.. cause the residuals seems like white noise to me.
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_train <- myseries %>%
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
fit <- myseries_train |>
model(SNAIVE(Turnover))
fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values (`geom_line()`).
## Warning: Removed 12 rows containing missing values (`geom_point()`).
## Warning: Removed 12 rows containing non-finite values (`stat_bin()`).
The residuals doesn’t seem to be not uncorrelated and the distribution seems unimodal normal distribution. The ACF seems to shows correlation between each point of lag until the 11th value which decreases.
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries)
## Accuracy of the fit,.
fit |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439 1.21 0.915 5.23 12.4 1 1 0.768
## Actual accuracy
fc |> accuracy(myseries)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test 0.836 1.55 1.24 5.94 9.06 1.36 1.28 0.601
I am unsure but it appears that the accuracy isn’t that sensitive since the MAE and the RMSE are pretty close in values in the training set.