library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.4
## ✔ dplyr 1.1.3 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.3 ✔ fable 0.3.3
## ✔ ggplot2 3.4.3 ✔ fabletools 0.3.4
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y~ drift()) is more appropriate in each case:
Australian Population (global economy)
global_economy|>
filter (Country=='Australia')|>
autoplot(Population)
The plot is linear therefore the most suitable forecast model will be a linear model. Also, there is strong trend observed in the plot so the drift model will be the best one for this problem.
aus_economy <- global_economy %>%
filter(Code == "AUS")
aus_economy %>%
model(Drift = RW(Population ~ drift())) %>%
forecast(h = 15) %>%
autoplot(aus_economy) +
labs(title = "Australian Population Forcast")
Bricks (aus_production)
aus_production|>
filter(!is.na(Bricks))|>
autoplot(Bricks)
It appears that the data has seasonality as well as trend from 1980 onward. Therefore, the most suitable forecast might be based on seasonal Naive Bayes model.
aus_production %>%
filter(!is.na(Bricks)) %>%
model(SNAIVE(Bricks ~ lag(20))) %>%
forecast(h = 20) %>%
filter(!is.na(Bricks)) %>%
autoplot(aus_production) +
labs(title = "Australian Bricks Production Forcast")
## Warning: Removed 20 rows containing missing values (`geom_line()`).
NSW Lambs (aus_livestock)
aus_livestock|>
filter(Animal == 'Lambs' & State == 'New South Wales')|>
autoplot(Count)
The plot shows the seasonality and trend both.Therefore, Seasonal Naive Bays model best suit for this data.
aus_livestock|>
filter(Animal == 'Lambs' & State == 'New South Wales')|>
model(snaive = SNAIVE(Count ~ lag(15)))|>
forecast(h=20)|>
autoplot(aus_livestock)
Household wealth (hh_budget)
hh_budget %>%
filter(Country =='Australia') |>
model(Drift = RW(Wealth ~ drift())) |>
forecast(h = 10) |>
autoplot(hh_budget) +
labs(title = "Household wealth Forcast")
Australian takeaway food turnover (aus_retail).
aus_retail %>%
filter(State == "South Australia",
Industry == 'Takeaway food services') %>%
model(SNAIVE(Turnover ~ lag(15))) %>%
forecast(h = 15) %>%
autoplot(aus_retail) +
labs(title = "South Australian takeaway food turnover Forcast")
Use the facebook stock price (data set gafa_stock) to do the following:
Produce a time plot of the series.
fb_df <- gafa_stock|>
filter(Symbol == 'FB')|>
filter(!is.na(Close))|>
as_tsibble(key = Symbol, index = Date, regular = TRUE)|>
fill_gaps()
autoplot(fb_df, Close)+
labs(title = "Historical price of facebook stock",
x = 'Time (day)',
y= 'Close price')
Produce forecasts using the drift method and plot them.
fb_df %>%
model(Drift = RW(Close ~ drift())) %>%
forecast(h = 30) %>%
autoplot(fb_df) +
labs(title = "Facebook Close Price Forcast")
Show that the forecasts are identical to extending the line drawn between the first and last observations.
df_line <- data.frame(x1 = as.Date('2014-01-02'), x2 = as.Date('2018-12-31'), y1= 54.71, y2=131.09)
fb_df %>%
model(Drift = RW(Close ~ drift())) %>%
forecast(h = 50) %>%
autoplot(fb_df) +
labs(title = "Facebook Close Price Forcast")+
geom_segment(data = df_line, aes(x=x1, y=y1, xend = x2, yend = y2), colour = 'blue', linetype ='dashed')
fb_model<- fb_df |>
filter_index("2018-01-01" ~ "2019-01-01")|>
fill_gaps(Close)
fb_model|>
model(Drift = SNAIVE(Close, lag('10 day'))) |>
forecast(h = 15)|>
autoplot(fb_model) +
labs(title = "Facebook Close Price Forcast")
## Warning: Removed 2 rows containing missing values (`()`).
## Warning: Removed 1 row containing missing values (`geom_line()`).
Apply a seasonal naive method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.
prod_fr_1992 <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- prod_fr_1992|> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values (`geom_line()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).
# Look a some forecasts
fit |> forecast() |> autoplot(prod_fr_1992)
what do you conclude?
Two hypotheses of residuals that must be satisfied are: a. The mean of residuals must be zero. b. No trend in residuals residuals
Both of the above hypotheses are satisfied: The residual plots is symmetric about the y=0 thus no trend and mean of the residuals is zero. Also, using the histogram of residuals, it can be concluded that the distribution of residuals is normal. But ACF plot shows some abnormalities in the residuals. But, we can adjust the time of the data so that outliers can be dropped and the remaining series will give a best prediction.
Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of Naive() or SNAIVE() is more appropriate in each case.
head(global_economy, 3)
## # A tsibble: 3 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
## 2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
## 3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
aus_export <- global_economy |>
filter( Code == 'AUS')|>
select(Exports)
autoplot(aus_export, Exports)+
labs(title = 'Australian Exports',
x= 'Year', y = 'Exports')
The data seems to have trend but no seasonality is appeared in the chart. However, I think NAIVE method might be more appropriate for this case.
# Define and estimate a model
fit <- aus_export|> model(SNAIVE(Exports~ lag(10)))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: Removed 10 rows containing missing values (`geom_line()`).
## Warning: Removed 10 rows containing missing values (`geom_point()`).
## Warning: Removed 10 rows containing non-finite values (`stat_bin()`).
# Look a some forecasts
fit |> forecast(h=10) |> autoplot(aus_export)
fit <- aus_production |>
filter(!is.na(Bricks))|>
model(SNAIVE(Bricks ~ lag("year")))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 4 rows containing missing values (`geom_line()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).
# Look a some forecasts
fit |> forecast(h=10) |> autoplot(aus_production)
## Warning: Removed 20 rows containing missing values (`geom_line()`).
For your retail time series (from exercise 7 in Section 2.10):
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_train <- myseries |>
filter(year(Month) < 2011)
{{r}} autoplot(myseries, Turnover) + autolayer(myseries_train, Turnover, colour = "red")
fit <- myseries_train |>
model(snaive = SNAIVE(Turnover))
fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values (`geom_line()`).
## Warning: Removed 12 rows containing missing values (`geom_point()`).
## Warning: Removed 12 rows containing non-finite values (`stat_bin()`).
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries)
fit |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… snaive Trai… 0.439 1.21 0.915 5.23 12.4 1 1 0.768
fc |> accuracy(myseries)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 snaive Norther… Clothin… Test 0.836 1.55 1.24 5.94 9.06 1.36 1.28 0.601
The accuracy measures are sensitive to the proportion of data as training data. If the proportions of training data is more than 90% then the model might be over-fit and can give very accurate result but on unseen lasrge dataset, the model might behave badly. If the proportion of training data is toos less like less than 50% then the model is under fit and may give very inaccurate result. So there is need of balance of training and test dataset. It’s considered a good practice to use 80% of the dataset as the training dataset and rest 20% as the validation or test dataset.