library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(tsibbledata)
library(ggfortify)
## Loading required package: ggplot2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(USgas)
library(readr)
library(tidyr)
library(readxl)
library(httr)
library(feasts)
## Loading required package: fabletools
library(stats)
library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.6 v fable 0.3.1
## v lubridate 1.8.0
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
ge_aus = global_economy %>% filter(Country=='Australia')
ge_aus_fit = ge_aus %>%
model(
Drift= NAIVE(Population~drift())
)
ge_aus_fit %>% forecast(h = 14) %>% autoplot(ge_aus)
a_bricks = aus_production %>% select(Bricks) %>% filter(!is.na(Bricks))
a_bricks_fit = a_bricks %>%
model(
Drift= SNAIVE(Bricks)
)
a_bricks_fit %>% forecast(h = 4) %>% autoplot(a_bricks)
nsw_lambs = aus_livestock %>% filter(Animal=='Lambs',State=='New South Wales')
nsw_lambs_fit = nsw_lambs %>%
model(
Drift= SNAIVE(Count)
)
nsw_lambs_fit %>% forecast(h = 12) %>% autoplot(nsw_lambs)
household_wealth = hh_budget %>% select(Wealth) %>% filter(Country=='Australia') %>% filter(!is.na(Wealth))
household_wealth_fit = household_wealth %>%
model(
`Naïve` = NAIVE(Wealth)
)
household_wealth_fit %>% forecast(h = 14) %>% autoplot(household_wealth)
aus_retail_takeaway = aus_retail %>% filter(Industry=='Takeaway food services',State=='Australian Capital Territory')
aus_retail_takeaway_fit = aus_retail_takeaway %>%
model(
`Seasonal Naïve` = SNAIVE(Turnover)
)
aus_retail_takeaway_fit %>% forecast(h = 36) %>% autoplot(aus_retail_takeaway)
gafa_stock %>% filter(Symbol=='FB') %>% select(Close) %>% autoplot(.vars = Close)
close_FB = gafa_stock %>% filter(Symbol=='FB') %>% select(Close) %>% mutate(day=row_number()) %>% update_tsibble(index=day,regular = TRUE)
fb_test_2015 <- close_FB %>% filter(year(Date) == 2015)
close_FB_fit <- fb_test_2015 %>%
model(
Mean = MEAN(Close),
`Naïve` = NAIVE(Close),
Drift = NAIVE(Close ~ drift())
)
fb_2016 <- close_FB %>%
filter(yearmonth(Date) == yearmonth("2016 Jan"))
fb_fc <- close_FB_fit %>%
forecast(new_data = fb_2016)
# Plot the forecasts
fb_fc %>%
autoplot(fb_test_2015, level = NULL) +
autolayer(fb_2016, Close, colour = "black") +
labs(y = "$US",
title = "Google daily closing stock prices",
subtitle = "(Jan 2015 - Jan 2016)") +
guides(colour = guide_legend(title = "Forecast"))
## slope test: 2015 Test Dataset
(fb_test_2015[252,1]-fb_test_2015[1,1])/(dim(fb_test_2015)[1]-1)
## Close
## 1 0.1044223
## Slope test: Forecasted Dataset
(fb_fc[38,4]-fb_fc[37,4])/1
## .mean
## 1 0
#q5.2D
The above plot captures multiple benchmarks for this analysis. The drift model best captures the variability and information represented in the dataset.
# Extract data of interest
recent_production <- aus_production %>%
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production %>% model(SNAIVE(Beer))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Look a some forecasts
fit %>% forecast() %>% autoplot(recent_production)
# create augmentation to see box pierce test
aug<-recent_production %>% model(SNAIVE(Beer)) %>%
augment()
aug %>% features(.innov, box_pierce, lag=8, dof=0)
## # A tibble: 1 x 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 29.7 0.000234
We can accept the null hypothesis that the bp_pvalue is statistically significant and is different than white noise
Examining the charts we notice that the residuals violate the ACF on the left side of the ACF visualization.
Residuals appear constant and distribution appears to be bimodal.
# Extract data of interest
aus_exports <- global_economy %>%
filter(Country=='Australia')
# Define and estimate a model
fit <- aus_exports %>% model(NAIVE(Exports))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 1 row(s) containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_bin).
# Look a some forecasts
fit %>% forecast() %>% autoplot(aus_exports)
# create augmentation to see box pierce test
aug<-aus_exports %>% model(NAIVE(Exports)) %>%
augment()
aug %>% features(.innov, box_pierce, lag=8, dof=0)
## # A tibble: 1 x 4
## Country .model bp_stat bp_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 Australia NAIVE(Exports) 13.8 0.0858
Australian exports residuals are normally distributed. A violation of the ACF occurs in the left side of the ACF visualization. Variability is consistent.
We fail to accept the null hypothesis here as the bp_value indicates that we cannot distinguish the series from white noise.
# Extract data of interest
aus_bricks <- aus_production %>% select(Bricks) %>% filter(!is.na(Bricks))
# Define and estimate a model
fit <- aus_bricks %>% model(SNAIVE(Bricks))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
# Look a some forecasts
fit %>% forecast() %>% autoplot(aus_bricks)
# create augmentation to see box pierce test
aug<-aus_bricks %>% model(SNAIVE(Bricks)) %>%
augment()
aug %>% features(.innov, box_pierce, lag=8, dof=0)
## # A tibble: 1 x 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Bricks) 267. 0
Australian bricks residuals are normally distributed. A violation of the ACF occurs throughout the ACF visualization. Variability is not consistent.
We accept the null hypothesis here as the bp_value indicates that we can distinguish the series from white noise.
set.seed(12354687)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries,.vars=Turnover)
myseries_train <- myseries %>%
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
fit <- myseries_train %>%
model(SNAIVE(Turnover))
fit %>% gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
The residuals are approximately normally distributed but appear to be correlated at the beginning of the dataset. The ACF is violated at the beginning and end of the dataset.
fc <- fit %>%
forecast(new_data = anti_join(myseries, myseries_train))
## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")
fc %>% autoplot(myseries)
fit %>% accuracy()
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Weste~ Electrica~ SNAIV~ Trai~ 5.20 14.4 10.3 5.45 11.5 1 1 0.769
fc %>% accuracy(myseries)
## # A tibble: 1 x 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE~ Weste~ Electric~ Test 12.5 17.7 14.7 6.50 7.78 1.42 1.23 0.690
g.) The accuracy measures appear to be very sensitive to the training dataset, and the recent data provided a wide range of possible forecasted values relative to the original dataset.