Data624

library(tsibble)

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(tsibbledata)
library(ggfortify)

## Loading required package: ggplot2

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(USgas)
library(readr)
library(tidyr)
library(readxl)
library(httr)
library(feasts)

## Loading required package: fabletools

library(stats)
library(fpp3)

## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --

## v tibble    3.1.6     v fable     0.3.1
## v lubridate 1.8.0

## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()     masks base::date()
## x dplyr::filter()       masks stats::filter()
## x tsibble::intersect()  masks base::intersect()
## x lubridate::interval() masks tsibble::interval()
## x dplyr::lag()          masks stats::lag()
## x tsibble::setdiff()    masks base::setdiff()
## x tsibble::union()      masks base::union()

library(seasonal)

## 
## Attaching package: 'seasonal'

## The following object is masked from 'package:tibble':
## 
##     view

ge_aus = global_economy %>% filter(Country=='Australia')

ge_aus_fit = ge_aus %>%
  model(
    
    Drift= NAIVE(Population~drift())
  )

ge_aus_fit %>% forecast(h = 14) %>% autoplot(ge_aus)

a_bricks = aus_production %>% select(Bricks) %>% filter(!is.na(Bricks))

a_bricks_fit = a_bricks %>%
  model(
    
    Drift= SNAIVE(Bricks)
  )

a_bricks_fit %>% forecast(h = 4) %>% autoplot(a_bricks)

nsw_lambs = aus_livestock %>% filter(Animal=='Lambs',State=='New South Wales')

nsw_lambs_fit = nsw_lambs %>%
  model(
    
    Drift= SNAIVE(Count)
  )

nsw_lambs_fit %>% forecast(h = 12) %>% autoplot(nsw_lambs)

household_wealth = hh_budget %>% select(Wealth) %>% filter(Country=='Australia')  %>% filter(!is.na(Wealth))

household_wealth_fit = household_wealth %>%
  model(
    
    `Naïve` = NAIVE(Wealth)
    
    
  )

household_wealth_fit %>% forecast(h = 14) %>% autoplot(household_wealth)

aus_retail_takeaway = aus_retail %>% filter(Industry=='Takeaway food services',State=='Australian Capital Territory') 

aus_retail_takeaway_fit = aus_retail_takeaway %>%
  model(
    
    
     `Seasonal Naïve` = SNAIVE(Turnover)
    
    
    
  )

aus_retail_takeaway_fit %>% forecast(h = 36) %>% autoplot(aus_retail_takeaway)

gafa_stock %>% filter(Symbol=='FB') %>% select(Close) %>% autoplot(.vars = Close)

close_FB = gafa_stock %>% filter(Symbol=='FB') %>% select(Close) %>% mutate(day=row_number()) %>% update_tsibble(index=day,regular = TRUE)


fb_test_2015 <- close_FB %>% filter(year(Date) == 2015)


close_FB_fit <- fb_test_2015 %>%
  model(
    Mean = MEAN(Close),
    `Naïve` = NAIVE(Close),
    Drift = NAIVE(Close ~ drift())
  )

fb_2016 <- close_FB %>%
  filter(yearmonth(Date) == yearmonth("2016 Jan"))

fb_fc <- close_FB_fit %>%
  forecast(new_data = fb_2016)

# Plot the forecasts
fb_fc %>%
  autoplot(fb_test_2015, level = NULL) +
  autolayer(fb_2016, Close, colour = "black") +
  labs(y = "$US",
       title = "Google daily closing stock prices",
       subtitle = "(Jan 2015 - Jan 2016)") +
  guides(colour = guide_legend(title = "Forecast"))

## slope test: 2015 Test Dataset

(fb_test_2015[252,1]-fb_test_2015[1,1])/(dim(fb_test_2015)[1]-1)

##       Close
## 1 0.1044223

## Slope test: Forecasted Dataset

(fb_fc[38,4]-fb_fc[37,4])/1

##   .mean
## 1     0

#q5.2D

The above plot captures multiple benchmarks for this analysis. The drift model best captures the variability and information represented in the dataset.

# Extract data of interest
recent_production <- aus_production %>%
  filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production %>% model(SNAIVE(Beer))
# Look at the residuals
fit %>% gg_tsresiduals()

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast() %>% autoplot(recent_production)

# create augmentation to see box pierce test
aug<-recent_production %>% model(SNAIVE(Beer)) %>%
  augment()
aug %>% features(.innov, box_pierce, lag=8, dof=0)

## # A tibble: 1 x 3
##   .model       bp_stat bp_pvalue
##   <chr>          <dbl>     <dbl>
## 1 SNAIVE(Beer)    29.7  0.000234

We can accept the null hypothesis that the bp_pvalue is statistically significant and is different than white noise

Examining the charts we notice that the residuals violate the ACF on the left side of the ACF visualization.

Residuals appear constant and distribution appears to be bimodal.

# Extract data of interest
aus_exports <- global_economy  %>%
  filter(Country=='Australia')
# Define and estimate a model
fit <- aus_exports %>% model(NAIVE(Exports))
# Look at the residuals
fit %>% gg_tsresiduals()

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast() %>% autoplot(aus_exports)

# create augmentation to see box pierce test
aug<-aus_exports %>% model(NAIVE(Exports)) %>%
  augment()
aug %>% features(.innov, box_pierce, lag=8, dof=0)

## # A tibble: 1 x 4
##   Country   .model         bp_stat bp_pvalue
##   <fct>     <chr>            <dbl>     <dbl>
## 1 Australia NAIVE(Exports)    13.8    0.0858

Australian exports residuals are normally distributed. A violation of the ACF occurs in the left side of the ACF visualization. Variability is consistent.

We fail to accept the null hypothesis here as the bp_value indicates that we cannot distinguish the series from white noise.

# Extract data of interest
aus_bricks <- aus_production  %>% select(Bricks) %>% filter(!is.na(Bricks)) 

# Define and estimate a model
fit <- aus_bricks %>% model(SNAIVE(Bricks))
# Look at the residuals
fit %>% gg_tsresiduals()

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

# Look a some forecasts
fit %>% forecast() %>% autoplot(aus_bricks)

# create augmentation to see box pierce test
aug<-aus_bricks %>% model(SNAIVE(Bricks)) %>%
  augment()
aug %>% features(.innov, box_pierce, lag=8, dof=0)

## # A tibble: 1 x 3
##   .model         bp_stat bp_pvalue
##   <chr>            <dbl>     <dbl>
## 1 SNAIVE(Bricks)    267.         0

Australian bricks residuals are normally distributed. A violation of the ACF occurs throughout the ACF visualization. Variability is not consistent.

We accept the null hypothesis here as the bp_value indicates that we can distinguish the series from white noise.

set.seed(12354687)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

autoplot(myseries,.vars=Turnover)

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

fit <- myseries_train %>%
  model(SNAIVE(Turnover))

fit %>% gg_tsresiduals()

## Warning: Removed 12 row(s) containing missing values (geom_path).

## Warning: Removed 12 rows containing missing values (geom_point).

## Warning: Removed 12 rows containing non-finite values (stat_bin).

The residuals are approximately normally distributed but appear to be correlated at the beginning of the dataset. The ACF is violated at the beginning and end of the dataset.

fc <- fit %>%
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining, by = c("State", "Industry", "Series ID", "Month", "Turnover")

fc %>% autoplot(myseries)

fit %>% accuracy()

## # A tibble: 1 x 12
##   State  Industry   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>      <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Weste~ Electrica~ SNAIV~ Trai~  5.20  14.4  10.3  5.45  11.5     1     1 0.769

fc %>% accuracy(myseries)

## # A tibble: 1 x 12
##   .model  State  Industry  .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>   <chr>  <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE~ Weste~ Electric~ Test   12.5  17.7  14.7  6.50  7.78  1.42  1.23 0.690

g.) The accuracy measures appear to be very sensitive to the training dataset, and the recent data provided a wide range of possible forecasted values relative to the original dataset.

Data624_HW3

Hector Santana

3/1/2022