homework 3

Question 1

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(tsibble)

## 
## Attaching package: 'tsibble'
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(tsibbledata)
library(ggplot2)
library(fable)

## Loading required package: fabletools

# Australian rows of the global economy data, reused by both plots below.
pop <- filter(global_economy, Country == "Australia")

# Historical population series.
pop %>%
  autoplot(Population) +
  labs(title = "Australian Population")

# Random walk with drift, projected ten years ahead and drawn over the
# history supplied by global_economy.
pop_drift <- RW(Population ~ drift())
autoplot(
  forecast(model(pop, pop_drift), h = "10 years"),
  global_economy
) +
  labs(title = "Australian Population", y = "people")

# Raw Bricks series straight from aus_production (missing values included).
autoplot(aus_production, Bricks)

## Warning: Removed 20 row(s) containing missing values (geom_path).

# Keep only the rows where Bricks is observed, so the model and the plot
# work on a series without missing values.
brick <- aus_production %>%
  filter(!is.na(Bricks))

# Seasonal naive benchmark with a one-year seasonal lag, forecast ten
# periods ahead and drawn over the cleaned history.
brick %>%
  model(SNAIVE(Bricks ~ lag("year"))) %>%
  forecast(h = 10) %>%
  autoplot(brick) +
  # labs() is keyed by aesthetic name: the x-axis label must be passed as
  # `x`; an `xlab` entry matches no aesthetic and is silently ignored.
  labs(title = "SNAIVE Forecast ", x = "Year")

# New South Wales lamb counts, shared by the plot and the benchmark models.
train <- filter(aus_livestock, State == "New South Wales", Animal == "Lambs")

# History of the selected series.
autoplot(train, Count)

# Four benchmark methods fitted to the same series.
train_fit <- model(
  train,
  Mean = MEAN(Count),
  `Seasonal Naïve` = SNAIVE(Count),
  `Naïve` = NAIVE(Count),
  Drift = NAIVE(Count ~ drift())
)

# Ten-step forecasts from every benchmark, overlaid on the history.
autoplot(forecast(train_fit, h = 10), train)

# History of household wealth for every series in hh_budget.
hh_budget %>%
  autoplot(Wealth)

# The plot below is presented as a drift forecast, so the model has to
# include the drift() term; a plain NAIVE(Wealth) would give flat forecasts
# that contradict the title.
budget <- hh_budget %>%
  model(drift = RW(Wealth ~ drift()))

# Ten-step drift forecasts drawn over the observed series.
budget %>%
  forecast(h = 10) %>%
  autoplot(hh_budget) +
  labs(title = "Drift Forecast")

# ACT takeaway food turnover, shared by the plot and the benchmark models.
train <- filter(
  aus_retail,
  State == "Australian Capital Territory",
  Industry == "Takeaway food services"
)

# History of the selected series.
autoplot(train, Turnover)

# Three benchmark methods fitted to the same series
# (note: train_data holds the fitted models, not data).
train_data <- model(
  train,
  `Seasonal Naïve` = SNAIVE(Turnover),
  `Naïve` = NAIVE(Turnover),
  Drift = NAIVE(Turnover ~ drift())
)

# Ten-step forecasts from every benchmark, overlaid on the history.
autoplot(forecast(train_data, h = 10), train)

Question 2

# Facebook rows of the GAFA stock data.
meta <- gafa_stock %>%
  filter(Symbol == "FB")

meta %>%
  autoplot(Close) +
  labs(y = "price", title = "Facebook Closing prices")

# Restrict to 2016-2018 and replace the Date index with a consecutive row
# counter so the series is regular; Date is kept as an ordinary column.
fbdf <- meta %>%
  filter_index("2016-01-01" ~ "2019-01-01") %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE) %>%
  select(Date, Close)

# Fit the drift model once and reuse the forecasts for both plots.
fb_drift_fc <- fbdf %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = 180)

fb_drift_fc %>%
  autoplot(fbdf) +
  labs(y = "price", title = "    Forecast Stock Prices ")

# First and last observations, read from the data instead of typed by hand,
# so the reference line passes exactly through both points.
fb_first <- head(as_tibble(fbdf), 1)
fb_last <- tail(as_tibble(fbdf), 1)

# Same forecast with a straight line drawn between the first and last
# observations. annotate() draws the segment once; geom_segment() with
# constant aesthetics would redraw it for every row of the plot data.
fb_drift_fc %>%
  autoplot(fbdf) +
  annotate(
    "segment",
    x = fb_first$day, y = fb_first$Close,
    xend = fb_last$day, yend = fb_last$Close
  ) +
  labs(y = "price ", title = "Forecast Stock Prices")

# Evaluation window: the January 2017 rows. The name `train` is kept from the
# original script, but these rows are what the forecasts are compared against.
train <- fbdf %>%
  filter(Date >= as.Date("2017-01-01") & Date <= as.Date("2017-01-31"))

# Fit only on observations that precede the evaluation window; fitting on the
# whole of fbdf would let the models see the values they are asked to predict.
fb_before_2017 <- fbdf %>%
  filter(Date < as.Date("2017-01-01"))

fb2 <- fb_before_2017 %>%
  model(
    Mean = MEAN(Close),
    `Naïve` = NAIVE(Close),
    # fbdf is indexed by an integer row counter, and the calendar-based
    # lag("month") errored on it, so the lag is given as a number of rows.
    # NOTE(review): 21 is an approximate count of trading days per month;
    # confirm the intended seasonal period.
    `Seasonal naïve` = SNAIVE(Close ~ lag(21)),
    Drift = NAIVE(Close ~ drift())
  )

# forecast() takes the future rows through `new_data`; a `data` argument is
# not recognised and leaves the horizon at its default.
f2 <- fb2 %>%
  forecast(new_data = train)

# Point forecasts from every benchmark over the fitting history, with the
# actual January 2017 prices overlaid in black.
f2 %>%
  autoplot(fb_before_2017, level = NULL) +
  autolayer(train, Close, colour = "black") +
  labs(y = "dollar",
       title = "Facebook daily stock prices") +
  guides(colour = guide_legend(title = "Forecast"))

I think the drift method is the best because it covers the actual trend.

Question 3

# Victorian calves series, shared by the plot and the model.
aus <- aus_livestock %>%
  filter(State == "Victoria" & Animal == "Calves")

aus %>%
  autoplot(Count)

# Seasonal naive benchmark. The seasonal lag is one year; the five-year
# length belongs to the forecast horizon `h`, not to lag(). A five-year lag
# would copy values from five years back and ignore the most recent seasons.
aus %>%
  model(SNAIVE(Count ~ lag("year"))) %>%
  forecast(h = "5 years") %>%
  autoplot(aus, level = NULL) +
  labs(title = " Victorian livestock")

Yes, it is a reasonable benchmark for the data

Question 4 a. True; good residuals should be normally distributed. b. False; good forecasts should have a zero residual mean, which is not necessarily true for small residuals. c. False; with small amounts of data there are better forecasts. d. False; it might just need the data cleaned a little more. e. True; you can't just rely on training data.

Question 5

# Pick one retail series at random; the seed makes the choice reproducible.
set.seed(23456789)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

# Training portion: observations up to the end of 2010.
myseries_train <- myseries %>%
  filter_index("1982 Apr" ~ "2010 Dec")

# Seasonal naive benchmark. The seasonal lag is one year; the five-year
# length belongs to the forecast horizon `h`, not to lag().
myseries_train %>%
  model(SNAIVE(Turnover ~ lag("year"))) %>%
  forecast(h = "5 years") %>%
  autoplot(myseries_train, level = NULL) +
  labs(title = " Turnover before 2011")

# Full series for comparison; naming the variable avoids relying on
# autoplot()'s automatic column selection.
autoplot(myseries, Turnover)

I did not do this one because I could not get the augment function to work properly.
My forecast failed to pick up on the overall increasing trend in the data. The actual results followed this trend.
Not very.

Question 6.

# New South Wales pigs series.
aus <- aus_livestock %>%
  filter(Animal == "Pigs" & State == "New South Wales")
autoplot(aus, Count)

# Training portion: the first 483 rows of the series.
# (Naming note: aus_fit holds the training data, aus_train the fitted models.)
aus_fit <- aus %>%
  filter(row_number() < 484)

# Four benchmark methods fitted to the training portion.
aus_train <- aus_fit %>%
  model(
    Seasonal_naive = SNAIVE(Count),
    Naive = NAIVE(Count),
    Drift = RW(Count ~ drift()),
    Mean = MEAN(Count)
  )

aus_fc <- aus_train %>%
  forecast(h = 10)

# Point forecasts over the training history. The labels describe the pigs
# series being plotted, not the brick production example they were copied from.
aus_fc %>%
  autoplot(aus_fit, level = NULL) +
  labs(title = "Pigs slaughtered in New South Wales",
       y = "Number of pigs") +
  guides(colour = guide_legend(title = "Forecast"))

The seasonal naive method looks like the best fit to me. As I mentioned earlier, I am unable to check the residuals because the augment function won't work for me.

Question 7.

# Bricks column of aus_production (select() on a tsibble also keeps the
# Quarter index, as the printed result shows), then display it.
brick <- select(aus_production, Bricks)
brick

## # A tsibble: 218 x 2 [1Q]
##    Bricks Quarter
##     <dbl>   <qtr>
##  1    189 1956 Q1
##  2    204 1956 Q2
##  3    208 1956 Q3
##  4    197 1956 Q4
##  5    187 1957 Q1
##  6    214 1957 Q2
##  7    227 1957 Q3
##  8    222 1957 Q4
##  9    199 1958 Q1
## 10    229 1958 Q2
## # … with 208 more rows

#bricksq %>%
 # stl(t.window=30, s.window="periodic", robust=TRUE) %>%
#  autoplot()
#brick %>%
 # model(STL(Bricks ~ season(window=5), robust=TRUE)) %>%
  #components() %>% autoplot() +
   # labs(title = "STL decomposition: US retail employment")

No matter what I could not get an STL to work for this data

Question 8

# Total Gold Coast trips per quarter. The total is stored in a column
# called `Purpose`, which the later steps refer to by that name.
gc_tourism <- summarise(
  filter(tourism, Region == "Gold Coast"),
  Purpose = sum(Trips)
)

gc_tourism

## # A tsibble: 80 x 2 [1Q]
##    Quarter Purpose
##      <qtr>   <dbl>
##  1 1998 Q1    827.
##  2 1998 Q2    681.
##  3 1998 Q3    839.
##  4 1998 Q4    820.
##  5 1999 Q1    987.
##  6 1999 Q2    751.
##  7 1999 Q3    822.
##  8 1999 Q4    914.
##  9 2000 Q1    871.
## 10 2000 Q2    780.
## # … with 70 more rows

# Three training sets that drop the last 1, 2 and 3 years (4, 8 and 12
# quarters) of gc_tourism.
gc_train_1 <- gc_tourism %>% slice(1:(n() - 4))
gc_train_2 <- gc_tourism %>% slice(1:(n() - 8))
gc_train_3 <- gc_tourism %>% slice(1:(n() - 12))

# One year of seasonal naive forecasts from a training subset of gc_tourism.
# The seasonal lag is one year; a five-year lag would copy values from five
# years back instead of the most recent year.
gc_snaive_fc <- function(train_data) {
  train_data %>%
    model(SNAIVE(Purpose ~ lag("year"))) %>%
    forecast(h = "1 year")
}

# gc_fc_N hold the forecasts themselves (not copies of the training data), so
# each can be scored against the full series, e.g. with
# accuracy(gc_fc_1, gc_tourism).
gc_fc_1 <- gc_snaive_fc(gc_train_1)
gc_fc_2 <- gc_snaive_fc(gc_train_2)
gc_fc_3 <- gc_snaive_fc(gc_train_3)

gc_fc_1 %>%
  autoplot(gc_train_1, level = NULL) +
  labs(title = " Forecast 1")

gc_fc_2 %>%
  autoplot(gc_train_2, level = NULL) +
  labs(title = " Forecast 2")

gc_fc_3 %>%
  autoplot(gc_train_3, level = NULL) +
  labs(title = " Forecast 3")

#accuracy(gc_train_1)

I cannot get the accuracy function to work.

Question 9.

# Apple rows of the GAFA stock data.
apple <- gafa_stock %>%
  filter(Symbol == "AAPL")

# Plot against calendar dates before the index is replaced.
apple %>%
  autoplot(Close)

# The Date index is irregular, which RW() and MEAN() reject. Replace it with
# a consecutive row counter and store the result; calling update_tsibble()
# without assigning its value leaves the data unchanged.
apple <- apple %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE)

# Expanding-window folds for time series cross-validation: the first fold
# has 10 rows and each later fold adds one more.
apple_stretch <- apple %>%
  stretch_tsibble(.init = 10, .step = 1)
apple_stretch

# Drift and mean benchmarks fitted on every fold.
fit <- apple_stretch %>%
  model(RW(Close ~ drift()))

fit_mean <- apple_stretch %>%
  model(mean = MEAN(Close))

# Training-set accuracy of the drift model on the full re-indexed series.
apple %>%
  model(RW(Close ~ drift())) %>%
  accuracy()

I can't figure out how to make my data regular, which is throwing off parts of the analysis.

Question 10.

#wheat_price<-prices%>%
#  select(wheat)
#wheat_price%>%
#  autoplot(wheat)
#wheat_stretch <- wheat_price %>%
#  stretch_tsibble(.init = 3, .step = 1) 

#fit_wheat <- wheat_stretch %>%
#  model(RW(wheat ~ drift()))

#boot<-wheat_price%>%
 # model(NAIVE(wheat))
#sim<-fit%>%generate(h=30,times=5, bootstrap = TRUE)
#sim
#bk <- fit %>% forecast(h = 30, bootstrap = TRUE, times=500)
#bk

At this point, I can't figure out anything new and my code is late, so I'm just going to turn it in.

homework 3

2022-10-02