Question 1
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tsibble)
##
## Attaching package: 'tsibble'
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(tsibbledata)
library(ggplot2)
library(fable)
## Loading required package: fabletools
# Keep only the rows of global_economy whose Country is Australia.
pop <- filter(global_economy, Country == "Australia")

# Time plot of the Population column.
pop %>%
  autoplot(Population) +
  labs(title = "Australian Population")

# Random walk with drift, forecast ten years ahead, drawn together with the
# data passed to autoplot().
pop_fit <- model(pop, RW(Population ~ drift()))
pop_fc <- forecast(pop_fit, h = "10 years")
autoplot(pop_fc, global_economy) +
  labs(y = "people", title = "Australian Population")
# Time plot of the Bricks column of aus_production.
autoplot(aus_production, Bricks)
## Warning: Removed 20 row(s) containing missing values (geom_path).
# Drop the rows where Bricks is missing before modelling.
brick <- aus_production %>%
  filter(!is.na(Bricks))

# Seasonal naive forecast (each forecast repeats the value one year earlier),
# ten steps ahead, drawn over the cleaned series.
brick %>%
  model(SNAIVE(Bricks ~ lag("year"))) %>%
  forecast(h = 10) %>%
  autoplot(brick) +
  # labs() names axes by aesthetic: the x-axis label is `x`, not `xlab`
  # (an `xlab =` entry is not an axis aesthetic and never reaches the axis).
  labs(title = "SNAIVE Forecast ",
       x = "Year")
# Lambs in New South Wales; the same subset feeds both the plot and the models.
train <- filter(aus_livestock, State == "New South Wales", Animal == "Lambs")

# Time plot of Count for that subset.
autoplot(train, Count)

# Four benchmark models fitted to Count.
train_fit <- model(
  train,
  Mean = MEAN(Count),
  `Seasonal Naïve` = SNAIVE(Count),
  `Naïve` = NAIVE(Count),
  Drift = NAIVE(Count ~ drift())
)

# Ten-step-ahead forecasts from every model, drawn over the data.
autoplot(forecast(train_fit, h = 10), train)
# Time plot of the Wealth column of hh_budget.
hh_budget %>%
  autoplot(Wealth)

# Fit a random walk WITH drift so that the model matches the "Drift Forecast"
# title of the plot below; the previous NAIVE(Wealth) model had no drift term,
# so the plot was labelled as something it did not show.
budget <- hh_budget %>%
  model(drift = RW(Wealth ~ drift()))

# Ten-step-ahead forecasts drawn over the original data.
budget %>%
  forecast(h = 10) %>%
  autoplot(hh_budget) +
  labs(title = "Drift Forecast")
# Takeaway food services in the Australian Capital Territory; the same subset
# feeds both the plot and the models.
train <- filter(
  aus_retail,
  State == "Australian Capital Territory",
  Industry == "Takeaway food services"
)

# Time plot of Turnover for that subset.
autoplot(train, Turnover)

# Three benchmark models fitted to Turnover.
train_data <- model(
  train,
  `Seasonal Naïve` = SNAIVE(Turnover),
  `Naïve` = NAIVE(Turnover),
  Drift = NAIVE(Turnover ~ drift())
)

# Ten-step-ahead forecasts from every model, drawn over the data.
autoplot(forecast(train_data, h = 10), train)
Question 2
# Rows of gafa_stock for the FB symbol.
meta <- filter(gafa_stock, Symbol == "FB")

# Time plot of the closing price.
autoplot(meta, Close) +
  labs(y = "price", title = "Facebook Closing prices")
# Build the modelling table for FB: restrict to 2016-01-01 .. 2019-01-01 and
# replace the calendar index with a consecutive row counter (`day`) declared as
# a regular index.
#
# The former `mutate(Close = as.numeric(meta$Close))` step is removed: Close is
# already a double column, and reaching back to `meta$Close` from inside the
# pipeline bypasses the data being piped (it only worked because no rows had
# been dropped yet).
fbdf <- meta %>%
  filter_index("2016-01-01" ~ "2019-01-01") %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE) %>%
  # NOTE(review): later code still refers to `day`, so select() on a tsibble
  # presumably keeps the index column even though it is not listed here.
  select(Date, Close)
# Random walk with drift on Close, forecast 180 steps ahead.
fb_drift_fc <- fbdf %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = 180)

# Forecasts drawn over the observed series.
autoplot(fb_drift_fc, fbdf) +
  labs(y = "price", title = " Forecast Stock Prices ")
# Show that the drift forecasts lie on the straight line through the first and
# last observations.
#
# The endpoints are computed from the data instead of being typed in by hand,
# and the line is extended across the forecast horizon so it can be compared
# with the point forecasts directly.
fb_h <- 180
fb_first_day <- min(fbdf$day)
fb_last_day <- max(fbdf$day)
fb_first_close <- fbdf$Close[which.min(fbdf$day)]
fb_last_close <- fbdf$Close[which.max(fbdf$day)]
# Average change per step between the first and last observation.
fb_slope <- (fb_last_close - fb_first_close) / (fb_last_day - fb_first_day)

fbdf %>%
  model(RW(Close ~ drift())) %>%
  forecast(h = fb_h) %>%
  # autoplot() already draws the observed series passed to it, so no extra
  # geom_line() layer is needed.
  autoplot(fbdf) +
  # annotate() draws the segment exactly once; constants inside aes() on a
  # geom_segment() would be repeated for every row of the layer's data.
  annotate(
    "segment",
    x = fb_first_day, y = fb_first_close,
    xend = fb_last_day + fb_h, yend = fb_last_close + fb_slope * fb_h,
    linetype = "dashed"
  ) +
  labs(y = "price ", title = "Forecast Stock Prices")
# Benchmark comparison for FB: fit on the observations before 2017 and compare
# the forecasts with the observed prices of January 2017.

# Fitting window. The models were previously fitted on all of `fbdf`, which
# includes the very month they are compared against.
fb_2016 <- fbdf %>%
  filter(Date < as.Date("2017-01-01"))

# Despite its name (kept unchanged), `train` holds the January 2017
# observations used as the comparison window, not the fitting data.
train <- fbdf %>%
  filter(Date >= as.Date("2017-01-01") & Date <= as.Date("2017-01-31"))

fb2 <- fb_2016 %>%
  model(
    Mean = MEAN(Close),
    `Naïve` = NAIVE(Close),
    # The index of `fbdf` is a plain row counter, so a calendar period such as
    # lag("month") cannot be resolved and the model fails to fit. A numeric lag
    # works; 5 is used here as one trading week.
    # NOTE(review): 5 assumes five trading days per week — confirm this is the
    # seasonal period you want.
    `Seasonal naïve` = SNAIVE(Close ~ lag(5)),
    Drift = NAIVE(Close ~ drift())
  )

# forecast() takes the future rows through `new_data`; there is no `data`
# argument, so the previous call did not forecast the January 2017 window.
f2 <- fb2 %>%
  forecast(new_data = train)

f2 %>%
  autoplot(fb_2016, level = NULL) +
  autolayer(train, Close, colour = "black") +
  labs(y = "dollar",
       title = "Facebook daily stock prices") +
  guides(colour = guide_legend(title = "Forecast"))
## Warning: Removed 2 row(s) containing missing values (geom_path).
I think the drift method is the best because it covers the actual trend.
Question 3
# Calves in Victoria; the same subset feeds both the plot and the model.
aus <- aus_livestock %>%
  filter(State == "Victoria" & Animal == "Calves")

# Time plot of Count for that subset.
aus %>%
  autoplot(Count)

# Seasonal naive benchmark: each forecast repeats the value observed one year
# earlier. The previous lag of " 5 year" repeated the value from five years
# earlier instead, which is not the seasonal naive method.
aus %>%
  model(SNAIVE(Count ~ lag("year"))) %>%
  forecast(h = "5 years") %>%
  autoplot(aus, level = NULL) +
  labs(title = " Victorian livestock")
Yes, it is a reasonable benchmark for the data
Question 4 a. True: good residuals should be normally distributed. b. False: good forecasts should have residuals with zero mean, which is not necessarily the case just because the residuals are small. c. False: with small amounts of data there are better forecasts. d. False: it might just need the data cleaned a little more. e. True: you can't rely on the training data alone.
Question 5
# Pick one retail series at random (reproducibly) from aus_retail.
set.seed(23456789)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

# Training window: April 1982 to December 2010.
myseries_train <- myseries %>%
  filter_index("1982 Apr" ~ "2010 Dec")

# Seasonal naive benchmark: each forecast repeats the value observed one year
# earlier. The previous lag of " 5 year" repeated the value from five years
# earlier instead, which is not the seasonal naive method.
myseries_train %>%
  model(SNAIVE(Turnover ~ lag("year"))) %>%
  forecast(h = "5 years") %>%
  autoplot(myseries_train, level = NULL) +
  labs(title = " Turnover before 2011")

# Full series for comparison (the plotted variable is chosen automatically).
autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`
Question 6.
# Pigs in New South Wales.
aus <- aus_livestock %>%
  filter(Animal == "Pigs" & State == "New South Wales")
autoplot(aus, Count)

# Data used for fitting: the first 483 rows.
# NOTE(review): 484 is a hand-typed cut-off — confirm it matches the intended
# training/test split for this series.
aus_fit <- aus %>%
  filter(row_number() < 484)

# Four benchmark models fitted to Count (note: `aus_train` holds the fitted
# models, `aus_fit` holds the fitting data).
aus_train <- aus_fit %>%
  model(
    Seasonal_naive = SNAIVE(Count),
    Naive = NAIVE(Count),
    Drift = RW(Count ~ drift()),
    Mean = MEAN(Count)
  )

# Ten-step-ahead forecasts from every model.
aus_fc <- aus_train %>%
  forecast(h = 10)

# The labels previously described clay brick production, copied from another
# example; this plot shows the NSW pig counts.
aus_fc %>%
  autoplot(aus_fit, level = NULL) +
  labs(title = "Pigs in New South Wales: benchmark forecasts",
       y = "Count") +
  guides(colour = guide_legend(title = "Forecast"))
The Seasonal Naive method looks like the best fit to me. As I mentioned earlier, I am unable to check the residuals because the augment function won't work for me.
Question 7.
# Keep only the Bricks column of aus_production, then print the result.
brick <- select(aus_production, Bricks)
brick
## # A tsibble: 218 x 2 [1Q]
## Bricks Quarter
## <dbl> <qtr>
## 1 189 1956 Q1
## 2 204 1956 Q2
## 3 208 1956 Q3
## 4 197 1956 Q4
## 5 187 1957 Q1
## 6 214 1957 Q2
## 7 227 1957 Q3
## 8 222 1957 Q4
## 9 199 1958 Q1
## 10 229 1958 Q2
## # … with 208 more rows
# Abandoned STL decomposition attempts, left commented out.
# NOTE(review): likely reasons these fail — confirm before re-enabling:
#   * `bricksq` is not defined anywhere in this file;
#   * `brick` as built for this question still contains the missing Bricks
#     values (the earlier Bricks plot warned that 20 rows with missing values
#     were removed), so they would need to be filtered out first;
#   * STL() is presumably provided by the feasts package, which this file
#     never attaches.
#   The plot title below also refers to US retail employment rather than bricks.
#bricksq %>%
# stl(t.window=30, s.window="periodic", robust=TRUE) %>%
# autoplot()
#brick %>%
# model(STL(Bricks ~ season(window=5), robust=TRUE)) %>%
#components() %>% autoplot() +
# labs(title = "STL decomposition: US retail employment")
No matter what I tried, I could not get an STL decomposition to work for this data.
Question 8
# Gold Coast rows of the tourism data.
gold_coast <- filter(tourism, Region == "Gold Coast")

# Sum Trips into a single column (named Purpose here), then print the result.
gc_tourism <- summarise(gold_coast, Purpose = sum(Trips))
gc_tourism
## # A tsibble: 80 x 2 [1Q]
## Quarter Purpose
## <qtr> <dbl>
## 1 1998 Q1 827.
## 2 1998 Q2 681.
## 3 1998 Q3 839.
## 4 1998 Q4 820.
## 5 1999 Q1 987.
## 6 1999 Q2 751.
## 7 1999 Q3 822.
## 8 1999 Q4 914.
## 9 2000 Q1 871.
## 10 2000 Q2 780.
## # … with 70 more rows
# Three training sets that drop the last 4, 8 and 12 rows (1, 2 and 3 years of
# quarterly data) of gc_tourism.
gc_train_1 <- gc_tourism %>% slice(1:(n() - 4))
gc_train_2 <- gc_tourism %>% slice(1:(n() - 8))
gc_train_3 <- gc_tourism %>% slice(1:(n() - 12))

# One year of seasonal naive forecasts for a training set: each forecast
# repeats the value observed one year earlier. (The previous lag of " 5 year"
# repeated the value from five years earlier, which is not seasonal naive.)
snaive_one_year <- function(train) {
  train %>%
    model(SNAIVE(Purpose ~ lag("year"))) %>%
    forecast(h = "1 year")
}

# gc_fc_* now hold the forecasts themselves; they used to be plain copies of
# the training sets.
gc_fc_1 <- snaive_one_year(gc_train_1)
gc_fc_2 <- snaive_one_year(gc_train_2)
gc_fc_3 <- snaive_one_year(gc_train_3)

gc_fc_1 %>%
  autoplot(gc_train_1, level = NULL) +
  labs(title = " Forecast 1")
gc_fc_2 %>%
  autoplot(gc_train_2, level = NULL) +
  labs(title = " Forecast 2")
gc_fc_3 %>%
  autoplot(gc_train_3, level = NULL) +
  labs(title = " Forecast 3")

# Accuracy of each forecast against the observations held out of its training
# set. accuracy() needs the forecasts plus the full data, not a training set.
bind_rows(
  list(
    `Forecast 1` = accuracy(gc_fc_1, gc_tourism),
    `Forecast 2` = accuracy(gc_fc_2, gc_tourism),
    `Forecast 3` = accuracy(gc_fc_3, gc_tourism)
  ),
  .id = "forecast"
)
Question 9.
# Rows of gafa_stock for the AAPL symbol.
apple <- gafa_stock %>%
  filter(Symbol == "AAPL")

# Plot against the calendar date before the index is replaced below.
apple %>%
  autoplot(Close)

# The series is irregular on the calendar index, and the models below refuse
# irregular data. Re-index by a consecutive row counter, as is done for the FB
# series earlier in this file. The previous
# `update_tsibble(apple_stretch, regular = TRUE)` call discarded its result,
# so `apple` and `apple_stretch` stayed irregular and every model fit failed.
apple <- apple %>%
  mutate(trading_day = row_number()) %>%
  update_tsibble(index = trading_day, regular = TRUE)

# Expanding-window folds: the first fold has 10 rows and each subsequent fold
# adds one more.
apple_stretch <- apple %>%
  stretch_tsibble(.init = 10, .step = 1)
apple_stretch
## # A tsibble: 791,866 x 9 [1D]
## # Key: .id, Symbol [1,249]
## Symbol Date Open High Low Close Adj_Close Volume .id
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200 1
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900 1
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700 1
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300 1
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400 1
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200 1
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000 1
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200 1
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400 1
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700 1
## # … with 791,856 more rows
# Random walk with drift on Close, fitted to every fold of apple_stretch.
fit <- model(apple_stretch, RW(Close ~ drift()))
## Warning: 1249 errors (1 unique) encountered for RW(Close ~ drift())
## [1249] .data is an irregular time series, which this model does not support. You should consider if your data can be made regular, and use `tsibble::update_tsibble(.data, regular = TRUE)` if appropriate.
# Mean model on Close, fitted to every fold of apple_stretch.
fit_mean <- model(apple_stretch, mean = MEAN(Close))
# Accuracy measures of a random walk with drift fitted to the whole apple series.
apple_drift <- model(apple, RW(Close ~ drift()))
accuracy(apple_drift)
## Warning: 1 error encountered for RW(Close ~ drift())
## [1] .data is an irregular time series, which this model does not support. You should consider if your data can be made regular, and use `tsibble::update_tsibble(.data, regular = TRUE)` if appropriate.
## # A tibble: 1 × 11
## Symbol .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL RW(Close ~ drift… Trai… NaN NaN NaN NaN NaN NaN NaN NA
Question 10.
#wheat_price<-prices%>%
# select(wheat)
#wheat_price%>%
# autoplot(wheat)
#wheat_stretch <- wheat_price %>%
# stretch_tsibble(.init = 3, .step = 1)
#fit_wheat <- wheat_stretch %>%
# model(RW(wheat ~ drift()))
#boot<-wheat_price%>%
# model(NAIVE(wheat))
#sim<-fit%>%generate(h=30,times=5, bootstrap = TRUE)
#sim
#bk <- fit %>% forecast(h = 30, bootstrap = TRUE, times=500)
#bk
At this point, I can't figure out anything new and my code is late, so I'm just going to turn it in.