---
title: "3"
author: "Matt Mullis"
date: "09/30/22"
output: html_document
---
library(fpp3)
# install and load any package necessary
#a
global_economy %>%
filter(Country=="Australia") %>%
model(RW(Population~drift())) %>%
forecast(h=5) %>%
autoplot(global_economy)
#b
bricks <- aus_production%>%
filter_index("1958 Q1"~"2004 Q2") %>%
select(Bricks)
bricks_fit <- bricks %>% model(SNAIVE(Bricks ~ lag("year"))) %>%
forecast(h = "5 years")
autoplot(bricks_fit, level= NULL)+
autolayer(bricks)
## Plot variable not specified, automatically selected `.vars = Bricks`
#c
nswlamb <- aus_livestock %>%
filter(State== "New South Wales") %>%
filter(Animal == "Lambs")
nswfc <- nswlamb %>% model(SNAIVE(Count~lag("year"))) %>%
forecast(h= "5 years")
autoplot(nswfc, level= NULL)+
autolayer(nswlamb)
## Plot variable not specified, automatically selected `.vars = Count`
#d
hh_budget %>%
model(RW(Wealth~drift())) %>%
forecast(h=5) %>%
autoplot(hh_budget)
#e
out <- aus_retail %>%
filter(Industry== "Takeaway food services")
outfc <- out %>%
model(RW(Turnover~drift())) %>%
forecast(h=20)
autoplot(outfc)+
autolayer(out)
## Plot variable not specified, automatically selected `.vars = Turnover`
#A
fbone <- gafa_stock %>%
filter(Symbol == 'FB', year(Date) >= 2015) %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE) %>%
select(Date, Close)
fbone %>%
autoplot(Close)
#b and c
fbone %>%
model(Drift = RW(Close ~ drift())) %>%
forecast(h = 100) %>%
autoplot(fbone) +
geom_line(data = slice(fbone, range(cumsum(!is.na(Close)))),
aes(y=Close), linetype = "dashed", color = "blue")
#d (Mean and Naive)
fbone %>%
model(MEAN(Close)) %>%
forecast(h=200) %>%
autoplot(fbone)
fbone %>%
model(NAIVE(Close)) %>%
forecast(h = 100) %>%
autoplot(fbone)
#Because this is a stock and stock prices behave much like a random walk, the naive method is the most appropriate benchmark here.
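# A minimal sketch (not part of the original answer) to back up that claim:
# hold out the last 100 trading days as an arbitrary test set and compare the
# benchmarks' test-set accuracy.
fb_train <- fbone %>% slice(1:(n() - 100))
fb_train %>%
model(Mean = MEAN(Close),
Naive = NAIVE(Close),
Drift = RW(Close ~ drift())) %>%
forecast(h = 100) %>%
accuracy(fbone) %>%
arrange(RMSE)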
bbs <- aus_livestock %>%
filter(Animal=="Bulls, bullocks and steers") %>%
filter(State== "Victoria")
bbsfc <- bbs %>% model(SNAIVE(Count~lag("year"))) %>%
forecast(h= "5 years")
autoplot(bbsfc, level= NULL)+
autolayer(bbs)+
labs(title="Bulls and Steers")
## Plot variable not specified, automatically selected `.vars = Count`
cat <- aus_livestock %>%
filter(Animal=="Cattle (excl. calves)") %>%
filter(State=="Victoria")
catfc <- cat %>% model(SNAIVE(Count~lag("year"))) %>%
forecast(h= "5 years")
autoplot(catfc, level= NULL)+
autolayer(cat)+
labs(title="cattle")
## Plot variable not specified, automatically selected `.vars = Count`
cow <- aus_livestock %>%
filter(Animal=="Cows and heifers") %>%
filter(State=="Victoria")
cowfc <- cow %>% model(SNAIVE(Count~lag("year"))) %>%
forecast(h= "5 years")
autoplot(cowfc, level= NULL)+
autolayer(cow)+
labs(title="Cows")
## Plot variable not specified, automatically selected `.vars = Count`
lam <- aus_livestock %>%
filter(Animal=="Lambs") %>%
filter(State=="Victoria")
lamfc <- lam %>% model(SNAIVE(Count~lag("year"))) %>%
forecast(h= "5 years")
autoplot(lamfc, level= NULL)+
autolayer(lam)+
labs(title="Lambs")
## Plot variable not specified, automatically selected `.vars = Count`
pig <- aus_livestock %>%
filter(Animal=="Pigs") %>%
filter(State=="Victoria")
pigfc <- pig %>% model(SNAIVE(Count~lag("year"))) %>%
forecast(h= "5 years")
autoplot(pigfc, level= NULL)+
autolayer(pig)+
labs(title="pigs")
## Plot variable not specified, automatically selected `.vars = Count`
she <- aus_livestock %>%
filter(Animal=="Sheep") %>%
filter(State=="Victoria")
shefc <- she %>% model(SNAIVE(Count~lag("year"))) %>%
forecast(h= "5 years")
autoplot(shefc, level= NULL)+
autolayer(she)+
labs(title="Sheep")
## Plot variable not specified, automatically selected `.vars = Count`
cal <- aus_livestock %>%
filter(Animal=="Calves") %>%
filter(State=="Victoria")
calfc <- cal %>% model(SNAIVE(Count~lag("year"))) %>%
forecast(h= "5 years")
autoplot(calfc, level= NULL)+
autolayer(cal)+
labs(title="Calves")
## Plot variable not specified, automatically selected `.vars = Count`
#I would say yes, the seasonal naive method is a reasonable benchmark for these series: the data are strongly seasonal. It may not be the best forecast overall, but of the four benchmark methods it is probably the most accurate here. It does not capture the trend in series such as Lambs and Bulls, bullocks and steers; one way to add a trend is sketched below.
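# A minimal sketch (not part of the original answers) of one way to add a trend:
# combine an STL decomposition with a drift model on the seasonally adjusted
# series and compare it with the seasonal naive benchmark for the Lambs series.
lam %>%
model(snaive = SNAIVE(Count),
stl_drift = decomposition_model(STL(Count), RW(season_adjust ~ drift()))) %>%
forecast(h = "5 years") %>%
autoplot(lam, level = NULL)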
1. True. Having such a model can make forecasting easier, but a forecast is not necessarily bad without one.
2. True. To get a small RMSE, which indicates a good forecast, the residuals must be small. However, small residuals are not the only thing that makes a forecast good.
3. False. The MAPE can be infinite or undefined when y_t = 0, and it is not symmetric: it puts a heavier penalty on negative errors than on positive errors.
4. False. According to the book, overfitting a model to the data is just as bad as failing to identify the patterns in the data.
5. True. However, the forecast can often still be improved, and sometimes the best method is unclear.
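# A small illustration (made-up numbers, not from the exercises) of statement 3:
# the same residuals give a finite RMSE but an infinite MAPE when one of the
# actual values is zero.
y <- c(10, 0, 12, 9)          # actual values, including a zero
yhat <- c(9, 1, 13, 10)       # hypothetical forecasts
e <- y - yhat
sqrt(mean(e^2))               # RMSE is finite
mean(abs(100 * e / y))        # MAPE is Inf because of the division by zero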
set.seed(991)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Western Australia Other retailing n.e.c. A3349908R 1982 Apr 11.3
## 2 Western Australia Other retailing n.e.c. A3349908R 1982 May 11.6
## 3 Western Australia Other retailing n.e.c. A3349908R 1982 Jun 10.9
## 4 Western Australia Other retailing n.e.c. A3349908R 1982 Jul 11.4
## 5 Western Australia Other retailing n.e.c. A3349908R 1982 Aug 10.9
## 6 Western Australia Other retailing n.e.c. A3349908R 1982 Sep 11.7
myseries2 <- myseries %>%
filter_index("1982 Apr"~"2010 Dec")
autoplot(myseries, Turnover) +
autolayer(myseries2, Turnover, colour = "red")
#c
fit <- myseries2 %>%
model(SNAIVE(Turnover))
#d
fit %>% gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
#The residuals show substantial autocorrelation and are clearly not normally distributed. The autocorrelation decreases as the lag increases.
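# A minimal sketch (not part of the original answer) to quantify the
# autocorrelation seen in the residual plots: a Ljung-Box test over 24 lags
# (two seasonal periods for monthly data).
augment(fit) %>%
features(.innov, ljung_box, lag = 24)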
#e
myseries3 <- myseries %>%
filter_index("1982 Apr" ~ .)
plot_fc <- myseries2 %>%
model(seasonal_naive = SNAIVE(Turnover)) %>%
forecast(h = 100) %>%
autoplot(myseries3)
plot_fc
#f the test data sit well above the forecasts from the training data,
#even exceeding the upper bound of the 80% prediction interval.
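# A minimal sketch (not part of the original answer) putting numbers on that
# impression: test-set accuracy of the seasonal naive forecasts against the
# withheld observations (forecasts beyond the end of the data are ignored).
myseries2 %>%
model(seasonal_naive = SNAIVE(Turnover)) %>%
forecast(h = 100) %>%
accuracy(myseries)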
#g
#challenge
pig <- aus_livestock %>%
filter(Animal== "Pigs") %>%
filter(State=="New South Wales")
autoplot(pig)
## Plot variable not specified, automatically selected `.vars = Count`
pigs_frequency <- pig$Count
hist(pigs_frequency)
#b
pigstrain <- pig %>%
slice(1:486)
autoplot(pigstrain)
## Plot variable not specified, automatically selected `.vars = Count`
#c
pigfit <- pigstrain %>%
model(
Mean = MEAN(Count),
`Naïve` = NAIVE(Count),
Drift = NAIVE(Count ~ drift()),
`Seasonal naïve` = SNAIVE(Count))
pigfc <- pigfit %>% forecast(h=100)
autoplot(pigfc, level= NULL)+
autolayer(pig)
## Plot variable not specified, automatically selected `.vars = Count`
#The seasonal naive method works best here; the data are clearly seasonal. The accuracy comparison below checks this numerically.
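# A minimal sketch (not part of the original answer): test-set accuracy of each
# benchmark against the withheld observations, sorted by RMSE.
accuracy(pigfc, pig) %>%
arrange(RMSE)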
#d
blah <- pigstrain %>%
model(SNAIVE(Count))
blah %>% gg_tsresiduals()
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing non-finite values (stat_bin).
#The autocorrelation decreases as the lag increases, but I still would not call these residuals white noise.
#exercise 7
mbricks <- aus_production %>%
select(Quarter, Bricks) %>%
slice(1:198)
autoplot(mbricks)
## Plot variable not specified, automatically selected `.vars = Bricks`
#a
mbricks %>%
model(
STL(Bricks ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()
mbricks %>%
model(
STL(box_cox(Bricks,0) ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()
mbricks %>%
model(
STL(box_cox(Bricks,0) ~ trend(window = 7) +
season(window = 8),
robust = TRUE)) %>%
components() %>%
autoplot()
#b
dcmp <- mbricks %>%
model(stl=STL(Bricks))
components(dcmp)
## # A dable: 198 x 7 [1Q]
## # Key: .model [1]
## # : Bricks = trend + season_year + remainder
## .model Quarter Bricks trend season_year remainder season_adjust
## <chr> <qtr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stl 1956 Q1 189 204. -21.3 5.98 210.
## 2 stl 1956 Q2 204 202. 3.35 -1.39 201.
## 3 stl 1956 Q3 208 201. 16.6 -9.17 191.
## 4 stl 1956 Q4 197 200. 1.44 -4.26 196.
## 5 stl 1957 Q1 187 204. -21.5 4.26 208.
## 6 stl 1957 Q2 214 209. 3.29 1.24 211.
## 7 stl 1957 Q3 227 214. 16.8 -3.64 210.
## 8 stl 1957 Q4 222 217. 1.46 3.10 221.
## 9 stl 1958 Q1 199 222. -21.6 -1.41 221.
## 10 stl 1958 Q2 229 226. 3.25 -0.428 226.
## # … with 188 more rows
components(dcmp) %>%
as_tsibble() %>%
autoplot(Bricks, color="Red")+
geom_line(aes(y=trend), colour = "blue")
Adcmp <- components(dcmp)
#c
trend <- Adcmp %>%
select(-c(.model,Bricks, trend, season_year, remainder))
trend %>%
model(NAIVE(season_adjust)) %>%
forecast(h = "5 years") %>%
autoplot(trend)
#d
tttt <- mbricks %>%
model(stlf = decomposition_model(
STL(Bricks ~ trend(window = 4), robust = TRUE),
NAIVE(season_adjust)))
tttt %>%
forecast(h=5) %>%
autoplot(mbricks)
#e
gg_tsresiduals(tttt)
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
gg_tsresiduals(dcmp)
#exercise 8
gc_tourism <- tourism %>%
as_tibble() %>%
filter(Region=="Gold Coast") %>%
group_by(Quarter) %>%
summarise(Total= sum(Trips))
ungroup(gc_tourism)
## # A tibble: 80 × 2
## Quarter Total
## <qtr> <dbl>
## 1 1998 Q1 827.
## 2 1998 Q2 681.
## 3 1998 Q3 839.
## 4 1998 Q4 820.
## 5 1999 Q1 987.
## 6 1999 Q2 751.
## 7 1999 Q3 822.
## 8 1999 Q4 914.
## 9 2000 Q1 871.
## 10 2000 Q2 780.
## # … with 70 more rows
gc_tourism <- as_tsibble(gc_tourism, index=Quarter )
autoplot(gc_tourism)
## Plot variable not specified, automatically selected `.vars = Total`
gc_train_1 <- gc_tourism %>%
slice(1:(n()-29))
gc_train_2 <- gc_tourism %>%
slice(5:(n()-24))
gc_train_3 <- gc_tourism %>%
slice(1:(n()-24))
head(gc_train_1)
## # A tsibble: 6 x 2 [1Q]
## Quarter Total
## <qtr> <dbl>
## 1 1998 Q1 827.
## 2 1998 Q2 681.
## 3 1998 Q3 839.
## 4 1998 Q4 820.
## 5 1999 Q1 987.
## 6 1999 Q2 751.
gc_fc_1 <- gc_train_1 %>%
model(SNAIVE(Total~lag(4))) %>%
forecast(h=4)
autoplot(gc_fc_1)+
autolayer(gc_tourism)
## Plot variable not specified, automatically selected `.vars = Total`
gc_fc_2 <- gc_train_2 %>%
model(SNAIVE(Total~lag(4))) %>%
forecast(h=4)
autoplot(gc_fc_2)+
autolayer(gc_tourism)
## Plot variable not specified, automatically selected `.vars = Total`
gc_fc_3 <- gc_train_3 %>%
model(SNAIVE(Total~lag(4))) %>%
forecast(h=4)
autoplot(gc_fc_3)+
autolayer(gc_tourism)
## Plot variable not specified, automatically selected `.vars = Total`
accuracy(gc_fc_1, gc_tourism)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Total ~ lag(4)) Test -38.8 62.6 51.4 -5.06 6.44 0.880 0.876 0.275
accuracy(gc_fc_2, gc_tourism)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Total ~ lag(4)) Test 63.0 68.2 63.0 6.98 6.98 1.11 0.974 0.218
accuracy(gc_fc_3, gc_tourism)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Total ~ lag(4)) Test 63.0 68.2 63.0 6.98 6.98 1.11 0.974 0.218
#The first training set gives the lowest RMSE and MAPE, so its forecasts are the most accurate here; the second and third training sets produce identical, slightly less accurate forecasts.
#exercise 9
#a
ap <- gafa_stock %>%
filter(Symbol=="AAPL") %>%
select(Date, Close) %>%
mutate(row=row_number()) %>%
update_tsibble(index=row, regular=TRUE)
autoplot(ap)+
labs(title="Apple Closing stock price")
## Plot variable not specified, automatically selected `.vars = Close`
#b
apte <- ap %>%
stretch_tsibble(.init=10, .step=1)
#c
aprw <- apte %>%
model(RW(Close~drift())) %>%
forecast(h=1)
#d
apm <- apte %>%
model(MEAN(Close)) %>%
forecast(h=1)
accuracy(aprw, ap)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 1259
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 RW(Close ~ drift()) Test -0.0158 2.10 1.41 -0.0139 1.06 1.00 1.00 0.0330
accuracy(apm, ap)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 1259
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MEAN(Close) Test 26.9 37.6 28.5 16.9 18.6 20.2 17.9 0.996
#The random walk with drift is clearly better; it has by far the lowest RMSE.
#exercise 10
w <- prices %>%
select(wheat) %>%
slice(1:197)
wrw <- w %>%
model(RW(wheat~drift())) %>%
forecast(h=50)
autoplot(wrw)+
autolayer(w)
## Plot variable not specified, automatically selected `.vars = wheat`
#c
wbs <- w %>%
model(RW(wheat~drift()))
wsim <- wbs %>%
generate(h=50, times=500, bootstrap=TRUE)
wsim
## # A tsibble: 25,000 x 4 [1Y]
## # Key: .model, .rep [500]
## .model year .rep .sim
## <chr> <dbl> <chr> <dbl>
## 1 RW(wheat ~ drift()) 1997 1 25.3
## 2 RW(wheat ~ drift()) 1998 1 38.9
## 3 RW(wheat ~ drift()) 1999 1 -57.4
## 4 RW(wheat ~ drift()) 2000 1 -38.7
## 5 RW(wheat ~ drift()) 2001 1 -57.9
## 6 RW(wheat ~ drift()) 2002 1 -66.0
## 7 RW(wheat ~ drift()) 2003 1 -38.9
## 8 RW(wheat ~ drift()) 2004 1 -39.0
## 9 RW(wheat ~ drift()) 2005 1 -12.5
## 10 RW(wheat ~ drift()) 2006 1 -5.30
## # … with 24,990 more rows
w %>%
ggplot(aes(x = year)) +
geom_line(aes(y = wheat)) +
geom_line(aes(y = .sim, colour = as.factor(.rep)),
data = wsim) +
guides(colour = "none")
#e
#They might be appropriate when the residuals are not normally distributed, because bootstrapping only assumes that the residuals are uncorrelated and have constant variance, not that they follow a normal distribution.
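# A minimal sketch (not part of the original answer): prediction intervals
# computed from bootstrapped residuals rather than from a normality assumption.
wbs %>%
forecast(h = 50, bootstrap = TRUE, times = 500) %>%
autoplot(w) +
labs(title = "Wheat price: drift forecasts with bootstrapped intervals")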