library(fpp2)
library(ggplot2)
library(ggthemes)
library(tidyverse)
library(kableExtra)
elec <- head(elecdaily,20)
plot1 <- autoplot(elec, facets = TRUE) +
xlab("Time") + ylab("") + ggtitle("Electricity Demand for Victoria, Australia")
plot1
lm1 <- tslm(Demand ~ Temperature, data = elec)
summary(lm1)
##
## Call:
## tslm(formula = Demand ~ Temperature, data = elec)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.060 -7.117 -1.437 17.484 27.102
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.2117 17.9915 2.179 0.0428 *
## Temperature 6.7572 0.6114 11.052 1.88e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22 on 18 degrees of freedom
## Multiple R-squared: 0.8716, Adjusted R-squared: 0.8644
## F-statistic: 122.1 on 1 and 18 DF, p-value: 1.876e-09
elecdf <- as.data.frame(elec)
plot2 <- ggplot(data=elecdf, aes(x=Temperature, y=Demand)) + ylab("Demand") + xlab("Temperature") +
geom_point() + geom_smooth(method = "lm", se=FALSE) +
ggtitle("Temperature and Electricity Demand")
plot2
There is a positive relationship between temperature and electricity demand, most likely because air-conditioning use rises on hotter days.
checkresiduals(lm1)
##
## Breusch-Godfrey test for serial correlation of order up to 5
##
## data: Residuals from Linear regression model
## LM test = 3.8079, df = 5, p-value = 0.5774
df <- elecdf
df[,"Residuals"] <- as.numeric(residuals(lm1))
p1 <- ggplot(df, aes(x=Demand, y=Residuals)) +
geom_point()
p2 <- ggplot(df, aes(x=Temperature, y=Residuals)) +
geom_point()
gridExtra::grid.arrange(p1, p2, nrow=1)
The residual plots show no obvious pattern and no clear outliers, so the model appears adequate.
Do you believe these forecasts?
lm1pred <- forecast(lm1, newdata=data.frame(Temperature=c(15,35)))
lm1pred
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 4.285714 140.5701 108.6810 172.4591 90.21166 190.9285
## 4.428571 275.7146 245.2278 306.2014 227.57056 323.8586
The forecasts are plausible: higher temperatures mean higher electricity demand. It is worth checking, though, whether 15°C and 35°C lie within the range of temperatures observed in the 20-day training window.
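A minimal check against the training data built above:
range(elecdf$Temperature) # are 15 and 35 inside (or near) the observed range?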
elecdf2 <- as.data.frame(elecdaily)
plot3 <- ggplot(data=elecdf2, aes(x=Temperature, y=Demand)) + ylab("Demand") + xlab("Temperature") +
geom_point() +
ggtitle("Temperature and Electricity Demand")
plot3
The model needs further refinement: it was fitted to only 20 days of data, and over the full year the relationship is clearly not linear, since demand also rises at cold temperatures (heating) as well as at hot ones (cooling).
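One way to capture that U-shape would be to add a quadratic term; a rough sketch (a hypothetical refit on the full elecdaily data, not the model fitted above):
lm1b <- tslm(Demand ~ Temperature + I(Temperature^2), data = elecdaily)
summary(lm1b)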
Data set mens400 contains the winning times (in seconds) for the men’s 400 meters final in each Olympic Games from 1896 to 2016.
autoplot(mens400) + ggtitle("Winning Times for 400m") + xlab("Year") + ylab("Time")
Winning times have decreased over the years. We can also see missing values for the Games cancelled during World War I and World War II.
year <- time(mens400)
lm2 <- tslm(mens400 ~ year, data = mens400)
summary(lm2)
##
## Call:
## tslm(formula = mens400 ~ year, data = mens400)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6002 -0.5747 -0.2858 0.5751 4.1505
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 172.481477 11.487522 15.02 2.52e-14 ***
## year -0.064574 0.005865 -11.01 2.75e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.136 on 26 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.8234, Adjusted R-squared: 0.8166
## F-statistic: 121.2 on 1 and 26 DF, p-value: 2.752e-11
plot2b <- autoplot(mens400) + geom_smooth(method = "lm", se = FALSE)
plot2b
The winning time decreases by 0.0646 seconds per year, or roughly 0.26 seconds per four-year Olympiad.
checkresiduals(lm2)
##
## Breusch-Godfrey test for serial correlation of order up to 6
##
## data: Residuals from Linear regression model
## LM test = 3.6082, df = 6, p-value = 0.7295
The residuals indicate a suitable model, but the most recent observations show positive errors, which could signal that times are not improving as much as in the past.
pred2020 <- forecast(lm2, newdata=data.frame(year=2020))
pred2020
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2020 42.04231 40.44975 43.63487 39.55286 44.53176
Assuming the linear trend continues, we expect a winning time of 42.04 seconds, with an 80% interval of 40.45 to 43.63 seconds and a 95% interval of 39.55 to 44.53 seconds.
Type easter(ausbeer) and interpret what you see.
easter(ausbeer)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 0.67 0.33 0.00 0.00
## 1957 0.00 1.00 0.00 0.00
## 1958 0.00 1.00 0.00 0.00
## 1959 1.00 0.00 0.00 0.00
## 1960 0.00 1.00 0.00 0.00
## 1961 0.33 0.67 0.00 0.00
## 1962 0.00 1.00 0.00 0.00
## 1963 0.00 1.00 0.00 0.00
## 1964 1.00 0.00 0.00 0.00
## 1965 0.00 1.00 0.00 0.00
## 1966 0.00 1.00 0.00 0.00
## 1967 1.00 0.00 0.00 0.00
## 1968 0.00 1.00 0.00 0.00
## 1969 0.00 1.00 0.00 0.00
## 1970 1.00 0.00 0.00 0.00
## 1971 0.00 1.00 0.00 0.00
## 1972 0.33 0.67 0.00 0.00
## 1973 0.00 1.00 0.00 0.00
## 1974 0.00 1.00 0.00 0.00
## 1975 1.00 0.00 0.00 0.00
## 1976 0.00 1.00 0.00 0.00
## 1977 0.00 1.00 0.00 0.00
## 1978 1.00 0.00 0.00 0.00
## 1979 0.00 1.00 0.00 0.00
## 1980 0.00 1.00 0.00 0.00
## 1981 0.00 1.00 0.00 0.00
## 1982 0.00 1.00 0.00 0.00
## 1983 0.00 1.00 0.00 0.00
## 1984 0.00 1.00 0.00 0.00
## 1985 0.00 1.00 0.00 0.00
## 1986 1.00 0.00 0.00 0.00
## 1987 0.00 1.00 0.00 0.00
## 1988 0.00 1.00 0.00 0.00
## 1989 1.00 0.00 0.00 0.00
## 1990 0.00 1.00 0.00 0.00
## 1991 1.00 0.00 0.00 0.00
## 1992 0.00 1.00 0.00 0.00
## 1993 0.00 1.00 0.00 0.00
## 1994 0.00 1.00 0.00 0.00
## 1995 0.00 1.00 0.00 0.00
## 1996 0.00 1.00 0.00 0.00
## 1997 1.00 0.00 0.00 0.00
## 1998 0.00 1.00 0.00 0.00
## 1999 0.00 1.00 0.00 0.00
## 2000 0.00 1.00 0.00 0.00
## 2001 0.00 1.00 0.00 0.00
## 2002 1.00 0.00 0.00 0.00
## 2003 0.00 1.00 0.00 0.00
## 2004 0.00 1.00 0.00 0.00
## 2005 1.00 0.00 0.00 0.00
## 2006 0.00 1.00 0.00 0.00
## 2007 0.00 1.00 0.00 0.00
## 2008 1.00 0.00 0.00 0.00
## 2009 0.00 1.00 0.00 0.00
## 2010 0.00 1.00
The table shows the proportion of the Easter holiday that falls in each quarter: in most years Easter lies entirely in Q1 or Q2, and it is split between them when the holiday straddles the quarter boundary. A dummy variable like this helps model sales around the holiday.
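As a sketch of how it would be used, the dummy can enter a tslm() regression directly as a regressor (a hypothetical model, not fitted elsewhere in this document):
fit_easter <- tslm(ausbeer ~ trend + season + easter(ausbeer))
summary(fit_easter)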
Express y as a function of x and show that the coefficient β1 is the elasticity coefficient.
\(\log y = \beta_0 + \beta_1 \log x + \epsilon\)
Exponentiating both sides gives \(y = e^{\beta_0} x^{\beta_1} e^{\epsilon}\). Differentiating with respect to \(x\),
\(\frac{dy}{dx} = \beta_1 e^{\beta_0} x^{\beta_1 - 1} e^{\epsilon} = \beta_1 \frac{y}{x}\),
so \(\frac{dy}{dx}\cdot\frac{x}{y} = \beta_1\). This ratio of relative changes is exactly the elasticity: \(\beta_1\) is the percentage change in \(y\) resulting from a 1% change in \(x\).
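A quick numerical sanity check with simulated data (hypothetical coefficients \(\beta_0 = 2\), \(\beta_1 = 0.8\)):
x <- seq(1, 100, by = 0.5)
y <- exp(2 + 0.8 * log(x)) # noise-free log-log relationship
coef(lm(log(y) ~ log(x)))[2] # recovers the elasticity, 0.8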
The data set fancy concerns the monthly sales figures of a shop which opened in January 1987 and sells gifts, souvenirs, and novelties. The shop is situated on the wharf at a beach resort town in Queensland, Australia. The sales volume varies with the seasonal population of tourists. There is a large influx of visitors to the town at Christmas and for the local surfing festival, held every March since 1988. Over time, the shop has expanded its premises, range of products, and staff.
autoplot(fancy)
This is seasonal data. There is a clear uptick in sales every December, and a smaller one in March, as visitors flock to the town during those months. The expansion of the store has also increased sales year over year.
In a series like this one, a logarithmic transformation addresses heteroscedasticity: the seasonal swings grow with the level of the series, and taking logs stabilizes the variance.
# Dummy variable: 1 for every March from 1988 onward (the surfing festival)
time <- time(fancy)
surf <- c()
for(i in 1:length(time)){
month <- round(12*(time[i] - floor(time[i]))) + 1
year <- floor(time[i])
if(year >= 1988 & month == 3){
surf[i] <- 1
} else {
surf[i] <- 0
}
}
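The loop can also be written in one vectorized line (a sketch producing the same dummy):
surf_v <- as.numeric(cycle(fancy) == 3 & floor(time(fancy)) >= 1988)
all(surf_v == surf) # TRUE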
lm5 <- tslm(BoxCox(fancy, 0) ~ trend + season + surf)
summary(lm5)
##
## Call:
## tslm(formula = BoxCox(fancy, 0) ~ trend + season + surf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33673 -0.12757 0.00257 0.10911 0.37671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.6196670 0.0742471 102.626 < 2e-16 ***
## trend 0.0220198 0.0008268 26.634 < 2e-16 ***
## season2 0.2514168 0.0956790 2.628 0.010555 *
## season3 0.2660828 0.1934044 1.376 0.173275
## season4 0.3840535 0.0957075 4.013 0.000148 ***
## season5 0.4094870 0.0957325 4.277 5.88e-05 ***
## season6 0.4488283 0.0957647 4.687 1.33e-05 ***
## season7 0.6104545 0.0958039 6.372 1.71e-08 ***
## season8 0.5879644 0.0958503 6.134 4.53e-08 ***
## season9 0.6693299 0.0959037 6.979 1.36e-09 ***
## season10 0.7473919 0.0959643 7.788 4.48e-11 ***
## season11 1.2067479 0.0960319 12.566 < 2e-16 ***
## season12 1.9622412 0.0961066 20.417 < 2e-16 ***
## surf 0.5015151 0.1964273 2.553 0.012856 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.179 on 70 degrees of freedom
## Multiple R-squared: 0.9567, Adjusted R-squared: 0.9487
## F-statistic: 119 on 13 and 70 DF, p-value: < 2.2e-16
checkresiduals(lm5)
##
## Breusch-Godfrey test for serial correlation of order up to 17
##
## data: Residuals from Linear regression model
## LM test = 37.954, df = 17, p-value = 0.002494
There is evidence of serial correlation (the Breusch-Godfrey test rejects white noise), and the residuals do not look randomly distributed.
lm5res <- cbind.data.frame(Month = factor(month.abb[round(12*(time - floor(time)) + 1)],
levels = month.abb, ordered = TRUE),
Residuals = lm5$residuals)
plot5e <- ggplot(data = lm5res, aes(x = Month, y = Residuals)) +
geom_boxplot()
plot5e
coeff5 <- round(coefficients(lm5),3)
coeff5
## (Intercept) trend season2 season3 season4 season5
## 7.620 0.022 0.251 0.266 0.384 0.409
## season6 season7 season8 season9 season10 season11
## 0.449 0.610 0.588 0.669 0.747 1.207
## season12 surf
## 1.962 0.502
Sales have trended upward year after year, and the largest seasonal coefficients fall in November and December, the holiday period.
We calculated the Breusch-Godfrey test above. Results:
data: Residuals from Linear regression model LM test = 37.954, df = 17, p-value = 0.002494
The p-value is below 0.05, so the residuals are serially correlated; prediction intervals from this model are likely too narrow, and autocorrelation will have a noticeable impact on forecasts.
# Future values of the surf dummy: 1 in March of each forecast year (1994-1996)
futuretime <- rep(0, 36)
for(i in 1:36){
if(i %% 12 == 3){
futuretime[i] <- 1
}
}
futuretime <- ts(data = futuretime, start = 1994, end = c(1996, 12), frequency = 12)
f5 <- forecast(lm5, newdata = data.frame(surf = futuretime))
summary(f5)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = BoxCox(fancy, 0) ~ trend + season + surf)
##
## Coefficients:
## (Intercept) trend season2 season3 season4 season5
## 7.61967 0.02202 0.25142 0.26608 0.38405 0.40949
## season6 season7 season8 season9 season10 season11
## 0.44883 0.61045 0.58796 0.66933 0.74739 1.20675
## season12 surf
## 1.96224 0.50152
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 5.285437e-17 0.1633968 0.13406 -0.0299207 1.459825 0.4277115
## ACF1
## Training set 0.5407548
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1994 9.491352 9.238522 9.744183 9.101594 9.88111
## Feb 1994 9.764789 9.511959 10.017620 9.375031 10.15455
## Mar 1994 10.302990 10.048860 10.557120 9.911228 10.69475
## Apr 1994 9.941465 9.688635 10.194296 9.551707 10.33122
## May 1994 9.988919 9.736088 10.241749 9.599161 10.37868
## Jun 1994 10.050280 9.797449 10.303110 9.660522 10.44004
## Jul 1994 10.233926 9.981095 10.486756 9.844168 10.62368
## Aug 1994 10.233456 9.980625 10.486286 9.843698 10.62321
## Sep 1994 10.336841 10.084010 10.589671 9.947083 10.72660
## Oct 1994 10.436923 10.184092 10.689753 10.047165 10.82668
## Nov 1994 10.918299 10.665468 11.171129 10.528541 11.30806
## Dec 1994 11.695812 11.442981 11.948642 11.306054 12.08557
## Jan 1995 9.755590 9.499844 10.011336 9.361338 10.14984
## Feb 1995 10.029027 9.773281 10.284773 9.634775 10.42328
## Mar 1995 10.567228 10.310518 10.823938 10.171489 10.96297
## Apr 1995 10.205703 9.949957 10.461449 9.811451 10.59996
## May 1995 10.253157 9.997411 10.508903 9.858904 10.64741
## Jun 1995 10.314518 10.058772 10.570264 9.920265 10.70877
## Jul 1995 10.498164 10.242418 10.753910 10.103911 10.89242
## Aug 1995 10.497694 10.241948 10.753440 10.103441 10.89195
## Sep 1995 10.601079 10.345333 10.856825 10.206826 10.99533
## Oct 1995 10.701161 10.445415 10.956907 10.306908 11.09541
## Nov 1995 11.182537 10.926791 11.438282 10.788284 11.57679
## Dec 1995 11.960050 11.704304 12.215796 11.565797 12.35430
## Jan 1996 10.019828 9.760564 10.279093 9.620151 10.41951
## Feb 1996 10.293265 10.034000 10.552530 9.893588 10.69294
## Mar 1996 10.831466 10.571566 11.091365 10.430810 11.23212
## Apr 1996 10.469941 10.210677 10.729206 10.070264 10.86962
## May 1996 10.517395 10.258130 10.776659 10.117718 10.91707
## Jun 1996 10.578756 10.319491 10.838021 10.179079 10.97843
## Jul 1996 10.762402 10.503137 11.021667 10.362725 11.16208
## Aug 1996 10.761932 10.502667 11.021196 10.362254 11.16161
## Sep 1996 10.865317 10.606052 11.124582 10.465640 11.26499
## Oct 1996 10.965399 10.706134 11.224664 10.565722 11.36508
## Nov 1996 11.446774 11.187510 11.706039 11.047097 11.84645
## Dec 1996 12.224288 11.965023 12.483552 11.824611 12.62396
autoplot(f5)
f5transform <- f5
# Back-transform each log-scale component of the forecast object;
# lambda = 0 is the log transformation, so InvBoxCox() is simply exp()
members_inv.log <- c('x', 'mean', 'lower', 'upper', 'residuals', 'fitted')
f5transform[members_inv.log] <- lapply(f5[members_inv.log],
InvBoxCox, lambda = 0)
f5transform[['model']][['model']][1] <- lapply(f5[['model']][['model']][1],
InvBoxCox, lambda = 0)
autoplot(f5transform) + ylab("Sales")
summary(f5transform)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = BoxCox(fancy, 0) ~ trend + season + surf)
##
## Coefficients:
## (Intercept) trend season2 season3 season4 season5
## 7.61967 0.02202 0.25142 0.26608 0.38405 0.40949
## season6 season7 season8 season9 season10 season11
## 0.44883 0.61045 0.58796 0.66933 0.74739 1.20675
## season12 surf
## 1.96224 0.50152
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 338.1595 3218.293 1975.801 -1.34341 13.52456 0.4464704 0.4834156
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1994 13244.70 10285.82 17054.73 8969.583 19557.43
## Feb 1994 17409.81 13520.45 22418.00 11790.284 25707.73
## Mar 1994 29821.65 23129.40 38450.24 20155.412 44123.68
## Apr 1994 20774.16 16133.21 26750.16 14068.696 30675.62
## May 1994 21783.73 16917.24 28050.15 14752.395 32166.37
## Jun 1994 23162.27 17987.81 29825.24 15685.969 34201.95
## Jul 1994 27831.56 21613.98 35837.72 18848.111 41096.73
## Aug 1994 27818.48 21603.82 35820.87 18839.249 41077.41
## Sep 1994 30848.42 23956.87 39722.43 20891.193 45551.50
## Oct 1994 34095.57 26478.61 43903.67 23090.230 50346.32
## Nov 1994 55176.84 42850.31 71049.28 37366.903 81475.41
## Dec 1994 120067.79 93244.59 154607.08 81312.400 177294.90
## Jan 1995 17250.40 13357.65 22277.59 11629.938 25587.08
## Feb 1995 22675.20 17558.28 29283.31 15287.252 33633.55
## Mar 1995 38840.85 30046.98 50208.44 26146.972 57697.39
## Apr 1995 27057.06 20951.33 34942.16 18241.435 40133.06
## May 1995 28371.96 21969.51 36640.25 19127.918 42083.42
## Jun 1995 30167.42 23359.80 38958.95 20338.387 44746.58
## Jul 1995 36248.88 28068.91 46812.70 24438.412 53767.06
## Aug 1995 36231.84 28055.72 46790.69 24426.922 53741.78
## Sep 1995 40178.16 31111.50 51887.06 27087.467 59595.26
## Oct 1995 44407.37 34386.35 57348.77 29938.733 65868.34
## Nov 1995 71864.42 55647.40 92807.48 48449.831 106594.69
## Dec 1995 156380.86 121091.75 201954.08 105429.448 231955.81
## Jan 1996 22467.57 17336.40 29117.46 15065.329 33506.86
## Feb 1996 29533.04 22788.25 38274.14 19802.984 44043.89
## Mar 1996 50587.81 39009.73 65602.25 33887.802 75517.62
## Apr 1996 35240.15 27191.96 45670.42 23629.808 52555.15
## May 1996 36952.72 28513.41 47889.88 24778.151 55109.18
## Jun 1996 39291.20 30317.82 50920.48 26346.183 58596.65
## Jul 1996 47211.93 36429.60 61185.57 31657.322 70409.18
## Aug 1996 47189.73 36412.48 61156.80 31642.439 70376.07
## Sep 1996 52329.57 40378.47 67817.91 35088.887 78041.33
## Oct 1996 57837.85 44628.77 74956.52 38782.394 86256.08
## Nov 1996 93598.96 72222.70 121302.09 62761.521 139588.15
## Dec 1996 203676.38 157160.50 263959.89 136572.460 303751.35
The gasoline series consists of weekly data for supplies of US finished motor gasoline product, from 2 February 1991 to 20 January 2017. The units are in “million barrels per day”. Consider only the data to the end of 2004.
gas2004 <- window(gasoline, end=2005)
autoplot(gas2004) + xlab("Year")
fourier2 <- tslm(gas2004 ~ trend + fourier(gas2004, K=2))
fourier5 <- tslm(gas2004 ~ trend + fourier(gas2004, K=5))
fourier7 <- tslm(gas2004 ~ trend + fourier(gas2004, K=7))
fourier10 <- tslm(gas2004 ~ trend + fourier(gas2004, K=10))
fourier16 <- tslm(gas2004 ~ trend + fourier(gas2004, K=16))
fourier20 <- tslm(gas2004 ~ trend + fourier(gas2004, K=20))
autoplot(gas2004) + xlab("Year") + autolayer(fourier2$fitted.values) + autolayer(fourier5$fitted.values) +
autolayer(fourier10$fitted.values) + autolayer(fourier16$fitted.values) +
autolayer(fourier20$fitted.values)
Models with more Fourier pairs fit the data more closely than those with fewer.
kable(CV(fourier2), caption = "Fourier = 2")
| Measure | K = 2 |
|---|---|
| CV | 0.0813685 |
| AIC | -1819.1133994 |
| AICc | -1818.9574105 |
| BIC | -1787.0005493 |
| AdjR2 | 0.8269569 |
kable(CV(fourier5), caption = "Fourier = 5")
| Measure | K = 5 |
|---|---|
| CV | 0.0755365 |
| AIC | -1873.2338143 |
| AICc | -1872.7225784 |
| BIC | -1813.5956642 |
| AdjR2 | 0.8406928 |
kable(CV(fourier7), caption = "Fourier = 7")
| Measure | K = 7 |
|---|---|
| CV | 0.0714734 |
| AIC | -1913.5362459 |
| AICc | -1912.6718391 |
| BIC | -1835.5478956 |
| AdjR2 | 0.8501073 |
kable(CV(fourier10), caption = "Fourier = 10")
| Measure | K = 10 |
|---|---|
| CV | 0.0713575 |
| AIC | -1915.0136582 |
| AICc | -1913.4410086 |
| BIC | -1809.5000079 |
| AdjR2 | 0.8516102 |
kable(CV(fourier16), caption = "Fourier = 16")
| Measure | K = 16 |
|---|---|
| CV | 0.0720689 |
| AIC | -1908.8174672 |
| AICc | -1905.1652933 |
| BIC | -1748.2532167 |
| AdjR2 | 0.8526940 |
kable(CV(fourier20), caption = "Fourier = 20")
| Measure | K = 20 |
|---|---|
| CV | 0.0722383 |
| AIC | -1908.0172013 |
| AICc | -1902.4688142 |
| BIC | -1710.7525507 |
| AdjR2 | 0.8540588 |
AICc (and CV) are lowest when K = 10, so we keep that model.
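The scan over K can be automated; a minimal sketch that evaluates AICc on a grid of harmonic orders:
aiccs <- numeric(20)
for (K in 1:20) {
fit <- tslm(gas2004 ~ trend + fourier(gas2004, K = K))
aiccs[K] <- CV(fit)["AICc"]
}
which.min(aiccs) # the K with the lowest AICc on this grid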
checkresiduals(fourier10)
##
## Breusch-Godfrey test for serial correlation of order up to 104
##
## data: Residuals from Linear regression model
## LM test = 155.45, df = 104, p-value = 0.0008135
The test rejects white noise, so the residuals are not entirely random, but the harmonics capture most of the seasonality.
fgas <- forecast(fourier10, newdata=data.frame(fourier(gas2004, K = 10, h = 52)))
summary(fgas)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = gas2004 ~ trend + fourier(gas2004, K = 10))
##
## Coefficients:
## (Intercept) trend
## 7.092972 0.002799
## fourier(gas2004, K = 10)S1-52 fourier(gas2004, K = 10)C1-52
## 0.005822 -0.273068
## fourier(gas2004, K = 10)S2-52 fourier(gas2004, K = 10)C2-52
## -0.042246 -0.019506
## fourier(gas2004, K = 10)S3-52 fourier(gas2004, K = 10)C3-52
## 0.023718 -0.099742
## fourier(gas2004, K = 10)S4-52 fourier(gas2004, K = 10)C4-52
## 0.016751 -0.028523
## fourier(gas2004, K = 10)S5-52 fourier(gas2004, K = 10)C5-52
## 0.003452 -0.051467
## fourier(gas2004, K = 10)S6-52 fourier(gas2004, K = 10)C6-52
## 0.062397 -0.051832
## fourier(gas2004, K = 10)S7-52 fourier(gas2004, K = 10)C7-52
## 0.031550 0.043259
## fourier(gas2004, K = 10)S8-52 fourier(gas2004, K = 10)C8-52
## 0.021197 0.018102
## fourier(gas2004, K = 10)S9-52 fourier(gas2004, K = 10)C9-52
## -0.017916 0.013770
## fourier(gas2004, K = 10)S10-52 fourier(gas2004, K = 10)C10-52
## -0.017268 0.030860
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.202927e-18 0.259095 0.2055967 -0.1072588 2.573184 0.6279583
## ACF1
## Training set -0.1261056
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005.014 8.745595 8.402358 9.088832 8.220249 9.270941
## 2005.033 8.604075 8.260737 8.947413 8.078574 9.129575
## 2005.052 8.607195 8.263839 8.950551 8.081666 9.132724
## 2005.071 8.683215 8.339879 9.026550 8.157718 9.208712
## 2005.090 8.746645 8.403404 9.089887 8.221293 9.271998
## 2005.110 8.783003 8.439850 9.126156 8.257785 9.308221
## 2005.129 8.833517 8.490370 9.176664 8.308309 9.358725
## 2005.148 8.917498 8.574348 9.260647 8.392285 9.442710
## 2005.167 8.997861 8.654729 9.340992 8.472676 9.523046
## 2005.186 9.029128 8.686005 9.372252 8.503956 9.554301
## 2005.205 9.018507 8.675381 9.361633 8.493330 9.543684
## 2005.225 9.017572 8.674439 9.360705 8.492385 9.542760
## 2005.244 9.056052 8.712918 9.399187 8.530863 9.581242
## 2005.263 9.105523 8.762395 9.448651 8.580344 9.630703
## 2005.282 9.121048 8.777925 9.464172 8.595876 9.646221
## 2005.301 9.105879 8.762754 9.449005 8.580703 9.631055
## 2005.320 9.112513 8.769382 9.455644 8.587329 9.637697
## 2005.340 9.175080 8.831948 9.518211 8.649895 9.700265
## 2005.359 9.259000 8.915873 9.602127 8.733822 9.784178
## 2005.378 9.296291 8.953167 9.639415 8.771118 9.821464
## 2005.397 9.268208 8.925083 9.611334 8.743032 9.793385
## 2005.416 9.234527 8.891397 9.577657 8.709345 9.759710
## 2005.435 9.270246 8.927115 9.613376 8.745062 9.795429
## 2005.455 9.381554 9.038427 9.724681 8.856376 9.906731
## 2005.474 9.497887 9.154762 9.841011 8.972713 10.023060
## 2005.493 9.546877 9.203751 9.890002 9.021700 10.072053
## 2005.512 9.524722 9.181592 9.867851 8.999539 10.049904
## 2005.531 9.487091 9.143960 9.830221 8.961908 10.012274
## 2005.550 9.482387 9.139260 9.825513 8.957209 10.007564
## 2005.570 9.509124 9.165999 9.852248 8.983950 10.034297
## 2005.589 9.536544 9.193418 9.879670 9.011367 10.061720
## 2005.608 9.546848 9.203718 9.889978 9.021665 10.072030
## 2005.627 9.541950 9.198820 9.885081 9.016767 10.067133
## 2005.646 9.517407 9.174280 9.860533 8.992229 10.042584
## 2005.665 9.453438 9.110314 9.796562 8.928265 9.978612
## 2005.685 9.344405 9.001279 9.687531 8.819228 9.869581
## 2005.704 9.226892 8.883762 9.570023 8.701709 9.752076
## 2005.723 9.160315 8.817184 9.503446 8.635131 9.685499
## 2005.742 9.174078 8.830951 9.517205 8.648901 9.699255
## 2005.761 9.241612 8.898488 9.584736 8.716439 9.766785
## 2005.780 9.310432 8.967306 9.653559 8.785255 9.835609
## 2005.800 9.348790 9.005658 9.691922 8.823604 9.873976
## 2005.819 9.354217 9.011085 9.697350 8.829031 9.879404
## 2005.838 9.326591 8.983464 9.669718 8.801413 9.851768
## 2005.857 9.258087 8.914964 9.601211 8.732915 9.783260
## 2005.876 9.163285 8.820158 9.506412 8.638107 9.688463
## 2005.895 9.102531 8.759394 9.445669 8.577338 9.627725
## 2005.915 9.142445 8.799307 9.485583 8.617250 9.667640
## 2005.934 9.276104 8.932975 9.619234 8.750923 9.801286
## 2005.953 9.396214 9.053089 9.739340 8.871039 9.921390
## 2005.972 9.374939 9.031803 9.718075 8.849747 9.900131
## 2005.991 9.184286 8.841055 9.527518 8.658948 9.709624
plot6e <- autoplot(fgas) +
autolayer(window(gasoline, start=2005, end = 2006)) +
theme(legend.position ="bottom") + scale_x_continuous(limits = c(2003, 2006))
plot6e
The performance of the model is acceptable. Many factors beyond trend and seasonality drive gasoline supply, and the larger-than-expected drop toward the middle of the forecast year is likely due to variables not in the model.
plot7a <- autoplot(huron)
plot7a
There is no strong pattern overall, though the level appears to decline until around 1915 and then fluctuate around a roughly constant mean.
lm7 <- tslm(huron ~ trend)
summary(lm7)
##
## Call:
## tslm(formula = huron ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.50997 -0.72726 0.00083 0.74402 2.53565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.202037 0.230111 44.335 < 2e-16 ***
## trend -0.024201 0.004036 -5.996 3.55e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.13 on 96 degrees of freedom
## Multiple R-squared: 0.2725, Adjusted R-squared: 0.2649
## F-statistic: 35.95 on 1 and 96 DF, p-value: 3.545e-08
time7 <- time(huron)
t.break <- 1915
# Hinge term for the piecewise trend: zero before the 1915 knot, linear after it
tb1 <- ts(pmax(0, time7 - t.break), start = 1875)
pw7 <- tslm(huron ~ time7 + tb1)
summary(pw7)
##
## Call:
## tslm(formula = huron ~ time7 + tb1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.49626 -0.66240 -0.07139 0.85163 2.39222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 132.90870 19.97687 6.653 1.82e-09 ***
## time7 -0.06498 0.01051 -6.181 1.58e-08 ***
## tb1 0.06486 0.01563 4.150 7.26e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.045 on 95 degrees of freedom
## Multiple R-squared: 0.3841, Adjusted R-squared: 0.3711
## F-statistic: 29.62 on 2 and 95 DF, p-value: 1.004e-10
forecastlm <- forecast(lm7, h=8)
summary(forecastlm)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = huron ~ trend)
##
## Coefficients:
## (Intercept) trend
## 10.2020 -0.0242
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 9.147126e-18 1.118694 0.9133554 -1.694321 10.68904 1.559779
## ACF1
## Training set 0.7615963
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1973 7.806127 6.317648 9.294605 5.516501 10.095752
## 1974 7.781926 6.292536 9.271315 5.490899 10.072952
## 1975 7.757724 6.267406 9.248043 5.465269 10.050179
## 1976 7.733523 6.242259 9.224788 5.439613 10.027434
## 1977 7.709322 6.217094 9.201550 5.413929 10.004715
## 1978 7.685121 6.191912 9.178331 5.388219 9.982024
## 1979 7.660920 6.166712 9.155128 5.362481 9.959359
## 1980 7.636719 6.141494 9.131943 5.336717 9.936721
h <- 8
# Extend the time index and the hinge term h years beyond the sample
time7.new <- time7[length(time7)] + seq(h)
tb1.new <- tb1[length(tb1)] + seq(h)
newdata <- as.data.frame(cbind(time7 = time7.new, tb1 = tb1.new))
forecastpw <- forecast(pw7,newdata = newdata)
summary(forecastpw)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = huron ~ time7 + tb1)
##
## Coefficients:
## (Intercept) time7 tb1
## 132.90870 -0.06498 0.06486
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.087145e-16 1.029296 0.8472155 -1.482482 9.954493 1.446829
## ACF1
## Training set 0.7280741
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1973 8.455119 7.063583 9.846655 6.314483 10.59575
## 1974 8.454992 7.061518 9.848467 6.311374 10.59861
## 1975 8.454866 7.059398 9.850333 6.308182 10.60155
## 1976 8.454739 7.057225 9.852253 6.304906 10.60457
## 1977 8.454612 7.054998 9.854227 6.301549 10.60768
## 1978 8.454486 7.052717 9.856254 6.298109 10.61086
## 1979 8.454359 7.050384 9.858334 6.294587 10.61413
## 1980 8.454232 7.047997 9.860467 6.290984 10.61748
plot7c1 <- autoplot(huron) +
autolayer(fitted(forecastlm), series = "Linear Regression") + autolayer(forecastlm, series = "Linear Forecast") + theme(legend.position ="bottom")
plot7c2 <- autoplot(huron) +
autolayer(fitted(forecastpw), series = "Piecewise Regression") + autolayer(forecastpw, series = "Piecewise Forecast") + theme(legend.position ="bottom")
library(gridExtra)
grid.arrange(plot7c1, plot7c2, nrow=1)
Show that a 3×5 MA is equivalent to a 7-term weighted moving average with weights of 0.067, 0.133, 0.200, 0.200, 0.200, 0.133, and 0.067.
A \(3\times 5\) MA applies a 5-term MA first and then a 3-term MA to the result:
\[
\frac{1}{3}\left[\frac{y_{t-3}+\dots+y_{t+1}}{5} + \frac{y_{t-2}+\dots+y_{t+2}}{5} + \frac{y_{t-1}+\dots+y_{t+3}}{5}\right]
= \frac{1}{15}\left(y_{t-3} + 2y_{t-2} + 3y_{t-1} + 3y_{t} + 3y_{t+1} + 2y_{t+2} + y_{t+3}\right)
\]
so the weights are \(\tfrac{1}{15} \approx 0.067\), \(\tfrac{2}{15} \approx 0.133\), \(\tfrac{3}{15} = 0.200\), \(0.200\), \(0.200\), \(0.133\), and \(0.067\).
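A numerical check of the combined weights (convolving the two sets of MA weights by hand):
w5 <- rep(1/5, 5)
w3 <- rep(1/3, 3)
w <- rep(0, 7)
for (i in 1:3) w[i:(i + 4)] <- w[i:(i + 4)] + w3[i] * w5
round(w, 3)
## [1] 0.067 0.133 0.200 0.200 0.200 0.133 0.067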
The plastics data set consists of the monthly sales (in thousands) of product A for a plastics manufacturer for five years.
plot62a <- autoplot(plastics)
plot62a
There is a clear seasonal component, as well as an upward trend.
dec_plastics <- decompose(plastics, type = "multiplicative")
autoplot(dec_plastics)
Yes. There is a clear seasonal component and an upward trend.
autoplot(plastics, series="Data") +
autolayer(trendcycle(dec_plastics), series="Trend") +
autolayer(seasadj(dec_plastics), series="Seasonally Adjusted") +
xlab("Year") + ylab("Sales")
plastics1 <- plastics
plastics1[10] <- plastics1[10] + 500
dec_plastics1 <- decompose(plastics1, type = "multiplicative")
autoplot(plastics1, series="Data") +
autolayer(trendcycle(dec_plastics1), series="Trend") +
autolayer(seasadj(dec_plastics1), series="Seasonally Adjusted") +
xlab("Year") + ylab("Sales")
The outlier changes the seasonal components and therefore seasonal adjustments.
plastics2 <- plastics
plastics2[50] <- plastics2[50] + 500
dec_plastics2 <- decompose(plastics2, type = "multiplicative")
autoplot(plastics2, series="Data") +
autolayer(trendcycle(dec_plastics2), series="Trend") +
autolayer(seasadj(dec_plastics2), series="Seasonally Adjusted") +
xlab("Year") + ylab("Sales")
The position makes little practical difference here: classical decomposition averages the detrended values across all years to form the seasonal indices, so an outlier in the middle or near the end perturbs the seasonal component, and hence the seasonal adjustment, in much the same way.
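One way to check that claim numerically is to compare each perturbed decomposition's seasonal indices with the original (a sketch):
max(abs(seasonal(dec_plastics1) - seasonal(dec_plastics)))
max(abs(seasonal(dec_plastics2) - seasonal(dec_plastics)))
If the position of the outlier does not matter, the two quantities should be of similar magnitude.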
Recall your retail time series data (from Exercise 3 in Section 2.10). Decompose the series using X11. Does it reveal any outliers, or unusual features that you had not noticed previously?
library(openxlsx)
retaildata <- read.xlsx("F:/MSAE/2020fall_PredictiveAnalytics/retail.xlsx", startRow = 2)
tsretaildata <- ts(retaildata[,"A3349873A"], frequency=12, start=c(1982,4))
autoplot(tsretaildata)
library(seasonal)
x11retail <- seas(tsretaildata, x11 = "")
autoplot(x11retail)
The results of decomposing the number of persons in the civilian labor force in Australia each month from February 1978 to August 1995 are shown below.
There is a clear upward trend in the data, as well as a seasonal component related to the holiday season.
Is the recession of 1991/1992 visible in the estimated components?
Yes. The trend flattens during those years, and the remainders are all negative during that period, suggesting an exogenous factor pushing down on the series.
This exercise uses the cangas data (monthly Canadian gas production in billions of cubic metres, January 1960 – February 2005).
autoplot(cangas)
ggsubseriesplot(cangas)
ggseasonplot(cangas)
Broadly, gas production is higher in the winter and lower in the summer, as Canadians rely on gas for heating: production rises when demand does.
stlcangas <- stl(cangas, s.window = 13)
autoplot(stlcangas)
x11cangas <- seas(cangas, x11 = "")
seatscangas <- seas(cangas)
autoplot(x11cangas) + ggtitle("X11")
autoplot(seatscangas) + ggtitle("SEATS")
We will use the bricksq data (Australian quarterly clay brick production. 1956–1994) for this exercise.
stlfixed <- stl(bricksq, s.window = "periodic")
stlchange <- stl(bricksq, s.window = 13, robust = TRUE)
autoplot(stlfixed) + ggtitle("Fixed Seasonality")
autoplot(stlchange) + ggtitle("Changing Seasonality, s.window = 13")
The two decompositions are very similar.
seasadjfixed <- seasadj(stlfixed)
seasadjchange <- seasadj(stlchange)
autoplot(bricksq) + autolayer(seasadjfixed) + autolayer(seasadjchange)
fstl1 <- stl(bricksq, t.window=30, s.window="periodic")
autoplot(naive(seasadj(fstl1)))
fstlf2 <- stlf(bricksq, method='naive')
forecast(fstlf2,h=8)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1994 Q4 465.7338 440.1878 491.2798 426.6645 504.8030
## 1995 Q1 424.6739 388.5464 460.8014 369.4217 479.9261
## 1995 Q2 483.9926 439.7456 528.2395 416.3227 551.6624
## 1995 Q3 494.0000 442.9080 545.0920 415.8616 572.1384
## 1995 Q4 465.7338 408.6112 522.8563 378.3723 553.0952
## 1996 Q1 424.6739 362.0993 487.2485 328.9742 520.3736
## 1996 Q2 483.9926 416.4042 551.5809 380.6251 587.3600
## 1996 Q3 494.0000 421.7450 566.2550 383.4956 604.5044
checkresiduals(fstlf2)
##
## Ljung-Box test
##
## data: Residuals from STL + Random walk
## Q* = 40.829, df = 8, p-value = 2.244e-06
##
## Model df: 0. Total lags used: 8
The Ljung-Box test strongly rejects white noise (p ≈ 2e-06), so the residuals are autocorrelated rather than random.
fstl_robust <- stlf(bricksq, t.window=30, s.window="periodic", robust=TRUE)
autoplot(fstl_robust)
checkresiduals(fstl_robust)
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(M,N,N)
## Q* = 21.064, df = 6, p-value = 0.001787
##
## Model df: 2. Total lags used: 8
Some autocorrelation remains in the residuals even with the robust fit.
train <- subset(bricksq, end = length(bricksq) - 8)
test <- subset(bricksq, start = length(bricksq) - 7)
snaive6g <- snaive(train)
stlf6g <- stlf(train, robust = TRUE)
autoplot(bricksq) + geom_line(size = 1) + autolayer(stlf6g, PI = FALSE, size = 1, series = "stlf") +
autolayer(snaive6g, PI = FALSE, size = 1, series = "snaive")
The stlf() forecasts track the original data slightly more closely than the snaive() forecasts.
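The visual comparison can be quantified on the held-out eight quarters (a minimal check):
accuracy(snaive6g, test)["Test set", "RMSE"]
accuracy(stlf6g, test)["Test set", "RMSE"]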
Use stlf() to produce forecasts of the writing series with either method=“naive” or method=“rwdrift”, whichever is most appropriate. Use the lambda argument if you think a Box-Cox transformation is required.
stlf7 <- stlf(writing, s.window = 13, robust = TRUE, lambda = BoxCox.lambda(writing), method = "rwdrift")
autoplot(stlf7)
Use stlf() to produce forecasts of the fancy series with either method=“naive” or method=“rwdrift”, whichever is most appropriate. Use the lambda argument if you think a Box-Cox transformation is required.
stlf8 <- stlf(fancy, s.window = 13, robust = TRUE, lambda = BoxCox.lambda(fancy), method = "rwdrift")
autoplot(stlf8)