library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2 3.3.3 v fma 2.4
## v forecast 8.15 v expsmooth 2.3
##
daily20 <- head(elecdaily,20)
autoplot(daily20)
lm1 <- tslm(Demand~Temperature, data=daily20)
summary(lm1)
##
## Call:
## tslm(formula = Demand ~ Temperature, data = daily20)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.060 -7.117 -1.437 17.484 27.102
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.2117 17.9915 2.179 0.0428 *
## Temperature 6.7572 0.6114 11.052 1.88e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22 on 18 degrees of freedom
## Multiple R-squared: 0.8716, Adjusted R-squared: 0.8644
## F-statistic: 122.1 on 1 and 18 DF, p-value: 1.876e-09
checkresiduals(lm1)
##
## Breusch-Godfrey test for serial correlation of order up to 5
##
## data: Residuals from Linear regression model
## LM test = 3.8079, df = 5, p-value = 0.5774
#There seems to be a positive linear trend in the residuals, indicating the model is not fully accounting for the trend in the data. The ACF shows no significant serial correlations, consistent with the Breusch-Godfrey result above (a further residual check is sketched below).
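# A further check (a sketch; resid_df is just an illustrative name): plot the residuals
# against the predictor; curvature or a trend here would support the comment above.
resid_df <- data.frame(Temperature = as.numeric(daily20[, "Temperature"]),
                       Residuals = as.numeric(residuals(lm1)))
ggplot(resid_df, aes(x = Temperature, y = Residuals)) + geom_point()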
forecast15<- forecast(lm1, newdata=data.frame(Temperature=15))
forecast15
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 4.285714 140.5701 108.681 172.4591 90.21166 190.9285
forecast35<- forecast(lm1, newdata=data.frame(Temperature=35))
forecast35
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 4.285714 275.7146 245.2278 306.2014 227.5706 323.8586
autoplot(daily20, facets=TRUE)
daily20 %>%
as.data.frame() %>%
ggplot(aes(x=Temperature, y=Demand)) +
geom_point() +
geom_smooth(method="lm", se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
fit <- tslm(Demand ~ Temperature, data=daily20)
checkresiduals(fit)
##
## Breusch-Godfrey test for serial correlation of order up to 5
##
## data: Residuals from Linear regression model
## LM test = 3.8079, df = 5, p-value = 0.5774
forecast(fit, newdata=data.frame(Temperature=c(15,35)))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 4.285714 140.5701 108.6810 172.4591 90.21166 190.9285
## 4.428571 275.7146 245.2278 306.2014 227.57056 323.8586
plot(Demand~Temperature, data=elecdaily, main= "Demand vs. Temperature for All Data")
#The plot of the full dataset shows a strong relationship between temperature and electricity demand, but it is clearly not linear: demand rises at both low and high temperatures. A nonlinear regression model would therefore fit better than the simple linear model above (one possibility is sketched below).
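# One way to allow for the curvature (a sketch, not part of the fit above; lm_quad is an
# illustrative name): add a squared temperature term so demand can rise at both low and
# high temperatures.
lm_quad <- tslm(Demand ~ Temperature + I(Temperature^2), data = elecdaily)
summary(lm_quad)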
##2
View(mens400)
autoplot(mens400, xlab="Year", ylab="Seconds", main="Winning Time by Year")
# Winning times have decreased consistently from one Games to the next. The slope appears to be flattening slightly, suggesting the rate of improvement is diminishing. While the pattern looks roughly linear, a non-linear model might offer slightly better performance.
df <- na.interp(mens400, lambda = NULL)   # interpolate the missing winning times
seconds <- time(df)                       # note: despite the name, this holds the year of each Games
lm2 <- tslm(df ~ seconds, data = df)      # regress winning time on year
autoplot(mens400, series = "Data") +
  autolayer(fitted(lm2), series = "Fitted") +
  ggtitle("Winning Times by Year") + xlab("Year") + ylab("Seconds")
summary(lm2)
##
## Call:
## tslm(formula = df ~ seconds, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5622 -0.5877 -0.2707 0.5497 4.2157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.697133 10.724592 16.01 6.19e-16 ***
## seconds -0.064195 0.005482 -11.71 1.64e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.092 on 29 degrees of freedom
## Multiple R-squared: 0.8254, Adjusted R-squared: 0.8194
## F-statistic: 137.1 on 1 and 29 DF, p-value: 1.638e-12
#On average, the winning times have been decreasing by about 0.064 seconds per year, i.e. roughly 0.26 seconds per Olympiad.
checkresiduals(lm2)
##
## Breusch-Godfrey test for serial correlation of order up to 6
##
## data: Residuals from Linear regression model
## LM test = 4.3582, df = 6, p-value = 0.6283
#The residuals are larger at the beginning and end of the period, suggesting a non-linear regression model would capture the relationship better (one alternative is sketched below).
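# A possible alternative (a sketch; lm2.log is an illustrative name): model the log of the
# winning times so the improvement decays over the years instead of staying constant.
lm2.log <- tslm(log(df) ~ seconds)
autoplot(df, series = "Data") +
  autolayer(exp(fitted(lm2.log)), series = "Fitted (log model)")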
t <- seq(2020, 2140, by = 4)   # Olympic years to forecast
forecast_mens <- forecast(lm2, newdata = data.frame(seconds = t))
forecast_mens
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2020 49.98425 48.46665 51.50186 47.61750 52.35101
## 2024 49.72748 48.21779 51.23716 47.37307 52.08188
## 2028 49.47070 47.96843 50.97297 47.12785 51.81354
## 2032 49.21392 47.71855 50.70929 46.88183 51.54601
## 2036 48.95714 47.46814 50.44614 46.63499 51.27929
## 2040 48.70036 47.21721 50.18352 46.38733 51.01340
## 2044 48.44358 46.96574 49.92143 46.13883 50.74834
## 2048 48.18681 46.71372 49.65989 45.88948 50.48414
## 2052 47.93003 46.46116 49.39890 45.63927 50.22078
## 2056 47.67325 46.20805 49.13845 45.38821 49.95829
## 2060 47.41647 45.95438 48.87857 45.13628 49.69666
## 2064 47.15969 45.70015 48.61924 44.88347 49.43591
## 2068 46.90292 45.44535 48.36048 44.62979 49.17604
## 2072 46.64614 45.18999 48.10228 44.37522 48.91705
## 2076 46.38936 44.93407 47.84465 44.11978 48.65894
## 2080 46.13258 44.67757 47.58759 43.86344 48.40172
## 2084 45.87580 44.42051 47.33110 43.60622 48.14539
## 2088 45.61902 44.16288 47.07517 43.34811 47.88994
## 2092 45.36225 43.90468 46.81981 43.08912 47.63537
## 2096 45.10547 43.64592 46.56502 42.82925 47.38169
## 2100 44.84869 43.38659 46.31079 42.56850 47.12888
## 2104 44.59191 43.12671 46.05711 42.30687 46.87695
## 2108 44.33513 42.86627 45.80400 42.04438 46.62589
## 2112 44.07835 42.60527 45.55144 41.78103 46.37568
## 2116 43.82158 42.34373 45.29942 41.51682 46.12634
## 2120 43.56480 42.08164 45.04795 41.25176 45.87783
## 2124 43.30802 41.81902 44.79702 40.98587 45.63017
## 2128 43.05124 41.55587 44.54661 40.71916 45.38333
## 2132 42.79446 41.29220 44.29673 40.45162 45.13731
## 2136 42.53769 41.02800 44.04737 40.18328 44.89209
## 2140 42.28091 40.76330 43.79851 39.91415 44.64767
#The model projects winning times continuing to fall, reaching about 42 seconds by the end of the forecast horizon (2140), with an 80% interval of roughly 40.8 to 43.8 seconds and a 95% interval of roughly 39.9 to 44.6 seconds. These intervals assume that the linear model remains a good approximation of the relationship between year and winning time and that the residuals are uncorrelated with constant variance, assumptions that become increasingly doubtful this far into the future.
##3
easter(ausbeer)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 0.67 0.33 0.00 0.00
## 1957 0.00 1.00 0.00 0.00
## 1958 0.00 1.00 0.00 0.00
## 1959 1.00 0.00 0.00 0.00
## 1960 0.00 1.00 0.00 0.00
## 1961 0.33 0.67 0.00 0.00
## 1962 0.00 1.00 0.00 0.00
## 1963 0.00 1.00 0.00 0.00
## 1964 1.00 0.00 0.00 0.00
## 1965 0.00 1.00 0.00 0.00
## 1966 0.00 1.00 0.00 0.00
## 1967 1.00 0.00 0.00 0.00
## 1968 0.00 1.00 0.00 0.00
## 1969 0.00 1.00 0.00 0.00
## 1970 1.00 0.00 0.00 0.00
## 1971 0.00 1.00 0.00 0.00
## 1972 0.33 0.67 0.00 0.00
## 1973 0.00 1.00 0.00 0.00
## 1974 0.00 1.00 0.00 0.00
## 1975 1.00 0.00 0.00 0.00
## 1976 0.00 1.00 0.00 0.00
## 1977 0.00 1.00 0.00 0.00
## 1978 1.00 0.00 0.00 0.00
## 1979 0.00 1.00 0.00 0.00
## 1980 0.00 1.00 0.00 0.00
## 1981 0.00 1.00 0.00 0.00
## 1982 0.00 1.00 0.00 0.00
## 1983 0.00 1.00 0.00 0.00
## 1984 0.00 1.00 0.00 0.00
## 1985 0.00 1.00 0.00 0.00
## 1986 1.00 0.00 0.00 0.00
## 1987 0.00 1.00 0.00 0.00
## 1988 0.00 1.00 0.00 0.00
## 1989 1.00 0.00 0.00 0.00
## 1990 0.00 1.00 0.00 0.00
## 1991 1.00 0.00 0.00 0.00
## 1992 0.00 1.00 0.00 0.00
## 1993 0.00 1.00 0.00 0.00
## 1994 0.00 1.00 0.00 0.00
## 1995 0.00 1.00 0.00 0.00
## 1996 0.00 1.00 0.00 0.00
## 1997 1.00 0.00 0.00 0.00
## 1998 0.00 1.00 0.00 0.00
## 1999 0.00 1.00 0.00 0.00
## 2000 0.00 1.00 0.00 0.00
## 2001 0.00 1.00 0.00 0.00
## 2002 1.00 0.00 0.00 0.00
## 2003 0.00 1.00 0.00 0.00
## 2004 0.00 1.00 0.00 0.00
## 2005 1.00 0.00 0.00 0.00
## 2006 0.00 1.00 0.00 0.00
## 2007 0.00 1.00 0.00 0.00
## 2008 1.00 0.00 0.00 0.00
## 2009 0.00 1.00 0.00 0.00
## 2010 0.00 1.00
#The easter(ausbeer) output shows the quarter in which Easter falls. A fractional value means the Easter period straddles two quarters, beginning at the end of one quarter (Good Friday) and ending at the beginning of the next (Easter Sunday), with the fraction giving the share of the holiday falling in each quarter.
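# For illustration (a sketch; easter.dummy and beer.lm are illustrative names): the easter()
# output can be used directly as a regressor alongside trend and seasonal dummies to capture
# the moving-holiday effect.
easter.dummy <- easter(ausbeer)
beer.lm <- tslm(ausbeer ~ trend + season + easter.dummy)
summary(beer.lm)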
autoplot(fancy, xlab="Year", ylab="Monthly Sales")
#The data exhibit a strong seasonal pattern. There is little trend until 1991, after which a strong upward trend appears; before 1991 the seasonal fluctuations change little in size, while after 1990 their magnitude grows sharply with the level of the series.
#Taking logarithms of the data is necessary so that the seasonal variation, which grows roughly in proportion to the level of the series after 1991, is stabilised before fitting an additive regression model.
fancy.log <- log(fancy)
# Dummy variable for the surfing festival: 1 in March of each year from 1988 onward
fancy.dummy <- rep(0, length(fancy))
fancy.dummy[seq_along(fancy.dummy) %% 12 == 3] <- 1   # every March
fancy.dummy[3] <- 0                                    # no festival in March 1987
fancy.dummy <- ts(fancy.dummy, freq=12)
fancy.full <- data.frame(fancy.log, fancy.dummy)
lm3<- tslm(fancy.log~ trend + season + fancy.dummy, data=fancy.full)
autoplot(fancy.log, series = "Data") +
  autolayer(fitted(lm3), series = "Fitted") +
  ggtitle("Monthly Sales for Surf Shop") + xlab("Year") + ylab("Log sales")
plot(residuals(lm3))
#The residual plot shows a clear pattern over time rather than random scatter, so the model is not capturing some aspect of the data. The secular trend does not seem to be the problem, since the high residuals occur at both the beginning and the end of the period; the difficulty is more likely with the seasonal pattern.
boxplot(resid(lm3) ~ cycle(resid(lm3)))
# Boxplots show that there is an increase in residuals around and after the seasonal peak. This further supports the argument that the model struggles around and after the peak period.
summary(lm3)
##
## Call:
## tslm(formula = fancy.log ~ trend + season + fancy.dummy, data = fancy.full)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33673 -0.12757 0.00257 0.10911 0.37671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.6196670 0.0742471 102.626 < 2e-16 ***
## trend 0.0220198 0.0008268 26.634 < 2e-16 ***
## season2 0.2514168 0.0956790 2.628 0.010555 *
## season3 0.2660828 0.1934044 1.376 0.173275
## season4 0.3840535 0.0957075 4.013 0.000148 ***
## season5 0.4094870 0.0957325 4.277 5.88e-05 ***
## season6 0.4488283 0.0957647 4.687 1.33e-05 ***
## season7 0.6104545 0.0958039 6.372 1.71e-08 ***
## season8 0.5879644 0.0958503 6.134 4.53e-08 ***
## season9 0.6693299 0.0959037 6.979 1.36e-09 ***
## season10 0.7473919 0.0959643 7.788 4.48e-11 ***
## season11 1.2067479 0.0960319 12.566 < 2e-16 ***
## season12 1.9622412 0.0961066 20.417 < 2e-16 ***
## fancy.dummy 0.5015151 0.1964273 2.553 0.012856 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.179 on 70 degrees of freedom
## Multiple R-squared: 0.9567, Adjusted R-squared: 0.9487
## F-statistic: 119 on 13 and 70 DF, p-value: < 2.2e-16
#Overall, the coefficients are significant. The positive trend coefficient (about 0.022 per month on the log scale, roughly 2% growth per month) reflects the upward secular trend. The seasonal coefficients grow through the year and peak in December, showing the increasing magnitude of the peak trading periods, and the festival dummy adds about 0.50 on the log scale to March sales from 1988 onward (a back-transformation is sketched below).
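# A quick back-transformation of the festival coefficient to the original scale (a sketch):
# exp(coefficient) - 1 gives the proportional lift in March sales from 1988 onward.
exp(coef(lm3)[["fancy.dummy"]]) - 1   # roughly 0.65, i.e. about a 65% increase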
checkresiduals(lm3)
##
## Breusch-Godfrey test for serial correlation of order up to 17
##
## data: Residuals from Linear regression model
## LM test = 37.954, df = 17, p-value = 0.002494
#The Breusch-Godfrey test (p = 0.0025) indicates some remaining autocorrelation in the residuals, so the prediction intervals below may be somewhat too narrow.
fancy.dummy.pred <- data.frame(fancy.dummy = rep(0, 36))   # festival dummy set to 0 for all 36 forecast months
fancy.predict <- forecast(lm3, newdata = fancy.dummy.pred)
autoplot(fancy.log, series = "Data") +
  autolayer(fancy.predict, series = "Forecast") +
  ggtitle("Monthly Sales for Surf Shop") + xlab("Year") + ylab("Log sales")
summary(fancy.predict)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = fancy.log ~ trend + season + fancy.dummy, data = fancy.full)
##
## Coefficients:
## (Intercept) trend season2 season3 season4 season5
## 7.61967 0.02202 0.25142 0.26608 0.38405 0.40949
## season6 season7 season8 season9 season10 season11
## 0.44883 0.61045 0.58796 0.66933 0.74739 1.20675
## season12 fancy.dummy
## 1.96224 0.50152
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 5.285437e-17 0.1633968 0.13406 -0.0299207 1.459825 0.4277115
## ACF1
## Training set 0.5407548
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1994 9.491352 9.238522 9.744183 9.101594 9.88111
## Feb 1994 9.764789 9.511959 10.017620 9.375031 10.15455
## Mar 1994 9.801475 9.461879 10.141071 9.277961 10.32499
## Apr 1994 9.941465 9.688635 10.194296 9.551707 10.33122
## May 1994 9.988919 9.736088 10.241749 9.599161 10.37868
## Jun 1994 10.050280 9.797449 10.303110 9.660522 10.44004
## Jul 1994 10.233926 9.981095 10.486756 9.844168 10.62368
## Aug 1994 10.233456 9.980625 10.486286 9.843698 10.62321
## Sep 1994 10.336841 10.084010 10.589671 9.947083 10.72660
## Oct 1994 10.436923 10.184092 10.689753 10.047165 10.82668
## Nov 1994 10.918299 10.665468 11.171129 10.528541 11.30806
## Dec 1994 11.695812 11.442981 11.948642 11.306054 12.08557
## Jan 1995 9.755590 9.499844 10.011336 9.361338 10.14984
## Feb 1995 10.029027 9.773281 10.284773 9.634775 10.42328
## Mar 1995 10.065713 9.722498 10.408928 9.536620 10.59481
## Apr 1995 10.205703 9.949957 10.461449 9.811451 10.59996
## May 1995 10.253157 9.997411 10.508903 9.858904 10.64741
## Jun 1995 10.314518 10.058772 10.570264 9.920265 10.70877
## Jul 1995 10.498164 10.242418 10.753910 10.103911 10.89242
## Aug 1995 10.497694 10.241948 10.753440 10.103441 10.89195
## Sep 1995 10.601079 10.345333 10.856825 10.206826 10.99533
## Oct 1995 10.701161 10.445415 10.956907 10.306908 11.09541
## Nov 1995 11.182537 10.926791 11.438282 10.788284 11.57679
## Dec 1995 11.960050 11.704304 12.215796 11.565797 12.35430
## Jan 1996 10.019828 9.760564 10.279093 9.620151 10.41951
## Feb 1996 10.293265 10.034000 10.552530 9.893588 10.69294
## Mar 1996 10.329951 9.982679 10.677222 9.794605 10.86530
## Apr 1996 10.469941 10.210677 10.729206 10.070264 10.86962
## May 1996 10.517395 10.258130 10.776659 10.117718 10.91707
## Jun 1996 10.578756 10.319491 10.838021 10.179079 10.97843
## Jul 1996 10.762402 10.503137 11.021667 10.362725 11.16208
## Aug 1996 10.761932 10.502667 11.021196 10.362254 11.16161
## Sep 1996 10.865317 10.606052 11.124582 10.465640 11.26499
## Oct 1996 10.965399 10.706134 11.224664 10.565722 11.36508
## Nov 1996 11.446774 11.187510 11.706039 11.047097 11.84645
## Dec 1996 12.224288 11.965023 12.483552 11.824611 12.62396
fancy.full.2 <- data.frame(fancy, fancy.dummy)
lm4<- tslm(fancy~ trend + season + fancy.dummy, data=fancy.full.2)
fancy.predict.2 <- forecast(lm4, newdata= fancy.dummy.pred)
autoplot(fancy, series = "Data") +
  autolayer(fancy.predict.2, series = "Forecast") +
  ggtitle("Monthly Sales for Surf Shop, raw data") + xlab("Year") + ylab("Sales")
summary(fancy.predict.2)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = fancy ~ trend + season + fancy.dummy, data = fancy.full.2)
##
## Coefficients:
## (Intercept) trend season2 season3 season4 season5
## -6552.4 321.8 994.2 8427.6 1935.4 1615.6
## season6 season7 season8 season9 season10 season11
## 2237.6 4165.6 4606.3 5392.9 6068.9 13961.8
## season12 fancy.dummy
## 40092.5 -3752.9
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -5.842945e-13 7661.599 4947.007 13.02307 52.44544 1.117872
## ACF1
## Training set 0.352124
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1994 20803.25 8948.158 32658.35 2527.692 39078.82
## Feb 1994 22119.27 10264.176 33974.37 3843.710 40394.84
## Mar 1994 29874.49 13951.003 45797.98 5327.182 54421.80
## Apr 1994 23704.12 11849.022 35559.22 5428.556 41979.68
## May 1994 23706.18 11851.081 35561.27 5430.614 41981.74
## Jun 1994 24649.98 12794.883 36505.08 6374.417 42925.54
## Jul 1994 26899.88 15044.786 38754.98 8624.320 45175.45
## Aug 1994 27662.41 15807.309 39517.50 9386.843 45937.97
## Sep 1994 28770.83 16915.731 40625.92 10495.264 47046.39
## Oct 1994 29768.64 17913.543 41623.74 11493.077 48044.20
## Nov 1994 37983.37 26128.275 49838.47 19707.809 56258.94
## Dec 1994 64435.94 52580.839 76291.03 46160.373 82711.50
## Jan 1995 24665.22 12673.420 36657.03 6178.917 43151.53
## Feb 1995 25981.24 13989.439 37973.05 7494.936 44467.55
## Mar 1995 33736.46 17643.268 49829.66 8927.539 58545.39
## Apr 1995 27566.09 15574.285 39557.89 9079.781 46052.40
## May 1995 27568.15 15576.343 39559.95 9081.840 46054.45
## Jun 1995 28511.95 16520.146 40503.75 10025.643 46998.26
## Jul 1995 30761.85 18770.049 42753.66 12275.546 49248.16
## Aug 1995 31524.38 19532.572 43516.18 13038.068 50010.68
## Sep 1995 32632.80 20640.993 44624.60 14146.490 51119.10
## Oct 1995 33630.61 21638.806 45622.41 15144.303 52116.92
## Nov 1995 41845.34 29853.538 53837.14 23359.034 60331.65
## Dec 1995 68297.91 56306.102 80289.71 49811.598 86784.21
## Jan 1996 28527.19 16370.391 40683.99 9786.528 47267.86
## Feb 1996 29843.21 17686.410 42000.01 11102.547 48583.88
## Mar 1996 37598.43 21315.043 53881.82 12496.309 62700.55
## Apr 1996 31428.06 19271.256 43584.86 12687.393 50168.72
## May 1996 31430.12 19273.314 43586.92 12689.451 50170.78
## Jun 1996 32373.92 20217.117 44530.72 13633.254 51114.58
## Jul 1996 34623.82 22467.020 46780.62 15883.157 53364.49
## Aug 1996 35386.34 23229.543 47543.15 16645.680 54127.01
## Sep 1996 36494.77 24337.964 48651.57 17754.101 55235.43
## Oct 1996 37492.58 25335.777 49649.38 18751.914 56233.24
## Nov 1996 45707.31 33550.508 57864.11 26966.646 64447.97
## Dec 1996 72159.87 60003.073 84316.68 53419.210 90900.54
##6
View(gasoline)
gas <- window(gasoline, end=2005)
autoplot(gas, ylab="Weekly Gas Supply")
fourier2 <- tslm(gas~trend+ fourier(gas, K=2))
fourier5<- tslm(gas ~ trend + fourier(gas, K=5))
fourier10<- tslm(gas ~ trend +fourier(gas, K=10))
fourier20<- tslm(gas ~ trend +fourier(gas, K=20))
autoplot(gas)+ autolayer(fourier2$fitted.values)
autoplot(gas) +autolayer(fourier5$fitted.values)
autoplot(gas) + autolayer(fourier10$fitted.values)
autoplot(gas) + autolayer(fourier20$fitted.values)
# The fitted values approximate the data well. Increasing the number of Fourier terms increases the flexibility of the fitted seasonal component, so its magnitude grows and it tracks more of the within-year variation.
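# For reference (a sketch), fourier() simply generates the sine and cosine regressors that
# tslm() uses; with K = 2 the first few rows look like this:
head(fourier(gas, K = 2), 3)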
#Selecting number of Fourier terms
# Fit models with K = 1, ..., 20 Fourier terms and record the AIC of each
list <- c()
for(j in 1:20){
  model <- tslm(gas ~ trend + fourier(gas, K = j))
  list[[j]] <- AIC(model)
}
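# An alternative to looping over AIC (a sketch): forecast::CV() reports CV, AIC, AICc and BIC
# for a single tslm fit, and could be compared across candidate values of K.
CV(tslm(gas ~ trend + fourier(gas, K = 12)))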
print(list)
## [[1]]
## [1] 247.007
##
## [[2]]
## [1] 241.1854
##
## [[3]]
## [1] 197.2137
##
## [[4]]
## [1] 196.1867
##
## [[5]]
## [1] 187.0649
##
## [[6]]
## [1] 157.5453
##
## [[7]]
## [1] 146.7625
##
## [[8]]
## [1] 146.6802
##
## [[9]]
## [1] 148.0063
##
## [[10]]
## [1] 145.2851
##
## [[11]]
## [1] 145.2139
##
## [[12]]
## [1] 141.0226
##
## [[13]]
## [1] 144.6272
##
## [[14]]
## [1] 146.0811
##
## [[15]]
## [1] 150.0682
##
## [[16]]
## [1] 151.4813
##
## [[17]]
## [1] 152.4293
##
## [[18]]
## [1] 145.6099
##
## [[19]]
## [1] 148.8298
##
## [[20]]
## [1] 152.2815
print("Fourier Term with lowest AIC")
## [1] "Fourier Term with lowest AIC"
which.min(list)
## [1] 12
fourier12<- tslm(gas ~ trend +fourier(gas, K=12))
checkresiduals(fourier12)
##
## Breusch-Godfrey test for serial correlation of order up to 104
##
## data: Residuals from Linear regression model
## LM test = 154.2, df = 104, p-value = 0.001021
fc <- forecast(fourier12, newdata=data.frame(fourier(gas,K= 12,h=52)))
autoplot(gas)+ autolayer(fc)
autoplot(gasoline) +autolayer(fc)
#Although the Breusch-Godfrey test (p = 0.001) indicates some remaining autocorrelation in the residuals, the forecasts track the actual 2005 data well, accurately capturing both the trend and the seasonal pattern.
##7
autoplot(huron)
lm.huron <- tslm(huron~trend)
summary(lm.huron)
##
## Call:
## tslm(formula = huron ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.50997 -0.72726 0.00083 0.74402 2.53565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.202037 0.230111 44.335 < 2e-16 ***
## trend -0.024201 0.004036 -5.996 3.55e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.13 on 96 degrees of freedom
## Multiple R-squared: 0.2725, Adjusted R-squared: 0.2649
## F-statistic: 35.95 on 1 and 96 DF, p-value: 3.545e-08
# Piecewise linear trend: a second regressor that is zero before the 1915 knot and increases
# linearly afterwards, allowing the slope of the trend to change at 1915
time.h <- time(huron)
knot.h <- 1915
ts.huron <- ts(pmax(0, time.h - knot.h), start=1875)
tslm.knot <- tslm(huron ~ time.h + ts.huron)
summary(tslm.knot)
##
## Call:
## tslm(formula = huron ~ time.h + ts.huron)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.49626 -0.66240 -0.07139 0.85163 2.39222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 132.90870 19.97687 6.653 1.82e-09 ***
## time.h -0.06498 0.01051 -6.181 1.58e-08 ***
## ts.huron 0.06486 0.01563 4.150 7.26e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.045 on 95 degrees of freedom
## Multiple R-squared: 0.3841, Adjusted R-squared: 0.3711
## F-statistic: 29.62 on 2 and 95 DF, p-value: 1.004e-10
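# A possible continuation (a sketch; new.h is an illustrative name): extend both trend
# regressors to 1973-1980 and forecast from the piecewise model. The newdata column names
# must match the regressor names used above.
new.h <- data.frame(time.h = 1973:1980, ts.huron = pmax(0, 1973:1980 - knot.h))
forecast(tslm.knot, newdata = new.h)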
###Chapter 6
#2. a.
autoplot(plastics)
#There appear to be both seasonal fluctuations and a positive trend.
#b.
plastic.decomp<- decompose(plastics, "multiplicative")
autoplot(plastic.decomp)
#The results support conclusions made in part A. There is a clear positive trend, as well as a clear seasonal component.
autoplot(plastics, series='Data')+autolayer(seasadj(plastic.decomp), series='Seasonally Adjusted')
#e.
plastics.df <- plastics
plastics.df[30] <- plastics.df[30] + 500   # add an outlier of +500 near the middle of the series
plastic.decomp.df <- decompose(plastics.df, "multiplicative")
autoplot(plastics.df, series='Data')+autolayer(seasadj(plastic.decomp.df), series='Seasonally Adjusted')
#The outlier has a significant effect, disrupting the seasonal pattern and causing a spike in both the data and the seasonally adjusted decomposition.
#f. No matter where in the dataset the outlier resides, the decomposition is sensitive to it, because classical decomposition averages the seasonal pattern across all years (a quick check is sketched below).
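# A quick check of this claim (a sketch; plastics.end is an illustrative name): place the same
# +500 outlier near the end of the series instead and compare with the plot above.
plastics.end <- plastics
plastics.end[55] <- plastics.end[55] + 500
plastic.decomp.end <- decompose(plastics.end, "multiplicative")
autoplot(plastics.end, series='Data') + autolayer(seasadj(plastic.decomp.end), series='Seasonally Adjusted')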
##3.
library(readxl)
retail <- read_excel("C:/Users/cthom/OneDrive/Boston College/Econometrics/retail.xlsx", skip=1)
myts <- ts(retail[,"A3349873A"],frequency=12, start=c(1982,4))
library(seasonal)
myts %>% seas(x11="") -> x11_fit
autoplot(x11_fit)+ggtitle("Retail X11 decomposition")
#The X11 decomposition shows a clear seasonal pattern and an upward trend. There are a few large spikes in the remainder component, suggesting some outliers, particularly around 1988, 1994, and 2001; these spikes were not noticeable in the earlier plots.
##4
#A. The data show a clear upward trend. While there is also a seasonal component, the magnitude of the seasonal fluctuations is small relative to the level of the data: the fluctuations lie between roughly -100 and 100, compared with data levels of between 7,000 and 9,000.
#B. The recession is visible as a large negative spike in the remainder component. This indicates a change in the overall level that is not attributable to the trend or seasonal components; the recession acts as an exogenous shock.
##5
autoplot(cangas) + xlab("Month") + ylab("Gas Production") + ggtitle("Canadian Gas Production ")
ggsubseriesplot(cangas, main = NULL) + xlab("Month") + ylab("Gas Production")
ggseasonplot(cangas, main = NULL) + xlab("Month") + ylab("Gas Production")
#Overall, the trend level of gas production was rising during peak periods. The changing seasonality reflects a greater magnitude of the seasonal effect in the 70s and 80s: while peak production kept increasing, the troughs in the 70s and 80s were deeper than in other periods. The deepening troughs produced a flat overall trend level from the mid-70s to the mid/late 80s.
cangas_stl <- stl(cangas, s.window = 13, robust = TRUE)
autoplot(cangas_stl) + xlab("Month") + ggtitle("STL Decomposition of Canadian Gas Production")
cangas_seats <- seas(cangas)
autoplot(cangas_seats) + xlab("Month") + ggtitle("SEATS Decomposition of Canadian Gas Production")
cangas %>% seas(x11="") -> cangas_x11
autoplot(cangas_x11) +xlab("Month")+ggtitle("X11 Decomposition of Canadian Gas Production")
#The STL decomposition does a better job of representing the change in the magnitude of the seasonal pattern, visible as a fatter mid-section (same for me :)) in its seasonal panel. This lets it model the data better, reflected in generally more stable remainders. The SEATS and X11 decompositions struggle with the increase and subsequent decrease in seasonal magnitude: because those models are multiplicative, they overestimate the seasonal effect before the increase in magnitude and underestimate it after the decrease. This shows up as larger decomposed seasonality in the early part of the series and smaller decomposed seasonality in the late part of the series.
##6.
bricksq_stl <- stl(bricksq, s.window="periodic", robust=TRUE)
autoplot(bricksq_stl)
bricksq_stl5 <- stl(bricksq, s.window=5, robust=TRUE)
autoplot(bricksq_stl5)
bricksq_stl3 <- stl(bricksq, s.window=3, robust=TRUE)
autoplot(bricksq_stl3)
#b.
bricksq_stl <- stl(bricksq, s.window="periodic", robust=FALSE)
brick_seas<- seasadj(bricksq_stl)
autoplot(brick_seas)
brick_naive <- naive(brick_seas)
autoplot(brick_naive)
brick_stlf <- stlf(brick_seas, method="naive")
forecast(brick_stlf, h=8)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1994 Q4 465.3437 440.0423 490.6450 426.6486 504.0387
## 1995 Q1 463.5593 427.7777 499.3408 408.8362 518.2824
## 1995 Q2 472.8643 429.0410 516.6875 405.8424 539.8861
## 1995 Q3 469.4082 418.8055 520.0109 392.0180 546.7983
## 1995 Q4 465.3437 408.7681 521.9192 378.8188 551.8685
## 1996 Q1 463.5593 401.5839 525.5347 368.7761 558.3425
## 1996 Q2 472.8643 405.9232 539.8053 370.4867 575.2418
## 1996 Q3 469.4082 397.8451 540.9712 359.9620 578.8544
checkresiduals(brick_stlf)
## Warning in checkresiduals(brick_stlf): The fitted degrees of freedom is based on
## the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + Random walk
## Q* = 44.369, df = 8, p-value = 4.845e-07
##
## Model df: 0. Total lags used: 8
#The Ljung-Box test (p < 0.001) indicates the residuals are not white noise, and the ACF plot shows significant spikes at lags 1 and 8. The correlation is not so frequent or strong as to disqualify the results, but the prediction intervals should be interpreted with some caution.
bricksq_stlT <- stl(bricksq, s.window="periodic", robust=TRUE)
brick_seasT<- seasadj(bricksq_stlT)
autoplot(brick_seasT)
brick_stlfT <- stlf(brick_seasT, method="naive")
forecast(brick_stlfT, h=8)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1994 Q4 465.6914 440.3901 490.9928 426.9964 504.3865
## 1995 Q1 459.3697 423.5882 495.1512 404.6466 514.0928
## 1995 Q2 474.3032 430.4800 518.1264 407.2814 541.3251
## 1995 Q3 471.8110 421.2083 522.4137 394.4208 549.2011
## 1995 Q4 465.6914 409.1159 522.2670 379.1666 552.2163
## 1996 Q1 459.3697 397.3943 521.3451 364.5865 554.1529
## 1996 Q2 474.3032 407.3621 541.2443 371.9257 576.6808
## 1996 Q3 471.8110 400.2479 543.3740 362.3648 581.2572
checkresiduals(brick_stlfT)
## Warning in checkresiduals(brick_stlfT): The fitted degrees of freedom is based
## on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + Random walk
## Q* = 44.369, df = 8, p-value = 4.845e-07
##
## Model df: 0. Total lags used: 8
#Utilizing the robust STL decomposition does not appear to make much of a difference.
#g
b.train <- window(bricksq, end=c(1992,3))
b.test <- window(bricksq, start=c(1992,4), end=c(1994,3))
b.train.stl <- stlf(b.train)
b.train.stl.forecast <- forecast(b.train.stl, h=8)
b.train.snaive <- snaive(b.train)
b.train.snaive.forecast <- forecast(b.train.snaive)
autoplot(bricksq) +
  autolayer(b.train.stl.forecast, series="Stlf Forecast", PI=FALSE, size=1) +
  autolayer(b.train.snaive.forecast, series="SNaive forecast", size=1, PI=FALSE) +
  scale_x_continuous(limits = c(1990, 1994.5))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
#The STLF forecast follows the pattern of the test data more closely than the seasonal naive forecast, and so is preferable here (a direct accuracy comparison is sketched below).
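# To quantify the visual comparison (a sketch), test-set accuracy can be computed directly:
accuracy(b.train.stl.forecast, b.test)
accuracy(b.train.snaive.forecast, b.test)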
##7.
hist(writing)
#The histogram shows the data are skewed rather than roughly normal, so a Box-Cox transformation is used to stabilise the variance before decomposing.
writing.fit <- stlf(writing, s.window="periodic", robust=TRUE, lambda=BoxCox.lambda(writing), method="naive")
autoplot(writing.fit)
writing.fit2 <- stlf(writing, s.window="periodic", robust=TRUE, lambda=BoxCox.lambda(writing), method="rwdrift")
autoplot(writing.fit2)
##8
fancy.fit <- stlf(fancy, s.window="periodic", robust=TRUE, lambda=BoxCox.lambda(fancy), method="rwdrift")
autoplot(fancy.fit)