library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2 3.3.3 v fma 2.4
## v forecast 8.15 v expsmooth 2.3
##
daily20 <- head(elecdaily,20)
autoplot(daily20)
lm1 <- tslm(Demand~Temperature, data=daily20)
summary(lm1)
##
## Call:
## tslm(formula = Demand ~ Temperature, data = daily20)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.060 -7.117 -1.437 17.484 27.102
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.2117 17.9915 2.179 0.0428 *
## Temperature 6.7572 0.6114 11.052 1.88e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22 on 18 degrees of freedom
## Multiple R-squared: 0.8716, Adjusted R-squared: 0.8644
## F-statistic: 122.1 on 1 and 18 DF, p-value: 1.876e-09
checkresiduals(lm1)
##
## Breusch-Godfrey test for serial correlation of order up to 5
##
## data: Residuals from Linear regression model
## LM test = 3.8079, df = 5, p-value = 0.5774
#There seems to be a positive linear trend in the residuals, indicating the model is not fully accounting for the trend in the data. The ACF shows no significant serial correlations, consistent with the Breusch-Godfrey result above (a further residual check is sketched below).
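# A further check (a sketch; resid_df is just an illustrative name): plot the residuals
# against the predictor; curvature or a trend here would support the comment above.
resid_df <- data.frame(Temperature = as.numeric(daily20[, "Temperature"]),
                       Residuals = as.numeric(residuals(lm1)))
ggplot(resid_df, aes(x = Temperature, y = Residuals)) + geom_point()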
forecast15<- forecast(lm1, newdata=data.frame(Temperature=15))
forecast15
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 4.285714 140.5701 108.681 172.4591 90.21166 190.9285
forecast35<- forecast(lm1, newdata=data.frame(Temperature=35))
forecast35
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 4.285714 275.7146 245.2278 306.2014 227.5706 323.8586
autoplot(daily20, facets=TRUE)
daily20 %>%
as.data.frame() %>%
ggplot(aes(x=Temperature, y=Demand)) +
geom_point() +
geom_smooth(method="lm", se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
fit <- tslm(Demand ~ Temperature, data=daily20)
checkresiduals(fit)
##
## Breusch-Godfrey test for serial correlation of order up to 5
##
## data: Residuals from Linear regression model
## LM test = 3.8079, df = 5, p-value = 0.5774
forecast(fit, newdata=data.frame(Temperature=c(15,35)))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 4.285714 140.5701 108.6810 172.4591 90.21166 190.9285
## 4.428571 275.7146 245.2278 306.2014 227.57056 323.8586
plot(Demand~Temperature, data=elecdaily, main= "Demand vs. Temperature for All Data")
#The plot of the full dataset shows a strong relationship between temperature and electricity demand, but it is clearly not linear: demand rises at both low and high temperatures. A nonlinear regression model would therefore fit better than the simple linear model above (one possibility is sketched below).
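# One way to allow for the curvature (a sketch, not part of the fit above; lm_quad is an
# illustrative name): add a squared temperature term so demand can rise at both low and
# high temperatures.
lm_quad <- tslm(Demand ~ Temperature + I(Temperature^2), data = elecdaily)
summary(lm_quad)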
##2
View(mens400)
autoplot(mens400, xlab="Year", ylab="Seconds", main="Winning Time by Year")
# Winning times have decreased consistently from one Games to the next. The slope appears to be flattening slightly, suggesting the rate of improvement is diminishing. While the pattern looks roughly linear, a non-linear model might offer slightly better performance.
df <- na.interp(mens400, lambda = NULL)   # interpolate the missing winning times
seconds <- time(df)                       # note: despite the name, this holds the year of each Games
lm2 <- tslm(df ~ seconds, data = df)      # regress winning time on year
autoplot(mens400, series = "Data") +
  autolayer(fitted(lm2), series = "Fitted") +
  ggtitle("Winning Times by Year") + xlab("Year") + ylab("Seconds")
summary(lm2)
##
## Call:
## tslm(formula = df ~ seconds, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5622 -0.5877 -0.2707 0.5497 4.2157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.697133 10.724592 16.01 6.19e-16 ***
## seconds -0.064195 0.005482 -11.71 1.64e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.092 on 29 degrees of freedom
## Multiple R-squared: 0.8254, Adjusted R-squared: 0.8194
## F-statistic: 137.1 on 1 and 29 DF, p-value: 1.638e-12
#On average, the winning times have been decreasing by about 0.064 seconds per year, i.e. roughly 0.26 seconds per Olympiad.
checkresiduals(lm2)
##
## Breusch-Godfrey test for serial correlation of order up to 6
##
## data: Residuals from Linear regression model
## LM test = 4.3582, df = 6, p-value = 0.6283
#The residuals are larger at the beginning and end of the period, suggesting a non-linear regression model would capture the relationship better (one alternative is sketched below).
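# A possible alternative (a sketch; lm2.log is an illustrative name): model the log of the
# winning times so the improvement decays over the years instead of staying constant.
lm2.log <- tslm(log(df) ~ seconds)
autoplot(df, series = "Data") +
  autolayer(exp(fitted(lm2.log)), series = "Fitted (log model)")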
t <- seq(2020, 2140, by = 4)   # Olympic years to forecast
forecast_mens <- forecast(lm2, newdata = data.frame(seconds = t))
forecast_mens
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2020 49.98425 48.46665 51.50186 47.61750 52.35101
## 2024 49.72748 48.21779 51.23716 47.37307 52.08188
## 2028 49.47070 47.96843 50.97297 47.12785 51.81354
## 2032 49.21392 47.71855 50.70929 46.88183 51.54601
## 2036 48.95714 47.46814 50.44614 46.63499 51.27929
## 2040 48.70036 47.21721 50.18352 46.38733 51.01340
## 2044 48.44358 46.96574 49.92143 46.13883 50.74834
## 2048 48.18681 46.71372 49.65989 45.88948 50.48414
## 2052 47.93003 46.46116 49.39890 45.63927 50.22078
## 2056 47.67325 46.20805 49.13845 45.38821 49.95829
## 2060 47.41647 45.95438 48.87857 45.13628 49.69666
## 2064 47.15969 45.70015 48.61924 44.88347 49.43591
## 2068 46.90292 45.44535 48.36048 44.62979 49.17604
## 2072 46.64614 45.18999 48.10228 44.37522 48.91705
## 2076 46.38936 44.93407 47.84465 44.11978 48.65894
## 2080 46.13258 44.67757 47.58759 43.86344 48.40172
## 2084 45.87580 44.42051 47.33110 43.60622 48.14539
## 2088 45.61902 44.16288 47.07517 43.34811 47.88994
## 2092 45.36225 43.90468 46.81981 43.08912 47.63537
## 2096 45.10547 43.64592 46.56502 42.82925 47.38169
## 2100 44.84869 43.38659 46.31079 42.56850 47.12888
## 2104 44.59191 43.12671 46.05711 42.30687 46.87695
## 2108 44.33513 42.86627 45.80400 42.04438 46.62589
## 2112 44.07835 42.60527 45.55144 41.78103 46.37568
## 2116 43.82158 42.34373 45.29942 41.51682 46.12634
## 2120 43.56480 42.08164 45.04795 41.25176 45.87783
## 2124 43.30802 41.81902 44.79702 40.98587 45.63017
## 2128 43.05124 41.55587 44.54661 40.71916 45.38333
## 2132 42.79446 41.29220 44.29673 40.45162 45.13731
## 2136 42.53769 41.02800 44.04737 40.18328 44.89209
## 2140 42.28091 40.76330 43.79851 39.91415 44.64767
#The model projects winning times continuing to fall, reaching about 42 seconds by the end of the forecast horizon (2140), with an 80% interval of roughly 40.8 to 43.8 seconds and a 95% interval of roughly 39.9 to 44.6 seconds. These intervals assume that the linear model remains a good approximation of the relationship between year and winning time and that the residuals are uncorrelated with constant variance, assumptions that become increasingly doubtful this far into the future.
##3
easter(ausbeer)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 0.67 0.33 0.00 0.00
## 1957 0.00 1.00 0.00 0.00
## 1958 0.00 1.00 0.00 0.00
## 1959 1.00 0.00 0.00 0.00
## 1960 0.00 1.00 0.00 0.00
## 1961 0.33 0.67 0.00 0.00
## 1962 0.00 1.00 0.00 0.00
## 1963 0.00 1.00 0.00 0.00
## 1964 1.00 0.00 0.00 0.00
## 1965 0.00 1.00 0.00 0.00
## 1966 0.00 1.00 0.00 0.00
## 1967 1.00 0.00 0.00 0.00
## 1968 0.00 1.00 0.00 0.00
## 1969 0.00 1.00 0.00 0.00
## 1970 1.00 0.00 0.00 0.00
## 1971 0.00 1.00 0.00 0.00
## 1972 0.33 0.67 0.00 0.00
## 1973 0.00 1.00 0.00 0.00
## 1974 0.00 1.00 0.00 0.00
## 1975 1.00 0.00 0.00 0.00
## 1976 0.00 1.00 0.00 0.00
## 1977 0.00 1.00 0.00 0.00
## 1978 1.00 0.00 0.00 0.00
## 1979 0.00 1.00 0.00 0.00
## 1980 0.00 1.00 0.00 0.00
## 1981 0.00 1.00 0.00 0.00
## 1982 0.00 1.00 0.00 0.00
## 1983 0.00 1.00 0.00 0.00
## 1984 0.00 1.00 0.00 0.00
## 1985 0.00 1.00 0.00 0.00
## 1986 1.00 0.00 0.00 0.00
## 1987 0.00 1.00 0.00 0.00
## 1988 0.00 1.00 0.00 0.00
## 1989 1.00 0.00 0.00 0.00
## 1990 0.00 1.00 0.00 0.00
## 1991 1.00 0.00 0.00 0.00
## 1992 0.00 1.00 0.00 0.00
## 1993 0.00 1.00 0.00 0.00
## 1994 0.00 1.00 0.00 0.00
## 1995 0.00 1.00 0.00 0.00
## 1996 0.00 1.00 0.00 0.00
## 1997 1.00 0.00 0.00 0.00
## 1998 0.00 1.00 0.00 0.00
## 1999 0.00 1.00 0.00 0.00
## 2000 0.00 1.00 0.00 0.00
## 2001 0.00 1.00 0.00 0.00
## 2002 1.00 0.00 0.00 0.00
## 2003 0.00 1.00 0.00 0.00
## 2004 0.00 1.00 0.00 0.00
## 2005 1.00 0.00 0.00 0.00
## 2006 0.00 1.00 0.00 0.00
## 2007 0.00 1.00 0.00 0.00
## 2008 1.00 0.00 0.00 0.00
## 2009 0.00 1.00 0.00 0.00
## 2010 0.00 1.00
#The easter(ausbeer) output shows the quarter in which Easter falls. A fractional value means the Easter period straddles two quarters, beginning at the end of one quarter (Good Friday) and ending at the beginning of the next (Easter Sunday), with the fraction giving the share of the holiday falling in each quarter.
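# For illustration (a sketch; easter.dummy and beer.lm are illustrative names): the easter()
# output can be used directly as a regressor alongside trend and seasonal dummies to capture
# the moving-holiday effect.
easter.dummy <- easter(ausbeer)
beer.lm <- tslm(ausbeer ~ trend + season + easter.dummy)
summary(beer.lm)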
autoplot(fancy, xlab="Year", ylab="Monthly Sales")
#The data exhibit a strong seasonal pattern. There is little trend until 1991, after which a strong upward trend appears; before 1991 the seasonal fluctuations change little in size, while after 1990 their magnitude grows sharply with the level of the series.
#Taking logarithms of the data is necessary so that the seasonal variation, which grows roughly in proportion to the level of the series after 1991, is stabilised before fitting an additive regression model.
fancy.log <- log(fancy)
# Dummy variable for the surfing festival: 1 in March of each year from 1988 onward
fancy.dummy <- rep(0, length(fancy))
fancy.dummy[seq_along(fancy.dummy) %% 12 == 3] <- 1   # every March
fancy.dummy[3] <- 0                                    # no festival in March 1987
fancy.dummy <- ts(fancy.dummy, freq=12)
fancy.full <- data.frame(fancy.log, fancy.dummy)
lm3<- tslm(fancy.log~ trend + season + fancy.dummy, data=fancy.full)
autoplot(fancy.log, series = "Data") +
  autolayer(fitted(lm3), series = "Fitted") +
  ggtitle("Monthly Sales for Surf Shop") + xlab("Year") + ylab("Log sales")
plot(residuals(lm3))
#The residual plot shows a clear pattern over time rather than random scatter, so the model is not capturing some aspect of the data. The secular trend does not seem to be the problem, since the high residuals occur at both the beginning and the end of the period; the difficulty is more likely with the seasonal pattern.
boxplot(resid(lm3) ~ cycle(resid(lm3)))
# Boxplots show that there is an increase in residuals around and after the seasonal peak. This further supports the argument that the model struggles around and after the peak period.
summary(lm3)
##
## Call:
## tslm(formula = fancy.log ~ trend + season + fancy.dummy, data = fancy.full)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33673 -0.12757 0.00257 0.10911 0.37671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.6196670 0.0742471 102.626 < 2e-16 ***
## trend 0.0220198 0.0008268 26.634 < 2e-16 ***
## season2 0.2514168 0.0956790 2.628 0.010555 *
## season3 0.2660828 0.1934044 1.376 0.173275
## season4 0.3840535 0.0957075 4.013 0.000148 ***
## season5 0.4094870 0.0957325 4.277 5.88e-05 ***
## season6 0.4488283 0.0957647 4.687 1.33e-05 ***
## season7 0.6104545 0.0958039 6.372 1.71e-08 ***
## season8 0.5879644 0.0958503 6.134 4.53e-08 ***
## season9 0.6693299 0.0959037 6.979 1.36e-09 ***
## season10 0.7473919 0.0959643 7.788 4.48e-11 ***
## season11 1.2067479 0.0960319 12.566 < 2e-16 ***
## season12 1.9622412 0.0961066 20.417 < 2e-16 ***
## fancy.dummy 0.5015151 0.1964273 2.553 0.012856 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.179 on 70 degrees of freedom
## Multiple R-squared: 0.9567, Adjusted R-squared: 0.9487
## F-statistic: 119 on 13 and 70 DF, p-value: < 2.2e-16
#Overall, the coefficients are significant. The positive trend coefficient (about 0.022 per month on the log scale, roughly 2% growth per month) reflects the upward secular trend. The seasonal coefficients grow through the year and peak in December, showing the increasing magnitude of the peak trading periods, and the festival dummy adds about 0.50 on the log scale to March sales from 1988 onward (a back-transformation is sketched below).
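# A quick back-transformation of the festival coefficient to the original scale (a sketch):
# exp(coefficient) - 1 gives the proportional lift in March sales from 1988 onward.
exp(coef(lm3)[["fancy.dummy"]]) - 1   # roughly 0.65, i.e. about a 65% increase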
checkresiduals(lm3)
##
## Breusch-Godfrey test for serial correlation of order up to 17
##
## data: Residuals from Linear regression model
## LM test = 37.954, df = 17, p-value = 0.002494
#The Breusch-Godfrey test (p = 0.0025) indicates some remaining autocorrelation in the residuals, so the prediction intervals below may be somewhat too narrow.
fancy.dummy.pred <- data.frame(fancy.dummy = rep(0, 36))   # festival dummy set to 0 for all 36 forecast months
fancy.predict <- forecast(lm3, newdata = fancy.dummy.pred)
autoplot(fancy.log, series = "Data") +
  autolayer(fancy.predict, series = "Forecast") +
  ggtitle("Monthly Sales for Surf Shop") + xlab("Year") + ylab("Log sales")
summary(fancy.predict)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = fancy.log ~ trend + season + fancy.dummy, data = fancy.full)
##
## Coefficients:
## (Intercept) trend season2 season3 season4 season5
## 7.61967 0.02202 0.25142 0.26608 0.38405 0.40949
## season6 season7 season8 season9 season10 season11
## 0.44883 0.61045 0.58796 0.66933 0.74739 1.20675
## season12 fancy.dummy
## 1.96224 0.50152
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 5.285437e-17 0.1633968 0.13406 -0.0299207 1.459825 0.4277115
## ACF1
## Training set 0.5407548
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1994 9.491352 9.238522 9.744183 9.101594 9.88111
## Feb 1994 9.764789 9.511959 10.017620 9.375031 10.15455
## Mar 1994 9.801475 9.461879 10.141071 9.277961 10.32499
## Apr 1994 9.941465 9.688635 10.194296 9.551707 10.33122
## May 1994 9.988919 9.736088 10.241749 9.599161 10.37868
## Jun 1994 10.050280 9.797449 10.303110 9.660522 10.44004
## Jul 1994 10.233926 9.981095 10.486756 9.844168 10.62368
## Aug 1994 10.233456 9.980625 10.486286 9.843698 10.62321
## Sep 1994 10.336841 10.084010 10.589671 9.947083 10.72660
## Oct 1994 10.436923 10.184092 10.689753 10.047165 10.82668
## Nov 1994 10.918299 10.665468 11.171129 10.528541 11.30806
## Dec 1994 11.695812 11.442981 11.948642 11.306054 12.08557
## Jan 1995 9.755590 9.499844 10.011336 9.361338 10.14984
## Feb 1995 10.029027 9.773281 10.284773 9.634775 10.42328
## Mar 1995 10.065713 9.722498 10.408928 9.536620 10.59481
## Apr 1995 10.205703 9.949957 10.461449 9.811451 10.59996
## May 1995 10.253157 9.997411 10.508903 9.858904 10.64741
## Jun 1995 10.314518 10.058772 10.570264 9.920265 10.70877
## Jul 1995 10.498164 10.242418 10.753910 10.103911 10.89242
## Aug 1995 10.497694 10.241948 10.753440 10.103441 10.89195
## Sep 1995 10.601079 10.345333 10.856825 10.206826 10.99533
## Oct 1995 10.701161 10.445415 10.956907 10.306908 11.09541
## Nov 1995 11.182537 10.926791 11.438282 10.788284 11.57679
## Dec 1995 11.960050 11.704304 12.215796 11.565797 12.35430
## Jan 1996 10.019828 9.760564 10.279093 9.620151 10.41951
## Feb 1996 10.293265 10.034000 10.552530 9.893588 10.69294
## Mar 1996 10.329951 9.982679 10.677222 9.794605 10.86530
## Apr 1996 10.469941 10.210677 10.729206 10.070264 10.86962
## May 1996 10.517395 10.258130 10.776659 10.117718 10.91707
## Jun 1996 10.578756 10.319491 10.838021 10.179079 10.97843
## Jul 1996 10.762402 10.503137 11.021667 10.362725 11.16208
## Aug 1996 10.761932 10.502667 11.021196 10.362254 11.16161
## Sep 1996 10.865317 10.606052 11.124582 10.465640 11.26499
## Oct 1996 10.965399 10.706134 11.224664 10.565722 11.36508
## Nov 1996 11.446774 11.187510 11.706039 11.047097 11.84645
## Dec 1996 12.224288 11.965023 12.483552 11.824611 12.62396
fancy.full.2 <- data.frame(fancy, fancy.dummy)
lm4<- tslm(fancy~ trend + season + fancy.dummy, data=fancy.full.2)
fancy.predict.2 <- forecast(lm4, newdata= fancy.dummy.pred)
autoplot(fancy, series = "Data") +
  autolayer(fancy.predict.2, series = "Forecast") +
  ggtitle("Monthly Sales for Surf Shop, raw data") + xlab("Year") + ylab("Sales")
summary(fancy.predict.2)
##
## Forecast method: Linear regression model
##
## Model Information:
##
## Call:
## tslm(formula = fancy ~ trend + season + fancy.dummy, data = fancy.full.2)
##
## Coefficients:
## (Intercept) trend season2 season3 season4 season5
## -6552.4 321.8 994.2 8427.6 1935.4 1615.6
## season6 season7 season8 season9 season10 season11
## 2237.6 4165.6 4606.3 5392.9 6068.9 13961.8
## season12 fancy.dummy
## 40092.5 -3752.9
##
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -5.842945e-13 7661.599 4947.007 13.02307 52.44544 1.117872
## ACF1
## Training set 0.352124
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1994 20803.25 8948.158 32658.35 2527.692 39078.82
## Feb 1994 22119.27 10264.176 33974.37 3843.710 40394.84
## Mar 1994 29874.49 13951.003 45797.98 5327.182 54421.80
## Apr 1994 23704.12 11849.022 35559.22 5428.556 41979.68
## May 1994 23706.18 11851.081 35561.27 5430.614 41981.74
## Jun 1994 24649.98 12794.883 36505.08 6374.417 42925.54
## Jul 1994 26899.88 15044.786 38754.98 8624.320 45175.45
## Aug 1994 27662.41 15807.309 39517.50 9386.843 45937.97
## Sep 1994 28770.83 16915.731 40625.92 10495.264 47046.39
## Oct 1994 29768.64 17913.543 41623.74 11493.077 48044.20
## Nov 1994 37983.37 26128.275 49838.47 19707.809 56258.94
## Dec 1994 64435.94 52580.839 76291.03 46160.373 82711.50
## Jan 1995 24665.22 12673.420 36657.03 6178.917 43151.53
## Feb 1995 25981.24 13989.439 37973.05 7494.936 44467.55
## Mar 1995 33736.46 17643.268 49829.66 8927.539 58545.39
## Apr 1995 27566.09 15574.285 39557.89 9079.781 46052.40
## May 1995 27568.15 15576.343 39559.95 9081.840 46054.45
## Jun 1995 28511.95 16520.146 40503.75 10025.643 46998.26
## Jul 1995 30761.85 18770.049 42753.66 12275.546 49248.16
## Aug 1995 31524.38 19532.572 43516.18 13038.068 50010.68
## Sep 1995 32632.80 20640.993 44624.60 14146.490 51119.10
## Oct 1995 33630.61 21638.806 45622.41 15144.303 52116.92
## Nov 1995 41845.34 29853.538 53837.14 23359.034 60331.65
## Dec 1995 68297.91 56306.102 80289.71 49811.598 86784.21
## Jan 1996 28527.19 16370.391 40683.99 9786.528 47267.86
## Feb 1996 29843.21 17686.410 42000.01 11102.547 48583.88
## Mar 1996 37598.43 21315.043 53881.82 12496.309 62700.55
## Apr 1996 31428.06 19271.256 43584.86 12687.393 50168.72
## May 1996 31430.12 19273.314 43586.92 12689.451 50170.78
## Jun 1996 32373.92 20217.117 44530.72 13633.254 51114.58
## Jul 1996 34623.82 22467.020 46780.62 15883.157 53364.49
## Aug 1996 35386.34 23229.543 47543.15 16645.680 54127.01
## Sep 1996 36494.77 24337.964 48651.57 17754.101 55235.43
## Oct 1996 37492.58 25335.777 49649.38 18751.914 56233.24
## Nov 1996 45707.31 33550.508 57864.11 26966.646 64447.97
## Dec 1996 72159.87 60003.073 84316.68 53419.210 90900.54
##6
View(gasoline)
gas <- window(gasoline, end=2005)
autoplot(gas, ylab="Weekly Gas Supply")
fourier2 <- tslm(gas~trend+ fourier(gas, K=2))
fourier5<- tslm(gas ~ trend + fourier(gas, K=5))
fourier10<- tslm(gas ~ trend +fourier(gas, K=10))
fourier20<- tslm(gas ~ trend +fourier(gas, K=20))
autoplot(gas)+ autolayer(fourier2$fitted.values)
autoplot(gas) +autolayer(fourier5$fitted.values)
autoplot(gas) + autolayer(fourier10$fitted.values)
autoplot(gas) + autolayer(fourier20$fitted.values)
# The fitted values approximate the data well. Increasing the number of Fourier terms increases the flexibility of the fitted seasonal component, so its magnitude grows and it tracks more of the within-year variation.
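# For reference (a sketch), fourier() simply generates the sine and cosine regressors that
# tslm() uses; with K = 2 the first few rows look like this:
head(fourier(gas, K = 2), 3)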
#Selecting number of Fourier terms
# Fit models with K = 1, ..., 20 Fourier terms and record the AIC of each
list <- c()
for(j in 1:20){
  model <- tslm(gas ~ trend + fourier(gas, K = j))
  list[[j]] <- AIC(model)
}
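# An alternative to looping over AIC (a sketch): forecast::CV() reports CV, AIC, AICc and BIC
# for a single tslm fit, and could be compared across candidate values of K.
CV(tslm(gas ~ trend + fourier(gas, K = 12)))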
print(list)
## [[1]]
## [1] 247.007
##
## [[2]]
## [1] 241.1854
##
## [[3]]
## [1] 197.2137
##
## [[4]]
## [1] 196.1867
##
## [[5]]
## [1] 187.0649
##
## [[6]]
## [1] 157.5453
##
## [[7]]
## [1] 146.7625
##
## [[8]]
## [1] 146.6802
##
## [[9]]
## [1] 148.0063
##
## [[10]]
## [1] 145.2851
##
## [[11]]
## [1] 145.2139
##
## [[12]]
## [1] 141.0226
##
## [[13]]
## [1] 144.6272
##
## [[14]]
## [1] 146.0811
##
## [[15]]
## [1] 150.0682
##
## [[16]]
## [1] 151.4813
##
## [[17]]
## [1] 152.4293
##
## [[18]]
## [1] 145.6099
##
## [[19]]
## [1] 148.8298
##
## [[20]]
## [1] 152.2815
print("Fourier Term with lowest AIC")
## [1] "Fourier Term with lowest AIC"
which.min(list)
## [1] 12
fourier12<- tslm(gas ~ trend +fourier(gas, K=12))
checkresiduals(fourier12)
##
## Breusch-Godfrey test for serial correlation of order up to 104
##
## data: Residuals from Linear regression model
## LM test = 154.2, df = 104, p-value = 0.001021
fc <- forecast(fourier12, newdata=data.frame(fourier(gas,K= 12,h=52)))
autoplot(gas)+ autolayer(fc)
autoplot(gasoline) +autolayer(fc)
#Although the Breusch-Godfrey test (p = 0.001) indicates some remaining autocorrelation in the residuals, the forecasts track the actual 2005 data well, accurately capturing both the trend and the seasonal pattern.
##7
autoplot(huron)
lm.huron <- tslm(huron~trend)
summary(lm.huron)
##
## Call:
## tslm(formula = huron ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.50997 -0.72726 0.00083 0.74402 2.53565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.202037 0.230111 44.335 < 2e-16 ***
## trend -0.024201 0.004036 -5.996 3.55e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.13 on 96 degrees of freedom
## Multiple R-squared: 0.2725, Adjusted R-squared: 0.2649
## F-statistic: 35.95 on 1 and 96 DF, p-value: 3.545e-08
# Piecewise linear trend: a second regressor that is zero before the 1915 knot and increases
# linearly afterwards, allowing the slope of the trend to change at 1915
time.h <- time(huron)
knot.h <- 1915
ts.huron <- ts(pmax(0, time.h - knot.h), start=1875)
tslm.knot <- tslm(huron ~ time.h + ts.huron)
summary(tslm.knot)
##
## Call:
## tslm(formula = huron ~ time.h + ts.huron)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.49626 -0.66240 -0.07139 0.85163 2.39222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 132.90870 19.97687 6.653 1.82e-09 ***
## time.h -0.06498 0.01051 -6.181 1.58e-08 ***
## ts.huron 0.06486 0.01563 4.150 7.26e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.045 on 95 degrees of freedom
## Multiple R-squared: 0.3841, Adjusted R-squared: 0.3711
## F-statistic: 29.62 on 2 and 95 DF, p-value: 1.004e-10
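# A possible continuation (a sketch; new.h is an illustrative name): extend both trend
# regressors to 1973-1980 and forecast from the piecewise model. The newdata column names
# must match the regressor names used above.
new.h <- data.frame(time.h = 1973:1980, ts.huron = pmax(0, 1973:1980 - knot.h))
forecast(tslm.knot, newdata = new.h)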
###Chapter 6
#2. a.
autoplot(plastics)
#There appear to be both seasonal fluctuations and a positive trend.
#b.
plastic.decomp<- decompose(plastics, "multiplicative")
autoplot(plastic.decomp)
#The results support conclusions made in part A. There is a clear positive trend, as well as a clear seasonal component.
autoplot(plastics, series='Data')+autolayer(seasadj(plastic.decomp), series='Seasonally Adjusted')
#e.
plastics.df <- plastics
plastics.df[30] <- plastics.df[30] + 500   # add an outlier of +500 near the middle of the series
plastic.decomp.df <- decompose(plastics.df, "multiplicative")
autoplot(plastics.df, series='Data')+autolayer(seasadj(plastic.decomp.df), series='Seasonally Adjusted')
#The outlier has a significant effect, disrupting the seasonal pattern and causing a spike in both the data and the seasonally adjusted decomposition.
#f. No matter where in the dataset the outlier resides, the decomposition is sensitive to it, because classical decomposition averages the seasonal pattern across all years (a quick check is sketched below).
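# A quick check of this claim (a sketch; plastics.end is an illustrative name): place the same
# +500 outlier near the end of the series instead and compare with the plot above.
plastics.end <- plastics
plastics.end[55] <- plastics.end[55] + 500
plastic.decomp.end <- decompose(plastics.end, "multiplicative")
autoplot(plastics.end, series='Data') + autolayer(seasadj(plastic.decomp.end), series='Seasonally Adjusted')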
##3.
library(readxl)
retail <- read_excel("C:/Users/cthom/OneDrive/Boston College/Econometrics/retail.xlsx", skip=1)
myts <- ts(retail[,"A3349873A"],frequency=12, start=c(1982,4))
library(seasonal)
myts %>% seas(x11="") -> x11_fit
autoplot(x11_fit)+ggtitle("Retail X11 decomposition")
#The X11 decomposition shows a clear seasonal pattern and an upward trend. There are a few large spikes in the remainder component, suggesting some outliers, particularly around 1988, 1994, and 2001; these spikes were not noticeable in the earlier plots.
##4
#A. The data show a clear upward trend. While there is also a seasonal component, the magnitude of the seasonal fluctuations is small relative to the level of the data: the fluctuations lie between roughly -100 and 100, compared with data levels of between 7,000 and 9,000.
#B. The recession is visible as a large negative spike in the remainder component. This indicates a change in the overall level that is not attributable to the trend or seasonal components; the recession acts as an exogenous shock.
##5
autoplot(cangas) + xlab("Month") + ylab("Gas Production") + ggtitle("Canadian Gas Production ")
ggsubseriesplot(cangas, main = NULL) + xlab("Month") + ylab("Gas Production")
ggseasonplot(cangas, main = NULL) + xlab("Month") + ylab("Gas Production")
#Overall, the trend level of gas production was rising during peak periods. The changing seasonality reflects a greater magnitude of the seasonal effect in the 70s and 80s: while peak production kept increasing, the troughs in the 70s and 80s were deeper than in other periods. The deepening troughs produced a flat overall trend level from the mid-70s to the mid/late 80s.
cangas_stl <- stl(cangas, s.window = 13, robust = TRUE)
autoplot(cangas_stl) + xlab("Month") + ggtitle("STL Decomposition of Canadian Gas Production")
cangas_seats <- seas(cangas)
autoplot(cangas_seats) + xlab("Month") + ggtitle("SEATS Decomposition of Canadian Gas Production")
cangas %>% seas(x11="") -> cangas_x11
autoplot(cangas_x11) +xlab("Month")+ggtitle("X11 Decomposition of Canadian Gas Production")
#The STL decomposition does a better job of representing the change in the magnitude of the seasonal pattern, visible as a fatter mid-section (same for me :)) in its seasonal panel. This lets it model the data better, reflected in generally more stable remainders. The SEATS and X11 decompositions struggle with the increase and subsequent decrease in seasonal magnitude: because those models are multiplicative, they overestimate the seasonal effect before the increase in magnitude and underestimate it after the decrease. This shows up as larger decomposed seasonality in the early part of the series and smaller decomposed seasonality in the late part of the series.
##6.
bricksq_stl <- stl(bricksq, s.window="periodic", robust=TRUE)
autoplot(bricksq_stl)
bricksq_stl5 <- stl(bricksq, s.window=5, robust=TRUE)
autoplot(bricksq_stl5)
bricksq_stl3 <- stl(bricksq, s.window=3, robust=TRUE)
autoplot(bricksq_stl3)
#b.
bricksq_stl <- stl(bricksq, s.window="periodic", robust=FALSE)
brick_seas<- seasadj(bricksq_stl)
autoplot(brick_seas)
brick_naive <- naive(brick_seas)
autoplot(brick_naive)
brick_stlf <- stlf(brick_seas, method="naive")
forecast(brick_stlf, h=8)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1994 Q4 465.3437 440.0423 490.6450 426.6486 504.0387
## 1995 Q1 463.5593 427.7777 499.3408 408.8362 518.2824
## 1995 Q2 472.8643 429.0410 516.6875 405.8424 539.8861
## 1995 Q3 469.4082 418.8055 520.0109 392.0180 546.7983
## 1995 Q4 465.3437 408.7681 521.9192 378.8188 551.8685
## 1996 Q1 463.5593 401.5839 525.5347 368.7761 558.3425
## 1996 Q2 472.8643 405.9232 539.8053 370.4867 575.2418
## 1996 Q3 469.4082 397.8451 540.9712 359.9620 578.8544
checkresiduals(brick_stlf)
## Warning in checkresiduals(brick_stlf): The fitted degrees of freedom is based on
## the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + Random walk
## Q* = 44.369, df = 8, p-value = 4.845e-07
##
## Model df: 0. Total lags used: 8
#The Ljung-Box test (p < 0.001) indicates the residuals are not white noise, and the ACF plot shows significant spikes at lags 1 and 8. The correlation is not so frequent or strong as to disqualify the results, but the prediction intervals should be interpreted with some caution.
bricksq_stlT <- stl(bricksq, s.window="periodic", robust=TRUE)
brick_seasT<- seasadj(bricksq_stlT)
autoplot(brick_seasT)
brick_stlfT <- stlf(brick_seasT, method="naive")
forecast(brick_stlfT, h=8)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1994 Q4 465.6914 440.3901 490.9928 426.9964 504.3865
## 1995 Q1 459.3697 423.5882 495.1512 404.6466 514.0928
## 1995 Q2 474.3032 430.4800 518.1264 407.2814 541.3251
## 1995 Q3 471.8110 421.2083 522.4137 394.4208 549.2011
## 1995 Q4 465.6914 409.1159 522.2670 379.1666 552.2163
## 1996 Q1 459.3697 397.3943 521.3451 364.5865 554.1529
## 1996 Q2 474.3032 407.3621 541.2443 371.9257 576.6808
## 1996 Q3 471.8110 400.2479 543.3740 362.3648 581.2572
checkresiduals(brick_stlfT)
## Warning in checkresiduals(brick_stlfT): The fitted degrees of freedom is based
## on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + Random walk
## Q* = 44.369, df = 8, p-value = 4.845e-07
##
## Model df: 0. Total lags used: 8
#Utilizing the robust STL decomposition does not appear to make much of a difference.
#g
b.train <- window(bricksq, end=c(1992,3))
b.test <- window(bricksq, start=c(1992,4), end=c(1994,3))
b.train.stl <- stlf(b.train)
b.train.stl.forecast <- forecast(b.train.stl, h=8)
b.train.snaive <- snaive(b.train)
b.train.snaive.forecast <- forecast(b.train.snaive)
autoplot(bricksq) +
  autolayer(b.train.stl.forecast, series="Stlf Forecast", PI=FALSE, size=1) +
  autolayer(b.train.snaive.forecast, series="SNaive forecast", size=1, PI=FALSE) +
  scale_x_continuous(limits = c(1990, 1994.5))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
#The STLF forecast follows the pattern of the test data more closely than the seasonal naive forecast, and so is preferable here (a direct accuracy comparison is sketched below).
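# To quantify the visual comparison (a sketch), test-set accuracy can be computed directly:
accuracy(b.train.stl.forecast, b.test)
accuracy(b.train.snaive.forecast, b.test)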
##7.
hist(writing)
#The histogram shows the data are skewed rather than roughly normal, so a Box-Cox transformation is used to stabilise the variance before decomposing.
writing.fit <- stlf(writing, s.window="periodic", robust=TRUE, lambda=BoxCox.lambda(writing), method="naive")
autoplot(writing.fit)
writing.fit2 <- stlf(writing, s.window="periodic", robust=TRUE, lambda=BoxCox.lambda(writing), method="rwdrift")
autoplot(writing.fit2)
##8
fancy.fit <- stlf(fancy, s.window="periodic", robust=TRUE, lambda=BoxCox.lambda(fancy), method="rwdrift")
autoplot(fancy.fit)