Chapter 5

1.a

daily20 <- head(elecdaily,20)
elecdaily <- ts.union(aggregate(elecdemand[, "Demand"], nfrequency = 365, FUN = sum),aggregate(elecdemand[, !colnames(elecdemand) %in% c("Demand")], nfrequency = 365, FUN = mean))
colnames(elecdaily) <- colnames(elecdemand)
daily20 <- head(elecdaily, 20)
autoplot(daily20)

tslm_Dem_Temp <- tslm(Demand ~ Temperature, data = daily20)
tslm_Dem_Temp

## 
## Call:
## tslm(formula = Demand ~ Temperature, data = daily20)
## 
## Coefficients:
## (Intercept)  Temperature  
##      19.989        9.435

daily20 %>%
as.data.frame() %>%
ggplot(aes(x=Temperature, y=Demand)) +
ylab("Electricity Demand") +
xlab("Temperature") +
geom_point() +
geom_smooth(method="lm", se=FALSE)

## `geom_smooth()` using formula 'y ~ x'

1.b

checkresiduals(tslm_Dem_Temp$residuals)

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

The model is adequate because the residuals aren’t correlated with each other. Also there is an outlier existed.

1.c

fc_Dem_Temp <- forecast(tslm_Dem_Temp,newdata=data.frame(Temperature=c(15,35)))
fc_Dem_Temp

##           Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2014.0548       161.5107 136.3111 186.7103 121.7161 201.3053
## 2014.0575       350.2061 323.3371 377.0751 307.7753 392.6370

I will believe these forecasts since the forecasts are close to the temperatures of the data.

1.d

80% interval

fc_Dem_Temp$upper[, 1]

## Time Series:
## Start = c(2014, 21) 
## End = c(2014, 22) 
## Frequency = 365 
## [1] 186.7103 377.0751

fc_Dem_Temp$lower[, 1]

## Time Series:
## Start = c(2014, 21) 
## End = c(2014, 22) 
## Frequency = 365 
## [1] 136.3111 323.3371

95% interval

fc_Dem_Temp$upper[, 2]

## Time Series:
## Start = c(2014, 21) 
## End = c(2014, 22) 
## Frequency = 365 
## [1] 201.3053 392.6370

fc_Dem_Temp$lower[, 2]

## Time Series:
## Start = c(2014, 21) 
## End = c(2014, 22) 
## Frequency = 365 
## [1] 121.7161 307.7753

1.e

elecdaily %>%
as.data.frame() %>%
ggplot(aes(x=Temperature, y=Demand)) + ylab("Electricity Demand") + xlab("Temperature") + geom_point() + geom_smooth(method="lm", se=FALSE)

## `geom_smooth()` using formula 'y ~ x'

This model seems to be based on a few data. It can forecast for like first 23 days well but after that it is way off the real data.

2.a

autoplot(mens400)

There is data missing.

2.b

time_mens400 <- time(mens400)
tslm_mens400 <- tslm(mens400 ~ time_mens400, data = mens400)
autoplot(mens400) +
geom_abline(slope = tslm_mens400$coefficients[2], intercept =        tslm_mens400$coefficients[1], colour = "blue")

tslm_mens400$coefficients[2]

## time_mens400 
##  -0.06457385

The winning times decrease at an averge rate of 0.06457385 sec per year.

2.c

cbind(Time = time_mens400, Residuals = tslm_mens400$residuals) %>%
as.data.frame() %>%
ggplot(aes(x = Time, y = Residuals)) + geom_point() +                ylab("Residuals of Regression Line(Unit:s)")

## Warning: Removed 3 rows containing missing values (geom_point).

The plot suggests that the regression model fitted the data in general.

2.d

lm_mens400 <- lm(mens400 ~ time_mens400, data = mens400, na.action = na.exclude)
fc_mens400 <- forecast(lm_mens400, newdata = data.frame(time_mens400 = 2020))
autoplot(mens400) +
autolayer(fc_mens400, PI = TRUE)

80% and 95% prediction intervals

fc_mens400$upper

##          [,1]     [,2]
## [1,] 43.63487 44.53176

fc_mens400$lower

##          [,1]     [,2]
## [1,] 40.44975 39.55286

Since it is calculated from the assumption from the normally distributed model’s residuals so maybe it is not correct.

help("ausbeer")

## starting httpd help server ... done

head(ausbeer)

##      Qtr1 Qtr2 Qtr3 Qtr4
## 1956  284  213  227  308
## 1957  262  228

str(ausbeer)

##  Time-Series [1:218] from 1956 to 2010: 284 213 227 308 262 228 236 320 272 233 ...

The data is a quarter data of Australian beer production with a total data points of 218

time(ausbeer)[c(1, length(ausbeer))]

## [1] 1956.00 2010.25

The start is the first quarter of 1956 and the end is the second quarter of 2010.

easter(ausbeer)

##      Qtr1 Qtr2 Qtr3 Qtr4
## 1956 0.67 0.33 0.00 0.00
## 1957 0.00 1.00 0.00 0.00
## 1958 0.00 1.00 0.00 0.00
## 1959 1.00 0.00 0.00 0.00
## 1960 0.00 1.00 0.00 0.00
## 1961 0.33 0.67 0.00 0.00
## 1962 0.00 1.00 0.00 0.00
## 1963 0.00 1.00 0.00 0.00
## 1964 1.00 0.00 0.00 0.00
## 1965 0.00 1.00 0.00 0.00
## 1966 0.00 1.00 0.00 0.00
## 1967 1.00 0.00 0.00 0.00
## 1968 0.00 1.00 0.00 0.00
## 1969 0.00 1.00 0.00 0.00
## 1970 1.00 0.00 0.00 0.00
## 1971 0.00 1.00 0.00 0.00
## 1972 0.33 0.67 0.00 0.00
## 1973 0.00 1.00 0.00 0.00
## 1974 0.00 1.00 0.00 0.00
## 1975 1.00 0.00 0.00 0.00
## 1976 0.00 1.00 0.00 0.00
## 1977 0.00 1.00 0.00 0.00
## 1978 1.00 0.00 0.00 0.00
## 1979 0.00 1.00 0.00 0.00
## 1980 0.00 1.00 0.00 0.00
## 1981 0.00 1.00 0.00 0.00
## 1982 0.00 1.00 0.00 0.00
## 1983 0.00 1.00 0.00 0.00
## 1984 0.00 1.00 0.00 0.00
## 1985 0.00 1.00 0.00 0.00
## 1986 1.00 0.00 0.00 0.00
## 1987 0.00 1.00 0.00 0.00
## 1988 0.00 1.00 0.00 0.00
## 1989 1.00 0.00 0.00 0.00
## 1990 0.00 1.00 0.00 0.00
## 1991 1.00 0.00 0.00 0.00
## 1992 0.00 1.00 0.00 0.00
## 1993 0.00 1.00 0.00 0.00
## 1994 0.00 1.00 0.00 0.00
## 1995 0.00 1.00 0.00 0.00
## 1996 0.00 1.00 0.00 0.00
## 1997 1.00 0.00 0.00 0.00
## 1998 0.00 1.00 0.00 0.00
## 1999 0.00 1.00 0.00 0.00
## 2000 0.00 1.00 0.00 0.00
## 2001 0.00 1.00 0.00 0.00
## 2002 1.00 0.00 0.00 0.00
## 2003 0.00 1.00 0.00 0.00
## 2004 0.00 1.00 0.00 0.00
## 2005 1.00 0.00 0.00 0.00
## 2006 0.00 1.00 0.00 0.00
## 2007 0.00 1.00 0.00 0.00
## 2008 1.00 0.00 0.00 0.00
## 2009 0.00 1.00 0.00 0.00
## 2010 0.00 1.00

There is a return of 1.00 and 0 in the given time period. If there is a holiday in the time period the return will be 1 and if there is not the return will be 0. Also if there is a part of the holiday within the time period the return will be a part of the return of 1.

4.log(y(x)) = b0 + b1log(x) + eps dy/dx * 1/y(x) = 0 + b1/x + 0 b1 = x/y(x) * dy/dx

5.a

autoplot(fancy)

head(fancy, 50)

##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1987  1664.81  2397.53  2840.71  3547.29  3752.96  3714.74  4349.61  3566.34
## 1988  2499.81  5198.24  7225.14  4806.03  5900.88  4951.34  6179.12  4752.15
## 1989  4717.02  5702.63  9957.58  5304.78  6492.43  6630.80  7349.62  8176.62
## 1990  5921.10  5814.58 12421.25  6369.77  7609.12  7224.75  8121.22  7979.25
## 1991  4826.64  6470.23                                                      
##           Sep      Oct      Nov      Dec
## 1987  5021.82  6423.48  7600.60 19756.21
## 1988  5496.43  5835.10 12600.08 28541.72
## 1989  8573.17  9690.50 15151.84 34061.01
## 1990  8093.06  8476.70 17914.66 30114.41
## 1991

Sales generally increased from January to December and increased sharply in Decembers. Although Sales in Decembers increased every year but in 1990 sale decreased.

5.b Taking logarithms is to get an additive model and to stabilize the variance so it does not increase with the level of the series.

5.c

time_fancy <- time(fancy)
surfing_festival <- c()
for(i in 1:length(time_fancy)){month <- round(12*(time_fancy[i] - floor(time_fancy[i]))) + 1
year <- floor(time_fancy[i])
if(year >= 1988 & month == 3){
surfing_festival[i] <- 1}
else {surfing_festival[i] <- 0}}
tslm_log_fancy <- tslm(BoxCox(fancy, 0) ~ trend + season + surfing_festival)

5.d

autoplot(tslm_log_fancy$residuals)

cbind(Residuals = tslm_log_fancy$residuals, Fitted_values = tslm_log_fancy$fitted.values) %>%as.data.frame() %>%
ggplot(aes(x = Fitted_values, y = Residuals)) + geom_point()

The plot suggests that even in a long term there will still be heteroscedacity.

5.e

cbind.data.frame(Month = factor(month.abb[round(12*(time_fancy - floor(time_fancy)) + 1)],
labels = month.abb,
ordered = TRUE),
Residuals = tslm_log_fancy$residuals
) %>%
ggplot(aes(x = Month, y = Residuals)) + geom_boxplot()

## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.

The box plot shows wider variance than other months in August, September and October. There may be some data missing with in that time period.

5f.

summary(tslm_log_fancy)

## 
## Call:
## tslm(formula = BoxCox(fancy, 0) ~ trend + season + surfing_festival)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33673 -0.12757  0.00257  0.10911  0.37671 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      7.6196670  0.0742471 102.626  < 2e-16 ***
## trend            0.0220198  0.0008268  26.634  < 2e-16 ***
## season2          0.2514168  0.0956790   2.628 0.010555 *  
## season3          0.2660828  0.1934044   1.376 0.173275    
## season4          0.3840535  0.0957075   4.013 0.000148 ***
## season5          0.4094870  0.0957325   4.277 5.88e-05 ***
## season6          0.4488283  0.0957647   4.687 1.33e-05 ***
## season7          0.6104545  0.0958039   6.372 1.71e-08 ***
## season8          0.5879644  0.0958503   6.134 4.53e-08 ***
## season9          0.6693299  0.0959037   6.979 1.36e-09 ***
## season10         0.7473919  0.0959643   7.788 4.48e-11 ***
## season11         1.2067479  0.0960319  12.566  < 2e-16 ***
## season12         1.9622412  0.0961066  20.417  < 2e-16 ***
## surfing_festival 0.5015151  0.1964273   2.553 0.012856 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.179 on 70 degrees of freedom
## Multiple R-squared:  0.9567, Adjusted R-squared:  0.9487 
## F-statistic:   119 on 13 and 70 DF,  p-value: < 2.2e-16

Every January has more sale than the previous January. The distribution of the residuals was unsymmetrical in some months. And for some months, the median of the residuals wasn’t 0.

5.g

checkresiduals(tslm_log_fancy)

## 
##  Breusch-Godfrey test for serial correlation of order up to 17
## 
## data:  Residuals from Linear regression model
## LM test = 37.954, df = 17, p-value = 0.002494

P-value is less than 0.05 therefore residuals can be correlated.

5.h

future_fancy <- rep(0, 36)
for(i in 1:36){if(i %% 12 == 3){future_fancy[i] <- 1}}
future_fancy <- ts(data = future_fancy, start = 1994, end = c(1996, 12),frequency = 12)
fc_tslm_log_fancy <- forecast(tslm_log_fancy,newdata = data.frame(Time = time(future_fancy),surfing_festival = future_fancy))
autoplot(fc_tslm_log_fancy)

fc_tslm_log_fancy$lower

##               [,1]      [,2]
## Jan 1994  9.238522  9.101594
## Feb 1994  9.511959  9.375031
## Mar 1994 10.048860  9.911228
## Apr 1994  9.688635  9.551707
## May 1994  9.736088  9.599161
## Jun 1994  9.797449  9.660522
## Jul 1994  9.981095  9.844168
## Aug 1994  9.980625  9.843698
## Sep 1994 10.084010  9.947083
## Oct 1994 10.184092 10.047165
## Nov 1994 10.665468 10.528541
## Dec 1994 11.442981 11.306054
## Jan 1995  9.499844  9.361338
## Feb 1995  9.773281  9.634775
## Mar 1995 10.310518 10.171489
## Apr 1995  9.949957  9.811451
## May 1995  9.997411  9.858904
## Jun 1995 10.058772  9.920265
## Jul 1995 10.242418 10.103911
## Aug 1995 10.241948 10.103441
## Sep 1995 10.345333 10.206826
## Oct 1995 10.445415 10.306908
## Nov 1995 10.926791 10.788284
## Dec 1995 11.704304 11.565797
## Jan 1996  9.760564  9.620151
## Feb 1996 10.034000  9.893588
## Mar 1996 10.571566 10.430810
## Apr 1996 10.210677 10.070264
## May 1996 10.258130 10.117718
## Jun 1996 10.319491 10.179079
## Jul 1996 10.503137 10.362725
## Aug 1996 10.502667 10.362254
## Sep 1996 10.606052 10.465640
## Oct 1996 10.706134 10.565722
## Nov 1996 11.187510 11.047097
## Dec 1996 11.965023 11.824611

fc_tslm_log_fancy$upper

##               [,1]     [,2]
## Jan 1994  9.744183  9.88111
## Feb 1994 10.017620 10.15455
## Mar 1994 10.557120 10.69475
## Apr 1994 10.194296 10.33122
## May 1994 10.241749 10.37868
## Jun 1994 10.303110 10.44004
## Jul 1994 10.486756 10.62368
## Aug 1994 10.486286 10.62321
## Sep 1994 10.589671 10.72660
## Oct 1994 10.689753 10.82668
## Nov 1994 11.171129 11.30806
## Dec 1994 11.948642 12.08557
## Jan 1995 10.011336 10.14984
## Feb 1995 10.284773 10.42328
## Mar 1995 10.823938 10.96297
## Apr 1995 10.461449 10.59996
## May 1995 10.508903 10.64741
## Jun 1995 10.570264 10.70877
## Jul 1995 10.753910 10.89242
## Aug 1995 10.753440 10.89195
## Sep 1995 10.856825 10.99533
## Oct 1995 10.956907 11.09541
## Nov 1995 11.438282 11.57679
## Dec 1995 12.215796 12.35430
## Jan 1996 10.279093 10.41951
## Feb 1996 10.552530 10.69294
## Mar 1996 11.091365 11.23212
## Apr 1996 10.729206 10.86962
## May 1996 10.776659 10.91707
## Jun 1996 10.838021 10.97843
## Jul 1996 11.021667 11.16208
## Aug 1996 11.021196 11.16161
## Sep 1996 11.124582 11.26499
## Oct 1996 11.224664 11.36508
## Nov 1996 11.706039 11.84645
## Dec 1996 12.483552 12.62396

5.i

fc_tslm_fancy <- fc_tslm_log_fancy
members_inv.log <- c('x', 'mean', 'lower', 'upper', 'residuals', 'fitted')
fc_tslm_fancy[members_inv.log] <- lapply(fc_tslm_log_fancy[members_inv.log], InvBoxCox, lambda = 0)
fc_tslm_fancy[['model']][['model']][1] <- lapply(fc_tslm_log_fancy[['model']][['model']][1], InvBoxCox, lambda = 0)
autoplot(fc_tslm_fancy)

fc_tslm_fancy$upper

##               [,1]      [,2]
## Jan 1994  17054.73  19557.43
## Feb 1994  22418.00  25707.73
## Mar 1994  38450.24  44123.68
## Apr 1994  26750.16  30675.62
## May 1994  28050.15  32166.37
## Jun 1994  29825.24  34201.95
## Jul 1994  35837.72  41096.73
## Aug 1994  35820.87  41077.41
## Sep 1994  39722.43  45551.50
## Oct 1994  43903.67  50346.32
## Nov 1994  71049.28  81475.41
## Dec 1994 154607.08 177294.90
## Jan 1995  22277.59  25587.08
## Feb 1995  29283.31  33633.55
## Mar 1995  50208.44  57697.39
## Apr 1995  34942.16  40133.06
## May 1995  36640.25  42083.42
## Jun 1995  38958.95  44746.58
## Jul 1995  46812.70  53767.06
## Aug 1995  46790.69  53741.78
## Sep 1995  51887.06  59595.26
## Oct 1995  57348.77  65868.34
## Nov 1995  92807.48 106594.69
## Dec 1995 201954.08 231955.81
## Jan 1996  29117.46  33506.86
## Feb 1996  38274.14  44043.89
## Mar 1996  65602.25  75517.62
## Apr 1996  45670.42  52555.15
## May 1996  47889.88  55109.18
## Jun 1996  50920.48  58596.65
## Jul 1996  61185.57  70409.18
## Aug 1996  61156.80  70376.07
## Sep 1996  67817.91  78041.33
## Oct 1996  74956.52  86256.08
## Nov 1996 121302.09 139588.15
## Dec 1996 263959.89 303751.35

fc_tslm_fancy$lower

##               [,1]       [,2]
## Jan 1994  10285.82   8969.583
## Feb 1994  13520.45  11790.284
## Mar 1994  23129.40  20155.412
## Apr 1994  16133.21  14068.696
## May 1994  16917.24  14752.395
## Jun 1994  17987.81  15685.969
## Jul 1994  21613.98  18848.111
## Aug 1994  21603.82  18839.249
## Sep 1994  23956.87  20891.193
## Oct 1994  26478.61  23090.230
## Nov 1994  42850.31  37366.903
## Dec 1994  93244.59  81312.400
## Jan 1995  13357.65  11629.938
## Feb 1995  17558.28  15287.252
## Mar 1995  30046.98  26146.972
## Apr 1995  20951.33  18241.435
## May 1995  21969.51  19127.918
## Jun 1995  23359.80  20338.387
## Jul 1995  28068.91  24438.412
## Aug 1995  28055.72  24426.922
## Sep 1995  31111.50  27087.467
## Oct 1995  34386.35  29938.733
## Nov 1995  55647.40  48449.831
## Dec 1995 121091.75 105429.448
## Jan 1996  17336.40  15065.329
## Feb 1996  22788.25  19802.984
## Mar 1996  39009.73  33887.802
## Apr 1996  27191.96  23629.808
## May 1996  28513.41  24778.151
## Jun 1996  30317.82  26346.183
## Jul 1996  36429.60  31657.322
## Aug 1996  36412.48  31642.439
## Sep 1996  40378.47  35088.887
## Oct 1996  44628.77  38782.394
## Nov 1996  72222.70  62761.521
## Dec 1996 157160.50 136572.460

5.j

tslm_fancy <- tslm(fancy ~ trend + season + surfing_festival)
fc_tslm_fancy2 <- forecast(tslm_fancy, newdata = data.frame(Time = time(future_fancy), surfing_festival = future_fancy))
autoplot(fc_tslm_fancy2)

6.a

str(gasoline)

##  Time-Series [1:1355] from 1991 to 2017: 6.62 6.43 6.58 7.22 6.88 ...

head(gasoline)

## Time Series:
## Start = 1991.1 
## End = 1991.19582477755 
## Frequency = 52.1785714285714 
## [1] 6.621 6.433 6.582 7.224 6.875 6.947

gasoline_2004 <- window(gasoline, end = 2005)
autoplot(gasoline_2004, xlab = "Year")

Ts linear model

for(num in c(1, 2, 3, 5, 10, 20)){
var_name <- paste("fit",as.character(num), "gasoline_2004", sep = "")
assign(var_name, tslm(gasoline_2004 ~ trend + fourier(gasoline_2004, K = num)))
print(autoplot(gasoline_2004) + autolayer(get(var_name)$fitted.values, series = as.character(num)) + ggtitle(var_name) + ylab("gasoline") + guides(colour = guide_legend(title = "Number of Fourier Transform pairs")))}

autoplot(gasoline_2004) +
autolayer(fit1gasoline_2004$fitted.values, series = "1") +
autolayer(fit5gasoline_2004$fitted.values, series = "2") +
autolayer(fit10gasoline_2004$fitted.values, series = "3") +
autolayer(fit10gasoline_2004$fitted.values, series = "5") +
autolayer(fit20gasoline_2004$fitted.values, series = "10") +
autolayer(fit20gasoline_2004$fitted.values, series = "20") +
guides(colour = guide_legend(title = "Fourier Transform pairs")) +
scale_color_discrete(breaks = c(1, 2, 3, 5, 10, 20))

With more Fourier pairs used, the line plotted will be more fitted with the original data.

6.b

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

gasoline_2004 <- window(gasoline, end=2005)
autoplot(gasoline_2004, main="Quarterly retail index in Europe", xlab="Year", ylab="Thousand barrels per day")

for(num in c(1, 2, 3, 5, 10, 20)){var_name <- paste("fit",as.character(num),"gasoline_2004",sep = "")
assign(var_name, tslm(gasoline_2004 ~ trend + fourier(gasoline_2004, K = num)))
print(autoplot(gasoline_2004) +autolayer(get(var_name)$fitted.values,series = as.character(num)) + ggtitle(var_name) + ylab("gasoline") + guides(colour = guide_legend(title = "Number of Fourier Transform pairs")))}

autoplot(gasoline_2004) +
autolayer(fit1gasoline_2004$fitted.values, series = "1") +
autolayer(fit5gasoline_2004$fitted.values, series = "2") +
autolayer(fit10gasoline_2004$fitted.values, series = "3") +
autolayer(fit10gasoline_2004$fitted.values, series = "5") +
autolayer(fit20gasoline_2004$fitted.values, series = "10") +
autolayer(fit20gasoline_2004$fitted.values, series = "20") +
guides(colour = guide_legend(title = "Fourier Transform pairs")) + scale_color_discrete(breaks = c(1, 2, 3, 5, 10, 20))

for(i in c(1, 2, 3, 5, 10, 20)){
  fit_gasoline_2004.name <- paste("fit", as.character(i), "gasoline_2004",sep = "")
  writeLines(paste("\n", fit_gasoline_2004.name, "\n"))
  print(CV(get(fit_gasoline_2004.name)))}

## 
##  fit1gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  8.201921e-02 -1.813292e+03 -1.813208e+03 -1.790354e+03  8.250858e-01 
## 
##  fit2gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  8.136854e-02 -1.819113e+03 -1.818957e+03 -1.787001e+03  8.269569e-01 
## 
##  fit3gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  7.658846e-02 -1.863085e+03 -1.862834e+03 -1.821797e+03  8.375702e-01 
## 
##  fit5gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  7.553646e-02 -1.873234e+03 -1.872723e+03 -1.813596e+03  8.406928e-01 
## 
##  fit10gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  7.135754e-02 -1.915014e+03 -1.913441e+03 -1.809500e+03  8.516102e-01 
## 
##  fit20gasoline_2004 
## 
##            CV           AIC          AICc           BIC         AdjR2 
##  7.223834e-02 -1.908017e+03 -1.902469e+03 -1.710753e+03  8.540588e-01

fc_gasoline_2005 <- forecast(fit10gasoline_2004, newdata=data.frame(fourier(gasoline_2004, K = 10, h = 52)))
autoplot(fc_gasoline_2005) + autolayer(window(gasoline, start = 2004, end = 2006)) +scale_x_continuous(limits = c(2004, 2006))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 674 row(s) containing missing values (geom_path).

6.c

checkresiduals(fit10gasoline_2004)

## 
##  Breusch-Godfrey test for serial correlation of order up to 104
## 
## data:  Residuals from Linear regression model
## LM test = 155.45, df = 104, p-value = 0.0008135

6.d

fc_gasoline_2005 <- forecast(fit10gasoline_2004,newdata=data.frame(fourier(gasoline_2004, K = 10, h = 52)))

6.e

autoplot(fc_gasoline_2005) +autolayer(window(gasoline, start = 2004, end = 2006)) + scale_x_continuous(limits = c(2004, 2006))

## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.

## Warning: Removed 674 row(s) containing missing values (geom_path).

7.a

autoplot(huron)

str(huron)

##  Time-Series [1:98] from 1875 to 1972: 10.38 11.86 10.97 10.8 9.79 ...

head(huron)

## Time Series:
## Start = 1875 
## End = 1880 
## Frequency = 1 
## [1] 10.38 11.86 10.97 10.80  9.79 10.39

There seems to be a down trend from 1880 to 1920.

7.b

tslm_huron <- tslm(huron ~ trend)
t <- time(huron)
t.break <- 1915
t_piece <- ts(pmax(0,t-t.break), start=1875)
pw_huron <- tslm(huron ~ t + t_piece)
summary(pw_huron)

## 
## Call:
## tslm(formula = huron ~ t + t_piece)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.49626 -0.66240 -0.07139  0.85163  2.39222 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 132.90870   19.97687   6.653 1.82e-09 ***
## t            -0.06498    0.01051  -6.181 1.58e-08 ***
## t_piece       0.06486    0.01563   4.150 7.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.045 on 95 degrees of freedom
## Multiple R-squared:  0.3841, Adjusted R-squared:  0.3711 
## F-statistic: 29.62 on 2 and 95 DF,  p-value: 1.004e-10

7.c

h=8
fc_tslm_huron <- forecast(tslm_huron, h=h)
t.new <- t[length(t)] + seq(h)
t_piece_new <- t_piece[length(t_piece)]+seq(h)
newdata <- cbind(t=t.new, t_piece=t_piece_new) %>%as.data.frame()
fc_pw_huron <- forecast(pw_huron,newdata = newdata)
autoplot(huron) + autolayer(fitted(tslm_huron), series = "Linear") + autolayer(fc_tslm_huron, series = "Linear")

autoplot(huron) + autolayer(fitted(pw_huron), series = "Piecewise") + autolayer(fc_pw_huron, series="Piecewise")

Tslm model follows the trend. Pw models can show the trend easily.

Chapter 5

Yifeng Qi

2020/11/7