1.a
daily20 <- head(elecdaily,20)
elecdaily <- ts.union(aggregate(elecdemand[, "Demand"], nfrequency = 365, FUN = sum),aggregate(elecdemand[, !colnames(elecdemand) %in% c("Demand")], nfrequency = 365, FUN = mean))
colnames(elecdaily) <- colnames(elecdemand)
daily20 <- head(elecdaily, 20)
autoplot(daily20)
tslm_Dem_Temp <- tslm(Demand ~ Temperature, data = daily20)
tslm_Dem_Temp
##
## Call:
## tslm(formula = Demand ~ Temperature, data = daily20)
##
## Coefficients:
## (Intercept) Temperature
## 19.989 9.435
daily20 %>%
as.data.frame() %>%
ggplot(aes(x=Temperature, y=Demand)) +
ylab("Electricity Demand") +
xlab("Temperature") +
geom_point() +
geom_smooth(method="lm", se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
1.b
checkresiduals(tslm_Dem_Temp$residuals)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
The model is adequate because the residuals aren’t correlated with each other. Also there is an outlier existed.
1.c
fc_Dem_Temp <- forecast(tslm_Dem_Temp,newdata=data.frame(Temperature=c(15,35)))
fc_Dem_Temp
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2014.0548 161.5107 136.3111 186.7103 121.7161 201.3053
## 2014.0575 350.2061 323.3371 377.0751 307.7753 392.6370
I will believe these forecasts since the forecasts are close to the temperatures of the data.
1.d
80% interval
fc_Dem_Temp$upper[, 1]
## Time Series:
## Start = c(2014, 21)
## End = c(2014, 22)
## Frequency = 365
## [1] 186.7103 377.0751
fc_Dem_Temp$lower[, 1]
## Time Series:
## Start = c(2014, 21)
## End = c(2014, 22)
## Frequency = 365
## [1] 136.3111 323.3371
95% interval
fc_Dem_Temp$upper[, 2]
## Time Series:
## Start = c(2014, 21)
## End = c(2014, 22)
## Frequency = 365
## [1] 201.3053 392.6370
fc_Dem_Temp$lower[, 2]
## Time Series:
## Start = c(2014, 21)
## End = c(2014, 22)
## Frequency = 365
## [1] 121.7161 307.7753
1.e
elecdaily %>%
as.data.frame() %>%
ggplot(aes(x=Temperature, y=Demand)) + ylab("Electricity Demand") + xlab("Temperature") + geom_point() + geom_smooth(method="lm", se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
This model seems to be based on a few data. It can forecast for like first 23 days well but after that it is way off the real data.
2.a
autoplot(mens400)
There is data missing.
2.b
time_mens400 <- time(mens400)
tslm_mens400 <- tslm(mens400 ~ time_mens400, data = mens400)
autoplot(mens400) +
geom_abline(slope = tslm_mens400$coefficients[2], intercept = tslm_mens400$coefficients[1], colour = "blue")
tslm_mens400$coefficients[2]
## time_mens400
## -0.06457385
The winning times decrease at an averge rate of 0.06457385 sec per year.
2.c
cbind(Time = time_mens400, Residuals = tslm_mens400$residuals) %>%
as.data.frame() %>%
ggplot(aes(x = Time, y = Residuals)) + geom_point() + ylab("Residuals of Regression Line(Unit:s)")
## Warning: Removed 3 rows containing missing values (geom_point).
The plot suggests that the regression model fitted the data in general.
2.d
lm_mens400 <- lm(mens400 ~ time_mens400, data = mens400, na.action = na.exclude)
fc_mens400 <- forecast(lm_mens400, newdata = data.frame(time_mens400 = 2020))
autoplot(mens400) +
autolayer(fc_mens400, PI = TRUE)
80% and 95% prediction intervals
fc_mens400$upper
## [,1] [,2]
## [1,] 43.63487 44.53176
fc_mens400$lower
## [,1] [,2]
## [1,] 40.44975 39.55286
Since it is calculated from the assumption from the normally distributed model’s residuals so maybe it is not correct.
help("ausbeer")
## starting httpd help server ... done
head(ausbeer)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 284 213 227 308
## 1957 262 228
str(ausbeer)
## Time-Series [1:218] from 1956 to 2010: 284 213 227 308 262 228 236 320 272 233 ...
The data is a quarter data of Australian beer production with a total data points of 218
time(ausbeer)[c(1, length(ausbeer))]
## [1] 1956.00 2010.25
The start is the first quarter of 1956 and the end is the second quarter of 2010.
easter(ausbeer)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 0.67 0.33 0.00 0.00
## 1957 0.00 1.00 0.00 0.00
## 1958 0.00 1.00 0.00 0.00
## 1959 1.00 0.00 0.00 0.00
## 1960 0.00 1.00 0.00 0.00
## 1961 0.33 0.67 0.00 0.00
## 1962 0.00 1.00 0.00 0.00
## 1963 0.00 1.00 0.00 0.00
## 1964 1.00 0.00 0.00 0.00
## 1965 0.00 1.00 0.00 0.00
## 1966 0.00 1.00 0.00 0.00
## 1967 1.00 0.00 0.00 0.00
## 1968 0.00 1.00 0.00 0.00
## 1969 0.00 1.00 0.00 0.00
## 1970 1.00 0.00 0.00 0.00
## 1971 0.00 1.00 0.00 0.00
## 1972 0.33 0.67 0.00 0.00
## 1973 0.00 1.00 0.00 0.00
## 1974 0.00 1.00 0.00 0.00
## 1975 1.00 0.00 0.00 0.00
## 1976 0.00 1.00 0.00 0.00
## 1977 0.00 1.00 0.00 0.00
## 1978 1.00 0.00 0.00 0.00
## 1979 0.00 1.00 0.00 0.00
## 1980 0.00 1.00 0.00 0.00
## 1981 0.00 1.00 0.00 0.00
## 1982 0.00 1.00 0.00 0.00
## 1983 0.00 1.00 0.00 0.00
## 1984 0.00 1.00 0.00 0.00
## 1985 0.00 1.00 0.00 0.00
## 1986 1.00 0.00 0.00 0.00
## 1987 0.00 1.00 0.00 0.00
## 1988 0.00 1.00 0.00 0.00
## 1989 1.00 0.00 0.00 0.00
## 1990 0.00 1.00 0.00 0.00
## 1991 1.00 0.00 0.00 0.00
## 1992 0.00 1.00 0.00 0.00
## 1993 0.00 1.00 0.00 0.00
## 1994 0.00 1.00 0.00 0.00
## 1995 0.00 1.00 0.00 0.00
## 1996 0.00 1.00 0.00 0.00
## 1997 1.00 0.00 0.00 0.00
## 1998 0.00 1.00 0.00 0.00
## 1999 0.00 1.00 0.00 0.00
## 2000 0.00 1.00 0.00 0.00
## 2001 0.00 1.00 0.00 0.00
## 2002 1.00 0.00 0.00 0.00
## 2003 0.00 1.00 0.00 0.00
## 2004 0.00 1.00 0.00 0.00
## 2005 1.00 0.00 0.00 0.00
## 2006 0.00 1.00 0.00 0.00
## 2007 0.00 1.00 0.00 0.00
## 2008 1.00 0.00 0.00 0.00
## 2009 0.00 1.00 0.00 0.00
## 2010 0.00 1.00
There is a return of 1.00 and 0 in the given time period. If there is a holiday in the time period the return will be 1 and if there is not the return will be 0. Also if there is a part of the holiday within the time period the return will be a part of the return of 1.
4.log(y(x)) = b0 + b1log(x) + eps dy/dx * 1/y(x) = 0 + b1/x + 0 b1 = x/y(x) * dy/dx
5.a
autoplot(fancy)
head(fancy, 50)
## Jan Feb Mar Apr May Jun Jul Aug
## 1987 1664.81 2397.53 2840.71 3547.29 3752.96 3714.74 4349.61 3566.34
## 1988 2499.81 5198.24 7225.14 4806.03 5900.88 4951.34 6179.12 4752.15
## 1989 4717.02 5702.63 9957.58 5304.78 6492.43 6630.80 7349.62 8176.62
## 1990 5921.10 5814.58 12421.25 6369.77 7609.12 7224.75 8121.22 7979.25
## 1991 4826.64 6470.23
## Sep Oct Nov Dec
## 1987 5021.82 6423.48 7600.60 19756.21
## 1988 5496.43 5835.10 12600.08 28541.72
## 1989 8573.17 9690.50 15151.84 34061.01
## 1990 8093.06 8476.70 17914.66 30114.41
## 1991
Sales generally increased from January to December and increased sharply in Decembers. Although Sales in Decembers increased every year but in 1990 sale decreased.
5.b Taking logarithms is to get an additive model and to stabilize the variance so it does not increase with the level of the series.
5.c
time_fancy <- time(fancy)
surfing_festival <- c()
for(i in 1:length(time_fancy)){month <- round(12*(time_fancy[i] - floor(time_fancy[i]))) + 1
year <- floor(time_fancy[i])
if(year >= 1988 & month == 3){
surfing_festival[i] <- 1}
else {surfing_festival[i] <- 0}}
tslm_log_fancy <- tslm(BoxCox(fancy, 0) ~ trend + season + surfing_festival)
5.d
autoplot(tslm_log_fancy$residuals)
cbind(Residuals = tslm_log_fancy$residuals, Fitted_values = tslm_log_fancy$fitted.values) %>%as.data.frame() %>%
ggplot(aes(x = Fitted_values, y = Residuals)) + geom_point()
The plot suggests that even in a long term there will still be heteroscedacity.
5.e
cbind.data.frame(Month = factor(month.abb[round(12*(time_fancy - floor(time_fancy)) + 1)],
labels = month.abb,
ordered = TRUE),
Residuals = tslm_log_fancy$residuals
) %>%
ggplot(aes(x = Month, y = Residuals)) + geom_boxplot()
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
The box plot shows wider variance than other months in August, September and October. There may be some data missing with in that time period.
5f.
summary(tslm_log_fancy)
##
## Call:
## tslm(formula = BoxCox(fancy, 0) ~ trend + season + surfing_festival)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33673 -0.12757 0.00257 0.10911 0.37671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.6196670 0.0742471 102.626 < 2e-16 ***
## trend 0.0220198 0.0008268 26.634 < 2e-16 ***
## season2 0.2514168 0.0956790 2.628 0.010555 *
## season3 0.2660828 0.1934044 1.376 0.173275
## season4 0.3840535 0.0957075 4.013 0.000148 ***
## season5 0.4094870 0.0957325 4.277 5.88e-05 ***
## season6 0.4488283 0.0957647 4.687 1.33e-05 ***
## season7 0.6104545 0.0958039 6.372 1.71e-08 ***
## season8 0.5879644 0.0958503 6.134 4.53e-08 ***
## season9 0.6693299 0.0959037 6.979 1.36e-09 ***
## season10 0.7473919 0.0959643 7.788 4.48e-11 ***
## season11 1.2067479 0.0960319 12.566 < 2e-16 ***
## season12 1.9622412 0.0961066 20.417 < 2e-16 ***
## surfing_festival 0.5015151 0.1964273 2.553 0.012856 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.179 on 70 degrees of freedom
## Multiple R-squared: 0.9567, Adjusted R-squared: 0.9487
## F-statistic: 119 on 13 and 70 DF, p-value: < 2.2e-16
Every January has more sale than the previous January. The distribution of the residuals was unsymmetrical in some months. And for some months, the median of the residuals wasn’t 0.
5.g
checkresiduals(tslm_log_fancy)
##
## Breusch-Godfrey test for serial correlation of order up to 17
##
## data: Residuals from Linear regression model
## LM test = 37.954, df = 17, p-value = 0.002494
P-value is less than 0.05 therefore residuals can be correlated.
5.h
future_fancy <- rep(0, 36)
for(i in 1:36){if(i %% 12 == 3){future_fancy[i] <- 1}}
future_fancy <- ts(data = future_fancy, start = 1994, end = c(1996, 12),frequency = 12)
fc_tslm_log_fancy <- forecast(tslm_log_fancy,newdata = data.frame(Time = time(future_fancy),surfing_festival = future_fancy))
autoplot(fc_tslm_log_fancy)
fc_tslm_log_fancy$lower
## [,1] [,2]
## Jan 1994 9.238522 9.101594
## Feb 1994 9.511959 9.375031
## Mar 1994 10.048860 9.911228
## Apr 1994 9.688635 9.551707
## May 1994 9.736088 9.599161
## Jun 1994 9.797449 9.660522
## Jul 1994 9.981095 9.844168
## Aug 1994 9.980625 9.843698
## Sep 1994 10.084010 9.947083
## Oct 1994 10.184092 10.047165
## Nov 1994 10.665468 10.528541
## Dec 1994 11.442981 11.306054
## Jan 1995 9.499844 9.361338
## Feb 1995 9.773281 9.634775
## Mar 1995 10.310518 10.171489
## Apr 1995 9.949957 9.811451
## May 1995 9.997411 9.858904
## Jun 1995 10.058772 9.920265
## Jul 1995 10.242418 10.103911
## Aug 1995 10.241948 10.103441
## Sep 1995 10.345333 10.206826
## Oct 1995 10.445415 10.306908
## Nov 1995 10.926791 10.788284
## Dec 1995 11.704304 11.565797
## Jan 1996 9.760564 9.620151
## Feb 1996 10.034000 9.893588
## Mar 1996 10.571566 10.430810
## Apr 1996 10.210677 10.070264
## May 1996 10.258130 10.117718
## Jun 1996 10.319491 10.179079
## Jul 1996 10.503137 10.362725
## Aug 1996 10.502667 10.362254
## Sep 1996 10.606052 10.465640
## Oct 1996 10.706134 10.565722
## Nov 1996 11.187510 11.047097
## Dec 1996 11.965023 11.824611
fc_tslm_log_fancy$upper
## [,1] [,2]
## Jan 1994 9.744183 9.88111
## Feb 1994 10.017620 10.15455
## Mar 1994 10.557120 10.69475
## Apr 1994 10.194296 10.33122
## May 1994 10.241749 10.37868
## Jun 1994 10.303110 10.44004
## Jul 1994 10.486756 10.62368
## Aug 1994 10.486286 10.62321
## Sep 1994 10.589671 10.72660
## Oct 1994 10.689753 10.82668
## Nov 1994 11.171129 11.30806
## Dec 1994 11.948642 12.08557
## Jan 1995 10.011336 10.14984
## Feb 1995 10.284773 10.42328
## Mar 1995 10.823938 10.96297
## Apr 1995 10.461449 10.59996
## May 1995 10.508903 10.64741
## Jun 1995 10.570264 10.70877
## Jul 1995 10.753910 10.89242
## Aug 1995 10.753440 10.89195
## Sep 1995 10.856825 10.99533
## Oct 1995 10.956907 11.09541
## Nov 1995 11.438282 11.57679
## Dec 1995 12.215796 12.35430
## Jan 1996 10.279093 10.41951
## Feb 1996 10.552530 10.69294
## Mar 1996 11.091365 11.23212
## Apr 1996 10.729206 10.86962
## May 1996 10.776659 10.91707
## Jun 1996 10.838021 10.97843
## Jul 1996 11.021667 11.16208
## Aug 1996 11.021196 11.16161
## Sep 1996 11.124582 11.26499
## Oct 1996 11.224664 11.36508
## Nov 1996 11.706039 11.84645
## Dec 1996 12.483552 12.62396
5.i
fc_tslm_fancy <- fc_tslm_log_fancy
members_inv.log <- c('x', 'mean', 'lower', 'upper', 'residuals', 'fitted')
fc_tslm_fancy[members_inv.log] <- lapply(fc_tslm_log_fancy[members_inv.log], InvBoxCox, lambda = 0)
fc_tslm_fancy[['model']][['model']][1] <- lapply(fc_tslm_log_fancy[['model']][['model']][1], InvBoxCox, lambda = 0)
autoplot(fc_tslm_fancy)
fc_tslm_fancy$upper
## [,1] [,2]
## Jan 1994 17054.73 19557.43
## Feb 1994 22418.00 25707.73
## Mar 1994 38450.24 44123.68
## Apr 1994 26750.16 30675.62
## May 1994 28050.15 32166.37
## Jun 1994 29825.24 34201.95
## Jul 1994 35837.72 41096.73
## Aug 1994 35820.87 41077.41
## Sep 1994 39722.43 45551.50
## Oct 1994 43903.67 50346.32
## Nov 1994 71049.28 81475.41
## Dec 1994 154607.08 177294.90
## Jan 1995 22277.59 25587.08
## Feb 1995 29283.31 33633.55
## Mar 1995 50208.44 57697.39
## Apr 1995 34942.16 40133.06
## May 1995 36640.25 42083.42
## Jun 1995 38958.95 44746.58
## Jul 1995 46812.70 53767.06
## Aug 1995 46790.69 53741.78
## Sep 1995 51887.06 59595.26
## Oct 1995 57348.77 65868.34
## Nov 1995 92807.48 106594.69
## Dec 1995 201954.08 231955.81
## Jan 1996 29117.46 33506.86
## Feb 1996 38274.14 44043.89
## Mar 1996 65602.25 75517.62
## Apr 1996 45670.42 52555.15
## May 1996 47889.88 55109.18
## Jun 1996 50920.48 58596.65
## Jul 1996 61185.57 70409.18
## Aug 1996 61156.80 70376.07
## Sep 1996 67817.91 78041.33
## Oct 1996 74956.52 86256.08
## Nov 1996 121302.09 139588.15
## Dec 1996 263959.89 303751.35
fc_tslm_fancy$lower
## [,1] [,2]
## Jan 1994 10285.82 8969.583
## Feb 1994 13520.45 11790.284
## Mar 1994 23129.40 20155.412
## Apr 1994 16133.21 14068.696
## May 1994 16917.24 14752.395
## Jun 1994 17987.81 15685.969
## Jul 1994 21613.98 18848.111
## Aug 1994 21603.82 18839.249
## Sep 1994 23956.87 20891.193
## Oct 1994 26478.61 23090.230
## Nov 1994 42850.31 37366.903
## Dec 1994 93244.59 81312.400
## Jan 1995 13357.65 11629.938
## Feb 1995 17558.28 15287.252
## Mar 1995 30046.98 26146.972
## Apr 1995 20951.33 18241.435
## May 1995 21969.51 19127.918
## Jun 1995 23359.80 20338.387
## Jul 1995 28068.91 24438.412
## Aug 1995 28055.72 24426.922
## Sep 1995 31111.50 27087.467
## Oct 1995 34386.35 29938.733
## Nov 1995 55647.40 48449.831
## Dec 1995 121091.75 105429.448
## Jan 1996 17336.40 15065.329
## Feb 1996 22788.25 19802.984
## Mar 1996 39009.73 33887.802
## Apr 1996 27191.96 23629.808
## May 1996 28513.41 24778.151
## Jun 1996 30317.82 26346.183
## Jul 1996 36429.60 31657.322
## Aug 1996 36412.48 31642.439
## Sep 1996 40378.47 35088.887
## Oct 1996 44628.77 38782.394
## Nov 1996 72222.70 62761.521
## Dec 1996 157160.50 136572.460
5.j
tslm_fancy <- tslm(fancy ~ trend + season + surfing_festival)
fc_tslm_fancy2 <- forecast(tslm_fancy, newdata = data.frame(Time = time(future_fancy), surfing_festival = future_fancy))
autoplot(fc_tslm_fancy2)
6.a
str(gasoline)
## Time-Series [1:1355] from 1991 to 2017: 6.62 6.43 6.58 7.22 6.88 ...
head(gasoline)
## Time Series:
## Start = 1991.1
## End = 1991.19582477755
## Frequency = 52.1785714285714
## [1] 6.621 6.433 6.582 7.224 6.875 6.947
gasoline_2004 <- window(gasoline, end = 2005)
autoplot(gasoline_2004, xlab = "Year")
Ts linear model
for(num in c(1, 2, 3, 5, 10, 20)){
var_name <- paste("fit",as.character(num), "gasoline_2004", sep = "")
assign(var_name, tslm(gasoline_2004 ~ trend + fourier(gasoline_2004, K = num)))
print(autoplot(gasoline_2004) + autolayer(get(var_name)$fitted.values, series = as.character(num)) + ggtitle(var_name) + ylab("gasoline") + guides(colour = guide_legend(title = "Number of Fourier Transform pairs")))}
autoplot(gasoline_2004) +
autolayer(fit1gasoline_2004$fitted.values, series = "1") +
autolayer(fit5gasoline_2004$fitted.values, series = "2") +
autolayer(fit10gasoline_2004$fitted.values, series = "3") +
autolayer(fit10gasoline_2004$fitted.values, series = "5") +
autolayer(fit20gasoline_2004$fitted.values, series = "10") +
autolayer(fit20gasoline_2004$fitted.values, series = "20") +
guides(colour = guide_legend(title = "Fourier Transform pairs")) +
scale_color_discrete(breaks = c(1, 2, 3, 5, 10, 20))
With more Fourier pairs used, the line plotted will be more fitted with the original data.
6.b
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
gasoline_2004 <- window(gasoline, end=2005)
autoplot(gasoline_2004, main="Quarterly retail index in Europe", xlab="Year", ylab="Thousand barrels per day")
for(num in c(1, 2, 3, 5, 10, 20)){var_name <- paste("fit",as.character(num),"gasoline_2004",sep = "")
assign(var_name, tslm(gasoline_2004 ~ trend + fourier(gasoline_2004, K = num)))
print(autoplot(gasoline_2004) +autolayer(get(var_name)$fitted.values,series = as.character(num)) + ggtitle(var_name) + ylab("gasoline") + guides(colour = guide_legend(title = "Number of Fourier Transform pairs")))}
autoplot(gasoline_2004) +
autolayer(fit1gasoline_2004$fitted.values, series = "1") +
autolayer(fit5gasoline_2004$fitted.values, series = "2") +
autolayer(fit10gasoline_2004$fitted.values, series = "3") +
autolayer(fit10gasoline_2004$fitted.values, series = "5") +
autolayer(fit20gasoline_2004$fitted.values, series = "10") +
autolayer(fit20gasoline_2004$fitted.values, series = "20") +
guides(colour = guide_legend(title = "Fourier Transform pairs")) + scale_color_discrete(breaks = c(1, 2, 3, 5, 10, 20))
for(i in c(1, 2, 3, 5, 10, 20)){
fit_gasoline_2004.name <- paste("fit", as.character(i), "gasoline_2004",sep = "")
writeLines(paste("\n", fit_gasoline_2004.name, "\n"))
print(CV(get(fit_gasoline_2004.name)))}
##
## fit1gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 8.201921e-02 -1.813292e+03 -1.813208e+03 -1.790354e+03 8.250858e-01
##
## fit2gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 8.136854e-02 -1.819113e+03 -1.818957e+03 -1.787001e+03 8.269569e-01
##
## fit3gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 7.658846e-02 -1.863085e+03 -1.862834e+03 -1.821797e+03 8.375702e-01
##
## fit5gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 7.553646e-02 -1.873234e+03 -1.872723e+03 -1.813596e+03 8.406928e-01
##
## fit10gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 7.135754e-02 -1.915014e+03 -1.913441e+03 -1.809500e+03 8.516102e-01
##
## fit20gasoline_2004
##
## CV AIC AICc BIC AdjR2
## 7.223834e-02 -1.908017e+03 -1.902469e+03 -1.710753e+03 8.540588e-01
fc_gasoline_2005 <- forecast(fit10gasoline_2004, newdata=data.frame(fourier(gasoline_2004, K = 10, h = 52)))
autoplot(fc_gasoline_2005) + autolayer(window(gasoline, start = 2004, end = 2006)) +scale_x_continuous(limits = c(2004, 2006))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 674 row(s) containing missing values (geom_path).
6.c
checkresiduals(fit10gasoline_2004)
##
## Breusch-Godfrey test for serial correlation of order up to 104
##
## data: Residuals from Linear regression model
## LM test = 155.45, df = 104, p-value = 0.0008135
6.d
fc_gasoline_2005 <- forecast(fit10gasoline_2004,newdata=data.frame(fourier(gasoline_2004, K = 10, h = 52)))
6.e
autoplot(fc_gasoline_2005) +autolayer(window(gasoline, start = 2004, end = 2006)) + scale_x_continuous(limits = c(2004, 2006))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 674 row(s) containing missing values (geom_path).
7.a
autoplot(huron)
str(huron)
## Time-Series [1:98] from 1875 to 1972: 10.38 11.86 10.97 10.8 9.79 ...
head(huron)
## Time Series:
## Start = 1875
## End = 1880
## Frequency = 1
## [1] 10.38 11.86 10.97 10.80 9.79 10.39
There seems to be a down trend from 1880 to 1920.
7.b
tslm_huron <- tslm(huron ~ trend)
t <- time(huron)
t.break <- 1915
t_piece <- ts(pmax(0,t-t.break), start=1875)
pw_huron <- tslm(huron ~ t + t_piece)
summary(pw_huron)
##
## Call:
## tslm(formula = huron ~ t + t_piece)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.49626 -0.66240 -0.07139 0.85163 2.39222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 132.90870 19.97687 6.653 1.82e-09 ***
## t -0.06498 0.01051 -6.181 1.58e-08 ***
## t_piece 0.06486 0.01563 4.150 7.26e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.045 on 95 degrees of freedom
## Multiple R-squared: 0.3841, Adjusted R-squared: 0.3711
## F-statistic: 29.62 on 2 and 95 DF, p-value: 1.004e-10
7.c
h=8
fc_tslm_huron <- forecast(tslm_huron, h=h)
t.new <- t[length(t)] + seq(h)
t_piece_new <- t_piece[length(t_piece)]+seq(h)
newdata <- cbind(t=t.new, t_piece=t_piece_new) %>%as.data.frame()
fc_pw_huron <- forecast(pw_huron,newdata = newdata)
autoplot(huron) + autolayer(fitted(tslm_huron), series = "Linear") + autolayer(fc_tslm_huron, series = "Linear")
autoplot(huron) + autolayer(fitted(pw_huron), series = "Piecewise") + autolayer(fc_pw_huron, series="Piecewise")
Tslm model follows the trend. Pw models can show the trend easily.