##Chapter 5
library(readr)
library(readxl)
library(ggplot2)
library(fpp2)
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:psych':
##
## outlier
Chapter 5 1.
daily20 <- head(elecdaily,20)
autoplot(daily20)
##regression model
demandfit<-lm(Demand~Temperature,data = elecdaily)
demandfit
##
## Call:
## lm(formula = Demand ~ Temperature, data = elecdaily)
##
## Coefficients:
## (Intercept) Temperature
## 212.3856 0.4182
##sumary of regression model
summary(demandfit)
##
## Call:
## lm(formula = Demand ~ Temperature, data = elecdaily)
##
## Residuals:
## Min 1Q Median 3Q Max
## -55.474 -15.719 -0.336 15.767 117.184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 212.3856 5.0080 42.409 <2e-16 ***
## Temperature 0.4182 0.2263 1.848 0.0654 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.55 on 363 degrees of freedom
## Multiple R-squared: 0.009322, Adjusted R-squared: 0.006592
## F-statistic: 3.416 on 1 and 363 DF, p-value: 0.0654
ploting residuals
plot(demandfit)
demandtslm<-tslm(Demand~Temperature,data = daily20)
temperaturerange<-data.frame(Temperature=c(15,35))
forecastDemTemp<-forecast(demandtslm,temperaturerange)
forecastDemTemp
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 3.857143 140.5701 108.6810 172.4591 90.21166 190.9285
## 4.000000 275.7146 245.2278 306.2014 227.57056 323.8586
autoplot(daily20,facets = TRUE)
daily20 %>%
as.data.frame()%>%
ggplot(aes(x=Temperature, y=Demand))+
geom_point()+
geom_smooth(method="lm",se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
##running model
fit1<-tslm(Demand~Temperature,data=daily20)
checkresiduals(fit1)
##
## Breusch-Godfrey test for serial correlation of order up to 5
##
## data: Residuals from Linear regression model
## LM test = 3.8079, df = 5, p-value = 0.5774
checking temps 15 and 35
forecast(fit1,newdata = data.frame(Temperature=c(15,35)))
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 3.857143 140.5701 108.6810 172.4591 90.21166 190.9285
## 4.000000 275.7146 245.2278 306.2014 227.57056 323.8586
These prediction intervals seem reasonable based off the training data.
plot(Demand ~ Temperature, data=elecdaily, main="Demand vs. Temperature of Elecdaily dataset")
Scatterplot appears to show same pattern as daily20 meaning the daily 20 is a good representattive sample of the entire dataset.
autoplot(mens400)
There is a neagative somewhat linear relationship between year and 400 metre time. As the year increases the winning 400 meter decreases.
Largest outlier is probably the first winning time.
time400<-time(mens400)
tslm(mens400~time400,data=mens400)
##
## Call:
## tslm(formula = mens400 ~ time400, data = mens400)
##
## Coefficients:
## (Intercept) time400
## 172.48148 -0.06457
Winning time decreases by a predicted 0.065 seconds per year.
Line of best fit
autoplot(mens400)+geom_smooth(method="lm",se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3 rows containing non-finite values (stat_smooth).
mens400fit<-lm(mens400~time400,data=mens400)
plot(mens400fit)
The residuals show that there is a negative relationship to begin with but as we moev forward the model might not provide much insight. Perhaps a linear model is not the best fit.
TimeMens400<-time(mens400)
mens400fit2<-lm(mens400~TimeMens400,data=mens400,na.action = na.exclude)
mens4002020<-forecast(mens400fit2,newdata = data.frame(TimeMens400=2020))
mens4002020
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1 42.04231 40.44975 43.63487 39.55286 44.53176
The 95 % CI puts our estimate between 39.55 seconds and 44.53 seconds
These assumptions were made off straight data. The data is not straight so I don’t see these predictions as reliable. The low end of the 95% CI does not seem feasible.
easter(ausbeer)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1956 0.67 0.33 0.00 0.00
## 1957 0.00 1.00 0.00 0.00
## 1958 0.00 1.00 0.00 0.00
## 1959 1.00 0.00 0.00 0.00
## 1960 0.00 1.00 0.00 0.00
## 1961 0.33 0.67 0.00 0.00
## 1962 0.00 1.00 0.00 0.00
## 1963 0.00 1.00 0.00 0.00
## 1964 1.00 0.00 0.00 0.00
## 1965 0.00 1.00 0.00 0.00
## 1966 0.00 1.00 0.00 0.00
## 1967 1.00 0.00 0.00 0.00
## 1968 0.00 1.00 0.00 0.00
## 1969 0.00 1.00 0.00 0.00
## 1970 1.00 0.00 0.00 0.00
## 1971 0.00 1.00 0.00 0.00
## 1972 0.33 0.67 0.00 0.00
## 1973 0.00 1.00 0.00 0.00
## 1974 0.00 1.00 0.00 0.00
## 1975 1.00 0.00 0.00 0.00
## 1976 0.00 1.00 0.00 0.00
## 1977 0.00 1.00 0.00 0.00
## 1978 1.00 0.00 0.00 0.00
## 1979 0.00 1.00 0.00 0.00
## 1980 0.00 1.00 0.00 0.00
## 1981 0.00 1.00 0.00 0.00
## 1982 0.00 1.00 0.00 0.00
## 1983 0.00 1.00 0.00 0.00
## 1984 0.00 1.00 0.00 0.00
## 1985 0.00 1.00 0.00 0.00
## 1986 1.00 0.00 0.00 0.00
## 1987 0.00 1.00 0.00 0.00
## 1988 0.00 1.00 0.00 0.00
## 1989 1.00 0.00 0.00 0.00
## 1990 0.00 1.00 0.00 0.00
## 1991 1.00 0.00 0.00 0.00
## 1992 0.00 1.00 0.00 0.00
## 1993 0.00 1.00 0.00 0.00
## 1994 0.00 1.00 0.00 0.00
## 1995 0.00 1.00 0.00 0.00
## 1996 0.00 1.00 0.00 0.00
## 1997 1.00 0.00 0.00 0.00
## 1998 0.00 1.00 0.00 0.00
## 1999 0.00 1.00 0.00 0.00
## 2000 0.00 1.00 0.00 0.00
## 2001 0.00 1.00 0.00 0.00
## 2002 1.00 0.00 0.00 0.00
## 2003 0.00 1.00 0.00 0.00
## 2004 0.00 1.00 0.00 0.00
## 2005 1.00 0.00 0.00 0.00
## 2006 0.00 1.00 0.00 0.00
## 2007 0.00 1.00 0.00 0.00
## 2008 1.00 0.00 0.00 0.00
## 2009 0.00 1.00 0.00 0.00
## 2010 0.00 1.00
I see a data set running from 1956 to 2010. Each year is broke upto into 4 quarters. The majority of the values are either zero or one. But there are also 0.33 and 0.67 appear a few times.
plot(easter(ausbeer))
The plot looks like a barcode
logy=B0+B1logx+e y can be written as: y=e(B0+B1logx+e) dy/dx=(b1/x)e(B0+b1logx+e) dy/dx=B1y/x B1=(dy/dx)(x/y)=ratio of the percentage change in the forecast variable (y) to the percenatge change in the predictor variable (x)
B1=(dy/dx)*(x/y)
autoplot(fancy)
head(fancy,50)
## Jan Feb Mar Apr May Jun Jul Aug
## 1987 1664.81 2397.53 2840.71 3547.29 3752.96 3714.74 4349.61 3566.34
## 1988 2499.81 5198.24 7225.14 4806.03 5900.88 4951.34 6179.12 4752.15
## 1989 4717.02 5702.63 9957.58 5304.78 6492.43 6630.80 7349.62 8176.62
## 1990 5921.10 5814.58 12421.25 6369.77 7609.12 7224.75 8121.22 7979.25
## 1991 4826.64 6470.23
## Sep Oct Nov Dec
## 1987 5021.82 6423.48 7600.60 19756.21
## 1988 5496.43 5835.10 12600.08 28541.72
## 1989 8573.17 9690.50 15151.84 34061.01
## 1990 8093.06 8476.70 17914.66 30114.41
## 1991
Plot shows seasonal fluctuations with an overall positive upward trend in sales. The most sales appear at the start of the calendar year.
hist(fancy)
the seasonsal fluctations appear to be growing at an increasing rate. therefore a linear model is not appropriate and a log transformation would be more appropriate. Also the histrogram is strongly skewed right.
autoplot(fancy)
head(fancy)
## Jan Feb Mar Apr May Jun
## 1987 1664.81 2397.53 2840.71 3547.29 3752.96 3714.74
head(fancy,50)
## Jan Feb Mar Apr May Jun Jul Aug
## 1987 1664.81 2397.53 2840.71 3547.29 3752.96 3714.74 4349.61 3566.34
## 1988 2499.81 5198.24 7225.14 4806.03 5900.88 4951.34 6179.12 4752.15
## 1989 4717.02 5702.63 9957.58 5304.78 6492.43 6630.80 7349.62 8176.62
## 1990 5921.10 5814.58 12421.25 6369.77 7609.12 7224.75 8121.22 7979.25
## 1991 4826.64 6470.23
## Sep Oct Nov Dec
## 1987 5021.82 6423.48 7600.60 19756.21
## 1988 5496.43 5835.10 12600.08 28541.72
## 1989 8573.17 9690.50 15151.84 34061.01
## 1990 8093.06 8476.70 17914.66 30114.41
## 1991
tslmfancy<-tslm(BoxCox(fancy,0) ~ season+trend)
tslmfancy
##
## Call:
## tslm(formula = BoxCox(fancy, 0) ~ season + trend)
##
## Coefficients:
## (Intercept) season2 season3 season4 season5 season6
## 7.60586 0.25104 0.69521 0.38293 0.40799 0.44696
## season7 season8 season9 season10 season11 season12
## 0.60822 0.58535 0.66634 0.74403 1.20302 1.95814
## trend
## 0.02239
alternatively
fancylog<-log(fancy)
festival<-rep(0,length(fancy))
festival[seq_along(festival)%%12==3]<-1
festival[3]<-0
festival<-ts(festival, freq=12,start=c(1987,1))
fancydata<-data.frame(fancylog,festival)
fancyfit<-tslm(fancylog~trend+season+festival,data=fancydata)
fancyfit
##
## Call:
## tslm(formula = fancylog ~ trend + season + festival, data = fancydata)
##
## Coefficients:
## (Intercept) trend season2 season3 season4 season5
## 7.61967 0.02202 0.25142 0.26608 0.38405 0.40949
## season6 season7 season8 season9 season10 season11
## 0.44883 0.61045 0.58796 0.66933 0.74739 1.20675
## season12 festival
## 1.96224 0.50152
#plot of residuals
res<-residuals(fancyfit)
fit_vector<-as.vector(fitted(fancyfit))
fit_residuals<-as.vector(residuals(fancyfit))
plot(res)
#predicted scores and residuals
plot(fit_vector,fit_residuals)
There appears to be more of a random scatter in the residuals suggesting that the log reexpression and creation of dummy variables created a more approprtiate fit. I would hestiate as there is perhaps a weak curved pattern.
#boxplots for months
res.month<-data.frame(res, month=rep(1:12,7))
boxplot(res~month,data=res.month)
Months are very inconsistent. Sales across April and June have smallest variation and are normally distributed. But January, February, March and December have strong skews.
summary(fancyfit)
##
## Call:
## tslm(formula = fancylog ~ trend + season + festival, data = fancydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.33673 -0.12757 0.00257 0.10911 0.37671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.6196670 0.0742471 102.626 < 2e-16 ***
## trend 0.0220198 0.0008268 26.634 < 2e-16 ***
## season2 0.2514168 0.0956790 2.628 0.010555 *
## season3 0.2660828 0.1934044 1.376 0.173275
## season4 0.3840535 0.0957075 4.013 0.000148 ***
## season5 0.4094870 0.0957325 4.277 5.88e-05 ***
## season6 0.4488283 0.0957647 4.687 1.33e-05 ***
## season7 0.6104545 0.0958039 6.372 1.71e-08 ***
## season8 0.5879644 0.0958503 6.134 4.53e-08 ***
## season9 0.6693299 0.0959037 6.979 1.36e-09 ***
## season10 0.7473919 0.0959643 7.788 4.48e-11 ***
## season11 1.2067479 0.0960319 12.566 < 2e-16 ***
## season12 1.9622412 0.0961066 20.417 < 2e-16 ***
## festival 0.5015151 0.1964273 2.553 0.012856 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.179 on 70 degrees of freedom
## Multiple R-squared: 0.9567, Adjusted R-squared: 0.9487
## F-statistic: 119 on 13 and 70 DF, p-value: < 2.2e-16
Season 3 has a large P Value indicating its not much use in the model. The dummy variable and season 2 have larger p values than the rest of teh variables but they are still around 1%. Season 12 has the smallest p value which makes sense due to it having the largest peak. All coeficients are positive as well which is explained by the increasing trend.
library(lmtest)
dwtest(fancyfit,alt="two.sided")
##
## Durbin-Watson test
##
## data: fancyfit
## DW = 0.88889, p-value = 1.956e-07
## alternative hypothesis: true autocorrelation is not 0
Durbin Watson test shows autocorrelation is not zero and there is auto correlation in the residuals, meaning the model can be improved.
#forecast
festival=rep(0,36)
festival[seq_along(festival)%%12==3]<-1
futurefancy<-data.frame(festival)
forcast1<-forecast(fancyfit,newdata = futurefancy)
forcast1
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1994 9.491352 9.238522 9.744183 9.101594 9.88111
## Feb 1994 9.764789 9.511959 10.017620 9.375031 10.15455
## Mar 1994 10.302990 10.048860 10.557120 9.911228 10.69475
## Apr 1994 9.941465 9.688635 10.194296 9.551707 10.33122
## May 1994 9.988919 9.736088 10.241749 9.599161 10.37868
## Jun 1994 10.050280 9.797449 10.303110 9.660522 10.44004
## Jul 1994 10.233926 9.981095 10.486756 9.844168 10.62368
## Aug 1994 10.233456 9.980625 10.486286 9.843698 10.62321
## Sep 1994 10.336841 10.084010 10.589671 9.947083 10.72660
## Oct 1994 10.436923 10.184092 10.689753 10.047165 10.82668
## Nov 1994 10.918299 10.665468 11.171129 10.528541 11.30806
## Dec 1994 11.695812 11.442981 11.948642 11.306054 12.08557
## Jan 1995 9.755590 9.499844 10.011336 9.361338 10.14984
## Feb 1995 10.029027 9.773281 10.284773 9.634775 10.42328
## Mar 1995 10.567228 10.310518 10.823938 10.171489 10.96297
## Apr 1995 10.205703 9.949957 10.461449 9.811451 10.59996
## May 1995 10.253157 9.997411 10.508903 9.858904 10.64741
## Jun 1995 10.314518 10.058772 10.570264 9.920265 10.70877
## Jul 1995 10.498164 10.242418 10.753910 10.103911 10.89242
## Aug 1995 10.497694 10.241948 10.753440 10.103441 10.89195
## Sep 1995 10.601079 10.345333 10.856825 10.206826 10.99533
## Oct 1995 10.701161 10.445415 10.956907 10.306908 11.09541
## Nov 1995 11.182537 10.926791 11.438282 10.788284 11.57679
## Dec 1995 11.960050 11.704304 12.215796 11.565797 12.35430
## Jan 1996 10.019828 9.760564 10.279093 9.620151 10.41951
## Feb 1996 10.293265 10.034000 10.552530 9.893588 10.69294
## Mar 1996 10.831466 10.571566 11.091365 10.430810 11.23212
## Apr 1996 10.469941 10.210677 10.729206 10.070264 10.86962
## May 1996 10.517395 10.258130 10.776659 10.117718 10.91707
## Jun 1996 10.578756 10.319491 10.838021 10.179079 10.97843
## Jul 1996 10.762402 10.503137 11.021667 10.362725 11.16208
## Aug 1996 10.761932 10.502667 11.021196 10.362254 11.16161
## Sep 1996 10.865317 10.606052 11.124582 10.465640 11.26499
## Oct 1996 10.965399 10.706134 11.224664 10.565722 11.36508
## Nov 1996 11.446774 11.187510 11.706039 11.047097 11.84645
## Dec 1996 12.224288 11.965023 12.483552 11.824611 12.62396
#intervals for raw data
forcast1$mean<-exp(forcast1$mean)
forcast1$upper<-exp(forcast1$upper)
forcast1$lower<-exp(forcast1$lower)
forcast1$x<-exp(forcast1$x)
forcast1
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1994 13244.70 10285.82 17054.73 8969.583 19557.43
## Feb 1994 17409.81 13520.45 22418.00 11790.284 25707.73
## Mar 1994 29821.65 23129.40 38450.24 20155.412 44123.68
## Apr 1994 20774.16 16133.21 26750.16 14068.696 30675.62
## May 1994 21783.73 16917.24 28050.15 14752.395 32166.37
## Jun 1994 23162.27 17987.81 29825.24 15685.969 34201.95
## Jul 1994 27831.56 21613.98 35837.72 18848.111 41096.73
## Aug 1994 27818.48 21603.82 35820.87 18839.249 41077.41
## Sep 1994 30848.42 23956.87 39722.43 20891.193 45551.50
## Oct 1994 34095.57 26478.61 43903.67 23090.230 50346.32
## Nov 1994 55176.84 42850.31 71049.28 37366.903 81475.41
## Dec 1994 120067.79 93244.59 154607.08 81312.400 177294.90
## Jan 1995 17250.40 13357.65 22277.59 11629.938 25587.08
## Feb 1995 22675.20 17558.28 29283.31 15287.252 33633.55
## Mar 1995 38840.85 30046.98 50208.44 26146.972 57697.39
## Apr 1995 27057.06 20951.33 34942.16 18241.435 40133.06
## May 1995 28371.96 21969.51 36640.25 19127.918 42083.42
## Jun 1995 30167.42 23359.80 38958.95 20338.387 44746.58
## Jul 1995 36248.88 28068.91 46812.70 24438.412 53767.06
## Aug 1995 36231.84 28055.72 46790.69 24426.922 53741.78
## Sep 1995 40178.16 31111.50 51887.06 27087.467 59595.26
## Oct 1995 44407.37 34386.35 57348.77 29938.733 65868.34
## Nov 1995 71864.42 55647.40 92807.48 48449.831 106594.69
## Dec 1995 156380.86 121091.75 201954.08 105429.448 231955.81
## Jan 1996 22467.57 17336.40 29117.46 15065.329 33506.86
## Feb 1996 29533.04 22788.25 38274.14 19802.984 44043.89
## Mar 1996 50587.81 39009.73 65602.25 33887.802 75517.62
## Apr 1996 35240.15 27191.96 45670.42 23629.808 52555.15
## May 1996 36952.72 28513.41 47889.88 24778.151 55109.18
## Jun 1996 39291.20 30317.82 50920.48 26346.183 58596.65
## Jul 1996 47211.93 36429.60 61185.57 31657.322 70409.18
## Aug 1996 47189.73 36412.48 61156.80 31642.439 70376.07
## Sep 1996 52329.57 40378.47 67817.91 35088.887 78041.33
## Oct 1996 57837.85 44628.77 74956.52 38782.394 86256.08
## Nov 1996 93598.96 72222.70 121302.09 62761.521 139588.15
## Dec 1996 203676.38 157160.50 263959.89 136572.460 303751.35
#forecast for linear regression model
plot(forcast1)
By adding more explanatory variables we could improve the prediction. I think weather could play a big role in sales.
head(gasoline)
## Time Series:
## Start = 1991.1
## End = 1991.19582477755
## Frequency = 52.1785714285714
## [1] 6.621 6.433 6.582 7.224 6.875 6.947
str(gasoline)
## Time-Series [1:1355] from 1991 to 2017: 6.62 6.43 6.58 7.22 6.88 ...
gas04<-window(gasoline,end=2005)
autoplot(gas04)
for(num in c(1,2,3,4,10,20)){
var_name<-paste("fit",as.character(num),"gas04",sep = "")
assign(var_name,tslm(gas04~trend+fourier(gas04,K=num)))
print(autoplot(gas04)+autolayer(get(var_name)$fitted.values,series=as.character(num))+ggtitle(var_name))
}
autoplot(gas04)+autolayer(fit1gas04$fitted.values,series="1")+autolayer(fit2gas04$fitted.values,series="2")+autolayer(fit3gas04$fitted.values,series="3")+autolayer(fit4gas04$fitted.values,series="4")+autolayer(fit10gas04$fitted.values,series="10")+autolayer(fit20gas04$fitted.values,series="20")+scale_color_discrete(breaks=c(1,2,3,4,10,20))
The more Fourier pairs used the closer the model is to the original plot.
for(i in c(1,2,3,4,10,20)){
fitgas04<-paste("fit",as.character(i),"gas04",sep = "")
writeLines(paste("\n",fitgas04,"\n"))
}
##
## fit1gas04
##
##
## fit2gas04
##
##
## fit3gas04
##
##
## fit4gas04
##
##
## fit10gas04
##
##
## fit20gas04
10 fourier pairs has the lowest CV value however 20 fourier pairs has the lowest AIC value.
checkresiduals(fit10gas04)
##
## Breusch-Godfrey test for serial correlation of order up to 104
##
## data: Residuals from Linear regression model
## LM test = 155.45, df = 104, p-value = 0.0008135
Residuals are least look normally distributed.
forecast for 2005
forecastgas05<-forecast(fit10gas04,newdata = data.frame(fourier(gas04,K=10,h=52)))
forecastgas05
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005.014 8.745595 8.402358 9.088832 8.220249 9.270941
## 2005.033 8.604075 8.260737 8.947413 8.078574 9.129575
## 2005.052 8.607195 8.263839 8.950551 8.081666 9.132724
## 2005.071 8.683215 8.339879 9.026550 8.157718 9.208712
## 2005.090 8.746645 8.403404 9.089887 8.221293 9.271998
## 2005.110 8.783003 8.439850 9.126156 8.257785 9.308221
## 2005.129 8.833517 8.490370 9.176664 8.308309 9.358725
## 2005.148 8.917498 8.574348 9.260647 8.392285 9.442710
## 2005.167 8.997861 8.654729 9.340992 8.472676 9.523046
## 2005.186 9.029128 8.686005 9.372252 8.503956 9.554301
## 2005.205 9.018507 8.675381 9.361633 8.493330 9.543684
## 2005.225 9.017572 8.674439 9.360705 8.492385 9.542760
## 2005.244 9.056052 8.712918 9.399187 8.530863 9.581242
## 2005.263 9.105523 8.762395 9.448651 8.580344 9.630703
## 2005.282 9.121048 8.777925 9.464172 8.595876 9.646221
## 2005.301 9.105879 8.762754 9.449005 8.580703 9.631055
## 2005.320 9.112513 8.769382 9.455644 8.587329 9.637697
## 2005.340 9.175080 8.831948 9.518211 8.649895 9.700265
## 2005.359 9.259000 8.915873 9.602127 8.733822 9.784178
## 2005.378 9.296291 8.953167 9.639415 8.771118 9.821464
## 2005.397 9.268208 8.925083 9.611334 8.743032 9.793385
## 2005.416 9.234527 8.891397 9.577657 8.709345 9.759710
## 2005.435 9.270246 8.927115 9.613376 8.745062 9.795429
## 2005.455 9.381554 9.038427 9.724681 8.856376 9.906731
## 2005.474 9.497887 9.154762 9.841011 8.972713 10.023060
## 2005.493 9.546877 9.203751 9.890002 9.021700 10.072053
## 2005.512 9.524722 9.181592 9.867851 8.999539 10.049904
## 2005.531 9.487091 9.143960 9.830221 8.961908 10.012274
## 2005.550 9.482387 9.139260 9.825513 8.957209 10.007564
## 2005.570 9.509124 9.165999 9.852248 8.983950 10.034297
## 2005.589 9.536544 9.193418 9.879670 9.011367 10.061720
## 2005.608 9.546848 9.203718 9.889978 9.021665 10.072030
## 2005.627 9.541950 9.198820 9.885081 9.016767 10.067133
## 2005.646 9.517407 9.174280 9.860533 8.992229 10.042584
## 2005.665 9.453438 9.110314 9.796562 8.928265 9.978612
## 2005.685 9.344405 9.001279 9.687531 8.819228 9.869581
## 2005.704 9.226892 8.883762 9.570023 8.701709 9.752076
## 2005.723 9.160315 8.817184 9.503446 8.635131 9.685499
## 2005.742 9.174078 8.830951 9.517205 8.648901 9.699255
## 2005.761 9.241612 8.898488 9.584736 8.716439 9.766785
## 2005.780 9.310432 8.967306 9.653559 8.785255 9.835609
## 2005.800 9.348790 9.005658 9.691922 8.823604 9.873976
## 2005.819 9.354217 9.011085 9.697350 8.829031 9.879404
## 2005.838 9.326591 8.983464 9.669718 8.801413 9.851768
## 2005.857 9.258087 8.914964 9.601211 8.732915 9.783260
## 2005.876 9.163285 8.820158 9.506412 8.638107 9.688463
## 2005.895 9.102531 8.759394 9.445669 8.577338 9.627725
## 2005.915 9.142445 8.799307 9.485583 8.617250 9.667640
## 2005.934 9.276104 8.932975 9.619234 8.750923 9.801286
## 2005.953 9.396214 9.053089 9.739340 8.871039 9.921390
## 2005.972 9.374939 9.031803 9.718075 8.849747 9.900131
## 2005.991 9.184286 8.841055 9.527518 8.658948 9.709624
autoplot(forecastgas05)+autolayer(window(gasoline,start=2004,end=2006))+scale_x_continuous(limits = c(2004,2006))
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 674 row(s) containing missing values (geom_path).
str(huron)
## Time-Series [1:98] from 1875 to 1972: 10.38 11.86 10.97 10.8 9.79 ...
head(huron)
## Time Series:
## Start = 1875
## End = 1880
## Frequency = 1
## [1] 10.38 11.86 10.97 10.80 9.79 10.39
autoplot(huron)
There is no obvious seasonal pattern, but there appears to be a downward trend.
Linear regression model
tslmhuron<-tslm(huron~trend)
tslmhuron
##
## Call:
## tslm(formula = huron ~ trend)
##
## Coefficients:
## (Intercept) trend
## 10.2020 -0.0242
Piecewise function
t<-time(huron)
t.break<-1915
t.piece<-ts(pmax(0,t-t.break),start = 1875)
pwhuron<-tslm(huron~t+t.piece)
summary(pwhuron)
##
## Call:
## tslm(formula = huron ~ t + t.piece)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.49626 -0.66240 -0.07139 0.85163 2.39222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 132.90870 19.97687 6.653 1.82e-09 ***
## t -0.06498 0.01051 -6.181 1.58e-08 ***
## t.piece 0.06486 0.01563 4.150 7.26e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.045 on 95 degrees of freedom
## Multiple R-squared: 0.3841, Adjusted R-squared: 0.3711
## F-statistic: 29.62 on 2 and 95 DF, p-value: 1.004e-10
forecast for tslm
h=8
fctslmhuron<-forecast(tslmhuron,h=h)
t.new<-t[length(t)]+seq(h)
tpiecenew<-t.piece[length(t.piece)]+seq(h)
newdata<-cbind(t=t.new,t.piece=tpiecenew)%>%as.data.frame()
fcpwhuron<-forecast(pwhuron,newdata = newdata)
autoplot(huron)+autolayer(fitted(tslmhuron),series = "Linear")+autolayer(fctslmhuron,series = "Linear")
forecst for piecewise
autoplot(huron)+autolayer(fitted(pwhuron),series = "Piecewise")+autolayer(fcpwhuron,series = "Piecewise")
##Chapter 6 1.
3x5MA is taking the average for the first five terms and then getting the moving average of each three observation set is the same. Since 1/15(1y+2y+3y+3y+3y+2y+y)=y.
autoplot(plastics)
There is general upward trend with seasonal fluctations with peaks mid year and troughs at the turn of a new year.
decompplastics<-decompose(plastics,"multiplicative")
decompplastics
## $x
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1 742 697 776 898 1030 1107 1165 1216 1208 1131 971 783
## 2 741 700 774 932 1099 1223 1290 1349 1341 1296 1066 901
## 3 896 793 885 1055 1204 1326 1303 1436 1473 1453 1170 1023
## 4 951 861 938 1109 1274 1422 1486 1555 1604 1600 1403 1209
## 5 1030 1032 1126 1285 1468 1637 1611 1608 1528 1420 1119 1013
##
## $seasonal
## Jan Feb Mar Apr May Jun Jul
## 1 0.7670466 0.7103357 0.7765294 0.9103112 1.0447386 1.1570026 1.1636317
## 2 0.7670466 0.7103357 0.7765294 0.9103112 1.0447386 1.1570026 1.1636317
## 3 0.7670466 0.7103357 0.7765294 0.9103112 1.0447386 1.1570026 1.1636317
## 4 0.7670466 0.7103357 0.7765294 0.9103112 1.0447386 1.1570026 1.1636317
## 5 0.7670466 0.7103357 0.7765294 0.9103112 1.0447386 1.1570026 1.1636317
## Aug Sep Oct Nov Dec
## 1 1.2252952 1.2313635 1.1887444 0.9919176 0.8330834
## 2 1.2252952 1.2313635 1.1887444 0.9919176 0.8330834
## 3 1.2252952 1.2313635 1.1887444 0.9919176 0.8330834
## 4 1.2252952 1.2313635 1.1887444 0.9919176 0.8330834
## 5 1.2252952 1.2313635 1.1887444 0.9919176 0.8330834
##
## $trend
## Jan Feb Mar Apr May Jun Jul
## 1 NA NA NA NA NA NA 976.9583
## 2 1000.4583 1011.2083 1022.2917 1034.7083 1045.5417 1054.4167 1065.7917
## 3 1117.3750 1121.5417 1130.6667 1142.7083 1153.5833 1163.0000 1170.3750
## 4 1208.7083 1221.2917 1231.7083 1243.2917 1259.1250 1276.5833 1287.6250
## 5 1374.7917 1382.2083 1381.2500 1370.5833 1351.2500 1331.2500 NA
## Aug Sep Oct Nov Dec
## 1 977.0417 977.0833 978.4167 982.7083 990.4167
## 2 1076.1250 1084.6250 1094.3750 1103.8750 1112.5417
## 3 1175.5000 1180.5417 1185.0000 1190.1667 1197.0833
## 4 1298.0417 1313.0000 1328.1667 1343.5833 1360.6250
## 5 NA NA NA NA NA
##
## $random
## Jan Feb Mar Apr May Jun Jul
## 1 NA NA NA NA NA NA 1.0247887
## 2 0.9656005 0.9745267 0.9750081 0.9894824 1.0061175 1.0024895 1.0401641
## 3 1.0454117 0.9953920 1.0079773 1.0142083 0.9990100 0.9854384 0.9567618
## 4 1.0257400 0.9924762 0.9807020 0.9798704 0.9684851 0.9627557 0.9917766
## 5 0.9767392 1.0510964 1.0498039 1.0299302 1.0398787 1.0628077 NA
## Aug Sep Oct Nov Dec
## 1 1.0157335 1.0040354 0.9724119 0.9961368 0.9489762
## 2 1.0230774 1.0040674 0.9962088 0.9735577 0.9721203
## 3 0.9969907 1.0132932 1.0314752 0.9910657 1.0258002
## 4 0.9776897 0.9920952 1.0133954 1.0527311 1.0665946
## 5 NA NA NA NA NA
##
## $figure
## [1] 0.7670466 0.7103357 0.7765294 0.9103112 1.0447386 1.1570026 1.1636317
## [8] 1.2252952 1.2313635 1.1887444 0.9919176 0.8330834
##
## $type
## [1] "multiplicative"
##
## attr(,"class")
## [1] "decomposed.ts"
plot(decompplastics)
There is a yearly seasonal component. There is an upward trend until year 5 where a decrease begins.
The decomposition does support the plot of sales of plastics. That shows a general upward trend but its hard to determine if at 5 year there is a downward trend or just a uncommon year.
Seasonal fluctuations removed to see trend mapped against plot.
autoplot(plastics, series = "Data")+autolayer(seasadj(decompplastics),series = "Seasonally Adjusted")
plastics1<-plastics
plastics1[25]<-plastics1[25]+500
decompplastics1<-decompose(plastics1,"multiplicative")
decompplastics1
## $x
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1 742 697 776 898 1030 1107 1165 1216 1208 1131 971 783
## 2 741 700 774 932 1099 1223 1290 1349 1341 1296 1066 901
## 3 1396 793 885 1055 1204 1326 1303 1436 1473 1453 1170 1023
## 4 951 861 938 1109 1274 1422 1486 1555 1604 1600 1403 1209
## 5 1030 1032 1126 1285 1468 1637 1611 1608 1528 1420 1119 1013
##
## $seasonal
## Jan Feb Mar Apr May Jun Jul
## 1 0.8673820 0.7040518 0.7696271 0.9022526 1.0357124 1.1472203 1.1530419
## 2 0.8673820 0.7040518 0.7696271 0.9022526 1.0357124 1.1472203 1.1530419
## 3 0.8673820 0.7040518 0.7696271 0.9022526 1.0357124 1.1472203 1.1530419
## 4 0.8673820 0.7040518 0.7696271 0.9022526 1.0357124 1.1472203 1.1530419
## 5 0.8673820 0.7040518 0.7696271 0.9022526 1.0357124 1.1472203 1.1530419
## Aug Sep Oct Nov Dec
## 1 1.2136983 1.2200133 1.1779668 0.9832031 0.8258303
## 2 1.2136983 1.2200133 1.1779668 0.9832031 0.8258303
## 3 1.2136983 1.2200133 1.1779668 0.9832031 0.8258303
## 4 1.2136983 1.2200133 1.1779668 0.9832031 0.8258303
## 5 1.2136983 1.2200133 1.1779668 0.9832031 0.8258303
##
## $trend
## Jan Feb Mar Apr May Jun Jul
## 1 NA NA NA NA NA NA 976.9583
## 2 1000.4583 1011.2083 1022.2917 1034.7083 1045.5417 1054.4167 1086.6250
## 3 1159.0417 1163.2083 1172.3333 1184.3750 1195.2500 1204.6667 1191.2083
## 4 1208.7083 1221.2917 1231.7083 1243.2917 1259.1250 1276.5833 1287.6250
## 5 1374.7917 1382.2083 1381.2500 1370.5833 1351.2500 1331.2500 NA
## Aug Sep Oct Nov Dec
## 1 977.0417 977.0833 978.4167 982.7083 990.4167
## 2 1117.7917 1126.2917 1136.0417 1145.5417 1154.2083
## 3 1175.5000 1180.5417 1185.0000 1190.1667 1197.0833
## 4 1298.0417 1313.0000 1328.1667 1343.5833 1360.6250
## 5 NA NA NA NA NA
##
## $random
## Jan Feb Mar Apr May Jun Jul
## 1 NA NA NA NA NA NA 1.0342006
## 2 0.8539035 0.9832247 0.9837524 0.9983201 1.0148858 1.0110377 1.0295914
## 3 1.3885961 0.9683026 0.9808708 0.9872680 0.9725872 0.9594665 0.9486622
## 4 0.9070863 1.0013344 0.9894973 0.9886222 0.9769254 0.9709651 1.0008853
## 5 0.8637537 1.0604778 1.0592190 1.0391292 1.0489412 1.0718703 NA
## Aug Sep Oct Nov Dec
## 1 1.0254388 1.0133763 0.9813088 1.0049660 0.9573109
## 2 0.9943524 0.9759180 0.9684511 0.9464618 0.9452567
## 3 1.0065169 1.0227202 1.0409124 0.9998500 1.0348096
## 4 0.9870315 1.0013250 1.0226672 1.0620619 1.0759623
## 5 NA NA NA NA NA
##
## $figure
## [1] 0.8673820 0.7040518 0.7696271 0.9022526 1.0357124 1.1472203 1.1530419
## [8] 1.2136983 1.2200133 1.1779668 0.9832031 0.8258303
##
## $type
## [1] "multiplicative"
##
## attr(,"class")
## [1] "decomposed.ts"
plot(decompplastics1)
The added outlier still implies the seasonal effect but it creates a smaller mode on each season. A little spike on the downslope.
plastics2[60]<-plastics[60]+500 decompplastics2<-decompose(plastics2,“multiplicative”) plot(decompplastics2)
When outlier is placed at the end of the data it has less influence on the seasonal data. The only influence it seems to make is that of plateauing downward trend rather having a downward trend after year 5.
x11 decomposition
retail<-read_excel("Documents/Data Analysis and econometrics/retail.xlsx",skip=1)
myts<-ts(retail[,"A3349873A"],frequency = 12,start=c(1982,4))
myts%>%seas(x11="")->myts_x11
autoplot(myts_x11)
original decomposition
decommyts<-decompose(myts,"multiplicative")
plot(decommyts)
The X11 decomposition shows that the seasonal peaks are in general decreasing. Thet first decrease, then increase and decrease again. This is not visible in the original decomposition.
There is an upward trend in the data as time goes by. From 1991-93 there are larger outliers as seen by the remainder component. In comparison to the other components the seasonal variation is rather small by the scales of the graphs.
There seems to be a clear change in the remainder component that is captured in those years of recession.
autoplot(cangas)
ggsubseriesplot(cangas)
ggseasonplot(cangas)
There appears to be less demand for oil in the summer which makes sense as less energy is needed for heat. Yet Canada proabbly doesn’t use much enery for cooling. The price of oil probably changes the seasonlity. If oil is expenive then people will look to use natural gas or wood buring stoves instead. Or they use more public transport.
cangas%>%
stl(t.window=13,s.window = 13,robust=TRUE)%>%
autoplot()
seats
cangas%>%seas()%>% autoplot()
x11
cangas%>%seas(x11="")%>%autoplot()
It is hard to see a difference between the two models
bricksq%>%stl(t.window=13,s.window="periodic",robust=TRUE)%>%autoplot()
bricksq1<-stl(t.window = 13,s.window=13,robust=T,x=bricksq)
autoplot(seasadj(bricksq1))
bricksq1%>% seasadj() %>% naive() %>% autoplot()
bricksq2 <- stl(t.window = 13,s.window='periodic',robust=T,x=bricksq)
bricksq2 %>% seasadj() %>% naive() %>% autoplot()
stlf_brick <- stlf(bricksq)
autoplot(stlf_brick)
checkresiduals(stlf_brick)
## Warning in checkresiduals(stlf_brick): The fitted degrees of freedom is based on
## the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(M,N,N)
## Q* = 41.128, df = 6, p-value = 2.733e-07
##
## Model df: 2. Total lags used: 8
Residuals are correlated together as of the result Ljung-box test
stlf_brick_robust <- stlf(bricksq, robust = TRUE)
autoplot(stlf_brick_robust)
checkresiduals(stlf_brick_robust)
## Warning in checkresiduals(stlf_brick_robust): The fitted degrees of freedom is
## based on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(M,N,N)
## Q* = 28.163, df = 6, p-value = 8.755e-05
##
## Model df: 2. Total lags used: 8
Correlations have lowered, but there is still strong evidence of correlation.
trainset_brick <- subset(bricksq,
end = length(bricksq) - 8)
testset_brick <- subset(bricksq,
start = length(bricksq) - 7)
snaive_brick <- snaive(trainset_brick)
stlf_brick_part <- stlf(trainset_brick, robust = TRUE)
autoplot(bricksq, series = "Original data") +
geom_line(size = 1) +
autolayer(stlf_brick_part, PI = FALSE, size = 1,
series = "stlf") +
autolayer(snaive_brick, PI = FALSE, size = 1,
series = "snaive") +
scale_color_manual(values = c("gray50", "blue", "red"),
breaks = c("Original data", "stlf", "snaive")) +
scale_x_continuous(limits = c(1990, 1994.5)) +
scale_y_continuous(limits = c(300, 600)) +
guides(colour = guide_legend(title = "Data")) +
ggtitle("Forecast from stlf and snaive functions") +
annotate(
"rect",
xmin=1992.75,xmax=1994.5,ymin=-Inf,ymax=Inf,
fill="lightgreen",alpha = 0.3
)
## Scale for 'x' is already present. Adding another scale for 'x', which will
## replace the existing scale.
## Warning: Removed 136 row(s) containing missing values (geom_path).
stlf_writing <- stlf(writing,
s.window = 13,
robust = TRUE,
lambda = BoxCox.lambda(writing),
method = "rwdrift")
autoplot(stlf_writing)
hist(fancy)
autoplot(fancy)
Histogram is heavily skewed so a re expression is needed, there is an increasing trend along with increasing peaks within each season.
stlf_fancy <- stlf(fancy,
s.window = "periodic",
robust = TRUE,
lambda = BoxCox.lambda(fancy),
method = "rwdrift")
autoplot(stlf_fancy)