library(forecast)
## Warning: package 'forecast' was built under R version 3.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.3
hours <- read.csv("CanadianWorkHours.csv", stringsAsFactors = FALSE)
str(hours)
## 'data.frame': 35 obs. of 2 variables:
## $ Year : int 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 ...
## $ HoursPerWeek: num 37.2 37 37.4 37.5 37.7 37.7 37.4 37.2 37.3 37.2 ...
head(hours)
## Year HoursPerWeek
## 1 1966 37.2
## 2 1967 37.0
## 3 1968 37.4
## 4 1969 37.5
## 5 1970 37.7
## 6 1971 37.7
tail(hours)
## Year HoursPerWeek
## 30 1995 35.7
## 31 1996 35.7
## 32 1997 35.5
## 33 1998 35.6
## 34 1999 36.3
## 35 2000 36.5
hoursTS <- ts(hours$Hours, start=c(1966, 1), frequency=1)
autoplot(hoursTS)
hoursLinear <- tslm(hoursTS ~ trend)
summary(hoursLinear)
##
## Call:
## tslm(formula = hoursTS ~ trend)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.20457 -0.28761 0.04779 0.30210 1.23190
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.416134 0.175818 212.811 < 2e-16 ***
## trend -0.061373 0.008518 -7.205 2.93e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.509 on 33 degrees of freedom
## Multiple R-squared: 0.6113, Adjusted R-squared: 0.5996
## F-statistic: 51.91 on 1 and 33 DF, p-value: 2.928e-08
hoursQuad <- tslm(hoursTS ~ trend + I(trend^2))
summary(hoursQuad)
##
## Call:
## tslm(formula = hoursTS ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.94503 -0.20964 -0.01652 0.31862 0.60160
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.1644156 0.2176599 175.340 < 2e-16 ***
## trend -0.1827154 0.0278786 -6.554 2.21e-07 ***
## I(trend^2) 0.0033706 0.0007512 4.487 8.76e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4049 on 32 degrees of freedom
## Multiple R-squared: 0.7614, Adjusted R-squared: 0.7465
## F-statistic: 51.07 on 2 and 32 DF, p-value: 1.1e-10
dsales <- read.csv("DeptStoreSales.csv", stringsAsFactors = FALSE)
dsalesTS <- ts(dsales$Sales, frequency=4)
autoplot(dsalesTS)
validLength <- 4
trainLength <- length(dsalesTS) - validLength
dsalesTrain <- window(dsalesTS, end=c(1, trainLength))
dsalesValid <- window(dsalesTS, start=c(1,trainLength+1))
dsalesExpo <- tslm(dsalesTrain ~ trend + season, lambda=0)
summary(dsalesExpo)
##
## Call:
## tslm(formula = dsalesTrain ~ trend + season, lambda = 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.053524 -0.013199 -0.004527 0.014387 0.062681
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.748945 0.018725 574.057 < 2e-16 ***
## trend 0.011088 0.001295 8.561 3.70e-07 ***
## season2 0.024956 0.020764 1.202 0.248
## season3 0.165343 0.020884 7.917 9.79e-07 ***
## season4 0.433746 0.021084 20.572 2.10e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03277 on 15 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 7.63e+11 on 4 and 15 DF, p-value: < 2.2e-16
validPeriodForecasts <- forecast(dsalesExpo, h=2)
validPeriodForecasts
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 6 Q1 58793.71 55790.19 61958.92 54090.84 63905.46
## 6 Q2 60951.51 57837.76 64232.89 56076.04 66250.87
# Helps set up the plot
yrange = range(dsalesTS)
# Set up the plot
plot(c(1, 7), yrange, type="n", xlab="Quarter", ylab="Sales")
# Add the time series dsales
lines(dsalesTS, bty="l")
# Add fitted line from training period
lines(dsalesExpo$fitted, col="red")
# Add forecasts for valiation period
lines(validPeriodForecasts$mean, col="blue", lty=2)
plot(dsalesTrain - dsalesExpo$fitted, type="o", bty="l")
#Bring in the data
ssales <- read.csv("SouvenirSales.csv", stringsAsFactors = FALSE)
#Create time series
ssalesTS <- ts(ssales$Sales, start = c(1995,1), frequency=12)
#Plot
autoplot(ssalesTS)
#Split into training and validation periods
validLength <- 12
trainLength <- length(ssalesTS) - validLength
ssalesTrain <- window(ssalesTS, end=c(1995, trainLength))
ssalesValid <- window(ssalesTS, start=c(1995,trainLength+1))
# Fit the model to the training set
modelA <- tslm(ssalesTrain ~ trend + season)
# See the estimated regression equation
summary(modelA)
##
## Call:
## tslm(formula = ssalesTrain ~ trend + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12592 -2359 -411 1940 33651
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3065.55 2640.26 -1.161 0.25029
## trend 245.36 34.08 7.199 1.24e-09 ***
## season2 1119.38 3422.06 0.327 0.74474
## season3 4408.84 3422.56 1.288 0.20272
## season4 1462.57 3423.41 0.427 0.67077
## season5 1446.19 3424.60 0.422 0.67434
## season6 1867.98 3426.13 0.545 0.58766
## season7 2988.56 3427.99 0.872 0.38684
## season8 3227.58 3430.19 0.941 0.35058
## season9 3955.56 3432.73 1.152 0.25384
## season10 4821.66 3435.61 1.403 0.16573
## season11 11524.64 3438.82 3.351 0.00141 **
## season12 32469.55 3442.36 9.432 2.19e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5927 on 59 degrees of freedom
## Multiple R-squared: 0.7903, Adjusted R-squared: 0.7476
## F-statistic: 18.53 on 12 and 59 DF, p-value: 9.435e-16
# Call it modelB and have the dependent variable transformed
modelB <- tslm(log(ssalesTrain) ~ trend + season)
summary(modelB)
##
## Call:
## tslm(formula = log(ssalesTrain) ~ trend + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4529 -0.1163 0.0001 0.1005 0.3438
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.646363 0.084120 90.898 < 2e-16 ***
## trend 0.021120 0.001086 19.449 < 2e-16 ***
## season2 0.282015 0.109028 2.587 0.012178 *
## season3 0.694998 0.109044 6.374 3.08e-08 ***
## season4 0.373873 0.109071 3.428 0.001115 **
## season5 0.421710 0.109109 3.865 0.000279 ***
## season6 0.447046 0.109158 4.095 0.000130 ***
## season7 0.583380 0.109217 5.341 1.55e-06 ***
## season8 0.546897 0.109287 5.004 5.37e-06 ***
## season9 0.635565 0.109368 5.811 2.65e-07 ***
## season10 0.729490 0.109460 6.664 9.98e-09 ***
## season11 1.200954 0.109562 10.961 7.38e-16 ***
## season12 1.952202 0.109675 17.800 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1888 on 59 degrees of freedom
## Multiple R-squared: 0.9424, Adjusted R-squared: 0.9306
## F-statistic: 80.4 on 12 and 59 DF, p-value: < 2.2e-16
febForecast <- modelB$coefficients["(Intercept)"] + modelB$coefficients["trend"]*86 + modelB$coefficients["season2"]
exp(febForecast)
## (Intercept)
## 17062.99
modelAForecast <- forecast(modelA, h=validLength)
accuracy(modelAForecast, ssalesValid)
## ME RMSE MAE MPE MAPE MASE
## Training set -5.684342e-14 5365.199 3205.089 6.967778 36.75088 0.855877
## Test set 8.251513e+03 17451.547 10055.276 10.533974 26.66568 2.685130
## ACF1 Theil's U
## Training set 0.4048039 NA
## Test set 0.3206228 0.9075924
modelBForecast <- forecast(modelB, h=validLength)
# We have to tranform the forecasts back to the original scale.
accuracy(exp(modelBForecast$mean), ssalesValid)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 4824.494 7101.444 5191.669 12.35943 15.5191 0.4245018 0.4610253