knitr::opts_chunk$set(echo = TRUE)
# Load libraries and set environment options
library(knitr)
library(forecast)
library(ggplot2)
library(fpp)
library(readxl)
#library(dplyr)
#library(tidyr)#library(zoo)
# Use this option to supress scientific notation in printing values
options(scipen = 10, digits = 2)
# Create a times series and linear model, then plot the data
work_hours.ts<-ts(work_hours$`Hours per week`, start=1966)
work_hours.linear<- tslm(work_hours.ts ~ trend, lamda=1)
work_hours.quad<- tslm(work_hours.ts ~ trend + I(trend^2))
summary(work_hours.linear)
##
## Call:
## tslm(formula = work_hours.ts ~ trend, lamda = 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2046 -0.2876 0.0478 0.3021 1.2319
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.41613 0.17582 212.8 < 2e-16 ***
## trend -0.06137 0.00852 -7.2 0.000000029 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.51 on 33 degrees of freedom
## Multiple R-squared: 0.611, Adjusted R-squared: 0.6
## F-statistic: 51.9 on 1 and 33 DF, p-value: 0.0000000293
summary(work_hours.quad)
##
## Call:
## tslm(formula = work_hours.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9450 -0.2096 -0.0165 0.3186 0.6016
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.164416 0.217660 175.34 < 2e-16 ***
## trend -0.182715 0.027879 -6.55 0.00000022 ***
## I(trend^2) 0.003371 0.000751 4.49 0.00008760 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.40 on 32 degrees of freedom
## Multiple R-squared: 0.761, Adjusted R-squared: 0.747
## F-statistic: 51.1 on 2 and 32 DF, p-value: 0.00000000011
# Plot the time series with the linear and quadratic trend lines
yrange = range(work_hours.ts)
plot(c(1965, 2000), yrange, type="n", xlab="YEAR", ylab="Average Weekly Hours", bty="l", xaxt="n", yaxt="n")
lines(work_hours.ts, bty="l")
axis(1, at=seq(1970, 2000, 5), labels=format(seq(1970,2000,5)))
axis(2, at=seq(32,39, 1), labels=format(seq(32, 39, 1)), las=0)
lines(work_hours.linear$fitted, col="blue")
lines(work_hours.quad$fitted, col="green")
legend(1985, 37.5, c("Actual", "Linear", "Quadratic"), lty=c(1,1,1), col=c("black", "blue", "green"), bty="n")
# Take a quick look at the time series
plot(dept_sales.ts, type="b", xlab="Year", ylab="Sales (in thousands)", bty="l", xaxt="n", yaxt="n")
axis(1, at=seq(1, 6, 1), labels=format(seq(1, 6, 1)))
axis(2, at=seq(40000, 120000, 20000), labels=format(seq(40, 120, 20)), las=0)
# Create training and validation periods
validLen <- 4
trainLen <- length(dept_sales.ts) - validLen
dept_sales_train <- window(dept_sales.ts, end=c(1, trainLen))
dept_sales_valid <- window(dept_sales.ts, start=c(1, trainLen+1))
# Fit an exponential trend with seasonality model to the training set
dept_salesExpo<- tslm(dept_sales_train ~ trend + season, lambda = 0)
summary(dept_salesExpo)
##
## Call:
## tslm(formula = dept_sales_train ~ trend + season, lambda = 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.05352 -0.01320 -0.00453 0.01439 0.06268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.7489 0.0187 574.06 < 2e-16 ***
## trend 0.0111 0.0013 8.56 0.0000003702488 ***
## season2 0.0250 0.0208 1.20 0.25
## season3 0.1653 0.0209 7.92 0.0000009788403 ***
## season4 0.4338 0.0211 20.57 0.0000000000021 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.033 on 15 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 7.63e+11 on 4 and 15 DF, p-value: <2e-16
# Create a forecast for the validation period
# Quadratic
dept_sales_validForecast<- forecast(dept_salesExpo, h=2)
dept_sales_validForecast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 6 Q1 58794 55790 61959 54091 63905
## 6 Q2 60952 57838 64233 56076 66251
# Plot the time series with the exponential trend line
yrange = range(dept_sales.ts)
plot(c(1, 6), yrange, type="n",xlab="Quarter", ylab="Sales (in thousands)", bty="l", xaxt="n", yaxt="n")
lines(dept_sales.ts, bty="l")
axis(1, at=seq(1, 6, 1), labels=format(seq(1, 6, 1)))
axis(2, at=seq(40000, 120000, 20000), labels=format(seq(40, 120, 20)), las=2)
lines(dept_salesExpo$fitted, col="red")
legend(1, 105000, c("Sales", "Exponential + Seasonal Trend"), lty=c(1, 1), col=c("black", "red"), bty="n")
# Plot the residuals
#plot(dept_salesExpo$residuals, type="o", bty="l")
plot(dept_sales_train-dept_salesExpo$fitted, type="o", bty="l")
# Create a times series
s_sales.ts<-ts(s_sales$`Sales`, start=1995, frequency=12)
plot(s_sales.ts)
# Create training and validation periods
validLen <- 12
trainLen <- length(s_sales.ts) - validLen
s_sales_train <- window(s_sales.ts, end=c(1995, trainLen))
s_saless_valid <- window(s_sales.ts, start=c(1995, trainLen+1))
# Fit a linear trend with seasonality model to the training set
s_salesA<- tslm(s_sales_train ~ trend + I(trend^2) + season)
summary(s_salesA)
##
## Call:
## tslm(formula = s_sales_train ~ trend + I(trend^2) + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13610 -1758 176 2288 28154
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2921.06 2715.16 1.08 0.28646
## trend -250.13 120.05 -2.08 0.04162 *
## I(trend^2) 6.79 1.59 4.26 7.5e-05 ***
## season2 1187.26 3011.81 0.39 0.69488
## season3 4531.02 3012.35 1.50 0.13797
## season4 1625.47 3013.20 0.54 0.59164
## season5 1636.25 3014.34 0.54 0.58933
## season6 2071.60 3015.73 0.69 0.49486
## season7 3192.19 3017.37 1.06 0.29447
## season8 3417.63 3019.26 1.13 0.26232
## season9 4118.46 3021.41 1.36 0.17812
## season10 4943.83 3023.83 1.63 0.10747
## season11 11592.51 3026.56 3.83 0.00032 ***
## season12 32469.55 3029.64 10.72 2.2e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5220 on 58 degrees of freedom
## Multiple R-squared: 0.84, Adjusted R-squared: 0.805
## F-statistic: 23.5 on 13 and 58 DF, p-value: <2e-16
# Fit a linear trend with seasonality model with a logarithm to the training set
s_salesB<- tslm(log(s_sales_train) ~ trend + I(trend^2) + season)
summary(s_salesB)
##
## Call:
## tslm(formula = log(s_sales_train) ~ trend + I(trend^2) + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4569 -0.1156 -0.0022 0.1024 0.3402
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.65145164 0.09912593 77.19 < 2e-16 ***
## trend 0.02069847 0.00438279 4.72 1.5e-05 ***
## I(trend^2) 0.00000577 0.00005813 0.10 0.92128
## season2 0.28207257 0.10995613 2.57 0.01292 *
## season3 0.69510212 0.10997594 6.32 4.0e-08 ***
## season4 0.37401187 0.11000707 3.40 0.00123 **
## season5 0.42187153 0.11004841 3.83 0.00031 ***
## season6 0.44721921 0.11009922 4.06 0.00015 ***
## season7 0.58355294 0.11015911 5.30 1.9e-06 ***
## season8 0.54705825 0.11022808 4.96 6.4e-06 ***
## season9 0.63570352 0.11030646 5.76 3.3e-07 ***
## season10 0.72959434 0.11039498 6.61 1.3e-08 ***
## season11 1.20101178 0.11049471 10.87 1.3e-15 ***
## season12 1.95220222 0.11060709 17.65 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.19 on 58 degrees of freedom
## Multiple R-squared: 0.942, Adjusted R-squared: 0.929
## F-statistic: 73 on 13 and 58 DF, p-value: <2e-16
# Fit a linear trend with seasonality model with a logarithm to the full set
s_salesBFull<- tslm(log(s_sales.ts) ~ trend + I(trend^2) + season)
summary(s_salesBFull)
##
## Call:
## tslm(formula = log(s_sales.ts) ~ trend + I(trend^2) + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4824 -0.1152 -0.0007 0.1042 0.3710
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.6873287 0.0883781 86.98 < 2e-16 ***
## trend 0.0166223 0.0033346 4.98 0.00000431898 ***
## I(trend^2) 0.0000679 0.0000380 1.79 0.0783 .
## season2 0.2517226 0.0978287 2.57 0.0122 *
## season3 0.6964286 0.0978410 7.12 0.00000000076 ***
## season4 0.3845634 0.0978605 3.93 0.0002 ***
## season5 0.4098953 0.0978868 4.19 0.00008084244 ***
## season6 0.4489992 0.0979195 4.59 0.00001929771 ***
## season7 0.6102523 0.0979584 6.23 0.00000003066 ***
## season8 0.5872533 0.0980034 5.99 0.00000008088 ***
## season9 0.6679740 0.0980549 6.81 0.00000000274 ***
## season10 0.7452556 0.0981130 7.60 0.00000000010 ***
## season11 1.2036953 0.0981784 12.26 < 2e-16 ***
## season12 1.9581366 0.0982516 19.93 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.18 on 70 degrees of freedom
## Multiple R-squared: 0.955, Adjusted R-squared: 0.946
## F-statistic: 114 on 13 and 70 DF, p-value: <2e-16
# Forecast February 2002
feb_forecast<- s_salesBFull$coefficients["(Intercept)"] + s_salesBFull$coefficients["trend"]*86 + s_salesBFull$coefficients["I(trend^2)"]*86 + s_salesBFull$coefficients["season2"]
exp(feb_forecast)
## (Intercept)
## 11783
# Run the forecast on the validation period using each model
s_salesAForecast<- forecast(s_salesA, h=s_saless_valid)
s_salesBForecast<- forecast(s_salesB, h=s_saless_valid)
accuracy(s_salesAForecast$mean, s_sales.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set -871 13934 8956 -29 38 0.26 1
accuracy(exp(s_salesBForecast$mean), s_sales.ts)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 4603 6811 5002 12 15 0.43 0.45
# Plot the time series with the quadratic trend line
yrange = range(s_sales.ts)
plot(c(1995, 2002), yrange, type="n",xlab="Month/Year", ylab="Sales (in hundreds)", bty="l", xaxt="n", yaxt="n")
lines(s_sales.ts, bty="l")
axis(1, at=seq(1995, 2002, 1), labels=format(seq(1995, 2002, 1)))
axis(2, at=seq(0, 120000, 20000), labels=format(seq(0, 1200, 200)), las=2)
lines(s_salesAForecast$fitted, col="red")
lines(exp(s_salesBForecast$fitted), col="Blue")
legend(1995, 100000, c("Sales", "Model A", "Model B"), lty=c(1, 1), col=c("black", "red", "blue"), bty="n")
# Plot the residuals
plot(s_salesAForecast$residuals, type="o", bty="l")
plot(exp(s_salesBForecast$residuals), type="o", bty="l")