(a) Run a regression model with log(Sales) as the output variable and with a linear trend and monthly predictors. Remember to fit only the training period. Use this model to forecast the sales in February 2002.
SouvSales <- read.csv("/Users/wendyhayes/Desktop/MBA 678-Predictive Analytics/SouvenirSales.csv",stringsAsFactors=FALSE)
str(SouvSales)
## 'data.frame': 84 obs. of 2 variables:
## $ Date : chr "Jan-95" "Feb-95" "Mar-95" "Apr-95" ...
## $ Sales: num 1665 2398 2841 3547 3753 ...
head(SouvSales)
## Date Sales
## 1 Jan-95 1664.81
## 2 Feb-95 2397.53
## 3 Mar-95 2840.71
## 4 Apr-95 3547.29
## 5 May-95 3752.96
## 6 Jun-95 3714.74
tail(SouvSales)
## Date Sales
## 79 1-Jul 26155.15
## 80 1-Aug 28586.52
## 81 1-Sep 30505.41
## 82 1-Oct 30821.33
## 83 1-Nov 46634.38
## 84 1-Dec 104660.67
SouvSalesTS <- ts(SouvSales$Sales, start=c(1995, 1), frequency=12)
validLength <- 12
trainLength <- length(SouvSalesTS ) - validLength
SouvTrain <- window(SouvSalesTS , end=c(1995, trainLength))
SouvValid <- window(SouvSalesTS , start=c(1995,trainLength+1))
SalesExpo <- tslm(SouvTrain ~ trend + season, lambda=0)
summary(SalesExpo)
##
## Call:
## tslm(formula = SouvTrain ~ trend + season, lambda = 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4529 -0.1163 0.0001 0.1005 0.3438
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.646363 0.084120 90.898 < 2e-16 ***
## trend 0.021120 0.001086 19.449 < 2e-16 ***
## season2 0.282015 0.109028 2.587 0.012178 *
## season3 0.694998 0.109044 6.374 3.08e-08 ***
## season4 0.373873 0.109071 3.428 0.001115 **
## season5 0.421710 0.109109 3.865 0.000279 ***
## season6 0.447046 0.109158 4.095 0.000130 ***
## season7 0.583380 0.109217 5.341 1.55e-06 ***
## season8 0.546897 0.109287 5.004 5.37e-06 ***
## season9 0.635565 0.109368 5.811 2.65e-07 ***
## season10 0.729490 0.109460 6.664 9.98e-09 ***
## season11 1.200954 0.109562 10.961 7.38e-16 ***
## season12 1.952202 0.109675 17.800 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1888 on 59 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 2e+10 on 12 and 59 DF, p-value: < 2.2e-16
validPeriodForecasts <- forecast(SalesExpo, h=validLength)
validPeriodForecasts
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2001 9780.022 7459.322 12822.72 6437.463 14858.16
## Feb 2001 13243.095 10100.643 17363.21 8716.947 20119.38
## Mar 2001 20441.749 15591.130 26801.46 13455.287 31055.83
## Apr 2001 15143.541 11550.133 19854.91 9967.870 23006.60
## May 2001 16224.628 12374.689 21272.34 10679.469 24649.03
## Jun 2001 16996.137 12963.127 22283.87 11187.297 25821.13
## Jul 2001 19894.424 15173.679 26083.86 13095.024 30224.31
## Aug 2001 19591.112 14942.340 25686.18 12895.376 29763.51
## Sep 2001 21864.492 16676.271 28666.84 14391.774 33217.31
## Oct 2001 24530.299 18709.509 32162.02 16146.477 37267.30
## Nov 2001 40144.775 30618.828 52634.38 26424.329 60989.36
## Dec 2001 86908.868 66286.278 113947.44 57205.664 132035.03
Feb <- forecast(SalesExpo, h=14)
yrange=range(SouvSalesTS+2)
plot(c(1995, 2003), yrange, type="n", xlab="Year", ylab="Souvenir Sales (in thousands in Australian Dollars)", bty="l", xaxt="n", yaxt="n")
lines(SouvSalesTS, bty="l")
axis(1, at=seq(1995,2003,1),labels=format(seq(1995,2003,1)))
axis(2, at=seq(1600,110000,10000), labels=format(seq(16,1100,100)), las=2)
lines(SalesExpo$fitted, col="red")
lines(Feb$mean,col="blue",lty=2)
legend(1995, 110000, c("ARPM", "Exponential Trend+Season", "Forecasts"), lty=c(1,1,2), col=c("black","red","blue"), bty="n")

(b) Create an ACF plot until lag-15 for the forecast errors. Now fit an AR model with lag-2 [ARIMA(2, 0, 0)] to the forecast errors.
str(SalesExpo)
## List of 14
## $ coefficients : Named num [1:13] 7.6464 0.0211 0.282 0.695 0.3739 ...
## ..- attr(*, "names")= chr [1:13] "(Intercept)" "trend" "season2" "season3" ...
## $ residuals : Time-Series [1:72] from 1995 to 2001: -0.25 -0.1884 -0.4529 0.0692 0.0566 ...
## ..- attr(*, "names")= chr [1:72] "1" "2" "3" "4" ...
## $ effects : Named num [1:72] -76.9862 4.2655 -0.9226 0.0412 -0.7915 ...
## ..- attr(*, "names")= chr [1:72] "(Intercept)" "trend" "season2" "season3" ...
## $ rank : int 13
## $ fitted.values: Time-Series [1:72] from 1995 to 2001: 2138 2895 4468 3310 3546 ...
## ..- attr(*, "names")= chr [1:72] "1" "2" "3" "4" ...
## $ assign : int [1:13] 0 1 2 2 2 2 2 2 2 2 ...
## $ qr :List of 5
## ..$ qr : num [1:72, 1:13] -8.485 0.118 0.118 0.118 0.118 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr [1:72] "1" "2" "3" "4" ...
## .. .. ..$ : chr [1:13] "(Intercept)" "trend" "season2" "season3" ...
## .. ..- attr(*, "assign")= int [1:13] 0 1 2 2 2 2 2 2 2 2 ...
## .. ..- attr(*, "contrasts")=List of 1
## .. .. ..$ season: chr "contr.treatment"
## ..$ qraux: num [1:13] 1.12 1.17 1.1 1.08 1.07 ...
## ..$ pivot: int [1:13] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ tol : num 1e-07
## ..$ rank : int 13
## ..- attr(*, "class")= chr "qr"
## $ df.residual : int 59
## $ contrasts :List of 1
## ..$ season: chr "contr.treatment"
## $ xlevels :List of 1
## ..$ season: chr [1:12] "1" "2" "3" "4" ...
## $ call : language tslm(formula = SouvTrain ~ trend + season, lambda = 0)
## $ terms :Classes 'terms', 'formula' language SouvTrain ~ trend + season
## .. ..- attr(*, "variables")= language list(SouvTrain, trend, season)
## .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:3] "SouvTrain" "trend" "season"
## .. .. .. ..$ : chr [1:2] "trend" "season"
## .. ..- attr(*, "term.labels")= chr [1:2] "trend" "season"
## .. ..- attr(*, "order")= int [1:2] 1 1
## .. ..- attr(*, "intercept")= int 1
## .. ..- attr(*, "response")= int 1
## .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. ..- attr(*, "predvars")= language list(SouvTrain, trend, season)
## .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "factor"
## .. .. ..- attr(*, "names")= chr [1:3] "SouvTrain" "trend" "season"
## $ model :'data.frame': 72 obs. of 3 variables:
## ..$ SouvTrain: num [1:72] 7.42 7.78 7.95 8.17 8.23 ...
## ..$ trend : int [1:72] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ season : Factor w/ 12 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## ..- attr(*, "terms")=Classes 'terms', 'formula' language SouvTrain ~ trend + season
## .. .. ..- attr(*, "variables")= language list(SouvTrain, trend, season)
## .. .. ..- attr(*, "factors")= int [1:3, 1:2] 0 1 0 0 0 1
## .. .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. .. ..$ : chr [1:3] "SouvTrain" "trend" "season"
## .. .. .. .. ..$ : chr [1:2] "trend" "season"
## .. .. ..- attr(*, "term.labels")= chr [1:2] "trend" "season"
## .. .. ..- attr(*, "order")= int [1:2] 1 1
## .. .. ..- attr(*, "intercept")= int 1
## .. .. ..- attr(*, "response")= int 1
## .. .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv>
## .. .. ..- attr(*, "predvars")= language list(SouvTrain, trend, season)
## .. .. ..- attr(*, "dataClasses")= Named chr [1:3] "numeric" "numeric" "factor"
## .. .. .. ..- attr(*, "names")= chr [1:3] "SouvTrain" "trend" "season"
## $ lambda : num 0
## - attr(*, "class")= chr "lm"
Acf(SalesExpo$residuals, lag.max=15)

fit <- Arima(SalesExpo$residuals, order=c(2,0,0))
fit
## Series: SalesExpo$residuals
## ARIMA(2,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 intercept
## 0.3072 0.3687 -0.0025
## s.e. 0.1090 0.1102 0.0489
##
## sigma^2 estimated as 0.0205: log likelihood=39.03
## AIC=-70.05 AICc=-69.46 BIC=-60.95