We are continuing our analysis of the monthly sales of Australian wines (data in AustralianWines.csv). The data are for six types of wines (red, rose, sweet white, dry white, sparkling, and fortified) for the period 1980-1994. The units are thousands of liters.
You are hired to obtain short-term forecasts (2-months ahead) for each of the six series, and this task will be repeated every month, as new data arrive.
Evaluate the performance of naive forecasts by examining the training performance and the validation performance
Week 3. Continuing the problem:
Week 4a. Differencing Wine sales
Week 4b Forecasting Wines Sales using a MA
Week 5 Regression Assignment. Go to end of Analysis for for the assignment.
# install.packages("data.table")
library(data.table)
library(forecast)
library(knitr) # for kable table formatting
# aussieWines = fread("http://www.forecastingbook.com/mooc/AustralianWines.csv?attredirects=0&d=1")
aussieWines = read.csv("/Users/NoName/Dropbox/Coursera/Forecasting - Time Series/Tsing Hua Time Series/PTSF-Datasets/AustralianWines.csv")
aussieWines = data.frame(aussieWines[1:180,]) # Fread gets confused at end of file and adds some NA's
colnames(aussieWines)[5] = "Sparkling" # Capitalize Sparking wine column name
summary(aussieWines)
## Month Fortified Red Rose Sparkling
## Apr-80 : 1 Min. :1154 Min. : 464 87 : 7 Min. :1170
## Apr-81 : 1 1st Qu.:2377 1st Qu.:1123 108 : 5 1st Qu.:1605
## Apr-82 : 1 Median :2894 Median :1559 65 : 5 Median :1896
## Apr-83 : 1 Mean :2999 Mean :1630 118 : 4 Mean :2431
## Apr-84 : 1 3rd Qu.:3527 3rd Qu.:2106 129 : 4 3rd Qu.:2599
## Apr-85 : 1 Max. :5618 Max. :3670 46 : 4 Max. :7242
## (Other):174 (Other):151
## Sweet.white Dry.white
## Min. : 85.0 Min. :1954
## 1st Qu.:141.5 1st Qu.:2736
## Median :223.5 Median :3090
## Mean :247.1 Mean :3240
## 3rd Qu.:319.2 3rd Qu.:3685
## Max. :662.0 Max. :5725
##
fortified.ts = ts(aussieWines$Fortified, start=c(1980,1), end = c(1994, 12), freq=12)
red.ts = ts(aussieWines$Red, start=c(1980,1), end = c(1994, 12), freq=12)
sparkling.ts = ts(aussieWines$Sparkling, start=c(1980,1), end = c(1994, 12), freq=12)
sweetwine.ts = ts(aussieWines$Sweet.white, start=c(1980,1), end = c(1994, 12), freq=12)
drywhite.ts = ts(aussieWines$Dry.white, start=c(1980,1), end = c(1994, 12), freq=12)
aussieWines$Rose = as.numeric(aussieWines$Rose) # make Rose a numeric variables and introduce NA's at "*"s
rose.ts = ts(aussieWines$Rose, start=c(1980,1), end = c(1994, 12), freq=12)
# Impute by interpolation missing values in rose.ts
# install.packages("imputeTS")
library(imputeTS)
rose.ts = na.interpolation(rose.ts)
# library(forecast)
# Fortified Wine Naive FCs
naiveFortified.pred = naive(fortified.ts, h = 2)
naiveFortified.fc = naiveFortified.pred$mean
snaiveFortified.pred = snaive(fortified.ts, h = 2) # Seasonal Naive
snaiveFortified.fc = snaiveFortified.pred$mean
# Red Wine Naive FCs
naiveRed.pred = naive(red.ts, h = 2)
naiveRed.fc = naiveRed.pred$mean
snaiveRed.pred = snaive(red.ts, h = 2) # Seasonal Naive
snaiveRed.fc = snaiveRed.pred$mean
# Sparkling Wine Naive FCs
naiveSparkling.pred = naive(sparkling.ts, h = 2)
naiveSparkling.fc = naiveSparkling.pred$mean
snaiveSparkling.pred = snaive(sparkling.ts, h = 2) # Seasonal Naive
snaiveSparkling.fc = snaiveSparkling.pred$mean
# Sweet Wine Naive FCs
naiveSweet.pred = naive(sweetwine.ts, h = 2)
naiveSweet.fc = naiveSweet.pred$mean
snaiveSweet.pred = snaive(sweetwine.ts, h = 2) # Seasonal Naive
snaiveSweet.fc = snaiveSweet.pred$mean
# Dry White Wine Naive FCs
naiveDryWhite.pred = naive(drywhite.ts, h = 2)
naiveDryWhite.fc = naiveDryWhite.pred$mean
snaiveDryWhite.pred = snaive(drywhite.ts, h = 2) # Seasonal Naive
snaiveDryWhite.fc = snaiveDryWhite.pred$mean
# Rose Wine Naive FCs
naiveRose.pred = naive(rose.ts, h = 2)
naiveRose.fc = naiveRose.pred$mean #
snaiveRose.pred = snaive(rose.ts, h = 2) # Seasonal Naive
snaiveRose.fc = snaiveRose.pred$mean
wineNaive.db = data.frame(Wines = colnames(aussieWines)[2:7],
NaiveFC_Jan95 = c(naiveFortified.fc[1], naiveRed.fc[1], naiveRose.fc[1], naiveSparkling.fc[1], naiveSweet.fc[1], naiveDryWhite.fc[1]),
NaiveFC_Feb95 = c(naiveFortified.fc[2], naiveRed.fc[2], naiveRose.fc[2], naiveSparkling.fc[2], naiveSweet.fc[2], naiveDryWhite.fc[2]),
Seasonal_NaiveFC_Jan95 = c(snaiveFortified.fc[1], snaiveRed.fc[1], snaiveRose.fc[1], snaiveSparkling.fc[1], snaiveSweet.fc[1], snaiveDryWhite.fc[1]),
Seasonal_NaiveFC_Feb95 = c(snaiveFortified.fc[2], snaiveRed.fc[2], snaiveRose.fc[2], snaiveSparkling.fc[2], snaiveSweet.fc[2], snaiveDryWhite.fc[2])
)
kable(wineNaive.db, caption = "Naive Forecasts For Wines Sales")
| Wines | NaiveFC_Jan95 | NaiveFC_Feb95 | Seasonal_NaiveFC_Jan95 | Seasonal_NaiveFC_Feb95 |
|---|---|---|---|---|
| Fortified | 2467 | 2467 | 1154 | 1568 |
| Red | 2684 | 2684 | 1041 | 1728 |
| Rose | 83 | 83 | 44 | 47 |
| Sparkling | 5999 | 5999 | 1197 | 1968 |
| Sweet.white | 394 | 394 | 150 | 280 |
| Dry.white | 5725 | 5725 | 2265 | 3685 |
par(mfrow = c(1, 2))
# Plot Fortified Wine naive FCs
plot(naiveFortified.pred, main = 'Fortified Wine Naive FC')
plot(snaiveFortified.pred, main = "Fortified Seasonal Naive FC")
# Plot Red Wine Naive FCs
plot(naiveRed.pred, main = "Red Wine Naive FC")
plot(snaiveRed.pred, main = "Seasonal Red Wine Naive FC")
# Plot Sparkling Wine Naive FCs
plot(naiveSparkling.pred, main = "Sparkling Wine Naive FC")
plot(snaiveSparkling.pred, main = "Seasonal Sparkling Wine Naive FC")
# Plot Sweet Wine Naive FCs
plot(naiveSweet.pred, main = "Sweet Wine Naive FC")
plot(snaiveSweet.pred, main = "Seasonal Sweet Wine Naive FC")
# Plot Dry White Wine Naive FCs
plot(naiveDryWhite.pred, main = "Dry White Wine Naive FC")
plot(snaiveDryWhite.pred, main = "Seasonal Dry White Wine Naive FC" )
# Plot Rose Wine Naive FCs
plot(naiveRose.pred, main = "Rose Wine Naive FC")
plot(snaiveRose.pred, main = "Seasonal Rose Wine Naive FC")
Partition into Training and Validatation Sets
fixed.nValid = 12
fixed.nTrain = length(rose.ts) - fixed.nValid
# Partition into train and validation set
train.rose.ts = window(rose.ts, start=c(1980,13), end=c(1980,fixed.nTrain))
valid.rose.ts = window(rose.ts, start=c(1980, fixed.nTrain), end = c(1980, fixed.nTrain+fixed.nValid))
Compute performance measures on the validation set
# For training set performance create lagged ts
train.rose.lag2 = lag(train.rose.ts, 2) # For naive forecast
train.rose.lag12 = lag(train.rose.ts, 12) # For snaive forecast
# Calculate residuals
resids.train.rose.lag2 = train.rose.ts - train.rose.lag2
resids.train.rose.lag12 = train.rose.ts - train.rose.lag12
# Table for training set accuracy of Naive Rose Wine forecasts
kable(accuracy(train.rose.ts, train.rose.lag2), caption = "Accuracy Stats for Naive Rose Wine Forecast over Training Set")
| ME | RMSE | MAE | MPE | MAPE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|
| Test set | 0.5844156 | 37.81723 | 27.5974 | -95.58538 | 139.1238 | 0.0686598 | 1.242332 |
# Table for training set accuracy of Seasonal Naive Rose Wine forecasts
kable(accuracy(train.rose.ts, train.rose.lag12), caption = "Accuracy Stats for Seasonal Naive Rose Wine Forecast over Training Set")
| ME | RMSE | MAE | MPE | MAPE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|
| Test set | 2.027778 | 34.0708 | 20.88889 | -84.76257 | 117.2428 | 0.1525 | 0.9787671 |
Plot the naive Forecasts for the training period
# Plot the residuals for the training set naive forecasts
par(mfrow = c(1, 2)) # Plot side by side
# Naive and Seasonal Naive plots
plot(resids.train.rose.lag2, xlab = "Months", ylab="Residuals", main = "Error in Naive Forecast \n on the training set for Rose Wines" )
plot(resids.train.rose.lag12, xlab = "Months", ylab="Residuals", main = "Error in Seasonal Naive Forecast \n on the training set for Rose Wines" )
# Histograms of residuals
hist(resids.train.rose.lag2, main = "Error in Naive Forecast \n on the training set for Rose Wines", xlab="Residuals")
hist(resids.train.rose.lag12, main = "Error in Seasonal Naive Forecast \n on the training set for Rose Wines", xlab="Residuals")
# For training set performance create lagged ts
valid.rose.lag2 = lag(valid.rose.ts, 2) # For naive forecast
valid.rose.lag12 = window(rose.ts, start=c(1980, fixed.nTrain-11), end = c(1980, fixed.nTrain+fixed.nValid-12)) # For snaive forecast
# Calculate residuals
resids.valid.rose.lag2 = valid.rose.ts - valid.rose.lag2
resids.valid.rose.lag12 = as.vector(valid.rose.ts) - as.vector(valid.rose.lag12)
## Warning in as.vector(valid.rose.ts) - as.vector(valid.rose.lag12): longer
## object length is not a multiple of shorter object length
# Table for training set accuracy of Naive Rose Wine forecasts
kable(accuracy(valid.rose.ts, valid.rose.lag2), caption = "Accuracy Stats for Naive Rose Wine Forecast over Validation Set")
| ME | RMSE | MAE | MPE | MAPE | ACF1 | Theil’s U | |
|---|---|---|---|---|---|---|---|
| Test set | 2.545454 | 34.06678 | 27.09091 | -440.5622 | 490.641 | 0.2954392 | 1.400756 |
# Table for training set accuracy of Seasonal Naive Rose Wine forecasts
# kable(accuracy(valid.rose.lag12, valid.rose.ts), caption = "Accuracy Stats for Seasonal Naive Rose Wine Forecast over Validation Set")
RMSE.valid.rose.snaive = sqrt(mean(resids.valid.rose.lag12^2))
print(paste("RSME for Seasonal Naive Rose on Validation Set:",round(RMSE.valid.rose.snaive, 2)))
## [1] "RSME for Seasonal Naive Rose on Validation Set: 26.37"
Plot the naive Forecasts for the Validation period
# Plot the residuals for the training set naive forecasts
par(mfrow = c(1, 2)) # Plot side by side
# Naive and Seasonal Naive plots
plot(resids.valid.rose.lag2, xlab = "Months", ylab="Residuals", main = "Error in Naive Forecast \n on the Validation set \n for Rose Wines" )
plot(resids.valid.rose.lag12, xlab = "Months", ylab="Residuals", main = "Error in Seasonal Naive Forecast \n on the Validation set \n for Rose Wines" )
# Histograms of residuals
hist(resids.valid.rose.lag2, main = "Error in Naive Forecast \n on the Validation set \n for Rose Wines", xlab="Residuals")
hist(resids.valid.rose.lag12, main = "Error in Seasonal Naive Forecast \n on the Validation set \n for Rose Wines", xlab="Residuals")
For Naive Rose Forecast
fixed.nValid = 12
fixed.nTrain = length(rose.ts) - fixed.nValid
rose.tmp = window(rose.ts, start=c(1980, fixed.nTrain-1), end = c(1980, fixed.nTrain+fixed.nValid-2))
rose.vec = as.vector(rose.tmp)
rose.fc = ts(rose.vec, start=c(1994, 1), end=c(1994,12), freq=12)
resids.rose = rose.fc - valid.rose.ts
q.resids.rose = quantile(resids.rose, probs = c(0.1, 0.9))
q10.rose = rose.fc + q.resids.rose[1]
q90.rose = rose.fc + q.resids.rose[2]
# Compute the first and ninth deciles of the validation errors distribution for the 2 period naive forecast
table = as.data.frame(c(q10.rose[1], q90.rose[2]), row.names=c("10% Decile", "90% Decile"))
colnames(table) = "Decile values"
kable(table, caption = "First and Ninth Interval of the Validation Errors Distribution on the two period Naive Forecast")
| Decile values | |
|---|---|
| 10% Decile | 6.6 |
| 90% Decile | 126.1 |
hist(resids.rose, main="Histogram of Residuals of Rose Forecast")
par(mfrow = c(1, 1))
plot(rose.fc, main = "Forecast of Rose Wine \nwith 10% and 90% Confidence Bands. \n Actuals in Blue")
lines(q10.rose, col="red")
lines(q90.rose, col="red")
lines(valid.rose.ts, lty=2, col="blue")
For Seasonal Naive Forecast
fixed.nValid = 12
fixed.nTrain = length(rose.ts) - fixed.nValid
srose.tmp = window(rose.ts, start=c(1980, fixed.nTrain-11), end = c(1980, fixed.nTrain+fixed.nValid-12))
srose.vec = as.vector(srose.tmp)
srose.fc = ts(srose.vec, start=c(1994, 1), end=c(1994,12), freq=12)
resids.srose = srose.fc - valid.rose.ts
q.resids.srose = quantile(resids.srose, probs = c(0.1, 0.9))
q10.srose = srose.fc + q.resids.srose[1]
q90.srose = srose.fc + q.resids.srose[2]
# Compute the first and ninth deciles of the validation errors distribution for the 2 period naive forecast
stable = as.data.frame(c(q10.srose[1], q90.srose[2]), row.names=c("10% Decile", "90% Decile"))
colnames(stable) = "Decile Values"
kable(stable, caption = "First and Ninth Interval of the Validation Errors Distribution on the two period Seasonal Naive Forecast")
| Decile Values | |
|---|---|
| 10% Decile | 40.2 |
| 90% Decile | 101.0 |
hist(resids.srose, main="Histogram of Residuals of Seasonal Rose Forecast")
par(mfrow = c(1, 1))
plot(srose.fc, main = "Seasonal Forecast of Rose Wine \n with 10% and 90% Confidence Bands \n Actuals in Blue")
lines(q10.srose, col="red")
lines(q90.srose, col="red")
lines(valid.rose.ts, lty=2, col="blue")
#install.packages("forecast")
# library(forecast)
par(mfrow = c(1, 3))
plot(fortified.ts, bty="l", main="Fortified Wine")
abline(reg=lm(fortified.ts~time(fortified.ts)))
plot(aggregate(fortified.ts,FUN=mean), main="Fortified Trend")
boxplot(fortified.ts~cycle(fortified.ts), xlab="Months", main="Seasonality of Fortified Wine")
plot(red.ts, bty="l", main= "Red Wine")
abline(reg=lm(red.ts~time(red.ts)))
plot(aggregate(red.ts,FUN=mean), main="Red Wine Trend")
boxplot(red.ts~cycle(red.ts), xlab="Months", main="Seasonality of Red Wine")
plot(rose.ts, bty="l", main = "Rose Wine")
abline(reg=lm(rose.ts~time(rose.ts)))
plot(aggregate(rose.ts,FUN=mean), main="Rose Wine Trend")
boxplot(rose.ts~cycle(rose.ts), xlab="Months", main="Seasonality of Rose Wine")
plot(sparkling.ts, bty="l", main = "Sparkling Wine")
abline(reg=lm(sparkling.ts~time(sparkling.ts)))
plot(aggregate(sparkling.ts,FUN=mean), main="Sparkling Wine Trend")
boxplot(sparkling.ts~cycle(sparkling.ts), xlab="Months", main="Seasonality of Sparkling Wine")
plot(sweetwine.ts, bty="l", main="Sweet Wine")
abline(reg=lm(sweetwine.ts~time(sweetwine.ts)))
plot(aggregate(sweetwine.ts,FUN=mean), main="Sweet Wine Trend")
boxplot(sweetwine.ts~cycle(sweetwine.ts), xlab="Months", main="Seasonality of Sweet Wine")
plot(drywhite.ts, bty="l", main = "Dry White Wines")
abline(reg=lm(drywhite.ts~time(drywhite.ts)))
plot(aggregate(drywhite.ts,FUN=mean), main="Dry White Wine Trend")
boxplot(drywhite.ts~cycle(drywhite.ts), xlab="Months", main="Seasonality of Dry White Wine")
Clearly, there is both trend and seasonality in each of the wine’s time series. Since each of the wine time series have a trend component and a seasonal component we will have to do double differencing to create a stationary series.
drywhite.dif1 = diff(drywhite.ts, lag=1)
drywhite.dif2 = diff(drywhite.dif1, lag=12)
par(mfrow = c(1, 2))
plot(drywhite.dif1, main = "Dry White Wine: 1st differencing")
plot(drywhite.dif2, main = "Dry White Wine: 2nd differencing")
Notice that the second differencing plot does not look stationary, but the variance seems to increase over time. I will take logs of the time series before trying to difference to remove this non-stationarity.
# Fortified Wine
fortified.log = log(fortified.ts)
fortified.dif1 = diff(fortified.log, lag=1)
fortified.dif2 = diff(fortified.dif1, lag=12) # lag = 12 to remove seasonality
par(mfrow = c(1, 2))
plot(fortified.dif1, main = "Fortified Wine: 1st differencing")
plot(fortified.dif2, main = "Fortified Wine: 2nd differencing")
# Red wine
red.log = log(red.ts)
red.dif1 = diff(log(red.log), lag=1)
red.dif2 = diff(red.dif1, lag=12) # lag = 12 to remove seasonality
plot(red.dif1, main = "Red Wine: 1st differencing")
plot(red.dif2, main = "Red Wine: 2nd differencing")
# Sparkling Wine
sparkling.log = log(sparkling.ts)
sparkling.dif1 = diff(sparkling.log, lag=1)
sparkling.dif2 = diff(sparkling.dif1, lag=12) # lag = 12 to remove seasonality
plot(sparkling.dif1, main = "Sparkling Wine: 1st differencing")
plot(sparkling.dif2, main = "Sparkling Wine: 2nd differencing")
# Sweet Wine
sweetwine.log = log(sweetwine.ts)
sweetwine.dif1 = diff(sweetwine.log, lag=1)
sweetwine.dif2 = diff(sweetwine.dif1, lag=12) # lag = 12 to remove seasonality
plot(sweetwine.dif1, main = "Sweet Wine: 1st differencing")
plot(sweetwine.dif2, main = "Sweet Wine: 2nd differencing")
# Dry White
drywhite.log = log(drywhite.ts)
drywhite.dif1 = diff(drywhite.log, lag=1)
drywhite.dif2 = diff(drywhite.dif1, lag=12) # lag = 12 to remove seasonality
plot(drywhite.dif1, main = "Dry White Wine: 1st differencing")
plot(drywhite.dif2, main = "Dry White Wine: 2nd differencing")
# Rose Wine
rose.log = log(rose.ts)
rose.dif1 = diff(rose.log, lag=1)
rose.dif2 = diff(rose.dif1, lag=12) # lag = 12 to remove seasonality
plot(rose.dif1, main = "Rose Wine: 1st differencing")
plot(rose.dif2, main = "Rose Wine: 2nd differencing")
library(forecast)
library(zoo)
# Fortified Wine
fortified.log = fortified.ts
fortified.dif1 = diff(fortified.log, lag=12)
fortified.dif2 = diff(fortified.dif1, lag=1) # lag = 12 to remove seasonality
Recall the data in AustralianWines.csv about the monthly sales of six types of Australian wines (red, rose, sweet white, dry white, sparkling, and fortified) for the period 1980-1994. The units are thousands of liters. You are hired to obtain short-term forecasts (2-months ahead) for each of the six series, and this task will be repeated every month, as new data arrive.
startdate = 1989
rose.ts.window = window(rose.ts, start=c(startdate,13))
fixed.nValid = 12
fixed.nTrain = length(rose.ts.window) - fixed.nValid
# Partition into train and validation set
train.rose.ts = window(rose.ts.window, start=c(startdate,13), end=c(startdate,fixed.nTrain))
valid.rose.ts = window(rose.ts.window, start=c(startdate, fixed.nTrain), end = c(startdate, fixed.nTrain+fixed.nValid))
Plot seasonal decompositon graph to see series decomposed into Trend and Seasonal components
fit.stl = stl(train.rose.ts, s.window=12) # s.window=12 because monthly seasonality
plot(fit.stl)
Produce linear model taking into account Trend and Seasonality
fit.lm1 = tslm(train.rose.ts ~ trend + season)
summary(fit.lm1)
##
## Call:
## tslm(formula = train.rose.ts ~ trend + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -39.333 -5.500 -0.500 7.333 50.333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 53.41667 11.21232 4.764 8.38e-05 ***
## trend -0.08333 0.30849 -0.270 0.7895
## season2 10.08333 14.81084 0.681 0.5028
## season3 16.16667 14.82047 1.091 0.2866
## season4 16.25000 14.83652 1.095 0.2847
## season5 13.00000 14.85895 0.875 0.3907
## season6 16.75000 14.88774 1.125 0.2722
## season7 28.83333 14.92286 1.932 0.0657 .
## season8 11.91667 14.96425 0.796 0.4340
## season9 18.00000 15.01188 1.199 0.2427
## season10 12.41667 15.06566 0.824 0.4183
## season11 -1.16667 15.12556 -0.077 0.9392
## season12 -10.75000 15.19148 -0.708 0.4863
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.14 on 23 degrees of freedom
## Multiple R-squared: 0.3282, Adjusted R-squared: -0.02224
## F-statistic: 0.9365 on 12 and 23 DF, p-value: 0.5297
plot.ts(fit.lm1$fitted.values, col="red", main="Rose Wine: Fitted Values (red) \n vs Actuals (black)")
lines(train.rose.ts)
Check residuals for autocorrelaton
# Plot residuals
plot.ts(resid(fit.lm1))
# Plot Auto-correlations
acf(resid(fit.lm1))
Notice there is a spike at lag 5 indicating we are still leaving information on the table.
fc.lm1 = forecast(fit.lm1, h = 12, levels=c(80,95))
# Accuracy metrics for fitted values of linear model
accuracy(fc.lm1)
## ME RMSE MAE MPE MAPE MASE
## Training set -3.943605e-16 14.49585 9.537037 -20.26394 32.54064 0.6358025
## ACF1
## Training set 0.1523016
# Confirm Accuracy metrics for forecast
fc.accuracy = sqrt(mean(fc.lm1$residuals^2))
print(paste("RMSE for Linear Forecast Validation Set:",round(fc.accuracy, 2)))
## [1] "RMSE for Linear Forecast Validation Set: 14.5"
# Plot forecast values and actuals
plot(fc.lm1, ylab="Sales", main="Rose Wine: Fitted Values (red) \n vs Actuals (black) and \n 1994 Forecast(blue)")
# Plot Fitted values
lines(fit.lm1$fitted.values, col="red")
lines(valid.rose.ts, col="green")
fit.lm2 = tslm(rose.ts.window ~ trend + season)
summary(fit.lm2)
##
## Call:
## tslm(formula = rose.ts.window ~ trend + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.542 -3.350 1.012 6.752 34.283
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.4118 9.2850 6.183 1.42e-07 ***
## trend -0.3285 0.1453 -2.260 0.0285 *
## season2 7.5285 12.0846 0.623 0.5363
## season3 13.2569 12.0872 1.097 0.2783
## season4 14.5854 12.0915 1.206 0.2338
## season5 11.3139 12.0977 0.935 0.3545
## season6 16.4424 12.1055 1.358 0.1809
## season7 14.1708 12.1151 1.170 0.2480
## season8 3.6993 12.1264 0.305 0.7617
## season9 16.8278 12.1395 1.386 0.1722
## season10 15.1563 12.1543 1.247 0.2186
## season11 8.8847 12.1708 0.730 0.4690
## season12 11.0132 12.1890 0.904 0.3708
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.11 on 47 degrees of freedom
## Multiple R-squared: 0.1581, Adjusted R-squared: -0.05684
## F-statistic: 0.7356 on 12 and 47 DF, p-value: 0.7099
fc.lm2 = forecast(fit.lm2, h = 12, levels=c(80,95))
# Plot forecast values and actuals
plot(fc.lm2, ylab="Sales", main="Rose Wine: Fitted Values (red) \n vs Actuals (black) and \n Jan95, Feb95 Forecast (blue)")
# Plot Fitted values
lines(fit.lm2$fitted.values, col="red")
fc.lm2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1995 37.375 9.332994 65.41701 -6.0256431 80.77564
## Feb 1995 44.575 16.532994 72.61701 1.1743569 87.97564
## Mar 1995 49.975 21.932994 78.01701 6.5743569 93.37564
## Apr 1995 50.975 22.932994 79.01701 7.5743569 94.37564
## May 1995 47.375 19.332994 75.41701 3.9743569 90.77564
## Jun 1995 52.175 24.132994 80.21701 8.7743569 95.57564
## Jul 1995 49.575 21.532994 77.61701 6.1743569 92.97564
## Aug 1995 38.775 10.732994 66.81701 -4.6256431 82.17564
## Sep 1995 51.575 23.532994 79.61701 8.1743569 94.97564
## Oct 1995 49.575 21.532994 77.61701 6.1743569 92.97564
## Nov 1995 42.975 14.932994 71.01701 -0.4256431 86.37564
## Dec 1995 44.775 16.732994 72.81701 1.3743569 88.17564
The time series linear model considers the linear trend as well as seasonality. The output variable was rose sales, the predictors were previous time series sales values for all the data from 1980, considered in terms of trend and seasonality. The point forecasts were as given above: Jan1995: 13.8 and Feb1995: 24.2.
The error on the seasonal naive model is: RMSE for Seasonal Naive Rose on Validation Set: 26.4.
The error for the training set RMSE was: 25.01065.
The linear regression model using trend and seasonality barely did better than a seasonal naive model.
The second analysis which is shown above used data from 1989. The RMSE on the training set was 14.5 which was substantially better than the RMSE for the Seasonal Naive Model at 26.4. The point forecasts for Jan1995 is: 37.4 and for Feb1995 is: 44.6.
Final Thoughts. Clearly the linear models are not catching the large seasonal spikes that sometimes occur in the fall of the years. Perhaps aggregating the data into quarters might help with this.