********************************************************************

Problem Description

********************************************************************

We are continuing our analysis of the monthly sales of Australian wines (data in AustralianWines.csv). The data are for six types of wines (red, rose, sweet white, dry white, sparkling, and fortified) for the period 1980-1994. The units are thousands of liters.

You are hired to obtain short-term forecasts (2-months ahead) for each of the six series, and this task will be repeated every month, as new data arrive.

Evaluate the performance of naive forecasts by examining the training performance and the validation performance

Choose one or more wine types
Compute naive forecasts for the training and validation periods
Plot the actual and forecasted values for the training and validation periods
Compute performance measures (separately) for the training and validation periods

Continuing the problem:

Create a histogram of the validation period forecast errors (in an earlier discussion you already generated naive forecasts for the validation period; use the corresponding errors to create the histogram). Examine the shape in terms of error magnitudes.
Let’s set our desired certainty level to 80%. Compute the first and ninth deciles of the validation errors distribution.
Combine the two deciles with the point forecast (your naive forecast) to obtain an 80% prediction interval

********************************************************************

Load Libraries

********************************************************************

# install.packages("data.table")
library(data.table)
library(forecast)
library(knitr)  # for kable table formatting

********************************************************************

Get data

********************************************************************

aussieWines = fread("http://www.forecastingbook.com/mooc/AustralianWines.csv?attredirects=0&d=1")
aussieWines = data.frame(aussieWines[1:180,]) # Fread gets confused at end of file and adds some NA's
colnames(aussieWines)[5] = "Sparkling"  # Capitalize Sparking wine column name
summary(aussieWines)

##     Month             Fortified         Red           Rose          
##  Length:180         Min.   :1154   Min.   : 464   Length:180        
##  Class :character   1st Qu.:2377   1st Qu.:1123   Class :character  
##  Mode  :character   Median :2894   Median :1559   Mode  :character  
##                     Mean   :2999   Mean   :1630                     
##                     3rd Qu.:3527   3rd Qu.:2106                     
##                     Max.   :5618   Max.   :3670                     
##    Sparkling     Sweet.white      Dry.white   
##  Min.   :1170   Min.   : 85.0   Min.   :1954  
##  1st Qu.:1605   1st Qu.:141.5   1st Qu.:2736  
##  Median :1896   Median :223.5   Median :3090  
##  Mean   :2431   Mean   :247.1   Mean   :3240  
##  3rd Qu.:2599   3rd Qu.:319.2   3rd Qu.:3685  
##  Max.   :7242   Max.   :662.0   Max.   :5725

********************************************************************

Create TS objects

********************************************************************

fortified.ts = ts(aussieWines$Fortified, start=c(1980,1), end = c(1994, 12), freq=12)
red.ts = ts(aussieWines$Red, start=c(1980,1), end = c(1994, 12), freq=12)
sparkling.ts = ts(aussieWines$Sparkling, start=c(1980,1), end = c(1994, 12), freq=12)
sweetwine.ts = ts(aussieWines$Sweet.white, start=c(1980,1), end = c(1994, 12), freq=12)
drywhite.ts = ts(aussieWines$Dry.white, start=c(1980,1), end = c(1994, 12), freq=12)

aussieWines$Rose = as.numeric(aussieWines$Rose)  # make Rose a numeric variables and introduce NA's at "*"s

## Warning: NAs introduced by coercion

rose.ts = ts(aussieWines$Rose, start=c(1980,1), end = c(1994, 12), freq=12)

# Impute by interpolation missing values in rose.ts
# install.packages("imputeTS")
library(imputeTS)
rose.ts = na.interpolation(rose.ts)

********************************************************************

Plot Wines: Data, Trend, Seasonality

********************************************************************

#install.packages("forecast")
# library(forecast)

par(mfrow = c(1, 3))
plot(fortified.ts, bty="l", main="Fortified Wine")
abline(reg=lm(fortified.ts~time(fortified.ts)))
plot(aggregate(fortified.ts,FUN=mean), main="Fortified Trend")
boxplot(fortified.ts~cycle(fortified.ts), xlab="Months", main="Seasonality of Fortified Wine")

plot(red.ts, bty="l", main= "Red Wine")
abline(reg=lm(red.ts~time(red.ts)))
plot(aggregate(red.ts,FUN=mean), main="Red Wine Trend")
boxplot(red.ts~cycle(red.ts), xlab="Months", main="Seasonality of Red Wine")

plot(rose.ts, bty="l", main = "Rose Wine")
abline(reg=lm(rose.ts~time(rose.ts)))
plot(aggregate(rose.ts,FUN=mean), main="Rose Wine Trend")
boxplot(rose.ts~cycle(rose.ts), xlab="Months", main="Seasonality of Rose Wine")

plot(sparkling.ts, bty="l", main = "Sparkling Wine")
abline(reg=lm(sparkling.ts~time(sparkling.ts)))
plot(aggregate(sparkling.ts,FUN=mean), main="Sparkling Wine Trend")
boxplot(sparkling.ts~cycle(sparkling.ts), xlab="Months", main="Seasonality of Sparkling Wine")

plot(sweetwine.ts, bty="l", main="Sweet Wine")
abline(reg=lm(sweetwine.ts~time(sweetwine.ts)))
plot(aggregate(sweetwine.ts,FUN=mean), main="Sweet Wine Trend")
boxplot(sweetwine.ts~cycle(sweetwine.ts), xlab="Months", main="Seasonality of Sweet Wine")

plot(drywhite.ts, bty="l", main = "Dry White Wines")
abline(reg=lm(drywhite.ts~time(drywhite.ts)))
plot(aggregate(drywhite.ts,FUN=mean), main="Dry White Wine Trend")
boxplot(drywhite.ts~cycle(drywhite.ts), xlab="Months", main="Seasonality of Dry White Wine")

********************************************************************

Generate Naive Forecasts for each of the six wines

********************************************************************

# library(forecast)

# Fortified Wine Naive FCs
naiveFortified.pred = naive(fortified.ts, h = 2)
naiveFortified.fc = naiveFortified.pred$mean
snaiveFortified.pred = snaive(fortified.ts, h = 2)  # Seasonal Naive
snaiveFortified.fc = snaiveFortified.pred$mean

# Red Wine Naive FCs
naiveRed.pred = naive(red.ts, h = 2)
naiveRed.fc = naiveRed.pred$mean
snaiveRed.pred = snaive(red.ts, h = 2)  # Seasonal Naive
snaiveRed.fc = snaiveRed.pred$mean

# Sparkling Wine Naive FCs
naiveSparkling.pred = naive(sparkling.ts, h = 2)
naiveSparkling.fc = naiveSparkling.pred$mean
snaiveSparkling.pred = snaive(sparkling.ts, h = 2)  # Seasonal Naive
snaiveSparkling.fc = snaiveSparkling.pred$mean  

# Sweet Wine Naive FCs
naiveSweet.pred = naive(sweetwine.ts, h = 2)
naiveSweet.fc = naiveSweet.pred$mean
snaiveSweet.pred = snaive(sweetwine.ts, h = 2)  # Seasonal Naive
snaiveSweet.fc = snaiveSweet.pred$mean 

# Dry White Wine Naive FCs
naiveDryWhite.pred = naive(drywhite.ts, h = 2)
naiveDryWhite.fc = naiveDryWhite.pred$mean
snaiveDryWhite.pred = snaive(drywhite.ts, h = 2)  # Seasonal Naive
snaiveDryWhite.fc = snaiveDryWhite.pred$mean

# Rose Wine Naive FCs       
naiveRose.pred = naive(rose.ts, h = 2)
naiveRose.fc = naiveRose.pred$mean  # 
snaiveRose.pred = snaive(rose.ts, h = 2)  # Seasonal Naive
snaiveRose.fc = snaiveRose.pred$mean

********************************************************************

Table of Naive Forecasts

********************************************************************

wineNaive.db = data.frame(Wines = colnames(aussieWines)[2:7], 
                          NaiveFC_Jan95 = c(naiveFortified.fc[1], naiveRed.fc[1], naiveRose.fc[1], naiveSparkling.fc[1], naiveSweet.fc[1], naiveDryWhite.fc[1]), 
                          NaiveFC_Feb95 = c(naiveFortified.fc[2], naiveRed.fc[2], naiveRose.fc[2], naiveSparkling.fc[2], naiveSweet.fc[2], naiveDryWhite.fc[2]),
                          Seasonal_NaiveFC_Jan95 = c(snaiveFortified.fc[1], snaiveRed.fc[1], snaiveRose.fc[1], snaiveSparkling.fc[1], snaiveSweet.fc[1], snaiveDryWhite.fc[1]), 
                          Seasonal_NaiveFC_Feb95 = c(snaiveFortified.fc[2], snaiveRed.fc[2], snaiveRose.fc[2], snaiveSparkling.fc[2], snaiveSweet.fc[2], snaiveDryWhite.fc[2])
                          )

kable(wineNaive.db, caption = "Naive Forecasts For Wines Sales")

Naive Forecasts For Wines Sales
Wines	NaiveFC_Jan95	NaiveFC_Feb95	Seasonal_NaiveFC_Jan95	Seasonal_NaiveFC_Feb95
Fortified	2467	2467	1154	1568
Red	2684	2684	1041	1728
Rose	84	84	30	35
Sparkling	5999	5999	1197	1968
Sweet.white	394	394	150	280
Dry.white	5725	5725	2265	3685

********************************************************************

Plot Naive Forecasts

********************************************************************

par(mfrow = c(1, 2))

# Plot Fortified Wine naive FCs
plot(naiveFortified.pred, main = 'Fortified Wine Naive FC')
plot(snaiveFortified.pred, main = "Fortified Seasonal Naive FC")

# Plot Red Wine Naive FCs
plot(naiveRed.pred, main = "Red Wine Naive FC")
plot(snaiveRed.pred, main = "Seasonal Red Wine Naive FC")

# Plot Sparkling Wine Naive FCs
plot(naiveSparkling.pred, main = "Sparkling Wine Naive FC")
plot(snaiveSparkling.pred, main = "Seasonal Sparkling Wine Naive FC")

# Plot Sweet Wine Naive FCs
plot(naiveSweet.pred, main = "Sweet Wine Naive FC")
plot(snaiveSweet.pred, main = "Seasonal Sweet Wine Naive FC")

# Plot Dry White Wine Naive FCs
plot(naiveDryWhite.pred, main = "Dry White Wine Naive FC")
plot(snaiveDryWhite.pred, main = "Seasonal Dry White Wine Naive FC" )

# Plot Rose Wine Naive FCs
plot(naiveRose.pred, main = "Rose Wine Naive FC")
plot(snaiveRose.pred, main = "Seasonal Rose Wine Naive FC")

********************************************************************

Performance of naive forecasts for wine sales

********************************************************************

Evaluate the performance of naive forecasts by examining the training performance and the validation performance

1. Compute naive forecasts for the training periods and plot the actual and forecasted values for the training periods.

2. Compute performance measures (separately) for the training periods.

Partition into Training and Validatation Sets

fixed.nValid = 12 
fixed.nTrain = length(rose.ts) - fixed.nValid
        
# Partition into train and validation set
train.rose.ts = window(rose.ts, start=c(1980,13), end=c(1980,fixed.nTrain)) 
valid.rose.ts = window(rose.ts, start=c(1980, fixed.nTrain), end = c(1980, fixed.nTrain+fixed.nValid))

Compute performance measures on the validation set

# For training set performance create lagged ts
train.rose.lag2 = lag(train.rose.ts, 2)  # For naive forecast
train.rose.lag12 = lag(train.rose.ts, 12)  # For snaive forecast

# Calculate residuals
resids.train.rose.lag2 = train.rose.ts - train.rose.lag2
resids.train.rose.lag12 = train.rose.ts - train.rose.lag12

# Table for training set accuracy of Naive Rose Wine forecasts
kable(accuracy(train.rose.ts, train.rose.lag2), caption = "Accuracy Stats for Naive Rose Wine Forecast over Training Set")

Accuracy Stats for Naive Rose Wine Forecast over Training Set
	ME	RMSE	MAE	MPE	MAPE	ACF1	Theil’s U
Test set	-0.8441558	38.52053	27.90909	-9.061346	32.72826	0.1036125	1.563916

# Table for training set accuracy of Seasonal Naive Rose Wine forecasts
kable(accuracy(train.rose.ts, train.rose.lag12), caption = "Accuracy Stats for Seasonal Naive Rose Wine Forecast over Training Set")

Accuracy Stats for Seasonal Naive Rose Wine Forecast over Training Set
	ME	RMSE	MAE	MPE	MAPE	ACF1	Theil’s U
Test set	-8.236111	22.15664	15.43056	-11.69132	18.86774	0.2949936	0.8900648

Plot the naive Forecasts for the training period

# Plot the residuals for the training set naive forecasts
par(mfrow = c(1, 2))  # Plot side by side

# Naive and Seasonal Naive plots
plot(resids.train.rose.lag2, xlab = "Months", ylab="Residuals", main = "Error in Naive Forecast \n on the training set for Rose Wines" )
plot(resids.train.rose.lag12, xlab = "Months", ylab="Residuals", main = "Error in Seasonal Naive Forecast \n on the training set for Rose Wines" )

# Histograms of residuals
hist(resids.train.rose.lag2, main = "Error in Naive Forecast \n on the training set for Rose Wines", xlab="Residuals")
hist(resids.train.rose.lag12, main = "Error in Seasonal Naive Forecast \n on the training set for Rose Wines", xlab="Residuals")

3. Compute naive forecasts for the validation period and plot the actual and forecasted values for the validation period.

4. Compute performance measures (separately) for the validation period.

Compute the performance measure for Rose Wine on the validation set

# For training set performance create lagged ts
valid.rose.lag2 = lag(valid.rose.ts, 2)  # For naive forecast
valid.rose.lag12 = window(rose.ts, start=c(1980, fixed.nTrain-11), end = c(1980, fixed.nTrain+fixed.nValid-12))  # For snaive forecast

# Calculate residuals
resids.valid.rose.lag2 = valid.rose.ts - valid.rose.lag2
resids.valid.rose.lag12 = as.vector(valid.rose.ts) - as.vector(valid.rose.lag12)

## Warning in as.vector(valid.rose.ts) - as.vector(valid.rose.lag12): longer
## object length is not a multiple of shorter object length

# Table for training set accuracy of Naive Rose Wine forecasts
kable(accuracy(valid.rose.ts, valid.rose.lag2), caption = "Accuracy Stats for Naive Rose Wine Forecast over Validation Set")

Accuracy Stats for Naive Rose Wine Forecast over Validation Set
	ME	RMSE	MAE	MPE	MAPE	ACF1	Theil’s U
Test set	3.636364	17.83539	11.81818	1.464656	24.49496	0.0397031	1.586492

# Table for training set accuracy of Seasonal Naive Rose Wine forecasts
# kable(accuracy(valid.rose.lag12, valid.rose.ts), caption = "Accuracy Stats for Seasonal Naive Rose Wine Forecast over Validation Set")
RMSE.valid.rose.snaive = sqrt(mean(resids.valid.rose.lag12^2))
print(paste("RSME for Seasonal Naive Rose on Validation Set:",round(RMSE.valid.rose.snaive, 2)))

## [1] "RSME for Seasonal Naive Rose on Validation Set: 20.38"

Plot the naive Forecasts for the Validation period

# Plot the residuals for the training set naive forecasts
par(mfrow = c(1, 2))  # Plot side by side

# Naive and Seasonal Naive plots
plot(resids.valid.rose.lag2, xlab = "Months", ylab="Residuals", main = "Error in Naive Forecast \n on the Validation set \n for Rose Wines" )
plot(resids.valid.rose.lag12, xlab = "Months", ylab="Residuals", main = "Error in Seasonal Naive Forecast \n on the Validation set \n for Rose Wines" )

# Histograms of residuals
hist(resids.valid.rose.lag2, main = "Error in Naive Forecast \n on the Validation set \n for Rose Wines", xlab="Residuals")
hist(resids.valid.rose.lag12, main = "Error in Seasonal Naive Forecast \n on the Validation set \n for Rose Wines", xlab="Residuals")

5. Create a histogram of the validation period forecast errors (in an earlier discussion you already generated naive forecasts for the validation period; use the corresponding errors to create the histogram). Examine the shape in terms of error magnitudes.

6. Let’s set our desired certainty level to 80%. Compute the first and ninth deciles of the validation errors distribution.

7. Combine the two deciles with the point forecast (your naive forecast) to obtain an 80% prediction interval.

For Naive Rose Forecast

fixed.nValid = 12 
fixed.nTrain = length(rose.ts) - fixed.nValid

rose.tmp = window(rose.ts, start=c(1980, fixed.nTrain-1), end = c(1980, fixed.nTrain+fixed.nValid-2))
rose.vec = as.vector(rose.tmp)
rose.fc = ts(rose.vec, start=c(1994, 1), end=c(1994,12), freq=12)

resids.rose = rose.fc - valid.rose.ts
q.resids.rose = quantile(resids.rose, probs = c(0.1, 0.9))
q10.rose = rose.fc + q.resids.rose[1]
q90.rose = rose.fc + q.resids.rose[2]

# Compute the first and ninth deciles of the validation errors distribution for the 2 period naive forecast
table = as.data.frame(c(q10.rose[1], q90.rose[2]), row.names=c("10% Decile", "90% Decile"))
colnames(table) = "Decile values"
kable(table, caption = "First and Ninth Interval of the Validation Errors Distribution on the two period Naive Forecast")

First and Ninth Interval of the Validation Errors Distribution on the two period Naive Forecast
	Decile values
10% Decile	31.4
90% Decile	93.5

hist(resids.rose, main="Histogram of Residuals of Rose Forecast")

par(mfrow = c(1, 1))
plot(rose.fc, main = "Forecast of Rose Wine \nwith 10% and 90% Confidence Bands. \n Actuals in Blue")
lines(q10.rose, col="red")
lines(q90.rose, col="red")
lines(valid.rose.ts, lty=2, col="blue")

For Seasonal Naive Forecast

fixed.nValid = 12 
fixed.nTrain = length(rose.ts) - fixed.nValid

srose.tmp = window(rose.ts, start=c(1980, fixed.nTrain-11), end = c(1980, fixed.nTrain+fixed.nValid-12))
srose.vec = as.vector(srose.tmp)
srose.fc = ts(srose.vec, start=c(1994, 1), end=c(1994,12), freq=12)

resids.srose = srose.fc - valid.rose.ts
q.resids.srose = quantile(resids.srose, probs = c(0.1, 0.9))
q10.srose = srose.fc + q.resids.srose[1]
q90.srose = srose.fc + q.resids.srose[2]

# Compute the first and ninth deciles of the validation errors distribution for the 2 period naive forecast
stable = as.data.frame(c(q10.srose[1], q90.srose[2]), row.names=c("10% Decile", "90% Decile"))
colnames(stable) = "Decile Values"
kable(stable, caption = "First and Ninth Interval of the Validation Errors Distribution on the two period Seasonal Naive Forecast")

First and Ninth Interval of the Validation Errors Distribution on the two period Seasonal Naive Forecast
	Decile Values
10% Decile	26.40000
90% Decile	49.83333

hist(resids.srose,  main="Histogram of Residuals of Seasonal Rose Forecast")

par(mfrow = c(1, 1))
plot(srose.fc,  main = "Seasonal Forecast of Rose Wine \n with 10% and 90% Confidence Bands \n Actuals in Blue")
lines(q10.srose, col="red")
lines(q90.srose, col="red")
lines(valid.rose.ts, lty=2, col="blue")

Performance of Naive Forecasts for Wine Sales & Prediction intervals for wine sales naive forecasts

BL

11/4/2016

********************************************************************

Problem Description

********************************************************************

********************************************************************

Load Libraries

********************************************************************

********************************************************************

Get data

********************************************************************

********************************************************************

Create TS objects

********************************************************************

********************************************************************

Plot Wines: Data, Trend, Seasonality

********************************************************************

********************************************************************

Generate Naive Forecasts for each of the six wines

********************************************************************

********************************************************************

Table of Naive Forecasts

********************************************************************

********************************************************************

Plot Naive Forecasts

********************************************************************

********************************************************************

Performance of naive forecasts for wine sales

********************************************************************

Evaluate the performance of naive forecasts by examining the training performance and the validation performance

1. Compute naive forecasts for the training periods and plot the actual and forecasted values for the training periods.

2. Compute performance measures (separately) for the training periods.

3. Compute naive forecasts for the validation period and plot the actual and forecasted values for the validation period.

4. Compute performance measures (separately) for the validation period.

5. Create a histogram of the validation period forecast errors (in an earlier discussion you already generated naive forecasts for the validation period; use the corresponding errors to create the histogram). Examine the shape in terms of error magnitudes.

6. Let’s set our desired certainty level to 80%. Compute the first and ninth deciles of the validation errors distribution.

7. Combine the two deciles with the point forecast (your naive forecast) to obtain an 80% prediction interval.