library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2 3.3.2 ✓ fma 2.4
## ✓ forecast 8.13 ✓ expsmooth 2.3
##
library(gridExtra)
library(ggplot2)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::combine() masks gridExtra::combine()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(urca)
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
First part of the Part A consists of reading the data and creating time series ts object. All N/A have been manually pre-cleaned in Excel by replacing them with 0. There are 4 time series, one for each of the machines. We fit a list of candidate models for each of the time series. The models are : mean, naive, seasonal naive, random walk with a drift, simple exponential, Holt, Holt Damped, Holt-Winters and finally ARIMA. We then select the forecast from the model with the lowest sum of errors squared (where error is defined as the difference between fit and an actual observation). All model forecasts have been charted on two graphs.
getwd()
## [1] "/Users/stepniak/624"
setwd("/Users/stepniak/624")
atm_data <- read_excel('/Users/stepniak/624/ATM624Data.xlsx', col_names=TRUE,
col_types=c('date', 'text', 'numeric')) %>%
mutate(DATE = as_date(DATE)) %>%
arrange(DATE, ATM)
atm_ts <- atm_data %>%
group_by(ATM) %>%
group_map(~ ts(.x$Cash, start=c(2009, 05, 01),frequency = 7))
See table of all models’ forecast ($ value) and sum of Squared Errors in the table which follows the charts. The selected model is ARIMA Arima_p_d_q_P_D_Q_Lag “0, 0, 2, 0, 1, 2, 7” : with a sum of squared error terms in the training period of 6278.98. The forecast for the month of May 2010 under this model is 2411.79.
autoplot(atm_ts[[1]]) +
autolayer(meanf(atm_ts[[1]] , h=31),series="Mean", PI=FALSE) +
autolayer(naive(atm_ts[[1]] , h=31),series="Naïve", PI=FALSE) +
autolayer(snaive(atm_ts[[1]] , h=31),series="Seasonal naïve", PI=FALSE) +
autolayer(rwf(atm_ts[[1]], drift=TRUE, h=31),series="Drift", PI=FALSE) +
ggtitle("ATM Cash Forecast") +
xlab("Year") + ylab("$ daily cash") +
guides(colour=guide_legend(title="Forecast"))
atm1_arima_fit <-auto.arima(atm_ts[[1]])
autoplot(atm_ts[[1]]) +
autolayer(forecast(atm1_arima_fit,h=31),series="Arima", PI=FALSE)+
autolayer(ses(atm_ts[[1]], h=31),series="SES", PI=FALSE) +
autolayer(holt(atm_ts[[1]] , h=31),series="Holt", PI=FALSE) +
autolayer(holt(atm_ts[[1]],damped=TRUE, h=31),series="Holt Damped", PI=FALSE) +
autolayer(hw(atm_ts[[1]], seasonal="additive", h=31),series="HW", PI=FALSE) +
ggtitle("ATM Cash Forecast") +
xlab("Year") + ylab("$ daily cash") +
guides(colour=guide_legend(title="Forecast"))
atm1_mean1 <- meanf(atm_ts[[1]], h=31)
atm1_naive1 <- naive(atm_ts[[1]], h=31)
atm1_snaive1 <- snaive(atm_ts[[1]], h=31)
atm1_rwf1 <- rwf(atm_ts[[1]],series="Drift", h=31)
atm1_ses1 <- ses(atm_ts[[1]], h=31)
atm1_holt1 <- holt(atm_ts[[1]], h=31)
atm1_holtD1 <- holt(atm_ts[[1]], damped=TRUE,h=31)
atm1_holtSeasonal <- hw(atm_ts[[1]],seasonal="additive")
atm1_arima <- forecast(atm1_arima_fit,h=31)
arimaordertext <- arimaorder(atm1_arima_fit)
listarima <- toString(arimaordertext)
forecast_mean1_rmse<-sum(sqrt((atm1_mean1$residuals)^2),na.rm=TRUE)
forecast_naive1_rmse<-sum(sqrt((atm1_naive1$residuals)^2),na.rm=TRUE)
forecast_snaive1_rmse<-sum(sqrt((atm1_snaive1$model$residuals)^2),na.rm=TRUE)
forecast_rwf1_rmse<-sum(sqrt((atm1_rwf1$residuals)^2),na.rm=TRUE)
forecast_ses1_rmse<-sum(sqrt((atm1_holt1$residuals)^2))
forecast_holt1_rmse<-sum(sqrt((atm1_holtD1$residuals)^2))
forecast_holtD1_rmse<-sum(sqrt((atm1_holtSeasonal$model$residuals)^2))
forecast_arima1_rmse<-sum(sqrt((atm1_arima$model$residuals)^2))
forecast_mean1_mean <- sum(atm1_mean1$mean)
forecast_naive1_mean <- sum(atm1_naive1$mean)
forecast_snaive1_mean <- sum(atm1_snaive1$mean)
forecast_rwf1_mean <- sum(atm1_rwf1$mean)
forecast_ses1_mean <- sum(atm1_ses1$mean)
forecast_holt1_mean <- sum(atm1_holt1$mean)
forecast_holtD1_mean <- sum(atm1_holtD1$mean)
forecast_arima1_mean <- sum(atm1_arima$mean)
forecast_method<-c('forecast_mean1_mean','forecast_naive1_mean','forecast_snaive1_mean','forecast_rwf1_mean','forecast_ses1_mean','forecast_holt1_mean','forecast_holtD1_mean','forecast_arima1_mean')
Dollar_Value<-c(forecast_mean1_mean,forecast_naive1_mean,forecast_snaive1_mean,forecast_rwf1_mean,forecast_ses1_mean,
forecast_holt1_mean,forecast_holtD1_mean,forecast_arima1_mean)
SE_training <- c(forecast_mean1_rmse,forecast_naive1_rmse,forecast_snaive1_rmse,forecast_rwf1_rmse,forecast_ses1_rmse,forecast_holt1_rmse,forecast_holtD1_rmse,forecast_arima1_rmse)
Arima_p_d_q_P_D_Q_Lag <- c('_','_','_','_','_','_','_',listarima)
combo_ATM1<-cbind(forecast_method,Dollar_Value,SE_training,Arima_p_d_q_P_D_Q_Lag)
combo_ATM1
## forecast_method Dollar_Value SE_training
## [1,] "forecast_mean1_mean" "2558.08967391304" "10512.2119565217"
## [2,] "forecast_naive1_mean" "2635" "14211"
## [3,] "forecast_snaive1_mean" "2408" "7029"
## [4,] "forecast_rwf1_mean" "2635" "14211"
## [5,] "forecast_ses1_mean" "2558.11871657836" "10751.2667537575"
## [6,] "forecast_holt1_mean" "2567.47548570441" "10462.9882055581"
## [7,] "forecast_holtD1_mean" "2292.28906115248" "6683.36795241376"
## [8,] "forecast_arima1_mean" "2411.79555969022" "6278.98019924216"
## Arima_p_d_q_P_D_Q_Lag
## [1,] "_"
## [2,] "_"
## [3,] "_"
## [4,] "_"
## [5,] "_"
## [6,] "_"
## [7,] "_"
## [8,] "0, 0, 2, 0, 1, 2, 7"
See table of all models’ forecast ($ value) and sum of Squared Errors in the table which follows the charts. The selected model is ARIMA Arima_p_d_q_P_D_Q_Lag “2, 0, 2, 0, 1, 2, 7” : ith a sum of squared error terms in the training period of 6590.51. The forecast for the month of May 2010 under this model is 1823.16.
autoplot(atm_ts[[2]]) +
autolayer(meanf(atm_ts[[2]] , h=31),series="Mean", PI=FALSE) +
autolayer(naive(atm_ts[[2]] , h=31),series="Naïve", PI=FALSE) +
autolayer(snaive(atm_ts[[2]] , h=31),series="Seasonal naïve", PI=FALSE) +
autolayer(rwf(atm_ts[[2]], drift=TRUE, h=31),series="Drift", PI=FALSE) +
ggtitle("ATM Cash Forecast") +
xlab("Year") + ylab("$ daily cash") +
guides(colour=guide_legend(title="Forecast"))
atm2_arima_fit <-auto.arima(atm_ts[[2]])
autoplot(atm_ts[[2]]) +
autolayer(forecast(atm2_arima_fit,h=31),series="Arima", PI=FALSE)+
autolayer(ses(atm_ts[[2]], h=31),series="SES", PI=FALSE) +
autolayer(holt(atm_ts[[2]] , h=31),series="Holt", PI=FALSE) +
autolayer(holt(atm_ts[[2]],damped=TRUE, h=31),series="Holt Damped", PI=FALSE) +
autolayer(hw(atm_ts[[2]], seasonal="additive", h=31),series="HW", PI=FALSE) +
ggtitle("ATM Cash Forecast") +
xlab("Year") + ylab("$ daily cash") +
guides(colour=guide_legend(title="Forecast"))
atm2_mean1 <- meanf(atm_ts[[2]], h=31)
atm2_naive1 <- naive(atm_ts[[2]], h=31)
atm2_snaive1 <- snaive(atm_ts[[2]], h=31)
atm2_rwf1 <- rwf(atm_ts[[2]],series="Drift", h=31)
atm2_ses1 <- ses(atm_ts[[2]], h=31)
atm2_holt1 <- holt(atm_ts[[2]], h=31)
atm2_holtD1 <- holt(atm_ts[[2]], damped=TRUE,h=31)
atm2_holtSeasonal <- hw(atm_ts[[2]],seasonal="additive")
atm2_arima <- forecast(atm2_arima_fit,h=31)
arimaordertext <- arimaorder(atm2_arima_fit)
listarima <- toString(arimaordertext)
forecast_mean1_rmse<-sum(sqrt((atm2_mean1$residuals)^2),na.rm=TRUE)
forecast_naive1_rmse<-sum(sqrt((atm2_naive1$residuals)^2),na.rm=TRUE)
forecast_snaive1_rmse<-sum(sqrt((atm2_snaive1$model$residuals)^2),na.rm=TRUE)
forecast_rwf1_rmse<-sum(sqrt((atm2_rwf1$residuals)^2),na.rm=TRUE)
forecast_ses1_rmse<-sum(sqrt((atm2_holt1$residuals)^2))
forecast_holt1_rmse<-sum(sqrt((atm2_holtD1$residuals)^2))
forecast_holtD1_rmse<-sum(sqrt((atm2_holtSeasonal$model$residuals)^2))
forecast_arima1_rmse<-sum(sqrt((atm2_arima$model$residuals)^2))
forecast_mean1_mean <- sum(atm2_mean1$mean)
forecast_naive1_mean <- sum(atm2_naive1$mean)
forecast_snaive1_mean <- sum(atm2_snaive1$mean)
forecast_rwf1_mean <- sum(atm2_rwf1$mean)
forecast_ses1_mean <- sum(atm2_ses1$mean)
forecast_holt1_mean <- sum(atm2_holt1$mean)
forecast_holtD1_mean <- sum(atm2_holtD1$mean)
forecast_arima1_mean <- sum(atm2_arima$mean)
forecast_method<-c('forecast_mean1_mean','forecast_naive1_mean','forecast_snaive1_mean','forecast_rwf1_mean','forecast_ses1_mean','forecast_holt1_mean','forecast_holtD1_mean','forecast_arima1_mean')
Dollar_Value<-c(forecast_mean1_mean,forecast_naive1_mean,forecast_snaive1_mean,forecast_rwf1_mean,forecast_ses1_mean,
forecast_holt1_mean,forecast_holtD1_mean,forecast_arima1_mean)
SE_training <- c(forecast_mean1_rmse,forecast_naive1_rmse,forecast_snaive1_rmse,forecast_rwf1_rmse,forecast_ses1_rmse,forecast_holt1_rmse,forecast_holtD1_rmse,forecast_arima1_rmse)
Arima_p_d_q_P_D_Q_Lag <- c('_','_','_','_','_','_','_',listarima)
combo_atm2<-cbind(forecast_method,Dollar_Value,SE_training,Arima_p_d_q_P_D_Q_Lag)
combo_atm2
## forecast_method Dollar_Value SE_training
## [1,] "forecast_mean1_mean" "1918.79019073569" "12331.7602179837"
## [2,] "forecast_naive1_mean" "2790" "15541"
## [3,] "forecast_snaive1_mean" "1957" "7734"
## [4,] "forecast_rwf1_mean" "2790" "15541"
## [5,] "forecast_ses1_mean" "1778.04793988467" "12242.3028503742"
## [6,] "forecast_holt1_mean" "1650.04925094897" "12222.5601582364"
## [7,] "forecast_holtD1_mean" "1782.39064072504" "7074.88567819248"
## [8,] "forecast_arima1_mean" "1823.1662585939" "6590.5111611762"
## Arima_p_d_q_P_D_Q_Lag
## [1,] "_"
## [2,] "_"
## [3,] "_"
## [4,] "_"
## [5,] "_"
## [6,] "_"
## [7,] "_"
## [8,] "2, 0, 2, 0, 1, 2, 7"
See table of all models’ forecast ($ value) and sum of Squared Errors in the table which follows the charts. The selected model is Holt : with a sum of squared error terms in the training period of 97.27. The forecast for the month of May 2010 under this model is 4129.77.
autoplot(atm_ts[[3]]) +
autolayer(meanf(atm_ts[[3]] , h=31),series="Mean", PI=FALSE) +
autolayer(naive(atm_ts[[3]] , h=31),series="Naïve", PI=FALSE) +
autolayer(snaive(atm_ts[[3]] , h=31),series="Seasonal naïve", PI=FALSE) +
autolayer(rwf(atm_ts[[3]], drift=TRUE, h=31),series="Drift", PI=FALSE) +
ggtitle("ATM Cash Forecast") +
xlab("Year") + ylab("$ daily cash") +
guides(colour=guide_legend(title="Forecast"))
atm3_arima_fit <-auto.arima(atm_ts[[3]])
autoplot(atm_ts[[3]]) +
autolayer(forecast(atm3_arima_fit,h=31),series="Arima", PI=FALSE)+
autolayer(ses(atm_ts[[3]], h=31),series="SES", PI=FALSE) +
autolayer(holt(atm_ts[[3]] , h=31),series="Holt", PI=FALSE) +
autolayer(holt(atm_ts[[3]],damped=TRUE, h=31),series="Holt Damped", PI=FALSE) +
autolayer(hw(atm_ts[[3]], seasonal="additive", h=31),series="HW", PI=FALSE) +
ggtitle("ATM Cash Forecast") +
xlab("Year") + ylab("$ daily cash") +
guides(colour=guide_legend(title="Forecast"))
atm3_mean1 <- meanf(atm_ts[[3]], h=31)
atm3_naive1 <- naive(atm_ts[[3]], h=31)
atm3_snaive1 <- snaive(atm_ts[[3]], h=31)
atm3_rwf1 <- rwf(atm_ts[[3]],series="Drift", h=31)
atm3_ses1 <- ses(atm_ts[[3]], h=31)
atm3_holt1 <- holt(atm_ts[[3]], h=31)
atm3_holtD1 <- holt(atm_ts[[3]], damped=TRUE,h=31)
atm3_holtSeasonal <- hw(atm_ts[[3]],seasonal="additive")
atm3_arima <- forecast(atm3_arima_fit,h=31)
arimaordertext <- arimaorder(atm3_arima_fit)
listarima <- toString(arimaordertext)
forecast_mean1_rmse<-sum(sqrt((atm3_mean1$residuals)^2),na.rm=TRUE)
forecast_naive1_rmse<-sum(sqrt((atm3_naive1$residuals)^2),na.rm=TRUE)
forecast_snaive1_rmse<-sum(sqrt((atm3_snaive1$model$residuals)^2),na.rm=TRUE)
forecast_rwf1_rmse<-sum(sqrt((atm3_rwf1$residuals)^2),na.rm=TRUE)
forecast_ses1_rmse<-sum(sqrt((atm3_holt1$residuals)^2))
forecast_holt1_rmse<-sum(sqrt((atm3_holtD1$residuals)^2))
forecast_holtD1_rmse<-sum(sqrt((atm3_holtSeasonal$model$residuals)^2))
forecast_arima1_rmse<-sum(sqrt((atm3_arima$model$residuals)^2))
forecast_mean1_mean <- sum(atm3_mean1$mean)
forecast_naive1_mean <- sum(atm3_naive1$mean)
forecast_snaive1_mean <- sum(atm3_snaive1$mean)
forecast_rwf1_mean <- sum(atm3_rwf1$mean)
forecast_ses1_mean <- sum(atm3_ses1$mean)
forecast_holt1_mean <- sum(atm3_holt1$mean)
forecast_holtD1_mean <- sum(atm3_holtD1$mean)
forecast_arima1_mean <- sum(atm3_arima$mean)
forecast_method<-c('forecast_mean1_mean','forecast_naive1_mean','forecast_snaive1_mean','forecast_rwf1_mean','forecast_ses1_mean','forecast_holt1_mean','forecast_holtD1_mean','forecast_arima1_mean')
Dollar_Value<-c(forecast_mean1_mean,forecast_naive1_mean,forecast_snaive1_mean,forecast_rwf1_mean,forecast_ses1_mean,
forecast_holt1_mean,forecast_holtD1_mean,forecast_arima1_mean)
SE_training <- c(forecast_mean1_rmse,forecast_naive1_rmse,forecast_snaive1_rmse,forecast_rwf1_rmse,forecast_ses1_rmse,forecast_holt1_rmse,forecast_holtD1_rmse,forecast_arima1_rmse)
Arima_p_d_q_P_D_Q_Lag <- c('_','_','_','_','_','_','_',listarima)
combo_atm3<-cbind(forecast_method,Dollar_Value,SE_training,Arima_p_d_q_P_D_Q_Lag)
combo_atm3
## forecast_method Dollar_Value SE_training
## [1,] "forecast_mean1_mean" "22.3369863013699" "521.676712328767"
## [2,] "forecast_naive1_mean" "2635" "113"
## [3,] "forecast_snaive1_mean" "1052" "263"
## [4,] "forecast_rwf1_mean" "2635" "113"
## [5,] "forecast_ses1_mean" "2622.05997429813" "97.3618260290541"
## [6,] "forecast_holt1_mean" "4129.77988220796" "97.2738895722538"
## [7,] "forecast_holtD1_mean" "2979.31803421215" "118.395238630094"
## [8,] "forecast_arima1_mean" "4.01875180487877" "99.0825561353301"
## Arima_p_d_q_P_D_Q_Lag
## [1,] "_"
## [2,] "_"
## [3,] "_"
## [4,] "_"
## [5,] "_"
## [6,] "_"
## [7,] "_"
## [8,] "0, 0, 2"
See table of all models’ forecast ($ value) and sum of Squared Errors in the table which follows the charts. The selected model is Damped Holt : with a sum of squared error terms in the training period of 110223.39. The forecast for the month of May 2010 under this model is 14695.34
autoplot(atm_ts[[4]]) +
autolayer(meanf(atm_ts[[4]] , h=31),series="Mean", PI=FALSE) +
autolayer(naive(atm_ts[[4]] , h=31),series="Naïve", PI=FALSE) +
autolayer(snaive(atm_ts[[4]] , h=31),series="Seasonal naïve", PI=FALSE) +
autolayer(rwf(atm_ts[[4]], drift=TRUE, h=31),series="Drift", PI=FALSE) +
ggtitle("ATM Cash Forecast") +
xlab("Year") + ylab("$ daily cash") +
guides(colour=guide_legend(title="Forecast"))
atm4_arima_fit <-auto.arima(atm_ts[[4]])
autoplot(atm_ts[[4]]) +
autolayer(forecast(atm4_arima_fit,h=31),series="Arima", PI=FALSE)+
autolayer(ses(atm_ts[[4]], h=31),series="SES", PI=FALSE) +
autolayer(holt(atm_ts[[4]] , h=31),series="Holt", PI=FALSE) +
autolayer(holt(atm_ts[[4]],damped=TRUE, h=31),series="Holt Damped", PI=FALSE) +
autolayer(hw(atm_ts[[4]], seasonal="additive", h=31),series="HW", PI=FALSE) +
ggtitle("ATM Cash Forecast") +
xlab("Year") + ylab("$ daily cash") +
guides(colour=guide_legend(title="Forecast"))
atm4_mean1 <- meanf(atm_ts[[4]], h=31)
atm4_naive1 <- naive(atm_ts[[4]], h=31)
atm4_snaive1 <- snaive(atm_ts[[4]], h=31)
atm4_rwf1 <- rwf(atm_ts[[4]],series="Drift", h=31)
atm4_ses1 <- ses(atm_ts[[4]], h=31)
atm4_holt1 <- holt(atm_ts[[4]], h=31)
atm4_holtD1 <- holt(atm_ts[[4]], damped=TRUE,h=31)
atm4_holtSeasonal <- hw(atm_ts[[4]],seasonal="additive")
atm4_arima <- forecast(atm4_arima_fit,h=31)
arimaordertext <- arimaorder(atm4_arima_fit)
listarima <- toString(arimaordertext)
forecast_mean1_rmse<-sum(sqrt((atm4_mean1$residuals)^2),na.rm=TRUE)
forecast_naive1_rmse<-sum(sqrt((atm4_naive1$residuals)^2),na.rm=TRUE)
forecast_snaive1_rmse<-sum(sqrt((atm4_snaive1$model$residuals)^2),na.rm=TRUE)
forecast_rwf1_rmse<-sum(sqrt((atm4_rwf1$residuals)^2),na.rm=TRUE)
forecast_ses1_rmse<-sum(sqrt((atm4_holt1$residuals)^2))
forecast_holt1_rmse<-sum(sqrt((atm4_holtD1$residuals)^2))
forecast_holtD1_rmse<-sum(sqrt((atm4_holtSeasonal$model$residuals)^2))
forecast_arima1_rmse<-sum(sqrt((atm4_arima$model$residuals)^2))
forecast_mean1_mean <- sum(atm4_mean1$mean)
forecast_naive1_mean <- sum(atm4_naive1$mean)
forecast_snaive1_mean <- sum(atm4_snaive1$mean)
forecast_rwf1_mean <- sum(atm4_rwf1$mean)
forecast_ses1_mean <- sum(atm4_ses1$mean)
forecast_holt1_mean <- sum(atm4_holt1$mean)
forecast_holtD1_mean <- sum(atm4_holtD1$mean)
forecast_arima1_mean <- sum(atm4_arima$mean)
forecast_method<-c('forecast_mean1_mean','forecast_naive1_mean','forecast_snaive1_mean','forecast_rwf1_mean','forecast_ses1_mean','forecast_holt1_mean','forecast_holtD1_mean','forecast_arima1_mean')
Dollar_Value<-c(forecast_mean1_mean,forecast_naive1_mean,forecast_snaive1_mean,forecast_rwf1_mean,forecast_ses1_mean,
forecast_holt1_mean,forecast_holtD1_mean,forecast_arima1_mean)
SE_training <- c(forecast_mean1_rmse,forecast_naive1_rmse,forecast_snaive1_rmse,forecast_rwf1_rmse,forecast_ses1_rmse,forecast_holt1_rmse,forecast_holtD1_rmse,forecast_arima1_rmse)
Arima_p_d_q_P_D_Q_Lag <- c('_','_','_','_','_','_','_',listarima)
combo_atm4<-cbind(forecast_method,Dollar_Value,SE_training,Arima_p_d_q_P_D_Q_Lag)
combo_atm4
## forecast_method Dollar_Value SE_training
## [1,] "forecast_mean1_mean" "14695.3436932973" "118114.034183585"
## [2,] "forecast_naive1_mean" "14950.9003133572" "161580.008639483"
## [3,] "forecast_snaive1_mean" "8311.58214140346" "143878.142831795"
## [4,] "forecast_rwf1_mean" "14950.9003133572" "161580.008639483"
## [5,] "forecast_ses1_mean" "14892.3414739461" "119231.534879963"
## [6,] "forecast_holt1_mean" "14894.5134000452" "119319.218617779"
## [7,] "forecast_holtD1_mean" "15041.4019748509" "110223.396151049"
## [8,] "forecast_arima1_mean" "14695.343693302" "118114.034183593"
## Arima_p_d_q_P_D_Q_Lag
## [1,] "_"
## [2,] "_"
## [3,] "_"
## [4,] "_"
## [5,] "_"
## [6,] "_"
## [7,] "_"
## [8,] "0, 0, 0"
Part B consists of a simple dataset of residential power usage for January 1998 until December 2013. Your assignment is to model these data and a monthly forecast for 2014. The data is given in a single file. The variable ‘KWH’ is power consumption in Kilowatt hours, the rest is straight forward. Add this to your existing files above.
The data has been cleaned up in Excel. It only contains a date and a numeric column. Date has been re-formatted so that it can be read directly by “ts” function without any further manipulations. The September 2008 electricity consumption which is missing has been replaced with 0.
Answer: We fit a list of candidate models for each of the time series. The models are : mean, naive, seasonal naive, random walk with a drift, simple exponential, Holt, Holt Damped, Holt-Winters and finally ARIMA (using auto-arima). We then select the forecast from the model with the lowest sum of errors squared (where error is defined as the difference between fit and an actual observation). All model forecasts have been charted on two graphs.
We select the model with the lowest RMSE which happens to be the ARIMA(1,0,0)(0,1,1)[12] with a drift model. Under this model the yearly forecast yields (sum of 12 months is): 87,824,569.
getwd()
## [1] "/Users/stepniak/624"
setwd("/Users/stepniak/624")
power_data <- read_excel('/Users/stepniak/624/ResidentialCustomerForecastLoad-624.xlsx', col_names=TRUE,
col_types=c('date','numeric'))
power_data_ts <- ts(power_data$KWH, start=c(1998, 1), frequency=12)
autoplot(power_data_ts) +
autolayer(meanf(power_data_ts , h=12),series="Mean", PI=FALSE) +
autolayer(naive(power_data_ts , h=12),series="Naïve", PI=FALSE) +
autolayer(snaive(power_data_ts , h=12),series="Seasonal naïve", PI=FALSE) +
autolayer(rwf(power_data_ts, drift=TRUE, h=12),series="Drift", PI=FALSE) +
ggtitle("ATM Cash Forecast") +
xlab("Year") + ylab("$ yearly electricity consumption") +
guides(colour=guide_legend(title="Forecast"))
power_arima_fit <-auto.arima(power_data_ts, approximation = FALSE, stepwise = FALSE)
autoplot(power_data_ts) +
autolayer(forecast(power_arima_fit,h=12),series="Arima", PI=FALSE)+
autolayer(ses(power_data_ts, h=12),series="SES", PI=FALSE) +
autolayer(holt(power_data_ts , h=12),series="Holt", PI=FALSE) +
autolayer(holt(power_data_ts,damped=TRUE, h=12),series="Holt Damped", PI=FALSE) +
autolayer(hw(power_data_ts, seasonal="additive", h=12),series="HW", PI=FALSE) +
ggtitle("ATM Cash Forecast") +
xlab("Year") + ylab("$ yearly electricity consumption") +
guides(colour=guide_legend(title="Forecast"))
power_mean1 <- meanf(power_data_ts, h=12)
power_naive1 <- naive(power_data_ts, h=12)
power_snaive1 <- snaive(power_data_ts, h=12)
power_rwf1 <- rwf(power_data_ts,series="Drift", h=12)
power_ses1 <- ses(power_data_ts, h=12)
power_holt1 <- holt(power_data_ts, h=12)
power_holtD1 <- holt(power_data_ts, damped=TRUE,h=12)
power_holtSeasonal <- hw(power_data_ts,seasonal="additive")
power_arima <- forecast(power_arima_fit,h=12)
arimaordertext <- arimaorder(power_arima_fit)
listarima <- toString(arimaordertext)
forecast_mean1_rmse<-sum(sqrt((power_mean1$residuals)^2),na.rm=TRUE)
forecast_naive1_rmse<-sum(sqrt((power_naive1$residuals)^2),na.rm=TRUE)
forecast_snaive1_rmse<-sum(sqrt((power_snaive1$model$residuals)^2),na.rm=TRUE)
forecast_rwf1_rmse<-sum(sqrt((power_rwf1$residuals)^2),na.rm=TRUE)
forecast_ses1_rmse<-sum(sqrt((power_holt1$residuals)^2))
forecast_holt1_rmse<-sum(sqrt((power_holtD1$residuals)^2))
forecast_holtD1_rmse<-sum(sqrt((power_holtSeasonal$model$residuals)^2))
forecast_arima1_rmse<-sum(sqrt((power_arima$model$residuals)^2))
forecast_mean1_mean <- sum(power_mean1$mean)
forecast_naive1_mean <- sum(power_naive1$mean)
forecast_snaive1_mean <- sum(power_snaive1$mean)
forecast_rwf1_mean <- sum(power_rwf1$mean)
forecast_ses1_mean <- sum(power_ses1$mean)
forecast_holt1_mean <- sum(power_holt1$mean)
forecast_holtD1_mean <- sum(power_holtD1$mean)
forecast_arima1_mean <- sum(power_arima$mean)
forecast_method<-c('forecast_mean1_mean','forecast_naive1_mean','forecast_snaive1_mean','forecast_rwf1_mean','forecast_ses1_mean','forecast_holt1_mean','forecast_holtD1_mean','forecast_arima1_mean')
Dollar_Value<-c(forecast_mean1_mean,forecast_naive1_mean,forecast_snaive1_mean,forecast_rwf1_mean,forecast_ses1_mean,
forecast_holt1_mean,forecast_holtD1_mean,forecast_arima1_mean)
SE_training <- c(forecast_mean1_rmse,forecast_naive1_rmse,forecast_snaive1_rmse,forecast_rwf1_rmse,forecast_ses1_rmse,forecast_holt1_rmse,forecast_holtD1_rmse,forecast_arima1_rmse)
Arima_p_d_q_P_D_Q_Lag <- c('_','_','_','_','_','_','_',listarima)
combo_power<-cbind(forecast_method,Dollar_Value,SE_training,Arima_p_d_q_P_D_Q_Lag)
combo_power
## forecast_method Dollar_Value SE_training
## [1,] "forecast_mean1_mean" "77623290.375" "233159492.625"
## [2,] "forecast_naive1_mean" "115275648" "238656085"
## [3,] "forecast_snaive1_mean" "91420024" "138546501"
## [4,] "forecast_rwf1_mean" "115275648" "238656085"
## [5,] "forecast_ses1_mean" "86386896.2767882" "245427645.181117"
## [6,] "forecast_holt1_mean" "91052014.6007181" "232675515.931883"
## [7,] "forecast_holtD1_mean" "88210558.1634732" "107740390.057433"
## [8,] "forecast_arima1_mean" "87824568.5861053" "103142818.962092"
## Arima_p_d_q_P_D_Q_Lag
## [1,] "_"
## [2,] "_"
## [3,] "_"
## [4,] "_"
## [5,] "_"
## [6,] "_"
## [7,] "_"
## [8,] "1, 0, 0, 0, 1, 1, 12"
Monthly Forecasts under the choses ARIMA model:
power_arima$mean
## Jan Feb Mar Apr May Jun Jul Aug Sep
## 2014 9692637 8085636 6739297 5990966 5734489 7420403 7968013 9134204 7749330
## Oct Nov Dec
## 2014 6116444 5709947 7483203