Install Packages
# Clear the workspace
rm(list = ls()) # Clear environment
gc() # Clear unused memory
cat("\f") # Clear the console
library("feasts")
library("seasonal")
library("tsibble")
library("tsibbledata")
library("dplyr")
library("ggplot2")
library("forecast")
library("fable")
library("weathermetrics")
library("seasonalview")
library("tibbletime")
library("fpp3")
library("fma")
library("fpp")
library("fpp2")
library("grid")
library("gridExtra")
library("lubridate")
library("seasonal")
library("zoo")
library("psych")
library("wesanderson")
library("leaflet")
setwd("/Users/spoll/OneDrive/Documents/Boston College/Predictive Analytics_Forecasting/Week 8 Final")
Load Data
raw_reserve <- read.csv("air_reserve.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
raw_visit <- read.csv("air_visit_data.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
raw_store <- read.csv("air_store_info.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
raw_date <- read.csv("date_info.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
raw_hpg_reserve <- read.csv("hpg_reserve.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
raw_hpg_store <- read.csv("hpg_store_info.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
store_id_relation <- read.csv("store_id_relation.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
sample <- read.csv("sample_submission.csv"
, check.names = FALSE
, stringsAsFactors = FALSE
, na.strings = ""
)
# sample$id <- gsub("[_]", " ", sample$id)
# sample <- sample %>% separate(id, c('system', 'id', 'date'), pattern )
# sample1 <- separate(sample, id, c('system', 'id', 'date'), sep = " ")
# sample <- sample1 %>%
# dplyr::select("date", "visitors")
# sample$date <- as.Date(sample$date)
# sample <- sample %>% distinct(.keep_all = TRUE)
# sample_ts <- ts(sample, frequency = 12)
Summary of Data
summary(raw_reserve)
## air_store_id visit_datetime reserve_datetime reserve_visitors
## Length:92378 Length:92378 Length:92378 Min. : 1.000
## Class :character Class :character Class :character 1st Qu.: 2.000
## Mode :character Mode :character Mode :character Median : 3.000
## Mean : 4.482
## 3rd Qu.: 5.000
## Max. :100.000
summary(raw_visit)
## air_store_id visit_date visitors
## Length:252108 Length:252108 Min. : 1.00
## Class :character Class :character 1st Qu.: 9.00
## Mode :character Mode :character Median : 17.00
## Mean : 20.97
## 3rd Qu.: 29.00
## Max. :877.00
summary(raw_store)
## air_store_id air_genre_name air_area_name latitude
## Length:829 Length:829 Length:829 Min. :33.21
## Class :character Class :character Class :character 1st Qu.:34.70
## Mode :character Mode :character Mode :character Median :35.66
## Mean :35.65
## 3rd Qu.:35.69
## Max. :44.02
## longitude
## Min. :130.2
## 1st Qu.:135.3
## Median :139.7
## Mean :137.4
## 3rd Qu.:139.8
## Max. :144.3
summary(raw_date)
## calendar_date day_of_week holiday_flg
## Length:517 Length:517 Min. :0.0000
## Class :character Class :character 1st Qu.:0.0000
## Mode :character Mode :character Median :0.0000
## Mean :0.0677
## 3rd Qu.:0.0000
## Max. :1.0000
summary(raw_hpg_reserve)
## hpg_store_id visit_datetime reserve_datetime reserve_visitors
## Length:1048575 Length:1048575 Length:1048575 Min. : 1.000
## Class :character Class :character Class :character 1st Qu.: 2.000
## Mode :character Mode :character Mode :character Median : 3.000
## Mean : 4.889
## 3rd Qu.: 6.000
## Max. :100.000
summary(raw_hpg_store)
## hpg_store_id hpg_genre_name hpg_area_name latitude
## Length:4690 Length:4690 Length:4690 Min. :33.31
## Class :character Class :character Class :character 1st Qu.:34.69
## Mode :character Mode :character Mode :character Median :35.66
## Mean :35.81
## 3rd Qu.:35.70
## Max. :43.77
## longitude
## Min. :130.3
## 1st Qu.:135.5
## Median :139.5
## Mean :137.7
## 3rd Qu.:139.7
## Max. :143.7
summary(store_id_relation)
## air_store_id hpg_store_id
## Length:150 Length:150
## Class :character Class :character
## Mode :character Mode :character
Descriptive Statistics About the Data
describe(raw_reserve)
## vars n mean sd median trimmed mad min max
## air_store_id* 1 92378 151.34 92.55 147 150.44 121.57 1 314
## visit_datetime* 2 92378 2958.96 1188.51 3079 3054.18 1214.25 1 4975
## reserve_datetime* 3 92378 4806.34 1942.51 5075 5004.60 1752.43 1 7513
## reserve_visitors 4 92378 4.48 4.92 3 3.39 1.48 1 100
## range skew kurtosis se
## air_store_id* 313 0.10 -1.30 0.30
## visit_datetime* 4974 -0.60 -0.39 3.91
## reserve_datetime* 7512 -0.80 -0.19 6.39
## reserve_visitors 99 4.65 33.52 0.02
describe(raw_visit)
## vars n mean sd median trimmed mad min max range
## air_store_id* 1 252108 413.70 239.70 407 413.21 309.86 1 829 828
## visit_date* 2 252108 239.75 142.68 227 238.40 186.81 1 478 477
## visitors 3 252108 20.97 16.76 17 18.82 13.34 1 877 876
## skew kurtosis se
## air_store_id* 0.02 -1.21 0.48
## visit_date* 0.13 -1.30 0.28
## visitors 3.31 74.26 0.03
describe(raw_store)
## vars n mean sd median trimmed mad min max
## air_store_id* 1 829 415.00 239.46 415.00 415.00 306.90 1.00 829.00
## air_genre_name* 2 829 6.26 3.13 7.00 6.02 2.97 1.00 14.00
## air_area_name* 3 829 54.10 31.14 58.00 54.90 43.00 1.00 103.00
## latitude 4 829 35.65 2.08 35.66 35.26 0.10 33.21 44.02
## longitude 5 829 137.42 3.65 139.69 137.82 0.20 130.20 144.27
## range skew kurtosis se
## air_store_id* 828.00 0.00 -1.20 8.32
## air_genre_name* 13.00 0.45 -0.41 0.11
## air_area_name* 102.00 -0.17 -1.34 1.08
## latitude 10.81 2.65 7.44 0.07
## longitude 14.08 -0.93 -0.55 0.13
describe(raw_date)
## vars n mean sd median trimmed mad min max range skew
## calendar_date* 1 517 259.00 149.39 259 259 191.26 1 517 516 0.00
## day_of_week* 2 517 4.00 2.00 4 4 2.97 1 7 6 0.00
## holiday_flg 3 517 0.07 0.25 0 0 0.00 0 1 1 3.43
## kurtosis se
## calendar_date* -1.21 6.57
## day_of_week* -1.26 0.09
## holiday_flg 9.79 0.01
describe(raw_hpg_reserve)
## vars n mean sd median trimmed mad min max
## hpg_store_id* 1 1048575 6382.07 3674.70 6387 6385.96 4722.08 1 12721
## visit_datetime* 2 1048575 3210.05 1954.17 3043 3176.60 2612.34 1 6582
## reserve_datetime* 3 1048575 4044.90 2325.80 4032 4041.95 3089.74 1 7998
## reserve_visitors 4 1048575 4.89 5.09 3 3.81 1.48 1 100
## range skew kurtosis se
## hpg_store_id* 12720 -0.01 -1.21 3.59
## visit_datetime* 6581 0.13 -1.30 1.91
## reserve_datetime* 7997 0.01 -1.25 2.27
## reserve_visitors 99 4.72 38.61 0.00
describe(raw_hpg_store)
## vars n mean sd median trimmed mad min
## hpg_store_id* 1 4690 2345.50 1354.03 2345.50 2345.50 1738.35 1.00
## hpg_genre_name* 2 4690 14.85 5.08 16.00 14.65 5.93 1.00
## hpg_area_name* 3 4690 66.05 35.10 70.00 67.16 45.96 1.00
## latitude 4 4690 35.81 2.14 35.66 35.38 1.01 33.31
## longitude 5 4690 137.68 3.20 139.50 138.09 0.78 130.34
## max range skew kurtosis se
## hpg_store_id* 4690.00 4689.00 0.00 -1.20 19.77
## hpg_genre_name* 34.00 33.00 0.41 0.47 0.07
## hpg_area_name* 119.00 118.00 -0.19 -1.21 0.51
## latitude 43.77 10.46 2.50 6.04 0.03
## longitude 143.71 13.38 -0.98 -0.14 0.05
describe(store_id_relation)
## vars n mean sd median trimmed mad min max range skew
## air_store_id* 1 150 75.5 43.45 75.5 75.5 55.6 1 150 149 0
## hpg_store_id* 2 150 75.5 43.45 75.5 75.5 55.6 1 150 149 0
## kurtosis se
## air_store_id* -1.22 3.55
## hpg_store_id* -1.22 3.55
Create a Time Series For Air Data
# Air Agg Time Series
air_agg <- aggregate(reserve_visitors ~ visit_datetime
, data = raw_reserve
, FUN = sum)
air_agg$date <- as.Date(air_agg$visit_datetime)
air_agg <- air_agg %>%
dplyr::select("date", "reserve_visitors")
class(air_agg$date)
## [1] "Date"
air_agg <- aggregate(reserve_visitors ~ date
, data = air_agg
, FUN = sum)
air_ts <- ts(air_agg)
summary(air_ts[,"reserve_visitors"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.0 245.0 587.0 956.2 1303.0 4742.0
air_plot <- autoplot(air_ts[,"reserve_visitors"]) +
labs(title="Air Reservations Time Series") +
ylab("Reservations") +
theme(plot.title = element_text(hjust = 0.5))
air_agg <- head(air_agg, nrow(air_agg)-20) %>%
dplyr::select("date", "reserve_visitors")
air_test <- tail(air_agg, 20) %>%
dplyr::select("date", "reserve_visitors")
air_test_ts <- ts(air_test$reserve_visitors, frequency = 365)
Create a Time Series For HPG Data
# HPG Agg Time Series
hpg_agg <- aggregate(reserve_visitors ~ visit_datetime
, data = raw_hpg_reserve
, FUN = sum)
hpg_agg$date <- as.Date(hpg_agg$visit_datetime)
hpg_agg <- hpg_agg %>%
dplyr::select("date", "reserve_visitors")
class(hpg_agg$date)
## [1] "Date"
hpg_agg <- aggregate(reserve_visitors ~ date
, data = hpg_agg
, FUN = sum)
hpg_ts <- ts(hpg_agg)
summary(hpg_ts[,"reserve_visitors"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 493 8094 10270 13153 19673 31062
hpg_plot <- autoplot(hpg_ts[,"reserve_visitors"]) +
labs(title="HPG Reservations Time Series") +
ylab("Reservations") +
theme(plot.title = element_text(hjust = 0.5))
Create a Univariate Time Series For Air Data
# UNIVARIATE TIME SERIES-----------------------------------------
# Air Univariate Time Series
air_ts2 <- ts(subset(air_agg
, select = "reserve_visitors")
, frequency = 2)
air_decomp_add <- decompose(air_ts2, type = "additive")
air_decomp_mult <- decompose(air_ts2, type = "multiplicative")
plot(air_decomp_add)

plot(air_decomp_mult)

ggtsdisplay(air_ts2) #confirmed diagnostics

Create a Univariate Time Series For HPG Data
# HPG Univariate Time Series
hpg_ts2 <- ts(subset(hpg_agg
, select = "reserve_visitors")
, frequency = 2)
hpg_decomp_add <- decompose(hpg_ts2, type = "additive")
hpg_decomp_mult <- decompose(hpg_ts2, type = "multiplicative")
plot(hpg_decomp_add)

plot(hpg_decomp_mult)

ggtsdisplay(hpg_ts2) #confirmed diagnostics

Data Exploration and Visualizations
visitplot <- plot(visitors ~ visit_date
, data = raw_visit
, main="Time Series of Air Visitors"
, xlab="Date"
, ylab="Number of Visitors"
, col="red")

wkday_visits <- raw_visit %>%
mutate(day = wday(visit_date
, label = TRUE)) %>%
group_by(day) %>%
summarise(visits = mean(visitors)) %>%
ggplot(aes(day, visits
, fill = day)) +
geom_col(colour="black") +
labs(main = "Visits per Day"
, x = "Day"
, y = "Visits") +
scale_fill_manual(values=wes_palette(n=12
, name="GrandBudapest1"
, type = "continuous"))
wkday_visits

monthly_visits <- raw_visit %>%
mutate(month = month(visit_date
, label = TRUE)) %>%
group_by(month) %>%
summarise(visits = mean(visitors)) %>%
ggplot(aes(month
, visits
, fill = month)) +
geom_col(colour="black") +
labs(x = "Month"
, y = "Visits"
, main="Visits Per Month") +
scale_fill_manual(values=wes_palette(n=12
, name="GrandBudapest1"
, type = "continuous"))
monthly_visits

leaflet(raw_store) %>%
addTiles() %>%
addProviderTiles("CartoDB.Positron") %>%
addMarkers(~longitude, ~latitude,
popup = ~air_store_id, label = ~air_genre_name,
clusterOptions = markerClusterOptions())
raw_hpg_store %>%
group_by(hpg_genre_name) %>%
summarise(total = n()) %>%
ggplot(aes(reorder(hpg_genre_name
, total
, FUN = min)
, total
, fill = hpg_genre_name)) +
geom_col() +
coord_flip() +
theme(legend.position = "none") +
scale_fill_manual(values=wes_palette(n=34
, name="GrandBudapest1"
, type = "continuous")) +
ylab("Restaurant Style")

Time Series Plots
# Combine Time Series Plots
grid.arrange(air_plot, hpg_plot, nrow = 2)

Air Linear Regressions
# LINEAR REGRESSIONS ---------------------
# Air Reservation Linear Regression
air_lm <- tslm(reserve_visitors ~ date
, data = air_ts)
air_lm_plot <- autoplot(air_ts[,"reserve_visitors"],
main = "Air Reservations Linear Regression") +
autolayer(fitted(air_lm)
, series = "Fitted Linear Regression") +
ylab("Reservations") +
theme(plot.title = element_text(hjust = 0.5))
checkresiduals(air_lm)

##
## Breusch-Godfrey test for serial correlation of order up to 10
##
## data: Residuals from Linear regression model
## LM test = 321.56, df = 10, p-value < 2.2e-16
HPG Linear Regression
# Air Linear Regression
hpg_lm <- tslm(reserve_visitors ~ date
, data = hpg_ts)
hpg_lm_plot <- autoplot(hpg_ts[,"reserve_visitors"],
main = "HPG Reservations Linear Regression") +
autolayer(fitted(hpg_lm)
, series = "Fitted Linear Regression") +
ylab("Reservations") +
theme(plot.title = element_text(hjust = 0.5))
checkresiduals(hpg_lm)

##
## Breusch-Godfrey test for serial correlation of order up to 10
##
## data: Residuals from Linear regression model
## LM test = 44.971, df = 10, p-value = 2.201e-06
# Combine Linear Reg Plots
grid.arrange(air_lm_plot, hpg_lm_plot, nrow = 2)

Linear Regression Forecasts
newdata <- data.frame(date = c(141:150))
forecast(air_lm, newdata = newdata)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 434 -49174.97 -54717.91 -43632.03 -57663.01 -40686.94
## 435 -49172.01 -54714.63 -43629.39 -57659.56 -40684.46
## 436 -49169.05 -54711.36 -43626.74 -57656.12 -40681.98
## 437 -49166.08 -54708.08 -43624.09 -57652.67 -40679.50
## 438 -49163.12 -54704.80 -43621.44 -57649.23 -40677.01
## 439 -49160.16 -54701.52 -43618.79 -57645.78 -40674.53
## 440 -49157.20 -54698.24 -43616.15 -57642.34 -40672.05
## 441 -49154.23 -54694.97 -43613.50 -57638.89 -40669.57
## 442 -49151.27 -54691.69 -43610.85 -57635.45 -40667.09
## 443 -49148.31 -54688.41 -43608.20 -57632.00 -40664.61
forecast(hpg_lm, newdata = newdata)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 133 1479089 993299.1 1964878 732967.9 2225209
## 134 1479091 993300.5 1964881 732968.9 2225212
## 135 1479093 993301.8 1964883 732969.9 2225215
## 136 1479095 993303.2 1964886 732970.9 2225218
## 137 1479097 993304.6 1964889 732971.9 2225222
## 138 1479099 993305.9 1964892 732972.9 2225225
## 139 1479101 993307.3 1964894 732973.9 2225228
## 140 1479103 993308.7 1964897 732974.9 2225231
## 141 1479105 993310.0 1964900 732975.9 2225234
## 142 1479107 993311.4 1964902 732976.9 2225237
air_lm_acc <- accuracy(air_lm)
hpg_lm_acc <- accuracy(hpg_lm)
Air Holt-Winters’
# HOLT-WINTERS' ---------------------------
# HOLT-WINTERS' AIR RESERVATIONS
air_hw <- hw(air_ts2, seasonal = "additive")
air_hw2 <- hw(air_ts2, seasonal = "multiplicative")
air_hw_plot <- autoplot(air_ts2,
main = "Air Reservations Holt-Winters'") +
autolayer(fitted(air_hw), series = "Fitted Additive") +
autolayer(fitted(air_hw2), series = "Fitted Multiplicative") +
ylab("Air Reservations") +
theme(plot.title = element_text(hjust = 0.5))
HPG Holt-Winters’
# HOLT-WINTERS' HPG RESERVATIONS
hpg_hw <- hw(hpg_ts2, seasonal = "additive")
hpg_hw2 <- hw(hpg_ts2, seasonal = "multiplicative")
hpg_hw_plot <- autoplot(hpg_ts2,
main = "HPG Reservations Holt-Winters'") +
autolayer(fitted(hpg_hw), series = "Fitted Additive") +
autolayer(fitted(hpg_hw2), series = "Fitted Multiplicative") +
ylab("Air Reservations") +
theme(plot.title = element_text(hjust = 0.5))
grid.arrange(air_hw_plot, hpg_hw_plot, nrow = 2)

Holt-Winters’ Accuracy
air_hw_acc <- accuracy(air_hw)
air_hw2_acc <- accuracy(air_hw2)
hpg_hw_acc<- accuracy(hpg_hw)
hpg_hw2_acc <- accuracy(hpg_hw2)
Air Holt-Winters’ Damped
# HOLT-WINTERS' ---------------------------
# HOLT-WINTERS' DAMPED AIR RESERVATIONS
air_hwd <- hw(air_ts2, seasonal = "additive")
air_hwd2 <- hw(air_ts2, seasonal = "multiplicative")
air_hwd_plot <- autoplot(air_ts2,
main = "Air Reservations Holt-Winters' Damped") +
autolayer(fitted(air_hwd), series = "Fitted Additive") +
autolayer(fitted(air_hwd2), series = "Fitted Multiplicative") +
ylab("Air Reservations") +
theme(plot.title = element_text(hjust = 0.5))
HPG Holt-Winters’ Damped
hpg_hwd <- hw(hpg_ts2, seasonal = "additive")
hpg_hwd2 <- hw(hpg_ts2, seasonal = "multiplicative")
hpg_hwd_plot <- autoplot(hpg_ts2,
main = "HPG Reservations Holt-Winters'Damped") +
autolayer(fitted(hpg_hwd), series = "Fitted Additive") +
autolayer(fitted(hpg_hwd2), series = "Fitted Multiplicative") +
ylab("Air Reservations") +
theme(plot.title = element_text(hjust = 0.5))
grid.arrange(air_hwd_plot
, hpg_hwd_plot
, nrow = 2)

Holt-Winters’ Damped Accuracy
air_hwd_acc <- accuracy(air_hwd)
air_hwd2_acc <- accuracy(air_hwd2)
hpg_hwd_acc <- accuracy(hpg_hwd)
hpg_hwd2_acc <- accuracy(hpg_hwd2)
Air ARIMA
# ARIMA MODELS --------------
# Air ARIMA
air_arima <- auto.arima(air_ts2)
checkresiduals(air_arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(2,0,0)[2]
## Q* = 105.6, df = 3, p-value < 2.2e-16
##
## Model df: 4. Total lags used: 7
air_arima_acc <- accuracy(air_arima)
air_arima_fc <- forecast(air_arima, h = 12)
accuracy(air_arima_fc)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.866716 543.3904 350.7981 -61.12175 84.65856 0.5926106
## ACF1
## Training set 0.004705909
air_arima_fc_plot <- autoplot(air_arima_fc) +
labs(title = "Air ARIMA Forecast using ARIMA(1,1,1)(2,0,0)[2]") +
ylab("Air Reservations") +
theme(plot.title = element_text(hjust = 0.5))
HPG ARIMA
# ARIMA MODELS --------------
# HPG ARIMA
hpg_arima <- auto.arima(hpg_ts2)
checkresiduals(hpg_arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(0,0,1)[2]
## Q* = 13.217, df = 3, p-value = 0.00419
##
## Model df: 3. Total lags used: 6
hpg_arima_acc <- accuracy(hpg_arima)
hpg_arima_fc <- forecast(hpg_arima, h = 12)
accuracy(hpg_arima_fc)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1139.111 6541.987 4866.718 -6.582705 36.316 0.613335 -0.006800734
hpg_arima_fc_plot <- autoplot(hpg_arima_fc) +
labs(title = "HPG ARIMA Forecast using ARIMA(1,1,1)(0,0,1)[2]") +
ylab("Air Reservations") +
theme(plot.title = element_text(hjust = 0.5))
# Combine Plots
grid.arrange(air_arima_fc_plot
, hpg_arima_fc_plot
, nrow = 2)

Air ETS
# ETS MODELS -------------------------
# Air ETS
air_ets <- ets(air_ts2)
air_ets_acc <- accuracy(air_ets)
checkresiduals(air_ets)

##
## Ljung-Box test
##
## data: Residuals from ETS(M,A,A)
## Q* = 96.639, df = 3, p-value < 2.2e-16
##
## Model df: 6. Total lags used: 9
HPG ETS
# HPG ETS
hpg_ets <- ets(hpg_ts2)
hpg_ets_acc <- accuracy(hpg_ets)
checkresiduals(hpg_ets)

##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,N)
## Q* = 55.749, df = 3, p-value = 4.751e-12
##
## Model df: 5. Total lags used: 8
Air ETS Forecast
# ETS FORECASTS ------------------------------
# Air ETS
air_ets_fc <- forecast(air_ets, h = 12)
accuracy(air_ets_fc)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -147.3028 658.9757 469.1776 -191.0366 205.9678 0.7925916 0.3235844
air_ets_fc_plot <- air_ets_fc %>% autoplot() +
labs(title="Air Reservations Forecast Using ETS(M,A,A)") +
ylab("Reservations") +
theme(plot.title = element_text(hjust = 0.5))
HPG ETS Forecast and Combined Plots
# ETS FORECASTS ------------------------------
# HPG ETS
hpg_ets_fc <- forecast(hpg_ets, h = 12)
accuracy(hpg_ets_fc)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 99.5092 6794.227 5854.746 -20.73566 49.281 0.7378525 0.3200496
hpg_ets_fc_plot <- hpg_ets_fc %>% autoplot() +
labs(title="HPG Reservations Forecast Using ETS(A,Ad,N)") +
ylab("Reservations") +
theme(plot.title = element_text(hjust = 0.5))
grid.arrange(air_ets_fc_plot
, hpg_ets_fc_plot
, nrow = 2)

Air Neural Net
air_nnet <- nnetar(air_ts2)
air_nnet
## Series: air_ts2
## Model: NNAR(22,1,12)[2]
## Call: nnetar(y = air_ts2)
##
## Average of 20 networks, each of which is
## a 22-12-1 network with 289 weights
## options were - linear output units
##
## sigma^2 estimated as 7661
air_nnet_acc <- accuracy(air_nnet)
air_nnet_fc <- forecast(air_nnet, h = 36)
air_nnet_fc
## Point Forecast
## 207.50 455.53373
## 208.00 511.15022
## 208.50 506.62641
## 209.00 440.16070
## 209.50 562.93916
## 210.00 691.72423
## 210.50 1097.90591
## 211.00 2031.25240
## 211.50 1849.76000
## 212.00 1082.21688
## 212.50 796.25988
## 213.00 683.99517
## 213.50 1289.33706
## 214.00 1296.71661
## 214.50 1572.91529
## 215.00 1519.85180
## 215.50 978.78545
## 216.00 520.50875
## 216.50 461.32333
## 217.00 1126.12485
## 217.50 2011.25325
## 218.00 2202.59225
## 218.50 1778.68831
## 219.00 662.41379
## 219.50 107.05203
## 220.00 137.38559
## 220.50 924.94033
## 221.00 1261.61091
## 221.50 1631.96377
## 222.00 1714.17919
## 222.50 860.47873
## 223.00 220.23803
## 223.50 23.89943
## 224.00 825.37037
## 224.50 1268.13648
## 225.00 1337.85238
accuracy(air_nnet_fc)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1742974 87.52765 41.45957 -73.77621 81.08722 0.07003853
## ACF1
## Training set -0.01203742
air_nnet_fc %>% autoplot() +
labs(title="Air Reservation Forecast using NNAR(21,1,11)[2]") +
ylab("Reservations") +
theme(plot.title = element_text(hjust = 0.5))

HPG Neural Net
hpg_nnet <- nnetar(hpg_ts2)
hpg_nnet
## Series: hpg_ts2
## Model: NNAR(9,1,5)[2]
## Call: nnetar(y = hpg_ts2)
##
## Average of 20 networks, each of which is
## a 9-5-1 network with 56 weights
## options were - linear output units
##
## sigma^2 estimated as 4771184
hpg_nnet_acc <- accuracy(hpg_nnet)
hpg_nnet_fc <- forecast(hpg_nnet, h = 36)
hpg_nnet_fc
## Point Forecast
## 67.00 17226.193
## 67.50 13249.431
## 68.00 11397.835
## 68.50 12393.194
## 69.00 10080.696
## 69.50 16031.470
## 70.00 13862.505
## 70.50 21049.647
## 71.00 22376.994
## 71.50 12287.345
## 72.00 9438.065
## 72.50 10956.497
## 73.00 11267.199
## 73.50 11834.952
## 74.00 20634.336
## 74.50 21707.271
## 75.00 12892.186
## 75.50 12746.047
## 76.00 11857.071
## 76.50 12017.742
## 77.00 10634.955
## 77.50 11545.395
## 78.00 8503.348
## 78.50 14302.802
## 79.00 21482.694
## 79.50 14969.532
## 80.00 8956.686
## 80.50 8341.577
## 81.00 10774.967
## 81.50 13306.269
## 82.00 17325.911
## 82.50 20446.214
## 83.00 14603.830
## 83.50 12150.730
## 84.00 11369.650
## 84.50 11546.530
accuracy(hpg_nnet_fc)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.08987451 2184.304 1429.475 -5.096843 11.75099 0.1801515
## ACF1
## Training set 0.0261221
hpg_nnet_fc %>% autoplot() +
labs(title="HPG Reservation Forecast using NNAR(9,1,5)[2]") +
ylab("Reservations") +
theme(plot.title = element_text(hjust = 0.5))

Compare All Model Accuracies
rbind(air_lm_acc
, hpg_lm_acc
, air_hw_acc
, air_hw2_acc
, hpg_hw_acc
, hpg_hw2_acc
, air_hwd_acc
, air_hwd2_acc
, hpg_hwd_acc
, hpg_hwd2_acc
, air_arima_acc
, hpg_arima_acc
, air_ets_acc
, hpg_ets_acc
, air_nnet_acc
, hpg_nnet_acc
)
## ME RMSE MAE MPE MAPE
## Training set 5.455015e-14 845.10296 585.93570 -982.791825 1011.48091
## Training set 2.711636e-14 6951.25188 6000.01644 -40.641889 66.29348
## Training set -4.379600e+01 642.78592 430.71721 -66.440063 88.80221
## Training set -2.742310e+01 659.46225 415.58743 -35.135517 63.92213
## Training set -3.065214e+02 7259.44865 6122.80775 -23.173570 54.48020
## Training set -4.055501e+02 7314.79362 6208.43179 -28.895602 55.42954
## Training set -4.379600e+01 642.78592 430.71721 -66.440063 88.80221
## Training set -2.742310e+01 659.46225 415.58743 -35.135517 63.92213
## Training set -3.065214e+02 7259.44865 6122.80775 -23.173570 54.48020
## Training set -4.055501e+02 7314.79362 6208.43179 -28.895602 55.42954
## Training set 1.866716e+00 543.39036 350.79810 -61.121754 84.65856
## Training set 1.139111e+03 6541.98664 4866.71849 -6.582705 36.31600
## Training set -1.473028e+02 658.97571 469.17756 -191.036575 205.96783
## Training set 9.950920e+01 6794.22741 5854.74581 -20.735662 49.28100
## Training set -1.742974e-01 87.52765 41.45957 -73.776207 81.08722
## Training set 8.987451e-02 2184.30397 1429.47466 -5.096843 11.75099
## MASE ACF1
## Training set 1.60706650 0.711057938
## Training set 1.10664471 0.350669971
## Training set 0.72761967 0.371255950
## Training set 0.70206061 0.218831401
## Training set 0.77163535 0.329198293
## Training set 0.78242623 0.360976759
## Training set 0.72761967 0.371255950
## Training set 0.70206061 0.218831401
## Training set 0.77163535 0.329198293
## Training set 0.78242623 0.360976759
## Training set 0.59261064 0.004705909
## Training set 0.61333495 -0.006800734
## Training set 0.79259155 0.323584441
## Training set 0.73785247 0.320049566
## Training set 0.07003853 -0.012037424
## Training set 0.18015153 0.026122101
Final Prediction
# Predictions
air_predict <- forecast(air_nnet, h = 36)
air_predict
## Point Forecast
## 207.50 455.53373
## 208.00 511.15022
## 208.50 506.62641
## 209.00 440.16070
## 209.50 562.93916
## 210.00 691.72423
## 210.50 1097.90591
## 211.00 2031.25240
## 211.50 1849.76000
## 212.00 1082.21688
## 212.50 796.25988
## 213.00 683.99517
## 213.50 1289.33706
## 214.00 1296.71661
## 214.50 1572.91529
## 215.00 1519.85180
## 215.50 978.78545
## 216.00 520.50875
## 216.50 461.32333
## 217.00 1126.12485
## 217.50 2011.25325
## 218.00 2202.59225
## 218.50 1778.68831
## 219.00 662.41379
## 219.50 107.05203
## 220.00 137.38559
## 220.50 924.94033
## 221.00 1261.61091
## 221.50 1631.96377
## 222.00 1714.17919
## 222.50 860.47873
## 223.00 220.23803
## 223.50 23.89943
## 224.00 825.37037
## 224.50 1268.13648
## 225.00 1337.85238
air_nnet_fc %>% autoplot(main = "Air Reservations Predictions using NNAR(21,1,11)[2]") +
ylab("Reservation") +
theme(plot.title = element_text(hjust = 0.5))

unique_date <- as.Date(unique(air_agg$date))
final_prediction_output <- as.data.frame(cbind(unique_date, air_predict$x))
colnames(final_prediction_output) <- c("date", "reservations")
final_prediction_output$date <- as.Date(final_prediction_output$date)
write.csv(final_prediction_output, file = "Forecasting_Final_Kaggle_Pollastro.csv", row.names = FALSE)