# setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#setwd("C:/Users/Kwang/Downloads/Time Series/TS_19")Marriot Room Forecasting case
Marriot Room Forecasting case
Data
Set Work Directory
Load Package
library(readxl)Warning: package 'readxl' was built under R version 4.5.2
library(tidyr)Warning: package 'tidyr' was built under R version 4.5.2
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(lubridate)Warning: package 'lubridate' was built under R version 4.5.2
Attaching package: 'lubridate'
The following objects are masked from 'package:base':
date, intersect, setdiff, union
library(forecast)Warning: package 'forecast' was built under R version 4.5.2
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
library(TSA)Warning: package 'TSA' was built under R version 4.5.2
Registered S3 methods overwritten by 'TSA':
method from
fitted.Arima forecast
plot.Arima forecast
Attaching package: 'TSA'
The following objects are masked from 'package:stats':
acf, arima
The following object is masked from 'package:utils':
tar
library(tseries)Warning: package 'tseries' was built under R version 4.5.2
Read & Clean Data
# --- Read Excel sheet 'Exhibit 1' ---
data_all <- read_excel("UV6154-XLS-ENG.xls", sheet = "Exhibit 1")
# Extract rows 1 to 99
data_all <- data_all[1:98, ]
data_all <- data_all %>%
rename(
dow = "DOW INDICATOR1",
demand = DEMAND,
bookings = "TUESDAY BOOKINGS",
pickup = "PICKUP RATIO",
week = "WEEK",
dow_index = "DOW INDEX"
)
# Fill week
data_all <- data_all %>%
fill(week, .direction = "down")
# --- View data ---
head(data_all)# A tibble: 6 × 6
week dow demand bookings pickup dow_index
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 1 1 1470 1512 0.972 0.865
2 1 2 870 864 1.01 0.911
3 1 3 986 827 1.19 0.973
4 1 4 1247 952 1.31 1.01
5 1 5 1109 740 1.50 1.07
6 1 6 1197 908 1.32 1.12
# check
data_all$week [1] "1" "1" "1" "1" "1" "1" "1" "2" "2" "2" "2" "2" "2" "2" "3"
[16] "3" "3" "3" "3" "3" "3" "4" "4" "4" "4" "4" "4" "4" "5" "5"
[31] "5" "5" "5" "5" "5" "6" "6" "6" "6" "6" "6" "6" "7" "7" "7"
[46] "7" "7" "7" "7" "8" "8" "8" "8" "8" "8" "8" "9" "9" "9" "9"
[61] "9" "9" "9" "10" "10" "10" "10" "10" "10" "10" "11" "11" "11" "11" "11"
[76] "11" "11" "12" "12" "12" "12" "12" "12" "12" "13" "13" "13" "13" "13" "13"
[91] "13" "14" "14" "14" "14" "14" "14" "14"
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ####
# Number of rows in your dataset
n <- nrow(data_all)
# Create a date vector starting from 1987-05-23
start_date = as.Date("1987-05-23")
date_vector <- seq(
from = start_date,
by = "day",
length.out = n
)
# Add it as a new column
data_all$date <- date_vector
# Move DATE column to the first position
data_all <- data_all[, c("date", setdiff(names(data_all), "date"))]#Add Adjust Pickup Column in this Dataset
data_all <- data_all %>% mutate(adjust_pickup = pickup/dow_index)
data_all# A tibble: 98 × 8
date week dow demand bookings pickup dow_index adjust_pickup
<date> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1987-05-23 1 1 1470 1512 0.972 0.865 1.12
2 1987-05-24 1 2 870 864 1.01 0.911 1.11
3 1987-05-25 1 3 986 827 1.19 0.973 1.23
4 1987-05-26 1 4 1247 952 1.31 1.01 1.29
5 1987-05-27 1 5 1109 740 1.50 1.07 1.40
6 1987-05-28 1 6 1197 908 1.32 1.12 1.17
7 1987-05-29 1 7 1500 1311 1.14 1.05 1.09
8 1987-05-30 2 1 1854 2034 0.912 0.865 1.05
9 1987-05-31 2 2 1489 1584 0.940 0.911 1.03
10 1987-06-01 2 3 1792 1682 1.07 0.973 1.10
# ℹ 88 more rows
EDA
#Demand & Bookings
plot(data_all$date,data_all$demand,"l",col = "blue",ylab = "no. of total room", xlab = "Date", main = 'Demand and Booking VS total room',
ylim = c(700, 2200))
lines(data_all$date,data_all$bookings,col = "black")
abline(h = 1877, col = "deeppink2", lwd = 2, lty = 2)
legend("topright",
legend = c("Demand", "Bookings", "Total Rooms"),
col = c("blue", "black", "deeppink2"),
lty = c(1,1,2),
bty = "n")#Pick up ratio
plot(data_all$date,data_all$pickup,"l",ylab = "Pick up ratio", xlab = "Date")
abline(h = 1, col = "deeppink2", lwd = 1, lty = 2)# จำนวนวันและ % ต่อทั้งหมดที่เกิน 1
cond <- data_all$pickup > 1
days_over_1 <- sum(cond, na.rm = TRUE)
percent_over_1 <- mean(cond, na.rm = TRUE) * 100 #56/87
days_over_1[1] 56
percent_over_1[1] 64.36782
#Dow_index
#Overall
plot(data_all$date,data_all$dow_index,"l",ylab = "Dow index", xlab = "Date")#One week
one_week <- data_all[1:7, ]
plot(one_week$dow, one_week$dow_index, type="l",
xlab="Date", ylab="Dow index")Split Train/Test
#repeat dow index
dow_train_vals <- data_all$dow_index[1:7] # ค่า 7 วันแรก
dow_full <- rep(dow_train_vals, length.out=nrow(data_all))
data_all$DOW_index_full <- dow_full
#ts for all column exclude date
data_num <- data_all[, setdiff(names(data_all), "date")]
ts_all <- ts(data_num, frequency = 7)
#Train
train_end <- as.Date("1987-08-03")
n_train <- sum(data_all$date <= train_end)
#last time of train set
t_train_end <- time(ts_all)[n_train]
#Split train test
train_ts <- window(ts_all, end = t_train_end)
#test_ts <- window(ts_all, start = t_train_end + 1/frequency(ts_all))
# 1) หา index ของแถวที่ pickup_ratio ไม่ใช่ NA
idx_non_na <- which(!is.na(ts_all[,"pickup"]))
# 2) หา index ของวันสุดท้ายที่ไม่ใช่ NA
last_non_na_index <- idx_non_na[length(idx_non_na)]
# 3) แปลง index → time ใน ts
end_time <- time(ts_all)[last_non_na_index]
test_ts <- window(ts_all, start = t_train_end + 1/frequency(ts_all),end = end_time)
#Select Variable
#1.pickup
pickup_train <- ts(train_ts[, "pickup"],
start = start(train_ts),
frequency = frequency(train_ts))
pickup_test <- ts(test_ts[, "pickup"],
start = start(test_ts),
frequency = frequency(test_ts))
n_pickup_test <- length(pickup_test)
#2.adjust pickup (use DOW index)
#adjust pickup
adjust_pickup_train <- ts(train_ts[, "adjust_pickup"],
start = start(train_ts),
frequency = frequency(train_ts))
adjust_pickup_test <- ts(test_ts[, "adjust_pickup"],
start = start(test_ts),
frequency = frequency(test_ts))
n_adjust_pickup_test <- length(adjust_pickup_test)
#dow index
dow_train <- ts(train_ts[, "DOW_index_full"],
start = start(train_ts),
frequency = frequency(train_ts))
dow_test <- ts(test_ts[, "DOW_index_full"],
start = start(test_ts),
frequency = frequency(test_ts))
n_dow_test <- length(dow_test)
#Check
frequency(pickup_train)[1] 7
frequency(pickup_test)[1] 7
start(pickup_train); end(pickup_train)[1] 1 1
[1] 11 3
start(pickup_test); end(pickup_test)[1] 11 4
[1] 13 3
frequency(adjust_pickup_train)[1] 7
frequency(adjust_pickup_test)[1] 7
start(adjust_pickup_train); end(adjust_pickup_train)[1] 1 1
[1] 11 3
start(adjust_pickup_test); end(adjust_pickup_test)[1] 11 4
[1] 13 3
frequency(dow_train)[1] 7
frequency(dow_test)[1] 7
start(dow_train); end(dow_train)[1] 1 1
[1] 11 3
start(dow_test); end(dow_test)[1] 11 4
[1] 13 3
Check Stationary & Decomposition
1.Pickup
# Time series pick ratio plot for train
plot(pickup_train,ylab="Pick up ratio",xlab="Time",ylim = c(0.7, 1.7), main= "Pick up ratio over time")
abline(h = 1, col = "deeppink2", lwd = 1, lty = 2)- Stationary
adf.test(pickup_train) #Fail to Reject H0 -> not stationary
Augmented Dickey-Fuller Test
data: pickup_train
Dickey-Fuller = -3.0237, Lag order = 4, p-value = 0.1583
alternative hypothesis: stationary
par(mfrow = c(2,1))
# --- ACF ---
acf_out <- acf(pickup_train, plot = FALSE)
lags_acf <- acf_out$lag[,1,1] * frequency(pickup_train)
plot(
lags_acf,acf_out$acf,type = "h",
xlab = "Lag",ylab = "ACF",main = "ACF for Pickup Ratio")
abline(h = c(-1,1) * qnorm(0.975) / sqrt(length(pickup_train)),
col = "blue", lty = 2)
abline(h = 0, col = "black", lty = 1)
# --- PACF ---
pacf_out <- pacf(pickup_train, plot = FALSE)
lags_pacf <- pacf_out$lag * frequency(pickup_train)
plot(
lags_pacf,pacf_out$acf,type = "h",
xlab = "Lag",ylab = "PACF",
main = "PACF for Pickup Ratio")
abline(h = c(-1,1) * qnorm(0.975) / sqrt(length(pickup_train)),
col = "blue", lty = 2)
abline(h = 0, col = "black", lty = 1)- Decomposition
plot(decompose(pickup_train),ylim = c(0.7, 1.7))2.adjust pickup
# Time series deseasonal pick ratio plot for train
plot(adjust_pickup_train,ylab="Pick up ratio",xlab="Time",ylim = c(0.7, 1.7), main = "deseasonal pick up ratio over time")
abline(h = 1, col = "deeppink2", lwd = 1, lty = 2)- Stationary
adf.test(adjust_pickup_train) #Fail to Reject H0 -> not stationary
Augmented Dickey-Fuller Test
data: adjust_pickup_train
Dickey-Fuller = -2.8383, Lag order = 4, p-value = 0.2338
alternative hypothesis: stationary
par(mfrow = c(2,1))
# --- ACF ---
acf_out <- acf(adjust_pickup_train, plot = FALSE)
lags_acf <- acf_out$lag[,1,1] * frequency(adjust_pickup_train)
plot(
lags_acf,acf_out$acf,type = "h",
xlab = "Lag",ylab = "ACF",main = "ACF for Deseasonal Pickup Ratio")
abline(h = c(-1,1) * qnorm(0.975) / sqrt(length(adjust_pickup_train)),
col = "blue", lty = 2)
abline(h = 0, col = "black", lty = 1)
# --- PACF ---
pacf_out <- pacf(adjust_pickup_train, plot = FALSE)
lags_pacf <- pacf_out$lag * frequency(adjust_pickup_train)
plot(
lags_pacf,pacf_out$acf,type = "h",
xlab = "Lag",ylab = "PACF",
main = "PACF for Deseasonal Pickup Ratio")
abline(h = c(-1,1) * qnorm(0.975) / sqrt(length(adjust_pickup_train)),
col = "blue", lty = 2)
abline(h = 0, col = "black", lty = 1)# Non stationary- Decomposition
plot(decompose(adjust_pickup_train),ylim = c(0.7, 1.7))Model
1. Exponential Smoothing: ETS(M,N,M) (Test: MAPE 7.100%, RMSE 0.093)
Model
pickup_ets <- ets(pickup_train)
pickup_etsETS(M,N,M)
Call:
ets(y = pickup_train)
Smoothing parameters:
alpha = 0.9158
gamma = 2e-04
Initial states:
l = 0.995
s = 1.0421 1.0982 1.075 1.0205 0.9801 0.9126
0.8717
sigma: 0.092
AIC AICc BIC
-11.156595 -7.608208 11.748000
Check Model
# Shapiro-Wilk test
# create standard residual
res <- residuals(pickup_ets)
res_std <- (res - mean(res, na.rm = TRUE)) / sd(res, na.rm = TRUE)
shapiro.test(res_std) #Fail to Reject -> Normal
Shapiro-Wilk normality test
data: res_std
W = 0.99004, p-value = 0.8394
# ACF of residual
checkresiduals(pickup_ets) #Fail to reject -> residual random
Ljung-Box test
data: Residuals from ETS(M,N,M)
Q* = 14.177, df = 14, p-value = 0.4366
Model df: 0. Total lags used: 14
Cross Validation
#pickup_ets_forecast <- forecast(pickup_ets,h = n_pickup_test)
#pickup_ets_forecast
#accuracy(pickup_test,pickup_ets_forecast$mean)
#plot(pickup_ets_forecast)
#lines(pickup_test, col = "red")
#self-validation
#pickup_ets_forecast_self <- forecast(pickup_ets)$fitted
#accuracy(pickup_ets_forecast_self,pickup_train)
#plot(pickup_ets_forecast_self)
#lines(pickup_train, col = "red")
#cross-validation
pickup_ets_forecast_cross <- forecast(pickup_ets,h = n_pickup_test)
accuracy_test1 <- accuracy(pickup_ets_forecast_cross$mean,pickup_test)
accuracy_test1 ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 0.06486101 0.09339594 0.0773739 5.844638 7.099857 0.4400417 0.877311
plot(pickup_ets_forecast_cross,ylim = c(0.7, 1.7))
lines(pickup_test, col = "deeppink2")2. SARIMA (autoarima): SARIMA(2,0,0)(1,0,0)[7] (Test: MAPE 7.717, RMSE 0.090)
Transform
# BoxCox.ar(pickup_train)
# BoxCox.lambda(pickup_train)
#ไม่ TransformModel
pickup_arima <- auto.arima(pickup_train,
stepwise = T,
seasonal = T,
approximation = F,
trace = T)
ARIMA(2,0,2)(1,0,1)[7] with non-zero mean : Inf
ARIMA(0,0,0) with non-zero mean : -48.39683
ARIMA(1,0,0)(1,0,0)[7] with non-zero mean : -95.68378
ARIMA(0,0,1)(0,0,1)[7] with non-zero mean : -90.21096
ARIMA(0,0,0) with zero mean : 225.976
ARIMA(1,0,0) with non-zero mean : -90.62985
ARIMA(1,0,0)(2,0,0)[7] with non-zero mean : -97.23867
ARIMA(1,0,0)(2,0,1)[7] with non-zero mean : Inf
ARIMA(1,0,0)(1,0,1)[7] with non-zero mean : Inf
ARIMA(0,0,0)(2,0,0)[7] with non-zero mean : -44.98505
ARIMA(2,0,0)(2,0,0)[7] with non-zero mean : -99.42872
ARIMA(2,0,0)(1,0,0)[7] with non-zero mean : -99.95363
ARIMA(2,0,0) with non-zero mean : -95.50889
ARIMA(2,0,0)(1,0,1)[7] with non-zero mean : Inf
ARIMA(2,0,0)(0,0,1)[7] with non-zero mean : -98.67539
ARIMA(2,0,0)(2,0,1)[7] with non-zero mean : Inf
ARIMA(3,0,0)(1,0,0)[7] with non-zero mean : -97.58476
ARIMA(2,0,1)(1,0,0)[7] with non-zero mean : -97.59089
ARIMA(1,0,1)(1,0,0)[7] with non-zero mean : -99.70294
ARIMA(3,0,1)(1,0,0)[7] with non-zero mean : Inf
ARIMA(2,0,0)(1,0,0)[7] with zero mean : -82.86512
Best model: ARIMA(2,0,0)(1,0,0)[7] with non-zero mean
pickup_arimaSeries: pickup_train
ARIMA(2,0,0)(1,0,0)[7] with non-zero mean
Coefficients:
ar1 ar2 sar1 mean
0.9288 -0.2905 0.3184 1.1081
s.e. 0.1120 0.1104 0.1174 0.0504
sigma^2 = 0.01327: log likelihood = 55.42
AIC=-100.85 AICc=-99.95 BIC=-89.4
Check Model
par(mfrow=c(1,2))
# Histogram of residual
hist(rstandard(pickup_arima),xlab="Standardised residuals",main="")
# QQ plot
qqnorm(rstandard(pickup_arima))
qqline(rstandard(pickup_arima))# Shapiro-Wilk test
shapiro.test(rstandard(pickup_arima)) #Fail to Reject -> Normal
Shapiro-Wilk normality test
data: rstandard(pickup_arima)
W = 0.98166, p-value = 0.3676
# ACF of residual
checkresiduals(pickup_arima) #Fail to reject -> residual random
Ljung-Box test
data: Residuals from ARIMA(2,0,0)(1,0,0)[7] with non-zero mean
Q* = 10.175, df = 11, p-value = 0.5147
Model df: 3. Total lags used: 14
# tsdiag
dev.new()
tsdiag(pickup_arima, gof=30, omit.initial=TRUE)Cross Validation
#pickup_arima_forecast <- forecast(pickup_arima,h = n_pickup_test)
#pickup_arima_forecast
#accuracy(pickup_test,pickup_arima_forecast$mean)
#plot(pickup_arima_forecast)
#lines(pickup_test, col = "red")
#self-validation
#pickup_arima_forecast_self <- forecast(pickup_arima)$fitted
#accuracy(pickup_arima_forecast_self,pickup_train)
#plot(pickup_arima_forecast_self)
#lines(pickup_train, col = "red")
#cross-validation
pickup_arima_forecast_cross <- forecast(pickup_arima,h = n_pickup_test)
accuracy_test2 <- accuracy(pickup_arima_forecast_cross$mean,pickup_test)
accuracy_test2 ME RMSE MAE MPE MAPE ACF1
Test set -0.03567746 0.08976739 0.07962678 -4.075359 7.716586 0.2825282
Theil's U
Test set 0.9151883
plot(pickup_arima_forecast_cross,ylim = c(0.7, 1.7))
lines(pickup_test, col = "deeppink2")3. Deseasonalize+ETS: ETS(M,N,N) (Test: MAPE 6.574%, RMSE 0.087)
Model
#Deseasionalize use adjust_pickup
adjust_pickup_ets <- ets(adjust_pickup_train)
adjust_pickup_etsETS(M,N,N)
Call:
ets(y = adjust_pickup_train)
Smoothing parameters:
alpha = 0.9999
Initial states:
l = 1.1161
sigma: 0.0867
AIC AICc BIC
-25.10301 -24.75518 -18.23163
Check Model
# Shapiro-Wilk test
# create standard residual
res <- residuals(adjust_pickup_ets)
res_std <- (res - mean(res, na.rm = TRUE)) / sd(res, na.rm = TRUE)
shapiro.test(res_std) #Fail to Reject -> Normal
Shapiro-Wilk normality test
data: res_std
W = 0.98328, p-value = 0.4455
# ACF of residual
checkresiduals(adjust_pickup_ets) #Fail to reject -> residual random
Ljung-Box test
data: Residuals from ETS(M,N,N)
Q* = 14.345, df = 14, p-value = 0.4244
Model df: 0. Total lags used: 14
Reseasonalize
# Mulitiply DOW index in test set
adjust_pickup_ets_forecast <- forecast(adjust_pickup_ets,h = n_pickup_test)
adjust_pickup_ets_forecast_test <- as.numeric(adjust_pickup_ets_forecast$mean) * as.numeric(dow_test)
# Change to ts
adjust_pickup_ets_forecast_test <- ts(
adjust_pickup_ets_forecast_test,
start = start(test_ts),
frequency = frequency(test_ts)
)Cross Validation
# 1. Plot full forecast object (includes black history + blue forecast + CI)
plot(adjust_pickup_ets_forecast,
main = "Forecasts from Deseasonal + ETS Model",
ylab = "Pickup Ratio",
xlab = "",
col = NA,
fcol = NA,
shaded = TRUE,
ylim = c(0.7, 1.7))
# 2. Add historical TRAIN data (thick black line)
lines(pickup_train, col = "black", lwd = 1)
# 3. Add actual TEST data (red)
lines(pickup_test, col = "deeppink2", lwd = 2)
# 4. Add reseasonalized forecast for TEST period (blue)
lines(adjust_pickup_ets_forecast_test, col = "blue", lwd = 2)
# Optional legend
legend("topleft",
legend = c("Train", "Test", "Forecast"),
col = c("black", "deeppink2", "blue"),
lwd = c(2, 2, 2),
lty = c(1, 1, 1),
bty = "n")#cross-validation
accuracy_test3 <- accuracy(adjust_pickup_ets_forecast_test,pickup_test)
accuracy_test3 ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set 0.05956513 0.08737383 0.07124064 5.403682 6.574 0.4425267 0.8161484
4. Deseasonalize+ARIMA (autoarima): ARIMA(1,0,0) (Test MAPE 5.453%, RMSE 0.074)
Model
#Deseasionalize use adjust_pickup
adjust_pickup_arima <- auto.arima(adjust_pickup_train,
stepwise = T,
approximation = F,
trace = T)
ARIMA(2,0,2)(1,0,1)[7] with non-zero mean : Inf
ARIMA(0,0,0) with non-zero mean : -78.79131
ARIMA(1,0,0)(1,0,0)[7] with non-zero mean : -137.0727
ARIMA(0,0,1)(0,0,1)[7] with non-zero mean : -121.7625
ARIMA(0,0,0) with zero mean : 225.9234
ARIMA(1,0,0) with non-zero mean : -137.9435
ARIMA(1,0,0)(0,0,1)[7] with non-zero mean : -137.4383
ARIMA(1,0,0)(1,0,1)[7] with non-zero mean : Inf
ARIMA(2,0,0) with non-zero mean : -136.6714
ARIMA(1,0,1) with non-zero mean : -136.6951
ARIMA(0,0,1) with non-zero mean : -122.3199
ARIMA(2,0,1) with non-zero mean : Inf
ARIMA(1,0,0) with zero mean : Inf
Best model: ARIMA(1,0,0) with non-zero mean
adjust_pickup_arimaSeries: adjust_pickup_train
ARIMA(1,0,0) with non-zero mean
Coefficients:
ar1 mean
0.7508 1.1088
s.e. 0.0750 0.0405
sigma^2 = 0.008246: log likelihood = 72.15
AIC=-138.29 AICc=-137.94 BIC=-131.42
Check Model
par(mfrow=c(1,2))
# Histogram of residual
hist(rstandard(adjust_pickup_arima),xlab="Standardised residuals",main="")
# QQ plot
qqnorm(rstandard(adjust_pickup_arima))
qqline(rstandard(adjust_pickup_arima))# Shapiro-Wilk test
shapiro.test(rstandard(adjust_pickup_arima)) #Fail to Reject -> Normal
Shapiro-Wilk normality test
data: rstandard(adjust_pickup_arima)
W = 0.98785, p-value = 0.7116
# ACF of residual
checkresiduals(adjust_pickup_arima) #Fail to reject -> residual random
Ljung-Box test
data: Residuals from ARIMA(1,0,0) with non-zero mean
Q* = 15.183, df = 13, p-value = 0.2961
Model df: 1. Total lags used: 14
# tsdiag
dev.new()
tsdiag(adjust_pickup_arima, gof=30, omit.initial=TRUE)Reseasonalize
# Mulitiply DOW index in test set
adjust_pickup_arima_forecast <- forecast(adjust_pickup_arima,h = n_adjust_pickup_test)
adjust_pickup_arima_forecast_test <- as.numeric(adjust_pickup_arima_forecast$mean) * as.numeric(dow_test)
# Change to ts
adjust_pickup_arima_forecast_test <- ts(
adjust_pickup_arima_forecast_test,
start = start(test_ts),
frequency = frequency(test_ts)
)Cross Validation
# 1. Plot full forecast object (includes black history + blue forecast + CI)
plot(adjust_pickup_arima_forecast,
main = "Forecasts from Deseasonal + ARIMA Model",
ylab = "Pickup Ratio",
xlab = "",
col = NA,
fcol = NA,
shaded = TRUE,
ylim = c(0.7, 1.7))
# 2. Add historical TRAIN data (thick black line)
lines(pickup_train, col = "black", lwd = 1)
# 3. Add actual TEST data (red)
lines(pickup_test, col = "deeppink2", lwd = 2)
# 4. Add reseasonalized forecast for TEST period (blue)
lines(adjust_pickup_arima_forecast_test, col = "blue", lwd = 2)
# Optional legend
legend("topleft",
legend = c("Train", "Test", "Forecast"),
col = c("black", "deeppink2", "blue"),
lwd = c(2, 2, 2),
lty = c(1, 1, 1),
bty = "n")#cross-validation
accuracy_test4 <- accuracy(adjust_pickup_arima_forecast_test,pickup_test)
accuracy_test4 ME RMSE MAE MPE MAPE ACF1
Test set -0.02792482 0.0738166 0.05671775 -2.943266 5.452708 0.4730281
Theil's U
Test set 0.7751699
Model Selection
Compare accuracy in test set
result <- data.frame(
Model = c("ETS", "SARIMA", "Deseasonalizae+ETS", "Deseasonalizae+ARIMA"),
ME = c(accuracy_test1["Test set","ME"],
accuracy_test2["Test set","ME"],
accuracy_test3["Test set","ME"],
accuracy_test4["Test set","ME"]),
RMSE = c(accuracy_test1["Test set","RMSE"],
accuracy_test2["Test set","RMSE"],
accuracy_test3["Test set","RMSE"],
accuracy_test4["Test set","RMSE"]),
MAE = c(accuracy_test1["Test set","MAE"],
accuracy_test2["Test set","MAE"],
accuracy_test3["Test set","MAE"],
accuracy_test4["Test set","MAE"]),
MAPE = c(accuracy_test1["Test set","MAPE"],
accuracy_test2["Test set","MAPE"],
accuracy_test3["Test set","MAPE"],
accuracy_test4["Test set","MAPE"]),
TheilsU = c(accuracy_test1["Test set","Theil's U"],
accuracy_test2["Test set","Theil's U"],
accuracy_test3["Test set","Theil's U"],
accuracy_test4["Test set","Theil's U"])
)
result Model ME RMSE MAE MAPE TheilsU
1 ETS 0.06486101 0.09339594 0.07737390 7.099857 0.8773110
2 SARIMA -0.03567746 0.08976739 0.07962678 7.716586 0.9151883
3 Deseasonalizae+ETS 0.05956513 0.08737383 0.07124064 6.574000 0.8161484
4 Deseasonalizae+ARIMA -0.02792482 0.07381660 0.05671775 5.452708 0.7751699
Forecast next 11 Day (18 Aug - 28 Aug 1987)
#Select Deseasonalize+ARIMA (best model)
# Mulitiply DOW index in test set
adjust_pickup_arima_forecast <- forecast(adjust_pickup_arima,h = n_adjust_pickup_test+11)
adjust_pickup_arima_forecast_test <- as.numeric(adjust_pickup_arima_forecast$mean) * as.numeric(dow_test)Warning in as.numeric(adjust_pickup_arima_forecast$mean) *
as.numeric(dow_test): longer object length is not a multiple of shorter object
length
# Change to ts
adjust_pickup_arima_forecast_test <- ts(
adjust_pickup_arima_forecast_test,
start = start(test_ts),
frequency = frequency(test_ts)
)# 1. Plot full forecast object (includes black history + blue forecast + CI)
plot(adjust_pickup_arima_forecast,
main = "Forecasts from Deseasonal + ARIMA Model",
ylab = "Pickup Ratio",
xlab = "",
col = NA,
fcol = NA,
shaded = TRUE,
ylim = c(0.7, 1.7))
# 2. Add historical TRAIN data (thick black line)
lines(pickup_train, col = "black", lwd = 1)
# 3. Add actual TEST data (red)
lines(pickup_test, col = "deeppink2", lwd = 2)
# 4. Add reseasonalized forecast for TEST period (blue)
lines(adjust_pickup_arima_forecast_test, col = "blue", lwd = 2)
# Optional legend
legend("topleft",
legend = c("Train", "Test", "Forecast"),
col = c("black", "deeppink2", "blue"),
lwd = c(2, 2, 2),
lty = c(1, 1, 1),
bty = "n")Add new column to original data
#Select Deseasonalize+ARIMA
final_pickup_result <- adjust_pickup_arima_forecast_test#prepare date in test set
test_dates <- data_all$date[(n_train+1):nrow(data_all)]
#add new column in original data
data_all$pickup_forecast <- NA
data_all$pickup_forecast[(n_train+1):nrow(data_all)] <-
as.numeric(final_pickup_result)Demand Forecast
data_all$demand_forecast <- data_all$pickup_forecast * data_all$bookings Summary
data_all[data_all$date == as.Date("1987-08-22"), c(1,5,11,10)]# A tibble: 1 × 4
date bookings demand_forecast pickup_forecast
<date> <dbl> <dbl> <dbl>
1 1987-08-22 1839 1763. 0.958
#1763+60 = 1,823 < 1,877 the hotel can safely accept the additional 60 reservations without risking overbooking.