Marriot Room Forecasting case

Marriot Room Forecasting case

Data

Set Work Directory

# setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#setwd("C:/Users/Kwang/Downloads/Time Series/TS_19")

Load Package

library(readxl)
Warning: package 'readxl' was built under R version 4.5.2
library(tidyr)
Warning: package 'tidyr' was built under R version 4.5.2
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(lubridate)
Warning: package 'lubridate' was built under R version 4.5.2

Attaching package: 'lubridate'
The following objects are masked from 'package:base':

    date, intersect, setdiff, union
library(forecast)
Warning: package 'forecast' was built under R version 4.5.2
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
library(TSA)
Warning: package 'TSA' was built under R version 4.5.2
Registered S3 methods overwritten by 'TSA':
  method       from    
  fitted.Arima forecast
  plot.Arima   forecast

Attaching package: 'TSA'
The following objects are masked from 'package:stats':

    acf, arima
The following object is masked from 'package:utils':

    tar
library(tseries)
Warning: package 'tseries' was built under R version 4.5.2

Read & Clean Data

# --- Read Excel sheet 'Exhibit 1' ---
data_all <- read_excel("UV6154-XLS-ENG.xls", sheet = "Exhibit 1")

# Extract rows 1 to 99
data_all <- data_all[1:98, ]  
data_all <- data_all %>%
            rename(
                dow = "DOW INDICATOR1",
                demand = DEMAND,
                bookings = "TUESDAY BOOKINGS",
                pickup = "PICKUP RATIO",
                week = "WEEK",
                dow_index = "DOW INDEX"
            )

# Fill week
data_all <- data_all %>%
    fill(week, .direction = "down") 

# --- View data ---
head(data_all)
# A tibble: 6 × 6
  week  dow   demand bookings pickup dow_index
  <chr> <chr>  <dbl>    <dbl>  <dbl>     <dbl>
1 1     1       1470     1512  0.972     0.865
2 1     2        870      864  1.01      0.911
3 1     3        986      827  1.19      0.973
4 1     4       1247      952  1.31      1.01 
5 1     5       1109      740  1.50      1.07 
6 1     6       1197      908  1.32      1.12 
# check
data_all$week
 [1] "1"  "1"  "1"  "1"  "1"  "1"  "1"  "2"  "2"  "2"  "2"  "2"  "2"  "2"  "3" 
[16] "3"  "3"  "3"  "3"  "3"  "3"  "4"  "4"  "4"  "4"  "4"  "4"  "4"  "5"  "5" 
[31] "5"  "5"  "5"  "5"  "5"  "6"  "6"  "6"  "6"  "6"  "6"  "6"  "7"  "7"  "7" 
[46] "7"  "7"  "7"  "7"  "8"  "8"  "8"  "8"  "8"  "8"  "8"  "9"  "9"  "9"  "9" 
[61] "9"  "9"  "9"  "10" "10" "10" "10" "10" "10" "10" "11" "11" "11" "11" "11"
[76] "11" "11" "12" "12" "12" "12" "12" "12" "12" "13" "13" "13" "13" "13" "13"
[91] "13" "14" "14" "14" "14" "14" "14" "14"
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ####
# Number of rows in your dataset
n <- nrow(data_all)

# Create a date vector starting from 1987-05-23
start_date = as.Date("1987-05-23")
date_vector <- seq(
    from = start_date, 
    by = "day", 
    length.out = n
)


# Add it as a new column
data_all$date <- date_vector

# Move DATE column to the first position
data_all <- data_all[, c("date", setdiff(names(data_all), "date"))]
#Add Adjust Pickup Column in this Dataset
data_all <- data_all %>% mutate(adjust_pickup = pickup/dow_index)
data_all
# A tibble: 98 × 8
   date       week  dow   demand bookings pickup dow_index adjust_pickup
   <date>     <chr> <chr>  <dbl>    <dbl>  <dbl>     <dbl>         <dbl>
 1 1987-05-23 1     1       1470     1512  0.972     0.865          1.12
 2 1987-05-24 1     2        870      864  1.01      0.911          1.11
 3 1987-05-25 1     3        986      827  1.19      0.973          1.23
 4 1987-05-26 1     4       1247      952  1.31      1.01           1.29
 5 1987-05-27 1     5       1109      740  1.50      1.07           1.40
 6 1987-05-28 1     6       1197      908  1.32      1.12           1.17
 7 1987-05-29 1     7       1500     1311  1.14      1.05           1.09
 8 1987-05-30 2     1       1854     2034  0.912     0.865          1.05
 9 1987-05-31 2     2       1489     1584  0.940     0.911          1.03
10 1987-06-01 2     3       1792     1682  1.07      0.973          1.10
# ℹ 88 more rows

EDA

#Demand & Bookings
plot(data_all$date,data_all$demand,"l",col = "blue",ylab = "no. of total room", xlab = "Date", main = 'Demand and Booking VS total room',
     ylim = c(700, 2200))
lines(data_all$date,data_all$bookings,col = "black")
abline(h = 1877, col = "deeppink2", lwd = 2, lty = 2)

legend("topright",
       legend = c("Demand", "Bookings", "Total Rooms"),
       col    = c("blue", "black", "deeppink2"),
       lty    = c(1,1,2),
       bty    = "n")

#Pick up ratio
plot(data_all$date,data_all$pickup,"l",ylab = "Pick up ratio", xlab = "Date")
abline(h = 1, col = "deeppink2", lwd = 1, lty = 2)

# จำนวนวันและ % ต่อทั้งหมดที่เกิน 1
cond <- data_all$pickup > 1
days_over_1 <- sum(cond, na.rm = TRUE)
percent_over_1 <- mean(cond, na.rm = TRUE) * 100 #56/87

days_over_1
[1] 56
percent_over_1
[1] 64.36782
#Dow_index

#Overall
plot(data_all$date,data_all$dow_index,"l",ylab = "Dow index", xlab = "Date")

#One week
one_week <- data_all[1:7, ]

plot(one_week$dow, one_week$dow_index, type="l",
     xlab="Date", ylab="Dow index")

Split Train/Test

#repeat dow index
dow_train_vals <- data_all$dow_index[1:7]   # ค่า 7 วันแรก
dow_full <- rep(dow_train_vals, length.out=nrow(data_all))
data_all$DOW_index_full <- dow_full

#ts for all column exclude date
data_num <- data_all[, setdiff(names(data_all), "date")]
ts_all   <- ts(data_num, frequency = 7)

#Train
train_end  <- as.Date("1987-08-03")
n_train    <- sum(data_all$date <= train_end)
#last time of train set
t_train_end <- time(ts_all)[n_train]

#Split train test
train_ts <- window(ts_all, end   = t_train_end)
#test_ts  <- window(ts_all, start = t_train_end + 1/frequency(ts_all))

# 1) หา index ของแถวที่ pickup_ratio ไม่ใช่ NA
idx_non_na <- which(!is.na(ts_all[,"pickup"]))

# 2) หา index ของวันสุดท้ายที่ไม่ใช่ NA
last_non_na_index <- idx_non_na[length(idx_non_na)]

# 3) แปลง index → time ใน ts
end_time <- time(ts_all)[last_non_na_index]

test_ts  <- window(ts_all, start = t_train_end + 1/frequency(ts_all),end = end_time)

#Select Variable
#1.pickup
pickup_train <- ts(train_ts[, "pickup"],
                   start     = start(train_ts),
                   frequency = frequency(train_ts))

pickup_test  <- ts(test_ts[, "pickup"],
                   start     = start(test_ts),
                   frequency = frequency(test_ts))
n_pickup_test <- length(pickup_test)

#2.adjust pickup (use DOW index)
#adjust pickup
adjust_pickup_train <- ts(train_ts[, "adjust_pickup"],
                   start     = start(train_ts),
                   frequency = frequency(train_ts))

adjust_pickup_test  <- ts(test_ts[, "adjust_pickup"],
                   start     = start(test_ts),
                   frequency = frequency(test_ts))
n_adjust_pickup_test <- length(adjust_pickup_test)

#dow index
dow_train <- ts(train_ts[, "DOW_index_full"],
                   start     = start(train_ts),
                   frequency = frequency(train_ts))

dow_test  <- ts(test_ts[, "DOW_index_full"],
                   start     = start(test_ts),
                   frequency = frequency(test_ts))
n_dow_test <- length(dow_test)

#Check
frequency(pickup_train)
[1] 7
frequency(pickup_test)
[1] 7
start(pickup_train); end(pickup_train)
[1] 1 1
[1] 11  3
start(pickup_test);  end(pickup_test)
[1] 11  4
[1] 13  3
frequency(adjust_pickup_train)
[1] 7
frequency(adjust_pickup_test)
[1] 7
start(adjust_pickup_train); end(adjust_pickup_train)
[1] 1 1
[1] 11  3
start(adjust_pickup_test);  end(adjust_pickup_test)
[1] 11  4
[1] 13  3
frequency(dow_train)
[1] 7
frequency(dow_test)
[1] 7
start(dow_train); end(dow_train)
[1] 1 1
[1] 11  3
start(dow_test);  end(dow_test)
[1] 11  4
[1] 13  3

Check Stationary & Decomposition

1.Pickup

# Time series pick ratio plot for train
plot(pickup_train,ylab="Pick up ratio",xlab="Time",ylim = c(0.7, 1.7), main= "Pick up ratio over time")
abline(h = 1, col = "deeppink2", lwd = 1, lty = 2)

  • Stationary
adf.test(pickup_train) #Fail to Reject H0 -> not stationary

    Augmented Dickey-Fuller Test

data:  pickup_train
Dickey-Fuller = -3.0237, Lag order = 4, p-value = 0.1583
alternative hypothesis: stationary
par(mfrow = c(2,1))

# --- ACF ---
acf_out <- acf(pickup_train, plot = FALSE)
lags_acf <- acf_out$lag[,1,1] * frequency(pickup_train)

plot(
  lags_acf,acf_out$acf,type = "h",
  xlab = "Lag",ylab = "ACF",main = "ACF for Pickup Ratio")

abline(h = c(-1,1) * qnorm(0.975) / sqrt(length(pickup_train)), 
       col = "blue", lty = 2)

abline(h = 0, col = "black", lty = 1)

# --- PACF ---
pacf_out <- pacf(pickup_train, plot = FALSE)
lags_pacf <- pacf_out$lag * frequency(pickup_train)

plot(
  lags_pacf,pacf_out$acf,type = "h",
  xlab = "Lag",ylab = "PACF",
  main = "PACF for Pickup Ratio")

abline(h = c(-1,1) * qnorm(0.975) / sqrt(length(pickup_train)), 
       col = "blue", lty = 2)
abline(h = 0, col = "black", lty = 1)

  • Decomposition
plot(decompose(pickup_train),ylim = c(0.7, 1.7))

2.adjust pickup

# Time series deseasonal pick ratio plot for train
plot(adjust_pickup_train,ylab="Pick up ratio",xlab="Time",ylim = c(0.7, 1.7), main = "deseasonal pick up ratio over time")
abline(h = 1, col = "deeppink2", lwd = 1, lty = 2)

  • Stationary
adf.test(adjust_pickup_train) #Fail to Reject H0 -> not stationary

    Augmented Dickey-Fuller Test

data:  adjust_pickup_train
Dickey-Fuller = -2.8383, Lag order = 4, p-value = 0.2338
alternative hypothesis: stationary
par(mfrow = c(2,1))

# --- ACF ---
acf_out <- acf(adjust_pickup_train, plot = FALSE)
lags_acf <- acf_out$lag[,1,1] * frequency(adjust_pickup_train)

plot(
  lags_acf,acf_out$acf,type = "h",
  xlab = "Lag",ylab = "ACF",main = "ACF for Deseasonal Pickup Ratio")

abline(h = c(-1,1) * qnorm(0.975) / sqrt(length(adjust_pickup_train)), 
       col = "blue", lty = 2)

abline(h = 0, col = "black", lty = 1)

# --- PACF ---
pacf_out <- pacf(adjust_pickup_train, plot = FALSE)
lags_pacf <- pacf_out$lag * frequency(adjust_pickup_train)

plot(
  lags_pacf,pacf_out$acf,type = "h",
  xlab = "Lag",ylab = "PACF",
  main = "PACF for Deseasonal Pickup Ratio")

abline(h = c(-1,1) * qnorm(0.975) / sqrt(length(adjust_pickup_train)), 
       col = "blue", lty = 2)
abline(h = 0, col = "black", lty = 1)

# Non stationary
  • Decomposition
plot(decompose(adjust_pickup_train),ylim = c(0.7, 1.7))

Model

1. Exponential Smoothing: ETS(M,N,M) (Test: MAPE 7.100%, RMSE 0.093)

Model

pickup_ets <- ets(pickup_train)
pickup_ets
ETS(M,N,M) 

Call:
ets(y = pickup_train)

  Smoothing parameters:
    alpha = 0.9158 
    gamma = 2e-04 

  Initial states:
    l = 0.995 
    s = 1.0421 1.0982 1.075 1.0205 0.9801 0.9126
           0.8717

  sigma:  0.092

       AIC       AICc        BIC 
-11.156595  -7.608208  11.748000 

Check Model

# Shapiro-Wilk test
# create standard residual
res <- residuals(pickup_ets)
res_std <- (res - mean(res, na.rm = TRUE)) / sd(res, na.rm = TRUE)
shapiro.test(res_std) #Fail to Reject -> Normal

    Shapiro-Wilk normality test

data:  res_std
W = 0.99004, p-value = 0.8394
# ACF of residual
checkresiduals(pickup_ets) #Fail to reject -> residual random


    Ljung-Box test

data:  Residuals from ETS(M,N,M)
Q* = 14.177, df = 14, p-value = 0.4366

Model df: 0.   Total lags used: 14

Cross Validation

#pickup_ets_forecast <- forecast(pickup_ets,h = n_pickup_test)
#pickup_ets_forecast
#accuracy(pickup_test,pickup_ets_forecast$mean)
#plot(pickup_ets_forecast)
#lines(pickup_test, col = "red")

#self-validation
#pickup_ets_forecast_self <- forecast(pickup_ets)$fitted
#accuracy(pickup_ets_forecast_self,pickup_train)
#plot(pickup_ets_forecast_self)
#lines(pickup_train, col = "red")

#cross-validation
pickup_ets_forecast_cross <- forecast(pickup_ets,h = n_pickup_test)
accuracy_test1 <- accuracy(pickup_ets_forecast_cross$mean,pickup_test)
accuracy_test1
                 ME       RMSE       MAE      MPE     MAPE      ACF1 Theil's U
Test set 0.06486101 0.09339594 0.0773739 5.844638 7.099857 0.4400417  0.877311
plot(pickup_ets_forecast_cross,ylim = c(0.7, 1.7))
lines(pickup_test, col = "deeppink2")

2. SARIMA (autoarima): SARIMA(2,0,0)(1,0,0)[7] (Test: MAPE 7.717, RMSE 0.090)

Transform

# BoxCox.ar(pickup_train)
# BoxCox.lambda(pickup_train)
#ไม่ Transform

Model

pickup_arima <- auto.arima(pickup_train, 
                         stepwise = T, 
                         seasonal = T,
                         approximation = F,
                         trace = T)

 ARIMA(2,0,2)(1,0,1)[7] with non-zero mean : Inf
 ARIMA(0,0,0)           with non-zero mean : -48.39683
 ARIMA(1,0,0)(1,0,0)[7] with non-zero mean : -95.68378
 ARIMA(0,0,1)(0,0,1)[7] with non-zero mean : -90.21096
 ARIMA(0,0,0)           with zero mean     : 225.976
 ARIMA(1,0,0)           with non-zero mean : -90.62985
 ARIMA(1,0,0)(2,0,0)[7] with non-zero mean : -97.23867
 ARIMA(1,0,0)(2,0,1)[7] with non-zero mean : Inf
 ARIMA(1,0,0)(1,0,1)[7] with non-zero mean : Inf
 ARIMA(0,0,0)(2,0,0)[7] with non-zero mean : -44.98505
 ARIMA(2,0,0)(2,0,0)[7] with non-zero mean : -99.42872
 ARIMA(2,0,0)(1,0,0)[7] with non-zero mean : -99.95363
 ARIMA(2,0,0)           with non-zero mean : -95.50889
 ARIMA(2,0,0)(1,0,1)[7] with non-zero mean : Inf
 ARIMA(2,0,0)(0,0,1)[7] with non-zero mean : -98.67539
 ARIMA(2,0,0)(2,0,1)[7] with non-zero mean : Inf
 ARIMA(3,0,0)(1,0,0)[7] with non-zero mean : -97.58476
 ARIMA(2,0,1)(1,0,0)[7] with non-zero mean : -97.59089
 ARIMA(1,0,1)(1,0,0)[7] with non-zero mean : -99.70294
 ARIMA(3,0,1)(1,0,0)[7] with non-zero mean : Inf
 ARIMA(2,0,0)(1,0,0)[7] with zero mean     : -82.86512

 Best model: ARIMA(2,0,0)(1,0,0)[7] with non-zero mean 
pickup_arima
Series: pickup_train 
ARIMA(2,0,0)(1,0,0)[7] with non-zero mean 

Coefficients:
         ar1      ar2    sar1    mean
      0.9288  -0.2905  0.3184  1.1081
s.e.  0.1120   0.1104  0.1174  0.0504

sigma^2 = 0.01327:  log likelihood = 55.42
AIC=-100.85   AICc=-99.95   BIC=-89.4

Check Model

par(mfrow=c(1,2))

# Histogram of residual
hist(rstandard(pickup_arima),xlab="Standardised residuals",main="")
# QQ plot
qqnorm(rstandard(pickup_arima))
qqline(rstandard(pickup_arima))

# Shapiro-Wilk test
shapiro.test(rstandard(pickup_arima)) #Fail to Reject -> Normal

    Shapiro-Wilk normality test

data:  rstandard(pickup_arima)
W = 0.98166, p-value = 0.3676
# ACF of residual
checkresiduals(pickup_arima) #Fail to reject -> residual random


    Ljung-Box test

data:  Residuals from ARIMA(2,0,0)(1,0,0)[7] with non-zero mean
Q* = 10.175, df = 11, p-value = 0.5147

Model df: 3.   Total lags used: 14
# tsdiag
dev.new()
tsdiag(pickup_arima, gof=30, omit.initial=TRUE)

Cross Validation

#pickup_arima_forecast <- forecast(pickup_arima,h = n_pickup_test)
#pickup_arima_forecast
#accuracy(pickup_test,pickup_arima_forecast$mean)
#plot(pickup_arima_forecast)
#lines(pickup_test, col = "red")



#self-validation
#pickup_arima_forecast_self <- forecast(pickup_arima)$fitted
#accuracy(pickup_arima_forecast_self,pickup_train)
#plot(pickup_arima_forecast_self)
#lines(pickup_train, col = "red")

#cross-validation
pickup_arima_forecast_cross <- forecast(pickup_arima,h = n_pickup_test)
accuracy_test2 <- accuracy(pickup_arima_forecast_cross$mean,pickup_test)
accuracy_test2
                  ME       RMSE        MAE       MPE     MAPE      ACF1
Test set -0.03567746 0.08976739 0.07962678 -4.075359 7.716586 0.2825282
         Theil's U
Test set 0.9151883
plot(pickup_arima_forecast_cross,ylim = c(0.7, 1.7))
lines(pickup_test, col = "deeppink2")

3. Deseasonalize+ETS: ETS(M,N,N) (Test: MAPE 6.574%, RMSE 0.087)

Model

#Deseasionalize use adjust_pickup
adjust_pickup_ets <- ets(adjust_pickup_train)
adjust_pickup_ets
ETS(M,N,N) 

Call:
ets(y = adjust_pickup_train)

  Smoothing parameters:
    alpha = 0.9999 

  Initial states:
    l = 1.1161 

  sigma:  0.0867

      AIC      AICc       BIC 
-25.10301 -24.75518 -18.23163 

Check Model

# Shapiro-Wilk test
# create standard residual
res <- residuals(adjust_pickup_ets)
res_std <- (res - mean(res, na.rm = TRUE)) / sd(res, na.rm = TRUE)
shapiro.test(res_std) #Fail to Reject -> Normal

    Shapiro-Wilk normality test

data:  res_std
W = 0.98328, p-value = 0.4455
# ACF of residual
checkresiduals(adjust_pickup_ets) #Fail to reject -> residual random


    Ljung-Box test

data:  Residuals from ETS(M,N,N)
Q* = 14.345, df = 14, p-value = 0.4244

Model df: 0.   Total lags used: 14

Reseasonalize

# Mulitiply DOW index in test set
adjust_pickup_ets_forecast <- forecast(adjust_pickup_ets,h = n_pickup_test)
adjust_pickup_ets_forecast_test <- as.numeric(adjust_pickup_ets_forecast$mean) * as.numeric(dow_test)


# Change to ts
adjust_pickup_ets_forecast_test <- ts(
  adjust_pickup_ets_forecast_test,
  start     = start(test_ts),
  frequency = frequency(test_ts)
)

Cross Validation

# 1. Plot full forecast object (includes black history + blue forecast + CI)
plot(adjust_pickup_ets_forecast,
     main = "Forecasts from Deseasonal + ETS Model",
     ylab = "Pickup Ratio",
     xlab = "",
     col = NA,
     fcol = NA,
     shaded = TRUE,
     ylim = c(0.7, 1.7))
 
# 2. Add historical TRAIN data (thick black line)
lines(pickup_train, col = "black", lwd = 1)
 
# 3. Add actual TEST data (red)
lines(pickup_test, col = "deeppink2", lwd = 2)
 
# 4. Add reseasonalized forecast for TEST period (blue)
lines(adjust_pickup_ets_forecast_test, col = "blue", lwd = 2)
 
# Optional legend
legend("topleft",
       legend = c("Train", "Test", "Forecast"),
       col    = c("black", "deeppink2", "blue"),
       lwd    = c(2, 2, 2),
       lty    = c(1, 1, 1),
       bty    = "n")

#cross-validation
accuracy_test3 <- accuracy(adjust_pickup_ets_forecast_test,pickup_test)
accuracy_test3
                 ME       RMSE        MAE      MPE  MAPE      ACF1 Theil's U
Test set 0.05956513 0.08737383 0.07124064 5.403682 6.574 0.4425267 0.8161484

4. Deseasonalize+ARIMA (autoarima): ARIMA(1,0,0) (Test MAPE 5.453%, RMSE 0.074)

Model

#Deseasionalize use adjust_pickup
adjust_pickup_arima <- auto.arima(adjust_pickup_train, 
                         stepwise = T, 
                         approximation = F,
                         trace = T)

 ARIMA(2,0,2)(1,0,1)[7] with non-zero mean : Inf
 ARIMA(0,0,0)           with non-zero mean : -78.79131
 ARIMA(1,0,0)(1,0,0)[7] with non-zero mean : -137.0727
 ARIMA(0,0,1)(0,0,1)[7] with non-zero mean : -121.7625
 ARIMA(0,0,0)           with zero mean     : 225.9234
 ARIMA(1,0,0)           with non-zero mean : -137.9435
 ARIMA(1,0,0)(0,0,1)[7] with non-zero mean : -137.4383
 ARIMA(1,0,0)(1,0,1)[7] with non-zero mean : Inf
 ARIMA(2,0,0)           with non-zero mean : -136.6714
 ARIMA(1,0,1)           with non-zero mean : -136.6951
 ARIMA(0,0,1)           with non-zero mean : -122.3199
 ARIMA(2,0,1)           with non-zero mean : Inf
 ARIMA(1,0,0)           with zero mean     : Inf

 Best model: ARIMA(1,0,0)           with non-zero mean 
adjust_pickup_arima
Series: adjust_pickup_train 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
         ar1    mean
      0.7508  1.1088
s.e.  0.0750  0.0405

sigma^2 = 0.008246:  log likelihood = 72.15
AIC=-138.29   AICc=-137.94   BIC=-131.42

Check Model

par(mfrow=c(1,2))

# Histogram of residual
hist(rstandard(adjust_pickup_arima),xlab="Standardised residuals",main="")
# QQ plot
qqnorm(rstandard(adjust_pickup_arima))
qqline(rstandard(adjust_pickup_arima))

# Shapiro-Wilk test
shapiro.test(rstandard(adjust_pickup_arima)) #Fail to Reject -> Normal

    Shapiro-Wilk normality test

data:  rstandard(adjust_pickup_arima)
W = 0.98785, p-value = 0.7116
# ACF of residual
checkresiduals(adjust_pickup_arima) #Fail to reject -> residual random


    Ljung-Box test

data:  Residuals from ARIMA(1,0,0) with non-zero mean
Q* = 15.183, df = 13, p-value = 0.2961

Model df: 1.   Total lags used: 14
# tsdiag
dev.new()
tsdiag(adjust_pickup_arima, gof=30, omit.initial=TRUE)

Reseasonalize

# Mulitiply DOW index in test set

adjust_pickup_arima_forecast <- forecast(adjust_pickup_arima,h = n_adjust_pickup_test)
adjust_pickup_arima_forecast_test <- as.numeric(adjust_pickup_arima_forecast$mean) * as.numeric(dow_test)



# Change to ts
  adjust_pickup_arima_forecast_test <- ts(
  adjust_pickup_arima_forecast_test,
  start     = start(test_ts),
  frequency = frequency(test_ts)
)

Cross Validation

# 1. Plot full forecast object (includes black history + blue forecast + CI)
plot(adjust_pickup_arima_forecast,
     main = "Forecasts from Deseasonal + ARIMA Model",
     ylab = "Pickup Ratio",
     xlab = "",
     col = NA,
     fcol = NA,
     shaded = TRUE,
     ylim = c(0.7, 1.7))
 
# 2. Add historical TRAIN data (thick black line)
lines(pickup_train, col = "black", lwd = 1)
 
# 3. Add actual TEST data (red)
lines(pickup_test, col = "deeppink2", lwd = 2)
 
# 4. Add reseasonalized forecast for TEST period (blue)
lines(adjust_pickup_arima_forecast_test, col = "blue", lwd = 2)
 
# Optional legend
legend("topleft",
       legend = c("Train", "Test", "Forecast"),
       col    = c("black", "deeppink2", "blue"),
       lwd    = c(2, 2, 2),
       lty    = c(1, 1, 1),
       bty    = "n")

#cross-validation
accuracy_test4 <- accuracy(adjust_pickup_arima_forecast_test,pickup_test)
accuracy_test4
                  ME      RMSE        MAE       MPE     MAPE      ACF1
Test set -0.02792482 0.0738166 0.05671775 -2.943266 5.452708 0.4730281
         Theil's U
Test set 0.7751699

Model Selection

Compare accuracy in test set

result <- data.frame(
  Model = c("ETS", "SARIMA", "Deseasonalizae+ETS", "Deseasonalizae+ARIMA"),
  
  ME    = c(accuracy_test1["Test set","ME"],
            accuracy_test2["Test set","ME"],
            accuracy_test3["Test set","ME"],
            accuracy_test4["Test set","ME"]),
  
  RMSE  = c(accuracy_test1["Test set","RMSE"],
            accuracy_test2["Test set","RMSE"],
            accuracy_test3["Test set","RMSE"],
            accuracy_test4["Test set","RMSE"]),
  
  MAE   = c(accuracy_test1["Test set","MAE"],
            accuracy_test2["Test set","MAE"],
            accuracy_test3["Test set","MAE"],
            accuracy_test4["Test set","MAE"]),
  
  MAPE  = c(accuracy_test1["Test set","MAPE"],
            accuracy_test2["Test set","MAPE"],
            accuracy_test3["Test set","MAPE"],
            accuracy_test4["Test set","MAPE"]),
  
  TheilsU = c(accuracy_test1["Test set","Theil's U"],
              accuracy_test2["Test set","Theil's U"],
              accuracy_test3["Test set","Theil's U"],
              accuracy_test4["Test set","Theil's U"])
)

result
                 Model          ME       RMSE        MAE     MAPE   TheilsU
1                  ETS  0.06486101 0.09339594 0.07737390 7.099857 0.8773110
2               SARIMA -0.03567746 0.08976739 0.07962678 7.716586 0.9151883
3   Deseasonalizae+ETS  0.05956513 0.08737383 0.07124064 6.574000 0.8161484
4 Deseasonalizae+ARIMA -0.02792482 0.07381660 0.05671775 5.452708 0.7751699

Forecast next 11 Day (18 Aug - 28 Aug 1987)

#Select Deseasonalize+ARIMA (best model)

# Mulitiply DOW index in test set
adjust_pickup_arima_forecast <- forecast(adjust_pickup_arima,h = n_adjust_pickup_test+11)
adjust_pickup_arima_forecast_test <- as.numeric(adjust_pickup_arima_forecast$mean) * as.numeric(dow_test)
Warning in as.numeric(adjust_pickup_arima_forecast$mean) *
as.numeric(dow_test): longer object length is not a multiple of shorter object
length
# Change to ts
  adjust_pickup_arima_forecast_test <- ts(
  adjust_pickup_arima_forecast_test,
  start     = start(test_ts),
  frequency = frequency(test_ts)
)
# 1. Plot full forecast object (includes black history + blue forecast + CI)
plot(adjust_pickup_arima_forecast,
     main = "Forecasts from Deseasonal + ARIMA Model",
     ylab = "Pickup Ratio",
     xlab = "",
     col = NA,
     fcol = NA,
     shaded = TRUE,
     ylim = c(0.7, 1.7))
 
# 2. Add historical TRAIN data (thick black line)
lines(pickup_train, col = "black", lwd = 1)
 
# 3. Add actual TEST data (red)
lines(pickup_test, col = "deeppink2", lwd = 2)
 
# 4. Add reseasonalized forecast for TEST period (blue)
lines(adjust_pickup_arima_forecast_test, col = "blue", lwd = 2)
 
# Optional legend
legend("topleft",
       legend = c("Train", "Test", "Forecast"),
       col    = c("black", "deeppink2", "blue"),
       lwd    = c(2, 2, 2),
       lty    = c(1, 1, 1),
       bty    = "n")

Add new column to original data

#Select Deseasonalize+ARIMA
final_pickup_result <- adjust_pickup_arima_forecast_test
#prepare date in test set
test_dates <- data_all$date[(n_train+1):nrow(data_all)]

#add new column in original data
data_all$pickup_forecast <- NA
data_all$pickup_forecast[(n_train+1):nrow(data_all)] <- 
  as.numeric(final_pickup_result)

Demand Forecast

data_all$demand_forecast <- data_all$pickup_forecast * data_all$bookings 

Summary

data_all[data_all$date == as.Date("1987-08-22"), c(1,5,11,10)]
# A tibble: 1 × 4
  date       bookings demand_forecast pickup_forecast
  <date>        <dbl>           <dbl>           <dbl>
1 1987-08-22     1839           1763.           0.958
#1763+60 = 1,823 < 1,877 the hotel can safely accept the additional 60 reservations without risking overbooking.