library('ggplot2')
library('scales')
library('grid')
library('gridExtra')
library('RColorBrewer')
library('corrplot') # visualisation
## corrplot 0.84 loaded
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:scales':
##
## alpha, rescale
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library('dplyr')
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library('readr')
##
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
##
## col_factor
library('data.table')
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library('tibble')
library('tidyr')
library('stringr')
library('forcats')
library('ggfortify')
library('ggrepel')
library('ggridges')
library('ggExtra')
library('ggforce')
library('lazyeval')
library('broom')
library('purrr')
##
## Attaching package: 'purrr'
## The following objects are masked from 'package:lazyeval':
##
## is_atomic, is_formula
## The following object is masked from 'package:data.table':
##
## transpose
## The following object is masked from 'package:scales':
##
## discard
library('lubridate') # date and time
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library('timeDate') # date and time
library('tseries') # time series analysis
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library('forecast') # time series analysis
## Registered S3 methods overwritten by 'forecast':
## method from
## autoplot.Arima ggfortify
## autoplot.acf ggfortify
## autoplot.ar ggfortify
## autoplot.bats ggfortify
## autoplot.decomposed.ts ggfortify
## autoplot.ets ggfortify
## autoplot.forecast ggfortify
## autoplot.stl ggfortify
## autoplot.ts ggfortify
## fitted.ar ggfortify
## fortify.ts ggfortify
## residuals.ar ggfortify
# Read the eight competition CSV files. Every table is bound to two names:
# the snake_case name that mirrors the file name and the dotted alias that the
# rest of the analysis works with.
air_reserve <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/air_reserve.csv")
air.reserve <- air_reserve

air_visit_data <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/air_visit_data.csv")
air.visits <- air_visit_data

air_store_info <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/air_store_info.csv")
air.store <- air_store_info

hpg_reserve <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/hpg_reserve.csv")
hpg.reserve <- hpg_reserve

hpg_store_info <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/hpg_store_info.csv")
hpg.store <- hpg_store_info

date_info <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/date_info.csv")
holidays <- date_info

store_id_relation <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/store_id_relation.csv")
stord.ids <- store_id_relation

sample_submission <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/sample_submission.csv")
testdata <- sample_submission
# Count the missing values in each loaded table; the recorded output follows
# every call and is zero for all eight tables.
sum(is.na(air.reserve))
## [1] 0
sum(is.na(air.visits))
## [1] 0
sum(is.na(air.store))
## [1] 0
sum(is.na(hpg.reserve))
## [1] 0
sum(is.na(hpg.store))
## [1] 0
sum(is.na(holidays))
## [1] 0
sum(is.na(stord.ids))
## [1] 0
sum(is.na(testdata))
## [1] 0
# There are no missing values in any of the data sets.
# Summary statistics for every table (describe() is presumably psych's, which
# is attached above). Columns flagged with `*` in the output are non-numeric
# columns summarised through numeric codes, so their mean/sd/skew are not
# meaningful quantities.

# AIR reservations.
describe(air.reserve)
## vars n mean sd median trimmed mad min max
## air_store_id* 1 92378 151.34 92.55 147 150.44 121.57 1 314
## visit_datetime* 2 92378 2958.96 1188.51 3079 3054.18 1214.25 1 4975
## reserve_datetime* 3 92378 4806.34 1942.51 5075 5004.60 1752.43 1 7513
## reserve_visitors 4 92378 4.48 4.92 3 3.39 1.48 1 100
## range skew kurtosis se
## air_store_id* 313 0.10 -1.30 0.30
## visit_datetime* 4974 -0.60 -0.39 3.91
## reserve_datetime* 7512 -0.80 -0.19 6.39
## reserve_visitors 99 4.65 33.52 0.02
# AIR visits (one row per store and visit date).
describe(air.visits)
## vars n mean sd median trimmed mad min max range
## air_store_id* 1 252108 413.70 239.70 407 413.21 309.86 1 829 828
## visit_date* 2 252108 286.53 123.15 297 294.11 137.88 1 478 477
## visitors 3 252108 20.97 16.76 17 18.82 13.34 1 877 876
## skew kurtosis se
## air_store_id* 0.02 -1.21 0.48
## visit_date* -0.41 -0.69 0.25
## visitors 3.31 74.26 0.03
# AIR store metadata.
describe(air.store)
## vars n mean sd median trimmed mad min max
## air_store_id* 1 829 415.00 239.46 415.00 415.00 306.90 1.00 829.00
## air_genre_name* 2 829 6.26 3.13 7.00 6.02 2.97 1.00 14.00
## air_area_name* 3 829 53.21 31.96 57.00 54.27 43.00 1.00 103.00
## latitude 4 829 35.65 2.08 35.66 35.26 0.10 33.21 44.02
## longitude 5 829 137.42 3.65 139.69 137.82 0.20 130.20 144.27
## range skew kurtosis se
## air_store_id* 828.00 0.00 -1.20 8.32
## air_genre_name* 13.00 0.45 -0.41 0.11
## air_area_name* 102.00 -0.20 -1.30 1.11
## latitude 10.81 2.65 7.44 0.07
## longitude 14.08 -0.93 -0.55 0.13
# HPG reservations.
describe(hpg.reserve)
## vars n mean sd median trimmed mad min max
## hpg_store_id* 1 2000320 6680.24 3854.37 6683 6684.65 4965.23 1 13325
## visit_datetime* 2 2000320 5635.59 2681.73 6377 5810.69 2830.28 1 9847
## reserve_datetime* 3 2000320 6724.42 3228.43 7537 6912.04 3629.40 1 11450
## reserve_visitors 4 2000320 5.07 5.42 3 3.92 1.48 1 100
## range skew kurtosis se
## hpg_store_id* 13324 -0.01 -1.21 2.73
## visit_datetime* 9846 -0.50 -0.94 1.90
## reserve_datetime* 11449 -0.44 -0.99 2.28
## reserve_visitors 99 4.60 36.10 0.00
# HPG store metadata.
describe(hpg.store)
## vars n mean sd median trimmed mad min
## hpg_store_id* 1 4690 2345.50 1354.03 2345.50 2345.50 1738.35 1.00
## hpg_genre_name* 2 4690 14.85 5.08 16.00 14.65 5.93 1.00
## hpg_area_name* 3 4690 65.41 35.89 69.00 66.38 47.44 1.00
## latitude 4 4690 35.81 2.14 35.66 35.38 1.01 33.31
## longitude 5 4690 137.68 3.20 139.50 138.09 0.78 130.34
## max range skew kurtosis se
## hpg_store_id* 4690.00 4689.00 0.00 -1.20 19.77
## hpg_genre_name* 34.00 33.00 0.41 0.47 0.07
## hpg_area_name* 119.00 118.00 -0.20 -1.25 0.52
## latitude 43.77 10.46 2.50 6.04 0.03
## longitude 143.71 13.38 -0.98 -0.14 0.05
# Calendar table with the holiday flag.
describe(holidays)
## vars n mean sd median trimmed mad min max range skew
## calendar_date* 1 517 259.00 149.39 259 259 191.26 1 517 516 0.00
## day_of_week* 2 517 4.00 2.00 4 4 2.97 1 7 6 0.00
## holiday_flg 3 517 0.07 0.25 0 0 0.00 0 1 1 3.43
## kurtosis se
## calendar_date* -1.21 6.57
## day_of_week* -1.26 0.09
## holiday_flg 9.79 0.01
# Mapping between AIR and HPG store ids.
describe(stord.ids)
## vars n mean sd median trimmed mad min max range skew
## air_store_id* 1 150 75.5 43.45 75.5 75.5 55.6 1 150 149 0
## hpg_store_id* 2 150 75.5 43.45 75.5 75.5 55.6 1 150 149 0
## kurtosis se
## air_store_id* -1.22 3.55
## hpg_store_id* -1.22 3.55
# Submission template; `visitors` is all zeros, hence the NaN skew/kurtosis.
describe(testdata)
## vars n mean sd median trimmed mad min max range skew
## id* 1 32019 16010 9243.23 16010 16010 11868.21 1 32019 32018 0
## visitors 2 32019 0 0.00 0 0 0.00 0 0 0 NaN
## kurtosis se
## id* -1.2 51.66
## visitors NaN 0.00
# Convert the date and date-time columns to proper Date / date-time values
# with lubridate's parsers.
air.visits <- mutate(air.visits, visit_date = ymd(visit_date))

air.reserve <- mutate(
  air.reserve,
  visit_datetime = ymd_hms(visit_datetime),
  reserve_datetime = ymd_hms(reserve_datetime)
)

hpg.reserve <- mutate(
  hpg.reserve,
  visit_datetime = ymd_hms(visit_datetime),
  reserve_datetime = ymd_hms(reserve_datetime)
)

# Disabled: conversion of the AIR genre and area names to factors.
#air.store <- air.store %>%
# mutate(air_genre_name = as.factor(air_genre_name),
# air_area_name = as.factor(air_area_name))

# The holiday flag becomes logical and a parsed `date` column is added next to
# the original `calendar_date`.
holidays <- mutate(
  holidays,
  holiday_flg = as.logical(holiday_flg),
  date = ymd(calendar_date)
)
# Inspect column types after the date parsing; the recorded output follows
# each call. The two store-info tables are skipped (calls commented out).
glimpse(air.reserve)
## Rows: 92,378
## Columns: 4
## $ air_store_id <chr> "air_877f79706adbfb06", "air_db4b38ebe7a7ceff", "air…
## $ visit_datetime <dttm> 2016-01-01 19:00:00, 2016-01-01 19:00:00, 2016-01-0…
## $ reserve_datetime <dttm> 2016-01-01 16:00:00, 2016-01-01 19:00:00, 2016-01-0…
## $ reserve_visitors <int> 1, 3, 6, 2, 5, 2, 4, 2, 2, 2, 3, 3, 2, 6, 7, 41, 13,…
glimpse(air.visits)
## Rows: 252,108
## Columns: 3
## $ air_store_id <chr> "air_ba937bf13d40fb24", "air_ba937bf13d40fb24", "air_ba9…
## $ visit_date <date> 2016-01-13, 2016-01-14, 2016-01-15, 2016-01-16, 2016-01…
## $ visitors <int> 25, 32, 29, 22, 6, 9, 31, 21, 18, 26, 21, 11, 24, 21, 26…
#glimpse(air.store)
glimpse(hpg.reserve)
## Rows: 2,000,320
## Columns: 4
## $ hpg_store_id <chr> "hpg_c63f6f42e088e50f", "hpg_dac72789163a3f47", "hpg…
## $ visit_datetime <dttm> 2016-01-01 11:00:00, 2016-01-01 13:00:00, 2016-01-0…
## $ reserve_datetime <dttm> 2016-01-01 09:00:00, 2016-01-01 06:00:00, 2016-01-0…
## $ reserve_visitors <int> 1, 3, 2, 5, 13, 2, 2, 2, 2, 6, 2, 2, 2, 2, 5, 4, 2, …
#glimpse(hpg.store)
# `calendar_date` stays character; the parsed copy lives in `date`.
glimpse(holidays)
## Rows: 517
## Columns: 4
## $ calendar_date <chr> "2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04",…
## $ day_of_week <chr> "Friday", "Saturday", "Sunday", "Monday", "Tuesday", "W…
## $ holiday_flg <lgl> TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ date <date> 2016-01-01, 2016-01-02, 2016-01-03, 2016-01-04, 2016-0…
glimpse(stord.ids)
## Rows: 150
## Columns: 2
## $ air_store_id <chr> "air_63b13c56b7201bd9", "air_a24bf50c3e90d583", "air_c7f…
## $ hpg_store_id <chr> "hpg_4bc649e72e2a239a", "hpg_c34b496d0305a809", "hpg_cd8…
# Submission ids combine the store id and the date to predict.
glimpse(testdata)
## Rows: 32,019
## Columns: 2
## $ id <chr> "air_00a91d42b08b08d9_2017-04-23", "air_00a91d42b08b08d9_201…
## $ visitors <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
# Four views of the AIR visit data.

# 1) Total visitors over all stores for each visit date.
plot.1 <- air.visits %>%
  group_by(visit_date) %>%
  summarise(all_visitors = sum(visitors)) %>%
  ggplot(aes(x = visit_date, y = all_visitors)) +
  geom_line(colour = "purple") +
  labs(x = "Date of visit", y = "total air visitors")
## `summarise()` ungrouping output (override with `.groups` argument)

# 2) Histogram of the per-row visitor counts on a log10 x axis, with a red
#    reference line at 20 visitors.
plot.2 <- ggplot(air.visits, aes(x = visitors)) +
  geom_vline(xintercept = 20, colour = "red") +
  geom_histogram(fill = "purple", bins = 30) +
  scale_x_log10()

# 3) Mean visitors by weekday.
plot.3 <- air.visits %>%
  mutate(wday = wday(visit_date, label = TRUE)) %>%
  group_by(wday) %>%
  summarise(visits = mean(visitors)) %>%
  ggplot(aes(x = wday, y = visits, fill = wday)) +
  geom_col() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 0.9)
  ) +
  labs(x = "Weekday", y = "Mean visitors")
## `summarise()` ungrouping output (override with `.groups` argument)

# 4) Median visitors by calendar month.
plot.4 <- air.visits %>%
  mutate(month = month(visit_date, label = TRUE)) %>%
  group_by(month) %>%
  summarise(visits = median(visitors)) %>%
  ggplot(aes(x = month, y = visits, fill = month)) +
  geom_col() +
  theme(legend.position = "none") +
  labs(x = "Month", y = "Median visitors")
## `summarise()` ungrouping output (override with `.groups` argument)

plot.1

plot.2

plot.3

plot.4

# Overall, the number of AIR visitors increases from 2016 to 2017 (more people joined AIR? more restaurants enrolled?).
# The most common number of visitors is around 20-30 per day; Friday to Sunday are more popular than the other weekdays.
# March to May and December are the most visited months of the year (holiday season?), so we observe seasonality here.
# Next, we visualize the total visitors who reserved through HPG (grouped by their visit date).
# Median of `reserve_visitors` over the HPG reservation rows, by month of the
# visit.
plot.1 <- hpg.reserve %>%
  mutate(month = month(visit_datetime, label = TRUE)) %>%
  group_by(month) %>%
  summarise(all_reserved_visits = median(reserve_visitors)) %>%
  ggplot(aes(x = month, y = all_reserved_visits, fill = month)) +
  geom_col() +
  theme(legend.position = "none") +
  labs(x = "Month", y = "Median visitors")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.1

# March and December are the most popular months for HPG-reserved visitors to visit.
# Mean of `reserve_visitors` over the HPG reservation rows, by weekday of the
# visit.
plot.2 <- hpg.reserve %>%
  mutate(wday = wday(visit_datetime, label = TRUE)) %>%
  group_by(wday) %>%
  summarise(all_reserved_visits = mean(reserve_visitors)) %>%
  ggplot(aes(x = wday, y = all_reserved_visits, fill = wday)) +
  geom_col() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 0.9)
  ) +
  labs(x = "Weekday", y = "Mean visitors")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.2

# Wednesday to Friday are the most visited weekdays (interesting!).
# Total reserved visitors through HPG for each distinct visit date-time.
plot.3 <- hpg.reserve %>%
  group_by(visit_datetime) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(aes(x = visit_datetime, y = all_visitors)) +
  geom_col(fill = "red")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.3

# Take a look at how visitors respond to their HPG reservations; to do so, we add a new variable representing the time difference between the visit and the reservation.
# Add the lead time between reservation and visit (in hours) and the hour of
# day of the visit to the HPG reservations.
newhpg <- mutate(
  hpg.reserve,
  respond_hour = time_length(visit_datetime - reserve_datetime, unit = "hour"),
  visit_hour = hour(visit_datetime)
)

# Total reserved visitors by lead time, restricted to lead times under five
# days (24 * 5 hours).
plot.3 <- newhpg %>%
  filter(respond_hour < 24 * 5) %>%
  group_by(respond_hour) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(aes(x = respond_hour, y = all_visitors)) +
  geom_col(fill = "orange") +
  labs(x = "Responding time from reservation to visit through hpg [hours]")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.3

# We observe that people usually visit between 24 and 48 hours after reserving, and this pattern is consistent.
# Total reserved visitors through HPG by hour of day of the visit.
plot.4 <- newhpg %>%
  group_by(visit_hour) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(aes(x = visit_hour, y = all_visitors)) +
  geom_col(fill = "blue")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.4

# We observe that dinner time has more customers than any other time of day.
# Total reserved visitors through AIR for each distinct visit date-time.
# The series is grouped and plotted by `visit_datetime`, so the x axis is
# labelled as the visit date (it was previously mislabelled as the
# reservation date).
plot.1 <- air.reserve %>%
  group_by(visit_datetime) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(aes(x = visit_datetime, y = all_visitors)) +
  geom_line(colour = "purple") +
  labs(x = "Date of visit", y = "total air reservation")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.1

# This corresponds well to the plot for air.visits: a general increase from 2016 to 2017 is observed (meaning fewer reservations were made through AIR in 2016).
# Histogram of `reserve_visitors` per AIR reservation row on a log10 x axis,
# with a red reference line at 20.
plot.2 <- ggplot(air.reserve, aes(x = reserve_visitors)) +
  geom_vline(xintercept = 20, colour = "red") +
  geom_histogram(fill = "purple", bins = 50) +
  scale_x_log10()
plot.2

# Mean of `reserve_visitors` over the AIR reservation rows, by weekday of the
# visit.
plot.3 <- air.reserve %>%
  mutate(wday = wday(visit_datetime, label = TRUE)) %>%
  group_by(wday) %>%
  summarise(visits = mean(reserve_visitors)) %>%
  ggplot(aes(x = wday, y = visits, fill = wday)) +
  geom_col() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 0.9)
  ) +
  labs(x = "Weekday", y = "Mean reserved visitors through air")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.3

# Median of `reserve_visitors` over the AIR reservation rows, by month of the
# visit.
plot.4 <- air.reserve %>%
  mutate(month = month(visit_datetime, label = TRUE)) %>%
  group_by(month) %>%
  summarise(visits = median(reserve_visitors)) %>%
  ggplot(aes(x = month, y = visits, fill = month)) +
  geom_col() +
  theme(legend.position = "none") +
  labs(x = "Month", y = "Median reserved visitors through air")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.4

# Overall, we see that the AIR reservation data corresponds well to the AIR visit data, except that AIR reservations can be very busy from October to June, which is quite different from the HPG reservations; recall that HPG reservations are popular in March and December only (interesting!).
#newair.reserve = air_reserve %>%
#mutate(visit_hour = hour(visit_datetime),
#diff_hour = time_length(visit_datetime - reserve_datetime, unit = "hour"),)
# Add the lead time between reservation and visit (in hours) and the hour of
# day of the visit to the AIR reservations.
newair.reserve <- mutate(
  air.reserve,
  diff_hour = time_length(visit_datetime - reserve_datetime, unit = "hour"),
  visit_hour = hour(visit_datetime)
)

# Total reserved visitors through AIR by hour of day of the visit.
p2 <- newair.reserve %>%
  group_by(visit_hour) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(aes(x = visit_hour, y = all_visitors)) +
  geom_col(fill = "orange")
## `summarise()` ungrouping output (override with `.groups` argument)

# Total reserved visitors by lead time, restricted to lead times under five
# days (24 * 5 hours).
p3 <- newair.reserve %>%
  filter(diff_hour < 24 * 5) %>%
  group_by(diff_hour) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(aes(x = diff_hour, y = all_visitors)) +
  geom_col(fill = "orange") +
  labs(x = "Time from reservation to visit through air [hours]")
## `summarise()` ungrouping output (override with `.groups` argument)
p2

p3

# We see that dinner time is still the most popular; the time difference follows a clear 24-hour pattern; there are extreme cases where the reservation was made almost a year in advance.
# Look at the holiday calendar once more before plotting it.
glimpse(holidays)
## Rows: 517
## Columns: 4
## $ calendar_date <chr> "2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04",…
## $ day_of_week <chr> "Friday", "Saturday", "Sunday", "Monday", "Tuesday", "W…
## $ holiday_flg <lgl> TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ date <date> 2016-01-01, 2016-01-02, 2016-01-03, 2016-01-04, 2016-0…

# Bar chart of the number of holiday vs. non-holiday days.
plot.1 <- ggplot(holidays, aes(x = holiday_flg, fill = holiday_flg)) +
  geom_bar() +
  theme(legend.position = "none")
plot.1

# The same counts as a table.
holidays %>%
  group_by(holiday_flg) %>%
  tally()
## # A tibble: 2 x 2
## holiday_flg n
## <lgl> <int>
## 1 FALSE 482
## 2 TRUE 35
# Around 6.77% of the 517 calendar days (35 of 517) are holidays; equivalently, there are 35 holidays for 482 non-holidays (a ratio of about 7.26%).
# Re-apply the holiday conversions (logical flag, parsed `date`); this leaves
# an already converted table unchanged.
holidays <- holidays %>%
  mutate(
    holiday_flg = as.logical(holiday_flg),
    date = ymd(calendar_date)
  )

# Copy of the calendar with lubridate's weekday for each date.
Holidays <- holidays %>%
  mutate(wday = wday(date))

# Holiday flag per day for mid-April to end of May 2016.
plot.1 <- Holidays %>%
  filter(date > ymd("2016-04-15") & date < ymd("2016-06-01")) %>%
  ggplot(aes(date, holiday_flg, color = holiday_flg)) +
  geom_point(size = 2) +
  theme(legend.position = "none") +
  labs(x = "Holidays for 2016 date")

# Same window for 2017. Uses the parsed `date` column rather than the
# character `calendar_date`, so the x axis is a continuous date scale that is
# directly comparable with the 2016 plot (the character column produced a
# discrete axis with one tick label per day).
plot.2 <- Holidays %>%
  filter(date > ymd("2017-04-15") & date < ymd("2017-06-01")) %>%
  ggplot(aes(date, holiday_flg, color = holiday_flg)) +
  geom_point(size = 2) +
  theme(legend.position = "none") +
  labs(x = "Holidays for 2017 date")
plot.2

plot.1

# Here we can conclude that holidays do not have a significant impact on the number of reservations made through AIR or HPG, since in total there is the same number of holidays in 2016 and in 2017.
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# We observed seasonality in the AIR visits: March and December are the most visited months of the year.
## Add month and weekday components to the AIR data set
# Add month and weekday labels (ordered factors) to the AIR visits.
air.visits <- air.visits %>%
  mutate(
    month = month(visit_date, label = TRUE),
    wday = wday(visit_date, label = TRUE)
  )

## The inclusion of Year
# Same table with the calendar year added as well.
air2.visits.yr <- air.visits %>%
  mutate(
    year = year(visit_date),
    month = month(visit_date, label = TRUE),
    wday = wday(visit_date, label = TRUE)
  )

# Linear model of visitors on month and weekday.
# NOTE(review): ts() coerces a data frame to a numeric matrix, so `month` and
# `wday` enter the model as integer codes (the printed fit shows a single
# slope for each) rather than as categorical effects. The rows are also
# store-by-date records, not one ordered series. Confirm this is intended.
air1.ts <- ts(air.visits)
model1 <- tslm(visitors ~ month + wday, data = air1.ts)
model1
##
## Call:
## tslm(formula = visitors ~ month + wday, data = air1.ts)
##
## Coefficients:
## (Intercept) month wday
## 17.47673 -0.00231 0.83865
## Inclusion of Year component
# Same linear model as model1 with the calendar year as an extra regressor.
# NOTE(review): as with model1, ts() turns the data frame into a numeric
# matrix, so year, month and wday are all fitted as single numeric slopes
# (see the printed coefficients). Confirm this is intended.
air2.visits.yr.ts <- ts(air2.visits.yr)
model2 <- tslm(visitors ~ year + month + wday, data=air2.visits.yr.ts)
# Print the fitted model.
model2
##
## Call:
## tslm(formula = visitors ~ year + month + wday, data = air2.visits.yr.ts)
##
## Coefficients:
## (Intercept) year month wday
## 481.60174 -0.23013 -0.02201 0.83893
# Training-set accuracy measures of model1, rendered as a table.
kable(accuracy(model1))
## |             | ME|     RMSE|      MAE|       MPE|     MAPE|     MASE|      ACF1|
## |:------------|--:|--------:|--------:|---------:|--------:|--------:|---------:|
## |Training set |  0| 16.67548| 12.40542| -128.4076| 156.7472| 1.153626| 0.5093798|
# Training-set accuracy measures of model2, rendered as a table.
kable(accuracy(model2))
## |             | ME|    RMSE|      MAE|       MPE|     MAPE|     MASE|      ACF1|
## |:------------|--:|-------:|--------:|---------:|--------:|--------:|---------:|
## |Training set |  0| 16.6753| 12.40535| -128.3874| 156.7267| 1.153619| 0.5093626|
# Forecast horizon: the number of distinct dates in the submission ids. The
# ids are split on "_" into three pieces, the last one being the date.
prediction <- testdata %>%
  separate(id, into = c("air", "store_id", "date"), sep = "_") %>%
  distinct(date) %>%
  nrow()

# Automatic ARIMA (selected by AIC) on the raw `visitors` column.
# NOTE(review): `air.visits$visitors` holds one value per store and date, so
# the model sees the rows of all stores as one sequence. Confirm that this is
# the intended input.
model3 <- auto.arima(air.visits$visitors, ic = "aic")

# Forecast `prediction` steps ahead with 50% and 95% intervals.
arima_visits <- forecast(model3, h = prediction, level = c(50, 95))
kable(accuracy(model3))
## |             |         ME|     RMSE|     MAE|       MPE|     MAPE|      MASE|       ACF1|
## |:------------|----------:|--------:|-------:|---------:|--------:|---------:|----------:|
## |Training set | -0.0007324| 12.49051| 8.25521| -61.74455| 86.55579| 0.7676824| -0.0002372|
# Accuracy of the forecast object. No hold-out data is passed, so only the
# training-set row is reported; the recorded values are identical to those
# shown for model3.
kable(accuracy(arima_visits))
## |             |         ME|     RMSE|     MAE|       MPE|     MAPE|      MASE|       ACF1|
## |:------------|----------:|--------:|-------:|---------:|--------:|---------:|----------:|
## |Training set | -0.0007324| 12.49051| 8.25521| -61.74455| 86.55579| 0.7676824| -0.0002372|
# Plot the ARIMA forecast with its prediction intervals.
autoplot(arima_visits) + ggtitle("Forecasts from ARIMA")

# Wrap the `visitors` column (the third column of air.visits) in a ts object
# and fit an ETS model with multiplicative error, additive trend and no
# seasonality.
# NOTE(review): `frequency = 12` with `start = 2016` treats every row as one
# month, while the rows are store-by-date records. Confirm the intended time
# index.
air.visits.ts <- ts(air.visits$visitors, start = 2016, frequency = 12)
model4 <- ets(air.visits.ts, model = "MAN")
kable(accuracy(model4))
## |             |         ME|     RMSE|      MAE|       MPE|     MAPE|      MASE|      ACF1|
## |:------------|----------:|--------:|--------:|---------:|--------:|---------:|---------:|
## |Training set | -0.0009522| 12.95466| 8.654044| -66.30959| 91.35238| 0.7523813| 0.1166475|
# Component plot of the fitted model4.
# NOTE(review): the residual check further down reports this fit as
# ETS(M,Ad,N), i.e. with a damped trend, although the title says "MAN".
autoplot(model4) + ggtitle("ETS-MAN model of all visitors through air")

# ETS with multiplicative error, no trend and no seasonality on the same
# series.
model5 <- ets(air.visits.ts, model = "MNN")
kable(accuracy(model5))
## |             |         ME|    RMSE|    MAE|       MPE|     MAPE|      MASE|      ACF1|
## |:------------|----------:|-------:|------:|---------:|--------:|---------:|---------:|
## |Training set | -0.0005889| 12.9546| 8.6543| -66.32081| 91.36294| 0.7524035| 0.1161476|
# Component plot of the fitted model5.
autoplot(model5) + ggtitle("ETS-MNN model of all visitors through air")

# Residual diagnostics for model4. The recorded Ljung-Box test has a
# p-value < 2.2e-16, i.e. the residuals are still autocorrelated.
checkresiduals(model4)

##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,N)
## Q* = 17646, df = 19, p-value < 2.2e-16
##
## Model df: 5. Total lags used: 24
# Residual diagnostics for model5; the recorded Ljung-Box test again rejects
# uncorrelated residuals (p-value < 2.2e-16).
checkresiduals(model5)

##
## Ljung-Box test
##
## data: Residuals from ETS(M,N,N)
## Q* = 17655, df = 22, p-value < 2.2e-16
##
## Model df: 2. Total lags used: 24
`