library('ggplot2') 
library('scales') 
library('grid') 
library('gridExtra') 
library('RColorBrewer') 
library('corrplot') # visualisation
## corrplot 0.84 loaded
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:scales':
## 
##     alpha, rescale
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library('dplyr')
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library('readr') 
## 
## Attaching package: 'readr'
## The following object is masked from 'package:scales':
## 
##     col_factor
library('data.table') 
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library('tibble') 
library('tidyr') 
library('stringr') 
library('forcats') 


library('ggfortify')
library('ggrepel') 
library('ggridges') 
library('ggExtra') 
library('ggforce') 


library('lazyeval') 
library('broom') 
library('purrr') 
## 
## Attaching package: 'purrr'
## The following objects are masked from 'package:lazyeval':
## 
##     is_atomic, is_formula
## The following object is masked from 'package:data.table':
## 
##     transpose
## The following object is masked from 'package:scales':
## 
##     discard
library('lubridate') # date and time
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library('timeDate') # date and time
library('tseries') # time series analysis
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library('forecast') # time series analysis
## Registered S3 methods overwritten by 'forecast':
##   method                 from     
##   autoplot.Arima         ggfortify
##   autoplot.acf           ggfortify
##   autoplot.ar            ggfortify
##   autoplot.bats          ggfortify
##   autoplot.decomposed.ts ggfortify
##   autoplot.ets           ggfortify
##   autoplot.forecast      ggfortify
##   autoplot.stl           ggfortify
##   autoplot.ts            ggfortify
##   fitted.ar              ggfortify
##   fortify.ts             ggfortify
##   residuals.ar           ggfortify
# Load the eight CSV files.
#
# All files are read from a single directory, so its location is defined once
# in `data_dir`; change it here if the data is stored elsewhere. Each table is
# bound to two names: the file-style name (e.g. `air_reserve`) and the dotted
# alias (e.g. `air.reserve`) that the rest of this script works with.
data_dir <- "~/Desktop/recruit-restaurant-visitor-forecasting"

air_reserve <- read.csv(file.path(data_dir, "air_reserve.csv"))
air.reserve <- air_reserve

air_visit_data <- read.csv(file.path(data_dir, "air_visit_data.csv"))
air.visits <- air_visit_data

air_store_info <- read.csv(file.path(data_dir, "air_store_info.csv"))
air.store <- air_store_info

hpg_reserve <- read.csv(file.path(data_dir, "hpg_reserve.csv"))
hpg.reserve <- hpg_reserve

hpg_store_info <- read.csv(file.path(data_dir, "hpg_store_info.csv"))
hpg.store <- hpg_store_info

date_info <- read.csv(file.path(data_dir, "date_info.csv"))
holidays <- date_info

store_id_relation <- read.csv(file.path(data_dir, "store_id_relation.csv"))
stord.ids <- store_id_relation

sample_submission <- read.csv(file.path(data_dir, "sample_submission.csv"))
testdata <- sample_submission

# Count the missing cells in each loaded table (one total per table).
air.reserve %>% is.na() %>% sum()
## [1] 0
air.visits %>% is.na() %>% sum()
## [1] 0
air.store %>% is.na() %>% sum()
## [1] 0
hpg.reserve %>% is.na() %>% sum()
## [1] 0
hpg.store %>% is.na() %>% sum()
## [1] 0
holidays %>% is.na() %>% sum()
## [1] 0
stord.ids %>% is.na() %>% sum()
## [1] 0
testdata %>% is.na() %>% sum()
## [1] 0
# No missing values in any data set
# Per-column summary statistics for the air reservations table.
psych::describe(air.reserve)
##                   vars     n    mean      sd median trimmed     mad min  max
## air_store_id*        1 92378  151.34   92.55    147  150.44  121.57   1  314
## visit_datetime*      2 92378 2958.96 1188.51   3079 3054.18 1214.25   1 4975
## reserve_datetime*    3 92378 4806.34 1942.51   5075 5004.60 1752.43   1 7513
## reserve_visitors     4 92378    4.48    4.92      3    3.39    1.48   1  100
##                   range  skew kurtosis   se
## air_store_id*       313  0.10    -1.30 0.30
## visit_datetime*    4974 -0.60    -0.39 3.91
## reserve_datetime*  7512 -0.80    -0.19 6.39
## reserve_visitors     99  4.65    33.52 0.02
# Per-column summary statistics for the air visits table.
psych::describe(air.visits)
##               vars      n   mean     sd median trimmed    mad min max range
## air_store_id*    1 252108 413.70 239.70    407  413.21 309.86   1 829   828
## visit_date*      2 252108 286.53 123.15    297  294.11 137.88   1 478   477
## visitors         3 252108  20.97  16.76     17   18.82  13.34   1 877   876
##                skew kurtosis   se
## air_store_id*  0.02    -1.21 0.48
## visit_date*   -0.41    -0.69 0.25
## visitors       3.31    74.26 0.03
# Per-column summary statistics for the air store information table.
psych::describe(air.store)
##                 vars   n   mean     sd median trimmed    mad    min    max
## air_store_id*      1 829 415.00 239.46 415.00  415.00 306.90   1.00 829.00
## air_genre_name*    2 829   6.26   3.13   7.00    6.02   2.97   1.00  14.00
## air_area_name*     3 829  53.21  31.96  57.00   54.27  43.00   1.00 103.00
## latitude           4 829  35.65   2.08  35.66   35.26   0.10  33.21  44.02
## longitude          5 829 137.42   3.65 139.69  137.82   0.20 130.20 144.27
##                  range  skew kurtosis   se
## air_store_id*   828.00  0.00    -1.20 8.32
## air_genre_name*  13.00  0.45    -0.41 0.11
## air_area_name*  102.00 -0.20    -1.30 1.11
## latitude         10.81  2.65     7.44 0.07
## longitude        14.08 -0.93    -0.55 0.13
# Per-column summary statistics for the hpg reservations table.
psych::describe(hpg.reserve)
##                   vars       n    mean      sd median trimmed     mad min   max
## hpg_store_id*        1 2000320 6680.24 3854.37   6683 6684.65 4965.23   1 13325
## visit_datetime*      2 2000320 5635.59 2681.73   6377 5810.69 2830.28   1  9847
## reserve_datetime*    3 2000320 6724.42 3228.43   7537 6912.04 3629.40   1 11450
## reserve_visitors     4 2000320    5.07    5.42      3    3.92    1.48   1   100
##                   range  skew kurtosis   se
## hpg_store_id*     13324 -0.01    -1.21 2.73
## visit_datetime*    9846 -0.50    -0.94 1.90
## reserve_datetime* 11449 -0.44    -0.99 2.28
## reserve_visitors     99  4.60    36.10 0.00
# Per-column summary statistics for the hpg store information table.
psych::describe(hpg.store)
##                 vars    n    mean      sd  median trimmed     mad    min
## hpg_store_id*      1 4690 2345.50 1354.03 2345.50 2345.50 1738.35   1.00
## hpg_genre_name*    2 4690   14.85    5.08   16.00   14.65    5.93   1.00
## hpg_area_name*     3 4690   65.41   35.89   69.00   66.38   47.44   1.00
## latitude           4 4690   35.81    2.14   35.66   35.38    1.01  33.31
## longitude          5 4690  137.68    3.20  139.50  138.09    0.78 130.34
##                     max   range  skew kurtosis    se
## hpg_store_id*   4690.00 4689.00  0.00    -1.20 19.77
## hpg_genre_name*   34.00   33.00  0.41     0.47  0.07
## hpg_area_name*   119.00  118.00 -0.20    -1.25  0.52
## latitude          43.77   10.46  2.50     6.04  0.03
## longitude        143.71   13.38 -0.98    -0.14  0.05
# Per-column summary statistics for the calendar / holiday table.
psych::describe(holidays)
##                vars   n   mean     sd median trimmed    mad min max range skew
## calendar_date*    1 517 259.00 149.39    259     259 191.26   1 517   516 0.00
## day_of_week*      2 517   4.00   2.00      4       4   2.97   1   7     6 0.00
## holiday_flg       3 517   0.07   0.25      0       0   0.00   0   1     1 3.43
##                kurtosis   se
## calendar_date*    -1.21 6.57
## day_of_week*      -1.26 0.09
## holiday_flg        9.79 0.01
# Per-column summary statistics for the air <-> hpg store id mapping.
psych::describe(stord.ids)
##               vars   n mean    sd median trimmed  mad min max range skew
## air_store_id*    1 150 75.5 43.45   75.5    75.5 55.6   1 150   149    0
## hpg_store_id*    2 150 75.5 43.45   75.5    75.5 55.6   1 150   149    0
##               kurtosis   se
## air_store_id*    -1.22 3.55
## hpg_store_id*    -1.22 3.55
# Per-column summary statistics for the sample submission table.
psych::describe(testdata)
##          vars     n  mean      sd median trimmed      mad min   max range skew
## id*         1 32019 16010 9243.23  16010   16010 11868.21   1 32019 32018    0
## visitors    2 32019     0    0.00      0       0     0.00   0     0     0  NaN
##          kurtosis    se
## id*          -1.2 51.66
## visitors      NaN  0.00
# Convert the date and date-time columns with the lubridate parsers.

# Visit dates are calendar days (year-month-day).
air.visits <- mutate(air.visits, visit_date = ymd(visit_date))

# Both reservation tables carry a visit timestamp and a reservation timestamp.
air.reserve <- mutate(
  air.reserve,
  visit_datetime = ymd_hms(visit_datetime),
  reserve_datetime = ymd_hms(reserve_datetime)
)

hpg.reserve <- mutate(
  hpg.reserve,
  visit_datetime = ymd_hms(visit_datetime),
  reserve_datetime = ymd_hms(reserve_datetime)
)


#air.store <- air.store %>%
#  mutate(air_genre_name = as.factor(air_genre_name),
#         air_area_name = as.factor(air_area_name))

# Make the holiday flag logical and add a parsed `date` column; the original
# `calendar_date` column is kept as it is.
holidays <- mutate(
  holidays,
  holiday_flg = as.logical(holiday_flg),
  date = ymd(calendar_date)
)



# Column types and first values of the air reservations table.
dplyr::glimpse(air.reserve)
## Rows: 92,378
## Columns: 4
## $ air_store_id     <chr> "air_877f79706adbfb06", "air_db4b38ebe7a7ceff", "air…
## $ visit_datetime   <dttm> 2016-01-01 19:00:00, 2016-01-01 19:00:00, 2016-01-0…
## $ reserve_datetime <dttm> 2016-01-01 16:00:00, 2016-01-01 19:00:00, 2016-01-0…
## $ reserve_visitors <int> 1, 3, 6, 2, 5, 2, 4, 2, 2, 2, 3, 3, 2, 6, 7, 41, 13,…
# Column types and first values of the air visits table.
dplyr::glimpse(air.visits)
## Rows: 252,108
## Columns: 3
## $ air_store_id <chr> "air_ba937bf13d40fb24", "air_ba937bf13d40fb24", "air_ba9…
## $ visit_date   <date> 2016-01-13, 2016-01-14, 2016-01-15, 2016-01-16, 2016-01…
## $ visitors     <int> 25, 32, 29, 22, 6, 9, 31, 21, 18, 26, 21, 11, 24, 21, 26…
#glimpse(air.store)
# Column types and first values of the hpg reservations table.
dplyr::glimpse(hpg.reserve)
## Rows: 2,000,320
## Columns: 4
## $ hpg_store_id     <chr> "hpg_c63f6f42e088e50f", "hpg_dac72789163a3f47", "hpg…
## $ visit_datetime   <dttm> 2016-01-01 11:00:00, 2016-01-01 13:00:00, 2016-01-0…
## $ reserve_datetime <dttm> 2016-01-01 09:00:00, 2016-01-01 06:00:00, 2016-01-0…
## $ reserve_visitors <int> 1, 3, 2, 5, 13, 2, 2, 2, 2, 6, 2, 2, 2, 2, 5, 4, 2, …
#glimpse(hpg.store)
# Column types and first values of the calendar / holiday table.
dplyr::glimpse(holidays)
## Rows: 517
## Columns: 4
## $ calendar_date <chr> "2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04",…
## $ day_of_week   <chr> "Friday", "Saturday", "Sunday", "Monday", "Tuesday", "W…
## $ holiday_flg   <lgl> TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ date          <date> 2016-01-01, 2016-01-02, 2016-01-03, 2016-01-04, 2016-0…
# Column types and first values of the air <-> hpg store id mapping.
dplyr::glimpse(stord.ids)
## Rows: 150
## Columns: 2
## $ air_store_id <chr> "air_63b13c56b7201bd9", "air_a24bf50c3e90d583", "air_c7f…
## $ hpg_store_id <chr> "hpg_4bc649e72e2a239a", "hpg_c34b496d0305a809", "hpg_cd8…
# Column types and first values of the sample submission table.
dplyr::glimpse(testdata)
## Rows: 32,019
## Columns: 2
## $ id       <chr> "air_00a91d42b08b08d9_2017-04-23", "air_00a91d42b08b08d9_201…
## $ visitors <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
# Overview plots for the air visits table.

# Visitors summed over all rows sharing a visit date, drawn as a line.
plot.1 <- air.visits %>%
  group_by(visit_date) %>%
  summarise(all_visitors = sum(visitors)) %>%
  ggplot(mapping = aes(x = visit_date, y = all_visitors)) +
  geom_line(colour = "purple") +
  labs(x = "Date of visit", y = "total air visitors")
## `summarise()` ungrouping output (override with `.groups` argument)

# Histogram of the visitor counts on a log10 x axis; the red vertical line
# marks 20 visitors and is added first, so the bars are drawn on top of it.
plot.2 <- ggplot(air.visits, mapping = aes(x = visitors)) +
  geom_vline(xintercept = 20, colour = "red") +
  geom_histogram(fill = "purple", bins = 30) +
  scale_x_log10()

# Mean visitors per weekday.
plot.3 <- air.visits %>%
  mutate(wday = wday(visit_date, label = TRUE)) %>%
  group_by(wday) %>%
  summarise(visits = mean(visitors)) %>%
  ggplot(mapping = aes(x = wday, y = visits, fill = wday)) +
  geom_col() +
  labs(x = "Weekday", y = "Mean visitors") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 0.9)
  )
## `summarise()` ungrouping output (override with `.groups` argument)

# Median visitors per calendar month.
plot.4 <- air.visits %>%
  mutate(month = month(visit_date, label = TRUE)) %>%
  group_by(month) %>%
  summarise(visits = median(visitors)) %>%
  ggplot(mapping = aes(x = month, y = visits, fill = month)) +
  geom_col() +
  labs(x = "Month", y = "Median visitors") +
  theme(legend.position = "none")
## `summarise()` ungrouping output (override with `.groups` argument)

# Render the four plots in order.
plot.1

plot.2

plot.3

plot.4

# Overall, the number of air visitors increases from 2016 to 2017 (more people joined air? more restaurants enrolled?).

# The visitor distribution peaks at around 20-30 visitors per day; Friday to Sunday are more popular than the other weekdays.

# March to May and December are the most visited months of the year (holiday season?); seasonality is observed here.
# Next, we visualise the visitors who reserved through HPG (grouped by their visit date).

# Median reserved party size per calendar month of the visit (hpg).
plot.1 <- hpg.reserve %>%
  mutate(month = month(visit_datetime, label = TRUE)) %>%
  group_by(month) %>%
  summarise(all_reserved_visits = median(reserve_visitors)) %>%
  ggplot(mapping = aes(x = month, y = all_reserved_visits, fill = month)) +
  geom_col() +
  labs(x = "Month", y = "Median visitors") +
  theme(legend.position = "none")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.1

# March and December are the most popular months for hpg reserved visitors to visit



# Mean reserved party size per weekday of the visit (hpg).
plot.2 <- hpg.reserve %>%
  mutate(wday = wday(visit_datetime, label = TRUE)) %>%
  group_by(wday) %>%
  summarise(all_reserved_visits = mean(reserve_visitors)) %>%
  ggplot(mapping = aes(x = wday, y = all_reserved_visits, fill = wday)) +
  geom_col() +
  labs(x = "Weekday", y = "Mean visitors") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 0.9)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
plot.2

# Wednesday to Friday are the most visited weekdays (interesting!)


# Reserved visitors summed per visit timestamp (hpg).
plot.3 <- hpg.reserve %>%
  group_by(visit_datetime) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(mapping = aes(x = visit_datetime, y = all_visitors)) +
  geom_col(fill = "red")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.3

# Look at how far ahead of the visit HPG reservations are made: add the time
# difference between visit and reservation (in hours) and the hour of day of
# the visit as new variables.
newhpg <- mutate(
  hpg.reserve,
  respond_hour = time_length(visit_datetime - reserve_datetime, unit = "hour"),
  visit_hour = hour(visit_datetime)
)

# Reserved visitors by lead time, restricted to lead times under five days.
plot.3 <- newhpg %>%
  filter(respond_hour < 24*5) %>%
  group_by(respond_hour) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(mapping = aes(x = respond_hour, y = all_visitors)) +
  geom_col(fill = "orange") +
  labs(x = "Responding time from reservation to visit through hpg [hours]")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.3

# We observe that people consistently tend to visit 24-48 hours after reserving


# Reserved visitors by hour of day of the visit.
plot.4 <- newhpg %>%
  group_by(visit_hour) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(mapping = aes(x = visit_hour, y = all_visitors)) +
  geom_col(fill = "blue")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.4

# Observed that dinner time has more customers than other time frames
# Reserved visitors summed per visit timestamp (air), drawn as a line.
plot.1 <- air.reserve %>%
  group_by(visit_datetime) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(mapping = aes(x = visit_datetime, y = all_visitors)) +
  geom_line(colour = "purple") +
  labs(x = "Date of reservation", y = "total air reservation")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.1

# This corresponds well to the plot for air.visits: a general increase from 2016 to 2017 is observed (meaning fewer reservations were made through air in 2016)

# Histogram of reserved party sizes on a log10 x axis; the red vertical line
# marks 20 visitors and is added before the bars.
plot.2 <- ggplot(air.reserve, mapping = aes(x = reserve_visitors)) +
  geom_vline(xintercept = 20, colour = "red") +
  geom_histogram(fill = "purple", bins = 50) +
  scale_x_log10()
plot.2

# Mean reserved party size per weekday of the visit.
plot.3 <- air.reserve %>%
  mutate(wday = wday(visit_datetime, label = TRUE)) %>%
  group_by(wday) %>%
  summarise(visits = mean(reserve_visitors)) %>%
  ggplot(mapping = aes(x = wday, y = visits, fill = wday)) +
  geom_col() +
  labs(x = "Weekday", y = "Mean reserved visitors through air") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 0.9)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
plot.3

# Median reserved party size per calendar month of the visit.
plot.4 <- air.reserve %>%
  mutate(month = month(visit_datetime, label = TRUE)) %>%
  group_by(month) %>%
  summarise(visits = median(reserve_visitors)) %>%
  ggplot(mapping = aes(x = month, y = visits, fill = month)) +
  geom_col() +
  labs(x = "Month", y = "Median reserved visitors through air") +
  theme(legend.position = "none")
## `summarise()` ungrouping output (override with `.groups` argument)
plot.4

# Overall, we see that the air reservation data corresponds well to the air visit data, except that air reservations can be very busy from October to June. This is quite different from the hpg reservations which, as we recall, are popular in March and December only (interesting!)
#newair.reserve = air_reserve %>%
  #mutate(visit_hour = hour(visit_datetime),
         #diff_hour = time_length(visit_datetime - reserve_datetime, unit = "hour"),)

# Add the lead time between reservation and visit (in hours) and the hour of
# day of the visit to the air reservations.
newair.reserve <- mutate(
  air.reserve,
  diff_hour = time_length(visit_datetime - reserve_datetime, unit = "hour"),
  visit_hour = hour(visit_datetime)
)



# Reserved visitors by hour of day of the visit.
p2 <- newair.reserve %>%
  group_by(visit_hour) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(mapping = aes(x = visit_hour, y = all_visitors)) +
  geom_col(fill = "orange")
## `summarise()` ungrouping output (override with `.groups` argument)

# Reserved visitors by lead time, restricted to lead times under five days.
p3 <- newair.reserve %>%
  filter(diff_hour < 24*5) %>%
  group_by(diff_hour) %>%
  summarise(all_visitors = sum(reserve_visitors)) %>%
  ggplot(mapping = aes(x = diff_hour, y = all_visitors)) +
  geom_col(fill = "orange") +
  labs(x = "Time from reservation to visit through air [hours]")
## `summarise()` ungrouping output (override with `.groups` argument)
p2

p3

# We see that dinner time is still the most popular; the time difference follows a nice 24-hour visiting pattern; there are extreme cases where the reservation was made almost a year in advance
# Column types and first values of the calendar / holiday table.
dplyr::glimpse(holidays)
## Rows: 517
## Columns: 4
## $ calendar_date <chr> "2016-01-01", "2016-01-02", "2016-01-03", "2016-01-04",…
## $ day_of_week   <chr> "Friday", "Saturday", "Sunday", "Monday", "Tuesday", "W…
## $ holiday_flg   <lgl> TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
## $ date          <date> 2016-01-01, 2016-01-02, 2016-01-03, 2016-01-04, 2016-0…
# Bar chart of how many calendar days are / are not flagged as holidays.
plot.1 <- ggplot(holidays, mapping = aes(x = holiday_flg, fill = holiday_flg)) +
  geom_bar() +
  theme(legend.position = "none")

plot.1

# The same counts as a table.
tally(group_by(holidays, holiday_flg))
## # A tibble: 2 x 2
##   holiday_flg     n
##   <lgl>       <int>
## 1 FALSE         482
## 2 TRUE           35
# About 6.8% of the calendar days (35 of 517) are holidays; 7.26% is the ratio of holiday to non-holiday days (35 / 482)
# Apply the holiday flag / date conversion again. These are the same
# expressions already applied to `holidays` earlier in this script, so the
# table contents do not change.
holidays <- mutate(
  holidays,
  holiday_flg = as.logical(holiday_flg),
  date = ymd(calendar_date)
)

# `Holidays` (capital H) is `holidays` plus the weekday of each date.
Holidays <- mutate(holidays, wday = wday(date))

# Holiday flags over the same window (after 15 April, before 1 June) in 2016
# and in 2017, one point per calendar day.
#
# Both plots filter and plot on the parsed `date` column rather than the
# character `calendar_date` column, so that both get a continuous date axis
# and can be compared directly.
plot.1 <- Holidays %>%
  filter(date > ymd("2016-04-15") & date < ymd("2016-06-01")) %>%
  ggplot(aes(date, holiday_flg, color = holiday_flg)) +
  geom_point(size = 2) +
  theme(legend.position = "none") +
  labs(x = "Holidays for 2016 date")

plot.2 <- Holidays %>%
  filter(date > ymd("2017-04-15") & date < ymd("2017-06-01")) %>%
  ggplot(aes(date, holiday_flg, color = holiday_flg)) +
  geom_point(size = 2) +
  theme(legend.position = "none") +
  labs(x = "Holidays for 2017 date")





# The 2017 plot is rendered first, then the 2016 plot.
plot.2

plot.1

# Here we can conclude that holidays do not have a significant impact on the number of reservations made through air or hpg, since in total there is the same number of holidays in 2016 and in 2017
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# We observed seasonality for air visits: March and December are the most
# visited months of the year.

## Add month and weekday components to the air visits table.
air.visits <- mutate(
  air.visits,
  month = month(visit_date, label = TRUE),
  wday = wday(visit_date, label = TRUE)
)

## Copy of the table that additionally carries the year. Month and weekday are
## derived again from visit_date with the same expressions as above.
air2.visits.yr <- mutate(
  air.visits,
  year = year(visit_date),
  month = month(visit_date, label = TRUE),
  wday = wday(visit_date, label = TRUE)
)

# Linear model of visitors on month and weekday, fitted on the whole table
# wrapped in a ts object.
# NOTE(review): the printed fit has a single slope each for `month` and
# `wday`, i.e. they enter the model as numeric codes rather than as factor
# levels -- confirm that this is intended.
air1.ts <- ts(air.visits)
model1 <- tslm(visitors ~ month + wday, data = air1.ts)
model1
## 
## Call:
## tslm(formula = visitors ~ month + wday, data = air1.ts)
## 
## Coefficients:
## (Intercept)        month         wday  
##    17.47673     -0.00231      0.83865
## Inclusion of Year component
# Same linear model with the year added as a further predictor, fitted on the
# year-augmented table wrapped in a ts object.
air2.visits.yr.ts <- ts(air2.visits.yr)
model2 <- tslm(
  visitors ~ year + month + wday,
  data = air2.visits.yr.ts
)
model2
## 
## Call:
## tslm(formula = visitors ~ year + month + wday, data = air2.visits.yr.ts)
## 
## Coefficients:
## (Intercept)         year        month         wday  
##   481.60174     -0.23013     -0.02201      0.83893
# Training-set accuracy measures of the two linear models, rendered as tables.
# The recorded table output is kept as `##` comments so the script parses.
kable(accuracy(model1))
##              ME     RMSE      MAE       MPE     MAPE     MASE      ACF1
## Training set  0 16.67548 12.40542 -128.4076 156.7472 1.153626 0.5093798
kable(accuracy(model2))
##              ME    RMSE      MAE       MPE     MAPE     MASE      ACF1
## Training set  0 16.6753 12.40535 -128.3874 156.7267 1.153619 0.5093626
# Forecast horizon: the number of distinct dates in the submission ids. Each
# id is split on "_" into three pieces and the last piece is the date.
prediction <- testdata %>%
  separate(col = id, into = c("air", "store_id", "date"), sep = "_") %>%
  pull(date) %>%
  n_distinct()



# Automatic ARIMA (model selected by AIC) on the visitors column, then a
# forecast over the horizon above with 50% and 95% intervals.
model3 <- auto.arima(air.visits$visitors, ic = "aic")
arima_visits <- model3 %>%
  forecast(h = prediction, level = c(50, 95))

# Training-set accuracy of the ARIMA fit and of its forecast object, followed
# by the forecast plot. The recorded table output is kept as `##` comments so
# the script parses.
kable(accuracy(model3))
##                      ME     RMSE     MAE       MPE     MAPE      MASE       ACF1
## Training set -0.0007324 12.49051 8.25521 -61.74455 86.55579 0.7676824 -0.0002372
kable(accuracy(arima_visits))
##                      ME     RMSE     MAE       MPE     MAPE      MASE       ACF1
## Training set -0.0007324 12.49051 8.25521 -61.74455 86.55579 0.7676824 -0.0002372
autoplot(arima_visits) + ggtitle("Forecasts from ARIMA")

# Wrap the third column of air.visits (visitors) in a ts object and fit an ETS
# model requested as "MAN". The recorded table output is kept as `##`
# comments so the script parses.
# NOTE(review): start = 2016 with frequency = 12 labels consecutive rows as
# consecutive months, while air.visits appears to hold one row per store and
# visit date -- confirm that this indexing is intended.
air.visits.ts <- ts(air.visits[, 3], start = c(2016), frequency = 12)

model4 <- ets(air.visits.ts, model = "MAN")
kable(accuracy(model4))
##                      ME     RMSE      MAE       MPE     MAPE      MASE      ACF1
## Training set -0.0009522 12.95466 8.654044 -66.30959 91.35238 0.7523813 0.1166475
autoplot(model4) + ggtitle("ETS-MAN model of all visitors through air")

# ETS model requested as "MNN" on the same series, with its training-set
# accuracy and component plot. The recorded table output is kept as `##`
# comments so the script parses.
model5 <- ets(air.visits.ts, model = "MNN")
kable(accuracy(model5))
##                      ME    RMSE    MAE       MPE     MAPE      MASE      ACF1
## Training set -0.0005889 12.9546 8.6543 -66.32081 91.36294 0.7524035 0.1161476
autoplot(model5) + ggtitle("ETS-MNN model of all visitors through air")

# Residual diagnostics for the two ETS fits. In the recorded output the
# Ljung-Box p-value is below 2.2e-16 for both models.
checkresiduals(model4)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,Ad,N)
## Q* = 17646, df = 19, p-value < 2.2e-16
## 
## Model df: 5.   Total lags used: 24
checkresiduals(model5)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,N,N)
## Q* = 17655, df = 22, p-value < 2.2e-16
## 
## Model df: 2.   Total lags used: 24