library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 2.1.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:lubridate':
##
## stamp
## The following object is masked from 'package:data.table':
##
## melt
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
library(broom)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:reshape':
##
## rename
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tidyr)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✓ tsibble 1.1.1 ✓ feasts 0.2.2
## ✓ tsibbledata 0.4.0 ✓ fable 0.3.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## x data.table::between() masks dplyr::between()
## x lubridate::date() masks base::date()
## x reshape::expand() masks tidyr::expand()
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x data.table::first() masks dplyr::first()
## x fabletools::forecast() masks forecast::forecast()
## x lubridate::hour() masks data.table::hour()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x lubridate::isoweek() masks data.table::isoweek()
## x tsibble::key() masks data.table::key()
## x dplyr::lag() masks stats::lag()
## x data.table::last() masks dplyr::last()
## x caret::lift() masks purrr::lift()
## x fabletools::MAE() masks caret::MAE()
## x lubridate::mday() masks data.table::mday()
## x lubridate::minute() masks data.table::minute()
## x lubridate::month() masks data.table::month()
## x lubridate::quarter() masks data.table::quarter()
## x plotly::rename() masks reshape::rename(), dplyr::rename()
## x fabletools::RMSE() masks caret::RMSE()
## x lubridate::second() masks data.table::second()
## x tsibble::setdiff() masks base::setdiff()
## x reshape::stamp() masks lubridate::stamp()
## x data.table::transpose() masks purrr::transpose()
## x tsibble::union() masks base::union()
## x lubridate::wday() masks data.table::wday()
## x lubridate::week() masks data.table::week()
## x lubridate::yday() masks data.table::yday()
## x lubridate::year() masks data.table::year()
library(fma)
library(readxl)
library(tsibble)
library(feasts)
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:seasonal':
##
## outlier
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(MASS)
##
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
##
## cement, housing, petrol
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
library(neuralnet)
##
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
##
## compute
library(dplyr)
library(vars)
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
##
## index
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
##
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
##
## boundary
## Loading required package: urca
## Loading required package: lmtest
##
## Attaching package: 'vars'
## The following object is masked from 'package:fable':
##
## VAR
library(expsmooth)
###Import data###
air_visit_data <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/air_visit_data.csv")
air_reserve <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/air_reserve.csv")
air_store_info <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/air_store_info.csv")
hpg_reserve <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/hpg_reserve.csv")
hpg_store_info <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/hpg_store_info.csv")
store_id_relation <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/store_id_relation.csv")
date_info <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/date_info.csv")
sample_submission <- read.csv("~/Desktop/recruit-restaurant-visitor-forecasting/sample_submission.csv")
summary(air_visit_data)
## air_store_id visit_date visitors
## Length:252108 Length:252108 Min. : 1.00
## Class :character Class :character 1st Qu.: 9.00
## Mode :character Mode :character Median : 17.00
## Mean : 20.97
## 3rd Qu.: 29.00
## Max. :877.00
summary(air_reserve)
## air_store_id visit_datetime reserve_datetime reserve_visitors
## Length:92378 Length:92378 Length:92378 Min. : 1.000
## Class :character Class :character Class :character 1st Qu.: 2.000
## Mode :character Mode :character Mode :character Median : 3.000
## Mean : 4.482
## 3rd Qu.: 5.000
## Max. :100.000
summary(air_store_info)
## air_store_id air_genre_name air_area_name latitude
## Length:829 Length:829 Length:829 Min. :33.21
## Class :character Class :character Class :character 1st Qu.:34.70
## Mode :character Mode :character Mode :character Median :35.66
## Mean :35.65
## 3rd Qu.:35.69
## Max. :44.02
## longitude
## Min. :130.2
## 1st Qu.:135.3
## Median :139.7
## Mean :137.4
## 3rd Qu.:139.8
## Max. :144.3
summary(hpg_reserve)
## hpg_store_id visit_datetime reserve_datetime reserve_visitors
## Length:2000320 Length:2000320 Length:2000320 Min. : 1.000
## Class :character Class :character Class :character 1st Qu.: 2.000
## Mode :character Mode :character Mode :character Median : 3.000
## Mean : 5.074
## 3rd Qu.: 6.000
## Max. :100.000
summary(hpg_store_info)
## hpg_store_id hpg_genre_name hpg_area_name latitude
## Length:4690 Length:4690 Length:4690 Min. :33.31
## Class :character Class :character Class :character 1st Qu.:34.69
## Mode :character Mode :character Mode :character Median :35.66
## Mean :35.81
## 3rd Qu.:35.70
## Max. :43.77
## longitude
## Min. :130.3
## 1st Qu.:135.5
## Median :139.5
## Mean :137.7
## 3rd Qu.:139.7
## Max. :143.7
summary(store_id_relation)
## air_store_id hpg_store_id
## Length:150 Length:150
## Class :character Class :character
## Mode :character Mode :character
summary(date_info)
## calendar_date day_of_week holiday_flg
## Length:517 Length:517 Min. :0.0000
## Class :character Class :character 1st Qu.:0.0000
## Mode :character Mode :character Median :0.0000
## Mean :0.0677
## 3rd Qu.:0.0000
## Max. :1.0000
summary(sample_submission)
## id visitors
## Length:32019 Min. :0
## Class :character 1st Qu.:0
## Mode :character Median :0
## Mean :0
## 3rd Qu.:0
## Max. :0
###Processing the dataframe above###
After looking at the sample_submission dataset, I found that both the id and the date exist in the id column, so I separate id into air_store_id and visit_date
sample_submission <- sample_submission %>%
mutate(air_store_id = str_sub(id,1,20),
visit_date = ymd(str_sub(id,22,31))) %>% dplyr::select(-id)
Reform air_visit_date into ymd date format and log1p transformed visitors. For real-valued input, log1p is accurate also for x so small that 1 + x == 1 in floating-point accuracy.
air_visit_data <- air_visit_data %>%
mutate(visit_date = ymd(visit_date),
visitors = log1p(visitors))
Adding visit date & reserve date to hpg_reserve & air_reserve datasets
air_reserve <- air_reserve %>%
mutate(visit_datetime = ymd_hms(visit_datetime),
reserve_datetime = ymd_hms(reserve_datetime),
visit_date = date(visit_datetime),
reserve_date = date(reserve_datetime))
hpg_reserve <- hpg_reserve %>%
mutate(visit_datetime = ymd_hms(visit_datetime),
reserve_datetime = ymd_hms(reserve_datetime),
visit_date = date(visit_datetime),
reserve_date = date(reserve_datetime))
Derive prefecture from air_area_name column in air_store_info dataset
air_store_info <- air_store_info %>%
separate(air_area_name, c("prefecture"), sep = " ", remove = FALSE)
## Warning: Expected 1 pieces. Additional pieces discarded in 829 rows [1, 2, 3, 4,
## 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
Encoding day of week Mon-Sun as 1-7 and adding flags for weekend or holiday in date_info dataset. 0: represent weekday. 1:represent weekend or holiday.
date_info <- date_info %>%
dplyr::select(-day_of_week) %>%
mutate(calendar_date = ymd(calendar_date),
dow = wday(calendar_date, label=TRUE),
dow = fct_relevel(dow, c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")),
dow = as.numeric(dow),
weekend_or_holiday_flg = ifelse(dow > 5 | holiday_flg == 1, 1, 0))
Row binding train data (air_visit_data,1/1/2016-4/22/2017) and test data (sample_submission,4/23/2017-5/31/2017)
data <- bind_rows(air_visit_data, sample_submission) %>%
mutate(dow = wday(visit_date, label=TRUE),
dow = fct_relevel(dow, c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")),
dow = as.numeric(dow),
month = month(visit_date),
year = year(visit_date))
Joining air_store_info and date_info (4/23/2017-5/31/2017) with data
data <- data %>%
left_join(air_store_info, by = "air_store_id") %>%
left_join(date_info, by = c("visit_date" = "calendar_date", "dow"))
###Variables Relationship Exploring & Visualization###
#Sum log1p(visitors) over time for 50 stores.
p1 <- air_visit_data %>%
filter(air_store_id %in% unique(.$air_store_id)[sample(1:829, 50)]) %>%
group_by(visit_date) %>%
summarise(all_visitors = sum(visitors)) %>%
ggplot(aes(visit_date,all_visitors)) +
geom_line(col = "black") +
labs(x = "Date", y = "log1p(visitors)")
p1
A prominent weekly pattern
#log1p(visitors) when grouped by day-of-week, genre, and holiday&weekends. # Distribution of log1p(visitors) by day-of-week
p2 <- air_visit_data %>%
mutate(day_of_week = wday(visit_date, label = TRUE)) %>%
ggplot(aes(x = reorder(day_of_week, visitors, FUN = median), y = visitors,
fill = day_of_week)) +
geom_boxplot() +
labs(x = "Day of week", y = "log1p(visitors)") +
theme(legend.position = "none")
ggplotly(p2)
Fridays and Sundays are more popular
p3 <- air_visit_data %>%
left_join(air_store_info, by = "air_store_id") %>%
ggplot(aes(x = reorder(air_genre_name, visitors, FUN = median), y = visitors,
fill = air_genre_name)) +
geom_boxplot() +
labs(x = "Genre", y = "log1p(visitors)") +
theme(legend.position = "none") +
coord_flip()
ggplotly(p3)
Asian most popular, Bar least popular
#Relation between actual visitors log1p(visitors) visited and log1p(reserve_visitors) and reserve visitors log1p(reserve_visitors) in Air system # Scatterplot of air visit vs air reserve
p4 <- air_reserve %>%
group_by(air_store_id, visit_date) %>%
summarise(reserve_visitors = log1p(sum(reserve_visitors))) %>%
inner_join(air_visit_data, by = c("air_store_id", "visit_date")) %>%
ggplot(aes(x = visitors, y = reserve_visitors)) +
geom_point(color = "black", alpha = 0.3, size = 2) +
geom_smooth(method = "loess", color = "blue") +
labs(x = "log1p(visitors)", y = "log1p(air_reserve_visitors)")
## `summarise()` has grouped output by 'air_store_id'. You can override using the
## `.groups` argument.
ggplotly(p4)
## `geom_smooth()` using formula 'y ~ x'
###Preparation for Building Models###
We have to forecast the visitors from date “23-04-2017” to “31-05-2017” (that is, for 39 consecutive days). A forecast window (FW) is defined as time frame for which predictions are made and feature derivation window (FDW) is defined as time frame that is prior in time to the FW and is used for deriving features from the target variable or any of its covariates. The data used for modelling is prepared by row binding the dataframes formed by sliding the FW. We generate data by using fixed size FW of 39 days silding by 7 days and variable size FW of 7, 14, 21, 28 and 35 days just before “23-04-2017”.
# Function to get chunk
get_chunk <- function(chunk_start_date, ndays){
chunk_end_date <- chunk_start_date + ndays
chunk <- data %>%
filter(visit_date < chunk_end_date & visit_date >= chunk_start_date) %>%
mutate(diff_from_chunk_start_date = time_length(visit_date - chunk_start_date, unit = "day"))
}
# Function to get visitors stats based on prior duration of chunk grouped by air_store_id
get_store_visitor_stats <- function(chunk, chunk_start_date, ndays){
prior_start_date <- ymd(chunk_start_date - ndays)
temp <- data %>%
filter(visit_date < chunk_start_date & visit_date >= prior_start_date) %>%
group_by(air_store_id) %>%
summarise(mean = mean(visitors),
median = median(visitors),
max = max(visitors),
min = min(visitors),
sd = sd(visitors),
skewness = e1071::skewness(visitors),
count = n()) %>%
plyr::rename(c(mean = paste0("store_mean_", ndays,"_days_prior"),
median = paste0("store_median_", ndays,"_days_prior"),
max = paste0("store_max_", ndays,"_days_prior"),
min = paste0("store_min_", ndays,"_days_prior"),
sd = paste0("store_sd_", ndays,"_days_prior"),
skewness = paste0("store_skewness_", ndays,"_days_prior"),
count = paste0("store_count_", ndays,"_days_prior")))
chunk = chunk %>% left_join(temp, by = "air_store_id")
chunk[is.na(chunk)] <- 0
chunk
}
# Function to get chunk and features based on days prior to chunk interval
get_chunk_and_features <- function(chunk_start_date, ndays){
chunk <- get_chunk(chunk_start_date, ndays)
chunk <- get_store_visitor_stats(chunk,chunk_start_date,500)
chunk <- get_store_visitor_stats(chunk,chunk_start_date,56)
chunk <- get_store_visitor_stats(chunk,chunk_start_date,28)
chunk <- get_store_visitor_stats(chunk,chunk_start_date,14)
chunk
}
train <- data.frame()
start_date = ymd("2017-03-12")
for(i in 0:57){
train_sub <- get_chunk_and_features(start_date - (i*7), 39)
train <- train %>% bind_rows(train_sub)
}
for(i in 1:5){
train_sub <- get_chunk_and_features(start_date + (i*7), 42 - (i*7))
train <- train %>% bind_rows(train_sub)
}
test <- get_chunk_and_features(start_date + 42, 39)
comb <- train %>% bind_rows(test) %>%
mutate(air_genre_name = as.numeric(as.factor(air_genre_name)),
air_area_name = as.numeric(as.factor(air_area_name)),
prefecture = as.numeric(as.factor(prefecture)))
train <- comb[1:nrow(train), ]
test <- comb[-(1:nrow(train)), ]
rm(comb)
data_ts <- ts(data$visitors,start=c(2016,1),end=c(2017,5), frequency=12)
data_ts %>% autoplot() + labs(title="Numbers of log1p(Visitors) in data ") +
ylab("log1p(Visitors)") +
guides(colour=guide_legend(title="index"))
train_ts <- ts(train$visitors,start=c(2016,1),end=c(2017,4), frequency=12)
train_ts %>% autoplot() + labs(title="Numbers of log1p visitors in train data ") +
ylab("log1p visitors") + xlab("Date")
train_ts%>%as_tsibble() %>%
gg_tsdisplay(plot_type='partial')
## Plot variable not specified, automatically selected `y = value`
acf(train_ts)
pacf(train_ts)
###Building Models###
###Arima Model###(Half success)
f <- train_ts %>% as_tsibble() %>%
model(arima210 = ARIMA(visitors ~ pdq(2,1,0)),
arima111 = ARIMA(visitors ~ pdq(1,1,1)),
arima011 = ARIMA(visitors ~ pdq(0,1,1)),
arima012 = ARIMA(visitors ~ pdq(0,1,2)),
arima013 = ARIMA(visitors ~ pdq(0,1,3)),
arima000 = ARIMA(visitors ~ pdq(0,0,0)),
auto = ARIMA(visitors, stepwise = FALSE, approx = FALSE), #By setting stepwise=FALSE and approximation=FALSE, we are making R work extra hard to find a good model. This takes much longer, but with only one series to model, the extra time taken is not a problem.
stepwise = ARIMA(visitors),
search = ARIMA(visitors, stepwise=FALSE))
f %>% pivot_longer(everything(), names_to = "Model name",
values_to = "Orders")
## # A mable: 9 x 2
## # Key: Model name [9]
## `Model name` Orders
## <chr> <model>
## 1 arima210 <ARIMA(2,1,0)(0,1,0)[12]>
## 2 arima111 <ARIMA(1,1,1)(0,1,0)[12]>
## 3 arima011 <ARIMA(0,1,1)(0,1,0)[12]>
## 4 arima012 <ARIMA(0,1,2)(0,1,0)[12]>
## 5 arima013 <ARIMA(0,1,3)(0,1,0)[12]>
## 6 arima000 <ARIMA(0,0,0)(0,1,0)[12] w/ drift>
## 7 auto <ARIMA(3,0,1)(0,1,0)[12] w/ drift>
## 8 stepwise <ARIMA(1,0,1)(0,1,0)[12] w/ drift>
## 9 search <ARIMA(2,0,3)(0,1,0)[12] w/ drift>
glance(f) %>% arrange(AICc) %>% dplyr::select(.model:BIC)
## # A tibble: 9 × 6
## .model sigma2 log_lik AIC AICc BIC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 auto 390. -1002. 2016. 2016. 2037.
## 2 search 389. -1001. 2016. 2017. 2040.
## 3 arima111 416. -1008. 2021. 2021. 2032.
## 4 stepwise 405. -1007. 2021. 2022. 2035.
## 5 arima013 431. -1009. 2027. 2027. 2040.
## 6 arima012 438. -1012. 2029. 2029. 2039.
## 7 arima011 440. -1013. 2029. 2029. 2036.
## 8 arima210 446. -1013. 2033. 2033. 2043.
## 9 arima000 732. -1075. 2154. 2154. 2161.
#auto arima performs best, smallest AICc.
fit_arima <- train_ts %>% as_tsibble() %>%
model(auto = ARIMA(visitors, stepwise = FALSE, approx = FALSE))
report(fit_arima)
## Series: visitors
## Model: ARIMA(3,0,1)(0,1,0)[12] w/ drift
##
## Coefficients:
## ar1 ar2 ar3 ma1 constant
## -0.4095 0.6526 0.2237 0.9784 9.7176
## s.e. 0.0668 0.0568 0.0660 0.0195 2.5230
##
## sigma^2 estimated as 390.5: log likelihood=-1001.97
## AIC=2015.95 AICc=2016.33 BIC=2036.52
checkresiduals(train_ts)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
fc_arima <- forecast(fit_arima, h=39) %>%
filter(.model=='auto') %>%
autoplot(visitors) +
labs(title = "ARIMA(3,0,1)(0,1,0) Prediction",
x = "Date", y = "Log1p(Visitors)")
fc_arima
###Neural Nets Model###(Half success)
fitnn <- train_ts %>% as_tsibble() %>%
model(NNETAR(sqrt(visitors)))
report(fitnn)
## Series: visitors
## Model: NNAR(1,1,2)[12]
## Transformation: sqrt(visitors)
##
## Average of 20 networks, each of which is
## a 2-2-1 network with 9 weights
## options were - linear output units
##
## sigma^2 estimated as 0.4707
fc_nn <- forecast(fitnn, h=39) %>%
autoplot(visitors)+
labs(title = "NNAR(1,1,2) Prediction",
y = "log1p visitors")
fc_nn
###TSLM Model###(Success)
lm_model <- lm(visitors ~ month + dow + weekend_or_holiday_flg, data=train)
lm_model
##
## Call:
## lm(formula = visitors ~ month + dow + weekend_or_holiday_flg,
## data = train)
##
## Coefficients:
## (Intercept) month dow
## 2.5657309 -0.0003705 0.0519277
## weekend_or_holiday_flg
## 0.0852223
summary(lm_model)
##
## Call:
## lm(formula = visitors ~ month + dow + weekend_or_holiday_flg,
## data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3209 -0.5202 0.0915 0.5961 4.0572
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.5657309 0.0021380 1200.071 <2e-16 ***
## month -0.0003705 0.0001931 -1.918 0.0551 .
## dow 0.0519277 0.0004951 104.886 <2e-16 ***
## weekend_or_holiday_flg 0.0852223 0.0021069 40.449 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7993 on 1322333 degrees of freedom
## Multiple R-squared: 0.02685, Adjusted R-squared: 0.02685
## F-statistic: 1.216e+04 on 3 and 1322333 DF, p-value: < 2.2e-16
accuracy(lm_model)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.501226e-16 0.7993134 0.6444566 -13.84612 31.76349 0.9849236
tts <- ts(train)
tslm_model <- tslm(visitors ~ month + dow + weekend_or_holiday_flg, data=tts)
tslm_model
##
## Call:
## tslm(formula = visitors ~ month + dow + weekend_or_holiday_flg,
## data = tts)
##
## Coefficients:
## (Intercept) month dow
## 2.5657309 -0.0003705 0.0519277
## weekend_or_holiday_flg
## 0.0852223
summary(tslm_model)
##
## Call:
## tslm(formula = visitors ~ month + dow + weekend_or_holiday_flg,
## data = tts)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3209 -0.5202 0.0915 0.5961 4.0572
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.5657309 0.0021380 1200.071 <2e-16 ***
## month -0.0003705 0.0001931 -1.918 0.0551 .
## dow 0.0519277 0.0004951 104.886 <2e-16 ***
## weekend_or_holiday_flg 0.0852223 0.0021069 40.449 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7993 on 1322333 degrees of freedom
## Multiple R-squared: 0.02685, Adjusted R-squared: 0.02685
## F-statistic: 1.216e+04 on 3 and 1322333 DF, p-value: < 2.2e-16
accuracy(tslm_model)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.501226e-16 0.7993134 0.6444566 -13.84612 31.76349 1.082882
## ACF1
## Training set 0.507867
fit_tslm <- tslm(train_ts ~ trend + season)
accuracy(fit_tslm)
## ME RMSE MAE MPE MAPE MASE
## Training set -3.122502e-17 0.1472828 0.1002915 -0.3533385 4.098759 0.25
## ACF1
## Training set -0.799346
autoplot(forecast(fit_tslm, h=5)) + labs(y = "log1p(visitors)")
fit_trends1 <- tslm(log(train_ts) ~ trend + season)
accuracy(fit_trends1)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.040834e-17 0.05910358 0.03952886 -0.4537455 4.571334 0.25
## ACF1
## Training set -0.7929404
autoplot(forecast(fit_trends1, h=5)) + labs(y = "exponential visitors")
#The fitted exponential trend and forecasting, best.
ETS MODEL FOR FURTHER EXPLORING () ################################################################################ETS fit_ets <- ets(train$visitors) fit_ets plot(fit_ets) accuracy(fit_ets)
fit_ets %>% gg_tsresiduals(lag_max = 16)
augment(fit_ets) %>% features(.innov, ljung_box, lag = 16, dof = 6)
forecast(fit_ets, h=5) %>% #filter(.model==‘auto’) %>% autoplot(visitors)+ labs(title = “ETS(A,N,N) Prediction”, y = “log1p visitors”) fc_ets <- forecast(fit_ets,h = 39) autoplot(fc_ets, train)+ labs(title = “ETS(A,N,N) Prediction: Cement production in US”, y = “Index”) ##############################################################################