library(forecast)
## Warning: package 'forecast' was built under R version 4.0.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tseries)
## Warning: package 'tseries' was built under R version 4.0.3
library(pastecs)
## Warning: package 'pastecs' was built under R version 4.0.3
library(abind, pos=44)
## Warning: package 'abind' was built under R version 4.0.3
library(TTR)
## Warning: package 'TTR' was built under R version 4.0.3
library( nortest )
## Warning: package 'nortest' was built under R version 4.0.3
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 4.0.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.0.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.0.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:pastecs':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
library(aTSA)
## Warning: package 'aTSA' was built under R version 4.0.3
##
## Attaching package: 'aTSA'
## The following object is masked from 'package:pastecs':
##
## trend.test
## The following objects are masked from 'package:tseries':
##
## adf.test, kpss.test, pp.test
## The following object is masked from 'package:forecast':
##
## forecast
## The following object is masked from 'package:graphics':
##
## identify
library(FitAR)
## Warning: package 'FitAR' was built under R version 4.0.3
## Loading required package: lattice
## Loading required package: leaps
## Warning: package 'leaps' was built under R version 4.0.3
## Loading required package: ltsa
## Warning: package 'ltsa' was built under R version 4.0.3
## Loading required package: bestglm
## Warning: package 'bestglm' was built under R version 4.0.3
##
## Attaching package: 'FitAR'
## The following object is masked from 'package:forecast':
##
## BoxCox
library("fUnitRoots")
## Warning: package 'fUnitRoots' was built under R version 4.0.3
## Loading required package: timeDate
## Warning: package 'timeDate' was built under R version 4.0.3
##
## Attaching package: 'timeDate'
## The following objects are masked from 'package:PerformanceAnalytics':
##
## kurtosis, skewness
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 4.0.3
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
##
## time<-
## Loading required package: fBasics
## Warning: package 'fBasics' was built under R version 4.0.3
##
## Attaching package: 'fBasics'
## The following object is masked from 'package:TTR':
##
## volatility
library(fracdiff)
## Warning: package 'fracdiff' was built under R version 4.0.3
library(nnfor)
## Warning: package 'nnfor' was built under R version 4.0.3
library(fpp2)
## Warning: package 'fpp2' was built under R version 4.0.3
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2 3.3.2 v expsmooth 2.3
## v fma 2.4
## Warning: package 'ggplot2' was built under R version 4.0.3
## Warning: package 'fma' was built under R version 4.0.3
## Warning: package 'expsmooth' was built under R version 4.0.3
## -- Conflicts ------------------------------------------------- fpp2_conflicts --
## x FitAR::BoxCox() masks forecast::BoxCox()
## x aTSA::forecast() masks forecast::forecast()
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## v purrr 0.3.4
## Warning: package 'tibble' was built under R version 4.0.3
## Warning: package 'tidyr' was built under R version 4.0.3
## Warning: package 'readr' was built under R version 4.0.3
## Warning: package 'purrr' was built under R version 4.0.3
## Warning: package 'dplyr' was built under R version 4.0.3
## Warning: package 'stringr' was built under R version 4.0.3
## Warning: package 'forcats' was built under R version 4.0.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract() masks pastecs::extract()
## x dplyr::filter() masks timeSeries::filter(), stats::filter()
## x dplyr::first() masks xts::first(), pastecs::first()
## x dplyr::lag() masks timeSeries::lag(), stats::lag()
## x dplyr::last() masks xts::last(), pastecs::last()
library(magrittr)
## Warning: package 'magrittr' was built under R version 4.0.3
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:pastecs':
##
## extract
library(aTSA)
library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.0.3
library(rtweet)
## Warning: package 'rtweet' was built under R version 4.0.3
##
## Attaching package: 'rtweet'
## The following object is masked from 'package:TSstudio':
##
## ts_plot
## The following object is masked from 'package:purrr':
##
## flatten
Can <- read.delim("C:/Users/compu lab/Desktop/Can.txt")
Can=Can[-c(121:130),]#for dropping the un-needed na. raws.
head(Can)
tail(Can)
Can[-1]
Can =Can[-1]
class(Can)
## [1] "data.frame"
tsCan = ts(Can, start = c(2009,1), frequency = 12)
tsCan
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2009 89 76 80 81 86 75 70 81 76 84 73 74
## 2010 86 74 78 79 84 73 69 79 76 83 74 75
## 2011 98 90 95 100 118 98 90 69 87 73 72 47
## 2012 88 73 82 93 99 102 105 112 115 118 121 123
## 2013 96 100 103 107 110 120 123 125 128 130 132 140
## 2014 126 137 105 135 110 116 112 114 115 162 142 128
## 2015 160 130 138 142 120 122 128 133 140 139 134 142
## 2016 115 120 123 145 149 153 157 165 165 160 140 110
## 2017 144 134 135 148 147 144 143 147 151 159 149 143
## 2018 144 135 136 148 147 145 144 148 152 160 149 142
class(tsCan)
## [1] "ts"
typeof(tsCan)
## [1] "integer"
str(tsCan)
## Time-Series [1:120, 1] from 2009 to 2019: 89 76 80 81 86 75 70 81 76 84 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "number"
tsData=tsCan
head(tsData,10)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 2009 89 76 80 81 86 75 70 81 76 84
str(tsData)
## Time-Series [1:120, 1] from 2009 to 2019: 89 76 80 81 86 75 70 81 76 84 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "number"
typeof(tsData)
## [1] "integer"
tsData=ts(tsData)
str(tsData)
## Time-Series [1:120, 1] from 1 to 120: 89 76 80 81 86 75 70 81 76 84 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr "number"
#Nice Visualization
autoplot(tsData) +
geom_line(color = "cyan") +
geom_point(color = "cyan") +
labs(x = NULL, y = NULL,
title = "Monthly Cancer-Numbers 2009-2019",
subtitle = "Data Source:Ministry Of Health Gaza data.") +
theme(
axis.line = element_blank(),
axis.text.x = element_text(color = "white", lineheight = 0.9),
axis.text.y = element_text(color = "white", lineheight = 0.9),
axis.ticks = element_line(color = "white", size = 0.2),
axis.title.x = element_text(color = "white", margin = margin(0, 10, 0, 0)),
axis.title.y = element_text(color = "white", angle = 90, margin = margin(0, 10, 0, 0)),
axis.ticks.length = unit(0.3, "lines"),
legend.background = element_rect(color = NA, fill = " gray10"),
legend.key = element_rect(color = "white", fill = " gray10"),
legend.key.size = unit(1.2, "lines"),
legend.key.height = NULL,
legend.key.width = NULL,
legend.text = element_text(color = "white"),
legend.title = element_text(face = "bold", hjust = 0, color = "white"),
legend.text.align = NULL,
legend.title.align = NULL,
legend.direction = "vertical",
legend.box = NULL,
panel.background = element_rect(fill = "gray10", color = NA),
panel.border = element_blank(),
panel.grid.major = element_line(color = "grey35"),
panel.grid.minor = element_line(color = "grey20"),
panel.spacing = unit(0.5, "lines"),
strip.background = element_rect(fill = "grey30", color = "grey10"),
strip.text.x = element_text(color = "white"),
strip.text.y = element_text(color = "white", angle = -90),
plot.background = element_rect(color = "gray10", fill = "gray10"),
plot.title = element_text(color = "white", hjust = 0, lineheight = 1.25,
margin = margin(2, 2, 2, 2)),
plot.subtitle = element_text(color = "white", hjust = 0, margin = margin(2, 2, 2, 2)),
plot.caption = element_text(color = "white", hjust = 0),
plot.margin = unit(rep(1, 4), "lines"))

plot(tsData)

#from the main
#To get the different in out -put:
tsData = ts(tsData, start = c(2009,1), frequency = 12)
ts_info(tsCan)
## The tsCan series is a ts object with 1 variable and 120 observations
## Frequency: 12
## Start time: 2009 1
## End time: 2018 12
ts_info(tsData)
## The tsData series is a ts object with 1 variable and 120 observations
## Frequency: 12
## Start time: 2009 1
## End time: 2018 12
#convert to time series
plot(tsData)

ts_info(tsData)
## The tsData series is a ts object with 1 variable and 120 observations
## Frequency: 12
## Start time: 2009 1
## End time: 2018 12
ts_info(tsCan)
## The tsCan series is a ts object with 1 variable and 120 observations
## Frequency: 12
## Start time: 2009 1
## End time: 2018 12
ts_decompose(tsData)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
ts_seasonal(tsData, type = "all")
ts_seasonal(tsData - decompose(tsData)$trend,
type = "all",
title = "Seasonal Plot - tsData (Detrend)")
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
#Exploratory analysis
#=======================
# The goal of the exploratory analysis is to identify the key characteristics
# of the series with the use of descriptive analysis methods. Throughout this
#process we are mainly want to detect:
# Seasonal pattern - single/multiple seasonal pattern (if exists)
#The trend type - linear/exponential
#1-Structural breaks and outliers
#2-Any other pattern of the series
#3-Those insights provide us with a better understanding of the past
#and can be utilized to forecast the future.
#Decomposing the series
#========================
# We will start with decomposing the series into its three components -
# the trend, seasonal and random components.
#The ts_decompose function provides an interactive inference for the
#decompose function form the stats package:
require(graphics)
#Multiplicative:
#================
decompose_multi<-decompose(tsCan,type='m')
plot(decompose_multi)

#Additive Decomposition:#=======================
x <- ts(tsCan, start = c(2009, 1), end = c(2019, 12), frequency = 12)
tsCan1 <- decompose(x)
ts_decompose(x)
plot(tsCan1)

#Seasonal analysis:
#===================
# You can note from both the series and decompose plots that the series has
#a strong seasonal pattern along with a non-linear trend. We will use the
#ts_seasonal, ts_heatmap and ts_surface functions to explore the seasonal
#pattern of the series:
ts_seasonal(tsCan, type = "all")
ts_seasonal(tsCan - decompose(tsCan)$trend,
type = "all",
title = "Seasonal Plot - tsCan (Detrend)")
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
## Warning: Ignoring 1 observations
#Correlations:
#==============
ts_heatmap(tsCan)
ts_surface(tsCan)
ts_cor(tsCan, lag.max = 36)
#As expected you can notice that the series has a high correlation with its seasonal lags.
#A more intuitive way to review identify the relationship between the series and its lags
#is with the ts_lags function, which provides plots of the series against its lags:
ts_lags(tsCan)
#From the 1st lag you can get that the timeseries is stationary.
#---------------------------------------------------------
# Exporatory analysis summary
#============================
#Here are the key insights we learned from the exploratory analysis process:
# The series has a strong seasonal pattern, no indication for multi-seasonality
#The series trend has a structural break around 2010 and a non-linear growth
#The series has a strong correlation with its seasonal lags
#Forecasting the series
#=======================
library( forecastHybrid)
## Warning: package 'forecastHybrid' was built under R version 4.0.3
## Loading required package: thief
## Warning: package 'thief' was built under R version 4.0.3
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.0.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
tsCan_df <- ts_to_prophet(tsCan)
head(tsCan_df)
tsCan_df$flag <- ifelse(year(tsCan_df$ds) >= 2009, 1, 0)
#-------------
# Traditional Approach
#====================
# Set the sample out and forecast horizon
h1 <- 12 # the length of the testing partition
h2 <- 60 # forecast horizon
# Splitting the time series object to training and testing partitions
tsCan_split <- ts_split(tsCan, sample.out = h1)
train <- tsCan_split$train
test <- tsCan_split$test
ts_info(train)
## The train series is a ts object with 1 variable and 108 observations
## Frequency: 12
## Start time: 2009 1
## End time: 2017 12
ts_info(test)
## The test series is a ts object with 1 variable and 12 observations
## Frequency: 12
## Start time: 2018 1
## End time: 2018 12
# Splitting the data.frame object to training and testing partitions
train_df <- tsCan_df[1:(nrow(tsCan_df) - h1), ]
test_df <- tsCan_df[(nrow(tsCan_df) - h1 + 1):nrow(tsCan_df), ]
#Model 1 - ARIMA
#================
set.seed(1234)
library(forecast)
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:timeSeries':
##
## filter
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# auto.arima
md1 <- auto.arima(train,
stepwise = FALSE,
approximation = FALSE,
D = 1)
fc1 <- forecast::forecast(md1, h = h1)
accuracy(fc1, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1750222 11.82971 8.224938 -1.248665 7.412145 0.5051786
## Test set -13.3563357 13.68902 13.356336 -9.181074 9.181074 0.8203508
## ACF1 Theil's U
## Training set 0.02278905 NA
## Test set 0.23810980 2.065053
# ------------------
# Traditional Approach:
# =====================
# We will use the following three forecasting approaches:
h <- 12
# ARIMA – autoregressive moving average model using the auto.arima function
#ETS (Error, Trend and Seasonal) model - or exponential smoothing state space model with the ets function
# TSLM (Time Series Linear Model) – forecasting with linear regression model using the tslm function
#When running a “horse racing” between forecasting models, it is recommended diversify your modeling approach. The performance of the models may change according to the data structure and by the tuning parameters.
# We will start by spliting the series to training and testing partitions:
split_ts <- ts_split(tsCan, sample.out = h)
train <- split_ts$train
test <- split_ts$test
# Create forecast object
fc <- forecast::forecast(auto.arima(train, lambda = BoxCox.lambda(train)), h = h) # Plot the fitted and forecasted vs the actual values
# ======================================================
test_forecast(actual = tsCan, forecast.obj = fc, test = test)
#It seems like the ARIMA model capture well the change in the
#trend but fail to capture the seasonal peaks. In addition, the
#error rate on the testing set is more than twice than the one
#on the training set. This may indication for overfitting.
# Model 2 - ETS(ANN)
# ==================
# ETS
md2 <- ets(train, opt.crit = "mse")
fc2 <- forecast::forecast(md2, h = h1)
accuracy(fc2, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.0333618 12.478354 9.167721 -0.1348013 8.749278 0.5630846
## Test set -0.8526215 6.483249 4.833333 -0.7789156 3.338880 0.2968650
## ACF1 Theil's U
## Training set -0.03252918 NA
## Test set 0.47943286 1.009345
library(forecast)
#data(tsCan)
# Set the horizon of the forecast
h <- 12
# split to training/testing partition
split_ts <- ts_split(tsCan, sample.out = h)
train <- split_ts$train
test <- split_ts$test
# Create forecast object
fc <- forecast::forecast(auto.arima(train, lambda = BoxCox.lambda(train)), h = h)
# Plot the fitted and forecasted vs the actual values
test_forecast(actual = tsCan, forecast.obj = fc, test = test)
#Model 3 - TSLM:
#===============
# Time series linear regression
md3 <- tslm(train ~ season + trend)
fc3 <- forecast::forecast(md3, h = h1)
test_forecast(actual = tsCan, forecast.obj = fc3, test = test)
accuracy(fc3, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.447643e-15 12.97400 10.18544 -1.691944 9.974131 0.6255934
## Test set -1.487731e+01 14.94414 14.87731 -10.262473 10.262473 0.9137698
## ACF1 Theil's U
## Training set 0.5278316 NA
## Test set 0.5249287 2.229779
#Not surprisingly, the linear approach seems vary linear and therefore fail to capture the peaks
# and the trend structure. Far behind from the ARIMA and the ETS, the TSLM
#score an error rate of 9.2%
#on the testing set.
#We can try to impose the structural break by adding the flag variable
#we prepared before:
md3a <- tslm(train ~ season + trend)
fc3a <- forecast::forecast(md3a, h = h1)
test_forecast(actual = tsCan, forecast.obj = fc3a, test = test)
accuracy(fc3a, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.447643e-15 12.97400 10.18544 -1.691944 9.974131 0.6255934
## Test set -1.487731e+01 14.94414 14.87731 -10.262473 10.262473 0.9137698
## ACF1 Theil's U
## Training set 0.5278316 NA
## Test set 0.5249287 2.229779
# Backtesting approach:
# =====================
# We will use the ts_backtesting to train and test multiple models
#(auto.arima, ets, HoltWinters, nnetar, tbats, from the forecast package, #hybridModel from the forecastHybrid package
# and bsts from the bsts package) over six periods of time (periods = 6). Likewise as we did in the
# traditional approach above, we will set the testing partition to 12 months (h #= h1) and the forecast
#horizon to 60 months (h = h2)
# ---
test_forecast(forecast.obj = fc3a, actual = tsCan, test = test)
md_final <- tslm(tsCan ~ season + trend + I(trend ^ 2))
fc_final <- forecast::forecast(md_final, h = h2)
plot_forecast(fc_final)
# Defining the models and their arguments
methods <- list(ets1 = list(method = "ets",
method_arg = list(opt.crit = "lik"),
notes = "ETS model with opt.crit = lik"),
ets2 = list(method = "ets",
method_arg = list(opt.crit = "amse"),
notes = "ETS model with opt.crit = amse"),
arima1 = list(method = "arima",
method_arg = list(order = c(2,1,0)),
notes = "ARIMA(2,1,0)"),
arima2 = list(method = "arima",
method_arg = list(order = c(2,1,2),
seasonal = list(order = c(1,1,1))),
notes = "SARIMA(2,1,2)(1,1,1)"),
hw = list(method = "HoltWinters",
method_arg = NULL,
notes = "HoltWinters Model"),
tslm = list(method = "tslm",
method_arg = list(formula = input ~ trend + season),
notes = "tslm model with trend and seasonal components"))
# Training the models with backtesting
md <- train_model(input = USgas,
methods = methods,
train_method = list(partitions = 4,
sample.out = 12,
space = 3),
horizon = 12,
error = "MAPE")
## # A tibble: 6 x 7
## model_id model notes avg_mape avg_rmse `avg_coverage_8~ `avg_coverage_9~
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 hw HoltW~ HoltWinte~ 0.0355 111. 0.938 1
## 2 ets1 ets ETS model~ 0.0468 149. 0.833 0.958
## 3 arima2 arima SARIMA(2,~ 0.0475 145. 0.646 0.917
## 4 ets2 ets ETS model~ 0.0555 165. 0.625 0.896
## 5 tslm tslm tslm mode~ 0.0892 253. 0.25 0.625
## 6 arima1 arima ARIMA(2,1~ 0.175 554. 0.854 0.979
# View the model performance on the backtesting partitions
md$leaderboard