GAZA-CANCER (ARIMA ANN)2009-2218

library(forecast)

## Warning: package 'forecast' was built under R version 4.0.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(tseries)

## Warning: package 'tseries' was built under R version 4.0.3

library(pastecs)

## Warning: package 'pastecs' was built under R version 4.0.3

library(abind, pos=44)

## Warning: package 'abind' was built under R version 4.0.3

library(TTR)

## Warning: package 'TTR' was built under R version 4.0.3

library( nortest )

## Warning: package 'nortest' was built under R version 4.0.3

library(PerformanceAnalytics)

## Warning: package 'PerformanceAnalytics' was built under R version 4.0.3

## Loading required package: xts

## Warning: package 'xts' was built under R version 4.0.3

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 4.0.3

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:pastecs':
## 
##     first, last

## 
## Attaching package: 'PerformanceAnalytics'

## The following object is masked from 'package:graphics':
## 
##     legend

library(aTSA)

## Warning: package 'aTSA' was built under R version 4.0.3

## 
## Attaching package: 'aTSA'

## The following object is masked from 'package:pastecs':
## 
##     trend.test

## The following objects are masked from 'package:tseries':
## 
##     adf.test, kpss.test, pp.test

## The following object is masked from 'package:forecast':
## 
##     forecast

## The following object is masked from 'package:graphics':
## 
##     identify

library(FitAR)

## Warning: package 'FitAR' was built under R version 4.0.3

## Loading required package: lattice

## Loading required package: leaps

## Warning: package 'leaps' was built under R version 4.0.3

## Loading required package: ltsa

## Warning: package 'ltsa' was built under R version 4.0.3

## Loading required package: bestglm

## Warning: package 'bestglm' was built under R version 4.0.3

## 
## Attaching package: 'FitAR'

## The following object is masked from 'package:forecast':
## 
##     BoxCox

library("fUnitRoots")

## Warning: package 'fUnitRoots' was built under R version 4.0.3

## Loading required package: timeDate

## Warning: package 'timeDate' was built under R version 4.0.3

## 
## Attaching package: 'timeDate'

## The following objects are masked from 'package:PerformanceAnalytics':
## 
##     kurtosis, skewness

## Loading required package: timeSeries

## Warning: package 'timeSeries' was built under R version 4.0.3

## 
## Attaching package: 'timeSeries'

## The following object is masked from 'package:zoo':
## 
##     time<-

## Loading required package: fBasics

## Warning: package 'fBasics' was built under R version 4.0.3

## 
## Attaching package: 'fBasics'

## The following object is masked from 'package:TTR':
## 
##     volatility

library(fracdiff)

## Warning: package 'fracdiff' was built under R version 4.0.3

library(nnfor)

## Warning: package 'nnfor' was built under R version 4.0.3

library(fpp2)

## Warning: package 'fpp2' was built under R version 4.0.3

## -- Attaching packages ---------------------------------------------- fpp2 2.4 --

## v ggplot2   3.3.2     v expsmooth 2.3  
## v fma       2.4

## Warning: package 'ggplot2' was built under R version 4.0.3

## Warning: package 'fma' was built under R version 4.0.3

## Warning: package 'expsmooth' was built under R version 4.0.3

## -- Conflicts ------------------------------------------------- fpp2_conflicts --
## x FitAR::BoxCox()  masks forecast::BoxCox()
## x aTSA::forecast() masks forecast::forecast()

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.0.3

## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --

## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## v purrr   0.3.4

## Warning: package 'tibble' was built under R version 4.0.3

## Warning: package 'tidyr' was built under R version 4.0.3

## Warning: package 'readr' was built under R version 4.0.3

## Warning: package 'purrr' was built under R version 4.0.3

## Warning: package 'dplyr' was built under R version 4.0.3

## Warning: package 'stringr' was built under R version 4.0.3

## Warning: package 'forcats' was built under R version 4.0.3

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::extract() masks pastecs::extract()
## x dplyr::filter()  masks timeSeries::filter(), stats::filter()
## x dplyr::first()   masks xts::first(), pastecs::first()
## x dplyr::lag()     masks timeSeries::lag(), stats::lag()
## x dplyr::last()    masks xts::last(), pastecs::last()

library(magrittr)

## Warning: package 'magrittr' was built under R version 4.0.3

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:purrr':
## 
##     set_names

## The following object is masked from 'package:tidyr':
## 
##     extract

## The following object is masked from 'package:pastecs':
## 
##     extract

library(aTSA)
library(TSstudio)

## Warning: package 'TSstudio' was built under R version 4.0.3

library(rtweet)

## Warning: package 'rtweet' was built under R version 4.0.3

## 
## Attaching package: 'rtweet'

## The following object is masked from 'package:TSstudio':
## 
##     ts_plot

## The following object is masked from 'package:purrr':
## 
##     flatten

Can <- read.delim("C:/Users/compu lab/Desktop/Can.txt")
Can=Can[-c(121:130),]#for dropping the un-needed na. raws.
head(Can)

tail(Can)

Can[-1]

Can =Can[-1]
class(Can)

## [1] "data.frame"

tsCan = ts(Can, start = c(2009,1), frequency = 12)
tsCan

##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2009  89  76  80  81  86  75  70  81  76  84  73  74
## 2010  86  74  78  79  84  73  69  79  76  83  74  75
## 2011  98  90  95 100 118  98  90  69  87  73  72  47
## 2012  88  73  82  93  99 102 105 112 115 118 121 123
## 2013  96 100 103 107 110 120 123 125 128 130 132 140
## 2014 126 137 105 135 110 116 112 114 115 162 142 128
## 2015 160 130 138 142 120 122 128 133 140 139 134 142
## 2016 115 120 123 145 149 153 157 165 165 160 140 110
## 2017 144 134 135 148 147 144 143 147 151 159 149 143
## 2018 144 135 136 148 147 145 144 148 152 160 149 142

class(tsCan)

## [1] "ts"

typeof(tsCan)

## [1] "integer"

str(tsCan)

##  Time-Series [1:120, 1] from 2009 to 2019: 89 76 80 81 86 75 70 81 76 84 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "number"

tsData=tsCan
head(tsData,10)

##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct
## 2009  89  76  80  81  86  75  70  81  76  84

str(tsData)

##  Time-Series [1:120, 1] from 2009 to 2019: 89 76 80 81 86 75 70 81 76 84 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "number"

typeof(tsData)

## [1] "integer"

tsData=ts(tsData)
str(tsData)

##  Time-Series [1:120, 1] from 1 to 120: 89 76 80 81 86 75 70 81 76 84 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr "number"

#Nice Visualization 
autoplot(tsData) + 
  geom_line(color = "cyan") + 
  geom_point(color = "cyan") + 
  labs(x = NULL, y = NULL, 
       title = "Monthly Cancer-Numbers 2009-2019", 
       subtitle = "Data Source:Ministry Of Health Gaza data.") + 
  theme(
    axis.line = element_blank(),  
    axis.text.x = element_text(color = "white", lineheight = 0.9),  
    axis.text.y = element_text(color = "white", lineheight = 0.9),  
    axis.ticks = element_line(color = "white", size  =  0.2),  
    axis.title.x = element_text(color = "white", margin = margin(0, 10, 0, 0)),  
    axis.title.y = element_text(color = "white", angle = 90, margin = margin(0, 10, 0, 0)),  
    axis.ticks.length = unit(0.3, "lines"),   
    legend.background = element_rect(color = NA, fill = " gray10"),  
    legend.key = element_rect(color = "white",  fill = " gray10"),  
    legend.key.size = unit(1.2, "lines"),  
    legend.key.height = NULL,  
    legend.key.width = NULL,      
    legend.text = element_text(color = "white"),  
    legend.title = element_text(face = "bold", hjust = 0, color = "white"),  
    legend.text.align = NULL,  
    legend.title.align = NULL,  
    legend.direction = "vertical",  
    legend.box = NULL, 
    panel.background = element_rect(fill = "gray10", color  =  NA),  
    panel.border = element_blank(),
    panel.grid.major = element_line(color = "grey35"),  
    panel.grid.minor = element_line(color = "grey20"),  
    panel.spacing = unit(0.5, "lines"),   
    strip.background = element_rect(fill = "grey30", color = "grey10"),  
    strip.text.x = element_text(color = "white"),  
    strip.text.y = element_text(color = "white", angle = -90),  
    plot.background = element_rect(color = "gray10", fill = "gray10"),  
    plot.title = element_text(color = "white", hjust = 0, lineheight = 1.25,
                              margin = margin(2, 2, 2, 2)),  
    plot.subtitle = element_text(color = "white", hjust = 0, margin = margin(2, 2, 2, 2)),  
    plot.caption = element_text(color = "white", hjust = 0),  
    plot.margin = unit(rep(1, 4), "lines"))

plot(tsData)

#from the main 
#To get the different in out -put:

  tsData = ts(tsData, start = c(2009,1), frequency = 12)
ts_info(tsCan)

##  The tsCan series is a ts object with 1 variable and 120 observations
##  Frequency: 12 
##  Start time: 2009 1 
##  End time: 2018 12

ts_info(tsData)

##  The tsData series is a ts object with 1 variable and 120 observations
##  Frequency: 12 
##  Start time: 2009 1 
##  End time: 2018 12

#convert to time series
plot(tsData)

ts_info(tsData)

##  The tsData series is a ts object with 1 variable and 120 observations
##  Frequency: 12 
##  Start time: 2009 1 
##  End time: 2018 12

 ts_info(tsCan)

##  The tsCan series is a ts object with 1 variable and 120 observations
##  Frequency: 12 
##  Start time: 2009 1 
##  End time: 2018 12

ts_decompose(tsData)

## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

ts_seasonal(tsData, type = "all")

ts_seasonal(tsData - decompose(tsData)$trend, 
            type = "all", 
            title = "Seasonal Plot - tsData (Detrend)")

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

#Exploratory analysis
#=======================
#  The goal of the exploratory analysis is to identify the key characteristics 
 # of the series with the use of descriptive analysis methods. Throughout this
#process we are mainly want to detect:
#  Seasonal pattern - single/multiple seasonal pattern (if exists)
#The trend type - linear/exponential
#1-Structural breaks and outliers
#2-Any other pattern of the series
#3-Those insights provide us with a better understanding of the past
#and can be utilized to forecast the future.
#Decomposing the series
#========================
#  We will start with decomposing the series into its three components - 
#  the trend, seasonal and random components.
#The  ts_decompose function provides an interactive inference for the 
#decompose function form the stats package:
  require(graphics)

#Multiplicative:
#================
decompose_multi<-decompose(tsCan,type='m')
plot(decompose_multi)

#Additive Decomposition:#=======================
x <- ts(tsCan, start = c(2009, 1), end = c(2019, 12), frequency = 12)
tsCan1 <- decompose(x)
ts_decompose(x)

plot(tsCan1)

#Seasonal analysis:
#===================
#  You can note from both the series and decompose plots that the series has 
#a strong seasonal pattern along with a non-linear trend. We will use the 
#ts_seasonal, ts_heatmap and ts_surface functions to explore the seasonal 
#pattern of the series:
 ts_seasonal(tsCan, type = "all")

ts_seasonal(tsCan - decompose(tsCan)$trend, 
            type = "all", 
            title = "Seasonal Plot - tsCan (Detrend)")

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

## Warning: Ignoring 1 observations

#Correlations:
#==============
  ts_heatmap(tsCan)

ts_surface(tsCan)

ts_cor(tsCan, lag.max = 36)

#As expected you can notice that the series has a high correlation with its seasonal lags.
#A more intuitive way to review identify the relationship between the series and its lags 
#is with the ts_lags function, which provides plots of the series against its lags:

  ts_lags(tsCan)

#From the 1st lag you can get that the timeseries is stationary.
#---------------------------------------------------------

#  Exporatory analysis summary
#============================
  #Here are the key insights we learned from the exploratory analysis process:
   # The series has a strong seasonal pattern, no indication for multi-seasonality
#The series trend has a structural break around 2010 and a non-linear growth
#The series has a strong correlation with its seasonal lags
#Forecasting the series
#=======================
library( forecastHybrid)

## Warning: package 'forecastHybrid' was built under R version 4.0.3

## Loading required package: thief

## Warning: package 'thief' was built under R version 4.0.3

library(lubridate)

## Warning: package 'lubridate' was built under R version 4.0.3

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

tsCan_df <- ts_to_prophet(tsCan)
head(tsCan_df)

tsCan_df$flag <- ifelse(year(tsCan_df$ds) >= 2009, 1, 0)
#-------------

#  Traditional Approach
#====================
  # Set the sample out and forecast horizon
  h1 <- 12 # the length of the testing partition
h2 <- 60 # forecast horizon

# Splitting the time series object to training and testing partitions
tsCan_split <- ts_split(tsCan, sample.out = h1)
train <- tsCan_split$train
test <- tsCan_split$test

ts_info(train)

##  The train series is a ts object with 1 variable and 108 observations
##  Frequency: 12 
##  Start time: 2009 1 
##  End time: 2017 12

ts_info(test)

##  The test series is a ts object with 1 variable and 12 observations
##  Frequency: 12 
##  Start time: 2018 1 
##  End time: 2018 12

# Splitting the data.frame object to training and testing partitions
train_df <- tsCan_df[1:(nrow(tsCan_df) - h1), ]
test_df <- tsCan_df[(nrow(tsCan_df) - h1 + 1):nrow(tsCan_df), ]

#Model 1 - ARIMA
#================
set.seed(1234)
library(forecast)
library(plotly)

## Warning: package 'plotly' was built under R version 4.0.3

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:timeSeries':
## 
##     filter

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# auto.arima
md1 <- auto.arima(train, 
                  stepwise = FALSE, 
                  approximation = FALSE,
                  D = 1)
fc1 <- forecast::forecast(md1, h = h1)
 accuracy(fc1, test)

##                       ME     RMSE       MAE       MPE     MAPE      MASE
## Training set  -0.1750222 11.82971  8.224938 -1.248665 7.412145 0.5051786
## Test set     -13.3563357 13.68902 13.356336 -9.181074 9.181074 0.8203508
##                    ACF1 Theil's U
## Training set 0.02278905        NA
## Test set     0.23810980  2.065053

# ------------------

#   Traditional Approach:
#   =====================
# We will use the following three forecasting approaches:
   h <- 12
 #  ARIMA – autoregressive moving average model using the auto.arima function
 #ETS (Error, Trend and Seasonal) model - or exponential smoothing state space model with the ets function
# TSLM (Time Series Linear Model) – forecasting with linear regression model using the tslm function
 #When running a “horse racing” between forecasting models, it is recommended diversify your modeling approach. The performance of the models may change according to the data structure and by the tuning parameters.
 
# We will start by spliting the series to training and testing partitions:
 split_ts <- ts_split(tsCan, sample.out  = h)  
 train <- split_ts$train
 test <- split_ts$test

 # Create forecast object
 fc <- forecast::forecast(auto.arima(train, lambda = BoxCox.lambda(train)), h = h) # Plot the fitted and forecasted vs the actual values
# ======================================================

   test_forecast(actual = tsCan, forecast.obj = fc, test = test)

 #It seems like the ARIMA model capture well the change in the 
 #trend but fail to capture the seasonal peaks. In addition, the
 #error rate on the testing set is more than twice than the one 
 #on the training set. This may indication for overfitting.
# Model 2 - ETS(ANN)
# ==================
   # ETS
   md2 <- ets(train, opt.crit = "mse")
 fc2 <- forecast::forecast(md2, h = h1)
 accuracy(fc2, test)

##                      ME      RMSE      MAE        MPE     MAPE      MASE
## Training set  1.0333618 12.478354 9.167721 -0.1348013 8.749278 0.5630846
## Test set     -0.8526215  6.483249 4.833333 -0.7789156 3.338880 0.2968650
##                     ACF1 Theil's U
## Training set -0.03252918        NA
## Test set      0.47943286  1.009345

   library(forecast)
 #data(tsCan)
 
 # Set the horizon of the forecast
 h <- 12
 
 # split to training/testing partition
 split_ts <- ts_split(tsCan, sample.out  = h)
 train <- split_ts$train
 test <- split_ts$test
 
 # Create forecast object
 fc <- forecast::forecast(auto.arima(train, lambda = BoxCox.lambda(train)), h = h)

 # Plot the fitted and forecasted vs the actual values
 test_forecast(actual = tsCan, forecast.obj = fc, test = test)

 #Model 3 - TSLM:
   #===============
   # Time series linear regression
   md3 <- tslm(train ~ season + trend)
   fc3 <- forecast::forecast(md3, h = h1)
   test_forecast(actual = tsCan, forecast.obj = fc3, test = test)

   accuracy(fc3, test)

##                         ME     RMSE      MAE        MPE      MAPE      MASE
## Training set -1.447643e-15 12.97400 10.18544  -1.691944  9.974131 0.6255934
## Test set     -1.487731e+01 14.94414 14.87731 -10.262473 10.262473 0.9137698
##                   ACF1 Theil's U
## Training set 0.5278316        NA
## Test set     0.5249287  2.229779

   #Not surprisingly, the linear approach seems vary linear and therefore fail to capture the peaks 
  # and the trend structure. Far behind from the ARIMA and the ETS, the TSLM
 #score an error rate of 9.2%
 #on the testing set.
 #We can try to impose the structural break by adding the flag variable
 #we prepared before:

 md3a <- tslm(train ~ season + trend)
   fc3a <- forecast::forecast(md3a, h = h1)
   test_forecast(actual = tsCan, forecast.obj = fc3a, test = test)

 accuracy(fc3a, test)

##                         ME     RMSE      MAE        MPE      MAPE      MASE
## Training set -1.447643e-15 12.97400 10.18544  -1.691944  9.974131 0.6255934
## Test set     -1.487731e+01 14.94414 14.87731 -10.262473 10.262473 0.9137698
##                   ACF1 Theil's U
## Training set 0.5278316        NA
## Test set     0.5249287  2.229779

 #  Backtesting approach:
  # =====================
  # We will use the ts_backtesting to train and test multiple models
 #(auto.arima, ets,  HoltWinters, nnetar, tbats, from the forecast package, #hybridModel from the forecastHybrid package
 # and bsts from the bsts package) over six periods of time (periods = 6). Likewise as we did in the
# traditional approach above, we will set the testing partition to 12 months (h #= h1) and the forecast
 #horizon to 60 months (h = h2)
# ---
  test_forecast(forecast.obj = fc3a, actual = tsCan, test = test)

  md_final <- tslm(tsCan ~ season + trend + I(trend ^ 2))
  fc_final <- forecast::forecast(md_final, h = h2)
  plot_forecast(fc_final)

  # Defining the models and their arguments
  methods <- list(ets1 = list(method = "ets",
                              method_arg = list(opt.crit = "lik"),
                              notes = "ETS model with opt.crit = lik"),
                  ets2 = list(method = "ets",
                              method_arg = list(opt.crit = "amse"),
                              notes = "ETS model with opt.crit = amse"),
                  arima1 = list(method = "arima",
                                method_arg = list(order = c(2,1,0)),
                                notes = "ARIMA(2,1,0)"),
                  arima2 = list(method = "arima",
                                method_arg = list(order = c(2,1,2),
                                                  seasonal = list(order = c(1,1,1))),
                                notes = "SARIMA(2,1,2)(1,1,1)"),
                  hw = list(method = "HoltWinters",
                            method_arg = NULL,
                            notes = "HoltWinters Model"),
                  tslm = list(method = "tslm",
                              method_arg = list(formula = input ~ trend + season),
                              notes = "tslm model with trend and seasonal components"))
  # Training the models with backtesting
  md <- train_model(input = USgas,
                    methods = methods,
                    train_method = list(partitions = 4, 
                                        sample.out = 12, 
                                        space = 3),
                    horizon = 12,
                    error = "MAPE")

## # A tibble: 6 x 7
##   model_id model  notes      avg_mape avg_rmse `avg_coverage_8~ `avg_coverage_9~
##   <chr>    <chr>  <chr>         <dbl>    <dbl>            <dbl>            <dbl>
## 1 hw       HoltW~ HoltWinte~   0.0355     111.            0.938            1    
## 2 ets1     ets    ETS model~   0.0468     149.            0.833            0.958
## 3 arima2   arima  SARIMA(2,~   0.0475     145.            0.646            0.917
## 4 ets2     ets    ETS model~   0.0555     165.            0.625            0.896
## 5 tslm     tslm   tslm mode~   0.0892     253.            0.25             0.625
## 6 arima1   arima  ARIMA(2,1~   0.175      554.            0.854            0.979

  # View the model performance on the backtesting partitions
  md$leaderboard

GAZA-CANCER (ARIMA ANN)2009-2218

Dr.Eldirdiri Fadol for M.O.H. Research

10/30/2020