#INTRODUCTION TO TIME SERIES MODELING

Loading the required libraries

library(ggplot2)
library(forecast)
library(tseries)

###simulating time series data

set.seed(123)
data<-arima.sim(model = list(order=c(1,1,1),ar=0.5,ma=0.3),n=100)
head(data)
Time Series:
Start = 1 
End = 6 
Frequency = 1 
[1] 0.0000000 0.9692641 1.9626118 2.6901997 2.5313574 4.0720970

#Convert data to time series

ts_data<-ts(data,start=c(2000,1),frequency=12)
head(ts_data)
           Jan       Feb       Mar       Apr       May       Jun
2000 0.0000000 0.9692641 1.9626118 2.6901997 2.5313574 4.0720970

#plotting the time series data

plot(ts_data,main="time series data",col="green",lwd=2)

#check stationarity using Augmented Dickey Fuller test(ADF)

adf.test(ts_data) #if p-value is less than 0.05, the series is stationary

    Augmented Dickey-Fuller Test

data:  ts_data
Dickey-Fuller = -3.1142, Lag order = 4, p-value = 0.1148
alternative hypothesis: stationary

The results above is not stationary, we opt for transformation using difference

##having the difference to make the data stationary

diff_data<-diff(ts_data)
plot(diff_data,main="differenced time series",col="red")

print(diff_data)
             Jan         Feb         Mar         Apr         May         Jun
2000              0.96926410  0.99334765  0.72758798 -0.15884233  1.54073963
2001 -1.71740797 -1.89539655 -1.79140491 -2.76990755 -1.05317472 -0.12187813
2002  1.89977435  1.88500175  1.70301061  0.95576889  0.15334827 -0.39558567
2003 -0.54357285 -0.85930723  0.21031490  0.25577792  0.35619675  0.22554718
2004  0.32174777  0.41397173  0.65140782 -0.06272770 -0.51526827 -1.37617173
2005  1.54622113 -1.68336766 -0.52869597 -0.67182719 -1.23668244  0.20082756
2006 -0.13155684  0.46740012  0.20652646  0.36889923  1.38082322  1.45464480
2007  1.22554648  0.42070939  2.21760981  3.29761543  1.87289054 -0.16068574
2008 -1.30673447 -2.55678051 -2.15899936 -0.27457103 -0.43693349            
             Jul         Aug         Sep         Oct         Nov         Dec
2000  1.80429423 -0.91511490 -0.34618669 -0.43547798 -1.42740012 -1.25202209
2001 -1.15306407  0.33584181  0.97052960  0.31813258  0.96567051  1.62950644
2002 -1.00664111 -0.91964993 -1.78759650  0.89553881  2.30641819  0.39248911
2003  0.06133910  1.38641070  0.87801505  1.88774683 -0.14993821  0.04501881
2004 -2.06544971 -1.05073358  0.01390158  0.19441795  1.03537771  2.84445378
2005  0.12331219 -1.24449352 -0.80715859 -0.48807962 -0.27994303  0.24703814
2006  0.53194526  1.31700077  1.99664653  1.84477138  1.32563651  0.10653170
2007 -1.09867570 -0.50557611 -0.42241482 -0.63275757 -1.37226013 -1.01664336
2008                                                                        
adf.test(diff_data)

    Augmented Dickey-Fuller Test

data:  diff_data
Dickey-Fuller = -3.3007, Lag order = 4, p-value = 0.07501
alternative hypothesis: stationary

#plot of ACF and PACF

par(mfrow=c(1,2))
acf(ts_data,main="ACF plot") #identify MA process
pacf(ts_data,main="PACF plot") #identify AR process

par(mfrow=c(1,1)) #reset plotting layout

#Fitting ARIMA

model<-auto.arima(ts_data)
summary(model)
Series: ts_data 
ARIMA(2,1,1)(0,0,1)[12] 

Coefficients:
          ar1     ar2     ma1     sma1
      -0.0416  0.3446  0.8588  -0.1651
s.e.   0.1416  0.1300  0.0922   0.1034

sigma^2 = 0.7694:  log likelihood = -127.39
AIC=264.78   AICc=265.42   BIC=277.81

Training set error measures:
                     ME      RMSE       MAE      MPE     MAPE      MASE
Training set 0.04247906 0.8551607 0.6833282 13.63793 39.61551 0.1178191
                   ACF1
Training set -0.0136965

We can also estimate the ARIMA manually as shown in the steps below

##manual estimation of ARIMA

model_manual<-arima(ts_data,order=c(2,1,2))
summary(model_manual)

Call:
arima(x = ts_data, order = c(2, 1, 2))

Coefficients:
         ar1     ar2     ma1      ma2
      0.0908  0.3599  0.7423  -0.1017
s.e.  0.3644  0.1551  0.3731   0.2553

sigma^2 estimated as 0.7573:  log likelihood = -128.5,  aic = 267

Training set error measures:
                     ME      RMSE       MAE      MPE     MAPE      MASE
Training set 0.02246521 0.8658951 0.6886005 13.85039 39.86403 0.7014706
                    ACF1
Training set 0.003210435

#model diagnostic

tsdiag(model) #diagnostic plots

checkresiduals(model)


    Ljung-Box test

data:  Residuals from ARIMA(2,1,1)(0,0,1)[12]
Q* = 12.739, df = 16, p-value = 0.6917

Model df: 4.   Total lags used: 20

#Forecasting of the model

forecast_values<-forecast(model,h=12)
head(forecast_values)
$method
[1] "ARIMA(2,1,1)(0,0,1)[12]"

$model
Series: ts_data 
ARIMA(2,1,1)(0,0,1)[12] 

Coefficients:
          ar1     ar2     ma1     sma1
      -0.0416  0.3446  0.8588  -0.1651
s.e.   0.1416  0.1300  0.0922   0.1034

sigma^2 = 0.7694:  log likelihood = -127.39
AIC=264.78   AICc=265.42   BIC=277.81

$level
[1] 80 95

$mean
          Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
2008                                              8.315199 8.461113 8.423634
2009 8.987372 9.393355 9.689015 9.642017 9.658030                           
          Sep      Oct      Nov      Dec
2008 8.438735 8.463423 8.654295 8.805193
2009                                    

$lower
               80%        95%
Jun 2008 7.1910891  6.5960211
Jul 2008 6.1295257  4.8952578
Aug 2008 5.0833970  3.3151823
Sep 2008 4.1475806  1.8759805
Oct 2008 3.3386904  0.6258211
Nov 2008 2.7653501 -0.3520691
Dec 2008 2.2257621 -1.2571781
Jan 2009 1.7698540 -2.0508690
Feb 2009 1.5859473 -2.5470445
Mar 2009 1.3293565 -3.0959797
Apr 2009 0.7633876 -3.9366747
May 2009 0.2879706 -4.6722395

$upper
              80%      95%
Jun 2008  9.43931 10.03438
Jul 2008 10.79270 12.02697
Aug 2008 11.76387 13.53209
Sep 2008 12.72989 15.00149
Oct 2008 13.58815 16.30102
Nov 2008 14.54324 17.66066
Dec 2008 15.38462 18.86756
Jan 2009 16.20489 20.02561
Feb 2009 17.20076 21.33375
Mar 2009 18.04867 22.47401
Apr 2009 18.52065 23.22071
May 2009 19.02809 23.98830
plot(forecast_values,main="ARIMA forecast",col="blue")