#INTRODUCTION TO TIME SERIES MODELING
library(ggplot2)
library(forecast)
library(tseries)
###simulating time series data
set.seed(123)
data<-arima.sim(model = list(order=c(1,1,1),ar=0.5,ma=0.3),n=100)
head(data)
Time Series:
Start = 1
End = 6
Frequency = 1
[1] 0.0000000 0.9692641 1.9626118 2.6901997 2.5313574 4.0720970
#Convert data to time series
ts_data<-ts(data,start=c(2000,1),frequency=12)
head(ts_data)
Jan Feb Mar Apr May Jun
2000 0.0000000 0.9692641 1.9626118 2.6901997 2.5313574 4.0720970
#plotting the time series data
plot(ts_data,main="time series data",col="green",lwd=2)
#check stationarity using Augmented Dickey Fuller test(ADF)
adf.test(ts_data) #if p-value is less than 0.05, the series is stationary
Augmented Dickey-Fuller Test
data: ts_data
Dickey-Fuller = -3.1142, Lag order = 4, p-value = 0.1148
alternative hypothesis: stationary
##having the difference to make the data stationary
diff_data<-diff(ts_data)
plot(diff_data,main="differenced time series",col="red")
print(diff_data)
Jan Feb Mar Apr May Jun
2000 0.96926410 0.99334765 0.72758798 -0.15884233 1.54073963
2001 -1.71740797 -1.89539655 -1.79140491 -2.76990755 -1.05317472 -0.12187813
2002 1.89977435 1.88500175 1.70301061 0.95576889 0.15334827 -0.39558567
2003 -0.54357285 -0.85930723 0.21031490 0.25577792 0.35619675 0.22554718
2004 0.32174777 0.41397173 0.65140782 -0.06272770 -0.51526827 -1.37617173
2005 1.54622113 -1.68336766 -0.52869597 -0.67182719 -1.23668244 0.20082756
2006 -0.13155684 0.46740012 0.20652646 0.36889923 1.38082322 1.45464480
2007 1.22554648 0.42070939 2.21760981 3.29761543 1.87289054 -0.16068574
2008 -1.30673447 -2.55678051 -2.15899936 -0.27457103 -0.43693349
Jul Aug Sep Oct Nov Dec
2000 1.80429423 -0.91511490 -0.34618669 -0.43547798 -1.42740012 -1.25202209
2001 -1.15306407 0.33584181 0.97052960 0.31813258 0.96567051 1.62950644
2002 -1.00664111 -0.91964993 -1.78759650 0.89553881 2.30641819 0.39248911
2003 0.06133910 1.38641070 0.87801505 1.88774683 -0.14993821 0.04501881
2004 -2.06544971 -1.05073358 0.01390158 0.19441795 1.03537771 2.84445378
2005 0.12331219 -1.24449352 -0.80715859 -0.48807962 -0.27994303 0.24703814
2006 0.53194526 1.31700077 1.99664653 1.84477138 1.32563651 0.10653170
2007 -1.09867570 -0.50557611 -0.42241482 -0.63275757 -1.37226013 -1.01664336
2008
adf.test(diff_data)
Augmented Dickey-Fuller Test
data: diff_data
Dickey-Fuller = -3.3007, Lag order = 4, p-value = 0.07501
alternative hypothesis: stationary
#plot of ACF and PACF
par(mfrow=c(1,2))
acf(ts_data,main="ACF plot") #identify MA process
pacf(ts_data,main="PACF plot") #identify AR process
par(mfrow=c(1,1)) #reset plotting layout
#Fitting ARIMA
model<-auto.arima(ts_data)
summary(model)
Series: ts_data
ARIMA(2,1,1)(0,0,1)[12]
Coefficients:
ar1 ar2 ma1 sma1
-0.0416 0.3446 0.8588 -0.1651
s.e. 0.1416 0.1300 0.0922 0.1034
sigma^2 = 0.7694: log likelihood = -127.39
AIC=264.78 AICc=265.42 BIC=277.81
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 0.04247906 0.8551607 0.6833282 13.63793 39.61551 0.1178191
ACF1
Training set -0.0136965
##manual estimation of ARIMA
model_manual<-arima(ts_data,order=c(2,1,2))
summary(model_manual)
Call:
arima(x = ts_data, order = c(2, 1, 2))
Coefficients:
ar1 ar2 ma1 ma2
0.0908 0.3599 0.7423 -0.1017
s.e. 0.3644 0.1551 0.3731 0.2553
sigma^2 estimated as 0.7573: log likelihood = -128.5, aic = 267
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 0.02246521 0.8658951 0.6886005 13.85039 39.86403 0.7014706
ACF1
Training set 0.003210435
#model diagnostic
tsdiag(model) #diagnostic plots
checkresiduals(model)
Ljung-Box test
data: Residuals from ARIMA(2,1,1)(0,0,1)[12]
Q* = 12.739, df = 16, p-value = 0.6917
Model df: 4. Total lags used: 20
#Forecasting of the model
forecast_values<-forecast(model,h=12)
head(forecast_values)
$method
[1] "ARIMA(2,1,1)(0,0,1)[12]"
$model
Series: ts_data
ARIMA(2,1,1)(0,0,1)[12]
Coefficients:
ar1 ar2 ma1 sma1
-0.0416 0.3446 0.8588 -0.1651
s.e. 0.1416 0.1300 0.0922 0.1034
sigma^2 = 0.7694: log likelihood = -127.39
AIC=264.78 AICc=265.42 BIC=277.81
$level
[1] 80 95
$mean
Jan Feb Mar Apr May Jun Jul Aug
2008 8.315199 8.461113 8.423634
2009 8.987372 9.393355 9.689015 9.642017 9.658030
Sep Oct Nov Dec
2008 8.438735 8.463423 8.654295 8.805193
2009
$lower
80% 95%
Jun 2008 7.1910891 6.5960211
Jul 2008 6.1295257 4.8952578
Aug 2008 5.0833970 3.3151823
Sep 2008 4.1475806 1.8759805
Oct 2008 3.3386904 0.6258211
Nov 2008 2.7653501 -0.3520691
Dec 2008 2.2257621 -1.2571781
Jan 2009 1.7698540 -2.0508690
Feb 2009 1.5859473 -2.5470445
Mar 2009 1.3293565 -3.0959797
Apr 2009 0.7633876 -3.9366747
May 2009 0.2879706 -4.6722395
$upper
80% 95%
Jun 2008 9.43931 10.03438
Jul 2008 10.79270 12.02697
Aug 2008 11.76387 13.53209
Sep 2008 12.72989 15.00149
Oct 2008 13.58815 16.30102
Nov 2008 14.54324 17.66066
Dec 2008 15.38462 18.86756
Jan 2009 16.20489 20.02561
Feb 2009 17.20076 21.33375
Mar 2009 18.04867 22.47401
Apr 2009 18.52065 23.22071
May 2009 19.02809 23.98830
plot(forecast_values,main="ARIMA forecast",col="blue")