library(datasets)
df<-AirPassengers
?AirPassengers
## starting httpd help server ... done
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2 3.3.3 v fma 2.4
## v forecast 8.13 v expsmooth 2.3
Y<-ts(df,frequency = 12,start = c(1949,1),end = c(1960,12))
#Time plot of passengers by years
{autoplot(Y)+
ggtitle("TIME SERIES: Air Line Passengers by years")+
ylab("Number of Passengers:In Thousands")+
geom_smooth()
}
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#the data has a strong trend and is positive with the number of passengers increasing with years
#the data also seems to suggest seasonality since there are peaks and dips in the trend
#checking just the trend
plot(aggregate(Y,FUN = mean),
ylab="Number of Passengers:In Thoudans",
xlab="Years",
main="Trend of passengers by Years")
abline(reg = lm(df~time(Y)),col="blue")#mean of the time series analysis

#take the first difference of the data to remove the trend
#we have to try and investigate seasonality
frequency(Y)
## [1] 12
#this will print the time cycle across years
cycle(Y)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 1 2 3 4 5 6 7 8 9 10 11 12
## 1950 1 2 3 4 5 6 7 8 9 10 11 12
## 1951 1 2 3 4 5 6 7 8 9 10 11 12
## 1952 1 2 3 4 5 6 7 8 9 10 11 12
## 1953 1 2 3 4 5 6 7 8 9 10 11 12
## 1954 1 2 3 4 5 6 7 8 9 10 11 12
## 1955 1 2 3 4 5 6 7 8 9 10 11 12
## 1956 1 2 3 4 5 6 7 8 9 10 11 12
## 1957 1 2 3 4 5 6 7 8 9 10 11 12
## 1958 1 2 3 4 5 6 7 8 9 10 11 12
## 1959 1 2 3 4 5 6 7 8 9 10 11 12
## 1960 1 2 3 4 5 6 7 8 9 10 11 12
#box plot across months will give us a sense on seasonal effect
{boxplot(df~cycle(Y),col=rainbow(12),
main="Box Plot of Passengers By Months",
xlab="Months:1=JAN,...etc.",
ylab = "Number of Passengers:In Thousands")
}

#the data suggest that we have more passengers on average during July and August,with
#June being the 3rd highest in terms of average passengers
#and the lowest in Feb and Nov
#AR I MA
#p d q
DY<<-diff(Y)
#time plot of difference data
autoplot(DY)+
ggtitle("Time Plot:Passengers by years")+
ylab("Thousands")

{
ggseasonplot(DY)+
ggtitle("Seasonal Plot:Change in Passengers per day")
}

#we see a sharp drop from july to september every year from about
# 75 000 passengers in July To around -100 000 in september.
#but overal passengers pick up from september every year till Dec
#with 1958,59 and 1960 seeing the lowest drop in airline passenger
#another seasonal plot
# seasonal subseries plot
ggsubseriesplot(DY)

#it verifies that in july we have the highest number of passengers and by septmber
#we experience a shortage of passengers in the excess of 100 000
#Forecast with Various Methods
#use a benchmark method
#seasonal naive method as our benchmark
fit<-snaive(DY) #Residual sd: 12.3109 or the model misses on average by
print(summary(fit)) #12,3K Passengers
##
## Forecast method: Seasonal naive method
##
## Model Information:
## Call: snaive(y = DY)
##
## Residual sd: 12.3109
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1832061 12.31086 9.419847 -Inf Inf 1 -0.3098146
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1961 12 -3.777001 27.7770008 -12.128840 36.128840
## Feb 1961 -26 -41.777001 -10.2229992 -50.128840 -1.871160
## Mar 1961 28 12.222999 43.7770008 3.871160 52.128840
## Apr 1961 42 26.222999 57.7770008 17.871160 66.128840
## May 1961 11 -4.777001 26.7770008 -13.128840 35.128840
## Jun 1961 63 47.222999 78.7770008 38.871160 87.128840
## Jul 1961 87 71.222999 102.7770008 62.871160 111.128840
## Aug 1961 -16 -31.777001 -0.2229992 -40.128840 8.128840
## Sep 1961 -98 -113.777001 -82.2229992 -122.128840 -73.871160
## Oct 1961 -47 -62.777001 -31.2229992 -71.128840 -22.871160
## Nov 1961 -71 -86.777001 -55.2229992 -95.128840 -46.871160
## Dec 1961 42 26.222999 57.7770008 17.871160 66.128840
## Jan 1962 12 -10.312048 34.3120484 -22.123333 46.123333
## Feb 1962 -26 -48.312048 -3.6879516 -60.123333 8.123333
## Mar 1962 28 5.687952 50.3120484 -6.123333 62.123333
## Apr 1962 42 19.687952 64.3120484 7.876667 76.123333
## May 1962 11 -11.312048 33.3120484 -23.123333 45.123333
## Jun 1962 63 40.687952 85.3120484 28.876667 97.123333
## Jul 1962 87 64.687952 109.3120484 52.876667 121.123333
## Aug 1962 -16 -38.312048 6.3120484 -50.123333 18.123333
## Sep 1962 -98 -120.312048 -75.6879516 -132.123333 -63.876667
## Oct 1962 -47 -69.312048 -24.6879516 -81.123333 -12.876667
## Nov 1962 -71 -93.312048 -48.6879516 -105.123333 -36.876667
## Dec 1962 42 19.687952 64.3120484 7.876667 76.123333
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1961 12 -3.777001 27.7770008 -12.128840 36.128840
## Feb 1961 -26 -41.777001 -10.2229992 -50.128840 -1.871160
## Mar 1961 28 12.222999 43.7770008 3.871160 52.128840
## Apr 1961 42 26.222999 57.7770008 17.871160 66.128840
## May 1961 11 -4.777001 26.7770008 -13.128840 35.128840
## Jun 1961 63 47.222999 78.7770008 38.871160 87.128840
## Jul 1961 87 71.222999 102.7770008 62.871160 111.128840
## Aug 1961 -16 -31.777001 -0.2229992 -40.128840 8.128840
## Sep 1961 -98 -113.777001 -82.2229992 -122.128840 -73.871160
## Oct 1961 -47 -62.777001 -31.2229992 -71.128840 -22.871160
## Nov 1961 -71 -86.777001 -55.2229992 -95.128840 -46.871160
## Dec 1961 42 26.222999 57.7770008 17.871160 66.128840
## Jan 1962 12 -10.312048 34.3120484 -22.123333 46.123333
## Feb 1962 -26 -48.312048 -3.6879516 -60.123333 8.123333
## Mar 1962 28 5.687952 50.3120484 -6.123333 62.123333
## Apr 1962 42 19.687952 64.3120484 7.876667 76.123333
## May 1962 11 -11.312048 33.3120484 -23.123333 45.123333
## Jun 1962 63 40.687952 85.3120484 28.876667 97.123333
## Jul 1962 87 64.687952 109.3120484 52.876667 121.123333
## Aug 1962 -16 -38.312048 6.3120484 -50.123333 18.123333
## Sep 1962 -98 -120.312048 -75.6879516 -132.123333 -63.876667
## Oct 1962 -47 -69.312048 -24.6879516 -81.123333 -12.876667
## Nov 1962 -71 -93.312048 -48.6879516 -105.123333 -36.876667
## Dec 1962 42 19.687952 64.3120484 7.876667 76.123333
checkresiduals(fit)

##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 51.362, df = 24, p-value = 0.0009468
##
## Model df: 0. Total lags used: 24
#2 Exponential Smoothing Models (ETS)
#fit ETS method
#ETS misses on average by 39.2 passengers
fit_ets<-ets(Y) #sigma: 0.0392 same measure as Sd, this is smaller so it fits better
print(summary(fit_ets))
## ETS(M,Ad,M)
##
## Call:
## ets(y = Y)
##
## Smoothing parameters:
## alpha = 0.7096
## beta = 0.0204
## gamma = 1e-04
## phi = 0.98
##
## Initial states:
## l = 120.9939
## b = 1.7705
## s = 0.8944 0.7993 0.9217 1.0592 1.2203 1.2318
## 1.1105 0.9786 0.9804 1.011 0.8869 0.9059
##
## sigma: 0.0392
##
## AIC AICc BIC
## 1395.166 1400.638 1448.623
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.567359 10.74726 7.791605 0.4357799 2.857917 0.2432573 0.03945056
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.567359 10.74726 7.791605 0.4357799 2.857917 0.2432573 0.03945056
checkresiduals(fit_ets)

##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 51.341, df = 7, p-value = 7.871e-09
##
## Model df: 17. Total lags used: 24
#Arima Model
#we could use the diff data to get rid of the trend and tell arima that
# theres seasonality going on
# or we could
fit_arima<-auto.arima(Y,d=1,D=1,stepwise = F,approximation = F,trace = T)
##
## ARIMA(0,1,0)(0,1,0)[12] : 1031.539
## ARIMA(0,1,0)(0,1,1)[12] : 1030.846
## ARIMA(0,1,0)(0,1,2)[12] : 1032.465
## ARIMA(0,1,0)(1,1,0)[12] : 1030.501
## ARIMA(0,1,0)(1,1,1)[12] : 1032.317
## ARIMA(0,1,0)(1,1,2)[12] : 1034.414
## ARIMA(0,1,0)(2,1,0)[12] : 1032.309
## ARIMA(0,1,0)(2,1,1)[12] : 1034.423
## ARIMA(0,1,0)(2,1,2)[12] : Inf
## ARIMA(0,1,1)(0,1,0)[12] : 1020.733
## ARIMA(0,1,1)(0,1,1)[12] : 1021.192
## ARIMA(0,1,1)(0,1,2)[12] : 1019.812
## ARIMA(0,1,1)(1,1,0)[12] : 1020.614
## ARIMA(0,1,1)(1,1,1)[12] : Inf
## ARIMA(0,1,1)(1,1,2)[12] : Inf
## ARIMA(0,1,1)(2,1,0)[12] : 1019.496
## ARIMA(0,1,1)(2,1,1)[12] : Inf
## ARIMA(0,1,1)(2,1,2)[12] : Inf
## ARIMA(0,1,2)(0,1,0)[12] : 1022.816
## ARIMA(0,1,2)(0,1,1)[12] : 1023.319
## ARIMA(0,1,2)(0,1,2)[12] : 1021.934
## ARIMA(0,1,2)(1,1,0)[12] : 1022.742
## ARIMA(0,1,2)(1,1,1)[12] : Inf
## ARIMA(0,1,2)(1,1,2)[12] : Inf
## ARIMA(0,1,2)(2,1,0)[12] : 1021.628
## ARIMA(0,1,2)(2,1,1)[12] : Inf
## ARIMA(0,1,3)(0,1,0)[12] : 1020.522
## ARIMA(0,1,3)(0,1,1)[12] : 1021.286
## ARIMA(0,1,3)(0,1,2)[12] : 1020.837
## ARIMA(0,1,3)(1,1,0)[12] : 1020.842
## ARIMA(0,1,3)(1,1,1)[12] : 1021.296
## ARIMA(0,1,3)(2,1,0)[12] : 1020.423
## ARIMA(0,1,4)(0,1,0)[12] : 1021.176
## ARIMA(0,1,4)(0,1,1)[12] : 1020.854
## ARIMA(0,1,4)(1,1,0)[12] : 1020.285
## ARIMA(0,1,5)(0,1,0)[12] : 1022.076
## ARIMA(1,1,0)(0,1,0)[12] : 1020.488
## ARIMA(1,1,0)(0,1,1)[12] : 1021.103
## ARIMA(1,1,0)(0,1,2)[12] : 1019.811
## ARIMA(1,1,0)(1,1,0)[12] : 1020.582
## ARIMA(1,1,0)(1,1,1)[12] : Inf
## ARIMA(1,1,0)(1,1,2)[12] : Inf
## ARIMA(1,1,0)(2,1,0)[12] : 1019.557
## ARIMA(1,1,0)(2,1,1)[12] : Inf
## ARIMA(1,1,0)(2,1,2)[12] : Inf
## ARIMA(1,1,1)(0,1,0)[12] : 1022.583
## ARIMA(1,1,1)(0,1,1)[12] : 1023.214
## ARIMA(1,1,1)(0,1,2)[12] : 1021.793
## ARIMA(1,1,1)(1,1,0)[12] : 1022.674
## ARIMA(1,1,1)(1,1,1)[12] : Inf
## ARIMA(1,1,1)(1,1,2)[12] : Inf
## ARIMA(1,1,1)(2,1,0)[12] : 1021.513
## ARIMA(1,1,1)(2,1,1)[12] : Inf
## ARIMA(1,1,2)(0,1,0)[12] : 1024.478
## ARIMA(1,1,2)(0,1,1)[12] : 1025.198
## ARIMA(1,1,2)(0,1,2)[12] : 1023.802
## ARIMA(1,1,2)(1,1,0)[12] : Inf
## ARIMA(1,1,2)(1,1,1)[12] : Inf
## ARIMA(1,1,2)(2,1,0)[12] : 1023.483
## ARIMA(1,1,3)(0,1,0)[12] : 1019.733
## ARIMA(1,1,3)(0,1,1)[12] : 1020.211
## ARIMA(1,1,3)(1,1,0)[12] : 1019.812
## ARIMA(1,1,4)(0,1,0)[12] : Inf
## ARIMA(2,1,0)(0,1,0)[12] : 1022.583
## ARIMA(2,1,0)(0,1,1)[12] : 1023.222
## ARIMA(2,1,0)(0,1,2)[12] : 1021.861
## ARIMA(2,1,0)(1,1,0)[12] : 1022.691
## ARIMA(2,1,0)(1,1,1)[12] : Inf
## ARIMA(2,1,0)(1,1,2)[12] : Inf
## ARIMA(2,1,0)(2,1,0)[12] : 1021.6
## ARIMA(2,1,0)(2,1,1)[12] : Inf
## ARIMA(2,1,1)(0,1,0)[12] : 1018.165
## ARIMA(2,1,1)(0,1,1)[12] : 1018.84
## ARIMA(2,1,1)(0,1,2)[12] : 1018.628
## ARIMA(2,1,1)(1,1,0)[12] : 1018.395
## ARIMA(2,1,1)(1,1,1)[12] : Inf
## ARIMA(2,1,1)(2,1,0)[12] : 1018.336
## ARIMA(2,1,2)(0,1,0)[12] : 1019.771
## ARIMA(2,1,2)(0,1,1)[12] : 1020.613
## ARIMA(2,1,2)(1,1,0)[12] : 1020.224
## ARIMA(2,1,3)(0,1,0)[12] : 1020.474
## ARIMA(3,1,0)(0,1,0)[12] : 1023.984
## ARIMA(3,1,0)(0,1,1)[12] : 1024.921
## ARIMA(3,1,0)(0,1,2)[12] : 1023.827
## ARIMA(3,1,0)(1,1,0)[12] : 1024.484
## ARIMA(3,1,0)(1,1,1)[12] : Inf
## ARIMA(3,1,0)(2,1,0)[12] : 1023.496
## ARIMA(3,1,1)(0,1,0)[12] : 1019.565
## ARIMA(3,1,1)(0,1,1)[12] : 1020.374
## ARIMA(3,1,1)(1,1,0)[12] : 1020.005
## ARIMA(3,1,2)(0,1,0)[12] : Inf
## ARIMA(4,1,0)(0,1,0)[12] : 1022.456
## ARIMA(4,1,0)(0,1,1)[12] : 1022.836
## ARIMA(4,1,0)(1,1,0)[12] : 1022.208
## ARIMA(4,1,1)(0,1,0)[12] : 1024.624
## ARIMA(5,1,0)(0,1,0)[12] : 1024.622
##
##
##
## Best model: ARIMA(2,1,1)(0,1,0)[12]
print(summary(fit_arima))
## Series: Y
## ARIMA(2,1,1)(0,1,0)[12]
##
## Coefficients:
## ar1 ar2 ma1
## 0.5960 0.2143 -0.9819
## s.e. 0.0888 0.0880 0.0292
##
## sigma^2 estimated as 132.3: log likelihood=-504.92
## AIC=1017.85 AICc=1018.17 BIC=1029.35
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.3423 10.84619 7.86754 0.420698 2.800458 0.245628 -0.00124847
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.3423 10.84619 7.86754 0.420698 2.800458 0.245628 -0.00124847
checkresiduals(fit_arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1)(0,1,0)[12]
## Q* = 37.784, df = 21, p-value = 0.01366
##
## Model df: 3. Total lags used: 24
#model misses by 11.50217 on average
# that is around 11 502 Passengers
#the Best model so far is the Exponential Smoothing model
#with an average miss of 39 Passengers this is the lowest Sd by far compared to the
#other two which are in the thousands in terms of average miss
#we will use the ETS to forecast
#Forecast, Since the ETS is fitting the best
fcst<-forecast(fit_ets,h=24,level = 0.95,fan = F,)
autoplot(fcst)
