library(datasets)
df<-AirPassengers
?AirPassengers
## starting httpd help server ... done
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v ggplot2   3.3.3     v fma       2.4  
## v forecast  8.13      v expsmooth 2.3
Y<-ts(df,frequency = 12,start = c(1949,1),end = c(1960,12))

#Time plot of passengers by years
{autoplot(Y)+
  ggtitle("TIME SERIES: Air Line Passengers by years")+
  ylab("Number of Passengers:In Thousands")+
    geom_smooth()
}
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#the data has a strong trend and is positive with the number of passengers increasing with years
#the data also seems to suggest seasonality since there are peaks and dips in the trend


#checking just the trend
plot(aggregate(Y,FUN = mean),
     ylab="Number of Passengers:In Thoudans",
     xlab="Years",
     main="Trend of passengers by Years")
abline(reg = lm(df~time(Y)),col="blue")#mean of the time series analysis

#take the first difference of the data to remove the trend
#we have to try and investigate seasonality

frequency(Y)
## [1] 12
#this will print the time cycle across years
cycle(Y)
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949   1   2   3   4   5   6   7   8   9  10  11  12
## 1950   1   2   3   4   5   6   7   8   9  10  11  12
## 1951   1   2   3   4   5   6   7   8   9  10  11  12
## 1952   1   2   3   4   5   6   7   8   9  10  11  12
## 1953   1   2   3   4   5   6   7   8   9  10  11  12
## 1954   1   2   3   4   5   6   7   8   9  10  11  12
## 1955   1   2   3   4   5   6   7   8   9  10  11  12
## 1956   1   2   3   4   5   6   7   8   9  10  11  12
## 1957   1   2   3   4   5   6   7   8   9  10  11  12
## 1958   1   2   3   4   5   6   7   8   9  10  11  12
## 1959   1   2   3   4   5   6   7   8   9  10  11  12
## 1960   1   2   3   4   5   6   7   8   9  10  11  12
#box plot across months will give us a sense on seasonal effect
{boxplot(df~cycle(Y),col=rainbow(12),
        main="Box Plot of Passengers By Months",
        xlab="Months:1=JAN,...etc.",
        ylab = "Number of Passengers:In Thousands")
}

#the data suggest that we have more passengers on average during July and August,with
#June being the 3rd highest in terms of average passengers
#and the lowest in Feb and Nov

#AR I MA
#p  d  q



DY<<-diff(Y)
#time plot of difference data
autoplot(DY)+
  ggtitle("Time Plot:Passengers by years")+
  ylab("Thousands")

{
  ggseasonplot(DY)+
    ggtitle("Seasonal Plot:Change in Passengers per day")
  }

#we see a sharp drop from july to september every year from about
# 75 000 passengers in July To around -100 000 in september.
#but overal passengers pick up from september every year till Dec
#with 1958,59 and 1960 seeing the lowest drop in airline passenger

#another seasonal plot
# seasonal subseries plot

ggsubseriesplot(DY)

#it verifies that in july we have the highest number of passengers and by septmber
#we experience a shortage of passengers in the excess of 100 000



#Forecast with Various Methods
#use a benchmark method
#seasonal naive method as our benchmark

fit<-snaive(DY)     #Residual sd: 12.3109 or the model misses on average by
print(summary(fit)) #12,3K Passengers
## 
## Forecast method: Seasonal naive method
## 
## Model Information:
## Call: snaive(y = DY) 
## 
## Residual sd: 12.3109 
## 
## Error measures:
##                     ME     RMSE      MAE  MPE MAPE MASE       ACF1
## Training set 0.1832061 12.31086 9.419847 -Inf  Inf    1 -0.3098146
## 
## Forecasts:
##          Point Forecast       Lo 80       Hi 80       Lo 95      Hi 95
## Jan 1961             12   -3.777001  27.7770008  -12.128840  36.128840
## Feb 1961            -26  -41.777001 -10.2229992  -50.128840  -1.871160
## Mar 1961             28   12.222999  43.7770008    3.871160  52.128840
## Apr 1961             42   26.222999  57.7770008   17.871160  66.128840
## May 1961             11   -4.777001  26.7770008  -13.128840  35.128840
## Jun 1961             63   47.222999  78.7770008   38.871160  87.128840
## Jul 1961             87   71.222999 102.7770008   62.871160 111.128840
## Aug 1961            -16  -31.777001  -0.2229992  -40.128840   8.128840
## Sep 1961            -98 -113.777001 -82.2229992 -122.128840 -73.871160
## Oct 1961            -47  -62.777001 -31.2229992  -71.128840 -22.871160
## Nov 1961            -71  -86.777001 -55.2229992  -95.128840 -46.871160
## Dec 1961             42   26.222999  57.7770008   17.871160  66.128840
## Jan 1962             12  -10.312048  34.3120484  -22.123333  46.123333
## Feb 1962            -26  -48.312048  -3.6879516  -60.123333   8.123333
## Mar 1962             28    5.687952  50.3120484   -6.123333  62.123333
## Apr 1962             42   19.687952  64.3120484    7.876667  76.123333
## May 1962             11  -11.312048  33.3120484  -23.123333  45.123333
## Jun 1962             63   40.687952  85.3120484   28.876667  97.123333
## Jul 1962             87   64.687952 109.3120484   52.876667 121.123333
## Aug 1962            -16  -38.312048   6.3120484  -50.123333  18.123333
## Sep 1962            -98 -120.312048 -75.6879516 -132.123333 -63.876667
## Oct 1962            -47  -69.312048 -24.6879516  -81.123333 -12.876667
## Nov 1962            -71  -93.312048 -48.6879516 -105.123333 -36.876667
## Dec 1962             42   19.687952  64.3120484    7.876667  76.123333
##          Point Forecast       Lo 80       Hi 80       Lo 95      Hi 95
## Jan 1961             12   -3.777001  27.7770008  -12.128840  36.128840
## Feb 1961            -26  -41.777001 -10.2229992  -50.128840  -1.871160
## Mar 1961             28   12.222999  43.7770008    3.871160  52.128840
## Apr 1961             42   26.222999  57.7770008   17.871160  66.128840
## May 1961             11   -4.777001  26.7770008  -13.128840  35.128840
## Jun 1961             63   47.222999  78.7770008   38.871160  87.128840
## Jul 1961             87   71.222999 102.7770008   62.871160 111.128840
## Aug 1961            -16  -31.777001  -0.2229992  -40.128840   8.128840
## Sep 1961            -98 -113.777001 -82.2229992 -122.128840 -73.871160
## Oct 1961            -47  -62.777001 -31.2229992  -71.128840 -22.871160
## Nov 1961            -71  -86.777001 -55.2229992  -95.128840 -46.871160
## Dec 1961             42   26.222999  57.7770008   17.871160  66.128840
## Jan 1962             12  -10.312048  34.3120484  -22.123333  46.123333
## Feb 1962            -26  -48.312048  -3.6879516  -60.123333   8.123333
## Mar 1962             28    5.687952  50.3120484   -6.123333  62.123333
## Apr 1962             42   19.687952  64.3120484    7.876667  76.123333
## May 1962             11  -11.312048  33.3120484  -23.123333  45.123333
## Jun 1962             63   40.687952  85.3120484   28.876667  97.123333
## Jul 1962             87   64.687952 109.3120484   52.876667 121.123333
## Aug 1962            -16  -38.312048   6.3120484  -50.123333  18.123333
## Sep 1962            -98 -120.312048 -75.6879516 -132.123333 -63.876667
## Oct 1962            -47  -69.312048 -24.6879516  -81.123333 -12.876667
## Nov 1962            -71  -93.312048 -48.6879516 -105.123333 -36.876667
## Dec 1962             42   19.687952  64.3120484    7.876667  76.123333
checkresiduals(fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 51.362, df = 24, p-value = 0.0009468
## 
## Model df: 0.   Total lags used: 24
#2 Exponential Smoothing Models (ETS)
#fit ETS method

                    #ETS misses on average by 39.2 passengers
fit_ets<-ets(Y)   #sigma: 0.0392 same measure as Sd, this is smaller so it fits better
print(summary(fit_ets))
## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = Y) 
## 
##   Smoothing parameters:
##     alpha = 0.7096 
##     beta  = 0.0204 
##     gamma = 1e-04 
##     phi   = 0.98 
## 
##   Initial states:
##     l = 120.9939 
##     b = 1.7705 
##     s = 0.8944 0.7993 0.9217 1.0592 1.2203 1.2318
##            1.1105 0.9786 0.9804 1.011 0.8869 0.9059
## 
##   sigma:  0.0392
## 
##      AIC     AICc      BIC 
## 1395.166 1400.638 1448.623 
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 1.567359 10.74726 7.791605 0.4357799 2.857917 0.2432573 0.03945056
##                    ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 1.567359 10.74726 7.791605 0.4357799 2.857917 0.2432573 0.03945056
checkresiduals(fit_ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,Ad,M)
## Q* = 51.341, df = 7, p-value = 7.871e-09
## 
## Model df: 17.   Total lags used: 24
#Arima Model
#we could use the diff data to get rid of the trend and tell arima that
# theres seasonality going on
# or we could

fit_arima<-auto.arima(Y,d=1,D=1,stepwise = F,approximation = F,trace = T)
## 
##  ARIMA(0,1,0)(0,1,0)[12]                    : 1031.539
##  ARIMA(0,1,0)(0,1,1)[12]                    : 1030.846
##  ARIMA(0,1,0)(0,1,2)[12]                    : 1032.465
##  ARIMA(0,1,0)(1,1,0)[12]                    : 1030.501
##  ARIMA(0,1,0)(1,1,1)[12]                    : 1032.317
##  ARIMA(0,1,0)(1,1,2)[12]                    : 1034.414
##  ARIMA(0,1,0)(2,1,0)[12]                    : 1032.309
##  ARIMA(0,1,0)(2,1,1)[12]                    : 1034.423
##  ARIMA(0,1,0)(2,1,2)[12]                    : Inf
##  ARIMA(0,1,1)(0,1,0)[12]                    : 1020.733
##  ARIMA(0,1,1)(0,1,1)[12]                    : 1021.192
##  ARIMA(0,1,1)(0,1,2)[12]                    : 1019.812
##  ARIMA(0,1,1)(1,1,0)[12]                    : 1020.614
##  ARIMA(0,1,1)(1,1,1)[12]                    : Inf
##  ARIMA(0,1,1)(1,1,2)[12]                    : Inf
##  ARIMA(0,1,1)(2,1,0)[12]                    : 1019.496
##  ARIMA(0,1,1)(2,1,1)[12]                    : Inf
##  ARIMA(0,1,1)(2,1,2)[12]                    : Inf
##  ARIMA(0,1,2)(0,1,0)[12]                    : 1022.816
##  ARIMA(0,1,2)(0,1,1)[12]                    : 1023.319
##  ARIMA(0,1,2)(0,1,2)[12]                    : 1021.934
##  ARIMA(0,1,2)(1,1,0)[12]                    : 1022.742
##  ARIMA(0,1,2)(1,1,1)[12]                    : Inf
##  ARIMA(0,1,2)(1,1,2)[12]                    : Inf
##  ARIMA(0,1,2)(2,1,0)[12]                    : 1021.628
##  ARIMA(0,1,2)(2,1,1)[12]                    : Inf
##  ARIMA(0,1,3)(0,1,0)[12]                    : 1020.522
##  ARIMA(0,1,3)(0,1,1)[12]                    : 1021.286
##  ARIMA(0,1,3)(0,1,2)[12]                    : 1020.837
##  ARIMA(0,1,3)(1,1,0)[12]                    : 1020.842
##  ARIMA(0,1,3)(1,1,1)[12]                    : 1021.296
##  ARIMA(0,1,3)(2,1,0)[12]                    : 1020.423
##  ARIMA(0,1,4)(0,1,0)[12]                    : 1021.176
##  ARIMA(0,1,4)(0,1,1)[12]                    : 1020.854
##  ARIMA(0,1,4)(1,1,0)[12]                    : 1020.285
##  ARIMA(0,1,5)(0,1,0)[12]                    : 1022.076
##  ARIMA(1,1,0)(0,1,0)[12]                    : 1020.488
##  ARIMA(1,1,0)(0,1,1)[12]                    : 1021.103
##  ARIMA(1,1,0)(0,1,2)[12]                    : 1019.811
##  ARIMA(1,1,0)(1,1,0)[12]                    : 1020.582
##  ARIMA(1,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(1,1,0)(1,1,2)[12]                    : Inf
##  ARIMA(1,1,0)(2,1,0)[12]                    : 1019.557
##  ARIMA(1,1,0)(2,1,1)[12]                    : Inf
##  ARIMA(1,1,0)(2,1,2)[12]                    : Inf
##  ARIMA(1,1,1)(0,1,0)[12]                    : 1022.583
##  ARIMA(1,1,1)(0,1,1)[12]                    : 1023.214
##  ARIMA(1,1,1)(0,1,2)[12]                    : 1021.793
##  ARIMA(1,1,1)(1,1,0)[12]                    : 1022.674
##  ARIMA(1,1,1)(1,1,1)[12]                    : Inf
##  ARIMA(1,1,1)(1,1,2)[12]                    : Inf
##  ARIMA(1,1,1)(2,1,0)[12]                    : 1021.513
##  ARIMA(1,1,1)(2,1,1)[12]                    : Inf
##  ARIMA(1,1,2)(0,1,0)[12]                    : 1024.478
##  ARIMA(1,1,2)(0,1,1)[12]                    : 1025.198
##  ARIMA(1,1,2)(0,1,2)[12]                    : 1023.802
##  ARIMA(1,1,2)(1,1,0)[12]                    : Inf
##  ARIMA(1,1,2)(1,1,1)[12]                    : Inf
##  ARIMA(1,1,2)(2,1,0)[12]                    : 1023.483
##  ARIMA(1,1,3)(0,1,0)[12]                    : 1019.733
##  ARIMA(1,1,3)(0,1,1)[12]                    : 1020.211
##  ARIMA(1,1,3)(1,1,0)[12]                    : 1019.812
##  ARIMA(1,1,4)(0,1,0)[12]                    : Inf
##  ARIMA(2,1,0)(0,1,0)[12]                    : 1022.583
##  ARIMA(2,1,0)(0,1,1)[12]                    : 1023.222
##  ARIMA(2,1,0)(0,1,2)[12]                    : 1021.861
##  ARIMA(2,1,0)(1,1,0)[12]                    : 1022.691
##  ARIMA(2,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(2,1,0)(1,1,2)[12]                    : Inf
##  ARIMA(2,1,0)(2,1,0)[12]                    : 1021.6
##  ARIMA(2,1,0)(2,1,1)[12]                    : Inf
##  ARIMA(2,1,1)(0,1,0)[12]                    : 1018.165
##  ARIMA(2,1,1)(0,1,1)[12]                    : 1018.84
##  ARIMA(2,1,1)(0,1,2)[12]                    : 1018.628
##  ARIMA(2,1,1)(1,1,0)[12]                    : 1018.395
##  ARIMA(2,1,1)(1,1,1)[12]                    : Inf
##  ARIMA(2,1,1)(2,1,0)[12]                    : 1018.336
##  ARIMA(2,1,2)(0,1,0)[12]                    : 1019.771
##  ARIMA(2,1,2)(0,1,1)[12]                    : 1020.613
##  ARIMA(2,1,2)(1,1,0)[12]                    : 1020.224
##  ARIMA(2,1,3)(0,1,0)[12]                    : 1020.474
##  ARIMA(3,1,0)(0,1,0)[12]                    : 1023.984
##  ARIMA(3,1,0)(0,1,1)[12]                    : 1024.921
##  ARIMA(3,1,0)(0,1,2)[12]                    : 1023.827
##  ARIMA(3,1,0)(1,1,0)[12]                    : 1024.484
##  ARIMA(3,1,0)(1,1,1)[12]                    : Inf
##  ARIMA(3,1,0)(2,1,0)[12]                    : 1023.496
##  ARIMA(3,1,1)(0,1,0)[12]                    : 1019.565
##  ARIMA(3,1,1)(0,1,1)[12]                    : 1020.374
##  ARIMA(3,1,1)(1,1,0)[12]                    : 1020.005
##  ARIMA(3,1,2)(0,1,0)[12]                    : Inf
##  ARIMA(4,1,0)(0,1,0)[12]                    : 1022.456
##  ARIMA(4,1,0)(0,1,1)[12]                    : 1022.836
##  ARIMA(4,1,0)(1,1,0)[12]                    : 1022.208
##  ARIMA(4,1,1)(0,1,0)[12]                    : 1024.624
##  ARIMA(5,1,0)(0,1,0)[12]                    : 1024.622
## 
## 
## 
##  Best model: ARIMA(2,1,1)(0,1,0)[12]
print(summary(fit_arima))
## Series: Y 
## ARIMA(2,1,1)(0,1,0)[12] 
## 
## Coefficients:
##          ar1     ar2      ma1
##       0.5960  0.2143  -0.9819
## s.e.  0.0888  0.0880   0.0292
## 
## sigma^2 estimated as 132.3:  log likelihood=-504.92
## AIC=1017.85   AICc=1018.17   BIC=1029.35
## 
## Training set error measures:
##                  ME     RMSE     MAE      MPE     MAPE     MASE        ACF1
## Training set 1.3423 10.84619 7.86754 0.420698 2.800458 0.245628 -0.00124847
##                  ME     RMSE     MAE      MPE     MAPE     MASE        ACF1
## Training set 1.3423 10.84619 7.86754 0.420698 2.800458 0.245628 -0.00124847
checkresiduals(fit_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,1)(0,1,0)[12]
## Q* = 37.784, df = 21, p-value = 0.01366
## 
## Model df: 3.   Total lags used: 24
#model misses by 11.50217 on average
# that is around 11 502 Passengers

#the Best model so far is the Exponential Smoothing model
#with an average miss of 39 Passengers this is the lowest Sd by far compared to the 
#other two which are in the thousands in terms of average miss

#we will use the ETS to forecast

#Forecast, Since the ETS is fitting the best

fcst<-forecast(fit_ets,h=24,level = 0.95,fan = F,)
autoplot(fcst)