data(AirPassengers)
class(AirPassengers)
## [1] "ts"
#This tells you that the data series is in a time series format
start(AirPassengers)
## [1] 1949 1
end(AirPassengers)
## [1] 1960 12
frequency(AirPassengers)
## [1] 12
library(tseries)
plot(AirPassengers)
abline(reg=lm(AirPassengers~time(AirPassengers)))

cycle(AirPassengers)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 1 2 3 4 5 6 7 8 9 10 11 12
## 1950 1 2 3 4 5 6 7 8 9 10 11 12
## 1951 1 2 3 4 5 6 7 8 9 10 11 12
## 1952 1 2 3 4 5 6 7 8 9 10 11 12
## 1953 1 2 3 4 5 6 7 8 9 10 11 12
## 1954 1 2 3 4 5 6 7 8 9 10 11 12
## 1955 1 2 3 4 5 6 7 8 9 10 11 12
## 1956 1 2 3 4 5 6 7 8 9 10 11 12
## 1957 1 2 3 4 5 6 7 8 9 10 11 12
## 1958 1 2 3 4 5 6 7 8 9 10 11 12
## 1959 1 2 3 4 5 6 7 8 9 10 11 12
## 1960 1 2 3 4 5 6 7 8 9 10 11 12
plot(aggregate(AirPassengers,FUN=mean))

#This will aggregate the cycles and display a year on year trend
boxplot(AirPassengers~cycle(AirPassengers))

#Box plot across months will give us a sense on seasonal effect
#ARIMA Models
adf.test(diff(log(AirPassengers)), alternative="stationary", k=0)
## Warning in adf.test(diff(log(AirPassengers)), alternative = "stationary", :
## p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff(log(AirPassengers))
## Dickey-Fuller = -9.6003, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
# We see that the series is stationary enough to do any kind of time series modelling.
acf(log(AirPassengers))

#Clearly, the decay of ACF chart is very slow, which means that the population is not stationary.
# We have already discussed above that we now intend to regress on the difference of logs rather than log directly.
acf(diff(log(AirPassengers)))

pacf(diff(log(AirPassengers)))

# Clearly, ACF plot cuts off after the first lag.
#Hence, we understood that value of p should be 0 as the ACF is the curve getting a cut off.
#While value of q should be 1 or 2. After a few iterations, we found that (0,1,1) as (p,d,q) comes out to be the combination with least AIC and BIC.
(fit <- arima(log(AirPassengers), c(0, 1, 1),seasonal = list(order = c(0, 1, 1), period = 12)))
##
## Call:
## arima(x = log(AirPassengers), order = c(0, 1, 1), seasonal = list(order = c(0,
## 1, 1), period = 12))
##
## Coefficients:
## ma1 sma1
## -0.4018 -0.5569
## s.e. 0.0896 0.0731
##
## sigma^2 estimated as 0.001348: log likelihood = 244.7, aic = -483.4
pred <- predict(fit, n.ahead = 10*12)
ts.plot(AirPassengers,2.718^pred$pred, log = "y", lty = c(1,3))
