#library(lubridate)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
#library(tidyverse)
#library(car)
#library(astsa)
#library(foreign)
#library(timsac)
#library(vars)
#library(lmtest)
#library(mFilter)
#library(dynlm)
#library(nlme)
#library(lmtest)
#library(broom)
#library(kableExtra)
#library(knitr)
#library(MASS)
#library(parallel)
#library(car)
#library(mlogit)
#library(dplyr)
#library(tidyr)
library(forecast)
#library(fpp2)
#library(stats)
#library(quantmod)
?AirPassengers
data("AirPassengers")
Y <- AirPassengers
print(Y)
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 264 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 317 313 318 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 422 465 467 404 347 305 336
## 1958 340 318 362 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 405
## 1960 417 391 419 461 472 535 622 606 508 461 390 432
##PASO 1. 
plot(Y,  main="Serie de tiempo", ylab="AirPassengers", col="red")

componente = decompose(Y, type = "additive")

tend <- componente$trend #m_t
estac <- componente$seasonal #s_t
error <- componente$random #z_t

plot(componente)

##PASO 2.
ndiffs(Y)
## [1] 1
serielog=log(Y)
serielog
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1949 4.718499 4.770685 4.882802 4.859812 4.795791 4.905275 4.997212 4.997212
## 1950 4.744932 4.836282 4.948760 4.905275 4.828314 5.003946 5.135798 5.135798
## 1951 4.976734 5.010635 5.181784 5.093750 5.147494 5.181784 5.293305 5.293305
## 1952 5.141664 5.192957 5.262690 5.198497 5.209486 5.384495 5.438079 5.488938
## 1953 5.278115 5.278115 5.463832 5.459586 5.433722 5.493061 5.575949 5.605802
## 1954 5.318120 5.236442 5.459586 5.424950 5.455321 5.575949 5.710427 5.680173
## 1955 5.488938 5.451038 5.587249 5.594711 5.598422 5.752573 5.897154 5.849325
## 1956 5.648974 5.624018 5.758902 5.746203 5.762051 5.924256 6.023448 6.003887
## 1957 5.752573 5.707110 5.874931 5.852202 5.872118 6.045005 6.142037 6.146329
## 1958 5.828946 5.762051 5.891644 5.852202 5.894403 6.075346 6.196444 6.224558
## 1959 5.886104 5.834811 6.006353 5.981414 6.040255 6.156979 6.306275 6.326149
## 1960 6.033086 5.968708 6.037871 6.133398 6.156979 6.282267 6.432940 6.406880
##           Sep      Oct      Nov      Dec
## 1949 4.912655 4.779123 4.644391 4.770685
## 1950 5.062595 4.890349 4.736198 4.941642
## 1951 5.214936 5.087596 4.983607 5.111988
## 1952 5.342334 5.252273 5.147494 5.267858
## 1953 5.468060 5.351858 5.192957 5.303305
## 1954 5.556828 5.433722 5.313206 5.433722
## 1955 5.743003 5.613128 5.468060 5.627621
## 1956 5.872118 5.723585 5.602119 5.723585
## 1957 6.001415 5.849325 5.720312 5.817111
## 1958 6.001415 5.883322 5.736572 5.820083
## 1959 6.137727 6.008813 5.891644 6.003887
## 1960 6.230481 6.133398 5.966147 6.068426
plot(serielog) #We visualize if it is stationary or not. It is.

adf.test(Y) #Does a Dickey Fuller Test that tells you if the data is stationary or not
## Warning in adf.test(Y): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  Y
## Dickey-Fuller = -7.3186, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
#If p > 0.05 then it is not stationary
#It is stationary p =0.01

seriedif=diff(Y) #It does substraction between two points to make it stationary
plot(seriedif)

##PASO 3.
acf(seriedif) #Autocorrelation plot

adf.test(seriedif)
## Warning in adf.test(seriedif): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  seriedif
## Dickey-Fuller = -7.0177, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
plot(seriedif, type="o", lty="dashed",main="Serie de Tiempo",col="blue")

#With the autocorrelation plot and parcial autocorrelation plot you can get p and q
par(mfrow=c(2,1), mar=c(4,4,4,1)+.1) #Par to graph in the same canvas
acf(seriedif)#tells you q - number of moving averages
pacf(seriedif)#tells you p - number of autoregressives

acf(ts(seriedif, frequency=1))
pacf(ts(seriedif, frequency=1))

#PASO 4. 
modelo1.1=arima(Y,order=c(1,2,1)) #(p,d,q) 
summary(modelo1.1)
## 
## Call:
## arima(x = Y, order = c(1, 2, 1))
## 
## Coefficients:
##          ar1      ma1
##       0.3129  -1.0000
## s.e.  0.0805   0.0176
## 
## sigma^2 estimated as 1034:  log likelihood = -696.46,  aic = 1398.93
## 
## Training set error measures:
##                       ME    RMSE      MAE        MPE     MAPE      MASE
## Training set -0.02226461 31.9296 24.74944 -0.3318117 8.700005 0.9570499
##                    ACF1
## Training set 0.06233016
tsdiag(modelo1.1) # this should look like white noise.

Box.test(residuals(modelo1.1),type="Ljung-Box") # if p > 0.05 then there's white noise, that means the model is good
## 
##  Box-Ljung test
## 
## data:  residuals(modelo1.1)
## X-squared = 0.57118, df = 1, p-value = 0.4498
#p = 0.449, so it is white noise 

error=residuals(modelo1.1) 
plot(error) #error mean = zero

mean(error)
## [1] -0.02226461
##PASO 5.
#pronóstico para 3 años
pronostico= predict(modelo1.1, n.ahead=36)
print(pronostico)
## $pred
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1961 446.7759 453.0320 456.6220 459.3776 461.8720 464.2848 466.6720 469.0511
## 1962 480.9304 483.3059 485.6815 488.0570 490.4325 492.8080 495.1836 497.5591
## 1963 509.4367 511.8122 514.1878 516.5633 518.9388 521.3143 523.6899 526.0654
##           Sep      Oct      Nov      Dec
## 1961 471.4278 473.8037 476.1793 478.5549
## 1962 499.9346 502.3101 504.6857 507.0612
## 1963 528.4409 530.8164 533.1920 535.5675
## 
## $se
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 1961  32.26514  53.42731  70.52408  84.93566  97.53941 108.87345 119.27385
## 1962 170.61589 178.03653 185.24218 192.25703 199.10124 205.79183 212.34333
## 1963 249.32983 255.18198 260.96122 266.67229 272.31949 277.90670 283.43747
##            Aug       Sep       Oct       Nov       Dec
## 1961 128.95901 138.07726 146.73363 155.00534 162.95091
## 1962 218.76823 225.07739 231.28025 237.38517 243.39951
## 1963 288.91505 294.34240 299.72226 305.05713 310.34934
plot(pronostico$pred, pronostico$se)