set.seed(12345)
stationary_ts <- arima.sim(model = list(order = c(1, 0, 0), ar = 0.5), n = 500)
library(TSstudio)
ts_plot(stationary_ts,
title = "Stationary Time Series",
Ytitle = "Value",
Xtitle = "Index")
set.seed(12345)
non_stationary_ts <- arima.sim(model = list(order = c(1, 1, 0), ar = 0.3), n = 500)
ts_plot(non_stationary_ts,
title = "Non-Stationary Time Series",
Ytitle = "Value",
Xtitle = "Index")
## This example, on the other hand, violates the stationarity condition: the series exhibits a trend, meaning its mean changes over time. We consider time series data non-stationary whenever those conditions are not met.
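## A formal test can complement the visual inspection. The sketch below is a
## minimal example assuming the tseries package is installed (it is not used
## elsewhere in this document); it applies the Augmented Dickey-Fuller test,
## whose null hypothesis is that the series has a unit root (is non-stationary).
library(tseries)
adf.test(stationary_ts)      # expect a small p-value (reject the unit-root null)
adf.test(non_stationary_ts)  # expect a large p-value (fail to reject)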
data(AirPassengers)
ts_plot(AirPassengers,
title = "Monthly Airline Passenger Numbers 1949-1960",
Ytitle = "Thousands of Passengers",
Xtitle = "Year")
ts_plot(diff(AirPassengers, lag = 1),
title = "AirPassengers Series - First Differencing",
Xtitle = "Year",
Ytitle = "Differencing of Thousands of Passengers")
ts_plot(diff(diff(AirPassengers, lag = 1), 12),
title = "AirPassengers Series - First and Seasonal Differencing",
Xtitle = "Year",
Ytitle = "Differencing of Thousands of Passengers")
## We have now obtained a series that looks stationary: the seasonal differencing worked correctly and stabilized the variation of the series.
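## As a cross-check on the visual inspection, the forecast package can suggest
## how many differences a series needs; a minimal sketch, assuming forecast is
## installed (it is loaded later in this document):
forecast::ndiffs(AirPassengers)   # suggested non-seasonal differencing order
forecast::nsdiffs(AirPassengers)  # suggested seasonal differencing order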
ts_plot(diff(log(AirPassengers), lag = 1),
title = "AirPassengers Series - First Differencing with Log Transformation",
Xtitle = "Year",
Ytitle = "Differencing/Log of Thousands of Passengers")
set.seed(12345)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
p1 <- plot_ly()
p2 <- plot_ly()
# Overlay 20 independent random walks (p1) and their first differences (p2)
for(i in 1:20){
  rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 500)
  p1 <- p1 %>% add_lines(x = time(rw), y = as.numeric(rw))
  p2 <- p2 %>% add_lines(x = time(diff(rw)), y = as.numeric(diff(rw)))
}
p1 %>% layout(title = "Simulated Random Walk",
yaxis = list(title = "Value"),
xaxis = list(title = "Index")) %>%
hide_legend()
p2 %>% layout(title = "Simulated Random Walk with First-Order Differencing",
yaxis = list(title = "Value"),
xaxis = list(title = "Index")) %>%
hide_legend()
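## Differencing removes the stochastic trend of the random walks: after
## first-order differencing, each path fluctuates around a constant mean, as a
## stationary series should.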
set.seed(12345)
ar2 <- arima.sim(model = list(order = c(2,0,0),
ar = c(0.9, -0.3)),
n = 500)
ts_plot(ar2,
title = "Simulate AR(2) Series",
Ytitle = "Value",
Xtitle = "Index")
md_ar <- ar(ar2)
md_ar
##
## Call:
## ar(x = ar2)
##
## Coefficients:
## 1 2
## 0.8851 -0.2900
##
## Order selected 2 sigma^2 estimated as 1.049
par(mfrow=c(1,2))
acf(ar2)
pacf(ar2)
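## For an AR(2) process the PACF should cut off after lag 2 while the ACF tails
## off gradually. The estimated coefficients above (0.8851, -0.2900) are close
## to the true values used in the simulation (0.9, -0.3).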
set.seed(12345)
ma2 <- arima.sim(model = list(order = c(0, 0, 2),
ma = c(0.5, -0.3)),
n = 500)
ts_plot(ma2,
title = "Simulate MA(2) Series",
Ytitle = "Value",
Xtitle = "Index")
md_ma <- arima(ma2, order = c(0,0,2), method = "ML")
md_ma
##
## Call:
## arima(x = ma2, order = c(0, 0, 2), method = "ML")
##
## Coefficients:
## ma1 ma2 intercept
## 0.530 -0.3454 0.0875
## s.e. 0.041 0.0406 0.0525
##
## sigma^2 estimated as 0.9829: log likelihood = -705.81, aic = 1419.62
par(mfrow=c(1,2))
acf(ma2)
pacf(ma2)
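## The mirror image of the AR signature: for an MA(2) process the ACF should
## cut off after lag 2 while the PACF tails off. The ML estimates above
## (0.530, -0.3454) are close to the true coefficients (0.5, -0.3).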
set.seed(12345)
arma <- arima.sim(model = list(order = c(1, 0, 2), ar = 0.7, ma = c(0.5, -0.3)), n = 500)
ts_plot(arma,
title = "Simulate ARMA(1,2) Series",
Ytitle = "Value",
Xtitle = "Index")
arma_md <- arima(arma, order = c(1,0,2))
arma_md
##
## Call:
## arima(x = arma, order = c(1, 0, 2))
##
## Coefficients:
## ar1 ma1 ma2 intercept
## 0.7439 0.4785 -0.3954 0.2853
## s.e. 0.0657 0.0878 0.0849 0.1891
##
## sigma^2 estimated as 1.01: log likelihood = -713.18, aic = 1436.36
par(mfrow=c(1,2))
acf(arma)
pacf(arma)
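## When both the ACF and the PACF tail off rather than cut off, a mixed ARMA
## model is indicated; below we compare several candidate orders by AIC.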
arima(arma, order = c(1, 0, 1), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(1, 0, 1), include.mean = FALSE)
##
## Coefficients:
## ar1 ma1
## 0.4144 0.8864
## s.e. 0.0432 0.0248
##
## sigma^2 estimated as 1.051: log likelihood = -723, aic = 1452
arima(arma, order = c(2, 0, 1), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(2, 0, 1), include.mean = FALSE)
##
## Coefficients:
## ar1 ar2 ma1
## 0.3136 0.1918 0.9227
## s.e. 0.0486 0.0484 0.0183
##
## sigma^2 estimated as 1.019: log likelihood = -715.26, aic = 1438.52
arima(arma, order = c(1, 0, 2), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(1, 0, 2), include.mean = FALSE)
##
## Coefficients:
## ar1 ma1 ma2
## 0.7602 0.4654 -0.4079
## s.e. 0.0626 0.0858 0.0832
##
## sigma^2 estimated as 1.014: log likelihood = -714.27, aic = 1436.54
arima(arma, order = c(2, 0, 2), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(2, 0, 2), include.mean = FALSE)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## 0.7239 0.0194 0.4997 -0.3783
## s.e. 0.2458 0.1257 0.2427 0.2134
##
## sigma^2 estimated as 1.014: log likelihood = -714.26, aic = 1438.51
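## Among the four candidates, ARIMA(1,0,2) achieves the lowest AIC (1436.54),
## consistent with the ARMA(1,2) process we simulated, so we keep it as the
## final model.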
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
best_arma <- arima(arma, order = c(1, 0, 2), include.mean = FALSE)
checkresiduals(best_arma)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,2) with zero mean
## Q* = 5.3129, df = 7, p-value = 0.6218
##
## Model df: 3. Total lags used: 10
ar_fc <- forecast(md_ar, h = 100)
plot_forecast(ar_fc,
title = "Forecast AR(2) Model",
Ytitle = "Value",
Xtitle = "Year")
data(Coffee_Prices)
robusta_price <- window(Coffee_Prices[,1], start = c(2000, 1))
ts_plot(robusta_price,
title = "The Robusta Coffee Monthly Prices",
Ytitle = "Price in USD",
Xtitle = "Year")
acf(robusta_price)
robusta_price_d1 <- diff(robusta_price)
par(mfrow=c(1,2))
acf(robusta_price_d1)
pacf(robusta_price_d1)
robusta_md <- arima(robusta_price, order = c(1, 1, 0))
summary(robusta_md)
##
## Call:
## arima(x = robusta_price, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## 0.2780
## s.e. 0.0647
##
## sigma^2 estimated as 0.007142: log likelihood = 231.38, aic = -458.76
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.002595604 0.08432096 0.06494772 0.08104715 4.254984 1.001542
## ACF1
## Training set 0.001526295
checkresiduals(robusta_md)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)
## Q* = 26.896, df = 23, p-value = 0.2604
##
## Model df: 1. Total lags used: 24
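## With residuals that look like white noise, the model is ready for
## forecasting; a minimal sketch, following the same pattern used above for the
## AR(2) model:
robusta_fc <- forecast(robusta_md, h = 12)
plot_forecast(robusta_fc,
title = "The Robusta Coffee Monthly Prices - Forecast",
Ytitle = "Price in USD",
Xtitle = "Year")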
data(USgas)
ts_plot(USgas,
title = "US Monthly Natural Gas consumption",
Ytitle = "Billion Cubic Feet",
Xtitle = "Year")
USgas_split <- ts_split(USgas, sample.out = 12)
train <- USgas_split$train
test <- USgas_split$test
par(mfrow=c(1,2))
acf(train, lag.max = 60)
pacf(train, lag.max = 60)
USgas_d12 <- diff(train, 12)
ts_plot(USgas_d12,
title = "US Monthly Natural Gas consumption - First Seasonal Difference",
Ytitle = "Billion Cubic Feet (First Difference)",
Xtitle = "Year")
USgas_d12_1 <- diff(USgas_d12, 1)
ts_plot(USgas_d12_1,
title = "US Monthly Natural Gas consumption - First Seasonal and Non-Seasonal Differencing",
Ytitle = "Billion Cubic Feet (Difference)",
Xtitle = "Year")
par(mfrow=c(1,2))
acf(USgas_d12_1, lag.max = 60)
pacf(USgas_d12_1, lag.max = 60)
p <- q <- P <- Q <- 0:2
arima_grid <- expand.grid(p,q,P,Q)
names(arima_grid) <- c("p", "q", "P", "Q")
arima_grid$d <- 1
arima_grid$D <- 1
arima_grid$k <- rowSums(arima_grid)
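## The grid above enumerates all 81 (p,1,q)(P,1,Q)[12] candidates but is not
## yet searched. A minimal sketch of that search, ranking the candidates by
## AIC (tryCatch() skips any combination that fails to converge; fitting 81
## models may take a few minutes):
arima_grid$aic <- sapply(1:nrow(arima_grid), function(i) {
  md <- tryCatch(arima(train,
                       order = c(arima_grid$p[i], 1, arima_grid$q[i]),
                       seasonal = list(order = c(arima_grid$P[i], 1, arima_grid$Q[i]))),
                 error = function(e) NULL)
  if (is.null(md)) NA else md$aic
})
head(arima_grid[order(arima_grid$aic), ])
## The ARIMA(1,1,1)(2,1,1)[12] candidate should emerge at or near the top of
## this ranking; it is the model fitted next.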
USgas_best_md <- arima(train, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
USgas_best_md
##
## Call:
## arima(x = train, order = c(1, 1, 1), seasonal = list(order = c(2, 1, 1)))
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1
## 0.4247 -0.9180 0.0132 -0.2639 -0.7449
## s.e. 0.0770 0.0376 0.0894 0.0834 0.0753
##
## sigma^2 estimated as 10160: log likelihood = -1292.96, aic = 2597.91
USgas_test_fc <- forecast(USgas_best_md, h = 12)
accuracy(USgas_test_fc, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 6.081099 97.85701 73.36854 0.1298714 3.517097 0.6371821
## Test set 42.211253 104.79281 83.09943 1.4913412 3.314280 0.7216918
## ACF1 Theil's U
## Training set 0.004565602 NA
## Test set -0.049999868 0.3469228
test_forecast(USgas,
forecast.obj = USgas_test_fc,
test = test)
final_md <- arima(USgas, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
checkresiduals(final_md)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(2,1,1)[12]
## Q* = 30.173, df = 19, p-value = 0.04964
##
## Model df: 5. Total lags used: 24
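## Note that the Ljung-Box p-value is borderline (about 0.05), hinting at some
## leftover autocorrelation in the residuals; we proceed with the model, but
## the forecast intervals below should be read with that caveat in mind.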
USgas_fc <- forecast(final_md, h = 12)
plot_forecast(USgas_fc,
title = "US Natural Gas Consumption - Forecast",
Ytitle = "Billion Cubic Feet",
Xtitle = "Year")
USgas_auto_md1 <- auto.arima(train)
USgas_auto_md1
## Series: train
## ARIMA(2,1,1)(2,1,1)[12]
##
## Coefficients:
## ar1 ar2 ma1 sar1 sar2 sma1
## 0.4301 -0.0372 -0.9098 0.0117 -0.2673 -0.7431
## s.e. 0.0794 0.0741 0.0452 0.0887 0.0830 0.0751
##
## sigma^2 = 10446: log likelihood = -1292.83
## AIC=2599.67 AICc=2600.22 BIC=2623.2
USgas_auto_md2 <- auto.arima(train,
max.order = 5,
D = 1,
d = 1,
ic = "aic",
stepwise = FALSE,
approximation = FALSE)
USgas_auto_md2
## Series: train
## ARIMA(1,1,1)(2,1,1)[12]
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1
## 0.4247 -0.9180 0.0132 -0.2639 -0.7449
## s.e. 0.0770 0.0376 0.0894 0.0834 0.0753
##
## sigma^2 = 10405: log likelihood = -1292.96
## AIC=2597.91 AICc=2598.32 BIC=2618.08
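## The exhaustive search (stepwise = FALSE, approximation = FALSE) recovers the
## same ARIMA(1,1,1)(2,1,1)[12] model selected manually above, with a lower AIC
## (2597.91) than the stepwise default's ARIMA(2,1,1)(2,1,1)[12] (2599.67).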
df <- ts_to_prophet(AirPassengers) %>% setNames(c("date", "y"))
df$lag12 <- dplyr::lag(df$y, n = 12)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
df$month <- factor(month(df$date, label = TRUE), ordered = FALSE)
df$trend <- 1:nrow(df)
ap_split <- ts_split(ts.obj = AirPassengers, sample.out = 12)
train <- ap_split$train
test <- ap_split$test
train_df <- df[1:(nrow(df) - 12), ]
test_df <- df[(nrow(df) - 12 + 1):nrow(df), ]
md1 <- tslm(train ~ season + trend + lag12, data = train_df)
checkresiduals(md1)
##
## Breusch-Godfrey test for serial correlation of order up to 24
##
## data: Residuals from Linear regression model
## LM test = 83.147, df = 24, p-value = 1.903e-08
md2 <- auto.arima(train,
xreg = cbind(model.matrix(~ month, train_df)[,-1],
trend = train_df$trend,
lag12 = train_df$lag12),
seasonal = TRUE,
stepwise = FALSE,
approximation = FALSE)
summary(md2)
## Series: train
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 monthfeb monthmar monthabr monthmay
## 0.5849 0.3056 -0.4421 -0.2063 -2.7523 0.8231 0.2066 2.8279
## s.e. 0.0897 0.0903 0.1050 0.1060 1.8417 2.3248 2.4162 2.5560
## monthjun monthjul monthago monthsept monthoct monthnov monthdic
## 6.6057 11.2337 12.1909 3.8269 0.6350 -2.2723 -0.9918
## s.e. 3.2916 4.2324 4.1198 2.9243 2.3405 2.4211 1.9172
##        trend   lag12
##       0.2726  1.0244
## s.e.  0.1367  0.0426
##
## sigma^2 = 72.82: log likelihood = -426.93
## AIC=889.86 AICc=896.63 BIC=940.04
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4117148 8.353629 6.397538 0.2571543 2.549682 0.2100998
## ACF1
## Training set 0.005966712
checkresiduals(md2)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,0)(2,0,0)[12] errors
## Q* = 25.816, df = 20, p-value = 0.172
##
## Model df: 4. Total lags used: 24
fc1 <- forecast(md1, newdata = test_df)
fc2 <- forecast(md2, xreg = cbind(model.matrix(~ month, test_df)[,-1],
trend = test_df$trend,
lag12 = test_df$lag12))
accuracy(fc1, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.894492e-15 13.99206 11.47136 -0.1200023 4.472154 0.3541758
## Test set 1.079611e+01 20.82782 18.57391 1.9530865 3.828115 0.5734654
## ACF1 Theil's U
## Training set 0.7502578 NA
## Test set 0.1653119 0.4052175
accuracy(fc2, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4117148 8.353629 6.397538 0.2571543 2.549682 0.2100998
## Test set -11.2123706 17.928195 13.353910 -2.4898843 2.924174 0.4385521
## ACF1 Theil's U
## Training set 0.005966712 NA
## Test set -0.325044929 0.3923795
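## On the test set, the dynamic regression model (fc2) beats the linear model
## (fc1) on every error measure (RMSE 17.93 vs 20.83, MAE 13.35 vs 18.57, MAPE
## 2.92 vs 3.83), confirming the value of modeling the residual autocorrelation
## with ARIMA errors.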