# Libraries
library(readxl)
library(tseries)
## Warning: package 'tseries' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Database (from Excel)
pib_data <- read_excel("db_PIB2.xlsx")
# Converting the date column to Date format
pib_data$fecha <- as.Date(pib_data$fecha, format = "%d/%m/%y")
# Filter data from 2000 to 2024
pib_data <- pib_data[pib_data$fecha >= as.Date("2000-01-01") &
pib_data$fecha <= as.Date("2024-12-31"), ]
# Extracting the starting year and quarter
start_year <- as.numeric(format(min(pib_data$fecha), "%Y"))
start_month <- as.numeric(format(min(pib_data$fecha), "%m"))
# Converting month into quarter
start_quarter <- ifelse(start_month %in% c(1, 2, 3), 1,
ifelse(start_month %in% c(4, 5, 6), 2,
ifelse(start_month %in% c(7, 8, 9), 3, 4)))
# Creating the time series object with quarterly frequency
ts_data <- ts(pib_data$pib,
start = c(start_year, start_quarter),
frequency = 4)
# Converting to time series using as.ts()
ts_data <- as.ts(ts_data)
# Displaying the time series
ts_data
## Qtr1 Qtr2 Qtr3 Qtr4
## 2000 17562072 17786512 17980809 17778211
## 2001 17895081 17758108 17721837 17568749
## 2002 17459227 17649247 17813026 17851054
## 2003 17846064 17868090 17877093 18039385
## 2004 18277012 18520992 18509751 18707302
## 2005 18748764 18752841 18943014 19294067
## 2006 19676167 19841753 19892102 19963992
## 2007 20107079 20234691 20307737 20359607
## 2008 20354106 20505531 20521779 20197940
## 2009 18952309 18633041 19309769 19731182
## 2010 19814164 20029359 20219279 20386074
## 2011 20528681 20591212 20977196 21128881
## 2012 21330425 21461669 21516987 21684247
## 2013 21703435 21614636 21750398 21857318
## 2014 22001548 22281559 22319088 22508686
## 2015 22618510 22816784 23085376 23017589
## 2016 22992150 23094709 23296235 23556605
## 2017 23693966 23720108 23578936 23908388
## 2018 24208912 24134241 24230199 24186080
## 2019 24190818 24123758 24122015 23947538
## 2020 23660840 19187221 22164196 23122943
## 2021 23288550 23507958 23301496 23574312
## 2022 23901537 24184105 24386676 24671752
## 2023 24798272 24983768 25181152 25202781
## 2024 25255205 25227221 25524271 25275173
# Converting to millions (for a better and easier visualization)
ts_millions <- ts_data / 1e6
##Preguntarle al profe si ocupamos transformar los valores a niveles > log --> no es necesario
#Activity 3. Code for Problem Situation #Remember the already converted data
plot(ts_millions,
main = "Mexico Quarterly GDP (2000–2024)",
ylab = "GDP (Millions of MXN)",
xlab = "Time",
col = "#0B3C5D",
lwd = 2)
adf_original = adf.test(ts_millions)
print(adf_original)
##
## Augmented Dickey-Fuller Test
##
## data: ts_millions
## Dickey-Fuller = -2.8069, Lag order = 4, p-value = 0.2424
## alternative hypothesis: stationary
#p-value was > 0.05 thus differencing needs to be applied
diff1 = diff(ts_millions)
#Check for stationary again
#With the ADF Test
adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff1
## Dickey-Fuller = -5.1413, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Plot differenced data
autoplot(diff1) +
ggtitle("Differenced Series") +
xlab("Time") + ylab("Differenced Value")
# 2) Analyze ACF and PACF to identify potential model structures and
appropiate lag values
# ACF and PACF
par(mfrow = c(1,2))
acf(diff1, main = "ACF of Differenced Series")
pacf(diff1, main = "PACF of Differenced Series")
par(mfrow = c(1,1))
model1 = arima(ts_millions, order = c(0,1,1))
model2 = arima(ts_millions, order = c(0,1,2))
model3 = arima(ts_millions, order = c(1,1,0))
model4 = arima(ts_millions, order = c(1,1,1))
#Models
model1
##
## Call:
## arima(x = ts_millions, order = c(0, 1, 1))
##
## Coefficients:
## ma1
## -0.2611
## s.e. 0.1094
##
## sigma^2 estimated as 0.3352: log likelihood = -86.41, aic = 176.81
model2
##
## Call:
## arima(x = ts_millions, order = c(0, 1, 2))
##
## Coefficients:
## ma1 ma2
## -0.2322 -0.1002
## s.e. 0.0996 0.0952
##
## sigma^2 estimated as 0.3314: log likelihood = -85.86, aic = 177.71
model3
##
## Call:
## arima(x = ts_millions, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## -0.1940
## s.e. 0.0982
##
## sigma^2 estimated as 0.3399: log likelihood = -87.07, aic = 178.15
model4
##
## Call:
## arima(x = ts_millions, order = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1
## 0.2795 -0.5203
## s.e. 0.2714 0.2354
##
## sigma^2 estimated as 0.3321: log likelihood = -85.95, aic = 177.91
#Compare models with AIC
model_comparison <- data.frame(
Model = c("ARIMA(0,1,1)",
"ARIMA(0,1,2)",
"ARIMA(1,1,0)",
"ARIMA(1,1,1)"),
AIC = c(AIC(model1),
AIC(model2),
AIC(model3),
AIC(model4))
)
model_comparison <- model_comparison[order(model_comparison$AIC), ]
print(model_comparison)
## Model AIC
## 1 ARIMA(0,1,1) 176.8132
## 2 ARIMA(0,1,2) 177.7143
## 4 ARIMA(1,1,1) 177.9052
## 3 ARIMA(1,1,0) 178.1494
auto.arima(ts_millions, seasonal = TRUE, stepwise = FALSE, trace = TRUE)
##
## ARIMA(0,1,0) : 180.0145
## ARIMA(0,1,0) with drift : 180.383
## ARIMA(0,1,0)(0,0,1)[4] : 182.0854
## ARIMA(0,1,0)(0,0,1)[4] with drift : 182.428
## ARIMA(0,1,0)(0,0,2)[4] : 184.1988
## ARIMA(0,1,0)(0,0,2)[4] with drift : 184.5974
## ARIMA(0,1,0)(1,0,0)[4] : 182.0851
## ARIMA(0,1,0)(1,0,0)[4] with drift : 182.4289
## ARIMA(0,1,0)(1,0,1)[4] : 184.1312
## ARIMA(0,1,0)(1,0,1)[4] with drift : 184.578
## ARIMA(0,1,0)(1,0,2)[4] : 186.2566
## ARIMA(0,1,0)(1,0,2)[4] with drift : 186.798
## ARIMA(0,1,0)(2,0,0)[4] : 184.1956
## ARIMA(0,1,0)(2,0,0)[4] with drift : 184.5998
## ARIMA(0,1,0)(2,0,1)[4] : 186.2616
## ARIMA(0,1,0)(2,0,1)[4] with drift : 186.7976
## ARIMA(0,1,0)(2,0,2)[4] : 187.8729
## ARIMA(0,1,0)(2,0,2)[4] with drift : 188.9964
## ARIMA(0,1,1) : 176.9382
## ARIMA(0,1,1) with drift : 175.4209
## ARIMA(0,1,1)(0,0,1)[4] : 179.0656
## ARIMA(0,1,1)(0,0,1)[4] with drift : 177.4352
## ARIMA(0,1,1)(0,0,2)[4] : 181.2365
## ARIMA(0,1,1)(0,0,2)[4] with drift : 179.4854
## ARIMA(0,1,1)(1,0,0)[4] : 179.0656
## ARIMA(0,1,1)(1,0,0)[4] with drift : 177.4479
## ARIMA(0,1,1)(1,0,1)[4] : 181.1917
## ARIMA(0,1,1)(1,0,1)[4] with drift : 179.3719
## ARIMA(0,1,1)(1,0,2)[4] : 183.3462
## ARIMA(0,1,1)(1,0,2)[4] with drift : 181.5794
## ARIMA(0,1,1)(2,0,0)[4] : 181.2362
## ARIMA(0,1,1)(2,0,0)[4] with drift : 179.5213
## ARIMA(0,1,1)(2,0,1)[4] : 183.3543
## ARIMA(0,1,1)(2,0,1)[4] with drift : 181.57
## ARIMA(0,1,1)(2,0,2)[4] : Inf
## ARIMA(0,1,1)(2,0,2)[4] with drift : Inf
## ARIMA(0,1,2) : 177.9669
## ARIMA(0,1,2) with drift : 175.2703
## ARIMA(0,1,2)(0,0,1)[4] : 180.1363
## ARIMA(0,1,2)(0,0,1)[4] with drift : 177.4147
## ARIMA(0,1,2)(0,0,2)[4] : 182.3482
## ARIMA(0,1,2)(0,0,2)[4] with drift : 179.4347
## ARIMA(0,1,2)(1,0,0)[4] : 180.1363
## ARIMA(0,1,2)(1,0,0)[4] with drift : 177.4222
## ARIMA(0,1,2)(1,0,1)[4] : 182.3228
## ARIMA(0,1,2)(1,0,1)[4] with drift : 179.2544
## ARIMA(0,1,2)(1,0,2)[4] : 184.4709
## ARIMA(0,1,2)(1,0,2)[4] with drift : 181.3913
## ARIMA(0,1,2)(2,0,0)[4] : 182.348
## ARIMA(0,1,2)(2,0,0)[4] with drift : 179.4768
## ARIMA(0,1,2)(2,0,1)[4] : 184.4864
## ARIMA(0,1,2)(2,0,1)[4] with drift : 181.3576
## ARIMA(0,1,3) : 180.1383
## ARIMA(0,1,3) with drift : 177.2737
## ARIMA(0,1,3)(0,0,1)[4] : 182.3551
## ARIMA(0,1,3)(0,0,1)[4] with drift : 179.5008
## ARIMA(0,1,3)(0,0,2)[4] : 184.6156
## ARIMA(0,1,3)(0,0,2)[4] with drift : 181.5928
## ARIMA(0,1,3)(1,0,0)[4] : 182.355
## ARIMA(0,1,3)(1,0,0)[4] with drift : 179.5048
## ARIMA(0,1,3)(1,0,1)[4] : 184.5876
## ARIMA(0,1,3)(1,0,1)[4] with drift : 181.4013
## ARIMA(0,1,3)(2,0,0)[4] : 184.6154
## ARIMA(0,1,3)(2,0,0)[4] with drift : 181.6263
## ARIMA(1,1,0) : 178.2744
## ARIMA(1,1,0) with drift : 177.7911
## ARIMA(1,1,0)(0,0,1)[4] : 180.4018
## ARIMA(1,1,0)(0,0,1)[4] with drift : 179.8889
## ARIMA(1,1,0)(0,0,2)[4] : 182.5714
## ARIMA(1,1,0)(0,0,2)[4] with drift : 182.0565
## ARIMA(1,1,0)(1,0,0)[4] : 180.4018
## ARIMA(1,1,0)(1,0,0)[4] with drift : 179.8923
## ARIMA(1,1,0)(1,0,1)[4] : 182.5322
## ARIMA(1,1,0)(1,0,1)[4] with drift : 182.0119
## ARIMA(1,1,0)(1,0,2)[4] : 184.6914
## ARIMA(1,1,0)(1,0,2)[4] with drift : 184.2609
## ARIMA(1,1,0)(2,0,0)[4] : 182.5709
## ARIMA(1,1,0)(2,0,0)[4] with drift : 182.0652
## ARIMA(1,1,0)(2,0,1)[4] : 184.6983
## ARIMA(1,1,0)(2,0,1)[4] with drift : 184.2582
## ARIMA(1,1,0)(2,0,2)[4] : Inf
## ARIMA(1,1,0)(2,0,2)[4] with drift : 186.5545
## ARIMA(1,1,1) : 178.1579
## ARIMA(1,1,1) with drift : Inf
## ARIMA(1,1,1)(0,0,1)[4] : 180.3166
## ARIMA(1,1,1)(0,0,1)[4] with drift : Inf
## ARIMA(1,1,1)(0,0,2)[4] : 182.5253
## ARIMA(1,1,1)(0,0,2)[4] with drift : Inf
## ARIMA(1,1,1)(1,0,0)[4] : 180.3163
## ARIMA(1,1,1)(1,0,0)[4] with drift : Inf
## ARIMA(1,1,1)(1,0,1)[4] : 182.5352
## ARIMA(1,1,1)(1,0,1)[4] with drift : Inf
## ARIMA(1,1,1)(1,0,2)[4] : 184.65
## ARIMA(1,1,1)(1,0,2)[4] with drift : Inf
## ARIMA(1,1,1)(2,0,0)[4] : 182.5257
## ARIMA(1,1,1)(2,0,0)[4] with drift : Inf
## ARIMA(1,1,1)(2,0,1)[4] : 184.6683
## ARIMA(1,1,1)(2,0,1)[4] with drift : Inf
## ARIMA(1,1,2) : 180.1389
## ARIMA(1,1,2) with drift : Inf
## ARIMA(1,1,2)(0,0,1)[4] : 182.3554
## ARIMA(1,1,2)(0,0,1)[4] with drift : Inf
## ARIMA(1,1,2)(0,0,2)[4] : 184.6158
## ARIMA(1,1,2)(0,0,2)[4] with drift : Inf
## ARIMA(1,1,2)(1,0,0)[4] : 182.3553
## ARIMA(1,1,2)(1,0,0)[4] with drift : Inf
## ARIMA(1,1,2)(1,0,1)[4] : 184.5894
## ARIMA(1,1,2)(1,0,1)[4] with drift : Inf
## ARIMA(1,1,2)(2,0,0)[4] : 184.6156
## ARIMA(1,1,2)(2,0,0)[4] with drift : Inf
## ARIMA(1,1,3) : 182.1386
## ARIMA(1,1,3) with drift : Inf
## ARIMA(1,1,3)(0,0,1)[4] : 184.3667
## ARIMA(1,1,3)(0,0,1)[4] with drift : Inf
## ARIMA(1,1,3)(1,0,0)[4] : 184.3672
## ARIMA(1,1,3)(1,0,0)[4] with drift : Inf
## ARIMA(2,1,0) : 178.4479
## ARIMA(2,1,0) with drift : 177.0901
## ARIMA(2,1,0)(0,0,1)[4] : 180.612
## ARIMA(2,1,0)(0,0,1)[4] with drift : 179.0669
## ARIMA(2,1,0)(0,0,2)[4] : 182.8311
## ARIMA(2,1,0)(0,0,2)[4] with drift : 181.1629
## ARIMA(2,1,0)(1,0,0)[4] : 180.6119
## ARIMA(2,1,0)(1,0,0)[4] with drift : 179.0869
## ARIMA(2,1,0)(1,0,1)[4] : 182.7598
## ARIMA(2,1,0)(1,0,1)[4] with drift : 181.0465
## ARIMA(2,1,0)(1,0,2)[4] : 184.982
## ARIMA(2,1,0)(1,0,2)[4] with drift : 183.3264
## ARIMA(2,1,0)(2,0,0)[4] : 182.8306
## ARIMA(2,1,0)(2,0,0)[4] with drift : 181.2081
## ARIMA(2,1,0)(2,0,1)[4] : 184.9886
## ARIMA(2,1,0)(2,0,1)[4] with drift : 183.3198
## ARIMA(2,1,1) : 180.0811
## ARIMA(2,1,1) with drift : Inf
## ARIMA(2,1,1)(0,0,1)[4] : 182.3005
## ARIMA(2,1,1)(0,0,1)[4] with drift : Inf
## ARIMA(2,1,1)(0,0,2)[4] : 184.561
## ARIMA(2,1,1)(0,0,2)[4] with drift : Inf
## ARIMA(2,1,1)(1,0,0)[4] : 182.3005
## ARIMA(2,1,1)(1,0,0)[4] with drift : Inf
## ARIMA(2,1,1)(1,0,1)[4] : Inf
## ARIMA(2,1,1)(1,0,1)[4] with drift : Inf
## ARIMA(2,1,1)(2,0,0)[4] : 184.5599
## ARIMA(2,1,1)(2,0,0)[4] with drift : Inf
## ARIMA(2,1,2) : 181.4882
## ARIMA(2,1,2) with drift : Inf
## ARIMA(2,1,2)(0,0,1)[4] : 184.6134
## ARIMA(2,1,2)(0,0,1)[4] with drift : Inf
## ARIMA(2,1,2)(1,0,0)[4] : 183.593
## ARIMA(2,1,2)(1,0,0)[4] with drift : Inf
## ARIMA(2,1,3) : 183.6576
## ARIMA(2,1,3) with drift : Inf
## ARIMA(3,1,0) : 180.0732
## ARIMA(3,1,0) with drift : 178.0388
## ARIMA(3,1,0)(0,0,1)[4] : 182.2157
## ARIMA(3,1,0)(0,0,1)[4] with drift : 179.381
## ARIMA(3,1,0)(0,0,2)[4] : 184.473
## ARIMA(3,1,0)(0,0,2)[4] with drift : 181.4586
## ARIMA(3,1,0)(1,0,0)[4] : 182.2138
## ARIMA(3,1,0)(1,0,0)[4] with drift : 179.477
## ARIMA(3,1,0)(1,0,1)[4] : 184.346
## ARIMA(3,1,0)(1,0,1)[4] with drift : 181.1515
## ARIMA(3,1,0)(2,0,0)[4] : 184.4669
## ARIMA(3,1,0)(2,0,0)[4] with drift : 181.6294
## ARIMA(3,1,1) : 182.2074
## ARIMA(3,1,1) with drift : Inf
## ARIMA(3,1,1)(0,0,1)[4] : 184.4563
## ARIMA(3,1,1)(0,0,1)[4] with drift : 181.6974
## ARIMA(3,1,1)(1,0,0)[4] : 184.4556
## ARIMA(3,1,1)(1,0,0)[4] with drift : Inf
## ARIMA(3,1,2) : 183.6786
## ARIMA(3,1,2) with drift : Inf
##
##
##
## Best model: ARIMA(0,1,2) with drift
## Series: ts_millions
## ARIMA(0,1,2) with drift
##
## Coefficients:
## ma1 ma2 drift
## -0.2895 -0.1542 0.0785
## s.e. 0.0987 0.1017 0.0317
##
## sigma^2 = 0.3251: log likelihood = -83.42
## AIC=174.84 AICc=175.27 BIC=185.23
#Residual diagnosis with Ljung-Box
best_model = Arima(ts_millions, order = c(0,1,2), include.drift = TRUE)
checkresiduals(best_model)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2) with drift
## Q* = 0.95325, df = 6, p-value = 0.9873
##
## Model df: 2. Total lags used: 8
Box.test(best_model$residuals, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: best_model$residuals
## X-squared = 0.0014622, df = 1, p-value = 0.9695
#Constant variance -- residual plot over time
plot(best_model$residuals,
main = "Residuals Over Time",
ylab = "Residuals",
xlab = "Time",
col = "#0B3C5D",
lwd = 1.5)
abline(h = 0, col = "red", lty = 2)
#Forecastinggg #Ask professor how many steps ahead #Forecast with data
until 2024 and forecast 2025, then prove it against the actual values of
the 2025 GDP
gdp_forecast <- forecast(best_model, h = 4)
print(gdp_forecast)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2025 Q1 25.40675 24.67601 26.13749 24.28918 26.52432
## 2025 Q2 25.52941 24.63299 26.42583 24.15845 26.90037
## 2025 Q3 25.60790 24.62360 26.59219 24.10255 27.11324
## 2025 Q4 25.68638 24.62144 26.75132 24.05770 27.31506
# Plot forecast
plot(gdp_forecast, main = "Forecast for 2025 GDP")