# Libraries
library(readxl)
library(tseries)
## Warning: package 'tseries' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Database (from Excel)
pib_data <- read_excel("db_PIB2.xlsx")
# Converting the date column to Date format
pib_data$fecha <- as.Date(pib_data$fecha, format = "%d/%m/%y")
# Filter data from 2000 to 2024
pib_data <- pib_data[pib_data$fecha >= as.Date("2000-01-01") &
pib_data$fecha <= as.Date("2024-12-31"),]
# Extracting the starting year and quarter
start_year <- as.numeric(format(min(pib_data$fecha), "%Y"))
start_month <- as.numeric(format(min(pib_data$fecha), "%m"))
# Converting month into quarter
start_quarter <- ifelse(start_month %in% c(1, 2, 3), 1,
ifelse(start_month %in% c(4, 5, 6), 2,
ifelse(start_month %in% c(7, 8, 9), 3, 4)))
# Creating the time series object with quarterly frequency
ts_data <- ts(pib_data$pib,
start = c(start_year, start_quarter),
frequency = 4)
#Import and Transform Data in RStudio: Upload the dataset to RStudio and convert it into a time series object using the as.ts() function.
# Converting to time series using as.ts()
ts_data = as.ts(ts_data)
# Displaying the time series
ts_data
## Qtr1 Qtr2 Qtr3 Qtr4
## 2000 17562072 17786512 17980809 17778211
## 2001 17895081 17758108 17721837 17568749
## 2002 17459227 17649247 17813026 17851054
## 2003 17846064 17868090 17877093 18039385
## 2004 18277012 18520992 18509751 18707302
## 2005 18748764 18752841 18943014 19294067
## 2006 19676167 19841753 19892102 19963992
## 2007 20107079 20234691 20307737 20359607
## 2008 20354106 20505531 20521779 20197940
## 2009 18952309 18633041 19309769 19731182
## 2010 19814164 20029359 20219279 20386074
## 2011 20528681 20591212 20977196 21128881
## 2012 21330425 21461669 21516987 21684247
## 2013 21703435 21614636 21750398 21857318
## 2014 22001548 22281559 22319088 22508686
## 2015 22618510 22816784 23085376 23017589
## 2016 22992150 23094709 23296235 23556605
## 2017 23693966 23720108 23578936 23908388
## 2018 24208912 24134241 24230199 24186080
## 2019 24190818 24123758 24122015 23947538
## 2020 23660840 19187221 22164196 23122943
## 2021 23288550 23507958 23301496 23574312
## 2022 23901537 24184105 24386676 24671752
## 2023 24798272 24983768 25181152 25202781
## 2024 25255205 25227221 25524271 25275173
# Converting to millions (for a better and easier visualization)
ts_millions = ts_data / 1e6
log_ts = log(ts_millions)
plot(log_ts,
main = "Mexico Quarterly GDP (2000–2024)",
ylab = "GDP (Millions of MXN)",
xlab = "Time",
col = "#0B3C5D",
lwd = 2)
adf_original = adf.test(log_ts)
print(adf_original)
##
## Augmented Dickey-Fuller Test
##
## data: log_ts
## Dickey-Fuller = -2.6697, Lag order = 4, p-value = 0.2992
## alternative hypothesis: stationary
#p-value was > 0.05 thus differencing needs to be applied
diff1 = diff(log_ts)
#Check for stationary again
#With the ADF Test
adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff1
## Dickey-Fuller = -5.2017, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Plot differenced data
autoplot(diff1) +
ggtitle("Differenced Series") +
xlab("Time") + ylab("Differenced Value")
#Plot decomposition to see if there are other behaviors or patterns
decompose(log_ts)
## $x
## Qtr1 Qtr2 Qtr3 Qtr4
## 2000 2.865742 2.878440 2.889305 2.877974
## 2001 2.884526 2.876842 2.874798 2.866122
## 2002 2.859868 2.870693 2.879930 2.882063
## 2003 2.881783 2.883016 2.883520 2.892557
## 2004 2.905644 2.918905 2.918298 2.928914
## 2005 2.931128 2.931345 2.941435 2.959798
## 2006 2.979408 2.987788 2.990323 2.993930
## 2007 3.001072 3.007399 3.011002 3.013553
## 2008 3.013283 3.020695 3.021487 3.005581
## 2009 2.941926 2.924936 2.960611 2.982200
## 2010 2.986397 2.997199 3.006637 3.014852
## 2011 3.021823 3.024864 3.043436 3.050641
## 2012 3.060134 3.066269 3.068843 3.076586
## 2013 3.077471 3.073371 3.079632 3.084536
## 2014 3.091113 3.103759 3.105442 3.113901
## 2015 3.118769 3.127496 3.139199 3.136259
## 2016 3.135153 3.139604 3.148292 3.159406
## 2017 3.165220 3.166323 3.160354 3.174229
## 2018 3.186721 3.183632 3.187600 3.185777
## 2019 3.185973 3.183197 3.183125 3.175866
## 2020 3.163821 2.954245 3.098478 3.140825
## 2021 3.147962 3.157339 3.148518 3.160158
## 2022 3.173943 3.185696 3.194037 3.205659
## 2023 3.210774 3.218226 3.226096 3.226954
## 2024 3.229032 3.227924 3.239630 3.229823
##
## $seasonal
## Qtr1 Qtr2 Qtr3 Qtr4
## 2000 0.001906817 -0.006841192 0.001241194 0.003693181
## 2001 0.001906817 -0.006841192 0.001241194 0.003693181
## 2002 0.001906817 -0.006841192 0.001241194 0.003693181
## 2003 0.001906817 -0.006841192 0.001241194 0.003693181
## 2004 0.001906817 -0.006841192 0.001241194 0.003693181
## 2005 0.001906817 -0.006841192 0.001241194 0.003693181
## 2006 0.001906817 -0.006841192 0.001241194 0.003693181
## 2007 0.001906817 -0.006841192 0.001241194 0.003693181
## 2008 0.001906817 -0.006841192 0.001241194 0.003693181
## 2009 0.001906817 -0.006841192 0.001241194 0.003693181
## 2010 0.001906817 -0.006841192 0.001241194 0.003693181
## 2011 0.001906817 -0.006841192 0.001241194 0.003693181
## 2012 0.001906817 -0.006841192 0.001241194 0.003693181
## 2013 0.001906817 -0.006841192 0.001241194 0.003693181
## 2014 0.001906817 -0.006841192 0.001241194 0.003693181
## 2015 0.001906817 -0.006841192 0.001241194 0.003693181
## 2016 0.001906817 -0.006841192 0.001241194 0.003693181
## 2017 0.001906817 -0.006841192 0.001241194 0.003693181
## 2018 0.001906817 -0.006841192 0.001241194 0.003693181
## 2019 0.001906817 -0.006841192 0.001241194 0.003693181
## 2020 0.001906817 -0.006841192 0.001241194 0.003693181
## 2021 0.001906817 -0.006841192 0.001241194 0.003693181
## 2022 0.001906817 -0.006841192 0.001241194 0.003693181
## 2023 0.001906817 -0.006841192 0.001241194 0.003693181
## 2024 0.001906817 -0.006841192 0.001241194 0.003693181
##
## $trend
## Qtr1 Qtr2 Qtr3 Qtr4
## 2000 NA NA 2.880213 2.882361
## 2001 2.880348 2.877053 2.872490 2.868639
## 2002 2.868512 2.871146 2.875878 2.880158
## 2003 2.882147 2.883907 2.888202 2.895671
## 2004 2.904504 2.913396 2.921126 2.925866
## 2005 2.930313 2.937066 2.946962 2.960052
## 2006 2.973218 2.983596 2.990570 2.995730
## 2007 3.000766 3.005804 3.009783 3.012971
## 2008 3.015944 3.016258 3.006342 2.985452
## 2009 2.965873 2.955341 2.957977 2.972569
## 2010 2.987355 2.997190 3.005699 3.013586
## 2011 3.021644 3.030717 3.039980 3.049944
## 2012 3.058296 3.064715 3.070125 3.073180
## 2013 3.075416 3.077759 3.080458 3.085961
## 2014 3.092986 3.099883 3.107011 3.113435
## 2015 3.120622 3.127636 3.132479 3.136040
## 2016 3.138690 3.142720 3.149372 3.156470
## 2017 3.161318 3.164679 3.169219 3.174070
## 2018 3.179640 3.184489 3.185839 3.185691
## 2019 3.185077 3.183279 3.179271 3.147883
## 2020 3.108683 3.093722 3.087360 3.110764
## 2021 3.142406 3.151077 3.156742 3.163534
## 2022 3.172768 3.184146 3.194437 3.203108
## 2023 3.211181 3.217851 3.222795 3.226289
## 2024 3.229193 3.231244 NA NA
##
## $random
## Qtr1 Qtr2 Qtr3 Qtr4
## 2000 NA NA 7.850664e-03 -8.081054e-03
## 2001 2.270808e-03 6.630082e-03 1.066765e-03 -6.210307e-03
## 2002 -1.055027e-02 6.388448e-03 2.810960e-03 -1.788202e-03
## 2003 -2.270612e-03 5.950259e-03 -5.922930e-03 -6.806329e-03
## 2004 -7.665173e-04 1.235042e-02 -4.069127e-03 -6.453447e-04
## 2005 -1.092368e-03 1.120435e-03 -6.767505e-03 -3.947494e-03
## 2006 4.282998e-03 1.103382e-02 -1.488790e-03 -5.492556e-03
## 2007 -1.600628e-03 8.436214e-03 -2.190605e-05 -3.111325e-03
## 2008 -4.567777e-03 1.127816e-02 1.390395e-02 1.643527e-02
## 2009 -2.585396e-02 -2.356333e-02 1.392637e-03 5.938019e-03
## 2010 -2.864860e-03 6.850639e-03 -3.040908e-04 -2.426973e-03
## 2011 -1.727761e-03 9.881507e-04 2.214770e-03 -2.996755e-03
## 2012 -6.815011e-05 8.394934e-03 -2.523446e-03 -2.868351e-04
## 2013 1.475483e-04 2.453335e-03 -2.066712e-03 -5.118801e-03
## 2014 -3.780278e-03 1.071730e-02 -2.809807e-03 -3.226930e-03
## 2015 -3.759997e-03 6.701520e-03 5.479380e-03 -3.474742e-03
## 2016 -5.444106e-03 3.724571e-03 -2.321466e-03 -7.573825e-04
## 2017 1.995473e-03 8.485531e-03 -1.010665e-02 -3.534143e-03
## 2018 5.174356e-03 5.983945e-03 5.196583e-04 -3.607062e-03
## 2019 -1.011161e-03 6.759210e-03 2.612514e-03 2.428919e-02
## 2020 5.323130e-02 -1.326367e-01 9.877097e-03 2.636787e-02
## 2021 3.649009e-03 1.310271e-02 -9.465261e-03 -7.069349e-03
## 2022 -7.323482e-04 8.390884e-03 -1.641736e-03 -1.141929e-03
## 2023 -2.314251e-03 7.216839e-03 2.059690e-03 -3.028175e-03
## 2024 -2.067795e-03 3.521262e-03 NA NA
##
## $figure
## [1] 0.001906817 -0.006841192 0.001241194 0.003693181
##
## $type
## [1] "additive"
##
## attr(,"class")
## [1] "decomposed.ts"
plot(decompose(log_ts))
# ACF and PACF
par(mfrow = c(1,2))
acf(diff1, main = "ACF of Differenced Series")
pacf(diff1, main = "PACF of Differenced Series")
par(mfrow = c(1,1))
#Models
model1=Arima(log_ts, order = c(0,1,0), include.drift = TRUE)
model2=Arima(log_ts, order = c(0,1,1), include.drift = TRUE)
model3=Arima(log_ts, order = c(0,1,2), include.drift = TRUE)
model4=Arima(log_ts, order = c(1,1,0), include.drift = TRUE)
model5=Arima(log_ts, order = c(1,1,1), include.drift = TRUE)
model6=Arima(log_ts, order = c(1,1,2), include.drift = TRUE)
#Models
model1
## Series: log_ts
## ARIMA(0,1,0) with drift
##
## Coefficients:
## drift
## 0.0037
## s.e. 0.0028
##
## sigma^2 = 0.0007925: log likelihood = 213.48
## AIC=-422.95 AICc=-422.83 BIC=-417.76
model2
## Series: log_ts
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## -0.3388 0.0037
## s.e. 0.1155 0.0018
##
## sigma^2 = 0.0007404: log likelihood = 217.29
## AIC=-428.59 AICc=-428.34 BIC=-420.8
model3
## Series: log_ts
## ARIMA(0,1,2) with drift
##
## Coefficients:
## ma1 ma2 drift
## -0.3015 -0.1552 0.0037
## s.e. 0.0987 0.1019 0.0015
##
## sigma^2 = 0.0007301: log likelihood = 218.47
## AIC=-428.93 AICc=-428.5 BIC=-418.55
model4
## Series: log_ts
## ARIMA(1,1,0) with drift
##
## Coefficients:
## ar1 drift
## -0.2225 0.0037
## s.e. 0.0976 0.0023
##
## sigma^2 = 0.0007604: log likelihood = 216.01
## AIC=-426.02 AICc=-425.76 BIC=-418.23
model5
## Series: log_ts
## ARIMA(1,1,1) with drift
##
## Coefficients:
## ar1 ma1 drift
## 0.6935 -1.0000 0.0038
## s.e. 0.0743 0.0325 0.0003
##
## sigma^2 = 0.0006815: log likelihood = 220.5
## AIC=-433 AICc=-432.58 BIC=-422.62
model6
## Series: log_ts
## ARIMA(1,1,2) with drift
##
## Coefficients:
## ar1 ma1 ma2 drift
## 0.7744 -1.1547 0.1547 0.0038
## s.e. 0.0987 0.1585 0.1557 0.0003
##
## sigma^2 = 0.0006837: log likelihood = 220.98
## AIC=-431.95 AICc=-431.31 BIC=-418.98
#Compare models with AIC
model_comparison <- data.frame(
Model = c("ARIMA(0,1,0)",
"ARIMA(0,1,1)",
"ARIMA(0,1,2)",
"ARIMA(1,1,0)",
"ARIMA(1,1,1)",
"ARIMA(1,1,2)"),
AIC = c(AIC(model1),
AIC(model2),
AIC(model3),
AIC(model4),
AIC(model5),
AIC(model6))
)
model_comparison <- model_comparison[order(model_comparison$AIC), ]
print(model_comparison)
## Model AIC
## 5 ARIMA(1,1,1) -433.0030
## 6 ARIMA(1,1,2) -431.9508
## 3 ARIMA(0,1,2) -428.9301
## 2 ARIMA(0,1,1) -428.5881
## 4 ARIMA(1,1,0) -426.0156
## 1 ARIMA(0,1,0) -422.9517
#Auto.arima models
fit_arima <- auto.arima(log_ts, seasonal = TRUE)
summary(fit_arima)
## Series: log_ts
## ARIMA(0,1,2) with drift
##
## Coefficients:
## ma1 ma2 drift
## -0.3015 -0.1552 0.0037
## s.e. 0.0987 0.1019 0.0015
##
## sigma^2 = 0.0007301: log likelihood = 218.47
## AIC=-428.93 AICc=-428.5 BIC=-418.55
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 5.943548e-05 0.02647491 0.01126389 -0.003447065 0.3725021 0.378671
## ACF1
## Training set 0.004610251
auto.arima(diff1)
## Series: diff1
## ARIMA(0,0,2) with non-zero mean
##
## Coefficients:
## ma1 ma2 mean
## -0.3015 -0.1552 0.0037
## s.e. 0.0987 0.1019 0.0015
##
## sigma^2 = 0.00073: log likelihood = 218.47
## AIC=-428.93 AICc=-428.5 BIC=-418.55
auto.arima(log_ts)
## Series: log_ts
## ARIMA(0,1,2) with drift
##
## Coefficients:
## ma1 ma2 drift
## -0.3015 -0.1552 0.0037
## s.e. 0.0987 0.1019 0.0015
##
## sigma^2 = 0.0007301: log likelihood = 218.47
## AIC=-428.93 AICc=-428.5 BIC=-418.55
#The output of the 3 indicate that the best model is ARIMA(0,1,2) with drift
checkresiduals(model5)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1) with drift
## Q* = 1.7891, df = 6, p-value = 0.938
##
## Model df: 2. Total lags used: 8
Box.test(model5$residuals, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: model5$residuals
## X-squared = 0.42169, df = 1, p-value = 0.5161
#Constant variance -- residual plot over time
plot(model5$residuals,
main = "Residuals Over Time",
ylab = "Residuals",
xlab = "Time",
col = "#0B3C5D",
lwd = 1.5)
abline(h = 0, col = "red", lty = 2)
#Forecasting #Forecast with data until 2024 and forecast 2025, then
prove it against the actual values of the 2025 GDP
gdp_forecast <- forecast(model5, h = 4)
print(gdp_forecast)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2025 Q1 3.237242 3.203628 3.270857 3.185833 3.288651
## 2025 Q2 3.243559 3.202471 3.284646 3.180720 3.306397
## 2025 Q3 3.249110 3.204759 3.293461 3.181281 3.316939
## 2025 Q4 3.254131 3.208216 3.300046 3.183910 3.324352
# Plot forecast
plot(gdp_forecast, main = "Forecast for 2025 GDP")
autoplot(gdp_forecast) +
autolayer(log_ts, color = "black", size = 1) +
theme_classic()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the forecast package.
## Please report the issue at <https://github.com/robjhyndman/forecast/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
exp(gdp_forecast$mean)
## Qtr1 Qtr2 Qtr3 Qtr4
## 2025 25.46340 25.62475 25.76740 25.89710
forecast_values <- exp(gdp_forecast$mean)
actual_values <- c(25.36062, 25.48931, 25.50637, 25.72661)
comparison_table <- data.frame(
Quarter = c("Q1 2025","Q2 2025","Q3 2025","Q4 2025"),
Forecast = exp(gdp_forecast$mean),
Lo80 = exp(gdp_forecast$lower[,1]),
Hi80 = exp(gdp_forecast$upper[,1]),
Lo95 = exp(gdp_forecast$lower[,2]),
Hi95 = exp(gdp_forecast$upper[,2]),
Actual = actual_values
)
# Compute error
comparison_table$Deviation <- comparison_table$Forecast - comparison_table$Actual
print(comparison_table)
## Quarter Forecast Lo80 Hi80 Lo95 Hi95 Actual Deviation
## 1 Q1 2025 25.46340 24.62169 26.33389 24.18743 26.80669 25.36062 0.1027821
## 2 Q2 2025 25.62475 24.59322 26.69954 24.06408 27.28664 25.48931 0.1354390
## 3 Q3 2025 25.76740 24.64956 26.93592 24.07758 27.57581 25.50637 0.2610279
## 4 Q4 2025 25.89710 24.73491 27.11389 24.14095 27.78100 25.72661 0.1704877