1) Select a Variable: Choose one of the following variables for your final project: GDP, inflation, or the S&P 500.

Chosen Variable: GDP

2) Gather Official Data: Find official data to build a time series. Use reliable sources such as government agencies or recognized financial institutions.

The dataset corresponds to Mexico’s quarterly GDP, obtained from INEGI.

For the purposes of this activity, the sample was restricted to the 2000-2024 period in order to forecast the 2025 GDP and then compare it to the actual 2025 data.

# Libraries
library(readxl)
library(tseries)    
## Warning: package 'tseries' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Database (from Excel)
pib_data <- read_excel("db_PIB2.xlsx")
# Converting the date column to Date format
pib_data$fecha <- as.Date(pib_data$fecha, format = "%d/%m/%y")
# Filter data from 2000 to 2024
pib_data <- pib_data[pib_data$fecha >= as.Date("2000-01-01") & 
                     pib_data$fecha <= as.Date("2024-12-31"), ]
# Extracting the starting year and quarter
start_year <- as.numeric(format(min(pib_data$fecha), "%Y"))
start_month <- as.numeric(format(min(pib_data$fecha), "%m"))

# Converting month into quarter
start_quarter <- ifelse(start_month %in% c(1, 2, 3), 1,
                 ifelse(start_month %in% c(4, 5, 6), 2,
                 ifelse(start_month %in% c(7, 8, 9), 3, 4)))

# Creating the time series object with quarterly frequency
ts_data <- ts(pib_data$pib,
              start = c(start_year, start_quarter),
              frequency = 4)

3) Import and Transform Data in RStudio: Upload the dataset to RStudio and convert it into a time series object using the as.ts() function.

# Converting to time series using as.ts()
ts_data <- as.ts(ts_data)

# Displaying the time series
ts_data
##          Qtr1     Qtr2     Qtr3     Qtr4
## 2000 17562072 17786512 17980809 17778211
## 2001 17895081 17758108 17721837 17568749
## 2002 17459227 17649247 17813026 17851054
## 2003 17846064 17868090 17877093 18039385
## 2004 18277012 18520992 18509751 18707302
## 2005 18748764 18752841 18943014 19294067
## 2006 19676167 19841753 19892102 19963992
## 2007 20107079 20234691 20307737 20359607
## 2008 20354106 20505531 20521779 20197940
## 2009 18952309 18633041 19309769 19731182
## 2010 19814164 20029359 20219279 20386074
## 2011 20528681 20591212 20977196 21128881
## 2012 21330425 21461669 21516987 21684247
## 2013 21703435 21614636 21750398 21857318
## 2014 22001548 22281559 22319088 22508686
## 2015 22618510 22816784 23085376 23017589
## 2016 22992150 23094709 23296235 23556605
## 2017 23693966 23720108 23578936 23908388
## 2018 24208912 24134241 24230199 24186080
## 2019 24190818 24123758 24122015 23947538
## 2020 23660840 19187221 22164196 23122943
## 2021 23288550 23507958 23301496 23574312
## 2022 23901537 24184105 24386676 24671752
## 2023 24798272 24983768 25181152 25202781
## 2024 25255205 25227221 25524271 25275173
# Converting to millions (for a better and easier visualization)
ts_millions <- ts_data / 1e6

##Preguntarle al profe si ocupamos transformar los valores a niveles > log --> no es necesario

#Activity 3. Code for Problem Situation #Remember the already converted data

1) Check for Stationarity: visual inspection, ADF test and apply differencing if needed

plot(ts_millions,
     main = "Mexico Quarterly GDP (2000–2024)",
     ylab = "GDP (Millions of MXN)",
     xlab = "Time",
     col = "#0B3C5D",
     lwd = 2)

adf_original = adf.test(ts_millions)
print(adf_original)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_millions
## Dickey-Fuller = -2.8069, Lag order = 4, p-value = 0.2424
## alternative hypothesis: stationary

#p-value was > 0.05 thus differencing needs to be applied

diff1 = diff(ts_millions)
#Check for stationary again
#With the ADF Test
adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1
## Dickey-Fuller = -5.1413, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Plot differenced data
autoplot(diff1) +
  ggtitle("Differenced Series") +
  xlab("Time") + ylab("Differenced Value")

# 2) Analyze ACF and PACF to identify potential model structures and appropiate lag values

# ACF and PACF
par(mfrow = c(1,2))
acf(diff1, main = "ACF of Differenced Series")
pacf(diff1, main = "PACF of Differenced Series")

par(mfrow = c(1,1))

Fit the candidate models

model1 = arima(ts_millions, order = c(0,1,1))
model2 = arima(ts_millions, order = c(0,1,2))
model3 = arima(ts_millions, order = c(1,1,0))
model4 = arima(ts_millions, order = c(1,1,1))


#Models
model1
## 
## Call:
## arima(x = ts_millions, order = c(0, 1, 1))
## 
## Coefficients:
##           ma1
##       -0.2611
## s.e.   0.1094
## 
## sigma^2 estimated as 0.3352:  log likelihood = -86.41,  aic = 176.81
model2
## 
## Call:
## arima(x = ts_millions, order = c(0, 1, 2))
## 
## Coefficients:
##           ma1      ma2
##       -0.2322  -0.1002
## s.e.   0.0996   0.0952
## 
## sigma^2 estimated as 0.3314:  log likelihood = -85.86,  aic = 177.71
model3
## 
## Call:
## arima(x = ts_millions, order = c(1, 1, 0))
## 
## Coefficients:
##           ar1
##       -0.1940
## s.e.   0.0982
## 
## sigma^2 estimated as 0.3399:  log likelihood = -87.07,  aic = 178.15
model4
## 
## Call:
## arima(x = ts_millions, order = c(1, 1, 1))
## 
## Coefficients:
##          ar1      ma1
##       0.2795  -0.5203
## s.e.  0.2714   0.2354
## 
## sigma^2 estimated as 0.3321:  log likelihood = -85.95,  aic = 177.91
#Compare models with AIC
model_comparison <- data.frame(
  Model = c("ARIMA(0,1,1)",
            "ARIMA(0,1,2)",
            "ARIMA(1,1,0)",
            "ARIMA(1,1,1)"),
  AIC = c(AIC(model1),
          AIC(model2),
          AIC(model3),
          AIC(model4))
)

model_comparison <- model_comparison[order(model_comparison$AIC), ]
print(model_comparison)
##          Model      AIC
## 1 ARIMA(0,1,1) 176.8132
## 2 ARIMA(0,1,2) 177.7143
## 4 ARIMA(1,1,1) 177.9052
## 3 ARIMA(1,1,0) 178.1494
auto.arima(ts_millions, seasonal = TRUE, stepwise = FALSE, trace = TRUE)
## 
##  ARIMA(0,1,0)                              : 180.0145
##  ARIMA(0,1,0)           with drift         : 180.383
##  ARIMA(0,1,0)(0,0,1)[4]                    : 182.0854
##  ARIMA(0,1,0)(0,0,1)[4] with drift         : 182.428
##  ARIMA(0,1,0)(0,0,2)[4]                    : 184.1988
##  ARIMA(0,1,0)(0,0,2)[4] with drift         : 184.5974
##  ARIMA(0,1,0)(1,0,0)[4]                    : 182.0851
##  ARIMA(0,1,0)(1,0,0)[4] with drift         : 182.4289
##  ARIMA(0,1,0)(1,0,1)[4]                    : 184.1312
##  ARIMA(0,1,0)(1,0,1)[4] with drift         : 184.578
##  ARIMA(0,1,0)(1,0,2)[4]                    : 186.2566
##  ARIMA(0,1,0)(1,0,2)[4] with drift         : 186.798
##  ARIMA(0,1,0)(2,0,0)[4]                    : 184.1956
##  ARIMA(0,1,0)(2,0,0)[4] with drift         : 184.5998
##  ARIMA(0,1,0)(2,0,1)[4]                    : 186.2616
##  ARIMA(0,1,0)(2,0,1)[4] with drift         : 186.7976
##  ARIMA(0,1,0)(2,0,2)[4]                    : 187.8729
##  ARIMA(0,1,0)(2,0,2)[4] with drift         : 188.9964
##  ARIMA(0,1,1)                              : 176.9382
##  ARIMA(0,1,1)           with drift         : 175.4209
##  ARIMA(0,1,1)(0,0,1)[4]                    : 179.0656
##  ARIMA(0,1,1)(0,0,1)[4] with drift         : 177.4352
##  ARIMA(0,1,1)(0,0,2)[4]                    : 181.2365
##  ARIMA(0,1,1)(0,0,2)[4] with drift         : 179.4854
##  ARIMA(0,1,1)(1,0,0)[4]                    : 179.0656
##  ARIMA(0,1,1)(1,0,0)[4] with drift         : 177.4479
##  ARIMA(0,1,1)(1,0,1)[4]                    : 181.1917
##  ARIMA(0,1,1)(1,0,1)[4] with drift         : 179.3719
##  ARIMA(0,1,1)(1,0,2)[4]                    : 183.3462
##  ARIMA(0,1,1)(1,0,2)[4] with drift         : 181.5794
##  ARIMA(0,1,1)(2,0,0)[4]                    : 181.2362
##  ARIMA(0,1,1)(2,0,0)[4] with drift         : 179.5213
##  ARIMA(0,1,1)(2,0,1)[4]                    : 183.3543
##  ARIMA(0,1,1)(2,0,1)[4] with drift         : 181.57
##  ARIMA(0,1,1)(2,0,2)[4]                    : Inf
##  ARIMA(0,1,1)(2,0,2)[4] with drift         : Inf
##  ARIMA(0,1,2)                              : 177.9669
##  ARIMA(0,1,2)           with drift         : 175.2703
##  ARIMA(0,1,2)(0,0,1)[4]                    : 180.1363
##  ARIMA(0,1,2)(0,0,1)[4] with drift         : 177.4147
##  ARIMA(0,1,2)(0,0,2)[4]                    : 182.3482
##  ARIMA(0,1,2)(0,0,2)[4] with drift         : 179.4347
##  ARIMA(0,1,2)(1,0,0)[4]                    : 180.1363
##  ARIMA(0,1,2)(1,0,0)[4] with drift         : 177.4222
##  ARIMA(0,1,2)(1,0,1)[4]                    : 182.3228
##  ARIMA(0,1,2)(1,0,1)[4] with drift         : 179.2544
##  ARIMA(0,1,2)(1,0,2)[4]                    : 184.4709
##  ARIMA(0,1,2)(1,0,2)[4] with drift         : 181.3913
##  ARIMA(0,1,2)(2,0,0)[4]                    : 182.348
##  ARIMA(0,1,2)(2,0,0)[4] with drift         : 179.4768
##  ARIMA(0,1,2)(2,0,1)[4]                    : 184.4864
##  ARIMA(0,1,2)(2,0,1)[4] with drift         : 181.3576
##  ARIMA(0,1,3)                              : 180.1383
##  ARIMA(0,1,3)           with drift         : 177.2737
##  ARIMA(0,1,3)(0,0,1)[4]                    : 182.3551
##  ARIMA(0,1,3)(0,0,1)[4] with drift         : 179.5008
##  ARIMA(0,1,3)(0,0,2)[4]                    : 184.6156
##  ARIMA(0,1,3)(0,0,2)[4] with drift         : 181.5928
##  ARIMA(0,1,3)(1,0,0)[4]                    : 182.355
##  ARIMA(0,1,3)(1,0,0)[4] with drift         : 179.5048
##  ARIMA(0,1,3)(1,0,1)[4]                    : 184.5876
##  ARIMA(0,1,3)(1,0,1)[4] with drift         : 181.4013
##  ARIMA(0,1,3)(2,0,0)[4]                    : 184.6154
##  ARIMA(0,1,3)(2,0,0)[4] with drift         : 181.6263
##  ARIMA(1,1,0)                              : 178.2744
##  ARIMA(1,1,0)           with drift         : 177.7911
##  ARIMA(1,1,0)(0,0,1)[4]                    : 180.4018
##  ARIMA(1,1,0)(0,0,1)[4] with drift         : 179.8889
##  ARIMA(1,1,0)(0,0,2)[4]                    : 182.5714
##  ARIMA(1,1,0)(0,0,2)[4] with drift         : 182.0565
##  ARIMA(1,1,0)(1,0,0)[4]                    : 180.4018
##  ARIMA(1,1,0)(1,0,0)[4] with drift         : 179.8923
##  ARIMA(1,1,0)(1,0,1)[4]                    : 182.5322
##  ARIMA(1,1,0)(1,0,1)[4] with drift         : 182.0119
##  ARIMA(1,1,0)(1,0,2)[4]                    : 184.6914
##  ARIMA(1,1,0)(1,0,2)[4] with drift         : 184.2609
##  ARIMA(1,1,0)(2,0,0)[4]                    : 182.5709
##  ARIMA(1,1,0)(2,0,0)[4] with drift         : 182.0652
##  ARIMA(1,1,0)(2,0,1)[4]                    : 184.6983
##  ARIMA(1,1,0)(2,0,1)[4] with drift         : 184.2582
##  ARIMA(1,1,0)(2,0,2)[4]                    : Inf
##  ARIMA(1,1,0)(2,0,2)[4] with drift         : 186.5545
##  ARIMA(1,1,1)                              : 178.1579
##  ARIMA(1,1,1)           with drift         : Inf
##  ARIMA(1,1,1)(0,0,1)[4]                    : 180.3166
##  ARIMA(1,1,1)(0,0,1)[4] with drift         : Inf
##  ARIMA(1,1,1)(0,0,2)[4]                    : 182.5253
##  ARIMA(1,1,1)(0,0,2)[4] with drift         : Inf
##  ARIMA(1,1,1)(1,0,0)[4]                    : 180.3163
##  ARIMA(1,1,1)(1,0,0)[4] with drift         : Inf
##  ARIMA(1,1,1)(1,0,1)[4]                    : 182.5352
##  ARIMA(1,1,1)(1,0,1)[4] with drift         : Inf
##  ARIMA(1,1,1)(1,0,2)[4]                    : 184.65
##  ARIMA(1,1,1)(1,0,2)[4] with drift         : Inf
##  ARIMA(1,1,1)(2,0,0)[4]                    : 182.5257
##  ARIMA(1,1,1)(2,0,0)[4] with drift         : Inf
##  ARIMA(1,1,1)(2,0,1)[4]                    : 184.6683
##  ARIMA(1,1,1)(2,0,1)[4] with drift         : Inf
##  ARIMA(1,1,2)                              : 180.1389
##  ARIMA(1,1,2)           with drift         : Inf
##  ARIMA(1,1,2)(0,0,1)[4]                    : 182.3554
##  ARIMA(1,1,2)(0,0,1)[4] with drift         : Inf
##  ARIMA(1,1,2)(0,0,2)[4]                    : 184.6158
##  ARIMA(1,1,2)(0,0,2)[4] with drift         : Inf
##  ARIMA(1,1,2)(1,0,0)[4]                    : 182.3553
##  ARIMA(1,1,2)(1,0,0)[4] with drift         : Inf
##  ARIMA(1,1,2)(1,0,1)[4]                    : 184.5894
##  ARIMA(1,1,2)(1,0,1)[4] with drift         : Inf
##  ARIMA(1,1,2)(2,0,0)[4]                    : 184.6156
##  ARIMA(1,1,2)(2,0,0)[4] with drift         : Inf
##  ARIMA(1,1,3)                              : 182.1386
##  ARIMA(1,1,3)           with drift         : Inf
##  ARIMA(1,1,3)(0,0,1)[4]                    : 184.3667
##  ARIMA(1,1,3)(0,0,1)[4] with drift         : Inf
##  ARIMA(1,1,3)(1,0,0)[4]                    : 184.3672
##  ARIMA(1,1,3)(1,0,0)[4] with drift         : Inf
##  ARIMA(2,1,0)                              : 178.4479
##  ARIMA(2,1,0)           with drift         : 177.0901
##  ARIMA(2,1,0)(0,0,1)[4]                    : 180.612
##  ARIMA(2,1,0)(0,0,1)[4] with drift         : 179.0669
##  ARIMA(2,1,0)(0,0,2)[4]                    : 182.8311
##  ARIMA(2,1,0)(0,0,2)[4] with drift         : 181.1629
##  ARIMA(2,1,0)(1,0,0)[4]                    : 180.6119
##  ARIMA(2,1,0)(1,0,0)[4] with drift         : 179.0869
##  ARIMA(2,1,0)(1,0,1)[4]                    : 182.7598
##  ARIMA(2,1,0)(1,0,1)[4] with drift         : 181.0465
##  ARIMA(2,1,0)(1,0,2)[4]                    : 184.982
##  ARIMA(2,1,0)(1,0,2)[4] with drift         : 183.3264
##  ARIMA(2,1,0)(2,0,0)[4]                    : 182.8306
##  ARIMA(2,1,0)(2,0,0)[4] with drift         : 181.2081
##  ARIMA(2,1,0)(2,0,1)[4]                    : 184.9886
##  ARIMA(2,1,0)(2,0,1)[4] with drift         : 183.3198
##  ARIMA(2,1,1)                              : 180.0811
##  ARIMA(2,1,1)           with drift         : Inf
##  ARIMA(2,1,1)(0,0,1)[4]                    : 182.3005
##  ARIMA(2,1,1)(0,0,1)[4] with drift         : Inf
##  ARIMA(2,1,1)(0,0,2)[4]                    : 184.561
##  ARIMA(2,1,1)(0,0,2)[4] with drift         : Inf
##  ARIMA(2,1,1)(1,0,0)[4]                    : 182.3005
##  ARIMA(2,1,1)(1,0,0)[4] with drift         : Inf
##  ARIMA(2,1,1)(1,0,1)[4]                    : Inf
##  ARIMA(2,1,1)(1,0,1)[4] with drift         : Inf
##  ARIMA(2,1,1)(2,0,0)[4]                    : 184.5599
##  ARIMA(2,1,1)(2,0,0)[4] with drift         : Inf
##  ARIMA(2,1,2)                              : 181.4882
##  ARIMA(2,1,2)           with drift         : Inf
##  ARIMA(2,1,2)(0,0,1)[4]                    : 184.6134
##  ARIMA(2,1,2)(0,0,1)[4] with drift         : Inf
##  ARIMA(2,1,2)(1,0,0)[4]                    : 183.593
##  ARIMA(2,1,2)(1,0,0)[4] with drift         : Inf
##  ARIMA(2,1,3)                              : 183.6576
##  ARIMA(2,1,3)           with drift         : Inf
##  ARIMA(3,1,0)                              : 180.0732
##  ARIMA(3,1,0)           with drift         : 178.0388
##  ARIMA(3,1,0)(0,0,1)[4]                    : 182.2157
##  ARIMA(3,1,0)(0,0,1)[4] with drift         : 179.381
##  ARIMA(3,1,0)(0,0,2)[4]                    : 184.473
##  ARIMA(3,1,0)(0,0,2)[4] with drift         : 181.4586
##  ARIMA(3,1,0)(1,0,0)[4]                    : 182.2138
##  ARIMA(3,1,0)(1,0,0)[4] with drift         : 179.477
##  ARIMA(3,1,0)(1,0,1)[4]                    : 184.346
##  ARIMA(3,1,0)(1,0,1)[4] with drift         : 181.1515
##  ARIMA(3,1,0)(2,0,0)[4]                    : 184.4669
##  ARIMA(3,1,0)(2,0,0)[4] with drift         : 181.6294
##  ARIMA(3,1,1)                              : 182.2074
##  ARIMA(3,1,1)           with drift         : Inf
##  ARIMA(3,1,1)(0,0,1)[4]                    : 184.4563
##  ARIMA(3,1,1)(0,0,1)[4] with drift         : 181.6974
##  ARIMA(3,1,1)(1,0,0)[4]                    : 184.4556
##  ARIMA(3,1,1)(1,0,0)[4] with drift         : Inf
##  ARIMA(3,1,2)                              : 183.6786
##  ARIMA(3,1,2)           with drift         : Inf
## 
## 
## 
##  Best model: ARIMA(0,1,2)           with drift
## Series: ts_millions 
## ARIMA(0,1,2) with drift 
## 
## Coefficients:
##           ma1      ma2   drift
##       -0.2895  -0.1542  0.0785
## s.e.   0.0987   0.1017  0.0317
## 
## sigma^2 = 0.3251:  log likelihood = -83.42
## AIC=174.84   AICc=175.27   BIC=185.23

#Residual diagnosis with Ljung-Box

best_model = Arima(ts_millions, order = c(0,1,2), include.drift = TRUE)
checkresiduals(best_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,2) with drift
## Q* = 0.95325, df = 6, p-value = 0.9873
## 
## Model df: 2.   Total lags used: 8
Box.test(best_model$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  best_model$residuals
## X-squared = 0.0014622, df = 1, p-value = 0.9695
#Constant variance -- residual plot over time 
plot(best_model$residuals,
     main = "Residuals Over Time",
     ylab = "Residuals",
     xlab = "Time",
     col  = "#0B3C5D",
     lwd  = 1.5)
abline(h = 0, col = "red", lty = 2)

#Forecastinggg #Ask professor how many steps ahead #Forecast with data until 2024 and forecast 2025, then prove it against the actual values of the 2025 GDP

gdp_forecast <- forecast(best_model, h = 4)
print(gdp_forecast)
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2025 Q1       25.40675 24.67601 26.13749 24.28918 26.52432
## 2025 Q2       25.52941 24.63299 26.42583 24.15845 26.90037
## 2025 Q3       25.60790 24.62360 26.59219 24.10255 27.11324
## 2025 Q4       25.68638 24.62144 26.75132 24.05770 27.31506
# Plot forecast
plot(gdp_forecast, main = "Forecast for 2025 GDP")