OPCIÓN 1

library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(readxl) 
# Read the workbook and build two monthly series that both start in April 2007.
datos <- read_excel(path = "D:/Cali Lasso ARp.xlsx")
# OR uses only the first 189 values of its column.
OR <- ts(data = datos$OR[seq_len(189)], start = c(2007, 4), frequency = 12)
# UR uses the whole column.
UR <- ts(data = datos$UR, start = c(2007, 4), frequency = 12)

Se valida estacionariedad en las series:

# Attach tseries once; both unit-root tests below use its adf.test().
library(tseries)

# Augmented Dickey-Fuller unit-root test on the OR series.
adf.test(OR)
## Warning in adf.test(OR): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  OR
## Dickey-Fuller = -6.3407, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

# Same unit-root test on the UR series.
adf.test(UR)
## Warning in adf.test(UR): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  UR
## Dickey-Fuller = -5.9187, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

AUTOARIMA PARA OR=TASA DE OCUPACIÓN-OPCION 1

# Let auto.arima() select an ARIMA specification for OR, then print the fit.
MODEL1_OR <- auto.arima(OR)
print(MODEL1_OR)
## Series: OR 
## ARIMA(2,0,2) with zero mean 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2
##       -0.2329  -0.0681  0.9120  0.7828
## s.e.   0.1055   0.0919  0.0747  0.0661
## 
## sigma^2 = 0.5613:  log likelihood = -212.46
## AIC=434.92   AICc=435.25   BIC=451.13

Como el p-valor (0.2519) es mayor que 0.05, no se rechaza H0. En ese caso sí se cumple la condición sobre los residuos: son ruido blanco (los errores no están autocorrelacionados).

# Ljung-Box test on the residuals of MODEL1_OR, using 20 lags.
# NOTE(review): fitdf is not set, so the statistic is referred to 20 degrees
# of freedom (see df below); for residuals of a fitted ARMA(p, q) model the
# Box.test documentation suggests fitdf = p + q -- confirm whether that
# adjustment is wanted here.
Box.test(MODEL1_OR$residuals, type = "Ljung-Box", lag = 20)
## 
##  Box-Ljung test
## 
## data:  MODEL1_OR$residuals
## X-squared = 23.786, df = 20, p-value = 0.2519

PREDICCION OR=TO CALI

# Forecast one month ahead from the model fitted on the full OR series.
pronostico_OR <- forecast(MODEL1_OR, h = 1)

# Plot the observed series and overlay the forecast.
# The forecast holds a single observation, and lines() draws nothing for a
# one-point series, so the forecast is drawn as a point marker instead.
plot(
  OR,
  main = "Serie de datos original y pronóstico",
  xlab = "Fecha",
  ylab = "Tasa de ocupación",
  xlim = c(2007, 2023),
  ylim = c(-8, max(OR))
)
points(pronostico_OR$mean, col = "red", pch = 19)
# Legend mirrors what is drawn: a line for the series, a marker for the forecast.
legend(
  "bottomleft",
  legend = c("Serie original", "Pronóstico"),
  col = c("black", "red"),
  lty = c(1, NA),
  pch = c(NA, 19)
)

# Print the point forecast with its 80% and 95% intervals.
pronostico_OR
##          Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## Jan 2023     -0.3596867 -1.319866 0.600493 -1.828155 1.108781

OPCION 1 MÉTRICAS DE EVALUACIÓN TO CALI

# Compute the training-set accuracy measures for MODEL1_OR.
accuracy_arima_OR <- accuracy(MODEL1_OR)

# Extract MAE and RMSE by column name. Positional indexing is fragile here:
# the first column of the accuracy table is ME, so `[1]` returns the mean
# error rather than the mean absolute error.
mae_arima <- accuracy_arima_OR[, "MAE"]
rmse_arima <- accuracy_arima_OR[, "RMSE"]

# Report both measures.
cat("MAE:", mae_arima, "\n")
## MAE: 0.5198629
cat("RMSE:", rmse_arima, "\n")
## RMSE: 0.7412614

OPCION 2 MÉTRICAS DE EVALUACIÓN TO CALI

# Full training-set accuracy table for MODEL1_OR (a single call is enough;
# repeating it only reprints the same table).
accuracy(MODEL1_OR)
##                         ME      RMSE       MAE      MPE     MAPE     MASE
## Training set -0.0007717626 0.7412614 0.5198629 75.61189 476.4811 0.587638
##                     ACF1
## Training set 0.006127199

# Residual diagnostics for MODEL1_OR; prints the Ljung-Box test shown below.
checkresiduals(MODEL1_OR)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,2) with zero mean
## Q* = 33.819, df = 20, p-value = 0.02737
## 
## Model df: 4.   Total lags used: 24

OPCIÓN 2 ESTIMACIÓN ARIMA TO CALI ORIGINAL DESESTACIONALIZADOS

library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2   3.4.2     ✔ expsmooth 2.3  
## ✔ fma       2.5
## 
library(readxl)
# Re-read the workbook and split it by row position:
# rows 1-132 for fitting, rows 133-189 held out for validation.
base <- read_excel(path = "D:/Cali Lasso ARp.xlsx")
data <- base[seq_len(132), ]
test <- base[seq(from = 133, to = 189), ]

# Historical (fitting) series: monthly, starting April 2007.
data <- ts(data = data$OR, start = c(2007, 4), frequency = 12)

# Validation series: monthly, starting April 2018.
test <- ts(data = test$OR, start = c(2018, 4), frequency = 12)

# Fit on the historical window only and print the selected model.
MODEL2_OR <- auto.arima(data)
print(MODEL2_OR)
## Series: data 
## ARIMA(0,0,0) with zero mean 
## 
## sigma^2 = 0.284:  log likelihood = -104.21
## AIC=210.42   AICc=210.45   BIC=213.3
# Plot the forecast together with the held-out observations.
# h belongs to forecast(): passed to autoplot() it never reaches the
# forecasting step. `series` is spelled out in full instead of relying on
# partial argument matching.
autoplot(forecast(MODEL2_OR, h = 1)) +
  xlab("Tiempo") +
  ylab("Tasa de ocupación") +
  autolayer(test, series = "Real")

# Model analysis: fitted values over the historical series.
# An xlim argument given to autoplot() is discarded (it triggers an
# "unknown parameters" warning and the x scale is replaced), so the visible
# range is set with coord_cartesian() instead.
autoplot(data) +
  autolayer(fitted(MODEL2_OR), series = "Ajuste") +
  coord_cartesian(xlim = c(2007, 2023))

# Training-set accuracy table for MODEL2_OR (a single call is enough;
# repeating it only reprints the same table).
accuracy(MODEL2_OR)
##                       ME      RMSE      MAE MPE MAPE      MASE       ACF1
## Training set -0.03860535 0.5328772 0.418157 100  100 0.6507242 0.06134493

# Residual diagnostics for MODEL2_OR; prints the Ljung-Box test shown below.
checkresiduals(MODEL2_OR)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,0,0) with zero mean
## Q* = 34.703, df = 24, p-value = 0.07292
## 
## Model df: 0.   Total lags used: 24
# One-step-ahead forecast from the model fitted on the historical window.
pronostico2_OR <- forecast(MODEL2_OR, h = 1)
print(pronostico2_OR)
##          Point Forecast      Lo 80     Hi 80    Lo 95   Hi 95
## Apr 2018              0 -0.6829096 0.6829096 -1.04442 1.04442

# Training- and test-set accuracy against the held-out series.
# NOTE(review): this forecast() call sets no h, so the horizon is the package
# default, while `test` is built from rows 133-189 (57 values) -- confirm the
# horizon is meant to cover the whole validation window.
accuracy_results <- accuracy(forecast(MODEL2_OR), test)
print(accuracy_results)
##                       ME      RMSE       MAE MPE MAPE      MASE        ACF1
## Training set -0.03860535 0.5328772 0.4181570 100  100 0.6507242  0.06134493
## Test set     -0.15214386 0.8725772 0.5127968 100  100 0.7980000 -0.03471624
##              Theil's U
## Training set        NA
## Test set     0.9732301