library(readxl)
library(stargazer)
library(lmtest)
library(ggrepel) 
library(tsibble)
library("fUnitRoots")
library(forecast)
library(timetk)
library(TSstudio)
library(urca)
library(texreg)
library("dplyr")

# energy <- read_excel("C:/Users/valeria herazo/Downloads/energy.xlsx")


energy <- read_excel("C:/Users/Walter Marin B/Documents/CURSOS UNISUCRE/CURSO ECONOMETRIA 2/Talleres/Taller 1/Datos de precio de energía/energy.xlsx")

PUNTO A

energy_mensual <- energy %>%
  group_by(Fecha) %>% 
  summarise_by_time(.date_var = Fecha, .by = "month", 
                    demanda_real = mean(demanda_real, na.rm = TRUE))

energy_ts<- ts(energy_mensual$demanda_real, start = c(2000, 1), end = c(2023, 9), frequency = 12)

ts_plot(energy_mensual, slider = T, title = "Demanda promedio mensual de energia Sincelejo \n 2000-2023", Ytitle = "KWh", Xtitle = "Años")
##  DESCOMPOCISION


ts_decompose(energy_ts, type= "both")

PUNTO B

## DESESTACIONALIZAR LA SERIE 

demanda_descomp <- decompose(energy_ts, type = "additive") 

demanda_des <- energy_ts - demanda_descomp$seasonal

demanda_energy <- data.frame("demanda_obs" = as.numeric(energy_ts)) %>% mutate(demanda_des = as.numeric(demanda_des),
Fecha = energy_mensual$Fecha)

ts_plot(demanda_energy, slider = T, title = "Demanda promedio mensual de energia Sincelejo \n observada y desestacionalizada \n 2000-2023", Ytitle = "KWh", Xtitle = "Años")

#PUNTO C

## ESTACIONARIEDAD DE LA SERIE 

df <- ur.df(energy_ts, type = "trend", lags = 0)
summary(df)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -43.699  -2.332  -0.471   1.914  53.296 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -0.349890   0.862926  -0.405   0.6854  
## z.lag.1     -0.016088   0.011675  -1.378   0.1693  
## tt           0.016660   0.007653   2.177   0.0303 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.97 on 281 degrees of freedom
## Multiple R-squared:  0.01712,    Adjusted R-squared:  0.01012 
## F-statistic: 2.447 on 2 and 281 DF,  p-value: 0.0884
## 
## 
## Value of test-statistic is: -1.3781 2.2545 2.4469 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2  6.15  4.71  4.05
## phi3  8.34  6.30  5.36
dwtest (df@testreg)
## 
##  Durbin-Watson test
## 
## data:  df@testreg
## DW = 2.0209, p-value = 0.5228
## alternative hypothesis: true autocorrelation is greater than 0
## PASAR LA SERIE NO ESTACIONARIA A ESTACIONARIA 
energy_ts_diff <- diff(energy_ts, differences = 1)
df_diff <- ur.df(energy_ts_diff, type = "trend", lags = 0)
summary(df_diff)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -44.168  -2.159  -0.369   1.873  54.277 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.747230   0.835254  -0.895    0.372    
## z.lag.1     -1.020706   0.059707 -17.095   <2e-16 ***
## tt           0.009245   0.005119   1.806    0.072 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.999 on 280 degrees of freedom
## Multiple R-squared:  0.5107, Adjusted R-squared:  0.5072 
## F-statistic: 146.1 on 2 and 280 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -17.0951 97.4168 146.1252 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -3.98 -3.42 -3.13
## phi2  6.15  4.71  4.05
## phi3  8.34  6.30  5.36
dwtest (df@testreg)
## 
##  Durbin-Watson test
## 
## data:  df@testreg
## DW = 2.0209, p-value = 0.5228
## alternative hypothesis: true autocorrelation is greater than 0
## TEST DE CORRELOGRAMAS

par(mfcol = c(1, 2))
acf(energy_ts, )
pacf(energy_ts)

par(mfcol = c(1, 2))
acf(energy_ts_diff, )
pacf(energy_ts_diff)

#PUNTO D

## ESTIMAR MODELO ARIMA (1, 1, 0)

arima_ar1 <- arima(energy_ts, c(0, 1, 1), list(order = c(1, 1, 1), period = 12))
stargazer(arima_ar1, type = "text")
## 
## =============================================
##                       Dependent variable:    
##                   ---------------------------
##                            energy_ts         
## ---------------------------------------------
## ma1                          0.050           
##                             (0.068)          
##                                              
## sar1                        -0.060           
##                             (0.062)          
##                                              
## sma1                       -1.000***         
##                             (0.145)          
##                                              
## ---------------------------------------------
## Observations                  272            
## Log Likelihood             -916.177          
## sigma2                      42.695           
## Akaike Inf. Crit.          1,840.354         
## =============================================
## Note:             *p<0.1; **p<0.05; ***p<0.01
demanda_energy <- demanda_energy %>% mutate(resid = c(arima_ar1$residuals),
                              demanda_est = as.numeric(demanda_obs-resid), 
                              demanda_des  = NULL, 
                              resid = NULL) 

ts_plot(demanda_energy, slider = T, title = "Demanda promedio mensual de energia Sincelejo \n observada y ajustada \n 2000-2023", Ytitle = "KWh", Xtitle = "Años")
## NORMALIDAD DE LOS ERRORES

ts_plot(arima_ar1$residuals, slider = T, title = "Residuos", Ytitle = "", Xtitle = "Años")
par(mfcol = c(1, 2))
hist(arima_ar1$residuals)
qqnorm(arima_ar1$residuals)
qqline(arima_ar1$residuals, col = "purple")

library(tseries)
jarque.bera.test(arima_ar1$residuals)
## 
##  Jarque Bera Test
## 
## data:  arima_ar1$residuals
## X-squared = 6527.1, df = 2, p-value < 2.2e-16
## VERSION AUTOMATICA

arima_auto <- auto.arima(energy_ts, trace = T, seasonal = T, stationary = T)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(2,0,2)(1,0,1)[12] with non-zero mean : Inf
##  ARIMA(0,0,0)            with non-zero mean : 3089.436
##  ARIMA(1,0,0)(1,0,0)[12] with non-zero mean : Inf
##  ARIMA(0,0,1)(0,0,1)[12] with non-zero mean : 2537.813
##  ARIMA(0,0,0)            with zero mean     : 3469.433
##  ARIMA(0,0,1)            with non-zero mean : 2743.108
##  ARIMA(0,0,1)(1,0,1)[12] with non-zero mean : Inf
##  ARIMA(0,0,1)(0,0,2)[12] with non-zero mean : 2455.209
##  ARIMA(0,0,1)(1,0,2)[12] with non-zero mean : Inf
##  ARIMA(0,0,0)(0,0,2)[12] with non-zero mean : 2739.332
##  ARIMA(1,0,1)(0,0,2)[12] with non-zero mean : Inf
##  ARIMA(0,0,2)(0,0,2)[12] with non-zero mean : 2315.586
##  ARIMA(0,0,2)(0,0,1)[12] with non-zero mean : 2422.846
##  ARIMA(0,0,2)(1,0,2)[12] with non-zero mean : Inf
##  ARIMA(0,0,2)(1,0,1)[12] with non-zero mean : Inf
##  ARIMA(1,0,2)(0,0,2)[12] with non-zero mean : Inf
##  ARIMA(0,0,3)(0,0,2)[12] with non-zero mean : 2232.601
##  ARIMA(0,0,3)(0,0,1)[12] with non-zero mean : 2304.477
##  ARIMA(0,0,3)(1,0,2)[12] with non-zero mean : Inf
##  ARIMA(0,0,3)(1,0,1)[12] with non-zero mean : Inf
##  ARIMA(1,0,3)(0,0,2)[12] with non-zero mean : Inf
##  ARIMA(0,0,4)(0,0,2)[12] with non-zero mean : 2145.194
##  ARIMA(0,0,4)(0,0,1)[12] with non-zero mean : 2200.104
##  ARIMA(0,0,4)(1,0,2)[12] with non-zero mean : Inf
##  ARIMA(0,0,4)(1,0,1)[12] with non-zero mean : Inf
##  ARIMA(1,0,4)(0,0,2)[12] with non-zero mean : Inf
##  ARIMA(0,0,5)(0,0,2)[12] with non-zero mean : 2119.294
##  ARIMA(0,0,5)(0,0,1)[12] with non-zero mean : 2162.638
##  ARIMA(0,0,5)(1,0,2)[12] with non-zero mean : Inf
##  ARIMA(0,0,5)(1,0,1)[12] with non-zero mean : Inf
##  ARIMA(1,0,5)(0,0,2)[12] with non-zero mean : Inf
##  ARIMA(0,0,5)(0,0,2)[12] with zero mean     : 2310.182
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(0,0,5)(0,0,2)[12] with non-zero mean : 2123.529
## 
##  Best model: ARIMA(0,0,5)(0,0,2)[12] with non-zero mean
summary(arima_auto)
## Series: energy_ts 
## ARIMA(0,0,5)(0,0,2)[12] with non-zero mean 
## 
## Coefficients:
##          ma1     ma2     ma3     ma4     ma5    sma1    sma2     mean
##       1.4135  1.4525  1.2815  0.8573  0.3712  0.3792  0.4285  95.0039
## s.e.  0.0597  0.0968  0.0992  0.0791  0.0627  0.0682  0.0671   6.3089
## 
## sigma^2 = 94.16:  log likelihood = -1052.44
## AIC=2122.87   AICc=2123.53   BIC=2155.75
## 
## Training set error measures:
##                      ME    RMSE      MAE       MPE     MAPE      MASE
## Training set 0.09626166 9.56668 5.855324 -3.148463 7.910709 0.4638178
##                    ACF1
## Training set 0.09419424
demanda_energy <- demanda_energy %>% mutate(resid = arima_auto$residuals,
                              demanda_est = as.numeric(demanda_obs-resid))
ts_plot(demanda_energy, slider = T, title = "Demanda promedio mensual de energia Sincelejo \n observada y ajustada \n 2000-2023", Ytitle = "KWh", Xtitle = "Años")
par(mfcol = c(1, 2))
hist(arima_auto$residuals)
qqnorm(arima_auto$residuals)
qqline(arima_auto$residuals, col = "purple")

jarque.bera.test(arima_auto$residuals)
## 
##  Jarque Bera Test
## 
## data:  arima_auto$residuals
## X-squared = 667.41, df = 2, p-value < 2.2e-16
## PRONOSTICO 

furval_1 <- forecast(arima_ar1, h = 10, level = c(0.95))

furval <- forecast(arima_auto, h = 10, level = c(0.95))

par(mfcol = c(1, 2))
plot(furval)
plot(furval_1)

# c <- dynlm(demanda_real ~ L(demanda_real, lag), data = energy)