Librerías

library(readxl)
library(urca)
library(highcharter)
library(forecast)
library(dplyr)
library(xts)
library(TSstudio)
library(tseries)
library(ggfortify)

productos <- read_excel("C:/Users/yulie/OneDrive/Documentos/Semestre 3/Series de Tiempo/Proyecto/datos_chocolates.xlsx",sheet = "Datos")

Base de datos de la línea de producto Chocolates

productos

## # A tibble: 220 × 3
##    Fecha  Producto     Cantidad
##    <chr>  <chr>           <dbl>
##  1 201901 Chocolates_A   393821
##  2 201902 Chocolates_A   326348
##  3 201903 Chocolates_A   402717
##  4 201904 Chocolates_A   307600
##  5 201905 Chocolates_A   438907
##  6 201906 Chocolates_A   436224
##  7 201907 Chocolates_A   475235
##  8 201908 Chocolates_A   476400
##  9 201909 Chocolates_A   509570
## 10 201910 Chocolates_A   499997
## # … with 210 more rows

lineas_ch <-productos %>% select(Fecha,Producto,Cantidad) %>% filter(Producto=="Chocolates_A") %>% select(Cantidad)

Formato de series de tiempo

Se selecciono la marca A de Chocolates para el análisis

serie_A=ts(lineas_ch, start = c(2019,1), end=c(2022,13), frequency = 13)
serie_A

## Time Series:
## Start = c(2019, 1) 
## End = c(2022, 13) 
## Frequency = 13 
##  [1]  393821  326348  402717  307600  438907  436224  475235  476400  509570
## [10]  499997  633646  348235  534172  504567  705254  785655  428341  427975
## [19]  495539  512081  532430  650608  730486  730839  597143  525132  862118
## [28]  809121  789536  626825  524424  810298  940036  861597  789060  968605
## [37]  887692  899210  786597 1022242  805157  773665  381854  764380  604689
## [46]  718128  698473  550002  602068  813271  779402  764391

Gráfico de la serie

ts_plot(serie_A, title="Chocolates_A", slider=T)

Descomposición de la serie

ts_decompose(serie_A)

Identificación

par(mfrow=c(3,3))
plot(serie_A,type="o")
acf(serie_A, lag.max=52)
pacf(serie_A, lag.max=52)
plot(diff(serie_A),type="o")
abline(h=2*sqrt(var(diff(serie_A))),col="red",lty=2)
abline(h=-2*sqrt(var(diff(serie_A))),col="red",lty=2)
acf(diff(serie_A), lag.max=52)
pacf(diff(serie_A), lag.max=52)
plot(diff(serie_A,13),type="o")
acf(diff(serie_A,13), lag.max=52)
pacf(diff(serie_A,13), lag.max=52)

### Pruebas de Raíz Unitaria #### Dickey-Fuller

adf.test(serie_A)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  serie_A
## Dickey-Fuller = -2.1272, Lag order = 3, p-value = 0.5233
## alternative hypothesis: stationary

Se rechaza la hipotesis nula, es decir que la serie no es estacionaria.

Phillips Perron

pp.test(serie_A)

## Warning in pp.test(serie_A): p-value smaller than printed p-value

## 
##  Phillips-Perron Unit Root Test
## 
## data:  serie_A
## Dickey-Fuller Z(alpha) = -30.117, Truncation lag parameter = 3, p-value
## = 0.01
## alternative hypothesis: stationary

De acuerdo al resultado, se observa que la serie es estacionaria.

Descomposición de la serie con una diferencia

ts_decompose(diff(serie_A))

Dickey-Fuller con una diferencia

adf.test(diff(serie_A))

## Warning in adf.test(diff(serie_A)): p-value smaller than printed p-value

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(serie_A)
## Dickey-Fuller = -5.7523, Lag order = 3, p-value = 0.01
## alternative hypothesis: stationary

Al aplicar una diferencia la serie es estacionaria.

Test DF sobre la serie desestacionalizada

plot(ur.df(diff(serie_A,13),type="none",lag=13))

summary(ur.df(diff(serie_A,13),type="none",lag=13))

## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -169477  -75936   22260   91182  342275 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## z.lag.1      -0.26672    0.26394  -1.011    0.334
## z.diff.lag1  -0.37096    0.33424  -1.110    0.291
## z.diff.lag2   0.01274    0.34754   0.037    0.971
## z.diff.lag3   0.15281    0.34064   0.449    0.662
## z.diff.lag4   0.09293    0.34997   0.266    0.796
## z.diff.lag5   0.08216    0.33436   0.246    0.810
## z.diff.lag6   0.39349    0.33039   1.191    0.259
## z.diff.lag7   0.41090    0.37106   1.107    0.292
## z.diff.lag8   0.26175    0.45586   0.574    0.577
## z.diff.lag9  -0.21990    0.44061  -0.499    0.628
## z.diff.lag10 -0.22427    0.38804  -0.578    0.575
## z.diff.lag11  0.24605    0.32261   0.763    0.462
## z.diff.lag12  0.19231    0.35812   0.537    0.602
## z.diff.lag13 -0.24840    0.31009  -0.801    0.440
## 
## Residual standard error: 180800 on 11 degrees of freedom
## Multiple R-squared:  0.6069, Adjusted R-squared:  0.1066 
## F-statistic: 1.213 on 14 and 11 DF,  p-value: 0.3791
## 
## 
## Value of test-statistic is: -1.0105 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.62 -1.95 -1.61

ts_cor(diff(serie_A),lag.max = 52)

Control de la varianza.

log_SA<-log(serie_A)
ts_plot(log_SA)

forecast::BoxCox.lambda(serie_A)

## [1] 0.2879514

Serie_chA<-forecast::BoxCox(serie_A,0.2879514)
ts_plot(Serie_chA)

modeloA_1<-stats::arima(serie_A,
                      order=c(2,1,1), 
                      seasonal=list(order=c(1,1,2), period=13), 
                      fixed=c(NA,NA,NA,NA,NA,NA))

modeloA_1

## 
## Call:
## stats::arima(x = serie_A, order = c(2, 1, 1), seasonal = list(order = c(1, 1, 
##     2), period = 13), fixed = c(NA, NA, NA, NA, NA, NA))
## 
## Coefficients:
##          ar1     ar2      ma1     sar1     sma1     sma2
##       0.2505  0.2328  -0.8112  -0.7411  -0.0045  -0.9873
## s.e.  0.5375  0.3736   0.5006   0.2992   1.8504   1.8213
## 
## sigma^2 estimated as 1.266e+10:  log likelihood = -506.05,  aic = 1026.1

tt <- modeloA_1$coef[which(modeloA_1$coef!=0)]/sqrt(diag(modeloA_1$var.coef))
1 - pt(abs(tt),(modeloA_1$nobs - length(modeloA_1$coef[which(modeloA_1$coef!=0)])))

##         ar1         ar2         ma1        sar1        sma1        sma2 
## 0.322198156 0.268858191 0.057472726 0.009363758 0.499046383 0.295763459

BIC(modeloA_1)

## [1] 1037.561

modeloA_2<-stats::arima(serie_A,
                      order=c(2,1,3), 
                      seasonal=list(order=c(1,1,2), period=13), 
                      fixed=c(NA,NA,NA,NA,NA,NA,NA,NA))

tt <- modeloA_2$coef[which(modeloA_2$coef!=0)]/sqrt(diag(modeloA_2$var.coef))
1 - pt(abs(tt),(modeloA_2$nobs - length(modeloA_2$coef[which(modeloA_2$coef!=0)])))

##       ar1       ar2       ma1       ma2       ma3      sar1      sma1      sma2 
## 0.3128843 0.4257148 0.4803634 0.1266126 0.4914307 0.2153821 0.4985140 0.3729261

BIC(modeloA_2)

## [1] 1044.34

modeloA_3<-stats::arima((serie_A),
                      order=c(2,1,2), 
                      seasonal=list(order=c(1,1,2), period=13), 
                      fixed=c(NA,NA,NA,NA,NA,NA,NA))

tt <- modeloA_3$coef[which(modeloA_3$coef!=0)]/sqrt(diag(modeloA_3$var.coef))
1 - pt(abs(tt),(modeloA_3$nobs - length(modeloA_3$coef[which(modeloA_3$coef!=0)])))

##         ar1         ar2         ma1         ma2        sar1        sma1 
## 0.146359986 0.267714172 0.469253745 0.045690985 0.004611823 0.499747773 
##        sma2 
## 0.356669488

BIC(modeloA_3)

## [1] 1040.702

et=residuals(modeloA_1)
serie_A.fit=serie_A-et 
fitted(modeloA_1)

## Time Series:
## Start = c(2019, 1) 
## End = c(2022, 13) 
## Frequency = 13 
##  [1]  393593.6  326298.6  402620.1  307611.7  438797.5  436135.4  475122.8
##  [8]  476300.8  509450.5  499898.6  633428.8  348309.7  534288.0  505772.0
## [15]  540151.6  696106.7  586748.3  600611.9  517240.0  536118.7  545814.9
## [22]  600510.8  645320.1  785199.6  528156.6  647262.8  703106.0  823910.8
## [29]  899398.4  594622.8  614863.9  684868.8  778127.2  864286.9  913935.6
## [36]  921349.1  945069.1  792206.8  810974.3 1043722.9  956016.6  911529.2
## [43]  642855.9  596020.0  755299.4  821602.3  717652.5  693760.7  723000.4
## [50]  711938.2  649970.7  742704.5

Pruebas al modelo elegido

log(52)

## [1] 3.951244

Box.test(et, lag=6, type ="Ljung-Box")

## 
##  Box-Ljung test
## 
## data:  et
## X-squared = 4.2725, df = 6, p-value = 0.6399

tsdiag(modeloA_2,go.lag=20)

De acuerdo al resultado del Ljung-Box, se observa que no es significativa la autocorrelación.

##Pronóstico

plot(forecast(modeloA_2, h=4, fan=T))
lines(fitted(modeloA_2), col="red")

forecast(modeloA_2, h=4)

##          Point Forecast    Lo 80     Hi 80    Lo 95     Hi 95
## 2023.000       867664.1 698634.5 1036693.7 609155.6 1126172.6
## 2023.077       835472.0 649664.8 1021279.1 551304.5 1119639.4
## 2023.154       853389.4 649343.3 1057435.5 541327.9 1165450.9
## 2023.231       565430.5 352419.1  778441.8 239657.8  891203.1

Modelo autoarima

auto.arima(serie_A)

## Series: serie_A 
## ARIMA(0,1,1)(0,0,1)[13] 
## 
## Coefficients:
##           ma1    sma1
##       -0.6151  0.3387
## s.e.   0.1200  0.1737
## 
## sigma^2 = 1.713e+10:  log likelihood = -673.25
## AIC=1352.51   AICc=1353.02   BIC=1358.3

modelo_Auto<-stats::arima(serie_A,
                      order=c(0,1,1), 
                      seasonal=list(order=c(0,0,1), period=13), 
                      fixed=c(NA,NA))

tt <- modelo_Auto$coef[which(modelo_Auto$coef!=0)]/sqrt(diag(modelo_Auto$var.coef))
1 - pt(abs(tt),(modelo_Auto$nobs - length(modelo_Auto$coef[which(modelo_Auto$coef!=0)])))

##          ma1         sma1 
## 2.523960e-06 2.843121e-02

BIC(modelo_Auto)

## [1] 1358.302

plot(forecast(modelo_Auto, h=4, fan=T))
lines(fitted(modelo_Auto), col="red")

forecast(modelo_Auto, h=4)

##          Point Forecast    Lo 80    Hi 80    Lo 95     Hi 95
## 2023.000       771177.9 606779.6 935576.1 519752.4 1022603.3
## 2023.077       712211.9 536057.3 888366.5 442806.7  981617.1
## 2023.154       705983.7 518809.8 893157.7 419725.9  992241.6
## 2023.231       579333.7 381754.0 776913.4 277161.6  881505.8

Pronóstico Línea Chocolates

2023-03-25