library(readxl)      # read_xlsx
library(forecast)    # auto.arima, forecast
library(stargazer)   # model summary tables
library(seasonal)    # seas (X-11 seasonal adjustment)
library(TSstudio)    # ts_plot
library(tseries)     # arma
hersheys <- read_xlsx("C:\\Users\\emili\\Documents\\8vo semestre\\Ventas_Historicas_Lechitas VF.xlsx")
Using ARIMA models (Box-Jenkins, ARMA, SARIMA) and the historical flavored milk sales data: which model best fits the series?
## Convert the data into a time series
hersheys_st <- ts(data = hersheys$Ventas, start= c(2017,01), frequency = 12)
plot(hersheys_st)
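Before fitting a model, it helps to confirm the seasonal structure visually. A minimal sketch using base R's decompose() (an extra diagnostic, not part of the original output):
# Split the monthly series into trend, seasonal, and remainder components
descomposicion <- decompose(hersheys_st)
plot(descomposicion)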
## Fit the model
modelo_arima <- auto.arima(hersheys_st, trace=TRUE)
##
## ARIMA(2,0,2)(1,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,0)[12] with drift : 388.2968
## ARIMA(1,0,0)(1,1,0)[12] with drift : 373.1084
## ARIMA(0,0,1)(0,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(0,1,0)[12] : 462.9795
## ARIMA(1,0,0)(0,1,0)[12] with drift : 374.4953
## ARIMA(1,0,0)(1,1,1)[12] with drift : Inf
## ARIMA(1,0,0)(0,1,1)[12] with drift : Inf
## ARIMA(0,0,0)(1,1,0)[12] with drift : 382.2401
## ARIMA(2,0,0)(1,1,0)[12] with drift : 373.3363
## ARIMA(1,0,1)(1,1,0)[12] with drift : Inf
## ARIMA(0,0,1)(1,1,0)[12] with drift : Inf
## ARIMA(2,0,1)(1,1,0)[12] with drift : Inf
## ARIMA(1,0,0)(1,1,0)[12] : Inf
##
## Best model: ARIMA(1,0,0)(1,1,0)[12] with drift
modelo_arima
## Series: hersheys_st
## ARIMA(1,0,0)(1,1,0)[12] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.6383 -0.5517 288.8979
## s.e. 0.1551 0.2047 14.5026
##
## sigma^2 = 202701: log likelihood = -181.5
## AIC=371 AICc=373.11 BIC=375.72
pronostico_arima <- forecast(modelo_arima, level=c(95), h=12)
pronostico_arima
## Point Forecast Lo 95 Hi 95
## Jan 2020 35498.90 34616.48 36381.32
## Feb 2020 34202.17 33155.28 35249.05
## Mar 2020 36703.01 35596.10 37809.92
## Apr 2020 36271.90 35141.44 37402.36
## May 2020 37121.98 35982.07 38261.90
## Jun 2020 37102.65 35958.90 38246.40
## Jul 2020 37151.04 36005.73 38296.34
## Aug 2020 38564.64 37418.70 39710.58
## Sep 2020 38755.22 37609.03 39901.42
## Oct 2020 39779.02 38632.72 40925.32
## Nov 2020 38741.63 37595.28 39887.97
## Dec 2020 38645.86 37499.50 39792.22
plot(pronostico_arima, main = "Hershey's milk sales forecast")
summary(modelo_arima)
## Series: hersheys_st
## ARIMA(1,0,0)(1,1,0)[12] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.6383 -0.5517 288.8979
## s.e. 0.1551 0.2047 14.5026
##
## sigma^2 = 202701: log likelihood = -181.5
## AIC=371 AICc=373.11 BIC=375.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 25.22158 343.864 227.17 0.08059932 0.7069542 0.06491044 0.2081026
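As a quick adequacy check on the selected model, the residuals can be inspected. A short sketch with checkresiduals() from the forecast package (illustrative; its output is not shown in the original):
# Residual time plot, ACF, and Ljung-Box test for the auto.arima fit
checkresiduals(modelo_arima)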
hersheys3 <- read_xlsx("C:\\Users\\emili\\Documents\\8vo semestre\\Ventas_Historicas_Lechitas VF.xlsx")
data.ts <- as.ts(hersheys3$Ventas)
sarima100 <- arima(data.ts, order = c(1,1,1),
seasonal = list(order = c(1,1,1),period = 12),
include.mean = FALSE,
method = "ML")
stargazer(sarima100, type = "text",
title = "SARIMA (1,1,1)(1,1,1,12)",
style = "all")
##
## SARIMA (1,1,1)(1,1,1,12)
## =============================================
## Dependent variable:
## ---------------------------
## data.ts
## ---------------------------------------------
## ar1 -0.378*
## (0.203)
## t = -1.862
## p = 0.063
## ma1 1.000***
## (0.218)
## t = 4.581
## p = 0.00001
## sar1 0.151
## (0.503)
## t = 0.301
## p = 0.764
## sma1 -0.953
## (3.353)
## t = -0.284
## p = 0.777
## ---------------------------------------------
## Observations 23
## Log Likelihood -173.210
## sigma2 119,330.900
## Akaike Inf. Crit. 356.421
## =============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
forecast_sarima111 <- predict(sarima100, n.ahead = 36, newxreg = NULL)
forecast_s111 <- forecast(sarima100)
summary(forecast_s111)
##
## Forecast method: ARIMA(1,1,1)(1,1,1)[12]
##
## Model Information:
##
## Call:
## arima(x = data.ts, order = c(1, 1, 1), seasonal = list(order = c(1, 1, 1), period = 12),
## include.mean = FALSE, method = "ML")
##
## Coefficients:
## ar1 ma1 sar1 sma1
## -0.3781 1.0000 0.1511 -0.9535
## s.e. 0.2031 0.2183 0.5028 3.3531
##
## sigma^2 estimated as 119331: log likelihood = -173.21, aic = 356.42
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 5.436341 276.484 174.7976 0.01938218 0.5404538 0.1936061 -0.105018
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 37 35432.43 34906.21 35958.64 34627.65 36237.20
## 38 33952.69 32999.94 34905.45 32495.58 35409.81
## 39 36353.69 35179.74 37527.63 34558.29 38149.08
## 40 35922.09 34541.25 37302.94 33810.27 38033.92
## 41 36922.88 35369.51 38476.25 34547.21 39298.55
## 42 36871.89 35160.87 38582.92 34255.11 39488.68
## 43 36663.12 34808.67 38517.57 33826.98 39499.25
## 44 37971.73 35983.82 39959.64 34931.48 41011.98
## 45 38082.18 35969.47 40194.88 34851.07 41313.28
## 46 39047.44 36816.50 41278.38 35635.51 42459.36
## 47 37953.49 35611.23 40295.75 34371.32 41535.66
## 48 38061.67 35610.83 40512.50 34313.44 41809.89
## 49 38586.53 35961.32 41211.74 34571.62 42601.44
## 50 37093.39 34281.15 39905.63 32792.44 41394.34
## 51 39531.18 36558.28 42504.07 34984.52 44077.83
## 52 39102.71 35972.17 42233.25 34314.96 43890.46
## 53 40093.91 36815.19 43372.63 35079.55 45108.28
## 54 40072.99 36651.81 43494.17 34840.75 45305.24
## 55 39957.97 36400.31 43515.63 34516.99 45398.94
## 56 41253.33 37564.06 44942.60 35611.09 46895.58
## 57 41255.44 37439.34 45071.54 35419.21 47091.67
## 58 42218.22 38278.85 46157.59 36193.47 48242.97
## 59 41160.41 37102.82 45218.01 34954.85 47365.98
## 60 41277.95 37102.19 45453.71 34891.68 47664.22
ts.plot(data.ts, forecast_sarima111$pred)
plot(forecast_s111)
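Because data.ts was created with as.ts(), it carries no calendar index, which is why the SARIMA forecasts print with plain observation numbers (37, 38, ...). A small sketch that re-indexes the point forecasts as a dated monthly series, assuming the sample starts in January 2017:
# Re-index the 36 point forecasts so they start in January 2020
ventas_fechadas <- ts(hersheys3$Ventas, start = c(2017, 1), frequency = 12)
pred_fechada <- ts(forecast_sarima111$pred, start = c(2020, 1), frequency = 12)
ts.plot(ventas_fechadas, pred_fechada, col = c("black", "red"))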
hersheys4 <- read_xlsx("C:\\Users\\emili\\Documents\\8vo semestre\\Ventas_Historicas_Lechitas VF.xlsx")
datast <- ts(hersheys4$Ventas, start = c(2017,1), frequency = 12)
plot(datast, col = "blue", ylab = "Sales",
     main = "Sales by Year", xlab = "Year")
#axis(1, at = seq(from = as.Date("2017-01-01"), to = as.Date("2019-12-01"), by = "2 months"))
data_ts <- ts(hersheys4$Ventas, start = 2017, end = 2019,
frequency = 1)
library(aTSA)
## Warning: package 'aTSA' was built under R version 4.1.1
##
## Attaching package: 'aTSA'
## The following object is masked from 'package:forecast':
##
## forecast
## The following object is masked from 'package:graphics':
##
## identify
adf.test(hersheys4$Ventas)
## Augmented Dickey-Fuller Test
## alternative: stationary
##
## Type 1: no drift no trend
## lag ADF p.value
## [1,] 0 1.31 0.948
## [2,] 1 2.97 0.990
## [3,] 2 2.43 0.990
## [4,] 3 1.93 0.983
## Type 2: with drift no trend
## lag ADF p.value
## [1,] 0 -1.111 0.648
## [2,] 1 -1.461 0.527
## [3,] 2 -1.020 0.680
## [4,] 3 -0.882 0.728
## Type 3: with drift and trend
## lag ADF p.value
## [1,] 0 -5.24 0.010
## [2,] 1 -2.99 0.183
## [3,] 2 -2.81 0.253
## [4,] 3 -3.12 0.137
## ----
## Note: in fact, p.value = 0.01 means p.value <= 0.01
# Under the no-trend specifications (Types 1 and 2) the p-values are large, so the
# unit-root null cannot be rejected: the series is not stationary in levels.
# We therefore difference the series and work with ARIMA models.
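To confirm that one regular difference is enough, the same ADF test can be applied to the differenced series. A quick sketch (its output is not part of the original):
# ADF test on the first difference; small p-values here support d = 1
adf.test(diff(hersheys4$Ventas))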
par(mfrow = c(2,1)) # 2 graphs in a single device
acf(datast, 16, main = "ACF for Ventas", panel.first = grid())
pacf(datast, 16, main = "PACF for Ventas", panel.first = grid())
par(mfrow = c(2,1)) # 2 graphs in a single device
acf(diff(datast), 16, main = "ACF of the First Difference of Ventas",
    panel.first = grid())
pacf(diff(datast), 16, main = "PACF of the First Difference of Ventas",
    panel.first = grid())
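As a cross-check on what these plots suggest, the forecast package can estimate the differencing orders directly. A minimal sketch:
# Unit-root-based estimates of the required differencing orders
ndiffs(datast)   # regular differences (d)
nsdiffs(datast)  # seasonal differences (D)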
# Model 1: ARIMA (3, 1, 3)
model1 <- arima(datast, order = c(3, 1, 3))
summary(model1)
##
## Call:
## arima(x = datast, order = c(3, 1, 3))
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3
## -0.7343 0.7354 0.9985 0.7759 -0.7612 -0.9941
## s.e. 0.0298 0.0132 0.0241 0.3558 0.3937 0.2395
##
## sigma^2 estimated as 412546: log likelihood = -280.33, aic = 574.66
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 65.61968 633.328 508.3325 0.2179991 1.693988 0.5630301 -0.05594644
#Model 2: ARIMA (3, 1, 0)
model2 <- arima(datast, order = c(3, 1, 0))
summary(model2)
##
## Call:
## arima(x = datast, order = c(3, 1, 0))
##
## Coefficients:
## ar1 ar2 ar3
## -0.4540 0.2521 0.2240
## s.e. 0.1631 0.1870 0.1785
##
## sigma^2 estimated as 883037: log likelihood = -289.5, aic = 587
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 239.0785 926.5668 764.8734 0.7531903 2.516639 0.8471753
## ACF1
## Training set -0.008280451
# Model 3: ARIMA (1, 1, 1)
model3 <- arima(datast, order = c(1, 1, 1))
summary(model3)
##
## Call:
## arima(x = datast, order = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1
## -0.6134 0.1559
## s.e. 0.2154 0.2252
##
## sigma^2 estimated as 940215: log likelihood = -290.5, aic = 587.01
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 364.1346 956.0943 814.84 1.153207 2.695705 0.9025184 -0.1339544
# Model 4: ARIMA (1, 1, 0)
model4 <- arima(datast, order = c(1, 1, 0))
summary(model4)
##
## Call:
## arima(x = datast, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## -0.4843
## s.e. 0.1504
##
## sigma^2 estimated as 952177: log likelihood = -290.71, aic = 585.42
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 390.7806 962.1568 818.8609 1.238751 2.707413 0.906972 -0.113897
# Since it has the lowest AIC, we select Model 1.
Box.test(model1$residuals, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: model1$residuals
## X-squared = 0.12234, df = 1, p-value = 0.7265
Box.test(model2$residuals, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: model2$residuals
## X-squared = 0.0026799, df = 1, p-value = 0.9587
Box.test(model3$residuals, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: model3$residuals
## X-squared = 0.70135, df = 1, p-value = 0.4023
Box.test(model4$residuals, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: model4$residuals
## X-squared = 0.50704, df = 1, p-value = 0.4764
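To make the comparison explicit, the AICs and Ljung-Box p-values can be gathered into one table. A small sketch using the four model objects defined above:
# Side-by-side comparison of the candidate ARIMA models
modelos <- list(model1 = model1, model2 = model2, model3 = model3, model4 = model4)
data.frame(AIC = sapply(modelos, AIC),
           LjungBox_p = sapply(modelos, function(m)
             Box.test(m$residuals, type = "Ljung-Box")$p.value))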
library(forecast)
# Model 1 also has among the lowest training errors and an acceptable Ljung-Box p-value
library(forecastHybrid)
## Warning: package 'forecastHybrid' was built under R version 4.1.3
## Loading required package: thief
## Warning: package 'thief' was built under R version 4.1.3
# Forecasts
forecast_arima <- predict(model1, n.ahead = 36, newxreg = NULL)
forecast_arima1 <- forecast(model1)
## Forecast for univariate time series:
## Lead Forecast S.E Lower Upper
## 37 1 35404 666 34098 36709
## ------
## Note: confidence level = 95 %
summary(forecast_arima1)
## Lead Forecast S.E Lower Upper
## Min. :1 Min. :35404 Min. :666.2 Min. :34098 Min. :36709
## 1st Qu.:1 1st Qu.:35404 1st Qu.:666.2 1st Qu.:34098 1st Qu.:36709
## Median :1 Median :35404 Median :666.2 Median :34098 Median :36709
## Mean :1 Mean :35404 Mean :666.2 Mean :34098 Mean :36709
## 3rd Qu.:1 3rd Qu.:35404 3rd Qu.:666.2 3rd Qu.:34098 3rd Qu.:36709
## Max. :1 Max. :35404 Max. :666.2 Max. :34098 Max. :36709
ts.plot(datast, forecast_arima$pred)
plot(forecast_arima1)
#m1pred <- forecast(model1, level=c(95), h=12)
#plot(m1pred)
hersheys2 <- read_xlsx("C:\\Users\\emili\\Documents\\8vo semestre\\Ventas_Historicas_Lechitas VF.xlsx")
modelo_st <- ts(data = hersheys2, start = c(2017,1), frequency = 12)
# Apply a log transform to the series
log_ventas <- log(hersheys2$Ventas)
log_ventas <- ts(log_ventas, start = c(2017,1), frequency = 12)
# Seasonally adjust the series
log_ventas_sa <- seas(log_ventas, x11 = "")
log_ventas_sa <- log_ventas_sa$data[,3]
log_ventas_sa <- as.data.frame(log_ventas_sa)
# Convert it back into a time series
log_ventas_sa <- ts(log_ventas_sa$x, start = c(2017,1), frequency = 12)
ts_plot(log_ventas_sa, Xgrid = TRUE, Ygrid = TRUE, color = "red",
        title = "Seasonally adjusted log(ventas)")
arma_1 = arma(log_ventas_sa, order = c(1,0))
summary(arma_1)
##
## Call:
## arma(x = log_ventas_sa, order = c(1, 0))
##
## Model:
## ARMA(1,0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0317628 -0.0060231 0.0003805 0.0045670 0.0310081
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## ar1 0.98560 0.02085 47.262 <2e-16 ***
## intercept 0.15770 0.21507 0.733 0.463
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Fit:
## sigma^2 estimated as 0.0001598, Conditional Sum-of-Squares = 0.01, AIC = -208.54
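To see how closely the AR(1) tracks the seasonally adjusted series, the fitted values can be overlaid on the data. A brief sketch, assuming fitted() extracts the fitted values from the tseries arma object as documented:
# Overlay the AR(1) fitted values (blue, dashed) on the adjusted log series
plot(log_ventas_sa, main = "ARMA(1,0) fit on seasonally adjusted log(ventas)")
lines(fitted(arma_1), col = "blue", lty = 2)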
Having analyzed the four models, the SARIMA with every parameter set to 1, SARIMA(1,1,1)(1,1,1)[12], adapts best to the series: it has the lowest error, with an RMSE of 276.48, and the lowest AIC of the models compared, 356.42. Its coefficients also behave reasonably: the MA term is highly significant (p = 0.00001) and the AR term is significant at the 10% level (p = 0.063). All of these indicators point to it being the best model.
The SARIMA model makes the following projections for sales in 2020. The first table is the expected scenario (the point forecast), the second the pessimistic scenario (the lower 95% bound), and the third, built below, the optimistic scenario (the upper 95% bound).
## A B
## 1 2020-01 35432.43
## 2 2020-02 33952.69
## 3 2020-03 36353.69
## 4 2020-04 35922.09
## 5 2020-05 36922.88
## 6 2020-06 36871.89
## 7 2020-07 36663.12
## 8 2020-08 37971.73
## 9 2020-09 38082.18
## 10 2020-10 39047.44
## 11 2020-11 37953.49
## 12 2020-12 38061.67
## A B
## 1 2020-01 34627.65
## 2 2020-02 32495.58
## 3 2020-03 34558.29
## 4 2020-04 33810.27
## 5 2020-05 34547.21
## 6 2020-06 34255.11
## 7 2020-07 33826.98
## 8 2020-08 34931.48
## 9 2020-09 34851.07
## 10 2020-10 35635.51
## 11 2020-11 34371.32
## 12 2020-12 34313.44
yearopt <- list("2020-01","2020-02","2020-03","2020-04","2020-05","2020-06","2020-07","2020-08","2020-09","2020-10","2020-11","2020-12")
salesopt <- list(36237.20, 35409.81, 38149.08, 38033.92, 39298.55, 39488.68, 39499.25, 41011.98, 41313.28, 42459.36, 41535.66, 41809.89)
do.call(rbind, Map(data.frame, A=yearopt, B=salesopt))
## A B
## 1 2020-01 36237.20
## 2 2020-02 35409.81
## 3 2020-03 38149.08
## 4 2020-04 38033.92
## 5 2020-05 39298.55
## 6 2020-06 39488.68
## 7 2020-07 39499.25
## 8 2020-08 41011.98
## 9 2020-09 41313.28
## 10 2020-10 42459.36
## 11 2020-11 41535.66
## 12 2020-12 41809.89
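The three scenario tables can also be assembled directly from the forecast object instead of by hand. A compact sketch using the mean, lower, and upper components of forecast_s111:
# Expected / pessimistic / optimistic scenarios for the first 12 forecast months
escenarios <- data.frame(mes       = unlist(yearopt),
                         esperado  = as.numeric(forecast_s111$mean[1:12]),
                         pesimista = as.numeric(forecast_s111$lower[1:12, "95%"]),
                         optimista = as.numeric(forecast_s111$upper[1:12, "95%"]))
escenarios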
The main conclusion of this analysis is that the data show clear seasonal behavior in all three scenarios (expected, optimistic, pessimistic). Given this cyclical pattern, the main recommendation is for Hershey's to identify which seasons or months see a sales increase and follow up on the causes behind them. For example, after the February projection, all three scenarios show an upward trend in sales, which may be explained by Valentine's Day. The same happens in October, explained by Halloween, when consumption of chocolate and candy in general increases.
If Hershey's identifies these rises and falls in sales, it can design strategies focused on meeting consumer needs around those events, with better offers or promotions that, rather than changing the underlying sales trend, reinforce the pattern already in place, which could bring the optimistic scenario within reach.