hersheys <- read_xlsx("C:\\Users\\emili\\Documents\\8vo semestre\\Ventas_Historicas_Lechitas VF.xlsx")

Modelacion

Utilizando modelos ARIMA (Box-Jenkins, ARMA, SARIMA) y los datos históricos de las ventas de leche saborizada ¿Cuál es el modelo que mejor se adapta a la serie?

Modelo 1. ARIMA

## Convertir los datos en formato de serie temporal
hersheys_st <- ts(data = hersheys$Ventas, start= c(2017,01), frequency = 12)
plot(hersheys_st)

## Ajuste del modelo
modelo_arima <- auto.arima(hersheys_st, trace=TRUE)
## 
##  ARIMA(2,0,2)(1,1,1)[12] with drift         : Inf
##  ARIMA(0,0,0)(0,1,0)[12] with drift         : 388.2968
##  ARIMA(1,0,0)(1,1,0)[12] with drift         : 373.1084
##  ARIMA(0,0,1)(0,1,1)[12] with drift         : Inf
##  ARIMA(0,0,0)(0,1,0)[12]                    : 462.9795
##  ARIMA(1,0,0)(0,1,0)[12] with drift         : 374.4953
##  ARIMA(1,0,0)(1,1,1)[12] with drift         : Inf
##  ARIMA(1,0,0)(0,1,1)[12] with drift         : Inf
##  ARIMA(0,0,0)(1,1,0)[12] with drift         : 382.2401
##  ARIMA(2,0,0)(1,1,0)[12] with drift         : 373.3363
##  ARIMA(1,0,1)(1,1,0)[12] with drift         : Inf
##  ARIMA(0,0,1)(1,1,0)[12] with drift         : Inf
##  ARIMA(2,0,1)(1,1,0)[12] with drift         : Inf
##  ARIMA(1,0,0)(1,1,0)[12]                    : Inf
## 
##  Best model: ARIMA(1,0,0)(1,1,0)[12] with drift
modelo_arima
## Series: hersheys_st 
## ARIMA(1,0,0)(1,1,0)[12] with drift 
## 
## Coefficients:
##          ar1     sar1     drift
##       0.6383  -0.5517  288.8979
## s.e.  0.1551   0.2047   14.5026
## 
## sigma^2 = 202701:  log likelihood = -181.5
## AIC=371   AICc=373.11   BIC=375.72
pronostico_arima <- forecast(modelo_arima, level=c(95), h=12)
pronostico_arima
##          Point Forecast    Lo 95    Hi 95
## Jan 2020       35498.90 34616.48 36381.32
## Feb 2020       34202.17 33155.28 35249.05
## Mar 2020       36703.01 35596.10 37809.92
## Apr 2020       36271.90 35141.44 37402.36
## May 2020       37121.98 35982.07 38261.90
## Jun 2020       37102.65 35958.90 38246.40
## Jul 2020       37151.04 36005.73 38296.34
## Aug 2020       38564.64 37418.70 39710.58
## Sep 2020       38755.22 37609.03 39901.42
## Oct 2020       39779.02 38632.72 40925.32
## Nov 2020       38741.63 37595.28 39887.97
## Dec 2020       38645.86 37499.50 39792.22
plot(pronostico_arima, main= "Pronóstico de ventas de leche Hershey's")

summary(modelo_arima)
## Series: hersheys_st 
## ARIMA(1,0,0)(1,1,0)[12] with drift 
## 
## Coefficients:
##          ar1     sar1     drift
##       0.6383  -0.5517  288.8979
## s.e.  0.1551   0.2047   14.5026
## 
## sigma^2 = 202701:  log likelihood = -181.5
## AIC=371   AICc=373.11   BIC=375.72
## 
## Training set error measures:
##                    ME    RMSE    MAE        MPE      MAPE       MASE      ACF1
## Training set 25.22158 343.864 227.17 0.08059932 0.7069542 0.06491044 0.2081026

Modelo 2. SARIMA

hersheys3 <- read_xlsx("C:\\Users\\emili\\Documents\\8vo semestre\\Ventas_Historicas_Lechitas VF.xlsx")
data.ts <- as.ts(hersheys3$Ventas) 
sarima100 <- arima(data.ts, order = c(1,1,1),
                   seasonal = list(order = c(1,1,1),period = 12),
                   include.mean = FALSE,
                   method = "ML")
stargazer(sarima100, type = "text", 
          title = "SARIMA (1,1,1)(1,1,1,12)",
          style = "all")
## 
## SARIMA (1,1,1)(1,1,1,12)
## =============================================
##                       Dependent variable:    
##                   ---------------------------
##                             data.ts          
## ---------------------------------------------
## ar1                         -0.378*          
##                             (0.203)          
##                           t = -1.862         
##                            p = 0.063         
## ma1                        1.000***          
##                             (0.218)          
##                            t = 4.581         
##                           p = 0.00001        
## sar1                         0.151           
##                             (0.503)          
##                            t = 0.301         
##                            p = 0.764         
## sma1                        -0.953           
##                             (3.353)          
##                           t = -0.284         
##                            p = 0.777         
## ---------------------------------------------
## Observations                  23             
## Log Likelihood             -173.210          
## sigma2                    119,330.900        
## Akaike Inf. Crit.           356.421          
## =============================================
## Note:             *p<0.1; **p<0.05; ***p<0.01
forecast_sarima111 <- predict(sarima100, n.ahead = 36, newxreg = NULL)
forecast_s111 <- forecast(sarima100)
summary(forecast_s111)
## 
## Forecast method: ARIMA(1,1,1)(1,1,1)[12]
## 
## Model Information:
## 
## Call:
## arima(x = data.ts, order = c(1, 1, 1), seasonal = list(order = c(1, 1, 1), period = 12), 
##     include.mean = FALSE, method = "ML")
## 
## Coefficients:
##           ar1     ma1    sar1     sma1
##       -0.3781  1.0000  0.1511  -0.9535
## s.e.   0.2031  0.2183  0.5028   3.3531
## 
## sigma^2 estimated as 119331:  log likelihood = -173.21,  aic = 356.42
## 
## Error measures:
##                    ME    RMSE      MAE        MPE      MAPE      MASE      ACF1
## Training set 5.436341 276.484 174.7976 0.01938218 0.5404538 0.1936061 -0.105018
## 
## Forecasts:
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 37       35432.43 34906.21 35958.64 34627.65 36237.20
## 38       33952.69 32999.94 34905.45 32495.58 35409.81
## 39       36353.69 35179.74 37527.63 34558.29 38149.08
## 40       35922.09 34541.25 37302.94 33810.27 38033.92
## 41       36922.88 35369.51 38476.25 34547.21 39298.55
## 42       36871.89 35160.87 38582.92 34255.11 39488.68
## 43       36663.12 34808.67 38517.57 33826.98 39499.25
## 44       37971.73 35983.82 39959.64 34931.48 41011.98
## 45       38082.18 35969.47 40194.88 34851.07 41313.28
## 46       39047.44 36816.50 41278.38 35635.51 42459.36
## 47       37953.49 35611.23 40295.75 34371.32 41535.66
## 48       38061.67 35610.83 40512.50 34313.44 41809.89
## 49       38586.53 35961.32 41211.74 34571.62 42601.44
## 50       37093.39 34281.15 39905.63 32792.44 41394.34
## 51       39531.18 36558.28 42504.07 34984.52 44077.83
## 52       39102.71 35972.17 42233.25 34314.96 43890.46
## 53       40093.91 36815.19 43372.63 35079.55 45108.28
## 54       40072.99 36651.81 43494.17 34840.75 45305.24
## 55       39957.97 36400.31 43515.63 34516.99 45398.94
## 56       41253.33 37564.06 44942.60 35611.09 46895.58
## 57       41255.44 37439.34 45071.54 35419.21 47091.67
## 58       42218.22 38278.85 46157.59 36193.47 48242.97
## 59       41160.41 37102.82 45218.01 34954.85 47365.98
## 60       41277.95 37102.19 45453.71 34891.68 47664.22
ts.plot(data.ts, forecast_sarima111$pred)

plot(forecast_s111)

Modelo 3. Box-Jenkins

datast <- ts(hersheys4$Ventas, start = c(2017,1), frequency = 12)

plot(datast, col = "blue", ylab = "Ventas", 
     main = "Ventas por Año", xlab = "Año")

#axis(2, at = seq (from = 2017-01-01, to = 2019-12-01, by = 2))

data_ts <- ts(hersheys4$Ventas, start = 2017, end = 2019, 
                    frequency = 1)
library(aTSA)
## Warning: package 'aTSA' was built under R version 4.1.1
## 
## Attaching package: 'aTSA'
## The following object is masked from 'package:forecast':
## 
##     forecast
## The following object is masked from 'package:graphics':
## 
##     identify
adf.test(hersheys4$Ventas)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag  ADF p.value
## [1,]   0 1.31   0.948
## [2,]   1 2.97   0.990
## [3,]   2 2.43   0.990
## [4,]   3 1.93   0.983
## Type 2: with drift no trend 
##      lag    ADF p.value
## [1,]   0 -1.111   0.648
## [2,]   1 -1.461   0.527
## [3,]   2 -1.020   0.680
## [4,]   3 -0.882   0.728
## Type 3: with drift and trend 
##      lag   ADF p.value
## [1,]   0 -5.24   0.010
## [2,]   1 -2.99   0.183
## [3,]   2 -2.81   0.253
## [4,]   3 -3.12   0.137
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
#Dado que el p-value es menor a 0.01, la serie no es estacionaria.
#Por tanto, usaremos el modelo ARIMA.
par(mfrow = c(2,1)) #2 graphs in a single device
acf(datast, 16, main = "ACF for Ventas", panel.first = grid ())
pacf(datast, 16, main = "PACF for Ventas", panel.first = grid ())

par(mfrow = c(2,1)) #2 graphs in a single device
acf(diff (datast), 16, main = "ACF for First Difference de Ventas", 
    panel.first = grid ())
pacf(diff (datast), 16, main = "PACF for First Difference de Ventas", 
     panel.first = grid ())

# Model 1: ARIMA (3, 1, 3)
model1 <- arima(datast, order = c(3, 1, 3))
summary(model1)
## 
## Call:
## arima(x = datast, order = c(3, 1, 3))
## 
## Coefficients:
##           ar1     ar2     ar3     ma1      ma2      ma3
##       -0.7343  0.7354  0.9985  0.7759  -0.7612  -0.9941
## s.e.   0.0298  0.0132  0.0241  0.3558   0.3937   0.2395
## 
## sigma^2 estimated as 412546:  log likelihood = -280.33,  aic = 574.66
## 
## Training set error measures:
##                    ME    RMSE      MAE       MPE     MAPE      MASE        ACF1
## Training set 65.61968 633.328 508.3325 0.2179991 1.693988 0.5630301 -0.05594644
#Model 2: ARIMA (3, 1, 0)
model2 <- arima(datast, order = c(3, 1, 0))
summary(model2)
## 
## Call:
## arima(x = datast, order = c(3, 1, 0))
## 
## Coefficients:
##           ar1     ar2     ar3
##       -0.4540  0.2521  0.2240
## s.e.   0.1631  0.1870  0.1785
## 
## sigma^2 estimated as 883037:  log likelihood = -289.5,  aic = 587
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 239.0785 926.5668 764.8734 0.7531903 2.516639 0.8471753
##                      ACF1
## Training set -0.008280451
#Model 2: ARIMA (3, 1, 0)
model2 <- arima(datast, order = c(3, 1, 0))
summary(model2)
## 
## Call:
## arima(x = datast, order = c(3, 1, 0))
## 
## Coefficients:
##           ar1     ar2     ar3
##       -0.4540  0.2521  0.2240
## s.e.   0.1631  0.1870  0.1785
## 
## sigma^2 estimated as 883037:  log likelihood = -289.5,  aic = 587
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 239.0785 926.5668 764.8734 0.7531903 2.516639 0.8471753
##                      ACF1
## Training set -0.008280451
# Model 3: ARIMA (1, 1, 1)
model3 <- arima(datast, order = c(1, 1, 1))
summary(model3)
## 
## Call:
## arima(x = datast, order = c(1, 1, 1))
## 
## Coefficients:
##           ar1     ma1
##       -0.6134  0.1559
## s.e.   0.2154  0.2252
## 
## sigma^2 estimated as 940215:  log likelihood = -290.5,  aic = 587.01
## 
## Training set error measures:
##                    ME     RMSE    MAE      MPE     MAPE      MASE       ACF1
## Training set 364.1346 956.0943 814.84 1.153207 2.695705 0.9025184 -0.1339544
# Model 4: ARIMA (1, 1, 0)
model4 <- arima(datast, order = c(1, 1, 0))
summary(model4)
## 
## Call:
## arima(x = datast, order = c(1, 1, 0))
## 
## Coefficients:
##           ar1
##       -0.4843
## s.e.   0.1504
## 
## sigma^2 estimated as 952177:  log likelihood = -290.71,  aic = 585.42
## 
## Training set error measures:
##                    ME     RMSE      MAE      MPE     MAPE     MASE      ACF1
## Training set 390.7806 962.1568 818.8609 1.238751 2.707413 0.906972 -0.113897
#Dado que tiene el AIC menor, se toma el modelo 1.

Box.test(model1$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  model1$residuals
## X-squared = 0.12234, df = 1, p-value = 0.7265
Box.test(model2$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  model2$residuals
## X-squared = 0.0026799, df = 1, p-value = 0.9587
Box.test(model3$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  model3$residuals
## X-squared = 0.70135, df = 1, p-value = 0.4023
Box.test(model4$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  model4$residuals
## X-squared = 0.50704, df = 1, p-value = 0.4764
library(forecast)
#Podemos observar que este modelo (1) tiene de los menores errores y un buen p-value

library(forecast)
library(forecastHybrid)
## Warning: package 'forecastHybrid' was built under R version 4.1.3
## Loading required package: thief
## Warning: package 'thief' was built under R version 4.1.3
#Predicciones
forecast_arima <- predict(model1, n.ahead = 36, newxreg = NULL)
forecast_arima1 <- forecast(model1)
## Forecast for univariate time series: 
##    Lead Forecast S.E Lower Upper
## 37    1    35404 666 34098 36709
## ------ 
## Note: confidence level = 95 %

summary(forecast_arima1)
##       Lead      Forecast          S.E            Lower           Upper      
##  Min.   :1   Min.   :35404   Min.   :666.2   Min.   :34098   Min.   :36709  
##  1st Qu.:1   1st Qu.:35404   1st Qu.:666.2   1st Qu.:34098   1st Qu.:36709  
##  Median :1   Median :35404   Median :666.2   Median :34098   Median :36709  
##  Mean   :1   Mean   :35404   Mean   :666.2   Mean   :34098   Mean   :36709  
##  3rd Qu.:1   3rd Qu.:35404   3rd Qu.:666.2   3rd Qu.:34098   3rd Qu.:36709  
##  Max.   :1   Max.   :35404   Max.   :666.2   Max.   :34098   Max.   :36709
ts.plot(datast, forecast_arima$pred)

plot(forecast_arima1)

#m1pred <- forecast(model1, level=c(95), h=12)
#plot(m1pred)

Modelo 4. ARMA

hersheys2 <- read_xlsx("C:\\Users\\emili\\Documents\\8vo semestre\\Ventas_Historicas_Lechitas VF.xlsx")
modelo_st <- ts(data = hersheys2, start = c(2017,1), frequency = 12)
#Aplicamos logaritmo  a la serie
log_ventas <- log(hersheys2$Ventas)
log_ventas <- ts(log_ventas, start = c(2017,1,1), frequency = 12)

#Desestacionalizar la serie
log_ventas_sa <- seas(log_ventas, x11="")
log_ventas_sa <- log_ventas_sa$data [,3]
log_ventas_sa <- as.data.frame(log_ventas_sa)

#La convertimos nuevamente en una serie de tiempo
log_ventas_sa <- ts(log_ventas_sa$x, start = c(2017,1,1), frequency = 12)
ts_plot(log_ventas_sa, Xgrid = TRUE, Ygrid = TRUE, color="red", 
        title = "Log(ventas) desestacionalizado")
arma_1 =  arma(log_ventas_sa, order = c(1,0))
summary(arma_1)
## 
## Call:
## arma(x = log_ventas_sa, order = c(1, 0))
## 
## Model:
## ARMA(1,0)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0317628 -0.0060231  0.0003805  0.0045670  0.0310081 
## 
## Coefficient(s):
##            Estimate  Std. Error  t value Pr(>|t|)    
## ar1         0.98560     0.02085   47.262   <2e-16 ***
## intercept   0.15770     0.21507    0.733    0.463    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Fit:
## sigma^2 estimated as 0.0001598,  Conditional Sum-of-Squares = 0.01,  AIC = -208.54

2.- ¿Qué modelo de regresión ofrece mejor exactitud predictiva?

Tras haber analizado los 4 modelos, SARIMA con todos los parametros en 1 es aquel que presenta un error menor tomando el RMSE de 276.48. Otro indicador para medir a exactitud fue el AIC de 356.42 que fue el mas bajo de los modelos. Finalmente, guiandonos por el p value mas pequeño, SARIMA en todos sus parametros tuvo valores de 0.06 siendo todos estos indicadores de que es el mejor modelo.

3.-Según su mejor modelo ¿Cuál es la proyección de ventas en valor monetario para el siguiente año total de Hershey´s?

El modelo SARIMA hace las siguients proyecciones para las ventas de lo que seria el año 2020

##          A        B
## 1  2020-01 35432.43
## 2  2020-02 33952.69
## 3  2020-03 36353.69
## 4  2020-04 35922.09
## 5  2020-05 36922.88
## 6  2020-06 36871.89
## 7  2020-07 36663.12
## 8  2020-08 37971.73
## 9  2020-09 38082.18
## 10 2020-10 39047.44
## 11 2020-11 37953.49
## 12 2020-12 38061.67

4. Considerando algunos imponderables establezca con los modelos construidos tres escenarios futuros ( proyecciones) A (escenario esperado) , B (escenario optimista) y C (escenario pesimista).

Escenario esperado

##          A        B
## 1  2020-01 35432.43
## 2  2020-02 33952.69
## 3  2020-03 36353.69
## 4  2020-04 35922.09
## 5  2020-05 36922.88
## 6  2020-06 36871.89
## 7  2020-07 36663.12
## 8  2020-08 37971.73
## 9  2020-09 38082.18
## 10 2020-10 39047.44
## 11 2020-11 37953.49
## 12 2020-12 38061.67

Escenario pesimista

##          A         B
## 1  2020-01  34627.65
## 2  2020-02 332495.58
## 3  2020-03  34558.29
## 4  2020-04  33810.27
## 5  2020-05  34547.21
## 6  2020-06  34255.11
## 7  2020-07  33826.98
## 8  2020-08  34931.48
## 9  2020-09  34851.07
## 10 2020-10  35635.51
## 11 2020-11  34371.32
## 12 2020-12  34313.44

Escenario optimista

yearopt <- list("2020-01","2020-02","2020-03","2020-04","2020-05","2020-06","2020-07","2020-08","2020-09","2020-10","2020-11","2020-12")

salesopt <- list(36237.20, 35409.81, 38149.08, 38033.92, 39298.55, 39488.68, 39499.25, 41011.98, 41313.28, 42459.36, 41535.66, 41809.89)

do.call(rbind, Map(data.frame, A=yearopt, B=salesopt))
##          A        B
## 1  2020-01 36237.20
## 2  2020-02 35409.81
## 3  2020-03 38149.08
## 4  2020-04 38033.92
## 5  2020-05 39298.55
## 6  2020-06 39488.68
## 7  2020-07 39499.25
## 8  2020-08 41011.98
## 9  2020-09 41313.28
## 10 2020-10 42459.36
## 11 2020-11 41535.66
## 12 2020-12 41809.89

5. Con este análisis descriptivo, predictivo ¿Qué recomendaciones y medidas prescriptivas le puede dar a la compañía Hershey´s?

Tras realizar este analisis, la principal conclusión es que se esta manejando una informacion que es estacionaria es sus 3 escenarios (esperado, optimista, pesimista). Asi, la principal recomendación es que al tener un comportamiento cíclico, Hersheys entienda que temporadas o meses tienen un incremento en las ventas y asi, dar seguimiento a las causas detras de ello. Por ejemplo, después de la proyeccion de febrero, en los 3 escenarios hay una tendencia al aumento de las ventas lo cual puede ir justificado por San Valentin. Lo mismo ocurre en el mes de Octubre lo cual se ve explicado por Halloween donde el consumo de chocolate y dulces en general incrementa.

Si Hersheys como empresa identifica estos aumentos o caidas en las ventas, pueden generar estrategias enfocadas en satisfacer las necesidades de sus consumidores en dichos eventos haciendo mejores ofertas o promociones que mas alla de cambiar el comportamiento de la tendencia de ventas, impulse el orden que vienen siguiendo lo cual puede representar un alcance del escenario optimista.