Precio del Cordero Patagónico

Ana Haique; Darío Mendieta

28/9/2021

Datos, período abril-2014 a Julio-2021

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(readxl)

Datos<-read_xlsx("Base_principal_modificada_05.xlsx")

## New names:
## * PSITerH200 -> PSITerH200...30
## * PSITerH200 -> PSITerH200...36

Datos<-as_tibble(Datos)
head(Datos)

## # A tibble: 6 × 52
##   Fecha               ChubutCord StaCruzCord PACord PBCord TFuegoCord PPromCor
##   <dttm>                   <dbl>       <dbl>  <dbl>  <dbl>      <dbl>    <dbl>
## 1 2014-04-01 00:00:00       31.5        31.9     37     35         30     33.1
## 2 2014-05-01 00:00:00       30.5        33.5     37     32         30     32.6
## 3 2014-06-01 00:00:00       35          35       37     32         30     33.8
## 4 2014-07-01 00:00:00       35          35       37     42         30     35.8
## 5 2014-08-01 00:00:00       35          35       40     42         30     36.4
## 6 2014-09-01 00:00:00       48          35       40     42         30     39  
## # … with 45 more variables: ChubutAdult <dbl>, StaCruzAdult <dbl>,
## #   PAAdult <dbl>, PBAdult <dbl>, TFuegoAdult <dbl>, LiniersNovH390 <dbl>,
## #   LiniersNov390 <dbl>, LiniersNovJov <dbl>, LiniersVEJ <dbl>,
## #   LiniersVCB <dbl>, LiniersVCI <dbl>, PAITerH180 <dbl>, PAINovH220 <dbl>,
## #   PAFNovH320 <dbl>, PAFNovM320 <dbl>, PBFVacGor <dbl>, PBITerH200 <dbl>,
## #   PBINovH260 <dbl>, PBFNovH320 <dbl>, PBFNovM320 <dbl>, PBFVaCa <dbl>,
## #   PBFVaCo <dbl>, PSITerH200...30 <dbl>, PSFNovH300 <dbl>, PSFNovH380 <dbl>, …

Actividades de preprocesamiento: Se transformó la variable Fecha de formato a usando libreria lubridate.

Datos<-Datos %>% mutate(Fecha=ymd(Fecha))
glimpse(Datos)

## Rows: 87
## Columns: 52
## $ Fecha           <date> 2014-04-01, 2014-05-01, 2014-06-01, 2014-07-01, 2014-…
## $ ChubutCord      <dbl> 31.50, 30.50, 35.00, 35.00, 35.00, 48.00, 65.00, 50.00…
## $ StaCruzCord     <dbl> 31.9, 33.5, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 48.0, …
## $ PACord          <dbl> 37, 37, 37, 37, 40, 40, 38, 37, 42, 42, 45, 45, 45, 50…
## $ PBCord          <dbl> 35.0, 32.0, 32.0, 42.0, 42.0, 42.0, 55.0, 55.0, 50.0, …
## $ TFuegoCord      <dbl> 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 45.0, …
## $ PPromCor        <dbl> 33.08, 32.60, 33.80, 35.80, 36.40, 39.00, 44.60, 41.40…
## $ ChubutAdult     <dbl> 16.80, 18.00, 19.50, 22.00, 27.00, 28.35, 28.00, 29.00…
## $ StaCruzAdult    <dbl> 14.55, 12.75, 14.13, 14.00, 14.00, 14.00, 23.00, 23.00…
## $ PAAdult         <dbl> 20, 22, 22, 22, 25, 25, 23, 22, 22, 24, 26, 26, 26, 29…
## $ PBAdult         <dbl> 18.0, 18.0, 18.0, 21.5, 22.0, 25.0, 32.5, 30.0, 25.0, …
## $ TFuegoAdult     <dbl> 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 17, NA, NA, NA…
## $ LiniersNovH390  <dbl> 17.06, 18.96, 18.96, 18.96, 18.37, 17.23, 17.29, 17.17…
## $ LiniersNov390   <dbl> 16.29, 18.44, 18.44, 18.44, 17.80, 16.66, 16.70, 16.61…
## $ LiniersNovJov   <dbl> 13.88, 16.43, 16.43, 16.43, 15.82, 15.15, 14.31, 14.24…
## $ LiniersVEJ      <dbl> 9.11, 12.16, 12.16, 12.16, 12.82, 12.24, 12.08, 11.36,…
## $ LiniersVCB      <dbl> 6.34, 8.29, 8.29, 8.29, 10.28, 9.94, 9.85, 9.24, 8.57,…
## $ LiniersVCI      <dbl> 5.53, 8.05, 8.05, 8.05, 9.49, 9.21, 8.91, 7.83, 8.00, …
## $ PAITerH180      <dbl> 19.0, 20.0, 21.5, 25.0, 24.0, 25.0, 24.5, 25.0, 25.0, …
## $ PAINovH220      <dbl> 16.5, 18.5, 20.0, 22.0, 22.5, 23.5, 23.0, 22.5, 23.0, …
## $ PAFNovH320      <dbl> 17.60, 18.70, 19.50, 20.72, 19.80, 19.80, 18.70, 18.70…
## $ PAFNovM320      <dbl> 16.77, 17.80, 18.50, 19.00, 18.70, 17.40, 17.05, 17.05…
## $ PBFVacGor       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ PBITerH200      <dbl> 22.0, 21.0, 22.0, 22.0, 23.0, 25.0, 26.0, 27.0, 28.0, …
## $ PBINovH260      <dbl> 19.0, 19.0, 21.0, 21.0, 21.0, 23.5, 24.0, NA, 26.5, 28…
## $ PBFNovH320      <dbl> 18.5, 18.5, 22.0, 24.5, 24.5, 24.5, 25.0, 26.5, 27.0, …
## $ PBFNovM320      <dbl> 18.5, 18.0, 20.5, 23.5, 23.5, 24.0, 24.5, 25.0, 26.0, …
## $ PBFVaCa         <dbl> 9.0, 8.5, 10.0, 11.0, 11.0, 12.0, 13.5, 12.5, 11.5, 12…
## $ PBFVaCo         <dbl> 6.5, 6.0, 7.5, 7.5, 8.0, 8.0, 9.0, 8.0, 9.0, 9.0, 9.0,…
## $ PSITerH200...30 <dbl> 22.75, 22.00, 22.00, 23.00, 25.50, 24.50, 29.00, 28.00…
## $ PSFNovH300      <dbl> 21.34, 21.00, 21.00, 22.50, 23.50, 24.00, 28.25, 28.75…
## $ PSFNovH380      <dbl> 22.00, 21.00, 21.50, 23.60, 25.10, 25.15, 27.48, 28.55…
## $ PSFNovM380      <dbl> 20.00, 19.95, 20.50, 23.05, 20.80, 22.10, 25.93, 24.50…
## $ PSFVaCa         <dbl> 6.75, 6.50, 6.50, 8.50, 9.70, 11.00, 10.50, 14.00, 12.…
## $ PSFVaCo         <dbl> 4.50, 4.50, 4.50, 4.50, 5.15, 5.00, 5.00, 6.00, 6.00, …
## $ PSITerH200...36 <dbl> 21.4, 22.0, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ PSFNovH420      <dbl> 23.40, 23.10, 23.95, 24.50, 27.50, 28.00, 28.00, 29.15…
## $ PSFVaCon        <dbl> 5, 10, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ESITerH200      <dbl> NA, 20, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ ESFNovH420      <dbl> 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24…
## $ ESFVaCon        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 8.5, NA, NA, N…
## $ NovChubut       <dbl> 39.10, 40.00, 38.20, 39.10, 42.90, 45.65, 45.75, 49.95…
## $ NovStaCruz      <dbl> 41.15, 42.50, 42.00, 43.50, 44.50, 50.00, 51.00, 51.00…
## $ NovPA           <dbl> 32.0, 32.0, 34.0, 35.0, 37.7, 36.0, 36.0, 34.0, 35.0, …
## $ NovPB           <dbl> 35.00, 32.00, 32.50, 40.00, 44.55, 42.75, 44.55, 44.45…
## $ NovTF           <dbl> 42.50, 42.50, 42.50, 42.50, 42.50, 42.50, 42.50, 42.50…
## $ TerChubut       <dbl> 23.90, 22.75, 22.00, 22.00, 23.00, 25.50, 24.50, 29.00…
## $ TerStaCruz      <dbl> 23.0, 21.4, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, …
## $ TerPA           <dbl> 19.5, 19.0, 20.0, 21.5, 25.0, 24.0, 25.0, 24.5, 25.0, …
## $ TerPB           <dbl> 21.5, 22.0, 21.0, 22.0, 22.0, 23.0, 25.0, 26.0, 28.0, …
## $ TerTF           <dbl> 20, 20, 20, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ Dólar           <dbl> 7.94, 8.03, 8.05, 8.19, 8.32, 8.37, 8.41, 8.45, 8.50, …

Contextualización del problema

El propósito del trabajo es pronosticar el precio del cordero en la región patagónica. Como el relevamiento del precio de este producto se realiza por regiones (Patagonia Norte A, Patagonia Norte B, Chubut, Santa Cruz y Tierra del Fuego), se decidió tomar como variable de respuesta, al precio promedio mensual, calculado a partir de los datos disponibles para las distintas regiones, PPromCord.

library(tidymodels)

## Registered S3 method overwritten by 'tune':
##   method                   from   
##   required_pkgs.model_spec parsnip

## ── Attaching packages ────────────────────────────────────── tidymodels 0.1.3 ──

## ✓ broom        0.7.9      ✓ rsample      0.1.0 
## ✓ dials        0.0.10     ✓ tune         0.1.6 
## ✓ infer        1.0.0      ✓ workflows    0.2.3 
## ✓ modeldata    0.1.1      ✓ workflowsets 0.1.0 
## ✓ parsnip      0.1.7      ✓ yardstick    0.0.8 
## ✓ recipes      0.1.16

## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## x scales::discard() masks purrr::discard()
## x dplyr::filter()   masks stats::filter()
## x recipes::fixed()  masks stringr::fixed()
## x dplyr::lag()      masks stats::lag()
## x yardstick::spec() masks readr::spec()
## x recipes::step()   masks stats::step()
## • Use tidymodels_prefer() to resolve common conflicts.

library(modeltime)
library(timetk)
Cordero<- Datos %>% select(Fecha,PPromCor)
Cordero %>% plot_time_series(Fecha,PPromCor,.title="Precio promedio del cordero patagónico, Abr14-Jun21")

División de los datos en Entrenamiento vs Testeo

El criterio fué 80% entrenamiento, 20% testeo. (Total 87 datos)

splits<- time_series_split(Cordero, assess=18, cumulative = TRUE)

## Using date_var: Fecha

splits %>% 
  tk_time_series_cv_plan() %>% 
  plot_time_series_cv_plan(Fecha,PPromCor)

Modelo ARIMA

A diferencia de los modelos de regresión, en los cuales Yt se explica por las k regresoras X1, X2, X3, . . . , Xk, en los modelos de series de tiempo del tipo ARIMA, Yt se explica por valores pasados o rezagados de sí misma y por los términos de error estocásticos.

AR(1) Proceso autoregresivo de orden “1”.

El valor de pronóstico de Yt es simplemente alguna proporción \(\alpha_1\) de su valor en el periodo (t − 1) más un “choque” o perturbación aleatoria en el tiempo t. AR(p)

MA(1) Proceso de promedios móviles

Yt es igual a una constante más un promedio móvil de los términos de error presente y pasado (t-1). En resumen, es tan sólo una combinación lineal de términos de error. MA(q)

ARIMA Proceso autoregresivo integrado de promedios móviles

Los dos modelos basan en el supuesto de que las series de tiempo consideradas, son estacionarias, I(0). Si una serie de tiempo es integrada de orden 1, I(1), sus primeras diferencias son I(0), es decir, estacionarias. I(d)

ARIMA(p,d,q)

Forecast ARIMA

mod_arima <- arima_reg() %>% 
  set_engine("auto_arima") %>% 
  fit(PPromCor~Fecha,training(splits))

## frequency = 12 observations per 1 year

mod_arima

## parsnip model object
## 
## Fit time:  398ms 
## Series: outcome 
## ARIMA(1,1,0) with drift 
## 
## Coefficients:
##          ar1   drift
##       0.2025  2.5652
## s.e.  0.1180  0.9693
## 
## sigma^2 estimated as 42.17:  log likelihood=-222.71
## AIC=451.42   AICc=451.8   BIC=458.08

Forecast glmnet

library(glmnet)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loaded glmnet 4.1-2

mod_glmnet_R <- linear_reg(penalty = 0.01,mixture=0) %>% ##elastic net (0,1), lasso=1, ridge=0
  set_engine("glmnet") %>% 
  fit(PPromCor~month(Fecha,label=TRUE)
      +as.numeric(Fecha),
      training(splits))
tidy(mod_glmnet_R)

## # A tibble: 13 × 3
##    term                            estimate penalty
##    <chr>                              <dbl>   <dbl>
##  1 (Intercept)                   -1077.        0.01
##  2 month(Fecha, label = TRUE).L     19.4       0.01
##  3 month(Fecha, label = TRUE).Q     -6.15      0.01
##  4 month(Fecha, label = TRUE).C    -10.4       0.01
##  5 month(Fecha, label = TRUE)^4     -0.818     0.01
##  6 month(Fecha, label = TRUE)^5      2.89      0.01
##  7 month(Fecha, label = TRUE)^6      2.07      0.01
##  8 month(Fecha, label = TRUE)^7      0.992     0.01
##  9 month(Fecha, label = TRUE)^8     -1.36      0.01
## 10 month(Fecha, label = TRUE)^9      1.66      0.01
## 11 month(Fecha, label = TRUE)^10    -0.647     0.01
## 12 month(Fecha, label = TRUE)^11    -0.808     0.01
## 13 as.numeric(Fecha)                 0.0680    0.01

mod_glmnet_EN <- linear_reg(penalty = 0.01,mixture=0.8) %>% ##elastic net (0,1), lasso=1, ridge=0
  set_engine("glmnet") %>% 
  fit(PPromCor~month(Fecha,label=TRUE)
      +as.numeric(Fecha),
      training(splits))
tidy(mod_glmnet_EN)

## # A tibble: 13 × 3
##    term                            estimate penalty
##    <chr>                              <dbl>   <dbl>
##  1 (Intercept)                   -1184.        0.01
##  2 month(Fecha, label = TRUE).L     19.9       0.01
##  3 month(Fecha, label = TRUE).Q     -7.32      0.01
##  4 month(Fecha, label = TRUE).C    -10.9       0.01
##  5 month(Fecha, label = TRUE)^4     -0.198     0.01
##  6 month(Fecha, label = TRUE)^5      2.37      0.01
##  7 month(Fecha, label = TRUE)^6      2.16      0.01
##  8 month(Fecha, label = TRUE)^7      0.919     0.01
##  9 month(Fecha, label = TRUE)^8     -1.50      0.01
## 10 month(Fecha, label = TRUE)^9      1.80      0.01
## 11 month(Fecha, label = TRUE)^10    -0.580     0.01
## 12 month(Fecha, label = TRUE)^11    -0.519     0.01
## 13 as.numeric(Fecha)                 0.0743    0.01

mod_glmnet_L <- linear_reg(penalty = 0.01,mixture=0.8) %>% ##elastic net (0,1), lasso=1, ridge=0
  set_engine("glmnet") %>% 
  fit(PPromCor~month(Fecha,label=TRUE)
      +as.numeric(Fecha),
      training(splits))
tidy(mod_glmnet_L)

## # A tibble: 13 × 3
##    term                            estimate penalty
##    <chr>                              <dbl>   <dbl>
##  1 (Intercept)                   -1184.        0.01
##  2 month(Fecha, label = TRUE).L     19.9       0.01
##  3 month(Fecha, label = TRUE).Q     -7.32      0.01
##  4 month(Fecha, label = TRUE).C    -10.9       0.01
##  5 month(Fecha, label = TRUE)^4     -0.198     0.01
##  6 month(Fecha, label = TRUE)^5      2.37      0.01
##  7 month(Fecha, label = TRUE)^6      2.16      0.01
##  8 month(Fecha, label = TRUE)^7      0.919     0.01
##  9 month(Fecha, label = TRUE)^8     -1.50      0.01
## 10 month(Fecha, label = TRUE)^9      1.80      0.01
## 11 month(Fecha, label = TRUE)^10    -0.580     0.01
## 12 month(Fecha, label = TRUE)^11    -0.519     0.01
## 13 as.numeric(Fecha)                 0.0743    0.01

Forecast suavizamiento exponencial

Se usa un promedio ponderado de los valores pasados de la serie de tiempo; es un caso especial del método de promedios ponderados móviles; en este caso sólo hay que elegir un peso, el peso para la observación más reciente. Los pesos para los demás datos se calculan automáticamente y son más pequeños a medida que los datos son más antiguos.

model_exp <- exp_smoothing() %>%
    set_engine(engine = "ets") %>%
    fit(PPromCor ~ Fecha, data = training(splits))

## frequency = 12 observations per 1 year

model_exp

## parsnip model object
## 
## Fit time:  642ms 
## ETS(M,A,N) 
## 
## Call:
##  forecast::ets(y = outcome, model = model_ets, damped = damping_ets,  
## 
##  Call:
##      alpha = alpha, beta = beta, gamma = gamma) 
## 
##   Smoothing parameters:
##     alpha = 0.9707 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 30.7495 
##     b = 2.2236 
## 
##   sigma:  0.0721
## 
##      AIC     AICc      BIC 
## 543.3747 544.3271 554.5453

Forecast regresión lineal (lm)

model_lm <- linear_reg() %>%
    set_engine("lm") %>%
    fit(PPromCor ~ as.numeric(Fecha) + factor(month(Fecha, label = TRUE), ordered = FALSE),
        data = training(splits))
model_lm

## parsnip model object
## 
## Fit time:  2ms 
## 
## Call:
## stats::lm(formula = PPromCor ~ as.numeric(Fecha) + factor(month(Fecha, 
##     label = TRUE), ordered = FALSE), data = data)
## 
## Coefficients:
##                                            (Intercept)  
##                                             -1.196e+03  
##                                      as.numeric(Fecha)  
##                                              7.444e-02  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Feb  
##                                              3.205e-01  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Mar  
##                                             -1.801e+00  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Apr  
##                                              3.648e+00  
## factor(month(Fecha, label = TRUE), ordered = FALSE)May  
##                                              4.877e+00  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Jun  
##                                              6.764e+00  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Jul  
##                                              1.321e+01  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Aug  
##                                              1.695e+01  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Sep  
##                                              1.979e+01  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Oct  
##                                              1.818e+01  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Nov  
##                                              1.263e+01  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Dec  
##                                              1.000e+01

Comparación de los modelos

## Model time table

model_tbl<- modeltime_table(mod_arima,mod_glmnet_R,mod_glmnet_EN,mod_glmnet_L,model_exp,model_lm)

## Calibrate

calib_tbl<- model_tbl %>% 
  modeltime_calibrate((testing(splits)))

## Accuracy

calib_tbl %>% modeltime_accuracy()

## # A tibble: 6 × 9
##   .model_id .model_desc             .type   mae  mape  mase smape  rmse   rsq
##       <int> <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1         1 ARIMA(1,1,0) WITH DRIFT Test   66.5  19.7  3.88  22.9  83.4 0.870
## 2         2 GLMNET                  Test  118.   37.6  6.86  47.0 129.  0.791
## 3         3 GLMNET                  Test  110.   34.8  6.39  42.9 121.  0.811
## 4         4 GLMNET                  Test  110.   34.8  6.39  42.9 121.  0.811
## 5         5 ETS(M,A,N)              Test   69.1  20.5  4.03  23.9  86.5 0.870
## 6         6 LM                      Test  109.   34.7  6.38  42.8 121.  0.803

## Test set visualization
    

calib_tbl %>% modeltime_forecast(new_data = testing(splits),
                                 actual_data = Cordero) %>% 
  plot_modeltime_forecast()

Future forecast, un año (todos los datos)

El paso final es reajustar los modelos al conjunto de datos completo y pronosticarlos un año hacia adelante. Desde luego, todos los modelos han cambiado. El modelo ARIMA y ETS se ven mucho mejor ahora porque la línea de tendencia ahora se ha ajustado a los nuevos datos que muestran una tendencia a más largo plazo.

refit_tbl <- calib_tbl %>%
    modeltime_refit(data = Cordero)

## frequency = 12 observations per 1 year
## frequency = 12 observations per 1 year

refit_tbl %>%
    modeltime_forecast(h = "1 years", actual_data = Cordero) %>%
    plot_modeltime_forecast()

Linealización de la variable en estudio

CorderoLn<- Cordero %>% mutate(LnPPromCor= log(PPromCor) ) 
CorderoLn %>% plot_time_series(Fecha,LnPPromCor)

Separación de los datos en Train vs Test

splits2<- time_series_split(CorderoLn, assess=18, cumulative = TRUE)

## Using date_var: Fecha

splits2 %>% 
  tk_time_series_cv_plan() %>% 
  plot_time_series_cv_plan(Fecha,LnPPromCor)

Forecast ARIMA para el LnPPromCor

mod_arima2 <- arima_reg() %>% 
  set_engine("auto_arima") %>% 
  fit(LnPPromCor~Fecha,training(splits2))

## frequency = 12 observations per 1 year

mod_arima2

## parsnip model object
## 
## Fit time:  199ms 
## Series: outcome 
## ARIMA(0,1,0) with drift 
## 
## Coefficients:
##        drift
##       0.0271
## s.e.  0.0081
## 
## sigma^2 estimated as 0.004533:  log likelihood=87.49
## AIC=-170.98   AICc=-170.8   BIC=-166.54

Forecast glmnet

#library(glmnet)

mod_glmnet_R2 <- linear_reg(penalty = 0.01,mixture=0) %>% 
  set_engine("glmnet") %>% 
  fit(LnPPromCor~month(Fecha,label=TRUE)
      +as.numeric(Fecha),
      training(splits2))
tidy(mod_glmnet_R2)

## # A tibble: 13 × 3
##    term                            estimate penalty
##    <chr>                              <dbl>   <dbl>
##  1 (Intercept)                   -8.45         0.01
##  2 month(Fecha, label = TRUE).L   0.140        0.01
##  3 month(Fecha, label = TRUE).Q  -0.0147       0.01
##  4 month(Fecha, label = TRUE).C  -0.104        0.01
##  5 month(Fecha, label = TRUE)^4  -0.0132       0.01
##  6 month(Fecha, label = TRUE)^5   0.0410       0.01
##  7 month(Fecha, label = TRUE)^6   0.00322      0.01
##  8 month(Fecha, label = TRUE)^7   0.00719      0.01
##  9 month(Fecha, label = TRUE)^8   0.00873      0.01
## 10 month(Fecha, label = TRUE)^9   0.00906      0.01
## 11 month(Fecha, label = TRUE)^10  0.0000321    0.01
## 12 month(Fecha, label = TRUE)^11 -0.0124       0.01
## 13 as.numeric(Fecha)              0.000747     0.01

mod_glmnet_EN2 <- linear_reg(penalty = 0.01,mixture=0.8) %>% 
  set_engine("glmnet") %>% 
  fit(LnPPromCor~month(Fecha,label=TRUE)
      +as.numeric(Fecha),
      training(splits2))
tidy(mod_glmnet_EN2)

## # A tibble: 13 × 3
##    term                           estimate penalty
##    <chr>                             <dbl>   <dbl>
##  1 (Intercept)                   -9.45        0.01
##  2 month(Fecha, label = TRUE).L   0.115       0.01
##  3 month(Fecha, label = TRUE).Q   0           0.01
##  4 month(Fecha, label = TRUE).C  -0.0867      0.01
##  5 month(Fecha, label = TRUE)^4   0           0.01
##  6 month(Fecha, label = TRUE)^5   0.0136      0.01
##  7 month(Fecha, label = TRUE)^6   0           0.01
##  8 month(Fecha, label = TRUE)^7   0           0.01
##  9 month(Fecha, label = TRUE)^8   0           0.01
## 10 month(Fecha, label = TRUE)^9   0           0.01
## 11 month(Fecha, label = TRUE)^10  0           0.01
## 12 month(Fecha, label = TRUE)^11  0           0.01
## 13 as.numeric(Fecha)              0.000806    0.01

mod_glmnet_L2 <- linear_reg(penalty = 0.01,mixture=1) %>% 
  set_engine("glmnet") %>% 
  fit(LnPPromCor~month(Fecha,label=TRUE)
      +as.numeric(Fecha),
      training(splits2))
tidy(mod_glmnet_L2)

## # A tibble: 13 × 3
##    term                           estimate penalty
##    <chr>                             <dbl>   <dbl>
##  1 (Intercept)                   -9.45        0.01
##  2 month(Fecha, label = TRUE).L   0.109       0.01
##  3 month(Fecha, label = TRUE).Q   0           0.01
##  4 month(Fecha, label = TRUE).C  -0.0804      0.01
##  5 month(Fecha, label = TRUE)^4   0           0.01
##  6 month(Fecha, label = TRUE)^5   0.00696     0.01
##  7 month(Fecha, label = TRUE)^6   0           0.01
##  8 month(Fecha, label = TRUE)^7   0           0.01
##  9 month(Fecha, label = TRUE)^8   0           0.01
## 10 month(Fecha, label = TRUE)^9   0           0.01
## 11 month(Fecha, label = TRUE)^10  0           0.01
## 12 month(Fecha, label = TRUE)^11  0           0.01
## 13 as.numeric(Fecha)              0.000806    0.01

Suavizamiento exponencial

model_exp2 <- exp_smoothing() %>%
    set_engine(engine = "ets") %>%
    fit(LnPPromCor ~ Fecha, data = training(splits2))

## frequency = 12 observations per 1 year

model_exp2

## parsnip model object
## 
## Fit time:  635ms 
## ETS(A,A,N) 
## 
## Call:
##  forecast::ets(y = outcome, model = model_ets, damped = damping_ets,  
## 
##  Call:
##      alpha = alpha, beta = beta, gamma = gamma) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 3.4722 
##     b = 0.0271 
## 
##   sigma:  0.0684
## 
##       AIC      AICc       BIC 
## -72.21443 -71.26204 -61.04389

Forecast regresión lineal (lm)

model_lm2 <- linear_reg() %>%
    set_engine("lm") %>%
    fit(LnPPromCor ~ as.numeric(Fecha) + factor(month(Fecha, label = TRUE), ordered = FALSE),
        data = training(splits2))
model_lm2

## parsnip model object
## 
## Fit time:  3ms 
## 
## Call:
## stats::lm(formula = LnPPromCor ~ as.numeric(Fecha) + factor(month(Fecha, 
##     label = TRUE), ordered = FALSE), data = data)
## 
## Coefficients:
##                                            (Intercept)  
##                                             -9.7627326  
##                                      as.numeric(Fecha)  
##                                              0.0008213  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Feb  
##                                             -0.0004141  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Mar  
##                                             -0.0278069  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Apr  
##                                             -0.0267203  
## factor(month(Fecha, label = TRUE), ordered = FALSE)May  
##                                             -0.0102567  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Jun  
##                                              0.0148058  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Jul  
##                                              0.0795923  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Aug  
##                                              0.0998682  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Sep  
##                                              0.1236322  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Oct  
##                                              0.1236968  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Nov  
##                                              0.0696322  
## factor(month(Fecha, label = TRUE), ordered = FALSE)Dec  
##                                              0.0508738

Comparación de los modelos

## Model time table

model_tbl2<- modeltime_table(mod_arima2,mod_glmnet_R2,mod_glmnet_EN2,mod_glmnet_L2,model_lm2,model_exp2)

## Calibrate

calib_tbl2<- model_tbl2 %>% 
  modeltime_calibrate((testing(splits2)))

## Accuracy

calib_tbl2 %>% modeltime_accuracy()

## # A tibble: 6 × 9
##   .model_id .model_desc             .type    mae  mape  mase smape  rmse   rsq
##       <int> <chr>                   <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1         1 ARIMA(0,1,0) WITH DRIFT Test  0.0977  1.69  1.73  1.71 0.121 0.888
## 2         2 GLMNET                  Test  0.301   5.25  5.33  5.40 0.317 0.898
## 3         3 GLMNET                  Test  0.220   3.83  3.90  3.91 0.239 0.922
## 4         4 GLMNET                  Test  0.219   3.81  3.88  3.89 0.237 0.926
## 5         5 LM                      Test  0.204   3.54  3.61  3.62 0.223 0.912
## 6         6 ETS(A,A,N)              Test  0.0977  1.69  1.73  1.71 0.121 0.888

## Test set visualization

calib_tbl2 %>% modeltime_forecast(new_data = testing(splits2),
                                 actual_data = CorderoLn) %>% 
  plot_modeltime_forecast()

Future forecast, un año (todos los datos)

refit_tbl2 <- calib_tbl2 %>%
    modeltime_refit(data = CorderoLn)

## frequency = 12 observations per 1 year
## frequency = 12 observations per 1 year

refit_tbl2 %>%
    modeltime_forecast(h = "1 years", actual_data = CorderoLn) %>%
    plot_modeltime_forecast()

# Agregado DarioM (plot en escala de los datos)

tmp <- refit_tbl2 %>% modeltime_forecast(h = "1 years", actual_data = CorderoLn)
tmp %>%  mutate(.value = exp(.value),.conf_lo = exp(.conf_lo),
                .conf_hi = exp(.conf_hi))  %>% plot_modeltime_forecast(
                .title =  "Pronósticos")

Conclusiones:

Para un trabajo futuro se puede comparar los resultados de las técnicas aplicadas, contra un modelo de regresión tradicional, en el que se tomen otras variables como regresoras, por ejemplo, el precio de otro tipo de ganado u otros indicadores económicos como el índice de precio del consumidor o el tipo de cambio.