Libraries and Data

require(fpp3)
## Loading required package: fpp3
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1          ✔ tsibble     1.1.3     
## ✔ dplyr       1.1.4          ✔ tsibbledata 0.4.1.9000
## ✔ tidyr       1.3.0          ✔ feasts      0.3.0     
## ✔ lubridate   1.9.3          ✔ fable       0.3.2     
## ✔ ggplot2     3.4.4          ✔ fabletools  0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
require(ggplot2)
require(kableExtra)
## Loading required package: kableExtra
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
require(latex2exp)
## Loading required package: latex2exp
require(readxl)
## Loading required package: readxl
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.4
## ✔ purrr   1.0.2     ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()          masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
require(magrittr)
## Loading required package: magrittr
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

Read & Edit Data

remove(list = ls())

soybean.ave.monthly.price. <- read.csv("../raw_data/soybean ave monthly price .csv")
df <- soybean.ave.monthly.price.[,1:2]

library(tidyverse)
glimpse(df)
## Rows: 583
## Columns: 2
## $ month                 <chr> "1975-09-01", "1975-10-01", "1975-11-01", "1975-…
## $ Average.monthly.price <dbl> 5.73, 5.25, 4.86, 4.64, 4.75, 4.80, 4.80, 4.80, …
tail(df)
##          month Average.monthly.price
## 578 2023-10-01                 12.91
## 579 2023-11-01                 13.44
## 580 2023-12-01                 13.07
## 581 2024-01-01                 12.35
## 582 2024-02-01                 11.67
## 583 2024-03-01                 11.91
?seq
df$month <- seq(from = as.Date("1975/09/01"), 
                to = as.Date("2024/03/01"), 
                by="1 month"
                )

?yearmonth # Use format() to display yearweek, yearmonth, and yearquarter objects in required formats. Please see strptime() details for supported conversion specifications.
df$month <- yearmonth(df$month)

Time Series and Train / Test Set

myts <- df %>% as_tsibble(index = month)

t <- round(length(df$Average.monthly.price)*.8, 0)
t
## [1] 466
train <- myts[1:t,]

t+1
## [1] 467
test <- myts[467:nrow(myts),]
nrow(myts)
## [1] 583
train <- myts[300:500,]

test <- myts[501:nrow(myts),]

Time Series Plots

myts %>% gg_season(Average.monthly.price)  +
    labs(x = "Month", y = "Average Monthly Price", color = "Year") 

myts %>% gg_lag(Average.monthly.price) +  
    labs(y = "Average Monthly Price", color = "Year") 

myts  %>% ACF(Average.monthly.price) %>% autoplot()

Transformation

lambda = myts %>% features(Average.monthly.price, features = guerrero) |> pull(lambda_guerrero)
lambda
## [1] -0.4565393
myts %>% autoplot(box_cox(Average.monthly.price, lambda[1])) +
  labs(y = "",title = latex2exp::TeX(paste0(
         "Transformed Future Prices with $\\lambda$ = ", round(lambda[1],2))))

Decomposition

comp = myts %>% model(stl=STL(Average.monthly.price))%>%components()
comp %>% as_tsibble() %>%autoplot(Average.monthly.price)+  geom_line(aes(y=trend), colour = "red")+
  geom_line(aes(y=season_adjust), colour = "blue")

comp%>%autoplot()

Models for Future Prices

#Naive Models

m1=train |>model(Mean_Model=MEAN(Average.monthly.price))
m2=train |>model(Drift_Model=RW(Average.monthly.price~drift()))
m3=train |>model(Naive_Model=NAIVE(Average.monthly.price))
m4=train |>model(Seasonal_Naive_Model=SNAIVE(Average.monthly.price))

#Basic Models
m5=train%>%model(TS_Model = TSLM(Average.monthly.price))
m6=train |>model(ETS_Model=ETS(Average.monthly.price))
m7=train |>model(ARIMA_Model=ARIMA(Average.monthly.price))

#Dynamic Models
m8=train |>model(Fourier_Model=TSLM(Average.monthly.price~ElectionSpline+trend(knots=yearquarter('2020 Q2'))+fourier(K=2)))
## Warning: 1 error encountered for Fourier_Model
## [1] object 'ElectionSpline' not found
m9=train |>model(Exponential_Model=TSLM(log(Average.monthly.price)~ElectionSpline+trend(knots=yearquarter('2020 Q2'))+season()))
## Warning: 1 error encountered for Exponential_Model
## [1] object 'ElectionSpline' not found
m10=train |>model(ARIMA(Average.monthly.price ~ ElectionSpline))
## Warning: 1 error encountered for ARIMA(Average.monthly.price ~ ElectionSpline)
## [1] object 'ElectionSpline' not found
#Dynamic Ensemble Models
m11=train|>model(Ensemble_Model=(ETS(Average.monthly.price)+ARIMA(Average.monthly.price~ElectionSpline+trend(knots = yearquarter('2020 Q2'))))/2)
## Warning: 1 error encountered for Ensemble_Model
## [1] object 'ElectionSpline' not found
m12=train|>model(Ensemble_Model_2=(ETS(Average.monthly.price)+TSLM(Average.monthly.price~trend(knots=yearquarter('2020 Q2'))+fourier(K=2))+ARIMA(Average.monthly.price))/3)

#Dynamic Neural Network Models
m13=train|>model(NeuralNet_Model=NNETAR(box_cox(Average.monthly.price, .9)~ElectionSpline+trend(knots=yearquarter('2020 Q2'))+season()))
## Warning: 1 error encountered for NeuralNet_Model
## [1] object 'ElectionSpline' not found

Example ARIMA + Regressor

fit <- train |>
     model(ARIMA(Average.monthly.price ~ ElectionSpline))
## Warning: 1 error encountered for ARIMA(Average.monthly.price ~ ElectionSpline)
## [1] object 'ElectionSpline' not found
 report(fit)
## Series: Average.monthly.price 
## Model: NULL model 
## NULL model
 bind_rows(
    `Regression residuals` =
        as_tibble(residuals(fit, type = "regression")),
    `ARIMA residuals` =
        as_tibble(residuals(fit, type = "innovation")),
    .id = "type"
  ) |>
  mutate(
    type = factor(type, levels=c(
      "Regression residuals", "ARIMA residuals"))
  ) |>
  ggplot(aes(x = month, y = .resid)) +
  geom_line() +
  facet_grid(vars(type))
## Warning: Removed 402 rows containing missing values (`geom_line()`).

 # fit |> gg_tsresiduals()
 # 
 # augment(fit) |>
 #  features(.innov, ljung_box, dof = 4, lag = 12)
 # 
 # forecast(fit, new_data = test) |>
 #  autoplot(train) +
 #  labs(y = "Percentage change")

Reports

for (i in 1:13){report(eval(parse(text = paste0('m',i))))}
## Series: Average.monthly.price 
## Model: MEAN 
## 
## Mean: 9.3068 
## sigma^2: 11.341 
## Series: Average.monthly.price 
## Model: RW w/ drift 
## 
## Drift: 0.0245 (se: 0.0451)
## sigma^2: 0.407 
## Series: Average.monthly.price 
## Model: NAIVE 
## 
## sigma^2: 0.407 
## Series: Average.monthly.price 
## Model: SNAIVE 
## 
## sigma^2: 6.064 
## Series: Average.monthly.price 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9868 -3.4168  0.2232  2.8032  7.5032 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.3068     0.2375   39.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.368 on 200 degrees of freedom
## Series: Average.monthly.price 
## Model: ETS(M,Ad,N) 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.1637179 
##     phi   = 0.8000009 
## 
##   Initial states:
##      l[0]       b[0]
##  4.356984 0.04637866
## 
##   sigma^2:  0.0039
## 
##      AIC     AICc      BIC 
## 826.3053 826.7383 846.1251 
## Series: Average.monthly.price 
## Model: ARIMA(0,1,2) 
## 
## Coefficients:
##          ma1     ma2
##       0.3941  0.1906
## s.e.  0.0715  0.0730
## 
## sigma^2 estimated as 0.3509:  log likelihood=-178.14
## AIC=362.28   AICc=362.4   BIC=372.17
## Series: Average.monthly.price 
## Model: NULL model 
## NULL modelSeries: Average.monthly.price 
## Model: NULL model 
## Transformation: log(Average.monthly.price) 
## NULL modelSeries: Average.monthly.price 
## Model: NULL model 
## NULL modelSeries: Average.monthly.price 
## Model: NULL model 
## NULL modelSeries: Average.monthly.price 
## Model: COMBINATION 
## Combination: Average.monthly.price * 0.333333333333333
## 
## ======================================================
## 
## Series: Average.monthly.price 
## Model: COMBINATION 
## Combination: Average.monthly.price + Average.monthly.price
## 
## ==========================================================
## 
## Series: Average.monthly.price 
## Model: COMBINATION 
## Combination: Average.monthly.price + Average.monthly.price
## 
## ==========================================================
## 
## Series: Average.monthly.price 
## Model: ETS(M,Ad,N) 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.1637179 
##     phi   = 0.8000009 
## 
##   Initial states:
##      l[0]       b[0]
##  4.356984 0.04637866
## 
##   sigma^2:  0.0039
## 
##      AIC     AICc      BIC 
## 826.3053 826.7383 846.1251 
## 
## Series: Average.monthly.price 
## Model: TSLM 
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2274 -1.5417 -0.4665  1.7912  5.9216 
## 
## Coefficients:
##                                                  Estimate Std. Error t value
## (Intercept)                                      5.105429   0.330521  15.447
## trend(knots = yearquarter("2020 Q2"))trend       0.041678   0.002838  14.687
## trend(knots = yearquarter("2020 Q2"))trend_-165        NA         NA      NA
## fourier(K = 2)C1_12                             -0.319269   0.233415  -1.368
## fourier(K = 2)S1_12                              0.368916   0.232284   1.588
## fourier(K = 2)C2_12                              0.154255   0.232748   0.663
## fourier(K = 2)S2_12                             -0.132862   0.232776  -0.571
##                                                 Pr(>|t|)    
## (Intercept)                                       <2e-16 ***
## trend(knots = yearquarter("2020 Q2"))trend        <2e-16 ***
## trend(knots = yearquarter("2020 Q2"))trend_-165       NA    
## fourier(K = 2)C1_12                                0.173    
## fourier(K = 2)S1_12                                0.114    
## fourier(K = 2)C2_12                                0.508    
## fourier(K = 2)S2_12                                0.569    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.333 on 195 degrees of freedom
## Multiple R-squared: 0.532,   Adjusted R-squared:  0.52
## F-statistic: 44.34 on 5 and 195 DF, p-value: < 2.22e-16
## 
## 
## Series: Average.monthly.price 
## Model: ARIMA(0,1,2) 
## 
## Coefficients:
##          ma1     ma2
##       0.3941  0.1906
## s.e.  0.0715  0.0730
## 
## sigma^2 estimated as 0.3509:  log likelihood=-178.14
## AIC=362.28   AICc=362.4   BIC=372.17
## 
## 
## Series: Average.monthly.price 
## Model: NULL model 
## Transformation: box_cox(Average.monthly.price, 0.9) 
## NULL model

Plots

myplot=function(model){
  model|>forecast(test)|>autoplot(myts)+geom_line(aes(y=.fitted,col=.model),
                                                   data=augment(model))+ggtitle(names(model)) +   labs(x = "Month", y = "Average Monthly Price") 
  }

for (i in 1:12){print(myplot(eval(parse(text=paste0('m',i)))))} #NNETAR takes long to plot

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 12 rows containing missing values (`geom_line()`).

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 83 rows containing missing values (`()`).
## Warning: Removed 201 rows containing missing values (`geom_line()`).

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 83 rows containing missing values (`()`).
## Warning: Removed 201 rows containing missing values (`geom_line()`).

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 83 rows containing missing values (`()`).
## Warning: Removed 201 rows containing missing values (`geom_line()`).

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 83 rows containing missing values (`()`).
## Warning: Removed 201 rows containing missing values (`geom_line()`).
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Ensemble_Model_2 = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading

Accuracy Metrics

tmp3=c(rep(0,12))
mymat=matrix(rep(0, 12*12), ncol=12)

for (i in 1:13){
  tmp=eval(parse(text=paste0('m',i)))%>%forecast(test)
  tmp2=tmp%>%accuracy(test)
  tmp3=rbind(tmp2, tmp3)
}
## Warning in rbind(deparse.level, ...): number of columns of result, 10, is not a
## multiple of vector length 12 of arg 2
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Ensemble_Model_2 = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
tmp3=tmp3[tmp3$RMSE>0,]
tmp3$MASE=tmp3$RMSSE=tmp3$.type=NULL
tmp3%>%arrange(MAPE)
## # A tibble: 13 × 7
##    .model                   ME  RMSE   MAE    MPE  MAPE   ACF1
##    <chr>                 <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>
##  1 Drift_Model           1.11   2.50  1.98  5.79   15.6  0.961
##  2 Ensemble_Model_2      0.461  2.45  2.14 -0.515  18.3  0.966
##  3 Naive_Model           2.14   3.41  2.56 14.2    19.0  0.970
##  4 Mean_Model            2.30   3.51  2.61 15.7    19.2  0.970
##  5 TS_Model              2.30   3.51  2.61 15.7    19.2  0.970
##  6 ARIMA_Model           2.44   3.60  2.66 16.9    19.5  0.970
##  7 Seasonal_Naive_Model  1.39   3.02  2.47  7.41   19.6  0.952
##  8 ETS_Model             2.57   3.70  2.72 18.0    19.8  0.969
##  9 <NA>                 NA     NA    NA    NA      NA   NA    
## 10 <NA>                 NA     NA    NA    NA      NA   NA    
## 11 <NA>                 NA     NA    NA    NA      NA   NA    
## 12 <NA>                 NA     NA    NA    NA      NA   NA    
## 13 <NA>                 NA     NA    NA    NA      NA   NA