Libraries and Data
require(fpp3)
## Loading required package: fpp3
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.3
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1.9000
## ✔ tidyr 1.3.0 ✔ feasts 0.3.0
## ✔ lubridate 1.9.3 ✔ fable 0.3.2
## ✔ ggplot2 3.4.4 ✔ fabletools 0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
require(ggplot2)
require(kableExtra)
## Loading required package: kableExtra
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
require(latex2exp)
## Loading required package: latex2exp
require(readxl)
## Loading required package: readxl
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
require(magrittr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
Read & Edit Data
remove(list = ls())
soybean.ave.monthly.price. <- read.csv("../raw_data/soybean ave monthly price .csv")
df <- soybean.ave.monthly.price.[,1:2]
library(tidyverse)
glimpse(df)
## Rows: 583
## Columns: 2
## $ month <chr> "1975-09-01", "1975-10-01", "1975-11-01", "1975-…
## $ Average.monthly.price <dbl> 5.73, 5.25, 4.86, 4.64, 4.75, 4.80, 4.80, 4.80, …
tail(df)
## month Average.monthly.price
## 578 2023-10-01 12.91
## 579 2023-11-01 13.44
## 580 2023-12-01 13.07
## 581 2024-01-01 12.35
## 582 2024-02-01 11.67
## 583 2024-03-01 11.91
?seq
df$month <- seq(from = as.Date("1975/09/01"),
to = as.Date("2024/03/01"),
by="1 month"
)
?yearmonth # Use format() to display yearweek, yearmonth, and yearquarter objects in required formats. Please see strptime() details for supported conversion specifications.
df$month <- yearmonth(df$month)
Time Series and Train / Test Set
myts <- df %>% as_tsibble(index = month)
t <- round(length(df$Average.monthly.price)*.8, 0)
t
## [1] 466
train <- myts[1:t,]
t+1
## [1] 467
test <- myts[467:nrow(myts),]
nrow(myts)
## [1] 583
train <- myts[300:500,]
test <- myts[501:nrow(myts),]
Time Series Plots
myts %>% gg_season(Average.monthly.price) +
labs(x = "Month", y = "Average Monthly Price", color = "Year")

myts %>% gg_lag(Average.monthly.price) +
labs(y = "Average Monthly Price", color = "Year")

myts %>% ACF(Average.monthly.price) %>% autoplot()

Decomposition
comp = myts %>% model(stl=STL(Average.monthly.price))%>%components()
comp %>% as_tsibble() %>%autoplot(Average.monthly.price)+ geom_line(aes(y=trend), colour = "red")+
geom_line(aes(y=season_adjust), colour = "blue")

comp%>%autoplot()

Models for Future Prices
#Naive Models
m1=train |>model(Mean_Model=MEAN(Average.monthly.price))
m2=train |>model(Drift_Model=RW(Average.monthly.price~drift()))
m3=train |>model(Naive_Model=NAIVE(Average.monthly.price))
m4=train |>model(Seasonal_Naive_Model=SNAIVE(Average.monthly.price))
#Basic Models
m5=train%>%model(TS_Model = TSLM(Average.monthly.price))
m6=train |>model(ETS_Model=ETS(Average.monthly.price))
m7=train |>model(ARIMA_Model=ARIMA(Average.monthly.price))
#Dynamic Models
m8=train |>model(Fourier_Model=TSLM(Average.monthly.price~ElectionSpline+trend(knots=yearquarter('2020 Q2'))+fourier(K=2)))
## Warning: 1 error encountered for Fourier_Model
## [1] object 'ElectionSpline' not found
m9=train |>model(Exponential_Model=TSLM(log(Average.monthly.price)~ElectionSpline+trend(knots=yearquarter('2020 Q2'))+season()))
## Warning: 1 error encountered for Exponential_Model
## [1] object 'ElectionSpline' not found
m10=train |>model(ARIMA(Average.monthly.price ~ ElectionSpline))
## Warning: 1 error encountered for ARIMA(Average.monthly.price ~ ElectionSpline)
## [1] object 'ElectionSpline' not found
#Dynamic Ensemble Models
m11=train|>model(Ensemble_Model=(ETS(Average.monthly.price)+ARIMA(Average.monthly.price~ElectionSpline+trend(knots = yearquarter('2020 Q2'))))/2)
## Warning: 1 error encountered for Ensemble_Model
## [1] object 'ElectionSpline' not found
m12=train|>model(Ensemble_Model_2=(ETS(Average.monthly.price)+TSLM(Average.monthly.price~trend(knots=yearquarter('2020 Q2'))+fourier(K=2))+ARIMA(Average.monthly.price))/3)
#Dynamic Neural Network Models
m13=train|>model(NeuralNet_Model=NNETAR(box_cox(Average.monthly.price, .9)~ElectionSpline+trend(knots=yearquarter('2020 Q2'))+season()))
## Warning: 1 error encountered for NeuralNet_Model
## [1] object 'ElectionSpline' not found
Example ARIMA + Regressor
fit <- train |>
model(ARIMA(Average.monthly.price ~ ElectionSpline))
## Warning: 1 error encountered for ARIMA(Average.monthly.price ~ ElectionSpline)
## [1] object 'ElectionSpline' not found
report(fit)
## Series: Average.monthly.price
## Model: NULL model
## NULL model
bind_rows(
`Regression residuals` =
as_tibble(residuals(fit, type = "regression")),
`ARIMA residuals` =
as_tibble(residuals(fit, type = "innovation")),
.id = "type"
) |>
mutate(
type = factor(type, levels=c(
"Regression residuals", "ARIMA residuals"))
) |>
ggplot(aes(x = month, y = .resid)) +
geom_line() +
facet_grid(vars(type))
## Warning: Removed 402 rows containing missing values (`geom_line()`).

# fit |> gg_tsresiduals()
#
# augment(fit) |>
# features(.innov, ljung_box, dof = 4, lag = 12)
#
# forecast(fit, new_data = test) |>
# autoplot(train) +
# labs(y = "Percentage change")
Reports
for (i in 1:13){report(eval(parse(text = paste0('m',i))))}
## Series: Average.monthly.price
## Model: MEAN
##
## Mean: 9.3068
## sigma^2: 11.341
## Series: Average.monthly.price
## Model: RW w/ drift
##
## Drift: 0.0245 (se: 0.0451)
## sigma^2: 0.407
## Series: Average.monthly.price
## Model: NAIVE
##
## sigma^2: 0.407
## Series: Average.monthly.price
## Model: SNAIVE
##
## sigma^2: 6.064
## Series: Average.monthly.price
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.9868 -3.4168 0.2232 2.8032 7.5032
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.3068 0.2375 39.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.368 on 200 degrees of freedom
## Series: Average.monthly.price
## Model: ETS(M,Ad,N)
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.1637179
## phi = 0.8000009
##
## Initial states:
## l[0] b[0]
## 4.356984 0.04637866
##
## sigma^2: 0.0039
##
## AIC AICc BIC
## 826.3053 826.7383 846.1251
## Series: Average.monthly.price
## Model: ARIMA(0,1,2)
##
## Coefficients:
## ma1 ma2
## 0.3941 0.1906
## s.e. 0.0715 0.0730
##
## sigma^2 estimated as 0.3509: log likelihood=-178.14
## AIC=362.28 AICc=362.4 BIC=372.17
## Series: Average.monthly.price
## Model: NULL model
## NULL modelSeries: Average.monthly.price
## Model: NULL model
## Transformation: log(Average.monthly.price)
## NULL modelSeries: Average.monthly.price
## Model: NULL model
## NULL modelSeries: Average.monthly.price
## Model: NULL model
## NULL modelSeries: Average.monthly.price
## Model: COMBINATION
## Combination: Average.monthly.price * 0.333333333333333
##
## ======================================================
##
## Series: Average.monthly.price
## Model: COMBINATION
## Combination: Average.monthly.price + Average.monthly.price
##
## ==========================================================
##
## Series: Average.monthly.price
## Model: COMBINATION
## Combination: Average.monthly.price + Average.monthly.price
##
## ==========================================================
##
## Series: Average.monthly.price
## Model: ETS(M,Ad,N)
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.1637179
## phi = 0.8000009
##
## Initial states:
## l[0] b[0]
## 4.356984 0.04637866
##
## sigma^2: 0.0039
##
## AIC AICc BIC
## 826.3053 826.7383 846.1251
##
## Series: Average.monthly.price
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.2274 -1.5417 -0.4665 1.7912 5.9216
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 5.105429 0.330521 15.447
## trend(knots = yearquarter("2020 Q2"))trend 0.041678 0.002838 14.687
## trend(knots = yearquarter("2020 Q2"))trend_-165 NA NA NA
## fourier(K = 2)C1_12 -0.319269 0.233415 -1.368
## fourier(K = 2)S1_12 0.368916 0.232284 1.588
## fourier(K = 2)C2_12 0.154255 0.232748 0.663
## fourier(K = 2)S2_12 -0.132862 0.232776 -0.571
## Pr(>|t|)
## (Intercept) <2e-16 ***
## trend(knots = yearquarter("2020 Q2"))trend <2e-16 ***
## trend(knots = yearquarter("2020 Q2"))trend_-165 NA
## fourier(K = 2)C1_12 0.173
## fourier(K = 2)S1_12 0.114
## fourier(K = 2)C2_12 0.508
## fourier(K = 2)S2_12 0.569
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.333 on 195 degrees of freedom
## Multiple R-squared: 0.532, Adjusted R-squared: 0.52
## F-statistic: 44.34 on 5 and 195 DF, p-value: < 2.22e-16
##
##
## Series: Average.monthly.price
## Model: ARIMA(0,1,2)
##
## Coefficients:
## ma1 ma2
## 0.3941 0.1906
## s.e. 0.0715 0.0730
##
## sigma^2 estimated as 0.3509: log likelihood=-178.14
## AIC=362.28 AICc=362.4 BIC=372.17
##
##
## Series: Average.monthly.price
## Model: NULL model
## Transformation: box_cox(Average.monthly.price, 0.9)
## NULL model
Plots
myplot=function(model){
model|>forecast(test)|>autoplot(myts)+geom_line(aes(y=.fitted,col=.model),
data=augment(model))+ggtitle(names(model)) + labs(x = "Month", y = "Average Monthly Price")
}
for (i in 1:12){print(myplot(eval(parse(text=paste0('m',i)))))} #NNETAR takes long to plot

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 12 rows containing missing values (`geom_line()`).




## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 83 rows containing missing values (`()`).
## Warning: Removed 201 rows containing missing values (`geom_line()`).

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 83 rows containing missing values (`()`).
## Warning: Removed 201 rows containing missing values (`geom_line()`).

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 83 rows containing missing values (`()`).
## Warning: Removed 201 rows containing missing values (`geom_line()`).

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 83 rows containing missing values (`()`).
## Warning: Removed 201 rows containing missing values (`geom_line()`).
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Ensemble_Model_2 = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading


Accuracy Metrics
tmp3=c(rep(0,12))
mymat=matrix(rep(0, 12*12), ncol=12)
for (i in 1:13){
tmp=eval(parse(text=paste0('m',i)))%>%forecast(test)
tmp2=tmp%>%accuracy(test)
tmp3=rbind(tmp2, tmp3)
}
## Warning in rbind(deparse.level, ...): number of columns of result, 10, is not a
## multiple of vector length 12 of arg 2
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Ensemble_Model_2 = (function (object, ...) ...`.
## Caused by warning:
## ! prediction from a rank-deficient fit may be misleading
tmp3=tmp3[tmp3$RMSE>0,]
tmp3$MASE=tmp3$RMSSE=tmp3$.type=NULL
tmp3%>%arrange(MAPE)
## # A tibble: 13 × 7
## .model ME RMSE MAE MPE MAPE ACF1
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Drift_Model 1.11 2.50 1.98 5.79 15.6 0.961
## 2 Ensemble_Model_2 0.461 2.45 2.14 -0.515 18.3 0.966
## 3 Naive_Model 2.14 3.41 2.56 14.2 19.0 0.970
## 4 Mean_Model 2.30 3.51 2.61 15.7 19.2 0.970
## 5 TS_Model 2.30 3.51 2.61 15.7 19.2 0.970
## 6 ARIMA_Model 2.44 3.60 2.66 16.9 19.5 0.970
## 7 Seasonal_Naive_Model 1.39 3.02 2.47 7.41 19.6 0.952
## 8 ETS_Model 2.57 3.70 2.72 18.0 19.8 0.969
## 9 <NA> NA NA NA NA NA NA
## 10 <NA> NA NA NA NA NA NA
## 11 <NA> NA NA NA NA NA NA
## 12 <NA> NA NA NA NA NA NA
## 13 <NA> NA NA NA NA NA NA