# Bring in Libraries
  library(tidyr)
  library(dplyr)
  library(ggplot2)
  library(gridExtra)
  library(forecast)
  library(tsibble)
  library(lubridate)
  library(tseries)
  library(zoo)
  library(kableExtra)
  library(urca)
  library(knitr)

Data from proprietary source (not included)

## Clean data 
   # rename rows
     rownames(occ) = rownames(hwe) = occ[,1]

   # change OCC from wide format to long
     occ = occ %>%
       
       pivot_longer(cols = -YEAR, # remove year
                     names_to = "month", # create month column
                     values_to = "price" #create price column
                    ) %>% 
       
       mutate(month = match(substr(month, 1, 3), month.abb),
        # match month names to month abbreviations
       
       date = as.Date(paste(YEAR, month, "01", sep = "-"))
        # create a date column of the month and year separated by -
               ) %>%
       
       arrange(date) # arrange by date

   # change HWE from wide format to long
     hwe = hwe %>%
       
       pivot_longer(cols = -YEAR, # remove year
                     names_to = "month", # create month column
                     values_to = "price" #create price column
                    ) %>% 
       
       mutate(month = match(substr(month, 1, 3), month.abb),
        # match string of month names to month abbreviations
               date = as.Date(paste(YEAR, month, "01", sep = "-"))
        
        # create a date column of the month and year separated by -
               ) %>%
       
       arrange(date) # arrange by date

## Find useful measures
   max_hwe = hwe %>% filter(price == max(hwe$price, na.rm = TRUE))
   min_hwe = hwe %>% filter(price == min(hwe$price, na.rm = TRUE))
   
   max_occ = occ %>% filter(price == max(occ$price, na.rm = TRUE))
   min_occ = occ %>% filter(price == min(occ$price, na.rm = TRUE)) 
   
## Set desired horizon
   h = 12

Visualizing Paper Prices Since 2018

Here is a broad overview of both prices side by side:

## Create HWE plot 
   hwe.plot = ggplot(aes(date, price), data = hwe) +
                geom_point(color = "black") +
                geom_line(color = "gray") +
                labs(title = "HWE Prices Since 2018", y = "Price ($)", x = "Year") +
                
                # force each year on x-axis
                  scale_x_date(breaks = seq(min(hwe$date), max(hwe$date), by = "1 year"),
                               date_labels = "%Y") + 
  
                # add max point
                  geom_segment(data = max_hwe,
                               aes(x = date, xend = date,
                                   y = price + 25, yend = price + 5),
                               arrow = arrow(length = unit(0.25, "cm")),
                               color = "black",
                               linewidth = 0.7) +
  
                  # add text
                    geom_text(data = max_hwe[2,],
                              aes(x = date - 10, y = price + 30,
                                  label = paste0("Max: $", price)),
                              color = "black") +
  
                  # add min point
                    geom_segment(data = min_hwe,
                                 aes(x = date - 100, xend = date,
                                     y = price - 25, yend = price - 5),
                                 arrow = arrow(length = unit(0.25, "cm")),
                                 color = "black",
                                 linewidth = 0.7) +
  
                  # add text
                    geom_text(data = min_hwe[1,], 
                              aes(x = date - 175, y = price - 30,
                                  label = paste0("Min: $", price)),
                              color = "black")


## Create OCC plot 
   occ.plot = ggplot(aes(date, price), data = occ) +
                geom_point(color = "brown2") +
                geom_line(color = "brown4") +
                labs(title = "OCC Prices Since 2018", y = "Price ($)", x = "Year") +
                
                # force each year on x-axis
                  scale_x_date(breaks = seq(min(occ$date), max(occ$date), by = "1 year"),
                               date_labels = "%Y") + 
  
                # add max point
                  geom_segment(data = max_occ,
                               aes(x = date, xend = date,
                                   y = price + 25, yend = price + 5),
                               arrow = arrow(length = unit(0.25, "cm")),
                               color = "black",
                               linewidth = 0.7) +
  
                  # add text
                    geom_text(data = max_occ[1,],
                              aes(x = date, y = price + 30,
                                  label = paste0("Max: $", price)),
                              color = "black") +
  
                  # add min point
                    geom_segment(data = min_occ,
                                 aes(x = date, xend = date,
                                     y = price - 25, yend = price - 5),
                                 arrow = arrow(length = unit(0.25, "cm")),
                                 color = "black",
                                 linewidth = 0.7) +
  
                  # add text
                    geom_text(data = min_occ[c(4,11),], 
                              aes(x = date, y = price - 30,
                                  label = paste0("Min: $", price)),
                              color = "black")


## Arrange both on a grid
   grid.arrange(hwe.plot, occ.plot, ncol = 2)

HWE: There was a 3 month max at $465 from August 2022 through October 2022. We are currently at the minimum price of $260 through December 2025.

OCC: There was a 2 month max at $195 from September 2021 through October 2021. The minimum price for OCC of $35 has been reached twice. Each time the price was stable for several months. The first time was from July 2019 through January 2020. The second time was from November 2022 through March 2023.

Here are both of the prices plotted for each month:

plot1 = ggplot(aes(hwe$price, occ$price), data = hwe) +
                geom_point(color = "black") +
                labs(title = "Paper Price Since 2018", y = "HWE Price", x = "OCC Price") 

plot1

There seems to be very little association with each other and the correlation is very small. ($\tau = 0.183$). It does not seem like when one peaks/falls so does the other.

HWE

# Create time series object
  ts_hwe = ts(data = hwe$price,
              start = c(year(min(hwe$date)),
                         month(min(hwe$date))),
               end = c(2025, 12),
               frequency = 12)

# Find recommended lambda
  lambda_hwe = BoxCox.lambda(ts_hwe)

# Transform with recommended lambda 
  ts_hwe = BoxCox(ts_hwe, lambda= lambda_hwe)

Visualizations

Here is the trend line above just for HWE:

hwe.plot

The data has extreme changes in price from Dec 2018 to Dec 2019. It then trended up and peaked in mid 2022 but has been in decline since. To this day, prices have risen only twice since the peak in 2022. HWE prices are now at an all-time low of 260.

Here is a plot of the prices through the years:

This is a breakdown of the prices by year. There doesn’t seem to be a strong relationship. There are years where prices decreased throughout the year (2019, 2023), years where prices increased then stabilized (2018, 2021, 2022), and years where prices were stable (2024, 2025). Only one year (2020) had the prices peak in the spring/summer then decrease again.

Model(s) Selected

## Fit the benchmark model
# Naive
  hwe_naive = naive(ts_hwe, bootstrap = TRUE, h = h)

## Fit the tested models
   # Fit Auto ARIMA
     hwe_arima = auto.arima(ts_hwe, seasonal = FALSE,
                              stepwise = FALSE, approximation = FALSE,
                              ic = "aicc")

   # Fit ETS Model
     hwe_ets = ets(ts_hwe, damped = TRUE) #no trace = true because it is too large

   # Fit Holt model
     hwe_holt = holt(ts_hwe, damped = TRUE, h = h)
     
   # Fit Neural Network
     hwe_nnet = nnetar(ts_hwe)

## Create functions for CV to find errors that will be the base of the weights
   # Create Naive forecast function (with bootstrapped residuals)
     naive.fun = function(y, h) {
       
       # leave out first year
       if(length(y) < 12) {
         return(structure(list(mean = ts(rep(NA, h))), class = "forecast"))
         }
       
        # finish function
          fit = naive(y,
                      bootstrap = TRUE,
                      h = h)
       
       return(fit)
     }

   # Create Drift forecast function
     drift.fun = function(y, h) {
       
       # leave out first year
         if(length(y) < 12) {
         return(structure(list(mean = ts(rep(NA, h))), class = "forecast"))
         }
       
       # finish function
         fit = rwf(y, drift = TRUE,
                 bootstrap = TRUE,
                 h = h)
       
       return(fit)
     }

    # Create ARIMA forecast function
      arima.fun = function(y, h) {
       
      # leave out first year
        if(length(y) < 12) {
         return(structure(list(mean = ts(rep(NA, h))), class = "forecast"))
         }
       
      # finish function
        fit = auto.arima(y, seasonal = FALSE,
                              stepwise = FALSE, approximation = FALSE,
                              ic = "aicc")
        fc = forecast(fit, h)
       
       return(fc)
     }
     
    # Create ETS forecast functions
      ets.fun = function(y, h) {
         
      # leave out first year
        if(length(y) < 12) {
           return(structure(list(mean = ts(rep(NA, h))), class = "forecast"))
           }
       
      # finish function
        fit = ets(y, damped = TRUE)
        fc = forecast(fit, h = h)
         
        return(fc)
    }
  
   # Create Holt forecast functions
     holt.fun = function(y, h) {
         
       # leave out first year
         if(length(y) < 12) {
           return(structure(list(mean = ts(rep(NA, h))), class = "forecast"))
         }
       
       # finish function
         fit = holt(y, damped = TRUE, h = h)
         fc = forecast(fit, h = h)
         
         return(fc)
       }
       
   # Create Neural Net forecast functions
     nnet.fun = function(y, h) {
         
      # leave out first year
        if(length(y) < 12) {
           return(structure(list(mean = ts(rep(NA, h))), class = "forecast"))
           }
       
       # finish function
         fit = nnetar(y)
         fc = forecast(fit, h = h)
         
         return(fc)
       }
       
       
## Find CV error & RMSE for all models
   set.seed(1120)
   hwe_naive_er = tsCV(ts_hwe, naive.fun, h = h)
   hwe_naive_rmse = sqrt(mean(hwe_naive_er^2, na.rm = TRUE))
       
   hwe_arima_er = tsCV(ts_hwe, arima.fun, h = h)
   hwe_arima_rmse = sqrt(mean(hwe_arima_er^2, na.rm = TRUE))
        
   hwe_ets_er = tsCV(ts_hwe, ets.fun, h = h)
   hwe_ets_rmse = sqrt(mean(hwe_ets_er^2, na.rm = TRUE))
        
   hwe_holt_er = tsCV(ts_hwe, holt.fun, h = h)
   hwe_holt_rmse = sqrt(mean(hwe_holt_er^2, na.rm = TRUE))
        
   hwe_nnet_er = tsCV(ts_hwe, nnet.fun, h = h)
   hwe_nnet_rmse = sqrt(mean(hwe_nnet_er^2, na.rm = TRUE))

Essentially, some of the models are better at predicting short-term forecasts (first 3 months) while some models are better in the long-term. I combined the models and found they predicted better than any of the models individually.

## Forecast the models
   # ARIMA
     hwe_fc_arima = forecast(hwe_arima, h = h)

   # ETS
     hwe_fc_ets = forecast(hwe_ets, h = h)
     
   # Holt
     hwe_fc_holt = forecast(hwe_holt, h = h)
     
   # Neural Net
     hwe_fc_nnet = forecast(hwe_nnet, h = h)

## Create function to find rmse per block
   rmse_block = function(e, cols) {
     sqrt(mean(e[, cols]^2, na.rm = TRUE))
   }

## Create RMSE df to find RMSEs for each block
   hwe.rmse.df = data.frame(
     Model = c("Naive", "ARIMA", "ETS", "Holt", "NNet"),
     
     Block1 = c(
       rmse_block(hwe_naive_er, 1:3),
       rmse_block(hwe_arima_er, 1:3),
       rmse_block(hwe_ets_er, 1:3),
       rmse_block(hwe_holt_er, 1:3),
       rmse_block(hwe_nnet_er, 1:3)
     ),
     
     Block2 = c(
       rmse_block(hwe_naive_er, 4:9),
       rmse_block(hwe_arima_er, 4:9),
       rmse_block(hwe_ets_er, 4:9),
       rmse_block(hwe_holt_er, 4:9),
       rmse_block(hwe_nnet_er, 4:9)
     ),
     
     Block3 = c(
       rmse_block(hwe_naive_er, 10:12),
       rmse_block(hwe_arima_er, 10:12),
       rmse_block(hwe_ets_er, 10:12),
       rmse_block(hwe_holt_er, 10:12),
       rmse_block(hwe_nnet_er, 10:12)
     )
   )
   

## Create a function that finds weights
   wghts = function(x){
     w = 1/x
     w / sum(w)
   }
   
   
## Create weights df
   hwe.wghts.df = data.frame(
     Model = hwe.rmse.df$Model,
     
     wblock1 = wghts(hwe.rmse.df$Block1),
     wblock2 = wghts(hwe.rmse.df$Block2),
     wblock3 = wghts(hwe.rmse.df$Block3)
   )

## Create the combination forecast
   # create the object    
     fc_hwe = numeric(12)
    
     # months 1 - 3
     fc_hwe[1:3] = 
       hwe.wghts.df$wblock1[1] * hwe_naive$mean[1:3] +
       hwe.wghts.df$wblock1[2] * hwe_fc_arima$mean[1:3] +
       hwe.wghts.df$wblock1[3] * hwe_fc_ets$mean[1:3] +
       hwe.wghts.df$wblock1[4] * hwe_fc_holt$mean[1:3] +
       hwe.wghts.df$wblock1[5] * hwe_fc_nnet$mean[1:3]
     
    # months 4-9
      fc_hwe[4:9] = 
       hwe.wghts.df$wblock1[1] * hwe_naive$mean[4:9] +
       hwe.wghts.df$wblock1[2] * hwe_fc_arima$mean[4:9] +
       hwe.wghts.df$wblock1[3] * hwe_fc_ets$mean[4:9] +
       hwe.wghts.df$wblock1[4] * hwe_fc_holt$mean[4:9] +
       hwe.wghts.df$wblock1[5] * hwe_fc_nnet$mean[4:9]
      
    # months 4-9
      fc_hwe[10:12] = 
       hwe.wghts.df$wblock1[1] * hwe_naive$mean[10:12] +
       hwe.wghts.df$wblock1[2] * hwe_fc_arima$mean[10:12] +
       hwe.wghts.df$wblock1[3] * hwe_fc_ets$mean[10:12] +
       hwe.wghts.df$wblock1[4] * hwe_fc_holt$mean[10:12] +
       hwe.wghts.df$wblock1[5] * hwe_fc_nnet$mean[10:12]
      

## Make forecast a time series object
   fc_hwe = ts(fc_hwe, start = 2026, frequency = 12)

## Inverse them back into original units, then round them to nearest 5 dollars
   # Inverse Box-Cox
     fc_hwe = InvBoxCox(fc_hwe, lambda = lambda_hwe)
     ts_hwe = InvBoxCox(ts_hwe, lambda = lambda_hwe)
     
     hwe_naive = InvBoxCox(hwe_naive$mean, lambda = lambda_hwe)
     hwe_fc_arima = InvBoxCox(hwe_fc_arima$mean, lambda = lambda_hwe)
     hwe_fc_ets = InvBoxCox(hwe_fc_ets$mean, lambda = lambda_hwe)
     hwe_fc_holt = InvBoxCox(hwe_fc_holt$mean, lambda = lambda_hwe)
     hwe_fc_nnet = InvBoxCox(hwe_fc_nnet$mean, lambda = lambda_hwe)
     
   # Round to nearest 5 dollars
     fc_hwe = round((fc_hwe/5))*5

Here are the forecasts for the combination models:

autoplot(ts_hwe) +
  autolayer(hwe_naive, series="Naive", PI=FALSE) +
  autolayer(hwe_fc_arima, series="ARIMA", PI=FALSE) +
  autolayer(hwe_fc_ets, series="ETS", PI=FALSE) +
  autolayer(hwe_fc_holt, series="Holt", PI=FALSE) +
  autolayer(hwe_fc_nnet, series="Neural Net", PI=FALSE) +
  autolayer(fc_hwe, series="Combined Models") +
  xlab("Year") + ylab("Transformed Price") +
  ggtitle("HWE Price Forecasts")

Here is at table of the predicted prices:

##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2026 260 260 260 260 265 265 265 265 265 265 265 265

According to the model, the price of HWE is projected to stay the same at $260 and then maintain through April of 2026. After that, there will be an increase in May to $265 that is expected to maintain until the prices go back down to $260 in November.

OCC

# Create time series object
   ts_occ = ts(data = occ$price,
               start = c(year(min(occ$date)),
                         month(min(occ$date))),
               end = c(2025, 12),
               frequency = 12)

# Clean with tsclean() command
  ts_occ = tsclean(ts_occ)

# Find recommended lambda
  lambda_occ = BoxCox.lambda(ts_occ)
  
# Transform with recommended lambda 
  ts_occ = BoxCox(ts_occ, lambda = lambda_occ)

Visualizations

Here is the trend line above just for OCC:

occ.plot

The data has extreme changes in price from Nov 2020 to mid 2021 It then fell consistently until 2023. OCC prices peaked in mid 2024 but has been in decline since. Right now, prices currently sit at $60 which is below the median, but not at an all time low.

Here is a plot plotting all the years:

This is a breakdown of the prices by year. There doesn’t seem to be a strong relationship. The largest takeway is that OCC prices seem less volatile than HWE prices. In this graph there aren’t many peaks or extreme jumps up.

Model(s) Selected

## Fit the benchmark model
  # Naive
  occ_naive = naive(ts_occ, bootstrap = TRUE, h = h)

## Fit the tested models
   # Fit Auto ARIMA
     occ_arima = auto.arima(ts_occ, seasonal = FALSE,
                              stepwise = FALSE, approximation = FALSE,
                              ic = "aicc",
                              trace = TRUE)

## 
##  ARIMA(0,0,0)            with zero mean     : 573.6837
##  ARIMA(0,0,0)            with non-zero mean : 156.7202
##  ARIMA(0,0,1)            with zero mean     : Inf
##  ARIMA(0,0,1)            with non-zero mean : 55.19845
##  ARIMA(0,0,2)            with zero mean     : Inf
##  ARIMA(0,0,2)            with non-zero mean : -21.51933
##  ARIMA(0,0,3)            with zero mean     : Inf
##  ARIMA(0,0,3)            with non-zero mean : -61.74933
##  ARIMA(0,0,4)            with zero mean     : Inf
##  ARIMA(0,0,4)            with non-zero mean : -84.86529
##  ARIMA(0,0,5)            with zero mean     : Inf
##  ARIMA(0,0,5)            with non-zero mean : -92.99843
##  ARIMA(1,0,0)            with zero mean     : Inf
##  ARIMA(1,0,0)            with non-zero mean : -81.44967
##  ARIMA(1,0,1)            with zero mean     : Inf
##  ARIMA(1,0,1)            with non-zero mean : -99.84382
##  ARIMA(1,0,2)            with zero mean     : Inf
##  ARIMA(1,0,2)            with non-zero mean : -118.7397
##  ARIMA(1,0,3)            with zero mean     : Inf
##  ARIMA(1,0,3)            with non-zero mean : -117.1684
##  ARIMA(1,0,4)            with zero mean     : Inf
##  ARIMA(1,0,4)            with non-zero mean : -115.9116
##  ARIMA(2,0,0)            with zero mean     : Inf
##  ARIMA(2,0,0)            with non-zero mean : -116.169
##  ARIMA(2,0,1)            with zero mean     : Inf
##  ARIMA(2,0,1)            with non-zero mean : -117.67
##  ARIMA(2,0,2)            with zero mean     : Inf
##  ARIMA(2,0,2)            with non-zero mean : -117.7332
##  ARIMA(2,0,3)            with zero mean     : Inf
##  ARIMA(2,0,3)            with non-zero mean : Inf
##  ARIMA(3,0,0)            with zero mean     : Inf
##  ARIMA(3,0,0)            with non-zero mean : -119.1532
##  ARIMA(3,0,1)            with zero mean     : Inf
##  ARIMA(3,0,1)            with non-zero mean : -119.11
##  ARIMA(3,0,2)            with zero mean     : Inf
##  ARIMA(3,0,2)            with non-zero mean : -117.3318
##  ARIMA(4,0,0)            with zero mean     : Inf
##  ARIMA(4,0,0)            with non-zero mean : -118.6221
##  ARIMA(4,0,1)            with zero mean     : Inf
##  ARIMA(4,0,1)            with non-zero mean : -117.0096
##  ARIMA(5,0,0)            with zero mean     : Inf
##  ARIMA(5,0,0)            with non-zero mean : -118.2044
## 
## 
## 
##  Best model: ARIMA(3,0,0)            with non-zero mean

   # Fit ETS Model
     occ_ets = ets(ts_occ, damped = TRUE) #no trace = true because it is too large

   # Fit Holt model
     occ_holt = holt(ts_occ, damped = TRUE, h = h)
     
   # Fit Neural Network
     occ_nnet = nnetar(ts_occ)

## Find CV error & RMSE for all models
   set.seed(1120)
   occ_naive_er = tsCV(ts_occ, naive.fun, h = h)
   occ_naive_rmse = sqrt(mean(occ_naive_er^2, na.rm = TRUE))
       
   occ_arima_er = tsCV(ts_occ, arima.fun, h = h)
   occ_arima_rmse = sqrt(mean(occ_arima_er^2, na.rm = TRUE))
        
   occ_ets_er = tsCV(ts_occ, ets.fun, h = h)
   occ_ets_rmse = sqrt(mean(occ_ets_er^2, na.rm = TRUE))
        
   occ_holt_er = tsCV(ts_occ, holt.fun, h = h)
   occ_holt_rmse = sqrt(mean(occ_holt_er^2, na.rm = TRUE))
        
   occ_nnet_er = tsCV(ts_occ, nnet.fun, h = h)
   occ_nnet_rmse = sqrt(mean(occ_nnet_er^2, na.rm = TRUE))

Again, some of the models are better at predicting short term forecasts (first 3 months) while other models better at medium term, and one model is better at long-term. I tried a combination of these models but that did not improve the models compared to the baseline of using last month’s prices as the predicted price.

## Forecast the models
   # ARIMA
     occ_fc_arima = forecast(occ_arima, h = h)

   # ETS
     occ_fc_ets = forecast(occ_ets, h = h)
     
   # Holt
     occ_fc_holt = forecast(occ_holt, h = h)
     
   # Neural Net
     occ_fc_nnet = forecast(occ_nnet, h = h)

## Create RMSE df to find RMSEs for each block
   occ.rmse.df = data.frame(
     Model = c("Naive", "ARIMA", "ETS", "Holt", "NNet"),
     
     Block1 = c(
       rmse_block(occ_naive_er, 1:3),
       rmse_block(occ_arima_er, 1:3),
       rmse_block(occ_ets_er, 1:3),
       rmse_block(occ_holt_er, 1:3),
       rmse_block(occ_nnet_er, 1:3)
     ),
     
     Block2 = c(
       rmse_block(occ_naive_er, 4:9),
       rmse_block(occ_arima_er, 4:9),
       rmse_block(occ_ets_er, 4:9),
       rmse_block(occ_holt_er, 4:9),
       rmse_block(occ_nnet_er, 4:9)
     ),
     
     Block3 = c(
       rmse_block(occ_naive_er, 10:12),
       rmse_block(occ_arima_er, 10:12),
       rmse_block(occ_ets_er, 10:12),
       rmse_block(occ_holt_er, 10:12),
       rmse_block(occ_nnet_er, 10:12)
     )
   )
   
## Create weights df
   occ.wghts.df = data.frame(
     Model = occ.rmse.df$Model,
     
     wblock1 = wghts(occ.rmse.df$Block1),
     wblock2 = wghts(occ.rmse.df$Block2),
     wblock3 = wghts(occ.rmse.df$Block3)
   )

## Create the combination forecast
   # create the object    
     fc_occ = numeric(12)
    
     # months 1 - 3
     fc_occ[1:3] = 
       occ.wghts.df$wblock1[1] * occ_naive$mean[1:3] +
       occ.wghts.df$wblock1[2] * occ_fc_arima$mean[1:3] +
       occ.wghts.df$wblock1[3] * occ_fc_ets$mean[1:3] +
       occ.wghts.df$wblock1[4] * occ_fc_holt$mean[1:3] +
       occ.wghts.df$wblock1[5] * occ_fc_nnet$mean[1:3]
     
    # months 4-9
      fc_occ[4:9] = 
       occ.wghts.df$wblock1[1] * occ_naive$mean[4:9] +
       occ.wghts.df$wblock1[2] * occ_fc_arima$mean[4:9] +
       occ.wghts.df$wblock1[3] * occ_fc_ets$mean[4:9] +
       occ.wghts.df$wblock1[4] * occ_fc_holt$mean[4:9] +
       occ.wghts.df$wblock1[5] * occ_fc_nnet$mean[4:9]
      
    # months 4-9
      fc_occ[10:12] = 
       occ.wghts.df$wblock1[1] * occ_naive$mean[10:12] +
       occ.wghts.df$wblock1[2] * occ_fc_arima$mean[10:12] +
       occ.wghts.df$wblock1[3] * occ_fc_ets$mean[10:12] +
       occ.wghts.df$wblock1[4] * occ_fc_holt$mean[10:12] +
       occ.wghts.df$wblock1[5] * occ_fc_nnet$mean[10:12]
      

## Make forecast a time series object
   fc_occ = ts(fc_occ, start = 2026, frequency = 12)

## Inverse them back into original units, then round them to nearest 5 dollars
   # Inverse Box-Cox
     fc_occ = InvBoxCox(fc_occ, lambda = lambda_occ)
     ts_occ = InvBoxCox(ts_occ, lambda = lambda_occ)
     
     occ_naive = InvBoxCox(occ_naive$mean, lambda = lambda_occ)
     occ_fc_arima = InvBoxCox(occ_fc_arima$mean, lambda = lambda_occ)
     occ_fc_ets = InvBoxCox(occ_fc_ets$mean, lambda = lambda_occ)
     occ_fc_holt = InvBoxCox(occ_fc_holt$mean, lambda = lambda_occ)
     occ_fc_nnet = InvBoxCox(occ_fc_nnet$mean, lambda = lambda_occ)
     
   # Round to nearest 5 dollars
     fc_occ = round((fc_occ/5))*5

Here are the forecasts for the combination models:

autoplot(ts_occ) +
  autolayer(occ_naive, series="Naive", PI=FALSE) +
  autolayer(occ_fc_arima, series="ARIMA", PI=FALSE) +
  autolayer(occ_fc_ets, series="ETS", PI=FALSE) +
  autolayer(occ_fc_holt, series="Holt", PI=FALSE) +
  autolayer(occ_fc_nnet, series="Neural Net", PI=FALSE) +
  autolayer(fc_occ, series="Combined Models") +
  xlab("Year") + ylab("Transformed Price") +
  ggtitle("occ Price Forecasts")

## Warning in ggplot2::geom_line(ggplot2::aes(x = .data[["timeVal"]], y = .data[["seriesVal"]], : Ignoring unknown parameters: `PI`
## Ignoring unknown parameters: `PI`
## Ignoring unknown parameters: `PI`
## Ignoring unknown parameters: `PI`
## Ignoring unknown parameters: `PI`

Here is at table of the predicted prices:

##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2026  60  60  60  60  65  65  65  65  70  70  70  70

According to the model, the price of OCC is projected to stay the same at $60 and then increase to $65 in April of 2026. After that, there will be an increase in August to $70 that is expected to maintain until another increase in December to $75.

Takeaways/Futher Steps:

The data is very noisy, with extreme jumps that are hard to predict.
HWE prices were predicted with a combination of models that seem to be better than any one individual model.
The combined model predicted the price of HWE will largely stay the same.
OCC prices did not move in any predictable pattern. The best overall model was the “naive” model that only used the last price as its prediction.
However, a combination was almost as good, and that is what I included above. The combinations predict price increase to $75 in the year 2026.
Other potential variables should be considered for prediction of paper prices.

2026 Price Forecasting

Lucas Libero

2025-12-01

Visualizing Paper Prices Since 2018

HWE

Visualizations

Model(s) Selected

OCC

Visualizations

Model(s) Selected

Takeaways/Futher Steps: