Here we explore five years of the US Trade Balance (deficit) from years 2018 to 2022. We also get a sneak peak at US Imports, which is directly linked to the US Trade Deficit.

Load Liabilities

#Load liabilities
library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.3
## ✔ dplyr       1.1.2     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.2     ✔ fable       0.3.3
## ✔ ggplot2     3.4.2     ✔ fabletools  0.3.4
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(kableExtra)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(latex2exp)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.4
## ✔ purrr   1.0.1     ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()          masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

Load in Data

#load data
balance = read.csv('/Users/adrianjones/Documents/Forecasting/Week4/homework week 4/USTrade.csv')
imports = read.csv('/Users/adrianjones/Documents/Forecasting/Week4/homework week 4/USImports.csv')
autovalue = read.csv('/Users/adrianjones/Documents/Forecasting/Week4/homework week 4/AutoManuf.csv')

Fix Data for Use

#merge datasets and reorder
library(zoo)
## 
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
## 
##     index
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
trade <- merge(balance, imports, by = "Period")
trade$Period <- as.yearmon(trade$Period, '%b-%Y')
trade <- trade[order(trade$Period), ]
autovalue$Period <- as.yearmon(autovalue$Period, '%b-%Y')
autovalue <- autovalue[order(autovalue$Period), ]
economy <- merge(trade, autovalue, by = "Period")

#rename columns
colnames(economy)[colnames(economy) == "Value.x"] <- "Trade_Balance"
colnames(economy)[colnames(economy) == "Value.y"] <- "Imports"
colnames(economy)[colnames(economy) == "Value"] <- "ValueAutos"

economy$Period <- as.Date(economy$Period)

#convert values to numeric
economy$Trade_Balance <- as.numeric(gsub(",", "", economy$Trade_Balance))
economy$Imports <- as.numeric(gsub(",", "", economy$Imports))
economy$ValueAutos <- as.numeric(gsub(",", "", economy$ValueAutos))

#convert to interpret in billions of usd
economy$Trade_Balance <- economy$Trade_Balance / 1000
economy$Imports <- economy$Imports / 1000
economy$ValueAutos <- economy$ValueAutos / 1000

Train/Test

#times series + train/test split
myts = economy %>% 
  mutate(Period = yearmonth(as.character(Period))) %>%
  as_tsibble(index = Period)
 
ggplot(myts, aes(x = Period, y = Trade_Balance)) +
  geom_line(color = "blue", size = 1) +  # Line for Trade_Balance
  geom_line(aes(y = Imports), color = "red", size = 1) +  # Line for imports
  xlab("Period") +
  ylab("Value") +
  ggtitle("Monthly US Trade Deficit and Imports (2018 - 2022) in Billions USD")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

train = myts[1:48, ]
test = myts[49:nrow(myts), ]

TS Plots

#Time Series Plots
myts%>%gg_season(Trade_Balance) #March 2022 seeing a huge drop in trade balance

myts%>%gg_lag(Trade_Balance)

myts%>%ACF(Trade_Balance)%>%autoplot()

myts%>%gg_season(Imports) #April/May 2020 saw signficant drops in imports, march 2018 was a peak

myts%>%gg_lag(Imports)

myts%>%ACF(Imports)%>%autoplot()

### Transformation

#Transformation
lambda = myts%>%features(Trade_Balance, features = guerrero) |> pull(lambda_guerrero)
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value

## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
myts%>%autoplot(box_cox(Trade_Balance, lambda[1])) +
  labs(y= " ", title = latex2exp:::TeX(paste0(
    "Transformed US Trade Balance with $\\lambda$ = ", round(lambda[1],2))))

### Decompositions

#Decompositions
comp=myts%>%model(stl=STL(Trade_Balance))%>%components()
comp%>%as_tsibble() %>%autoplot(Trade_Balance) + geom_line(aes(y=trend), colour = "red") + 
  geom_line(aes(y=season_adjust), colour = "green")

comp%>%autoplot()

#Decomposition for Imports
comp2 = myts%>%model(stl=STL(Imports))%>%components()
comp2%>%as_tsibble() %>%autoplot(Imports) + geom_line(aes(y=trend), colour = 'red') +
  geom_line(aes(y=season_adjust), colour = "blue")

comp2%>%autoplot()

Models

#Models for US Trade Balance (ETS, ARIMA, Dynamic, Ensemble)
m1=train |> model(ets_model = ETS(Trade_Balance))
m2=train |> model(arima_model = ARIMA(Trade_Balance))
m3=train |> model(dynamic_model = ARIMA(Trade_Balance ~ Imports, stepwise = TRUE))
m4 = train |> model(ensemble_model = (ETS(Trade_Balance) + ARIMA(Trade_Balance) + ARIMA(Trade_Balance ~ Imports))/3)

Reports

#Reports:
for (i in 1:4) {report(eval(parse(text=paste0('m', i))))}
## Series: Trade_Balance 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.6843094 
## 
##   Initial states:
##       l[0]
##  -47.14653
## 
##   sigma^2:  16.3982
## 
##      AIC     AICc      BIC 
## 324.0390 324.5845 329.6526 
## Series: Trade_Balance 
## Model: ARIMA(0,1,1) w/ drift 
## 
## Coefficients:
##           ma1  constant
##       -0.3790   -0.6943
## s.e.   0.1211    0.3547
## 
## sigma^2 estimated as 15.6:  log likelihood=-130.3
## AIC=266.6   AICc=267.16   BIC=272.15
## Series: Trade_Balance 
## Model: LM w/ ARIMA(0,1,1) errors 
## 
## Coefficients:
##           ma1  Imports  intercept
##       -0.3787  -0.0582    -0.6249
## s.e.   0.1263   0.0607     0.3587
## 
## sigma^2 estimated as 15.64:  log likelihood=-129.84
## AIC=267.68   AICc=268.63   BIC=275.08
## Series: Trade_Balance 
## Model: COMBINATION 
## Combination: Trade_Balance * 0.333333333333333
## 
## ==============================================
## 
## Series: Trade_Balance 
## Model: COMBINATION 
## Combination: Trade_Balance + Trade_Balance
## 
## ==========================================
## 
## Series: Trade_Balance 
## Model: COMBINATION 
## Combination: Trade_Balance + Trade_Balance
## 
## ==========================================
## 
## Series: Trade_Balance 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.6843094 
## 
##   Initial states:
##       l[0]
##  -47.14653
## 
##   sigma^2:  16.3982
## 
##      AIC     AICc      BIC 
## 324.0390 324.5845 329.6526 
## 
## Series: Trade_Balance 
## Model: ARIMA(0,1,1) w/ drift 
## 
## Coefficients:
##           ma1  constant
##       -0.3790   -0.6943
## s.e.   0.1211    0.3547
## 
## sigma^2 estimated as 15.6:  log likelihood=-130.3
## AIC=266.6   AICc=267.16   BIC=272.15
## 
## 
## Series: Trade_Balance 
## Model: LM w/ ARIMA(0,1,1) errors 
## 
## Coefficients:
##           ma1  Imports  intercept
##       -0.3787  -0.0582    -0.6249
## s.e.   0.1263   0.0607     0.3587
## 
## sigma^2 estimated as 15.64:  log likelihood=-129.84
## AIC=267.68   AICc=268.63   BIC=275.08
#Plot forecasts against held out data (test data)
myplot = function(model){model |> forecast(test) |>autoplot(myts)+geom_line(aes(y=.fitted, col=.model),
                                                    data=augment(model)) + ggtitle(names(model))}

for (i in 1:4){print(myplot(eval(parse(text=paste0('m',i)))))}

Accuracy Metrics

#Accuracy Metrics:
tmp3=c(rep(0,10))
mymat = matrix(rep(0, 4 * 10), ncol = 10)

for (i in 1:4){
  tmp=eval(parse(text=paste0('m',i)))%>%forecast(test)
  tmp2=tmp%>%accuracy(test)
  tmp3=rbind(tmp2, tmp3)
}

tmp3=tmp3[tmp3$RMSE>0,]
tmp3$MASE=tmp3$RMSSE=tmp3$.type=NULL
tmp3%>%arrange(MAPE)
## # A tibble: 4 × 7
##   .model             ME  RMSE   MAE   MPE  MAPE  ACF1
##   <chr>           <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets_model      0.0286  10.3  8.56 -1.69  10.9 0.580
## 2 ensemble_model 3.24    11.9 10.1  -6.00  13.3 0.639
## 3 dynamic_model  5.20    12.9 10.9  -8.55  14.5 0.665
## 4 arima_model    4.51    13.1 11.1  -7.75  14.7 0.657
#We find that our ETS model has the lowest Mean Absolute Percentage Error, the widest band in our test forecast fit, but lowest autocorrelation. The MAPE performance is followed by (in order from lowest to highest) our ensemble model, the dynamic arima model, then lastly our default arima model.'''