Here we explore five years of the US Trade Balance (deficit) from years 2018 to 2022. We also get a sneak peak at US Imports, which is directly linked to the US Trade Deficit.
#Load liabilities
library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.3
## ✔ dplyr 1.1.2 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.2 ✔ fable 0.3.3
## ✔ ggplot2 3.4.2 ✔ fabletools 0.3.4
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(kableExtra)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(latex2exp)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ purrr 1.0.1 ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ kableExtra::group_rows() masks dplyr::group_rows()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(magrittr)
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
#load data
balance = read.csv('/Users/adrianjones/Documents/Forecasting/Week4/homework week 4/USTrade.csv')
imports = read.csv('/Users/adrianjones/Documents/Forecasting/Week4/homework week 4/USImports.csv')
autovalue = read.csv('/Users/adrianjones/Documents/Forecasting/Week4/homework week 4/AutoManuf.csv')
#merge datasets and reorder
library(zoo)
##
## Attaching package: 'zoo'
## The following object is masked from 'package:tsibble':
##
## index
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
trade <- merge(balance, imports, by = "Period")
trade$Period <- as.yearmon(trade$Period, '%b-%Y')
trade <- trade[order(trade$Period), ]
autovalue$Period <- as.yearmon(autovalue$Period, '%b-%Y')
autovalue <- autovalue[order(autovalue$Period), ]
economy <- merge(trade, autovalue, by = "Period")
#rename columns
colnames(economy)[colnames(economy) == "Value.x"] <- "Trade_Balance"
colnames(economy)[colnames(economy) == "Value.y"] <- "Imports"
colnames(economy)[colnames(economy) == "Value"] <- "ValueAutos"
economy$Period <- as.Date(economy$Period)
#convert values to numeric
economy$Trade_Balance <- as.numeric(gsub(",", "", economy$Trade_Balance))
economy$Imports <- as.numeric(gsub(",", "", economy$Imports))
economy$ValueAutos <- as.numeric(gsub(",", "", economy$ValueAutos))
#convert to interpret in billions of usd
economy$Trade_Balance <- economy$Trade_Balance / 1000
economy$Imports <- economy$Imports / 1000
economy$ValueAutos <- economy$ValueAutos / 1000
#times series + train/test split
myts = economy %>%
mutate(Period = yearmonth(as.character(Period))) %>%
as_tsibble(index = Period)
ggplot(myts, aes(x = Period, y = Trade_Balance)) +
geom_line(color = "blue", size = 1) + # Line for Trade_Balance
geom_line(aes(y = Imports), color = "red", size = 1) + # Line for imports
xlab("Period") +
ylab("Value") +
ggtitle("Monthly US Trade Deficit and Imports (2018 - 2022) in Billions USD")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
train = myts[1:48, ]
test = myts[49:nrow(myts), ]
#Time Series Plots
myts%>%gg_season(Trade_Balance) #March 2022 seeing a huge drop in trade balance
myts%>%gg_lag(Trade_Balance)
myts%>%ACF(Trade_Balance)%>%autoplot()
myts%>%gg_season(Imports) #April/May 2020 saw signficant drops in imports, march 2018 was a peak
myts%>%gg_lag(Imports)
myts%>%ACF(Imports)%>%autoplot()
### Transformation
#Transformation
lambda = myts%>%features(Trade_Balance, features = guerrero) |> pull(lambda_guerrero)
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
## Warning in optimise(lambda_coef_var, c(lower, upper), x = x, .period =
## max(.period, : NA/Inf replaced by maximum positive value
myts%>%autoplot(box_cox(Trade_Balance, lambda[1])) +
labs(y= " ", title = latex2exp:::TeX(paste0(
"Transformed US Trade Balance with $\\lambda$ = ", round(lambda[1],2))))
### Decompositions
#Decompositions
comp=myts%>%model(stl=STL(Trade_Balance))%>%components()
comp%>%as_tsibble() %>%autoplot(Trade_Balance) + geom_line(aes(y=trend), colour = "red") +
geom_line(aes(y=season_adjust), colour = "green")
comp%>%autoplot()
#Decomposition for Imports
comp2 = myts%>%model(stl=STL(Imports))%>%components()
comp2%>%as_tsibble() %>%autoplot(Imports) + geom_line(aes(y=trend), colour = 'red') +
geom_line(aes(y=season_adjust), colour = "blue")
comp2%>%autoplot()
#Models for US Trade Balance (ETS, ARIMA, Dynamic, Ensemble)
m1=train |> model(ets_model = ETS(Trade_Balance))
m2=train |> model(arima_model = ARIMA(Trade_Balance))
m3=train |> model(dynamic_model = ARIMA(Trade_Balance ~ Imports, stepwise = TRUE))
m4 = train |> model(ensemble_model = (ETS(Trade_Balance) + ARIMA(Trade_Balance) + ARIMA(Trade_Balance ~ Imports))/3)
#Reports:
for (i in 1:4) {report(eval(parse(text=paste0('m', i))))}
## Series: Trade_Balance
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.6843094
##
## Initial states:
## l[0]
## -47.14653
##
## sigma^2: 16.3982
##
## AIC AICc BIC
## 324.0390 324.5845 329.6526
## Series: Trade_Balance
## Model: ARIMA(0,1,1) w/ drift
##
## Coefficients:
## ma1 constant
## -0.3790 -0.6943
## s.e. 0.1211 0.3547
##
## sigma^2 estimated as 15.6: log likelihood=-130.3
## AIC=266.6 AICc=267.16 BIC=272.15
## Series: Trade_Balance
## Model: LM w/ ARIMA(0,1,1) errors
##
## Coefficients:
## ma1 Imports intercept
## -0.3787 -0.0582 -0.6249
## s.e. 0.1263 0.0607 0.3587
##
## sigma^2 estimated as 15.64: log likelihood=-129.84
## AIC=267.68 AICc=268.63 BIC=275.08
## Series: Trade_Balance
## Model: COMBINATION
## Combination: Trade_Balance * 0.333333333333333
##
## ==============================================
##
## Series: Trade_Balance
## Model: COMBINATION
## Combination: Trade_Balance + Trade_Balance
##
## ==========================================
##
## Series: Trade_Balance
## Model: COMBINATION
## Combination: Trade_Balance + Trade_Balance
##
## ==========================================
##
## Series: Trade_Balance
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.6843094
##
## Initial states:
## l[0]
## -47.14653
##
## sigma^2: 16.3982
##
## AIC AICc BIC
## 324.0390 324.5845 329.6526
##
## Series: Trade_Balance
## Model: ARIMA(0,1,1) w/ drift
##
## Coefficients:
## ma1 constant
## -0.3790 -0.6943
## s.e. 0.1211 0.3547
##
## sigma^2 estimated as 15.6: log likelihood=-130.3
## AIC=266.6 AICc=267.16 BIC=272.15
##
##
## Series: Trade_Balance
## Model: LM w/ ARIMA(0,1,1) errors
##
## Coefficients:
## ma1 Imports intercept
## -0.3787 -0.0582 -0.6249
## s.e. 0.1263 0.0607 0.3587
##
## sigma^2 estimated as 15.64: log likelihood=-129.84
## AIC=267.68 AICc=268.63 BIC=275.08
#Plot forecasts against held out data (test data)
myplot = function(model){model |> forecast(test) |>autoplot(myts)+geom_line(aes(y=.fitted, col=.model),
data=augment(model)) + ggtitle(names(model))}
for (i in 1:4){print(myplot(eval(parse(text=paste0('m',i)))))}
#Accuracy Metrics:
tmp3=c(rep(0,10))
mymat = matrix(rep(0, 4 * 10), ncol = 10)
for (i in 1:4){
tmp=eval(parse(text=paste0('m',i)))%>%forecast(test)
tmp2=tmp%>%accuracy(test)
tmp3=rbind(tmp2, tmp3)
}
tmp3=tmp3[tmp3$RMSE>0,]
tmp3$MASE=tmp3$RMSSE=tmp3$.type=NULL
tmp3%>%arrange(MAPE)
## # A tibble: 4 × 7
## .model ME RMSE MAE MPE MAPE ACF1
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets_model 0.0286 10.3 8.56 -1.69 10.9 0.580
## 2 ensemble_model 3.24 11.9 10.1 -6.00 13.3 0.639
## 3 dynamic_model 5.20 12.9 10.9 -8.55 14.5 0.665
## 4 arima_model 4.51 13.1 11.1 -7.75 14.7 0.657
#We find that our ETS model has the lowest Mean Absolute Percentage Error, the widest band in our test forecast fit, but lowest autocorrelation. The MAPE performance is followed by (in order from lowest to highest) our ensemble model, the dynamic arima model, then lastly our default arima model.'''