Base Chunks
Libraries
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 4.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.2
## ✔ lubridate 1.9.3 ✔ fable 0.3.3
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## ✔ tsibble 1.1.4
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
## Loading required package: fredr
## Warning: package 'fredr' was built under R version 4.3.2
require(ggplot2)
require(httr)
## Loading required package: httr
## Warning: package 'httr' was built under R version 4.3.2
## Warning: package 'readxl' was built under R version 4.3.2
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'forcats' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
API Keys (Hidden)
Load Data
mydata=as.data.frame(AirPassengers)
mydata$values=mydata$x
mydata$x=NULL
mydata$Date=seq(as.Date("1949/1/1"), as.Date("1960/12/1"), "month")
mydata
## values Date
## 1 112 1949-01-01
## 2 118 1949-02-01
## 3 132 1949-03-01
## 4 129 1949-04-01
## 5 121 1949-05-01
## 6 135 1949-06-01
## 7 148 1949-07-01
## 8 148 1949-08-01
## 9 136 1949-09-01
## 10 119 1949-10-01
## 11 104 1949-11-01
## 12 118 1949-12-01
## 13 115 1950-01-01
## 14 126 1950-02-01
## 15 141 1950-03-01
## 16 135 1950-04-01
## 17 125 1950-05-01
## 18 149 1950-06-01
## 19 170 1950-07-01
## 20 170 1950-08-01
## 21 158 1950-09-01
## 22 133 1950-10-01
## 23 114 1950-11-01
## 24 140 1950-12-01
## 25 145 1951-01-01
## 26 150 1951-02-01
## 27 178 1951-03-01
## 28 163 1951-04-01
## 29 172 1951-05-01
## 30 178 1951-06-01
## 31 199 1951-07-01
## 32 199 1951-08-01
## 33 184 1951-09-01
## 34 162 1951-10-01
## 35 146 1951-11-01
## 36 166 1951-12-01
## 37 171 1952-01-01
## 38 180 1952-02-01
## 39 193 1952-03-01
## 40 181 1952-04-01
## 41 183 1952-05-01
## 42 218 1952-06-01
## 43 230 1952-07-01
## 44 242 1952-08-01
## 45 209 1952-09-01
## 46 191 1952-10-01
## 47 172 1952-11-01
## 48 194 1952-12-01
## 49 196 1953-01-01
## 50 196 1953-02-01
## 51 236 1953-03-01
## 52 235 1953-04-01
## 53 229 1953-05-01
## 54 243 1953-06-01
## 55 264 1953-07-01
## 56 272 1953-08-01
## 57 237 1953-09-01
## 58 211 1953-10-01
## 59 180 1953-11-01
## 60 201 1953-12-01
## 61 204 1954-01-01
## 62 188 1954-02-01
## 63 235 1954-03-01
## 64 227 1954-04-01
## 65 234 1954-05-01
## 66 264 1954-06-01
## 67 302 1954-07-01
## 68 293 1954-08-01
## 69 259 1954-09-01
## 70 229 1954-10-01
## 71 203 1954-11-01
## 72 229 1954-12-01
## 73 242 1955-01-01
## 74 233 1955-02-01
## 75 267 1955-03-01
## 76 269 1955-04-01
## 77 270 1955-05-01
## 78 315 1955-06-01
## 79 364 1955-07-01
## 80 347 1955-08-01
## 81 312 1955-09-01
## 82 274 1955-10-01
## 83 237 1955-11-01
## 84 278 1955-12-01
## 85 284 1956-01-01
## 86 277 1956-02-01
## 87 317 1956-03-01
## 88 313 1956-04-01
## 89 318 1956-05-01
## 90 374 1956-06-01
## 91 413 1956-07-01
## 92 405 1956-08-01
## 93 355 1956-09-01
## 94 306 1956-10-01
## 95 271 1956-11-01
## 96 306 1956-12-01
## 97 315 1957-01-01
## 98 301 1957-02-01
## 99 356 1957-03-01
## 100 348 1957-04-01
## 101 355 1957-05-01
## 102 422 1957-06-01
## 103 465 1957-07-01
## 104 467 1957-08-01
## 105 404 1957-09-01
## 106 347 1957-10-01
## 107 305 1957-11-01
## 108 336 1957-12-01
## 109 340 1958-01-01
## 110 318 1958-02-01
## 111 362 1958-03-01
## 112 348 1958-04-01
## 113 363 1958-05-01
## 114 435 1958-06-01
## 115 491 1958-07-01
## 116 505 1958-08-01
## 117 404 1958-09-01
## 118 359 1958-10-01
## 119 310 1958-11-01
## 120 337 1958-12-01
## 121 360 1959-01-01
## 122 342 1959-02-01
## 123 406 1959-03-01
## 124 396 1959-04-01
## 125 420 1959-05-01
## 126 472 1959-06-01
## 127 548 1959-07-01
## 128 559 1959-08-01
## 129 463 1959-09-01
## 130 407 1959-10-01
## 131 362 1959-11-01
## 132 405 1959-12-01
## 133 417 1960-01-01
## 134 391 1960-02-01
## 135 419 1960-03-01
## 136 461 1960-04-01
## 137 472 1960-05-01
## 138 535 1960-06-01
## 139 622 1960-07-01
## 140 606 1960-08-01
## 141 508 1960-09-01
## 142 461 1960-10-01
## 143 390 1960-11-01
## 144 432 1960-12-01
Aggregate
mydata$Date=mydata$Date%>%yearmonth(Date)
head(mydata)
## values Date
## 1 112 1949 Jan
## 2 118 1949 Feb
## 3 132 1949 Mar
## 4 129 1949 Apr
## 5 121 1949 May
## 6 135 1949 Jun
Convert to tsibble
ts=as_tsibble(mydata, index = Date)
train=as_tsibble(ts[1:132,], index=Date)
test=as_tsibble(ts[133:nrow(ts),], index=Date)
Plots
## Plot variable not specified, automatically selected `.vars = values`

ts%>%ACF(values)%>%autoplot()

ts%>%PACF(values)%>%autoplot()

Build Models
m1=train %>%model(ARIMA(values))
m2=train %>% model(ETS(values))
Forecast
f1=m1 %>% forecast(test)
f2=m2 %>% forecast(test)
f3=fitted(m1, h=12) #12 month forward forecasts
f4=fitted(m2,h=12) #12 month forward forecasts
Plot
ts |>
autoplot(values) +
autolayer(f3, .fitted, col = "red") +
#autolayer(f4, .fitted, col = "blue") +
autolayer(f1, color='pink')+
#autolayer(f2, color='cyan')+
labs(title = "ARIMA, Airline Passengers by Year, one Frequency Forward Forecasts", y = "Pax")
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).

ts |>
autoplot(values) +
#autolayer(f3, .fitted, col = "red") +
autolayer(f4, .fitted, col = "blue") +
#autolayer(f1, color='pink')+
autolayer(f2, color='cyan')+
labs(title = "ETS, Airline Passengers by Year, one Frequency Forward Forecasts", y = "Pax")
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).

Accuracy on Test Set
a1=accuracy(f1, test)
a2=accuracy(f2, test)
rbind(a1, a2)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(values) Test 4.23 18.5 14.9 0.515 3.10 NaN NaN 0.230
## 2 ETS(values) Test 12.1 27.4 22.8 2.05 4.66 NaN NaN 0.484
Report
## Series: values
## Model: ARIMA(3,0,0)(0,1,0)[12] w/ drift
##
## Coefficients:
## ar1 ar2 ar3 constant
## 0.7049 0.2575 -0.1433 5.5278
## s.e. 0.0913 0.1093 0.0923 0.8866
##
## sigma^2 estimated as 104.6: log likelihood=-447.83
## AIC=905.66 AICc=906.19 BIC=919.6
## # A mable: 1 x 1
## `ARIMA(values)`
## <model>
## 1 <ARIMA(3,0,0)(0,1,0)[12] w/ drift>
## Series: values
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.7580163
## beta = 0.02129611
## gamma = 0.0001059297
## phi = 0.9799997
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 120.7483 1.763212 0.8970477 0.7980437 0.9190177 1.058714 1.215571 1.225056
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 1.107488 0.9781689 0.9803688 1.020673 0.8925558 0.907294
##
## sigma^2: 0.0014
##
## AIC AICc BIC
## 1244.458 1250.511 1296.348
## # A mable: 1 x 1
## `ETS(values)`
## <model>
## 1 <ETS(M,Ad,M)>
Plot
# Plot the forecasts
f1df <- as.data.frame(f1)
f2df <- as.data.frame(f2)
tsdf <- as.data.frame(ts)
# Plot using ggplot2
ggplot(tsdf, aes(x = Date, y = values)) +
geom_line(color = "black", linewidth = .5) +
geom_line(data = f1df, aes(x = Date, y = .mean, color = "ARIMA Forecast"), linetype = "dashed") +
geom_line(data = f2df, aes(x = Date, y = .mean, color = "ETS Forecast"), linetype = "dotted") +
ggtitle("Forecast Comparison") +
xlab("Year") +
ylab("Value") +
scale_color_manual(name = "Forecast",
values = c("ARIMA Forecast" = "blue", "ETS Forecast" = "red")) +
theme_minimal() +
theme(legend.position = "bottom")
