Base Chunks

Libraries

require(dplyr)
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 4.3.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.2
## ✔ lubridate   1.9.3     ✔ fable       0.3.3
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2
## ✔ tsibble     1.1.4
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
require(fredr)
## Loading required package: fredr
## Warning: package 'fredr' was built under R version 4.3.2
require(ggplot2)
require(httr)
## Loading required package: httr
## Warning: package 'httr' was built under R version 4.3.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2

API Keys (Hidden)

Load Data

url = "https://archive.ics.uci.edu/static/public/360/air+quality.zip"
try(unlink('c:/users/lfult/downloads/data', recursive=T))
try(unlink('c:/users/lfult/downloads/data.zip'))

download.file(url, 'c:/users/lfult/downloads/data.zip', mode = "wb")
dir.create('c:/users/lfult/downloads/data')
unzip('c:/users/lfult/downloads/data.zip', exdir='c:/users/lfult/downloads/data')

mydata=read_excel('c:/users/lfult/downloads/data/AirQualityUCI.xlsx',sheet="AirQualityUCI")
head(mydata)
## # A tibble: 6 × 15
##   Date                Time                `CO(GT)` `PT08.S1(CO)` `NMHC(GT)`
##   <dttm>              <dttm>                 <dbl>         <dbl>      <dbl>
## 1 2004-03-10 00:00:00 1899-12-31 18:00:00      2.6         1360         150
## 2 2004-03-10 00:00:00 1899-12-31 19:00:00      2           1292.        112
## 3 2004-03-10 00:00:00 1899-12-31 20:00:00      2.2         1402          88
## 4 2004-03-10 00:00:00 1899-12-31 21:00:00      2.2         1376.         80
## 5 2004-03-10 00:00:00 1899-12-31 22:00:00      1.6         1272.         51
## 6 2004-03-10 00:00:00 1899-12-31 23:00:00      1.2         1197          38
## # ℹ 10 more variables: `C6H6(GT)` <dbl>, `PT08.S2(NMHC)` <dbl>,
## #   `NOx(GT)` <dbl>, `PT08.S3(NOx)` <dbl>, `NO2(GT)` <dbl>,
## #   `PT08.S4(NO2)` <dbl>, `PT08.S5(O3)` <dbl>, T <dbl>, RH <dbl>, AH <dbl>

Aggregate

mydata$Date=mydata$Date%>%yearmonth(Date)

monthdata <- mydata %>% group_by(Date) %>%
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))

head(monthdata)
## # A tibble: 6 × 15
##       Date Time                `CO(GT)` `PT08.S1(CO)` `NMHC(GT)` `C6H6(GT)`
##      <mth> <dttm>                 <dbl>         <dbl>      <dbl>      <dbl>
## 1 2004 Mar 1899-12-31 11:36:22    -4.85         1223.       96.6      9.94 
## 2 2004 Apr 1899-12-31 11:30:00   -60.9          1112.      121.       2.50 
## 3 2004 May 1899-12-31 11:30:00   -39.3          1053.     -199.       6.26 
## 4 2004 Jun 1899-12-31 11:30:00   -19.4           956.     -200       -0.520
## 5 2004 Jul 1899-12-31 11:30:00   -48.7          1045.     -200       10.3  
## 6 2004 Aug 1899-12-31 11:30:00   -71.0           903.     -200       -6.64 
## # ℹ 9 more variables: `PT08.S2(NMHC)` <dbl>, `NOx(GT)` <dbl>,
## #   `PT08.S3(NOx)` <dbl>, `NO2(GT)` <dbl>, `PT08.S4(NO2)` <dbl>,
## #   `PT08.S5(O3)` <dbl>, T <dbl>, RH <dbl>, AH <dbl>

Convert to tsibble

monthdata$y=monthdata$`CO(GT)`
ts=as_tsibble(monthdata, index = Date)
train=as_tsibble(ts[1:12,], index=Date)
test=as_tsibble(ts[13:nrow(ts),], index=Date)

Plots

autoplot(ts)
## Plot variable not specified, automatically selected `.vars = CO(GT)`

ts%>%ACF(y)%>%autoplot()

ts%>%PACF(y)%>%autoplot()

Build Models

m1=train %>%model(ARIMA(y))
m2=train %>% model(ETS(y))

Forecast

f1=m1 %>% forecast(test)
f2=m2 %>%  forecast(test)

Accuracy on Test Set

a1=accuracy(f1, test)
a2=accuracy(f2, test)
rbind(a1, a2)
## # A tibble: 2 × 10
##   .model   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(y) Test   32.6  32.6  32.6 -958.  958.   NaN   NaN  -0.5
## 2 ETS(y)   Test   32.6  32.6  32.6 -958.  958.   NaN   NaN  -0.5

Report

print(report(m1))
## Series: y 
## Model: ARIMA(0,0,0) w/ mean 
## 
## Coefficients:
##       constant
##       -36.0253
## s.e.    7.9255
## 
## sigma^2 estimated as 822.3:  log likelihood=-56.78
## AIC=117.56   AICc=118.89   BIC=118.53
## # A mable: 1 x 1
##               `ARIMA(y)`
##                  <model>
## 1 <ARIMA(0,0,0) w/ mean>
print(report(m2))
## Series: y 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.0001001072 
## 
##   Initial states:
##      l[0]
##  -36.0176
## 
##   sigma^2:  904.6043
## 
##      AIC     AICc      BIC 
## 115.3210 118.3210 116.7757 
## # A mable: 1 x 1
##       `ETS(y)`
##        <model>
## 1 <ETS(A,N,N)>

Plot

# Plot the forecasts
f1df <- as.data.frame(f1)
f2df <- as.data.frame(f2)
tsdf <- as.data.frame(ts)

# Plot using ggplot2
ggplot(tsdf, aes(x = Date, y = y)) +
  geom_line(color = "black", linewidth = .5) +
  geom_line(data = f1df, aes(x = Date, y = .mean, color = "ARIMA Forecast"), linetype = "dashed") +
  geom_line(data = f2df, aes(x = Date, y = .mean, color = "ETS Forecast"), linetype = "dotted") +
  ggtitle("Forecast Comparison") +
  xlab("Year") +
  ylab("Value") +
  scale_color_manual(name = "Forecast",
                     values = c("ARIMA Forecast" = "blue", "ETS Forecast" = "red")) +
  theme_minimal() +
  theme(legend.position = "bottom")