Base Chunks
Libraries
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 4.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.2
## ✔ lubridate 1.9.3 ✔ fable 0.3.3
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## ✔ tsibble 1.1.4
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
## Loading required package: fredr
## Warning: package 'fredr' was built under R version 4.3.2
require(ggplot2)
require(httr)
## Loading required package: httr
## Warning: package 'httr' was built under R version 4.3.2
## Warning: package 'readxl' was built under R version 4.3.2
API Keys (Hidden)
Load Data
url = "https://archive.ics.uci.edu/static/public/360/air+quality.zip"
try(unlink('c:/users/lfult/downloads/data', recursive=T))
try(unlink('c:/users/lfult/downloads/data.zip'))
download.file(url, 'c:/users/lfult/downloads/data.zip', mode = "wb")
dir.create('c:/users/lfult/downloads/data')
unzip('c:/users/lfult/downloads/data.zip', exdir='c:/users/lfult/downloads/data')
mydata=read_excel('c:/users/lfult/downloads/data/AirQualityUCI.xlsx',sheet="AirQualityUCI")
head(mydata)
## # A tibble: 6 × 15
## Date Time `CO(GT)` `PT08.S1(CO)` `NMHC(GT)`
## <dttm> <dttm> <dbl> <dbl> <dbl>
## 1 2004-03-10 00:00:00 1899-12-31 18:00:00 2.6 1360 150
## 2 2004-03-10 00:00:00 1899-12-31 19:00:00 2 1292. 112
## 3 2004-03-10 00:00:00 1899-12-31 20:00:00 2.2 1402 88
## 4 2004-03-10 00:00:00 1899-12-31 21:00:00 2.2 1376. 80
## 5 2004-03-10 00:00:00 1899-12-31 22:00:00 1.6 1272. 51
## 6 2004-03-10 00:00:00 1899-12-31 23:00:00 1.2 1197 38
## # ℹ 10 more variables: `C6H6(GT)` <dbl>, `PT08.S2(NMHC)` <dbl>,
## # `NOx(GT)` <dbl>, `PT08.S3(NOx)` <dbl>, `NO2(GT)` <dbl>,
## # `PT08.S4(NO2)` <dbl>, `PT08.S5(O3)` <dbl>, T <dbl>, RH <dbl>, AH <dbl>
Aggregate
mydata$Date=mydata$Date%>%yearmonth(Date)
monthdata <- mydata %>% group_by(Date) %>%
summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))
head(monthdata)
## # A tibble: 6 × 15
## Date Time `CO(GT)` `PT08.S1(CO)` `NMHC(GT)` `C6H6(GT)`
## <mth> <dttm> <dbl> <dbl> <dbl> <dbl>
## 1 2004 Mar 1899-12-31 11:36:22 -4.85 1223. 96.6 9.94
## 2 2004 Apr 1899-12-31 11:30:00 -60.9 1112. 121. 2.50
## 3 2004 May 1899-12-31 11:30:00 -39.3 1053. -199. 6.26
## 4 2004 Jun 1899-12-31 11:30:00 -19.4 956. -200 -0.520
## 5 2004 Jul 1899-12-31 11:30:00 -48.7 1045. -200 10.3
## 6 2004 Aug 1899-12-31 11:30:00 -71.0 903. -200 -6.64
## # ℹ 9 more variables: `PT08.S2(NMHC)` <dbl>, `NOx(GT)` <dbl>,
## # `PT08.S3(NOx)` <dbl>, `NO2(GT)` <dbl>, `PT08.S4(NO2)` <dbl>,
## # `PT08.S5(O3)` <dbl>, T <dbl>, RH <dbl>, AH <dbl>
Convert to tsibble
monthdata$y=monthdata$`CO(GT)`
ts=as_tsibble(monthdata, index = Date)
train=as_tsibble(ts[1:12,], index=Date)
test=as_tsibble(ts[13:nrow(ts),], index=Date)
Plots
## Plot variable not specified, automatically selected `.vars = CO(GT)`


ts%>%PACF(y)%>%autoplot()

Build Models
m1=train %>%model(ARIMA(y))
m2=train %>% model(ETS(y))
Forecast
f1=m1 %>% forecast(test)
f2=m2 %>% forecast(test)
Accuracy on Test Set
a1=accuracy(f1, test)
a2=accuracy(f2, test)
rbind(a1, a2)
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(y) Test 32.6 32.6 32.6 -958. 958. NaN NaN -0.5
## 2 ETS(y) Test 32.6 32.6 32.6 -958. 958. NaN NaN -0.5
Report
## Series: y
## Model: ARIMA(0,0,0) w/ mean
##
## Coefficients:
## constant
## -36.0253
## s.e. 7.9255
##
## sigma^2 estimated as 822.3: log likelihood=-56.78
## AIC=117.56 AICc=118.89 BIC=118.53
## # A mable: 1 x 1
## `ARIMA(y)`
## <model>
## 1 <ARIMA(0,0,0) w/ mean>
## Series: y
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.0001001072
##
## Initial states:
## l[0]
## -36.0176
##
## sigma^2: 904.6043
##
## AIC AICc BIC
## 115.3210 118.3210 116.7757
## # A mable: 1 x 1
## `ETS(y)`
## <model>
## 1 <ETS(A,N,N)>
Plot
# Plot the forecasts
f1df <- as.data.frame(f1)
f2df <- as.data.frame(f2)
tsdf <- as.data.frame(ts)
# Plot using ggplot2
ggplot(tsdf, aes(x = Date, y = y)) +
geom_line(color = "black", linewidth = .5) +
geom_line(data = f1df, aes(x = Date, y = .mean, color = "ARIMA Forecast"), linetype = "dashed") +
geom_line(data = f2df, aes(x = Date, y = .mean, color = "ETS Forecast"), linetype = "dotted") +
ggtitle("Forecast Comparison") +
xlab("Year") +
ylab("Value") +
scale_color_manual(name = "Forecast",
values = c("ARIMA Forecast" = "blue", "ETS Forecast" = "red")) +
theme_minimal() +
theme(legend.position = "bottom")
