knitr::opts_chunk$set(echo = TRUE)
# Core packages
library(fpp3) # tsibble, fable, feasts, distributional, ggplot2 (via fpp3)
## Warning: package 'fpp3' was built under R version 4.4.3
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## Warning: package 'tsibble' was built under R version 4.4.3
## Warning: package 'tsibbledata' was built under R version 4.4.3
## Warning: package 'feasts' was built under R version 4.4.3
## Warning: package 'fabletools' was built under R version 4.4.3
## Warning: package 'fable' was built under R version 4.4.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(ggplot2) # explicit, but fpp3 also attaches it
# Problem 1
## 1(a): Use help() interactively (commented here so knitting won't choke)
# help(aus_production)
# help(pelt)
# help(gafa_stock)
# help(vic_elec)
## 1(b): Time interval of each series
interval(aus_production) # Bricks series interval
## <interval[1]>
## [1] 1Q
interval(pelt) # Lynx/Hare series interval
## <interval[1]>
## [1] 1Y
interval(gafa_stock) # Close price series interval
## <interval[1]>
## [1] !
interval(vic_elec) # Demand series interval
## <interval[1]>
## [1] 30m
## 1(c) & 1(d): Plot each series
# Bricks from aus_production
autoplot(aus_production, Bricks) +
ggtitle("Australian Brick Production") +
ylab("Millions of Units") + xlab("Quarter")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Lynx from pelt
autoplot(pelt, Lynx) +
ggtitle("Annual Lynx Pelts (Canada)") +
ylab("Number of Pelts") + xlab("Year")

# Close from gafa_stock
autoplot(gafa_stock, Close) +
ggtitle("GAFA Stock Closing Prices") +
ylab("Stock Price (USD)") + xlab("Date")

# Demand from vic_elec (customized axes and title for part d)
autoplot(vic_elec, Demand) +
ggtitle("Electricity Demand in Victoria (Australia)") +
ylab("Electricity Demand (MW)") + xlab("Year")

# Problem 2
# Date of peak closing price for each stock in gafa_stock (break ties consistently)
gafa_stock |>
group_by(Symbol) |>
slice_max(Close, n = 1, with_ties = FALSE) |>
ungroup()
## # A tsibble: 4 x 8 [!]
## # Key: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
## 2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
## 3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
## 4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
# Problem 3
## 3(a): Compare arrivals from Japan, NZ, UK, US using plots
# Time series plots
autoplot(aus_arrivals, Arrivals) +
labs(y = "Arrivals (thousands)",
title = "Quarterly Arrivals to Australia (Japan, NZ, UK, US)")

# Seasonal plots
gg_season(aus_arrivals, Arrivals) +
labs(y = "Arrivals (thousands)",
title = "Seasonal Plot: International Arrivals by Country")

# Subseries plots
gg_subseries(aus_arrivals, Arrivals) +
labs(y = "Arrivals (thousands)",
title = "Subseries Plot: International Arrivals by Country")

# Problem 4
## 4.1 Total Private Employed (us_employment)
total_private <- us_employment |>
filter(Title == "Total Private")
autoplot(total_private, Employed) +
labs(title = "Total Private Employment (US)", y = "Number of Employees", x = "Year")

gg_season(total_private, Employed) +
labs(title = "Seasonal Plot: Total Private Employment", y = "Employees")

gg_subseries(total_private, Employed) +
labs(title = "Subseries Plot: Total Private Employment", y = "Employees")

gg_lag(total_private, Employed, geom = "point") +
labs(title = "Lag Plot: Total Private Employment")

total_private |>
ACF(Employed) |>
autoplot() +
labs(title = "ACF: Total Private Employment")

## 4.2 Bricks (aus_production)
bricks_ts <- aus_production |>
filter(!is.na(Bricks))
autoplot(bricks_ts, Bricks) +
labs(title = "Australian Clay Brick Production", y = "Millions of Units", x = "Year")

gg_season(bricks_ts, Bricks) +
labs(title = "Seasonal Plot: Brick Production", y = "Millions of Units")

gg_subseries(bricks_ts, Bricks) +
labs(title = "Subseries Plot: Brick Production", y = "Millions of Units")

gg_lag(bricks_ts, Bricks, geom = "point") +
labs(title = "Lag Plot: Brick Production")

bricks_ts |>
ACF(Bricks) |>
autoplot() +
labs(title = "ACF: Australian Brick Production")

## 4.3 Hare (pelt)
autoplot(pelt, Hare) +
labs(title = "Hare Pelts Traded (Canada)", y = "Number of Pelts", x = "Year")

# (Seasonal and subseries plots are less informative for annual data)
gg_lag(pelt, Hare, geom = "point") +
labs(title = "Lag Plot: Hare Pelts")

pelt |>
ACF(Hare) |>
autoplot() +
labs(title = "ACF: Hare Pelts")

## 4.4 H02 Cost (PBS) — fixed
# Monthly totals (no index_by overwrite)
h02_monthly <- PBS |>
filter(ATC2 == "H02") |>
summarise(Cost = sum(Cost))
# Use a short alias for downstream plots
h02 <- h02_monthly
autoplot(h02, Cost) +
labs(title = "Monthly Cost of H02 (Corticosteroids)", y = "Total Cost", x = "Year")

# Yearly totals (collapse months safely with a *new* index)
h02_yearly <- PBS |>
filter(ATC2 == "H02") |>
index_by(Year = year(Month)) |>
summarise(Cost = sum(Cost))
autoplot(h02_yearly, Cost) +
labs(title = "Annual Cost of H02 (Corticosteroids)", y = "Total Cost", x = "Year")

# Seasonality / subseries / lag / ACF on monthly data
gg_season(h02, Cost) +
labs(title = "Seasonal Plot: H02 Cost", y = "Cost (AUD)")

gg_subseries(h02, Cost) +
labs(title = "Subseries Plot: H02 Cost", y = "Cost (AUD)")

gg_lag(h02, Cost, geom = "point") +
labs(title = "Lag Plot: H02 Cost")

h02 |>
ACF(Cost) |>
autoplot() +
labs(title = "ACF: H02 Cost")

## 4.5 Barrels (us_gasoline)
autoplot(us_gasoline, Barrels) +
labs(title = "US Finished Motor Gasoline Supplied",
y = "Million Barrels/Day", x = "Year")

gg_season(us_gasoline, Barrels) +
labs(title = "Seasonal Plot: US Gasoline Supply", y = "Million Barrels/Day")

gg_subseries(us_gasoline, Barrels) +
labs(title = "Subseries Plot: US Gasoline Supply", y = "Million Barrels/Day")

gg_lag(us_gasoline, Barrels, geom = "point") +
labs(title = "Lag Plot: US Gasoline Supply")

us_gasoline |>
ACF(Barrels) |>
autoplot() +
labs(title = "ACF: US Gasoline Supply")

# Problem 5
## 5(a): Prepare January 2014 daily data and plot it; fit regression model
jan14_vic_elec <- vic_elec |>
filter(yearmonth(Time) == yearmonth("2014 Jan")) |>
index_by(Date = as_date(Time)) |>
summarise(
Demand = sum(Demand),
Temperature = max(Temperature)
)
# Show the tibble
print(jan14_vic_elec)
## # A tsibble: 31 x 3 [1D]
## Date Demand Temperature
## <date> <dbl> <dbl>
## 1 2014-01-01 175185. 26
## 2 2014-01-02 188351. 23
## 3 2014-01-03 189086. 22.2
## 4 2014-01-04 173798. 20.3
## 5 2014-01-05 169733. 26.1
## 6 2014-01-06 195241. 19.6
## 7 2014-01-07 199770. 20
## 8 2014-01-08 205339. 27.4
## 9 2014-01-09 227334. 32.4
## 10 2014-01-10 258111. 34
## # ℹ 21 more rows
# Plot daily Demand and Temperature over time (faceted)
jan14_vic_elec |>
pivot_longer(cols = c(Demand, Temperature), names_to = "Variable", values_to = "Value") |>
ggplot(aes(x = Date, y = Value)) +
geom_line() +
facet_grid(rows = vars(Variable), scales = "free_y") +
labs(title = "Jan 2014 Electricity Demand and Temperature", x = "Date", y = NULL)

# Scatter plot of Demand vs Temperature (Jan 2014)
ggplot(jan14_vic_elec, aes(x = Temperature, y = Demand)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "Demand vs Temperature (Jan 2014)", x = "Max Temperature (°C)", y = "Daily Demand")
## `geom_smooth()` using formula = 'y ~ x'

## 5(b): Fit linear model and plot residuals
fit <- jan14_vic_elec |>
model(TSLM(Demand ~ Temperature))
fit |>
gg_tsresiduals()

## 5(c): Forecast next day demand for Temperature = 15°C vs 35°C
fc_15 <- fit |>
forecast(new_data(jan14_vic_elec, 1) |> mutate(Temperature = 15))
fc_35 <- fit |>
forecast(new_data(jan14_vic_elec, 1) |> mutate(Temperature = 35))
autoplot(fc_15, jan14_vic_elec) +
labs(title = "Forecast with Temperature = 15°C", y = "Demand", x = "Date")

autoplot(fc_35, jan14_vic_elec) +
labs(title = "Forecast with Temperature = 35°C", y = "Demand", x = "Date")

## 5(d): Show prediction intervals for the two forecasts
fc_tbl <- dplyr::bind_rows(
as_tibble(hilo(fc_15, 80, 95)) |> mutate(Scenario = "Temp = 15°C"),
as_tibble(hilo(fc_35, 80, 95)) |> mutate(Scenario = "Temp = 35°C")
)
# Expand the 80% and 95% interval columns into lower/upper bounds
## 5(d): Show prediction intervals for the two forecasts
fc_tbl <- dplyr::bind_rows(
as_tibble(hilo(fc_15, c(80, 95))) |> mutate(Scenario = "Temp = 15°C"),
as_tibble(hilo(fc_35, c(80, 95))) |> mutate(Scenario = "Temp = 35°C")
)
fc_clean <- fc_tbl |>
fabletools::unpack_hilo(`80%`) |>
fabletools::unpack_hilo(`95%`) |>
select(Scenario, .mean, `80%_lower`, `80%_upper`, `95%_lower`, `95%_upper`)
print(fc_clean)
## # A tibble: 2 × 6
## Scenario .mean `80%_lower` `80%_upper` `95%_lower` `95%_upper`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Temp = 15°C 151398. 117908. 184889. 100179. 202617.
## 2 Temp = 35°C 274484. 242088. 306880. 224939. 324029.
## 5(e): Plot Demand vs Temperature for all available data (daily aggregates)
vic_elec_daily <- vic_elec |>
index_by(Date = as_date(Time)) |>
summarise(
Demand = sum(Demand),
Temperature = max(Temperature)
)
# Show the tibble
print(vic_elec_daily)
## # A tsibble: 1,096 x 3 [1D]
## Date Demand Temperature
## <date> <dbl> <dbl>
## 1 2012-01-01 222438. 32.7
## 2 2012-01-02 257965. 39.6
## 3 2012-01-03 267099. 31.8
## 4 2012-01-04 222742. 25.1
## 5 2012-01-05 210585. 21.2
## 6 2012-01-06 210247. 23.6
## 7 2012-01-07 202526. 29
## 8 2012-01-08 193413. 27.8
## 9 2012-01-09 213804. 24
## 10 2012-01-10 215020. 19.6
## # ℹ 1,086 more rows
# Scatter plot
ggplot(vic_elec_daily, aes(x = Temperature, y = Demand)) +
geom_point() +
labs(title = "Daily Demand vs Temperature (All Data)",
x = "Max Temperature (°C)", y = "Daily Demand")

# Problem 6
## 6(a): Time plot of monthly sales
autoplot(souvenirs, Sales) +
labs(title = "Souvenir Shop Monthly Sales", y = "Sales", x = "Year")

## 6(b): Note — Taking logs is helpful for multiplicative seasonality / variance
## 6(c): Regression on log sales with trend, seasonal dummies, and festival dummy
souvenirs <- souvenirs |>
mutate(Festival = if_else(year(Month) >= 1988 & month(Month) == 3, 1, 0))
souvenirs_mod <- souvenirs |>
model(TSLM(log(Sales) ~ trend() + season() + Festival))
## 6(d): Residuals vs time and vs fitted
aug <- augment(souvenirs_mod)
# Residuals over time
ggplot(aug, aes(x = Month, y = .resid)) +
geom_line() +
labs(title = "Residuals vs Time", x = "Year", y = "Residuals")

# Residuals vs fitted
ggplot(aug, aes(x = .fitted, y = .resid)) +
geom_point() +
labs(title = "Residuals vs Fitted", x = "Fitted values", y = "Residuals")

## 6(e): Boxplots of residuals by month
aug <- aug |>
mutate(MonthFac = factor(month(Month), labels = month.abb))
ggplot(aug, aes(x = MonthFac, y = .resid)) +
geom_boxplot() +
labs(title = "Residuals by Month", x = "Month", y = "Residuals")

## 6(f): Model coefficients
tidy(souvenirs_mod)
## # A tibble: 14 × 6
## .model term estimate std.error statistic p.value
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 TSLM(log(Sales) ~ trend() + seas… (Int… 7.62 0.0742 103. 4.67e-78
## 2 TSLM(log(Sales) ~ trend() + seas… tren… 0.0220 0.000827 26.6 2.32e-38
## 3 TSLM(log(Sales) ~ trend() + seas… seas… 0.251 0.0957 2.63 1.06e- 2
## 4 TSLM(log(Sales) ~ trend() + seas… seas… 0.266 0.193 1.38 1.73e- 1
## 5 TSLM(log(Sales) ~ trend() + seas… seas… 0.384 0.0957 4.01 1.48e- 4
## 6 TSLM(log(Sales) ~ trend() + seas… seas… 0.409 0.0957 4.28 5.88e- 5
## 7 TSLM(log(Sales) ~ trend() + seas… seas… 0.449 0.0958 4.69 1.33e- 5
## 8 TSLM(log(Sales) ~ trend() + seas… seas… 0.610 0.0958 6.37 1.71e- 8
## 9 TSLM(log(Sales) ~ trend() + seas… seas… 0.588 0.0959 6.13 4.53e- 8
## 10 TSLM(log(Sales) ~ trend() + seas… seas… 0.669 0.0959 6.98 1.36e- 9
## 11 TSLM(log(Sales) ~ trend() + seas… seas… 0.747 0.0960 7.79 4.48e-11
## 12 TSLM(log(Sales) ~ trend() + seas… seas… 1.21 0.0960 12.6 1.29e-19
## 13 TSLM(log(Sales) ~ trend() + seas… seas… 1.96 0.0961 20.4 3.39e-31
## 14 TSLM(log(Sales) ~ trend() + seas… Fest… 0.502 0.196 2.55 1.29e- 2
## 6(g): Ljung-Box test on residuals (24 lags; dof = number of regressors incl. intercept - 1)
aug |>
features(.resid, ljung_box, lag = 24, dof = 14)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 TSLM(log(Sales) ~ trend() + season() + Festival) 135. 0
## 6(h) & 6(i): Forecast monthly sales for 1994–1996 with prediction intervals
future <- new_data(souvenirs, 36) |>
mutate(Festival = if_else(year(Month) >= 1988 & month(Month) == 3, 1, 0))
fc <- souvenirs_mod |>
forecast(new_data = future)
autoplot(fc, souvenirs) +
labs(title = "Forecasts for 1994–1996", y = "Sales", x = "Year")

## 6(j): Improve predictions by adding an ARIMA error term (dynamic regression)
souvenirs_mod2 <- souvenirs |>
model(ARIMA(log(Sales) ~ trend() + season() + Festival))
report(souvenirs_mod2)
## Series: Sales
## Model: LM w/ ARIMA(1,0,2) errors
## Transformation: log(Sales)
##
## Coefficients:
## ar1 ma1 ma2 trend() season()year2 season()year3
## 0.5565 -0.1288 0.3404 0.0226 0.2516 0.2969
## s.e. 0.1787 0.1923 0.1139 0.0015 0.0574 0.1185
## season()year4 season()year5 season()year6 season()year7 season()year8
## 0.3769 0.4003 0.4379 0.5978 0.5732
## s.e. 0.0729 0.0789 0.0817 0.0827 0.0821
## season()year9 season()year10 season()year11 season()year12 Festival
## 0.6514 0.7245 1.1755 1.9274 0.4605
## s.e. 0.0799 0.0746 0.0629 0.0599 0.1192
## intercept
## 7.6040
## s.e. 0.0857
##
## sigma^2 estimated as 0.02022: log likelihood=53.81
## AIC=-71.62 AICc=-61.1 BIC=-27.87