2.10 Exercises

Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.

library(fpp3)
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.3     ✔ fable       0.4.0
## ✔ ggplot2     3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
aus <- aus_production
pel <- pelt
gafa <- gafa_stock
vic <- vic_elec

Use ? (or help()) to find out about the data in each series.

#?aus_production
#?pelt
#?gafa_stock
#?vic_elec

What is the time interval of each series?
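
The interval is stored in each tsibble, so a quick way to answer this directly (a small sketch using tsibble’s interval() helper, which fpp3 already attaches) is:

interval(aus_production)  # quarterly (1Q)
interval(pelt)            # annual (1Y)
interval(gafa_stock)      # irregular: daily observations on trading days only ([!])
interval(vic_elec)        # half-hourly (30m)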

glimpse(aus)
## Rows: 218
## Columns: 7
## $ Quarter     <qtr> 1956 Q1, 1956 Q2, 1956 Q3, 1956 Q4, 1957 Q1, 1957 Q2, 1957…
## $ Beer        <dbl> 284, 213, 227, 308, 262, 228, 236, 320, 272, 233, 237, 313…
## $ Tobacco     <dbl> 5225, 5178, 5297, 5681, 5577, 5651, 5317, 6152, 5758, 5641…
## $ Bricks      <dbl> 189, 204, 208, 197, 187, 214, 227, 222, 199, 229, 249, 234…
## $ Cement      <dbl> 465, 532, 561, 570, 529, 604, 603, 582, 554, 620, 646, 637…
## $ Electricity <dbl> 3923, 4436, 4806, 4418, 4339, 4811, 5259, 4735, 4608, 5196…
## $ Gas         <dbl> 5, 6, 7, 6, 5, 7, 7, 6, 5, 7, 8, 6, 5, 7, 8, 6, 6, 8, 8, 7…
library(dplyr)
library(tsibble)

# For aus_production

summary_data <- aus_production %>%
  index_by(Quarter) %>%
  summarise(max_bricks = max(Bricks, na.rm = TRUE))
## Warning: There were 20 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `max_bricks = max(Bricks, na.rm = TRUE)`.
## ℹ In group 199: `Quarter = 2005 Q3`.
## Caused by warning in `max()`:
## ! no non-missing arguments to max; returning -Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 19 remaining warnings.
# Display the result
print(summary_data)
## # A tsibble: 218 x 2 [1Q]
##    Quarter max_bricks
##      <qtr>      <dbl>
##  1 1956 Q1        189
##  2 1956 Q2        204
##  3 1956 Q3        208
##  4 1956 Q4        197
##  5 1957 Q1        187
##  6 1957 Q2        214
##  7 1957 Q3        227
##  8 1957 Q4        222
##  9 1958 Q1        199
## 10 1958 Q2        229
## # ℹ 208 more rows
glimpse(pel)
## Rows: 91
## Columns: 3
## $ Year <dbl> 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855,…
## $ Hare <dbl> 19580, 19600, 19610, 11990, 28040, 58000, 74600, 75090, 88480, 61…
## $ Lynx <dbl> 30090, 45150, 49150, 39520, 21230, 8420, 5560, 5080, 10170, 19600…
# Convert to a regular tibble and summarize
summary_data <- as_tibble(pelt) %>%
  group_by(Year) %>%
  summarise(max_lynx = max(Lynx, na.rm = TRUE), .groups = 'drop')

# Display the result
print(summary_data)
## # A tibble: 91 × 2
##     Year max_lynx
##    <dbl>    <dbl>
##  1  1845    30090
##  2  1846    45150
##  3  1847    49150
##  4  1848    39520
##  5  1849    21230
##  6  1850     8420
##  7  1851     5560
##  8  1852     5080
##  9  1853    10170
## 10  1854    19600
## # ℹ 81 more rows
# Convert to a regular tibble and summarize maximum closing prices
max_closing_prices <- as_tibble(gafa) %>%
  group_by(Symbol) %>%
  summarise(max_close = max(Close, na.rm = TRUE), .groups = 'drop')

# Display the result
print(max_closing_prices)
## # A tibble: 4 × 2
##   Symbol max_close
##   <chr>      <dbl>
## 1 AAPL        232.
## 2 AMZN       2040.
## 3 FB          218.
## 4 GOOG       1268.
# Convert to a regular tibble and summarize
max_demand_per_year <- as_tibble(vic) %>%
  group_by(Year = year(Date)) %>%
  summarise(max_demand = max(Demand, na.rm = TRUE), .groups = 'drop')

# Display the result
print(max_demand_per_year)
## # A tibble: 3 × 2
##    Year max_demand
##   <dbl>      <dbl>
## 1  2012      8443.
## 2  2013      8897.
## 3  2014      9345.

2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

gafa_stock %>% 
  group_by(Symbol) %>% 
  filter(Close == max(Close))
## # A tsibble: 4 x 8 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## 2 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
## 3 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
## 4 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600

The filter() operation has been used to extract the days when each stock reached its peak closing price, alongside other trading details such as the high, low, and adjusted close prices for those specific dates.
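
A compact variant (a sketch, not part of the original answer) keeps only the date and closing price of each peak:

gafa_stock %>%
  group_by(Symbol) %>%
  filter(Close == max(Close)) %>%
  ungroup() %>%
  select(Symbol, Date, Close)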

Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.

tute1 <- readr::read_csv("C:\\Users\\LENOVO\\Downloads\\tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(tute1)
## # A tibble: 6 × 4
##   Quarter    Sales AdBudget   GDP
##   <date>     <dbl>    <dbl> <dbl>
## 1 1981-03-01 1020.     659.  252.
## 2 1981-06-01  889.     589   291.
## 3 1981-09-01  795      512.  291.
## 4 1981-12-01 1004.     614.  292.
## 5 1982-03-01 1058.     647.  279.
## 6 1982-06-01  944.     602   254

Convert the data to time series

mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)

Construct time series plots of each of the three series

mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

Check what happens when you don’t include ‘facet_grid()’

mytimeseries %>% 
  pivot_longer(-Quarter) %>% 
  ggplot(aes(x = Quarter, y= value, color = name)) +
  geom_line()

When you don’t include facet_grid(), the result is a single plot where all three line graphs are combined into one panel. The only way to tell the lines apart is by their color and the legend. While this shows everything in one place, it is harder to analyze individual trends, especially when the variables are on different scales or overlap. In such cases the plot becomes crowded, making it difficult to focus on each variable’s behavior.
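
One way to make the single-panel version more readable despite the different scales (a sketch, assuming a simple per-series z-score standardization is acceptable) is:

mytimeseries %>%
  pivot_longer(-Quarter) %>%
  group_by(name) %>%
  mutate(value = (value - mean(value)) / sd(value)) %>%  # standardize each series
  ungroup() %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  labs(y = "standardized value")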

2.4

The USgas package contains data on the demand for natural gas in the US.

Install the USgas package.

library(USgas)

Create a tsibble from us_total with year as the index and state as the key.

# Create a tsibble
us_gas_tsibble <- us_total %>%
  as_tsibble(index = year, key = state)
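# Plot annual natural gas consumption by state for the New England area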
ne_area <- c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")
us_gas_tsibble %>% filter(state %in% ne_area) %>% 
  autoplot(y) +
  labs(y = "Gas Consumption (million cubic ft)", title = "Gas Consumption: New England")

Monthly Australian retail data is provided in aus_retail. Select one of the time series as follows

library(tsibbledata)
library(ggplot2)
library(fable)
library(fabletools)
library(feasts)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.0.2     ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fpp3)

# Load the aus_retail dataset
data("aus_retail")

# Set the seed and select a random retail series
set.seed(81525678)
myseries <- aus_retail |> 
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

# Display the selected series
print(myseries)
## # A tsibble: 441 x 5 [1M]
## # Key:       State, Industry [1]
##    State           Industry           `Series ID`    Month Turnover
##    <chr>           <chr>              <chr>          <mth>    <dbl>
##  1 South Australia Clothing retailing A3349360V   1982 Apr     19.1
##  2 South Australia Clothing retailing A3349360V   1982 May     21.6
##  3 South Australia Clothing retailing A3349360V   1982 Jun     18.3
##  4 South Australia Clothing retailing A3349360V   1982 Jul     18.6
##  5 South Australia Clothing retailing A3349360V   1982 Aug     17.1
##  6 South Australia Clothing retailing A3349360V   1982 Sep     18.2
##  7 South Australia Clothing retailing A3349360V   1982 Oct     20.7
##  8 South Australia Clothing retailing A3349360V   1982 Nov     23.6
##  9 South Australia Clothing retailing A3349360V   1982 Dec     33.4
## 10 South Australia Clothing retailing A3349360V   1983 Jan     20  
## # ℹ 431 more rows
# Keep only Month and Turnover (myseries is already a tsibble)
myseries_tsibble <- myseries |> 
  select(Month, Turnover) |>
  as_tsibble(index = Month)
# Plotting the selected series
myseries_tsibble |> 
  autoplot(Turnover) +
  labs(title = "Selected Retail Series Time Plot")

# Seasonal plot
myseries_tsibble |> 
  gg_season(Turnover) +
  labs(title = "Seasonal Plot of Selected Retail Series")

# Subseries plot
myseries_tsibble |> 
  gg_subseries(Turnover) +
  labs(title = "Subseries Plot of Selected Retail Series")

# Lag plot
myseries_tsibble |> 
  gg_lag(Turnover) +
  labs(title = "Lag Plot of Selected Retail Series")

# Autocorrelation function plot
myseries_tsibble |> 
  ACF(Turnover) |> 
  autoplot() +
  labs(title = "ACF Plot of Selected Retail Series")

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.

# Load the datasets (all provided by the fpp3 packages)
data("us_employment") # Total Private Employed
data("aus_production") # Bricks
data("pelt") # Hare
data("PBS") # H02 Cost
data("us_gasoline") # Barrels

Total private:

us_employment_private <- us_employment %>% filter(Title == "Total Private")
us_employment_private %>% 
    autoplot(Employed)

us_employment_private %>% 
    gg_season(Employed)

us_employment_private %>% 
    gg_subseries(Employed)

us_employment_private %>% 
    gg_lag(Employed)

us_employment_private %>% 
    ACF(Employed) %>% 
    autoplot()

Can you spot any seasonality, cyclicity and trend?

The US employment dataset for “Total Private” shows trends that align with historical economic cycles, including major recessions. The series ends in mid-2019, so the sharp employment drop of March 2020 is not visible, although it would dominate a more recent vintage of the data.

Recent patterns indicate seasonality, with growth in the first seven months followed by declines. Functions like gg_season() and gg_subseries() can help visualize these seasonal effects, while autoplot() reveals cyclic trends. Overall, the dataset captures clear trends of growth alongside seasonal fluctuations and cyclical behavior.
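
Since the full series starts in 1939, the seasonal pattern is easier to read over a shorter window. A sketch of the gg_season() view restricted to 2000 onwards (an arbitrary cut-off chosen here for legibility):

us_employment_private %>%
  filter(year(Month) >= 2000) %>%
  gg_season(Employed) +
  labs(title = "Seasonal plot, Total Private employment (2000 onwards)")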

What do you learn about the series?

The series reveals that economic recoveries in the US have been lengthening over time, suggesting that it now requires more time for employment levels to rebound to pre-recession levels. This trend indicates a shift in the dynamics of the labor market and may reflect underlying structural changes in the economy.

What can you say about the seasonal patterns?

The seasonal patterns in total US employment suggest that the increase in the latter part of the year may be driven by major consumer holidays, such as Black Friday and Christmas. Additionally, the pronounced peak in July likely reflects the fiscal year-end for many private companies, which often leads to heightened hiring and employment activities during that period.

Can you identify any unusual years?

A notable unusual period is the recovery following the 2008–2009 financial crisis, which shows one of the longest sustained runs of employment growth in the series. This rebound distinguishes that timeframe from other years in the dataset.

Bricks:

aus_production%>% 
    autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

aus_production %>% 
    gg_season(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

aus_production %>% 
    gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).

aus_production %>% 
    gg_lag(Bricks)
## Warning: Removed 20 rows containing missing values (gg_lag).

aus_production %>% 
    ACF(Bricks) %>% 
    autoplot()

Can you spot any seasonality, cyclicity and trend?

The analysis of the “Bricks” time series from aus_production reveals a clear seasonal peak in Q3 across the entire dataset. Additionally, brick production reached its highest levels in the 1980s but has significantly declined throughout the 2000s, indicating a notable long-term downward trend.

What more do you learn about this series?

This trend suggests two possibilities: either brick has fallen out of favor with Australian home buyers, leading to decreased demand, or production costs have risen due to tightening resource constraints, making it less economically viable to produce bricks at previous levels.
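
A rough way to quantify this rise-and-fall (a sketch summarizing average quarterly brick production by decade) is:

aus_production %>%
  as_tibble() %>%
  mutate(decade = 10 * (year(Quarter) %/% 10)) %>%  # e.g. 1987 Q2 -> 1980
  group_by(decade) %>%
  summarise(mean_bricks = mean(Bricks, na.rm = TRUE))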

What can you say about the seasonal patterns?

The seasonal plot indicates that brick production is notably higher in quarters 2 and 3, while quarter 1 shows relatively lower production levels. Overall, production rises until the early 1980s and then declines, consistent with the observations from the earlier time series plot.

Can you identify any unusual years?

The significant declines in brick production during the 1970s and again in the 1980s are particularly striking and stand out as unusual years in the dataset.

Hare (pelt is annual data, so gg_season() is not applicable):

pelt %>% 
    autoplot(Hare)

pelt %>%
    gg_subseries(Hare)

pelt %>%
    gg_lag(Hare)

pelt %>%
    ACF(Hare) %>%
    autoplot()

H02 Cost:

PBS1 <- PBS %>% 
    filter(ATC2 == "H02")
PBS1 %>%
    autoplot(Cost)

PBS1 %>%
    gg_season(Cost)

PBS1 %>%
    gg_subseries(Cost)

PBS1 %>%
    ACF(Cost) %>%
    autoplot()
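
Note that PBS contains several H02 series (split by Concession and Type), so the plots above show multiple lines. Aggregating to a single monthly total cost simplifies the picture; a sketch:

PBS %>%
  filter(ATC2 == "H02") %>%
  summarise(TotalCost = sum(Cost)) %>%  # sum over Concession/Type for each month
  autoplot(TotalCost) +
  labs(title = "Total monthly cost of H02 scripts", y = "Cost ($AUD)")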

Barrels:

us_gasoline %>%
    autoplot(Barrels)

us_gasoline %>%
    gg_season(Barrels)

us_gasoline %>%
    gg_subseries(Barrels)

us_gasoline %>%
    gg_lag(Barrels)

us_gasoline %>%
    ACF(Barrels) %>%
    autoplot()

The following time plots and ACF plots correspond to four different time series. Your task is to match each time plot in the first row with one of the ACF plots in the second row.

1 == B

2 == A

3 == D

4 == C

Exercise 3

Consider the last five years of the Gas data from aus_production.

Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?

# Filter for the last 5 years (20 quarters) of Gas data
gas <- tail(aus_production, 5*4) %>% select(Gas)

gas %>%
  autoplot()+
  labs(title = "Last Five Years of The Gas Data")+
  theme_replace()+
  geom_line(col = "#1B89D3")
## Plot variable not specified, automatically selected `.vars = Gas`

The plot of the Gas time series reveals both seasonal fluctuations and a trend-cycle. There are noticeable seasonal patterns, with distinct upward and downward movements occurring at regular intervals throughout the year. Overall, the trend-cycle shows a fluctuating pattern, suggesting periods of growth followed by declines. This interplay of seasonal variation and the overall trend reflects the dynamics of gas production in response to market demands and external factors.

We will now use classical decomposition to analyze this data:

Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.

gas %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() %>%
  autoplot() + 
  ggtitle("Last Five Years of The Gas Data")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

The results of the multiplicative decomposition show a quarterly seasonal pattern that repeats each year. The analysis reveals an increasing trend from 2006 through mid-2007, followed by a period of stability until early 2008. After that, another upward trend emerges in late 2009. This decomposition highlights both the seasonal patterns and the fluctuations in the trend-cycle, providing insight into the underlying dynamics of the data.

# STL decomposition
dcmp <- gas %>%
  model(stl = STL(Gas))

#Compute and plot the seasonally adjusted data
components(dcmp) %>%
  as_tsibble() %>%
  autoplot(Gas, colour = "gray") +
  geom_line(aes(y=season_adjust), colour = "#0072B2") +
  labs(y = "Gas production",
       title = "Australian Gas Production")

Do the results support the graphical interpretation from part a?

Yes, the results from the multiplicative decomposition support the graphical interpretation of the Gas time series. Both analyses reveal clear seasonal fluctuations, characterized by regular upward and downward movements throughout the year. The decomposition results confirm the presence of a fluctuating trend-cycle, highlighting periods of growth and decline that align with the graphical observations. This consistency between the graphical representation and the decomposition analysis underscores the dynamic nature of gas production in relation to market demands and external influences.

Compute and plot the seasonally adjusted data.

gas_decom <- gas %>%
             model(classical_decomposition(Gas,type = "multiplicative")) %>%
             components()
gas_decom %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "",
       title = "Last Five Years of The Gas Data") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?

new_gas <- gas
new_gas$Gas[10] <- new_gas$Gas[10]+300

new_gas %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() %>%
  autoplot() + 
  ggtitle("Last Five Years of The Gas Data with 300 added to the 10th observation")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

new_gas %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "",
       title = "Last Five Years of The Gas Data with 300 added to the 10th observation") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

When 300 was added to the 10th observation, it resulted in a significant spike in the seasonally adjusted data, shifting the quarterly gas figures from a seasonal low to a relative high. Despite this noticeable change in the overall data, the impact on the seasonal component was minimal because the seasonal pattern remains consistent across years, with only one observation altered. Additionally, this outlier contributed to a downward trend from early 2008 to mid-2008, highlighting how outliers can distort both seasonal adjustments and underlying trends.
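
To make this concrete, a sketch comparing the seasonally adjusted values with and without the outlier:

sa_original <- gas %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  pull(season_adjust)

sa_outlier <- new_gas %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  pull(season_adjust)

# The largest difference sits at the altered observation; the remaining
# quarters move only slightly, via the revised seasonal indices.
summary(sa_outlier - sa_original)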

Does it make any difference if the outlier is near the end rather than in the middle of the time series?

new_gas2 <- gas
new_gas2$Gas[20] <- new_gas2$Gas[20] + 300

new_gas2 %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() %>%
  autoplot() + 
  ggtitle("Last Five Years of The Gas Data with 300 added to the 10th observation")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

new_gas2 %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "",
       title = "Last Five Years of The Gas Data with 300 added to the last observation") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

Yes, the position of the outlier makes a difference. Adding 300 to the last entry creates a spike at the end of the seasonally adjusted data, impacting the overall appearance of the trend. However, the seasonal component is less affected by this change, as its pattern closely aligns with the original data. The trend appears more favorable, showing a stronger upward trajectory, primarily due to the influence of the outlier at the end of the series.

Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.

Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.

The decomposition results show that the trend component increases steadily across most of the timeframe, with a noticeable flat period in the early 1990s. The seasonal component shows that certain months fluctuate more than others, but its scale is small relative to the trend, so seasonal variation is minor compared with the overall growth in the labour force. Paying attention to the panel scales makes this clear: most of the movement in the series comes from the trend, with the seasonal and remainder components contributing comparatively little.

Is the recession of 1991/1992 visible in the estimated components?

Yes, the recession of 1991/1992 is evident in the estimated components, as there is a noticeable dip in employment during this period. This decline is distinct from seasonal variations and the underlying positive trend, highlighting the recession’s impact on employment levels.

Exercise 10

This exercise uses the canadian_gas data (monthly Canadian gas production in billions of cubic metres, January 1960 – February 2005).

Plot the data using autoplot(), gg_subseries() and gg_season() to look at the effect of the changing seasonality over time.

canadian_gas %>%
  autoplot(Volume)+
  labs(title = "Monthly Canadian Gas Production",
       subtitle = "autoplot()",
       y = "billions of cubic meter")+
  theme_replace()+
  geom_line(col = "#1B89D3")

canadian_gas %>%
  gg_subseries(Volume)+
  labs(title = "Monthly Canadian Gas Production",
       subtitle = "gg_subseries()",
       y = "billions of cubic meter")

canadian_gas %>%
  gg_season(Volume)+
  labs(title = "Monthly Canadian Gas Production",
       subtitle = "gg_season()",
       y = "billions of cubic meter")

The time plot of the Canadian gas data reveals a clear increasing trend alongside strong seasonality. This is further confirmed by both the subseries and seasonal plots. Generally, gas production tends to decrease in the summer and rise in the winter months. Notably, the seasonality intensified between 1975 and 1990, as evidenced by the larger differences in production levels between summer and winter, which are highlighted by the blue and green lines in the seasonal plot.

Do an STL decomposition of the data. You will need to choose a seasonal window to allow for the changing shape of the seasonal component.

canadian_gas %>%
  model(
    STL(Volume ~ trend(window = 21) +
                   season(window = 13),
    robust = TRUE)) %>%
  components() %>%
  autoplot()+
  labs(title = "STL decomposition of Canadian Gas Production")

The results of the STL decomposition reveal that the trend component effectively captures the overall trend in the original data. The seasonal component shows an increase from 1975 to 1985, followed by a subsequent decline, which aligns with observations from the time plot. Additionally, the remainder component hovers around zero, indicating that the model has accounted for most of the variability in the data.

How does the seasonal shape change over time? [Hint: Try plotting the seasonal component using gg_season().]
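
Following the hint, a sketch that extracts the STL seasonal component (using the same window choices as above) and passes it to gg_season():

canadian_gas %>%
  model(
    STL(Volume ~ trend(window = 21) +
                   season(window = 13),
    robust = TRUE)) %>%
  components() %>%
  gg_season(season_year) +
  labs(title = "Seasonal component of Canadian Gas Production",
       y = "seasonal component")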

The seasonal shape changes noticeably over time. In the early years the seasonal swings are small and fairly flat; from around 1975 the seasonal variation becomes much more pronounced, peaking between roughly 1975 and 1990, before easing again in later years. The shape of the within-year pattern also shifts, with the contrast between the winter highs and summer lows widening and then narrowing over the course of the series.

Can you produce a plausible seasonally adjusted series?

canadian_gas %>%
 model(
    STL(Volume ~ trend(window = 21) +
                   season(window = 13),
    robust = TRUE)) %>%
  components() %>%
  ggplot(aes(x = Month)) +
  geom_line(aes(y = Volume, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(title = "STL decomposition of Canadian Gas Production") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

Compare the results with those obtained using SEATS and X-11. How are they different?

canadian_gas %>%
  model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
  components() %>%
  autoplot()+
  labs(title = "X-11 decomposition of Canadian Gas Production")

canadian_gas %>%
  model(seats = X_13ARIMA_SEATS(Volume ~ seats())) %>%
  components() %>%
autoplot() +
  labs(title ="SEATS Decomposition of Canadian Gas Production")

The SEATS and X-11 decompositions produce very similar trend and seasonal components, and the differences between their seasonally adjusted series are minimal, although the way the seasonal component evolves over time differs slightly between the two methods and from the STL fit.

The SEATS and X-11 irregular components are multiplicative, varying around one, whereas the STL remainder is additive, varying around zero, so their magnitudes are not directly comparable. On its own scale the STL remainder is small relative to the data, which suggests that the STL decomposition with the chosen windows captures most of the structure in the Canadian gas series.