Exercise 3.1

Consider the GDP information in global_economy

Plot the GDP per capita for each country over time.

# Per documentation for the dataset, GDP data covers 1960 to 2017 and is in US dollars
autoplot(drop_na(global_economy, c(GDP, Population)), GDP / Population) + 
  ylab('GDP per capita (USD)') +
  ggtitle('GDP per capita, 1960-2017') + 
  xlim(1960, 2020) + ylim(0, 200000) +
  guides(
    x = guide_axis(minor.ticks = TRUE)
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(face = 'bold'),
    plot.title = element_text(face = 'bold'),    
    # There too many countries, so I omitted the legend
    legend.position = 'none'
  )

Which country has the highest GDP per capita?

Answer: Monaco had the highest GDP per capita in 2014.

global_economy %>%
  mutate(
    GDP_per_capita = GDP / Population
  ) %>%
  slice_max(GDP_per_capita) %>%
  select(Country, GDP, Population, GDP_per_capita)

## # A tsibble: 1 x 5 [1Y]
## # Key:       Country [1]
##   Country         GDP Population GDP_per_capita  Year
##   <fct>         <dbl>      <dbl>          <dbl> <dbl>
## 1 Monaco  7060236168.      38132        185153.  2014

How has this changed over time?

Answer: The GDP per capita in Monaco trended upward from 1970 to 2016.

# Find the years that Monaco has data to calculate GDP per capita 
start_year <- global_economy %>%
  filter(Country == 'Monaco') %>%
  drop_na(c(GDP, Population)) %>%  
  slice_min(Year) %>%
  pull(Year)

end_year <- global_economy %>%
  filter(Country == 'Monaco') %>%
  drop_na(c(GDP, Population)) %>%  
  slice_max(Year) %>%
  pull(Year)

sprintf('Data for Monaco GDP per capita is available from %d to %d', start_year, end_year)

## [1] "Data for Monaco GDP per capita is available from 1970 to 2016"

global_economy %>%
  filter(Country == 'Monaco') %>%
  drop_na(c(GDP, Population)) %>%
  autoplot(GDP / Population) +
    ylab('GDP per capita (USD)') +
    ggtitle('Monaco GDP per capita, 1970-2016') + 
    xlim(1970, 2020) + ylim(0, 200000) +
    guides(
      x = guide_axis(minor.ticks = TRUE)
    ) +
  theme_classic() +
  theme(
    axis.title = element_text(face = 'bold'),
    plot.title = element_text(face = 'bold')
  )

Exercise 3.2

For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.

United States GDP from global_economy

global_economy %>%
  filter(Country == 'United States') %>%
  autoplot(GDP) +
    ylab('GDP (USD)') +
    ggtitle('United States GDP, 1960-2017') + 
    xlim(1960, 2020) + 
    guides(
      x = guide_axis(minor.ticks = TRUE)
    ) +
  theme_classic() +
  theme(
    axis.title = element_text(face = 'bold'),
    plot.title = element_text(face = 'bold')
  )

GDP data are usually analyzed per capita (as in Exercise 3.1). This transformation did not have any visible effect on the overall upward trend, but it does strengthen the hypothesis that US GDP trended upward between 1960 and 2017 by removing population increase during this period as a potential confounder.

global_economy %>%
  filter(Country == 'United States') %>%
  autoplot(GDP / Population) +
    ylab('GDP per capita (USD)') +
    ggtitle('United States GDP per capita, 1960-2017') + 
    xlim(1960, 2020) +
    guides(
      x = guide_axis(minor.ticks = TRUE),
      y = guide_axis(minor.ticks = TRUE)
    ) +
  theme_classic() +
  theme(
    axis.title = element_text(face = 'bold'),
    plot.title = element_text(face = 'bold')
  )

Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock

The time series is plotted below. The count data does not need to be transformed.

aus_livestock %>%
  filter(Animal == 'Bulls, bullocks and steers', State == 'Victoria') %>%
  autoplot(Count) +
    ggtitle('Bulls, Bullocks, and Steers Slaughtered in Victoria, 1976-2018') +
    theme_classic() +
    theme(
      axis.title = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

Victorian Electricity Demand from vic_elec

vic_elec %>%
  autoplot(Demand) +
    scale_y_continuous(limits = c(2000, 10000), breaks = seq(2000, 10000, by = 2000)) +
    guides(
      y = guide_axis(minor.ticks = TRUE)
    ) +
    ylab('Total Demand (MWh)') +
    ggtitle('Electricity Demand in Victoria (Australia), 2012-2014') +
    theme_classic() +
    theme(
      axis.title = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

Electricity demand would be expected to be seasonal (and does appear to be from the plot above); however, the half hour time interval is very small. Aggregating the demand data over a larger time interval (eg, daily average) shows the seasonality more clearly. Specifically, demand is higher during summer and winter months (keeping in mind that seasons in Australia are the opposite of those in North America).

# Aggregate half-hour electricity demand data by week
vic_elec2 <- as_tibble(vic_elec) %>%
  summarise_by_time(
    .date_var = Time,
    .by = 'month',
    Demand_mean = mean(Demand)
  ) %>%
  rename('Date' = 'Time') %>%
  as_tsibble(index = Date)

# Plot the new time series
vic_elec2 %>%
  autoplot(Demand_mean) +
    ylim(4250, 5250) +
    scale_x_date(date_breaks = '4 months', date_labels = '%b-%y') +
    labs(
      x = 'Date',
      y = 'Mean Demand (MWh)'
    ) +
    ggtitle('Electricity Demand in Victoria (Australia), 2012-2014') +
    theme_classic() +
    theme(
      axis.title = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

Gas production from aus_production

The plot below shows that gas production in Australia trended upward from 1970 to 2010.

aus_production %>%
  autoplot(Gas) +
    labs(
      y = 'Production (petajoules)',
      title = 'Gas Production in Australia, 1956-2010',
      subtitle = 'Untransformed data'
    ) + 
    guides(
      x = guide_axis(minor.ticks = TRUE)
    ) +  
    theme_classic() +
    theme(
      axis.title = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

As was explained in the textbook (Hyndman Chapter 3), a Box-Cox transformation with \(\lambda \approx 0.11\) makes the size of the seasonal variation the same across the series. This shows a rapid upward trend in production from 1970 to 1980, followed by a slower increase thereafter.

# Determine lambda for Box-Cox transformation using Guerrero method
lambda <- aus_production %>%
  features(Gas, features = guerrero) %>%
  pull(lambda_guerrero)

lambda

## [1] 0.1095171

plot_subtitle = latex2exp::TeX(paste0('Box-Cox Transformed, $\\lambda$ = ', round(lambda, 3)))

aus_production %>%
  autoplot(box_cox(Gas, lambda)) +
    labs(
      title = 'Gas Production in Australia, 1956-2010',
      subtitle = plot_subtitle
    ) +
    theme_classic() +
    theme(
      axis.title.y = element_blank(),
      axis.title.x = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

Exercise 3.3

canadian_gas %>%
  autoplot(Volume) +
    labs(
      y = 'Volume (billion cubic meters)',
      title = 'Gas Production in Canada, 1960-2005',
      subtitle = 'Untransformed data'
    ) +   
    theme_classic() +
    theme(
      axis.title = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

After Box-Cox transformation, the data appear unchanged. I’m not sure why, but comparison of the un-transformed Canadian gas production data with the un-transformed Australian gas production data (Exercise 3.2), which could be successfully transformed using Box-Cox, suggests that their data distributions differ. In particular, the disruption in the upward trend between approximately 1973 to 1990 in the Canadian dataset may prevent a Box-Cox transformation from stabilizing the variance.

# Determine lambda for Box-Cox transformation using Guerrero method
lambda <- canadian_gas %>%
  features(Volume, features = guerrero) %>%
  pull(lambda_guerrero)

plot_subtitle = latex2exp::TeX(paste0('Box-Cox transformed, $\\lambda$ = ', 
                                      round(lambda, 3)))

canadian_gas %>%
  autoplot(box_cox(Volume, lambda)) +
    labs(
      title = 'Gas Production in Canada, 1960-2005',
      subtitle = plot_subtitle
    ) +  
    theme_classic() +
    theme(
      axis.title.y = element_blank(),
      axis.title.x = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

Exercise 3.4

What Box-Cox transformation would you select for the retail data from Exercise 7 in Section 2.10?

Answer: \(\lambda = 0.22\)

Since Section 2.10 Exercise 7 wasn’t assigned in Homework 1, I first examined the time series:

set.seed(144)

retail_ts <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID` , 1))

The autoplot shows that the Australian retail trade turnover trended upward from 1982 to 2018. However, the variance increases with the level of the series.

The gg_season, gg_subseries, and gg_lag plots all show seasonality (monthly period). The lag plot also shows strong autocorrelation since the data are concentrated on the diagonal.

The ACF plot shows higher autocorrelations for small lags than large lags, which confirms the presence of trend. However, the autocorrelations for seasonal lags are not larger than other lags, which suggests the absence of seasonality, in contrast to the other plots. I’m not sure how to reconcile this; however, it may reflect the non-constant variance.

None of the plots show evidence of cyclic behavior.

autoplot(retail_ts, Turnover)

ggtime::gg_season(retail_ts, Turnover)

ggtime::gg_subseries(retail_ts, Turnover)

ggtime::gg_lag(retail_ts, Turnover)

ACF(retail_ts, Turnover) %>%
  autoplot()

After Box-Cox transformation with \(\lambda = 0.22\), the retail turnover time series has a similar upward trend as the un-transformed time series, but the variance is stabilized.

# Determine lambda for Box-Cox transformation using Guerrero method
lambda <- retail_ts %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

lambda

## [1] 0.2229869

plot_subtitle = latex2exp::TeX(paste0('Box-Cox transformed, $\\lambda$ = ', 
                                      round(lambda, 3)))

retail_ts %>%
  autoplot(box_cox(Turnover, lambda)) +
    labs(
      title = 'Retail Trade Turnover in Australia, 1982-2018',
      subtitle = plot_subtitle
    ) +
    theme_classic() +
    theme(
      axis.title.y = element_blank(),
      axis.title.x = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

Exercise 3.5

For the following series, find an appropriate Box-Cox transformation in order to stabilize the variance.

Tobacco from aus_production

The variance in the un-transformed tobacco time series does not appear to be as pronounced as previous exercises (eg, Exercise 3.4). This is consistent with the Guerrero method, which results in \(\lambda = 0.93\) for Box-Cox transformation. This value is close to 1, meaning that the distribution of the un-transformed data was already close to normal and minimal (or no) transformation is needed.

# Examine the time series (un-transformed)
aus_production %>%
  drop_na(Tobacco) %>%
  autoplot(Tobacco) +
    ylim(4000, 9000) +
    labs(
      y = 'Production (tonnes)',
      title = 'Production of Tobacco and Cigarettes in Australia, 1956-2010',
      subtitle = 'Un-transformed data'
    ) +
    theme_classic() +
    theme(
      axis.title = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

# Determine lambda for Box-Cox transformation using Guerrero method
lambda <- aus_production %>%
  features(Tobacco, features = guerrero) %>%
  pull(lambda_guerrero)

lambda

## [1] 0.9264636

# Plot the transformed time series
plot_subtitle = latex2exp::TeX(paste0('Box-Cox transformed, $\\lambda$ = ', 
                                      round(lambda, 3)))

aus_production %>%
  drop_na(Tobacco) %>%  
  autoplot(box_cox(Tobacco, lambda)) +
    ylim(2000, 5000) +
    labs(
      title = 'Production of Tobacco and Cigarettes in Australia, 1956-2010',
      subtitle = plot_subtitle
    ) +
    theme_classic() +
    theme(
      axis.title.y = element_blank(),
      axis.title.x = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

Economy class passengers between Melbourne and Sydney from ansett

The time series plot below shows unequal variances as well as some zero values.

Because the logarithmic and power functions used in Box-Cox transformations are only defined for positive values, I additionally transformed the time series by increasing all passenger counts by 1 before applying Box-Cox transformation. In subsequent forecasts (not done in this exercise), this adjustment would need to be reversed by subtracting 1 from predicted values.

# Examine the time series (un-transformed)
ansett %>%
  filter(Class == 'Economy', Airports == 'MEL-SYD') %>%
  autoplot(Passengers) +
    ylim(0, 40000) +
    labs(
      y = 'Passengers',
      title = 'Passengers Traveling With Ansett, 1987-1992',
      subtitle = 'Un-transformed data'
    ) +
    guides(
      y = guide_axis(minor.ticks = TRUE)
    ) +
    theme_classic() +
    theme(
      axis.title = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

The Guerrero method results in \(\lambda = 2\), which helps stabilize the variance.

# Adjust passenger counts to ensure they are positive
c <- 1
ansett_shifted <- ansett %>%
  filter(Class == 'Economy', Airports == 'MEL-SYD') %>%
  mutate(
    Passengers = Passengers + 1
  )

# Determine lambda for Box-Cox transformation using Guerrero method
lambda <- ansett_shifted %>%
  features(Passengers, features = guerrero) %>%
  pull(lambda_guerrero)

lambda

## [1] 1.999927

# Plot the transformed time series
plot_subtitle = latex2exp::TeX(
  paste0('Count + 1 and Box-Cox transformation ($\\lambda$ = ', round(lambda, 3), ')'))

ansett_shifted %>%
  autoplot(box_cox(Passengers, lambda)) +
    ylim(0, 5e+08) +
    labs(
      title = 'Passengers Traveling With Ansett, 1987-1992',
      subtitle = plot_subtitle
    ) +
    theme_classic() +
    theme(
      axis.title.y = element_blank(),
      axis.title.x = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

Pedestrian counts at Southern Cross Station from pedestrian

The raw data are very noisy due to the short time interval (1 hour). This makes it difficult to discern patterns, so I aggregated the data by weekly average.

# Examine the time series (un-transformed)
pedestrian %>%
  filter(Sensor == 'Southern Cross Station') %>%
  autoplot(Count) +
    ylim(0, 4000) +
    labs(
      y = 'Pedestrians',
      title = 'Pedestrians per Hour at Southern Cross Station, 2015-2016',
      subtitle = 'Un-transformed data'
    ) +
    guides(
      y = guide_axis(minor.ticks = TRUE)
    ) +
    theme_classic() +
    theme(
      axis.title = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

The plot of weekly averages shows unequal variances with a large dip around New Year’s Day in 2016. There appears to be similar decreases at the far left (2015) and right (2017) of the plot, but data are lacking prior to the beginning and after the end of the time series.

# Aggregate hourly pedestrian count data by week
pedestrian2 <- as_tibble(pedestrian) %>%
  filter(Sensor == 'Southern Cross Station') %>%  
  summarise_by_time(
    .date_var = Date_Time,
    .by = 'week',
    Count_mean = mean(Count)
  ) %>%
  rename('Date' = 'Date_Time') %>%
  as_tsibble(index = Date)

# Plot the aggregated time series
pedestrian2 %>%
  autoplot(Count_mean) +
    ylim(0, 800) +
    labs(
      x = 'Date',
      y = 'Pedestrians',
      title = 'Mean Pedestrians per Week at Southern Cross Station, 2015-2016',
      subtitle = 'Un-transformed data'      
    ) +
    guides(
      y = guide_axis(minor.ticks = TRUE)
    ) +  
    theme_classic() +
    theme(
      axis.title = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

However, aggregation may “hide” some values, so I checked for the presence of zero values in the raw data.

sprintf('There are %d abservations with zero counts', sum(pedestrian$Count == 0))

## [1] "There are 190 abservations with zero counts"

This showed that the raw count data need adjustment prior to Box-Cox transformation.

# Adjust pedestrian counts to ensure they are positive
c <- 1
pedestrian_shifted <- pedestrian %>%
  filter(Sensor == 'Southern Cross Station') %>%
  mutate(
    Count = Count + 1
  )

Using the Guerrero method results in \(\lambda = -0.25\), which helps stabilize the variance.

# Determine lambda for Box-Cox transformation using Guerrero method
# Note this is performed with the raw (non-aggregated) data
lambda <- pedestrian_shifted %>%
  features(Count, features = guerrero) %>%
  pull(lambda_guerrero)

lambda

## [1] -0.2566589

# Apply Box-Cox transformation and aggregate transformed count data by week
pedestrian3 <- as_tibble(pedestrian_shifted) %>%
  mutate(
    Count_transformed = box_cox(Count, lambda)
  ) %>%
  summarise_by_time(
    .date_var = Date_Time,
    .by = 'week',
    Count_transformed_mean = mean(Count_transformed)
  ) %>%
  rename('Date' = 'Date_Time') %>%
  as_tsibble(index = Date)

# Plot the transformed time series
plot_subtitle = latex2exp::TeX(paste0('Count + 1 and Box-Cox transformation ($\\lambda$ = ',
                                      round(lambda, 3), ')'))

pedestrian3 %>%
  autoplot(Count_transformed_mean) +
    labs(
      title = 'Mean Pedestrians per Week at Southern Cross Station, 2015-2016',
      subtitle = plot_subtitle
    ) +
    theme_classic() +
    theme(
      axis.title.y = element_blank(),
      axis.title.x = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

Exercise 3.7

Consider the last five years of the Gas data from aus_production

gas <- tail(aus_production, 5*4) |> select(Gas)

a. Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?

The plot below shows that gas production in Australia trended upward from 2005 to 2010. Production has seasonal fluctuations, with increases in Q2/Q3 and decreases in Q1 of each year. With only five years of data in the subset, I don’t think there is sufficient data to assess cyclical patterns.

gas %>%
  autoplot(Gas) +
    ylim(150, 275) +
    ylab('Production (petajoules)') +
    ggtitle('Gas Production in Australia, 2005-2010') +
    theme_classic() +
    theme(
      axis.title = element_text(face = 'bold'),
      plot.title = element_text(face = 'bold')
    )

b. Use classical_decomposition with type='multiplicative' to calculate the trend-cycle and seasonal indices.

The decomposition plots are shown below.

gas_decomp_components <- gas %>%
  model(classical_decomposition(Gas, type = 'multiplicative')) %>%
  components()

autoplot(gas_decomp_components) +
  ggtitle('Classical Decomposition of Gas Production in Australia, 2005-2010') +
  theme(
    axis.title = element_text(face = 'bold'),
    plot.title = element_text(face = 'bold')
  )

The seasonal and trend indices are shown below.

Consistent with the multiplicative decomposition, the mean seasonality is 1.

# Seasonal indices
gas_decomp_components$seasonal

##  [1] 1.1256812 0.9250656 0.8752824 1.0739708 1.1256812 0.9250656 0.8752824
##  [8] 1.0739708 1.1256812 0.9250656 0.8752824 1.0739708 1.1256812 0.9250656
## [15] 0.8752824 1.0739708 1.1256812 0.9250656 0.8752824 1.0739708

mean(gas_decomp_components$seasonal)

## [1] 1

# Trend indices
# Note that these are calculated by moving average smoothing, so there are NAs at beginning and end of the series
gas_decomp_components$trend

##  [1]      NA      NA 200.500 203.500 207.000 210.250 213.000 216.125 218.625
## [10] 218.875 218.750 219.000 219.000 220.375 221.875 223.125 225.125 226.000
## [19]      NA      NA

c. Do the results support the graphical interpretation from part a?

Answer: Yes, the trend plot shows an upward trend, and the seasonal plot shows seasonal fluctuations (eg, lowest production in Q1 of each year).

d. Compute and plot the seasonally adjusted data.

The seasonally-adjusted data are already computed by the classical decomposition, so they only need to be plotted.

ggplot(gas_decomp_components, aes(x = Quarter, y = season_adjust)) +
  geom_line() +
  ylim(190, 240) +
  ylab('Production (petajoules)') +
  ggtitle('Seasonally-Adjusted Gas Production in Australia, 2005-2010') +
  theme_classic() +
  theme(
    axis.title = element_text(face = 'bold'),
    plot.title = element_text(face = 'bold')
  )

e. Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?

Answer: Creating an outlier in the raw gas production data also results in an outlier in the seasonally-adjusted data. The remaining seasonally-adjusted production values are similar to the time series without the outlier.

# Change the gas production for 2010 Q1 (row 19) to 300
# The tsibble has 20 rows, so this outlier is near the end of the time series
gas2 <- as_tibble(gas)
gas2[19, 'Gas'] <- 300

# Apply classical decomposiiton to modified data
gas_decomp_components2 <- as_tsibble(gas2, index = Quarter) %>%
  model(classical_decomposition(Gas, type = 'multiplicative')) %>%
  components()

# Create new tsibble with seasonally-adjusted data with vs without outlier to compare plots
gas_outlier_comparison <- tsibble(
  Quarter = gas_decomp_components$Quarter,
  No_Outlier = gas_decomp_components$season_adjust,
  With_Outlier = gas_decomp_components2$season_adjust,
  index = Quarter
)

# Reshape to long format for plot
gas_outlier_comparison_long <- gas_outlier_comparison %>%
  pivot_longer(
    names_to = 'outlier_status', values_to = 'season_adjust', cols = -Quarter
  )

# Plot the seasonally-adjusted data
ggplot(gas_outlier_comparison_long, 
       aes(x = Quarter, y = season_adjust, color = outlier_status)) +
  geom_line(linewidth = 1) +
  scale_color_manual(values = c('No_Outlier' = '#40B0A6', 'With_Outlier' ='#E1BE6A')) +
  ylim(180, 350) +
  labs(
    y = 'Production (petajoules)',
    title = 'Seasonally-Adjusted Gas Production in Australia, 2005-2010',
    color = 'Outlier'
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(face = 'bold'),
    plot.title = element_text(face = 'bold')
  )

f. Does it make any difference if the outlier is near the end rather than in the middle of the time series?

Answer: Yes. Similar to the effect of creating an outlier at the end of the time series (part e), creating an outlier near the middle of the time series also results in an outlier in the seasonally-adjusted data. However, the remaining seasonally-adjusted data differ more from the original time series (with no outlier). Having an outlier in the middle of a time series would potentially affect more moving average windows than an outlier at the end of the series.

# Change the gas production for 2007 Q4 (row 10) to 300
# The tsibble has 20 rows, so this outlier is in the middle of the time series
gas3 <- as_tibble(gas)
gas3[10, 'Gas'] <- 300

# Apply classical decomposiiton to modified data
gas_decomp_components3 <- as_tsibble(gas3, index = Quarter) %>%
  model(classical_decomposition(Gas, type = 'multiplicative')) %>%
  components()

# Create new tsibble with seasonally-adjusted data with vs without outlier to compare plots
gas_outlier_comparison2 <- tsibble(
  Quarter = gas_decomp_components$Quarter,
  No_Outlier = gas_decomp_components$season_adjust,
  With_Outlier = gas_decomp_components3$season_adjust,
  index = Quarter
)

# Reshape to long format for plot
gas_outlier_comparison_long2 <- gas_outlier_comparison2 %>%
  pivot_longer(
    names_to = 'outlier_status', values_to = 'season_adjust', cols = -Quarter
  )

# Plot the seasonally-adjusted data
ggplot(gas_outlier_comparison_long2, 
       aes(x = Quarter, y = season_adjust, color = outlier_status)) +
  geom_line(linewidth = 1) +
  scale_color_manual(values = c('No_Outlier' = '#40B0A6', 'With_Outlier' ='#E1BE6A')) +
  ylim(180, 300) +
  labs(
    y = 'Production (petajoules)',
    title = 'Seasonally-Adjusted Gas Production in Australia, 2005-2010',
    color = 'Outlier'
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(face = 'bold'),
    plot.title = element_text(face = 'bold')
  )

Exercise 3.8

Recall the retail time series data from Exercise 7 in Section 2.10. Decompose the series using X -11. Does it reveal any outliers, or unusual features that you had not noticed previously?

Answer: I didn’t notice any new/unusual features compared with previous plots of this time series (Exercise 3.4). However, the X-11 seasonal component plot does make it a little easier to see the non-constant variance across the series.

retail_x11_decomp_components <- retail_ts %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components()

autoplot(retail_x11_decomp_components) +
  ggtitle('X-11 Decomposition of Retail Trade Turnover in Australia, 1982-2018') +
  theme(
    axis.title = element_text(face = 'bold'),
    plot.title = element_text(face = 'bold')
  )

Exercise 3.9

Figure 3.19

Figure 3.20

Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.

a. Write 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.

b. Is the recession of 1991/1992 visible in the estimated components?

Answer: The trend component plot shows that civilian labor force in Australia trended upward from 1978 to 1995. The season-year component plot shows periodic fluctuations in the size of the labor force across the series, which is consistent with the presence of seasonality; however, the magnitude appears to be relatively small. This can also be seen in the subseries plot, which does not show a strong seasonality.

The remainder component plot is not completely random. Of note, there is a notable dip around 1991-1992, which suggest that the labor force size during this time is not explained by the trend or seasonality components in the time series data. This is consistent with economic shock or other external factors leading to a recession during this period.¹

Session Details

sessionInfo()

## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.2
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] latex2exp_0.9.8   timetk_2.9.1      seasonal_1.10.0   ggtime_0.1.0     
##  [5] fable_0.5.0       feasts_0.4.2      fabletools_0.5.1  fpp3_1.0.2       
##  [9] tsibbledata_0.4.1 tsibble_1.1.6     lubridate_1.9.4   forcats_1.0.1    
## [13] stringr_1.6.0     dplyr_1.1.4       purrr_1.2.1       readr_2.1.6      
## [17] tidyr_1.3.2       tibble_3.3.1      ggplot2_4.0.1     tidyverse_2.0.0  
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1     viridisLite_0.4.2    timeDate_4052.112   
##  [4] farver_2.1.2         S7_0.2.1             fastmap_1.2.0       
##  [7] digest_0.6.39        rpart_4.1.24         timechange_0.3.0    
## [10] lifecycle_1.0.5      ellipsis_0.3.2       survival_3.8-6      
## [13] magrittr_2.0.4       compiler_4.5.2       rlang_1.1.7         
## [16] sass_0.4.10          tools_4.5.2          utf8_1.2.6          
## [19] yaml_2.3.12          data.table_1.18.2.1  knitr_1.51          
## [22] labeling_0.4.3       RColorBrewer_1.1-3   withr_3.0.2         
## [25] nnet_7.3-20          grid_4.5.2           xts_0.14.1          
## [28] future_1.69.0        progressr_0.18.0     globals_0.18.0      
## [31] scales_1.4.0         MASS_7.3-65          cli_3.6.5           
## [34] anytime_0.3.12       rmarkdown_2.30       crayon_1.5.3        
## [37] generics_0.1.4       otel_0.2.0           rstudioapi_0.18.0   
## [40] future.apply_1.20.1  tzdb_0.5.0           cachem_1.1.0        
## [43] splines_4.5.2        parallel_4.5.2       vctrs_0.7.1         
## [46] hardhat_1.4.2        Matrix_1.7-4         jsonlite_2.0.0      
## [49] hms_1.1.4            listenv_0.10.0       gower_1.0.2         
## [52] jquerylib_0.1.4      recipes_1.3.1        glue_1.8.0          
## [55] parallelly_1.46.1    codetools_0.2-20     distributional_0.6.0
## [58] x13binary_1.1.61.1   rsample_1.3.2        stringi_1.8.7       
## [61] gtable_0.3.6         pillar_1.11.1        furrr_0.3.1         
## [64] rappdirs_0.3.4       htmltools_0.5.9      ipred_0.9-15        
## [67] lava_1.8.2           R6_2.6.1             evaluate_1.0.5      
## [70] lattice_0.22-7       bslib_0.10.0         class_7.3-23        
## [73] Rcpp_1.1.1           prodlim_2025.04.28   xfun_0.56           
## [76] zoo_1.8-15           pkgconfig_2.0.3

Kevin L. Kliesen, “Uncertainty Shocks Can Trigger Recessionary Conditions,” St. Louis Fed On the Economy, April 7, 2025.↩︎

DATA624 Homework 2

Alexander Simon

2026-02-14