# Homework 2

#3.1


# 1. Create GDP per capita
# Adds the ratio GDP / Population as a new column of global_economy.
gdp_pc <- global_economy %>%
  mutate(GDP_per_capita = GDP / Population)

# Convert tsibble to regular tibble BEFORE grouping
gdp_pc_tbl <- as_tibble(gdp_pc)

# 2. Identify Top 5 countries by maximum GDP per capita
# Rows with a missing GDP per capita are dropped first. A country whose values
# are all NA would otherwise make max(..., na.rm = TRUE) return -Inf with a
# warning (the original run raised 7 of these, the first for
# "British Virgin Islands"). Such countries can never be in the top 5, so the
# selected countries are unchanged.
top5_countries <- gdp_pc_tbl %>%
  filter(!is.na(GDP_per_capita)) %>%
  group_by(Country) %>%
  summarise(max_gdp_pc = max(GDP_per_capita)) %>%
  slice_max(max_gdp_pc, n = 5) %>%
  pull(Country)

# 3. Filter dataset for Top 5 countries
# The full table (including NA years) is kept here so the plot data is the
# same as before; missing years are handled by the plotting layer.
gdp_top5 <- gdp_pc_tbl %>%
  filter(Country %in% top5_countries)

# 4. Plot
# `linewidth` replaces `size` for lines (deprecated in ggplot2 3.4.0).
# na.rm = TRUE removes years without a GDP per capita value silently instead
# of warning about removed rows.
gdp_top5 %>%
  ggplot(aes(x = Year, y = GDP_per_capita, color = Country)) +
  geom_line(linewidth = 1, na.rm = TRUE) +
  labs(title = "GDP per Capita Over Time (Top 5 Countries)",
       x = "Year",
       y = "GDP per Capita",
       color = "Country") +
  theme_minimal()

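# The country with the highest GDP per capita overall is stated here as Luxembourg — TODO confirm against `top5_countries` (slice_max() returns the country with the largest maximum first).

# In recent years, Ireland shows very rapid growth and sometimes ranks highest — TODO confirm that Ireland is actually one of the five countries plotted above.

# GDP per capita for the top countries has increased steadily over time.

# The gap between these countries and the rest of the world has widened.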

#3.2


# -------------------------------
# 1) United States GDP (global_economy)
# -------------------------------
# Keep only the United States rows and the two columns needed for plotting.
us_gdp <- select(
  filter(global_economy, Country == "United States"),
  Year, GDP
)

# GDP on its original scale.
p_us_gdp <- autoplot(us_gdp, GDP) +
  labs(title = "United States GDP", x = "Year", y = "GDP")

# The same series after a log transform.
p_us_gdp_log <- autoplot(us_gdp, log(GDP)) +
  labs(title = "Log(United States GDP)", x = "Year", y = "log(GDP)")

# Show the original plot, then the log plot.
p_us_gdp

p_us_gdp_log

# -------------------------------
# 2) Slaughter of Victorian "Bulls, bullocks and steers" (aus_livestock)
# -------------------------------
# Single series: Victoria, bulls/bullocks/steers; only Month and Count kept.
vic_bulls <- aus_livestock |>
  filter(State == "Victoria" & Animal == "Bulls, bullocks and steers") |>
  select(Month, Count)

# Counts on the original scale.
p_vic_bulls <- autoplot(vic_bulls, Count) +
  labs(
    title = "Victoria: Bulls, Bullocks & Steers Slaughter",
    x = "Month",
    y = "Count"
  )

# Counts after a log transform.
p_vic_bulls_log <- autoplot(vic_bulls, log(Count)) +
  labs(
    title = "Log(Victoria: Bulls, Bullocks & Steers Slaughter)",
    x = "Month",
    y = "log(Count)"
  )

# Show the original plot, then the log plot.
p_vic_bulls

p_vic_bulls_log

# -------------------------------
# 3) Victorian Electricity Demand (vic_elec)
# -------------------------------
# Demand on its original scale.
p_vic_elec <- autoplot(vic_elec, Demand) +
  labs(
    title = "Victorian Electricity Demand",
    x = "Time",
    y = "Demand (MW)"
  )

# Optional log transform (often not necessary)
p_vic_elec_log <- autoplot(vic_elec, log(Demand)) +
  labs(
    title = "Log(Victorian Electricity Demand)",
    x = "Time",
    y = "log(Demand)"
  )

# Show the original plot, then the log plot.
p_vic_elec

p_vic_elec_log

# -------------------------------
# 4) Gas Production (aus_production)
# -------------------------------
# Gas on its original scale.
p_gas <- autoplot(aus_production, Gas) +
  labs(title = "Australian Gas Production", x = "Quarter", y = "Gas")

# Gas after a log transform.
p_gas_log <- autoplot(aus_production, log(Gas)) +
  labs(
    title = "Log(Australian Gas Production)",
    x = "Quarter",
    y = "log(Gas)"
  )

# Show the original plot, then the log plot.
p_gas

p_gas_log

#3.3

# Canadian gas production on its original scale
autoplot(canadian_gas, Volume) +
  labs(
    title = "Canadian Gas Production",
    x = "Year",
    y = "Volume"
  )

# Box-Cox lambda chosen by the Guerrero feature
lambda <- canadian_gas |>
  features(Volume, guerrero) |>
  pull(lambda_guerrero)

lambda

## [1] 0.5767648

# Same series after applying box_cox() with the estimated lambda;
# the rounded lambda is shown in the plot title.
autoplot(canadian_gas, box_cox(Volume, lambda)) +
  labs(
    title = paste("Box-Cox Transformed Canadian Gas (lambda =", round(lambda, 2), ")"),
    x = "Year",
    y = "Box-Cox(Volume)"
  )

#Box-Cox is mainly useful when variance (and seasonal amplitude) grows with the level of the series.

#For canadian_gas, the variance is already fairly stable and the seasonal swings don’t increase strongly over time.

# The estimated Guerrero λ is about 0.58 (see the value printed above), which is not close to 1 (λ = 1 would mean “no transformation”).

# Even so, the Box-Cox transformed plot does not materially change the shape of the series or improve interpretability/variance stability.

#3.4


# Check column names first
colnames(aus_retail)

## [1] "State"     "Industry"  "Series ID" "Month"     "Turnover"

# Estimate Box-Cox lambda using Guerrero method (one estimate per series)
lambda_tbl <- aus_retail %>%
  features(Turnover, guerrero)

lambda_tbl

# If you want one overall lambda (median across series).
# It is stored as `retail_lambda`, matching `tobacco_lambda`, `econ_lambda`
# and `ped_lambda` in Exercise 3.5, so it does not overwrite the `lambda`
# estimated for canadian_gas in Exercise 3.3.
retail_lambda <- lambda_tbl %>%
  summarise(lambda = median(lambda_guerrero, na.rm = TRUE)) %>%
  pull(lambda)

retail_lambda

## [1] 0.09778411

# Plot original series.
# aus_retail holds many State/Industry series and autoplot() colours each one,
# so the legend is switched off to leave the plotting area to the lines.
aus_retail %>%
  autoplot(Turnover) +
  labs(title = "Australian Retail Turnover",
       y = "Turnover",
       x = "Month") +
  theme(legend.position = "none")

# Plot Box-Cox transformed series (same median lambda applied to every series)
aus_retail %>%
  autoplot(box_cox(Turnover, retail_lambda)) +
  labs(title = paste("Box-Cox Transformed Retail Data (lambda =", round(retail_lambda, 2), ")"),
       y = "Box-Cox(Turnover)",
       x = "Month") +
  theme(legend.position = "none")

# Retail turnover shows increasing variance and multiplicative seasonality over time.

# The estimated Guerrero λ (median across series, about 0.10 as printed above) is close to 0, which suggests a log transformation (λ = 0).

#Therefore, I would select a log transformation for the retail data because it stabilizes the variance and makes seasonal fluctuations more consistent over time.

#3.5

# -------------------------------------------------
# 1) Tobacco (aus_production)
# -------------------------------------------------
# Guerrero estimate of the Box-Cox lambda for the Tobacco column.
tobacco_lambda <- aus_production |>
  features(Tobacco, guerrero) |>
  pull(lambda_guerrero)

tobacco_lambda

## [1] 0.9264636

# Box-Cox transformed Tobacco series; the rounded lambda goes in the title.
autoplot(aus_production, box_cox(Tobacco, tobacco_lambda)) +
  labs(title = paste("Tobacco (lambda =", round(tobacco_lambda, 2), ")"))

## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).

# -------------------------------------------------
# 2) Economy passengers MEL-SYD (ansett)
# -------------------------------------------------
# Economy-class rows for the MEL-SYD airport pair.
mel_syd_econ <- ansett |>
  filter(Airports == "MEL-SYD" & Class == "Economy")

# Guerrero estimate of the Box-Cox lambda for Passengers.
econ_lambda <- mel_syd_econ |>
  features(Passengers, guerrero) |>
  pull(lambda_guerrero)

econ_lambda

## [1] 1.999927

# Box-Cox transformed passenger series; the rounded lambda goes in the title.
autoplot(mel_syd_econ, box_cox(Passengers, econ_lambda)) +
  labs(title = paste("MEL-SYD Economy (lambda =", round(econ_lambda, 2), ")"))

# -------------------------------------------------
# 3) Southern Cross Station (pedestrian)
# -------------------------------------------------
# Rows recorded by the Southern Cross Station sensor.
sc_ped <- filter(pedestrian, Sensor == "Southern Cross Station")

# Guerrero estimate of the Box-Cox lambda for Count.
ped_lambda <- sc_ped |>
  features(Count, guerrero) |>
  pull(lambda_guerrero)

ped_lambda

## [1] -0.2501616

# Box-Cox transformed counts; the rounded lambda goes in the title.
autoplot(sc_ped, box_cox(Count, ped_lambda)) +
  labs(title = paste("Southern Cross Pedestrian (lambda =", round(ped_lambda, 2), ")"))

#3.7


# -------------------------------------------------
# 1) Last 5 years of Gas data
# -------------------------------------------------

# Last 5 * 4 = 20 rows (five years of quarters), Quarter and Gas only.
gas <- select(tail(aus_production, 5 * 4), Quarter, Gas)

# Plot the time series
autoplot(gas, Gas) +
  labs(
    title = "Gas Production (Last 5 Years)",
    y = "Gas"
  )

# Interpretation:
# Clear quarterly seasonal fluctuations are visible in the series.
# Over time there also appears to be a mild upward trend-cycle.


# -------------------------------------------------
# 2) Classical Decomposition (Multiplicative)
# -------------------------------------------------

# Fit a multiplicative classical decomposition to the Gas column.
gas_decomp <- model(
  gas,
  classical_decomposition(Gas, type = "multiplicative")
)

# Plot the extracted components.
autoplot(components(gas_decomp))

## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Interpretation:
# A smooth upward trend-cycle is confirmed by the decomposition.
# The seasonal indices repeat every year, which supports the graphical
# evidence of strong quarterly seasonality.
# A multiplicative form is appropriate because the seasonal variation
# changes proportionally with the level.


# -------------------------------------------------
# 3) Seasonally Adjusted Data
# -------------------------------------------------

# Keep only the seasonally adjusted column from the decomposition components.
gas_sa <- select(components(gas_decomp), Quarter, season_adjust)

autoplot(gas_sa, season_adjust) +
  labs(
    title = "Seasonally Adjusted Gas",
    y = "Seasonally Adjusted"
  )

# Interpretation:
# Seasonal adjustment removes the quarterly seasonal pattern.
# After adjustment the underlying trend-cycle is easier to see.


# -------------------------------------------------
# 4) Add an Outlier in the Middle
# -------------------------------------------------

# Copy the series and add 300 to the 10th observation.
gas_outlier_mid <- gas
gas_outlier_mid$Gas[10] <- gas_outlier_mid$Gas[10] + 300

# Refit the same multiplicative classical decomposition on the altered data.
gas_decomp_mid <- model(
  gas_outlier_mid,
  classical_decomposition(Gas, type = "multiplicative")
)

# Seasonally adjusted series from the refitted decomposition.
gas_sa_mid <- select(components(gas_decomp_mid), Quarter, season_adjust)

autoplot(gas_sa_mid, season_adjust) +
  labs(
    title = "Seasonally Adjusted (Middle Outlier)",
    y = "Seasonally Adjusted"
  )

# Interpretation:
# An outlier in the middle of the series distorts the trend estimate.
# Because classical decomposition uses moving averages, nearby seasonal
# indices are affected as well.
# As a result the seasonally adjusted series is noticeably distorted.


# -------------------------------------------------
# 5) Add an Outlier Near the End
# -------------------------------------------------

# Copy the series and add 300 to its final observation.
gas_outlier_end <- gas
gas_outlier_end$Gas[length(gas_outlier_end$Gas)] <-
  gas_outlier_end$Gas[length(gas_outlier_end$Gas)] + 300

# Refit the same multiplicative classical decomposition on the altered data.
gas_decomp_end <- model(
  gas_outlier_end,
  classical_decomposition(Gas, type = "multiplicative")
)

# Seasonally adjusted series from the refitted decomposition.
gas_sa_end <- select(components(gas_decomp_end), Quarter, season_adjust)

autoplot(gas_sa_end, season_adjust) +
  labs(
    title = "Seasonally Adjusted (End Outlier)",
    y = "Seasonally Adjusted"
  )

# Interpretation:
# Fewer moving-average calculations involve an outlier near the end.
# Its overall impact on the seasonal indices is therefore smaller.
# The distortion is mainly in the last few trend values.
# In general, a middle outlier has the larger overall effect.

#3.8
# -------------------------------------------------
# 1) Select one retail series (example: NSW - Cafes, restaurants and takeaway)
#    Change State/Industry here to match the series used in Exercise 7.
# -------------------------------------------------

# One State/Industry combination, reduced to Month and Turnover.
retail_ts <- select(
  filter(
    aus_retail,
    State == "New South Wales" &
      Industry == "Cafes, restaurants and takeaway food services"
  ),
  Month, Turnover
)

# Plot original series
autoplot(retail_ts, Turnover) +
  labs(
    title = "Retail Turnover (Original Series)",
    y = "Turnover"
  )

# Interpretation:
# There is strong seasonality together with an upward trend.
# Seasonal peaks recur regularly (e.g., December).


# -------------------------------------------------
# 2) X-11 Decomposition
# -------------------------------------------------

# Fit X-13ARIMA-SEATS with the x11() special; the model column is named X_11.
retail_x11 <- model(
  retail_ts,
  X_11 = X_13ARIMA_SEATS(Turnover ~ x11())
)

# Plot the extracted components.
autoplot(components(retail_x11))

# Interpretation:
# X-11 splits the data into:
# - Trend-cycle
# - Seasonal component
# - Irregular component
#
# The seasonal component is strong and stable over time.
# Spikes in the irregular component may correspond to unusual events.


# -------------------------------------------------
# 3) Seasonally Adjusted Series
# -------------------------------------------------

# Keep only the seasonally adjusted column from the X-11 components.
retail_sa <- select(components(retail_x11), Month, season_adjust)

autoplot(retail_sa, season_adjust) +
  labs(
    title = "Seasonally Adjusted Retail Turnover",
    y = "Seasonally Adjusted"
  )

# Interpretation:
# Repeating seasonal effects are removed in the seasonally adjusted data.
# Sharp movements that remain reflect unusual events or structural changes.


# -------------------------------------------------
# Final Interpretation (write in submission)
# -------------------------------------------------

# The X-11 decomposition confirms strong seasonal patterns and a steady upward trend.
# In the irregular component, noticeable spikes may appear during unusual economic events
# (e.g., financial crises; NOTE(review): the COVID example only applies if the series extends to 2020 — confirm the date range of aus_retail).
# These features may not have been as obvious in the raw series.
# X-11 helps isolate these irregular movements more clearly.

#3.9 The decomposition shows that the Australian civilian labour force steadily increased over the period, with a clear upward trend from the late 1970s to the mid-1990s. The seasonal pattern is consistent each year, but its size is relatively small compared to the overall level of the series, meaning seasonality does not drive most of the variation. Most of the movement in the data is explained by the long-term growth rather than seasonal swings. The recession of 1991–1992 is visible mainly as a slowdown in the trend component, where growth flattens temporarily. There is also a noticeable disturbance in the irregular component around that time, reflecting the economic shock.

# Homework 2
#
# Ariba Mandavia