library(fpp3)
library(dplyr)
library(ggplot2)
library(seasonal)

data("global_economy")

Question 3.7.1

For this I am only going to consider the GDP per capita history of the top 20 countries that had the highest GDP per capita in the last year of the dataset

latest_year <- max(global_economy$Year)
top_countries <- global_economy %>%
  filter(Year == latest_year) %>%
  arrange(desc(GDP / Population)) %>%
  slice_head(n = 20) %>%
  pull(Country)

global_economy |> 
  mutate(gdp_per_capita = GDP / Population) |> 
  filter(Country %in% top_countries) |> 
  autoplot(gdp_per_capita) +
  labs(title = "GDP per Capita Over Time by Top 20 Countries", 
       x = "Year", 
       y = "GDP per Capita (USD)") +
  theme_minimal() +
  theme(legend.position = "bottom")

Just plotting the top 20 for clarity sake and not accounting for an inflation transformation, luxemborg has the highest GDP per capita , and from the 1990s it has consistently stayed higher than every other nation in the database

Question 3.7.2

a) US GDP

# Filter the data for the United States
us_gdp <- global_economy %>%
  filter(Country == "United States")

# Plot the GDP of the United States over time
us_gdp |> 
  autoplot(GDP) +
  labs(title = "GDP of United States Over Time", 
       x = "Year", 
       y = "GDP (USD)") +
  theme_minimal()

We could do an inflation transformation

# Calculate real GDP (adjusted for CPI)
us_gdp <- us_gdp %>%
  mutate(real_GDP = (GDP / CPI) * 100)

# Plot the real GDP of the United States over time
us_gdp |> 
  autoplot(real_GDP) +
  labs(title = "Real GDP of United States Over Time (Adjusted for CPI)", 
       x = "Year", 
       y = "Real GDP (USD)") +
  theme_minimal()

Now we can see the effects of recessionary periods

b) Victorian slaughter numbers

total_livestock <- aus_livestock %>%
  filter(Animal == "Bulls, bullocks and steers") %>%
  summarise(Total_Count = sum(Count))

total_livestock |> 
  autoplot(Total_Count) +
  labs(title = "Total Livestock Count for Bulls, Bullocks, and Steers Across All States", 
       x = "Year and Month", 
       y = "Total Count of Livestock")

Here the variation seems pretty consistent across the years even without any transformation

c) Victorian electricity demands

vic_elec |> 
  autoplot(Demand) +
  labs(title = "Victorian Electricity Demand Over Time", 
       x = "Time", 
       y = "Demand (MW)")

We have some peaks in 2013 and 2014 that would work better smoothed off

vic_elec |> 
  autoplot(log(Demand)) +
  labs(title = "Victorian Electricity Demand Over Time Log transormation", 
       x = "Time", 
       y = "Demand (MW)")

Thats a bit better , though not by much

d) Australian gas production

aus_production |> 
  autoplot(Gas) +
  labs(title = "Gas Production Over Time", 
       x = "Quarter", 
       y = "Gas Production")

Now trying a guerrero lambda function to get an optimal lambda

lambda <- aus_production |>
  features(Gas, features = guerrero) |>
  pull(lambda_guerrero)

aus_production |>
  autoplot(box_cox(Gas, lambda)) +
  labs(title = paste("Box-Cox Transformed Gas production (lambda = ", round(lambda, 3), ")", sep = ""),
       y = "Gas production")

This smooths out the variation nicely

Question 3.7.3

Lets plot difficult_gas

data("canadian_gas")
canadian_gas |> 
  autoplot(Volume) +
  labs(title = "Canadian Gas Production Over Time", 
       x = "Month", 
       y = "Gas Volume")

lambda <- canadian_gas |>
  features(Volume, features = guerrero) |>
  pull(lambda_guerrero)

canadian_gas |>
  autoplot(box_cox(Volume, lambda))

It might be because the trend in variation changes increases first and then decreases instead of consistently increasing or decreasing making it a lot more difficult to approximate

Question 3.7.4

set.seed(90483484)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
  autoplot(Turnover)

Now using guererro to select an appropriate lambda

lambdaseries <- myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)
myseries |>
  autoplot(box_cox(Turnover, lambda)) +
  labs(title = paste("Box-Cox Transformed Myseries (lambda = ", round(lambdaseries, 3), ")", sep = ""),
       y = "Turnover")

lambdaseries
## [1] 0.1555555

We get a good result with a box cox transformation with lambda value 0.15555

Question 3.7.5

a) Tobacco

lambda_tobacco <- aus_production |>
  features(Tobacco, features = guerrero) |>
  pull(lambda_guerrero)

aus_production |>
  autoplot(box_cox(Tobacco, lambda_tobacco)) +
  labs(title = paste("Box-Cox Transformed Tobacco Production (lambda = ", round(lambda_tobacco, 3), ")", sep = ""),
       y = "Transformed Tobacco")

b) Ansett

lambda_passengers <- ansett |>
  filter(Class == "Economy", Airports == "MEL-SYD") |>
  features(Passengers, features = guerrero) |>
  pull(lambda_guerrero)

ansett |>
  filter(Class == "Economy", Airports == "MEL-SYD") |>
  autoplot(box_cox(Passengers, lambda_passengers)) +
  labs(title = paste("Box-Cox Transformed Economy Class Passengers (lambda = ", round(lambda_passengers, 3), ")", sep = ""),
       y = "Transformed Passengers")

c) Pedestrian

lambda_pedestrian <- pedestrian |>
  filter(Sensor == "Southern Cross Station") |>
  features(Count, features = guerrero) |>
  pull(lambda_guerrero)

pedestrian |>
  filter(Sensor == "Southern Cross Station") |>
  autoplot(box_cox(Count, lambda_pedestrian)) +
  labs(title = paste("Box-Cox Transformed Pedestrian Counts at South Cross Station (lambda = ", round(lambda_pedestrian, 3), ")", sep = ""),
       y = "Transformed Pedestrian Count")

Question 3.7.7

gas <- tail(aus_production, 5*4) |> select(Gas)

a)

gas |>
  autoplot(Gas) +
  labs(title = "Gas Production (Last 5 Years)",
       x = "Quarter",
       y = "Gas Production")

Its an overall positive trend with quarterly fluctuations that remain consistent across the years with the lowest at the beginning of Q1 followed by a sharp increase maxing out around q3 then falling rapidly

b)

gas |>
  model(classical_decomposition(Gas, type = "multiplicative")) |>
  components() |>
  autoplot() +
  labs(title = "Classical Multiplicative Decomposition of Gas Production")

c)

The results support the graphical interpretation of there being an upward trend and the quarterly spikes and declines in gas production. The random variation is random there seems to be no pattern of variation.

d)

gas |>
model(classical_decomposition(Gas, type = "multiplicative")) |>
  components() |>
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Prouction",
       title = "Gas production") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

e)

gas_outlier <- gas
gas_outlier$Gas[10] <- gas_outlier$Gas[10] + 300

gas_outlier |>
  model(classical_decomposition(Gas, type = "multiplicative")) |>
  components() |>
  autoplot() +
  labs(title = "Classical Multiplicative Decomposition of Gas Production with outlier")

gas_outlier |>
model(classical_decomposition(Gas, type = "multiplicative")) |>
  components() |>
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Prouction",
       title = "Gas production") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

The outlier affects the seasonally adjusted data strongly but only very closely around the observations time

f)

gas_outlier_end <- gas
gas_outlier_end$Gas[nrow(gas_outlier_end)] <- gas_outlier_end$Gas[nrow(gas_outlier_end)] + 300


gas_outlier_end |>
  model(classical_decomposition(Gas, type = "multiplicative")) |>
  components() |>
  autoplot() +
  labs(title = "Classical Multiplicative Decomposition of Gas Production with outlier")

gas_outlier_end |>
model(classical_decomposition(Gas, type = "multiplicative")) |>
  components() |>
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Prouction",
       title = "Gas production") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
)

It doesn’t seem to me that there is a significant difference to the overall trends depending on the position of the outlier. The spike is obviously aligned with the outlier but the ripple effects seem negligible

Question 3.7.8

set.seed(90483484)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries %>%
  autoplot(Turnover)

x11_dcmp <- myseries %>%
  model(X11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components() 

autoplot(x11_dcmp) + 
labs(title = "Decomposition of Australian clothing and accessory retailing trade turnover using X-11")

Seasonality remains strong, trend remains the same , this decomposition helps most in bringing out all the outliers when it comes to the changes in variation which wasn’t quite clear in the regular plot.

Question 3.7.9

a)

From what we can see the trend is positive . The number of people in the civilian labor force has been consistently increasing across the year at a pretty significant rate , close to a 1000 new people every 5 years with the trend plateauing between 1990 to 1995. Considering the scales for seasonality , it is consistent but not super strong.The peaks are in December followed by March with lows coming around Jan and August. The seasonal component graph while showing us the seasonality has a Y axis that was only stretched to -110 to 100 and considering the sample sizes in the 1000s we are dealing with , this is almost negligible. The remainder in variances shows us the outliers that occurred between 1991 and 1992 which is indicative of the major recessionary period

b)

Yes the recessionary period is clearly visible as within the outliers in the remainders and the plateauing of the trend curve around the same time