# GDP per capita (GDP / Population) for every series in `global_economy`.
# The legend is switched off with show.legend = FALSE.
autoplot(global_economy, GDP / Population, show.legend = FALSE) +
  labs(
    y = "$US",
    title = "GDP per capita"
  )
## Warning: Removed 3242 rows containing missing values or values outside the scale range
## (`geom_line()`).
# South American countries to compare. Each name has to match a `Country`
# value in `global_economy` exactly.
# NOTE(review): Venezuela is expected to be recorded as "Venezuela, RB"
# (World Bank naming) -- confirm against unique(global_economy$Country).
sa_countries <- c(
  "Argentina", "Brazil", "Chile", "Colombia",
  "Peru", "Venezuela, RB", "Ecuador", "Bolivia"
)

# `%in%` silently ignores names that match nothing, so report any requested
# country with no rows instead of quietly losing its panel in the plot.
unmatched_countries <- setdiff(
  sa_countries,
  as.character(unique(global_economy$Country))
)
if (length(unmatched_countries) > 0) {
  warning(
    "No rows in global_economy for: ",
    paste(unmatched_countries, collapse = ", "),
    call. = FALSE
  )
}

# Keep only the rows belonging to the selected countries.
sa_global_economy <- global_economy |>
  filter(Country %in% sa_countries)

# One panel per country; free y scales let each panel use its own range.
sa_global_economy |>
  ggplot(aes(x = Year, y = GDP / Population)) +
  geom_line() +
  facet_wrap(~Country, scales = "free_y") +
  labs(
    title = "GDP per Capita of South American Countries",
    y = "GDP per Capita",
    x = "Year"
  )
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
Monaco has the greatest GDP per capita as it is a known tax haven. This has increased year on year with a slight dip around the 2008 financial crisis and the pandemic but it has since recovered.
# Rows of `global_economy` whose Country is "United States".
us_gdp <- filter(global_economy, Country == "United States")

# No plot variable is supplied, so autoplot() chooses one itself.
autoplot(us_gdp)
## Plot variable not specified, automatically selected `.vars = GDP`
# Victorian rows of `aus_livestock` for the "Bulls, bullocks and steers"
# animal category.
vic_livestock <- filter(
  aus_livestock,
  State == "Victoria",
  Animal == "Bulls, bullocks and steers"
)

# No plot variable is supplied, so autoplot() chooses one itself.
autoplot(vic_livestock)
## Plot variable not specified, automatically selected `.vars = Count`
# Daily electricity demand: one row per `Date` holding the sum of `Demand`.
# The data is dropped to a plain tibble first so summarise() can collapse the
# rows directly; this replaces the mutate() + distinct() workaround and leaves
# `v` ungrouped, with just the `Date` and `Demand` columns.
v <- vic_elec |>
  as_tibble() |>
  group_by(Date) |>
  summarise(Demand = sum(Demand))

# Rebuild a tsibble indexed by `Date` for plotting.
v |>
  as_tsibble(index = Date) |>
  autoplot() +
  labs(title = "Daily Victoria Electricity Demand")
## Plot variable not specified, automatically selected `.vars = Demand`
# Monthly totals: relabel every date with its year-month, then sum `Demand`
# within each month.
monthly_demand <- v |>
  mutate(Date = yearmonth(Date)) |>
  group_by(Date) |>
  summarise(Demand = sum(Demand))

# Convert to a tsibble indexed by the monthly `Date` and plot it.
autoplot(as_tsibble(monthly_demand, index = Date)) +
  labs(title = "Monthly Victoria Electricity Demand")
## Plot variable not specified, automatically selected `.vars = Demand`
# Untransformed `Gas` series from `aus_production`.
autoplot(aus_production, Gas)

# Box-Cox parameter reported by the `guerrero` feature for `Gas`.
lambda <- aus_production |>
  features(Gas, features = guerrero) |>
  pull(lambda_guerrero)

# The same series after box_cox(); the title shows lambda rounded to 2 d.p.
aus_production |>
  autoplot(box_cox(Gas, lambda)) +
  labs(
    y = "",
    title = TeX(paste0(
      "Transformed Gas Production with $\\lambda$ = ",
      round(lambda, 2)
    ))
  )
The US GDP and Victorian bulls, bullocks and steers data did not require any transformation. However, due to the volume of the data, the Victorian electricity demand was aggregated into monthly demand for better visibility. For the Australian gas production data, a Box-Cox transformation with lambda chosen by the Guerrero method was applied, which stabilised the variance in the data.
# Untransformed `Volume` series from `canadian_gas`.
autoplot(canadian_gas, Volume) +
  labs(title = "Non-Transformed Gas Production")

# Box-Cox parameter reported by the `guerrero` feature for `Volume`.
lambda <- canadian_gas |>
  features(Volume, features = guerrero) |>
  pull(lambda_guerrero)

# The same series after box_cox(); the title shows lambda rounded to 2 d.p.
canadian_gas |>
  autoplot(box_cox(Volume, lambda)) +
  labs(
    y = "",
    title = TeX(paste0(
      "Transformed Gas Production with $\\lambda$ = ",
      round(lambda, 2)
    ))
  )
The Box-Cox transformation is unhelpful because it doesn’t effectively flatten the variation in the time series. For example, the period between 1980 Jan and 1990 Jan still has large amounts of variation compared to the rest of the observations.
# Fix the RNG seed so the randomly drawn series is reproducible.
set.seed(123)

# Draw one `Series ID` at random from `aus_retail` and keep only that series.
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

# Untransformed `Turnover` for the chosen series.
autoplot(myseries, Turnover)

# Box-Cox parameter reported by the `guerrero` feature for `Turnover`.
lambda <- myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)

# The same series after box_cox(); the title shows lambda rounded to 2 d.p.
myseries |>
  autoplot(box_cox(Turnover, lambda)) +
  labs(
    y = "",
    title = paste("Transformation with lambda = ", round(lambda, 2))
  )
# Tobacco series from `aus_production`, keeping the `Quarter` column alongside
# it. The columns are selected by name; the earlier form renamed the index to
# `Quarter` through an index() call, which is an unnecessary indirection.
tobacco <- aus_production |>
  select(Quarter, Tobacco)

# No plot variable is supplied, so autoplot() chooses one itself.
tobacco |>
  autoplot()
## Plot variable not specified, automatically selected `.vars = Tobacco`
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Box-Cox parameter reported by the `guerrero` feature for the `Tobacco`
# column. The column name is capitalised: lower-case `tobacco` is the tsibble
# object itself, not a column, so it must not be passed to features().
lambda <- tobacco |>
  features(Tobacco, features = guerrero) |>
  pull(lambda_guerrero)

# Tobacco after box_cox(); the title shows lambda rounded to 2 d.p.
tobacco |>
  autoplot(box_cox(Tobacco, lambda)) +
  labs(
    y = "",
    title = paste("Transformation of Tobacco with lambda = ", round(lambda, 2))
  )
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
The Box-Cox transformation is not helpful for the tobacco data since the selected lambda is approximately 1, which leaves the series almost unchanged.
# Economy-class rows of `ansett` for the "MEL-SYD" airport pair.
economy_mel_syd <- filter(
  ansett,
  Airports == "MEL-SYD",
  Class == "Economy"
)

# No plot variable is supplied, so autoplot() chooses one itself.
autoplot(economy_mel_syd)
## Plot variable not specified, automatically selected `.vars = Passengers`
# Box-Cox parameter reported by the `guerrero` feature for `Passengers`.
lambda <- economy_mel_syd |>
  features(Passengers, features = guerrero) |>
  pull(lambda_guerrero)

# Passengers after box_cox(); the title shows lambda rounded to 2 d.p.
economy_mel_syd |>
  autoplot(box_cox(Passengers, lambda)) +
  labs(
    y = "",
    title = paste(
      "Transformation of Passengers with lambda = ",
      round(lambda, 2)
    )
  )
For the passenger data, the transformation shows the variation a little more clearly.
# Rows of `pedestrian` recorded by the "Southern Cross Station" sensor.
southern_cross <- filter(pedestrian, Sensor == "Southern Cross Station")

# Untransformed `Count` series.
autoplot(southern_cross, Count)

# Box-Cox parameter reported by the `guerrero` feature for `Count`.
lambda <- southern_cross |>
  features(Count, features = guerrero) |>
  pull(lambda_guerrero)

# Count after box_cox(); the title shows lambda rounded to 2 d.p.
southern_cross |>
  autoplot(box_cox(Count, lambda)) +
  labs(
    y = "",
    title = paste(
      "Transformation of Cross Station Count with lambda = ",
      round(lambda, 2)
    )
  )
The Box-Cox transformation with the Guerrero-selected lambda helps to reveal more of the variation than the original plot does.
# Last 5 * 4 = 20 rows of `aus_production`, reduced to the `Gas` column.
gas <- aus_production |>
  tail(5 * 4) |>
  select(Gas)

# No plot variable is supplied, so autoplot() chooses one itself.
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`
# Fit a classical decomposition of `Gas` with multiplicative components.
gas_dcmp <- model(
  gas,
  classical_decomposition(Gas, type = "multiplicative")
)

# Plot the components extracted from the fitted decomposition.
autoplot(components(gas_dcmp))
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
From the first plot, there is a clear trend showing a gradual increase in gas production with seasonal increases during Q3 and Q4 with big drops during Q1 each year.
The results from the decomposition support the graphical interpretation from part a, showing a clear trend and seasonality with the data.
# Overlay the `season_adjust` component (red) on the original `Gas` series
# (gray), both taken from the decomposition's components.
components(gas_dcmp) |>
  as_tsibble() |>
  autoplot(Gas, color = "gray") +
  geom_line(aes(y = season_adjust), color = "red") +
  labs(title = "Seasonally Adjusted Gas Production")