#libraries
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(latex2exp)
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?
glimpse(global_economy)
## Rows: 15,150
## Columns: 9
## Key: Country [263]
## $ Country <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",…
## $ Code <fct> AFG, AFG, AFG, AFG, AFG, AFG, AFG, AFG, AFG, AFG, AFG, AFG,…
## $ Year <dbl> 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969,…
## $ GDP <dbl> 537777811, 548888896, 546666678, 751111191, 800000044, 1006…
## $ Growth <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ CPI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ Imports <dbl> 7.024793, 8.097166, 9.349593, 16.863910, 18.055555, 21.4128…
## $ Exports <dbl> 4.132233, 4.453443, 4.878051, 9.171601, 8.888893, 11.258279…
## $ Population <dbl> 8996351, 9166764, 9345868, 9533954, 9731361, 9938414, 10152…
# unique(global_economy$Country)
global_economy %>%
autoplot(GDP/Population, show.legend = FALSE) +
labs(title= "GDP per capita", y = "$US")
## Warning: Removed 3242 rows containing missing values or values outside the scale range
## (`geom_line()`).
global_economy <- global_economy %>%
mutate(gdp_per_capita = GDP/Population)
highest_gdp <- global_economy %>%
group_by(Country) %>%
summarise(avg_gdp_per_capita = mean(gdp_per_capita)) %>%
slice_max(avg_gdp_per_capita, n = 1)
highest_gdp # Monaco has the highest GDP per capita
## # A tsibble: 1 x 3 [1Y]
## # Key: Country [1]
## Country Year avg_gdp_per_capita
## <fct> <dbl> <dbl>
## 1 Monaco 2014 185153.
global_economy %>%
filter(Country == "Monaco") %>%
autoplot(gdp_per_capita) +
labs(title = "Monaco GDP per capita")
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).
Monaco has the highest average GDP per capita. After plotting
the GDP data for Monaco, we observe that it has been increasing over
time. There are a few periods of decline around 1985 and 2000 but
overall it was growing.
For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.
#United States GDP from global_economy
global_economy %>%
filter(Country == "United States") %>%
autoplot(gdp_per_capita) +
labs(title = "US GDP per capita")
GDP was transformed to GDP per capita.
#Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock
aus_livestock
## # A tsibble: 29,364 x 4 [1M]
## # Key: Animal, State [54]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory 2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory 2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory 2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory 1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory 2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory 1800
## 7 1977 Jan Bulls, bullocks and steers Australian Capital Territory 1800
## 8 1977 Feb Bulls, bullocks and steers Australian Capital Territory 1900
## 9 1977 Mar Bulls, bullocks and steers Australian Capital Territory 2700
## 10 1977 Apr Bulls, bullocks and steers Australian Capital Territory 2300
## # ℹ 29,354 more rows
unique(aus_livestock$Animal)
## [1] Bulls, bullocks and steers Calves
## [3] Cattle (excl. calves) Cows and heifers
## [5] Lambs Pigs
## [7] Sheep
## 7 Levels: Bulls, bullocks and steers Calves ... Sheep
bulls_data <- aus_livestock %>%
filter(Animal == "Bulls, bullocks and steers",
State == "Victoria")
bulls_data %>%
autoplot(Count)
# Since this plot shows a lot of variability and fluctuating trends, I will proceed with the Box-Cox transformation
lambda <- bulls_data %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
bulls_data |>
autoplot(box_cox(Count, lambda)) +
labs(title = latex2exp::TeX(paste0(
"Transformed Slaughter of Vicorian bulls, bullocksand steers with $\\lambda$ = ",round(lambda,2))))
#interesting how the log transformation affects the series
bulls_data |>
mutate(log_count = log(Count)) |>
autoplot(log_count) +
labs(title = "Log-Transformed Series",
y = "Log(Count)")
Both Box-cox and log transformations stabilize the
variance.
#Victorian Electricity Demand from vic_elec
vic_elec
## # A tsibble: 52,608 x 5 [30m] <Australia/Melbourne>
## Time Demand Temperature Date Holiday
## <dttm> <dbl> <dbl> <date> <lgl>
## 1 2012-01-01 00:00:00 4383. 21.4 2012-01-01 TRUE
## 2 2012-01-01 00:30:00 4263. 21.0 2012-01-01 TRUE
## 3 2012-01-01 01:00:00 4049. 20.7 2012-01-01 TRUE
## 4 2012-01-01 01:30:00 3878. 20.6 2012-01-01 TRUE
## 5 2012-01-01 02:00:00 4036. 20.4 2012-01-01 TRUE
## 6 2012-01-01 02:30:00 3866. 20.2 2012-01-01 TRUE
## 7 2012-01-01 03:00:00 3694. 20.1 2012-01-01 TRUE
## 8 2012-01-01 03:30:00 3562. 19.6 2012-01-01 TRUE
## 9 2012-01-01 04:00:00 3433. 19.1 2012-01-01 TRUE
## 10 2012-01-01 04:30:00 3359. 19.0 2012-01-01 TRUE
## # ℹ 52,598 more rows
vic_elec %>%
autoplot(Demand) +
labs(title = "Victorian Electricity Demand")
vic_elec_lambda <- vic_elec %>%
features(Demand, features = guerrero) %>%
pull(lambda_guerrero)
vic_elec |>
autoplot(box_cox(Demand, vic_elec_lambda)) +
labs(title = latex2exp::TeX(paste0(
"Transformed Victorian Electricity Demand with $\\lambda$ = ",round(vic_elec_lambda,2))))
Overall, I don’t think the box-cox transformation is helpful in
this case. It is still difficult to determine the trend, perhaps
decompositioning would be the initial step here.
#Gas production from aus_production
aus_production %>%
autoplot(Gas) +
labs(title = "Gas Production in Australia")
gas_lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production |>
autoplot(box_cox(Gas, gas_lambda)) +
labs(title = latex2exp::TeX(paste0(
"Transformed Gas production with $\\lambda$ = ",round(gas_lambda,2))))
Why is a Box-Cox transformation unhelpful for the canadian_gas data?
canadian_gas %>%
autoplot(Volume)
canadian_lambda <- canadian_gas |>
features(Volume, features = guerrero) |>
pull(lambda_guerrero)
print(canadian_lambda)
## [1] 0.5767648
canadian_gas |>
autoplot(box_cox(Volume, canadian_lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed Canadian gas production with $\\lambda$ = ",round(canadian_lambda,2))))
The trend is clearly linear, and this characteristic persists even after the Box-Cox transformation is applied. Additionally, there is no significant variation in the variance
What Box-Cox transformation would you select for your retail data (from Exercise 7 in Section 2.10)?
set.seed(1234)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
autoplot(Turnover)
myseries %>%
gg_season(Turnover)
aus_retail_lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
print(aus_retail_lambda)
## [1] 0.3196221
# The recommened lambda is 0.3196
myseries |>
autoplot(box_cox(Turnover, aus_retail_lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed Australian Retail Turnover with $\\lambda$ = ",round(aus_retail_lambda,2))))
Answer
For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance. Tobacco from aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.
#Tobacco from aus_production
aus_production %>%
autoplot(Tobacco)
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
tobacco_lambda <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
print(tobacco_lambda)
## [1] 0.9264636
#Economy class passengers between Melbourne and Sydney from ansett
mel_syd <- ansett %>%
filter(Airports == 'MEL-SYD',
Class == 'Economy')
mel_syd %>%
autoplot(Passengers)
economy_lambda <- mel_syd %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
print(economy_lambda)
## [1] 1.999927
#Pedestrian counts at Southern Cross Station from pedestrian
southern_pedestrian <- pedestrian %>%
filter(Sensor == 'Southern Cross Station')
southern_pedestrian %>%
autoplot(Count)
pedestrian_lambda <- southern_pedestrian %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
print(pedestrian_lambda)
## [1] -0.2501616
southern_pedestrian %>%
autoplot(box_cox(Count, pedestrian_lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed Canadian gas production with $\\lambda$ = ",
round(pedestrian_lambda,2))))
Consider the last five years of the Gas data from aus_production.
gas <- tail(aus_production, 5*4) |> select(Gas)
gas <- tail(aus_production, 5*4) |> select(Gas)
gas %>%
autoplot(Gas)
gas %>%
model(classical_decomposition(Gas ,type="multiplicative"))%>%
components() %>%
autoplot()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
#Compute and plot the seasonally adjusted data.
gas_decomp <- gas %>%
model(classical = classical_decomposition(Gas, type = "multiplicative")) %>%
components()
gas_adjust <- gas_decomp %>%
mutate(seasonally_adjusted = Gas / seasonal)
gas_adjust %>%
autoplot(seasonally_adjusted) +
labs(title = "Seasonally Adjusted Gas Production")
#Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?
new_gas_data <- gas
new_gas_data$Gas[4] =300
lambda <- gas %>%
features(Gas, features = guerrero)|>
pull(lambda_guerrero)
new_gas_data %>%
autoplot(box_cox(Gas, lambda)) +
labs( y="Transformed Gas production ",
x="Quarter",
title = latex2exp::TeX(paste0(
"Transformed Australian Gas Production $\\lambda$ = ",
round(lambda, 2))))
gas_outlier_decomp <- new_gas_data |>
model(classical = classical_decomposition(Gas, type = "multiplicative")) |>
components()
gas_outlier_adjusted <- gas_outlier_decomp |>
mutate(seasonally_adjusted = Gas / seasonal)
gas_outlier_adjusted |>
autoplot(seasonally_adjusted) +
labs(title = "Seasonally Adjusted Gas Production With Outlier")
In the initial dataset we observed positiv trend. Once we added
the outlier the visible upward trend disappeared. The position of the
outlier doesn’t matter, it is just creating noise.
Recall your retail time series data (from Exercise 7 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?
x11_dcmp <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_dcmp)
Interestingly, the seasonal pattern remains consistent, while
the remainder component shows some unusual spikes.
Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.
The analysis shows an upward trend in both the initial dataset and the trend component. The scale for the seasonal component, which ranges from -100 to 100, is different from the trend’s scale of 6500 to 9000, indicating that seasonal changes don’t have a significant impact. Additionally, the remainder graph reveals some anomalies during 1991/1992, which aligns with the recession.
Yes, the recession of 1991/1992 is visible in the estimated components. It is observed in the remainder component.