library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.4
## ✔ dplyr       1.1.2     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.1
## ✔ lubridate   1.9.2     ✔ fable       0.3.3
## ✔ ggplot2     3.4.4     ✔ fabletools  0.3.4
## Warning: package 'tsibble' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(seasonal)
## 
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
## 
##     view

Ask

Do exercises 3.1, 3.2, 3.3, 3.4, 3.5, 3.7, 3.8 and 3.9

3.1

Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?

head(global_economy)
country <- global_economy |>
  group_by(Country)|>
  summarise(PerCapita= GDP/Population)

country |> 
  autoplot(PerCapita, show.legend=FALSE)
## Warning: Removed 3242 rows containing missing values (`geom_line()`).

country |>
  group_by(Country) |>
  filter(PerCapita == max(PerCapita)) |>
  arrange(desc(PerCapita))
country |> 
  filter(Country == "Luxembourg") |>
  autoplot(PerCapita)

3.2

For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.

United States GDP from global_economy

global_economy |>
  filter(Country == "United States") |>
  autoplot(GDP) +
  labs(title= "GDP per capita", y = "$US")

global_economy |>
  filter(Country == "United States") |>
  autoplot(GDP/Population) +
  labs(title= "GDP per capita", y = "$US")

Without per capita adjustment there does not seem to be a significant difference in the plots.

Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock

head(aus_livestock)
aus_livestock |>
  filter(Animal == "Bulls, bullocks and steers") |>
  autoplot(Count, show.legend = FALSE) +
  labs(title = "Bulls, bullocks and steers in Australia Livestock")

aus_livestock |>
  filter(Animal == "Bulls, bullocks and steers") |>
  autoplot(Count, show.legend = FALSE) +
  facet_wrap(~ State, scales = "free_y" , ncol = 1) |>
  labs(title = "Bulls, bullocks and steers in Australia Livestock")

Victorian Electricity Demand from vic_elec

head(vic_elec)
vic_elec |>
  autoplot(Demand)

Gas production from aus_production.

aus_production |>
  autoplot(Gas)

aus_production |>
  gg_subseries(Gas)

lambda <- aus_production |>
  features(Gas, features = guerrero) |>
  pull(lambda_guerrero)
aus_production |>
  autoplot(box_cox(Gas, lambda))

A Box Cox tranformation seems appropriate because of the seasonal variation. A good value of lambda eliminates this variation across the whole series.

3.3

Why is a Box-Cox transformation unhelpful for the canadian_gas data?

canadian_gas
canadian_gas |>
  autoplot(Volume)

If the data shows variation that increases or decreases with the level of the series, then a Box Cox transformation can be useful. It is not useful in this case.

3.4

What Box-Cox transformation would you select for your retail data (from Exercise 7 in Section 2.10)?

set.seed(811)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries |>
  autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`

lambda <- myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)

myseries |>
  autoplot(box_cox(Turnover, lambda)) +
  labs(y= "Turnover")

The best lambda value to use is 0.2247137.

3.5

For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance.

Tobacco from aus_production

lambda <- aus_production |>
  features(Tobacco, features = guerrero) |>
  pull(lambda_guerrero)
aus_production |>
  autoplot(box_cox(Tobacco, lambda)) +
  labs(y="Tobacco")
## Warning: Removed 24 rows containing missing values (`geom_line()`).

Economy class passengers between Melbourne and Sydney from ansett

economy <- ansett |>
  filter(Class == "Economy") |>
  filter(Airports == "MEL-SYD")

head(economy)
lambda <- economy |>
  features(Passengers, features = guerrero) |>
  pull(lambda_guerrero)
economy |>
  autoplot(box_cox(Passengers, lambda)) +
  labs(y="Passengers", title = "Passengers in Economy Class flights MEL-SYD")

Pedestrian counts at Southern Cross Station from pedestrian.

southern_station <- pedestrian |>
  filter (Sensor == "Southern Cross Station")
lambda <- southern_station |>
  features(Count, features = guerrero) |>
  pull(lambda_guerrero)
southern_station |>
  autoplot(box_cox(Count, lambda)) +
  labs(y="Count", title = "Pedestrians at Southern Cross Station")

3.7

Consider the last five years of the Gas data from aus_production.

gas <- tail(aus_production, 5*4) |> select(Gas)
gas

Plot the time series.

Can you identify seasonal fluctuations and/or a trend-cycle?

gas |>
  autoplot(Gas)

There is a seasonal fluctuation and an upward trend in this time series.

Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.

gas_dcp <- gas |>
  model(
    classical_decomposition(Gas, type="multiplicative")
  ) |>
    components()

gas_dcp |>
  autoplot() 
## Warning: Removed 2 rows containing missing values (`geom_line()`).

Do the results support the graphical interpretation from part a?

Yes the results support the graphical interpretation in part a. There shows an upward trend and the seasonal.

Compute and plot the seasonally adjusted data.

gas_dcp |>
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Gas",
       title = "Gas") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 rows containing missing values (`geom_line()`).

Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?

gas2 <- gas
gas2[15,1] <- 300 +gas2[15,1]
gas2
gas_dcp2 <- gas2 |>
  model(
    classical_decomposition(Gas, type="multiplicative")
  ) |>
    components()

gas_dcp2 |>
  autoplot() 
## Warning: Removed 2 rows containing missing values (`geom_line()`).

gas_dcp2 |>
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Gas",
       title = "Gas") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 rows containing missing values (`geom_line()`).

The outlier shows a upward spike in the trendline and the random plots of the decomposition. It also affects the seasonally adjusted line for the time series. The season component has constant variance.

Does it make any difference if the outlier is near the end rather than in the middle of the time series?

gas4 <- gas
gas4[20,1] <- 300 +gas4[20,1]
gas4
gas_dcp4 <- gas4 |>
  model(
    classical_decomposition(Gas, type="multiplicative")
  ) |>
    components()

gas_dcp4 |>
  autoplot() 
## Warning: Removed 2 rows containing missing values (`geom_line()`).

gas_dcp4 |>
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Gas",
       title = "Gas") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 rows containing missing values (`geom_line()`).

There is a difference when the outlier is in the end compared to in the middle. When the outlier is at the end, there is a sharp increase due to the outlier in the seasonally adjusted.

The seasonal pattern in the decomposition remains the same. The trend still shows an positive slope, it also shows the outlier at the end of the line.

3.8

Recall your retail time series data (from Exercise 7 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?

set.seed(811)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
x11_dcmp <- myseries  |>
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) |>
  components()
autoplot(x11_dcmp) +
  labs(title =
    "Decomposition of total US retail employment using X-11.")

x11_dcmp |>
  ggplot(aes(x = Month)) +
  geom_line(aes(y = Turnover, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "Turnover",
       title = "Turnover") +
  scale_colour_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

From previous chapter:
myseries |>
  autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`

myseries |>
  gg_season()
## Plot variable not specified, automatically selected `y = Turnover`

myseries |>
  gg_subseries()
## Plot variable not specified, automatically selected `y = Turnover`

There is an upward trend and a seasonal pattern. There is also a sharp upward increase in the late 1990s. The subseries show that average Turnover over every month is cyclic. The X_11 Decomp does not reveal anything that cannot be noticed with the other functions (autoplot, gg_subseries, etc. ). It can handle outliers better than other classical decompositions.

3.9

Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.

Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.

There is an upward trend of this time series. The season variation is consistent and after 1997 the minimum value decreases and maximum values increases. The seasonal component also shows variation over time.

Is the recession of 1991/1992 visible in the estimated components?

The recession is visible in the remainder component of the decomposition.