library(fpp3)
library(tidyverse)
library(kableExtra)
library(reactable)
library(seasonal)
library(tsibble)

1. Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?

data("global_economy")
gdp_per_capita <- global_economy |>
  mutate(GDP_Per_Capita = round(GDP/Population, 2)) |>
  select(Country, Year, GDP_Per_Capita) |>
  #group_by(Country) |>
  #summarize(Average_GDP_Per_Capita = mean(GDP_Per_Capita)) |>
  arrange(desc(GDP_Per_Capita))
reactable(gdp_per_capita)
autoplot(gdp_per_capita, GDP_Per_Capita, show.legend =  FALSE) +
  labs(x = "Year", y = "GDP Per Capita") + 
  ggtitle("GDP Per Capita for Each Country") +
  theme(plot.title=element_text(hjust=0.3)) 

Based on the table created, Monaco achieved the two highest GDP per capita in 2008 and 2014. As a result, I decided to look at how Monaco’s GDP per capita has changed over time:

gdp <- gdp_per_capita |>
  filter(Country == "Monaco")
autoplot(gdp) +
  labs(x="Year", y = "GDP Per Capita") +
  ggtitle("Yearly GDP Per Capita in Monaco") +
  theme(plot.title = element_text(hjust = 0.3))

The GDP per capita of Monaco significantly increased after the year 2000, more than doubling by 2008 and reaching a peak in 2014.

2. For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.

us_gdp <- global_economy |>
  filter(Country == "United States")
autoplot(us_gdp)

data("aus_livestock")
bulls_bullocks_steers <- aus_livestock |>
  filter(Animal == "Bulls, bullocks and steers", State == "Victoria")
autoplot(bulls_bullocks_steers)

data("vic_elec")
vic_elec |>
  autoplot(Demand)

I used a Box Cox Transformation to see if any patterns or trends would be visible:

vic_elec |>
  features(Demand, features = guerrero)
## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1          0.0999
vic_elec |>
  autoplot(box_cox(Demand, 0.09993089))

The transformation didn’t produced a plot that showed discernible patterns or trends. I decided to transform the data first, looking at weekly, monthly, and daily electricity demand, then used Box Cox Transformations for each.

Weekly

weekly_vic_elec <- vic_elec |>
  mutate(Week = yearweek(Date)) |>
  group_by(Week) |>
  mutate(Demand = sum(Demand)) |>
  #mutate(Week = yearweek(Date)) |>
  distinct(Week, Demand) |>
  as_tsibble(index=Week)
autoplot(weekly_vic_elec, Demand) +
  labs(x = "Weekly", y = "Total Demand") +
  ggtitle("Total Weekly Electricity Demand") +
  theme(plot.title=element_text(hjust=0.4)) 

weekly_vic_elec |>
  features(Demand, features = guerrero) |>
  pull(lambda_guerrero)
## [1] 1.999927
weekly_vic_elec |>
  autoplot(box_cox(Demand, 1.999927)) +
  ggtitle("Box Cox Transformation of Weekly Electricity Demand")

Monthly

monthly_vic_elec <- vic_elec |>
  mutate(Month = yearmonth(Date)) |>
  group_by(Month) |>
  mutate(Demand = sum(Demand)) |>
  distinct(Month, Demand) |>
  as_tsibble(index=Month)
autoplot(monthly_vic_elec, Demand) +
  ggtitle("Monthly Electricity Demand") +
  theme(plot.title = element_text(hjust = 0.3))

monthly_vic_elec |>
  features(Demand, features = guerrero) |>
  pull(lambda_guerrero)
## [1] -0.8999268
monthly_vic_elec |>
  autoplot(box_cox(Demand, -0.8999268)) +
  ggtitle("Box Cox Transformation of Monthly Electricity Demand") +
  theme(plot.title = element_text(hjust = 0.3))

Daily

daily_vic_elec <- vic_elec |>
  group_by(Date) |>
  mutate(Demand = sum(Demand)) |>
  distinct(Date, Demand) |>
  as_tsibble(index=Date) 
autoplot(daily_vic_elec, Demand) + 
  ggtitle("Daily Electricity Demand") +
  theme(plot.title = element_text(hjust = 0.3))

daily_vic_elec |>
  features(Demand, features = guerrero) |>
  pull(lambda_guerrero)
## [1] -0.8999268
daily_vic_elec |>
  autoplot(box_cox(Demand, -0.8999268)) +
  ggtitle("Box Cox Transformation of Daily Electricity Demand") +
  theme(plot.title = element_text(hjust = 0.3))

data("aus_production")
autoplot(aus_production, Gas)

lambda <- aus_production |>
  features(Gas, features = guerrero) |>
  pull(lambda_guerrero)
aus_production |>
  autoplot(box_cox(Gas, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed gas production with $\\lambda$ = ",
         round(lambda,2))))

3. Why is a Box-Cox transformation unhelpful for the canadian_gas data?

data("canadian_gas")
autoplot(canadian_gas, Volume) + 
  ggtitle("Monthly Volume of Gas in Canada") +
  theme(plot.title = element_text(hjust = 0.3))

lambda <- canadian_gas |>
  features(Volume, features = guerrero) |>
  pull(lambda_guerrero)

canadian_gas |> autoplot(box_cox(Volume, lambda)) + 
  ggtitle("Box Cox Transformation of Monthly Volume of Gas in Canada") +
  theme(plot.title = element_text(hjust = 0.3))

A Box Cox Transformation would be unhelpful for the canadian_gas data because it does not stabilize the variance in the data, specifically the data from around 1973 to 1990.

4. What Box-Cox transformation would you select for your retail data (from Exercise 7 in Section 2.10)?

data("aus_retail")
set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)
## [1] 0.08303631

I would choose the value produced by the lambda_guerreo function, 0.08303631.

autoplot(myseries, Turnover) +
  ggtitle("Turnover in Australian Retail") +
  theme(plot.title = element_text(hjust=0.3))

lambda <- myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)
myseries |> autoplot(box_cox(Turnover, lambda)) +
  ggtitle("Box Cox Transformation of Turnover in Australian Retail") +
  theme(plot.title = element_text(hjust=0.3))

5. For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance. Tobacco from aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.

Tobacco From aus_production

autoplot(aus_production, Tobacco)

aus_production |>
  features(Tobacco, features = guerrero) |>
  pull(lambda_guerrero)
## [1] 0.9264636
aus_production |> autoplot(box_cox(Tobacco, 0.9264636))

Economy class passengers between Melbourne and Sydney from ansett

data("ansett")
economy_class <- ansett |>
  filter(Class == "Economy", Airports == "MEL-SYD")
autoplot(economy_class, Passengers)

economy_class |>
  features(Passengers, features = guerrero) |>
  pull(lambda_guerrero)
## [1] 1.999927
options(scipen=100)
economy_class |> autoplot(box_cox(Passengers, 1.999927))

Pedestrian counts at Southern Cross Station from pedestrian

data("pedestrian")
pedestrian_scs <- pedestrian |>
  filter(Sensor == "Southern Cross Station")
autoplot(pedestrian_scs, Count)

pedestrian_scs |>
  features(Count, features = guerrero) |>
  pull(lambda_guerrero)
## [1] -0.2501616
pedestrian_scs |> 
  autoplot(box_cox(Count, -0.2501))

I didn’t think the Box Cox Transformation did an adequate job of stabilizing the variance, which could be the case because the data is hourly. As a result, I decided to mutate the Date column to get the monthly components of the data and sum the Count column of each month, then performed a Box Cox transformation to the newly transformed data:

new_ped <- pedestrian_scs |>
  mutate(Month = yearmonth(Date)) |>
  group_by(Month) |>
  mutate(Count = sum(Count)) |>
  distinct(Month, Count) |>
  as_tsibble(index=Month)
autoplot(new_ped, Count)

Transforming the data helped discern the seasonality of the pedestrian counts, showing a sharp decline in January 2016.

new_ped |>
  features(Count, features = guerrero) |>
  pull(lambda_guerrero)
## [1] 0.132791
new_ped |> 
  autoplot(box_cox(Count, 0.132791))

Using the Box Cox Transformation does not appear to have stabilized the variance in the data.

7. Consider the last five years of the Gas data from aus_production.

gas <- tail(aus_production, 5*4) |> select(Gas)

a. Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?

gas |>
  autoplot(Gas) +
  labs(x="Quarter", y = "Gas Production") +
  ggtitle("Quarterly Production of Gas in Australia") +
  theme(plot.title = element_text(hjust = 0.3))

There appears to be 12-month fluctuations in gas production, where gas production decreases in Q1 of every year and increases in the middle of each year.

b. Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.

gas |>
  model(
    classical_decomposition(Gas, type = "multiplicative")
    ) |>
  components() |>
  autoplot() +
  labs(title = "Classical Multiplicative Decomposition of Gas Production in Australia \n in the Past 5 Years") +
  theme(plot.title = element_text(hjust = 0.5))

c. Do the results support the graphical interpretation from part a?

When analyzing the classical multiplicative decomposition of the gas production data, the seasonal component reflects the 12-month fluctuations illustrated in part a. However, the random and trend components does not have the same seasonal fluctuations.

d. Compute and plot the seasonally adjusted data.

gas |>
  model(
    STL(Gas ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) |>
  components() |>
  autoplot()

e. Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?

# Changed observation in 2006 Q1
new_gas <- gas |>
  mutate(Gas = if_else(Gas == 171, 300, Gas))
kbl(head(new_gas, n = 10)) %>%
kable_styling(latex_options="scale_down", c("striped", "hover", "condensed", full_width=F))
Gas Quarter
221 2005 Q3
180 2005 Q4
300 2006 Q1
224 2006 Q2
233 2006 Q3
192 2006 Q4
187 2007 Q1
234 2007 Q2
245 2007 Q3
205 2007 Q4
new_gas |>
  model(
    STL(Gas ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) |>
  components() |>
  autoplot() 

You can visibly locate the outlier in 2006 Q1 in the remainder and Gas components, while the trend line looks more straight and less smooth.

f. Does it make any difference if the outlier is near the end rather than in the middle of the time series?

# Changed 2007 Q4 and 2010 Q1
new_gas2 <- gas |>
  mutate(Gas = if_else(Gas == 205, 400, Gas))
kbl(head(new_gas2, n = 20)) %>%
kable_styling(latex_options="scale_down", c("striped", "hover", "condensed", full_width=F))
Gas Quarter
221 2005 Q3
180 2005 Q4
171 2006 Q1
224 2006 Q2
233 2006 Q3
192 2006 Q4
187 2007 Q1
234 2007 Q2
245 2007 Q3
400 2007 Q4
194 2008 Q1
229 2008 Q2
249 2008 Q3
203 2008 Q4
196 2009 Q1
238 2009 Q2
252 2009 Q3
210 2009 Q4
400 2010 Q1
236 2010 Q2
new_gas2 |>
  model(
    STL(Gas ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) |>
  components() |>
  autoplot()

It doesn’t make a difference in the remainder and Gas components, as the outliers stand out regardless of where it is located in the time series. However, it does appear that the line in the trend component becomes more straight and less smooth.

8. Recall your retail time series data (from Exercise 7 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?

data("aus_retail")
set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
# original autoplot of data
myseries |>
  autoplot(Turnover)

x11_dcmp <- myseries |>
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) |>
  components()
autoplot(x11_dcmp) +
  labs(title =
    "Decomposition of Total Australian Retail Turnover using X-11") +
  theme(plot.title = element_text(hjust = 0.5))

The trend and Turnover highlights the outliers that occur after 1995, which is apparent in the autoplot as well. The irregular decomposition shows sharp increase and decline occurring around 2016-2017, which isn’t readily apparent in the autoplot of the Turnover retail data.

9. Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.

Figure 3.19

knitr::include_graphics("/Users/mohamedhassan/Downloads/figure_319.png")

Figure 3.20

knitr::include_graphics("/Users/mohamedhassan/Downloads/figure_320.png")

a. Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.

The scale range of the value and trend decomposition are the same, from around 6500 to 9000. The scale range of the season_year component is wider, from 100 to -100, and 100 to -400. The season_year components repeats year to year, which may not be a realistic representation of the data. The negative numbers represented

b. Is the recession of 1991/1992 visible in the estimated components?

In the Value and Remainder decomposition components, the recession of 1991/1992 are visible, showing a distinct downturn in the Remainder component and a less sharp but noticeable downturn in the Value component. When examining the month by month breakdown of civilians in the labor force, May and July appear to be the only two months that shows a downturn in the civilian labor force during the 1991/1992 recession.