Data 624 Homework 2

library(fpp3)
library(tidyverse)
library(kableExtra)
library(reactable)
library(seasonal)
library(tsibble)

1. Consider the GDP information in `global_economy`. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?

data("global_economy")

gdp_per_capita <- global_economy |>
  mutate(GDP_Per_Capita = round(GDP/Population, 2)) |>
  select(Country, Year, GDP_Per_Capita) |>
  #group_by(Country) |>
  #summarize(Average_GDP_Per_Capita = mean(GDP_Per_Capita)) |>
  arrange(desc(GDP_Per_Capita))
reactable(gdp_per_capita)

autoplot(gdp_per_capita, GDP_Per_Capita, show.legend =  FALSE) +
  labs(x = "Year", y = "GDP Per Capita") + 
  ggtitle("GDP Per Capita for Each Country") +
  theme(plot.title=element_text(hjust=0.3))

Based on the table created, Monaco achieved the two highest GDP per capita in 2008 and 2014. As a result, I decided to look at how Monaco’s GDP per capita has changed over time:

gdp <- gdp_per_capita |>
  filter(Country == "Monaco")
autoplot(gdp) +
  labs(x="Year", y = "GDP Per Capita") +
  ggtitle("Yearly GDP Per Capita in Monaco") +
  theme(plot.title = element_text(hjust = 0.3))

The GDP per capita of Monaco significantly increased after the year 2000, more than doubling by 2008 and reaching a peak in 2014.

2. For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.

United States GDP from global_economy.

us_gdp <- global_economy |>
  filter(Country == "United States")
autoplot(us_gdp)

Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.

data("aus_livestock")

bulls_bullocks_steers <- aus_livestock |>
  filter(Animal == "Bulls, bullocks and steers", State == "Victoria")
autoplot(bulls_bullocks_steers)

Victorian Electricity Demand from vic_elec.

data("vic_elec")

vic_elec |>
  autoplot(Demand)

I used a Box Cox Transformation to see if any patterns or trends would be visible:

vic_elec |>
  features(Demand, features = guerrero)

## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1          0.0999

vic_elec |>
  autoplot(box_cox(Demand, 0.09993089))

The transformation didn’t produced a plot that showed discernible patterns or trends. I decided to transform the data first, looking at weekly, monthly, and daily electricity demand, then used Box Cox Transformations for each.

Weekly

weekly_vic_elec <- vic_elec |>
  mutate(Week = yearweek(Date)) |>
  group_by(Week) |>
  mutate(Demand = sum(Demand)) |>
  #mutate(Week = yearweek(Date)) |>
  distinct(Week, Demand) |>
  as_tsibble(index=Week)
autoplot(weekly_vic_elec, Demand) +
  labs(x = "Weekly", y = "Total Demand") +
  ggtitle("Total Weekly Electricity Demand") +
  theme(plot.title=element_text(hjust=0.4))

weekly_vic_elec |>
  features(Demand, features = guerrero) |>
  pull(lambda_guerrero)

## [1] 1.999927

weekly_vic_elec |>
  autoplot(box_cox(Demand, 1.999927)) +
  ggtitle("Box Cox Transformation of Weekly Electricity Demand")

Monthly

monthly_vic_elec <- vic_elec |>
  mutate(Month = yearmonth(Date)) |>
  group_by(Month) |>
  mutate(Demand = sum(Demand)) |>
  distinct(Month, Demand) |>
  as_tsibble(index=Month)
autoplot(monthly_vic_elec, Demand) +
  ggtitle("Monthly Electricity Demand") +
  theme(plot.title = element_text(hjust = 0.3))

monthly_vic_elec |>
  features(Demand, features = guerrero) |>
  pull(lambda_guerrero)

## [1] -0.8999268

monthly_vic_elec |>
  autoplot(box_cox(Demand, -0.8999268)) +
  ggtitle("Box Cox Transformation of Monthly Electricity Demand") +
  theme(plot.title = element_text(hjust = 0.3))

Daily

daily_vic_elec <- vic_elec |>
  group_by(Date) |>
  mutate(Demand = sum(Demand)) |>
  distinct(Date, Demand) |>
  as_tsibble(index=Date) 
autoplot(daily_vic_elec, Demand) + 
  ggtitle("Daily Electricity Demand") +
  theme(plot.title = element_text(hjust = 0.3))

daily_vic_elec |>
  features(Demand, features = guerrero) |>
  pull(lambda_guerrero)

## [1] -0.8999268

daily_vic_elec |>
  autoplot(box_cox(Demand, -0.8999268)) +
  ggtitle("Box Cox Transformation of Daily Electricity Demand") +
  theme(plot.title = element_text(hjust = 0.3))

Gas production from aus_production.

data("aus_production")

autoplot(aus_production, Gas)

lambda <- aus_production |>
  features(Gas, features = guerrero) |>
  pull(lambda_guerrero)
aus_production |>
  autoplot(box_cox(Gas, lambda)) +
  labs(y = "",
       title = latex2exp::TeX(paste0(
         "Transformed gas production with $\\lambda$ = ",
         round(lambda,2))))

3. Why is a Box-Cox transformation unhelpful for the `canadian_gas` data?

data("canadian_gas")

autoplot(canadian_gas, Volume) + 
  ggtitle("Monthly Volume of Gas in Canada") +
  theme(plot.title = element_text(hjust = 0.3))

lambda <- canadian_gas |>
  features(Volume, features = guerrero) |>
  pull(lambda_guerrero)

canadian_gas |> autoplot(box_cox(Volume, lambda)) + 
  ggtitle("Box Cox Transformation of Monthly Volume of Gas in Canada") +
  theme(plot.title = element_text(hjust = 0.3))

A Box Cox Transformation would be unhelpful for the canadian_gas data because it does not stabilize the variance in the data, specifically the data from around 1973 to 1990.

4. What Box-Cox transformation would you select for your retail data (from Exercise 7 in Section 2.10)?

data("aus_retail")

set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)

## [1] 0.08303631

I would choose the value produced by the lambda_guerreo function, 0.08303631.

autoplot(myseries, Turnover) +
  ggtitle("Turnover in Australian Retail") +
  theme(plot.title = element_text(hjust=0.3))

lambda <- myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)

myseries |> autoplot(box_cox(Turnover, lambda)) +
  ggtitle("Box Cox Transformation of Turnover in Australian Retail") +
  theme(plot.title = element_text(hjust=0.3))

5. For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance. Tobacco from `aus_production`, Economy class passengers between Melbourne and Sydney from `ansett`, and Pedestrian counts at Southern Cross Station from `pedestrian`.

Tobacco From `aus_production`

autoplot(aus_production, Tobacco)

aus_production |>
  features(Tobacco, features = guerrero) |>
  pull(lambda_guerrero)

## [1] 0.9264636

aus_production |> autoplot(box_cox(Tobacco, 0.9264636))

Economy class passengers between Melbourne and Sydney from `ansett`

data("ansett")

economy_class <- ansett |>
  filter(Class == "Economy", Airports == "MEL-SYD")
autoplot(economy_class, Passengers)

economy_class |>
  features(Passengers, features = guerrero) |>
  pull(lambda_guerrero)

## [1] 1.999927

options(scipen=100)
economy_class |> autoplot(box_cox(Passengers, 1.999927))

Pedestrian counts at Southern Cross Station from `pedestrian`

data("pedestrian")

pedestrian_scs <- pedestrian |>
  filter(Sensor == "Southern Cross Station")
autoplot(pedestrian_scs, Count)

pedestrian_scs |>
  features(Count, features = guerrero) |>
  pull(lambda_guerrero)

## [1] -0.2501616

pedestrian_scs |> 
  autoplot(box_cox(Count, -0.2501))

I didn’t think the Box Cox Transformation did an adequate job of stabilizing the variance, which could be the case because the data is hourly. As a result, I decided to mutate the Date column to get the monthly components of the data and sum the Count column of each month, then performed a Box Cox transformation to the newly transformed data:

new_ped <- pedestrian_scs |>
  mutate(Month = yearmonth(Date)) |>
  group_by(Month) |>
  mutate(Count = sum(Count)) |>
  distinct(Month, Count) |>
  as_tsibble(index=Month)
autoplot(new_ped, Count)

Transforming the data helped discern the seasonality of the pedestrian counts, showing a sharp decline in January 2016.

new_ped |>
  features(Count, features = guerrero) |>
  pull(lambda_guerrero)

## [1] 0.132791

new_ped |> 
  autoplot(box_cox(Count, 0.132791))

Using the Box Cox Transformation does not appear to have stabilized the variance in the data.

7. Consider the last five years of the Gas data from `aus_production`.

gas <- tail(aus_production, 5*4) |> select(Gas)

a. Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?

gas |>
  autoplot(Gas) +
  labs(x="Quarter", y = "Gas Production") +
  ggtitle("Quarterly Production of Gas in Australia") +
  theme(plot.title = element_text(hjust = 0.3))

There appears to be 12-month fluctuations in gas production, where gas production decreases in Q1 of every year and increases in the middle of each year.

b. Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.

gas |>
  model(
    classical_decomposition(Gas, type = "multiplicative")
    ) |>
  components() |>
  autoplot() +
  labs(title = "Classical Multiplicative Decomposition of Gas Production in Australia \n in the Past 5 Years") +
  theme(plot.title = element_text(hjust = 0.5))

c. Do the results support the graphical interpretation from part a?

When analyzing the classical multiplicative decomposition of the gas production data, the seasonal component reflects the 12-month fluctuations illustrated in part a. However, the random and trend components does not have the same seasonal fluctuations.

d. Compute and plot the seasonally adjusted data.

gas |>
  model(
    STL(Gas ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) |>
  components() |>
  autoplot()

e. Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?

# Changed observation in 2006 Q1
new_gas <- gas |>
  mutate(Gas = if_else(Gas == 171, 300, Gas))

kbl(head(new_gas, n = 10)) %>%
kable_styling(latex_options="scale_down", c("striped", "hover", "condensed", full_width=F))

Gas	Quarter
221	2005 Q3
180	2005 Q4
300	2006 Q1
224	2006 Q2
233	2006 Q3
192	2006 Q4
187	2007 Q1
234	2007 Q2
245	2007 Q3
205	2007 Q4

new_gas |>
  model(
    STL(Gas ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) |>
  components() |>
  autoplot()

You can visibly locate the outlier in 2006 Q1 in the remainder and Gas components, while the trend line looks more straight and less smooth.

f. Does it make any difference if the outlier is near the end rather than in the middle of the time series?

# Changed 2007 Q4 and 2010 Q1
new_gas2 <- gas |>
  mutate(Gas = if_else(Gas == 205, 400, Gas))

kbl(head(new_gas2, n = 20)) %>%
kable_styling(latex_options="scale_down", c("striped", "hover", "condensed", full_width=F))

Gas	Quarter
221	2005 Q3
180	2005 Q4
171	2006 Q1
224	2006 Q2
233	2006 Q3
192	2006 Q4
187	2007 Q1
234	2007 Q2
245	2007 Q3
400	2007 Q4
194	2008 Q1
229	2008 Q2
249	2008 Q3
203	2008 Q4
196	2009 Q1
238	2009 Q2
252	2009 Q3
210	2009 Q4
400	2010 Q1
236	2010 Q2

new_gas2 |>
  model(
    STL(Gas ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) |>
  components() |>
  autoplot()

It doesn’t make a difference in the remainder and Gas components, as the outliers stand out regardless of where it is located in the time series. However, it does appear that the line in the trend component becomes more straight and less smooth.

8. Recall your retail time series data (from Exercise 7 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?

data("aus_retail")

set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

# original autoplot of data
myseries |>
  autoplot(Turnover)

x11_dcmp <- myseries |>
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) |>
  components()
autoplot(x11_dcmp) +
  labs(title =
    "Decomposition of Total Australian Retail Turnover using X-11") +
  theme(plot.title = element_text(hjust = 0.5))

The trend and Turnover highlights the outliers that occur after 1995, which is apparent in the autoplot as well. The irregular decomposition shows sharp increase and decline occurring around 2016-2017, which isn’t readily apparent in the autoplot of the Turnover retail data.

9. Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.

Figure 3.19

knitr::include_graphics("/Users/mohamedhassan/Downloads/figure_319.png")

Figure 3.20

knitr::include_graphics("/Users/mohamedhassan/Downloads/figure_320.png")

a. Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.

The scale range of the value and trend decomposition are the same, from around 6500 to 9000. The scale range of the season_year component is wider, from 100 to -100, and 100 to -400. The season_year components repeats year to year, which may not be a realistic representation of the data. The negative numbers represented

b. Is the recession of 1991/1992 visible in the estimated components?

In the Value and Remainder decomposition components, the recession of 1991/1992 are visible, showing a distinct downturn in the Remainder component and a less sharp but noticeable downturn in the Value component. When examining the month by month breakdown of civilians in the labor force, May and July appear to be the only two months that shows a downturn in the civilian labor force during the 1991/1992 recession.

Data 624 Homework 2

Mohamed Hassan-El Serafi

2024-02-11

1. Consider the GDP information in `global_economy`. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?

2. For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.

3. Why is a Box-Cox transformation unhelpful for the `canadian_gas` data?

4. What Box-Cox transformation would you select for your retail data (from Exercise 7 in Section 2.10)?

5. For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance. Tobacco from `aus_production`, Economy class passengers between Melbourne and Sydney from `ansett`, and Pedestrian counts at Southern Cross Station from `pedestrian`.

Tobacco From `aus_production`

Economy class passengers between Melbourne and Sydney from `ansett`

Pedestrian counts at Southern Cross Station from `pedestrian`

7. Consider the last five years of the Gas data from `aus_production`.

a. Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?

b. Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.

c. Do the results support the graphical interpretation from part a?

d. Compute and plot the seasonally adjusted data.

e. Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?

f. Does it make any difference if the outlier is near the end rather than in the middle of the time series?

8. Recall your retail time series data (from Exercise 7 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?

9. Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.

a. Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.

b. Is the recession of 1991/1992 visible in the estimated components?

Data 624 Homework 2

Mohamed Hassan-El Serafi

2024-02-11

1. Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?

2. For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.

3. Why is a Box-Cox transformation unhelpful for the canadian_gas data?

4. What Box-Cox transformation would you select for your retail data (from Exercise 7 in Section 2.10)?

5. For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance. Tobacco from aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.

Tobacco From aus_production

Economy class passengers between Melbourne and Sydney from ansett

Pedestrian counts at Southern Cross Station from pedestrian

7. Consider the last five years of the Gas data from aus_production.

a. Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?

b. Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.

c. Do the results support the graphical interpretation from part a?

d. Compute and plot the seasonally adjusted data.

e. Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?

f. Does it make any difference if the outlier is near the end rather than in the middle of the time series?

8. Recall your retail time series data (from Exercise 7 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?

9. Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.

a. Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.

b. Is the recession of 1991/1992 visible in the estimated components?

1. Consider the GDP information in `global_economy`. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?

3. Why is a Box-Cox transformation unhelpful for the `canadian_gas` data?

5. For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance. Tobacco from `aus_production`, Economy class passengers between Melbourne and Sydney from `ansett`, and Pedestrian counts at Southern Cross Station from `pedestrian`.

Tobacco From `aus_production`

Economy class passengers between Melbourne and Sydney from `ansett`

Pedestrian counts at Southern Cross Station from `pedestrian`

7. Consider the last five years of the Gas data from `aus_production`.