Week 2 HW Problems

Exercises 3.1, 3.2, 3.3, 3.4, 3.5, 3.7, 3.8 and 3.9 from the online Hyndman book

library("fma")
library("xts")
library("ggplot2")
library("forecast")
library("fpp3")
library('lubridate')
library('tsibble')
library('readr')
library('readxl')

3.1

Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?

# Plot GDP per capita for every country. The legend is suppressed because the
# data set is keyed by several hundred countries.
global_economy %>%
  autoplot(GDP / Population, show.legend = FALSE)

# Store GDP per capita as its own column so it can be filtered and sorted.
mutate_global_economy <- global_economy %>%
  mutate(GDP_per_capita = GDP / Population)

# Plot only the observations whose GDP per capita exceeds 100,000.
mutate_global_economy %>%
  filter(GDP_per_capita > 100000) %>%
  autoplot(GDP_per_capita) +
  labs(title = "GDP per capita", y = "US Dollars")

# Rank every country-year observation from highest to lowest GDP per capita.
mutate_global_economy %>%
  arrange(desc(GDP_per_capita))
## # A tsibble: 15,150 x 10 [1Y]
## # Key:       Country [263]
##    Country       Code   Year    GDP Growth   CPI Imports Exports Popul…¹ GDP_p…²
##    <fct>         <fct> <dbl>  <dbl>  <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 Monaco        MCO    2014 7.06e9  7.18     NA      NA      NA   38132 185153.
##  2 Monaco        MCO    2008 6.48e9  0.732    NA      NA      NA   35853 180640.
##  3 Liechtenstein LIE    2014 6.66e9 NA        NA      NA      NA   37127 179308.
##  4 Liechtenstein LIE    2013 6.39e9 NA        NA      NA      NA   36834 173528.
##  5 Monaco        MCO    2013 6.55e9  9.57     NA      NA      NA   37971 172589.
##  6 Monaco        MCO    2016 6.47e9  3.21     NA      NA      NA   38499 168011.
##  7 Liechtenstein LIE    2015 6.27e9 NA        NA      NA      NA   37403 167591.
##  8 Monaco        MCO    2007 5.87e9 14.4      NA      NA      NA   35111 167125.
##  9 Liechtenstein LIE    2016 6.21e9 NA        NA      NA      NA   37666 164993.
## 10 Monaco        MCO    2015 6.26e9  4.94     NA      NA      NA   38307 163369.
## # … with 15,140 more rows, and abbreviated variable names ¹​Population,
## #   ²​GDP_per_capita

3.2

For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.

  • United States GDP from global_economy.
  • Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.
  • Victorian Electricity Demand from vic_elec.
  • Gas production from aus_production.

United States GDP from global_economy.

# Plot United States GDP over time. The GDP column is named explicitly so that
# autoplot() does not have to guess which measured variable to draw.
global_economy %>%
  filter(Country == "United States") %>%
  autoplot(GDP) +
  labs(title = "United States GDP", y = "USD")

Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.

# Plot the slaughter counts for Victorian bulls, bullocks and steers. Count is
# named explicitly so that autoplot() does not have to guess the variable.
aus_livestock %>%
  filter(Animal == "Bulls, bullocks and steers", State == "Victoria") %>%
  autoplot(Count) +
  labs(title = "Bulls, bullocks and steers")

Victorian Electricity Demand from vic_elec.

# Inspect the first rows of the half-hourly Victorian electricity data.
head(vic_elec)
## # A tsibble: 6 x 5 [30m] <Australia/Melbourne>
##   Time                Demand Temperature Date       Holiday
##   <dttm>               <dbl>       <dbl> <date>     <lgl>  
## 1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
## 2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
## 3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
## 4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
## 5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
## 6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE
# Plot Demand explicitly: vic_elec also holds Temperature, so the plotted
# variable should not be left to autoplot()'s automatic selection.
autoplot(vic_elec, Demand) +
  labs(title = "30 Min Electricity Demand")

Gas production from aus_production

# Inspect the first rows of the quarterly Australian production data.
aus_production %>% head()
## # A tsibble: 6 x 7 [1Q]
##   Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##     <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
## 1 1956 Q1   284    5225    189    465        3923     5
## 2 1956 Q2   213    5178    204    532        4436     6
## 3 1956 Q3   227    5297    208    561        4806     7
## 4 1956 Q4   308    5681    197    570        4418     6
## 5 1957 Q1   262    5577    187    529        4339     5
## 6 1957 Q2   228    5651    214    604        4811     7
# Plot the Gas column over time.
aus_production %>%
  autoplot(Gas) +
  labs(title = "Australian Gas Production")

### 3.3 Why is a Box-Cox transformation unhelpful for the canadian_gas data?

# Inspect the first rows of the monthly canadian_gas series.
head(canadian_gas)
## # A tsibble: 6 x 2 [1M]
##      Month Volume
##      <mth>  <dbl>
## 1 1960 Jan   1.43
## 2 1960 Feb   1.31
## 3 1960 Mar   1.40
## 4 1960 Apr   1.17
## 5 1960 May   1.12
## 6 1960 Jun   1.01

# Build the plot of the untransformed series before printing it; printing
# non_box_cox ahead of its assignment fails with "object not found".
non_box_cox <- autoplot(canadian_gas, Volume)
non_box_cox

3.5

For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance. Tobacco from aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.

Tobacco from aus_production

# Inspect the first rows of the quarterly Australian production data.
aus_production %>% head()
## # A tsibble: 6 x 7 [1Q]
##   Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##     <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
## 1 1956 Q1   284    5225    189    465        3923     5
## 2 1956 Q2   213    5178    204    532        4436     6
## 3 1956 Q3   227    5297    208    561        4806     7
## 4 1956 Q4   308    5681    197    570        4418     6
## 5 1957 Q1   262    5577    187    529        4339     5
## 6 1957 Q2   228    5651    214    604        4811     7
# Plot the untransformed Tobacco column over time.
aus_production %>%
  autoplot(Tobacco) +
  labs(title = "Tobacco Production")

# Keep only the index and the Tobacco series.
aus_tobacco <- aus_production %>%
  select(Quarter, Tobacco)

# Choose the Box-Cox parameter with the Guerrero feature.
lambda <- aus_tobacco %>%
  features(Tobacco, features = guerrero) %>%
  pull(lambda_guerrero)

# Plot the transformed series. The rounded lambda is pasted into the title;
# passing it to labs() as a separate unnamed argument never displays it.
aus_tobacco %>%
  autoplot(box_cox(Tobacco, lambda)) +
  labs(
    y = "",
    title = paste0(
      "Transformed Tobacco Production, lambda = ",
      round(lambda, 2)
    )
  )

The Box-Cox transformation produces no substantive change in the shape of the Tobacco series.

Economy class passengers between Melbourne and Sydney from ansett

# Inspect the first rows of the weekly Ansett passenger data.
head(ansett)
## # A tsibble: 6 x 4 [1W]
## # Key:       Airports, Class [1]
##       Week Airports Class    Passengers
##     <week> <chr>    <chr>         <dbl>
## 1 1989 W28 ADL-PER  Business        193
## 2 1989 W29 ADL-PER  Business        254
## 3 1989 W30 ADL-PER  Business        185
## 4 1989 W31 ADL-PER  Business        254
## 5 1989 W32 ADL-PER  Business        191
## 6 1989 W33 ADL-PER  Business        136
# Keep only economy-class traffic on the Melbourne-Sydney route.
eco_Mel_syd <- ansett %>%
  filter(Airports == "MEL-SYD", Class == "Economy")

# Plot the raw series. Passengers is named explicitly so that autoplot() does
# not have to guess the variable.
autoplot(eco_Mel_syd, Passengers) +
  labs(title = "Weekly Economy Class Passengers. Airport: Melbourne and Sydney")

# Choose the Box-Cox parameter with the Guerrero feature.
lmda <- eco_Mel_syd %>%
  features(Passengers, features = guerrero) %>%
  pull(lambda_guerrero)

# Plot the Box-Cox transformed passenger counts.
eco_Mel_syd %>%
  autoplot(box_cox(Passengers, lmda))

Pedestrian counts at Southern Cross Station from pedestrian.

# Inspect the first rows of the hourly pedestrian sensor data.
pedestrian %>% head()
## # A tsibble: 6 x 5 [1h] <Australia/Melbourne>
## # Key:       Sensor [1]
##   Sensor         Date_Time           Date        Time Count
##   <chr>          <dttm>              <date>     <int> <int>
## 1 Birrarung Marr 2015-01-01 00:00:00 2015-01-01     0  1630
## 2 Birrarung Marr 2015-01-01 01:00:00 2015-01-01     1   826
## 3 Birrarung Marr 2015-01-01 02:00:00 2015-01-01     2   567
## 4 Birrarung Marr 2015-01-01 03:00:00 2015-01-01     3   264
## 5 Birrarung Marr 2015-01-01 04:00:00 2015-01-01     4   139
## 6 Birrarung Marr 2015-01-01 05:00:00 2015-01-01     5    77
# Keep only the Southern Cross Station sensor.
scs_pedestrian <- filter(pedestrian, Sensor == "Southern Cross Station")

# Plot the raw hourly counts.
scs_pedestrian %>%
  autoplot(Count) +
  labs(
    title = "Hourly Pedestrian counts @ Southern Cross Station",
    y = "Pedestrian Count"
  )

# Choose the Box-Cox parameter with the Guerrero feature.
lambda_scs <- pull(
  features(scs_pedestrian, Count, features = guerrero),
  lambda_guerrero
)

# Plot the Box-Cox transformed counts.
scs_pedestrian %>%
  autoplot(box_cox(Count, lambda_scs)) +
  labs(
    title = "Hourly Pedestrian counts at Southern Cross Station",
    y = "Transformed Pedestrian Counts"
  )

3.9

  1. Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.
  • The trend component drives the overall shape of the value panel: the value series increases from left to right, just as the trend does.
  • The season_year panel shows a regular, repeating seasonal pattern.
  • The remainder component has a noticeable effect on the value panel, most notably around 1991–1992.
  2. Is the recession of 1991/1992 visible in the estimated components?
    Yes, the recession is visible in the estimated components.