Data624 Time Series Decomposition

# install.packages('ggplot2')
# install.packages('tsibbledata')
# install.packages('tsibble')
# install.packages('dplyr')
# install.packages('ggfortify')
# install.packages('feasts')


library(ggplot2)
library(tsibble)

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggfortify)
library(tidyr)
library(feasts)

## Loading required package: fabletools

gafa_stock <- tsibbledata::gafa_stock

3.1

Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?

global_economy <- tsibbledata::global_economy

head(global_economy, n = 6)

## # A tsibble: 6 x 9 [1Y]
## # Key:       Country [1]
##   Country     Code   Year         GDP Growth   CPI Imports Exports Population
##   <fct>       <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
## 1 Afghanistan AFG    1960  537777811.     NA    NA    7.02    4.13    8996351
## 2 Afghanistan AFG    1961  548888896.     NA    NA    8.10    4.45    9166764
## 3 Afghanistan AFG    1962  546666678.     NA    NA    9.35    4.88    9345868
## 4 Afghanistan AFG    1963  751111191.     NA    NA   16.9     9.17    9533954
## 5 Afghanistan AFG    1964  800000044.     NA    NA   18.1     8.89    9731361
## 6 Afghanistan AFG    1965 1006666638.     NA    NA   21.4    11.3     9938414

# 
top_economies <- global_economy %>% filter(Year == 2017) %>% group_by(Country) %>% mutate(GDP_per_Capita = GDP / Population) %>% arrange(desc(GDP_per_Capita))
top_economies

## # A tsibble: 262 x 10 [1Y]
## # Key:       Country [262]
## # Groups:    Country [262]
##    Country          Code   Year     GDP Growth   CPI Imports Exports Population
##    <fct>            <fct> <dbl>   <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
##  1 Luxembourg       LUX    2017 6.24e10   2.30 111.    194.    230.      599449
##  2 Macao SAR, China MAC    2017 5.04e10   9.10 136.     32.0    79.4     622567
##  3 Switzerland      CHE    2017 6.79e11   1.09  98.3    53.9    65.0    8466017
##  4 Norway           NOR    2017 3.99e11   1.92 115.     33.1    35.5    5282223
##  5 Iceland          ISL    2017 2.39e10   3.64 122.     42.8    47.0     341284
##  6 Ireland          IRL    2017 3.34e11   7.80 105.     87.9   120.     4813608
##  7 Qatar            QAT    2017 1.67e11   1.58 116.     37.3    51.0    2639211
##  8 United States    USA    2017 1.94e13   2.27 112.     NA      NA    325719178
##  9 North America    NAC    2017 2.10e13   2.35  NA      NA      NA    362492702
## 10 Singapore        SGP    2017 3.24e11   3.62 113.    149.    173.     5612253
## # … with 252 more rows, and 1 more variable: GDP_per_Capita <dbl>

# 
# Luxembourg has the highest GDP per capita

# global_economy <- semi_join(global_economy, top_economies, by = "GDP")

global_economy_shortlist <- global_economy %>% filter(Country == "United States" | Country == "Japan")

global_economy_shortlist %>% autoplot(GDP/ Population)

3.2

For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.

United States GDP from global_economy Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock Victorian Electricity Demand from vic_elec Gas production from aus_production

global_economy %>%
  filter(Country == "United States") %>%
  autoplot(GDP)

global_economy %>%
  filter(Country == "United States") %>%
  autoplot(log(GDP))

global_economy %>%
  filter(Country == "United States") %>%
  autoplot(GDP / Population)

aus_livestock <- tsibbledata::aus_livestock

unique(aus_livestock$Animal)

## [1] Bulls, bullocks and steers Calves                    
## [3] Cattle (excl. calves)      Cows and heifers          
## [5] Lambs                      Pigs                      
## [7] Sheep                     
## 7 Levels: Bulls, bullocks and steers Calves ... Sheep

aus_livestock %>%
  filter(Animal == "Bulls, bullocks and steers") %>%
  filter(State == "Victoria") %>%
  autoplot(Count)

vic_elec <- tsibbledata::vic_elec

vic_elec %>%
  autoplot(Demand)

aus_production <- tsibbledata::aus_production

aus_production %>% autoplot(Gas)

# this time series data is heteroscedastic meaning the variance of y is changing along the x-axis. We could try a box-cox transformation to stabilize this. 

# guerrero method for boxcox transformation

lambda <- aus_production %>% features(Gas, features = guerrero) %>% pull(lambda_guerrero)

aus_production %>%
  autoplot(box_cox(Gas, lambda))

3.3 Why is a Box-Cox transformation unhelpful for the canadian_gas data?

# we try a boxcox transformation but it's not successful in reducing heteroscedasticity / the variance is just as unequal as before. Since the boxcox transformation wasn't helpful (visually, as a heuristic)

3.4 What boxcox transformation would you select for your retail data? Exerise 8 in section 2.10

aus_retail <- tsibbledata::aus_retail

lambda <- aus_retail %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

3.5

For the following series, find an appropriate Box-Cox transformation in order to stablise the variance. Tobacco from aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.

aus_production %>% autoplot(Tobacco)

## Warning: Removed 24 row(s) containing missing values (geom_path).

lambda <- aus_production %>%
  features(Tobacco, features = guerrero) %>% pull(lambda_guerrero)

# lambda value is 0.9289

aus_production %>% autoplot(box_cox(Tobacco, lambda))

## Warning: Removed 24 row(s) containing missing values (geom_path).

ansett <- tsibbledata::ansett

melbourne <- ansett %>%
  filter(Class == "Economy", Airports == "MEL-SYD") 
  
melbourne %>%  
  autoplot(Passengers)

lambda <- melbourne %>%
  features(Passengers, features = guerrero) %>%
  pull(lambda_guerrero)

# lambda value is 1.999

melbourne %>% autoplot(box_cox(Passengers, lambda))

southern_cross <- pedestrian %>%
  filter(Sensor == "Southern Cross Station") %>%
  group_by(Sensor) %>%
  index_by(Date = yearweek(Date_Time)) %>%
  summarise(Count = sum(Count))

southern_cross %>% autoplot(Count)

lambda <- southern_cross %>%
  features(Count, features = guerrero) %>%
  pull(lambda_guerrero)

southern_cross %>% autoplot(box_cox(Count, lambda))

3.7 Consider the last five years of the Gas data from aus_production a. Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle? b. Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices. c. Do the results support the graphical interpretation from part a? d. Compute and plot the seasonally adjusted data. e. Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier? f. Does it make any difference if the outlier is near the end rather than in the middle of the time series?

unique(aus_production$Quarter)

## <yearquarter[218]>
##   [1] "1956 Q1" "1956 Q2" "1956 Q3" "1956 Q4" "1957 Q1" "1957 Q2" "1957 Q3"
##   [8] "1957 Q4" "1958 Q1" "1958 Q2" "1958 Q3" "1958 Q4" "1959 Q1" "1959 Q2"
##  [15] "1959 Q3" "1959 Q4" "1960 Q1" "1960 Q2" "1960 Q3" "1960 Q4" "1961 Q1"
##  [22] "1961 Q2" "1961 Q3" "1961 Q4" "1962 Q1" "1962 Q2" "1962 Q3" "1962 Q4"
##  [29] "1963 Q1" "1963 Q2" "1963 Q3" "1963 Q4" "1964 Q1" "1964 Q2" "1964 Q3"
##  [36] "1964 Q4" "1965 Q1" "1965 Q2" "1965 Q3" "1965 Q4" "1966 Q1" "1966 Q2"
##  [43] "1966 Q3" "1966 Q4" "1967 Q1" "1967 Q2" "1967 Q3" "1967 Q4" "1968 Q1"
##  [50] "1968 Q2" "1968 Q3" "1968 Q4" "1969 Q1" "1969 Q2" "1969 Q3" "1969 Q4"
##  [57] "1970 Q1" "1970 Q2" "1970 Q3" "1970 Q4" "1971 Q1" "1971 Q2" "1971 Q3"
##  [64] "1971 Q4" "1972 Q1" "1972 Q2" "1972 Q3" "1972 Q4" "1973 Q1" "1973 Q2"
##  [71] "1973 Q3" "1973 Q4" "1974 Q1" "1974 Q2" "1974 Q3" "1974 Q4" "1975 Q1"
##  [78] "1975 Q2" "1975 Q3" "1975 Q4" "1976 Q1" "1976 Q2" "1976 Q3" "1976 Q4"
##  [85] "1977 Q1" "1977 Q2" "1977 Q3" "1977 Q4" "1978 Q1" "1978 Q2" "1978 Q3"
##  [92] "1978 Q4" "1979 Q1" "1979 Q2" "1979 Q3" "1979 Q4" "1980 Q1" "1980 Q2"
##  [99] "1980 Q3" "1980 Q4" "1981 Q1" "1981 Q2" "1981 Q3" "1981 Q4" "1982 Q1"
## [106] "1982 Q2" "1982 Q3" "1982 Q4" "1983 Q1" "1983 Q2" "1983 Q3" "1983 Q4"
## [113] "1984 Q1" "1984 Q2" "1984 Q3" "1984 Q4" "1985 Q1" "1985 Q2" "1985 Q3"
## [120] "1985 Q4" "1986 Q1" "1986 Q2" "1986 Q3" "1986 Q4" "1987 Q1" "1987 Q2"
## [127] "1987 Q3" "1987 Q4" "1988 Q1" "1988 Q2" "1988 Q3" "1988 Q4" "1989 Q1"
## [134] "1989 Q2" "1989 Q3" "1989 Q4" "1990 Q1" "1990 Q2" "1990 Q3" "1990 Q4"
## [141] "1991 Q1" "1991 Q2" "1991 Q3" "1991 Q4" "1992 Q1" "1992 Q2" "1992 Q3"
## [148] "1992 Q4" "1993 Q1" "1993 Q2" "1993 Q3" "1993 Q4" "1994 Q1" "1994 Q2"
## [155] "1994 Q3" "1994 Q4" "1995 Q1" "1995 Q2" "1995 Q3" "1995 Q4" "1996 Q1"
## [162] "1996 Q2" "1996 Q3" "1996 Q4" "1997 Q1" "1997 Q2" "1997 Q3" "1997 Q4"
## [169] "1998 Q1" "1998 Q2" "1998 Q3" "1998 Q4" "1999 Q1" "1999 Q2" "1999 Q3"
## [176] "1999 Q4" "2000 Q1" "2000 Q2" "2000 Q3" "2000 Q4" "2001 Q1" "2001 Q2"
## [183] "2001 Q3" "2001 Q4" "2002 Q1" "2002 Q2" "2002 Q3" "2002 Q4" "2003 Q1"
## [190] "2003 Q2" "2003 Q3" "2003 Q4" "2004 Q1" "2004 Q2" "2004 Q3" "2004 Q4"
## [197] "2005 Q1" "2005 Q2" "2005 Q3" "2005 Q4" "2006 Q1" "2006 Q2" "2006 Q3"
## [204] "2006 Q4" "2007 Q1" "2007 Q2" "2007 Q3" "2007 Q4" "2008 Q1" "2008 Q2"
## [211] "2008 Q3" "2008 Q4" "2009 Q1" "2009 Q2" "2009 Q3" "2009 Q4" "2010 Q1"
## [218] "2010 Q2"
## # Year starts on: January

last_five_years <- tail(aus_production, 20) # five yars four  quarters
last_five_years %>% autoplot(Gas)

last_five_years %>%
  model(classical_decomposition(Gas, type = 'multiplicative')) %>%
  components() %>% autoplot()

## Warning: Removed 2 row(s) containing missing values (geom_path).

3.9 Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.

Write about 3-5 sentences describing the results of the decomposition. Pay attention to the cales of the graphs. Is the recession of 91/92 visible in the estimated components?

unique(aus_retail$State)

## [1] "Australian Capital Territory" "New South Wales"             
## [3] "Northern Territory"           "Queensland"                  
## [5] "South Australia"              "Tasmania"                    
## [7] "Victoria"                     "Western Australia"

aus_retail %>%
  model(STL(Turnover ~ trend(window = 7) + season(window = 'periodic'), robust = TRUE)) %>%
  components() %>% autoplot()

# There is definitely an increase in the number of people who are working in the Australian retail sector. We know this because we see a clear upward trend in the line. The recessions of 91 and 92 are a littl bit apparent in the value section, which is trend+season+remainder. These recessions are super clear in the remainder section. It should also be clear in a seasonally adjusted graph also.

Data624 Time Series Decomposition

Josef

9/19/2021