library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)
library(zoo)
library(fable)
library(sos)
library(slider)
library(seasonal)
library(x13binary)
library(readxl)

Questions

Exercise 1

head(global_economy)
## # A tsibble: 6 x 9 [1Y]
## # Key:       Country [1]
##   Country     Code   Year         GDP Growth   CPI Imports Exports Population
##   <fct>       <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
## 1 Afghanistan AFG    1960  537777811.     NA    NA    7.02    4.13    8996351
## 2 Afghanistan AFG    1961  548888896.     NA    NA    8.10    4.45    9166764
## 3 Afghanistan AFG    1962  546666678.     NA    NA    9.35    4.88    9345868
## 4 Afghanistan AFG    1963  751111191.     NA    NA   16.9     9.17    9533954
## 5 Afghanistan AFG    1964  800000044.     NA    NA   18.1     8.89    9731361
## 6 Afghanistan AFG    1965 1006666638.     NA    NA   21.4    11.3     9938414
global_economy %>%
  filter(Country == "United States") %>%
  autoplot(GDP/Population)

#A plot of GDP per capita for the United States. Adjusting for population gives a more accurate depiction of how an economy is actually performing.

Aus1 <- aus_livestock %>%
  filter(Animal == "Bulls, bullocks and steers")%>%
  index_by(Year = year(Month))
head(Aus1)
## # A tsibble: 6 x 5 [1M]
## # Key:       Animal, State [1]
## # Groups:    @ Year [1]
##      Month Animal                     State                        Count  Year
##      <mth> <fct>                      <fct>                        <dbl> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory  2300  1976
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory  2100  1976
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory  2100  1976
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory  1900  1976
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory  2100  1976
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory  1800  1976
Aus1 %>%
  filter(State == "Victoria")%>%
  autoplot(Count)
## `mutate_if()` ignored the following grouping variables:
## * Column `Year`

#There is a very clear decreasing trend of Bulls, bullocks and steers in Victoria as the plot shows. No transformation needed.

Vic.Elec <- vic_elec %>% 
  filter(grepl("2013", Time))
Vic.Elec %>%
  features(Demand, features = guerrero)
## # A tibble: 1 x 1
##   lambda_guerrero
##             <dbl>
## 1           0.109
#.109
Vic.Elec %>%
  autoplot(box_cox(Demand, 0.0109))+ labs(y = "Box-Cox transformation")

# The variability is a little better than it was before and the data is much easier on the eyes when only a single year is being looked at and a box-cox transformation is used.

aus_production %>%
  autoplot(Gas)

BCaus <- aus_production %>%
  features(Gas, features = guerrero)
#0.121
aus_production %>%
  autoplot(box_cox(Gas, 0.121)) + labs(y = "Box-Cox transformation")

#The variance is much more constant now that a box-cox transformation has been used. 

Exercise 2

autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`

CangasBC <- canadian_gas %>% 
  features(Volume, features = guerrero)
CangasBC
## # A tibble: 1 x 1
##   lambda_guerrero
##             <dbl>
## 1           0.392
#0.392
canadian_gas %>%
  autoplot(box_cox(Volume, .392)) + labs(y= "Box-Cox transformation")

# It looks very similar as it did prior to the box-cox transformation. The variance becomes a bit more constant, but there wasn't a lot of heteroskedasticity to begin with. 

Exercise 3

autoplot(aus_production, Tobacco)
## Warning: Removed 24 row(s) containing missing values (geom_path).

TobaccoBC <- aus_production %>%
  features(Tobacco, features= guerrero)
TobaccoBC
## # A tibble: 1 x 1
##   lambda_guerrero
##             <dbl>
## 1           0.929
aus_production %>%
  autoplot(box_cox(Tobacco, .929)) + labs(y = "Box-Cox transformation")
## Warning: Removed 24 row(s) containing missing values (geom_path).

The lambda value the Guerrero feature gives us is really close to 1, therefore it is not going to change the plot dramatically. The variance is relatively constant to begin with.

autoplot(ansett, Passengers)

#That is a mess
Melb_Syd <- ansett %>%
  filter(Airports == "MEL-SYD")
Melb_Syd2 <- Melb_Syd %>%
  filter(Class == "Economy")
autoplot(Melb_Syd2)
## Plot variable not specified, automatically selected `.vars = Passengers`

Melb_Syd2 %>%
  features(Passengers, features= guerrero)
## # A tibble: 1 x 3
##   Airports Class   lambda_guerrero
##   <chr>    <chr>             <dbl>
## 1 MEL-SYD  Economy            2.00
#Lambda = 2
Melb_Syd2 %>%
  autoplot(box_cox(Passengers, 2.00)) + labs(y= "Box-Cox transformation")

autoplot(pedestrian, Count)

Ped2 <- pedestrian %>%
  filter(Sensor == "Southern Cross Station")
autoplot(Ped2, Count)

Ped2 %>% 
  features(Count, features = guerrero)
## # A tibble: 1 x 2
##   Sensor                 lambda_guerrero
##   <chr>                            <dbl>
## 1 Southern Cross Station          -0.226
#Lambda = -.226
Ped2 %>% 
  autoplot(box_cox(Count, -.226)) + labs(y= "Box-Cox transformation") 

Exercise 4

movinga <- readxl::read_excel("C:\\Users\\Zacha\\OneDrive\\Documents\\Movingav.xlsx")
y_ma5 <- movinga %>%
  rollmean(num, k = 5, fill = NA)
y_ma3 <- y_ma5 %>%
  rollmean(num, k = 3, fill = NA)
y_ma3
##      num
## [1,]  NA
## [2,]  NA
## [3,]  NA
## [4,]  NA
## [5,]  NA
## [6,]  NA
z <- c(0.067, 0.133, 0.2, 0.2, 0.2, 0.133, 0.067)
z[1] = 0.067*(1/15)
z[2] = 0.133*(2/15)
z[3] = 0.200*(3/15)
z[4] = 0.200*(3/15)
z[5] = 0.200*(3/15)
z[6] = 0.133*(2/15)
z[7] = 0.067*(1/15)
z_sum <- sum(z)
z_sum
## [1] 0.1644

Exercise 5

gas <- tail(aus_production, 5*4) %>% select(Gas)
head(gas)
## # A tsibble: 6 x 2 [1Q]
##     Gas Quarter
##   <dbl>   <qtr>
## 1   221 2005 Q3
## 2   180 2005 Q4
## 3   171 2006 Q1
## 4   224 2006 Q2
## 5   233 2006 Q3
## 6   192 2006 Q4
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`

It definitely has a positive trend there is a consistent increase from year to year. There appears to be a spike halfway through each year which implies a quarterly seasonality.

gas2 <- gas %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  autoplot() + 
  labs(title = "Classical Decomposition")
gas2
## Warning: Removed 2 row(s) containing missing values (geom_path).

There is clearly an upward trend and the seasonality spikes in one spot each time showing our quarterly seasonality. I would say the Classical Decomposition certainly supports the interpretation from a. autoplot(gas)

seasonal <- as_tsibble(gas) %>%
  model(classical_decomposition(Gas ~ season(4), type = "mult")) %>%
  components()
seasonal
## # A dable: 20 x 7 [1Q]
## # Key:     .model [1]
## # :        Gas = trend * seasonal * random
##    .model                             Quarter   Gas trend seaso~1 random seaso~2
##    <chr>                                <qtr> <dbl> <dbl>   <dbl>  <dbl>   <dbl>
##  1 "classical_decomposition(Gas ~ se~ 2005 Q3   221   NA    1.13  NA        196.
##  2 "classical_decomposition(Gas ~ se~ 2005 Q4   180   NA    0.925 NA        195.
##  3 "classical_decomposition(Gas ~ se~ 2006 Q1   171  200.   0.875  0.974    195.
##  4 "classical_decomposition(Gas ~ se~ 2006 Q2   224  204.   1.07   1.02     209.
##  5 "classical_decomposition(Gas ~ se~ 2006 Q3   233  207    1.13   1.00     207.
##  6 "classical_decomposition(Gas ~ se~ 2006 Q4   192  210.   0.925  0.987    208.
##  7 "classical_decomposition(Gas ~ se~ 2007 Q1   187  213    0.875  1.00     214.
##  8 "classical_decomposition(Gas ~ se~ 2007 Q2   234  216.   1.07   1.01     218.
##  9 "classical_decomposition(Gas ~ se~ 2007 Q3   245  219.   1.13   0.996    218.
## 10 "classical_decomposition(Gas ~ se~ 2007 Q4   205  219.   0.925  1.01     222.
## 11 "classical_decomposition(Gas ~ se~ 2008 Q1   194  219.   0.875  1.01     222.
## 12 "classical_decomposition(Gas ~ se~ 2008 Q2   229  219    1.07   0.974    213.
## 13 "classical_decomposition(Gas ~ se~ 2008 Q3   249  219    1.13   1.01     221.
## 14 "classical_decomposition(Gas ~ se~ 2008 Q4   203  220.   0.925  0.996    219.
## 15 "classical_decomposition(Gas ~ se~ 2009 Q1   196  222.   0.875  1.01     224.
## 16 "classical_decomposition(Gas ~ se~ 2009 Q2   238  223.   1.07   0.993    222.
## 17 "classical_decomposition(Gas ~ se~ 2009 Q3   252  225.   1.13   0.994    224.
## 18 "classical_decomposition(Gas ~ se~ 2009 Q4   210  226    0.925  1.00     227.
## 19 "classical_decomposition(Gas ~ se~ 2010 Q1   205   NA    0.875 NA        234.
## 20 "classical_decomposition(Gas ~ se~ 2010 Q2   236   NA    1.07  NA        220.
## # ... with abbreviated variable names 1: seasonal, 2: season_adjust
seasonal %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, color = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, color = "Trend")) +
  labs(y = "Gas",
       title = "Gas Production") +
  scale_color_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 row(s) containing missing values (geom_path).

outlier <- gas
  outlier[1,1]<- gas[1,1] + 300 
head(outlier)
## # A tsibble: 6 x 2 [1Q]
##     Gas Quarter
##   <dbl>   <qtr>
## 1   521 2005 Q3
## 2   180 2005 Q4
## 3   171 2006 Q1
## 4   224 2006 Q2
## 5   233 2006 Q3
## 6   192 2006 Q4
outlierCD <- outlier %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  autoplot() + 
  labs(title = "Classical Decomposition with outlier")
outlierCD
## Warning: Removed 2 row(s) containing missing values (geom_path).

Soutlier<-as_tsibble(outlier) %>%
  model(classical_decomposition(Gas ~ season(4), type = "mult")) %>%
  components()
Soutlier
## # A dable: 20 x 7 [1Q]
## # Key:     .model [1]
## # :        Gas = trend * seasonal * random
##    .model                             Quarter   Gas trend seaso~1 random seaso~2
##    <chr>                                <qtr> <dbl> <dbl>   <dbl>  <dbl>   <dbl>
##  1 "classical_decomposition(Gas ~ se~ 2005 Q3   521   NA    1.14  NA        459.
##  2 "classical_decomposition(Gas ~ se~ 2005 Q4   180   NA    0.933 NA        193.
##  3 "classical_decomposition(Gas ~ se~ 2006 Q1   171  238    0.849  0.846    201.
##  4 "classical_decomposition(Gas ~ se~ 2006 Q2   224  204.   1.08   1.02     207.
##  5 "classical_decomposition(Gas ~ se~ 2006 Q3   233  207    1.14   0.992    205.
##  6 "classical_decomposition(Gas ~ se~ 2006 Q4   192  210.   0.933  0.979    206.
##  7 "classical_decomposition(Gas ~ se~ 2007 Q1   187  213    0.849  1.03     220.
##  8 "classical_decomposition(Gas ~ se~ 2007 Q2   234  216.   1.08   1.00     216.
##  9 "classical_decomposition(Gas ~ se~ 2007 Q3   245  219.   1.14   0.987    216.
## 10 "classical_decomposition(Gas ~ se~ 2007 Q4   205  219.   0.933  1.00     220.
## 11 "classical_decomposition(Gas ~ se~ 2008 Q1   194  219.   0.849  1.04     229.
## 12 "classical_decomposition(Gas ~ se~ 2008 Q2   229  219    1.08   0.965    211.
## 13 "classical_decomposition(Gas ~ se~ 2008 Q3   249  219    1.14   1.00     219.
## 14 "classical_decomposition(Gas ~ se~ 2008 Q4   203  220.   0.933  0.987    218.
## 15 "classical_decomposition(Gas ~ se~ 2009 Q1   196  222.   0.849  1.04     231.
## 16 "classical_decomposition(Gas ~ se~ 2009 Q2   238  223.   1.08   0.985    220.
## 17 "classical_decomposition(Gas ~ se~ 2009 Q3   252  225.   1.14   0.986    222.
## 18 "classical_decomposition(Gas ~ se~ 2009 Q4   210  226    0.933  0.996    225.
## 19 "classical_decomposition(Gas ~ se~ 2010 Q1   205   NA    0.849 NA        242.
## 20 "classical_decomposition(Gas ~ se~ 2010 Q2   236   NA    1.08  NA        218.
## # ... with abbreviated variable names 1: seasonal, 2: season_adjust
Soutlier %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, color = "Data")) +
  geom_line(aes(y = season_adjust,
                color = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, color = "Trend")) +
  labs(y = "Gas",
       title = "Gas Production with an outlier at the beginning") +
  scale_color_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 row(s) containing missing values (geom_path).

The outlier has made a very obvious impact on the seasonally adjusted data. A giant spike in the beginning of the data is hard to miss. The trend is thrown off and takes about half a year to begin the upward trend with the outlier.

outliermid <- gas
  outliermid[12,1] <- gas[12,1] +300 
outliermid
## # A tsibble: 20 x 2 [1Q]
##      Gas Quarter
##    <dbl>   <qtr>
##  1   221 2005 Q3
##  2   180 2005 Q4
##  3   171 2006 Q1
##  4   224 2006 Q2
##  5   233 2006 Q3
##  6   192 2006 Q4
##  7   187 2007 Q1
##  8   234 2007 Q2
##  9   245 2007 Q3
## 10   205 2007 Q4
## 11   194 2008 Q1
## 12   529 2008 Q2
## 13   249 2008 Q3
## 14   203 2008 Q4
## 15   196 2009 Q1
## 16   238 2009 Q2
## 17   252 2009 Q3
## 18   210 2009 Q4
## 19   205 2010 Q1
## 20   236 2010 Q2
outliermid2 <- outliermid %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical Decomposition with Middle Outlier")
outliermid2
## Warning: Removed 2 row(s) containing missing values (geom_path).

outliermid3 <-as_tsibble(outliermid) %>%
  model(classical_decomposition(Gas ~ season(4), type = "mult")) %>%
  components()
outliermid3 %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, color = "Data")) +
  geom_line(aes(y = season_adjust,
                color = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, color = "Trend")) +
  labs(y = "Gas",
       title = "Gas Production with a middle outlier") +
  scale_color_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 row(s) containing missing values (geom_path).

outlierend <- gas
  outlierend[20,1] <- gas[20,1] +300 
outlierend
## # A tsibble: 20 x 2 [1Q]
##      Gas Quarter
##    <dbl>   <qtr>
##  1   221 2005 Q3
##  2   180 2005 Q4
##  3   171 2006 Q1
##  4   224 2006 Q2
##  5   233 2006 Q3
##  6   192 2006 Q4
##  7   187 2007 Q1
##  8   234 2007 Q2
##  9   245 2007 Q3
## 10   205 2007 Q4
## 11   194 2008 Q1
## 12   229 2008 Q2
## 13   249 2008 Q3
## 14   203 2008 Q4
## 15   196 2009 Q1
## 16   238 2009 Q2
## 17   252 2009 Q3
## 18   210 2009 Q4
## 19   205 2010 Q1
## 20   536 2010 Q2
outlierend2 <- outlierend %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  autoplot() +
  labs(title = "Classical Decomposition with End Outlier")

outlierend2
## Warning: Removed 2 row(s) containing missing values (geom_path).

outlierend3 <-as_tsibble(outlierend) %>%
  model(classical_decomposition(Gas ~ season(4), type = "mult")) %>%
  components()

outlierend3 %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, color = "Data")) +
  geom_line(aes(y = season_adjust,
                color = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, color = "Trend")) +
  labs(y = "Gas",
       title = "Gas Production with an outlier at the end") +
  scale_color_manual(
    values = c("gray", "#0072B2", "#D55E00"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )
## Warning: Removed 4 row(s) containing missing values (geom_path).

It is different when the outlier is in the middle rather than the end. The trend remains positive all the way through with an outlier at the end, it just has a spike at the end. With an outlier in the middle the trend has to settle back in which leads to it decreasing for a portion of time.

Exercise 6

set.seed(1738133)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`

x11 <- myseries %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components()

autoplot(x11) + labs(title = "Decompostion using x11")

###Exercise 7

There is a clear steady positive trend throughout the years that begins to plateau slightly in the early 90s, but always appears to remain positive. A few months have higher variations ranges than the others mainly: March, July, and August. The mean for each month vary greatly over the years with December having the highest by far.

The recession in 1991 and 1992 is visible through the positive trend plateauing at approximately that time. The seasonality appears to offer no explanation for it meaning it’s clearly a recession.

###Exercise 8

canadian_gas
## # A tsibble: 542 x 2 [1M]
##       Month Volume
##       <mth>  <dbl>
##  1 1960 Jan  1.43 
##  2 1960 Feb  1.31 
##  3 1960 Mar  1.40 
##  4 1960 Apr  1.17 
##  5 1960 May  1.12 
##  6 1960 Jun  1.01 
##  7 1960 Jul  0.966
##  8 1960 Aug  0.977
##  9 1960 Sep  1.03 
## 10 1960 Oct  1.25 
## # ... with 532 more rows
## # i Use `print(n = ...)` to see more rows
autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`

gg_subseries(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`

gg_season(canadian_gas)  
## Plot variable not specified, automatically selected `y = Volume`

canadian_gas %>%
  model(STL(Volume ~ season(window = 12), robust=TRUE)) %>%
  components() %>% autoplot() +
  labs(title = "STL Decompostion: Canadian Gas")

#The variance gets a lot larger in the middle

Cangascomp <- canadian_gas%>%
  model(stl=STL(Volume))

sdcomp <- canadian_gas %>%
  autoplot(Volume, color = "Orange")+
  autolayer(components(Cangascomp), season_adjust, color = "#0072B2")
sdcomp

###Exercise 9

liquor<-aus_retail%>%
  filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
  summarise(Turnover = sum(Turnover))

ggplot(data= liquor)+
  geom_line(aes(x=Month, y=Turnover))

liquor %>%
  model(STL(Turnover ~ season(window = 12), robust=TRUE)) %>%
  components() %>%
  autoplot() + labs(title = "Liquor Turnover")

liquor <-liquor%>%
  mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
                                     .before = 5, .after = 6, .complete = TRUE))%>%
  mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
                                       .before = 1, .after = 0, .complete = TRUE))
liquor%>%
  autoplot(Turnover) +
  geom_line(aes(y =`2x12-MA`), colour = "#0072B2") +
  labs(y = "Liquor retailing Turnover",
       title = "Total Liquor Turnover")+
  guides(colour = guide_legend(title = "series"))
## Warning: Removed 12 row(s) containing missing values (geom_path).

###Exercise 10

Additive decomposition: 1) If m is even then calculate the trend-cycle using 2*m -MA. If m is odd then use m-MA. 2) Calculate the detrended series Yt - Tt. 3) For the seasonal component average the detrended values for that season. 4) The effects should be in a normal distribution and the average of m seasonal components should be 0. Next calculate the total sum of season components and then calculate the average of the seasonal components. Then adjust each seasonal component. 5) You can calculate the remainder by subtracting the estimated seasonal and trend-cycle components: Rt = yt - Tt - St.

Multiplicative decomposition 1) If m is even calculate the trend-cyle using 2m -MA. If m is odd use m-MA. 2) Calculate the detrended series using yt/Tt. 3) Estimate the seasonal component by averaging the detrended values of that season. 4) Make the seasonal effects normalized. Average of m seasonal components is 1, which is gathered by recreating the sequence for each years data (St). Calculate the total sum of the seasonal components. Next calculate the average of the seasonal components then adjust each seasonal component. 5) Calculate the remainder component by subtracting the estimated seasonal and trend-cycle components : Rt = yt/(Tt St).