Load packages and data

library(dplyr)
library(ggplot2)
library(tidyr)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)
library(zoo)
library(fable)
library(brew)
library(sos)
library(slider)
library(seasonal)
library(x13binary)

Questions

Exercise 1

# 1. United States GDP from global_economy.

us_economy_bil <- global_economy %>%
  filter(Country == "United States") %>%
  select(-c(Code, Growth, CPI, Imports, Exports, Population)) %>%
  mutate(GDP = GDP/1000000000)

us_economy_plot1 <- us_economy_bil %>%
  ggplot(aes(x = Year, y = GDP)) +
  geom_line() +
  labs(y = "GDP in Billions($)", title = "GDP Over Time")
us_economy_plot1

us_economy_pop <- global_economy %>%
  filter(Country == "United States") %>%
  select(-c(Code, Growth, CPI, Imports, Exports)) %>%
  mutate(GDP = GDP/Population)

us_economy_plot2 <- us_economy_pop %>%
  ggplot(aes(x = Year, y = GDP)) +
  geom_line() +
  labs(title = "GDP per Capita Over Time", y = "GDP in Dollars")
us_economy_plot2

In order to better understand the GDP data for the US I transformed the data in two ways. I converted the data to dollars in billions to make it easier to understand the changes throughout time. I also converted the data to be GDP per capita to see how the individual’s income changed throughout time. Neither of these appear to change the trend, seasonality, nor cycle of the data but made it easier to interpret the graph.

# 2. Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.

v_bbs <- aus_livestock %>%
  filter(State == "Victoria", 
         Animal== "Bulls, bullocks and steers")

v_bbs_ts <- v_bbs %>%
  mutate(Month = yearmonth(Month)) %>%
  as_tsibble(index = Month) %>%
  select(-State)

v_bbs_ts_plot <- v_bbs %>%
  ggplot(mapping = aes(x = Month, y = Count)) +
  geom_line() +
  labs(title = "Slaughter of Bulls, Bullocks, and Steers in Victoria", y = "Slaughter Count")
v_bbs_ts_plot

v_bbs_ts %>%
  gg_season()
## Plot variable not specified, automatically selected `y = Count`

v_bbs_ts %>%
  gg_subseries()
## Plot variable not specified, automatically selected `y = Count`

Before transforming the data, I can see a trend decreasing over time. I transformed the data to be seasonal and monthly data. This does not show much different in trend, seasonality, nor cycle. I was able to see that slaughter counts of bulls, bullocks, and steers is less, on average, in the middle months of the year using the gg_subseries() function.

# 3. Victorian Electricity Demand from vic_elec.

vic_elect_ts <- vic_elec %>%
  mutate(Time = as_datetime(Time)) %>%
  as_tsibble(index = Time) %>%
  select(-c(Temperature, Date, Holiday))

vic_elec_ts_plot <- vic_elect_ts %>%
  ggplot(aes(x = Time, y = Demand)) +
  geom_line() +
  labs(title = "Electricity Demand Over Time")
vic_elec_ts_plot

vic_elec %>%
  gg_season() +
  labs(title = "Seasonal Electricity Demand")
## Plot variable not specified, automatically selected `y = Demand`

Before transforming the data I could see a clear seasonality of the data where the demand for electricity increases in the middle of each year. In order to check this I used gg_season(). This showed me the increased variation of the data at the beginning of 2014. This variance decreased through time.

# 4. Gas production from aus_production.

gas_production <- aus_production %>%
  select(-c(Beer, Tobacco, Bricks, Electricity, Cement))

gas_production_plot <- gas_production %>%
  ggplot(aes(x = Quarter, y = Gas)) +
  geom_line() +
  labs(title = "Australian Gas Production", y = "Gas Production in Gallons")
gas_production_plot

gas_production %>%
  gg_season()
## Plot variable not specified, automatically selected `y = Gas`

gas_production_stl <- gas_production %>%
  model(STL(Gas ~ season(window = 12), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL decomposition: Gas")
gas_production_stl

Before transforming the data, I can tell that variation in the data increases through time. In addition to increased variance, I can see a clear yearly season such that Q2 and Q3 increase every year compared to Q1 and Q4. I saw this using the gg_season() function. After doing an STL transformation, my findings from plotting and doing gg_season() are upheld. I can see the increase in variation in both the seasonality and remainder components.

Exercise 2

gas_production <- aus_production %>%
  select(-c(Beer, Tobacco, Bricks, Electricity, Cement))

gas_production %>%
  features(Gas, features = guerrero)
## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.121
gas_production %>%
  autoplot(box_cox(Gas, 0.121)) +
  labs(y = "Box-Cox Transformed Tobacco")

The Box-Cos transformation is unhelpful for the canadian_gas data because it fails to make it resemble a normal distribution. It also faild to make the variance consistent. The variance in the data is constant then increases drastically from 1969 till 1975. After 1975 the variance is consistent again till 2010.

Exercise 3

# Australian Tobacco Production

tob_production <- aus_production %>%
  select(-c(Beer, Bricks, Electricity, Cement, Gas))

tob_production %>%
  features(Tobacco, features = guerrero)
## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.929
tob_production %>%
  autoplot(box_cox(Tobacco, 0.929)) +
  labs(y = "Box-Cox Transformed Tobacco")
## Warning: Removed 24 row(s) containing missing values (geom_path).

# Economy Passengers between Melbourne and Sydney

aus_flights <- ansett %>%
  filter(Airports == "MEL-SYD",
         Class == "Economy")
aus_flights %>%
  features(Passengers, features = guerrero)
## # A tibble: 1 × 3
##   Airports Class   lambda_guerrero
##   <chr>    <chr>             <dbl>
## 1 MEL-SYD  Economy            2.00
aus_flights %>%
  autoplot(box_cox(Passengers, 2.00)) +
  labs(y = "Box-Cox Transformed Passengers")

# Pedestrian Counts at Southern Cross Station

scs_pedestrian <- pedestrian %>%
  filter(Sensor == "Southern Cross Station") 

scs_pedestrian %>%
  features(Count, features = guerrero)
## # A tibble: 1 × 2
##   Sensor                 lambda_guerrero
##   <chr>                            <dbl>
## 1 Southern Cross Station          -0.226
scs_pedestrian %>%
  autoplot(box_cox(Count, -0.226)) +
  labs(y = "Box-Cox Transformed Count")

Exercise 4

y <- readxl::read_excel("//Users//colinadams//Documents//GCSU//Fall 2022//Forecasting//Homeworks//Homework 2//y.xlsx")
y_ma5 <- y %>%
  rollmean(num, k = 5, fill = NA)
y_ma3 <- y_ma5 %>%
  rollmean(num, k = 3, fill = NA)
y_ma3
##      num
## [1,]  NA
## [2,]  NA
## [3,]  NA
## [4,] 122
## [5,]  NA
## [6,]  NA
## [7,]  NA
z <- c(100, 110, 130, 120, 130, 110, 150)
z[1] = 0.066666666*(100)
z[2] = 0.133333333*(110)
z[3] = 0.200*(130)
z[4] = 0.200*(120)
z[5] = 0.200*(130)
z[6] = 0.133333333*(110)
z[7] = 0.066666666*(150)
z_sum <- sum(z)
z_sum
## [1] 122

3x5-MA is equivalent to a 7-term weighted moving average because they both result in an average of 122.

Exercise 5

# 1

gas_aus <- tail(aus_production, 5*4) %>%
  select(Gas)

gas_aus %>%
  autoplot() +
  labs(title = "Gas Usage per Quarter")
## Plot variable not specified, automatically selected `.vars = Gas`

The gas usage is seasonal fluctuated with spikes in Q2 and Q3 of every year.

# 2

gas_aus %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  autoplot()
## Warning: Removed 2 row(s) containing missing values (geom_path).

#3 The results of the STL decomposition support that there is a yearly seasonal component. This results in high gas usage in Q2 and Q3. We also see that the remainder is random and appears to be white noise, although it would be ideal to have a larger sample to judge this.

# 4

gas_adjusted <- gas_aus %>%
  model(STL(Gas ~ season(window = 12), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL decomposition: Gas")
gas_adjusted

# 5

gas_outlier1 <- gas_aus

gas_outlier1[1,1] = 999

gas_outlier_stl1 <- gas_outlier1 %>%
  model(STL(Gas ~ season(window = 12), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL decomposition: Gas Beginning Outlier")
gas_outlier_stl1

# 6
gas_outlier2 <- gas_aus
gas_outlier2[10,1] = 999

gas_outlier_stl2 <- gas_outlier2 %>%
  model(STL(Gas ~ season(window = 12), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL decomposition: Gas Middle Outlier")
gas_outlier_stl2

gas_outlier3 <- gas_aus
gas_outlier3[20,1] = 999

gas_outlier_stl3 <- gas_outlier3 %>%
  model(STL(Gas ~ season(window = 12), robust = TRUE)) %>%
  components() %>% 
  autoplot() +
  labs(title = "STL decomposition: Gas End Outlier")
gas_outlier_stl3

The position of the outlier in the data has no effect on the trend or seasonality. The presence of an outlier had a huge effect on the remainder. This is due to it being hundreds of dollars more than any other observation in the data. This outlier makes the remainder look like a straight line and removes any chance of white noise. The reason for it having no effect on the trend or seasonality is because it takes multiple observations to have an effect on either trend or seasonality.

Exercise 6

set.seed(87654321)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`

myseries_x11 <- myseries %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components()

autoplot(myseries_x11)

The X-11 decomposition reveals a large outlier in the end of 2000. This outlier may be because of the effect the dot com bubble had on the economy, although I am not sure how large of an effect it had in Australia. Aside from these outliers, the remainder appears to be white noise. In addition, The seasonal component is also higher in the latter years of the 2010s than it had ever been previously.

Exercise 7

  1. By using STL decomposition it is clear that there is a positive upward trend since 1980 and lots of seasonality. The seasonality is yearly and peaks in December of each year. The remainder looks to be white noise aside from data in 1992 where the remainder is significantly lower.

  2. Yes we can see that the remainder decreases significantly. This led to more unemployment. This is not explained in our trend nor seasonality. After 1993 our employment increases again and the trend continues.

Exercise 8

# 1

canadian_gas %>%
  autoplot()
## Plot variable not specified, automatically selected `.vars = Volume`

canadian_gas %>%
  gg_subseries()
## Plot variable not specified, automatically selected `y = Volume`

canadian_gas %>%
  gg_season
## Plot variable not specified, automatically selected `y = Volume`

# 2

canadian_gas_stl <- canadian_gas %>%
  model(STL(Volume ~ season(window = 12), robust = TRUE)) %>%
  components() %>%
  autoplot() +
  labs("STL Decomposition: Canadian Gas")
canadian_gas_stl

#3

There is more seasonality throughout the 1980s than the rest of the data. This is at a point in the data where there is no trend. The high seasonality could be part of the reason for there not being a trend.

# 4

canadian_gas_dcmp <- canadian_gas %>%
  model(stl = STL(Volume))
components(canadian_gas_dcmp)
## # A dable: 542 x 7 [1M]
## # Key:     .model [1]
## # :        Volume = trend + season_year + remainder
##    .model    Month Volume trend season_year remainder season_adjust
##    <chr>     <mth>  <dbl> <dbl>       <dbl>     <dbl>         <dbl>
##  1 stl    1960 Jan  1.43   1.08      0.520   -0.172           0.911
##  2 stl    1960 Feb  1.31   1.11      0.215   -0.0178          1.09 
##  3 stl    1960 Mar  1.40   1.13      0.307   -0.0395          1.09 
##  4 stl    1960 Apr  1.17   1.16      0.0161  -0.00627         1.15 
##  5 stl    1960 May  1.12   1.18     -0.116    0.0476          1.23 
##  6 stl    1960 Jun  1.01   1.21     -0.356    0.159           1.37 
##  7 stl    1960 Jul  0.966  1.23     -0.403    0.136           1.37 
##  8 stl    1960 Aug  0.977  1.26     -0.349    0.0677          1.33 
##  9 stl    1960 Sep  1.03   1.28     -0.340    0.0870          1.37 
## 10 stl    1960 Oct  1.25   1.31     -0.0899   0.0329          1.34 
## # … with 532 more rows
## # ℹ Use `print(n = ...)` to see more rows
canadian_gas %>%
  autoplot(Volume, color = 'gray') +
  autolayer(components(canadian_gas_dcmp), season_adjust, color = 'red')

#5

Exercise 9

# 1
aus_liquor <-aus_retail%>%
  filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
  summarise(Turnover = sum(Turnover))

ggplot(data = aus_liquor)+
  geom_line(aes(x=Month, y=Turnover))

#2 The best moving average approach for this type of data is to do a monthly moving average then do a double moving average of the moving average. This allows us to see the yearly trend and then track how this changed from year to year. Overall I think this is the best method to account for monthly change and year over year change.

# 3
aus_liquor_stl <- aus_liquor %>%
  model(STL(Turnover ~ season(window = 12), robust = TRUE)) %>%
  components() %>%
  autoplot() +
  labs(title = "STL Decomposition: Australian Liquor")
aus_liquor_stl

aus_liquor <- aus_liquor %>%
  mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
                                     .before = 5, .after = 6, .complete = TRUE))%>%
  mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
                                       .before = 1, .after = 0, .complete = TRUE))
aus_liquor %>%
  autoplot(Turnover) +
  geom_line(aes(y =`2x12-MA`), color = "blue") +
  labs(y = "Liquor retailing Turnover",
       title = "Total AUS Liquor retailing Turnover")+
  guides(colour = guide_legend(title = "series"))
## Warning: Removed 12 row(s) containing missing values (geom_path).

Exercise 10

Classical Additive Decomposition:

1- If the seasonal period (m) is even then calculate a 2x(m)-MA to get the trend cycle. If season(m) is odd, calculate the trend cycle using a (m)-MA.

2- Calculate the detrend series based off your trend cycle from step 1. Detrend = yt - Tt

3- Average the detrended values for each season to find the seasonal component. Replicate the sequence of data for each year to obtain St.

4- Calculate the remainder by doing using the formula: Rt = yt - Tt - St. This takes the observation for each time and removes the trend and seasonal components calculated above. This leaves you with only the remainder.

Classical Multiplicative Decomposition:

1- If the seasonal period (m) is even then calculate a 2x(m)-MA to get the trend cycle. If season(m) is odd, calculate the trend cycle using a (m)-MA.

2- Calculate the detrend series based off your trend cycle from step 1. Detrend = yt / Tt (This is the key difference between multiplicative and additive decomposition.)

3- Average the detrended values for each season to find the seasonal component. Replicate the sequence of data for each year to obtain St.

4- Calculate the remainder by doing using the formula: Rt = yt / (Tt - St). This takes the observation for each time and removes the trend and seasonal components calculated above. This leaves you with only the remainder. (This step is also different from the additive decomposition.)