library(readxl)
library(tidyquant)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble      3.1.8      ✔ tsibble     1.1.2 
## ✔ dplyr       1.0.10     ✔ tsibbledata 0.4.1 
## ✔ tidyr       1.2.0      ✔ feasts      0.3.0 
## ✔ ggplot2     3.3.6      ✔ fable       0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ dplyr::first()       masks xts::first()
## ✖ tsibble::index()     masks zoo::index()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ dplyr::last()        masks xts::last()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
## ✖ fable::VAR()         masks tidyquant::VAR()
## 
## Attaching package: 'fpp3'
## The following object is masked from 'package:PerformanceAnalytics':
## 
##     prices
library(moments)
## 
## Attaching package: 'moments'
## The following objects are masked from 'package:PerformanceAnalytics':
## 
##     kurtosis, skewness
library(tsibble)
library(tsibbledata)
library(ggfortify)
library(ggplot2)
library(dplyr)
library(seasonal)
## 
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
## 
##     view
library(seasonalview)
## 
## Attaching package: 'seasonalview'
## The following object is masked from 'package:seasonal':
## 
##     view
## The following object is masked from 'package:tibble':
## 
##     view

#Question 1 Part 1

USA_tsibble <- global_economy %>%
  as_tsibble(index = Year,
             key = c(Country)) %>%
  filter(Country == "United States")

ggplot(USA_tsibble, mapping = aes(x = Year, y = GDP)) + 
  geom_line()

#This data has near constant variance and no seasonality, therefore no transformation is necessary.

#Question 1 Part 2

Slaughter_tsibble <- aus_livestock %>%
  as_tsibble(index = Month,
             key = c(Animal, State)) %>%
  filter(Animal == "Bulls, bullocks and steers") %>%
  filter(State == "Victoria")
  
  
Slaughter_tsibble
## # A tsibble: 510 x 4 [1M]
## # Key:       Animal, State [1]
##       Month Animal                     State     Count
##       <mth> <fct>                      <fct>     <dbl>
##  1 1976 Jul Bulls, bullocks and steers Victoria 109200
##  2 1976 Aug Bulls, bullocks and steers Victoria  94700
##  3 1976 Sep Bulls, bullocks and steers Victoria  95500
##  4 1976 Oct Bulls, bullocks and steers Victoria  94800
##  5 1976 Nov Bulls, bullocks and steers Victoria  94100
##  6 1976 Dec Bulls, bullocks and steers Victoria  98300
##  7 1977 Jan Bulls, bullocks and steers Victoria  93500
##  8 1977 Feb Bulls, bullocks and steers Victoria 102000
##  9 1977 Mar Bulls, bullocks and steers Victoria 102600
## 10 1977 Apr Bulls, bullocks and steers Victoria  91500
## # … with 500 more rows
ggplot(Slaughter_tsibble, mapping = aes(x = Month, y = Count)) +
  geom_line()

Slaughter_tsibble %>%
  features(Count, features = guerrero)
## # A tibble: 1 × 3
##   Animal                     State    lambda_guerrero
##   <fct>                      <fct>              <dbl>
## 1 Bulls, bullocks and steers Victoria         -0.0446
Slaughter_tsibble %>%
  autoplot(box_cox(Count, -0.0446))

#I tried a few transformations but nothing would smooth the data. There is, however, a noticeable downward trend and seasonality.

#Question 1 Part 3

vic_tsibble <- vic_elec %>%
  as_tsibble(index = Date,
             key = c(Time, Holiday))

ggplot(vic_tsibble, mapping = aes(x = Time, y = Demand)) +
  geom_line()

#No transformation is necessary since the seasonal variation is not proportional to time. It is not linear.

#Question 1 Part 4

aus_tsibble <- aus_production %>%
  as_tsibble(index = Quarter) %>%
  select(Gas)

ggplot(aus_tsibble, mapping = aes(x = Quarter, y = Gas)) +
  geom_line()

aus_tsibble %>%
  features(Gas, features = guerrero)
## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.110
aus_tsibble %>%
  autoplot(box_cox(Gas, 0.110))

#This transformation balanced seasonal fluctuations and variance across the series.

#Question 2

ggplot(canadian_gas, mapping = aes(x =  Month, y = Volume)) +
  geom_line() +
  labs(title = "Plot of Monthly Canadian Gas Volume")

canadian_gas %>%
  features(Volume, features = guerrero)
## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.577
canadian_gas %>%
  autoplot(box_cox(Volume, 0.577))

#When comparing the two plots, one can see that the Box-Cox transformation does not make the seasonal variation constant.

#Question 3

Cig <- aus_production %>% 
  select(Tobacco, Quarter) 
Cig %>%
  features(Tobacco, features = guerrero)
## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.926
Cigpl <- Cig %>%
  autoplot(box_cox(Tobacco, 0.929)) +
  labs(y = "Box-Cox transformed Tobacco")
Cigpl
## Warning: Removed 24 row(s) containing missing values (geom_path).

#Box-Cox is not effective since the change in seasonality is not proportional to time.

Airr <- ansett %>%
  filter(Airports == "MEL-SYD") %>%
  filter(Class == "Economy")
Airr %>%
  features(Passengers, features = guerrero)
## # A tibble: 1 × 3
##   Airports Class   lambda_guerrero
##   <chr>    <chr>             <dbl>
## 1 MEL-SYD  Economy            2.00
Airrpl <- Airr %>%
  autoplot(box_cox(Passengers, 2.00)) +
  labs(y = "Box-Cox transformed Passengers")
Airrpl  

#This transformation shows seasonal variation more clearly but not by much. 

Walk <- pedestrian  %>%
  select(Sensor, Date, Count) %>%
  filter(Sensor == "Southern Cross Station")
Walk %>%
  features(Count, features = guerrero)
## # A tibble: 1 × 2
##   Sensor                 lambda_guerrero
##   <chr>                            <dbl>
## 1 Southern Cross Station          -0.250
Walkpl <- Walk %>%
  autoplot(box_cox(Count, -0.226)) +
  labs(y = "Box-Cox transformed SCS Pedestrians")
 Walkpl

#Count shifted downwards with no other change in the plot. 

#Question 4

HW2Q4_Data <- read_excel("C:/Users/harle/OneDrive/Desktop/Class Files/Fall 2022/Forecasting/HW2Q4 Data.xlsx")

x_ma5 <- HW2Q4_Data %>%
  rollmean(num, k = 5, fill = NA)
x_ma3 <- x_ma5 %>%
  rollmean(num, k = 3, fill = NA)


z <- c(0.067, 0.133, 0.2, 0.2, 0.2, 0.133, 0.067)
z[1] = 0.067*(1/15)
z[2] = 0.133*(2/15)
z[3] = 0.200*(3/15)
z[4] = 0.200*(3/15)
z[5] = 0.200*(3/15)
z[6] = 0.133*(2/15)
z[7] = 0.067*(1/15)
z_sum <- sum(z)
z_sum
## [1] 0.1644

#Question 5 Part 1

gas <- tail(aus_production, 5*4) %>% select(Gas)

ggplot(gas, mapping = aes(x = Quarter, y = Gas)) + 
  geom_line()

#Positive trend with seasonality. Peak production is in Q3 with the least production occurring in Q1.

#Question 5 Parts 2 and 3

gas_decomp <- gas %>%
  model(classical_decomposition(Gas, type = "multiplicative")) 

components(gas_decomp) %>%
  autoplot() +
  labs(title = "Gas Production Classical Multiplicative Decomposition")
## Warning: Removed 2 row(s) containing missing values (geom_path).

#Yes, the seasonality (Q1 and Q3) and the trend is the same.

#Question 5 Parts 4-6

components(gas_decomp) %>%
  as_tsibble() %>%
  autoplot(Gas, colour = "blue") +
  geom_line(aes(y=season_adjust), colour = "red") +
  labs(title = "Seasonally Adjusted Gas Production")

gas %>%
  mutate(Gas = ifelse(Gas == 171, Gas + 300, Gas)) %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  as_tsibble() %>%
  autoplot(Gas, colour = "black") +
  geom_line(aes(y=season_adjust), colour = "red") +
  labs(title = "Seasonally Adjusted Gas Production plus an Outlier")

gas %>%
  mutate(Gas = ifelse(Gas == 236, Gas + 300, Gas)) %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  as_tsibble() %>%
  autoplot(Gas, colour = "black") +
  geom_line(aes(y=season_adjust), colour = "red") +
  labs(title = "Seasonally Adjusted Gas Production plus an Outlier at End") #outlier at end of data

gas %>%
  mutate(Gas = ifelse(Gas == 205, Gas + 300, Gas)) %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  as_tsibble() %>%
  autoplot(Gas, colour = "black") +
  geom_line(aes(y=season_adjust), colour = "red") +
  labs(title = "Seasonally Adjusted Gas Production plus an Outlier in the Middle")

#It does not seem to make much of a difference as there is still a spike where the outlier was at the end.

#Question 6

set.seed(87654321)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

ggplot(myseries, mapping = aes(x = Month, y = Turnover)) +
  geom_line()

x11_dcmp <- myseries %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components()

autoplot(x11_dcmp) +
  labs(title = "Decomposition of aus_retail with X-11")

#No, the seasonality and trend is still there, but the lines look slightly more jagged. The seasonality is slightly more noticeable.

#Question 7 #1. From the decomposition graph, one can see that there is a strong positive trend throughout with a brief slowdown in growth during the 1990s. #From the season_year, one can see that there is nearly always a spike in March and December, likely due to population factors unique to Australia. #The remainder component is concerning as it may not truly be white noise. There is a large downward spike noticeable around 1992. #2. Yes. As previously stated, there is a massive downward spike around 1992 that was strong enough to be seen in the remainder component.

#Question 8 Part 1

canadian_gas %>%
  autoplot(Volume) +
  labs(title = "Autoplot of Monthly Canadian Gas Production",
       y = "Volume in Billions of Cubic Meters")

canadian_gas %>%
  gg_subseries(Volume) +
  labs(title = "gg_subseries of Monthly Canadian Gas Production",
       y = "Volume in Billions of Cubic Meters")

canadian_gas %>%
  gg_season(Volume) +
  labs(title = "gg_season of Monthly Canadian Gas Production",
       y = "Volume in Billions of Cubic Meters")

#Question 8 Part 2

canadian_gas %>% 
  model(
    STL(Volume ~ trend(window = 19) + 
          season(window = 12),
        robust = TRUE)) %>% 
  components() %>% 
  autoplot()+
  labs(title = "STL decomp of Canadian Gas Production")

#Question 8 Part 3 #gg_season was performed in part a. The seasonality seems to become more jagged over time with peaks forming in March, October, December, and January.

#Question 8 Part 4

canadian_gas %>%
  model(
    STL(Volume ~ trend(window = 19) +
          season(window = 12),
        robust = TRUE)) %>%
  components() %>%
  ggplot(aes(x = Month)) +
  geom_line(aes(y = Volume, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(title = "STL decomp of Canadian Production") +
  scale_colour_manual(
    values = c("black", "blue", "red"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

#Question 8 Part 5

canadian_gas %>%
  model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
  components() %>%
  autoplot()

#The seasonality at the end of the STL is larger than the ARIMA. I don’t see any other significant differences.

#Question 9 Parts 1 and 2

liquor1 <-aus_retail%>%
  filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
  summarise(Turnover = sum(Turnover))

ggplot(data= liquor1)+
  geom_line(aes(x=Month, y=Turnover))

#Since the seasonality is yearly and with a monthly dataset, we should use a 2*12-MA approach. A double moving average will be taken due to m being even.

#Question 9 Part 3

liquor2 <-liquor1 %>%
  mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
                                     .before = 5, .after = 6, .complete = TRUE))%>%
  mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
                                       .before = 1, .after = 0, .complete = TRUE))
liquor2 %>%
  autoplot(Turnover) +
  geom_line(aes(y =`2x12-MA`), colour = "blue") +
  labs(y = "Turnover",
       title = "Austrailian Liquor Retailing with 2*12 MA")
## Warning: Removed 12 row(s) containing missing values (geom_path).

#Question 10 #Step 1(For both)) Smooth the data with a procedure such as moving averages to estimate the trend. One could estimate the trend with a regression. #Step 2) Detrend the series. For additive, one should subtract the trend estimate. For multiplicative, one should divide the series by the calculated weight. #Step 3 (For both)) Average the detrended values depending on seasonality. If the seasonality is monthly, average the detrended values for all Januarys, Februarys, and so on. #Step 4) Determine the remainder component. For additive, the remainder = series - trend - seasonal. For multiplicative, the remainder = series / (trend * seasonal).