library(readxl)
library(tidyquant)
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble 3.1.8 ✔ tsibble 1.1.2
## ✔ dplyr 1.0.10 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.2.0 ✔ feasts 0.3.0
## ✔ ggplot2 3.3.6 ✔ fable 0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks xts::first()
## ✖ tsibble::index() masks zoo::index()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks xts::last()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
## ✖ fable::VAR() masks tidyquant::VAR()
##
## Attaching package: 'fpp3'
## The following object is masked from 'package:PerformanceAnalytics':
##
## prices
library(moments)
##
## Attaching package: 'moments'
## The following objects are masked from 'package:PerformanceAnalytics':
##
## kurtosis, skewness
library(tsibble)
library(tsibbledata)
library(ggfortify)
library(ggplot2)
library(dplyr)
library(seasonal)
##
## Attaching package: 'seasonal'
## The following object is masked from 'package:tibble':
##
## view
library(seasonalview)
##
## Attaching package: 'seasonalview'
## The following object is masked from 'package:seasonal':
##
## view
## The following object is masked from 'package:tibble':
##
## view
#Question 1 Part 1
USA_tsibble <- global_economy %>%
as_tsibble(index = Year,
key = c(Country)) %>%
filter(Country == "United States")
ggplot(USA_tsibble, mapping = aes(x = Year, y = GDP)) +
geom_line()
#This data has near constant variance and no seasonality, therefore no transformation is necessary.
#Question 1 Part 2
Slaughter_tsibble <- aus_livestock %>%
as_tsibble(index = Month,
key = c(Animal, State)) %>%
filter(Animal == "Bulls, bullocks and steers") %>%
filter(State == "Victoria")
Slaughter_tsibble
## # A tsibble: 510 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Victoria 109200
## 2 1976 Aug Bulls, bullocks and steers Victoria 94700
## 3 1976 Sep Bulls, bullocks and steers Victoria 95500
## 4 1976 Oct Bulls, bullocks and steers Victoria 94800
## 5 1976 Nov Bulls, bullocks and steers Victoria 94100
## 6 1976 Dec Bulls, bullocks and steers Victoria 98300
## 7 1977 Jan Bulls, bullocks and steers Victoria 93500
## 8 1977 Feb Bulls, bullocks and steers Victoria 102000
## 9 1977 Mar Bulls, bullocks and steers Victoria 102600
## 10 1977 Apr Bulls, bullocks and steers Victoria 91500
## # … with 500 more rows
ggplot(Slaughter_tsibble, mapping = aes(x = Month, y = Count)) +
geom_line()
Slaughter_tsibble %>%
features(Count, features = guerrero)
## # A tibble: 1 × 3
## Animal State lambda_guerrero
## <fct> <fct> <dbl>
## 1 Bulls, bullocks and steers Victoria -0.0446
Slaughter_tsibble %>%
autoplot(box_cox(Count, -0.0446))
#I tried a few transformations but nothing would smooth the data. There is, however, a noticeable downward trend and seasonality.
#Question 1 Part 3
vic_tsibble <- vic_elec %>%
as_tsibble(index = Date,
key = c(Time, Holiday))
ggplot(vic_tsibble, mapping = aes(x = Time, y = Demand)) +
geom_line()
#No transformation is necessary since the seasonal variation is not proportional to time. It is not linear.
#Question 1 Part 4
aus_tsibble <- aus_production %>%
as_tsibble(index = Quarter) %>%
select(Gas)
ggplot(aus_tsibble, mapping = aes(x = Quarter, y = Gas)) +
geom_line()
aus_tsibble %>%
features(Gas, features = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 0.110
aus_tsibble %>%
autoplot(box_cox(Gas, 0.110))
#This transformation balanced seasonal fluctuations and variance across the series.
#Question 2
ggplot(canadian_gas, mapping = aes(x = Month, y = Volume)) +
geom_line() +
labs(title = "Plot of Monthly Canadian Gas Volume")
canadian_gas %>%
features(Volume, features = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 0.577
canadian_gas %>%
autoplot(box_cox(Volume, 0.577))
#When comparing the two plots, one can see that the Box-Cox transformation does not make the seasonal variation constant.
#Question 3
Cig <- aus_production %>%
select(Tobacco, Quarter)
Cig %>%
features(Tobacco, features = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 0.926
Cigpl <- Cig %>%
autoplot(box_cox(Tobacco, 0.929)) +
labs(y = "Box-Cox transformed Tobacco")
Cigpl
## Warning: Removed 24 row(s) containing missing values (geom_path).
#Box-Cox is not effective since the change in seasonality is not proportional to time.
Airr <- ansett %>%
filter(Airports == "MEL-SYD") %>%
filter(Class == "Economy")
Airr %>%
features(Passengers, features = guerrero)
## # A tibble: 1 × 3
## Airports Class lambda_guerrero
## <chr> <chr> <dbl>
## 1 MEL-SYD Economy 2.00
Airrpl <- Airr %>%
autoplot(box_cox(Passengers, 2.00)) +
labs(y = "Box-Cox transformed Passengers")
Airrpl
#This transformation shows seasonal variation more clearly but not by much.
Walk <- pedestrian %>%
select(Sensor, Date, Count) %>%
filter(Sensor == "Southern Cross Station")
Walk %>%
features(Count, features = guerrero)
## # A tibble: 1 × 2
## Sensor lambda_guerrero
## <chr> <dbl>
## 1 Southern Cross Station -0.250
Walkpl <- Walk %>%
autoplot(box_cox(Count, -0.226)) +
labs(y = "Box-Cox transformed SCS Pedestrians")
Walkpl
#Count shifted downwards with no other change in the plot.
#Question 4
HW2Q4_Data <- read_excel("C:/Users/harle/OneDrive/Desktop/Class Files/Fall 2022/Forecasting/HW2Q4 Data.xlsx")
x_ma5 <- HW2Q4_Data %>%
rollmean(num, k = 5, fill = NA)
x_ma3 <- x_ma5 %>%
rollmean(num, k = 3, fill = NA)
z <- c(0.067, 0.133, 0.2, 0.2, 0.2, 0.133, 0.067)
z[1] = 0.067*(1/15)
z[2] = 0.133*(2/15)
z[3] = 0.200*(3/15)
z[4] = 0.200*(3/15)
z[5] = 0.200*(3/15)
z[6] = 0.133*(2/15)
z[7] = 0.067*(1/15)
z_sum <- sum(z)
z_sum
## [1] 0.1644
#Question 5 Part 1
gas <- tail(aus_production, 5*4) %>% select(Gas)
ggplot(gas, mapping = aes(x = Quarter, y = Gas)) +
geom_line()
#Positive trend with seasonality. Peak production is in Q3 with the least production occurring in Q1.
#Question 5 Parts 2 and 3
gas_decomp <- gas %>%
model(classical_decomposition(Gas, type = "multiplicative"))
components(gas_decomp) %>%
autoplot() +
labs(title = "Gas Production Classical Multiplicative Decomposition")
## Warning: Removed 2 row(s) containing missing values (geom_path).
#Yes, the seasonality (Q1 and Q3) and the trend is the same.
#Question 5 Parts 4-6
components(gas_decomp) %>%
as_tsibble() %>%
autoplot(Gas, colour = "blue") +
geom_line(aes(y=season_adjust), colour = "red") +
labs(title = "Seasonally Adjusted Gas Production")
gas %>%
mutate(Gas = ifelse(Gas == 171, Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(Gas, colour = "black") +
geom_line(aes(y=season_adjust), colour = "red") +
labs(title = "Seasonally Adjusted Gas Production plus an Outlier")
gas %>%
mutate(Gas = ifelse(Gas == 236, Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(Gas, colour = "black") +
geom_line(aes(y=season_adjust), colour = "red") +
labs(title = "Seasonally Adjusted Gas Production plus an Outlier at End") #outlier at end of data
gas %>%
mutate(Gas = ifelse(Gas == 205, Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(Gas, colour = "black") +
geom_line(aes(y=season_adjust), colour = "red") +
labs(title = "Seasonally Adjusted Gas Production plus an Outlier in the Middle")
#It does not seem to make much of a difference as there is still a spike where the outlier was at the end.
#Question 6
set.seed(87654321)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
ggplot(myseries, mapping = aes(x = Month, y = Turnover)) +
geom_line()
x11_dcmp <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_dcmp) +
labs(title = "Decomposition of aus_retail with X-11")
#No, the seasonality and trend is still there, but the lines look slightly more jagged. The seasonality is slightly more noticeable.
#Question 7 #1. From the decomposition graph, one can see that there is a strong positive trend throughout with a brief slowdown in growth during the 1990s. #From the season_year, one can see that there is nearly always a spike in March and December, likely due to population factors unique to Australia. #The remainder component is concerning as it may not truly be white noise. There is a large downward spike noticeable around 1992. #2. Yes. As previously stated, there is a massive downward spike around 1992 that was strong enough to be seen in the remainder component.
#Question 8 Part 1
canadian_gas %>%
autoplot(Volume) +
labs(title = "Autoplot of Monthly Canadian Gas Production",
y = "Volume in Billions of Cubic Meters")
canadian_gas %>%
gg_subseries(Volume) +
labs(title = "gg_subseries of Monthly Canadian Gas Production",
y = "Volume in Billions of Cubic Meters")
canadian_gas %>%
gg_season(Volume) +
labs(title = "gg_season of Monthly Canadian Gas Production",
y = "Volume in Billions of Cubic Meters")
#Question 8 Part 2
canadian_gas %>%
model(
STL(Volume ~ trend(window = 19) +
season(window = 12),
robust = TRUE)) %>%
components() %>%
autoplot()+
labs(title = "STL decomp of Canadian Gas Production")
#Question 8 Part 3 #gg_season was performed in part a. The seasonality seems to become more jagged over time with peaks forming in March, October, December, and January.
#Question 8 Part 4
canadian_gas %>%
model(
STL(Volume ~ trend(window = 19) +
season(window = 12),
robust = TRUE)) %>%
components() %>%
ggplot(aes(x = Month)) +
geom_line(aes(y = Volume, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(title = "STL decomp of Canadian Production") +
scale_colour_manual(
values = c("black", "blue", "red"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
#Question 8 Part 5
canadian_gas %>%
model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
components() %>%
autoplot()
#The seasonality at the end of the STL is larger than the ARIMA. I don’t see any other significant differences.
#Question 9 Parts 1 and 2
liquor1 <-aus_retail%>%
filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
summarise(Turnover = sum(Turnover))
ggplot(data= liquor1)+
geom_line(aes(x=Month, y=Turnover))
#Since the seasonality is yearly and with a monthly dataset, we should use a 2*12-MA approach. A double moving average will be taken due to m being even.
#Question 9 Part 3
liquor2 <-liquor1 %>%
mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
.before = 5, .after = 6, .complete = TRUE))%>%
mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
.before = 1, .after = 0, .complete = TRUE))
liquor2 %>%
autoplot(Turnover) +
geom_line(aes(y =`2x12-MA`), colour = "blue") +
labs(y = "Turnover",
title = "Austrailian Liquor Retailing with 2*12 MA")
## Warning: Removed 12 row(s) containing missing values (geom_path).
#Question 10 #Step 1(For both)) Smooth the data with a procedure such as moving averages to estimate the trend. One could estimate the trend with a regression. #Step 2) Detrend the series. For additive, one should subtract the trend estimate. For multiplicative, one should divide the series by the calculated weight. #Step 3 (For both)) Average the detrended values depending on seasonality. If the seasonality is monthly, average the detrended values for all Januarys, Februarys, and so on. #Step 4) Determine the remainder component. For additive, the remainder = series - trend - seasonal. For multiplicative, the remainder = series / (trend * seasonal).