Homework #2

rm(list=ls())
library(tsibble)

## 
## Attaching package: 'tsibble'

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union

library(tidyverse)

## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ lubridate   1.8.0     ✔ feasts      0.2.2
## ✔ tsibbledata 0.4.0     ✔ fable       0.3.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()     masks base::date()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ tsibble::intersect()  masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ tsibble::setdiff()    masks base::setdiff()
## ✖ tsibble::union()      masks base::union()

library(openxlsx)
library(ggplot2)
library(tidyr)
library(timeSeries)

## Loading required package: timeDate

library(readr)
library(dplyr)
library(zoo)

## 
## Attaching package: 'zoo'
## 
## The following object is masked from 'package:timeSeries':
## 
##     time<-
## 
## The following object is masked from 'package:tsibble':
## 
##     index
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(seasonal)

## 
## Attaching package: 'seasonal'
## 
## The following objects are masked from 'package:timeSeries':
## 
##     outlier, series
## 
## The following object is masked from 'package:tibble':
## 
##     view

#Question 1
#Plotting United States GDP
head(global_economy)

## # A tsibble: 6 x 9 [1Y]
## # Key:       Country [1]
##   Country     Code   Year         GDP Growth   CPI Imports Exports Population
##   <fct>       <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
## 1 Afghanistan AFG    1960  537777811.     NA    NA    7.02    4.13    8996351
## 2 Afghanistan AFG    1961  548888896.     NA    NA    8.10    4.45    9166764
## 3 Afghanistan AFG    1962  546666678.     NA    NA    9.35    4.88    9345868
## 4 Afghanistan AFG    1963  751111191.     NA    NA   16.9     9.17    9533954
## 5 Afghanistan AFG    1964  800000044.     NA    NA   18.1     8.89    9731361
## 6 Afghanistan AFG    1965 1006666638.     NA    NA   21.4    11.3     9938414

UnitedStates <- global_economy %>%
  dplyr::filter(Country == "United States") %>%
  select(-c(Code, Growth, CPI, Imports, Exports, Population)) %>%
  mutate(GDP = GDP/1000000000)
UnitedStatesPlot <- UnitedStates %>%
  ggplot(aes(x = Year, y = GDP)) +
  geom_line() +
  labs(y = "GDP in Billions($)")
UnitedStatesPlot

#Plotting Slaughter of Victorian "Bulls, bullocks and steers"
victorian <- aus_livestock %>%
  dplyr::filter(State == "Victoria",
         Animal== "Bulls, bullocks and steers")

victoriants <- victorian %>%
  mutate(Month = yearmonth(Month)) %>%
  as_tsibble(index = Month) %>%
  select(-State)
head(victoriants)

## # A tsibble: 6 x 3 [1M]
## # Key:       Animal [1]
##      Month Animal                      Count
##      <mth> <fct>                       <dbl>
## 1 1976 Jul Bulls, bullocks and steers 109200
## 2 1976 Aug Bulls, bullocks and steers  94700
## 3 1976 Sep Bulls, bullocks and steers  95500
## 4 1976 Oct Bulls, bullocks and steers  94800
## 5 1976 Nov Bulls, bullocks and steers  94100
## 6 1976 Dec Bulls, bullocks and steers  98300

victorianplot <- victorian %>%
  ggplot(mapping = aes(x = Month, y = Count)) +
  geom_line() +
  labs(title = "Slaughter of Bulls, Bullocks, and Steers in Victoria", y = "Slaughter Count")
victorianplot

#Plotting Victorian Electricity Demand
head(vic_elec)

## # A tsibble: 6 x 5 [30m] <Australia/Melbourne>
##   Time                Demand Temperature Date       Holiday
##   <dttm>               <dbl>       <dbl> <date>     <lgl>  
## 1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
## 2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
## 3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
## 4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
## 5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
## 6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE

Demand <- vic_elec %>%
  select(Demand) %>%
  autoplot()

## Plot variable not specified, automatically selected `.vars = Demand`

Demand

#Plotting Gas Production
head(aus_production)

## # A tsibble: 6 x 7 [1Q]
##   Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##     <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
## 1 1956 Q1   284    5225    189    465        3923     5
## 2 1956 Q2   213    5178    204    532        4436     6
## 3 1956 Q3   227    5297    208    561        4806     7
## 4 1956 Q4   308    5681    197    570        4418     6
## 5 1957 Q1   262    5577    187    529        4339     5
## 6 1957 Q2   228    5651    214    604        4811     7

Gas <- aus_production %>%
  select(Gas) %>%
  autoplot()

## Plot variable not specified, automatically selected `.vars = Gas`

Gas

#Question 2
#A Box-Cox transformation is unhelpful for the Canadian_gas data (volume) because the box-cox helps to normalize the variance when it is either decreasing or increasing overtime. Box-cox transformation is used to balance the seasonal fluctuations and random variation in the data. In the case of the Canadian gas data, the gas demand over time does not follow a seasonal trend per year, rather it tends to fluctuate between the years with varying spikes mixed into some years (2012, 2013, and 2014).

#Question 3
#Tobacco
tobacco_prod <- aus_production %>%
  select(-c(Beer, Bricks, Electricity, Cement, Gas))
tobacco_prod %>%
  features(Tobacco, features = guerrero)

## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.929

tobacco_prod %>%
  autoplot(box_cox(Tobacco, 0.929)) +
  labs(y = "Box-Cox Transformed Turnover")

## Warning: Removed 24 row(s) containing missing values (geom_path).

#Economy Class
ansett

## # A tsibble: 7,407 x 4 [1W]
## # Key:       Airports, Class [30]
##        Week Airports Class    Passengers
##      <week> <chr>    <chr>         <dbl>
##  1 1989 W28 ADL-PER  Business        193
##  2 1989 W29 ADL-PER  Business        254
##  3 1989 W30 ADL-PER  Business        185
##  4 1989 W31 ADL-PER  Business        254
##  5 1989 W32 ADL-PER  Business        191
##  6 1989 W33 ADL-PER  Business        136
##  7 1989 W34 ADL-PER  Business          0
##  8 1989 W35 ADL-PER  Business          0
##  9 1989 W36 ADL-PER  Business          0
## 10 1989 W37 ADL-PER  Business          0
## # … with 7,397 more rows
## # ℹ Use `print(n = ...)` to see more rows

airport <- ansett %>%
  dplyr::filter(Class== "Economy") %>%
  dplyr::filter(Airports== "MEL-SYD")

airport %>%
  features(Passengers, features = guerrero)

## # A tibble: 1 × 3
##   Airports Class   lambda_guerrero
##   <chr>    <chr>             <dbl>
## 1 MEL-SYD  Economy            2.00

airport %>%
  autoplot(box_cox(Passengers, 2.00)) +
  labs(y = "Box-Cox Transformed Turnover")

#Pedestrian Counts
pedestriancount <- pedestrian %>%
  dplyr::filter(Sensor == "Southern Cross Station")
pedestriancount %>%
  features(Count, features = guerrero)

## # A tibble: 1 × 2
##   Sensor                 lambda_guerrero
##   <chr>                            <dbl>
## 1 Southern Cross Station          -0.226

pedestriancount %>%
  autoplot(box_cox(Count, -0.226)) +
  labs(y = "Box-Cox Transformed Count")

#Question 4
y <- readxl::read_excel("C:\\Users\\lvm12\\OneDrive\\Desktop\\q4.xlsx")
head(y)

## # A tibble: 6 × 1
##   number
##    <dbl>
## 1  0.067
## 2  0.133
## 3  0.2  
## 4  0.2  
## 5  0.2  
## 6  0.133

colnames(y)

## [1] "number"

names(y)[1]<-"num"
y_ma5 <- y %>%
  rollmean(num, k = 5, fill = NA)

y_ma3 <- y_ma5 %>%
  rollmean(num, k = 3, fill = NA)
y_ma3

##         num
## [1,]     NA
## [2,]     NA
## [3,]     NA
## [4,] 0.1644
## [5,]     NA
## [6,]     NA
## [7,]     NA

z <- c(0.067, 0.133, 0.2, 0.2, 0.2, 0.133, 0.067)
z[1] = 0.067*(1/15)
z[2] = 0.133*(2/15)
z[3] = 0.200*(3/15)
z[4] = 0.200*(3/15)
z[5] = 0.200*(3/15)
z[6] = 0.133*(2/15)
z[7] = 0.067*(1/15)
z_sum <- sum(z)
z_sum

## [1] 0.1644

#Question 5
#Part 1
gas <- tail(aus_production, 5*4) %>%
  select(Gas)
gas %>% 
  autoplot()

## Plot variable not specified, automatically selected `.vars = Gas`

#There seems to be a seasonal trend where the gas demand is down in Q1, but spikes by the middle of the year between Q1 of the following year and Q1 of the year observing. Because seasonal trend can be focused around quarter, month, or day, this data follows a quarter seasonal trend.
#There is a trend pattern as the data slowly increases over a decent amount of time (and it don't have to be linear)

#Part 2
gas %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>% 
  autoplot()

## Warning: Removed 2 row(s) containing missing values (geom_path).

#Part 3
#After the data was decomposed, the data shows that there truly is a seasonal trend in the data as you can see the increases and decreases over time in the data
#Also, the trend graph shows the data increasing over a long period of time proving that the data has a trend as well.
#Part 4
gas_adjusted <- gas %>% 
  model(STL(Gas ~ season(window=9), robust=TRUE)) %>%
  components() %>% autoplot() +
    labs(title = "STL decomposition: Gas Over the Past 5 Years")
gas_adjusted

#Part 5
gas[1,1]=782
gas %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>% 
  autoplot()

## Warning: Removed 2 row(s) containing missing values (geom_path).

gas_seasonal <- gas %>% 
  model(STL(Gas ~ season(window=9), robust=TRUE)) %>%
  components() %>% autoplot() +
    labs(title = "STL decomposition: Gas Over the Past 5 Years w/ Outlier")
gas_seasonal

#Part 6
gas[20,1]=782
gas %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>% 
  autoplot()

## Warning: Removed 2 row(s) containing missing values (geom_path).

gas_seasonal <- gas %>% 
  model(STL(Gas ~ season(window=9), robust=TRUE)) %>%
  components() %>% autoplot() +
    labs(title = "STL decomposition: Gas Over the Past 5 Years w/ Outlier")
gas_seasonal

#If the outlier is added at the end of the data, all the data spikes upwards in the very end, especially hen looking at the "Gas" and "Remainder" ones, trend and season could just be following their typical patterns, but they could also be influenced by the change in data.

#Question 6
set.seed(12345678)
myseries <- aus_retail %>%
  dplyr::filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries %>%
  model(classical_decomposition(Turnover,type = "multiplicative")) %>%
  components() %>%
  autoplot() + 
  ggtitle("Multiplicative Decomposition")

## Warning: Removed 6 row(s) containing missing values (geom_path).

myseries %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components() %>%
  autoplot()+
  labs(title = "X-11 Decomposition")

#In the X-11 decomposition, the data in seasonal mellows out and dips down around 2005 Jan, whereas in the multiplicative decomposition, seasonal remains steady overtime

#Question 7
#Part 1 - From the first chart (with the 4 graphs), you can see that there is an increasing trend overtime for the amount of people in the civilian labor force in Australia from 1978 to 1995. Although the scale for "trend" is 6500 to 9000 while the scale for remainder is -400 to 0 and for season its -100 to 100, throwing off the ability to compare the graphs interchangeably. There might be a seasonal trend as well, but it is hard to observe from the scale and the spikes/dips in the data as it does not land on the yearly lines consecutively.
#Part 2 - The recession is very visible in the "remainder" graph as there is an intense and sharp dip about a year after 1990, which can be correlated to the 1991/1992 recession.

#Question 8
#Part 1
head(canadian_gas)

## # A tsibble: 6 x 2 [1M]
##      Month Volume
##      <mth>  <dbl>
## 1 1960 Jan   1.43
## 2 1960 Feb   1.31
## 3 1960 Mar   1.40
## 4 1960 Apr   1.17
## 5 1960 May   1.12
## 6 1960 Jun   1.01

autocanadian <- canadian_gas %>%
  autoplot()

## Plot variable not specified, automatically selected `.vars = Volume`

autocanadian

subcanadian <- canadian_gas %>%
  gg_subseries()

## Plot variable not specified, automatically selected `y = Volume`

subcanadian

seasoncanadian <- canadian_gas %>%
  gg_season()

## Plot variable not specified, automatically selected `y = Volume`

seasoncanadian

#Part 2 - Do an STL Decomposition
STLcanadian <- canadian_gas %>%
  model(STL(Volume ~ trend(window=20) +
              season(window=9), robust=TRUE)) %>%
  components() %>%
  autoplot()+
  labs(title = "STL decomposition: Canadian Gas")
STLcanadian

#Part 3 - How does the seasonal shape change
STLseason <- canadian_gas %>%
  model(STL(Volume ~ trend(window=20) +
              season(window=9), robust=TRUE)) %>%
  components() %>%
  gg_season()+
  labs(title = "STL decomposition: Canadian Gas")

## Plot variable not specified, automatically selected `y = Volume`

STLseason

#Part 4 - Plot a plausible seasonally adjusted series
canadian_gas %>%
 model(
    STL(Volume ~ trend(window = 21) +
                   season(window = 13),
    robust = TRUE)) %>%
  components() %>%
  ggplot(aes(x = Month)) +
  geom_line(aes(y = Volume, colour = "Data")) +
  geom_line(aes(y = season_adjust,
                colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(title = "STL decomposition of Canadian Gas Production After Seasonal Adjustment") +
  scale_colour_manual(
    values = c("pink", "blue", "red"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

#Part 5
canadian_gas %>%
  model(seats = X_13ARIMA_SEATS(Volume ~ seats())) %>%
  components() %>%
autoplot() +
  labs(title ="SEATS Decomposition")

canadian_gas %>%
  model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
  components() %>%
  autoplot()+
  labs(title = "X-11 Decomposition")

#Both decomposition are very similar especially the trend and seasonal. The irregular data for X-11 decomposition is less intense as the SEATS decomposition

#Question 9
liquor<-aus_retail%>%
  dplyr::filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
  summarise(Turnover = sum(Turnover))

ggplot(data= liquor)+
  geom_line(aes(x=Month, y=Turnover))

# Part 2 - a 12 Moving average is the best to use the first 12 observations in the data (this would equate to a year), which would be able to see the MA over a year.
# Part 3 -
liquor <-liquor%>%
  mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
                                    .before = 5, .after = 6, .complete = TRUE))%>%
  mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
                                    .before = 1, .after = 0, .complete = TRUE))
liquor%>%
  autoplot(Turnover) +
  geom_line(aes(y =`2x12-MA`), colour = "#0072B2") +
  labs(y = "Liquor retailing Turnover",
       title = "Total AUS Liquor retailing Turnover")+
  guides(colour = guide_legend(title = "series"))

## Warning: Removed 12 row(s) containing missing values (geom_path).

#Question 10
#Part 1 (Additive)
  #Step 1 - Determine if m is an even or odd number
    #If even - compute the trend-cycle (Tt) using 2 * m - MA
    #If odd - compute the trend-cycle (Tt) using m - MA
  #Step 2 - Calculate the detrended series yt - Tt
  #Step 3 - Estimate the seasonal component by averaging the detrended values of that season
  #Step 4 - Make the seasonal effects normalized. 
    # Average of m seasonal components is 0, which is gathered by replicating the sequence for each years data (St)
      # 1. Calculate the total sum of the seasonal components
      # 2. Calculate the average of the seasonal components
      # 3. Adjust each seasonal component
  #Step 5 - Calculate the remainder component by subtracting the estimated seasonal and trend-cycle components
      # Rt = yt - Tt - St

#Part 2 (Multiplicative)
  #Step 1 - Determine if m is an even or odd number
    #If even - compute the trend-cycle (Tt) using 2 * m - MA
    #If odd - compute the trend-cycle (Tt) using m - MA
  #Step 2 - Calculate the detrended series yt/Tt
  #Step 3 - Estimate the seasonal component by averaging the detrended values of that season
  #Step 4 - Make the seasonal effects normalized. 
    # Average of m seasonal components is 1, which is gathered by replicating the sequence for each years data (St)
      # 1. Calculate the total sum of the seasonal components
      # 2. Calculate the average of the seasonal components
      # 3. Adjust each seasonal component 
  #Step 5 - Calculate the remainder component by subtracting the estimated seasonal and trend-cycle components
      # Rt = yt/(Tt * St)

#Additive has an average m of 0 while multiplicative has a 1, the formulas also differ slightly between the two (where one may subtract, the other may divide, so looking at the formulas is a big indicator on which you are using to decompose the data)

Homework #2

2022-09-22