HW2

Load packages and data

# install and load any package necessary
library(fpp3)
library(tidyverse)
library(seasonal)

Questions

Exercise 1

usa_gdp <- global_economy %>% 
  filter(Country=='United States') %>% 
  select(GDP) %>% 
  autoplot(GDP)+
  labs(title="USA GDP")
usa_gdp

No data transformation seems necessary here.

slau <- aus_livestock %>% 
  filter(State=='Victoria') %>% 
  filter(Animal=='Bulls, bullocks and steers') %>% 
  model(STL(Count~trend(window=7)+season(window="periodic"),robust=TRUE)) %>% 
  components() %>% 
  autoplot()+
  labs(title="STL decomposition of slaughters")
slau

I did an STL decomposition here just to show the various components.

vic_elec %>% 
  ggplot(aes(x=Time,y=Demand))+
  geom_line()

vic_elec %>% 
  gg_season(Demand, period="week")

vic_elec %>% 
  gg_season(Demand, period="day")

This graph seemed difficult to interpret, so I graphed it a few other ways.

gas <- aus_production %>% 
  select(Gas)
autoplot(gas,Gas)+
  labs(title="Gas")

gas %>% 
  features(Gas, features = guerrero)

## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.121

gas %>% 
  autoplot(box_cox(Gas, 0.121)) +
  labs(y="Volume",title= "Box-Cox Transformed Gas")

This graph exhibited some heteroskedasticity, so I did a box-cox transformation. It made the variance much more constant.

Exercise 2

canadian_gas %>% 
  autoplot(Volume)+
  labs(title="Gas")

canadian_gas %>% 
  features(Volume, features = guerrero)

## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.392

canadian_gas %>% 
  autoplot(box_cox(Volume,0.392))+
  labs(title="Box-cox Transformed Gas")

This graph does have changing variance, but the box-cox transformation did not change anything. I would guess this is because the variance is the same at the ends, and only changes in the middle.

Exercise 3

tobacco <- aus_production %>% 
  select(Tobacco)
#Before
tobacco %>% 
  autoplot(Tobacco)+
  labs(title="Tobacco")

## Warning: Removed 24 row(s) containing missing values (geom_path).

tobacco %>% 
  features(Tobacco,features=guerrero)

## # A tibble: 1 × 1
##   lambda_guerrero
##             <dbl>
## 1           0.929

#After
tobacco %>% 
  autoplot(box_cox(Tobacco,.9289402))+
  labs(title="Box-cox Transformed Tobacco")

## Warning: Removed 24 row(s) containing missing values (geom_path).

This is what the Guerrero feature returns as the proper lambda value, but doesn’t seem to change much. The variance seems relatively constant anyways.

passen <- ansett %>% 
  filter(Class=="Economy") %>% 
  filter(Airports=="MEL-SYD")
#Before
passen %>% 
  autoplot(Passengers)+
  labs(title="Passengers")

passen %>% 
  features(Passengers,features=guerrero)

## # A tibble: 1 × 3
##   Airports Class   lambda_guerrero
##   <chr>    <chr>             <dbl>
## 1 MEL-SYD  Economy            2.00

#After
passen %>% 
  autoplot(box_cox(Passengers,1.999927))+
  labs(title="Box-cox Transformed Passengers")

This seemed to tighten up the graph a bit.

socross <- pedestrian %>% 
  filter(Sensor=='Southern Cross Station')
#Before
socross %>% 
  ggplot(aes(x=Date_Time,y=Count))+
  geom_line()

socross %>% 
  features(Count,features=guerrero)

## # A tibble: 1 × 2
##   Sensor                 lambda_guerrero
##   <chr>                            <dbl>
## 1 Southern Cross Station          -0.226

#After
socross %>% 
  autoplot(box_cox(Count,-0.226))+
  labs(title="Box-cox Transformed")

socross %>% 
  autoplot(box_cox(Count,-1))+
  labs(title="Box-cox Transformed")

socross %>% 
  autoplot(box_cox(Count,.33))+
  labs(title="Box-cox Transformed")

socross %>% 
  autoplot(box_cox(Count,0))+
  labs(title="Box-cox Transformed")

This graph is very hard to interpret after the box-cox, so I tried a few different values. I think the Guerrero feature isn’t perfect.

Exercise 4

I used the livestock tsibble and iterated through it to show that the 3x5 and weighted average are the same.

I decreased the amount drastically, but if you put nrow(numb)-6 instead of nrow(numb)-500, it shows that they are the same for every value.

ma <- aus_livestock %>% 
  filter(State=='Victoria') %>% 
  filter(Animal=='Bulls, bullocks and steers') %>% 
  mutate('5-MA' = slider::slide_dbl(Count, mean,
                                             .before = 2, .after = 2, .complete = TRUE),
         `3x5-MA` = slider::slide_dbl(`5-MA`, mean,.before = 1, .after = 1, .complete = TRUE))
numb <- ma %>% 
  select(Count)
mavg <- ma %>%
  select(`3x5-MA`)
len <- seq(1:(nrow(numb)-500))
for (i in len){
  print((mavg[(i+3),1]))
  print((numb[(i),1]*.0666666666+numb[(i+1),1]*.1333333333+numb[(i+2),1]*.2+numb[(i+3),1]*.2+numb[(i+4),1]*.2+numb[(i+5),1]*.133333333+numb[(i+6),1]*.066666666))
  print(i)
}

## # A tibble: 1 × 1
##   `3x5-MA`
##      <dbl>
## 1   96127.
##      Count
## 1 96126.67
## [1] 1
## # A tibble: 1 × 1
##   `3x5-MA`
##      <dbl>
## 1   95753.
##      Count
## 1 95753.33
## [1] 2
## # A tibble: 1 × 1
##   `3x5-MA`
##      <dbl>
## 1   96627.
##      Count
## 1 96626.67
## [1] 3
## # A tibble: 1 × 1
##   `3x5-MA`
##      <dbl>
## 1   97407.
##      Count
## 1 97406.67
## [1] 4
## # A tibble: 1 × 1
##   `3x5-MA`
##      <dbl>
## 1    97280
##   Count
## 1 97280
## [1] 5
## # A tibble: 1 × 1
##   `3x5-MA`
##      <dbl>
## 1   97827.
##      Count
## 1 97826.67
## [1] 6
## # A tibble: 1 × 1
##   `3x5-MA`
##      <dbl>
## 1   99227.
##      Count
## 1 99226.67
## [1] 7
## # A tibble: 1 × 1
##   `3x5-MA`
##      <dbl>
## 1  101633.
##      Count
## 1 101633.3
## [1] 8
## # A tibble: 1 × 1
##   `3x5-MA`
##      <dbl>
## 1   104280
##    Count
## 1 104280
## [1] 9
## # A tibble: 1 × 1
##   `3x5-MA`
##      <dbl>
## 1  107687.
##      Count
## 1 107686.7
## [1] 10

Exercise 5

wgas<-tail(aus_production, 5*4) %>% 
  select(Gas)
wgas %>% 
  autoplot(Gas)+
  labs(title="Gas")

Highly seasonal, seems to peak in Q2/Q3 There is a weak upward trend

compgas <- wgas %>% 
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components()
compgas %>% 
  autoplot()

## Warning: Removed 2 row(s) containing missing values (geom_path).

compgas %>%
  ggplot(aes(x=Quarter))+
  geom_line(aes(y=Gas,color='Data'))+
  geom_line(aes(y=season_adjust,color='Adjusted'))+
  geom_line(aes(y=trend,color='Trend'))+
  labs(y='Amount')

## Warning: Removed 4 row(s) containing missing values (geom_path).

Yes, they do confirm my suspicions. The seasonally adjusted data is included in this graph.

bgas <- wgas
bgas[1,1]=521
bgas %>% 
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>% 
  autoplot()

## Warning: Removed 2 row(s) containing missing values (geom_path).

bgas %>% 
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>% 
  ggplot(aes(x=Quarter))+
  geom_line(aes(y=Gas,color='Data'))+
  geom_line(aes(y=season_adjust,color='Adjusted'))+
  geom_line(aes(y=trend,color='Trend'))+
  labs(y='Amount')

## Warning: Removed 4 row(s) containing missing values (geom_path).

Here, I added 300 at the beginning. It affects the trend somewhat.

mgas <- wgas
mgas[11,1]=521
mgas %>% 
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>% 
  autoplot()

## Warning: Removed 2 row(s) containing missing values (geom_path).

mgas %>% 
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>% 
  ggplot(aes(x=Quarter))+
  geom_line(aes(y=Gas,color='Data'))+
  geom_line(aes(y=season_adjust,color='Adjusted'))+
  geom_line(aes(y=trend,color='Trend'))+
  labs(y='Amount')

## Warning: Removed 4 row(s) containing missing values (geom_path).

This drastically affects every part of the decomposition.

egas <- wgas
egas[20,1]=521
egas %>% 
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>% 
  autoplot()

## Warning: Removed 2 row(s) containing missing values (geom_path).

egas %>% 
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>% 
  ggplot(aes(x=Quarter))+
  geom_line(aes(y=Gas,color='Data'))+
  geom_line(aes(y=season_adjust,color='Adjusted'))+
  geom_line(aes(y=trend,color='Trend'))+
  labs(y='Amount')

## Warning: Removed 4 row(s) containing missing values (geom_path).

The effect is diminished when it is at the end or beginning. Only in the middle does it seem to truly mess things up.

Exercise 6

set.seed(3657323)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries,Turnover)

x11_dcmp <- myseries %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components()
autoplot(x11_dcmp) +
  labs(title =
         "Decomposition of Turnover using X-11.")

I don’t really see any outliers after the decomposition.

Exercise 7

The trend line fits very closely with the original data. The seasonality decomposition shows that there is extreme seasonality. The residuals appear to have an average of zero, except for a period in the early 90s.

Yes, the recession shows up in the residual. A recession would be an exogenous shock that the model can’t really explain.

Exercise 8

autoplot(canadian_gas, Volume)

canadian_gas %>% 
  gg_subseries(Volume)

canadian_gas %>% 
  gg_season(Volume)

dcmp1=canadian_gas %>% 
  model(STL(Volume~trend(window=21)+season(window="periodic"),robust=TRUE)) %>%
  components() 
dcmp1 %>%
  autoplot()+
  labs(title="Decomposition")

dcmp3 <- dcmp1 %>% 
  select(season_year,season_adjust)
dcmp3 %>% 
  gg_season(season_year)+
  labs(title="Season plot of seasonality")

dcmp1 %>% 
  gg_season(season_adjust)+
  labs(title="Seasonally adjusted data")

dcmp1 %>% 
  autoplot(season_adjust)+
  labs(title="Seasonally adjusted data")

This is one that I can’t seem to get to work. Obviously the seasonality changes but its not showing up in any graph I can do. The seasonally adjusted data looks great but the change in seasons is something I can’t figure out.

dcmp1 %>% 
  select(season_adjust) %>% 
  autoplot(season_adjust)

x11_dcmp1 <- canadian_gas %>%
  model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
  components() %>% 
  select(season_adjust) %>% 
  autoplot(season_adjust)
x11_dcmp1

x11_dcmp2 <- canadian_gas %>%
  model(x11 = X_13ARIMA_SEATS(Volume ~ seats())) %>%
  components() %>% 
  select(season_adjust) %>% 
  autoplot(season_adjust)
x11_dcmp2

Seats and x11 are nearly the exact same graph. They are more homoskedastic than the regularly decomposed graph.

Exercise 9

retail<-aus_retail%>%
  filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
  summarise(Turnover = sum(Turnover))
ggplot(data= retail)+
  geom_line(aes(x=Month, y=Turnover))+
  labs(title="Turnover")

retail <-retail%>%
  mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
                                     .before = 5, .after = 6, .complete = TRUE))%>%
  mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
                                       .before = 1, .after = 0, .complete = TRUE))
retail%>%
  autoplot(Turnover) +
  geom_line(aes(y =`2x12-MA`), colour = "#0072B2") +
  labs(y = "Liquor retailing Turnover",
       title = "Total AUS Liquor retailing Turnover")+
  guides(colour = guide_legend(title = "series"))

## Warning: Removed 12 row(s) containing missing values (geom_path).

This question was confusing to me since the answers are printed right under the question. It was easy to replicate with my own code, but still.

Exercise 10

Additive steps: The data needs to be smoothed, generally using a moving average If the period is even, then do 2xma moving average If odd, just fo m-moving average This is the trend component Now, subtract the trend component from each data point, this is seasonal Average the seasonal component for each de-trended value Normalize the seasonal components This is done by averaging all of the values, and subtracting the average from each value The remainder is what is left from the data after subtracting the trend and the seasonal components

Multiplicative steps: The data needs to be smoothed, generally using a moving average If the period is even, then do 2xma moving average If odd, just fo m-moving average This is the trend component Now, divide by the trend component from each data point, this is seasonal Average the seasonal component for each de-trended value Normalize the seasonal components This is done by averaging all of the values, and dividing the average from each value The remainder is what is left from the data after dividing by the trend and the seasonal components

HW2

Brandon Pascali

2022-09-23