title: “HW #2” author: “Matt Mullis” date: “09/23/22” output: html_document —

Load packages and data

library(fpp3)
library(readr)
library(lubridate)
library(tsibble)
library(fpp3)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(ggfortify)
library(tidyquant)
library(MASS)
library(slider)
library(forecast)
library(zoo)
library(seasonal)
library(seasonalview)

# install and load any package necessary

Questions

Exercise 1

#Im going to convert this to GDP per Capita. This will make the numbers smaller
#and easier to understand, as well as control for population increases. 
usecon <- global_economy %>% 
  filter(Country == "United States") %>% 
  mutate(gdppc= GDP/Population)

ggplot(data=usecon)+
  geom_line(mapping=aes(x=Year,y=gdppc))

#1b
sltr <- aus_livestock %>% 
  filter(Animal== "Bulls, bullocks and steers") %>% 
  filter(State=="Victoria")
ggplot(data=sltr)+
  geom_line(mapping=aes(x=Month, y= Count))

#it doesn't really look like this should be adjusted. I get a clear picture from 
#this graph

#1c


vic_elec %>% 
  mutate(Date=yearmonth(Date)) %>% 
  autoplot()
## Plot variable not specified, automatically selected `.vars = Demand`

#i changed this data to months to make it easier to read. 


#1d

ausgas <- aus_production %>%
  features(Gas, features = guerrero) %>%
  pull(lambda_guerrero)
aus_production %>%
  autoplot(box_cox(Gas, ausgas))+
  labs(title= "adjusted graph")

ggplot(data=aus_production)+
  geom_line(mapping=aes(x=Quarter, y=Gas))+
  labs(title= "not adjusted")

#the graph is adjusted to make seasonal variation more evident. I used this because 
#the variation in the data changes over time. I used the Guerero 
#method to choose my lambda value. 

Exercise 2

#this is not a good candidate because it ends and starts with nearly the same 
#variation in the data, however the variation at the end is much smaller. c. Boxcox 
#transformations only work when the variance increases or is constant. 

Exercise 3

#3.1
austob <- aus_production %>%
  features(Tobacco, features = guerrero) %>%
  pull(lambda_guerrero)
aus_production %>%
  autoplot(box_cox(Tobacco, austob))+
  labs(title= "tobacco box cox")
## Warning: Removed 24 row(s) containing missing values (geom_path).

#3.2
ansett2 <- ansett %>% 
  filter(Airports=="MEL-SYD") %>% 
  filter(Class == "Economy")
ansett3 <- ansett2 %>% 
  features(Passengers, features = guerrero) %>%
  pull(lambda_guerrero)
ansett2 %>% 
  autoplot(box_cox(Passengers, ansett3))+
  labs(title="flights box cox")

#3.3
head(pedestrian)
## # A tsibble: 6 x 5 [1h] <Australia/Melbourne>
## # Key:       Sensor [1]
##   Sensor         Date_Time           Date        Time Count
##   <chr>          <dttm>              <date>     <int> <int>
## 1 Birrarung Marr 2015-01-01 00:00:00 2015-01-01     0  1630
## 2 Birrarung Marr 2015-01-01 01:00:00 2015-01-01     1   826
## 3 Birrarung Marr 2015-01-01 02:00:00 2015-01-01     2   567
## 4 Birrarung Marr 2015-01-01 03:00:00 2015-01-01     3   264
## 5 Birrarung Marr 2015-01-01 04:00:00 2015-01-01     4   139
## 6 Birrarung Marr 2015-01-01 05:00:00 2015-01-01     5    77
pedestrian2 <- pedestrian %>% 
  filter(Sensor == "Southern Cross Station")
pedestrian3 <- pedestrian2 %>% 
  features(Count, features = guerrero) %>%
  pull(lambda_guerrero)
pedestrian2 %>% 
  autoplot(box_cox(Count,pedestrian3 ))+
  labs(title="pedestrian box cox")

### exercise 4

nmbr <- c(100,120,140,120,110,110,100)
nmbr2 <- nmbr %>% 
  ma(3)
nmbr3 <- nmbr2 %>% 
  ma(5)
print(nmbr3) 
## Time Series:
## Start = 1 
## End = 7 
## Frequency = 1 
## [1]  NA  NA  NA 118  NA  NA  NA
result <- (100*0.067)+(120*0.133)+(140*.2)+(120*.2)+(110*.2)+(110*.133)+(100*.067)
print(result)
## [1] 117.99
#I assume these would be the same if i rounded. 

exercise 5

gas <- tail(aus_production, 5*4) %>% 
  dplyr::select(Gas)
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`

#this data is very seasonal, at the beginning of every first quarter is the lowest
#trough, then a big increase until halfway through the year, when the decline begins. 
gas %>% 
  model(
    classical_decomposition(Gas, type= "multiplicative")
  ) %>% 
  components %>% 
  autoplot()
## Warning: Removed 2 row(s) containing missing values (geom_path).

#this absolutely supports the interpretation from part A. it is clearly seasonal
#and has a strong upward trend. 

gas %>% 
  model(
    STL(Gas~trend(window=4)+
          season(window="periodic"),
        robust= TRUE)) %>% 
  components() %>% 
  autoplot()

gas2 <- gas %>% 
  mutate(Gas = ifelse(Gas == 171, Gas + 300, Gas))
gas2 %>% 
  model(
    STL(Gas~trend(window=4)+
          season(window="periodic"),
        robust= TRUE)) %>% 
  components() %>% 
  autoplot()

gas3<- gas %>% 
  mutate(Gas = ifelse(Gas ==236, Gas + 300, Gas))
gas3 %>% 
  model(
    STL(Gas~trend(window=4)+
          season(window="periodic"),
        robust= TRUE)) %>% 
  components() %>% 
  autoplot() %>% 
  labs(title= "outlier at the end")
## [[1]]

## 
## $title
## [1] "outlier at the end"
## 
## attr(,"class")
## [1] "labels"
#it doesnt look like much has changed of the trend and seasonality at all. maybe the 
#trend is a little steeper, but that is it. 

###exercise 6

set.seed(12122)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key:       State, Industry [1]
##   State             Industry                  `Series ID`    Month Turnover
##   <chr>             <chr>                     <chr>          <mth>    <dbl>
## 1 Western Australia Household goods retailing A3349824F   1982 Apr     50.4
## 2 Western Australia Household goods retailing A3349824F   1982 May     57.9
## 3 Western Australia Household goods retailing A3349824F   1982 Jun     54.4
## 4 Western Australia Household goods retailing A3349824F   1982 Jul     56.2
## 5 Western Australia Household goods retailing A3349824F   1982 Aug     55  
## 6 Western Australia Household goods retailing A3349824F   1982 Sep     55.6
x11_dcmp<- myseries %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components()
autoplot(x11_dcmp)

#to be honest, I do not see much difference here. The lines are not very smooth,
#and there seems to be a lot of white noise in the data. You can see a clear dip
#during the 2008 recession, but the trend is still upwards. I do not see any outliers
#at all. 

exercise 7

The data shows a very strong upward trend, showing that more people are entering

#the Australian workforce. It also shows seasonality in that pretty much each #year followed the same pattern. Clearly, a lot of people (literally called seasonal workers) # are hired in December then laid off in january. The effects of the recession #are very clear in the remainder, where there is a huge dip.

exercise 8

head(canadian_gas)
## # A tsibble: 6 x 2 [1M]
##      Month Volume
##      <mth>  <dbl>
## 1 1960 Jan   1.43
## 2 1960 Feb   1.31
## 3 1960 Mar   1.40
## 4 1960 Apr   1.17
## 5 1960 May   1.12
## 6 1960 Jun   1.01
autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`

gg_subseries(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`

gg_season(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`

 cg <- canadian_gas%>% 
  model(stl=STL(Volume))
components(cg)
## # A dable: 542 x 7 [1M]
## # Key:     .model [1]
## # :        Volume = trend + season_year + remainder
##    .model    Month Volume trend season_year remainder season_adjust
##    <chr>     <mth>  <dbl> <dbl>       <dbl>     <dbl>         <dbl>
##  1 stl    1960 Jan  1.43   1.08      0.520   -0.172           0.911
##  2 stl    1960 Feb  1.31   1.11      0.215   -0.0178          1.09 
##  3 stl    1960 Mar  1.40   1.13      0.307   -0.0395          1.09 
##  4 stl    1960 Apr  1.17   1.16      0.0161  -0.00627         1.15 
##  5 stl    1960 May  1.12   1.18     -0.116    0.0476          1.23 
##  6 stl    1960 Jun  1.01   1.21     -0.356    0.159           1.37 
##  7 stl    1960 Jul  0.966  1.23     -0.403    0.136           1.37 
##  8 stl    1960 Aug  0.977  1.26     -0.349    0.0677          1.33 
##  9 stl    1960 Sep  1.03   1.28     -0.340    0.0870          1.37 
## 10 stl    1960 Oct  1.25   1.31     -0.0899   0.0329          1.34 
## # … with 532 more rows
#it looks like beginning in 1975, there is an increase in the volatility of seasonality of the data.this ends a little before 1990. looks like an accordion. 
 canadian_gas %>%  
   autoplot(Volume, color='gray') +
  autolayer(components(cg), season_adjust, color='#D55E00')

###exercise 9

liquor<-aus_retail%>%
  filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
  summarise(Turnover = sum(Turnover))

ggplot(data= liquor)+
  geom_line(aes(x=Month, y=Turnover))

# since the data is monthly, we should take a moving average of 2X12 to estimate the
#trend cycle 


liquor <-liquor%>%
  mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
                                    .before = 5, .after = 6, .complete = TRUE))%>%
  mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
                                    .before = 1, .after = 0, .complete = TRUE))
liquor%>%
  autoplot(Turnover) +
  geom_line(aes(y =`2x12-MA`), colour = "#0072B2") +
  labs(y = "Liquor retailing Turnover",
       title = "Total AUS Liquor retailing Turnover")+
  guides(colour = guide_legend(title = "series"))
## Warning: Removed 12 row(s) containing missing values (geom_path).

###exercise 10 #Explain the step-by-step how to employ the classical decomposition for both Additive and Multiplicative.

#additive: Y= S+T+R. multiplicative: Y=STR #Additive: 1. find m, then get the moving average of the data, which gives you #the trend T. 2. find the de-trended series, y-T. 3. get the S value by averaging #the detrended values for the season. 4. get R by subtracting S and T from Y.

#multiplicative: #find T by calculating the moving average, then find the detrended series, #y/t. then, find S by averaging the detrended values for that season to get S. #then, find R by doing R=(y/S*T)