title: “HW #2” author: “Matt Mullis” date: “09/23/22” output: html_document —
library(fpp3)
library(readr)
library(lubridate)
library(tsibble)
library(fpp3)
library(ggplot2)
library(tidyverse)
library(dplyr)
library(ggfortify)
library(tidyquant)
library(MASS)
library(slider)
library(forecast)
library(zoo)
library(seasonal)
library(seasonalview)
# install and load any package necessary
#Im going to convert this to GDP per Capita. This will make the numbers smaller
#and easier to understand, as well as control for population increases.
usecon <- global_economy %>%
filter(Country == "United States") %>%
mutate(gdppc= GDP/Population)
ggplot(data=usecon)+
geom_line(mapping=aes(x=Year,y=gdppc))
#1b
sltr <- aus_livestock %>%
filter(Animal== "Bulls, bullocks and steers") %>%
filter(State=="Victoria")
ggplot(data=sltr)+
geom_line(mapping=aes(x=Month, y= Count))
#it doesn't really look like this should be adjusted. I get a clear picture from
#this graph
#1c
vic_elec %>%
mutate(Date=yearmonth(Date)) %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = Demand`
#i changed this data to months to make it easier to read.
#1d
ausgas <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, ausgas))+
labs(title= "adjusted graph")
ggplot(data=aus_production)+
geom_line(mapping=aes(x=Quarter, y=Gas))+
labs(title= "not adjusted")
#the graph is adjusted to make seasonal variation more evident. I used this because
#the variation in the data changes over time. I used the Guerero
#method to choose my lambda value.
#this is not a good candidate because it ends and starts with nearly the same
#variation in the data, however the variation at the end is much smaller. c. Boxcox
#transformations only work when the variance increases or is constant.
#3.1
austob <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, austob))+
labs(title= "tobacco box cox")
## Warning: Removed 24 row(s) containing missing values (geom_path).
#3.2
ansett2 <- ansett %>%
filter(Airports=="MEL-SYD") %>%
filter(Class == "Economy")
ansett3 <- ansett2 %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
ansett2 %>%
autoplot(box_cox(Passengers, ansett3))+
labs(title="flights box cox")
#3.3
head(pedestrian)
## # A tsibble: 6 x 5 [1h] <Australia/Melbourne>
## # Key: Sensor [1]
## Sensor Date_Time Date Time Count
## <chr> <dttm> <date> <int> <int>
## 1 Birrarung Marr 2015-01-01 00:00:00 2015-01-01 0 1630
## 2 Birrarung Marr 2015-01-01 01:00:00 2015-01-01 1 826
## 3 Birrarung Marr 2015-01-01 02:00:00 2015-01-01 2 567
## 4 Birrarung Marr 2015-01-01 03:00:00 2015-01-01 3 264
## 5 Birrarung Marr 2015-01-01 04:00:00 2015-01-01 4 139
## 6 Birrarung Marr 2015-01-01 05:00:00 2015-01-01 5 77
pedestrian2 <- pedestrian %>%
filter(Sensor == "Southern Cross Station")
pedestrian3 <- pedestrian2 %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
pedestrian2 %>%
autoplot(box_cox(Count,pedestrian3 ))+
labs(title="pedestrian box cox")
###
exercise 4
nmbr <- c(100,120,140,120,110,110,100)
nmbr2 <- nmbr %>%
ma(3)
nmbr3 <- nmbr2 %>%
ma(5)
print(nmbr3)
## Time Series:
## Start = 1
## End = 7
## Frequency = 1
## [1] NA NA NA 118 NA NA NA
result <- (100*0.067)+(120*0.133)+(140*.2)+(120*.2)+(110*.2)+(110*.133)+(100*.067)
print(result)
## [1] 117.99
#I assume these would be the same if i rounded.
gas <- tail(aus_production, 5*4) %>%
dplyr::select(Gas)
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`
#this data is very seasonal, at the beginning of every first quarter is the lowest
#trough, then a big increase until halfway through the year, when the decline begins.
gas %>%
model(
classical_decomposition(Gas, type= "multiplicative")
) %>%
components %>%
autoplot()
## Warning: Removed 2 row(s) containing missing values (geom_path).
#this absolutely supports the interpretation from part A. it is clearly seasonal
#and has a strong upward trend.
gas %>%
model(
STL(Gas~trend(window=4)+
season(window="periodic"),
robust= TRUE)) %>%
components() %>%
autoplot()
gas2 <- gas %>%
mutate(Gas = ifelse(Gas == 171, Gas + 300, Gas))
gas2 %>%
model(
STL(Gas~trend(window=4)+
season(window="periodic"),
robust= TRUE)) %>%
components() %>%
autoplot()
gas3<- gas %>%
mutate(Gas = ifelse(Gas ==236, Gas + 300, Gas))
gas3 %>%
model(
STL(Gas~trend(window=4)+
season(window="periodic"),
robust= TRUE)) %>%
components() %>%
autoplot() %>%
labs(title= "outlier at the end")
## [[1]]
##
## $title
## [1] "outlier at the end"
##
## attr(,"class")
## [1] "labels"
#it doesnt look like much has changed of the trend and seasonality at all. maybe the
#trend is a little steeper, but that is it.
###exercise 6
set.seed(12122)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Western Australia Household goods retailing A3349824F 1982 Apr 50.4
## 2 Western Australia Household goods retailing A3349824F 1982 May 57.9
## 3 Western Australia Household goods retailing A3349824F 1982 Jun 54.4
## 4 Western Australia Household goods retailing A3349824F 1982 Jul 56.2
## 5 Western Australia Household goods retailing A3349824F 1982 Aug 55
## 6 Western Australia Household goods retailing A3349824F 1982 Sep 55.6
x11_dcmp<- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_dcmp)
#to be honest, I do not see much difference here. The lines are not very smooth,
#and there seems to be a lot of white noise in the data. You can see a clear dip
#during the 2008 recession, but the trend is still upwards. I do not see any outliers
#at all.
#the Australian workforce. It also shows seasonality in that pretty much each #year followed the same pattern. Clearly, a lot of people (literally called seasonal workers) # are hired in December then laid off in january. The effects of the recession #are very clear in the remainder, where there is a huge dip.
head(canadian_gas)
## # A tsibble: 6 x 2 [1M]
## Month Volume
## <mth> <dbl>
## 1 1960 Jan 1.43
## 2 1960 Feb 1.31
## 3 1960 Mar 1.40
## 4 1960 Apr 1.17
## 5 1960 May 1.12
## 6 1960 Jun 1.01
autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`
gg_subseries(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
gg_season(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
cg <- canadian_gas%>%
model(stl=STL(Volume))
components(cg)
## # A dable: 542 x 7 [1M]
## # Key: .model [1]
## # : Volume = trend + season_year + remainder
## .model Month Volume trend season_year remainder season_adjust
## <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stl 1960 Jan 1.43 1.08 0.520 -0.172 0.911
## 2 stl 1960 Feb 1.31 1.11 0.215 -0.0178 1.09
## 3 stl 1960 Mar 1.40 1.13 0.307 -0.0395 1.09
## 4 stl 1960 Apr 1.17 1.16 0.0161 -0.00627 1.15
## 5 stl 1960 May 1.12 1.18 -0.116 0.0476 1.23
## 6 stl 1960 Jun 1.01 1.21 -0.356 0.159 1.37
## 7 stl 1960 Jul 0.966 1.23 -0.403 0.136 1.37
## 8 stl 1960 Aug 0.977 1.26 -0.349 0.0677 1.33
## 9 stl 1960 Sep 1.03 1.28 -0.340 0.0870 1.37
## 10 stl 1960 Oct 1.25 1.31 -0.0899 0.0329 1.34
## # … with 532 more rows
#it looks like beginning in 1975, there is an increase in the volatility of seasonality of the data.this ends a little before 1990. looks like an accordion.
canadian_gas %>%
autoplot(Volume, color='gray') +
autolayer(components(cg), season_adjust, color='#D55E00')
###exercise 9
liquor<-aus_retail%>%
filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
summarise(Turnover = sum(Turnover))
ggplot(data= liquor)+
geom_line(aes(x=Month, y=Turnover))
# since the data is monthly, we should take a moving average of 2X12 to estimate the
#trend cycle
liquor <-liquor%>%
mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
.before = 5, .after = 6, .complete = TRUE))%>%
mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
.before = 1, .after = 0, .complete = TRUE))
liquor%>%
autoplot(Turnover) +
geom_line(aes(y =`2x12-MA`), colour = "#0072B2") +
labs(y = "Liquor retailing Turnover",
title = "Total AUS Liquor retailing Turnover")+
guides(colour = guide_legend(title = "series"))
## Warning: Removed 12 row(s) containing missing values (geom_path).
###exercise 10 #Explain the step-by-step how to employ the classical decomposition for both Additive and Multiplicative.
#additive: Y= S+T+R. multiplicative: Y=STR #Additive: 1. find m, then get the moving average of the data, which gives you #the trend T. 2. find the de-trended series, y-T. 3. get the S value by averaging #the detrended values for the season. 4. get R by subtracting S and T from Y.
#multiplicative: #find T by calculating the moving average, then find the detrended series, #y/t. then, find S by averaging the detrended values for that season to get S. #then, find R by doing R=(y/S*T)