title: “HW 2” author: “Summer Ligon” date: “9/23/2022” output: html_document —
library(ggplot2)
library(readxl)
library(readr)
library(tidyverse)
library(tidyquant)
library(dplyr)
library(feasts)
library(fpp3)
library(slider)
library(tibble)
library(forecast)
library(seasonal)
library(generics)
# 1.
# While plotting the United states GDP, I # divided GDP by the population to show GDP # per capita.
GDP <- global_economy %>%
group_by(GDP) %>%
filter(Country == "United States") %>%
mutate(GDP = GDP/Population)
ggplot(data = GDP, mapping = aes(x = Year, y = GDP)) +
geom_line()
# 2.
View(aus_livestock)
# I made sure that count represented the number of slaughters
# by looking at the details of aus_livestock
?aus_livestock
## starting httpd help server ... done
# I decided to do slaughters per state and cube root the
# count since the original graph had an immeasurable amount
# of lines with an intense amount of fluctuations each. Now,
# it is more visible to see the trend and seasonality of
# the data.
slaughters <- aus_livestock %>%
group_by(Count) %>%
filter(Animal == "Bulls, bullocks and steers") %>%
mutate(Count = Count^(1/3))
ggplot(data = slaughters, mapping = aes(x = Month, y = Count, color = State), show.legend = TRUE)+
geom_line() +
labs(title = "Number of Bull, Bullocks and Steers Sluaghters Per State", y = "Count (cube root)")
# 3. ???
# I had a bunch of trouble with this one, but this is the best that I could get.
View(vic_elec)
distinct(vic_elec)
## # A tibble: 52,608 × 5
## Time Demand Temperature Date Holiday
## <dttm> <dbl> <dbl> <date> <lgl>
## 1 2012-01-01 00:00:00 4383. 21.4 2012-01-01 TRUE
## 2 2012-01-01 00:30:00 4263. 21.0 2012-01-01 TRUE
## 3 2012-01-01 01:00:00 4049. 20.7 2012-01-01 TRUE
## 4 2012-01-01 01:30:00 3878. 20.6 2012-01-01 TRUE
## 5 2012-01-01 02:00:00 4036. 20.4 2012-01-01 TRUE
## 6 2012-01-01 02:30:00 3866. 20.2 2012-01-01 TRUE
## 7 2012-01-01 03:00:00 3694. 20.1 2012-01-01 TRUE
## 8 2012-01-01 03:30:00 3562. 19.6 2012-01-01 TRUE
## 9 2012-01-01 04:00:00 3433. 19.1 2012-01-01 TRUE
## 10 2012-01-01 04:30:00 3359. 19.0 2012-01-01 TRUE
## # … with 52,598 more rows
## # ℹ Use `print(n = ...)` to see more rows
vic <- vic_elec %>%
group_by(Date, Demand) %>%
mutate(Date = yearmonth(Date))
vic
## # A tsibble: 52,608 x 5 [30m] <Australia/Melbourne>
## # Groups: Date, Demand [52,607]
## Time Demand Temperature Date Holiday
## <dttm> <dbl> <dbl> <mth> <lgl>
## 1 2012-01-01 00:00:00 4383. 21.4 2012 Jan TRUE
## 2 2012-01-01 00:30:00 4263. 21.0 2012 Jan TRUE
## 3 2012-01-01 01:00:00 4049. 20.7 2012 Jan TRUE
## 4 2012-01-01 01:30:00 3878. 20.6 2012 Jan TRUE
## 5 2012-01-01 02:00:00 4036. 20.4 2012 Jan TRUE
## 6 2012-01-01 02:30:00 3866. 20.2 2012 Jan TRUE
## 7 2012-01-01 03:00:00 3694. 20.1 2012 Jan TRUE
## 8 2012-01-01 03:30:00 3562. 19.6 2012 Jan TRUE
## 9 2012-01-01 04:00:00 3433. 19.1 2012 Jan TRUE
## 10 2012-01-01 04:30:00 3359. 19.0 2012 Jan TRUE
## # … with 52,598 more rows
## # ℹ Use `print(n = ...)` to see more rows
ggplot(data = vic, mapping = aes(x = Date, y = Demand))+
geom_line()
?aggregate
# 4.
# I didn't use any transforamtions for this one
View(aus_production)
ggplot(data = aus_production, mapping = aes(x = Quarter, y = Gas)) +
geom_line()
# Using the guerrero feature, I am getting my lambda.
# A good lambda is one that makes size of the seasonal
# variation about the same across the whole series (mkaes
# forecasting model simpler).
canadian_gas %>%
features(Volume, features = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 0.392
# ^^ a low value of lambda can give me extremely large
# prediction intervals. My lambda is not very low compared
# to the one in the slides. Let's see what we get.
canadian_gas %>%
autoplot(box_cox(Volume, 0.392)) +
labs(y = "Box-Cox transformed Volume")
# After viewing this, we can see that the seasonal fluctuations
# and random variation across the series.
View(canadian_gas)
autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`
# By plotting the original graph, it's obvious that the seasonal
# didn't change much at all. It seems as if it is only a shift
# in the graph that changed which makes the trend easier to
# view; however, again, the seasonality was not affected much at
# all between the original and Box-Cox transformation.
tob <- aus_production %>%
select(Tobacco)
tob
## # A tsibble: 218 x 2 [1Q]
## Tobacco Quarter
## <dbl> <qtr>
## 1 5225 1956 Q1
## 2 5178 1956 Q2
## 3 5297 1956 Q3
## 4 5681 1956 Q4
## 5 5577 1957 Q1
## 6 5651 1957 Q2
## 7 5317 1957 Q3
## 8 6152 1957 Q4
## 9 5758 1958 Q1
## 10 5641 1958 Q2
## # … with 208 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Finding my lambda for Tobacco in aus_production
tob %>%
features(Tobacco, features = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 0.929
# Now auto plotting with box-cox with lambda = 0.929
tob %>%
autoplot(box_cox(Tobacco, 0.929))
## Warning: Removed 24 row(s) containing missing values (geom_path).
# Comparing it to the original graph, it is obvious
# that nothing really changed, this is because lambda
# is extremely close to 1 (no change in the shape of
# the time series). There was a substantial shift though.
autoplot(tob)
## Plot variable not specified, automatically selected `.vars = Tobacco`
## Warning: Removed 24 row(s) containing missing values (geom_path).
# Now finding lambda for Economy class passengers between
# Melbourne and Sydney from ansett
ansett %>%
distinct(Airports)
## # A tibble: 10 × 1
## Airports
## <chr>
## 1 ADL-PER
## 2 MEL-ADL
## 3 MEL-BNE
## 4 MEL-OOL
## 5 MEL-PER
## 6 MEL-SYD
## 7 SYD-ADL
## 8 SYD-BNE
## 9 SYD-OOL
## 10 SYD-PER
pass <- ansett %>%
filter(Class == "Economy") %>%
filter(Airports == "MEL-SYD")
pass
## # A tsibble: 282 x 4 [1W]
## # Key: Airports, Class [1]
## Week Airports Class Passengers
## <week> <chr> <chr> <dbl>
## 1 1987 W26 MEL-SYD Economy 20167
## 2 1987 W27 MEL-SYD Economy 20161
## 3 1987 W28 MEL-SYD Economy 19993
## 4 1987 W29 MEL-SYD Economy 20986
## 5 1987 W30 MEL-SYD Economy 20497
## 6 1987 W31 MEL-SYD Economy 20770
## 7 1987 W32 MEL-SYD Economy 21111
## 8 1987 W33 MEL-SYD Economy 20675
## 9 1987 W34 MEL-SYD Economy 22092
## 10 1987 W35 MEL-SYD Economy 20772
## # … with 272 more rows
## # ℹ Use `print(n = ...)` to see more rows
pass2 <- pass %>%
select(Passengers)
pass2
## # A tsibble: 282 x 2 [1W]
## Passengers Week
## <dbl> <week>
## 1 20167 1987 W26
## 2 20161 1987 W27
## 3 19993 1987 W28
## 4 20986 1987 W29
## 5 20497 1987 W30
## 6 20770 1987 W31
## 7 21111 1987 W32
## 8 20675 1987 W33
## 9 22092 1987 W34
## 10 20772 1987 W35
## # … with 272 more rows
## # ℹ Use `print(n = ...)` to see more rows
pass2 %>%
features(Passengers, feature = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 2.00
# The Lamda is very high for this situation. Let's
# see where this goes.
pass2 %>%
autoplot(box_cox(Passengers, 2.00))+
labs(y = "Box-Cox transformed turnover")
# Comparing it to the original plot...
autoplot(pass2)
## Plot variable not specified, automatically selected `.vars = Passengers`
# ... I see that the variance of seasonality has been
# adjusted fairly nicely comparatively to it's original
# plot. The original plot had a huge declining spike
# and now the Box-Cox has adjusted to it.
## ??? why is it so funky??
View(pedestrian)
pedestrian %>%
distinct(Sensor)
## # A tibble: 4 × 1
## Sensor
## <chr>
## 1 Birrarung Marr
## 2 Bourke Street Mall (North)
## 3 QV Market-Elizabeth St (West)
## 4 Southern Cross Station
cross <- pedestrian %>%
filter(Sensor == "Southern Cross Station") %>%
select(Count)
cross
## # A tsibble: 17,539 x 2 [1h] <Australia/Melbourne>
## Count Date_Time
## <int> <dttm>
## 1 746 2015-01-01 00:00:00
## 2 312 2015-01-01 01:00:00
## 3 180 2015-01-01 02:00:00
## 4 133 2015-01-01 03:00:00
## 5 44 2015-01-01 04:00:00
## 6 16 2015-01-01 05:00:00
## 7 13 2015-01-01 06:00:00
## 8 21 2015-01-01 07:00:00
## 9 39 2015-01-01 08:00:00
## 10 36 2015-01-01 09:00:00
## # … with 17,529 more rows
## # ℹ Use `print(n = ...)` to see more rows
cross %>%
features(Count, feature = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 -0.226
cross %>%
autoplot(box_cox(Count, -0.226))
# Comparing it to its original graph...
autoplot(cross)
## Plot variable not specified, automatically selected `.vars = Count`
# ... I see that the majority of the lines have been centered
# making the data appear that the fluctuation of the seasonality
# has been balanced.
# I am going to creat a random table to calculate the
# moving averages of each.
tibble <- data_frame(list = c(100, 110, 120, 130, 100, 150, 95))
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
tibble
## # A tibble: 7 × 1
## list
## <dbl>
## 1 100
## 2 110
## 3 120
## 4 130
## 5 100
## 6 150
## 7 95
tibble2 <- tibble %>%
mutate('5-MA' = slide_dbl(list, mean, .before = 2, .after = 2, .complete = TRUE),
'3-MA' = slide_dbl(`5-MA`, mean, .before = 1, .after = 1, .complete = TRUE))
tibble2
## # A tibble: 7 × 3
## list `5-MA` `3-MA`
## <dbl> <dbl> <dbl>
## 1 100 NA NA
## 2 110 NA NA
## 3 120 112 NA
## 4 130 122 118.
## 5 100 119 NA
## 6 150 NA NA
## 7 95 NA NA
# For some odd reason, the mean isn't showing up??
# This is an equation of the weights multiplied by each
# value from the list. As you can see, we get the same value
# as we did in the 3x5-MA calculation.
round((1/15*100) + (2/15*110) + (1/5*120) + (1/5*130) + (1/5*100) + (2/15*150) + (1/15*95))
## [1] 118
gas <- tail(aus_production, 20) %>%
select(Gas)
gas
## # A tsibble: 20 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 221 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
## 7 187 2007 Q1
## 8 234 2007 Q2
## 9 245 2007 Q3
## 10 205 2007 Q4
## 11 194 2008 Q1
## 12 229 2008 Q2
## 13 249 2008 Q3
## 14 203 2008 Q4
## 15 196 2009 Q1
## 16 238 2009 Q2
## 17 252 2009 Q3
## 18 210 2009 Q4
## 19 205 2010 Q1
## 20 236 2010 Q2
# 1.
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`
# I notice a seasonality of a increase until the second quarter and then a decrease for the rest of
# the year for each year. There is also a slight increasing trend from 2006 to 2010.
# 2.
gas2 <- gas %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
select(trend, seasonal, random)
gas2
## # A tsibble: 20 x 4 [1Q]
## trend seasonal random Quarter
## <dbl> <dbl> <dbl> <qtr>
## 1 NA 1.13 NA 2005 Q3
## 2 NA 0.925 NA 2005 Q4
## 3 200. 0.875 0.974 2006 Q1
## 4 204. 1.07 1.02 2006 Q2
## 5 207 1.13 1.00 2006 Q3
## 6 210. 0.925 0.987 2006 Q4
## 7 213 0.875 1.00 2007 Q1
## 8 216. 1.07 1.01 2007 Q2
## 9 219. 1.13 0.996 2007 Q3
## 10 219. 0.925 1.01 2007 Q4
## 11 219. 0.875 1.01 2008 Q1
## 12 219 1.07 0.974 2008 Q2
## 13 219 1.13 1.01 2008 Q3
## 14 220. 0.925 0.996 2008 Q4
## 15 222. 0.875 1.01 2009 Q1
## 16 223. 1.07 0.993 2009 Q2
## 17 225. 1.13 0.994 2009 Q3
## 18 226 0.925 1.00 2009 Q4
## 19 NA 0.875 NA 2010 Q1
## 20 NA 1.07 NA 2010 Q2
# 3.
# The trend index is slightly increasing from one observation
# to the next which is seems to be correct based off looking at
# the plot. Since seasonally adjusted can be found by multiplying
# trend, seasonal, and random together, I did this from the grapgh
# for each quarter and found that the values were lining up with
# the values from the graph. I felt as if seasonal couldn't be
# interpretted from the graph very easily, so this is why I checked
# the values by hand. Seasonal and random seemed to be very close
# in value.
# 4.
gas3 <- gas %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
select(season_adjust)
gas3
## # A tsibble: 20 x 2 [1Q]
## season_adjust Quarter
## <dbl> <qtr>
## 1 196. 2005 Q3
## 2 195. 2005 Q4
## 3 195. 2006 Q1
## 4 209. 2006 Q2
## 5 207. 2006 Q3
## 6 208. 2006 Q4
## 7 214. 2007 Q1
## 8 218. 2007 Q2
## 9 218. 2007 Q3
## 10 222. 2007 Q4
## 11 222. 2008 Q1
## 12 213. 2008 Q2
## 13 221. 2008 Q3
## 14 219. 2008 Q4
## 15 224. 2009 Q1
## 16 222. 2009 Q2
## 17 224. 2009 Q3
## 18 227. 2009 Q4
## 19 234. 2010 Q1
## 20 220. 2010 Q2
# The blue line is the seasonally adjusted line
gas %>%
autoplot(Gas, color = 'black') +
autolayer(gas3, season_adjust, color = 'blue')
# 5.
# I am first changing the 12th row of the gas tsibble
# to its value (229) plus 300 to attain an outlier within
# my new tsibble names 'out'.
out <- gas
out$Gas[12] <- 229 + 300
out
## # A tsibble: 20 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 221 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
## 7 187 2007 Q1
## 8 234 2007 Q2
## 9 245 2007 Q3
## 10 205 2007 Q4
## 11 194 2008 Q1
## 12 529 2008 Q2
## 13 249 2008 Q3
## 14 203 2008 Q4
## 15 196 2009 Q1
## 16 238 2009 Q2
## 17 252 2009 Q3
## 18 210 2009 Q4
## 19 205 2010 Q1
## 20 236 2010 Q2
# I am now recomputing the seasoanlly adjusted data:
out2 <- out %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components %>%
select(season_adjust)
out2
## # A tsibble: 20 x 2 [1Q]
## season_adjust Quarter
## <dbl> <qtr>
## 1 209. 2005 Q3
## 2 210. 2005 Q4
## 3 208. 2006 Q1
## 4 177. 2006 Q2
## 5 221. 2006 Q3
## 6 223. 2006 Q4
## 7 228. 2007 Q1
## 8 185. 2007 Q2
## 9 232. 2007 Q3
## 10 239. 2007 Q4
## 11 236. 2008 Q1
## 12 418. 2008 Q2
## 13 236. 2008 Q3
## 14 236. 2008 Q4
## 15 239. 2009 Q1
## 16 188. 2009 Q2
## 17 239. 2009 Q3
## 18 244. 2009 Q4
## 19 250. 2010 Q1
## 20 187. 2010 Q2
View(out2)
View(gas3)
# By viewing the data side by side, I noticed that the outlier
# caused about every single row of seasonal_adj to increase by around
# ten units each except for the row that I manipulated (12). Some of the
# values happened to decrease. The 12th row went from 213 to 418, about
# a 200 unit difference.
autoplot(out2)
## Plot variable not specified, automatically selected `.vars = season_adjust`
# I feel like I could observe it best by plotting it.
# 6.
# We shall see if it does have any type of affect.
out3 <- gas
out3$Gas[1] <- 221 + 300
out3
## # A tsibble: 20 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 521 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
## 7 187 2007 Q1
## 8 234 2007 Q2
## 9 245 2007 Q3
## 10 205 2007 Q4
## 11 194 2008 Q1
## 12 229 2008 Q2
## 13 249 2008 Q3
## 14 203 2008 Q4
## 15 196 2009 Q1
## 16 238 2009 Q2
## 17 252 2009 Q3
## 18 210 2009 Q4
## 19 205 2010 Q1
## 20 236 2010 Q2
out4 <- out3 %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
select(season_adjust)
out4
## # A tsibble: 20 x 2 [1Q]
## season_adjust Quarter
## <dbl> <qtr>
## 1 459. 2005 Q3
## 2 193. 2005 Q4
## 3 201. 2006 Q1
## 4 207. 2006 Q2
## 5 205. 2006 Q3
## 6 206. 2006 Q4
## 7 220. 2007 Q1
## 8 216. 2007 Q2
## 9 216. 2007 Q3
## 10 220. 2007 Q4
## 11 229. 2008 Q1
## 12 211. 2008 Q2
## 13 219. 2008 Q3
## 14 218. 2008 Q4
## 15 231. 2009 Q1
## 16 220. 2009 Q2
## 17 222. 2009 Q3
## 18 225. 2009 Q4
## 19 242. 2010 Q1
## 20 218. 2010 Q2
autoplot(out4)
## Plot variable not specified, automatically selected `.vars = season_adjust`
# Obviously, the observation that was manipulated is much
# higher than the others. The seasonality for the rest of
# the data seems to have declined for each following quarter.
out5 <- gas
out5$Gas[18] <- 210 + 300
out5
## # A tsibble: 20 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 221 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
## 7 187 2007 Q1
## 8 234 2007 Q2
## 9 245 2007 Q3
## 10 205 2007 Q4
## 11 194 2008 Q1
## 12 229 2008 Q2
## 13 249 2008 Q3
## 14 203 2008 Q4
## 15 196 2009 Q1
## 16 238 2009 Q2
## 17 252 2009 Q3
## 18 510 2009 Q4
## 19 205 2010 Q1
## 20 236 2010 Q2
out6 <- out5 %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
select(season_adjust)
out6
## # A tsibble: 20 x 2 [1Q]
## season_adjust Quarter
## <dbl> <qtr>
## 1 214. 2005 Q3
## 2 165. 2005 Q4
## 3 199. 2006 Q1
## 4 221. 2006 Q2
## 5 225. 2006 Q3
## 6 176. 2006 Q4
## 7 218. 2007 Q1
## 8 231. 2007 Q2
## 9 237. 2007 Q3
## 10 187. 2007 Q4
## 11 226. 2008 Q1
## 12 226. 2008 Q2
## 13 241. 2008 Q3
## 14 186. 2008 Q4
## 15 229. 2009 Q1
## 16 235. 2009 Q2
## 17 244. 2009 Q3
## 18 466. 2009 Q4
## 19 239. 2010 Q1
## 20 233. 2010 Q2
autoplot(out6)
## Plot variable not specified, automatically selected `.vars = season_adjust`
# From the plots, we can observe that the outlier causes
# the seasonally adjusted component to highly increase around
# the outlier itself. The seasonality seems to be a little
# stronger than if the outlier were at the beginning of the data.
# The seasonality seems to be the strongest when the outlier is
# standing within the middle of the data. Does this have to do
# with how much it's weigted from quarter to quarter?
# I want to see what it would look like if the outlier was the last
# observation for the 20 rows in 'gas'.
out7 <- gas
out7$Gas[20] <- 236 + 300
out7
## # A tsibble: 20 x 2 [1Q]
## Gas Quarter
## <dbl> <qtr>
## 1 221 2005 Q3
## 2 180 2005 Q4
## 3 171 2006 Q1
## 4 224 2006 Q2
## 5 233 2006 Q3
## 6 192 2006 Q4
## 7 187 2007 Q1
## 8 234 2007 Q2
## 9 245 2007 Q3
## 10 205 2007 Q4
## 11 194 2008 Q1
## 12 229 2008 Q2
## 13 249 2008 Q3
## 14 203 2008 Q4
## 15 196 2009 Q1
## 16 238 2009 Q2
## 17 252 2009 Q3
## 18 210 2009 Q4
## 19 205 2010 Q1
## 20 536 2010 Q2
out8 <- out7 %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
select(season_adjust)
out8
## # A tsibble: 20 x 2 [1Q]
## season_adjust Quarter
## <dbl> <qtr>
## 1 195. 2005 Q3
## 2 200. 2005 Q4
## 3 194. 2006 Q1
## 4 207. 2006 Q2
## 5 205. 2006 Q3
## 6 213. 2006 Q4
## 7 212. 2007 Q1
## 8 216. 2007 Q2
## 9 216. 2007 Q3
## 10 228. 2007 Q4
## 11 220. 2008 Q1
## 12 211. 2008 Q2
## 13 219. 2008 Q3
## 14 226. 2008 Q4
## 15 222. 2009 Q1
## 16 220. 2009 Q2
## 17 222. 2009 Q3
## 18 233. 2009 Q4
## 19 232. 2010 Q1
## 20 495. 2010 Q2
autoplot(out8)
## Plot variable not specified, automatically selected `.vars = season_adjust`
# It seems as if when the outlier is either the very first or last
# observation in the data set, it has a much bigger impact
# on the seasonality through out it, either before or afterwards.
set.seed(8)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries
## # A tsibble: 441 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 New South Wales Household goods retailing A3349397X 1982 Apr 211.
## 2 New South Wales Household goods retailing A3349397X 1982 May 224.
## 3 New South Wales Household goods retailing A3349397X 1982 Jun 216.
## 4 New South Wales Household goods retailing A3349397X 1982 Jul 226.
## 5 New South Wales Household goods retailing A3349397X 1982 Aug 217.
## 6 New South Wales Household goods retailing A3349397X 1982 Sep 213.
## 7 New South Wales Household goods retailing A3349397X 1982 Oct 224.
## 8 New South Wales Household goods retailing A3349397X 1982 Nov 244.
## 9 New South Wales Household goods retailing A3349397X 1982 Dec 393.
## 10 New South Wales Household goods retailing A3349397X 1983 Jan 220.
## # … with 431 more rows
## # ℹ Use `print(n = ...)` to see more rows
View(myseries)
x11_dcmp <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_dcmp)
# 1.
# It is obvious that it contains an increasing trend that
# perfectly aligns with the values from the value plot.
# Looking at the seasonality, I notice that the interval is
# between -100 and 100. This makes sense since it is a
# addition decomposition. In other words, the value of the
# seasonality is added/subtracted to the value from the original
# plot, representing the fluctuation within the data. The
# remainder seems to have a seasonality, denoting that it isn't
# considered to be a white noise which isn't preferred. There
# seems to be an outlier within the remainder time series which
# is visible from the original plot of value as well. The averages
# of the seasonality values for each month seem to be pretty
# similar to each season within the decomposition (a repitition of an
# increase from jan-march, decrease from march-Jun, a sudden increase
# in Jul to then a sudden decrease in Aug, increase in Sep, dec from
# Oct-Nov, and finally an increase in Dec).
# 2.
# Yes, as you can see in the remainder component (which can be
# categorized as random) is an outlier of -400 compared to values
# of greater than -100 from the rest of the data. Meaning, that a
# substantial amount of people in Australia left the labor force
# (an indication of discouraged workers).
# 1.
autoplot(canadian_gas)
## Plot variable not specified, automatically selected `.vars = Volume`
gg_subseries(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
gg_season(canadian_gas)
## Plot variable not specified, automatically selected `y = Volume`
# 2.
dcmp <- canadian_gas %>%
model(STL(Volume ~ trend(window = 21) + season(window = "periodic"), robust = TRUE)) %>%
components() %>%
autoplot()
dcmp
# 3.
dcmp1 <- canadian_gas %>%
model(STL(Volume ~ trend(window = 21) + season(window = 1), robust = TRUE)) %>%
components()
dcmp1
## # A dable: 542 x 7 [1M]
## # Key: .model [1]
## # : Volume = trend + season_year + remainder
## .model Month Volume trend seaso…¹ remain…² seaso…³
## <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 STL(Volume ~ trend(window = 2… 1960 Jan 1.43 1.09 0.355 -1.46e-2 1.08
## 2 STL(Volume ~ trend(window = 2… 1960 Feb 1.31 1.12 0.191 -1.10e-3 1.12
## 3 STL(Volume ~ trend(window = 2… 1960 Mar 1.40 1.14 0.259 -7.03e-5 1.14
## 4 STL(Volume ~ trend(window = 2… 1960 Apr 1.17 1.17 0.0264 -2.59e-2 1.14
## 5 STL(Volume ~ trend(window = 2… 1960 May 1.12 1.20 -0.0775 -1.83e-3 1.19
## 6 STL(Volume ~ trend(window = 2… 1960 Jun 1.01 1.22 -0.233 2.33e-2 1.24
## 7 STL(Volume ~ trend(window = 2… 1960 Jul 0.966 1.25 -0.299 1.77e-2 1.26
## 8 STL(Volume ~ trend(window = 2… 1960 Aug 0.977 1.27 -0.299 3.34e-3 1.28
## 9 STL(Volume ~ trend(window = 2… 1960 Sep 1.03 1.30 -0.276 8.01e-3 1.31
## 10 STL(Volume ~ trend(window = 2… 1960 Oct 1.25 1.32 -0.0989 2.66e-2 1.35
## # … with 532 more rows, and abbreviated variable names ¹season_year,
## # ²remainder, ³season_adjust
## # ℹ Use `print(n = ...)` to see more rows
dcmp1 %>%
gg_season(season_year)
# Since the seasonality was from year to year, I made my season window equal to 1,
# and my trend was monthly, so I equaled the trend to equal 21. From the
# gg_series, I can see that the variance of the seasonality seems to increase
# overtime, with orange representing the year 1960 and having a flatter rate
# compared to the years that follow overtime.
# 4.
# I am using the time series from canadian_gas. I have already
# attained the seasonal_adj component so now I am going to
# graph it.
View(dcmp1)
dcmp1 %>%
select(season_adjust) %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = season_adjust`
# 5.
# I am first doing an X-11 decomposition with the data
x11_seas <- canadian_gas %>%
model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
components()
x11_seas
## # A dable: 542 x 7 [1M]
## # Key: .model [1]
## # : Volume = trend * seasonal * irregular
## .model Month Volume trend seasonal irregular season_adjust
## <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 x11 1960 Jan 1.43 1.15 1.25 0.993 1.14
## 2 x11 1960 Feb 1.31 1.15 1.13 1.01 1.16
## 3 x11 1960 Mar 1.40 1.16 1.18 1.02 1.18
## 4 x11 1960 Apr 1.17 1.17 1.02 0.983 1.15
## 5 x11 1960 May 1.12 1.18 0.951 0.996 1.17
## 6 x11 1960 Jun 1.01 1.20 0.844 1.00 1.20
## 7 x11 1960 Jul 0.966 1.22 0.812 0.974 1.19
## 8 x11 1960 Aug 0.977 1.25 0.841 0.929 1.16
## 9 x11 1960 Sep 1.03 1.28 0.857 0.938 1.20
## 10 x11 1960 Oct 1.25 1.31 0.946 1.01 1.32
## # … with 532 more rows
## # ℹ Use `print(n = ...)` to see more rows
autoplot(x11_seas) +
labs(title = "X-11 Decomposition")
# Now I am going to do the SEATS method
seats_dcmp <- canadian_gas %>%
model(seats = X_13ARIMA_SEATS(Volume ~ seats())) %>%
components()
seats_dcmp
## # A dable: 542 x 7 [1M]
## # Key: .model [1]
## # : Volume = f(trend, seasonal, irregular)
## .model Month Volume trend seasonal irregular season_adjust
## <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 seats 1960 Jan 1.43 1.13 1.27 1.00 1.13
## 2 seats 1960 Feb 1.31 1.15 1.14 0.997 1.15
## 3 seats 1960 Mar 1.40 1.17 1.18 1.02 1.19
## 4 seats 1960 Apr 1.17 1.17 1.01 0.988 1.15
## 5 seats 1960 May 1.12 1.18 0.949 0.999 1.18
## 6 seats 1960 Jun 1.01 1.19 0.844 1.01 1.20
## 7 seats 1960 Jul 0.966 1.19 0.813 0.997 1.19
## 8 seats 1960 Aug 0.977 1.20 0.823 0.989 1.19
## 9 seats 1960 Sep 1.03 1.24 0.844 0.988 1.22
## 10 seats 1960 Oct 1.25 1.30 0.958 1.01 1.31
## # … with 532 more rows
## # ℹ Use `print(n = ...)` to see more rows
autoplot(seats_dcmp) +
labs(title = "Seats Decomposition")
# The X-11 decomp has a smoother trend than the seats decomp.
# There is slightly more variance in the seasonality for
# the X-11 decomp, and the irregular component seems to have less
# variance within the data for X-11. However, these two
# decompositions seem to be very similar nonetheless.
# 1.
# Liquor retailing since 2000:
liquor<-aus_retail%>%
filter(Industry == "Liquor retailing" & year(Month)>= 2000)%>%
summarise(Turnover = sum(Turnover))
# Now plotting the data
ggplot(data= liquor)+
geom_line(aes(x=Month, y=Turnover))
# 2.
# The seasonality occurs every year, so the best approach
# to do a 2x12-MA that can estimate the
# trend-cycle of monthly data with annual seasonality.
# 3. Now finding the moving average and plotting it
liquor <-liquor%>%
mutate(`12-MA` = slider::slide_dbl(Turnover, mean,
.before = 5, .after = 6, .complete = TRUE))%>%
mutate(`2x12-MA` = slider::slide_dbl(`12-MA`, mean,
.before = 1, .after = 0, .complete = TRUE))
liquor%>%
autoplot(Turnover) +
geom_line(aes(y =`2x12-MA`), colour = "#0072B2") +
labs(y = "Liquor retailing Turnover",
title = "Total AUS Liquor retailing Turnover")+
guides(colour = guide_legend(title = "series"))
## Warning: Removed 12 row(s) containing missing values (geom_path).
# 1.
# Additive:
# We first denote our value of m according to the seasonality.
# We then compute the trend-cycle component using m - MA.
# Next, in oder to estimate the seasonal component, you calculate
# the detrended series, which is your data minus the trend-cycle
# component, to then average the values for each season. You then must
# normalize the seasonal effects by taking the average of the seasoanl
# components which is equal to your weight (w). Adjust the seasonal
# component by subtracting the weight. Finally, you calculate the
# remainder. These are the components that are the attributions of your
# chosen list of ordered values from your time series. This is a way to
# split up the trends, seasonality, and remainder for your values across
# a time series. It's very useful for observing in particular patterns.
# Multiplicative
# For the multiplicative classical decomposition, you basiaclly follow the
# same steps except instead of subtracting, you divide, and instead of adding,
# you multiple. The function should look like y = T x S x R. Thus, each of these
# components, if you multiple them all within the same chosen time, you calculate
# the values from your time series. Another useful deploy to inspect differnt aspects
# of each pattern.