## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'tsibble'
##
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
##
##
##
## Attaching package: 'lubridate'
##
##
## The following object is masked from 'package:tsibble':
##
## interval
##
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
##
##
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
##
## ✔ feasts 0.3.0 ✔ fabletools 0.3.2
## ✔ fable 0.3.2
##
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
3.1 a
# Let's take a look at the data
global_economy
## # A tsibble: 15,150 x 9 [1Y]
## # Key: Country [263]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1960 537777811. NA NA 7.02 4.13 8996351
## 2 Afghanistan AFG 1961 548888896. NA NA 8.10 4.45 9166764
## 3 Afghanistan AFG 1962 546666678. NA NA 9.35 4.88 9345868
## 4 Afghanistan AFG 1963 751111191. NA NA 16.9 9.17 9533954
## 5 Afghanistan AFG 1964 800000044. NA NA 18.1 8.89 9731361
## 6 Afghanistan AFG 1965 1006666638. NA NA 21.4 11.3 9938414
## 7 Afghanistan AFG 1966 1399999967. NA NA 18.6 8.57 10152331
## 8 Afghanistan AFG 1967 1673333418. NA NA 14.2 6.77 10372630
## 9 Afghanistan AFG 1968 1373333367. NA NA 15.2 8.90 10604346
## 10 Afghanistan AFG 1969 1408888922. NA NA 15.0 10.1 10854428
## # … with 15,140 more rows
# now let's make sure it is a tibble
index_var(global_economy)
## [1] "Year"
#Then create the GDP Per Capita column
global_economy$gdppc = global_economy$GDP / global_economy$Population
#and plot
global_economy %>%
autoplot(gdppc, show.legend = FALSE) +
labs(title= "GDP per capita",
y = "Amount in Dollars")
## Warning: Removed 3242 rows containing missing values (`geom_line()`).
3.1b-c
# Now let's see the country with the highest GDP PC in the most recent year
global_economy %>%
arrange(desc(Year),desc(gdppc))
## Warning: Current temporal ordering may yield unexpected results.
## ℹ Suggest to sort by `Country`, `Year` first.
## # A tsibble: 15,150 x 10 [1Y]
## # Key: Country [263]
## Country Code Year GDP Growth CPI Imports Exports Popul…¹ gdppc
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Luxembourg LUX 2017 6.24e10 2.30 111. 194. 230. 5.99e5 1.04e5
## 2 Macao SAR, C… MAC 2017 5.04e10 9.10 136. 32.0 79.4 6.23e5 8.09e4
## 3 Switzerland CHE 2017 6.79e11 1.09 98.3 53.9 65.0 8.47e6 8.02e4
## 4 Norway NOR 2017 3.99e11 1.92 115. 33.1 35.5 5.28e6 7.55e4
## 5 Iceland ISL 2017 2.39e10 3.64 122. 42.8 47.0 3.41e5 7.01e4
## 6 Ireland IRL 2017 3.34e11 7.80 105. 87.9 120. 4.81e6 6.93e4
## 7 Qatar QAT 2017 1.67e11 1.58 116. 37.3 51.0 2.64e6 6.32e4
## 8 United States USA 2017 1.94e13 2.27 112. NA NA 3.26e8 5.95e4
## 9 North America NAC 2017 2.10e13 2.35 NA NA NA 3.62e8 5.81e4
## 10 Singapore SGP 2017 3.24e11 3.62 113. 149. 173. 5.61e6 5.77e4
## # … with 15,140 more rows, and abbreviated variable name ¹​Population
#It's Luxembourg in 2017, let's see the growth of GDP Per Capita
global_economy %>%
filter(Country=="Luxembourg") %>%
autoplot(gdppc)
# For analysis, let's take a look at population and GDP Growth
global_economy %>%
filter(Country=="Luxembourg") %>%
autoplot(Population)
global_economy %>%
filter(Country=="Luxembourg") %>%
autoplot(GDP)
We can see that GDP per Capita has increased overtime because the GDP
Growth rate has increased at a much more rapid pace than population, and
with an already low population, it allows for the numbers to be much
higher than places which might have higher GDP but more far more
people.
3.2a
global_economy %>%
filter(Code=="USA") %>%
autoplot(GDP)
#This one looks fine as is since we are showing the growth of GDP, which is usually measured each year. We could look at GDP Per Capita above if we want to show us if the trend macthes with population.
global_economy |>
filter(Code == "USA") |>
autoplot(GDP/Population) +
labs(title= "GDP per capita", y = "USD")
3.2.b
aus_livestock %>%
filter(Animal == "Bulls, bullocks and steers",
State == "Victoria")%>%
autoplot(Count)
# With monthly counts, this doesn't show the seasonality, instead we should show it at each quarter
yearquarter(aus_livestock$Month) -> aus_livestock$Quarter
aus_livestock %>%
group_by(Quarter, Animal, State) %>%
mutate(Count = sum(Count)) %>%
filter(Animal == "Bulls, bullocks and steers",
State == "Victoria")%>%
autoplot(Count) +
labs(x = "Quarter")
3.2c
vic_elec %>%
autoplot(Demand) +
labs(title = "Victorian Electricity Demand",
y = "Demand")
# This one would much improved by looking at the week/month level, let's try out both
vic_elec %>%
mutate(Week = yearweek(Time)) %>%
index_by(Week) %>%
summarise(Demand = sum(Demand)) %>%
autoplot(Demand) +
labs(title = "Victorian Electricity Demand",
y = "Demand")
vic_elec %>%
mutate(Week = yearmonth(Time)) %>%
index_by(Week) %>%
summarise(Demand = sum(Demand)) %>%
autoplot(Demand) +
labs(title = "Victorian Electricity Demand",
y = "Demand")
# With week, we can see a more granular seasonality, while with month, much more macro (Apr is the Autumn and Sept is Spring we see much less demand both months, while June, peak winter, sees the highest demand)
3.2d
# This one is directly from the reading, we want to use Lambda to make the cycles more evident. Guerrero applies Guerrero's (1993) method to select the lambda which minimises the coefficient of variation for subseries of x.
lambda <- aus_production |>
features(Gas, features = guerrero) |>
pull(lambda_guerrero)
aus_production |>
autoplot(box_cox(Gas, lambda))
3.3
canadian_gas %>%
autoplot(Volume)
lambda <- canadian_gas %>%
features(Volume, features = guerrero) %>%
pull(lambda_guerrero)
canadian_gas %>%
autoplot(box_cox(Volume, lambda))
# when we plot the before and after, we can see that the pattern remains the same. Using Lambda we strive to make the seasonal variation more obvious.
3.4
When looking for the box cox transformation, like with the chunks above, we can use the Guerrero method to find the best lambda. We can see that compared with the original data, that seasonality is more poignant with the Box-Cox Transformation.
set.seed(43)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
lambda <- myseries%>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
paste0("The best Lambda is: ",lambda)
## [1] "The best Lambda is: 0.464098968231467"
myseries %>%
autoplot(Turnover)
myseries %>%
autoplot(box_cox(Turnover, lambda))
3.5
# Like above, we can just continue to use the Guerrero method to get the lambda
lambdat <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, lambdat))
## Warning: Removed 24 rows containing missing values (`geom_line()`).
lambdap <- ansett %>%
filter(Class == "Economy", Airports == "MEL-SYD") %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
ansett %>%
filter(Class == "Economy", Airports == "MEL-SYD") %>%
autoplot(box_cox(Passengers, lambdap))
lambdaped <- pedestrian %>%
filter(Sensor == "Southern Cross Station") %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
pedestrian %>%
filter(Sensor == "Southern Cross Station") %>%
autoplot(box_cox(Count, lambdaped))
3.7
# A: get the last 5 years (In quarters, so 5*4)
gas <- aus_production %>%
filter(Quarter >= max(aus_production$Quarter) - (5*4))
gas %>%
autoplot(Gas)+
labs(title = "Five Years of Gas Data")
# B:
gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot()
## Warning: Removed 2 rows containing missing values (`geom_line()`).
# C: Yes, it shows the seasonality of Gas as well as the upward trend for purchasing
# D:
gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() -> gas_CD
gas_CD %>%
autoplot
## Warning: Removed 2 rows containing missing values (`geom_line()`).
gas_CD %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust, colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "",
title = "Five Years of Gas Data") +
scale_colour_manual(
values = c("gray", "blue", "red"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 rows containing missing values (`geom_line()`).
# E:
gas2 <- gas
gas2$Gas[13] <- gas2$Gas[13] + 300
gas2 %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() -> gas_CD2
gas_CD2 %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust, colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "",
title = "Five Years of Gas Data") +
scale_colour_manual(
values = c("gray", "blue", "red"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 rows containing missing values (`geom_line()`).
# The additional 300 adds a large spike in the data as well as seasonally adjusted data. The trend decreases after the spike, showing a mixed trend instead of uniform trend.
# F
gas3 <- gas
gas3$Gas[length(gas3$Gas)] <- gas3$Gas[length(gas3$Gas)] + 300
gas3 %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() -> gas_CD3
gas_CD3 %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust, colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "",
title = "Five Years of Gas Data") +
scale_colour_manual(
values = c("gray", "blue", "red"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 rows containing missing values (`geom_line()`).
# It still adds a dramatic spike to the data'
3.8
set.seed(4313)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components() %>%
autoplot()
## With the new plot we can better see the Trend as well as the seasonal component of the data. For instance,it looks like the trend is increasing through the whole year, which is why the season spikes are less than before.
3.9
Isolating the trend from the seasonal shows the trend increasing while the seasonal changes have remained stead. Trend shows a flat period in the early 90s.
We can see a dip in performance in the value chart and it is confirmed by the flatness of the Trend chart