library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tsibble 1.1.5 ✔ fable 0.3.4
## ✔ tsibbledata 0.4.1 ✔ fabletools 0.4.2
## ✔ feasts 0.3.2
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.3
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(ggplot2)
library(seasonal)
## Warning: package 'seasonal' was built under R version 4.3.3
##
## Attaching package: 'seasonal'
##
## The following object is masked from 'package:tibble':
##
## view
head(global_economy)
#3.1:
new_ge <- global_economy %>%
group_by(Country, GDP, Population) %>%
summarise(GDPPC = GDP/Population) %>%
arrange(desc(GDPPC))
head(new_ge)
ggplot(new_ge, aes(x=Year, y=GDPPC, colour = Country)) + geom_line(stat = "identity", show.legend = F)
## Warning: Removed 3242 rows containing missing values or values outside the scale range
## (`geom_line()`).
global_economy %>%
mutate(GDP_per_capita = GDP / Population) %>%
filter(GDP_per_capita == max(GDP_per_capita, na.rm = TRUE)) %>%
select(Country, GDP_per_capita)
global_economy %>%
filter(Country == "Monaco") %>%
autoplot(GDP/Population) +
labs(title= "GDP per capita for Monaco", y = "$US")
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).
##The country with the highest GPD per Capita is Monaco during the year
2014. We can also observe in the plot above, that the GDP per Capita for
the majority of the countries in the data has increased over the
years.
#3.2:
#GDPPC in USA:
global_economy %>%
filter(Country == "United States") %>%
autoplot(GDP/Population) +
labs(title= "GDP per capita", y = "$US")
#There was no transformation, the increasing population did not seem to
have an effect on the GDP.
# Slaughter of Victorian
head(aus_livestock)
aus_livestock %>%
filter(State == "Victoria", Animal == "Bulls, bullocks and steers") %>%
autoplot(Count)
aus_livestock %>%
filter(State == "Victoria", Animal == "Bulls, bullocks and steers") %>%
mutate(Quarter = yearquarter(Month)) %>%
index_by(Quarter) %>%
summarise(Count = sum(Count)) %>%
autoplot(Count)
# There was no transformation but an overall decreasing trend can be
observed.
#c. Victorian Electricity Demand
head(vic_elec)
vic_elec %>% autoplot(Demand)
vic_elec %>%
mutate(Week = yearweek(Time)) %>%
index_by(Week) %>%
summarise(Demand = sum(Demand)) %>%
autoplot(Demand)
# The data was transformed from half-hourly data into weeks in order to
simplify the time series and make it more interpretable.
# D.Gas:
aus_production %>% autoplot(Gas)
lambda <- aus_production %>%
features(Gas, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = paste("Transformed gas production with lambda = ", round(lambda,2)))
#3.3:
#Why is a Box-Cox transformation unhelpful for the canadian_gas ?
head(canadian_gas)
canadian_gas %>% autoplot (Volume)
lambda <- canadian_gas %>%
features(Volume, features = guerrero) %>%
pull(lambda_guerrero)
canadian_gas %>%
autoplot(box_cox(Volume, lambda)) +
labs(y = "",
title = paste("Transformed Canadian Gas volume with lambda = ", round(lambda,2)))
# After applying a Box-Cox transformation with lambda 0.58, we can
observe that the data’s pattern did not change. The scales might have
changed a bit, but we did not simplify the variation in these data.
#3.4:
set.seed(123)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries, Turnover)
lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
myseries %>%
autoplot(box_cox(Turnover, lambda)) +
labs(y = "",
title = paste("Transformation with lambda = ", round(lambda,2)))
# We would use a Box-Cox transformation with lambda 0.22. This is the
optimal lambda in order to reduce variation in the data.
#3.5:
aus_production %>% autoplot(Tobacco)
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
lambda <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
autoplot(box_cox(Tobacco, lambda)) +
labs(y = "",
title = paste("Box-Cox Transformation with lambda = ", round(lambda,2)))
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
# The Box-Cox transformation is not effective on the tobacco production
data.
# ansett
eco_mel_syd <- ansett %>%
filter(Class == "Economy", Airports == "MEL-SYD")
eco_mel_syd %>% autoplot(Passengers)
#The Box-Cox transformation is not effective on ansett too.
lambda <- eco_mel_syd %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
eco_mel_syd %>%
autoplot(box_cox(Passengers, lambda)) +
labs(y = "",
title = paste("Box-Cox Transformation with lambda = ", round(lambda,2)))
#pedestrian:
sct_count <- pedestrian %>%
filter(Sensor == "Southern Cross Station") %>%
group_by(Sensor) %>%
index_by(Week = yearweek(Date_Time)) %>%
summarise(Count = sum(Count))
sct_count %>% autoplot(Count)
lambda <- sct_count %>%
features(Count, features = guerrero) %>%
pull(lambda_guerrero)
sct_count %>%
autoplot(box_cox(Count, lambda)) +
labs(y = "",
title = paste("Box-Cox Transformation with lambda = ", round(lambda,2)))
# The weekly pedestrian counts show the variations.
#3.7:
#a.
gas <- tail(aus_production, 5*4) %>% select(Gas)
gas %>% autoplot(Gas)
# in a cycle of 1 year. There is an increase after the first quarter,
then peaks in the third quarter and then decreases again.
#b.
class_decomp <- gas %>%
model(
classical_decomposition(Gas, type = "multiplicative")
) %>%
components()
class_decomp %>% autoplot() +
labs(title = "Classical multiplicative decomposition of Australia
Gas Production")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
#c. Yes, the results support the graphical interpretation from part a, as there is an increasing trend & seasonality that increases every first quarter and decreases after the third quarter.
#d.
as_tsibble(class_decomp) %>%
autoplot(season_adjust) +
labs(title = "Seasonally Adjusted Data")
#there is an increasing trend in gas production.
#e.
gas %>%
mutate(Gas = if_else(Quarter==yearquarter("2007Q2"), Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(season_adjust) +
labs(title = 'Seasonally Adjusted Data with 300 added to "2007 Q2"')
# The effect of adding 300 to an observation and making it an outlier,
changes the seasonally adjusted data in a way that brings back
seasonality in the opposite direction of the outlier.
#f.
gas %>%
mutate(Gas = if_else(Quarter==yearquarter("2010Q1"), Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(season_adjust) +
labs(title = 'Seasonally Adjusted Data with 300 added to "2010 Q1"')
#Adding the outlier near the end of the data causes the seasonally
adjusted data to display no seasonality as opposed to in the previous
plot.
#3.8:
x11_dcmp <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_dcmp) +
labs(title =
"Decomposition of Australian retail data using X-11.")
#Can observe that seasonality was much stronger during the years 1982 to ~ 1990, which is quite the opposite of what we previously observed. I can also observe there was a big jump around the middle of the year 2000.
##3.9:
There is an increasing trend in the number of persons in the civilian labor force in Australia. There is also a seasonality. There is also a decrease in the early 1990s which was due to a recession which can be seen in the remainder.