library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tsibble     1.1.5     ✔ fable       0.3.4
## ✔ tsibbledata 0.4.1     ✔ fabletools  0.4.2
## ✔ feasts      0.3.2
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.3
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(ggplot2)
library(seasonal)
## Warning: package 'seasonal' was built under R version 4.3.3
## 
## Attaching package: 'seasonal'
## 
## The following object is masked from 'package:tibble':
## 
##     view
head(global_economy)
#3.1:

new_ge <- global_economy %>%
  group_by(Country, GDP, Population) %>%
  summarise(GDPPC = GDP/Population) %>% 
  arrange(desc(GDPPC))
head(new_ge)
ggplot(new_ge, aes(x=Year, y=GDPPC, colour = Country)) + geom_line(stat = "identity", show.legend = F)
## Warning: Removed 3242 rows containing missing values or values outside the scale range
## (`geom_line()`).

global_economy %>%
  mutate(GDP_per_capita = GDP / Population) %>%
  filter(GDP_per_capita == max(GDP_per_capita, na.rm = TRUE)) %>%
  select(Country, GDP_per_capita)
global_economy %>%
  filter(Country == "Monaco") %>%
  autoplot(GDP/Population) +
  labs(title= "GDP per capita for Monaco", y = "$US")
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).

##The country with the highest GPD per Capita is Monaco during the year 2014. We can also observe in the plot above, that the GDP per Capita for the majority of the countries in the data has increased over the years.

#3.2:

#GDPPC in USA:

global_economy %>%
  filter(Country == "United States") %>%
  autoplot(GDP/Population) +
  labs(title= "GDP per capita", y = "$US")

#There was no transformation, the increasing population did not seem to have an effect on the GDP.

# Slaughter of Victorian

head(aus_livestock)
aus_livestock %>% 
  filter(State == "Victoria", Animal == "Bulls, bullocks and steers") %>% 
  autoplot(Count)

aus_livestock %>% 
  filter(State == "Victoria", Animal == "Bulls, bullocks and steers") %>%
  mutate(Quarter = yearquarter(Month)) %>%
  index_by(Quarter) %>%
  summarise(Count = sum(Count)) %>%
  autoplot(Count)

# There was no transformation but an overall decreasing trend can be observed.

#c. Victorian Electricity Demand

head(vic_elec)
vic_elec %>% autoplot(Demand)

vic_elec %>%
  mutate(Week = yearweek(Time)) %>%
  index_by(Week) %>%
  summarise(Demand = sum(Demand)) %>%
  autoplot(Demand)

# The data was transformed from half-hourly data into weeks in order to simplify the time series and make it more interpretable.

# D.Gas:

aus_production %>% autoplot(Gas)

lambda <- aus_production %>%
  features(Gas, features = guerrero) %>%
  pull(lambda_guerrero)
aus_production %>%
  autoplot(box_cox(Gas, lambda)) +
  labs(y = "",
       title = paste("Transformed gas production with lambda = ", round(lambda,2)))

#3.3:

#Why is a Box-Cox transformation unhelpful for the canadian_gas ?

head(canadian_gas)
canadian_gas %>% autoplot (Volume)

lambda <- canadian_gas %>%
  features(Volume, features = guerrero) %>%
  pull(lambda_guerrero)
canadian_gas %>%
  autoplot(box_cox(Volume, lambda)) +
  labs(y = "",
       title = paste("Transformed Canadian Gas volume with lambda = ", round(lambda,2)))

# After applying a Box-Cox transformation with lambda 0.58, we can observe that the data’s pattern did not change. The scales might have changed a bit, but we did not simplify the variation in these data.

#3.4:

set.seed(123)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries, Turnover)

lambda <- myseries %>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)
myseries %>%
  autoplot(box_cox(Turnover, lambda)) +
  labs(y = "",
       title = paste("Transformation with lambda = ", round(lambda,2)))

# We would use a Box-Cox transformation with lambda 0.22. This is the optimal lambda in order to reduce variation in the data.

#3.5:

aus_production %>% autoplot(Tobacco)
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).

lambda <- aus_production %>%
  features(Tobacco, features = guerrero) %>%
  pull(lambda_guerrero)
aus_production %>%
  autoplot(box_cox(Tobacco, lambda)) +
  labs(y = "",
       title = paste("Box-Cox Transformation with lambda = ", round(lambda,2)))
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).

# The Box-Cox transformation is not effective on the tobacco production data.

# ansett

eco_mel_syd <- ansett %>% 
  filter(Class == "Economy", Airports == "MEL-SYD")
eco_mel_syd %>% autoplot(Passengers)

#The Box-Cox transformation is not effective on ansett too.

lambda <- eco_mel_syd %>%
  features(Passengers, features = guerrero) %>%
  pull(lambda_guerrero)
eco_mel_syd %>%
  autoplot(box_cox(Passengers, lambda)) +
  labs(y = "",
       title = paste("Box-Cox Transformation with lambda = ", round(lambda,2)))

#pedestrian:

sct_count <- pedestrian %>%
  filter(Sensor == "Southern Cross Station") %>%
  group_by(Sensor) %>%
  index_by(Week = yearweek(Date_Time)) %>%
  summarise(Count = sum(Count))
sct_count %>% autoplot(Count)

lambda <- sct_count %>%
  features(Count, features = guerrero) %>%
  pull(lambda_guerrero)
sct_count %>%
  autoplot(box_cox(Count, lambda)) +
  labs(y = "",
       title = paste("Box-Cox Transformation with lambda = ", round(lambda,2)))

# The weekly pedestrian counts show the variations.

#3.7:

#a.

gas <- tail(aus_production, 5*4) %>% select(Gas)

gas %>% autoplot(Gas)

# in a cycle of 1 year. There is an increase after the first quarter, then peaks in the third quarter and then decreases again.

#b.

class_decomp <- gas %>%
  model(
    classical_decomposition(Gas, type = "multiplicative")
  ) %>%
  components()
class_decomp %>% autoplot() +
  labs(title = "Classical multiplicative decomposition of Australia
                  Gas Production")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

#c. Yes, the results support the graphical interpretation from part a, as there is an increasing trend & seasonality that increases every first quarter and decreases after the third quarter.

#d.

as_tsibble(class_decomp) %>%
  autoplot(season_adjust) +
  labs(title = "Seasonally Adjusted Data")

#there is an increasing trend in gas production.

#e.

gas %>%
  mutate(Gas = if_else(Quarter==yearquarter("2007Q2"), Gas + 300, Gas)) %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  as_tsibble() %>%
  autoplot(season_adjust) +
  labs(title = 'Seasonally Adjusted Data with 300 added to "2007 Q2"')

# The effect of adding 300 to an observation and making it an outlier, changes the seasonally adjusted data in a way that brings back seasonality in the opposite direction of the outlier.

#f.

gas %>%
  mutate(Gas = if_else(Quarter==yearquarter("2010Q1"), Gas + 300, Gas)) %>%
  model(classical_decomposition(Gas, type = "multiplicative")) %>%
  components() %>%
  as_tsibble() %>%
  autoplot(season_adjust) +
  labs(title = 'Seasonally Adjusted Data with 300 added to "2010 Q1"')

#Adding the outlier near the end of the data causes the seasonally adjusted data to display no seasonality as opposed to in the previous plot.

#3.8:

x11_dcmp <- myseries %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components()
autoplot(x11_dcmp) +
  labs(title =
    "Decomposition of Australian retail data using X-11.")

#Can observe that seasonality was much stronger during the years 1982 to ~ 1990, which is quite the opposite of what we previously observed. I can also observe there was a big jump around the middle of the year 2000.

##3.9:

There is an increasing trend in the number of persons in the civilian labor force in Australia. There is also a seasonality. There is also a decrease in the early 1990s which was due to a recession which can be seen in the remainder.

  1. The recession of 1991/1992 is very visible in the estimated components as there is a sharp decrease in the remainder component