Hummell_Josh_Week_Two

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## 
## Attaching package: 'tsibble'
## 
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
## 
## 
## 
## Attaching package: 'lubridate'
## 
## 
## The following object is masked from 'package:tsibble':
## 
##     interval
## 
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## 
## 
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## 
## ✔ feasts     0.3.0     ✔ fabletools 0.3.2
## ✔ fable      0.3.2     
## 
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()     masks base::date()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ tsibble::intersect()  masks base::intersect()
## ✖ lubridate::interval() masks tsibble::interval()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ tsibble::setdiff()    masks base::setdiff()
## ✖ tsibble::union()      masks base::union()

3.1 a

# Let's take a look at the data
global_economy

## # A tsibble: 15,150 x 9 [1Y]
## # Key:       Country [263]
##    Country     Code   Year         GDP Growth   CPI Imports Exports Population
##    <fct>       <fct> <dbl>       <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
##  1 Afghanistan AFG    1960  537777811.     NA    NA    7.02    4.13    8996351
##  2 Afghanistan AFG    1961  548888896.     NA    NA    8.10    4.45    9166764
##  3 Afghanistan AFG    1962  546666678.     NA    NA    9.35    4.88    9345868
##  4 Afghanistan AFG    1963  751111191.     NA    NA   16.9     9.17    9533954
##  5 Afghanistan AFG    1964  800000044.     NA    NA   18.1     8.89    9731361
##  6 Afghanistan AFG    1965 1006666638.     NA    NA   21.4    11.3     9938414
##  7 Afghanistan AFG    1966 1399999967.     NA    NA   18.6     8.57   10152331
##  8 Afghanistan AFG    1967 1673333418.     NA    NA   14.2     6.77   10372630
##  9 Afghanistan AFG    1968 1373333367.     NA    NA   15.2     8.90   10604346
## 10 Afghanistan AFG    1969 1408888922.     NA    NA   15.0    10.1    10854428
## # … with 15,140 more rows

# now let's make sure it is a tibble
index_var(global_economy)

## [1] "Year"

#Then create the GDP Per Capita column
global_economy$gdppc = global_economy$GDP / global_economy$Population


#and plot
global_economy %>%
  autoplot(gdppc, show.legend =  FALSE) +
  labs(title= "GDP per capita",
       y = "Amount in Dollars")

## Warning: Removed 3242 rows containing missing values (`geom_line()`).

3.1b-c

# Now let's see the country with the highest GDP PC in the most recent year
global_economy %>%
   arrange(desc(Year),desc(gdppc))

## Warning: Current temporal ordering may yield unexpected results.
## ℹ Suggest to sort by `Country`, `Year` first.

## # A tsibble: 15,150 x 10 [1Y]
## # Key:       Country [263]
##    Country       Code   Year     GDP Growth   CPI Imports Exports Popul…¹  gdppc
##    <fct>         <fct> <dbl>   <dbl>  <dbl> <dbl>   <dbl>   <dbl>   <dbl>  <dbl>
##  1 Luxembourg    LUX    2017 6.24e10   2.30 111.    194.    230.   5.99e5 1.04e5
##  2 Macao SAR, C… MAC    2017 5.04e10   9.10 136.     32.0    79.4  6.23e5 8.09e4
##  3 Switzerland   CHE    2017 6.79e11   1.09  98.3    53.9    65.0  8.47e6 8.02e4
##  4 Norway        NOR    2017 3.99e11   1.92 115.     33.1    35.5  5.28e6 7.55e4
##  5 Iceland       ISL    2017 2.39e10   3.64 122.     42.8    47.0  3.41e5 7.01e4
##  6 Ireland       IRL    2017 3.34e11   7.80 105.     87.9   120.   4.81e6 6.93e4
##  7 Qatar         QAT    2017 1.67e11   1.58 116.     37.3    51.0  2.64e6 6.32e4
##  8 United States USA    2017 1.94e13   2.27 112.     NA      NA    3.26e8 5.95e4
##  9 North America NAC    2017 2.10e13   2.35  NA      NA      NA    3.62e8 5.81e4
## 10 Singapore     SGP    2017 3.24e11   3.62 113.    149.    173.   5.61e6 5.77e4
## # … with 15,140 more rows, and abbreviated variable name ¹Population

#It's Luxembourg in 2017, let's see the growth of GDP Per Capita



global_economy %>%
  filter(Country=="Luxembourg") %>%
  autoplot(gdppc)

# For analysis, let's take a look at population and GDP Growth
global_economy %>%
  filter(Country=="Luxembourg") %>%
  autoplot(Population)

global_economy %>%
  filter(Country=="Luxembourg") %>%
  autoplot(GDP)

We can see that GDP per Capita has increased overtime because the GDP Growth rate has increased at a much more rapid pace than population, and with an already low population, it allows for the numbers to be much higher than places which might have higher GDP but more far more people.

3.2a

global_economy %>%
  filter(Code=="USA") %>%
  autoplot(GDP)

#This one looks fine as is since we are showing the growth of GDP, which is usually measured each year. We could look at GDP Per Capita above if we want to show us if the trend macthes with population. 

global_economy |>
  filter(Code == "USA") |>
  autoplot(GDP/Population) +
  labs(title= "GDP per capita", y = "USD")

3.2.b

aus_livestock %>%
  filter(Animal == "Bulls, bullocks and steers",
         State == "Victoria")%>%
  autoplot(Count)

# With monthly counts, this doesn't show the seasonality, instead we should show it at each quarter 

yearquarter(aus_livestock$Month) -> aus_livestock$Quarter 


aus_livestock %>%
  group_by(Quarter, Animal, State) %>%
  mutate(Count = sum(Count)) %>%
  filter(Animal == "Bulls, bullocks and steers",
         State == "Victoria")%>%
  autoplot(Count) +
  labs(x = "Quarter")

3.2c

vic_elec %>%
  autoplot(Demand) +
  labs(title = "Victorian Electricity Demand",
       y = "Demand")

# This one would much improved by looking at the week/month level, let's try out both

vic_elec %>%
  mutate(Week = yearweek(Time)) %>%
  index_by(Week) %>%
  summarise(Demand = sum(Demand)) %>%
  autoplot(Demand) + 
  labs(title = "Victorian Electricity Demand",
       y = "Demand")

vic_elec %>%
  mutate(Week = yearmonth(Time)) %>%
  index_by(Week) %>%
  summarise(Demand = sum(Demand)) %>%
  autoplot(Demand) + 
  labs(title = "Victorian Electricity Demand",
       y = "Demand")

# With week, we can see a more granular seasonality, while with month, much more macro (Apr is the Autumn and Sept is Spring we see much less demand both months, while June, peak winter, sees the highest demand)

3.2d

# This one is directly from the reading, we want to use Lambda to make the cycles more evident. Guerrero applies Guerrero's (1993) method to select the lambda which minimises the coefficient of variation for subseries of x.
lambda <- aus_production |>
  features(Gas, features = guerrero) |>
  pull(lambda_guerrero)


aus_production |>
  autoplot(box_cox(Gas, lambda))

3.3

canadian_gas %>%
  autoplot(Volume)

lambda <- canadian_gas %>%
  features(Volume, features = guerrero) %>%
  pull(lambda_guerrero)

canadian_gas %>%
  autoplot(box_cox(Volume, lambda))

# when we plot the before and after, we can see that the pattern remains the same. Using Lambda we strive to make the seasonal variation more obvious.

3.4

When looking for the box cox transformation, like with the chunks above, we can use the Guerrero method to find the best lambda. We can see that compared with the original data, that seasonality is more poignant with the Box-Cox Transformation.

set.seed(43)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

lambda <- myseries%>%
  features(Turnover, features = guerrero) %>%
  pull(lambda_guerrero)

paste0("The best Lambda is: ",lambda)

## [1] "The best Lambda is: 0.464098968231467"

myseries %>%
  autoplot(Turnover)

myseries %>%
  autoplot(box_cox(Turnover, lambda))

3.5

# Like above, we can just continue to use the Guerrero method to get the lambda

lambdat <- aus_production %>%
  features(Tobacco, features = guerrero) %>%
  pull(lambda_guerrero)

aus_production %>%
  autoplot(box_cox(Tobacco, lambdat))

## Warning: Removed 24 rows containing missing values (`geom_line()`).

lambdap <- ansett %>%
  filter(Class == "Economy", Airports == "MEL-SYD") %>%
  features(Passengers, features = guerrero) %>%
  pull(lambda_guerrero)

ansett %>%
   filter(Class == "Economy", Airports == "MEL-SYD") %>%
  autoplot(box_cox(Passengers, lambdap))

lambdaped <- pedestrian %>%
  filter(Sensor == "Southern Cross Station") %>%
  features(Count, features = guerrero) %>%
  pull(lambda_guerrero)

pedestrian %>%
  filter(Sensor == "Southern Cross Station") %>%
  autoplot(box_cox(Count, lambdaped))

3.7

# A: get the last 5 years (In quarters, so 5*4)

gas <- aus_production %>% 
filter(Quarter >= max(aus_production$Quarter) - (5*4))

gas %>%
  autoplot(Gas)+
  labs(title = "Five Years of Gas Data")

# B: 

gas %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() %>%
  autoplot()

## Warning: Removed 2 rows containing missing values (`geom_line()`).

# C: Yes, it shows the seasonality of Gas as well as the upward trend for purchasing


# D:

gas %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() -> gas_CD

gas_CD %>%
  autoplot

## Warning: Removed 2 rows containing missing values (`geom_line()`).

gas_CD %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust, colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "",
       title = "Five Years of Gas Data") +
  scale_colour_manual(
    values = c("gray", "blue", "red"),
    breaks = c("Data", "Seasonally Adjusted", "Trend")
  )

## Warning: Removed 4 rows containing missing values (`geom_line()`).

# E: 

gas2 <- gas

gas2$Gas[13] <-  gas2$Gas[13] + 300

gas2 %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() -> gas_CD2

gas_CD2 %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust, colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "",
       title = "Five Years of Gas Data") +
  scale_colour_manual(
    values = c("gray", "blue", "red"),
    breaks = c("Data", "Seasonally Adjusted", "Trend") 
    )

## Warning: Removed 4 rows containing missing values (`geom_line()`).

# The additional 300 adds a large spike in the data as well as seasonally adjusted data. The trend decreases after the spike, showing a mixed trend instead of uniform trend. 


# F
gas3 <- gas

gas3$Gas[length(gas3$Gas)] <-  gas3$Gas[length(gas3$Gas)] + 300

gas3 %>%
  model(classical_decomposition(Gas,type = "multiplicative")) %>%
  components() -> gas_CD3

gas_CD3 %>%
  ggplot(aes(x = Quarter)) +
  geom_line(aes(y = Gas, colour = "Data")) +
  geom_line(aes(y = season_adjust, colour = "Seasonally Adjusted")) +
  geom_line(aes(y = trend, colour = "Trend")) +
  labs(y = "",
       title = "Five Years of Gas Data") +
  scale_colour_manual(
    values = c("gray", "blue", "red"),
    breaks = c("Data", "Seasonally Adjusted", "Trend") 
    )

## Warning: Removed 4 rows containing missing values (`geom_line()`).

# It still adds a dramatic spike to the data'

3.8

set.seed(4313)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries %>%
  model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
  components() %>%
  autoplot()

## With the new plot we can better see the Trend as well as the seasonal component of the data. For instance,it looks like the trend is increasing through the whole year, which is why the season spikes are less than before.

3.9

Isolating the trend from the seasonal shows the trend increasing while the seasonal changes have remained stead. Trend shows a flat period in the early 90s.
We can see a dip in performance in the value chart and it is confirmed by the flatness of the Trend chart

Hummell_Josh_Week_Two_HMWK

Joshua Hummell

2/7/2023