library(fpp3)
library(tidyverse)
library(kableExtra)
library(reactable)
library(seasonal)
library(tsibble)
global_economy. Plot
the GDP per capita for each country over time. Which country has the
highest GDP per capita? How has this changed over time?data("global_economy")
gdp_per_capita <- global_economy |>
mutate(GDP_Per_Capita = round(GDP/Population, 2)) |>
select(Country, Year, GDP_Per_Capita) |>
#group_by(Country) |>
#summarize(Average_GDP_Per_Capita = mean(GDP_Per_Capita)) |>
arrange(desc(GDP_Per_Capita))
reactable(gdp_per_capita)
autoplot(gdp_per_capita, GDP_Per_Capita, show.legend = FALSE) +
labs(x = "Year", y = "GDP Per Capita") +
ggtitle("GDP Per Capita for Each Country") +
theme(plot.title=element_text(hjust=0.3))
Based on the table created, Monaco achieved the two highest GDP per capita in 2008 and 2014. As a result, I decided to look at how Monaco’s GDP per capita has changed over time:
gdp <- gdp_per_capita |>
filter(Country == "Monaco")
autoplot(gdp) +
labs(x="Year", y = "GDP Per Capita") +
ggtitle("Yearly GDP Per Capita in Monaco") +
theme(plot.title = element_text(hjust = 0.3))
The GDP per capita of Monaco significantly increased after the year 2000, more than doubling by 2008 and reaching a peak in 2014.
global_economy.us_gdp <- global_economy |>
filter(Country == "United States")
autoplot(us_gdp)
aus_livestock.data("aus_livestock")
bulls_bullocks_steers <- aus_livestock |>
filter(Animal == "Bulls, bullocks and steers", State == "Victoria")
autoplot(bulls_bullocks_steers)
vic_elec.data("vic_elec")
vic_elec |>
autoplot(Demand)
I used a Box Cox Transformation to see if any patterns or trends would be visible:
vic_elec |>
features(Demand, features = guerrero)
## # A tibble: 1 × 1
## lambda_guerrero
## <dbl>
## 1 0.0999
vic_elec |>
autoplot(box_cox(Demand, 0.09993089))
The transformation didn’t produced a plot that showed discernible patterns or trends. I decided to transform the data first, looking at weekly, monthly, and daily electricity demand, then used Box Cox Transformations for each.
Weekly
weekly_vic_elec <- vic_elec |>
mutate(Week = yearweek(Date)) |>
group_by(Week) |>
mutate(Demand = sum(Demand)) |>
#mutate(Week = yearweek(Date)) |>
distinct(Week, Demand) |>
as_tsibble(index=Week)
autoplot(weekly_vic_elec, Demand) +
labs(x = "Weekly", y = "Total Demand") +
ggtitle("Total Weekly Electricity Demand") +
theme(plot.title=element_text(hjust=0.4))
weekly_vic_elec |>
features(Demand, features = guerrero) |>
pull(lambda_guerrero)
## [1] 1.999927
weekly_vic_elec |>
autoplot(box_cox(Demand, 1.999927)) +
ggtitle("Box Cox Transformation of Weekly Electricity Demand")
Monthly
monthly_vic_elec <- vic_elec |>
mutate(Month = yearmonth(Date)) |>
group_by(Month) |>
mutate(Demand = sum(Demand)) |>
distinct(Month, Demand) |>
as_tsibble(index=Month)
autoplot(monthly_vic_elec, Demand) +
ggtitle("Monthly Electricity Demand") +
theme(plot.title = element_text(hjust = 0.3))
monthly_vic_elec |>
features(Demand, features = guerrero) |>
pull(lambda_guerrero)
## [1] -0.8999268
monthly_vic_elec |>
autoplot(box_cox(Demand, -0.8999268)) +
ggtitle("Box Cox Transformation of Monthly Electricity Demand") +
theme(plot.title = element_text(hjust = 0.3))
Daily
daily_vic_elec <- vic_elec |>
group_by(Date) |>
mutate(Demand = sum(Demand)) |>
distinct(Date, Demand) |>
as_tsibble(index=Date)
autoplot(daily_vic_elec, Demand) +
ggtitle("Daily Electricity Demand") +
theme(plot.title = element_text(hjust = 0.3))
daily_vic_elec |>
features(Demand, features = guerrero) |>
pull(lambda_guerrero)
## [1] -0.8999268
daily_vic_elec |>
autoplot(box_cox(Demand, -0.8999268)) +
ggtitle("Box Cox Transformation of Daily Electricity Demand") +
theme(plot.title = element_text(hjust = 0.3))
aus_production.data("aus_production")
autoplot(aus_production, Gas)
lambda <- aus_production |>
features(Gas, features = guerrero) |>
pull(lambda_guerrero)
aus_production |>
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))
canadian_gas data?data("canadian_gas")
autoplot(canadian_gas, Volume) +
ggtitle("Monthly Volume of Gas in Canada") +
theme(plot.title = element_text(hjust = 0.3))
lambda <- canadian_gas |>
features(Volume, features = guerrero) |>
pull(lambda_guerrero)
canadian_gas |> autoplot(box_cox(Volume, lambda)) +
ggtitle("Box Cox Transformation of Monthly Volume of Gas in Canada") +
theme(plot.title = element_text(hjust = 0.3))
A Box Cox Transformation would be unhelpful for the
canadian_gas data because it does not stabilize the
variance in the data, specifically the data from around 1973 to
1990.
data("aus_retail")
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries |>
features(Turnover, features = guerrero) |>
pull(lambda_guerrero)
## [1] 0.08303631
I would choose the value produced by the lambda_guerreo function, 0.08303631.
autoplot(myseries, Turnover) +
ggtitle("Turnover in Australian Retail") +
theme(plot.title = element_text(hjust=0.3))
lambda <- myseries |>
features(Turnover, features = guerrero) |>
pull(lambda_guerrero)
myseries |> autoplot(box_cox(Turnover, lambda)) +
ggtitle("Box Cox Transformation of Turnover in Australian Retail") +
theme(plot.title = element_text(hjust=0.3))
aus_production, Economy class passengers between Melbourne
and Sydney from ansett, and Pedestrian counts at Southern
Cross Station from pedestrian.aus_productionautoplot(aus_production, Tobacco)
aus_production |>
features(Tobacco, features = guerrero) |>
pull(lambda_guerrero)
## [1] 0.9264636
aus_production |> autoplot(box_cox(Tobacco, 0.9264636))
ansettdata("ansett")
economy_class <- ansett |>
filter(Class == "Economy", Airports == "MEL-SYD")
autoplot(economy_class, Passengers)
economy_class |>
features(Passengers, features = guerrero) |>
pull(lambda_guerrero)
## [1] 1.999927
options(scipen=100)
economy_class |> autoplot(box_cox(Passengers, 1.999927))
pedestriandata("pedestrian")
pedestrian_scs <- pedestrian |>
filter(Sensor == "Southern Cross Station")
autoplot(pedestrian_scs, Count)
pedestrian_scs |>
features(Count, features = guerrero) |>
pull(lambda_guerrero)
## [1] -0.2501616
pedestrian_scs |>
autoplot(box_cox(Count, -0.2501))
I didn’t think the Box Cox Transformation did an adequate job of stabilizing the variance, which could be the case because the data is hourly. As a result, I decided to mutate the Date column to get the monthly components of the data and sum the Count column of each month, then performed a Box Cox transformation to the newly transformed data:
new_ped <- pedestrian_scs |>
mutate(Month = yearmonth(Date)) |>
group_by(Month) |>
mutate(Count = sum(Count)) |>
distinct(Month, Count) |>
as_tsibble(index=Month)
autoplot(new_ped, Count)
Transforming the data helped discern the seasonality of the pedestrian counts, showing a sharp decline in January 2016.
new_ped |>
features(Count, features = guerrero) |>
pull(lambda_guerrero)
## [1] 0.132791
new_ped |>
autoplot(box_cox(Count, 0.132791))
Using the Box Cox Transformation does not appear to have stabilized the variance in the data.
aus_production.gas <- tail(aus_production, 5*4) |> select(Gas)
gas |>
autoplot(Gas) +
labs(x="Quarter", y = "Gas Production") +
ggtitle("Quarterly Production of Gas in Australia") +
theme(plot.title = element_text(hjust = 0.3))
There appears to be 12-month fluctuations in gas production, where gas production decreases in Q1 of every year and increases in the middle of each year.
gas |>
model(
classical_decomposition(Gas, type = "multiplicative")
) |>
components() |>
autoplot() +
labs(title = "Classical Multiplicative Decomposition of Gas Production in Australia \n in the Past 5 Years") +
theme(plot.title = element_text(hjust = 0.5))
When analyzing the classical multiplicative decomposition of the gas production data, the seasonal component reflects the 12-month fluctuations illustrated in part a. However, the random and trend components does not have the same seasonal fluctuations.
gas |>
model(
STL(Gas ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) |>
components() |>
autoplot()
# Changed observation in 2006 Q1
new_gas <- gas |>
mutate(Gas = if_else(Gas == 171, 300, Gas))
kbl(head(new_gas, n = 10)) %>%
kable_styling(latex_options="scale_down", c("striped", "hover", "condensed", full_width=F))
| Gas | Quarter |
|---|---|
| 221 | 2005 Q3 |
| 180 | 2005 Q4 |
| 300 | 2006 Q1 |
| 224 | 2006 Q2 |
| 233 | 2006 Q3 |
| 192 | 2006 Q4 |
| 187 | 2007 Q1 |
| 234 | 2007 Q2 |
| 245 | 2007 Q3 |
| 205 | 2007 Q4 |
new_gas |>
model(
STL(Gas ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) |>
components() |>
autoplot()
You can visibly locate the outlier in 2006 Q1 in the remainder and Gas components, while the trend line looks more straight and less smooth.
# Changed 2007 Q4 and 2010 Q1
new_gas2 <- gas |>
mutate(Gas = if_else(Gas == 205, 400, Gas))
kbl(head(new_gas2, n = 20)) %>%
kable_styling(latex_options="scale_down", c("striped", "hover", "condensed", full_width=F))
| Gas | Quarter |
|---|---|
| 221 | 2005 Q3 |
| 180 | 2005 Q4 |
| 171 | 2006 Q1 |
| 224 | 2006 Q2 |
| 233 | 2006 Q3 |
| 192 | 2006 Q4 |
| 187 | 2007 Q1 |
| 234 | 2007 Q2 |
| 245 | 2007 Q3 |
| 400 | 2007 Q4 |
| 194 | 2008 Q1 |
| 229 | 2008 Q2 |
| 249 | 2008 Q3 |
| 203 | 2008 Q4 |
| 196 | 2009 Q1 |
| 238 | 2009 Q2 |
| 252 | 2009 Q3 |
| 210 | 2009 Q4 |
| 400 | 2010 Q1 |
| 236 | 2010 Q2 |
new_gas2 |>
model(
STL(Gas ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) |>
components() |>
autoplot()
It doesn’t make a difference in the remainder and Gas components, as the outliers stand out regardless of where it is located in the time series. However, it does appear that the line in the trend component becomes more straight and less smooth.
data("aus_retail")
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
# original autoplot of data
myseries |>
autoplot(Turnover)
x11_dcmp <- myseries |>
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) |>
components()
autoplot(x11_dcmp) +
labs(title =
"Decomposition of Total Australian Retail Turnover using X-11") +
theme(plot.title = element_text(hjust = 0.5))
The trend and Turnover highlights the outliers that occur after 1995, which is apparent in the autoplot as well. The irregular decomposition shows sharp increase and decline occurring around 2016-2017, which isn’t readily apparent in the autoplot of the Turnover retail data.
Figure 3.19
knitr::include_graphics("/Users/mohamedhassan/Downloads/figure_319.png")
Figure 3.20
knitr::include_graphics("/Users/mohamedhassan/Downloads/figure_320.png")
The scale range of the value and trend decomposition are the same, from around 6500 to 9000. The scale range of the season_year component is wider, from 100 to -100, and 100 to -400. The season_year components repeats year to year, which may not be a realistic representation of the data. The negative numbers represented
In the Value and Remainder decomposition components, the recession of 1991/1992 are visible, showing a distinct downturn in the Remainder component and a less sharp but noticeable downturn in the Value component. When examining the month by month breakdown of civilians in the labor force, May and July appear to be the only two months that shows a downturn in the civilian labor force during the 1991/1992 recession.