library(fpp3)
library(patchwork)
library(USgas)
library(lubridate)
library(scales)
options(scipen=999)
# data(global_economy)
<- function(df_tsibble, time_column,
generate_time_series_plots
value_column) {<- list()
plots"Autoplot"]] <- autoplot(df_tsibble) + labs(title = "Autoplot")
plots[["Seasonal_Plot"]] <- gg_season(df_tsibble, {{ value_column }}) + labs(title = "Seasonal Plot")
plots[["Subseries_Plot"]] <- df_tsibble %>% gg_subseries({{ value_column }})+ labs(title = "Subseries Plot")
plots[[
<- ACF(df_tsibble, {{ value_column }})
acf_result "ACF_Plot"]] <- autoplot(acf_result) + labs(title = "ACF Plot")
plots[[
<- (plots[['Autoplot']] | plots[['Seasonal_Plot']]) /
combined_plots 'Subseries_Plot']] | plots[['ACF_Plot']])
(plots[[
+ plot_layout(guides = "collect", ncol = 1)
combined_plots
}
<- function(df, value_column, time_column) {
generate_math_plots <- list()
plots <- c("none", "sqrt", "cubert", "log")
transformations for (transformation in transformations) {
<- df %>%
df_transformed as_tibble() %>%
mutate(
transformed_value = case_when(
== "none" ~ !!sym(value_column),
transformation == "sqrt" ~ sqrt(!!sym(value_column)),
transformation == "cubert" ~ (!!sym(value_column))^(1/3),
transformation == "log" ~ log(!!sym(value_column))
transformation
)%>%
) rename(!!transformation := transformed_value) %>%
select(time_column, !!transformation) %>%
as_tsibble()
<- autoplot(df_transformed)
plots[[transformation]]
}<- (plots[['none']] | plots[['sqrt']]) /
combined_plots 'cubert']] | plots[['log']])
(plots[[+ plot_layout(guides = "collect", ncol = 1)
combined_plots
}
<- function(df, value_column,
generate_decomposition_plots seasonally_adjusted = TRUE) {
<- df |>
decomposed model(classical_decomposition(!!sym(value_column), type = "multiplicative")) |>
components()
<- autoplot(decomposed)
component_plots
print(component_plots)
if (seasonally_adjusted) { # Plot seasonally adjusted as well
<- decomposed %>%
seasonally_adjusted mutate(Seasonally_Adjusted = !!sym(value_column) / seasonal)
<- autoplot(seasonally_adjusted, Seasonally_Adjusted)
seasonally_adjusted_plot print(seasonally_adjusted_plot)
} }
Hyndman Chapter 3 Homework
Please note that you can scroll to the left and right in the code blocks within this interface. Click into a code block, then start highlighting to the right. The sideways scrollbar will appear.
Loading & Functions
3.1
Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?
Wow, didn’t expect luxembourg! Data shows an general upward trend in everything, with some downturns in the mid 80’s and around y2k. It might need to be adjusted for inflation, as the angle on some of those lines looks like they could be flat if we did.
$GDP_per_capita <- global_economy$GDP / global_economy$Population
global_economy
### Just get all the countries up there!!
ggplot(global_economy, aes(x = Year, y = GDP_per_capita, group = Country, color = Country)) +
geom_line() +
theme_minimal() +
labs(x = "Year", y = "GDP per capita", title = "GDP per capita over time") +
theme(legend.position = "none")
<- max(global_economy$Year)
latest_year
# Plot the winner
<- global_economy %>%
top_country_latest_year filter(Year == latest_year) %>%
arrange(desc(GDP_per_capita)) %>%
slice(1)
<- top_country_latest_year$Country
top_country_name
<- global_economy %>%
top_country_data filter(Country == top_country_name)
ggplot(top_country_data, aes(x = Year, y = GDP_per_capita)) +
geom_line() +
theme_minimal() +
labs(x = "Year", y = "GDP per capita",
title = sprintf("GDP per capita over time for %s", top_country_name)) +
theme(legend.position = "none")
## have a quick look at the top 10.
<- global_economy %>%
top_ten filter(Year == latest_year) %>%
select(Country, GDP_per_capita, Population) %>%
arrange(desc(GDP_per_capita)) %>%
slice_head(n = 10)
print(top_ten)
# A tsibble: 10 x 4 [1Y]
# Key: Country [10]
Country GDP_per_capita Population Year
<fct> <dbl> <dbl> <dbl>
1 Luxembourg 104103. 599449 2017
2 Macao SAR, China 80893. 622567 2017
3 Switzerland 80190. 8466017 2017
4 Norway 75505. 5282223 2017
5 Iceland 70057. 341284 2017
6 Ireland 69331. 4813608 2017
7 Qatar 63249. 2639211 2017
8 United States 59532. 325719178 2017
9 North America 58070. 362492702 2017
10 Singapore 57714. 5612253 2017
3.2
For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.
- United States GDP from global_economy
.
It was a smooth line, I left alone.
%>%
global_economy filter(Code == "USA") %>%
autoplot(GDP) +
scale_y_continuous(labels = scales::comma)
- Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock
.
For this, there was lot of data, and seemed to want to be smoothed out so I. It was almost illegible and I couldn’t wrap my head around it, so I ran it through the plotting function I wrote last week. There is an obvious annual seasonality.
<- aus_livestock %>%
modified_aus filter(Animal == "Bulls, bullocks and steers") %>%
group_by(Animal) %>%
summarise(total_count = sum(Count), .groups = "drop") %>%
mutate(`12-MA` = slider::slide_dbl(total_count, mean,
.before = 12, .after = 12, .complete = TRUE))
%>% autoplot(total_count) +
modified_aus geom_line(aes(y = `12-MA`), colour = "#D55E00")
generate_time_series_plots(modified_aus, time_column = Month,
value_column = total_count)
- Victorian Electricity Demand from vic_elec
.
This is three years worth of data at a half hourly grain. We could have looked it by week or day, but I chose to look at over the entire data set. The first thing I did was to group it by day, and then I set up a six month moving average to level it out.
This was a good exercise for me, a little wrangling first but with an objective in mind, and then plotting. Also, this tsibble/tibble thing is really confusing and only seems to work half the time. From here out I think I’ll do all my transformations either in a tibble or a dataframe, and then at the last minute covert back to a tisbble.
In all honesty, this could have been the homework assignment all by itself, look at this by hour, day, half hour, find the trends, smooth things out, get to know this data, but I’m already at 6 hours and I still have most of this homework assignment to go.
# > vic_elec %>% as_tibble %>% summarise( MinDate = min(Date)
# 1 2012-01-01 2014-12-31 < annual data for 3 full years.
<- vic_elec %>%
vt %>%
as_tibble group_by(Date) %>%
summarise(total_demand = sum(Demand), .groups = "drop") %>%
mutate(`6-MA` = slider::slide_dbl(total_demand, mean, .before = 6, .after = 6, .complete = TRUE)) %>%
as_tsibble
%>% autoplot(total_demand) +
vt scale_y_continuous(labels = scales::comma) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
geom_line(aes(y = `6-MA`), colour = "#D55E00")
- Gas production from aus_production
.
There’s some definite trending, cycles and seasonal behavior here. Let’s have a look at this first wtih a 2 quarter smoothing, and then with some of last week’s forecasting plots.
There is some definite seasonality, and this is on an annual cycle.
library(slider)
<- aus_production %>%
aus_production_ma as_tibble() %>%
mutate(`2Q-MA` = slider::slide_dbl(Gas, mean, .before = 1, .after = 1, .complete = TRUE)) %>%
as_tsibble()
%>%
aus_production_ma autoplot(Gas) +
geom_line(aes(y = `2Q-MA`), colour = "#D55E00")
generate_time_series_plots(aus_production_ma, time_column = Quarter, value_column = Gas)
3.3
Why is a Box-Cox transformation unhelpful for the canadian_gas
data?
You’re supposed to use Box/Cox transformations when the difference between the sides of the graph is considerable – when the peaks and valleys on the left are small and the peaks and valleys on the right are considerable. That’s not really the case with this data. This data is more related to trend than seasonality, as show in the lag plot.
generate_time_series_plots(canadian_gas, time_column = Month, value_column = Volume)
3.4
What Box-Cox transformation would you select for your retail data (from Exercise 7 in Section 2.10)?
We didn’t do exercise 2.7. But if we did,I don’t think I’d use any transformations. The Y axis is legible and close to the same scale, in millions, with the whole dataset being between a few and 20 million. Introducing more math would add to the complexity of the problem.
set.seed(12345678)
<- aus_retail |>
aus_sample filter(`Series ID` == sample(aus_retail$`Series ID`,1))
generate_time_series_plots(aus_sample, time_column = Month, value_column = Turnover)
3.5
For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance.
I am fairly certain these are all wrong. This was the dumb part of the assignment.
Tobacco from aus_production
If we’re going to use box cox transformations, we have to have peaks and valleys that are different on the two sides of the graph. None of these help that. This data is predictably seasonal and to scale.
= aus_production %>% select (Quarter, Tobacco)
tobacco generate_math_plots(tobacco, value_column = "Tobacco", time_column="Quarter")
Economy class passengers between Melbourne and Sydney fromansett
,
= ansett %>%
economy filter(Class == "Economy", Passengers > 0) %>%
group_by("Week") %>%
summarise(total_people = sum(Passengers)) %>%
ungroup() %>%
as_tsibble()
generate_math_plots(economy,
time_column = "Week", value_column ="total_people")
Pedestrian counts at Southern Cross Station from pedestrian
.
= pedestrian %>%
scs as_tibble() %>%
filter(Date >= "2015-01-01" & Date <= "2015-01-07")%>%
filter(Sensor == "Southern Cross Station") %>%
select(Date_Time, Count)
generate_math_plots(scs, time_column = "Date_Time",
value_column ="Count")
3.7
Consider the last five years of the Gas data from aus_production
. gas <- tail(aus_production, 5*4) |> select(Gas)
- Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?
Yes, beginning of every year it is way down.
<- tail(aus_production, 5 * 4) |> select(Gas)
gas %>% autoplot() gas
- Use
classical_decomposition
withtype=multiplicative
to calculate the trend-cycle and seasonal indices.
generate_decomposition_plots(gas, value_column="Gas", seasonally_adjusted=FALSE)
- Do the results support the graphical interpretation from part a?
Yes, 100%.
- Compute and plot the seasonally adjusted data.
generate_decomposition_plots(gas, value_column="Gas", seasonally_adjusted=TRUE)
Warning: Removed 2 rows containing missing values (`geom_line()`).
- Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?
I don’t see anything different.
<- make_yearquarter(year = 2010, quarter = 3)
qtr <- gas %>% add_row(tibble(Gas =5000,Quarter = qtr))
gas2
generate_decomposition_plots(df=gas2, value_column="Gas", seasonally_adjusted = TRUE)
- Does it make any difference if the outlier is near the end rather than in the middle of the time series?
I don’t see any difference here either
<- tibble(Gas = 5000, Quarter = make_yearquarter(year = 2010, quarter = 3))
fivek
# Replace the row where Quarter is "2010 Q3"
<- gas %>%
gas3 filter(Quarter != make_yearquarter(year = 2010, quarter = 3)) %>%
add_row(fivek)
generate_decomposition_plots(df=gas3,value_column="Gas", seasonally_adjusted = TRUE)
3.8
Recall your retail time series data (from Exercise 7 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?
I’m not sure, but there might be some outliers in the later part of the 2010’s. The remainders look a little high.
set.seed(12345678)
<- aus_retail %>%
retail_turnover filter(`Series ID` == sample(aus_retail$`Series ID`,1)) %>%
select(Month, Turnover)
%>% autoplot() retail_turnover
%>%
retail_turnover model(
STL(Turnover ~ season(window = "periodic"),
robust = TRUE)) |>
components() |>
autoplot()
3.9
Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.
- Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.
If I understand this right, between one and two percent of the fluctuation is seasonal, otherwise it follows close to the trend. As far as teh cycle goes, it appears that there is most employment in December, and the least in January and August. aNd of course, there’s that downward spike in about 91.
- Is the recession of 1991/1992 visible in the estimated components?
Something certainly is. The remainder is all negative during that time period.