Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?
global_economy <- global_economy
global_economy %>%
filter(Country %in% c("United States", "China", "India", "Germany", "Japan", "United Kingdom","Singapore","Malaysia","Hong Kong SAR, China","France","Italia","Brazil")) %>%
ggplot(aes(x = Year, y = GDP / Population, color = Country)) +
geom_line(size = 1) +
labs(title = "GDP per Capita Over Time",
y = "GDP per Capita (USD)",
x = "Year") +
theme_minimal()
For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.
United States GDP from global_economy.
global_economy %>%
filter(Country == "United States") %>%
autoplot(GDP) +
labs(title = "United States GDP Over Time",
y = "GDP (USD)",
x = "Year")
global_economy %>%
filter(Country == "United States") %>%
mutate(log_GDP = log(GDP)) %>%
autoplot(log_GDP) +
labs(title = "Log-Transformed United States GDP",
y = "Log(GDP)",
x = "Year")
Slaughter of Victorian “Bulls, bullocks and steers” in aus_livestock.
aus_livestock %>%
filter(State == "Victoria", Animal == "Bulls, bullocks and steers") %>%
autoplot(Count) +
labs(title = "Slaughter of Bulls, Bullocks, and Steers in Victoria",
y = "Number Slaughtered",
x = "Year")
aus_livestock %>%
filter(State == "Victoria", Animal == "Bulls, bullocks and steers") %>%
gg_season(Count) +
labs(title = "Seasonal Plot: Victorian Slaughter of Bulls, Bullocks, and Steers")
aus_livestock %>%
filter(State == "Victoria", Animal == "Bulls, bullocks and steers") %>%
mutate(log_Count = log(Count)) %>%
autoplot(log_Count) +
labs(title = "Log-Transformed Slaughter Data")
Victorian Electricity Demand from vic_elec.
vic_elec %>%
mutate(Month = yearmonth(as_date(Time))) %>%
index_by(Month) %>%
summarise(Demand = mean(Demand)) %>%
autoplot(Demand) +
labs(title = "Monthly Average Electricity Demand in Victoria",
y = "Electricity Demand (MW)",
x = "Month")
vic_elec %>%
mutate(Month = month(Time, label = TRUE)) %>%
gg_season(Demand) +
labs(title = "Seasonal Plot: Victorian Electricity Demand by Month",
y = "Electricity Demand (MW)")
vic_elec %>%
mutate(Month = yearmonth(as_date(Time)),
log_Demand = log(Demand)) %>%
index_by(Month) %>%
summarise(log_Demand = mean(log_Demand)) %>%
autoplot(log_Demand) +
labs(title = "Log-Transformed Victorian Electricity Demand (Monthly Avg)",
y = "Log(Demand)",
x = "Month")
Gas production from aus_production.
aus_production %>%
autoplot(Gas) +
labs(title = "Australian Gas Production Over Time",
y = "Gas Production",
x = "Time")
aus_production %>%
gg_season(Gas) +
labs(title = "Seasonal Plot: Australian Gas Production")
aus_production %>%
mutate(log_Gas = log(Gas)) %>%
autoplot(log_Gas) +
labs(title = "Log-Transformed Australian Gas Production")
lambda <- aus_production |>
features(Gas, features = guerrero) |>
pull(lambda_guerrero)
aus_production |>
autoplot(box_cox(Gas, lambda)) +
labs(y = "",
title = latex2exp::TeX(paste0(
"Transformed gas production with $\\lambda$ = ",
round(lambda,2))))
Why is a Box-Cox transformation unhelpful for the canadian_gas data?
For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance. Tobacco from aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.
aus_production %>%
autoplot(Tobacco) +
labs(title = "Australian Tobacco Production Over Time",
y = "Tobacco Production",
x = "Time")
lambda_tobacco <- aus_production %>%
features(Tobacco, features = guerrero) %>%
pull(lambda_guerrero)
aus_production %>%
mutate(Tobacco_transformed = box_cox(Tobacco, lambda_tobacco)) %>%
autoplot(Tobacco_transformed) +
labs(title = "Box-Cox Transformed Tobacco Production",
y = "Transformed Production Volume")
ansett %>%
filter(Class == "Economy", Airports == "MEL-SYD") %>%
autoplot(Passengers) +
labs(title = "Economy Class Passengers (MEL-SYD)",
y = "Passengers")
lambda_passengers <- ansett %>%
filter(Class == "Economy", Airports == "MEL-SYD") %>%
features(Passengers, features = guerrero) %>%
pull(lambda_guerrero)
ansett %>%
filter(Class == "Economy", Airports == "MEL-SYD") %>%
mutate(Passengers_transformed = box_cox(Passengers, lambda_passengers)) %>%
autoplot(Passengers_transformed) +
labs(title = "Box-Cox Transformed Economy Class Passengers (MEL-SYD)",
y = "Passengers")
pedestrian %>%
filter(Sensor == "Southern Cross Station") %>%
autoplot(Count) +
labs(title = "Southern Cross Station's Pedestrian Counts",
y = "Pedestrian")
pedestrian_weekly <- pedestrian %>%
filter(Sensor == "Southern Cross Station") %>%
index_by(week = yearweek(Date_Time)) %>%
summarise(Weekly_Count = sum(Count, na.rm = TRUE))
lambda_pedestrian <- pedestrian_weekly %>%
features(Weekly_Count, features = guerrero) %>%
pull(lambda_guerrero) %>%
first()
pedestrian_weekly %>%
mutate(Transformed_Count = box_cox(Weekly_Count, lambda_pedestrian)) %>%
autoplot(Transformed_Count) +
labs(title = "Box-Cox Transformed Weekly Southern Cross Station's Pedestrian Counts",
y = "Transformed Weekly Count")
Consider the last five years of the Gas data from aus_production.
gas <- tail(aus_production, 5*4) |> select(Gas)
Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?
gas <- tail(aus_production, 5*4) %>%
select(Gas)
autoplot(gas, Gas) +
labs(title = "Gas Production Over the Last 5 Years",
y = "Gas Production",
x = "Time")
Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.
gas_decomp <- gas %>%
model(classical_decomposition(Gas, type = "multiplicative"))
components(gas_decomp) %>%
autoplot()
Do the results support the graphical interpretation from part a?
Compute and plot the seasonally adjusted data.
gas_sa <- gas %>%
mutate(Adjusted = Gas / components(gas_decomp)$seasonal)
autoplot(gas_sa, Adjusted)
Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?
gas_outlier <- gas
gas_outlier$Gas[10] <- gas_outlier$Gas[10] + 300
gas_outlier_decomp <- gas_outlier %>%
model(classical_decomposition(Gas, type = "multiplicative"))
components(gas_outlier_decomp) %>%
autoplot()
Does it make any difference if the outlier is near the end rather than in the middle of the time series?
Recall your retail time series data (from Exercise 7 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
autoplot(myseries)
## Plot variable not specified, automatically selected `.vars = Turnover`
gg_season(myseries)
## Plot variable not specified, automatically selected `y = Turnover`
gg_subseries(myseries)
## Plot variable not specified, automatically selected `y = Turnover`
gg_lag(myseries, geom = "point")
## Plot variable not specified, automatically selected `y = Turnover`
ACF(myseries, Turnover) |> autoplot()
myseries_x11 <- myseries |>
model(X_13ARIMA_SEATS(Turnover ~ x11())) |>
components()
autoplot(myseries_x11)
Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.
STL decomposition value = trend + season_year + remainder
Write about 3–5 sentences describing the results of the decomposition. Pay particular attention to the scales of the graphs in making your interpretation.
Is the recession of 1991/1992 visible in the estimated components?