library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.2
## ✔ lubridate 1.9.3 ✔ fable 0.3.4
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.3
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##3.1
Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?
glimpse(global_economy)
## Rows: 15,150
## Columns: 9
## Key: Country [263]
## $ Country <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan",…
## $ Code <fct> AFG, AFG, AFG, AFG, AFG, AFG, AFG, AFG, AFG, AFG, AFG, AFG,…
## $ Year <dbl> 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969,…
## $ GDP <dbl> 537777811, 548888896, 546666678, 751111191, 800000044, 1006…
## $ Growth <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ CPI <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ Imports <dbl> 7.024793, 8.097166, 9.349593, 16.863910, 18.055555, 21.4128…
## $ Exports <dbl> 4.132233, 4.453443, 4.878051, 9.171601, 8.888893, 11.258279…
## $ Population <dbl> 8996351, 9166764, 9345868, 9533954, 9731361, 9938414, 10152…
global_economy %>%
autoplot(GDP/Population, show.legend = FALSE)
## Warning: Removed 3242 rows containing missing values or values outside the scale range
## (`geom_line()`).
#we can create a new variable called GDP_per_Capita and filter through the years to see which contry had the highest GDP per capita
global_economy <- global_economy %>%
mutate(GDP_per_capita = GDP / Population)
#we can use index by year to see which contry had the highest GDP every year
highest_gdp_per_capita <- global_economy %>%
index_by(Year) %>%
filter(GDP_per_capita == max(GDP_per_capita, na.rm = TRUE)) %>%
select(Country, Year, GDP_per_capita)%>%
arrange(desc(Year))
# View the country with the highest GDP per capita over time
print(highest_gdp_per_capita)
## # A tsibble: 58 x 3 [1Y]
## # Key: Country [263]
## # Groups: @ Year [58]
## Country Year GDP_per_capita
## <fct> <dbl> <dbl>
## 1 Luxembourg 2017 104103.
## 2 Monaco 2016 168011.
## 3 Liechtenstein 2015 167591.
## 4 Monaco 2014 185153.
## 5 Liechtenstein 2013 173528.
## 6 Monaco 2012 152000.
## 7 Monaco 2011 162155.
## 8 Monaco 2010 144569.
## 9 Monaco 2009 149221.
## 10 Monaco 2008 180640.
## # ℹ 48 more rows
#we can also find the highest GDP recorded.
highest_gdp_per_capita2 <- global_economy %>%
filter(GDP_per_capita == max(GDP_per_capita, na.rm = TRUE)) %>%
select(Country, Year, GDP_per_capita)
print(highest_gdp_per_capita2)
## # A tsibble: 1 x 3 [1Y]
## # Key: Country [1]
## Country Year GDP_per_capita
## <fct> <dbl> <dbl>
## 1 Monaco 2014 185153.
It looks like as of 2017 Luxembourg had the highest GDP per capita. Monaco seems to be from the top preforming countries in terms of GDP per capita through out the previouse years and also had the highest GDP recorded in the data set, of 185152.
##3.2 For each of the following series, make a graph of the data. If transforming seems appropriate, do so and describe the effect.
United States GDP from global_economy.
us_gdp <- global_economy %>%
filter(Country == "United States")
autoplot(us_gdp, GDP) +
labs(title = "United States GDP Over Time", y = "GDP", x = "Year")
Slaughter of Victorian “Bulls, bullocks and steers” in
aus_livestock.
victorian_slaughter <- aus_livestock %>%
filter(Animal == "Bulls, bullocks and steers", State == "Victoria")
autoplot(victorian_slaughter, Count) +
labs(title = "Victorian Slaughter of Bulls, Bullocks, and Steers", y = "Slaughter Count", x = "Year")
Victorian Electricity Demand from vic_elec.
autoplot(vic_elec, Demand) +
labs(title = "Victorian Electricity Demand Over Time", y = "Electricity Demand", x = "Time")
Gas production from aus_production.
autoplot(aus_production, Gas) +
labs(title = "Australian Gas Production Over Time", y = "Gas Production", x = "Quarter")
##3.3 Why is a Box-Cox transformation unhelpful for the canadian_gas data?
canadian_gas %>%
autoplot() +
labs(title = "Monthly Canadian gas production")
## Plot variable not specified, automatically selected `.vars = Volume`
The Box-Cox transformation helps with issues like changing variance and skewness but doesn’t fix seasonality patterns or long-term changes (trends). If the canadian_gas data has these kinds of patterns, Box-Cox won’t remove them. In such cases, you might need other methods like difference to deal with these patterns.
##3.4 What Box-Cox transformation would you select for your retail data (from Exercise 7 in Section 2.10)? an optimal λ which I chose by examining multiple values was 0.08.
random_series_id <- sample(aus_retail$`Series ID`, 1)
myseries <- aus_retail %>%
filter(`Series ID` == random_series_id)
autoplot(myseries, Turnover) +
labs(title = "Retail Data Turnover",
y = "$AUD (Millions)")
lambda <- myseries %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
transformed_series <- myseries %>%
mutate(Turnover = box_cox(Turnover, lambda))
autoplot(transformed_series, Turnover) +
labs(title = paste("Transformed Retail Turnover with \u03BB =", round(lambda, 2)))
##3.5 For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance. Tobacco from aus_production, Economy class passengers between Melbourne and Sydney from ansett, and Pedestrian counts at Southern Cross Station from pedestrian.
head(aus_production)
## # A tsibble: 6 x 7 [1Q]
## Quarter Beer Tobacco Bricks Cement Electricity Gas
## <qtr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1956 Q1 284 5225 189 465 3923 5
## 2 1956 Q2 213 5178 204 532 4436 6
## 3 1956 Q3 227 5297 208 561 4806 7
## 4 1956 Q4 308 5681 197 570 4418 6
## 5 1957 Q1 262 5577 187 529 4339 5
## 6 1957 Q2 228 5651 214 604 4811 7
aus_production %>% autoplot(Tobacco)
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Plot the original data
aus_production %>%
autoplot(Tobacco) +
labs(title = "Tobacco Production", y = "Tobacco Production")
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Calculate the optimal lambda
lambda_value <- aus_production %>%
features(Tobacco,features = guerrero) %>%
pull(lambda_guerrero)
lambda_value
## [1] 0.9264636
aus_production %>% autoplot(box_cox(Tobacco,lambda)) +
labs(title = paste("Transformed Tobacco Production with \u03BB =", round(lambda, 2)))
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
Using the guerrero feature we found the optimal value for the Box-Cox transformation parameter λ to be equal to 0.92.
##3.7
Consider the last five years of the Gas data from aus_production.
A. Plot the time series. Can you identify seasonal fluctuations and/or a trend-cycle?
gas <- tail(aus_production, 5*4) |> select(Gas)
gas %>% autoplot(Gas) +
labs(title = "Australia Gas Production", y = "Petajoules")
It looks like there is a seasonal trend with a low gas production around
Q1 of every year.
B. Use classical_decomposition with type=multiplicative to calculate the trend-cycle and seasonal indices.
classical_decomposition_1 <- gas %>%
model(
classical_decomposition(Gas, type = "multiplicative")
) %>%
components()
classical_decomposition_1 %>% autoplot()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
C. Do the results support the graphical interpretation from part a?
Yes, the decomposition results match what we saw in the visual analysis from part a. It breaks the time series into trend, seasonality, and residuals, confirming the upward trend and seasonal patterns. The trend shows a steady rise, and the seasonal component captures the repeating cycles, like quarterly changes. These
D. Compute and plot the seasonally adjusted data.
as_tsibble(classical_decomposition_1) %>%
autoplot(season_adjust) +
labs(title = "Seasonally Adjusted Data")
E. Change one observation to be an outlier (e.g., add 300 to one observation), and recompute the seasonally adjusted data. What is the effect of the outlier?
gas %>%
mutate(Gas = if_else(Quarter==yearquarter("2009Q4"), Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(season_adjust)
gas %>%
mutate(Gas = if_else(Quarter==yearquarter("2007Q4"), Gas + 300, Gas)) %>%
model(classical_decomposition(Gas, type = "multiplicative")) %>%
components() %>%
as_tsibble() %>%
autoplot(season_adjust)
F. Does it make any difference if the outlier is near the end rather than in the middle of the time series?
yes, if the outlier is near the end rather than the middle it can cause more impact in forcasting, since the most recent data point carry more weight in predictive models.
##3.8 Recall your retail time series data (from Exercise 7 in Section 2.10). Decompose the series using X-11. Does it reveal any outliers, or unusual features that you had not noticed previously?
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
x11_decomposition <- myseries %>%
model(x11 = X_13ARIMA_SEATS(Turnover ~ x11())) %>%
components()
autoplot(x11_decomposition) +
labs(title = "X11 Retail Decomposition.")
In this series, I notice some spikes in the irregular component, which
could indicate outliers that might affect the slightly increasing
trend.
##3.9 Figures 3.19 and 3.20 show the result of decomposing the number of persons in the civilian labour force in Australia each month from February 1978 to August 1995.
The decomposition results show that there is a clear upward trend in the data, which indicates growth over time. The trends show asome flatting around the 1990s. The seasonaluty shows fluctuation within the year, indicating months seasonality.
The recession of 1991/1992 is somewhat visible in the residule component, where there is a sharp dip. However, it does not cause a significant deviation in the overall trend, which continues to rise steadily. This suggests that while the recession had a short-term impact, the long-term growth trend was not heavily affected.