library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.0
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
aus = aus_production
pel = pelt
gafa = gafa_stock
vic = vic_elec
#?aus_production
#?pelt
#?gafa_stock
#?vic_elec
glimpse(aus)
## Rows: 218
## Columns: 7
## $ Quarter <qtr> 1956 Q1, 1956 Q2, 1956 Q3, 1956 Q4, 1957 Q1, 1957 Q2, 1957…
## $ Beer <dbl> 284, 213, 227, 308, 262, 228, 236, 320, 272, 233, 237, 313…
## $ Tobacco <dbl> 5225, 5178, 5297, 5681, 5577, 5651, 5317, 6152, 5758, 5641…
## $ Bricks <dbl> 189, 204, 208, 197, 187, 214, 227, 222, 199, 229, 249, 234…
## $ Cement <dbl> 465, 532, 561, 570, 529, 604, 603, 582, 554, 620, 646, 637…
## $ Electricity <dbl> 3923, 4436, 4806, 4418, 4339, 4811, 5259, 4735, 4608, 5196…
## $ Gas <dbl> 5, 6, 7, 6, 5, 7, 7, 6, 5, 7, 8, 6, 5, 7, 8, 6, 6, 8, 8, 7…
library(dplyr)
library(tsibble)
# For aus_production
summary_data <- aus_production %>%
index_by(Quarter) %>%
summarise(max_bricks = max(Bricks, na.rm = TRUE))
## Warning: There were 20 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `max_bricks = max(Bricks, na.rm = TRUE)`.
## ℹ In group 199: `Quarter = 2005 Q3`.
## Caused by warning in `max()`:
## ! no non-missing arguments to max; returning -Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 19 remaining warnings.
# Display the result
print(summary_data)
## # A tsibble: 218 x 2 [1Q]
## Quarter max_bricks
## <qtr> <dbl>
## 1 1956 Q1 189
## 2 1956 Q2 204
## 3 1956 Q3 208
## 4 1956 Q4 197
## 5 1957 Q1 187
## 6 1957 Q2 214
## 7 1957 Q3 227
## 8 1957 Q4 222
## 9 1958 Q1 199
## 10 1958 Q2 229
## # ℹ 208 more rows
glimpse(pel)
## Rows: 91
## Columns: 3
## $ Year <dbl> 1845, 1846, 1847, 1848, 1849, 1850, 1851, 1852, 1853, 1854, 1855,…
## $ Hare <dbl> 19580, 19600, 19610, 11990, 28040, 58000, 74600, 75090, 88480, 61…
## $ Lynx <dbl> 30090, 45150, 49150, 39520, 21230, 8420, 5560, 5080, 10170, 19600…
# Convert to a regular tibble and summarize
summary_data <- as_tibble(pelt) %>%
group_by(Year) %>%
summarise(max_lynx = max(Lynx, na.rm = TRUE), .groups = 'drop')
# Display the result
print(summary_data)
## # A tibble: 91 × 2
## Year max_lynx
## <dbl> <dbl>
## 1 1845 30090
## 2 1846 45150
## 3 1847 49150
## 4 1848 39520
## 5 1849 21230
## 6 1850 8420
## 7 1851 5560
## 8 1852 5080
## 9 1853 10170
## 10 1854 19600
## # ℹ 81 more rows
# Convert to a regular tibble and summarize maximum closing prices
max_closing_prices <- as_tibble(gafa) %>%
group_by(Symbol) %>%
summarise(max_close = max(Close, na.rm = TRUE), .groups = 'drop')
# Display the result
print(max_closing_prices)
## # A tibble: 4 × 2
## Symbol max_close
## <chr> <dbl>
## 1 AAPL 232.
## 2 AMZN 2040.
## 3 FB 218.
## 4 GOOG 1268.
# Convert to a regular tibble and summarize
max_demand_per_year <- as_tibble(vic) %>%
group_by(Year = year(Date)) %>%
summarise(max_demand = max(Demand, na.rm = TRUE), .groups = 'drop')
# Display the result
print(max_demand_per_year)
## # A tibble: 3 × 2
## Year max_demand
## <dbl> <dbl>
## 1 2012 8443.
## 2 2013 8897.
## 3 2014 9345.
gafa_stock %>%
group_by(Symbol) %>%
filter(Close == max(Close))
## # A tsibble: 4 x 8 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
## 2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
## 3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
## 4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
The filter() operation has been used to extract the days when each stock reached its peak closing price, alongside other trading details such as the high, low, and adjusted close prices for those specific dates.
tute1 <- readr::read_csv("C:\\Users\\LENOVO\\Downloads\\tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(tute1)
## # A tibble: 6 × 4
## Quarter Sales AdBudget GDP
## <date> <dbl> <dbl> <dbl>
## 1 1981-03-01 1020. 659. 252.
## 2 1981-06-01 889. 589 291.
## 3 1981-09-01 795 512. 291.
## 4 1981-12-01 1004. 614. 292.
## 5 1982-03-01 1058. 647. 279.
## 6 1982-06-01 944. 602 254
mytimeseries <- tute1 |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter)
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y")
mytimeseries %>%
pivot_longer(-Quarter) %>%
ggplot(aes(x = Quarter, y= value, color = name)) +
geom_line()
“When you don’t include facet_grid(), the result is a single plot where all three line graphs (representing different variables) are combined into one. This forces all the data onto a shared space, and the only way to tell the lines apart is by their color and by looking at the legend. While this approach shows everything in one place, it can become harder to analyze individual trends, especially if the values of the variables are on different scales or overlap. In such cases, the graph can feel crowded, making it difficult to focus on each variable’s behavior.”
library(USgas)
# Create a tsibble
us_gas_tsibble <- us_total %>%
as_tsibble(index = year, key = state)
ne_area <- c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")
us_gas_tsibble %>% filter(state %in% ne_area) %>%
autoplot(y) +
labs(y = "Gas Consumption (million cubic ft)", title = "Gas Consumption: New England")
library(tsibbledata)
library(ggplot2)
library(fable)
library(fabletools)
library(feasts)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fpp3)
# Load the aus_retail dataset
data("aus_retail")
# Set the seed and select a random retail series
set.seed(81525678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
# Display the selected series
print(myseries)
## # A tsibble: 441 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 South Australia Clothing retailing A3349360V 1982 Apr 19.1
## 2 South Australia Clothing retailing A3349360V 1982 May 21.6
## 3 South Australia Clothing retailing A3349360V 1982 Jun 18.3
## 4 South Australia Clothing retailing A3349360V 1982 Jul 18.6
## 5 South Australia Clothing retailing A3349360V 1982 Aug 17.1
## 6 South Australia Clothing retailing A3349360V 1982 Sep 18.2
## 7 South Australia Clothing retailing A3349360V 1982 Oct 20.7
## 8 South Australia Clothing retailing A3349360V 1982 Nov 23.6
## 9 South Australia Clothing retailing A3349360V 1982 Dec 33.4
## 10 South Australia Clothing retailing A3349360V 1983 Jan 20
## # ℹ 431 more rows
# Convert the data to a tsibble (time series tibble)
myseries_tsibble <- myseries |>
select(Month, Turnover) |>
as_tsibble(index = Month)
# Plotting the selected series
myseries_tsibble |>
autoplot(Turnover) +
labs(title = "Selected Retail Series Time Plot")
# Seasonal plot
myseries_tsibble |>
gg_season(Turnover) +
labs(title = "Seasonal Plot of Selected Retail Series")
# Subseries plot
myseries_tsibble |>
gg_subseries(Turnover) +
labs(title = "Subseries Plot of Selected Retail Series")
# Lag plot
myseries_tsibble |>
gg_lag(Turnover) +
labs(title = "Lag Plot of Selected Retail Series")
# Autocorrelation function plot
myseries_tsibble |>
ACF(Turnover) |>
autoplot() +
labs(title = "ACF Plot of Selected Retail Series")
# Example loading of datasets (Replace this with actual data loading code)
data("us_employment") # Total Private Employed
data("aus_production") # Bricks
data("pelt") # Hare
data("PBS") # H02 Cost
data("us_gasoline") # Barrels
us_employment_private <- us_employment %>% filter(Title == "Total Private")
us_employment_private %>%
autoplot(Employed)
us_employment_private %>%
gg_season(Employed)
us_employment_private %>%
gg_subseries(Employed)
us_employment_private %>%
gg_lag(Employed)
us_employment_private %>%
ACF(Employed) %>%
autoplot()
The US employment dataset for “Total Private” shows trends that align with historical economic cycles, including major recessions. While the data doesn’t extend fully into 2020, it would reflect the significant drop in March 2020.
Recent patterns indicate seasonality, with growth in the first seven months followed by declines. Functions like gg_season() and gg_subseries() can help visualize these seasonal effects, while autoplot() reveals cyclic trends. Overall, the dataset captures clear trends of growth alongside seasonal fluctuations and cyclical behavior.
The series reveals that economic recoveries in the US have been lengthening over time, suggesting that it now requires more time for employment levels to rebound to pre-recession levels. This trend indicates a shift in the dynamics of the labor market and may reflect underlying structural changes in the economy.
The seasonal patterns in total US employment suggest that the increase in the latter part of the year may be driven by major consumer holidays, such as Black Friday and Christmas. Additionally, the pronounced peak in July likely reflects the fiscal year-end for many private companies, which often leads to heightened hiring and employment activities during that period.
A notable unusual period is the recovery after the 2010 financial crisis, which shows the most rapid and sustained growth in employment since the 1940s. This remarkable rebound distinguishes that timeframe from other years in the dataset.
aus_production%>%
autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
aus_production %>%
gg_season(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
aus_production %>%
gg_subseries(Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
aus_production %>%
gg_lag(Bricks)
## Warning: Removed 20 rows containing missing values (gg_lag).
aus_production %>%
ACF(Bricks) %>%
autoplot()
The analysis of the “Bricks” time series from aus_production reveals a clear seasonal peak in Q3 across the entire dataset. Additionally, brick production reached its highest levels in the 1980s but has significantly declined throughout the 2000s, indicating a notable long-term downward trend.
This trend suggests two possibilities: either brick has fallen out of favor with Australian home buyers, leading to decreased demand, or production costs have risen due to tightening resource constraints, making it less economically viable to produce bricks at previous levels.
The seasonal plot indicates that brick production is notably higher in quarters 2 and 3, while quarter 1 shows relatively lower production levels. Overall, there is a clear upward trend in production over time, aligning with the observations from the prior time series plot.
The significant declines in brick production during the 1970s and again in the 1980s are particularly striking and stand out as unusual years in the dataset.
pelt %>%
autoplot(Hare)
pelt %>%
gg_subseries(Hare)
pelt %>%
gg_lag(Hare)
pelt %>%
ACF(Hare) %>%
autoplot()
PBS1 <- PBS %>%
filter(ATC2 == "H02")
PBS1 %>%
autoplot(Cost)
PBS1 %>%
gg_season(Cost)
PBS1 %>%
gg_subseries(Cost)
PBS1 %>%
ACF(Cost) %>%
autoplot()
us_gasoline %>%
autoplot(Barrels)
us_gasoline %>%
gg_season(Barrels)
us_gasoline %>%
gg_subseries(Barrels)
us_gasoline %>%
gg_lag(Barrels)
us_gasoline %>%
ACF(Barrels) %>%
autoplot()
1 == B
2 == A
3 == D
4 == C
#filter for the last 5 yrs of Gas data
gas <- tail(aus_production, 5*4) %>% select(Gas)
gas %>%
autoplot()+
labs(title = "Last Five Years of The Gas Data")+
theme_replace()+
geom_line(col = "#1B89D3")
## Plot variable not specified, automatically selected `.vars = Gas`
The plot of the Gas time series reveals both seasonal fluctuations and a trend-cycle. There are noticeable seasonal patterns, with distinct upward and downward movements occurring at regular intervals throughout the year. Overall, the trend-cycle shows a fluctuating pattern, suggesting periods of growth followed by declines. This interplay of seasonal variation and the overall trend reflects the dynamics of gas production in response to market demands and external factors.
We will now use classical decomposition to analyze this data:
gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot() +
ggtitle("Last Five Years of The Gas Data")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
The results of the multiplicative decomposition indicate a quarterly seasonal component with a yearly frequency. The analysis reveals an increasing trend from 2006 through mid-2007, followed by a period of stability until early 2008. After that, another upward trend emerges in late 2009. This decomposition highlights both the seasonal patterns and the fluctuations in the trend-cycle, providing insight into the underlying dynamics of the data.
# STL decomposition
dcmp <- gas %>%
model(stl = STL(Gas))
#Compute and plot the seasonally adjusted data
components(dcmp) %>%
as_tsibble() %>%
autoplot(Gas, colour = "gray") +
geom_line(aes(y=season_adjust), colour = "#0072B2") +
labs(y = "Gas production",
title = "Australian Gas Production")
Yes, the results from the multiplicative decomposition support the graphical interpretation of the Gas time series. Both analyses reveal clear seasonal fluctuations, characterized by regular upward and downward movements throughout the year. The decomposition results confirm the presence of a fluctuating trend-cycle, highlighting periods of growth and decline that align with the graphical observations. This consistency between the graphical representation and the decomposition analysis underscores the dynamic nature of gas production in relation to market demands and external influences.
gas_decom <- gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components()
gas_decom %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "",
title = "Last Five Years of The Gas Data") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
new_gas <- gas
new_gas$Gas[10] <- new_gas$Gas[10]+300
new_gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot() +
ggtitle("Last Five Years of The Gas Data with 300 added to the 10th observation")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
new_gas %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "",
title = "Last Five Years of The Gas Data with 300 added to the 10th observation") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
When 300 was added to the 10th observation, it resulted in a significant spike in the seasonally adjusted data, shifting the quarterly gas figures from a seasonal low to a relative high. Despite this noticeable change in the overall data, the impact on the seasonal component was minimal because the seasonal pattern remains consistent across years, with only one observation altered. Additionally, this outlier contributed to a downward trend from early 2008 to mid-2008, highlighting how outliers can distort both seasonal adjustments and underlying trends.
new_gas2 <- gas
new_gas2$Gas[20] <- new_gas2$Gas[10]+300
new_gas2 %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
autoplot() +
ggtitle("Last Five Years of The Gas Data with 300 added to the 10th observation")
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).
new_gas2 %>%
model(classical_decomposition(Gas,type = "multiplicative")) %>%
components() %>%
ggplot(aes(x = Quarter)) +
geom_line(aes(y = Gas, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(y = "",
title = "Last Five Years of The Gas Data with 300 added to the last observation") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
Yes, the position of the outlier makes a difference. Adding 300 to the last entry creates a spike at the end of the seasonally adjusted data, impacting the overall appearance of the trend. However, the seasonal component is less affected by this change, as its pattern closely aligns with the original data. The trend appears more favorable, showing a stronger upward trajectory, primarily due to the influence of the outlier at the end of the series.
The decomposition results reveal that the trend component has generally increased throughout most of the timeframe, with notable stationary periods observed in the early 1990s. Analyzing the seasonal component on a monthly basis highlights that certain months exhibit more pronounced fluctuations than others, indicating varying levels of production. The scales of the graphs emphasize these differences, illustrating how seasonal variations interact with the overall upward trend in production.
Yes, the recession of 1991/1992 is evident in the estimated components, as there is a noticeable dip in employment during this period. This decline is distinct from seasonal variations and the underlying positive trend, highlighting the recession’s impact on employment levels.
canadian_gas %>%
autoplot(Volume)+
labs(title = "Monthly Canadian Gas Production",
subtitle = "autoplot()",
y = "billions of cubic meter")+
theme_replace()+
geom_line(col = "#1B89D3")
canadian_gas %>%
gg_subseries(Volume)+
labs(title = "Monthly Canadian Gas Production",
subtitle = "gg_subseries()",
y = "billions of cubic meter")
canadian_gas %>%
gg_season(Volume)+
labs(title = "Monthly Canadian Gas Production",
subtitle = "gg_season()",
y = "billions of cubic meter")
The time plot of the Canadian gas data reveals a clear increasing trend
alongside strong seasonality. This is further confirmed by both the
subseries and seasonal plots. Generally, gas production tends to
decrease in the summer and rise in the winter months. Notably, the
seasonality intensified between 1975 and 1990, as evidenced by the
larger differences in production levels between summer and winter, which
are highlighted by the blue and green lines in the seasonal plot.
canadian_gas %>%
model(
STL(Volume ~ trend(window = 21) +
season(window = 13),
robust = TRUE)) %>%
components() %>%
autoplot()+
labs(title = "STL decomposition of Canadian Gas Production")
The results of the STL decomposition reveal that the trend component
effectively captures the overall trend in the original data. The
seasonal component shows an increase from 1975 to 1985, followed by a
subsequent decline, which aligns with observations from the time plot.
Additionally, the remainder component hovers around zero, indicating
that the model has accounted for most of the variability in the
data.
The seasonal shape exhibits a notable change over time. Initially, it appears relatively flat, but as time progresses, the seasonal shape becomes more pronounced. In 1960, there is no evident trend-cycle, suggesting that gas production was relatively stable during that period. However, after 1975, a clear trend-cycle emerges, indicating a significant increase in gas production during that time and beyond.
canadian_gas %>%
model(
STL(Volume ~ trend(window = 21) +
season(window = 13),
robust = TRUE)) %>%
components() %>%
ggplot(aes(x = Month)) +
geom_line(aes(y = Volume, colour = "Data")) +
geom_line(aes(y = season_adjust,
colour = "Seasonally Adjusted")) +
geom_line(aes(y = trend, colour = "Trend")) +
labs(title = "STL decomposition of Canadian Gas Production") +
scale_colour_manual(
values = c("gray", "#0072B2", "#D55E00"),
breaks = c("Data", "Seasonally Adjusted", "Trend")
)
canadian_gas %>%
model(x11 = X_13ARIMA_SEATS(Volume ~ x11())) %>%
components() %>%
autoplot()+
labs(title = "X-11 decomposition of Canadian Gas Production")
canadian_gas %>%
model(seats = X_13ARIMA_SEATS(Volume ~ seats())) %>%
components() %>%
autoplot() +
labs(title ="SEATS Decomposition of Canadian Gas Production")
The results from the SEATS and X-11 decompositions reveal similar trends and seasonal components. However, the changes in seasonality differ when compared to the original data. The differences in the seasonally adjusted time series between the two methods are minimal.
Additionally, the remainder component from the SEATS decomposition is larger than that of the X-11 decomposition, with both remainder components being around one. In contrast, the remainder component from the STL decomposition is smaller. This suggests that the STL decomposition provides a better fit for the Canadian gas data.