# Find available datasets
# data() is called here for its return value; only the `results` element of
# that value is used below.
dataset_info <- data(package = "fpp3")
# Count number of rows
# Each row of `results` is treated as one dataset, so the row count is reported
# as the number of datasets in the package.
count <- nrow(dataset_info$results)
print(count)[1] 25
What is the fpp3 package about?
The fpp3 package in R is a companion to the book Forecasting: Principles and Practice by Rob J. Hyndman and George Athanasopoulos; it provides datasets and tools for learning and applying time series forecasting techniques. It includes all datasets needed for the book’s examples and exercises, as well as additional datasets for practice. Its broad collection of real-world time series, many of them from Australia, helps users develop forecasting skills. The package also loads a suite of other packages to support time series analysis within the tidyverse ecosystem.
# Find available datasets
# NOTE(review): this chunk repeats the dataset-count chunk at the top of the
# file verbatim; it recomputes the same `dataset_info` and `count`.
dataset_info <- data(package = "fpp3")
# Count number of rows
# Each row of `results` is treated as one dataset.
count <- nrow(dataset_info$results)
print(count)[1] 25
There are 25 datasets in fpp3, as shown above, including aus_fertility, aus_migration, us_employment, and us_gasoline among others.
library(fpp3)Registered S3 method overwritten by 'tsibble':
method from
as_tibble.grouped_df dplyr
── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
✔ tibble 3.2.1 ✔ tsibble 1.1.6
✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
✔ tidyr 1.3.1 ✔ feasts 0.4.2
✔ lubridate 1.9.4 ✔ fable 0.4.1
✔ ggplot2 3.5.1
── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
✖ lubridate::date() masks base::date()
✖ dplyr::filter() masks stats::filter()
✖ tsibble::intersect() masks base::intersect()
✖ tsibble::interval() masks lubridate::interval()
✖ dplyr::lag() masks stats::lag()
✖ tsibble::setdiff() masks base::setdiff()
✖ tsibble::union() masks base::union()
Loading fpp3 with library(fpp3) attaches the packages below:
tibble: Modern data frames
dplyr: Data manipulation
tidyr: Data tidying
lubridate: Date/time handling
ggplot2: Visualization
tsibble: Time series data structures
tsibbledata: Additional time series datasets
feasts: Time series feature extraction
fable: Forecasting models
What is a tsibble?
A tsibble is a tidyverse-compatible time series data structure with a required time index and optional keys (e.g., as_tsibble(data, index = Date, key = Store)).
What is the feasts package about?
The feasts package provides tools for extracting time series features and decomposing series (e.g., STL() for decomposition).
What is the fable package about?
The fable package fits forecasting models like ARIMA or ETS (e.g., model(ARIMA(Sales))).
# Create tsbl
# Build a 12-row tibble: Date is the first day of 12 consecutive months starting
# 2020-01-01, and Sales holds one value per month.
data <- tibble(
Date = seq(as.Date("2020-01-01"), by = "month", length.out = 12),
Sales = c(100, 120, 130, 110, 140, 150, 160, 170, 180, 190, 200, 210)
)
# Convert the tibble to a tsibble, using the Date column as the time index and
# no key.
# NOTE(review): the index is a daily Date although the observations are monthly;
# tsibble will presumably infer a daily interval with implicit gaps. If a
# monthly interval is intended, consider converting with yearmonth(Date) before
# calling as_tsibble() - TODO confirm.
sales_tsbl <- as_tsibble(data, index = Date)
glimpse(sales_tsbl)Rows: 12
Columns: 2
$ Date <date> 2020-01-01, 2020-02-01, 2020-03-01, 2020-04-01, 2020-05-01, 202…
$ Sales <dbl> 100, 120, 130, 110, 140, 150, 160, 170, 180, 190, 200, 210
# Filter and summarize data
# Take the first distinct Series ID in aus_retail as the example series.
valid_id <- aus_retail %>% distinct(`Series ID`) %>% slice_head(n = 1) %>% pull(`Series ID`)
# Keep only that series and compute sum(Turnover) as Total_Turnover.
# NOTE(review): the recorded glimpse() output still has 441 rows and a Month
# column, so the sum is taken per month rather than as one overall total. If a
# single grand total is wanted, the time index presumably has to be dropped
# first (e.g. via as_tibble()) - confirm the intent.
aus_retail %>%
filter(`Series ID` == valid_id) %>%
summarise(Total_Turnover = sum(Turnover)) %>%
glimpse()Rows: 441
Columns: 2
$ Month <mth> 1982 Apr, 1982 May, 1982 Jun, 1982 Jul, 1982 Aug, 1982 …
$ Total_Turnover <dbl> 4.4, 3.4, 3.6, 4.0, 3.6, 4.2, 4.8, 5.4, 6.9, 3.8, 4.2, …
# Select two valid Series IDs
# The first two distinct Series ID values in aus_retail are used.
valid_ids <- aus_retail %>% distinct(`Series ID`) %>% slice_head(n = 2) %>% pull(`Series ID`)
# Pivot data to wide format with tidyr
# Each selected Series ID becomes its own column holding Turnover; head() shows
# the first six rows.
# NOTE(review): State and Industry stay in the data, and in the recorded rows
# the second series' column is entirely NA - presumably because the two series
# differ in State/Industry, so their rows never line up. Dropping those columns
# before pivoting may be what is intended; confirm.
aus_retail %>%
filter(`Series ID` %in% valid_ids) %>%
pivot_wider(names_from = `Series ID`, values_from = Turnover) %>%
head()# A tsibble: 6 x 5 [1M]
# Key: State, Industry [1]
State Industry Month A3349849A A3349606J
<chr> <chr> <mth> <dbl> <dbl>
1 Australian Capital Territory Cafes, restaurants … 1982 Apr 4.4 NA
2 Australian Capital Territory Cafes, restaurants … 1982 May 3.4 NA
3 Australian Capital Territory Cafes, restaurants … 1982 Jun 3.6 NA
4 Australian Capital Territory Cafes, restaurants … 1982 Jul 4 NA
5 Australian Capital Territory Cafes, restaurants … 1982 Aug 3.6 NA
6 Australian Capital Territory Cafes, restaurants … 1982 Sep 4.2 NA
# From discussion 1
# Read the EIA natural gas CSV into `data`. This reuses the name `data`, which
# replaces the tibble of that name created earlier in the file.
# NOTE(review): the absolute Windows path is specific to one user's machine, so
# this line will fail elsewhere; a project-relative path would be portable.
data <- read.csv("C:\\Users\\Troy\\OneDrive\\Desktop\\pred_anal\\EIA_ng_ts.csv")
# Parse Date from month/day/year text, then derive Month (as a factor) and Year
# from the parsed date.
data <- data %>%
mutate(Date = as.Date(Date, format = "%m/%d/%Y"),
Month = as.factor(month(Date)),
Year = year(Date))
# Rename the second column to "Consumption".
# NOTE(review): the rename is positional, so it depends on the column order of
# the CSV file.
colnames(data)[2] <- "Consumption"
head (data) Date Consumption Month Year
1 1973-01-15 843900 1 1973
2 1973-02-15 747331 2 1973
3 1973-03-15 648504 3 1973
4 1973-04-15 465867 4 1973
5 1973-05-15 326313 5 1973
6 1973-06-15 207172 6 1973
# Extract year and month from `aus_retail`
# Take the first distinct Series ID in aus_retail as the example series.
valid_id <- aus_retail %>% distinct(`Series ID`) %>% slice_head(n = 1) %>% pull(`Series ID`)
# Year is year(Month); Month_Name is month(Month, label = TRUE), which prints
# as an ordered factor (<ord>) in the recorded output.
# NOTE(review): the recorded result still has the Month column although
# select() does not name it - presumably the tsibble index is always retained.
aus_retail %>%
filter(`Series ID` == valid_id) %>%
mutate(Year = year(Month), Month_Name = month(Month, label = TRUE)) %>%
select(Year, Month_Name, Turnover) %>%
head()# A tsibble: 6 x 4 [1M]
Year Month_Name Turnover Month
<dbl> <ord> <dbl> <mth>
1 1982 Apr 4.4 1982 Apr
2 1982 May 3.4 1982 May
3 1982 Jun 3.6 1982 Jun
4 1982 Jul 4 1982 Jul
5 1982 Aug 3.6 1982 Aug
6 1982 Sep 4.2 1982 Sep
# plot
# Line chart of Turnover against Month for the first distinct Series ID in
# aus_retail; the chosen ID is included in the plot title.
valid_id <- aus_retail %>%
  distinct(`Series ID`) %>%
  slice_head(n = 1) %>%
  pull(`Series ID`)
ggplot(
  data = filter(aus_retail, `Series ID` == valid_id),
  mapping = aes(x = Month, y = Turnover)
) +
  geom_line() +
  labs(title = paste("Retail Turnover for Series ID", valid_id))
# tsibbledata
# Plot the Passengers column for the "MEL-SYD" rows of tsibbledata::ansett.
autoplot(
  filter(tsibbledata::ansett, Airports == "MEL-SYD"),
  Passengers
) +
  labs(title = "Melbourne-Sydney Passenger Data")
# Decompose a time series with feasts
# Use the first distinct Series ID in aus_retail as the example series.
valid_id <- pull(
  slice_head(distinct(aus_retail, `Series ID`), n = 1),
  `Series ID`
)
# Fit STL to Turnover for that series, extract the components, and plot them.
filter(aus_retail, `Series ID` == valid_id) %>%
  model(STL(Turnover)) %>%
  components() %>%
  autoplot()
# Fit ARIMA model and forecast
# Use the first distinct Series ID in aus_retail as the example series.
valid_id <- aus_retail %>%
  distinct(`Series ID`) %>%
  slice_head(n = 1) %>%
  pull(`Series ID`)
# Fit ARIMA to Turnover for that single series.
fit <- model(
  filter(aus_retail, `Series ID` == valid_id),
  ARIMA(Turnover)
)
# Forecast 6 steps ahead and plot, passing aus_retail to autoplot() as its
# data argument.
fit %>%
  forecast(h = 6) %>%
  autoplot(aus_retail)
# pred 2
# Use the first distinct Series ID in aus_retail as the example series.
valid_id <- pull(
  slice_head(distinct(aus_retail, `Series ID`), n = 1),
  `Series ID`
)
# Fit ARIMA to Turnover for that single series.
fit <- filter(aus_retail, `Series ID` == valid_id) %>%
  model(ARIMA(Turnover))
# Forecast 6 steps ahead and plot the forecast alone (no data argument is
# passed to autoplot()).
fit %>%
  forecast(h = 6) %>%
  autoplot()