This assignment explores different time series datasets from the `fpp3` package and related sources.
I used simple R code with explanations to answer each question.
`tute1` is read directly from the book’s website to avoid missing-file issues.
We look at Bricks (aus_production), Lynx (pelt), Close (gafa_stock), and Demand (vic_elec).
# Keep only the time index and the one variable studied from each dataset.
# `gafa` also retains `Symbol`, which distinguishes the individual stocks.
bricks <- select(aus_production, Quarter, Bricks)
lynx <- select(pelt, Year, Lynx)
gafa <- select(gafa_stock, Symbol, Date, Close)
demand <- select(vic_elec, Time, Demand)
# Report the observation interval of each extracted series.
bricks |> interval()
## <interval[1]>
## [1] 1Q
lynx |> interval()
## <interval[1]>
## [1] 1Y
# `!` marks an irregular interval.
gafa |> interval()
## <interval[1]>
## [1] !
demand |> interval()
## <interval[1]>
## [1] 30m
# Time plots of the four series. Where no variable is named, autoplot()
# picks the measured column itself and reports which one it chose.
bricks |> autoplot()
## Plot variable not specified, automatically selected `.vars = Bricks`
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
lynx |> autoplot()
## Plot variable not specified, automatically selected `.vars = Lynx`
gafa |> autoplot(Close)
# The demand plot is the only one given a custom title and axis labels.
demand |>
  autoplot() +
  labs(
    title = "Electricity Demand in Victoria",
    x = "Time",
    y = "Demand (MW)"
  )
## Plot variable not specified, automatically selected `.vars = Demand`
# For each stock, keep the row(s) where the closing price hits its maximum.
gafa_stock |>
  select(Symbol, Date, Close) |>
  group_by(Symbol) |>
  filter(Close == max(Close))
## # A tsibble: 4 x 3 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Close
## <chr> <date> <dbl>
## 1 AAPL 2018-10-03 232.
## 2 AMZN 2018-09-04 2040.
## 3 FB 2018-07-25 218.
## 4 GOOG 2018-07-26 1268.
`tute1` is not bundled in `fpp3`, so we load it directly from the book website.
# Download the raw quarterly data (Sales, AdBudget, GDP) as a plain tibble.
tute1 <- read_csv("https://otexts.com/fpp3/extrafiles/tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Turn the date column into a quarterly index and promote the table to a
# tsibble keyed on nothing but time.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)
# Faceted version: one panel per series, each with its own y-axis range.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(Quarter, value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

# Unfaceted version: all three series drawn on one shared y-axis.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(Quarter, value, colour = name)) +
  geom_line()
# Build a tsibble of annual consumption with one series per state.
usgas <- as_tsibble(us_total, index = year, key = state)

# Plot only the six New England states.
usgas |>
  filter(
    state %in% c(
      "Maine", "Vermont", "New Hampshire",
      "Massachusetts", "Connecticut", "Rhode Island"
    )
  ) |>
  autoplot(y) +
  labs(title = "US Natural Gas Consumption - New England")
# Print the tourism tsibble to inspect its structure: quarterly data keyed by
# Region, State and Purpose, with Trips as the measured variable.
tourism
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # ℹ 24,310 more rows
# Find the Region/Purpose combination with the highest average trips.
# `tourism` is a tsibble, and summarise() on a tsibble always keeps the time
# index, so grouping it directly returns one row per Region/Purpose/Quarter
# (24,320 rows) rather than one average per combination. Converting to a plain
# tibble first lets mean() collapse over all quarters, giving one row per
# Region/Purpose pair, sorted with the largest average first.
# NOTE: printed output captured before this change must be regenerated.
tourism |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(avg_trips = mean(Trips, na.rm = TRUE), .groups = "drop") |>
  arrange(desc(avg_trips))
## Warning: Current temporal ordering may yield unexpected results.
## ℹ Suggest to sort by `Region`, `Purpose`, `Quarter` first.
## # A tsibble: 24,320 x 4 [1Q]
## # Key: Region, Purpose [304]
## # Groups: Region [76]
## Region Purpose Quarter avg_trips
## <chr> <chr> <qtr> <dbl>
## 1 Melbourne Visiting 2017 Q4 985.
## 2 Sydney Business 2001 Q4 948.
## 3 Sydney Visiting 2016 Q4 921.
## 4 Sydney Visiting 2017 Q4 920.
## 5 Sydney Visiting 2017 Q1 916.
## 6 South Coast Holiday 1998 Q1 915.
## 7 North Coast NSW Holiday 2016 Q1 906.
## 8 Sydney Business 2017 Q3 892.
## 9 Sydney Business 2017 Q2 884.
## 10 Sydney Visiting 2013 Q4 882.
## # ℹ 24,310 more rows
# Total trips per State for every quarter: regions and purposes are summed
# away, leaving a tsibble keyed by State alone.
tourism |>
  index_by(Quarter) |>
  group_by(State) |>
  summarise(
    Trips = sum(Trips)
  )
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter Trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
# Time plot of arrivals.
autoplot(aus_arrivals, Arrivals) +
  labs(title = "International Arrivals to Australia")

# Seasonal plot: one line per year across the seasonal period.
gg_season(aus_arrivals, Arrivals)
## Warning: `gg_season()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_season()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Subseries plot: one panel per season.
gg_subseries(aus_arrivals, Arrivals)
## Warning: `gg_subseries()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_subseries()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Fix the RNG so the same retail series is drawn on every run.
set.seed(12345678)

# Draw one Series ID at random and keep only that series.
myseries <- filter(
  aus_retail,
  `Series ID` == sample(aus_retail$`Series ID`, 1)
)

# Explore the chosen series: time, seasonal, subseries and lag plots, then
# the autocorrelation function.
myseries |> autoplot(Turnover)
myseries |> gg_season(Turnover)
myseries |> gg_subseries(Turnover)
myseries |> gg_lag(Turnover)
## Warning: `gg_lag()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_lag()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
myseries |>
  ACF(Turnover) |>
  autoplot()
# US Employment – Total Private
us_employment |>
  filter(Title == "Total Private") |>
  autoplot(Employed)

# Bricks (the series contains missing values, hence the warning)
aus_production |> autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Hare
pelt |> autoplot(Hare)

# PBS H02 Cost
PBS |>
  filter(ATC2 == "H02") |>
  autoplot(Cost)

# US Gasoline
us_gasoline |> autoplot(Barrels)
# Victorian pig counts, restricted to January 1990 through December 1995.
vic_pigs <- filter(
  aus_livestock,
  Animal == "Pigs",
  State == "Victoria",
  between(Month, yearmonth("1990 Jan"), yearmonth("1995 Dec"))
)

# Time plot followed by the autocorrelation function of the counts.
vic_pigs |> autoplot(Count)
vic_pigs |>
  ACF(Count) |>
  autoplot()
# Google closing prices from 2018 onwards. Dates are irregularly spaced, so
# the series is re-indexed by a consecutive trading-day counter and declared
# regular before computing day-to-day changes in the closing price.
dgoog <- gafa_stock |>
  filter(Symbol == "GOOG", year(Date) >= 2018) |>
  mutate(trading_day = row_number()) |>
  update_tsibble(index = trading_day, regular = TRUE) |>
  mutate(diff = difference(Close))

# The first difference is missing, which is the single row dropped below.
dgoog |> autoplot(diff)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
dgoog |>
  ACF(diff) |>
  autoplot()
This assignment explored multiple datasets, time intervals, and visualization techniques using `fpp3`.
I practiced using `autoplot`, `gg_season`, `gg_subseries`, `gg_lag`, and `ACF` to study seasonality, cyclicity, and unusual patterns in time series.