This assignment uses time series graphics from the fpp3 framework to explore key features such as trend, seasonality, cyclicity, and unusual observations. The goal is to understand the series in the real-world datasets from economics, health and energy.
library(fpp3)
library(tidyverse)
library(USgas)
library(readxl)
bricks <- aus_production %>%
select(Quarter, Bricks) %>%
drop_na()
#plot bricks
autoplot(bricks, Bricks) +
labs(
title = "Bricks Production in Australia",
x = "Quarter",
y = "Bricks",
)
lynx <- pelt %>%
select(Year, Lynx) %>%
drop_na()
autoplot(lynx, Lynx) +
labs(
title = "Canadian Lynx Trappings",
x = "Year",
y = "Number of Lynx"
)
goog_close <- gafa_stock %>%
filter(Symbol == "GOOG") %>%
select(Date, Close) %>%
drop_na()
autoplot(goog_close, Close) +
labs(
title = "GOOG Stock Closing Price",
x = "Date",
y = "Close Price in USD"
)
demand <- vic_elec %>%
select(Time, Demand) %>%
drop_na()
autoplot(demand, Demand) +
labs(
title = "Electricity Demand in Victoria",
x = "Time",
y = "Demand"
)
## 2.2
unique(gafa_stock$Symbol)
## [1] "AAPL" "AMZN" "FB" "GOOG"
# check the unique symbol
peak_days <- gafa_stock %>%
group_by(Symbol) %>%
filter(Close == max(Close, na.rm=TRUE)) %>%
select(Symbol, Date, Close)
peak_days
## # A tsibble: 4 x 3 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Close
## <chr> <date> <dbl>
## 1 AAPL 2018-10-03 232.
## 2 AMZN 2018-09-04 2040.
## 3 FB 2018-07-25 218.
## 4 GOOG 2018-07-26 1268.
tute1 <- read.csv("https://raw.githubusercontent.com/vincent-usny/week2/refs/heads/main/tute1.csv")
view(tute1)
mytimeseries <- tute1 |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter)
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line()
Without facet_grid(), all time series will be in one grid. Using
facet_grid() is easier to identify each time series.
?us_total
## starting httpd help server ... done
head(us_total)
## year state y
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
us_total_ts <- us_total %>%
as_tsibble(
index = year,
key = state
)
new_england <- c("Maine", "Vermont", "New Hampshire",
"Massachusetts", "Connecticut", "Rhode Island")
us_new_england <- us_total_ts %>%
filter(state %in% new_england)
autoplot(us_new_england, y) +
labs(
title = "Annual Gas Consumption in New England",
x = "Year",
y = "Consumption"
)
#a
download.file(
url = "https://github.com/vincent-usny/week2/raw/main/tourism.xlsx",
destfile = "tourism.xlsx",
mode = "wb"
)
tourism <-read_excel("tourism.xlsx")
head(tourism)
## # A tibble: 6 × 5
## Quarter Region State Purpose Trips
## <chr> <chr> <chr> <chr> <dbl>
## 1 1998-01-01 Adelaide South Australia Business 135.
## 2 1998-04-01 Adelaide South Australia Business 110.
## 3 1998-07-01 Adelaide South Australia Business 166.
## 4 1998-10-01 Adelaide South Australia Business 127.
## 5 1999-01-01 Adelaide South Australia Business 137.
## 6 1999-04-01 Adelaide South Australia Business 200.
#b
tourism_ts <- tourism %>%
mutate(Quarter = yearquarter(Quarter)) %>%
as_tsibble(
index = Quarter,
key = c(State, Region, Purpose)
)
tourism_ts
## # A tsibble: 24,320 x 5 [1Q]
## # Key: State, Region, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Canberra ACT Business 150.
## 2 1998 Q2 Canberra ACT Business 99.9
## 3 1998 Q3 Canberra ACT Business 130.
## 4 1998 Q4 Canberra ACT Business 102.
## 5 1999 Q1 Canberra ACT Business 95.5
## 6 1999 Q2 Canberra ACT Business 229.
## 7 1999 Q3 Canberra ACT Business 109.
## 8 1999 Q4 Canberra ACT Business 159.
## 9 2000 Q1 Canberra ACT Business 105.
## 10 2000 Q2 Canberra ACT Business 202.
## # ℹ 24,310 more rows
#c
max_avg <- tourism_ts %>%
group_by(Region, Purpose) %>%
summarise(avg_trips = mean(Trips, na.rm=TRUE)) %>%
arrange(desc(avg_trips)) %>%
slice(1)
#d
tourism_state <- tourism_ts %>%
index_by(Quarter) %>%
group_by(State) %>%
summarise(TotalTrips = sum(Trips, na.rm = TRUE)) %>%
ungroup() %>%
as_tsibble(index = Quarter, key = State)
tourism_state
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter TotalTrips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
#1. Total Private
us_private <- us_employment %>%
filter(Title == "Total Private") %>%
select(Month, Employed)
autoplot(us_private, Employed)
gg_season(us_private, Employed)
## Warning: `gg_season()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_season()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
gg_subseries(us_private, Employed)
## Warning: `gg_subseries()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_subseries()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
gg_lag(us_private, Employed, lag=12)
## Warning: `gg_lag()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_lag()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ACF(us_private, Employed)
## # A tsibble: 29 x 2 [1M]
## lag acf
## <cf_lag> <dbl>
## 1 1M 0.997
## 2 2M 0.993
## 3 3M 0.990
## 4 4M 0.986
## 5 5M 0.983
## 6 6M 0.980
## 7 7M 0.977
## 8 8M 0.974
## 9 9M 0.971
## 10 10M 0.968
## # ℹ 19 more rows
#2. Bricks
bricks <- aus_production %>%
select(Quarter, Bricks) %>%
drop_na(Bricks)
autoplot(bricks, Bricks)
gg_season(bricks, Bricks)
gg_subseries(bricks, Bricks)
gg_lag(bricks, Bricks, lag = 4)
ACF(bricks, Bricks)
## # A tsibble: 22 x 2 [1Q]
## lag acf
## <cf_lag> <dbl>
## 1 1Q 0.900
## 2 2Q 0.815
## 3 3Q 0.813
## 4 4Q 0.828
## 5 5Q 0.720
## 6 6Q 0.642
## 7 7Q 0.655
## 8 8Q 0.692
## 9 9Q 0.609
## 10 10Q 0.556
## # ℹ 12 more rows
#3. Hare
hare <- pelt %>%
select(Year, Hare)
autoplot(hare, Hare)
gg_lag(hare, Hare, lag = 1)
ACF(hare, Hare)
## # A tsibble: 19 x 2 [1Y]
## lag acf
## <cf_lag> <dbl>
## 1 1Y 0.658
## 2 2Y 0.214
## 3 3Y -0.155
## 4 4Y -0.401
## 5 5Y -0.493
## 6 6Y -0.401
## 7 7Y -0.168
## 8 8Y 0.113
## 9 9Y 0.307
## 10 10Y 0.340
## 11 11Y 0.296
## 12 12Y 0.206
## 13 13Y 0.0372
## 14 14Y -0.153
## 15 15Y -0.285
## 16 16Y -0.295
## 17 17Y -0.202
## 18 18Y -0.0676
## 19 19Y 0.0956
#4. PBS HO2
pbs_h02 <- PBS %>%
filter(
ATC1 == "H",
ATC2 == "H02",
Concession == "General",
Type == "Co-payments") %>%
select(Month, Cost)
autoplot(pbs_h02, Cost)
gg_season(pbs_h02, Cost)
gg_subseries(pbs_h02, Cost)
gg_lag(pbs_h02, Cost, lag = 12)
ACF(pbs_h02, Cost)
## # A tsibble: 23 x 2 [1M]
## lag acf
## <cf_lag> <dbl>
## 1 1M 0.525
## 2 2M 0.484
## 3 3M 0.417
## 4 4M 0.386
## 5 5M 0.320
## 6 6M 0.357
## 7 7M 0.346
## 8 8M 0.239
## 9 9M 0.272
## 10 10M 0.197
## # ℹ 13 more rows
#5. Gasoline
gasoline <- us_gasoline %>%
select(Week, Barrels)
autoplot(gasoline, Barrels)
gg_season(gasoline, Barrels)
gg_subseries(gasoline, Barrels)
gg_lag(gasoline, Barrels, lag = 52)
ACF(gasoline, Barrels)
## # A tsibble: 31 x 2 [1W]
## lag acf
## <cf_lag> <dbl>
## 1 1W 0.893
## 2 2W 0.882
## 3 3W 0.873
## 4 4W 0.866
## 5 5W 0.847
## 6 6W 0.844
## 7 7W 0.832
## 8 8W 0.831
## 9 9W 0.822
## 10 10W 0.808
## # ℹ 21 more rows
Seasonality is strong is total private employment, PBS H02 cost and US gasoline, but none in Hare. Hare is strong in cyclicity. Employment and PBS costs show strong upward trend. Employment and gasoline peak in specific seasons each year. Unusual years vary in each session.
The analysis shows that many series have strong trends and seasonal patterns, especially employment, pharmaceutical costs, and gasoline consumption. rick production displays seasonal behavior with long-term decline, while hare pelts show clear multi-year cycles without seasonality.