library(fpp3)
# Peak closing price per stock: within each Symbol, keep every row whose
# Close equals that stock's maximum (so tied peak dates are all returned),
# then order the result and keep only the identifying columns.
gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close, na.rm = TRUE)) |>
  arrange(Symbol, Date) |>
  select(Symbol, Date, Close)
## # A tsibble: 4 x 3 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Close
## <chr> <date> <dbl>
## 1 AAPL 2018-10-03 232.
## 2 AMZN 2018-09-04 2040.
## 3 FB 2018-07-25 218.
## 4 GOOG 2018-07-26 1268.
- group_by(Symbol) handles each stock separately (GOOG, AAPL, FB/META, AMZN, depending on the dataset version).
- filter(Close == max(Close, na.rm = TRUE)) keeps only the rows where Close is at the maximum for that stock.

Explanation: I grouped the data by stock symbol and filtered each group to keep only the rows where the closing price equals the maximum closing price for that stock. This returns the date(s) on which each stock reached its peak closing price.
library(fpp3)
library(readr)
# Load the uploaded CSV (expected in the working directory)
tute1 <- read_csv("tute1.csv")
# View(tute1)
# Parse Quarter as a yearquarter and use it as the tsibble index
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)
# Faceted view: one panel per series, each with its own y-scale
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")
# Same plot without facet_grid(): every series shares a single y-axis
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line()
- With facet_grid(), each variable gets its own panel and its own y-scale,
  so you can clearly see the movement in Sales, AdBudget, and GDP.
- Without facet_grid(), all three series share one y-axis, so the series
  with the larger values dominates and the smaller ones look flatter and
  are harder to compare.
library(fpp3)
library(USgas)
# Step 1: turn us_total into a tsibble indexed by year and keyed by state
gas_ts <- as_tsibble(us_total, key = state, index = year)
# Step 2: restrict to the six New England states
new_england_states <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)
gas_ne <- filter(gas_ts, state %in% new_england_states)
# Step 3: one panel per state; free y-scales so each state's series is
# readable on its own range
gas_ne |>
  autoplot(y) +
  facet_wrap(vars(state), scales = "free_y") +
  labs(
    title = "Annual Natural Gas Consumption by State (New England)",
    x = "Year",
    y = "Natural Gas Consumption"
  )
# Explanation: I installed the USgas package, converted us_total into a
# tsibble using year as the index and state as the key, filtered to the six
# New England states, and plotted annual natural gas consumption by state
# using faceted time plots.
library(fpp3)
library(readxl)
# 1) Load the uploaded Excel workbook
tourism_xlsx <- read_excel("tourism.xlsx")
# Print the column names to confirm the structure before converting
names(tourism_xlsx)
## [1] "Quarter" "Region" "State" "Purpose" "Trips"
# 2) Rebuild the same structure as the built-in tourism tsibble:
#    Quarter becomes the yearquarter index; Region/State/Purpose are the keys
tourism_ts <- as_tsibble(
  mutate(tourism_xlsx, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)
# 3) Region + Purpose with the maximum average Trips
# summarise() on a tsibble always keeps the time index, so grouping the
# tsibble by Region/Purpose produces one row per Region/Purpose/Quarter and
# the "average" is taken within a single quarter rather than across time.
# Converting to a plain tibble first drops the index, giving exactly one
# overall mean per Region/Purpose combination.
max_region_purpose <- tourism_ts |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(avg_trips = mean(Trips, na.rm = TRUE), .groups = "drop") |>
  slice_max(avg_trips, n = 1, with_ties = FALSE)
max_region_purpose
# NOTE(review): the output previously recorded here (Melbourne / Visiting /
# 2017 Q4 / 985) was a per-quarter value from the tsibble summarise, not an
# average over all quarters. Re-run this chunk to record the corrected result
# (a 1 x 3 tibble with columns Region, Purpose, avg_trips).
# 4) New tsibble: total Trips by State (combine Regions + Purposes)
# Grouping a tsibble by State and summarising already aggregates within each
# Quarter (the index is kept implicitly) and returns a tsibble keyed by State,
# so no explicit index_by() or re-conversion with as_tsibble() is needed.
tourism_state_total <- tourism_ts |>
  group_by(State) |>
  summarise(Trips = sum(Trips, na.rm = TRUE))
tourism_state_total
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter Trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
library(fpp3)
# ----------------------------
# 1) US employment: "Total Private"
# ----------------------------
# Keep only the "Total Private" series and the two columns that get plotted
emp <- us_employment |>
  filter(Title == "Total Private") |>
  select(Month, Employed)
# Time plot
emp |>
  autoplot(Employed) +
  labs(title = "US Employment: Total Private", x = "Month", y = "Employed")
# Seasonal plot
emp |>
  gg_season(Employed) +
  labs(title = "Season plot: US Employment (Total Private)")
# Seasonal subseries plot
emp |>
  gg_subseries(Employed) +
  labs(title = "Subseries plot: US Employment (Total Private)")
# Lag plots for lags 1 through 12
emp |>
  gg_lag(Employed, lags = 1:12) +
  labs(title = "Lag plots: US Employment (Total Private)")
# Autocorrelation function
emp |>
  ACF(Employed) |>
  autoplot() +
  labs(title = "ACF: US Employment (Total Private)")
# ----------------------------
# 2) Bricks from aus_production
# ----------------------------
# Keep just the Bricks column (the tsibble index is carried along)
bricks <- select(aus_production, Bricks)
# Time plot
bricks |>
  autoplot(Bricks) +
  labs(title = "Australia Production: Bricks", x = "Quarter", y = "Bricks")
# Seasonal plot
bricks |>
  gg_season(Bricks) +
  labs(title = "Season plot: Bricks (aus_production)")
# Seasonal subseries plot
bricks |>
  gg_subseries(Bricks) +
  labs(title = "Subseries plot: Bricks (aus_production)")
# Lag plots for lags 1 through 8
bricks |>
  gg_lag(Bricks, lags = 1:8) +
  labs(title = "Lag plots: Bricks (aus_production)")
# Autocorrelation function
bricks |>
  ACF(Bricks) |>
  autoplot() +
  labs(title = "ACF: Bricks (aus_production)")
# ----------------------------
# 3) Hare from pelt
# ----------------------------
# Season/subseries plots are skipped in this section: they are not
# meaningful for yearly data.
hare <- select(pelt, Year, Hare)
# Time plot
hare |>
  autoplot(Hare) +
  labs(title = "Pelt: Hare", x = "Year", y = "Hare")
# Lag plots for lags 1 through 12
hare |>
  gg_lag(Hare, lags = 1:12) +
  labs(title = "Lag plots: Hare (pelt)")
# Autocorrelation function
hare |>
  ACF(Hare) |>
  autoplot() +
  labs(title = "ACF: Hare (pelt)")
# ----------------------------
# 4) PBS: H02 Cost
# ----------------------------
# All H02 rows; this still contains several series (one per
# Concession x Type combination), which the first three plots show together.
pbs_h02 <- PBS |>
  filter(ATC2 == "H02")
autoplot(pbs_h02, Cost) +
  labs(title = "PBS: H02 Cost", x = "Month", y = "Cost")
gg_season(pbs_h02, Cost) +
  labs(title = "Season plot: PBS H02 Cost")
gg_subseries(pbs_h02, Cost) +
  labs(title = "Subseries plot: PBS H02 Cost")
# Pick ONE PBS H02 series so gg_lag/ACF will work.
# The series is selected explicitly by its key values rather than by position
# in a split list, so the code states which series is being analysed and does
# not depend on group ordering. Concessional / Co-payments is the combination
# that sorts first among the Concession x Type groups.
pbs_h02_one <- pbs_h02 |>
  filter(Concession == "Concessional", Type == "Co-payments")
gg_lag(pbs_h02_one, Cost, lags = 1:12) +
  labs(title = "Lag plots: PBS H02 Cost (single series)")
pbs_h02_one |>
  ACF(Cost) |>
  autoplot() +
  labs(title = "ACF: PBS H02 Cost (single series)")
# ----------------------------
# 5) US gasoline: Barrels
# ----------------------------
# Keep the weekly index and the Barrels measurement
gas <- select(us_gasoline, Week, Barrels)
# Time plot
gas |>
  autoplot(Barrels) +
  labs(title = "US Gasoline: Barrels", x = "Week", y = "Barrels")
# Seasonal plot
gas |>
  gg_season(Barrels) +
  labs(title = "Season plot: US Gasoline Barrels")
# Seasonal subseries plot
gas |>
  gg_subseries(Barrels) +
  labs(title = "Subseries plot: US Gasoline Barrels")
# Lag plots for lags 1 through 12
gas |>
  gg_lag(Barrels, lags = 1:12) +
  labs(title = "Lag plots: US Gasoline Barrels")
# Autocorrelation function
gas |>
  ACF(Barrels) |>
  autoplot() +
  labs(title = "ACF: US Gasoline Barrels")
## Exercise 2.8 – Write-up
These series are not random. Most of them show a combination of trend and seasonality (employment, bricks, PBS cost, gasoline), while hare is mostly driven by longer multi-year cycles. The ACF and lag plots make it clear that past values are informative about future values, especially for employment and gasoline.
Employment’s seasonal swings are present but smaller compared to the overall long-run growth. Bricks has a strong quarterly seasonal pattern where certain quarters tend to be consistently higher/lower. PBS H02 cost also has seasonality, and the size of the seasonal peaks doesn’t look perfectly constant across time. Gasoline shows a strong repeating yearly seasonal pattern.
Unusual periods show up as sharp spikes or dips relative to the usual seasonal range or trend. Employment and gasoline both have periods that deviate strongly from the surrounding years. PBS H02 cost has some spikes that stand out compared to its typical seasonal pattern. Bricks shows stretches where the level shifts or stays depressed compared to earlier periods, and hare has extreme peaks/troughs at the high and low points of its cycle.