library(lubridate)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.4.3
## Warning: package 'tsibble' was built under R version 4.4.3
## Warning: package 'tsibbledata' was built under R version 4.4.3
## Warning: package 'feasts' was built under R version 4.4.3
## Warning: package 'fabletools' was built under R version 4.4.3
## Warning: package 'fable' was built under R version 4.4.3
library(tsibble)
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
A. Use ? (or help()) to find out about the data in each series. B. What is the time interval of each series? C. Use autoplot() to produce a time plot of each series. D. For the last plot, modify the axis labels and title.
#2.1.A
# For each of the four series: load the dataset by name, then open its help
# page to read the documentation.
data(list = "aus_production")
help("aus_production")
## starting httpd help server ... done
data(list = "pelt")
help("pelt")
data(list = "gafa_stock")
help("gafa_stock")
data(list = "vic_elec")
help("vic_elec")
#2.1.B
library(fpp3)
library(tsibble)
library(lubridate)
# Helper: first and last calendar year covered by a tsibble's index.
#
# x: a tsibble; its index column is looked up with tsibble::index_var().
# Returns a length-2 numeric vector c(first_year, last_year).
#
# A bare numeric index (such as pelt's Year) already holds calendar years.
# Passing it to lubridate::year() makes the numbers be read as date-times
# counted from 1970, which reports 1970 for both ends and raises a tz()
# warning, so a plain numeric index is returned as it is. Classed, date-like
# indexes (yearquarter, Date, POSIXct) still go through year().
year_range <- function(x) {
  idx <- x[[tsibble::index_var(x)]]
  first <- min(idx, na.rm = TRUE)
  last <- max(idx, na.rm = TRUE)
  if (is.numeric(idx) && !is.object(idx)) {
    return(c(first, last))
  }
  c(year(first), year(last))
}
aus_rng <- year_range(aus_production) # 1956–2010
pelt_rng <- year_range(pelt) # 1845–1935
gafa_rng <- year_range(gafa_stock) # 2014–2018
vic_rng <- year_range(vic_elec) # 2012–2014
# paste0() attaches the final period to the end year; cat() would otherwise
# insert its separator and print "2010 ." instead of "2010.".
cat("The time interval for aus_production is Quarterly and it extends from",
    aus_rng[1], "to", paste0(aus_rng[2], ".\n"))
## The time interval for aus_production is Quarterly and it extends from 1956 to 2010.
cat("The time interval for pelt is Yearly and it extends from",
    pelt_rng[1], "to", paste0(pelt_rng[2], ".\n"))
## The time interval for pelt is Yearly and it extends from 1845 to 1935.
cat("The time interval for gafa_stock is every Business day when the market is open and it extends from",
    gafa_rng[1], "to", paste0(gafa_rng[2], ".\n"))
## The time interval for gafa_stock is every Business day when the market is open and it extends from 2014 to 2018.
cat("The time interval for vic_elec is every 30 minutes and it extends from",
    vic_rng[1], "to", paste0(vic_rng[2], ".\n"))
## The time interval for vic_elec is every 30 minutes and it extends from 2012 to 2014.
#2.1.C
# Annual brick totals, restricted to years with all four quarters observed.
#
# Bricks contains missing quarters (autoplot() on the raw series reports
# removed rows). Summing with na.rm = TRUE would turn a fully missing year
# into a total of 0 and a partly observed year into an understated total,
# which shows up in the plot as a spurious collapse. Missing quarters are
# therefore dropped first and only complete years are kept.
aus_production_yearly <- aus_production |>
  filter(!is.na(Bricks)) |>
  index_by(Year = year(Quarter)) |>
  summarise(Bricks = sum(Bricks), n_quarters = n()) |>
  filter(n_quarters == 4L) |>
  select(-n_quarters)
# Time plot of the annual totals with a recoloured line and point markers.
aus_production_yearly |>
  autoplot(Bricks) +
  geom_line(color = "steelblue", linewidth = 1.2) +
  geom_point(color = "darkred", size = 1.5) +
  labs(
    title = "Annual Bricks Production in Australia",
    x = "Year",
    y = "Bricks (Annual Total)"
  )
# Time plot of the Lynx column of pelt, overlaid with a green line and
# orange point markers, and labelled explicitly.
autoplot(pelt, Lynx) +
  geom_line(colour = "forestgreen", linewidth = 1.2) +
  geom_point(colour = "orange", size = 1.2) +
  ggtitle("Annual Lynx Trappings") +
  xlab("Year") +
  ylab("Number of Lynx")
# Time plot of the closing price for the GOOG rows of gafa_stock only.
autoplot(filter(gafa_stock, Symbol == "GOOG"), Close) +
  geom_line(colour = "purple", linewidth = 1.1) +
  ggtitle("GOOG Daily Closing Price") +
  xlab("Year") +
  ylab("Closing Price (USD)")
#2.1.C& D
# Time plot of Demand from vic_elec with a custom title and axis labels
# (this is the "last plot" whose labels part D asks to modify).
autoplot(vic_elec, Demand) +
  geom_line(colour = "dodgerblue4", linewidth = 0.8) +
  ggtitle("Half-hourly Electricity Demand in Victoria") +
  xlab("Time") +
  ylab("Electricity Demand (MW)")
library(fpp3)
# Day(s) on which each stock reached its highest closing price.
# Within each Symbol, keep the rows whose Close equals that symbol's maximum
# (ties, if any, are all kept). The grouping is dropped afterwards so the
# result is not left grouped by Symbol for any later step.
gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close, na.rm = TRUE)) |>
  ungroup() |>
  select(Symbol, Date, Close)
Apple (AAPL) reached its peak closing price of $232.07 on October 3, 2018.
Amazon (AMZN) reached its peak closing price of $2,039.51 on September 4, 2018.
Facebook (FB) reached its peak closing price of $217.50 on July 25, 2018.
Google (GOOG) reached its peak closing price of $1,268.33 on July 26, 2018.
These results indicate that all four stocks achieved their maximum closing prices in mid-to-late 2018, near the end of the dataset’s time span. This clustering suggests a period of strong performance for major technology stocks before subsequent market declines later in 2018.
library(readr)
# Download tute1 from the GitHub raw URL (column-type message silenced) and
# preview its first rows.
tute1 <- readr::read_csv(
  file = "https://raw.githubusercontent.com/tanzil64/DATA624/main/tute1.csv",
  show_col_types = FALSE
)
head(tute1)
# Convert Quarter to a yearquarter and use it as the tsibble index.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)
# Reshape to long form (every column except Quarter becomes a name/value
# pair) and draw one line per series, each in its own panel with a free
# y-scale.
ggplot(
  pivot_longer(mytimeseries, cols = -Quarter),
  aes(x = Quarter, y = value, colour = name)
) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")
# WHAT HAPPENS WHEN FACET_GRID IS NOT ADDED:
# Same long-form data, but all series share a single panel and y-axis.
ggplot(
  pivot_longer(mytimeseries, cols = -Quarter),
  aes(x = Quarter, y = value, colour = name)
) +
  geom_line()
#FINDINGS:
When facet_grid() is not used, all three time series (Sales, AdBudget, and GDP) are plotted on the same y-axis. Because the series sit at quite different levels, the shared axis has to stretch to cover all of them, and the series with the largest values sets the upper end of the scale. As a result, the variation within each individual series is visually compressed and its pattern is harder to read. This is an effect of the differences in scale, not a sign that any of the series is nearly constant. Using facet_grid() with scales = "free_y" places each series in its own panel with an independent y-scale, so the underlying pattern in each series can be seen clearly.
#2.4.A Install the USgas package
library(USgas)
## Warning: package 'USgas' was built under R version 4.4.3
library(fpp3)
# Load us_total by name and show its first six rows.
data(list = "us_total")
head(us_total, n = 6L)
#2.4.B
library(USgas)
library(fpp3)
# Build a tsibble from us_total: `year` is the index, one series per `state`.
data(list = "us_total")
us_total_ts <- as_tsibble(us_total, key = state, index = year)
us_total_ts
#2.4.C
library(fpp3)
# Rebuild the tsibble (index `year`, key `state`) so this section runs on
# its own.
us_total_ts <- as_tsibble(us_total, key = state, index = year)
# The six states to keep in the plot.
new_england <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)
# Time plot of `y` for the selected states only.
autoplot(filter(us_total_ts, state %in% new_england), y) +
  ggtitle("Annual Natural Gas Consumption by State (New England)") +
  xlab("Year") +
  ylab("Natural Gas Consumption")
###2.5
https://github.com/tanzil64/DATA624/blob/main/tourism.xlsx
#2.5.A
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(readr)
# Read the tourism CSV from the GitHub raw URL (column-type message
# silenced).
tourism_raw <- readr::read_csv(
  file = "https://raw.githubusercontent.com/tanzil64/DATA624/main/tourism.csv",
  show_col_types = FALSE
)
#2.5.B
# Convert Quarter to a yearquarter index; each Region/State/Purpose
# combination is one series.
tourism_ts <- as_tsibble(
  mutate(tourism_raw, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)
tourism_ts
#2.5.C
# Region/Purpose combination with the highest average number of trips.
#
# summarise() on a tsibble always keeps the time index, so grouping the
# tsibble by Region and Purpose produces one row per Region, Purpose and
# Quarter rather than one average per Region/Purpose pair; the top row is
# then just the single largest quarter. Converting to a plain tibble first
# makes the mean run over all quarters of each combination (and avoids the
# "Current temporal ordering" warning raised by arrange() on a tsibble).
max_region_purpose <- tourism_ts |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(avg_trips = mean(Trips, na.rm = TRUE), .groups = "drop") |>
  arrange(desc(avg_trips)) |>
  slice(1)
max_region_purpose
#2.5.d
# Total trips per State and Quarter, summed over every Region and Purpose.
# Step 1: group by the Quarter index and by State.
state_total_trips <- group_by(index_by(tourism_ts, Quarter), State)
# Step 2: sum Trips within each group and declare the result as a tsibble
# keyed by State.
state_total_trips <- as_tsibble(
  summarise(state_total_trips, Trips = sum(Trips, na.rm = TRUE)),
  key = State,
  index = Quarter
)
state_total_trips
#2.8.A
library(fpp3)
# Explore the "Total Private" series of us_employment with each graphic in
# turn: time plot, seasonal plot, subseries plot, lag plot, and the ACF
# table.
autoplot(filter(us_employment, Title == "Total Private"), Employed)
gg_season(filter(us_employment, Title == "Total Private"), Employed)
## Warning: `gg_season()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_season()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
gg_subseries(filter(us_employment, Title == "Total Private"), Employed)
## Warning: `gg_subseries()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_subseries()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
gg_lag(filter(us_employment, Title == "Total Private"), Employed)
## Warning: `gg_lag()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_lag()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ACF(filter(us_employment, Title == "Total Private"), Employed)
Total Private Employed – us_employment
Seasonality: Yes. There is clear annual seasonality, reflecting hiring and layoffs that repeat each year.
Trend: A strong long-term upward trend, showing growth in private employment over time.
Cyclicality: Yes. Employment follows business cycles, expanding during economic booms and contracting during recessions.
Unusual years:
2008–2009: Sharp decline during the Global Financial Crisis.
2020: Not observable here. The us_employment series ends in 2019, so the COVID-19 drop falls outside the data.
What we learn: Employment is strongly influenced by both long-term economic growth and short-term economic shocks.
#2.8.B
library(fpp3)
# Explore the Bricks column of aus_production with each graphic in turn:
# time plot, seasonal plot, subseries plot, lag plot, and the ACF table.
# The recorded warnings come from the missing Bricks values.
autoplot(aus_production, Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
gg_season(aus_production, Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
gg_subseries(aus_production, Bricks)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
gg_lag(aus_production, Bricks)
## Warning: Removed 20 rows containing missing values (gg_lag).
ACF(aus_production, Bricks)
Bricks – aus_production
Seasonality: Yes. Strong quarterly seasonality, with regular peaks and troughs each year.
Trend: A long-term decline, especially after the mid-1970s, indicating reduced brick production.
Cyclicality: Yes. Production fluctuates with construction and housing cycles.
Unusual years:
Periods of sharp drops during housing downturns and economic slowdowns.
What we learn: Brick production is seasonal and highly sensitive to the construction sector and broader economic conditions.
#2.8.C
library(fpp3)
# Explore the Hare column of pelt: time plot, lag plot, and the ACF table.
autoplot(pelt, Hare)
gg_lag(pelt, Hare)
ACF(pelt, Hare)
Hare – pelt
Seasonality: No. The data are annual, so there is no within-year seasonality.
Trend: No clear long-term trend.
Cyclicality: Very strong cyclical behavior, with peaks roughly every 10 years, consistent with predator–prey population dynamics.
Unusual years:
Extreme population peaks and crashes, which are natural biological fluctuations.
What we learn: The hare population is dominated by biological cycles rather than trend or seasonality.
library(fpp3)
#2.8.D
# Explore total Cost for the PBS rows with ATC2 == "H02". Each graphic works
# on the same aggregate: filter to H02, then sum Cost across the remaining
# series.
autoplot(
  summarise(filter(PBS, ATC2 == "H02"), Cost = sum(Cost)),
  Cost
)
gg_season(
  summarise(filter(PBS, ATC2 == "H02"), Cost = sum(Cost)),
  Cost
)
gg_subseries(
  summarise(filter(PBS, ATC2 == "H02"), Cost = sum(Cost)),
  Cost
)
ACF(
  summarise(filter(PBS, ATC2 == "H02"), Cost = sum(Cost)),
  Cost
)
H02 Cost – PBS
Seasonality: Yes. Very strong annual seasonality, with consistent monthly patterns.
Trend: A strong upward trend, indicating rising pharmaceutical costs over time.
Cyclicality: Some medium-term fluctuations, often linked to policy changes or shifts in demand.
Unusual years:
Sudden jumps in cost during years of policy reforms or pricing changes.
What we learn: Health expenditure grows over time and shows predictable seasonal usage patterns.
#2.8.E.
library(fpp3)
# Explore the Barrels column of us_gasoline with each graphic in turn:
# time plot, seasonal plot, subseries plot, lag plot, and the ACF table.
autoplot(us_gasoline, Barrels)
gg_season(us_gasoline, Barrels)
gg_subseries(us_gasoline, Barrels)
gg_lag(us_gasoline, Barrels)
ACF(us_gasoline, Barrels)
Barrels – us_gasoline
Seasonality: Yes. Strong annual seasonality, with higher usage in summer months.
Trend: A moderate upward trend, followed by flattening in later years.
Cyclicality: Yes. Consumption responds to economic cycles.
Unusual years:
Sharp drops during recessions (e.g., 2008–2009).
2020: Not observable here. The us_gasoline series ends in early 2017, so the travel-related decline around 2020 falls outside the data.
What we learn: Gasoline consumption is highly seasonal and sensitive to economic activity and mobility.