# load libraries
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(ggplot2)
library(readxl)
library(tsibble)
library(dplyr)
library(ggplot2)
library(USgas)
# 2.1 : Bricks
?aus_production
bricks_data <- aus_production %>% select(Quarter, Bricks)
time_interval_bricks <- range(bricks_data$Quarter)
time_interval_bricks
## <yearquarter[2]>
## [1] "1956 Q1" "2010 Q2"
## # Year starts on: January
autoplot(aus_production, Bricks) +
ggtitle("Bricks Production in Australia") +
xlab("Year") + ylab("Bricks Produced")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# 2.1 : Lynx
?pelt
lynx_data <- pelt %>% select(Year, Lynx)
time_interval_lynx <- range(lynx_data$Year)
time_interval_lynx
## [1] 1845 1935
autoplot(pelt, Lynx) +
ggtitle("Lynx Population Over Time") +
xlab("Year") + ylab("Lynx Count")
# 2.1 : Close
?gafa_stock
close_data <- gafa_stock %>% select(Date, Close)
time_interval_close <- range(close_data$Date)
time_interval_close
## [1] "2014-01-02" "2018-12-31"
autoplot(gafa_stock, Close) +
ggtitle("GAFA Stock Closing Prices") +
xlab("Year") + ylab("Closing Price")
# 2.1 : Demand
?vic_elec
demand_data <- vic_elec %>% select(Time, Demand)
time_interval_demand <- range(demand_data$Time)
time_interval_demand
## [1] "2012-01-01 00:00:00 AEDT" "2014-12-31 23:30:00 AEDT"
autoplot(vic_elec, Demand) +
ggtitle("Electricity Demand in Victoria") +
xlab("Year") + ylab("Electricity Demand (MW)") +
theme_minimal()
#2.2
gafa_tbl <- as_tibble(gafa_stock)
# maximum closing price for each stock
peak_prices <- gafa_tbl %>%
group_by(Symbol) %>%
summarize(Max_Close = max(Close, na.rm = TRUE), .groups = "drop")
# days when closing price matched peak price
peak_days <- gafa_tbl %>%
inner_join(peak_prices, by = "Symbol") %>%
filter(Close == Max_Close) %>%
select(Date, Symbol, Close)
peak_days
## # A tibble: 4 × 3
## Date Symbol Close
## <date> <chr> <dbl>
## 1 2018-10-03 AAPL 232.
## 2 2018-09-04 AMZN 2040.
## 3 2018-07-25 FB 218.
## 4 2018-07-26 GOOG 1268.
#2.3
tute1 <- readr::read_csv("tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(tute1)
# data to time series
mytimeseries <- tute1 |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter)
# time series plots of each of the three series
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y")
#2.4
library(USgas)
library(fpp3)
library(ggplot2)
# US natural gas consumption data
data("us_total", package = "USgas")
us_gas_tsibble <- us_total %>%
rename(Consumption = y) %>%
as_tsibble(index = year, key = state)
new_england_states <- c("Maine", "Vermont", "New Hampshire",
"Massachusetts", "Connecticut", "Rhode Island")
new_england_gas <- us_gas_tsibble %>%
filter(state %in% new_england_states)
autoplot(new_england_gas, Consumption) +
ggtitle("Annual Natural Gas Consumption in New England (US)") +
xlab("Year") + ylab("Consumption (Billion Cubic Feet)") +
facet_wrap(~state, scales = "free_y") +
theme_minimal()
#2.5
tourism_data <- read_excel("tourism.xlsx")
head(tourism_data)
## # A tibble: 6 × 5
## Quarter Region State Purpose Trips
## <chr> <chr> <chr> <chr> <dbl>
## 1 1998-01-01 Adelaide South Australia Business 135.
## 2 1998-04-01 Adelaide South Australia Business 110.
## 3 1998-07-01 Adelaide South Australia Business 166.
## 4 1998-10-01 Adelaide South Australia Business 127.
## 5 1999-01-01 Adelaide South Australia Business 137.
## 6 1999-04-01 Adelaide South Australia Business 200.
tourism_tsibble <- tourism_data %>%
mutate(Quarter = yearquarter(Quarter)) %>%
as_tsibble(index = Quarter, key = c(State, Region, Purpose))
glimpse(tourism_tsibble)
## Rows: 24,320
## Columns: 5
## Key: State, Region, Purpose [304]
## $ Quarter <qtr> 1998 Q1, 1998 Q2, 1998 Q3, 1998 Q4, 1999 Q1, 1999 Q2, 1999 Q3,…
## $ Region <chr> "Canberra", "Canberra", "Canberra", "Canberra", "Canberra", "C…
## $ State <chr> "ACT", "ACT", "ACT", "ACT", "ACT", "ACT", "ACT", "ACT", "ACT",…
## $ Purpose <chr> "Business", "Business", "Business", "Business", "Business", "B…
## $ Trips <dbl> 150.19812, 99.93268, 129.56512, 101.69897, 95.52491, 229.05762…
# Region and purpose with max average overnight trips
max_avg_trips <- tourism_tsibble %>%
arrange(Region, Purpose, Quarter) %>%
group_by(Region, Purpose) %>%
summarise(Avg_Trips = mean(Trips, na.rm = TRUE), .groups = "drop") %>%
arrange(desc(Avg_Trips)) %>%
slice(1)
## Warning: Current temporal ordering may yield unexpected results.
## ℹ Suggest to sort by `Region`, `Purpose`, `Quarter` first.
max_avg_trips
## # A tsibble: 1 x 4 [1Q]
## # Key: Region, Purpose [1]
## Region Purpose Quarter Avg_Trips
## <chr> <chr> <qtr> <dbl>
## 1 Melbourne Visiting 2017 Q4 985.
state_tsibble <- tourism_tsibble %>%
index_by(Quarter) %>%
summarise(Total_Trips = sum(Trips, na.rm = TRUE), .groups = "drop")
state_tsibble
## # A tsibble: 80 x 2 [1Q]
## Quarter Total_Trips
## <qtr> <dbl>
## 1 1998 Q1 23182.
## 2 1998 Q2 20323.
## 3 1998 Q3 19827.
## 4 1998 Q4 20830.
## 5 1999 Q1 22087.
## 6 1999 Q2 21458.
## 7 1999 Q3 19914.
## 8 1999 Q4 20028.
## 9 2000 Q1 22339.
## 10 2000 Q2 19941.
## # ℹ 70 more rows
# 2.8 : Total Private Employment from us_employment
?us_employment
employment_ts <- us_employment %>%
filter(Title == "Total Private") %>%
select(Month, Employed)
# Autoplot
autoplot(employment_ts, Employed) +
ggtitle("Total Private Employment in the US") +
xlab("Year") + ylab("Employed (millions)")
# Seasonal plot
gg_season(employment_ts, Employed) +
ggtitle("Seasonal Plot: Total Private Employment")
# Subseries plot
gg_subseries(employment_ts, Employed) +
ggtitle("Subseries Plot: Total Private Employment")
# Lag plot
gg_lag(employment_ts, Employed, geom = "point")
# Autocorrelation
ACF(employment_ts, Employed) %>% autoplot()
- upward trend in private employment from 1940 to 2023 - employment
numbers steadily increase, reflecting economic growth over time -
seasonal variation is relatively small
# 2.8 : Bricks from aus_production
?aus_production
# Autoplot
autoplot(aus_production, Bricks) +
ggtitle("Bricks Production in Australia") +
xlab("Year") + ylab("Production (millions)")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Seasonal plot
gg_season(aus_production, Bricks) +
ggtitle("Seasonal Plot: Bricks Production")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Subseries plot
gg_subseries(aus_production, Bricks) +
ggtitle("Subseries Plot: Bricks Production")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Lag plot
gg_lag(aus_production, Bricks, geom = "point")
## Warning: Removed 20 rows containing missing values (gg_lag).
# Autocorrelation
ACF(aus_production, Bricks) %>% autoplot()
- There is a clear growth trend up to 1980, followed by cyclical
fluctuations and volatility. - Bricks production exhibits strong
seasonal patterns, with higher output in Q2 and Q3.
# 2.8 : Hare Population from pelt
?pelt
# Autoplot - General Trend
autoplot(pelt, Hare) +
ggtitle("Hare Population Over Time") +
xlab("Year") + ylab("Hare Count")
# Lag plot - Checks for cyclic dependence
gg_lag(pelt, Hare, geom = "point") +
ggtitle("Lag Plot: Hare Population")
# Autocorrelation function (ACF) to check periodicity
ACF(pelt, Hare) %>% autoplot() +
ggtitle("Autocorrelation: Hare Population")
- The hare population follows cyclic fluctuations, with peaks and
crashes roughly every 10 years
#2.8: H02 Cost from PBS (Pharmaceutical Benefit Scheme)
?PBS
pbs_h02 <- PBS %>%
filter(ATC2 == "H02") %>%
index_by(Month) %>%
summarise(Cost = sum(Cost, na.rm = TRUE)) %>%
as_tsibble(index = Month)
# Autoplot - Trend Analysis
autoplot(pbs_h02, Cost) +
ggtitle("H02 Drug Costs Over Time") +
xlab("Year") + ylab("Cost (millions)")
# Seasonal Plot
gg_season(pbs_h02, Cost) +
ggtitle("Seasonal Plot: H02 Costs")
# Subseries Plot
gg_subseries(pbs_h02, Cost) +
ggtitle("Subseries Plot: H02 Costs")
# Lag Plot
gg_lag(pbs_h02, Cost, geom = "point") +
ggtitle("Lag Plot: H02 Costs")
# Autocorrelation Function (ACF)
ACF(pbs_h02, Cost) %>% autoplot() +
ggtitle("Autocorrelation: H02 Costs")
- H02 drug costs show long-term growth with seasonal fluctuations. -
Drug costs follow a predictable annual cycle, with peaks in January and
December.
#2.8: Barrels from us_gasoline
?us_gasoline
# Autoplot
autoplot(us_gasoline, Barrels) +
ggtitle("US Gasoline Production Over Time") +
xlab("Year") + ylab("Barrels (millions)")
# Seasonal plot
gg_season(us_gasoline, Barrels) +
ggtitle("Seasonal Plot: US Gasoline Production")
# Subseries plot
gg_subseries(us_gasoline, Barrels) +
ggtitle("Subseries Plot: US Gasoline Production")
# Lag plot
gg_lag(us_gasoline, Barrels, geom = "point")
# Autocorrelation
ACF(us_gasoline, Barrels) %>% autoplot()
- Upward trend
- Strong weekly/seasonal