{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE)
# Function to install a package if not already installed
install_if_needed <- function(pkg) {
if (!requireNamespace(pkg, quietly = TRUE)) {
install.packages(pkg)
}
}
# List of packages to check and install if necessary
required_packages <- c("fpp3", "dplyr", "ggplot2", "lubridate", "tsibble",
"tsibbledata", "feasts", "fable", "fabletools",
"curl", "USgas", "readxl", "readr", "tidyr")
# Loop through the list and install packages only if needed
for (pkg in required_packages) {
install_if_needed(pkg)
}
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
# Function to suppress package startup messages
suppressPackageStartupMessages({
library(fpp3)
library(dplyr)
library(ggplot2)
library(lubridate)
library(tsibble)
library(tsibbledata)
library(feasts)
library(fable)
library(fabletools)
library(readr)
library(readxl)
library(tidyr)
})
?aus_production
## starting httpd help server ... done
?pelt
?gafa_stock
?vic_elec
interval_info <- interval(aus_production)
sprintf("The time interval of aus_production is quarterly: %s quarters", interval_info$quarter)
## [1] "The time interval of aus_production is quarterly: 1 quarters"
interval_info <- interval(pelt)
sprintf("The time interval of pelt is yearly: %s year", interval_info$year)
## [1] "The time interval of pelt is yearly: 1 year"
library(tsibble)
library(lubridate)
interval_info <- interval(gafa_stock)
print("The time interval of gafa_stock is irregular")
## [1] "The time interval of gafa_stock is irregular"
interval_info
## <interval[1]>
## [1] !
interval_info <- interval(vic_elec)
sprintf("The time interval of vic_elec is every: %s minutes", interval_info$minute)
## [1] "The time interval of vic_elec is every: 30 minutes"
aus_production |>
filter(!is.na(Bricks)) |> # Filter out missing values
autoplot(Bricks) + # Explicitly specify the 'Bricks' column
labs(title = "Bricks Production Over Time",
x = "Quarter",
y = "Bricks")
pelt |>
select(Lynx) |>
autoplot(Lynx) + # Explicitly specify 'Lynx' column
labs(title = "Lynx Pelt Production Over Time",
x = "Year",
y = "Number of Pelts")
gafa_stock |>
select(Close) |>
autoplot(Close) + # Explicitly specify 'Close' column for plotting
labs(title = "GAFA Stock Closing Prices Over Time",
x = "Date",
y = "Closing Price")
vic_elec |>
autoplot(Demand) + # Explicitly specify 'Demand' column
labs(x = "Time",
y = "Electricity Demand",
title = "Half-Hourly Electricity Demand")
Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.
# Display the first 5 rows of the gafa_stock dataset to inspect the stock data
head(gafa_stock, n = 5)
## # A tsibble: 5 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
# Find the days corresponding to the peak closing price for each stock
peak_close <- gafa_stock |>
group_by(Symbol) |> # Group by stock symbol
filter(Close == max(Close)) |> # Filter for the maximum close price for each stock
select(Symbol, Date, Close) # Select relevant columns for output
# View the result
peak_close
## # A tsibble: 4 x 3 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Close
## <chr> <date> <dbl>
## 1 AAPL 2018-10-03 232.
## 2 AMZN 2018-09-04 2040.
## 3 FB 2018-07-25 218.
## 4 GOOG 2018-07-26 1268.
tute1 <- readr::read_csv('https://raw.githubusercontent.com/hawa1983/DATA-624/main/tute1.csv', show_col_types = FALSE)
head(tute1, n=5)
## # A tibble: 5 × 4
## Quarter Sales AdBudget GDP
## <date> <dbl> <dbl> <dbl>
## 1 1981-03-01 1020. 659. 252.
## 2 1981-06-01 889. 589 291.
## 3 1981-09-01 795 512. 291.
## 4 1981-12-01 1004. 614. 292.
## 5 1982-03-01 1058. 647. 279.
mytimeseries <- tute1 |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter)
head(mytimeseries, n=5)
## # A tsibble: 5 x 4 [1Q]
## Quarter Sales AdBudget GDP
## <qtr> <dbl> <dbl> <dbl>
## 1 1981 Q1 1020. 659. 252.
## 2 1981 Q2 889. 589 291.
## 3 1981 Q3 795 512. 291.
## 4 1981 Q4 1004. 614. 292.
## 5 1982 Q1 1058. 647. 279.
mytimeseries |>
pivot_longer(-Quarter) |> # reshapes the dataset from wide to long format.
ggplot(aes(x = Quarter, y = value, colour = name)) + # Creates a ggplot object to plot the data.
geom_line() + # Adds a line plot for each "name" variable across time (Quarter).
facet_grid(name ~ ., scales = "free_y") # Splits the plot into multiple panels, one for each "name". The "scales = 'free_y'" argument allows each panel to have its own y-axis scale.
c_1. Construct time series plots of each of the three without facet_grid
mytimeseries |>
pivot_longer(-Quarter) |> # reshapes the dataset from wide to long format.
ggplot(aes(x = Quarter, y = value, colour = name)) + # Creates a ggplot object to plot the data.
geom_line() # Adds a line plot for each "name" variable across time (Quarter).
if (!require(USgas)) {
install.packages("USgas")
library(USgas)
}
## Loading required package: USgas
us_total <- us_total |>
as_tsibble(index = year, key = state)
head(us_total, n=5)
## # A tsibble: 5 x 3 [1Y]
## # Key: state [1]
## year state y
## <int> <chr> <int>
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
us_total |>
filter(state %in% c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")) |>
autoplot(y, size = 1.0) +
labs(x = "Year",
y = "Natural Gas Consumption",
title = "Annual Natural Gas Consumption")
# Download the file to a temporary location
temp_file <- tempfile(fileext = ".xlsx")
download.file('https://raw.githubusercontent.com/hawa1983/DATA-624/main/tourism.xlsx', temp_file, mode = "wb")
# Read the downloaded Excel file
tourism_data <- readxl::read_excel(temp_file)
# Display the first few rows of the data
head(tourism_data)
## # A tibble: 6 × 5
## Quarter Region State Purpose Trips
## <chr> <chr> <chr> <chr> <dbl>
## 1 1998-01-01 Adelaide South Australia Business 135.
## 2 1998-04-01 Adelaide South Australia Business 110.
## 3 1998-07-01 Adelaide South Australia Business 166.
## 4 1998-10-01 Adelaide South Australia Business 127.
## 5 1999-01-01 Adelaide South Australia Business 137.
## 6 1999-04-01 Adelaide South Australia Business 200.
# Clean up by removing the temporary file
unlink(temp_file)
# Create the tsibble
my_tourism_tsibble <- tourism_data |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(key = c(Region, State, Purpose), index = Quarter)
head(my_tourism_tsibble)
## # A tsibble: 6 x 5 [1Q]
## # Key: Region, State, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
b1. Determine if the my_tourism_tsibble is equal/identical to the tourism tsibble in the tsibble package
library(dplyr)
# Sort both datasets by relevant columns
all.equal(arrange(my_tourism_tsibble, State, Quarter), arrange(tourism, State, Quarter))
## [1] TRUE
# Calculate average trips by Region and Purpose
average_trips <- my_tourism_tsibble |>
group_by(Region, Purpose) |>
summarise(avg_trips = mean(Trips, na.rm = TRUE)) |>
arrange(desc(avg_trips))
## Warning: Current temporal ordering may yield unexpected results.
## ℹ Suggest to sort by `Region`, `Purpose`, `Quarter` first.
# Find the combination with the maximum average trips
max_avg_trips <- average_trips |>
filter(avg_trips == max(avg_trips))
head(max_avg_trips)
## # A tsibble: 6 x 4 [1Q]
## # Key: Region, Purpose [6]
## # Groups: Region [6]
## Region Purpose Quarter avg_trips
## <chr> <chr> <qtr> <dbl>
## 1 Melbourne Visiting 2017 Q4 985.
## 2 Sydney Business 2001 Q4 948.
## 3 South Coast Holiday 1998 Q1 915.
## 4 North Coast NSW Holiday 2016 Q1 906.
## 5 Brisbane Visiting 2016 Q4 796.
## 6 Gold Coast Holiday 2002 Q1 711.
# Summarize total trips by State and Quarter
state_totals <- my_tourism_tsibble |>
group_by(State) |>
summarise(total_trips = sum(Trips, na.rm = TRUE)) |>
ungroup()
# Convert back into a tsibble with State as the key and Quarter as the index
state_totals_tsibble <- state_totals |>
as_tsibble(key = State, index = Quarter)
head(state_totals_tsibble, n=5)
## # A tsibble: 5 x 3 [1Q]
## # Key: State [1]
## State Quarter total_trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
# Loading the datasets
data("us_employment")
data("aus_production")
data("pelt")
data("PBS")
data("us_gasoline")
# Plot 1: Total Private Employment in the U.S. (Time Series)
# Extract "Total Private" Employed
us_employment_total_private <- us_employment |>
filter(Title == "Total Private")
# Autoplot to see overall trends
autoplot(us_employment_total_private, Employed) +
labs(title = "Total Private Employment in the U.S.",
y = "Employed",
x = "Month")
# Plot 2: Seasonality of Total Private Employment
gg_season(us_employment_total_private, Employed) +
labs(title = "Seasonality of Total Private Employment",
y = "Employed",
x = "Month")
# Plot 3: Subseries Plot of Total Private Employment
# Add a new column for the month name
us_employment_total_private <- us_employment_total_private |>
mutate(Month_Name = month(Month, label = TRUE))
# Subseries plot with faceted individual months
gg_subseries(us_employment_total_private, Employed) +
facet_wrap(~ Month_Name, nrow = 3) + # Arrange individual months in 3 rows
labs(title = "Subseries Plot of Total Private Employment by Month",
y = "Employed",
x = "Year")
# Plot 4: Lag Plot for Total Private Employment
gg_lag(us_employment_total_private, Employed) +
labs(title = "Lag Plot for Total Private Employment",
y = "Employed",
x = "Lag")
# Plot 5: ACF of Total Private Employment
ACF(us_employment_total_private, Employed) |>
autoplot() +
labs(title = "ACF of Total Private Employment",
y = "Autocorrelation",
x = "Lag")
# Plot 1: Time Series of Bricks Production in Australia
# Filter out missing values for Bricks
aus_production_filtered <- aus_production |>
filter(!is.na(Bricks))
# Autoplot for Bricks
autoplot(aus_production_filtered, Bricks) +
labs(title = "Bricks Production in Australia",
y = "Bricks Produced",
x = "Quarter")
# Plot 2: Seasonality of Bricks Production
# Seasonality with gg_season
gg_season(aus_production_filtered, Bricks) +
labs(title = "Seasonality of Bricks Production",
y = "Bricks Produced",
x = "Quarter")
# Plot 3: Subseries Plot of Bricks Production
# Subseries plot to observe seasonal patterns
gg_subseries(aus_production_filtered, Bricks) +
labs(title = "Subseries Plot of Bricks Production",
y = "Bricks Produced",
x = "Quarter")
# Plot 4: Lag Plot for Bricks Production
# Lag plot for autocorrelation
gg_lag(aus_production_filtered, Bricks, geom = "point") +
labs(title = "Lag Plot for Bricks Production",
y = "Bricks Produced",
x = "Lag")
# Plot 5: ACF of Bricks Production
# ACF for autocorrelation
ACF(aus_production_filtered, Bricks) |>
autoplot() +
labs(title = "ACF of Bricks Production",
y = "Autocorrelation",
x = "Lag")
# Autoplot for hare pelt traded
# Plot 1: Time Series of Hare Pelt Traded
autoplot(pelt, Hare) +
labs(title = "Hare Pelt Traded Over Time",
y = "Number of Pelts",
x = "Year")
# Lag plot for autocorrelation
# Plot 2: Lag Plot for Hare Pelt Traded
gg_lag(pelt, Hare, geom = "point") +
labs(title = "Lag Plot for Hare Pelt Traded",
y = "Number of Pelts",
x = "Lag")
# ACF for autocorrelation
# Plot 3: ACF of Hare Pelt Traded
ACF(pelt, Hare) |>
autoplot() +
labs(title = "ACF of Hare Pelt Traded",
y = "Autocorrelation",
x = "Lag")
# Plot 1: H02 Drug Cost Over Time (Facet Wrap)
# Filter for H02 cost in the PBS dataset
PBS_H02 <- PBS |>
filter(ATC2 == "H02")
# Autoplot for H02 Drug Cost with facet wrap
autoplot(PBS_H02, Cost) +
labs(title = "H02 Drug Cost Over Time",
y = "Cost",
x = "Month") +
facet_wrap(~ Concession + Type + ATC1 + ATC2, scales = "free_y", ncol = 1) +
theme(strip.text = element_blank()) # Remove the facet strip text
# Plot 2: Seasonality of H02 Drug Cost
gg_season(PBS_H02, Cost) +
labs(title = "Seasonality of H02 Drug Cost",
y = "Cost",
x = "Month")
# Plot 3: Subseries Plot of H02 Drug Cost
gg_subseries(PBS_H02, Cost) +
labs(title = "Subseries Plot of H02 Drug Cost",
y = "Cost",
x = "Month")
# Plot 4: Lag Plot for H02 Drug Cost
# Filter for a single Concession and Type
PBS_H02_single <- PBS_H02 |>
filter(Concession == "Concessional", Type == "Co-payments")
# Lag plot for autocorrelation
gg_lag(PBS_H02_single, Cost, geom = "point") +
labs(title = "Lag Plot for H02 Drug Cost",
y = "Cost",
x = "Lag")
# Plot 5: ACF of H02 Drug Cost
ACF(PBS_H02, Cost) |>
autoplot() +
labs(title = "ACF of H02 Drug Cost",
y = "Autocorrelation",
x = "Lag")
library(feasts)
# Plot 1: Gasoline Barrels Over Time
autoplot(us_gasoline, Barrels) +
labs(title = "Gasoline Barrels Over Time",
y = "Barrels",
x = "Week")
# Plot 2: Seasonality of Gasoline Barrels by Year
# Extract year from the index
us_gasoline <- us_gasoline |>
mutate(Year = year(Week)) # Assuming 'Week' is the time index; replace if needed
# Seasonality plot faceted by year
gg_season(us_gasoline, Barrels) +
facet_wrap(~ Year) + # Facet by year
labs(title = "Seasonality of Gasoline Barrels by Year",
y = "Barrels",
x = "Week")
Seasonality: The subseries plot reveals clear seasonal patterns in gasoline consumption, with the highest demand during the late spring to summer months (weeks 19 to 35) and again during the holiday season (weeks 45 to 52). The lowest consumption is observed at the beginning of the year (weeks 1 to 9).
Trend: There is a consistent upward trend in gasoline consumption over time. This trend is particularly noticeable after 2000, where the peaks in consumption during each segment are higher compared to earlier years (1990-2000), reflecting growing gasoline demand.
Volatility: Each segment shows volatility with fluctuations in gasoline consumption, especially during the high-demand periods (summer and holidays).
# Plot 3: Subseries Plot of Gasoline Barrels by Week Segments
# Create a factor for segments (divide weeks into 6 groups to visualize trends)
us_gasoline <- us_gasoline %>%
mutate(Week_Group = cut(week(Week), breaks = 6,
labels = c("Weeks 1 to 9", "Weeks 10 to 18", "Weeks 19 to 27",
"Weeks 28 to 35", "Weeks 36 to 44", "Weeks 45 to 52")))
# Generate the subseries plot with 6 facets
gg_subseries(us_gasoline, Barrels) +
facet_wrap(~ Week_Group, scales = "free_y") +
labs(title = "Subseries Plot of Gasoline Barrels by Week Segments",
x = "Week",
y = "Barrels") +
theme_minimal()
# Plot 4: Lag Plot for Gasoline Barrels
gg_lag(us_gasoline, Barrels, geom = "point") +
labs(title = "Lag Plot for Gasoline Barrels",
y = "Barrels",
x = "Lag")
# Plot 5: ACF of Gasoline Barrels
ACF(us_gasoline, Barrels) |>
autoplot() +
labs(title = "ACF of Gasoline Barrels",
y = "Autocorrelation",
x = "Lag")