The chunk below loads the libraries required for this assignment.
# Install fpp3 only when it is missing. requireNamespace() tests whether the
# package is available without attaching it and without emitting a warning;
# attaching is left to the library(fpp3) call further down.
if (!requireNamespace("fpp3", quietly = TRUE)) {
  install.packages("fpp3")
}
## Loading required package: fpp3
## Warning: package 'fpp3' was built under R version 4.4.2
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## Warning: package 'ggplot2' was built under R version 4.4.2
## Warning: package 'tsibble' was built under R version 4.4.2
## Warning: package 'tsibbledata' was built under R version 4.4.2
## Warning: package 'feasts' was built under R version 4.4.2
## Warning: package 'fabletools' was built under R version 4.4.2
## Warning: package 'fable' was built under R version 4.4.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(fpp3)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
#Exercise 2.10 Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.
Use ? (or help()) to find out about the data in each series. What is the time interval of each series? Use autoplot() to produce a time plot of each series. For the last plot, modify the axis labels and title.
# Bring the four example datasets into the workspace in a single call, then
# open the documentation page for aus_production.
data(list = c("aus_production", "pelt", "gafa_stock", "vic_elec"))
help(topic = "aus_production")
## starting httpd help server ... done
# Time plot of the Bricks column of aus_production.
aus_production |>
  autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
The time interval for the aus_production series is quarterly.
# Documentation for the pelt dataset, followed by a time plot of Lynx.
help(topic = "pelt")
pelt |>
  autoplot(Lynx)
The time interval for the pelt series is annual (one observation per year); the 20-year spacing seen on the plot is only the spacing of the axis tick marks.
# Documentation for the gafa_stock dataset, followed by a time plot of the
# closing price.
help(topic = "gafa_stock")
gafa_stock |>
  autoplot(Close)
The time interval for the gafa_stock series is daily, recorded on trading days only, so the series is irregularly spaced (tsibble reports the interval as `[!]`).
# Open the vic_elec documentation and load the dataset.
help(topic = "vic_elec")
data(list = "vic_elec")

# Time plot of Demand with a custom title and custom axis labels.
vic_elec |>
  autoplot(Demand) +
  ggtitle("Electricity Demand in Victoria") +
  xlab("30 min intervals") +
  ylab("Demand (MW)")
The time interval for the vic_elec series is 30 minutes.
##2.2) Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock
# Within each stock symbol, keep only the row(s) whose closing price equals
# that symbol's maximum closing price.
peak_closing_days <- gafa_stock |>
  group_by(Symbol) |>
  filter(Close == max(Close))

# Show the matching rows (the result is still grouped by Symbol).
print(peak_closing_days)
## # A tsibble: 4 x 8 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
## 2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
## 3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
## 4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
# Import the tute1 CSV file into a tibble.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
tute1 <- readr::read_csv(
  file = "C:/Users/Dell/Downloads/tute1.csv"
)
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Sales, AdBudget, GDP
## date (1): Quarter
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a spreadsheet-style viewer and is meant for interactive use
# only, so skip it when the document is rendered non-interactively.
if (interactive()) {
  View(tute1)
}
# Build a tsibble from tute1: recode Quarter as a yearquarter value and
# declare it as the time index.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)
# Reshape to long form (one row per Quarter and series) and draw each series
# in its own vertically stacked panel with an independent y-axis.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y") +
  ggtitle("Time Series Plots of Sales, AdBudget, and GDP") +
  xlab("Quarter") +
  ylab("Value")
# Same long-form data, but with all series drawn in one shared panel
# (no facetting), so they share a single y-axis.
mytimeseries |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  ggtitle("Time Series Plots of Sales, AdBudget, and GDP") +
  xlab("Quarter") +
  ylab("Value")
Install the USgas package. Create a tsibble from us_total with year as the index and state as the key. Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).
# Install USgas only when it is missing. requireNamespace() tests whether the
# package is available without attaching it and without emitting a warning;
# attaching is left to the library(USgas) call further down.
if (!requireNamespace("USgas", quietly = TRUE)) {
  install.packages("USgas")
}
## Loading required package: USgas
## Warning: package 'USgas' was built under R version 4.4.2
library(USgas)
library(tsibble)
library(ggplot2)
# Load the us_total dataset.
data(list = "us_total")

# tsibble with year as the index and state as the key.
us_total_tsibble <- as_tsibble(us_total, key = state, index = year)

# The six New England states named in the exercise.
new_england_states <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)
new_england_data <- us_total_tsibble |>
  filter(state %in% new_england_states)

# One line per state; the consumption values are read from column y.
new_england_data |>
  ggplot(mapping = aes(x = year, y = y, color = state)) +
  geom_line() +
  ggtitle("Annual Natural Gas Consumption by State in New England") +
  xlab("Year") +
  ylab("Natural Gas Consumption") +
  theme_minimal()
##2.5) Download tourism.xlsx from the book website and read it into R using readxl::read_excel(). Create a tsibble which is identical to the tourism tsibble from the tsibble package. Find what combination of Region and Purpose had the maximum number of overnight trips on average. Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.
library(readxl)
library(tsibble)
library(dplyr)
# Read the downloaded workbook.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
tourism_data <- readxl::read_excel("C:/Users/Dell/Downloads/tourism.xlsx")

# Create a tsibble which is identical to the tourism tsibble.
# The packaged tourism tsibble is keyed by Region, State and Purpose, so State
# has to be part of the key here as well; keying on Region and Purpose alone
# produces a tsibble whose key structure differs from the original.
tourism_tsibble <- tourism_data |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter, key = c(Region, State, Purpose))
# Find the combination of Region and Purpose with the maximum number of
# overnight trips on average.
#
# The data is converted to a plain tibble first: summarise() on a tsibble
# always keeps the time index, so every "mean" would be taken over a single
# quarter instead of over the whole history of a Region/Purpose pair.
# .groups = "drop" removes the leftover Region grouping so that the final
# filter() compares all combinations with each other and returns the single
# overall maximum rather than one row per Region.
max_avg_overnight_trips <- tourism_tsibble |>
  as_tibble() |>
  group_by(Region, Purpose) |>
  summarise(avg_trips = mean(Trips, na.rm = TRUE), .groups = "drop") |>
  filter(avg_trips == max(avg_trips))
print(max_avg_overnight_trips)
## # A tsibble: 76 x 4 [1Q]
## # Key: Region, Purpose [76]
## # Groups: Region [76]
## Region Purpose Quarter avg_trips
## <chr> <chr> <qtr> <dbl>
## 1 Adelaide Visiting 2017 Q1 270.
## 2 Adelaide Hills Visiting 2002 Q4 81.1
## 3 Alice Springs Holiday 1998 Q3 76.5
## 4 Australia's Coral Coast Holiday 2014 Q3 198.
## 5 Australia's Golden Outback Business 2017 Q3 174.
## 6 Australia's North West Business 2016 Q3 297.
## 7 Australia's South West Holiday 2016 Q1 612.
## 8 Ballarat Visiting 2004 Q1 103.
## 9 Barkly Holiday 1998 Q3 37.9
## 10 Barossa Holiday 2006 Q1 51.0
## # ℹ 66 more rows
# Collapse Region and Purpose: add up Trips within every State and Quarter so
# that each State is left with one quarterly series of total trips.
total_trips_by_state <- tourism_tsibble |>
  index_by(Quarter) |>
  group_by(State) |>
  summarise(total_trips = sum(Trips, na.rm = TRUE)) |>
  as_tsibble(key = State, index = Quarter)

print(total_trips_by_state)
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter total_trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # ℹ 630 more rows
##2.8) Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.
Can you spot any seasonality, cyclicity and trend? What do you learn about the series? What can you say about the seasonal patterns? Can you identify any unusual years?
# Install and load necessary packages
library(fpp3)
library(USgas)
library(ggplot2)
# Load the five datasets explored in this exercise in a single call.
data(list = c(
  "us_employment",
  "aus_production",
  "pelt",
  "PBS",
  "us_gasoline"
))
# Keep only the "Total Private" rows of us_employment.
us_employment_private <- us_employment |>
  filter(Title == "Total Private")

# Time plot
us_employment_private |>
  autoplot(Employed) +
  ggtitle("Total Private Employment in US") +
  xlab("Year") +
  ylab("Employed")

# Seasonal plot
us_employment_private |>
  gg_season(Employed) +
  ggtitle("Seasonal Plot: Total Private Employment in US")

# Subseries plot
us_employment_private |>
  gg_subseries(Employed) +
  ggtitle("Subseries Plot: Total Private Employment in US")

# Lag plot
us_employment_private |>
  gg_lag(Employed) +
  ggtitle("Lag Plot: Total Private Employment in US")

# Autocorrelation function, stored first and then plotted
acf_us_employment_private <- us_employment_private |>
  ACF(Employed)
acf_us_employment_private |>
  autoplot() +
  ggtitle("ACF Plot: Total Private Employment in US")
# Bricks from aus_production: time plot
aus_production |>
  autoplot(Bricks) +
  ggtitle("Bricks Production in Australia") +
  xlab("Year") +
  ylab("Bricks")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Seasonal plot
aus_production |>
  gg_season(Bricks) +
  ggtitle("Seasonal Plot: Bricks Production in Australia")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Subseries plot
aus_production |>
  gg_subseries(Bricks) +
  ggtitle("Subseries Plot: Bricks Production in Australia")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Lag plot
aus_production |>
  gg_lag(Bricks) +
  ggtitle("Lag Plot: Bricks Production in Australia")
## Warning: Removed 20 rows containing missing values (gg_lag).
# Autocorrelation function, stored first and then plotted
acf_aus_production <- aus_production |>
  ACF(Bricks)
acf_aus_production |>
  autoplot() +
  ggtitle("ACF Plot: Bricks Production in Australia")
# Hare from pelt: time plot
pelt |>
  autoplot(Hare) +
  ggtitle("Hare Pelts") +
  xlab("Year") +
  ylab("Hare Pelts")

# Subseries plot
pelt |>
  gg_subseries(Hare) +
  ggtitle("Subseries Plot: Hare Pelts")

# Lag plot
pelt |>
  gg_lag(Hare) +
  ggtitle("Lag Plot: Hare Pelts")

# Autocorrelation function, stored first and then plotted
acf_pelt <- pelt |>
  ACF(Hare)
acf_pelt |>
  autoplot() +
  ggtitle("ACF Plot: Hare Pelts")
# H02 Cost from PBS
# Only Month and Cost are selected explicitly, but ts_cost still carries the
# Concession and Type columns (the lag plot below filters on them), so it
# holds more than one H02 series.
ts_cost <- PBS |>
  filter(ATC2 == "H02") |>
  select(Month, Cost)

# Time plot
ts_cost |>
  autoplot(Cost) +
  ggtitle("H02 Cost from PBS")

# Seasonal plot
ts_cost |>
  gg_season(Cost) +
  ggtitle("Seasonal Plot: H02 Cost from PBS")

# Subseries plot
ts_cost |>
  gg_subseries(Cost) +
  ggtitle("Subseries Plot: H02 Cost from PBS")

# Lag plot, restricted to the General / Safety net series only
ts_cost |>
  filter(Concession == "General", Type == "Safety net") |>
  gg_lag(Cost) +
  ggtitle("Lag Plot: H02 Cost from PBS (General, Safety net)")

# Autocorrelation function
ts_cost |>
  ACF(Cost) |>
  autoplot() +
  ggtitle("ACF Plot: H02 Cost from PBS")
# Barrels from us_gasoline
# Keep Week and Barrels, rebuild Week as a yearweek value with weeks starting
# on day 1, and declare it as the tsibble index.
ts_barrels <- us_gasoline |>
  select(Week, Barrels) |>
  mutate(Week = yearweek(Week, week_start = 1)) |>
  as_tsibble(index = Week)

# Time plot
ts_barrels |>
  autoplot(Barrels) +
  ggtitle("Gasoline Consumption in US")

# Seasonal plot
ts_barrels |>
  gg_season(Barrels) +
  ggtitle("Seasonal Plot: Gasoline Consumption in US")

# Subseries plot
ts_barrels |>
  gg_subseries(Barrels) +
  ggtitle("Subseries Plot: Gasoline Consumption in US")

# Lag plot for lags 1 to 12.
# NOTE(review): period = 12 is applied to a weekly index; confirm that a
# 12-observation seasonal period is what was intended here.
ts_barrels |>
  gg_lag(Barrels, lags = 1:12, period = 12) +
  ggtitle("Lag Plot: Gasoline Consumption in US")

# Autocorrelation function
ts_barrels |>
  ACF(Barrels) |>
  autoplot() +
  ggtitle("ACF Plot: Gasoline Consumption in US")
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.