Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec.
#knitr::opts_chunk$set(echo = TRUE)
#tinytex::reinstall_tinytex(repository = "illinois")
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.2
## ✔ lubridate 1.9.2 ✔ fable 0.3.4
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(dplyr)
library(tsibble)
library(imputeTS)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
#install.packages(c("fpp3", "ggplot2", "feasts", "fable", "tsibble"))
# Load libraries
library(ggplot2)
library(feasts)
library(fable)
library(tsibble)
library(USgas)
library(dplyr)
library(tsibble)
Load dataset: aus_production: Quarterly estimates of selected indicators of manufacturing production in Australia.
Details:
pelt: Hudson Bay Company trading records for Snowshoe Hare and Canadian Lynx furs from 1845 to 1935. This data contains trade records for all areas of the company. Details: pelt is an annual tsibble with two values:
gafa_stock:Historical stock prices from 2014-2018 for Google, Amazon, Facebook and Apple. All prices are in $USD.
Details gafa_stock is a tsibble containing data on irregular trading days:
vic_elec: Half-hourly electricity demand for Victoria, Australia
# For Bricks (aus_production)
help(aus_production)# or ?aus_production
## starting httpd help server ... done
# For Lynx (pelt)
help(pelt) # or ?pelt
# For Close (gafa_stock)
help(gafa_stock) # ?gafa_stock
# For Demand (vic_elec)
help(vic_elec) # or ?vic_elec
View all dataset
# You can view the data like this
View(aus_production)
View(pelt)
View(gafa_stock)
View(vic_elec)
TIme interval Quarterly estimates of selected indicators of manufacturing production in Australia.
frequency(aus_production) # Quarterly data
## [1] 4
Hudson Bay Company trading records for Snowshoe Hare and Canadian Lynx furs from 1845 to 1935. This data contains trade records for all areas of the company.
frequency(pelt) # Yearly data
## [1] 1
Historical stock prices from 2014-2018 for Google, Amazon, Facebook and Apple. All prices are in $USD.
#frequency(gafa_stock)
Half-hourly electricity demand for Victoria, Australia
frequency(vic_elec) # Half-hourly data
## [1] 48
FIx the Irregular Time inter in gafa_stock
interval(gafa_stock)
## <interval[1]>
## [1] !
# Aggregate to a regular daily interval
gafa_stock %>%
index_by(Day = ~ as.Date(.)) %>%
summarise(Close = mean(Close, na.rm = TRUE))
## # A tsibble: 1,258 x 2 [1D]
## Day Close
## <date> <dbl>
## 1 2014-01-02 271.
## 2 2014-01-03 269.
## 3 2014-01-06 271.
## 4 2014-01-07 275.
## 5 2014-01-08 276.
## 6 2014-01-09 274.
## 7 2014-01-10 273.
## 8 2014-01-13 270.
## 9 2014-01-14 276.
## 10 2014-01-15 276.
## # ℹ 1,248 more rows
# use imputation methods for missing values
gafa_stock_regular <- gafa_stock %>%
fill_gaps() %>%
mutate(Close = na_interpolation(Close))
gafa_stock_regular
## # A tsibble: 5,032 x 8 [!]
## # Key: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700
## # ℹ 5,022 more rows
interval(gafa_stock_regular)
## <interval[1]>
## [1] !
print(gafa_stock_regular)
## # A tsibble: 5,032 x 8 [!]
## # Key: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700
## # ℹ 5,022 more rows
autoplot(gafa_stock_regular, Close) +
labs(title = "GAFA Stock Close Prices",
x = "Time",
y = "Close Price")
As we can see here Amazon stock price went really high in 2019 comapre
to other three stock prices her. Facebook and Apple prices are
relatively low compare to Google and Amazon.
Using autoplot function
autoplot(aus_production) +
ggtitle("Time Plot of Aus Production") +
xlab("Time") +
ylab("Production")
## Plot variable not specified, automatically selected `.vars = Beer`
autoplot(gafa_stock)+
labs(title= "GAFA Stock Prices", y = "Stock price")
## Plot variable not specified, automatically selected `.vars = Open`
autoplot(vic_elec) +
labs(title = "Electricity Demand in Victoria",
x = "Year",
y = "Electricity Demand (MW)")
## Plot variable not specified, automatically selected `.vars = Demand`
autoplot(pelt)+
labs(title= "Lynx Pelts Over Time",
y ="number of Pelts" )
## Plot variable not specified, automatically selected `.vars = Hare`
Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.
gafa_stock_peaks <- gafa_stock %>%
group_by(Symbol) %>%
filter(Close == max(Close, na.rm = TRUE)) %>%
ungroup()
print(gafa_stock_peaks)
## # A tsibble: 4 x 8 [!]
## # Key: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
## 2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
## 3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
## 4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.
tute1 <- read.csv("https://raw.githubusercontent.com/deepasharma06/Data-624/main/tute1.csv", header = TRUE, sep = ',', na.strings="", fill = TRUE)
head(tute1)
## Quarter Sales AdBudget GDP
## 1 1981-03-01 1020.2 659.2 251.8
## 2 1981-06-01 889.2 589.0 290.9
## 3 1981-09-01 795.0 512.5 290.8
## 4 1981-12-01 1003.9 614.1 292.4
## 5 1982-03-01 1057.7 647.2 279.1
## 6 1982-06-01 944.4 602.0 254.0
Convert the data to time series
mytimeseries <- tute1 |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter)
mytimeseries
## # A tsibble: 100 x 4 [1Q]
## Quarter Sales AdBudget GDP
## <qtr> <dbl> <dbl> <dbl>
## 1 1981 Q1 1020. 659. 252.
## 2 1981 Q2 889. 589 291.
## 3 1981 Q3 795 512. 291.
## 4 1981 Q4 1004. 614. 292.
## 5 1982 Q1 1058. 647. 279.
## 6 1982 Q2 944. 602 254
## 7 1982 Q3 778. 531. 296.
## 8 1982 Q4 932. 608. 272.
## 9 1983 Q1 996. 638. 260.
## 10 1983 Q2 908. 582. 280.
## # ℹ 90 more rows
Construct time series plots of each of the three series
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y") +
labs(title = "Time Series Plot of All Variables",
x = "Quarter",
y = "Value",
colour = "Series") +
theme_minimal()
Check what happens when you don’t include facet_grid().
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line()
The USgas package contains data on the demand for natural gas in the US. * Install the USgas package. * Create a tsibble from us_total with year as the index and state as the key. * Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire,Massachusetts, Connecticut and Rhode Island).
# Load the dataset
data("us_total")
US Annual Total Natural Gas Consumption Description: The dataset provides information on the US annual natural gas consumption by state level between 1997 and 2019, and aggregate level between 1949 and 2019
view(us_total)
Convert to a tsibble with year as the index and state as the key
us_tsibble <- as_tsibble(us_total, key = state, index = year)
us_tsibble
## # A tsibble: 1,266 x 3 [1Y]
## # Key: state [53]
## year state y
## <int> <chr> <int>
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
## 7 2003 Alabama 350345
## 8 2004 Alabama 382367
## 9 2005 Alabama 353156
## 10 2006 Alabama 391093
## # ℹ 1,256 more rows
new_england_states <- c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")
new_england_states
## [1] "Maine" "Vermont" "New Hampshire" "Massachusetts"
## [5] "Connecticut" "Rhode Island"
# Filter for only the New England states
new_england_gas <- us_tsibble |>
filter(state %in% new_england_states)
library(ggplot2)
# Plot the data
ggplot(new_england_gas, aes(x = year, y = y, color = state)) +
geom_line() +
labs(title = "Annual Natural Gas Consumption in New England States",
x = "Year",
y = "Natural Gas Consumption",
color = "State") +
theme_minimal()
# Install the readxl package
#install.packages("readxl")
# Load the readxl package
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.0
library(readxl)
library(httr)
## Warning: package 'httr' was built under R version 4.3.0
library(tsibble)
library(lubridate)
# URL of the Excel file
url <- "https://raw.githubusercontent.com/deepasharma06/Data-624/main/tourism.xlsx"
# Destination file path
destfile <- "tourism.xlsx"
# Download the file
download.file(url, destfile, mode = "wb")
# Read the Excel file into R
tourism_data <- read_excel("tourism.xlsx")
# View the first few rows of the dataset
head(tourism_data)
## # A tibble: 6 × 5
## Quarter Region State Purpose Trips
## <chr> <chr> <chr> <chr> <dbl>
## 1 1998-01-01 Adelaide South Australia Business 135.
## 2 1998-04-01 Adelaide South Australia Business 110.
## 3 1998-07-01 Adelaide South Australia Business 166.
## 4 1998-10-01 Adelaide South Australia Business 127.
## 5 1999-01-01 Adelaide South Australia Business 137.
## 6 1999-04-01 Adelaide South Australia Business 200.
# Inspect the structure of the dataset
str(tourism_data)
## tibble [24,320 × 5] (S3: tbl_df/tbl/data.frame)
## $ Quarter: chr [1:24320] "1998-01-01" "1998-04-01" "1998-07-01" "1998-10-01" ...
## $ Region : chr [1:24320] "Adelaide" "Adelaide" "Adelaide" "Adelaide" ...
## $ State : chr [1:24320] "South Australia" "South Australia" "South Australia" "South Australia" ...
## $ Purpose: chr [1:24320] "Business" "Business" "Business" "Business" ...
## $ Trips : num [1:24320] 135 110 166 127 137 ...
# Get a summary of the dataset
summary(tourism_data)
## Quarter Region State Purpose
## Length:24320 Length:24320 Length:24320 Length:24320
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Trips
## Min. : 0.000
## 1st Qu.: 7.224
## Median : 28.134
## Mean : 70.896
## 3rd Qu.: 78.154
## Max. :985.278
Create a tsibble which is identical to the tourism tsibble from the tsibble package.
# Create tsibble
tourism_tsibble <- tourism_data |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(key = c(Region, State, Purpose), index = Quarter)
# Load the packages
library(readxl)
library(tsibble)
library(dplyr)
library(ggplot2)
Find what combination of Region and Purpose had the maximum number of overnight trips on average.
max_trips <- tourism_tsibble |>
group_by(Region, Purpose) |>
summarise(avg_trips = mean(Trips), .groups = "drop") |>
slice_max(avg_trips, n = 1)
print("Region and Purpose with maximum average overnight trips:")
## [1] "Region and Purpose with maximum average overnight trips:"
Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.
state_trips <- tourism_tsibble |>
index_by(Quarter) |>
group_by(State) |>
summarise(Total_Trips = sum(Trips), .groups = "drop") |>
as_tsibble(key = State, index = Quarter)
print("Total State Trips:")
## [1] "Total State Trips:"
Plot state trips
state_trips_plot <- ggplot(state_trips, aes(x = Quarter, y = Total_Trips, color = State)) +
geom_line() +
labs(title = "Total Trips by State Over Time",
x = "Quarter",
y = "Total Trips") +
theme(legend.position = "right")
print(state_trips_plot)
Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.
Total Private Employed from us_employment
employment_ts <- us_employment |> filter(Title == "Total Private")
# Time plot for Total Private Employment
autoplot(employment_ts) + labs(title = "Time Plot of Total Private Employment")
## Plot variable not specified, automatically selected `.vars = Employed`
gg_season(employment_ts) + labs(title = "Seasonal Plot of Total Private Employment")
## Plot variable not specified, automatically selected `y = Employed`
# Subseries plot for Total Private Employment
gg_subseries(employment_ts) + labs(title = "Subseries Plot of Total Private Employment")
## Plot variable not specified, automatically selected `y = Employed`
gg_lag(employment_ts) +
geom_point() + labs(title = "Lag Plot of Total Private Employment")
## Plot variable not specified, automatically selected `y = Employed`
ACF plot
ACF(employment_ts, Employed) |>
autoplot() + labs(title = "ACF Plot of Total Private Employment")
Bricks from aus_production
# Load the aus_production data
data("aus_production")
# Select Bricks data
bricks_ts <- aus_production |> select(Bricks)
autoplot() for Bricks from aus_production
autoplot(bricks_ts) +
ggtitle("Time Plot of Bricks Production") +
xlab("Year") +
ylab("Number of Bricks")
## Plot variable not specified, automatically selected `.vars = Bricks`
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
gg_season()
# Seasonal plot
gg_season(bricks_ts) +
ggtitle("Seasonal Plot of Bricks Production")
## Plot variable not specified, automatically selected `y = Bricks`
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
gg_subseries()
# Subseries plot
gg_subseries(bricks_ts) +
ggtitle("Subseries Plot of Bricks Production")
## Plot variable not specified, automatically selected `y = Bricks`
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
gg_lag()
# Lag plot
gg_lag(bricks_ts) +
ggtitle("Lag Plot of Bricks Production")
## Plot variable not specified, automatically selected `y = Bricks`
## Warning: Removed 20 rows containing missing values (gg_lag).
ACF()
# ACF plot for Bricks Production
ACF(bricks_ts) |> autoplot() + labs(title = "ACF Plot of Bricks Production")
## Response variable not specified, automatically selected `var = Bricks`
Hare from pelt
autoplot(pelt, Hare) +
ggtitle("Hare Population") +
xlab("Year") +
ylab("Population")
# Subseries plot
gg_subseries(pelt, Hare) +
ggtitle("Subseries Plot of Hare Population")
# Lag plot
gg_lag(pelt, Hare) +
ggtitle("Lag Plot of Hare Population")
ACF(pelt, Hare) |>
autoplot() + labs(title = "ACF Plot of Hare Pelts")
H02 Cost from PBS
h02_ts <- PBS |>
filter(ATC2 == "H02") |>
select(Month, Type, Cost)
# Time plot for H02 Cost
autoplot(h02_ts, Cost) + labs(title = "Time Plot of H02 Cost")
# Seasonal plot for ?? Cost
gg_season(h02_ts, y = Cost) + labs(title = "Seasonal Plot of H02 Cost")
# Subseries plot for H02 Cost
gg_subseries(h02_ts, y = Cost) + labs(title = "Subseries Plot of H02 Cost")
Filter the dataset for a single Type
h02_single <- h02_ts |>
filter(Type == "Safety net") |>
as_tsibble(index = Month)
# ACF plot for H02 Cost (for a single type: Safety net)
ACF(h02_single, Cost) |>
autoplot() + labs(title = "ACF Plot of H02 Cost (Safety net)")
Barrels from us_gasoline
autoplot(us_gasoline, Barrels) +
ggtitle("Gasoline Barrels Production") +
xlab("Year") +
ylab("Number of Barrels")
# Seasonal plot
gg_season(us_gasoline, Barrels) +
ggtitle("Seasonal Plot of Gasoline Barrels Production")
Subseries plot
gg_subseries(us_gasoline, Barrels) +
ggtitle("Subseries Plot of Gasoline Barrels Production")
# Lag plot
gg_lag(us_gasoline, Barrels) +
ggtitle("Lag Plot of Gasoline Barrels Production")
Can you spot any seasonality, cyclicity and trend?
What do you learn about the series?
What can you say about the seasonal patterns?
Can you identify any unusual years?
Bricks Production: Production exhibits annual seasonality, with a prolonged upward trend in brick production from the 1960s to the 1980s, followed by a series of cycles characterized by minor declines
Hare Pelts: Notable spikes at lags 1 and 10 indicate pronounced cyclic behavior in hare pelts production. We observe large fluctuations in the annual number of hare pelts traded, with significant variations between high and low volumes
Barrels of Gasoline: production increased over the years and decreased during the recession.