library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.3.2
## ✔ lubridate 1.9.3 ✔ fable 0.3.4
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## Warning: package 'tsibble' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(dplyr)
library(tsibble)
library(ggplot2)
# Load the four data sets used in the exercises below.
data(list = c("aus_production", "pelt", "gafa_stock", "vic_elec"))
Use ? (or help()) to find out about the data in each series.
# Open the documentation page for each data set.
?aus_production
?pelt
?gafa_stock
?vic_elec
# Time plot of Bricks from aus_production; the plot object is stored first and
# then printed explicitly.
aus_plot <- autoplot(aus_production, Bricks)
aus_plot
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Time plot of Lynx from pelt.
pelt_plot <- autoplot(pelt, Lynx)
pelt_plot
# Time plot of Close from gafa_stock.
gafa_plot <- autoplot(gafa_stock, Close)
gafa_plot
+ For the last plot, modify the axis labels and title.
# Time plot of Demand from vic_elec with a custom title and axis labels.
# The labels are added before the assignment so that `vic_plot` itself carries
# them (previously only the unlabelled plot was stored in `vic_plot`).
vic_plot <- autoplot(vic_elec, Demand) +
  labs(
    title = "The Demand of Electricity Over Time",
    x = "Time (30 min intervals)",
    y = "Total electricity demand (MWh)"
  )
vic_plot
Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.
# For each stock symbol, keep the row(s) whose closing price equals that
# symbol's maximum closing price, then render the result as a table.
peak_close <- filter(group_by(gafa_stock, Symbol), Close == max(Close))
knitr::kable(peak_close)
| Symbol | Date | Open | High | Low | Close | Adj_Close | Volume |
|---|---|---|---|---|---|---|---|
| AAPL | 2018-10-03 | 230.05 | 233.470 | 229.78 | 232.07 | 230.2755 | 28654800 |
| AMZN | 2018-09-04 | 2026.50 | 2050.500 | 2013.00 | 2039.51 | 2039.5100 | 5721100 |
| FB | 2018-07-25 | 215.72 | 218.620 | 214.27 | 217.50 | 217.5000 | 58954200 |
| GOOG | 2018-07-26 | 1251.00 | 1269.771 | 1249.02 | 1268.33 | 1268.3300 | 2405600 |
Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.
# Read the quarterly tute1 data from the local CSV file.
tute1 <- read.csv("~/Desktop/Data 624 fall 2024/Data-624-Homework-1/tute1.csv")
# View() opens an interactive data viewer, so only call it when a user is
# actually driving the session (it is skipped when the document is rendered).
if (interactive()) {
  View(tute1)
}
# Convert the Quarter column to a yearquarter index and build a tsibble on it.
timeseries_a <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)
# Show the first rows of the tsibble
head(timeseries_a)
# Reshape to long form (one row per Quarter and series) and draw one panel per
# series, letting each panel use its own y-axis range.
timeseries_a |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")
Check what happens when you don’t include facet_grid().
# Same long-form plot, but with every series drawn in a single panel.
timeseries_a |>
  pivot_longer(cols = -Quarter) |>
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line()
Without using facet_grid, all of the data is plotted on the same grid using the same scale.
The USgas package contains data on the demand for natural gas in the US. a. Install the USgas package.
# install.packages("USgas")
library(USgas)
# help("usgas")
# Build a tsibble from us_total, indexed by year with one series per state.
us_total_tsibble <- us_total |>
  as_tsibble(key = state, index = year)
# States that make up the New England area.
n_e_area <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)
# Plot consumption for the New England states only.
# The measured column `y` is passed to autoplot() explicitly instead of relying
# on automatic variable selection, and `labels` is spelled out in full rather
# than relying on partial argument matching.
us_total_tsibble |>
  filter(state %in% n_e_area) |>
  autoplot(y) +
  labs(
    title = "Natural Gas Consumption for New England States (1997-2019)",
    x = "Year",
    y = "Gas Consumption (Million Cubic Feet)"
  ) +
  scale_y_continuous(labels = scales::comma)
## Exercise 2.5

a. Download tourism.xlsx from the book website and read it into R using readxl::read_excel().
# Read the downloaded spreadsheet and preview it.
tourism_path <- "~/Desktop/Data 624 fall 2024/Data-624-Homework-1/tourism.xlsx"
tourism_ds <- readxl::read_excel(tourism_path)
head(tourism_ds)
# Load the packaged `tourism` data set for comparison and preview it too.
data(tourism)
head(tourism)
# Columns that together identify one series in the tsibble.
keys <- c("Region", "State", "Purpose")
# Convert Quarter to a yearquarter index, then build the tsibble keyed by the
# columns named in `keys`.
tourism_tsibble <- as_tsibble(
  mutate(tourism_ds, Quarter = yearquarter(Quarter)),
  key = all_of(keys),
  index = Quarter
)
# Average Trips for every Region/Purpose combination, then keep the single
# combination with the highest average and render it as a table.
# `.groups = "drop"` returns an ungrouped result, so the later steps do not
# depend on leftover grouping and summarise() emits no grouping message.
tourism_ds |>
  group_by(Region, Purpose) |>
  summarise(avg_trips = mean(Trips), .groups = "drop") |>
  arrange(desc(avg_trips)) |>
  head(1) |>
  knitr::kable()
| Region | Purpose | avg_trips |
|---|---|---|
| Sydney | Visiting | 747.27 |
# Total Trips per Quarter and State, stored as a tsibble keyed by State.
# `.groups = "drop"` ungroups the summary before Quarter is converted, so
# mutate() is not rewriting a column the data is still grouped by, and
# summarise() emits no grouping message.
trips_by_state <- tourism_ds |>
  group_by(Quarter, State) |>
  summarise(total_trips = sum(Trips), .groups = "drop") |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter, key = State)
head(trips_by_state)
Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: “Total Private” Employed from us_employment, Bricks from aus_production, Hare from pelt, “H02” Cost from PBS, and Barrels from us_gasoline.
# Filter the "Total Private" series once and reuse it for every plot below
# instead of repeating the same filter five times.
total_private <- us_employment |>
  filter(Title == "Total Private")

# Time plot
total_private |>
  autoplot(Employed) +
  labs(title = "Autoplot")

# Seasonal plot
total_private |>
  gg_season(Employed) +
  labs(title = "Seasonal Plot")

# Seasonal subseries plot
total_private |>
  gg_subseries(Employed) +
  labs(title = "Seasonal Subseries Plot")

# Lag plot
total_private |>
  gg_lag(Employed) +
  labs(title = "Lag Plot")

# Autocorrelation plot. ACF() is applied to the single Employed series, so the
# title refers to autocorrelation only (no cross-correlation is computed here).
total_private |>
  ACF(Employed) |>
  autoplot() +
  labs(title = "Autocorrelation Function (ACF) Plot")
# Time plot of Bricks
aus_production |>
  autoplot(Bricks) +
  labs(title = "Autoplot")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Seasonal plot of Bricks
aus_production |>
  gg_season(Bricks) +
  labs(title = "Seasonal Plot")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Seasonal subseries plot of Bricks
aus_production |>
  gg_subseries(Bricks) +
  labs(title = "Seasonal Subseries Plot")
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Lag plot of Bricks, drawn with points
aus_production |>
  gg_lag(Bricks, geom = "point") +
  labs(title = "Lag Plot")
## Warning: Removed 20 rows containing missing values (gg_lag).
# Autocorrelation plot. ACF() is applied to the single Bricks series, so the
# title refers to autocorrelation only (no cross-correlation is computed here).
aus_production |>
  ACF(Bricks) |>
  autoplot() +
  labs(title = "Autocorrelation Function (ACF) Plot")
The Bricks series from aus_production seems to be cyclical. Seasonally, it
appears to increase from the first to the third quarter and then decrease in
the fourth quarter. Trend-wise, there is an upward trend up until the 80s,
after which it begins to decline. The years that stand out are in the mid 70s
and early 80s, where there was a drastic drop.
# Time plot of Hare
pelt |>
  autoplot(Hare) +
  labs(title = "Autoplot")
# Lag plot of Hare, drawn with points
pelt |>
  gg_lag(Hare, geom = "point") +
  labs(title = "Lag Plot")
# Autocorrelation plot. ACF() is applied to the single Hare series, so the
# title refers to autocorrelation only (no cross-correlation is computed here).
pelt |>
  ACF(Hare) |>
  autoplot() +
  labs(title = "Autocorrelation Function (ACF) Plot")
This data is very cyclical as demonstrated in the plots. The cycle seems
to be about every 5 years.
# Keep only the rows of PBS whose ATC2 code is "H02"; both plots use them.
h02 <- filter(PBS, ATC2 == "H02")
# Time plot of Cost
autoplot(h02, Cost) +
  labs(title = "Autoplot")
# Seasonal plot of Cost
gg_season(h02, Cost) +
  labs(title = "Seasonal Plot")
Concessional Co-payments and General Co-payments seem to peak between March
and July, which is when Concessional Safety Net and General Safety Net costs
are at their lowest.
# Time plot of Barrels
autoplot(us_gasoline, Barrels) +
  labs(title = "Autoplot")
# Seasonal plot of Barrels
gg_season(us_gasoline, Barrels) +
  labs(title = "Seasonal Plot")
# Seasonal subseries plot of Barrels
gg_subseries(us_gasoline, Barrels) +
  labs(title = "Seasonal Subseries Plot")
It’s difficult to spot a clear trend here, although the series is generally trending upward. Seasonally, it seems as though the barrels supplied peak in June/July (the summer season).