library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.4 ✔ fable 0.4.1
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
?aus_production
## starting httpd help server ... done
aus_production %>%
select(Bricks)
## # A tsibble: 218 x 2 [1Q]
## Bricks Quarter
## <dbl> <qtr>
## 1 189 1956 Q1
## 2 204 1956 Q2
## 3 208 1956 Q3
## 4 197 1956 Q4
## 5 187 1957 Q1
## 6 214 1957 Q2
## 7 227 1957 Q3
## 8 222 1957 Q4
## 9 199 1958 Q1
## 10 229 1958 Q2
## # ℹ 208 more rows
aus_production %>%
autoplot(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
?pelt
pelt %>%
autoplot(Lynx)
?gafa_stock
gafa_stock %>%
autoplot(Close)
?vic_elec
vic_elec %>%
autoplot(Demand) +
labs(y = "Total Electricity Demand in MWh", title = "Electricity Demand in Victoria, Australia")
glimpse(gafa_stock)
## Rows: 5,032
## Columns: 8
## Key: Symbol [4]
## $ Symbol <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAP…
## $ Date <date> 2014-01-02, 2014-01-03, 2014-01-06, 2014-01-07, 2014-01-08,…
## $ Open <dbl> 79.38286, 78.98000, 76.77857, 77.76000, 76.97285, 78.11429, …
## $ High <dbl> 79.57571, 79.10000, 78.11429, 77.99429, 77.93714, 78.12286, …
## $ Low <dbl> 78.86000, 77.20428, 76.22857, 76.84571, 76.95571, 76.47857, …
## $ Close <dbl> 79.01857, 77.28286, 77.70428, 77.14857, 77.63715, 76.64571, …
## $ Adj_Close <dbl> 66.96433, 65.49342, 65.85053, 65.37959, 65.79363, 64.95345, …
## $ Volume <dbl> 58671200, 98116900, 103152700, 79302300, 64632400, 69787200,…
gafa_stock %>%
group_by(Symbol) %>%
filter(Close == max(Close)) %>%
select(Symbol, Date, Close)
## # A tsibble: 4 x 3 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Close
## <chr> <date> <dbl>
## 1 AAPL 2018-10-03 232.
## 2 AMZN 2018-09-04 2040.
## 3 FB 2018-07-25 218.
## 4 GOOG 2018-07-26 1268.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
url <- "https://raw.githubusercontent.com/silmaxk/DATA624_PredictiveAnalytics/refs/heads/main/Homework%231/tute1.csv"
tute <- read.csv(url)
head(tute)
## Quarter Sales AdBudget GDP
## 1 1981-03-01 1020.2 659.2 251.8
## 2 1981-06-01 889.2 589.0 290.9
## 3 1981-09-01 795.0 512.5 290.8
## 4 1981-12-01 1003.9 614.1 292.4
## 5 1982-03-01 1057.7 647.2 279.1
## 6 1982-06-01 944.4 602.0 254.0
mytimeseries <- tute |>
mutate(Quarter = yearquarter(Quarter)) |>
as_tsibble(index = Quarter)
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line() +
facet_grid(name ~ ., scales = "free_y")
mytimeseries |>
pivot_longer(-Quarter) |>
ggplot(aes(x = Quarter, y = value, colour = name)) +
geom_line()
#### There is no clear distinction between the three series
library(USgas)
gas <- us_total %>%
as_tsibble(index = year, key = state)
new_england <- c('Maine','Vermont','New Hampshire', 'Massachusetts', 'Connecticut', 'Rhode Island')
gas %>%
filter(state %in% new_england) %>%
autoplot(y) +
labs(title = "Natural Gas Consumption", subtitle = "New England Area", x = "Year", y = "Consumption")
library(readxl)
tourism <- read_excel('./tourism.xlsx')
head(tourism)
## # A tibble: 6 × 5
## Quarter Region State Purpose Trips
## <chr> <chr> <chr> <chr> <dbl>
## 1 1998-01-01 Adelaide South Australia Business 135.
## 2 1998-04-01 Adelaide South Australia Business 110.
## 3 1998-07-01 Adelaide South Australia Business 166.
## 4 1998-10-01 Adelaide South Australia Business 127.
## 5 1999-01-01 Adelaide South Australia Business 137.
## 6 1999-04-01 Adelaide South Australia Business 200.
tourism_ts <- tourism %>%
mutate(Quarter = yearquarter(Quarter)) %>%
as_tsibble(index = Quarter, key = c('Region', 'Purpose', 'State'))
head(tourism_ts)
## # A tsibble: 6 x 5 [1Q]
## # Key: Region, State, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
tourism_ts %>%
mutate(avg_trips = mean(Trips, na.rm = TRUE), .by = c(Region, Purpose), .keep = "none") %>%
distinct() %>%
arrange(desc(avg_trips)) %>%
slice(1)
## # A tibble: 1 × 3
## Region Purpose avg_trips
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
Explanation: I needed some assistance with this portion and learned that:
mutate(avg_trips = mean(Trips, na.rm = TRUE), .by = c(Region, Purpose))
computes the average trips per combination of Region and
Purpose
distinct()
makes sure that all the unique
combinations are still there
arrange(desc(avg_trips))
sorts the results in
descending order
slice(1)
picks the combination with the highest
average from the rest so we get the result with the
maximum number
Create a new tsibble which combines the Purposes and Regions, and just has total trips by State
total_trips_by_state <- tourism_ts %>%
group_by(State = Region) %>%
summarise(total_trips = sum(Trips, na.rm = TRUE))
head(total_trips_by_state)
## # A tsibble: 6 x 3 [1Q]
## # Key: State [1]
## State Quarter total_trips
## <chr> <qtr> <dbl>
## 1 Adelaide 1998 Q1 659.
## 2 Adelaide 1998 Q2 450.
## 3 Adelaide 1998 Q3 593.
## 4 Adelaide 1998 Q4 524.
## 5 Adelaide 1999 Q1 548.
## 6 Adelaide 1999 Q2 569.
us_employment %>%
filter(Title == "Total Private") %>%
autoplot(Employed)
aus_production %>%
gg_season(Bricks)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
pelt %>%
gg_subseries(Hare)
us_gasoline %>%
gg_lag(Barrels)
PBS %>%
filter(ATC2 == "H02") %>%
ACF(Cost) %>%
autoplot()