DATA 624: Predictive Analytics

Evan McLaughlin

Homework Chapter 2

library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.3     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.0     ✔ feasts      0.3.2
## ✔ lubridate   1.9.3     ✔ fable       0.3.4
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.3
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.4
## ✔ purrr   1.0.2     ✔ stringr 1.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tsibble)
library(fable)
library(ggplot2)
library(lattice)
library(scales)
## Warning: package 'scales' was built under R version 4.3.3
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

#2.1 Bricks: quarterly interval; Lynx: yearly interval; Close: daily, on trading days only (so irregularly spaced); Demand: half-hourly interval.

#2.2

# Peak closing price per stock: within each Symbol keep only the row(s) whose
# Close equals that symbol's maximum, then list the results by Symbol.
gafa_stock |>
  select(Symbol, Date, Close) |>
  group_by(Symbol) |>
  filter(Close == max(Close)) |>
  arrange(Symbol)
## # A tsibble: 4 x 3 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date       Close
##   <chr>  <date>     <dbl>
## 1 AAPL   2018-10-03  232.
## 2 AMZN   2018-09-04 2040.
## 3 FB     2018-07-25  218.
## 4 GOOG   2018-07-26 1268.

#2.3

Removing facet_grid() from the code draws all of the series in a single panel with a shared y-axis. Because the variables have very different ranges, the series with smaller values are compressed and their variation becomes harder to see.

# Load the tute1 data set straight from the course GitHub repository.

tute1 <- read_csv(
  file = "https://raw.githubusercontent.com/evanmclaughlin/DATA624/main/tute1.csv"
)
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl  (3): Sales, AdBudget, GDP
## date (1): Quarter
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows to check how the columns were parsed.
tute1 |> head()
## # A tibble: 6 × 4
##   Quarter    Sales AdBudget   GDP
##   <date>     <dbl>    <dbl> <dbl>
## 1 1981-03-01 1020.     659.  252.
## 2 1981-06-01  889.     589   291.
## 3 1981-09-01  795      512.  291.
## 4 1981-12-01 1004.     614.  292.
## 5 1982-03-01 1058.     647.  279.
## 6 1982-06-01  944.     602   254
# Build a tsibble from the tibble: re-express the Quarter column as a
# yearquarter and declare it as the time index.
mytimeseries <- as_tsibble(
  mutate(tute1, Quarter = yearquarter(Quarter)),
  index = Quarter
)

# Reshape to long form (one row per Quarter/series pair) and draw each series
# in its own panel, letting every panel use its own y-axis range.
ggplot(
  pivot_longer(mytimeseries, -Quarter),
  aes(x = Quarter, y = value, colour = name)
) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

# Same plot with facet_grid() removed: all series share one panel and y-axis.

ggplot(
  pivot_longer(mytimeseries, -Quarter),
  aes(x = Quarter, y = value, colour = name)
) +
  geom_line()

#2.4

library(USgas)
## Warning: package 'USgas' was built under R version 4.3.3
# Convert us_total to a tsibble indexed by year and keyed by state, then keep
# only the rows for the six New England states.
tsibble1 <- us_total |>
  as_tsibble(index = year, key = state)
ne_states <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)
ne_gas <- filter(tsibble1, state %in% ne_states)

# NOTE(review): this previews the `usgas` table, whereas the plot that follows
# is built from `us_total` (via `ne_gas`) — confirm this is the intended preview.
usgas |> head()
##         date                 process state state_abb      y
## 1 1973-01-01  Commercial Consumption  U.S.      U.S. 392315
## 2 1973-01-01 Residential Consumption  U.S.      U.S. 843900
## 3 1973-02-01  Commercial Consumption  U.S.      U.S. 394281
## 4 1973-02-01 Residential Consumption  U.S.      U.S. 747331
## 5 1973-03-01  Commercial Consumption  U.S.      U.S. 310799
## 6 1973-03-01 Residential Consumption  U.S.      U.S. 648504
# Time plot of column y for the New England subset, with comma-formatted
# y-axis labels and a minimal theme.
ne_gas |>
  autoplot(y) +
  scale_y_continuous(labels = comma) +
  theme_minimal() +
  labs(
    title = "Natural Gas Consumption among New England States by Year",
    x = "Year",
    y = "Gas Demand"
  )

#2.5

library(readxl)
# Download the workbook into the working directory, read it, and preview it.
# mode = "wb" writes the file in binary mode, which an .xlsx file requires.
link1 <- "https://github.com/evanmclaughlin/DATA624/raw/main/tourism.xlsx"
download.file(url = link1, destfile = "tourism.xlsx", mode = "wb")
tourism <- read_excel(path = "tourism.xlsx")
tourism |> head()
## # A tibble: 6 × 5
##   Quarter    Region   State           Purpose  Trips
##   <chr>      <chr>    <chr>           <chr>    <dbl>
## 1 1998-01-01 Adelaide South Australia Business  135.
## 2 1998-04-01 Adelaide South Australia Business  110.
## 3 1998-07-01 Adelaide South Australia Business  166.
## 4 1998-10-01 Adelaide South Australia Business  127.
## 5 1999-01-01 Adelaide South Australia Business  137.
## 6 1999-04-01 Adelaide South Australia Business  200.

#2.6 The most interesting features in the plots below are the large seasonal swings in arrivals from the UK, with Q2 and Q3 seeing large drops in most years. It’s also notable that arrivals from every country except Japan trend upward over the years.

library(scales)
# Plot quarterly arrivals with one panel per country of origin, each panel on
# its own y-axis scale. The measured variable is named explicitly so that
# autoplot() does not have to pick it automatically (which previously emitted
# a "Plot variable not specified" message).
autoplot(aus_arrivals, Arrivals) +
  labs(
    title = "Quarterly Arrivals to Australia",
    x = "Year",
    y = "Arrivals"
  ) +
  facet_wrap(~Origin, scales = "free_y") +
  scale_y_continuous(labels = comma)

# Seasonal plot of Arrivals, used to examine the within-year pattern.
gg_season(aus_arrivals, Arrivals) +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Seasonality of Arrivals to AUS")

# Subseries plot of Arrivals, used to examine each quarter across the years.
gg_subseries(aus_arrivals, Arrivals) +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Subseries - Quarterly Arrivals")

```