Homework#1 - Data624

Author

Michael Robinson

Published

February 9, 2025

2.1

Explore the following four time series: Bricks from aus_production, Lynx from pelt, Close from gafa_stock, Demand from vic_elec. Use ? (or help()) to find out about the data in each series.

library(fpp3)
library(dplyr)
library(lubridate)
library(tsibble)
library(tidyverse)
library(forecast)

?aus_production
?pelt
?gafa_stock
?vic_elec

Ts_bricks <- aus_production %>%
   filter(Bricks == (Bricks)) %>%
  select(Quarter, Bricks)
print(Ts_bricks)
# A tsibble: 198 x 2 [1Q]
   Quarter Bricks
     <qtr>  <dbl>
 1 1956 Q1    189
 2 1956 Q2    204
 3 1956 Q3    208
 4 1956 Q4    197
 5 1957 Q1    187
 6 1957 Q2    214
 7 1957 Q3    227
 8 1957 Q4    222
 9 1958 Q1    199
10 1958 Q2    229
# ℹ 188 more rows
Ts_Lynx <- pelt %>%
  filter(Lynx == (Lynx)) %>%
  select(Year, Lynx)
print(Ts_Lynx)
# A tsibble: 91 x 2 [1Y]
    Year  Lynx
   <dbl> <dbl>
 1  1845 30090
 2  1846 45150
 3  1847 49150
 4  1848 39520
 5  1849 21230
 6  1850  8420
 7  1851  5560
 8  1852  5080
 9  1853 10170
10  1854 19600
# ℹ 81 more rows
Ts_Close <- gafa_stock %>%
  filter(Close == (Close)) %>%
  select(Date, Close)
print(Ts_Close)
# A tsibble: 5,032 x 3 [!]
# Key:       Symbol [4]
   Date       Close Symbol
   <date>     <dbl> <chr> 
 1 2014-01-02  79.0 AAPL  
 2 2014-01-03  77.3 AAPL  
 3 2014-01-06  77.7 AAPL  
 4 2014-01-07  77.1 AAPL  
 5 2014-01-08  77.6 AAPL  
 6 2014-01-09  76.6 AAPL  
 7 2014-01-10  76.1 AAPL  
 8 2014-01-13  76.5 AAPL  
 9 2014-01-14  78.1 AAPL  
10 2014-01-15  79.6 AAPL  
# ℹ 5,022 more rows
Ts_Demand <- vic_elec  %>%
  select(Time, Demand)
print(Ts_Demand)
# A tsibble: 52,608 x 2 [30m] <Australia/Melbourne>
   Time                Demand
   <dttm>               <dbl>
 1 2012-01-01 00:00:00  4383.
 2 2012-01-01 00:30:00  4263.
 3 2012-01-01 01:00:00  4049.
 4 2012-01-01 01:30:00  3878.
 5 2012-01-01 02:00:00  4036.
 6 2012-01-01 02:30:00  3866.
 7 2012-01-01 03:00:00  3694.
 8 2012-01-01 03:30:00  3562.
 9 2012-01-01 04:00:00  3433.
10 2012-01-01 04:30:00  3359.
# ℹ 52,598 more rows
autoplot(Ts_bricks)
Plot variable not specified, automatically selected `.vars = Bricks`

autoplot(Ts_Lynx) 
Plot variable not specified, automatically selected `.vars = Lynx`

autoplot(Ts_Close) 
Plot variable not specified, automatically selected `.vars = Close`

autoplot(Ts_Demand, series = "Electricity Demand") +
  labs(title = "Time Series Plot of Electricity Demand", y = "Demand", x = "Time")
Plot variable not specified, automatically selected `.vars = Demand`
Warning in geom_line(...): Ignoring unknown parameters: `series`

2.2

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

PeakClosing <- gafa_stock %>%
  group_by(Symbol)  %>%
  filter(Close == max(Close)) %>%
  select(Symbol,Date,Close) 
print(PeakClosing)
# A tsibble: 4 x 3 [!]
# Key:       Symbol [4]
# Groups:    Symbol [4]
  Symbol Date       Close
  <chr>  <date>     <dbl>
1 AAPL   2018-10-03  232.
2 AMZN   2018-09-04 2040.
3 FB     2018-07-25  218.
4 GOOG   2018-07-26 1268.

2.3

Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation. You can read and view data with the code below, but check out what happens if you don’t include facet_grid()

tute1 <- readr::read_csv("/Users/michaelrobinson/Downloads/tute1.csv")
mytimeseries <- tute1 |>
  mutate(Quarter = yearquarter(Quarter)) |>
  as_tsibble(index = Quarter)
mytimeseries |>
  pivot_longer(-Quarter) |>
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

2.4

The USgas package contains data on the demand for natural gas in the US. Install the USgas package. Create a tsibble from us_total with year as the index and state as the key. Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

library(ggplot2)
library(USgas)

head(usgas)
        date                 process state state_abb      y
1 1973-01-01  Commercial Consumption  U.S.      U.S. 392315
2 1973-01-01 Residential Consumption  U.S.      U.S. 843900
3 1973-02-01  Commercial Consumption  U.S.      U.S. 394281
4 1973-02-01 Residential Consumption  U.S.      U.S. 747331
5 1973-03-01  Commercial Consumption  U.S.      U.S. 310799
6 1973-03-01 Residential Consumption  U.S.      U.S. 648504
USgas_ts <- as_tsibble(us_total, index = year, key = state)

NES<- c("Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut", "Rhode Island")
NES_gas <- USgas_ts %>% filter(state %in% NES)

ggplot(NES_gas, aes(x = year, y = y, color = state)) +
  geom_line() +
  labs(title = "New England Annual Natural Gas Consumption ",
       x = "Year",
       y = "Consumption") +
  theme_minimal()

2.5

Download tourism.xlsx from the book website and read it into R using read_excel(), and create a tsibble which is identical to the tourism tsibble from the tsibble package.

library(readxl)

# 5a-b

Tourism_Data <- read_excel("/Users/michaelrobinson/Downloads/tourism.xlsx")%>% 
   mutate(Quarter = yearquarter(Quarter)) %>% 
     as_tsibble(index = "Quarter",  key = c("Region", "State", "Purpose") )

Tourism_Data
# A tsibble: 24,320 x 5 [1Q]
# Key:       Region, State, Purpose [304]
   Quarter Region   State           Purpose  Trips
     <qtr> <chr>    <chr>           <chr>    <dbl>
 1 1998 Q1 Adelaide South Australia Business  135.
 2 1998 Q2 Adelaide South Australia Business  110.
 3 1998 Q3 Adelaide South Australia Business  166.
 4 1998 Q4 Adelaide South Australia Business  127.
 5 1999 Q1 Adelaide South Australia Business  137.
 6 1999 Q2 Adelaide South Australia Business  200.
 7 1999 Q3 Adelaide South Australia Business  169.
 8 1999 Q4 Adelaide South Australia Business  134.
 9 2000 Q1 Adelaide South Australia Business  154.
10 2000 Q2 Adelaide South Australia Business  169.
# ℹ 24,310 more rows
# 5c

max_trip_average <-Tourism_Data %>%
 group_by(Region, Purpose) %>%
  summarise(avgerage_trips = mean(Trips))
head(max_trip_average)
# A tsibble: 6 x 4 [1Q]
# Key:       Region, Purpose [1]
# Groups:    Region [1]
  Region   Purpose  Quarter avgerage_trips
  <chr>    <chr>      <qtr>          <dbl>
1 Adelaide Business 1998 Q1           135.
2 Adelaide Business 1998 Q2           110.
3 Adelaide Business 1998 Q3           166.
4 Adelaide Business 1998 Q4           127.
5 Adelaide Business 1999 Q1           137.
6 Adelaide Business 1999 Q2           200.
# 5d
total_trip_by_state <- Tourism_Data %>%
     select(Quarter, State, Trips) %>% 
     group_by(State) %>% 
     summarise(total_trips = sum(Trips))

total_trip_by_state
# A tsibble: 640 x 3 [1Q]
# Key:       State [8]
   State Quarter total_trips
   <chr>   <qtr>       <dbl>
 1 ACT   1998 Q1        551.
 2 ACT   1998 Q2        416.
 3 ACT   1998 Q3        436.
 4 ACT   1998 Q4        450.
 5 ACT   1999 Q1        379.
 6 ACT   1999 Q2        558.
 7 ACT   1999 Q3        449.
 8 ACT   1999 Q4        595.
 9 ACT   2000 Q1        600.
10 ACT   2000 Q2        557.
# ℹ 630 more rows

2.8

Use the following graphics functions: autoplot(), gg_season(), gg_subseries(), gg_lag(), ACF() and explore features from the following time series: Employed from us_employment, Bricks from aus_production, Hare from pelt, Cost from PBS, and Barrels from us_gasoline.

us_employment %>%
  filter(Title == "Total Private") %>%
  autoplot(Employed) +
  ggtitle("Total Private Employed")

us_employment %>%
  filter(Title == "Total Private") %>%
  gg_season(Employed) +
  ggtitle("Total Private Employed - Season plot")

us_employment %>%
  filter(Title == "Total Private") %>%
  gg_subseries(Employed)+
  ggtitle("Total Private Employed - Subseries plot")

us_employment %>%
  filter(Title == "Total Private") %>%
  gg_lag(Employed)+
  ggtitle("Total Private Employed - Lag plot") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

us_employment %>%
   filter(Title == "Total Private") %>%
 ACF(Employed) %>%
 autoplot()+
  ggtitle("Total Private Employed - ACF plot")

aus_production %>%
  autoplot(Bricks) + ggtitle("Bricks Autoplot")
Warning: Removed 20 rows containing missing values or values outside the scale range
(`geom_line()`).

aus_production %>%
  gg_season(Bricks) + ggtitle("Bricks Season Plot")
Warning: Removed 20 rows containing missing values or values outside the scale range
(`geom_line()`).

aus_production %>%
  gg_subseries(Bricks)+ ggtitle("Bricks Subseries plot")
Warning: Removed 5 rows containing missing values or values outside the scale range
(`geom_line()`).

aus_production %>%
  gg_lag(Bricks) + ggtitle("Bricks - Lag plot") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
Warning: Removed 20 rows containing missing values (gg_lag).

aus_production %>%
 ACF(Bricks) %>%
 autoplot() + ggtitle("Bricks- ACF plot")

pelt %>%
  autoplot(Hare) + ggtitle("Hare Autoplot")

pelt %>%
  gg_subseries(Hare)+ ggtitle("Hare Subseries plot")

pelt %>%
  gg_lag(Hare) + ggtitle("Hare - Lag plot") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

pelt %>%
 ACF(Hare) %>%
 autoplot() + ggtitle("Hare- ACF plot")

PBS %>%
  filter(ATC2 == "H02") %>% 
  autoplot(Cost) + ggtitle("Cost Autoplot")

PBS %>%
  filter(ATC2 == "H02") %>% 
  gg_season(Cost) + ggtitle("Cost Season Plot")

PBS %>%
  filter(ATC2 == "H02") %>% 
  gg_subseries(Cost)+ ggtitle("Cost Subseries plot")

PBS %>%
  filter(ATC2 == "H02") %>% 
 ACF(Cost) %>%
 autoplot() + ggtitle("Cost- ACF plot")

us_gasoline %>%
  autoplot(Barrels) + ggtitle("Barrels Autoplot")

us_gasoline %>%
  gg_season(Barrels) + ggtitle("Barrels Season Plot")

us_gasoline %>%
  gg_subseries(Barrels)+ ggtitle("Barrels Subseries plot")

us_gasoline %>%
  gg_lag(Barrels) + ggtitle("Barrels - Lag plot") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

us_gasoline %>%
 ACF(Barrels) %>%
 autoplot() + ggtitle("Barrels- ACF plot")