DATA 624 HW1 Chapter 2 091221

library(fpp3)

## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --

## v tibble      3.1.4      v tsibble     1.0.1 
## v dplyr       1.0.7      v tsibbledata 0.3.0 
## v tidyr       1.1.3      v feasts      0.2.2 
## v lubridate   1.7.10     v fable       0.3.1 
## v ggplot2     3.3.5

## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()    masks base::date()
## x dplyr::filter()      masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval()  masks lubridate::interval()
## x dplyr::lag()         masks stats::lag()
## x tsibble::setdiff()   masks base::setdiff()
## x tsibble::union()     masks base::union()

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v readr   2.0.1     v stringr 1.4.0
## v purrr   0.3.4     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## x tsibble::interval()      masks lubridate::interval()
## x dplyr::lag()             masks stats::lag()
## x tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## x tsibble::union()         masks lubridate::union(), base::union()

Use the help function to explore what the series gafa_stock, PBS, vic_elec and pelt represent.

# ?gafa_stock
# Historical stock prices from 2014-2018 for Google, Amazon, Facebook and
# Apple. All prices are in $USD.

# ?PBS
# Monthly Medicare Australia prescription data.

# Preview the first rows of PBS.
PBS %>% head()

## # A tsibble: 6 x 9 [1M]
## # Key:       Concession, Type, ATC1, ATC2 [1]
##      Month Concession   Type   ATC1  ATC1_desc    ATC2  ATC2_desc  Scripts  Cost
##      <mth> <chr>        <chr>  <chr> <chr>        <chr> <chr>        <dbl> <dbl>
## 1 1991 Jul Concessional Co-pa~ A     Alimentary ~ A01   STOMATOLO~   18228 67877
## 2 1991 Aug Concessional Co-pa~ A     Alimentary ~ A01   STOMATOLO~   15327 57011
## 3 1991 Sep Concessional Co-pa~ A     Alimentary ~ A01   STOMATOLO~   14775 55020
## 4 1991 Oct Concessional Co-pa~ A     Alimentary ~ A01   STOMATOLO~   15380 57222
## 5 1991 Nov Concessional Co-pa~ A     Alimentary ~ A01   STOMATOLO~   14371 52120
## 6 1991 Dec Concessional Co-pa~ A     Alimentary ~ A01   STOMATOLO~   15028 54299

# Unique values of Type in PBS.
distinct(PBS, Type)

## # A tibble: 2 x 1
##   Type       
##   <chr>      
## 1 Co-payments
## 2 Safety net

# Unique values of Concession in PBS.
distinct(PBS, Concession)

## # A tibble: 2 x 1
##   Concession  
##   <chr>       
## 1 Concessional
## 2 General

# Unique values of ATC1 in PBS.
distinct(PBS, ATC1)

## # A tibble: 15 x 1
##    ATC1 
##    <chr>
##  1 A    
##  2 B    
##  3 C    
##  4 D    
##  5 G    
##  6 H    
##  7 J    
##  8 L    
##  9 M    
## 10 N    
## 11 P    
## 12 R    
## 13 S    
## 14 V    
## 15 Z

# Unique values of ATC2 in PBS.
distinct(PBS, ATC2)

## # A tibble: 84 x 1
##    ATC2 
##    <chr>
##  1 A01  
##  2 A02  
##  3 A03  
##  4 A04  
##  5 A05  
##  6 A06  
##  7 A07  
##  8 A09  
##  9 A10  
## 10 A11  
## # ... with 74 more rows

# ?vic_elec
# Half-hourly electricity demand for Victoria, Australia.
vic_elec %>% head()

## # A tsibble: 6 x 5 [30m] <Australia/Melbourne>
##   Time                Demand Temperature Date       Holiday
##   <dttm>               <dbl>       <dbl> <date>     <lgl>  
## 1 2012-01-01 00:00:00  4383.        21.4 2012-01-01 TRUE   
## 2 2012-01-01 00:30:00  4263.        21.0 2012-01-01 TRUE   
## 3 2012-01-01 01:00:00  4049.        20.7 2012-01-01 TRUE   
## 4 2012-01-01 01:30:00  3878.        20.6 2012-01-01 TRUE   
## 5 2012-01-01 02:00:00  4036.        20.4 2012-01-01 TRUE   
## 6 2012-01-01 02:30:00  3866.        20.2 2012-01-01 TRUE

# Column types and tsibble attributes (index, interval) of vic_elec.
vic_elec %>% str()

## tbl_ts [52,608 x 5] (S3: tbl_ts/tbl_df/tbl/data.frame)
##  $ Time       : POSIXct[1:52608], format: "2012-01-01 00:00:00" "2012-01-01 00:30:00" ...
##  $ Demand     : num [1:52608] 4383 4263 4049 3878 4036 ...
##  $ Temperature: num [1:52608] 21.4 21.1 20.7 20.6 20.4 ...
##  $ Date       : Date[1:52608], format: "2012-01-01" "2012-01-01" ...
##  $ Holiday    : logi [1:52608] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  - attr(*, "key")= tibble [1 x 1] (S3: tbl_df/tbl/data.frame)
##   ..$ .rows: list<int> [1:1] 
##   .. ..$ : int [1:52608] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..@ ptype: int(0) 
##  - attr(*, "index")= chr "Time"
##   ..- attr(*, "ordered")= logi TRUE
##  - attr(*, "index2")= chr "Time"
##  - attr(*, "interval")= interval [1:1] 30m
##   ..@ .regular: logi TRUE

# ?pelt
# Pelt trading records.
pelt %>% head()

## # A tsibble: 6 x 3 [1Y]
##    Year  Hare  Lynx
##   <dbl> <dbl> <dbl>
## 1  1845 19580 30090
## 2  1846 19600 45150
## 3  1847 19610 49150
## 4  1848 11990 39520
## 5  1849 28040 21230
## 6  1850 58000  8420

# Column types and tsibble attributes (index, interval) of pelt.
pelt %>% str()

## tbl_ts [91 x 3] (S3: tbl_ts/tbl_df/tbl/data.frame)
##  $ Year: num [1:91] 1845 1846 1847 1848 1849 ...
##  $ Hare: num [1:91] 19580 19600 19610 11990 28040 ...
##  $ Lynx: num [1:91] 30090 45150 49150 39520 21230 ...
##  - attr(*, "key")= tibble [1 x 1] (S3: tbl_df/tbl/data.frame)
##   ..$ .rows: list<int> [1:1] 
##   .. ..$ : int [1:91] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..@ ptype: int(0) 
##  - attr(*, "index")= chr "Year"
##   ..- attr(*, "ordered")= logi TRUE
##  - attr(*, "index2")= chr "Year"
##  - attr(*, "interval")= interval [1:1] 1Y
##   ..@ .regular: logi TRUE

Use autoplot() to plot some of the series in these data sets.

# Apple: subset gafa_stock and plot the adjusted closing price.
AAPL <- filter(gafa_stock, Symbol == "AAPL")

AAPL %>%
  autoplot(Adj_Close) +
  labs(y = "$", title = "APPLE ADJ CLOSE")

# Amazon: subset gafa_stock and plot the adjusted closing price.
AMZN <- filter(gafa_stock, Symbol == "AMZN")

AMZN %>%
  autoplot(Adj_Close) +
  labs(y = "$", title = "AMAZON ADJ CLOSE")

# PBS ----
# Following the example in the book: keep the A11 rows, sum Cost into TotalC,
# then rescale the total by 1e6 into Cost.
a11 <- PBS %>%
  filter(ATC2 == "A11") %>%
  select(Month, Concession, Type, Cost) %>%
  summarise(TotalC = sum(Cost)) %>%
  mutate(Cost = TotalC / 1e6)

a11 %>% autoplot(Cost)

# vic_elec ----
vic_elec %>% autoplot(Demand)

vic_elec %>% autoplot(Temperature)

# pelt ----
pelt %>% autoplot(Hare)

pelt %>% autoplot(Lynx)

What is the time interval of each series?

#gafa_stock is stock trading days, weekends excluded
##################################################
#PBS is monthly
#################################################
#vic_elec is 30 minute intervals
########################################
#pelt is by year

Use filter() to find what days corresponded to the peak closing price for each of the four stocks in gafa_stock.

# Row(s) of AAPL where Close equals its maximum.
filter(AAPL, Close == max(Close))

## # A tsibble: 1 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800

# Row(s) of AMZN where Close equals its maximum.
filter(AMZN, Close == max(Close))

## # A tsibble: 1 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close  Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl>
## 1 AMZN   2018-09-04 2026. 2050.  2013 2040.     2040. 5721100

# Per-symbol subsets for Facebook and Google.
FB <- filter(gafa_stock, Symbol == "FB")

GOOG <- filter(gafa_stock, Symbol == "GOOG")

# Row(s) of FB where Close equals its maximum.
filter(FB, Close == max(Close))

## # A tsibble: 1 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 FB     2018-07-25  216.  219.  214.  218.      218. 58954200

# Row(s) of GOOG where Close equals its maximum.
filter(GOOG, Close == max(Close))

## # A tsibble: 1 x 8 [!]
## # Key:       Symbol [1]
##   Symbol Date        Open  High   Low Close Adj_Close  Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>   <dbl>
## 1 GOOG   2018-07-26  1251 1270. 1249. 1268.     1268. 2405600

ANSWER TO 2.

AAPL max close on 10/3/18 at 232; AMZN max close on 9/4/18 at 2040; GOOG max close on 7/26/18 at 1268; FB max close on 7/25/18 at 218.

Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.

You can read the data into R with the following script:

# Read tute1.csv from the working directory into a tibble.
tute1 <- "tute1.csv" %>% readr::read_csv()

## Rows: 100 Columns: 4

## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): Quarter
## dbl (3): Sales, AdBudget, GDP

## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Preview the first rows of tute1.
tute1 %>% head()

## # A tibble: 6 x 4
##   Quarter   Sales AdBudget   GDP
##   <chr>     <dbl>    <dbl> <dbl>
## 1 3/1/1981  1020.     659.  252.
## 2 6/1/1981   889.     589   291.
## 3 9/1/1981   795      512.  291.
## 4 12/1/1981 1004.     614.  292.
## 5 3/1/1982  1058.     647.  279.
## 6 6/1/1982   944.     602   254

Convert the data to time series

# Build a quarterly tsibble from tute1.
# Quarter is read as text in month/day/year form (e.g. "3/1/1981"), so it is
# parsed explicitly with mdy() and then converted with yearquarter(). This
# gives a quarterly index; yearmonth() would instead produce a monthly index
# observed only every third month.
mytimeseries <- tute1 %>%
  mutate(Quarter = yearquarter(lubridate::mdy(Quarter))) %>%
  as_tsibble(index = Quarter)

Construct time series plots of each of the three series

# One panel per series (rows = series name), each with its own y-axis scale.
mytimeseries %>%
  pivot_longer(cols = -Quarter) %>%
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(rows = vars(name), scales = "free_y")

# Same plot without facet_grid(): all series share a single panel.
mytimeseries %>%
  pivot_longer(cols = -Quarter) %>%
  ggplot(mapping = aes(x = Quarter, y = value, colour = name)) +
  geom_line()

Check what happens when you don’t include facet_grid().

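COMMENT: facet_grid() forms a matrix of panels defined by row and column variables. Without it, all three series are drawn in a single panel on a shared y-axis instead of one panel per series with its own scale.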

The USgas package contains data on the demand for natural gas in the US.

Install the USgas package.

#Previously installed
library(USgas)
# Preview the first rows of us_total.
us_total %>% head()

##   year   state      y
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343

Create a tsibble from us_total with year as the index and state as the key.

# Tsibble of us_total with year as the index and state as the key.
mytts2 <- as_tsibble(us_total, key = state, index = year)

print(mytts2)

## # A tsibble: 1,266 x 3 [1Y]
## # Key:       state [53]
##     year state        y
##    <int> <chr>    <int>
##  1  1997 Alabama 324158
##  2  1998 Alabama 329134
##  3  1999 Alabama 337270
##  4  2000 Alabama 353614
##  5  2001 Alabama 332693
##  6  2002 Alabama 379343
##  7  2003 Alabama 350345
##  8  2004 Alabama 382367
##  9  2005 Alabama 353156
## 10  2006 Alabama 391093
## # ... with 1,256 more rows

Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, New Hampshire, Massachusetts, Connecticut and Rhode Island).

# Keep only the six New England states. Names must match the values of
# `state` exactly; the earlier spelling 'NewHampshire' (no space) matched
# nothing, which silently dropped New Hampshire from the subset.
New_England <- mytts2 %>%
  filter(
    state %in% c(
      "Maine", "Vermont", "New Hampshire",
      "Massachusetts", "Connecticut", "Rhode Island"
    )
  )

# Annual consumption (y) per state, one free-scaled panel per state.
ggplot(New_England, mapping = aes(x = year, y = y, colour = state)) +
  geom_line() +
  facet_grid(rows = vars(state), scales = "free_y")

Download tourism.xlsx from the book website and read it into R using readxl::read_excel().

# Read tourism.xlsx from the working directory and preview it.
tourism1 <- "tourism.xlsx" %>% readxl::read_excel()
tourism1 %>% head()

## # A tibble: 6 x 5
##   Quarter    Region         State           Purpose   Trips
##   <chr>      <chr>          <chr>           <chr>     <dbl>
## 1 1998-01-01 Adelaide       South Australia Business 135.  
## 2 1998-01-01 Adelaide       South Australia Holiday  224.  
## 3 1998-01-01 Adelaide       South Australia Other     58.4 
## 4 1998-01-01 Adelaide       South Australia Visiting 242.  
## 5 1998-01-01 Adelaide Hills South Australia Business   0   
## 6 1998-01-01 Adelaide Hills South Australia Holiday    6.81

Create a tsibble which is identical to the tourism tsibble from the tsibble package.

library(tsibble)
# Preview the packaged tourism tsibble, the target to reproduce.
tourism %>% head()

## # A tsibble: 6 x 5 [1Q]
## # Key:       Region, State, Purpose [1]
##   Quarter Region   State           Purpose  Trips
##     <qtr> <chr>    <chr>           <chr>    <dbl>
## 1 1998 Q1 Adelaide South Australia Business  135.
## 2 1998 Q2 Adelaide South Australia Business  110.
## 3 1998 Q3 Adelaide South Australia Business  166.
## 4 1998 Q4 Adelaide South Australia Business  127.
## 5 1999 Q1 Adelaide South Australia Business  137.
## 6 1999 Q2 Adelaide South Australia Business  200.

# Class vector of the packaged tourism object.
tourism %>% class()

## [1] "tbl_ts"     "tbl_df"     "tbl"        "data.frame"

# Class vector of the freshly read spreadsheet data.
tourism1 %>% class()

## [1] "tbl_df"     "tbl"        "data.frame"

# Convert the spreadsheet data to a tsibble: Quarter becomes a yearquarter
# index, and Region/State/Purpose together form the key.
tourism2 <- as_tsibble(
  mutate(tourism1, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)

tourism2 %>% class()

## [1] "tbl_ts"     "tbl_df"     "tbl"        "data.frame"

Find what combination of Region and Purpose had the maximum number of overnight trips on average.

# Row(s) holding the single largest Trips value in the whole table.
filter(tourism2, Trips == max(Trips))

## # A tsibble: 1 x 5 [1Q]
## # Key:       Region, State, Purpose [1]
##   Quarter Region    State    Purpose  Trips
##     <qtr> <chr>     <chr>    <chr>    <dbl>
## 1 2017 Q4 Melbourne Victoria Visiting  985.

# For each Region/Purpose pair, take the largest single-quarter Trips value
# (the peak, not the mean across quarters), then show the top six pairs.
tourism2 %>%
  as_tibble() %>%
  group_by(Region, Purpose) %>%
  dplyr::summarise(maxTrips = max(Trips)) %>%
  arrange(desc(maxTrips)) %>%
  head()

## `summarise()` has grouped output by 'Region'. You can override using the `.groups` argument.

## # A tibble: 6 x 3
## # Groups:   Region [4]
##   Region          Purpose  maxTrips
##   <chr>           <chr>       <dbl>
## 1 Melbourne       Visiting     985.
## 2 Sydney          Business     948.
## 3 Sydney          Visiting     921.
## 4 South Coast     Holiday      915.
## 5 North Coast NSW Holiday      906.
## 6 Sydney          Holiday      828.

ANSWER TO c:

Melbourne, Visiting had the highest number of trips in a single quarter (985.3, in 2017 Q4).

I do have an interpretation issue regarding this question. Perhaps the wording should be "maximum number of overnight trips", without "on average", since there is only one occurrence of each Region and Purpose combination per quarter. Or is the intent to ignore time and average over quarters?
_______________________

Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

# Total trips per State per quarter, summed over Regions and Purposes.
StateT <- tourism2 %>%
  group_by(State) %>%
  summarise(TotTrips = sum(Trips))

# One free-scaled panel per State.
autoplot(StateT, .vars = TotTrips) +
  facet_grid(rows = vars(State), scales = "free_y")

StateT %>% head()

## # A tsibble: 6 x 3 [1Q]
## # Key:       State [1]
##   State Quarter TotTrips
##   <chr>   <qtr>    <dbl>
## 1 ACT   1998 Q1     551.
## 2 ACT   1998 Q2     416.
## 3 ACT   1998 Q3     436.
## 4 ACT   1998 Q4     450.
## 5 ACT   1999 Q1     379.
## 6 ACT   1999 Q2     558.

Monthly Australian retail data is provided in aus_retail. Select one of the time series as follows (but choose your own seed value):

# Unique values of Industry in aus_retail.
distinct(aus_retail, Industry)

## # A tibble: 20 x 1
##    Industry                                                         
##    <chr>                                                            
##  1 Cafes, restaurants and catering services                         
##  2 Cafes, restaurants and takeaway food services                    
##  3 Clothing retailing                                               
##  4 Clothing, footwear and personal accessory retailing              
##  5 Department stores                                                
##  6 Electrical and electronic goods retailing                        
##  7 Food retailing                                                   
##  8 Footwear and other personal accessory retailing                  
##  9 Furniture, floor coverings, houseware and textile goods retailing
## 10 Hardware, building and garden supplies retailing                 
## 11 Household goods retailing                                        
## 12 Liquor retailing                                                 
## 13 Newspaper and book retailing                                     
## 14 Other recreational goods retailing                               
## 15 Other retailing                                                  
## 16 Other retailing n.e.c.                                           
## 17 Other specialised food retailing                                 
## 18 Pharmaceutical, cosmetic and toiletry goods retailing            
## 19 Supermarket and grocery stores                                   
## 20 Takeaway food services

# Fix the RNG seed, draw one Series ID at random, and keep only that series.
set.seed(1111111)
myseries <- filter(
  aus_retail,
  `Series ID` == sample(aus_retail[["Series ID"]], size = 1)
)

# Industry of the sampled series.
distinct(myseries, Industry)

## # A tibble: 1 x 1
##   Industry                    
##   <chr>                       
## 1 Newspaper and book retailing

# Preview the first rows of the sampled series.
myseries %>% head()

## # A tsibble: 6 x 5 [1M]
## # Key:       State, Industry [1]
##   State             Industry                     `Series ID`    Month Turnover
##   <chr>             <chr>                        <chr>          <mth>    <dbl>
## 1 Western Australia Newspaper and book retailing A3349822A   1982 Apr      9.7
## 2 Western Australia Newspaper and book retailing A3349822A   1982 May     11  
## 3 Western Australia Newspaper and book retailing A3349822A   1982 Jun     10.7
## 4 Western Australia Newspaper and book retailing A3349822A   1982 Jul      9  
## 5 Western Australia Newspaper and book retailing A3349822A   1982 Aug      9.1
## 6 Western Australia Newspaper and book retailing A3349822A   1982 Sep     10

Explore your chosen retail time series using the following functions:

autoplot(), gg_season(), gg_subseries(), gg_lag(),

ACF() %>% autoplot()

Can you spot any seasonality, cyclicity and trend? What do you learn about the series?

# Column types and tsibble attributes of the sampled series.
myseries %>% str()

## tbl_ts [441 x 5] (S3: tbl_ts/tbl_df/tbl/data.frame)
##  $ State    : chr [1:441] "Western Australia" "Western Australia" "Western Australia" "Western Australia" ...
##  $ Industry : chr [1:441] "Newspaper and book retailing" "Newspaper and book retailing" "Newspaper and book retailing" "Newspaper and book retailing" ...
##  $ Series ID: chr [1:441] "A3349822A" "A3349822A" "A3349822A" "A3349822A" ...
##  $ Month    : mth [1:441] 1982 Apr, 1982 May, 1982 Jun, 1982 Jul, 1982 Aug, 1982 Sep...
##  $ Turnover : num [1:441] 9.7 11 10.7 9 9.1 10 7.7 8.4 11.8 7.4 ...
##  - attr(*, "key")= tibble [1 x 3] (S3: tbl_df/tbl/data.frame)
##   ..$ State   : chr "Western Australia"
##   ..$ Industry: chr "Newspaper and book retailing"
##   ..$ .rows   : list<int> [1:1] 
##   .. ..$ : int [1:441] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE
##  - attr(*, "index")= chr "Month"
##   ..- attr(*, "ordered")= logi TRUE
##  - attr(*, "index2")= chr "Month"
##  - attr(*, "interval")= interval [1:1] 1M
##   ..@ .regular: logi TRUE

# Time plot of Turnover.
myseries %>% autoplot(.vars = Turnover)

A quick look at this time series shows an upward trend, with possible yearly seasonality and possibly a 10-year cycle. Let’s take a closer look at seasonality. Note the downward trend starting in 2010.

# Seasonal plot of Turnover.
myseries %>% gg_season(y = Turnover)

There is a yearly seasonality marked by a sharp increase in retail trade turnover in December.

# Seasonal subseries plot of Turnover.
myseries %>% gg_subseries(y = Turnover)

This subseries plot also depicts the seasonal changes, increase in December. You can see the downward trend starting in 2010 as well.

# Lag plot of Turnover.
myseries %>% gg_lag(y = Turnover)

The lag plot shows a positive relationship at lag 1 and lag 2, which reflects the seasonality.

# Autocorrelation function of Turnover, plotted as a correlogram.
myseries %>%
  ACF(Turnover) %>%
  autoplot()

Note the decrease in ACF as the lags increase due to the trend and the scallop due to the yearly seasonality.

DATA 624 HW1 Chapter 2 091221

Lisa Szydziak

9/3/2021