library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.4 v tsibble 1.0.1
## v dplyr 1.0.7 v tsibbledata 0.3.0
## v tidyr 1.1.3 v feasts 0.2.2
## v lubridate 1.7.10 v fable 0.3.1
## v ggplot2 3.3.5
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v readr 2.0.1 v stringr 1.4.0
## v purrr 0.3.4 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks lubridate::intersect(), base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks lubridate::setdiff(), base::setdiff()
## x tsibble::union() masks lubridate::union(), base::union()
#?gafa_stock
#Historical stock prices from 2014-2018 for Google, Amazon, Facebook and Apple. All prices are in $USD
#?PBS
#Monthly Medicare Australia prescription data
head(PBS)
## # A tsibble: 6 x 9 [1M]
## # Key: Concession, Type, ATC1, ATC2 [1]
## Month Concession Type ATC1 ATC1_desc ATC2 ATC2_desc Scripts Cost
## <mth> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 1991 Jul Concessional Co-pa~ A Alimentary ~ A01 STOMATOLO~ 18228 67877
## 2 1991 Aug Concessional Co-pa~ A Alimentary ~ A01 STOMATOLO~ 15327 57011
## 3 1991 Sep Concessional Co-pa~ A Alimentary ~ A01 STOMATOLO~ 14775 55020
## 4 1991 Oct Concessional Co-pa~ A Alimentary ~ A01 STOMATOLO~ 15380 57222
## 5 1991 Nov Concessional Co-pa~ A Alimentary ~ A01 STOMATOLO~ 14371 52120
## 6 1991 Dec Concessional Co-pa~ A Alimentary ~ A01 STOMATOLO~ 15028 54299
PBS %>% distinct(Type)
## # A tibble: 2 x 1
## Type
## <chr>
## 1 Co-payments
## 2 Safety net
PBS %>% distinct(Concession)
## # A tibble: 2 x 1
## Concession
## <chr>
## 1 Concessional
## 2 General
PBS %>% distinct(ATC1)
## # A tibble: 15 x 1
## ATC1
## <chr>
## 1 A
## 2 B
## 3 C
## 4 D
## 5 G
## 6 H
## 7 J
## 8 L
## 9 M
## 10 N
## 11 P
## 12 R
## 13 S
## 14 V
## 15 Z
PBS %>% distinct(ATC2)
## # A tibble: 84 x 1
## ATC2
## <chr>
## 1 A01
## 2 A02
## 3 A03
## 4 A04
## 5 A05
## 6 A06
## 7 A07
## 8 A09
## 9 A10
## 10 A11
## # ... with 74 more rows
#?vic_elec
#Half-hourly electricity demand for Victoria, Australia
head(vic_elec)
## # A tsibble: 6 x 5 [30m] <Australia/Melbourne>
## Time Demand Temperature Date Holiday
## <dttm> <dbl> <dbl> <date> <lgl>
## 1 2012-01-01 00:00:00 4383. 21.4 2012-01-01 TRUE
## 2 2012-01-01 00:30:00 4263. 21.0 2012-01-01 TRUE
## 3 2012-01-01 01:00:00 4049. 20.7 2012-01-01 TRUE
## 4 2012-01-01 01:30:00 3878. 20.6 2012-01-01 TRUE
## 5 2012-01-01 02:00:00 4036. 20.4 2012-01-01 TRUE
## 6 2012-01-01 02:30:00 3866. 20.2 2012-01-01 TRUE
str(vic_elec)
## tbl_ts [52,608 x 5] (S3: tbl_ts/tbl_df/tbl/data.frame)
## $ Time : POSIXct[1:52608], format: "2012-01-01 00:00:00" "2012-01-01 00:30:00" ...
## $ Demand : num [1:52608] 4383 4263 4049 3878 4036 ...
## $ Temperature: num [1:52608] 21.4 21.1 20.7 20.6 20.4 ...
## $ Date : Date[1:52608], format: "2012-01-01" "2012-01-01" ...
## $ Holiday : logi [1:52608] TRUE TRUE TRUE TRUE TRUE TRUE ...
## - attr(*, "key")= tibble [1 x 1] (S3: tbl_df/tbl/data.frame)
## ..$ .rows: list<int> [1:1]
## .. ..$ : int [1:52608] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..@ ptype: int(0)
## - attr(*, "index")= chr "Time"
## ..- attr(*, "ordered")= logi TRUE
## - attr(*, "index2")= chr "Time"
## - attr(*, "interval")= interval [1:1] 30m
## ..@ .regular: logi TRUE
#?pelt
#Pelt trading records
head(pelt)
## # A tsibble: 6 x 3 [1Y]
## Year Hare Lynx
## <dbl> <dbl> <dbl>
## 1 1845 19580 30090
## 2 1846 19600 45150
## 3 1847 19610 49150
## 4 1848 11990 39520
## 5 1849 28040 21230
## 6 1850 58000 8420
str(pelt)
## tbl_ts [91 x 3] (S3: tbl_ts/tbl_df/tbl/data.frame)
## $ Year: num [1:91] 1845 1846 1847 1848 1849 ...
## $ Hare: num [1:91] 19580 19600 19610 11990 28040 ...
## $ Lynx: num [1:91] 30090 45150 49150 39520 21230 ...
## - attr(*, "key")= tibble [1 x 1] (S3: tbl_df/tbl/data.frame)
## ..$ .rows: list<int> [1:1]
## .. ..$ : int [1:91] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..@ ptype: int(0)
## - attr(*, "index")= chr "Year"
## ..- attr(*, "ordered")= logi TRUE
## - attr(*, "index2")= chr "Year"
## - attr(*, "interval")= interval [1:1] 1Y
## ..@ .regular: logi TRUE
# Apple: keep only the AAPL rows of gafa_stock, then plot the adjusted
# closing price with a dollar-labelled y axis.
AAPL <- filter(gafa_stock, Symbol == "AAPL")
autoplot(AAPL, Adj_Close) +
  labs(y = "$", title = "APPLE ADJ CLOSE")

# Amazon: same subset-and-plot steps for the AMZN ticker.
AMZN <- filter(gafa_stock, Symbol == "AMZN")
autoplot(AMZN, Adj_Close) +
  labs(y = "$", title = "AMAZON ADJ CLOSE")
###########################
#PBS
#following the example in the book
# Cost of ATC2 group A11 prescriptions, expressed in millions of dollars.
# Steps: restrict PBS to A11, keep the columns of interest, add up Cost
# (summarise() on a tsibble aggregates within each Month), then rescale.
a11 <- PBS %>%
  filter(ATC2 == "A11") %>%
  select(Month, Concession, Type, Cost) %>%
  summarise(TotalC = sum(Cost)) %>%
  mutate(Cost = TotalC / 1e6)

a11 %>% autoplot(Cost)
################################vic_elec
# Victorian electricity data: plot demand, then temperature.
vic_elec %>% autoplot(Demand)
vic_elec %>% autoplot(Temperature)
################################pelt
# Pelt trading records: plot the Hare series, then the Lynx series.
pelt %>% autoplot(Hare)
pelt %>% autoplot(Lynx)
#gafa_stock is stock trading days, weekends excluded
##################################################33
#PBS is monthly
#################################################
#vic_elec is 30 minute intervals
########################################
#pelt is by year
# Row(s) of AAPL whose closing price equals the series maximum.
filter(AAPL, Close == max(Close))
## # A tsibble: 1 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
# Row(s) of AMZN whose closing price equals the series maximum.
filter(AMZN, Close == max(Close))
## # A tsibble: 1 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
# Per-ticker subsets for Facebook and Google.
FB <- filter(gafa_stock, Symbol == "FB")
GOOG <- filter(gafa_stock, Symbol == "GOOG")

# Row(s) of FB whose closing price equals the series maximum.
filter(FB, Close == max(Close))
## # A tsibble: 1 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
# Row(s) of GOOG whose closing price equals the series maximum.
filter(GOOG, Close == max(Close))
## # A tsibble: 1 x 8 [!]
## # Key: Symbol [1]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
ANSWER TO 2.
AAPL: max close on 10/3/18 at 232. AMZN: max close on 9/4/18 at 2040. GOOG: max close on 7/26/18 at 1268. FB: max close on 7/25/18 at 218.
Download the file tute1.csv from the book website, open it in Excel (or some other spreadsheet application), and review its contents. You should find four columns of information. Columns B through D each contain a quarterly series, labelled Sales, AdBudget and GDP. Sales contains the quarterly sales for a small company over the period 1981-2005. AdBudget is the advertising budget and GDP is the gross domestic product. All series have been adjusted for inflation.
tute1 <- readr::read_csv("tute1.csv")
## Rows: 100 Columns: 4
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): Quarter
## dbl (3): Sales, AdBudget, GDP
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(tute1)
## # A tibble: 6 x 4
## Quarter Sales AdBudget GDP
## <chr> <dbl> <dbl> <dbl>
## 1 3/1/1981 1020. 659. 252.
## 2 6/1/1981 889. 589 291.
## 3 9/1/1981 795 512. 291.
## 4 12/1/1981 1004. 614. 292.
## 5 3/1/1982 1058. 647. 279.
## 6 6/1/1982 944. 602 254
# Convert the raw tibble into a tsibble indexed by quarter.
# The CSV stores each quarter as a month/day/year string (e.g. "3/1/1981"),
# so the text is parsed explicitly with mdy() instead of relying on format
# guessing. yearquarter() is used rather than yearmonth() because the series
# are quarterly, so the index should carry a quarterly interval.
mytimeseries <- tute1 %>%
  mutate(Quarter = yearquarter(lubridate::mdy(Quarter))) %>%
  as_tsibble(index = Quarter)
# Reshape to long form (one row per Quarter/series pair) and draw each series
# in its own panel, letting every panel pick its own y range.
ggplot(
  pivot_longer(mytimeseries, -Quarter),
  aes(x = Quarter, y = value, colour = name)
) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")

# Same long-form data without faceting: all series share a single panel.
ggplot(
  pivot_longer(mytimeseries, -Quarter),
  aes(x = Quarter, y = value, colour = name)
) +
  geom_line()
Check what happens when you don’t include facet_grid().
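COMMENT: facet_grid() forms a matrix of panels defined by row and column faceting variables. Here it gives each series its own panel with its own y-axis scale. Without facet_grid(), all three series are drawn in a single panel on a shared y-axis, so the variation in the series with smaller values is harder to see.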
#Previously installed
library(USgas)
head(us_total)
## year state y
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
# Turn the us_total data frame into a tsibble: one annual series per state.
mytts2 <- as_tsibble(us_total, key = state, index = year)
mytts2
## # A tsibble: 1,266 x 3 [1Y]
## # Key: state [53]
## year state y
## <int> <chr> <int>
## 1 1997 Alabama 324158
## 2 1998 Alabama 329134
## 3 1999 Alabama 337270
## 4 2000 Alabama 353614
## 5 2001 Alabama 332693
## 6 2002 Alabama 379343
## 7 2003 Alabama 350345
## 8 2004 Alabama 382367
## 9 2005 Alabama 353156
## 10 2006 Alabama 391093
## # ... with 1,256 more rows
# Subset the state-level series to the six New England states.
# %in% against the full list replaces the chain of == tests. The original
# spelled "New Hampshire" without a space, which matched no rows and silently
# left that state out of the result.
New_England <- mytts2 %>%
  filter(state %in% c(
    "Maine", "Vermont", "New Hampshire",
    "Massachusetts", "Connecticut", "Rhode Island"
  ))

# One panel per state, each with its own y scale, so states with small totals
# are not flattened by the larger ones.
New_England %>%
  ggplot(aes(x = year, y = y, colour = state)) +
  geom_line() +
  facet_grid(state ~ ., scales = "free_y")
tourism1<-readxl::read_excel("tourism.xlsx")
head(tourism1)
## # A tibble: 6 x 5
## Quarter Region State Purpose Trips
## <chr> <chr> <chr> <chr> <dbl>
## 1 1998-01-01 Adelaide South Australia Business 135.
## 2 1998-01-01 Adelaide South Australia Holiday 224.
## 3 1998-01-01 Adelaide South Australia Other 58.4
## 4 1998-01-01 Adelaide South Australia Visiting 242.
## 5 1998-01-01 Adelaide Hills South Australia Business 0
## 6 1998-01-01 Adelaide Hills South Australia Holiday 6.81
library(tsibble)
head(tourism)
## # A tsibble: 6 x 5 [1Q]
## # Key: Region, State, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
class(tourism)
## [1] "tbl_ts" "tbl_df" "tbl" "data.frame"
class(tourism1)
## [1] "tbl_df" "tbl" "data.frame"
# Rebuild a tsibble from the spreadsheet copy of the tourism data: convert
# the Quarter text to a yearquarter index and declare Region, State and
# Purpose as the key.
tourism2 <- as_tsibble(
  mutate(tourism1, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)
class(tourism2)
## [1] "tbl_ts" "tbl_df" "tbl" "data.frame"
# Row(s) holding the single largest Trips value in the whole data set.
filter(tourism2, Trips == max(Trips))
## # A tsibble: 1 x 5 [1Q]
## # Key: Region, State, Purpose [1]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 2017 Q4 Melbourne Victoria Visiting 985.
# Largest single-quarter Trips value for each Region/Purpose pair, sorted
# from highest to lowest; only the top six rows are shown.
# as_tibble() drops the tsibble structure so the grouping ignores the time index.
tourism2 %>%
  as_tibble() %>%
  group_by(Region, Purpose) %>%
  dplyr::summarise(maxTrips = max(Trips)) %>%
  arrange(desc(maxTrips)) %>%
  head()
## `summarise()` has grouped output by 'Region'. You can override using the `.groups` argument.
## # A tibble: 6 x 3
## # Groups: Region [4]
## Region Purpose maxTrips
## <chr> <chr> <dbl>
## 1 Melbourne Visiting 985.
## 2 Sydney Business 948.
## 3 Sydney Visiting 921.
## 4 South Coast Holiday 915.
## 5 North Coast NSW Holiday 906.
## 6 Sydney Holiday 828.
ANSWER TO c:
The Melbourne / Visiting combination had the highest number of trips: 985.3.
I do have an interpretation issue regarding this question. Perhaps the wording should be "maximum number of overnight trips" rather than "maximum number of overnight trips on average", since there is only one occurrence of each region and purpose combination per quarter. Or is the intent to ignore time and average over all quarters?
_______________________
# Total trips per State, summing Trips within each State group.
StateT <- summarize(group_by(tourism2, State), TotTrips = sum(Trips))

# One panel per state, each with its own y scale.
autoplot(StateT, .vars = TotTrips) +
  facet_grid(State ~ ., scales = "free_y")

head(StateT)
## # A tsibble: 6 x 3 [1Q]
## # Key: State [1]
## State Quarter TotTrips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
aus_retail %>% distinct(Industry)
## # A tibble: 20 x 1
## Industry
## <chr>
## 1 Cafes, restaurants and catering services
## 2 Cafes, restaurants and takeaway food services
## 3 Clothing retailing
## 4 Clothing, footwear and personal accessory retailing
## 5 Department stores
## 6 Electrical and electronic goods retailing
## 7 Food retailing
## 8 Footwear and other personal accessory retailing
## 9 Furniture, floor coverings, houseware and textile goods retailing
## 10 Hardware, building and garden supplies retailing
## 11 Household goods retailing
## 12 Liquor retailing
## 13 Newspaper and book retailing
## 14 Other recreational goods retailing
## 15 Other retailing
## 16 Other retailing n.e.c.
## 17 Other specialised food retailing
## 18 Pharmaceutical, cosmetic and toiletry goods retailing
## 19 Supermarket and grocery stores
## 20 Takeaway food services
# Fix the RNG seed so the same retail series is drawn on every run, then keep
# only the rows of aus_retail whose Series ID matches one randomly chosen ID.
set.seed(1111111)
myseries <- filter(
  aus_retail,
  `Series ID` == sample(aus_retail$`Series ID`, size = 1)
)

# Show which industry the sampled series belongs to.
distinct(myseries, Industry)
## # A tibble: 1 x 1
## Industry
## <chr>
## 1 Newspaper and book retailing
head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Western Australia Newspaper and book retailing A3349822A 1982 Apr 9.7
## 2 Western Australia Newspaper and book retailing A3349822A 1982 May 11
## 3 Western Australia Newspaper and book retailing A3349822A 1982 Jun 10.7
## 4 Western Australia Newspaper and book retailing A3349822A 1982 Jul 9
## 5 Western Australia Newspaper and book retailing A3349822A 1982 Aug 9.1
## 6 Western Australia Newspaper and book retailing A3349822A 1982 Sep 10
Explore your chosen retail time series using the following functions:
autoplot(), gg_season(), gg_subseries(), gg_lag(),
ACF() %>% autoplot()
Can you spot any seasonality, cyclicity and trend? What do you learn about the series?
str(myseries)
## tbl_ts [441 x 5] (S3: tbl_ts/tbl_df/tbl/data.frame)
## $ State : chr [1:441] "Western Australia" "Western Australia" "Western Australia" "Western Australia" ...
## $ Industry : chr [1:441] "Newspaper and book retailing" "Newspaper and book retailing" "Newspaper and book retailing" "Newspaper and book retailing" ...
## $ Series ID: chr [1:441] "A3349822A" "A3349822A" "A3349822A" "A3349822A" ...
## $ Month : mth [1:441] 1982 Apr, 1982 May, 1982 Jun, 1982 Jul, 1982 Aug, 1982 Sep...
## $ Turnover : num [1:441] 9.7 11 10.7 9 9.1 10 7.7 8.4 11.8 7.4 ...
## - attr(*, "key")= tibble [1 x 3] (S3: tbl_df/tbl/data.frame)
## ..$ State : chr "Western Australia"
## ..$ Industry: chr "Newspaper and book retailing"
## ..$ .rows : list<int> [1:1]
## .. ..$ : int [1:441] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
## - attr(*, "index")= chr "Month"
## ..- attr(*, "ordered")= logi TRUE
## - attr(*, "index2")= chr "Month"
## - attr(*, "interval")= interval [1:1] 1M
## ..@ .regular: logi TRUE
autoplot(myseries,.vars=Turnover)
A quick look at this time series shows an upward trend, with possible yearly seasonality and possibly also a 10-year cyclicity. Let’s take a closer look at seasonality. Note the downward trend in 2010.
gg_season(myseries,y=Turnover)
There is a yearly seasonality marked by a sharp increase in retail trade turnover in December.
gg_subseries(myseries,y=Turnover)
This subseries plot also depicts the seasonal changes, in particular the increase in December. You can see the downward trend starting in 2010 as well.
gg_lag(myseries,y=Turnover)
The lag plot shows a positive relationship at lag 1 and lag 2, which reflects the seasonality.
ACF(myseries,Turnover) %>% autoplot()
Note the decrease in the ACF as the lags increase, which is due to the trend, and the scalloped shape, which is due to the yearly seasonality.