# install.packages('ggplot2')
# install.packages('tsibbledata')
# install.packages('tsibble')
# install.packages('dplyr')
# install.packages('ggfortify')
# install.packages('feasts')
library(ggplot2)
library(tsibble)
##
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggfortify)
library(tidyr)
library(feasts)
## Loading required package: fabletools
# Copy each example data set out of tsibbledata into the workspace and open
# its help page to see what the series gafa_stock, PBS, vic_elec and pelt
# represent.
gafa_stock <- tsibbledata::gafa_stock
help("gafa_stock")
# `?topic` is shorthand for the same lookup:
?gafa_stock
# GAFA stock prices
help("PBS")
# Monthly Medicare Australia prescription data
PBS <- tsibbledata::PBS
help("vic_elec")
# Half-hourly electricity demand for Victoria, Australia
vic_elec <- tsibbledata::vic_elec
help("pelt")
# Pelt trading records
pelt <- tsibbledata::pelt
# a. Use autoplot() to plot some of the series in these data sets
class(gafa_stock)
## [1] "tbl_ts" "tbl_df" "tbl" "data.frame"
tibble::glimpse(gafa_stock)
## Rows: 5,032
## Columns: 8
## Key: Symbol [4]
## $ Symbol <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAP…
## $ Date <date> 2014-01-02, 2014-01-03, 2014-01-06, 2014-01-07, 2014-01-08,…
## $ Open <dbl> 79.38286, 78.98000, 76.77857, 77.76000, 76.97285, 78.11429, …
## $ High <dbl> 79.57571, 79.10000, 78.11429, 77.99429, 77.93714, 78.12286, …
## $ Low <dbl> 78.86000, 77.20428, 76.22857, 76.84571, 76.95571, 76.47857, …
## $ Close <dbl> 79.01857, 77.28286, 77.70428, 77.14857, 77.63715, 76.64571, …
## $ Adj_Close <dbl> 66.96433, 65.49342, 65.85053, 65.37959, 65.79363, 64.95345, …
## $ Volume <dbl> 58671200, 98116900, 103152700, 79302300, 64632400, 69787200,…
# gafa_stock is already a tsibble (class "tbl_ts" above, keyed by Symbol), so
# it is passed to autoplot() as-is. Do not coerce it with ts(): that turns
# Symbol and Date into numeric codes and stacks the four stocks into a single
# integer-indexed series, so the plots no longer show prices over time.
# Several measured variables at once, drawn separately for each Symbol.
autoplot(gafa_stock, vars(Open, High, Low, Close))

# A single variable: the opening price of each stock against Date.
gafa_stock %>% autoplot(Open)

# b. What is the time interval of each series?
# Reshape each data set to long form so that its measures can share one plot.
# Columns are picked by name (not position) so the reshape does not silently
# change if the column order does.
pelt_long <- tidyr::pivot_longer(
  pelt,
  cols = c(Hare, Lynx),
  names_to = "Animal",
  values_to = "Number_of_Pelts"
)
# PBS %>% dplyr::mutate(scale_Cost = Cost - mean(Cost))
# PBS holds many separate series (one per key combination). Sum them within
# each Month first; otherwise geom_line() joins every series into one
# zig-zagging line. summarise() on a tsibble aggregates per index value.
PBS_long <- PBS %>%
  dplyr::summarise(Scripts = sum(Scripts), Cost = sum(Cost)) %>%
  tidyr::pivot_longer(
    cols = c(Scripts, Cost),
    names_to = "Measure",
    values_to = "Number"
  )
vic_elec_long <- tidyr::pivot_longer(
  vic_elec,
  cols = c(Demand, Temperature),
  names_to = "Measure",
  values_to = "Value"
)
# 2.1
gafa_stock_plot <- ggplot(gafa_stock, aes(x = Date, y = Open, color = Symbol)) +
  geom_line() +
  ggtitle("gafa")
# `scales` is spelled out in full rather than relying on partial matching.
PBS_plot <- ggplot(PBS_long, aes(x = Month, y = Number)) +
  geom_line() +
  facet_wrap(~Measure, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
  ggtitle("PBS")
vic_elec_plot <- ggplot(vic_elec_long, aes(x = Time, y = Value)) +
  geom_line() +
  facet_wrap(~Measure, scales = "free_y") +
  ggtitle("vic_elec")
pelt_plot <- ggplot(pelt_long, aes(x = Year, y = Number_of_Pelts, color = Animal)) +
  geom_line() +
  ggtitle("pelt")
# time interval of gafa_stock is daily, but only trading days are recorded,
#   so the tsibble itself is irregular (it prints with the `[!]` marker)
# time interval of PBS is monthly
# time interval of vic_elec is 30 minutes
# time interval of pelt is yearly
ggpubr::ggarrange(gafa_stock_plot, PBS_plot, vic_elec_plot, pelt_plot)

# PBS holds many separate series (one per key combination). Plotting the raw
# rows with a single geom_line() joins all of them into one zig-zagging line,
# so total Cost and Scripts are computed per Month first. summarise() on a
# tsibble aggregates within each index value (Month).
pbs_monthly_totals <- PBS %>%
  dplyr::summarise(Cost = sum(Cost), Scripts = sum(Scripts))

ggplot(pbs_monthly_totals, aes(x = Month, y = Cost)) + geom_line()

ggplot(pbs_monthly_totals, aes(x = Month, y = Scripts)) + geom_line()

# 2.2
# For each stock symbol, keep the row(s) on which the closing price reached
# its maximum for that symbol.
gafa_stock %>%
  dplyr::group_by(Symbol) %>%
  dplyr::filter(Close == max(Close))
## # A tsibble: 4 x 8 [!]
## # Key: Symbol [4]
## # Groups: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-10-03 230. 233. 230. 232. 230. 28654800
## 2 AMZN 2018-09-04 2026. 2050. 2013 2040. 2040. 5721100
## 3 FB 2018-07-25 216. 219. 214. 218. 218. 58954200
## 4 GOOG 2018-07-26 1251 1270. 1249. 1268. 1268. 2405600
# 2.3
# a
tute1 <- read.csv('~/Downloads/tute1 - Sheet1.csv')
glimpse(tute1)
## Rows: 100
## Columns: 4
## $ Quarter <chr> "1981-03-01", "1981-06-01", "1981-09-01", "1981-12-01", "1982…
## $ Sales <dbl> 1020.2, 889.2, 795.0, 1003.9, 1057.7, 944.4, 778.5, 932.5, 99…
## $ AdBudget <dbl> 659.2, 589.0, 512.5, 614.1, 647.2, 602.0, 530.7, 608.4, 637.9…
## $ GDP <dbl> 251.8, 290.9, 290.8, 292.4, 279.1, 254.0, 295.6, 271.7, 259.6…
tute1$Quarter <- as.Date(tute1$Quarter)
# b
# The observations are three months apart (see the Quarter values above), so
# the index is built with yearquarter(). Using yearmonth() here would give a
# tsibble with a 3-month interval instead of a quarterly one.
mytimeseries <- tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)
# c
# Plot from the tsibble built in (b). The plots are stored under their own
# names so that mytimeseries keeps holding the data.
tute1_facet_plot <- mytimeseries %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, color = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")
tute1_facet_plot

# without facet_grid
tute1_line_plot <- mytimeseries %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, color = name)) +
  geom_line()
tute1_line_plot

# a.
# install the USgas package
# install.packages('USgas')
library(USgas)
# b.
# create a tsibble from us_total with year as the index and state as the key
class(us_total)
## [1] "data.frame"
us_total_tsibble <- as_tsibble(us_total, key = state, index = year)
class(us_total_tsibble)
## [1] "tbl_ts" "tbl_df" "tbl" "data.frame"
# Box plot of y for each of the six New England states.
new_england_states <- c(
  "Maine", "Vermont", "New Hampshire",
  "Massachusetts", "Connecticut", "Rhode Island"
)
us_total_tsibble %>%
  filter(state %in% new_england_states) %>%
  ggplot(aes(x = state, y = y)) +
  geom_boxplot()

library(readxl)
# a.
# Read the spreadsheet copy of the tourism data and show it.
tourism_path <- '~/Downloads/tourism.xlsx'
tourism <- readxl::read_excel(tourism_path)
print(tourism)
## # A tibble: 24,320 x 5
## Quarter Region State Purpose Trips
## <chr> <chr> <chr> <chr> <dbl>
## 1 1998-01-01 Adelaide South Australia Business 135.
## 2 1998-04-01 Adelaide South Australia Business 110.
## 3 1998-07-01 Adelaide South Australia Business 166.
## 4 1998-10-01 Adelaide South Australia Business 127.
## 5 1999-01-01 Adelaide South Australia Business 137.
## 6 1999-04-01 Adelaide South Australia Business 200.
## 7 1999-07-01 Adelaide South Australia Business 169.
## 8 1999-10-01 Adelaide South Australia Business 134.
## 9 2000-01-01 Adelaide South Australia Business 154.
## 10 2000-04-01 Adelaide South Australia Business 169.
## # … with 24,310 more rows
# The version shipped with the tsibble package, for comparison.
print(tsibble::tourism)
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # … with 24,310 more rows
# b. create a tsibble which is identical to the version from the tsibble package
# Quarter is read as text, so it is converted to a quarterly index first; the
# key is the same Region/State/Purpose combination shown in the printout above.
tourism_tsbl <- as_tsibble(
  mutate(tourism, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)
# c. find what combination of Region and Purpose had the maximum number of overnight trips on average
# Average Trips within each Region/Purpose pair, then keep the pair(s) whose
# average equals the overall maximum.
tourism %>%
  group_by(Region, Purpose) %>%
  summarize(mean = mean(Trips)) %>%
  ungroup() %>%
  filter(mean == max(mean))
## `summarise()` has grouped output by 'Region'. You can override using the `.groups` argument.
## # A tibble: 1 x 3
## Region Purpose mean
## <chr> <chr> <dbl>
## 1 Sydney Visiting 747.
# d. Create a new tsibble which combines the Purposes and Regions and just has total trips by State
# Summarising the tsibble (rather than the raw tibble) keeps the Quarter index,
# so the result is a tsibble keyed by State with one total per State per
# Quarter, with Region and Purpose summed away.
tourism_by_state <- tourism_tsbl %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips))
# Grand totals over the whole period, largest first, to see which State
# accounts for the most trips.
tourism %>% group_by(State) %>% summarise(total = sum(Trips)) %>% arrange(desc(total))
## # A tibble: 8 x 2
## State total
## <chr> <dbl>
## 1 New South Wales 557367.
## 2 Victoria 390463.
## 3 Queensland 386643.
## 4 Western Australia 147820.
## 5 South Australia 118151.
## 6 Tasmania 54137.
## 7 ACT 41007.
## 8 Northern Territory 28614.
# New South Wales is the State with the highest total
aus_retail <- tsibbledata::aus_retail
class(aus_retail)
## [1] "tbl_ts" "tbl_df" "tbl" "data.frame"
# Seed the RNG so the randomly chosen series is the same on every run.
set.seed(1024)
# `Series ID` contains a space, so it must be written with backticks. Quoting
# it as 'Series ID' compares that literal string with the sampled ID, which is
# FALSE for every row and leaves the result empty.
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
# Plot only the sampled series; drawing every series of aus_retail in one
# geom_line() joins unrelated series into a single unreadable line.
ggplot(myseries, aes(x = Month, y = Turnover)) + geom_line()

myseries %>% autoplot(Turnover)

# Distinct State values present in aus_retail.
unique(aus_retail[["State"]])
## [1] "Australian Capital Territory" "New South Wales"
## [3] "Northern Territory" "Queensland"
## [5] "South Australia" "Tasmania"
## [7] "Victoria" "Western Australia"
# Distinct Industry values present in aus_retail.
unique(aus_retail[["Industry"]])
## [1] "Cafes, restaurants and catering services"
## [2] "Cafes, restaurants and takeaway food services"
## [3] "Clothing retailing"
## [4] "Clothing, footwear and personal accessory retailing"
## [5] "Department stores"
## [6] "Electrical and electronic goods retailing"
## [7] "Food retailing"
## [8] "Footwear and other personal accessory retailing"
## [9] "Furniture, floor coverings, houseware and textile goods retailing"
## [10] "Hardware, building and garden supplies retailing"
## [11] "Household goods retailing"
## [12] "Liquor retailing"
## [13] "Newspaper and book retailing"
## [14] "Other recreational goods retailing"
## [15] "Other retailing"
## [16] "Other retailing n.e.c."
## [17] "Other specialised food retailing"
## [18] "Pharmaceutical, cosmetic and toiletry goods retailing"
## [19] "Supermarket and grocery stores"
## [20] "Takeaway food services"
# All three plots below use the same series: Turnover for the
# "Supermarket and grocery stores" industry in New South Wales.
nsw_supermarkets <- aus_retail %>%
  filter(
    State == "New South Wales",
    Industry == "Supermarket and grocery stores"
  )

# gg_season
feasts::gg_season(nsw_supermarkets, Turnover, period = "year")

# gg_subseries
feasts::gg_subseries(nsw_supermarkets, Turnover, period = "year")

# gg_lag
feasts::gg_lag(nsw_supermarkets, Turnover, geom = 'point')
