# install.packages('ggplot2')
# install.packages('tsibbledata')
# install.packages('tsibble')
# install.packages('dplyr')
# install.packages('ggfortify')
# install.packages('feasts')


library(ggplot2)
library(tsibble)
## 
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggfortify)
library(tidyr)
library(feasts)
## Loading required package: fabletools
# Each data set is taken directly from the tsibbledata namespace.
gafa_stock <- tsibbledata::gafa_stock



# Use the help function to explore what the series gafa_stock, PBS, vic_elec and pelt represent

help("gafa_stock")

# an alternative way to write the code is:
?gafa_stock
# GAFA stock prices

help("PBS")
# Monthly Medicare Australia prescription data
PBS <- tsibbledata::PBS

help("vic_elec")
# Half-hourly electricity demand for Victoria, Australia
vic_elec <- tsibbledata::vic_elec

help("pelt")
# Pelt trading records
pelt <- tsibbledata::pelt

# a. Use autoplot() to plot some of the series in these data sets
# gafa_stock is already a tsibble ("tbl_ts"), keyed by Symbol.
class(gafa_stock)
## [1] "tbl_ts"     "tbl_df"     "tbl"        "data.frame"
tibble::glimpse(gafa_stock)
## Rows: 5,032
## Columns: 8
## Key: Symbol [4]
## $ Symbol    <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAP…
## $ Date      <date> 2014-01-02, 2014-01-03, 2014-01-06, 2014-01-07, 2014-01-08,…
## $ Open      <dbl> 79.38286, 78.98000, 76.77857, 77.76000, 76.97285, 78.11429, …
## $ High      <dbl> 79.57571, 79.10000, 78.11429, 77.99429, 77.93714, 78.12286, …
## $ Low       <dbl> 78.86000, 77.20428, 76.22857, 76.84571, 76.95571, 76.47857, …
## $ Close     <dbl> 79.01857, 77.28286, 77.70428, 77.14857, 77.63715, 76.64571, …
## $ Adj_Close <dbl> 66.96433, 65.49342, 65.85053, 65.37959, 65.79363, 64.95345, …
## $ Volume    <dbl> 58671200, 98116900, 103152700, 79302300, 64632400, 69787200,…
# Plot the tsibble directly instead of converting it with ts(). ts() coerces
# the whole table to a numeric matrix indexed 1..n, so the four stocks would
# be drawn end to end as one series with no dates on the x-axis.
# Wrapping the data in tsibble() is unnecessary as well: tsibble() builds a
# new tsibble from column vectors, and gafa_stock is one already.

# One line per Symbol, dates on the x-axis.
autoplot(gafa_stock, Open)

gafa_stock %>% autoplot(Close)

# b. What is the time interval of each series?



# Reshape each data set to long form: the two measured columns are stacked
# into a name column and a value column so they can share one plot.
pelt_long <- pelt %>%
  tidyr::pivot_longer(
    cols = c(Hare, Lynx),
    names_to = "Animal",
    values_to = "Number_of_Pelts"
  )

# PBS %>% dplyr::mutate(scale_Cost = Cost - mean(Cost))
PBS_long <- PBS %>%
  tidyr::pivot_longer(
    cols = c(Scripts, Cost),
    names_to = "Measure",
    values_to = "Number"
  )
vic_elec_long <- vic_elec %>%
  tidyr::pivot_longer(
    cols = c(Demand, Temperature),
    names_to = "Measure",
    values_to = "Value"
  )
# 2.1

# One panel per data set; the panels are combined into a single figure later.

# Opening price over time, one coloured line per Symbol.
gafa_stock_plot <- ggplot(gafa_stock, aes(x = Date, y = Open, color = Symbol)) +
  geom_line() +
  ggtitle("gafa")

# PBS holds many series per month (keyed by Concession, Type, ATC1 and ATC2).
# Each series needs its own group, otherwise geom_line() joins every
# observation into a single zig-zag path.
PBS_plot <- ggplot(
  PBS_long,
  aes(x = Month, y = Number, group = interaction(Concession, Type, ATC1, ATC2))
) +
  geom_line() +
  # `scales` spelled out in full rather than relying on partial matching.
  facet_wrap(~Measure, scales = "free_y") +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5)) +
  ggtitle("PBS")

# One facet per measure, each with its own y-axis.
vic_elec_plot <- ggplot(vic_elec_long, aes(x = Time, y = Value)) +
  geom_line() +
  facet_wrap(~Measure, scales = "free_y") +
  ggtitle("vic_elec")

# Pelts traded per year, one coloured line per Animal.
pelt_plot <- ggplot(pelt_long, aes(x = Year, y = Number_of_Pelts, color = Animal)) +
  geom_line() +
  ggtitle("pelt")

# time interval of gafa_stock is daily
# time interval of PBS is monthly
# time interval of vic_elec is 30 minutes
# time interval of pelt is yearly

# Combine the four panels into one figure.
ggpubr::ggarrange(gafa_stock_plot, PBS_plot, vic_elec_plot, pelt_plot)

# PBS contains one series per Concession/Type/ATC1/ATC2 combination. Group by
# that combination so every series is drawn as its own line instead of all
# rows being connected into one path.
ggplot(
  PBS,
  aes(x = Month, y = Cost, group = interaction(Concession, Type, ATC1, ATC2))
) +
  geom_line()

ggplot(
  PBS,
  aes(x = Month, y = Scripts, group = interaction(Concession, Type, ATC1, ATC2))
) +
  geom_line()

# 2.2



# Within each Symbol, keep the row(s) whose Close equals that Symbol's maximum.
dplyr::filter(dplyr::group_by(gafa_stock, Symbol), Close == max(Close))
## # A tsibble: 4 x 8 [!]
## # Key:       Symbol [4]
## # Groups:    Symbol [4]
##   Symbol Date        Open  High   Low Close Adj_Close   Volume
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 AAPL   2018-10-03  230.  233.  230.  232.      230. 28654800
## 2 AMZN   2018-09-04 2026. 2050. 2013  2040.     2040.  5721100
## 3 FB     2018-07-25  216.  219.  214.  218.      218. 58954200
## 4 GOOG   2018-07-26 1251  1270. 1249. 1268.     1268.  2405600
# 2.3

# a

# Read the tute1 spreadsheet export; Quarter arrives as character.
tute1 <- read.csv("~/Downloads/tute1 - Sheet1.csv")

glimpse(tute1)
## Rows: 100
## Columns: 4
## $ Quarter  <chr> "1981-03-01", "1981-06-01", "1981-09-01", "1981-12-01", "1982…
## $ Sales    <dbl> 1020.2, 889.2, 795.0, 1003.9, 1057.7, 944.4, 778.5, 932.5, 99…
## $ AdBudget <dbl> 659.2, 589.0, 512.5, 614.1, 647.2, 602.0, 530.7, 608.4, 637.9…
## $ GDP      <dbl> 251.8, 290.9, 290.8, 292.4, 279.1, 254.0, 295.6, 271.7, 259.6…
# Turn the Quarter strings into Date values.
tute1 <- tute1 %>%
  mutate(Quarter = as.Date(Quarter))

# b

# The observations are quarterly, so build the index with yearquarter();
# yearmonth() would describe them as monthly periods spaced three months apart.
mytimeseries <- tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter)

# c

# Plot from the tsibble, and store each plot under its own name so that
# mytimeseries keeps holding the data rather than being replaced by a plot.
tute1_facet_plot <- mytimeseries %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, color = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")
tute1_facet_plot

# without facet_grid

# All three series share a single y-axis here.
tute1_single_plot <- mytimeseries %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, color = name)) +
  geom_line()
tute1_single_plot

# a. 
# install the USgas package

# install.packages('USgas')
library(USgas)

# b. 

# create a tsibble from us_total with year as the index and state as the key
class(us_total)
## [1] "data.frame"
# year is the time index; state identifies each series.
us_total_tsibble <- us_total %>%
  as_tsibble(key = state, index = year)

class(us_total_tsibble)
## [1] "tbl_ts"     "tbl_df"     "tbl"        "data.frame"
# Box plot of y for the six listed states.
us_total_tsibble %>%
  filter(
    state %in% c(
      "Maine", "Vermont", "New Hampshire",
      "Massachusetts", "Connecticut", "Rhode Island"
    )
  ) %>%
  ggplot(aes(x = state, y = y)) +
  geom_boxplot()

library(readxl)

# a.
# Load the tourism workbook from disk, then print it.
tourism <- "~/Downloads/tourism.xlsx" %>%
  readxl::read_excel()

tourism
## # A tibble: 24,320 x 5
##    Quarter    Region   State           Purpose  Trips
##    <chr>      <chr>    <chr>           <chr>    <dbl>
##  1 1998-01-01 Adelaide South Australia Business  135.
##  2 1998-04-01 Adelaide South Australia Business  110.
##  3 1998-07-01 Adelaide South Australia Business  166.
##  4 1998-10-01 Adelaide South Australia Business  127.
##  5 1999-01-01 Adelaide South Australia Business  137.
##  6 1999-04-01 Adelaide South Australia Business  200.
##  7 1999-07-01 Adelaide South Australia Business  169.
##  8 1999-10-01 Adelaide South Australia Business  134.
##  9 2000-01-01 Adelaide South Australia Business  154.
## 10 2000-04-01 Adelaide South Australia Business  169.
## # … with 24,310 more rows
# Print the copy of tourism that ships with the tsibble package, for
# comparison with the spreadsheet version read in above.
tsibble::tourism
## # A tsibble: 24,320 x 5 [1Q]
## # Key:       Region, State, Purpose [304]
##    Quarter Region   State           Purpose  Trips
##      <qtr> <chr>    <chr>           <chr>    <dbl>
##  1 1998 Q1 Adelaide South Australia Business  135.
##  2 1998 Q2 Adelaide South Australia Business  110.
##  3 1998 Q3 Adelaide South Australia Business  166.
##  4 1998 Q4 Adelaide South Australia Business  127.
##  5 1999 Q1 Adelaide South Australia Business  137.
##  6 1999 Q2 Adelaide South Australia Business  200.
##  7 1999 Q3 Adelaide South Australia Business  169.
##  8 1999 Q4 Adelaide South Australia Business  134.
##  9 2000 Q1 Adelaide South Australia Business  154.
## 10 2000 Q2 Adelaide South Australia Business  169.
## # … with 24,310 more rows
# b. create a tsibble which is identical to the version from the tsibble package
# Convert Quarter to a yearquarter index and key the series by
# Region, State and Purpose.
tourism_tsbl <- as_tsibble(
  mutate(tourism, Quarter = yearquarter(Quarter)),
  key = c(Region, State, Purpose),
  index = Quarter
)
  
  
# c. find what combination of Region and Purpose had the maximum number of overnight trips on average

# Average Trips for every Region/Purpose pair, then keep the pair(s) with the
# largest average.
tourism %>%
  group_by(Region, Purpose) %>%
  summarise(mean = mean(Trips)) %>%
  arrange(desc(mean)) %>%
  ungroup() %>%
  filter(mean == max(mean))
## `summarise()` has grouped output by 'Region'. You can override using the `.groups` argument.
## # A tibble: 1 x 3
##   Region Purpose   mean
##   <chr>  <chr>    <dbl>
## 1 Sydney Visiting  747.
# d. Create a new tsibble which combines the Purposes and Regions and just has total trips by State

# Sum Trips over Region and Purpose within each State. Starting from the
# tsibble keeps the Quarter index, so the result is itself a tsibble keyed by
# State (one total per State per quarter).
tourism_by_state <- tourism_tsbl %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips))

# Overall total per State across all quarters, largest first.
tourism %>% group_by(State) %>% summarise(total = sum(Trips)) %>% arrange(desc(total))
## # A tibble: 8 x 2
##   State                total
##   <chr>                <dbl>
## 1 New South Wales    557367.
## 2 Victoria           390463.
## 3 Queensland         386643.
## 4 Western Australia  147820.
## 5 South Australia    118151.
## 6 Tasmania            54137.
## 7 ACT                 41007.
## 8 Northern Territory  28614.
# New South Wales is the State with the highest total
aus_retail <- tsibbledata::aus_retail

class(aus_retail)
## [1] "tbl_ts"     "tbl_df"     "tbl"        "data.frame"
# Fix the seed so the randomly chosen series below is reproducible.
set.seed(1024)

# Pick one series at random. `Series ID` contains a space, so the column has
# to be written in backticks; in quotes it is only the string "Series ID",
# which never equals an ID, so the filter would return no rows.
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

# aus_retail holds one series per State/Industry pair. Group by that pair so
# each series is drawn as its own line rather than all rows being joined up.
ggplot(
  aus_retail,
  aes(x = Month, y = Turnover, group = interaction(State, Industry))
) +
  geom_line()

aus_retail %>% dplyr::select(Month, Turnover) %>% autoplot(Turnover)

# Distinct values of the State column.
unique(aus_retail[["State"]])
## [1] "Australian Capital Territory" "New South Wales"             
## [3] "Northern Territory"           "Queensland"                  
## [5] "South Australia"              "Tasmania"                    
## [7] "Victoria"                     "Western Australia"
# Distinct values of the Industry column.
unique(aus_retail[["Industry"]])
##  [1] "Cafes, restaurants and catering services"                         
##  [2] "Cafes, restaurants and takeaway food services"                    
##  [3] "Clothing retailing"                                               
##  [4] "Clothing, footwear and personal accessory retailing"              
##  [5] "Department stores"                                                
##  [6] "Electrical and electronic goods retailing"                        
##  [7] "Food retailing"                                                   
##  [8] "Footwear and other personal accessory retailing"                  
##  [9] "Furniture, floor coverings, houseware and textile goods retailing"
## [10] "Hardware, building and garden supplies retailing"                 
## [11] "Household goods retailing"                                        
## [12] "Liquor retailing"                                                 
## [13] "Newspaper and book retailing"                                     
## [14] "Other recreational goods retailing"                               
## [15] "Other retailing"                                                  
## [16] "Other retailing n.e.c."                                           
## [17] "Other specialised food retailing"                                 
## [18] "Pharmaceutical, cosmetic and toiletry goods retailing"            
## [19] "Supermarket and grocery stores"                                   
## [20] "Takeaway food services"
# gg_season

# Seasonal plot of Turnover for New South Wales supermarkets and grocery stores.
aus_retail %>%
  filter(
    State == "New South Wales",
    Industry == "Supermarket and grocery stores"
  ) %>%
  feasts::gg_season(Turnover, period = "year")

# gg_subseries

aus_retail %>%
  filter(
    State == "New South Wales",
    Industry == "Supermarket and grocery stores"
  ) %>%
  feasts::gg_subseries(Turnover, period = "year")

# gg_lag

aus_retail %>%
  filter(
    State == "New South Wales",
    Industry == "Supermarket and grocery stores"
  ) %>%
  feasts::gg_lag(Turnover, geom = "point")