Load packages and data

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(fpp3)
## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──
## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ tidyr       1.2.0     ✔ tsibbledata 0.4.1
## ✔ lubridate   1.8.0     ✔ feasts      0.3.0
## ✔ ggplot2     3.3.6     ✔ fable       0.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ readr   2.1.2     ✔ stringr 1.4.1
## ✔ purrr   0.3.4     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ tsibble::intersect()     masks lubridate::intersect(), base::intersect()
## ✖ tsibble::interval()      masks lubridate::interval()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ tsibble::setdiff()       masks lubridate::setdiff(), base::setdiff()
## ✖ tsibble::union()         masks lubridate::union(), base::union()
library(moments)
library(tsibble)
library(tsibbledata)
library(readxl)
library(USgas)

Questions

Exercise 1

    #Question 1
    #1, Convert the data set into tsibble
    tute1 <- read_csv("C:/Users/harle/OneDrive/Desktop/tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Quarter
## dbl (3): Sales, AdBudget, GDP
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
    View(tute1)
    tute_ts <- tute1 %>%
      mutate(Quarter = yearmonth(Quarter)) %>%
      as_tsibble(index = Quarter)

    #2, Construct a time series plot (geom_line()) for each three series
num_2 <- tute_ts <- tute1 %>%
      ggplot(aes(x = Quarter, y = Sales, group = 1)) +
      geom_line() 
num_2

num_2_2 <- tute_ts <- tute1 %>%
      ggplot(aes(x = Quarter, y = AdBudget, group = 1)) +
      geom_line() 
num_2_2

num_2_3 <- tute_ts <- tute1 %>%
      ggplot(aes(x = Quarter, y = GDP, group = 1)) +
      geom_line() 
num_2_3

    #3, Construct a time series plot facet_grid() for all (together) three series (transform the data using pivot_longer)
    num_3 <- tute_ts <- tute1 %>%
      pivot_longer(-Quarter) %>%
      ggplot(aes(x = Quarter, y = value, colour = name, group = 1)) +
      geom_line() +
      facet_grid(name ~., scales = "free_y")
    num_3

Exercise 2

    install.packages("USgas")
## Warning: package 'USgas' is in use and will not be installed
    library(USgas)
    us_tsibble <- us_total %>%
     as_tsibble(index = year,
                 key = state)

    northeast <- us_tsibble %>%
      filter(state == "Maine" | state == "Vermont" | state == "New Hampshire" | state == "Massachusetts" | state == "Connecticut" | state == "Rhode Island")

    ggplot(data = northeast) + geom_line(aes(x = year, y = y, color = state)) +
      labs(x = "year", y = "annual consumption")

Exercise 3

    install.packages("readxl")
## Warning: package 'readxl' is in use and will not be installed
    library(readxl)
    tourism1 <- read_excel("C:/Users/harle/OneDrive/Desktop/Class Files/Fall 2022/Forecasting/tourism.xlsx")
    View(tourism)
    #myseries$Quarter<-as.Date(myseries$Quarter) # it is not necessary to do that

    myseries<-tourism1 %>%
      mutate(quarter = yearquarter(Quarter))%>%
      as_tsibble(index = quarter, 
                          key= c(Region, State, Purpose))

    tourism1
## # A tibble: 24,320 × 5
##    Quarter    Region   State           Purpose  Trips
##    <chr>      <chr>    <chr>           <chr>    <dbl>
##  1 1998-01-01 Adelaide South Australia Business  135.
##  2 1998-04-01 Adelaide South Australia Business  110.
##  3 1998-07-01 Adelaide South Australia Business  166.
##  4 1998-10-01 Adelaide South Australia Business  127.
##  5 1999-01-01 Adelaide South Australia Business  137.
##  6 1999-04-01 Adelaide South Australia Business  200.
##  7 1999-07-01 Adelaide South Australia Business  169.
##  8 1999-10-01 Adelaide South Australia Business  134.
##  9 2000-01-01 Adelaide South Australia Business  154.
## 10 2000-04-01 Adelaide South Australia Business  169.
## # … with 24,310 more rows
    toursible <- tourism %>% 
      mutate(Quarter = yearquarter(Quarter)) %>% 
      as_tsibble(index=Quarter, key= c(Region, State, Purpose))

    toursible
## # A tsibble: 24,320 x 5 [1Q]
## # Key:       Region, State, Purpose [304]
##    Quarter Region   State           Purpose  Trips
##      <qtr> <chr>    <chr>           <chr>    <dbl>
##  1 1998 Q1 Adelaide South Australia Business  135.
##  2 1998 Q2 Adelaide South Australia Business  110.
##  3 1998 Q3 Adelaide South Australia Business  166.
##  4 1998 Q4 Adelaide South Australia Business  127.
##  5 1999 Q1 Adelaide South Australia Business  137.
##  6 1999 Q2 Adelaide South Australia Business  200.
##  7 1999 Q3 Adelaide South Australia Business  169.
##  8 1999 Q4 Adelaide South Australia Business  134.
##  9 2000 Q1 Adelaide South Australia Business  154.
## 10 2000 Q2 Adelaide South Australia Business  169.
## # … with 24,310 more rows
    #Find what combination of `Region` and `Purpose` had the maximum number of overnight trips on average.
    toursible%>%group_by(Region, Purpose)%>%summarise(Trips = mean(Trips))%>%ungroup()%>%filter(Trips == max(Trips))
## # A tsibble: 1 x 4 [1Q]
## # Key:       Region, Purpose [1]
##   Region    Purpose  Quarter Trips
##   <chr>     <chr>      <qtr> <dbl>
## 1 Melbourne Visiting 2017 Q4  985.
    #Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.

    new_tsibble <- toursible %>%
      group_by(State) %>% summarise(Trips = sum(Trips))%>%
      ungroup()
    new_tsibble
## # A tsibble: 640 x 3 [1Q]
## # Key:       State [8]
##    State Quarter Trips
##    <chr>   <qtr> <dbl>
##  1 ACT   1998 Q1  551.
##  2 ACT   1998 Q2  416.
##  3 ACT   1998 Q3  436.
##  4 ACT   1998 Q4  450.
##  5 ACT   1999 Q1  379.
##  6 ACT   1999 Q2  558.
##  7 ACT   1999 Q3  449.
##  8 ACT   1999 Q4  595.
##  9 ACT   2000 Q1  600.
## 10 ACT   2000 Q2  557.
## # … with 630 more rows

Exercise 4

aus_arrivals
## # A tsibble: 508 x 3 [1Q]
## # Key:       Origin [4]
##    Quarter Origin Arrivals
##      <qtr> <chr>     <int>
##  1 1981 Q1 Japan     14763
##  2 1981 Q2 Japan      9321
##  3 1981 Q3 Japan     10166
##  4 1981 Q4 Japan     19509
##  5 1982 Q1 Japan     17117
##  6 1982 Q2 Japan     10617
##  7 1982 Q3 Japan     11737
##  8 1982 Q4 Japan     20961
##  9 1983 Q1 Japan     20671
## 10 1983 Q2 Japan     12235
## # … with 498 more rows
view(aus_arrivals)

aus_arrivals %>% autoplot(Arrivals)

aus_arrivals %>% gg_season(Arrivals)

aus_arrivals %>% gg_subseries(Arrivals)

#Question 4 Part 2 Answer: Japanese arrivals appear to be dramatically decreasing relative to the other countries. 

Exercise 5

    aus_retail
## # A tsibble: 64,532 x 5 [1M]
## # Key:       State, Industry [152]
##    State                        Industry                Serie…¹    Month Turno…²
##    <chr>                        <chr>                   <chr>      <mth>   <dbl>
##  1 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Apr     4.4
##  2 Australian Capital Territory Cafes, restaurants and… A33498… 1982 May     3.4
##  3 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Jun     3.6
##  4 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Jul     4  
##  5 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Aug     3.6
##  6 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Sep     4.2
##  7 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Oct     4.8
##  8 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Nov     5.4
##  9 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Dec     6.9
## 10 Australian Capital Territory Cafes, restaurants and… A33498… 1983 Jan     3.8
## # … with 64,522 more rows, and abbreviated variable names ¹​`Series ID`,
## #   ²​Turnover
    set.seed(7465)
    myretailseed <- aus_retail %>%
      filter(`Series ID` == sample(aus_retail$`Series ID`,1))
    autoplot(myretailseed, Turnover)

    gg_season(myretailseed, Turnover)

    gg_subseries(myretailseed, Turnover)

    gg_lag(myretailseed, Turnover)

    myretailseed %>% 
        ACF(Turnover) %>%
        autoplot()

    #Question 5 Part 2 Answer: The autoplot shows that there is clear seasonality and a strong upward trend. 
    #The gg_season shows seasonality, with peaks March, June, and the holiday season at the end of the year.
    #The gg_subseries shows a strong upward trend from 1990-2020. 

Exercise 6

    gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key:       Symbol [4]
##    Symbol Date        Open  High   Low Close Adj_Close    Volume
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>
##  1 AAPL   2014-01-02  79.4  79.6  78.9  79.0      67.0  58671200
##  2 AAPL   2014-01-03  79.0  79.1  77.2  77.3      65.5  98116900
##  3 AAPL   2014-01-06  76.8  78.1  76.2  77.7      65.9 103152700
##  4 AAPL   2014-01-07  77.8  78.0  76.8  77.1      65.4  79302300
##  5 AAPL   2014-01-08  77.0  77.9  77.0  77.6      65.8  64632400
##  6 AAPL   2014-01-09  78.1  78.1  76.5  76.6      65.0  69787200
##  7 AAPL   2014-01-10  77.1  77.3  75.9  76.1      64.5  76244000
##  8 AAPL   2014-01-13  75.7  77.5  75.7  76.5      64.9  94623200
##  9 AAPL   2014-01-14  76.9  78.1  76.8  78.1      66.1  83140400
## 10 AAPL   2014-01-15  79.1  80.0  78.8  79.6      67.5  97909700
## # … with 5,022 more rows
    facebook <- as.data.frame(gafa_stock) %>% group_by(Symbol) %>% 
      filter(Symbol == "FB")
    mean(facebook$Close)
## [1] 120.4625
    sd(facebook$Close)
## [1] 41.32364
    vec1 <- pull(facebook, Close)
    FBmedian <- median(vec1)
    FBmedian
## [1] 117.675
    FBmeandif <- mean(vec1, na.rm = TRUE)
    FBmeandif
## [1] 120.4625
    FBSDdif <- sd(vec1, na.rm = TRUE)
    FBSDdif
## [1] 41.32364
    FBkurtDIF <- kurtosis(vec1, na.rm = TRUE)
    FBkurtDIF
## [1] 1.836712
    FBskewDIF <- skewness(vec1, na.rm = TRUE)
    FBskewDIF
## [1] 0.2279339
    fbMEANbyHAND <- sum(vec1, na.rm = TRUE)/nrow(facebook)
    fbMEANbyHAND
## [1] 120.4625
    fbSDbyHAND <- sqrt(sum((vec1-mean(vec1))^2/(length(vec1)-1)))
    fbSDbyHAND
## [1] 41.32364
    fbKURTbyHAND <- ((sum((vec1-mean(vec1))^4))/length(vec1))/(sum((vec1-mean(vec1))^2/length(vec1))^2)
    fbKURTbyHAND
## [1] 1.836712
    fbSKEWbyHAND <- (3*(fbMEANbyHAND-FBmedian))/fbSDbyHAND
    fbSKEWbyHAND
## [1] 0.2023634

Exercise 7

    GME <- read_excel("GME.xlsx")
    View(GME)

    GMEadj <- GME %>% select(-(Open:Close)) %>% 
      select(-(Volume))
    GMEadj2 <- GMEadj %>%
      mutate(Date =as_date(Date)) %>%
      as_tsibble(index = "Date", key = "Adj Close")

    GMEadj3 <- GMEadj2 %>%
      filter(Date >= as.Date("2022-06-01") & Date <= as.Date("2022-06-30")) %>%
      ggplot(aes(x = Date, y = `Adj Close`)) +
      geom_line()
    GMEadj3

    GMEjan <- GMEadj2 %>%
      filter(Date >= as.Date("2022-01-01") & Date <= as.Date("2022-01-31"))
    GMEfeb <- GMEadj2 %>%
      filter(Date >= as.Date("2022-02-01") & Date <= as.Date("2022-02-28"))
    GMEmar <- GMEadj2 %>%
      filter(Date >= as.Date("2022-03-01") & Date <= as.Date("2022-03-31"))
    GMEap <- GMEadj2 %>%
      filter(Date >= as.Date("2022-04-01") & Date <= as.Date("2022-04-30")) 
    GMEmay <- GMEadj2 %>%
      filter(Date >= as.Date("2022-05-01") & Date <= as.Date("2022-05-31"))
    GMEjune <- GMEadj2 %>%
      filter(Date >= as.Date("2022-06-01") & Date <= as.Date("2022-06-30"))
    GMEjul <- GMEadj2 %>%
      filter(Date >= as.Date("2022-07-01") & Date <= as.Date("2022-07-31"))
    GMEaug <- GMEadj2 %>%
      filter(Date >= as.Date("2022-08-01") & Date <= as.Date("2022-08-31"))

    Tjan_Vec <- pull(GMEjan, "Adj Close")
    Tjan_mean <- mean(Tjan_Vec)
    Tjan_mean
## [1] 29.4935
    Tfeb_Vec <- pull(GMEfeb, "Adj Close")
    Tfeb_mean <- mean(Tfeb_Vec)
    Tfeb_mean
## [1] 29.19882
    Tmar_Vec <- pull(GMEmar, "Adj Close")
    Tmar_mean <- mean(Tmar_Vec)
    Tmar_mean
## [1] 29.93022
    Tap_Vec <- pull(GMEap, "Adj Close")
    Tap_mean <- mean(Tap_Vec)
    Tap_mean
## [1] 36.32187
    Tmay_Vec <- pull(GMEmay, "Adj Close")
    Tmay_mean <- mean(Tmay_Vec)
    Tmay_mean
## [1] 26.56976
    Tjune_Vec <- pull(GMEjune, "Adj Close")
    Tjune_mean <- mean(Tjune_Vec)
    Tjune_mean
## [1] 32.74143
    Tjul_Vec <- pull(GMEjul, "Adj Close")
    Tjul_mean <- mean(Tjul_Vec)
    Tjul_mean
## [1] 34.21137
    Taug_Vec <- pull(GMEaug, "Adj Close")
    Taug_mean <- mean(Taug_Vec)
    Taug_mean
## [1] 36.59739
    Tjan_var <- var(Tjan_Vec)
    Tjan_var
## [1] 19.61737
    Tfeb_var <- var(Tfeb_Vec)
    Tfeb_var
## [1] 5.448255
    Tmar_var <- var(Tmar_Vec)
    Tmar_var
## [1] 67.36374
    Tap_var <- var(Tap_Vec)
    Tap_var
## [1] 9.093501
    Tmay_var <- var(Tmay_Vec)
    Tmay_var
## [1] 16.00114
    Tjune_var <- var(Tjune_Vec)
    Tjune_var
## [1] 3.520998
    Tjul_var <- var(Tjul_Vec)
    Tjul_var
## [1] 7.117174
    Taug_var <- var(Taug_Vec)
    Taug_var
## [1] 18.13404