Load packages and data
library(dplyr)
library(fpp3)
library(tidyverse)
library(moments)
library(tsibble)
library(tsibbledata)
library(readxl)
library(USgas)
Questions
Exercise 1
# Question 1
# 1. Convert the data set into a tsibble
tute1 <- read_csv("C:/Users/harle/OneDrive/Desktop/tute1.csv")
## Rows: 100 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Quarter
## dbl (3): Sales, AdBudget, GDP
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(tute1)
tute_ts <- tute1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%  # the data are quarterly, so use yearquarter(), not yearmonth()
  as_tsibble(index = Quarter)
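# Quick check (a sketch): with yearquarter() as the index, the tsibble should report
# a quarterly interval ([1Q]).
interval(tute_ts)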
# 2. Construct a time series plot (geom_line()) for each of the three series
num_2 <- tute_ts %>%
  ggplot(aes(x = Quarter, y = Sales)) +
  geom_line()
num_2

num_2_2 <- tute_ts %>%
  ggplot(aes(x = Quarter, y = AdBudget)) +
  geom_line()
num_2_2

num_2_3 <- tute_ts %>%
  ggplot(aes(x = Quarter, y = GDP)) +
  geom_line()
num_2_3

# 3. Construct a single faceted time series plot (facet_grid()) of all three series together (reshape the data with pivot_longer())
num_3 <- tute_ts %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y")
num_3
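# As a cross-check, fabletools' autoplot() (attached via fpp3) draws the same quarterly
# line plot directly from the tsibble -- a sketch using the tute_ts built above:
tute_ts %>%
  autoplot(Sales) +
  labs(y = "Sales")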

Exercise 2
install.packages("USgas")
## Warning: package 'USgas' is in use and will not be installed
library(USgas)
us_tsibble <- us_total %>%
  as_tsibble(index = year, key = state)
northeast <- us_tsibble %>%
  filter(state %in% c("Maine", "Vermont", "New Hampshire",
                      "Massachusetts", "Connecticut", "Rhode Island"))
ggplot(data = northeast) +
  geom_line(aes(x = year, y = y, color = state)) +
  labs(x = "Year", y = "Annual consumption")
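# Equivalent sketch with autoplot(): state is the key of the tsibble, so autoplot()
# draws one line per state automatically.
northeast %>%
  autoplot(y) +
  labs(x = "Year", y = "Annual consumption")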

Exercise 3
install.packages("readxl")
## Warning: package 'readxl' is in use and will not be installed
library(readxl)
tourism1 <- read_excel("C:/Users/harle/OneDrive/Desktop/Class Files/Fall 2022/Forecasting/tourism.xlsx")
View(tourism1)
# myseries$Quarter <- as.Date(myseries$Quarter)  # not necessary: yearquarter() handles the conversion
myseries <- tourism1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter, key = c(Region, State, Purpose))
tourism1
## # A tibble: 24,320 × 5
## Quarter Region State Purpose Trips
## <chr> <chr> <chr> <chr> <dbl>
## 1 1998-01-01 Adelaide South Australia Business 135.
## 2 1998-04-01 Adelaide South Australia Business 110.
## 3 1998-07-01 Adelaide South Australia Business 166.
## 4 1998-10-01 Adelaide South Australia Business 127.
## 5 1999-01-01 Adelaide South Australia Business 137.
## 6 1999-04-01 Adelaide South Australia Business 200.
## 7 1999-07-01 Adelaide South Australia Business 169.
## 8 1999-10-01 Adelaide South Australia Business 134.
## 9 2000-01-01 Adelaide South Australia Business 154.
## 10 2000-04-01 Adelaide South Australia Business 169.
## # … with 24,310 more rows
toursible <- tourism1 %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(index = Quarter, key = c(Region, State, Purpose))
toursible
## # A tsibble: 24,320 x 5 [1Q]
## # Key: Region, State, Purpose [304]
## Quarter Region State Purpose Trips
## <qtr> <chr> <chr> <chr> <dbl>
## 1 1998 Q1 Adelaide South Australia Business 135.
## 2 1998 Q2 Adelaide South Australia Business 110.
## 3 1998 Q3 Adelaide South Australia Business 166.
## 4 1998 Q4 Adelaide South Australia Business 127.
## 5 1999 Q1 Adelaide South Australia Business 137.
## 6 1999 Q2 Adelaide South Australia Business 200.
## 7 1999 Q3 Adelaide South Australia Business 169.
## 8 1999 Q4 Adelaide South Australia Business 134.
## 9 2000 Q1 Adelaide South Australia Business 154.
## 10 2000 Q2 Adelaide South Australia Business 169.
## # … with 24,310 more rows
# Find what combination of `Region` and `Purpose` had the maximum number of overnight trips on average.
toursible %>%
  group_by(Region, Purpose) %>%
  summarise(Trips = mean(Trips)) %>%
  ungroup() %>%
  filter(Trips == max(Trips))
## # A tsibble: 1 x 4 [1Q]
## # Key: Region, Purpose [1]
## Region Purpose Quarter Trips
## <chr> <chr> <qtr> <dbl>
## 1 Melbourne Visiting 2017 Q4 985.
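# Note: summarise() on a tsibble keeps the Quarter index, so the result above is the
# single largest quarterly value rather than the largest average over time. Dropping
# the index first (a sketch) averages across all quarters per Region/Purpose pair:
toursible %>%
  as_tibble() %>%
  group_by(Region, Purpose) %>%
  summarise(Trips = mean(Trips), .groups = "drop") %>%
  filter(Trips == max(Trips))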
# Create a new tsibble which combines the Purposes and Regions, and just has total trips by State.
new_tsibble <- toursible %>%
  group_by(State) %>%
  summarise(Trips = sum(Trips)) %>%
  ungroup()
new_tsibble
## # A tsibble: 640 x 3 [1Q]
## # Key: State [8]
## State Quarter Trips
## <chr> <qtr> <dbl>
## 1 ACT 1998 Q1 551.
## 2 ACT 1998 Q2 416.
## 3 ACT 1998 Q3 436.
## 4 ACT 1998 Q4 450.
## 5 ACT 1999 Q1 379.
## 6 ACT 1999 Q2 558.
## 7 ACT 1999 Q3 449.
## 8 ACT 1999 Q4 595.
## 9 ACT 2000 Q1 600.
## 10 ACT 2000 Q2 557.
## # … with 630 more rows
Exercise 4
aus_arrivals
## # A tsibble: 508 x 3 [1Q]
## # Key: Origin [4]
## Quarter Origin Arrivals
## <qtr> <chr> <int>
## 1 1981 Q1 Japan 14763
## 2 1981 Q2 Japan 9321
## 3 1981 Q3 Japan 10166
## 4 1981 Q4 Japan 19509
## 5 1982 Q1 Japan 17117
## 6 1982 Q2 Japan 10617
## 7 1982 Q3 Japan 11737
## 8 1982 Q4 Japan 20961
## 9 1983 Q1 Japan 20671
## 10 1983 Q2 Japan 12235
## # … with 498 more rows
view(aus_arrivals)
aus_arrivals %>% autoplot(Arrivals)

aus_arrivals %>% gg_season(Arrivals)

aus_arrivals %>% gg_subseries(Arrivals)

# Question 4 Part 2 Answer: Japanese arrivals appear to decline over the later part of the sample, in contrast to the generally upward trends of the other origins.
Exercise 5
aus_retail
## # A tsibble: 64,532 x 5 [1M]
## # Key: State, Industry [152]
## State Industry Serie…¹ Month Turno…²
## <chr> <chr> <chr> <mth> <dbl>
## 1 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Apr 4.4
## 2 Australian Capital Territory Cafes, restaurants and… A33498… 1982 May 3.4
## 3 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Jun 3.6
## 4 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Jul 4
## 5 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Aug 3.6
## 6 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Sep 4.2
## 7 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Oct 4.8
## 8 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Nov 5.4
## 9 Australian Capital Territory Cafes, restaurants and… A33498… 1982 Dec 6.9
## 10 Australian Capital Territory Cafes, restaurants and… A33498… 1983 Jan 3.8
## # … with 64,522 more rows, and abbreviated variable names ¹`Series ID`,
## # ²Turnover
set.seed(7465)
myretailseed <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
autoplot(myretailseed, Turnover)

gg_season(myretailseed, Turnover)

gg_subseries(myretailseed, Turnover)

gg_lag(myretailseed, Turnover)

myretailseed %>%
  ACF(Turnover) %>%
  autoplot()

# Question 5 Part 2 Answer: The autoplot shows clear seasonality and a strong upward trend.
# The gg_season plot shows seasonal peaks in March, June, and the holiday season at the end of the year.
# The gg_subseries plot shows a strong upward trend from 1990 to 2020.
Exercise 6
gafa_stock
## # A tsibble: 5,032 x 8 [!]
## # Key: Symbol [4]
## Symbol Date Open High Low Close Adj_Close Volume
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2014-01-02 79.4 79.6 78.9 79.0 67.0 58671200
## 2 AAPL 2014-01-03 79.0 79.1 77.2 77.3 65.5 98116900
## 3 AAPL 2014-01-06 76.8 78.1 76.2 77.7 65.9 103152700
## 4 AAPL 2014-01-07 77.8 78.0 76.8 77.1 65.4 79302300
## 5 AAPL 2014-01-08 77.0 77.9 77.0 77.6 65.8 64632400
## 6 AAPL 2014-01-09 78.1 78.1 76.5 76.6 65.0 69787200
## 7 AAPL 2014-01-10 77.1 77.3 75.9 76.1 64.5 76244000
## 8 AAPL 2014-01-13 75.7 77.5 75.7 76.5 64.9 94623200
## 9 AAPL 2014-01-14 76.9 78.1 76.8 78.1 66.1 83140400
## 10 AAPL 2014-01-15 79.1 80.0 78.8 79.6 67.5 97909700
## # … with 5,022 more rows
facebook <- gafa_stock %>%
  as_tibble() %>%
  filter(Symbol == "FB")
mean(facebook$Close)
## [1] 120.4625
sd(facebook$Close)
## [1] 41.32364
vec1 <- pull(facebook, Close)
FBmedian <- median(vec1)
FBmedian
## [1] 117.675
FBmeandif <- mean(vec1, na.rm = TRUE)
FBmeandif
## [1] 120.4625
FBSDdif <- sd(vec1, na.rm = TRUE)
FBSDdif
## [1] 41.32364
FBkurtDIF <- kurtosis(vec1, na.rm = TRUE)
FBkurtDIF
## [1] 1.836712
FBskewDIF <- skewness(vec1, na.rm = TRUE)
FBskewDIF
## [1] 0.2279339
fbMEANbyHAND <- sum(vec1, na.rm = TRUE)/nrow(facebook)
fbMEANbyHAND
## [1] 120.4625
fbSDbyHAND <- sqrt(sum((vec1-mean(vec1))^2/(length(vec1)-1)))
fbSDbyHAND
## [1] 41.32364
fbKURTbyHAND <- ((sum((vec1-mean(vec1))^4))/length(vec1))/(sum((vec1-mean(vec1))^2/length(vec1))^2)
fbKURTbyHAND
## [1] 1.836712
# Pearson's second skewness coefficient, 3 * (mean - median) / sd; this is a different
# estimator from the moment-based skewness() above, hence the slightly different value.
fbSKEWbyHAND <- (3 * (fbMEANbyHAND - FBmedian)) / fbSDbyHAND
fbSKEWbyHAND
## [1] 0.2023634
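# The moments package instead computes skewness as the third standardized moment.
# The same quantity by hand (a sketch; should match skewness(vec1) above):
fbSKEWmoment <- (sum((vec1 - mean(vec1))^3) / length(vec1)) /
  (sum((vec1 - mean(vec1))^2) / length(vec1))^(3/2)
fbSKEWmoment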
Exercise 7
GME <- read_excel("GME.xlsx")
View(GME)
GMEadj <- GME %>%
  select(-(Open:Close), -Volume)
GMEadj2 <- GMEadj %>%
  mutate(Date = as_date(Date)) %>%
  as_tsibble(index = Date)  # Date is the index; `Adj Close` is a measurement, not a key
GMEadj3 <- GMEadj2 %>%
  filter(Date >= as.Date("2022-06-01") & Date <= as.Date("2022-06-30")) %>%
  ggplot(aes(x = Date, y = `Adj Close`)) +
  geom_line()
GMEadj3

# Monthly subsets of the adjusted close (a compact alternative appears after the
# variance results at the end of this exercise).
GMEjan <- GMEadj2 %>%
  filter(Date >= as.Date("2022-01-01") & Date <= as.Date("2022-01-31"))
GMEfeb <- GMEadj2 %>%
  filter(Date >= as.Date("2022-02-01") & Date <= as.Date("2022-02-28"))
GMEmar <- GMEadj2 %>%
  filter(Date >= as.Date("2022-03-01") & Date <= as.Date("2022-03-31"))
GMEap <- GMEadj2 %>%
  filter(Date >= as.Date("2022-04-01") & Date <= as.Date("2022-04-30"))
GMEmay <- GMEadj2 %>%
  filter(Date >= as.Date("2022-05-01") & Date <= as.Date("2022-05-31"))
GMEjune <- GMEadj2 %>%
  filter(Date >= as.Date("2022-06-01") & Date <= as.Date("2022-06-30"))
GMEjul <- GMEadj2 %>%
  filter(Date >= as.Date("2022-07-01") & Date <= as.Date("2022-07-31"))
GMEaug <- GMEadj2 %>%
  filter(Date >= as.Date("2022-08-01") & Date <= as.Date("2022-08-31"))
Tjan_Vec <- pull(GMEjan, "Adj Close")
Tjan_mean <- mean(Tjan_Vec)
Tjan_mean
## [1] 29.4935
Tfeb_Vec <- pull(GMEfeb, "Adj Close")
Tfeb_mean <- mean(Tfeb_Vec)
Tfeb_mean
## [1] 29.19882
Tmar_Vec <- pull(GMEmar, "Adj Close")
Tmar_mean <- mean(Tmar_Vec)
Tmar_mean
## [1] 29.93022
Tap_Vec <- pull(GMEap, "Adj Close")
Tap_mean <- mean(Tap_Vec)
Tap_mean
## [1] 36.32187
Tmay_Vec <- pull(GMEmay, "Adj Close")
Tmay_mean <- mean(Tmay_Vec)
Tmay_mean
## [1] 26.56976
Tjune_Vec <- pull(GMEjune, "Adj Close")
Tjune_mean <- mean(Tjune_Vec)
Tjune_mean
## [1] 32.74143
Tjul_Vec <- pull(GMEjul, "Adj Close")
Tjul_mean <- mean(Tjul_Vec)
Tjul_mean
## [1] 34.21137
Taug_Vec <- pull(GMEaug, "Adj Close")
Taug_mean <- mean(Taug_Vec)
Taug_mean
## [1] 36.59739
Tjan_var <- var(Tjan_Vec)
Tjan_var
## [1] 19.61737
Tfeb_var <- var(Tfeb_Vec)
Tfeb_var
## [1] 5.448255
Tmar_var <- var(Tmar_Vec)
Tmar_var
## [1] 67.36374
Tap_var <- var(Tap_Vec)
Tap_var
## [1] 9.093501
Tmay_var <- var(Tmay_Vec)
Tmay_var
## [1] 16.00114
Tjune_var <- var(Tjune_Vec)
Tjune_var
## [1] 3.520998
Tjul_var <- var(Tjul_Vec)
Tjul_var
## [1] 7.117174
Taug_var <- var(Taug_Vec)
Taug_var
## [1] 18.13404
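# A compact alternative to the month-by-month blocks above (a sketch using the same
# GMEadj2): group by calendar month and compute every mean and variance in one pass.
GMEadj2 %>%
  as_tibble() %>%
  filter(Date >= as.Date("2022-01-01") & Date <= as.Date("2022-08-31")) %>%
  mutate(Month = yearmonth(Date)) %>%
  group_by(Month) %>%
  summarise(mean_adj = mean(`Adj Close`),
            var_adj = var(`Adj Close`))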