Load packages and data

library(dplyr)
library(ggplot2)
library(tidyr)
library(USgas)
library(tibble)
library(tsibble)
library(ggfortify)
library(tidyverse)
library(fpp3)
library(moments)

Questions

Exercise 1

#1, Convert the data set into tsibble
tute1 <- readxl::read_excel("//Users//colinadams//Documents//GCSU//Fall 2022//Forecasting//Homeworks//Homework 1//tute1.xls")
tute_ts <- tute1 %>%
  mutate(Quarter = yearmonth(Quarter)) %>%
  as_tsibble(index = Quarter)

#2, Construct a time series plot (geom_line()) for each three series
num_2 <- tute_ts <- tute1 %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line()
num_2

#3, Construct a time series plot facet_grid() for all (together) three series (transform the data using pivot_longer)
num_3 <- tute_ts <- tute1 %>%
  pivot_longer(-Quarter) %>%
  ggplot(aes(x = Quarter, y = value, colour = name)) +
  geom_line() +
  facet_grid(name ~., scales = "free_y")
num_3

Exercise 2

#1, Install the 'USgas" package.
library(USgas)

#2, Create a tsibble from `us_total` with year as the index and state as the key
NE_totals <- us_total %>%
  as_tsibble(index=year,key=state)

#3, Plot the annual natural gas consumption by state for the New England area (comprising the states of Maine, Vermont, "New Hampshire" ~ "Massachusetts" ~ "Connecticut" ~ "Rhode Island")
NE_totals %>%
  filter(state=="New Hampshire"|state=="Maine"|state=="Vermont"|state=="Massachusetts"|state=="Connecticut"|state=="Rhode Island") %>%
  autoplot(y/1e3)+
  labs(y="Natural Gas Consumption (Thousands)", x="Time")

Exercise 3

#1, Create a tsibble which is identical to the `tourism` tsibble from the `tsibble` package
tourism <- readxl::read_excel("//Users//colinadams//Documents//GCSU//Fall 2022//Forecasting//Homeworks//Homework 1//tourism.xlsx")
tourism_ts <- tourism %>%
  mutate(Quarter = yearquarter(Quarter)) %>%
  as_tsibble(key = c("Region", "State", "Purpose"), index = "Quarter")
tourism_ts
## # A tsibble: 24,320 x 5 [1Q]
## # Key:       Region, State, Purpose [304]
##    Quarter Region   State           Purpose  Trips
##      <qtr> <chr>    <chr>           <chr>    <dbl>
##  1 1998 Q1 Adelaide South Australia Business  135.
##  2 1998 Q2 Adelaide South Australia Business  110.
##  3 1998 Q3 Adelaide South Australia Business  166.
##  4 1998 Q4 Adelaide South Australia Business  127.
##  5 1999 Q1 Adelaide South Australia Business  137.
##  6 1999 Q2 Adelaide South Australia Business  200.
##  7 1999 Q3 Adelaide South Australia Business  169.
##  8 1999 Q4 Adelaide South Australia Business  134.
##  9 2000 Q1 Adelaide South Australia Business  154.
## 10 2000 Q2 Adelaide South Australia Business  169.
## # … with 24,310 more rows
## # ℹ Use `print(n = ...)` to see more rows
#2, Find what combination of `Region` and `Purpose` had the maximum number of overnight trips on average
tourism_ts %>%
  group_by(Region,Purpose) %>%
  summarise(mean = mean(Trips)) %>%
  ungroup() %>%
  filter(mean == max(mean))
## # A tsibble: 1 x 4 [1Q]
## # Key:       Region, Purpose [1]
##   Region    Purpose  Quarter  mean
##   <chr>     <chr>      <qtr> <dbl>
## 1 Melbourne Visiting 2017 Q4  985.
#3, Create a new tsibble which combines the Purposes and Regions, and just has total trips by State
tourism %>%
  group_by(State) %>%
  summarise(total = sum(Trips))
## # A tibble: 8 × 2
##   State                total
##   <chr>                <dbl>
## 1 ACT                 41007.
## 2 New South Wales    557367.
## 3 Northern Territory  28614.
## 4 Queensland         386643.
## 5 South Australia    118151.
## 6 Tasmania            54137.
## 7 Victoria           390463.
## 8 Western Australia  147820.

For more math examples, please check this website

Exercise 4

#1, Use `autoplot()`, `gg_season()` and `gg_subseries()` to compare the differences between the arrivals from these four countries.
aus_arrivals %>%
  autoplot()
## Plot variable not specified, automatically selected `.vars = Arrivals`

aus_arrivals %>%
  gg_season()
## Plot variable not specified, automatically selected `y = Arrivals`

aus_arrivals %>%
  gg_subseries()
## Plot variable not specified, automatically selected `y = Arrivals`

#2, Can you identify any unusual observations?

I see an unusually large spike In Newzealand’s arrivals in Q4 of 1988. This spike was abnormally large even when compared to the typical spike in Q4 travel for New Zealand. Similarly, I see an unusual spike in arrivals in Q3 of 2001 in the United States. There was also drops in arrivals in Q2 and Q4 in 2002 in Japan. During this fall in travel the arrivals dropped back to the amount they were nearly 10 years prior.

Exercise 5

set.seed(87654321)
aus_series <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(aus_series)
## Plot variable not specified, automatically selected `.vars = Turnover`

gg_season(aus_series)
## Plot variable not specified, automatically selected `y = Turnover`

gg_subseries(aus_series)
## Plot variable not specified, automatically selected `y = Turnover`

gg_lag(aus_series)
## Plot variable not specified, automatically selected `y = Turnover`

aus_series %>%
  ACF(Turnover) %>%
  autoplot() +
    labs(x = "Lag(s)", y = "Correlation")

Using these plots I see a clear trend in turnovers. The trend is that turnover is increasing as time goes on. I also see seasonality in the data with turnovers spiking every December. Despite the trend and seasonality, I see no cycle in the data.

Exercise 6

#1, calculate the mean and standard deviation of the Facebook closing stock prices
fb_close <- as.data.frame(gafa_stock) %>%
  filter(Symbol == "FB") %>%
  select(Symbol, Date, Close)
fb_vec <- pull(fb_close, Close)
fb_mean <- mean(fb_vec, na.rm = TRUE)
fb_mean
## [1] 120.4625
fb_median <- median(fb_vec, na.rm = TRUE)
fb_median
## [1] 117.675
fb_sd <- sd(fb_vec, na.rm = TRUE)
fb_sd
## [1] 41.32364
fb_k <- kurtosis(fb_vec, na.rm= TRUE)
fb_k
## [1] 1.836712
#2, calculate the mean, standard deviation, kurtosis and Skewenss of the first difference of Facebook closing stock prices
fb_1diff <- diff(fb_vec)
head(fb_1diff)
## [1] -0.149998  2.640000  0.719997  0.310002 -1.009999  0.719998
fb_1mean <- mean(fb_1diff, na.rm = TRUE)
fb_1mean
## [1] 0.06076372
fb_1sd <- sd(fb_1diff, na.rm = TRUE)
fb_1sd
## [1] 2.414555
fb_1k <- kurtosis(fb_1diff, na.rm= TRUE)
fb_1k
## [1] 74.02921
fb_1skew <- skewness(fb_1diff, na.rm = TRUE)
fb_1skew
## [1] -3.973192
#3, Now, you should calculate the mean, standard deviation, kurtosis and Skewenss of the Facebook closing stock prices without using the functions in R (in other words, you should compute the statistic by coding it in R)

fb_mean_hand <- (sum(fb_vec)/ nrow(fb_close))
fb_mean_hand
## [1] 120.4625
fb_sd_hand <- sqrt(sum((fb_vec-mean(fb_vec))^2/(length(fb_vec)-1)))
fb_sd_hand
## [1] 41.32364
fb_k_hand <- ((sum((fb_vec-mean(fb_vec))^4))/length(fb_vec))/(sum((fb_vec-mean(fb_vec))^2)/length(fb_vec))^2
fb_k_hand
## [1] 1.836712
fb_skew_hand <- (3*(fb_mean_hand-fb_median))/fb_sd_hand
fb_skew_hand
## [1] 0.2023634

Exercise 7

#1, Load the excel file into R
aapl <- readxl::read_excel("//Users//colinadams//Documents//GCSU/Fall 2022//Forecasting//Homeworks//Homework 1//aapl.xls")

#2, Clean the dataset to only show the Date and adj. Close
aapl_clean <- aapl %>%
  select(-(Open:Close)) %>%
  select(-(Volume))
head(aapl_clean)
## # A tibble: 6 × 2
##   Date                `Adj Close`
##   <dttm>                    <dbl>
## 1 2022-01-03 00:00:00        181.
## 2 2022-01-04 00:00:00        179.
## 3 2022-01-05 00:00:00        174.
## 4 2022-01-06 00:00:00        171.
## 5 2022-01-07 00:00:00        171.
## 6 2022-01-10 00:00:00        171.
#3, Transform it into a tsibble (index it correctly!!!)
aapl_ts <- aapl_clean %>%
  mutate(Date = as_date(Date)) %>%
  as_tsibble(index = Date)
head(aapl_ts)
## # A tsibble: 6 x 2 [1D]
##   Date       `Adj Close`
##   <date>           <dbl>
## 1 2022-01-03        181.
## 2 2022-01-04        179.
## 3 2022-01-05        174.
## 4 2022-01-06        171.
## 5 2022-01-07        171.
## 6 2022-01-10        171.
#4, Plot the Adj. Close price using geom_line() only for the month of June
aapl_jn <- aapl_ts %>%
  filter(Date >= as.Date('2022-06-01') & Date <= as.Date('2022-06-30')) %>%
  ggplot(aes(x = Date, y = `Adj Close`)) +
  geom_line()
aapl_jn

#5, Calculate the mean and variance per month
aapl_january <- aapl_ts %>%
  filter(Date >= as.Date('2022-01-01') & Date <= as.Date('2022-01-31'))
aapl_january_vec <- pull(aapl_january, 'Adj Close')
aapl_january_mean <- mean(aapl_january_vec, na.rm = TRUE)
aapl_january_var <- var(aapl_january_vec, na.rm = TRUE)
aapl_january_mean
## [1] 169.1615
aapl_january_var
## [1] 45.32941
aapl_feb <- aapl_ts %>%
  filter(Date >= as.Date('2022-02-01') & Date <= as.Date('2022-02-28'))
aapl_feb_vec <- pull(aapl_feb, 'Adj Close')
aapl_feb_mean <- mean(aapl_feb_vec, na.rm = TRUE)
aapl_feb_var <- var(aapl_feb_vec, na.rm = TRUE)
aapl_feb_mean
## [1] 169.3107
aapl_feb_var
## [1] 21.6954
aapl_march <- aapl_ts %>%
  filter(Date >= as.Date('2022-03-01') & Date <= as.Date('2022-03-31'))
aapl_march_vec <- pull(aapl_march, 'Adj Close')
aapl_march_mean <- mean(aapl_march_vec, na.rm = TRUE)
aapl_march_var <- var(aapl_march_vec, na.rm = TRUE)
aapl_march_mean
## [1] 164.8389
aapl_march_var
## [1] 62.61423
aapl_april <- aapl_ts %>%
  filter(Date >= as.Date('2022-04-01') & Date <= as.Date('2022-04-30'))
aapl_april_vec <- pull(aapl_april, 'Adj Close')
aapl_april_mean <- mean(aapl_april_vec, na.rm = TRUE)
aapl_april_var <- var(aapl_april_vec, na.rm = TRUE)
aapl_april_mean
## [1] 166.3452
aapl_april_var
## [1] 35.81272
aapl_may <- aapl_ts %>%
  filter(Date >= as.Date('2022-05-01') & Date <= as.Date('2022-05-31'))
aapl_may_vec <- pull(aapl_may, 'Adj Close')
aapl_may_mean <- mean(aapl_may_vec, na.rm = TRUE)
aapl_may_var <- var(aapl_may_vec, na.rm = TRUE)
aapl_may_mean
## [1] 148.1799
aapl_may_var
## [1] 61.07552
aapl_june <- aapl_ts %>%
  filter(Date >= as.Date('2022-06-01') & Date <= as.Date('2022-06-30'))
aapl_june_vec <- pull(aapl_june, 'Adj Close')
aapl_june_mean <- mean(aapl_june_vec, na.rm = TRUE)
aapl_june_var <- var(aapl_june_vec, na.rm = TRUE)
aapl_june_mean
## [1] 139.6047
aapl_june_var
## [1] 39.69542
aapl_july <- aapl_ts %>%
  filter(Date >= as.Date('2022-07-01') & Date <= as.Date('2022-07-31'))
aapl_july_vec <- pull(aapl_july, 'Adj Close')
aapl_july_mean <- mean(aapl_july_vec, na.rm = TRUE)
aapl_july_var <- var(aapl_july_vec, na.rm = TRUE)
aapl_july_mean
## [1] 149.4629
aapl_july_var
## [1] 34.74818
aapl_august <- aapl_ts %>%
  filter(Date >= as.Date('2022-08-01') & Date <= as.Date('2022-08-31'))
aapl_august_vec <- pull(aapl_august, 'Adj Close')
aapl_august_mean <- mean(aapl_august_vec, na.rm = TRUE)
aapl_august_var <- var(aapl_august_vec, na.rm = TRUE)
aapl_august_mean
## [1] 166.8458
aapl_august_var
## [1] 24.97063
aapl_sept <- aapl_ts %>%
  filter(Date >= as.Date('2022-09-01') & Date <= as.Date('2022-09-30'))
aapl_sept_vec <- pull(aapl_sept, 'Adj Close')
aapl_sept_mean <- mean(aapl_sept_vec, na.rm = TRUE)
aapl_sept_var <- var(aapl_sept_vec, na.rm = TRUE)
aapl_sept_mean
## [1] 157.96
aapl_sept_var
## [1] NA