Working with Time Series

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## -- Attaching packages ---------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## Warning: package 'ggplot2' was built under R version 4.0.2
## Warning: package 'tibble' was built under R version 4.0.2
## -- Conflicts ------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.0.2
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Warning: package 'PerformanceAnalytics' was built under R version 4.0.2
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.0.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.0.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Warning: package 'quantmod' was built under R version 4.0.2
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.0.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Version 0.4-0 included new data defaults. See ?getSymbols.
## == Need to Learn tidyquant? ===========================================================
## Business Science offers a 1-hour course - Learning Lab #9: Performance Analysis & Portfolio Optimization with tidyquant!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(timetk)
## Warning: package 'timetk' was built under R version 4.0.2

We are going to explore some newer R packages created by Matt Dancho. These packages, tidyquant and timetk, make it easier to analyze time series, and both come with sample datasets. We will be working with the daily stock prices of the FANG companies.

# FANG is a sample dataset provided by the tidyquant package loaded above
# (it is not part of base R). It holds daily stock data for the FANG companies.
data("FANG")
head(FANG) # preview the first rows

Visualize the data with timetk

# Adjusted closing price over time, grouped by symbol and laid out in a
# two-column facet grid. A static (non-interactive) plot is requested.
FANG %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var    = date,
    .value       = adjusted,
    .facet_ncol  = 2,
    .interactive = FALSE
  ) +
  labs(title = "Adjusted Closing Prices")

# Traded volume over time, grouped by symbol and laid out in a two-column
# facet grid. A static (non-interactive) plot is requested.
FANG %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var    = date,
    .value       = volume,
    .facet_ncol  = 2,
    .interactive = FALSE
  ) +
  labs(title = "Trade Volume")

Summarize by Time Periods using timetk (with tidyquant's SUM)

# Quarterly trade volume per symbol: SUM() aggregates the volume values that
# fall inside each quarter (the period chosen with `.by`).
FANG %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "quarter",
    volume    = SUM(volume)
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = volume,
    .facet_ncol  = 2,
    .interactive = FALSE,
    .y_intercept = 0
  ) +
  labs(title = "Total trade by quarter")

Period Smoothing using timetk (with tidyquant's FIRST)

# Monthly smoothing per symbol: each month is represented by its first
# adjusted price, as returned by FIRST().
FANG %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "month",
    adjusted  = FIRST(adjusted)
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = adjusted,
    .facet_ncol  = 2,
    .interactive = FALSE
  ) +
  labs(title = "Adjusted Closing Prices Smoothed by Month")

Filter by Time Ranges using timetk

# Keep only the observations from September through December 2015 for each
# symbol. Partial dates are expanded by filter_by_time(): "2015-09" is read as
# 2015-09-01 and "2015" as 2015-12-31.
# The title previously said "3rd Quarter 2015", which did not match this
# September-December range; it now describes the data actually plotted.
FANG %>%
  group_by(symbol) %>%
  filter_by_time(
    .date_var   = date,
    .start_date = "2015-09",
    .end_date   = "2015"
  ) %>%
  plot_time_series(date, adjusted, .facet_ncol = 2, .interactive = FALSE) +
  ggtitle("Stock Prices for September-December 2015")

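Note: filter_by_time() expands partial dates. For example, .start_date = "2013-09" converts to "2013-09-01" and .end_date = "2013" converts to "2013-12-31". In the call above, "2015-09" and "2015" therefore cover 2015-09-01 through 2015-12-31.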

Padding Data Gaps using timetk

# Fill the gaps in each symbol's series. With .by = "auto" the padding
# interval is guessed from the data (reported as "day" here).
FANG %>%
  group_by(symbol) %>%
  pad_by_time(.date_var = date, .by = "auto")
## pad applied on the interval: day
## pad applied on the interval: day
## pad applied on the interval: day
## pad applied on the interval: day

Q#1: Go from daily to hourly timestamp intervals for 1 month from the start date. Impute the missing values.

# Solution to Q#1: pad each symbol's series to hourly timestamps, impute the
# values left missing by the padding, then keep one month from the first date.
FANG %>%
  group_by(symbol) %>%
  pad_by_time(date, .by = "hour") %>%
  # across() replaces the superseded mutate_at(); every column from `open`
  # through `adjusted` is imputed with ts_impute_vec(period = 1).
  mutate(across(open:adjusted, ~ ts_impute_vec(.x, period = 1))) %>%
  # FIRST() is tidyquant's helper; it is used because xts masks dplyr::first().
  filter_by_time(date, "start", FIRST(date) %+time% "1 month") %>%
  plot_time_series(date, adjusted, .facet_ncol = 2, .interactive = FALSE) +
  ggtitle("Solution to Q#1")

Sliding Windows using timetk

# Build a reusable rolling function: a centered AVERAGE over a 30-observation
# window, with partial windows allowed (.partial = TRUE).
roll_avg_30 <- slidify(
  .f       = AVERAGE,
  .period  = 30,
  .align   = "center",
  .partial = TRUE
)

# Apply the rolling average per symbol, then reshape to long form so the raw
# and smoothed series can be drawn in the same facet, coloured by series name.
FANG %>%
  select(symbol, date, adjusted) %>%
  group_by(symbol) %>%
  mutate(rolling_avg_30 = roll_avg_30(adjusted)) %>%
  pivot_longer(
    cols      = c(adjusted, rolling_avg_30),
    names_to  = "name",
    values_to = "value"
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = value,
    .color_var   = name,
    .facet_ncol  = 2,
    .smooth      = FALSE,
    .interactive = FALSE
  ) +
  labs(title = "Rolling Average")

# Same 30-observation rolling average, computed inline with slidify_vec()
# instead of a pre-built rolling function. The result is printed, not plotted.
FANG %>%
  select(symbol, date, adjusted) %>%
  group_by(symbol) %>%
  mutate(
    rolling_avg_30 = slidify_vec(
      .x       = adjusted,
      .f       = ~ AVERAGE(.),
      .period  = 30,
      .partial = TRUE
    )
  )

Rolling Regression

# Rolling regression helper: fits lm(first ~ second + third) over a
# right-aligned 90-observation window. `.unlist = FALSE` keeps each fitted
# model object instead of flattening the results.
lm_roll <- slidify(
  .f      = ~ lm(..1 ~ ..2 + ..3),
  .period = 90,
  .unlist = FALSE,
  .align  = "right"
)

# Regress adjusted price on volume and a numeric date, per symbol, then drop
# the rows where the rolling model is NA.
FANG %>%
  select(symbol, date, adjusted, volume) %>%
  group_by(symbol) %>%
  mutate(
    numeric_date = as.numeric(date),
    rolling_lm   = lm_roll(adjusted, volume, numeric_date)
  ) %>%
  filter(!is.na(rolling_lm))