Packages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(timetk)
library(tidyquant)
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

Plotting time serives

taylor_30_min
## # A tibble: 4,032 × 2
##    date                value
##    <dttm>              <dbl>
##  1 2000-06-05 00:00:00 22262
##  2 2000-06-05 00:30:00 21756
##  3 2000-06-05 01:00:00 22247
##  4 2000-06-05 01:30:00 22759
##  5 2000-06-05 02:00:00 22549
##  6 2000-06-05 02:30:00 22313
##  7 2000-06-05 03:00:00 22128
##  8 2000-06-05 03:30:00 21860
##  9 2000-06-05 04:00:00 21751
## 10 2000-06-05 04:30:00 21336
## # ℹ 4,022 more rows
taylor_30_min %>%
  plot_time_series(.date_var = date, .value = value)
m4_daily %>% count(id)
## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 D10     674
## 2 D160   4197
## 3 D410    676
## 4 D500   4196
m4_daily %>%
  group_by(id) %>%
  plot_time_series(
      .date_var     = date, 
      .value        = value, 
      .facet_ncol   = 2, 
      .facet_scales = "free", 
      .interactive  = FALSE)

Visualizing Transformations and Sub-Groups

m4_hourly %>% count(id)
## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 H10     700
## 2 H50     700
## 3 H150    700
## 4 H410    960
m4_hourly %>% 
  group_by(id) %>%
  plot_time_series(
    .date_var     = date, 
    .value        = log(value), 
    .facet_ncol   = 2, 
    .facet_scales = "free", 
    .color_var    = week(date))

Static ggplot2 Visualizations and Customizations

taylor_30_min %>%
  plot_time_series(date, value, 
                   .color_var = month(date, label = TRUE),
                   
                   # Returns static ggplot
                   .interactive = FALSE, 
                   .title = "Taylor's Megawatt Data", 
                   .x_lab = "Date (30-min intervals)", 
                   .y_lab = "Energy Demand (MW)", 
                   .color_lab = "Month")

Box plots

m4_monthly %>% count(id)
## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 M1      469
## 2 M2      469
## 3 M750    306
## 4 M1000   330
m4_monthly %>%
  filter_by_time(.date_var = date, .end_date = "1976") %>%
  group_by(id) %>%
  plot_time_series_boxplot(
    .date_var  = date, 
    .value      = value, 
    .period     = "1 year", 
    .facet_ncol = 2)

Regression plots

m4_monthly %>%
  group_by(id) %>%
  plot_time_series_regression(
    .date_var = date, 
    .facet_ncol = 2, 
    .formula  = log(value) ~ as.numeric(date) + month(date, label = TRUE), 
    .show_summary = TRUE)
## 
## Summary for Group: M1---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64182 -0.17965 -0.01619  0.14517  0.60369 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.725e+00  2.915e-02 299.310  < 2e-16 ***
## as.numeric(date)             -1.562e-06  2.824e-06  -0.553  0.58039    
## month(date, label = TRUE).L   5.226e-02  4.035e-02   1.295  0.19584    
## month(date, label = TRUE).Q  -1.982e-01  4.029e-02  -4.918 1.22e-06 ***
## month(date, label = TRUE).C  -2.505e-01  4.034e-02  -6.210 1.19e-09 ***
## month(date, label = TRUE)^4   1.767e-01  4.030e-02   4.386 1.44e-05 ***
## month(date, label = TRUE)^5   1.215e-01  4.033e-02   3.013  0.00273 ** 
## month(date, label = TRUE)^6  -1.111e-01  4.030e-02  -2.758  0.00605 ** 
## month(date, label = TRUE)^7  -6.178e-02  4.032e-02  -1.532  0.12611    
## month(date, label = TRUE)^8   8.347e-02  4.031e-02   2.071  0.03893 *  
## month(date, label = TRUE)^9  -1.322e-02  4.029e-02  -0.328  0.74298    
## month(date, label = TRUE)^10 -1.173e-03  4.032e-02  -0.029  0.97680    
## month(date, label = TRUE)^11  4.784e-02  4.019e-02   1.190  0.23454    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.252 on 456 degrees of freedom
## Multiple R-squared:  0.1932, Adjusted R-squared:  0.172 
## F-statistic:   9.1 on 12 and 456 DF,  p-value: 8.931e-16
## 
## ----
## 
## Summary for Group: M2---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5601 -0.1519 -0.0033  0.1400  0.5414 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7.710e+00  2.451e-02 314.624  < 2e-16 ***
## as.numeric(date)              3.671e-07  2.374e-06   0.155 0.877169    
## month(date, label = TRUE).L  -5.815e-02  3.392e-02  -1.714 0.087134 .  
## month(date, label = TRUE).Q  -4.611e-02  3.387e-02  -1.361 0.174113    
## month(date, label = TRUE).C  -7.356e-02  3.391e-02  -2.169 0.030611 *  
## month(date, label = TRUE)^4   5.917e-02  3.387e-02   1.747 0.081361 .  
## month(date, label = TRUE)^5  -9.701e-03  3.391e-02  -0.286 0.774919    
## month(date, label = TRUE)^6  -1.201e-01  3.388e-02  -3.545 0.000434 ***
## month(date, label = TRUE)^7  -4.469e-03  3.389e-02  -0.132 0.895164    
## month(date, label = TRUE)^8   7.949e-02  3.388e-02   2.346 0.019403 *  
## month(date, label = TRUE)^9  -4.324e-02  3.387e-02  -1.277 0.202390    
## month(date, label = TRUE)^10 -6.214e-02  3.389e-02  -1.833 0.067411 .  
## month(date, label = TRUE)^11  2.628e-02  3.379e-02   0.778 0.437107    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2118 on 456 degrees of freedom
## Multiple R-squared:  0.07436,    Adjusted R-squared:   0.05 
## F-statistic: 3.053 on 12 and 456 DF,  p-value: 0.0003732
## 
## ----
## 
## Summary for Group: M750---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12770 -0.05159 -0.01753  0.05142  0.17828 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.407e+00  1.651e-02 509.199  < 2e-16 ***
## as.numeric(date)              5.679e-05  1.348e-06  42.118  < 2e-16 ***
## month(date, label = TRUE).L  -3.584e-02  1.256e-02  -2.854 0.004625 ** 
## month(date, label = TRUE).Q   7.509e-02  1.256e-02   5.979 6.51e-09 ***
## month(date, label = TRUE).C   7.879e-02  1.256e-02   6.273 1.27e-09 ***
## month(date, label = TRUE)^4  -4.931e-02  1.256e-02  -3.926 0.000108 ***
## month(date, label = TRUE)^5  -7.964e-02  1.256e-02  -6.341 8.61e-10 ***
## month(date, label = TRUE)^6   1.215e-02  1.256e-02   0.967 0.334270    
## month(date, label = TRUE)^7   5.196e-02  1.256e-02   4.137 4.60e-05 ***
## month(date, label = TRUE)^8   1.200e-02  1.256e-02   0.955 0.340143    
## month(date, label = TRUE)^9  -3.433e-02  1.256e-02  -2.733 0.006652 ** 
## month(date, label = TRUE)^10 -1.566e-02  1.256e-02  -1.247 0.213483    
## month(date, label = TRUE)^11  1.182e-02  1.256e-02   0.941 0.347375    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06341 on 293 degrees of freedom
## Multiple R-squared:  0.8695, Adjusted R-squared:  0.8641 
## F-statistic: 162.6 on 12 and 293 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: M1000---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.30903 -0.44884 -0.06059  0.47527  1.86372 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   9.476e+00  1.451e-01  65.292   <2e-16 ***
## as.numeric(date)             -1.590e-04  1.216e-05 -13.079   <2e-16 ***
## month(date, label = TRUE).L  -7.113e-02  1.221e-01  -0.582    0.561    
## month(date, label = TRUE).Q  -3.050e-01  1.221e-01  -2.498    0.013 *  
## month(date, label = TRUE).C   5.160e-02  1.221e-01   0.423    0.673    
## month(date, label = TRUE)^4   6.570e-02  1.221e-01   0.538    0.591    
## month(date, label = TRUE)^5   4.299e-02  1.221e-01   0.352    0.725    
## month(date, label = TRUE)^6   5.051e-02  1.221e-01   0.414    0.679    
## month(date, label = TRUE)^7  -4.125e-02  1.221e-01  -0.338    0.736    
## month(date, label = TRUE)^8   1.122e-01  1.221e-01   0.919    0.359    
## month(date, label = TRUE)^9   4.733e-03  1.221e-01   0.039    0.969    
## month(date, label = TRUE)^10  5.748e-02  1.221e-01   0.471    0.638    
## month(date, label = TRUE)^11  5.818e-02  1.221e-01   0.476    0.634    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6402 on 317 degrees of freedom
## Multiple R-squared:  0.3621, Adjusted R-squared:  0.3379 
## F-statistic: 14.99 on 12 and 317 DF,  p-value: < 2.2e-16
## 
## ----

Plotting seasonality and Correlation

Correlation Plots

m4_hourly %>%
  group_by(id) %>%
  plot_acf_diagnostics(
    date, value, 
    .lags = "7 days")
walmart_sales_weekly %>%
  group_by(id) %>%
  plot_acf_diagnostics(
    Date, Weekly_Sales,
    .ccf_vars = c(Temperature, Fuel_Price), 
    .lags =  "3 months")

Seasonality

taylor_30_min %>%
  plot_seasonal_diagnostics(date, value)
m4_hourly %>% count(id)
## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 H10     700
## 2 H50     700
## 3 H150    700
## 4 H410    960
m4_hourly %>%
  group_by(id) %>%
  plot_seasonal_diagnostics(date, value)

STL Diagnostics

m4_hourly %>%
  group_by(id) %>%
  plot_stl_diagnostics(
    date, value, 
    .feature_set = c("observed", "season", "trend", "remainder"))
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days

TIme Series Data Wrangling

Summarize by time

FANG %>%
  group_by(symbol) %>%
  plot_time_series(date, volume, .facet_ncol = 2, .interactive = FALSE)

Summarize it by quarter

FANG %>%
  group_by(symbol) %>%
  summarise_by_time(.date_var = date, volume = sum(volume), .by = "quarter") %>%
  plot_time_series(date, volume, .facet_ncol = 2, .interactive = FALSE)

FANG %>%
  group_by(symbol) %>%
  summarise_by_time(.date_var = date, adjusted = mean(adjusted), .by = "month") %>%
  plot_time_series(date, adjusted, .facet_ncol = 2, .interactive = FALSE)

Filter by time

FANG %>%
  group_by(symbol) %>%
  filter_by_time(.date_var   = date, 
                 .start_date = "2013-09", 
                 .end_date   = "2013") %>%
  plot_time_series(date, adjusted, .facet_ncol = 2)

Padding data

FANG %>% 
  group_by(symbol) %>%
  pad_by_time(date, .by = "day", .pad_value = 0)
## # A tibble: 5,836 × 8
## # Groups:   symbol [4]
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 AMZN   2013-01-02  256.  258.  253.  257. 3271000     257.
##  2 AMZN   2013-01-03  257.  261.  256.  258. 2750900     258.
##  3 AMZN   2013-01-04  258.  260.  257.  259. 1874200     259.
##  4 AMZN   2013-01-05    0     0     0     0        0       0 
##  5 AMZN   2013-01-06    0     0     0     0        0       0 
##  6 AMZN   2013-01-07  263.  270.  263.  268. 4910000     268.
##  7 AMZN   2013-01-08  267.  269.  264.  266. 3010700     266.
##  8 AMZN   2013-01-09  268.  270.  265.  266. 2265600     266.
##  9 AMZN   2013-01-10  269.  269.  262.  265. 2863400     265.
## 10 AMZN   2013-01-11  265.  268.  264.  268. 2413300     268.
## # ℹ 5,826 more rows

Sliding (Rolling) Calculations

FANG %>%
  head(10) %>%
  mutate(rolling_avg_2 = slidify_vec(adjusted, mean, 
                                     .period  = 2, 
                                     .align   = "right", 
                                     .partial = TRUE))
## # A tibble: 10 × 9
##    symbol date        open  high   low close    volume adjusted rolling_avg_2
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>         <dbl>
##  1 FB     2013-01-02  27.4  28.2  27.4  28    69846400     28            28  
##  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8          27.9
##  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8          28.3
##  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4          29.1
##  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1          29.2
##  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6          29.8
##  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3          30.9
##  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7          31.5
##  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0          31.3
## 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1          30.5
# Rolling regressions are easy to implement using '.unlist = FALSE 
lm_roll <- slidify(~ lm(..1 ~ ..2 + ..3), 
                   .period = 90, 
                   .unlist = FALSE, 
                   .align  = "right")



FANG %>%
  select(symbol, date, adjusted, volume) %>%
  group_by(symbol) %>% 
  mutate(numeric_date = as.numeric(date)) %>%
  #Apply rolling regression 
  mutate(rolling_lm = lm_roll(adjusted, volume, numeric_date)) %>%
  filter(!is.na(rolling_lm))
## # A tibble: 3,676 × 6
## # Groups:   symbol [4]
##    symbol date       adjusted   volume numeric_date rolling_lm
##    <chr>  <date>        <dbl>    <dbl>        <dbl> <list>    
##  1 FB     2013-05-10     26.7 30847100        15835 <lm>      
##  2 FB     2013-05-13     26.8 29068800        15838 <lm>      
##  3 FB     2013-05-14     27.1 24930300        15839 <lm>      
##  4 FB     2013-05-15     26.6 30299800        15840 <lm>      
##  5 FB     2013-05-16     26.1 35499100        15841 <lm>      
##  6 FB     2013-05-17     26.2 29462700        15842 <lm>      
##  7 FB     2013-05-20     25.8 42402900        15845 <lm>      
##  8 FB     2013-05-21     25.7 26261300        15846 <lm>      
##  9 FB     2013-05-22     25.2 45314500        15847 <lm>      
## 10 FB     2013-05-23     25.1 37663100        15848 <lm>      
## # ℹ 3,666 more rows