Code Along 10

Packages

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

library(timetk)
library(tidyquant)

## Loading required package: PerformanceAnalytics

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

## 
## Attaching package: 'PerformanceAnalytics'

## The following object is masked from 'package:graphics':
## 
##     legend

## Loading required package: quantmod

## Loading required package: TTR

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

Plotting time serives

taylor_30_min

## # A tibble: 4,032 × 2
##    date                value
##    <dttm>              <dbl>
##  1 2000-06-05 00:00:00 22262
##  2 2000-06-05 00:30:00 21756
##  3 2000-06-05 01:00:00 22247
##  4 2000-06-05 01:30:00 22759
##  5 2000-06-05 02:00:00 22549
##  6 2000-06-05 02:30:00 22313
##  7 2000-06-05 03:00:00 22128
##  8 2000-06-05 03:30:00 21860
##  9 2000-06-05 04:00:00 21751
## 10 2000-06-05 04:30:00 21336
## # ℹ 4,022 more rows

taylor_30_min %>%
  plot_time_series(.date_var = date, .value = value)

m4_daily %>% count(id)

## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 D10     674
## 2 D160   4197
## 3 D410    676
## 4 D500   4196

m4_daily %>%
  group_by(id) %>%
  plot_time_series(
      .date_var     = date, 
      .value        = value, 
      .facet_ncol   = 2, 
      .facet_scales = "free", 
      .interactive  = FALSE)

Visualizing Transformations and Sub-Groups

m4_hourly %>% count(id)

## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 H10     700
## 2 H50     700
## 3 H150    700
## 4 H410    960

m4_hourly %>% 
  group_by(id) %>%
  plot_time_series(
    .date_var     = date, 
    .value        = log(value), 
    .facet_ncol   = 2, 
    .facet_scales = "free", 
    .color_var    = week(date))

Static ggplot2 Visualizations and Customizations

taylor_30_min %>%
  plot_time_series(date, value, 
                   .color_var = month(date, label = TRUE),
                   
                   # Returns static ggplot
                   .interactive = FALSE, 
                   .title = "Taylor's Megawatt Data", 
                   .x_lab = "Date (30-min intervals)", 
                   .y_lab = "Energy Demand (MW)", 
                   .color_lab = "Month")

Box plots

m4_monthly %>% count(id)

## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 M1      469
## 2 M2      469
## 3 M750    306
## 4 M1000   330

m4_monthly %>%
  filter_by_time(.date_var = date, .end_date = "1976") %>%
  group_by(id) %>%
  plot_time_series_boxplot(
    .date_var  = date, 
    .value      = value, 
    .period     = "1 year", 
    .facet_ncol = 2)

Regression plots

m4_monthly %>%
  group_by(id) %>%
  plot_time_series_regression(
    .date_var = date, 
    .facet_ncol = 2, 
    .formula  = log(value) ~ as.numeric(date) + month(date, label = TRUE), 
    .show_summary = TRUE)

## 
## Summary for Group: M1---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64182 -0.17965 -0.01619  0.14517  0.60369 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.725e+00  2.915e-02 299.310  < 2e-16 ***
## as.numeric(date)             -1.562e-06  2.824e-06  -0.553  0.58039    
## month(date, label = TRUE).L   5.226e-02  4.035e-02   1.295  0.19584    
## month(date, label = TRUE).Q  -1.982e-01  4.029e-02  -4.918 1.22e-06 ***
## month(date, label = TRUE).C  -2.505e-01  4.034e-02  -6.210 1.19e-09 ***
## month(date, label = TRUE)^4   1.767e-01  4.030e-02   4.386 1.44e-05 ***
## month(date, label = TRUE)^5   1.215e-01  4.033e-02   3.013  0.00273 ** 
## month(date, label = TRUE)^6  -1.111e-01  4.030e-02  -2.758  0.00605 ** 
## month(date, label = TRUE)^7  -6.178e-02  4.032e-02  -1.532  0.12611    
## month(date, label = TRUE)^8   8.347e-02  4.031e-02   2.071  0.03893 *  
## month(date, label = TRUE)^9  -1.322e-02  4.029e-02  -0.328  0.74298    
## month(date, label = TRUE)^10 -1.173e-03  4.032e-02  -0.029  0.97680    
## month(date, label = TRUE)^11  4.784e-02  4.019e-02   1.190  0.23454    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.252 on 456 degrees of freedom
## Multiple R-squared:  0.1932, Adjusted R-squared:  0.172 
## F-statistic:   9.1 on 12 and 456 DF,  p-value: 8.931e-16
## 
## ----
## 
## Summary for Group: M2---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5601 -0.1519 -0.0033  0.1400  0.5414 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7.710e+00  2.451e-02 314.624  < 2e-16 ***
## as.numeric(date)              3.671e-07  2.374e-06   0.155 0.877169    
## month(date, label = TRUE).L  -5.815e-02  3.392e-02  -1.714 0.087134 .  
## month(date, label = TRUE).Q  -4.611e-02  3.387e-02  -1.361 0.174113    
## month(date, label = TRUE).C  -7.356e-02  3.391e-02  -2.169 0.030611 *  
## month(date, label = TRUE)^4   5.917e-02  3.387e-02   1.747 0.081361 .  
## month(date, label = TRUE)^5  -9.701e-03  3.391e-02  -0.286 0.774919    
## month(date, label = TRUE)^6  -1.201e-01  3.388e-02  -3.545 0.000434 ***
## month(date, label = TRUE)^7  -4.469e-03  3.389e-02  -0.132 0.895164    
## month(date, label = TRUE)^8   7.949e-02  3.388e-02   2.346 0.019403 *  
## month(date, label = TRUE)^9  -4.324e-02  3.387e-02  -1.277 0.202390    
## month(date, label = TRUE)^10 -6.214e-02  3.389e-02  -1.833 0.067411 .  
## month(date, label = TRUE)^11  2.628e-02  3.379e-02   0.778 0.437107    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2118 on 456 degrees of freedom
## Multiple R-squared:  0.07436,    Adjusted R-squared:   0.05 
## F-statistic: 3.053 on 12 and 456 DF,  p-value: 0.0003732
## 
## ----
## 
## Summary for Group: M750---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12770 -0.05159 -0.01753  0.05142  0.17828 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.407e+00  1.651e-02 509.199  < 2e-16 ***
## as.numeric(date)              5.679e-05  1.348e-06  42.118  < 2e-16 ***
## month(date, label = TRUE).L  -3.584e-02  1.256e-02  -2.854 0.004625 ** 
## month(date, label = TRUE).Q   7.509e-02  1.256e-02   5.979 6.51e-09 ***
## month(date, label = TRUE).C   7.879e-02  1.256e-02   6.273 1.27e-09 ***
## month(date, label = TRUE)^4  -4.931e-02  1.256e-02  -3.926 0.000108 ***
## month(date, label = TRUE)^5  -7.964e-02  1.256e-02  -6.341 8.61e-10 ***
## month(date, label = TRUE)^6   1.215e-02  1.256e-02   0.967 0.334270    
## month(date, label = TRUE)^7   5.196e-02  1.256e-02   4.137 4.60e-05 ***
## month(date, label = TRUE)^8   1.200e-02  1.256e-02   0.955 0.340143    
## month(date, label = TRUE)^9  -3.433e-02  1.256e-02  -2.733 0.006652 ** 
## month(date, label = TRUE)^10 -1.566e-02  1.256e-02  -1.247 0.213483    
## month(date, label = TRUE)^11  1.182e-02  1.256e-02   0.941 0.347375    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06341 on 293 degrees of freedom
## Multiple R-squared:  0.8695, Adjusted R-squared:  0.8641 
## F-statistic: 162.6 on 12 and 293 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: M1000---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.30903 -0.44884 -0.06059  0.47527  1.86372 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   9.476e+00  1.451e-01  65.292   <2e-16 ***
## as.numeric(date)             -1.590e-04  1.216e-05 -13.079   <2e-16 ***
## month(date, label = TRUE).L  -7.113e-02  1.221e-01  -0.582    0.561    
## month(date, label = TRUE).Q  -3.050e-01  1.221e-01  -2.498    0.013 *  
## month(date, label = TRUE).C   5.160e-02  1.221e-01   0.423    0.673    
## month(date, label = TRUE)^4   6.570e-02  1.221e-01   0.538    0.591    
## month(date, label = TRUE)^5   4.299e-02  1.221e-01   0.352    0.725    
## month(date, label = TRUE)^6   5.051e-02  1.221e-01   0.414    0.679    
## month(date, label = TRUE)^7  -4.125e-02  1.221e-01  -0.338    0.736    
## month(date, label = TRUE)^8   1.122e-01  1.221e-01   0.919    0.359    
## month(date, label = TRUE)^9   4.733e-03  1.221e-01   0.039    0.969    
## month(date, label = TRUE)^10  5.748e-02  1.221e-01   0.471    0.638    
## month(date, label = TRUE)^11  5.818e-02  1.221e-01   0.476    0.634    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6402 on 317 degrees of freedom
## Multiple R-squared:  0.3621, Adjusted R-squared:  0.3379 
## F-statistic: 14.99 on 12 and 317 DF,  p-value: < 2.2e-16
## 
## ----

Plotting seasonality and Correlation

Correlation Plots

m4_hourly %>%
  group_by(id) %>%
  plot_acf_diagnostics(
    date, value, 
    .lags = "7 days")

walmart_sales_weekly %>%
  group_by(id) %>%
  plot_acf_diagnostics(
    Date, Weekly_Sales,
    .ccf_vars = c(Temperature, Fuel_Price), 
    .lags =  "3 months")

Seasonality

taylor_30_min %>%
  plot_seasonal_diagnostics(date, value)

m4_hourly %>% count(id)

## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 H10     700
## 2 H50     700
## 3 H150    700
## 4 H410    960

m4_hourly %>%
  group_by(id) %>%
  plot_seasonal_diagnostics(date, value)

STL Diagnostics

m4_hourly %>%
  group_by(id) %>%
  plot_stl_diagnostics(
    date, value, 
    .feature_set = c("observed", "season", "trend", "remainder"))

## frequency = 24 observations per 1 day

## trend = 336 observations per 14 days

## frequency = 24 observations per 1 day

## trend = 336 observations per 14 days

## frequency = 24 observations per 1 day

## trend = 336 observations per 14 days

## frequency = 24 observations per 1 day

## trend = 336 observations per 14 days

TIme Series Data Wrangling

Summarize by time

FANG %>%
  group_by(symbol) %>%
  plot_time_series(date, volume, .facet_ncol = 2, .interactive = FALSE)

Summarize it by quarter

FANG %>%
  group_by(symbol) %>%
  summarise_by_time(.date_var = date, volume = sum(volume), .by = "quarter") %>%
  plot_time_series(date, volume, .facet_ncol = 2, .interactive = FALSE)

FANG %>%
  group_by(symbol) %>%
  summarise_by_time(.date_var = date, adjusted = mean(adjusted), .by = "month") %>%
  plot_time_series(date, adjusted, .facet_ncol = 2, .interactive = FALSE)

Filter by time

FANG %>%
  group_by(symbol) %>%
  filter_by_time(.date_var   = date, 
                 .start_date = "2013-09", 
                 .end_date   = "2013") %>%
  plot_time_series(date, adjusted, .facet_ncol = 2)

Padding data

FANG %>% 
  group_by(symbol) %>%
  pad_by_time(date, .by = "day", .pad_value = 0)

## # A tibble: 5,836 × 8
## # Groups:   symbol [4]
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 AMZN   2013-01-02  256.  258.  253.  257. 3271000     257.
##  2 AMZN   2013-01-03  257.  261.  256.  258. 2750900     258.
##  3 AMZN   2013-01-04  258.  260.  257.  259. 1874200     259.
##  4 AMZN   2013-01-05    0     0     0     0        0       0 
##  5 AMZN   2013-01-06    0     0     0     0        0       0 
##  6 AMZN   2013-01-07  263.  270.  263.  268. 4910000     268.
##  7 AMZN   2013-01-08  267.  269.  264.  266. 3010700     266.
##  8 AMZN   2013-01-09  268.  270.  265.  266. 2265600     266.
##  9 AMZN   2013-01-10  269.  269.  262.  265. 2863400     265.
## 10 AMZN   2013-01-11  265.  268.  264.  268. 2413300     268.
## # ℹ 5,826 more rows

Sliding (Rolling) Calculations

FANG %>%
  head(10) %>%
  mutate(rolling_avg_2 = slidify_vec(adjusted, mean, 
                                     .period  = 2, 
                                     .align   = "right", 
                                     .partial = TRUE))

## # A tibble: 10 × 9
##    symbol date        open  high   low close    volume adjusted rolling_avg_2
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>         <dbl>
##  1 FB     2013-01-02  27.4  28.2  27.4  28    69846400     28            28  
##  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8          27.9
##  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8          28.3
##  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4          29.1
##  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1          29.2
##  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6          29.8
##  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3          30.9
##  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7          31.5
##  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0          31.3
## 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1          30.5

# Rolling regressions are easy to implement using '.unlist = FALSE 
lm_roll <- slidify(~ lm(..1 ~ ..2 + ..3), 
                   .period = 90, 
                   .unlist = FALSE, 
                   .align  = "right")



FANG %>%
  select(symbol, date, adjusted, volume) %>%
  group_by(symbol) %>% 
  mutate(numeric_date = as.numeric(date)) %>%
  #Apply rolling regression 
  mutate(rolling_lm = lm_roll(adjusted, volume, numeric_date)) %>%
  filter(!is.na(rolling_lm))

## # A tibble: 3,676 × 6
## # Groups:   symbol [4]
##    symbol date       adjusted   volume numeric_date rolling_lm
##    <chr>  <date>        <dbl>    <dbl>        <dbl> <list>    
##  1 FB     2013-05-10     26.7 30847100        15835 <lm>      
##  2 FB     2013-05-13     26.8 29068800        15838 <lm>      
##  3 FB     2013-05-14     27.1 24930300        15839 <lm>      
##  4 FB     2013-05-15     26.6 30299800        15840 <lm>      
##  5 FB     2013-05-16     26.1 35499100        15841 <lm>      
##  6 FB     2013-05-17     26.2 29462700        15842 <lm>      
##  7 FB     2013-05-20     25.8 42402900        15845 <lm>      
##  8 FB     2013-05-21     25.7 26261300        15846 <lm>      
##  9 FB     2013-05-22     25.2 45314500        15847 <lm>      
## 10 FB     2013-05-23     25.1 37663100        15848 <lm>      
## # ℹ 3,666 more rows

Code Along 10

Spencer Murrin

2024-04-15

Packages

Plotting time serives

Box plots

Regression plots

Plotting seasonality and Correlation

Correlation Plots

Seasonality

STL Diagnostics

TIme Series Data Wrangling

Summarize by time

Filter by time

Padding data

Sliding (Rolling) Calculations