library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.4.1
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(timetk)
## Warning: package 'timetk' was built under R version 4.4.2
library(tidyquant)
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

Plotting Time Series

taylor_30_min
## # A tibble: 4,032 × 2
##    date                value
##    <dttm>              <dbl>
##  1 2000-06-05 00:00:00 22262
##  2 2000-06-05 00:30:00 21756
##  3 2000-06-05 01:00:00 22247
##  4 2000-06-05 01:30:00 22759
##  5 2000-06-05 02:00:00 22549
##  6 2000-06-05 02:30:00 22313
##  7 2000-06-05 03:00:00 22128
##  8 2000-06-05 03:30:00 21860
##  9 2000-06-05 04:00:00 21751
## 10 2000-06-05 04:30:00 21336
## # ℹ 4,022 more rows
taylor_30_min %>%
  plot_time_series(.date_var = date, .value = value)
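
By default, plot_time_series() returns an interactive plotly chart; set .interactive = FALSE to get a static ggplot instead, as in the grouped examples below.
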
m4_daily %>% count(id)
## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 D10     674
## 2 D160   4197
## 3 D410    676
## 4 D500   4196
m4_daily %>%
  group_by(id) %>%
  plot_time_series(
    .date_var     = date, 
    .value        = value, 
    .facet_ncol   = 2, 
    .facet_scales = "free", 
    .interactive  = FALSE)

Visualizing Transformations and Sub-Groups

m4_hourly %>% count(id)
## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 H10     700
## 2 H50     700
## 3 H150    700
## 4 H410    960
m4_hourly %>%
  group_by(id) %>%
  plot_time_series(
    .date_var      = date, 
    .value         = log(value),
    .facet_ncol    = 2, 
    .facet_scales  = "free", 
    .color_var     = week(date), 
    .interactive   = FALSE)

Static ggplot2 Visualizations & Customizations

taylor_30_min %>%
  plot_time_series(date, value, 
                   .color_var = month(date, label = TRUE),
                   
                   # Returns static ggplot
                   .interactive = FALSE, 
                   
                   # Customize
                   .title = "Taylor's MegaWatt Data", 
                   .x_lab = "Date (30-min intervals)",
                   .y_lab = "Energy Demand (MW)",
                   .color_lab = "Month")
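
Because the static output is an ordinary ggplot object, it can be extended with standard ggplot2 layers. A minimal sketch (the caption and theme below are illustrative additions, not part of the original):

taylor_30_min %>%
  plot_time_series(date, value, .interactive = FALSE) +
  # standard ggplot2 layers can be appended to the static plot
  labs(caption = "Source: timetk::taylor_30_min") +
  theme_minimal()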

Plotting Box Plots

m4_monthly %>% count(id)
## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 M1      469
## 2 M2      469
## 3 M750    306
## 4 M1000   330
m4_monthly %>%
  filter_by_time(.date_var = date, .end_date = "1977") %>%
  group_by(id) %>%
  plot_time_series_boxplot(
    .date_var    = date, 
    .value       = value, 
    .period      = "1 year",
    .facet_ncol  = 2, 
    .interactive = FALSE)
## Warning: There were 10 warnings in `dplyr::mutate()`.
## The first warning was:
## ℹ In argument: `.value_smooth = auto_smooth(...)`.
## ℹ In group 1: `id = M1`.
## Caused by warning in `simpleLoess()`:
## ! span too small.   fewer data values than degrees of freedom.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 9 remaining warnings.
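
The loess warnings come from the trend smoother being fit to only a handful of points per one-year period. If they are a nuisance, the smoother can be turned off; a minimal sketch, assuming the same filtered data:

m4_monthly %>%
  filter_by_time(.date_var = date, .end_date = "1977") %>%
  group_by(id) %>%
  plot_time_series_boxplot(
    .date_var    = date, 
    .value       = value, 
    .period      = "1 year",
    .facet_ncol  = 2, 
    .smooth      = FALSE,  # drop the loess overlay to avoid the span warnings
    .interactive = FALSE)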

Plotting Regression Plots

m4_monthly %>% 
  group_by(id) %>%
  plot_time_series_regression(
    .date_var = date, 
    .facet_ncol = 2,
    .formula = log(value) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = FALSE,
    .interactive = FALSE)
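
In the formula, as.numeric(date) supplies a linear trend term and month(date, label = TRUE) supplies a monthly seasonal effect. To inspect the fitted coefficients for a single series, .show_summary = TRUE prints the underlying lm() summary; a sketch using the M750 series:

m4_monthly %>%
  filter(id == "M750") %>%
  plot_time_series_regression(
    .date_var     = date, 
    .formula      = log(value) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = TRUE,   # prints the lm() summary to the console
    .interactive  = FALSE)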

Plotting Seasonality and Correlation

Correlation Plots

m4_hourly %>%
    group_by(id) %>%
    plot_acf_diagnostics(
        date, value,              
        .lags = "7 days",          
        .interactive = FALSE
    )
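
The .lags argument accepts either an integer number of lags or a time-based phrase such as "7 days", which is converted to the matching number of observations for the series' frequency.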

walmart_sales_weekly %>%
  group_by(id) %>%
  plot_acf_diagnostics(
    Date, Weekly_Sales, 
    .ccf_vars = c(Temperature, Fuel_Price), 
    .lags = "3 months",
    .interactive = FALSE)

Seasonality

taylor_30_min %>%
    plot_seasonal_diagnostics(date, value, .interactive = FALSE)

m4_hourly %>% count(id)
## # A tibble: 4 × 2
##   id        n
##   <fct> <int>
## 1 H10     700
## 2 H50     700
## 3 H150    700
## 4 H410    960
m4_hourly %>% 
  group_by(id) %>%
  plot_seasonal_diagnostics(date, value, .interactive = FALSE)

STL Diagnostics

m4_hourly %>%
    group_by(id) %>%
    plot_stl_diagnostics(
        date, value,
        .feature_set = c("observed", "season", "trend", "remainder"),
        .interactive = FALSE)
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
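
The frequency and trend windows reported above are chosen automatically; both can be overridden with time-based phrases. A minimal sketch (the "24 hours" and "1 week" settings are illustrative):

m4_hourly %>%
    group_by(id) %>%
    plot_stl_diagnostics(
        date, value,
        .frequency   = "24 hours",  # override the automatic seasonal frequency
        .trend       = "1 week",    # override the automatic trend window
        .feature_set = c("observed", "season", "trend", "remainder"),
        .interactive = FALSE)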

Time Series Data Wrangling

Summarize by Time

FANG %>%
  group_by(symbol) %>%
  plot_time_series(date, volume, .facet_ncol = 2, .interactive = FALSE)
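
This first plot shows the raw daily trading volume for each symbol; the chunks below roll it up to coarser periods with summarise_by_time().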

Summarize by Quarter

FANG %>%
  group_by(symbol) %>%
  summarise_by_time(.date_var = date, volume = sum(volume), .by = "quarter") %>%
  plot_time_series(date, volume, .facet_ncol = 2, .interactive = FALSE)

Period Smoothing

FANG %>%
  group_by(symbol) %>%
  summarise_by_time(.date_var = date, adjusted = mean(adjusted), .by = "month") %>%
  plot_time_series(date, adjusted, .facet_ncol = 2, .interactive = FALSE)

Filter by Time

FANG %>%
  group_by(symbol) %>%
  filter_by_time(.date_var = date, 
                 .start_date = "2013-09", 
                 .end_date = "2013") %>%
  plot_time_series(date, adjusted, .facet_ncol = 2,  
                 .interactive = FALSE)
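
Shorthand dates are expanded to full ranges: a .start_date of "2013-09" becomes 2013-09-01 and an .end_date of "2013" becomes 2013-12-31, so this keeps September through December of 2013.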

Padding Data

FANG %>%
  group_by(symbol) %>%
  pad_by_time(date, .by = "auto") # .by = "auto" detects the series' frequency; a fixed interval such as "day" can be supplied instead
## pad applied on the interval: day
## # A tibble: 5,836 × 8
## # Groups:   symbol [4]
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 AMZN   2013-01-02  256.  258.  253.  257. 3271000     257.
##  2 AMZN   2013-01-03  257.  261.  256.  258. 2750900     258.
##  3 AMZN   2013-01-04  258.  260.  257.  259. 1874200     259.
##  4 AMZN   2013-01-05   NA    NA    NA    NA       NA      NA 
##  5 AMZN   2013-01-06   NA    NA    NA    NA       NA      NA 
##  6 AMZN   2013-01-07  263.  270.  263.  268. 4910000     268.
##  7 AMZN   2013-01-08  267.  269.  264.  266. 3010700     266.
##  8 AMZN   2013-01-09  268.  270.  265.  266. 2265600     266.
##  9 AMZN   2013-01-10  269.  269.  262.  265. 2863400     265.
## 10 AMZN   2013-01-11  265.  268.  264.  268. 2413300     268.
## # ℹ 5,826 more rows
FANG %>%
  group_by(symbol) %>%
  pad_by_time(date, .by = "day", .fill_na_direction = "down") # carry the last observed value forward into the padded rows
## # A tibble: 5,836 × 8
## # Groups:   symbol [4]
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 AMZN   2013-01-02  256.  258.  253.  257. 3271000     257.
##  2 AMZN   2013-01-03  257.  261.  256.  258. 2750900     258.
##  3 AMZN   2013-01-04  258.  260.  257.  259. 1874200     259.
##  4 AMZN   2013-01-05  258.  260.  257.  259. 1874200     259.
##  5 AMZN   2013-01-06  258.  260.  257.  259. 1874200     259.
##  6 AMZN   2013-01-07  263.  270.  263.  268. 4910000     268.
##  7 AMZN   2013-01-08  267.  269.  264.  266. 3010700     266.
##  8 AMZN   2013-01-09  268.  270.  265.  266. 2265600     266.
##  9 AMZN   2013-01-10  269.  269.  262.  265. 2863400     265.
## 10 AMZN   2013-01-11  265.  268.  264.  268. 2413300     268.
## # ℹ 5,826 more rows
FANG %>%
  group_by(symbol) %>%
  pad_by_time(date, .by = "day", .pad_value = 0) # fill the padded rows with a fixed value instead of NA
## # A tibble: 5,836 × 8
## # Groups:   symbol [4]
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 AMZN   2013-01-02  256.  258.  253.  257. 3271000     257.
##  2 AMZN   2013-01-03  257.  261.  256.  258. 2750900     258.
##  3 AMZN   2013-01-04  258.  260.  257.  259. 1874200     259.
##  4 AMZN   2013-01-05    0     0     0     0        0       0 
##  5 AMZN   2013-01-06    0     0     0     0        0       0 
##  6 AMZN   2013-01-07  263.  270.  263.  268. 4910000     268.
##  7 AMZN   2013-01-08  267.  269.  264.  266. 3010700     266.
##  8 AMZN   2013-01-09  268.  270.  265.  266. 2265600     266.
##  9 AMZN   2013-01-10  269.  269.  262.  265. 2863400     265.
## 10 AMZN   2013-01-11  265.  268.  264.  268. 2413300     268.
## # ℹ 5,826 more rows

Sliding (Rolling) Calculations

FANG %>%
  head(10) %>%
  mutate(rolling_avg_2 = slidify_vec(adjusted, mean, 
            .period  = 2,       # window size: rolling mean over 2 observations
            .align   = "right", # window ends at the current row
            .partial = TRUE))   # allow incomplete windows at the start of the series
## # A tibble: 10 × 9
##    symbol date        open  high   low close    volume adjusted rolling_avg_2
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>         <dbl>
##  1 FB     2013-01-02  27.4  28.2  27.4  28    69846400     28            28  
##  2 FB     2013-01-03  27.9  28.5  27.6  27.8  63140600     27.8          27.9
##  3 FB     2013-01-04  28.0  28.9  27.8  28.8  72715400     28.8          28.3
##  4 FB     2013-01-07  28.7  29.8  28.6  29.4  83781800     29.4          29.1
##  5 FB     2013-01-08  29.5  29.6  28.9  29.1  45871300     29.1          29.2
##  6 FB     2013-01-09  29.7  30.6  29.5  30.6 104787700     30.6          29.8
##  7 FB     2013-01-10  30.6  31.5  30.3  31.3  95316400     31.3          30.9
##  8 FB     2013-01-11  31.3  32.0  31.1  31.7  89598000     31.7          31.5
##  9 FB     2013-01-14  32.1  32.2  30.6  31.0  98892800     31.0          31.3
## 10 FB     2013-01-15  30.6  31.7  29.9  30.1 173242600     30.1          30.5
# Make the rolling function
roll_avg_30 <- slidify(.f = mean, .period = 30, .align = "center", .partial = TRUE)

# Apply the rolling function
FANG %>%
  select(symbol, date, adjusted) %>%
  group_by(symbol) %>%
  # Apply Sliding Function
  mutate(rolling_avg_30 = roll_avg_30(adjusted)) %>%
  tidyr::pivot_longer(cols = c(adjusted, rolling_avg_30)) %>%
  plot_time_series(date, value, .color_var = name,
                   .facet_ncol = 2, .smooth = FALSE, 
                   .interactive = FALSE)

# Rolling regressions are easy to implement using `.unlist = FALSE`
lm_roll <- slidify(~ lm(..1 ~ ..2 + ..3), .period = 90, 
                   .unlist = FALSE, .align = "right")


FANG %>%
  select(symbol, date, adjusted, volume) %>%
  group_by(symbol) %>%
  mutate(numeric_date = as.numeric(date)) %>%
  # Apply rolling regression
  mutate(rolling_lm = lm_roll(adjusted, volume, numeric_date)) %>%
  filter(!is.na(rolling_lm))
## # A tibble: 3,676 × 6
## # Groups:   symbol [4]
##    symbol date       adjusted   volume numeric_date rolling_lm
##    <chr>  <date>        <dbl>    <dbl>        <dbl> <list>    
##  1 FB     2013-05-10     26.7 30847100        15835 <lm>      
##  2 FB     2013-05-13     26.8 29068800        15838 <lm>      
##  3 FB     2013-05-14     27.1 24930300        15839 <lm>      
##  4 FB     2013-05-15     26.6 30299800        15840 <lm>      
##  5 FB     2013-05-16     26.1 35499100        15841 <lm>      
##  6 FB     2013-05-17     26.2 29462700        15842 <lm>      
##  7 FB     2013-05-20     25.8 42402900        15845 <lm>      
##  8 FB     2013-05-21     25.7 26261300        15846 <lm>      
##  9 FB     2013-05-22     25.2 45314500        15847 <lm>      
## 10 FB     2013-05-23     25.1 37663100        15848 <lm>      
## # ℹ 3,666 more rows
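
Each element of the rolling_lm list-column is an lm object, so the fits can be post-processed with purrr. A hedged sketch, assuming the filtered result above was stored as rolling_lm_tbl (an illustrative name, not from the original); the second coefficient is the volume term in lm(..1 ~ ..2 + ..3):

library(purrr)

# `rolling_lm_tbl` stands in for the filtered rolling-regression result above
rolling_lm_tbl %>%
  mutate(volume_slope = map_dbl(rolling_lm, ~ coef(.x)[2]))  # 2nd coefficient = slope on volume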