library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(timetk)
library(tidyquant)
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
taylor_30_min
## # A tibble: 4,032 × 2
## date value
## <dttm> <dbl>
## 1 2000-06-05 00:00:00 22262
## 2 2000-06-05 00:30:00 21756
## 3 2000-06-05 01:00:00 22247
## 4 2000-06-05 01:30:00 22759
## 5 2000-06-05 02:00:00 22549
## 6 2000-06-05 02:30:00 22313
## 7 2000-06-05 03:00:00 22128
## 8 2000-06-05 03:30:00 21860
## 9 2000-06-05 04:00:00 21751
## 10 2000-06-05 04:30:00 21336
## # ℹ 4,022 more rows
taylor_30_min %>%
plot_time_series(.date_var = date, .value = value)
m4_daily %>% count(id)
## # A tibble: 4 × 2
## id n
## <fct> <int>
## 1 D10 674
## 2 D160 4197
## 3 D410 676
## 4 D500 4196
m4_daily %>%
group_by(id) %>%
plot_time_series(
.date_var = date,
.value = value,
.facet_ncol = 2,
.facet_scales = "free",
.interactive = FALSE)
Visualizing Transformations and Sub-Groups
m4_hourly %>% count(id)
## # A tibble: 4 × 2
## id n
## <fct> <int>
## 1 H10 700
## 2 H50 700
## 3 H150 700
## 4 H410 960
m4_hourly %>%
group_by(id) %>%
plot_time_series(
.date_var = date,
.value = log(value),
.facet_ncol = 2,
.facet_scales = "free",
.color_var = week(date))
Static ggplot2 Visualizations and Customizations
taylor_30_min %>%
plot_time_series(date, value,
.color_var = month(date, label = TRUE),
# Returns static ggplot
.interactive = FALSE,
.title = "Taylor's Megawatt Data",
.x_lab = "Date (30-min intervals)",
.y_lab = "Energy Demand (MW)",
.color_lab = "Month")
m4_monthly %>% count(id)
## # A tibble: 4 × 2
## id n
## <fct> <int>
## 1 M1 469
## 2 M2 469
## 3 M750 306
## 4 M1000 330
m4_monthly %>%
filter_by_time(.date_var = date, .end_date = "1976") %>%
group_by(id) %>%
plot_time_series_boxplot(
.date_var = date,
.value = value,
.period = "1 year",
.facet_ncol = 2)
m4_monthly %>%
group_by(id) %>%
plot_time_series_regression(
.date_var = date,
.facet_ncol = 2,
.formula = log(value) ~ as.numeric(date) + month(date, label = TRUE),
.show_summary = TRUE)
##
## Summary for Group: M1---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64182 -0.17965 -0.01619 0.14517 0.60369
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.725e+00 2.915e-02 299.310 < 2e-16 ***
## as.numeric(date) -1.562e-06 2.824e-06 -0.553 0.58039
## month(date, label = TRUE).L 5.226e-02 4.035e-02 1.295 0.19584
## month(date, label = TRUE).Q -1.982e-01 4.029e-02 -4.918 1.22e-06 ***
## month(date, label = TRUE).C -2.505e-01 4.034e-02 -6.210 1.19e-09 ***
## month(date, label = TRUE)^4 1.767e-01 4.030e-02 4.386 1.44e-05 ***
## month(date, label = TRUE)^5 1.215e-01 4.033e-02 3.013 0.00273 **
## month(date, label = TRUE)^6 -1.111e-01 4.030e-02 -2.758 0.00605 **
## month(date, label = TRUE)^7 -6.178e-02 4.032e-02 -1.532 0.12611
## month(date, label = TRUE)^8 8.347e-02 4.031e-02 2.071 0.03893 *
## month(date, label = TRUE)^9 -1.322e-02 4.029e-02 -0.328 0.74298
## month(date, label = TRUE)^10 -1.173e-03 4.032e-02 -0.029 0.97680
## month(date, label = TRUE)^11 4.784e-02 4.019e-02 1.190 0.23454
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.252 on 456 degrees of freedom
## Multiple R-squared: 0.1932, Adjusted R-squared: 0.172
## F-statistic: 9.1 on 12 and 456 DF, p-value: 8.931e-16
##
## ----
##
## Summary for Group: M2---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5601 -0.1519 -0.0033 0.1400 0.5414
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.710e+00 2.451e-02 314.624 < 2e-16 ***
## as.numeric(date) 3.671e-07 2.374e-06 0.155 0.877169
## month(date, label = TRUE).L -5.815e-02 3.392e-02 -1.714 0.087134 .
## month(date, label = TRUE).Q -4.611e-02 3.387e-02 -1.361 0.174113
## month(date, label = TRUE).C -7.356e-02 3.391e-02 -2.169 0.030611 *
## month(date, label = TRUE)^4 5.917e-02 3.387e-02 1.747 0.081361 .
## month(date, label = TRUE)^5 -9.701e-03 3.391e-02 -0.286 0.774919
## month(date, label = TRUE)^6 -1.201e-01 3.388e-02 -3.545 0.000434 ***
## month(date, label = TRUE)^7 -4.469e-03 3.389e-02 -0.132 0.895164
## month(date, label = TRUE)^8 7.949e-02 3.388e-02 2.346 0.019403 *
## month(date, label = TRUE)^9 -4.324e-02 3.387e-02 -1.277 0.202390
## month(date, label = TRUE)^10 -6.214e-02 3.389e-02 -1.833 0.067411 .
## month(date, label = TRUE)^11 2.628e-02 3.379e-02 0.778 0.437107
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2118 on 456 degrees of freedom
## Multiple R-squared: 0.07436, Adjusted R-squared: 0.05
## F-statistic: 3.053 on 12 and 456 DF, p-value: 0.0003732
##
## ----
##
## Summary for Group: M750---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.12770 -0.05159 -0.01753 0.05142 0.17828
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.407e+00 1.651e-02 509.199 < 2e-16 ***
## as.numeric(date) 5.679e-05 1.348e-06 42.118 < 2e-16 ***
## month(date, label = TRUE).L -3.584e-02 1.256e-02 -2.854 0.004625 **
## month(date, label = TRUE).Q 7.509e-02 1.256e-02 5.979 6.51e-09 ***
## month(date, label = TRUE).C 7.879e-02 1.256e-02 6.273 1.27e-09 ***
## month(date, label = TRUE)^4 -4.931e-02 1.256e-02 -3.926 0.000108 ***
## month(date, label = TRUE)^5 -7.964e-02 1.256e-02 -6.341 8.61e-10 ***
## month(date, label = TRUE)^6 1.215e-02 1.256e-02 0.967 0.334270
## month(date, label = TRUE)^7 5.196e-02 1.256e-02 4.137 4.60e-05 ***
## month(date, label = TRUE)^8 1.200e-02 1.256e-02 0.955 0.340143
## month(date, label = TRUE)^9 -3.433e-02 1.256e-02 -2.733 0.006652 **
## month(date, label = TRUE)^10 -1.566e-02 1.256e-02 -1.247 0.213483
## month(date, label = TRUE)^11 1.182e-02 1.256e-02 0.941 0.347375
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06341 on 293 degrees of freedom
## Multiple R-squared: 0.8695, Adjusted R-squared: 0.8641
## F-statistic: 162.6 on 12 and 293 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: M1000---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.30903 -0.44884 -0.06059 0.47527 1.86372
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.476e+00 1.451e-01 65.292 <2e-16 ***
## as.numeric(date) -1.590e-04 1.216e-05 -13.079 <2e-16 ***
## month(date, label = TRUE).L -7.113e-02 1.221e-01 -0.582 0.561
## month(date, label = TRUE).Q -3.050e-01 1.221e-01 -2.498 0.013 *
## month(date, label = TRUE).C 5.160e-02 1.221e-01 0.423 0.673
## month(date, label = TRUE)^4 6.570e-02 1.221e-01 0.538 0.591
## month(date, label = TRUE)^5 4.299e-02 1.221e-01 0.352 0.725
## month(date, label = TRUE)^6 5.051e-02 1.221e-01 0.414 0.679
## month(date, label = TRUE)^7 -4.125e-02 1.221e-01 -0.338 0.736
## month(date, label = TRUE)^8 1.122e-01 1.221e-01 0.919 0.359
## month(date, label = TRUE)^9 4.733e-03 1.221e-01 0.039 0.969
## month(date, label = TRUE)^10 5.748e-02 1.221e-01 0.471 0.638
## month(date, label = TRUE)^11 5.818e-02 1.221e-01 0.476 0.634
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6402 on 317 degrees of freedom
## Multiple R-squared: 0.3621, Adjusted R-squared: 0.3379
## F-statistic: 14.99 on 12 and 317 DF, p-value: < 2.2e-16
##
## ----
m4_hourly %>%
group_by(id) %>%
plot_acf_diagnostics(
date, value,
.lags = "7 days")
walmart_sales_weekly %>%
group_by(id) %>%
plot_acf_diagnostics(
Date, Weekly_Sales,
.ccf_vars = c(Temperature, Fuel_Price),
.lags = "3 months")
taylor_30_min %>%
plot_seasonal_diagnostics(date, value)
m4_hourly %>% count(id)
## # A tibble: 4 × 2
## id n
## <fct> <int>
## 1 H10 700
## 2 H50 700
## 3 H150 700
## 4 H410 960
m4_hourly %>%
group_by(id) %>%
plot_seasonal_diagnostics(date, value)
m4_hourly %>%
group_by(id) %>%
plot_stl_diagnostics(
date, value,
.feature_set = c("observed", "season", "trend", "remainder"))
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
## frequency = 24 observations per 1 day
## trend = 336 observations per 14 days
FANG %>%
group_by(symbol) %>%
plot_time_series(date, volume, .facet_ncol = 2, .interactive = FALSE)
Summarize it by quarter
FANG %>%
group_by(symbol) %>%
summarise_by_time(.date_var = date, volume = sum(volume), .by = "quarter") %>%
plot_time_series(date, volume, .facet_ncol = 2, .interactive = FALSE)
FANG %>%
group_by(symbol) %>%
summarise_by_time(.date_var = date, adjusted = mean(adjusted), .by = "month") %>%
plot_time_series(date, adjusted, .facet_ncol = 2, .interactive = FALSE)
FANG %>%
group_by(symbol) %>%
filter_by_time(.date_var = date,
.start_date = "2013-09",
.end_date = "2013") %>%
plot_time_series(date, adjusted, .facet_ncol = 2)
FANG %>%
group_by(symbol) %>%
pad_by_time(date, .by = "day", .pad_value = 0)
## # A tibble: 5,836 × 8
## # Groups: symbol [4]
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AMZN 2013-01-02 256. 258. 253. 257. 3271000 257.
## 2 AMZN 2013-01-03 257. 261. 256. 258. 2750900 258.
## 3 AMZN 2013-01-04 258. 260. 257. 259. 1874200 259.
## 4 AMZN 2013-01-05 0 0 0 0 0 0
## 5 AMZN 2013-01-06 0 0 0 0 0 0
## 6 AMZN 2013-01-07 263. 270. 263. 268. 4910000 268.
## 7 AMZN 2013-01-08 267. 269. 264. 266. 3010700 266.
## 8 AMZN 2013-01-09 268. 270. 265. 266. 2265600 266.
## 9 AMZN 2013-01-10 269. 269. 262. 265. 2863400 265.
## 10 AMZN 2013-01-11 265. 268. 264. 268. 2413300 268.
## # ℹ 5,826 more rows
FANG %>%
head(10) %>%
mutate(rolling_avg_2 = slidify_vec(adjusted, mean,
.period = 2,
.align = "right",
.partial = TRUE))
## # A tibble: 10 × 9
## symbol date open high low close volume adjusted rolling_avg_2
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 FB 2013-01-02 27.4 28.2 27.4 28 69846400 28 28
## 2 FB 2013-01-03 27.9 28.5 27.6 27.8 63140600 27.8 27.9
## 3 FB 2013-01-04 28.0 28.9 27.8 28.8 72715400 28.8 28.3
## 4 FB 2013-01-07 28.7 29.8 28.6 29.4 83781800 29.4 29.1
## 5 FB 2013-01-08 29.5 29.6 28.9 29.1 45871300 29.1 29.2
## 6 FB 2013-01-09 29.7 30.6 29.5 30.6 104787700 30.6 29.8
## 7 FB 2013-01-10 30.6 31.5 30.3 31.3 95316400 31.3 30.9
## 8 FB 2013-01-11 31.3 32.0 31.1 31.7 89598000 31.7 31.5
## 9 FB 2013-01-14 32.1 32.2 30.6 31.0 98892800 31.0 31.3
## 10 FB 2013-01-15 30.6 31.7 29.9 30.1 173242600 30.1 30.5
# Rolling regressions are easy to implement using '.unlist = FALSE
lm_roll <- slidify(~ lm(..1 ~ ..2 + ..3),
.period = 90,
.unlist = FALSE,
.align = "right")
FANG %>%
select(symbol, date, adjusted, volume) %>%
group_by(symbol) %>%
mutate(numeric_date = as.numeric(date)) %>%
#Apply rolling regression
mutate(rolling_lm = lm_roll(adjusted, volume, numeric_date)) %>%
filter(!is.na(rolling_lm))
## # A tibble: 3,676 × 6
## # Groups: symbol [4]
## symbol date adjusted volume numeric_date rolling_lm
## <chr> <date> <dbl> <dbl> <dbl> <list>
## 1 FB 2013-05-10 26.7 30847100 15835 <lm>
## 2 FB 2013-05-13 26.8 29068800 15838 <lm>
## 3 FB 2013-05-14 27.1 24930300 15839 <lm>
## 4 FB 2013-05-15 26.6 30299800 15840 <lm>
## 5 FB 2013-05-16 26.1 35499100 15841 <lm>
## 6 FB 2013-05-17 26.2 29462700 15842 <lm>
## 7 FB 2013-05-20 25.8 42402900 15845 <lm>
## 8 FB 2013-05-21 25.7 26261300 15846 <lm>
## 9 FB 2013-05-22 25.2 45314500 15847 <lm>
## 10 FB 2013-05-23 25.1 37663100 15848 <lm>
## # ℹ 3,666 more rows