stocks <- tq_get(c("AAPL", "MSFT","META"),
get = "stock.prices",
from = "2018-01-01",
to = "2021-01-01")
stocks
## # A tibble: 2,268 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-01-02 42.5 43.1 42.3 43.1 102223600 41.0
## 2 AAPL 2018-01-03 43.1 43.6 43.0 43.1 118071600 41.0
## 3 AAPL 2018-01-04 43.1 43.4 43.0 43.3 89738400 41.2
## 4 AAPL 2018-01-05 43.4 43.8 43.3 43.8 94640000 41.7
## 5 AAPL 2018-01-08 43.6 43.9 43.5 43.6 82271200 41.5
## 6 AAPL 2018-01-09 43.6 43.8 43.4 43.6 86336000 41.5
## 7 AAPL 2018-01-10 43.3 43.6 43.2 43.6 95839600 41.5
## 8 AAPL 2018-01-11 43.6 43.9 43.6 43.8 74670800 41.7
## 9 AAPL 2018-01-12 44.0 44.3 43.9 44.3 101672400 42.2
## 10 AAPL 2018-01-16 44.5 44.8 44.0 44.0 118263600 42.0
## # … with 2,258 more rows
stocks %>%
ggplot(aes(x = date, y = adjusted, color = symbol)) +
geom_line()
stocks %>% filter(adjusted < 41)
## # A tibble: 71 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-01-25 43.6 43.7 42.6 42.8 166116000 40.7
## 2 AAPL 2018-01-26 43 43 42.5 42.9 156572000 40.8
## 3 AAPL 2018-01-29 42.5 42.5 41.8 42.0 202561600 40.0
## 4 AAPL 2018-01-30 41.4 41.8 41.2 41.7 184192800 39.8
## 5 AAPL 2018-01-31 41.7 42.1 41.6 41.9 129915600 39.9
## 6 AAPL 2018-02-01 41.8 42.2 41.7 41.9 188923200 40.0
## 7 AAPL 2018-02-02 41.5 41.7 40.0 40.1 346375200 38.2
## 8 AAPL 2018-02-05 39.8 41.0 39 39.1 290954000 37.3
## 9 AAPL 2018-02-06 38.7 40.9 38.5 40.8 272975200 38.8
## 10 AAPL 2018-02-07 40.8 40.8 39.8 39.9 206434400 38.0
## # … with 61 more rows
stocks %>% filter(adjusted < 42)
## # A tibble: 114 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-01-02 42.5 43.1 42.3 43.1 102223600 41.0
## 2 AAPL 2018-01-03 43.1 43.6 43.0 43.1 118071600 41.0
## 3 AAPL 2018-01-04 43.1 43.4 43.0 43.3 89738400 41.2
## 4 AAPL 2018-01-05 43.4 43.8 43.3 43.8 94640000 41.7
## 5 AAPL 2018-01-08 43.6 43.9 43.5 43.6 82271200 41.5
## 6 AAPL 2018-01-09 43.6 43.8 43.4 43.6 86336000 41.5
## 7 AAPL 2018-01-10 43.3 43.6 43.2 43.6 95839600 41.5
## 8 AAPL 2018-01-11 43.6 43.9 43.6 43.8 74670800 41.7
## 9 AAPL 2018-01-16 44.5 44.8 44.0 44.0 118263600 42.0
## 10 AAPL 2018-01-24 44.3 44.3 43.3 43.6 204420400 41.5
## # … with 104 more rows
arrange(stocks, desc(open), desc(close))
## # A tibble: 2,268 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 META 2020-08-27 300. 301. 292. 293. 30301300 293.
## 2 META 2020-09-02 299. 304. 293. 302. 24341400 302.
## 3 META 2020-09-03 296. 298. 284. 291. 32294100 291.
## 4 META 2020-08-28 295 297. 291. 294. 17157400 294.
## 5 META 2020-09-01 295. 301. 293. 295. 17295900 295.
## 6 META 2020-11-06 294. 295. 288. 293. 13891000 293.
## 7 META 2020-08-31 294. 297. 292. 293. 17345100 293.
## 8 META 2020-11-05 292. 297. 289. 295. 23823600 295.
## 9 META 2020-11-09 290. 293. 279. 279. 25117700 279.
## 10 META 2020-09-04 287. 289 271. 283. 30333700 283.
## # … with 2,258 more rows
arrange(stocks, desc(low), desc(high))
## # A tibble: 2,268 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 META 2020-09-02 299. 304. 293. 302. 24341400 302.
## 2 META 2020-09-01 295. 301. 293. 295. 17295900 295.
## 3 META 2020-08-27 300. 301. 292. 293. 30301300 293.
## 4 META 2020-08-31 294. 297. 292. 293. 17345100 293.
## 5 META 2020-08-28 295 297. 291. 294. 17157400 294.
## 6 META 2020-11-05 292. 297. 289. 295. 23823600 295.
## 7 META 2020-11-06 294. 295. 288. 293. 13891000 293.
## 8 META 2020-08-26 284 305. 284 304. 69015200 304.
## 9 META 2020-09-03 296. 298. 284. 291. 32294100 291.
## 10 META 2020-12-08 286. 286. 282. 283. 10747700 283.
## # … with 2,258 more rows
select(stocks, open:close)
## # A tibble: 2,268 × 4
## open high low close
## <dbl> <dbl> <dbl> <dbl>
## 1 42.5 43.1 42.3 43.1
## 2 43.1 43.6 43.0 43.1
## 3 43.1 43.4 43.0 43.3
## 4 43.4 43.8 43.3 43.8
## 5 43.6 43.9 43.5 43.6
## 6 43.6 43.8 43.4 43.6
## 7 43.3 43.6 43.2 43.6
## 8 43.6 43.9 43.6 43.8
## 9 44.0 44.3 43.9 44.3
## 10 44.5 44.8 44.0 44.0
## # … with 2,258 more rows
select(stocks, open, high, low, close)
## # A tibble: 2,268 × 4
## open high low close
## <dbl> <dbl> <dbl> <dbl>
## 1 42.5 43.1 42.3 43.1
## 2 43.1 43.6 43.0 43.1
## 3 43.1 43.4 43.0 43.3
## 4 43.4 43.8 43.3 43.8
## 5 43.6 43.9 43.5 43.6
## 6 43.6 43.8 43.4 43.6
## 7 43.3 43.6 43.2 43.6
## 8 43.6 43.9 43.6 43.8
## 9 44.0 44.3 43.9 44.3
## 10 44.5 44.8 44.0 44.0
## # … with 2,258 more rows
select(stocks, open, high, low, close, adjusted)
## # A tibble: 2,268 × 5
## open high low close adjusted
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 42.5 43.1 42.3 43.1 41.0
## 2 43.1 43.6 43.0 43.1 41.0
## 3 43.1 43.4 43.0 43.3 41.2
## 4 43.4 43.8 43.3 43.8 41.7
## 5 43.6 43.9 43.5 43.6 41.5
## 6 43.6 43.8 43.4 43.6 41.5
## 7 43.3 43.6 43.2 43.6 41.5
## 8 43.6 43.9 43.6 43.8 41.7
## 9 44.0 44.3 43.9 44.3 42.2
## 10 44.5 44.8 44.0 44.0 42.0
## # … with 2,258 more rows
select(stocks, open, high, low, starts_with("open"))
## # A tibble: 2,268 × 3
## open high low
## <dbl> <dbl> <dbl>
## 1 42.5 43.1 42.3
## 2 43.1 43.6 43.0
## 3 43.1 43.4 43.0
## 4 43.4 43.8 43.3
## 5 43.6 43.9 43.5
## 6 43.6 43.8 43.4
## 7 43.3 43.6 43.2
## 8 43.6 43.9 43.6
## 9 44.0 44.3 43.9
## 10 44.5 44.8 44.0
## # … with 2,258 more rows
select(stocks, open, high, low, contains("time"))
## # A tibble: 2,268 × 3
## open high low
## <dbl> <dbl> <dbl>
## 1 42.5 43.1 42.3
## 2 43.1 43.6 43.0
## 3 43.1 43.4 43.0
## 4 43.4 43.8 43.3
## 5 43.6 43.9 43.5
## 6 43.6 43.8 43.4
## 7 43.3 43.6 43.2
## 8 43.6 43.9 43.6
## 9 44.0 44.3 43.9
## 10 44.5 44.8 44.0
## # … with 2,258 more rows
select(stocks, open, high, low, ends_with("time"))
## # A tibble: 2,268 × 3
## open high low
## <dbl> <dbl> <dbl>
## 1 42.5 43.1 42.3
## 2 43.1 43.6 43.0
## 3 43.1 43.4 43.0
## 4 43.4 43.8 43.3
## 5 43.6 43.9 43.5
## 6 43.6 43.8 43.4
## 7 43.3 43.6 43.2
## 8 43.6 43.9 43.6
## 9 44.0 44.3 43.9
## 10 44.5 44.8 44.0
## # … with 2,258 more rows
select(stocks, open, high, low, ends_with("time"))
## # A tibble: 2,268 × 3
## open high low
## <dbl> <dbl> <dbl>
## 1 42.5 43.1 42.3
## 2 43.1 43.6 43.0
## 3 43.1 43.4 43.0
## 4 43.4 43.8 43.3
## 5 43.6 43.9 43.5
## 6 43.6 43.8 43.4
## 7 43.3 43.6 43.2
## 8 43.6 43.9 43.6
## 9 44.0 44.3 43.9
## 10 44.5 44.8 44.0
## # … with 2,258 more rows
mutate(stocks,
gain = high - low) %>%
# Select year, month, day, and gain
select(open:close, gain)
## # A tibble: 2,268 × 5
## open high low close gain
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 42.5 43.1 42.3 43.1 0.760
## 2 43.1 43.6 43.0 43.1 0.647
## 3 43.1 43.4 43.0 43.3 0.347
## 4 43.4 43.8 43.3 43.8 0.580
## 5 43.6 43.9 43.5 43.6 0.420
## 6 43.6 43.8 43.4 43.6 0.412
## 7 43.3 43.6 43.2 43.6 0.325
## 8 43.6 43.9 43.6 43.8 0.25
## 9 44.0 44.3 43.9 44.3 0.428
## 10 44.5 44.8 44.0 44.0 0.812
## # … with 2,258 more rows
# Just keep gain.
mutate(stocks,
gain = high - low) %>%
# Select year, month, day, and gain
select(gain)
## # A tibble: 2,268 × 1
## gain
## <dbl>
## 1 0.760
## 2 0.647
## 3 0.347
## 4 0.580
## 5 0.420
## 6 0.412
## 7 0.325
## 8 0.25
## 9 0.428
## 10 0.812
## # … with 2,258 more rows
# Alternative using transmute()
transmute(stocks,
gain= high - low)
## # A tibble: 2,268 × 1
## gain
## <dbl>
## 1 0.760
## 2 0.647
## 3 0.347
## 4 0.580
## 5 0.420
## 6 0.412
## 7 0.325
## 8 0.25
## 9 0.428
## 10 0.812
## # … with 2,258 more rows
Collapsing data to a single row
stocks
## # A tibble: 2,268 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-01-02 42.5 43.1 42.3 43.1 102223600 41.0
## 2 AAPL 2018-01-03 43.1 43.6 43.0 43.1 118071600 41.0
## 3 AAPL 2018-01-04 43.1 43.4 43.0 43.3 89738400 41.2
## 4 AAPL 2018-01-05 43.4 43.8 43.3 43.8 94640000 41.7
## 5 AAPL 2018-01-08 43.6 43.9 43.5 43.6 82271200 41.5
## 6 AAPL 2018-01-09 43.6 43.8 43.4 43.6 86336000 41.5
## 7 AAPL 2018-01-10 43.3 43.6 43.2 43.6 95839600 41.5
## 8 AAPL 2018-01-11 43.6 43.9 43.6 43.8 74670800 41.7
## 9 AAPL 2018-01-12 44.0 44.3 43.9 44.3 101672400 42.2
## 10 AAPL 2018-01-16 44.5 44.8 44.0 44.0 118263600 42.0
## # … with 2,258 more rows
# average high of APPL, MSFT, META
summarise(stocks, high = mean(high, na.rm = TRUE))
## # A tibble: 1 × 1
## high
## <dbl>
## 1 136.
Missing values
stocks %>%
# Remove missing values
filter(!is.na(high))
## # A tibble: 2,268 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-01-02 42.5 43.1 42.3 43.1 102223600 41.0
## 2 AAPL 2018-01-03 43.1 43.6 43.0 43.1 118071600 41.0
## 3 AAPL 2018-01-04 43.1 43.4 43.0 43.3 89738400 41.2
## 4 AAPL 2018-01-05 43.4 43.8 43.3 43.8 94640000 41.7
## 5 AAPL 2018-01-08 43.6 43.9 43.5 43.6 82271200 41.5
## 6 AAPL 2018-01-09 43.6 43.8 43.4 43.6 86336000 41.5
## 7 AAPL 2018-01-10 43.3 43.6 43.2 43.6 95839600 41.5
## 8 AAPL 2018-01-11 43.6 43.9 43.6 43.8 74670800 41.7
## 9 AAPL 2018-01-12 44.0 44.3 43.9 44.3 101672400 42.2
## 10 AAPL 2018-01-16 44.5 44.8 44.0 44.0 118263600 42.0
## # … with 2,258 more rows
Grouping multiple variables
stocks %>%
group_by(open, high, low) %>%
summarise(count = n()) %>%
ungroup()
## `summarise()` has grouped output by 'open', 'high'. You can override using the
## `.groups` argument.
## # A tibble: 2,268 × 4
## open high low count
## <dbl> <dbl> <dbl> <int>
## 1 36.0 36.4 35.5 1
## 2 36.1 37.1 36.0 1
## 3 37.0 37.9 36.6 1
## 4 37.1 39.3 36.7 1
## 5 37.2 37.2 36.5 1
## 6 37.4 38.0 37.1 1
## 7 37.6 38.3 37.5 1
## 8 37.7 37.8 37.3 1
## 9 37.8 38.6 37.4 1
## 10 38.1 38.5 37.7 1
## # … with 2,258 more rows
Find the worst members of each group
stocks %>%
group_by(open, high, low) %>%
filter(rank(desc(low)) < 10)
## # A tibble: 2,268 × 8
## # Groups: open, high, low [2,268]
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAPL 2018-01-02 42.5 43.1 42.3 43.1 102223600 41.0
## 2 AAPL 2018-01-03 43.1 43.6 43.0 43.1 118071600 41.0
## 3 AAPL 2018-01-04 43.1 43.4 43.0 43.3 89738400 41.2
## 4 AAPL 2018-01-05 43.4 43.8 43.3 43.8 94640000 41.7
## 5 AAPL 2018-01-08 43.6 43.9 43.5 43.6 82271200 41.5
## 6 AAPL 2018-01-09 43.6 43.8 43.4 43.6 86336000 41.5
## 7 AAPL 2018-01-10 43.3 43.6 43.2 43.6 95839600 41.5
## 8 AAPL 2018-01-11 43.6 43.9 43.6 43.8 74670800 41.7
## 9 AAPL 2018-01-12 44.0 44.3 43.9 44.3 101672400 42.2
## 10 AAPL 2018-01-16 44.5 44.8 44.0 44.0 118263600 42.0
## # … with 2,258 more rows
Standardise to compute per group metrics
stocks %>%
filter(high > 0) %>%
mutate(close = high / sum(high)) %>%
select(open:low, volume, high, close)
## # A tibble: 2,268 × 5
## open high low volume close
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 42.5 43.1 42.3 102223600 0.000140
## 2 43.1 43.6 43.0 118071600 0.000142
## 3 43.1 43.4 43.0 89738400 0.000141
## 4 43.4 43.8 43.3 94640000 0.000142
## 5 43.6 43.9 43.5 82271200 0.000143
## 6 43.6 43.8 43.4 86336000 0.000142
## 7 43.3 43.6 43.2 95839600 0.000142
## 8 43.6 43.9 43.6 74670800 0.000143
## 9 44.0 44.3 43.9 101672400 0.000144
## 10 44.5 44.8 44.0 118263600 0.000146
## # … with 2,258 more rows