stocks <- tq_get(c("VOOG", "NVDA"),
get = "stock.prices",
from = "2019-01-01",
to = "2022-01-01")
stocks
## # A tibble: 1,514 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 VOOG 2019-01-02 133. 135. 133. 135. 239200 129.
## 2 VOOG 2019-01-03 134. 134. 131. 131. 177400 126.
## 3 VOOG 2019-01-04 133. 137. 133. 136. 198400 130.
## 4 VOOG 2019-01-07 136. 138. 136. 137. 262000 131.
## 5 VOOG 2019-01-08 139. 139. 137. 139. 178600 133.
## 6 VOOG 2019-01-09 139. 140. 138. 139. 207000 133.
## 7 VOOG 2019-01-10 139. 140. 138. 140. 156200 134.
## 8 VOOG 2019-01-11 139. 140. 139. 140. 123400 134.
## 9 VOOG 2019-01-14 139. 139. 138. 139. 75500 133.
## 10 VOOG 2019-01-15 139. 141. 139. 141. 128500 135.
## # ℹ 1,504 more rows
filter(stocks, high == 1, low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## # close <dbl>, volume <dbl>, adjusted <dbl>
arrange(stocks, desc(high), desc(date()))
## # A tibble: 1,514 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2021-11-22 335. 346. 319 320. 75433500 319.
## 2 NVDA 2021-11-29 324. 334. 320. 334. 45496400 333.
## 3 NVDA 2021-11-30 332. 334. 319. 327. 62206600 326.
## 4 NVDA 2021-12-01 332. 333. 314. 314. 48436800 314.
## 5 NVDA 2021-11-19 322. 331. 319. 330. 53386700 329.
## 6 NVDA 2021-11-24 315. 329. 309. 327. 43516200 326.
## 7 NVDA 2021-11-18 324. 328. 313. 317. 78171100 316.
## 8 NVDA 2021-11-26 326 327. 314. 315. 28306900 315.
## 9 NVDA 2021-12-02 312. 325. 310. 321. 47289000 321.
## 10 NVDA 2021-12-07 310. 324. 307. 324. 59305300 324.
## # ℹ 1,504 more rows
select(stocks, low:volume)
## # A tibble: 1,514 × 3
## low close volume
## <dbl> <dbl> <dbl>
## 1 133. 135. 239200
## 2 131. 131. 177400
## 3 133. 136. 198400
## 4 136. 137. 262000
## 5 137. 139. 178600
## 6 138. 139. 207000
## 7 138. 140. 156200
## 8 139. 140. 123400
## 9 138. 139. 75500
## 10 139. 141. 128500
## # ℹ 1,504 more rows
select(stocks, high, volume)
## # A tibble: 1,514 × 2
## high volume
## <dbl> <dbl>
## 1 135. 239200
## 2 134. 177400
## 3 137. 198400
## 4 138. 262000
## 5 139. 178600
## 6 140. 207000
## 7 140. 156200
## 8 140. 123400
## 9 139. 75500
## 10 141. 128500
## # ℹ 1,504 more rows
select(stocks,date,open)
## # A tibble: 1,514 × 2
## date open
## <date> <dbl>
## 1 2019-01-02 133.
## 2 2019-01-03 134.
## 3 2019-01-04 133.
## 4 2019-01-07 136.
## 5 2019-01-08 139.
## 6 2019-01-09 139.
## 7 2019-01-10 139.
## 8 2019-01-11 139.
## 9 2019-01-14 139.
## 10 2019-01-15 139.
## # ℹ 1,504 more rows
select(stocks,low,high, starts_with("dep"))
## # A tibble: 1,514 × 2
## low high
## <dbl> <dbl>
## 1 133. 135.
## 2 131. 134.
## 3 133. 137.
## 4 136. 138.
## 5 137. 139.
## 6 138. 140.
## 7 138. 140.
## 8 139. 140.
## 9 138. 139.
## 10 139. 141.
## # ℹ 1,504 more rows
mutate(stocks,
high = date - volume) %>%
# Select year, month, day, and high
select(symbol:date, high)
## # A tibble: 1,514 × 3
## symbol date high
## <chr> <date> <date>
## 1 VOOG 2019-01-02 1364-02-05
## 2 VOOG 2019-01-03 1533-04-21
## 3 VOOG 2019-01-04 1475-10-23
## 4 VOOG 2019-01-07 1301-09-08
## 5 VOOG 2019-01-08 1530-01-12
## 6 VOOG 2019-01-09 1452-04-11
## 7 VOOG 2019-01-10 1591-05-14
## 8 VOOG 2019-01-11 1681-03-03
## 9 VOOG 2019-01-14 1812-04-29
## 10 VOOG 2019-01-15 1667-03-21
## # ℹ 1,504 more rows
# Just keep high
mutate(stocks,
high = date - low) %>%
# Select year, month, day, and gain
select(low)
## # A tibble: 1,514 × 1
## low
## <dbl>
## 1 133.
## 2 131.
## 3 133.
## 4 136.
## 5 137.
## 6 138.
## 7 138.
## 8 139.
## 9 138.
## 10 139.
## # ℹ 1,504 more rows
# alternative using transmute()
transmute(stocks,
high = date- low)
## # A tibble: 1,514 × 1
## high
## <date>
## 1 2018-08-22
## 2 2018-08-24
## 3 2018-08-23
## 4 2018-08-24
## 5 2018-08-23
## 6 2018-08-23
## 7 2018-08-25
## 8 2018-08-25
## 9 2018-08-28
## 10 2018-08-28
## # ℹ 1,504 more rows
Collapsing data to a single row
stocks
## # A tibble: 1,514 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 VOOG 2019-01-02 133. 135. 133. 135. 239200 129.
## 2 VOOG 2019-01-03 134. 134. 131. 131. 177400 126.
## 3 VOOG 2019-01-04 133. 137. 133. 136. 198400 130.
## 4 VOOG 2019-01-07 136. 138. 136. 137. 262000 131.
## 5 VOOG 2019-01-08 139. 139. 137. 139. 178600 133.
## 6 VOOG 2019-01-09 139. 140. 138. 139. 207000 133.
## 7 VOOG 2019-01-10 139. 140. 138. 140. 156200 134.
## 8 VOOG 2019-01-11 139. 140. 139. 140. 123400 134.
## 9 VOOG 2019-01-14 139. 139. 138. 139. 75500 133.
## 10 VOOG 2019-01-15 139. 141. 139. 141. 128500 135.
## # ℹ 1,504 more rows
# average stock price
summarise(stocks, date = mean(high, na.rm = TRUE))
## # A tibble: 1 × 1
## date
## <dbl>
## 1 160.
Summarize by group
stocks %>%
# Group by stock
group_by(symbol) %>%
# Calculate average high
summarise(high = mean(date, na.rm = TRUE)) %>%
# Sort it
arrange(high)
## # A tibble: 2 × 2
## symbol high
## <chr> <date>
## 1 NVDA 2020-07-02
## 2 VOOG 2020-07-02
grouping the mean high and 52 week low
stocks %>%
group_by(low) %>%
summarise(count = n(),
l = mean(low, na.rm = TRUE),
hi= mean(high, na.rm = TRUE)) %>%
# Plot
ggplot(mapping = aes(x = l, y = hi)) +
geom_point(aes(size = count), alpha = 0.3) +
geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Missing values
stocks %>%
# Remove missing values
filter(!is.na(high))
## # A tibble: 1,514 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 VOOG 2019-01-02 133. 135. 133. 135. 239200 129.
## 2 VOOG 2019-01-03 134. 134. 131. 131. 177400 126.
## 3 VOOG 2019-01-04 133. 137. 133. 136. 198400 130.
## 4 VOOG 2019-01-07 136. 138. 136. 137. 262000 131.
## 5 VOOG 2019-01-08 139. 139. 137. 139. 178600 133.
## 6 VOOG 2019-01-09 139. 140. 138. 139. 207000 133.
## 7 VOOG 2019-01-10 139. 140. 138. 140. 156200 134.
## 8 VOOG 2019-01-11 139. 140. 139. 140. 123400 134.
## 9 VOOG 2019-01-14 139. 139. 138. 139. 75500 133.
## 10 VOOG 2019-01-15 139. 141. 139. 141. 128500 135.
## # ℹ 1,504 more rows
useful summary functions
grouping multiple variables
stocks %>%
group_by(date, high, low) %>%
summarise(count = n()) %>%
ungroup()
## `summarise()` has grouped output by 'date', 'high'. You can override using the
## `.groups` argument.
## # A tibble: 1,514 × 4
## date high low count
## <date> <dbl> <dbl> <int>
## 1 2019-01-02 34.6 32.5 1
## 2 2019-01-02 135. 133. 1
## 3 2019-01-03 33.8 31.9 1
## 4 2019-01-03 134. 131. 1
## 5 2019-01-04 34.4 32.4 1
## 6 2019-01-04 137. 133. 1
## 7 2019-01-07 36.2 34.1 1
## 8 2019-01-07 138. 136. 1
## 9 2019-01-08 36.7 34.2 1
## 10 2019-01-08 139. 137. 1
## # ℹ 1,504 more rows
ungrouping