stocks <- tq_get(c("NVDA", "MSFT", "GOOGL", "AMD"),
get = "stock.prices",
from = "2020-01-01")
stocks
## # A tibble: 5,716 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2020-01-02 5.97 6.00 5.92 6.00 237536000 5.97
## 2 NVDA 2020-01-03 5.88 5.95 5.85 5.90 205384000 5.88
## 3 NVDA 2020-01-06 5.81 5.93 5.78 5.93 262636000 5.90
## 4 NVDA 2020-01-07 5.95 6.04 5.91 6.00 314856000 5.97
## 5 NVDA 2020-01-08 5.99 6.05 5.95 6.01 277108000 5.98
## 6 NVDA 2020-01-09 6.10 6.15 6.02 6.08 255112000 6.05
## 7 NVDA 2020-01-10 6.18 6.21 6.09 6.11 316296000 6.08
## 8 NVDA 2020-01-13 6.19 6.32 6.17 6.30 319840000 6.27
## 9 NVDA 2020-01-14 6.26 6.28 6.17 6.18 359088000 6.16
## 10 NVDA 2020-01-15 6.19 6.22 6.11 6.14 263104000 6.11
## # ℹ 5,706 more rows
stocks %>%
ggplot(aes(x = date, y = adjusted, color = symbol)) +
geom_line()
stocks %>% filter(adjusted > 30)
## # A tibble: 4,891 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2021-11-08 30.1 31.1 29.9 30.8 503101000 30.7
## 2 NVDA 2021-11-09 32.3 32.3 30.0 30.7 646746000 30.6
## 3 NVDA 2021-11-11 30.5 30.6 29.8 30.4 332172000 30.3
## 4 NVDA 2021-11-12 30.0 30.7 29.6 30.4 413054000 30.3
## 5 NVDA 2021-11-16 29.8 30.4 29.7 30.2 264484000 30.1
## 6 NVDA 2021-11-18 32.4 32.8 31.3 31.7 781711000 31.6
## 7 NVDA 2021-11-19 32.2 33.1 31.9 33.0 533867000 32.9
## 8 NVDA 2021-11-22 33.5 34.6 31.9 32.0 754335000 31.9
## 9 NVDA 2021-11-23 31.5 32.4 30.9 31.7 532163000 31.7
## 10 NVDA 2021-11-24 31.5 32.9 30.9 32.7 435162000 32.6
## # ℹ 4,881 more rows
filter(stocks, date >= "2025-1-1")
## # A tibble: 684 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2025-01-02 136 139. 135. 138. 198247200 138.
## 2 NVDA 2025-01-03 140. 145. 140. 144. 229322500 144.
## 3 NVDA 2025-01-06 149. 152. 148. 149. 265377400 149.
## 4 NVDA 2025-01-07 153. 153. 140. 140. 351782200 140.
## 5 NVDA 2025-01-08 143. 144. 138. 140. 227349900 140.
## 6 NVDA 2025-01-10 137. 140. 134. 136. 207602500 136.
## 7 NVDA 2025-01-13 130. 133. 130. 133. 204808900 133.
## 8 NVDA 2025-01-14 136. 136. 130. 132. 195590500 132.
## 9 NVDA 2025-01-15 134. 136. 131. 136. 185217300 136.
## 10 NVDA 2025-01-16 139. 139. 133. 134. 209235600 134.
## # ℹ 674 more rows
arrange(stocks, desc(date), desc(close))
## # A tibble: 5,716 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MSFT 2025-09-09 501. 502. 498. 498. 14349500 498.
## 2 GOOGL 2025-09-09 234. 240. 233. 240. 38012800 240.
## 3 NVDA 2025-09-09 169. 171. 167. 171. 156305800 171.
## 4 AMD 2025-09-09 152. 157. 152. 156. 42466400 156.
## 5 MSFT 2025-09-08 498. 501. 495. 498. 16771000 498.
## 6 GOOGL 2025-09-08 235. 238. 234. 234. 32474700 234.
## 7 NVDA 2025-09-08 168. 171. 167. 168. 163769100 168.
## 8 AMD 2025-09-08 152. 153. 149. 151. 41849000 151.
## 9 MSFT 2025-09-05 509. 512. 492. 495 31994800 495
## 10 GOOGL 2025-09-05 232. 236. 232. 235 46588900 235.
## # ℹ 5,706 more rows
select(stocks, symbol:close)
## # A tibble: 5,716 × 6
## symbol date open high low close
## <chr> <date> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2020-01-02 5.97 6.00 5.92 6.00
## 2 NVDA 2020-01-03 5.88 5.95 5.85 5.90
## 3 NVDA 2020-01-06 5.81 5.93 5.78 5.93
## 4 NVDA 2020-01-07 5.95 6.04 5.91 6.00
## 5 NVDA 2020-01-08 5.99 6.05 5.95 6.01
## 6 NVDA 2020-01-09 6.10 6.15 6.02 6.08
## 7 NVDA 2020-01-10 6.18 6.21 6.09 6.11
## 8 NVDA 2020-01-13 6.19 6.32 6.17 6.30
## 9 NVDA 2020-01-14 6.26 6.28 6.17 6.18
## 10 NVDA 2020-01-15 6.19 6.22 6.11 6.14
## # ℹ 5,706 more rows
select(stocks, symbol, date, open, close)
## # A tibble: 5,716 × 4
## symbol date open close
## <chr> <date> <dbl> <dbl>
## 1 NVDA 2020-01-02 5.97 6.00
## 2 NVDA 2020-01-03 5.88 5.90
## 3 NVDA 2020-01-06 5.81 5.93
## 4 NVDA 2020-01-07 5.95 6.00
## 5 NVDA 2020-01-08 5.99 6.01
## 6 NVDA 2020-01-09 6.10 6.08
## 7 NVDA 2020-01-10 6.18 6.11
## 8 NVDA 2020-01-13 6.19 6.30
## 9 NVDA 2020-01-14 6.26 6.18
## 10 NVDA 2020-01-15 6.19 6.14
## # ℹ 5,706 more rows
mutate(stocks,
gain = close - open) %>%
select(symbol, date, gain)
## # A tibble: 5,716 × 3
## symbol date gain
## <chr> <date> <dbl>
## 1 NVDA 2020-01-02 0.0290
## 2 NVDA 2020-01-03 0.0243
## 3 NVDA 2020-01-06 0.118
## 4 NVDA 2020-01-07 0.0433
## 5 NVDA 2020-01-08 0.0155
## 6 NVDA 2020-01-09 -0.0208
## 7 NVDA 2020-01-10 -0.0753
## 8 NVDA 2020-01-13 0.108
## 9 NVDA 2020-01-14 -0.0745
## 10 NVDA 2020-01-15 -0.0555
## # ℹ 5,706 more rows
transmute(stocks,
gap = high - low)
## # A tibble: 5,716 × 1
## gap
## <dbl>
## 1 0.0797
## 2 0.0933
## 3 0.150
## 4 0.135
## 5 0.0972
## 6 0.127
## 7 0.120
## 8 0.156
## 9 0.113
## 10 0.104
## # ℹ 5,706 more rows
select(stocks, close) %>%
mutate(close_lag1 = lag(close))
## # A tibble: 5,716 × 2
## close close_lag1
## <dbl> <dbl>
## 1 6.00 NA
## 2 5.90 6.00
## 3 5.93 5.90
## 4 6.00 5.93
## 5 6.01 6.00
## 6 6.08 6.01
## 7 6.11 6.08
## 8 6.30 6.11
## 9 6.18 6.30
## 10 6.14 6.18
## # ℹ 5,706 more rows
select(stocks, close) %>%
mutate(close_cumsum = cumsum(close))
## # A tibble: 5,716 × 2
## close close_cumsum
## <dbl> <dbl>
## 1 6.00 6.00
## 2 5.90 11.9
## 3 5.93 17.8
## 4 6.00 23.8
## 5 6.01 29.8
## 6 6.08 35.9
## 7 6.11 42.0
## 8 6.30 48.3
## 9 6.18 54.5
## 10 6.14 60.6
## # ℹ 5,706 more rows
Collapsing data to a single row
stocks
## # A tibble: 5,716 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2020-01-02 5.97 6.00 5.92 6.00 237536000 5.97
## 2 NVDA 2020-01-03 5.88 5.95 5.85 5.90 205384000 5.88
## 3 NVDA 2020-01-06 5.81 5.93 5.78 5.93 262636000 5.90
## 4 NVDA 2020-01-07 5.95 6.04 5.91 6.00 314856000 5.97
## 5 NVDA 2020-01-08 5.99 6.05 5.95 6.01 277108000 5.98
## 6 NVDA 2020-01-09 6.10 6.15 6.02 6.08 255112000 6.05
## 7 NVDA 2020-01-10 6.18 6.21 6.09 6.11 316296000 6.08
## 8 NVDA 2020-01-13 6.19 6.32 6.17 6.30 319840000 6.27
## 9 NVDA 2020-01-14 6.26 6.28 6.17 6.18 359088000 6.16
## 10 NVDA 2020-01-15 6.19 6.22 6.11 6.14 263104000 6.11
## # ℹ 5,706 more rows
# average closing price
summarise(stocks, closure = mean(close, na.rm = TRUE))
## # A tibble: 1 × 1
## closure
## <dbl>
## 1 149.
Summarize by groups
stocks
## # A tibble: 5,716 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2020-01-02 5.97 6.00 5.92 6.00 237536000 5.97
## 2 NVDA 2020-01-03 5.88 5.95 5.85 5.90 205384000 5.88
## 3 NVDA 2020-01-06 5.81 5.93 5.78 5.93 262636000 5.90
## 4 NVDA 2020-01-07 5.95 6.04 5.91 6.00 314856000 5.97
## 5 NVDA 2020-01-08 5.99 6.05 5.95 6.01 277108000 5.98
## 6 NVDA 2020-01-09 6.10 6.15 6.02 6.08 255112000 6.05
## 7 NVDA 2020-01-10 6.18 6.21 6.09 6.11 316296000 6.08
## 8 NVDA 2020-01-13 6.19 6.32 6.17 6.30 319840000 6.27
## 9 NVDA 2020-01-14 6.26 6.28 6.17 6.18 359088000 6.16
## 10 NVDA 2020-01-15 6.19 6.22 6.11 6.14 263104000 6.11
## # ℹ 5,706 more rows
# average closing
summarise(stocks, gain = mean(close, na.rm = TRUE))
## # A tibble: 1 × 1
## gain
## <dbl>
## 1 149.
stocks %>%
# Group by symbol
group_by(symbol) %>%
# Calculate average gain
summarize(gain = mean(close, na.rm = TRUE)) %>%
# Sort it
arrange(gain) %>%
ungroup()
## # A tibble: 4 × 2
## symbol gain
## <chr> <dbl>
## 1 NVDA 50.6
## 2 AMD 106.
## 3 GOOGL 126.
## 4 MSFT 313.
Gain per stock increases with difference between closing and opening
stocks %>%
mutate(gain = close - open) %>%
group_by(gain) %>%
summarise(count = n(),
closing = mean(close, na.rm = TRUE),
opening = mean(open, na.rm = TRUE)) %>%
# Plot
ggplot(mapping = aes(x = closing, y = opening)) +
geom_point(aes(size = count), alpha = 0.3) +
geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
Missing values
stocks %>%
# Remove missing values
# ! is negative
filter(!is.na(close))
## # A tibble: 5,716 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2020-01-02 5.97 6.00 5.92 6.00 237536000 5.97
## 2 NVDA 2020-01-03 5.88 5.95 5.85 5.90 205384000 5.88
## 3 NVDA 2020-01-06 5.81 5.93 5.78 5.93 262636000 5.90
## 4 NVDA 2020-01-07 5.95 6.04 5.91 6.00 314856000 5.97
## 5 NVDA 2020-01-08 5.99 6.05 5.95 6.01 277108000 5.98
## 6 NVDA 2020-01-09 6.10 6.15 6.02 6.08 255112000 6.05
## 7 NVDA 2020-01-10 6.18 6.21 6.09 6.11 316296000 6.08
## 8 NVDA 2020-01-13 6.19 6.32 6.17 6.30 319840000 6.27
## 9 NVDA 2020-01-14 6.26 6.28 6.17 6.18 359088000 6.16
## 10 NVDA 2020-01-15 6.19 6.22 6.11 6.14 263104000 6.11
## # ℹ 5,706 more rows
Count
By loss
stocks <- stocks %>%
mutate(gain = close - open,
no_profit = gain <= 0)
loss <- stocks %>%
group_by(gain) %>%
summarise(loss = mean(gain))
ggplot(data = loss, mapping = aes(x = loss)) +
geom_freqpoly(binwidth = 1)
Grouping multiple variable
stocks %>%
group_by(date, symbol, open, close) %>%
summarise(count = n()) %>%
# Every time you group, you need to ungroup it
ungroup()
## `summarise()` has grouped output by 'date', 'symbol', 'open'. You can override
## using the `.groups` argument.
## # A tibble: 5,716 × 5
## date symbol open close count
## <date> <chr> <dbl> <dbl> <int>
## 1 2020-01-02 AMD 46.9 49.1 1
## 2 2020-01-02 GOOGL 67.4 68.4 1
## 3 2020-01-02 MSFT 159. 161. 1
## 4 2020-01-02 NVDA 5.97 6.00 1
## 5 2020-01-03 AMD 48.0 48.6 1
## 6 2020-01-03 GOOGL 67.4 68.1 1
## 7 2020-01-03 MSFT 158. 159. 1
## 8 2020-01-03 NVDA 5.88 5.90 1
## 9 2020-01-06 AMD 48.0 48.4 1
## 10 2020-01-06 GOOGL 67.6 69.9 1
## # ℹ 5,706 more rows