Import stock prices
stocks <- tq_get(c("NVDA","MSFT","INTC","DELL","SONY","GOOG","APPL","AMD"))
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `data.. = purrr::map(...)`.
## Caused by warning:
## ! x = 'APPL', get = 'stock.prices': Error in getSymbols.yahoo(Symbols = "APPL", env = <environment>, verbose = FALSE, : Unable to import "APPL".
## attempt to set an attribute on NULL
## Removing APPL.
stocks %>%
# Plot Stock Prices
ggplot(aes(x = date, y = adjusted, color = symbol)) +
geom_line()

Apply the dpylr verbs you learned in chapter 5
Filter Rows
stocks %>% filter(adjusted > 24)
## # A tibble: 11,850 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2021-10-26 24.0 25.3 23.9 24.7 485898000 24.7
## 2 NVDA 2021-10-27 24.5 25.1 24.3 24.5 245990000 24.4
## 3 NVDA 2021-10-28 24.9 25.0 24.5 24.9 234204000 24.9
## 4 NVDA 2021-10-29 25.0 25.7 25 25.6 292503000 25.5
## 5 NVDA 2021-11-01 25.6 25.9 25.2 25.8 265740000 25.8
## 6 NVDA 2021-11-02 25.8 26.7 25.8 26.4 294112000 26.3
## 7 NVDA 2021-11-03 26.7 26.8 26.2 26.6 239910000 26.5
## 8 NVDA 2021-11-04 27.2 31.4 27.1 29.8 1153631000 29.7
## 9 NVDA 2021-11-05 30.2 31.4 29.4 29.8 851260000 29.7
## 10 NVDA 2021-11-08 30.1 31.1 29.9 30.8 503101000 30.7
## # ℹ 11,840 more rows
Arange Rows
arrange(stocks, desc(open), desc(close))
## # A tibble: 18,400 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MSFT 2025-07-31 555. 555. 532. 534. 51617300 533.
## 2 MSFT 2025-08-05 537. 537. 527. 528. 19171600 527.
## 3 MSFT 2025-08-01 535 536. 521. 524. 28977600 523.
## 4 MSFT 2025-08-13 532. 533. 519. 521. 19619200 520.
## 5 MSFT 2025-08-06 531. 532. 524. 525. 21355700 524.
## 6 MSFT 2025-08-04 528. 538. 528. 536. 25349000 535.
## 7 MSFT 2025-08-07 527. 528. 518. 521. 16079100 520.
## 8 MSFT 2025-08-12 524. 531. 523. 529. 18667000 528.
## 9 MSFT 2025-08-15 523. 526. 519. 520. 25213300 519.
## 10 MSFT 2025-08-08 523. 525. 519. 522. 15531000 521.
## # ℹ 18,390 more rows
Select Comlumns
select(stocks, date:open)
## # A tibble: 18,400 × 2
## date open
## <date> <dbl>
## 1 2015-01-02 0.503
## 2 2015-01-05 0.503
## 3 2015-01-06 0.495
## 4 2015-01-07 0.483
## 5 2015-01-08 0.484
## 6 2015-01-09 0.498
## 7 2015-01-12 0.500
## 8 2015-01-13 0.496
## 9 2015-01-14 0.486
## 10 2015-01-15 0.497
## # ℹ 18,390 more rows
select(stocks, date, open)
## # A tibble: 18,400 × 2
## date open
## <date> <dbl>
## 1 2015-01-02 0.503
## 2 2015-01-05 0.503
## 3 2015-01-06 0.495
## 4 2015-01-07 0.483
## 5 2015-01-08 0.484
## 6 2015-01-09 0.498
## 7 2015-01-12 0.500
## 8 2015-01-13 0.496
## 9 2015-01-14 0.486
## 10 2015-01-15 0.497
## # ℹ 18,390 more rows
select(stocks, date, open, close)
## # A tibble: 18,400 × 3
## date open close
## <date> <dbl> <dbl>
## 1 2015-01-02 0.503 0.503
## 2 2015-01-05 0.503 0.495
## 3 2015-01-06 0.495 0.480
## 4 2015-01-07 0.483 0.479
## 5 2015-01-08 0.484 0.496
## 6 2015-01-09 0.498 0.498
## 7 2015-01-12 0.500 0.492
## 8 2015-01-13 0.496 0.491
## 9 2015-01-14 0.486 0.493
## 10 2015-01-15 0.497 0.490
## # ℹ 18,390 more rows
select(stocks, date, starts_with("open"))
## # A tibble: 18,400 × 2
## date open
## <date> <dbl>
## 1 2015-01-02 0.503
## 2 2015-01-05 0.503
## 3 2015-01-06 0.495
## 4 2015-01-07 0.483
## 5 2015-01-08 0.484
## 6 2015-01-09 0.498
## 7 2015-01-12 0.500
## 8 2015-01-13 0.496
## 9 2015-01-14 0.486
## 10 2015-01-15 0.497
## # ℹ 18,390 more rows
select(stocks, date, contains("close"))
## # A tibble: 18,400 × 2
## date close
## <date> <dbl>
## 1 2015-01-02 0.503
## 2 2015-01-05 0.495
## 3 2015-01-06 0.480
## 4 2015-01-07 0.479
## 5 2015-01-08 0.496
## 6 2015-01-09 0.498
## 7 2015-01-12 0.492
## 8 2015-01-13 0.491
## 9 2015-01-14 0.493
## 10 2015-01-15 0.490
## # ℹ 18,390 more rows
select(stocks, date, ends_with("close"))
## # A tibble: 18,400 × 2
## date close
## <date> <dbl>
## 1 2015-01-02 0.503
## 2 2015-01-05 0.495
## 3 2015-01-06 0.480
## 4 2015-01-07 0.479
## 5 2015-01-08 0.496
## 6 2015-01-09 0.498
## 7 2015-01-12 0.492
## 8 2015-01-13 0.491
## 9 2015-01-14 0.493
## 10 2015-01-15 0.490
## # ℹ 18,390 more rows
select(stocks, date, contains("close"),everything())
## # A tibble: 18,400 × 8
## date close symbol open high low volume adjusted
## <date> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2015-01-02 0.503 NVDA 0.503 0.507 0.495 113680000 0.483
## 2 2015-01-05 0.495 NVDA 0.503 0.505 0.493 197952000 0.475
## 3 2015-01-06 0.480 NVDA 0.495 0.496 0.479 197764000 0.461
## 4 2015-01-07 0.479 NVDA 0.483 0.488 0.477 321808000 0.459
## 5 2015-01-08 0.496 NVDA 0.484 0.500 0.484 283780000 0.477
## 6 2015-01-09 0.498 NVDA 0.498 0.502 0.491 209540000 0.479
## 7 2015-01-12 0.492 NVDA 0.500 0.5 0.488 190732000 0.473
## 8 2015-01-13 0.491 NVDA 0.496 0.506 0.488 236720000 0.472
## 9 2015-01-14 0.493 NVDA 0.486 0.495 0.485 155260000 0.474
## 10 2015-01-15 0.490 NVDA 0.497 0.5 0.490 188932000 0.470
## # ℹ 18,390 more rows
Add Columns
mutate(stocks,
gain = open - close) %>%
# select date and gain
select(gain)
## # A tibble: 18,400 × 1
## gain
## <dbl>
## 1 0
## 2 0.00850
## 3 0.0157
## 4 0.00475
## 5 -0.0125
## 6 -0.000250
## 7 0.00750
## 8 0.00450
## 9 -0.00725
## 10 0.00700
## # ℹ 18,390 more rows
# alternative using transmute()
transmute(stocks,
gain = open - close)
## # A tibble: 18,400 × 1
## gain
## <dbl>
## 1 0
## 2 0.00850
## 3 0.0157
## 4 0.00475
## 5 -0.0125
## 6 -0.000250
## 7 0.00750
## 8 0.00450
## 9 -0.00725
## 10 0.00700
## # ℹ 18,390 more rows
#lag()
mutate(stocks, date) %>%
mutate(high = close)
## # A tibble: 18,400 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2015-01-02 0.503 0.503 0.495 0.503 113680000 0.483
## 2 NVDA 2015-01-05 0.503 0.495 0.493 0.495 197952000 0.475
## 3 NVDA 2015-01-06 0.495 0.480 0.479 0.480 197764000 0.461
## 4 NVDA 2015-01-07 0.483 0.479 0.477 0.479 321808000 0.459
## 5 NVDA 2015-01-08 0.484 0.496 0.484 0.496 283780000 0.477
## 6 NVDA 2015-01-09 0.498 0.498 0.491 0.498 209540000 0.479
## 7 NVDA 2015-01-12 0.500 0.492 0.488 0.492 190732000 0.473
## 8 NVDA 2015-01-13 0.496 0.491 0.488 0.491 236720000 0.472
## 9 NVDA 2015-01-14 0.486 0.493 0.485 0.493 155260000 0.474
## 10 NVDA 2015-01-15 0.497 0.490 0.490 0.490 188932000 0.470
## # ℹ 18,390 more rows
# cum sum()
select(stocks, open) %>%
mutate(open_cumsum = cumsum(open))
## # A tibble: 18,400 × 2
## open open_cumsum
## <dbl> <dbl>
## 1 0.503 0.503
## 2 0.503 1.01
## 3 0.495 1.50
## 4 0.483 1.99
## 5 0.484 2.47
## 6 0.498 2.97
## 7 0.500 3.47
## 8 0.496 3.96
## 9 0.486 4.45
## 10 0.497 4.95
## # ℹ 18,390 more rows
Summarise with groups
stocks
## # A tibble: 18,400 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2015-01-02 0.503 0.507 0.495 0.503 113680000 0.483
## 2 NVDA 2015-01-05 0.503 0.505 0.493 0.495 197952000 0.475
## 3 NVDA 2015-01-06 0.495 0.496 0.479 0.480 197764000 0.461
## 4 NVDA 2015-01-07 0.483 0.488 0.477 0.479 321808000 0.459
## 5 NVDA 2015-01-08 0.484 0.500 0.484 0.496 283780000 0.477
## 6 NVDA 2015-01-09 0.498 0.502 0.491 0.498 209540000 0.479
## 7 NVDA 2015-01-12 0.500 0.5 0.488 0.492 190732000 0.473
## 8 NVDA 2015-01-13 0.496 0.506 0.488 0.491 236720000 0.472
## 9 NVDA 2015-01-14 0.486 0.495 0.485 0.493 155260000 0.474
## 10 NVDA 2015-01-15 0.497 0.5 0.490 0.490 188932000 0.470
## # ℹ 18,390 more rows
# average departure delay
summarise(stocks,high = mean(high, na.rm = TRUE ))
## # A tibble: 1 × 1
## high
## <dbl>
## 1 71.0
stocks %>%
# Group by volume
group_by(volume) %>%
# Calculate average volume
summarise(volume = mean(volume, na.rm = TRUE)) %>%
# Sort it
arrange(volume)
## # A tibble: 17,943 × 1
## volume
## <dbl>
## 1 0
## 2 223415
## 3 271519
## 4 371646
## 5 905062
## 6 941000
## 7 979900
## 8 1060500
## 9 1140591
## 10 1161000
## # ℹ 17,933 more rows
stocks %>%
group_by(adjusted) %>%
summarise(count =n(),
high = mean(high, na.rm = TRUE),
close = mean(close, na.rm = TRUE)) %>%
#Plot
ggplot(mapping = aes(x = high, y = close)) +
geom_point(aes(size = count), alpha = 0.3) +
geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

stocks %>%
# Remove missing values
filter(!is.na(low))
## # A tibble: 18,400 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2015-01-02 0.503 0.507 0.495 0.503 113680000 0.483
## 2 NVDA 2015-01-05 0.503 0.505 0.493 0.495 197952000 0.475
## 3 NVDA 2015-01-06 0.495 0.496 0.479 0.480 197764000 0.461
## 4 NVDA 2015-01-07 0.483 0.488 0.477 0.479 321808000 0.459
## 5 NVDA 2015-01-08 0.484 0.500 0.484 0.496 283780000 0.477
## 6 NVDA 2015-01-09 0.498 0.502 0.491 0.498 209540000 0.479
## 7 NVDA 2015-01-12 0.500 0.5 0.488 0.492 190732000 0.473
## 8 NVDA 2015-01-13 0.496 0.506 0.488 0.491 236720000 0.472
## 9 NVDA 2015-01-14 0.486 0.495 0.485 0.493 155260000 0.474
## 10 NVDA 2015-01-15 0.497 0.5 0.490 0.490 188932000 0.470
## # ℹ 18,390 more rows