Import stock prices

stocks <- tq_get(c("NVDA","MSFT","INTC","DELL","SONY","GOOG","APPL","AMD"))
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `data.. = purrr::map(...)`.
## Caused by warning:
## ! x = 'APPL', get = 'stock.prices': Error in getSymbols.yahoo(Symbols = "APPL", env = <environment>, verbose = FALSE, : Unable to import "APPL".
## attempt to set an attribute on NULL
##  Removing APPL.
stocks %>%
  # Plot Stock Prices   
    ggplot(aes(x = date, y = adjusted, color = symbol)) +
    geom_line()

Apply the dpylr verbs you learned in chapter 5

Filter Rows

stocks %>% filter(adjusted > 24) 
## # A tibble: 11,850 × 8
##    symbol date        open  high   low close     volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>      <dbl>    <dbl>
##  1 NVDA   2021-10-26  24.0  25.3  23.9  24.7  485898000     24.7
##  2 NVDA   2021-10-27  24.5  25.1  24.3  24.5  245990000     24.4
##  3 NVDA   2021-10-28  24.9  25.0  24.5  24.9  234204000     24.9
##  4 NVDA   2021-10-29  25.0  25.7  25    25.6  292503000     25.5
##  5 NVDA   2021-11-01  25.6  25.9  25.2  25.8  265740000     25.8
##  6 NVDA   2021-11-02  25.8  26.7  25.8  26.4  294112000     26.3
##  7 NVDA   2021-11-03  26.7  26.8  26.2  26.6  239910000     26.5
##  8 NVDA   2021-11-04  27.2  31.4  27.1  29.8 1153631000     29.7
##  9 NVDA   2021-11-05  30.2  31.4  29.4  29.8  851260000     29.7
## 10 NVDA   2021-11-08  30.1  31.1  29.9  30.8  503101000     30.7
## # ℹ 11,840 more rows

Arange Rows

arrange(stocks, desc(open), desc(close))
## # A tibble: 18,400 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 MSFT   2025-07-31  555.  555.  532.  534. 51617300     533.
##  2 MSFT   2025-08-05  537.  537.  527.  528. 19171600     527.
##  3 MSFT   2025-08-01  535   536.  521.  524. 28977600     523.
##  4 MSFT   2025-08-13  532.  533.  519.  521. 19619200     520.
##  5 MSFT   2025-08-06  531.  532.  524.  525. 21355700     524.
##  6 MSFT   2025-08-04  528.  538.  528.  536. 25349000     535.
##  7 MSFT   2025-08-07  527.  528.  518.  521. 16079100     520.
##  8 MSFT   2025-08-12  524.  531.  523.  529. 18667000     528.
##  9 MSFT   2025-08-15  523.  526.  519.  520. 25213300     519.
## 10 MSFT   2025-08-08  523.  525.  519.  522. 15531000     521.
## # ℹ 18,390 more rows

Select Comlumns

select(stocks, date:open)
## # A tibble: 18,400 × 2
##    date        open
##    <date>     <dbl>
##  1 2015-01-02 0.503
##  2 2015-01-05 0.503
##  3 2015-01-06 0.495
##  4 2015-01-07 0.483
##  5 2015-01-08 0.484
##  6 2015-01-09 0.498
##  7 2015-01-12 0.500
##  8 2015-01-13 0.496
##  9 2015-01-14 0.486
## 10 2015-01-15 0.497
## # ℹ 18,390 more rows
select(stocks, date, open)
## # A tibble: 18,400 × 2
##    date        open
##    <date>     <dbl>
##  1 2015-01-02 0.503
##  2 2015-01-05 0.503
##  3 2015-01-06 0.495
##  4 2015-01-07 0.483
##  5 2015-01-08 0.484
##  6 2015-01-09 0.498
##  7 2015-01-12 0.500
##  8 2015-01-13 0.496
##  9 2015-01-14 0.486
## 10 2015-01-15 0.497
## # ℹ 18,390 more rows
select(stocks, date, open, close)
## # A tibble: 18,400 × 3
##    date        open close
##    <date>     <dbl> <dbl>
##  1 2015-01-02 0.503 0.503
##  2 2015-01-05 0.503 0.495
##  3 2015-01-06 0.495 0.480
##  4 2015-01-07 0.483 0.479
##  5 2015-01-08 0.484 0.496
##  6 2015-01-09 0.498 0.498
##  7 2015-01-12 0.500 0.492
##  8 2015-01-13 0.496 0.491
##  9 2015-01-14 0.486 0.493
## 10 2015-01-15 0.497 0.490
## # ℹ 18,390 more rows
select(stocks, date, starts_with("open")) 
## # A tibble: 18,400 × 2
##    date        open
##    <date>     <dbl>
##  1 2015-01-02 0.503
##  2 2015-01-05 0.503
##  3 2015-01-06 0.495
##  4 2015-01-07 0.483
##  5 2015-01-08 0.484
##  6 2015-01-09 0.498
##  7 2015-01-12 0.500
##  8 2015-01-13 0.496
##  9 2015-01-14 0.486
## 10 2015-01-15 0.497
## # ℹ 18,390 more rows
select(stocks, date, contains("close"))
## # A tibble: 18,400 × 2
##    date       close
##    <date>     <dbl>
##  1 2015-01-02 0.503
##  2 2015-01-05 0.495
##  3 2015-01-06 0.480
##  4 2015-01-07 0.479
##  5 2015-01-08 0.496
##  6 2015-01-09 0.498
##  7 2015-01-12 0.492
##  8 2015-01-13 0.491
##  9 2015-01-14 0.493
## 10 2015-01-15 0.490
## # ℹ 18,390 more rows
select(stocks, date, ends_with("close"))
## # A tibble: 18,400 × 2
##    date       close
##    <date>     <dbl>
##  1 2015-01-02 0.503
##  2 2015-01-05 0.495
##  3 2015-01-06 0.480
##  4 2015-01-07 0.479
##  5 2015-01-08 0.496
##  6 2015-01-09 0.498
##  7 2015-01-12 0.492
##  8 2015-01-13 0.491
##  9 2015-01-14 0.493
## 10 2015-01-15 0.490
## # ℹ 18,390 more rows
select(stocks, date, contains("close"),everything())
## # A tibble: 18,400 × 8
##    date       close symbol  open  high   low    volume adjusted
##    <date>     <dbl> <chr>  <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 2015-01-02 0.503 NVDA   0.503 0.507 0.495 113680000    0.483
##  2 2015-01-05 0.495 NVDA   0.503 0.505 0.493 197952000    0.475
##  3 2015-01-06 0.480 NVDA   0.495 0.496 0.479 197764000    0.461
##  4 2015-01-07 0.479 NVDA   0.483 0.488 0.477 321808000    0.459
##  5 2015-01-08 0.496 NVDA   0.484 0.500 0.484 283780000    0.477
##  6 2015-01-09 0.498 NVDA   0.498 0.502 0.491 209540000    0.479
##  7 2015-01-12 0.492 NVDA   0.500 0.5   0.488 190732000    0.473
##  8 2015-01-13 0.491 NVDA   0.496 0.506 0.488 236720000    0.472
##  9 2015-01-14 0.493 NVDA   0.486 0.495 0.485 155260000    0.474
## 10 2015-01-15 0.490 NVDA   0.497 0.5   0.490 188932000    0.470
## # ℹ 18,390 more rows

Add Columns

mutate(stocks, 
       gain = open - close) %>% 
    # select date and gain
    select(gain)
## # A tibble: 18,400 × 1
##         gain
##        <dbl>
##  1  0       
##  2  0.00850 
##  3  0.0157  
##  4  0.00475 
##  5 -0.0125  
##  6 -0.000250
##  7  0.00750 
##  8  0.00450 
##  9 -0.00725 
## 10  0.00700 
## # ℹ 18,390 more rows
#  alternative using transmute()
transmute(stocks,
          gain = open - close)
## # A tibble: 18,400 × 1
##         gain
##        <dbl>
##  1  0       
##  2  0.00850 
##  3  0.0157  
##  4  0.00475 
##  5 -0.0125  
##  6 -0.000250
##  7  0.00750 
##  8  0.00450 
##  9 -0.00725 
## 10  0.00700 
## # ℹ 18,390 more rows
#lag()
mutate(stocks, date) %>%
    mutate(high = close)
## # A tibble: 18,400 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 NVDA   2015-01-02 0.503 0.503 0.495 0.503 113680000    0.483
##  2 NVDA   2015-01-05 0.503 0.495 0.493 0.495 197952000    0.475
##  3 NVDA   2015-01-06 0.495 0.480 0.479 0.480 197764000    0.461
##  4 NVDA   2015-01-07 0.483 0.479 0.477 0.479 321808000    0.459
##  5 NVDA   2015-01-08 0.484 0.496 0.484 0.496 283780000    0.477
##  6 NVDA   2015-01-09 0.498 0.498 0.491 0.498 209540000    0.479
##  7 NVDA   2015-01-12 0.500 0.492 0.488 0.492 190732000    0.473
##  8 NVDA   2015-01-13 0.496 0.491 0.488 0.491 236720000    0.472
##  9 NVDA   2015-01-14 0.486 0.493 0.485 0.493 155260000    0.474
## 10 NVDA   2015-01-15 0.497 0.490 0.490 0.490 188932000    0.470
## # ℹ 18,390 more rows
# cum sum()
select(stocks, open) %>%
    mutate(open_cumsum = cumsum(open))
## # A tibble: 18,400 × 2
##     open open_cumsum
##    <dbl>       <dbl>
##  1 0.503       0.503
##  2 0.503       1.01 
##  3 0.495       1.50 
##  4 0.483       1.99 
##  5 0.484       2.47 
##  6 0.498       2.97 
##  7 0.500       3.47 
##  8 0.496       3.96 
##  9 0.486       4.45 
## 10 0.497       4.95 
## # ℹ 18,390 more rows

Summarise with groups

stocks
## # A tibble: 18,400 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 NVDA   2015-01-02 0.503 0.507 0.495 0.503 113680000    0.483
##  2 NVDA   2015-01-05 0.503 0.505 0.493 0.495 197952000    0.475
##  3 NVDA   2015-01-06 0.495 0.496 0.479 0.480 197764000    0.461
##  4 NVDA   2015-01-07 0.483 0.488 0.477 0.479 321808000    0.459
##  5 NVDA   2015-01-08 0.484 0.500 0.484 0.496 283780000    0.477
##  6 NVDA   2015-01-09 0.498 0.502 0.491 0.498 209540000    0.479
##  7 NVDA   2015-01-12 0.500 0.5   0.488 0.492 190732000    0.473
##  8 NVDA   2015-01-13 0.496 0.506 0.488 0.491 236720000    0.472
##  9 NVDA   2015-01-14 0.486 0.495 0.485 0.493 155260000    0.474
## 10 NVDA   2015-01-15 0.497 0.5   0.490 0.490 188932000    0.470
## # ℹ 18,390 more rows
# average departure delay
summarise(stocks,high =  mean(high, na.rm = TRUE ))
## # A tibble: 1 × 1
##    high
##   <dbl>
## 1  71.0
stocks %>% 
    # Group by volume
    group_by(volume) %>%
    
    # Calculate average volume
    summarise(volume = mean(volume, na.rm = TRUE)) %>%

    # Sort it 
    arrange(volume)
## # A tibble: 17,943 × 1
##     volume
##      <dbl>
##  1       0
##  2  223415
##  3  271519
##  4  371646
##  5  905062
##  6  941000
##  7  979900
##  8 1060500
##  9 1140591
## 10 1161000
## # ℹ 17,933 more rows
stocks %>%
    group_by(adjusted) %>%
    summarise(count =n(),
              high = mean(high, na.rm = TRUE),
              close = mean(close, na.rm = TRUE)) %>%
    #Plot 
    ggplot(mapping = aes(x = high, y = close)) +
    geom_point(aes(size = count), alpha = 0.3) +
    geom_smooth(se = FALSE) 
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

stocks %>% 
    
    # Remove missing values
    filter(!is.na(low))
## # A tibble: 18,400 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 NVDA   2015-01-02 0.503 0.507 0.495 0.503 113680000    0.483
##  2 NVDA   2015-01-05 0.503 0.505 0.493 0.495 197952000    0.475
##  3 NVDA   2015-01-06 0.495 0.496 0.479 0.480 197764000    0.461
##  4 NVDA   2015-01-07 0.483 0.488 0.477 0.479 321808000    0.459
##  5 NVDA   2015-01-08 0.484 0.500 0.484 0.496 283780000    0.477
##  6 NVDA   2015-01-09 0.498 0.502 0.491 0.498 209540000    0.479
##  7 NVDA   2015-01-12 0.500 0.5   0.488 0.492 190732000    0.473
##  8 NVDA   2015-01-13 0.496 0.506 0.488 0.491 236720000    0.472
##  9 NVDA   2015-01-14 0.486 0.495 0.485 0.493 155260000    0.474
## 10 NVDA   2015-01-15 0.497 0.5   0.490 0.490 188932000    0.470
## # ℹ 18,390 more rows