Import stock prices

stocks <- tq_get(c("NVDA", "MSFT", "GOOGL", "AMD"),
                 get = "stock.prices",
                 from = "2020-01-01")
stocks
## # A tibble: 5,716 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 NVDA   2020-01-02  5.97  6.00  5.92  6.00 237536000     5.97
##  2 NVDA   2020-01-03  5.88  5.95  5.85  5.90 205384000     5.88
##  3 NVDA   2020-01-06  5.81  5.93  5.78  5.93 262636000     5.90
##  4 NVDA   2020-01-07  5.95  6.04  5.91  6.00 314856000     5.97
##  5 NVDA   2020-01-08  5.99  6.05  5.95  6.01 277108000     5.98
##  6 NVDA   2020-01-09  6.10  6.15  6.02  6.08 255112000     6.05
##  7 NVDA   2020-01-10  6.18  6.21  6.09  6.11 316296000     6.08
##  8 NVDA   2020-01-13  6.19  6.32  6.17  6.30 319840000     6.27
##  9 NVDA   2020-01-14  6.26  6.28  6.17  6.18 359088000     6.16
## 10 NVDA   2020-01-15  6.19  6.22  6.11  6.14 263104000     6.11
## # ℹ 5,706 more rows

Plot stock prices

stocks %>%
    
    ggplot(aes(x = date, y = adjusted, color = symbol)) +
    geom_line()

Apply the dplyr verbs you learned in Chapter 5

Filter rows

stocks %>% filter(adjusted > 30)
## # A tibble: 4,891 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 NVDA   2021-11-08  30.1  31.1  29.9  30.8 503101000     30.7
##  2 NVDA   2021-11-09  32.3  32.3  30.0  30.7 646746000     30.6
##  3 NVDA   2021-11-11  30.5  30.6  29.8  30.4 332172000     30.3
##  4 NVDA   2021-11-12  30.0  30.7  29.6  30.4 413054000     30.3
##  5 NVDA   2021-11-16  29.8  30.4  29.7  30.2 264484000     30.1
##  6 NVDA   2021-11-18  32.4  32.8  31.3  31.7 781711000     31.6
##  7 NVDA   2021-11-19  32.2  33.1  31.9  33.0 533867000     32.9
##  8 NVDA   2021-11-22  33.5  34.6  31.9  32.0 754335000     31.9
##  9 NVDA   2021-11-23  31.5  32.4  30.9  31.7 532163000     31.7
## 10 NVDA   2021-11-24  31.5  32.9  30.9  32.7 435162000     32.6
## # ℹ 4,881 more rows
filter(stocks, date >= "2025-1-1")
## # A tibble: 684 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 NVDA   2025-01-02  136   139.  135.  138. 198247200     138.
##  2 NVDA   2025-01-03  140.  145.  140.  144. 229322500     144.
##  3 NVDA   2025-01-06  149.  152.  148.  149. 265377400     149.
##  4 NVDA   2025-01-07  153.  153.  140.  140. 351782200     140.
##  5 NVDA   2025-01-08  143.  144.  138.  140. 227349900     140.
##  6 NVDA   2025-01-10  137.  140.  134.  136. 207602500     136.
##  7 NVDA   2025-01-13  130.  133.  130.  133. 204808900     133.
##  8 NVDA   2025-01-14  136.  136.  130.  132. 195590500     132.
##  9 NVDA   2025-01-15  134.  136.  131.  136. 185217300     136.
## 10 NVDA   2025-01-16  139.  139.  133.  134. 209235600     134.
## # ℹ 674 more rows

Arrange rows

arrange(stocks, desc(date), desc(close))
## # A tibble: 5,716 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 MSFT   2025-09-09  501.  502.  498.  498.  14349500     498.
##  2 GOOGL  2025-09-09  234.  240.  233.  240.  38012800     240.
##  3 NVDA   2025-09-09  169.  171.  167.  171. 156305800     171.
##  4 AMD    2025-09-09  152.  157.  152.  156.  42466400     156.
##  5 MSFT   2025-09-08  498.  501.  495.  498.  16771000     498.
##  6 GOOGL  2025-09-08  235.  238.  234.  234.  32474700     234.
##  7 NVDA   2025-09-08  168.  171.  167.  168. 163769100     168.
##  8 AMD    2025-09-08  152.  153.  149.  151.  41849000     151.
##  9 MSFT   2025-09-05  509.  512.  492.  495   31994800     495 
## 10 GOOGL  2025-09-05  232.  236.  232.  235   46588900     235.
## # ℹ 5,706 more rows

Select columns

select(stocks, symbol:close)
## # A tibble: 5,716 × 6
##    symbol date        open  high   low close
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>
##  1 NVDA   2020-01-02  5.97  6.00  5.92  6.00
##  2 NVDA   2020-01-03  5.88  5.95  5.85  5.90
##  3 NVDA   2020-01-06  5.81  5.93  5.78  5.93
##  4 NVDA   2020-01-07  5.95  6.04  5.91  6.00
##  5 NVDA   2020-01-08  5.99  6.05  5.95  6.01
##  6 NVDA   2020-01-09  6.10  6.15  6.02  6.08
##  7 NVDA   2020-01-10  6.18  6.21  6.09  6.11
##  8 NVDA   2020-01-13  6.19  6.32  6.17  6.30
##  9 NVDA   2020-01-14  6.26  6.28  6.17  6.18
## 10 NVDA   2020-01-15  6.19  6.22  6.11  6.14
## # ℹ 5,706 more rows
select(stocks, symbol, date, open, close)
## # A tibble: 5,716 × 4
##    symbol date        open close
##    <chr>  <date>     <dbl> <dbl>
##  1 NVDA   2020-01-02  5.97  6.00
##  2 NVDA   2020-01-03  5.88  5.90
##  3 NVDA   2020-01-06  5.81  5.93
##  4 NVDA   2020-01-07  5.95  6.00
##  5 NVDA   2020-01-08  5.99  6.01
##  6 NVDA   2020-01-09  6.10  6.08
##  7 NVDA   2020-01-10  6.18  6.11
##  8 NVDA   2020-01-13  6.19  6.30
##  9 NVDA   2020-01-14  6.26  6.18
## 10 NVDA   2020-01-15  6.19  6.14
## # ℹ 5,706 more rows

Add columns

mutate(stocks, 
       gain = close - open) %>%
    select(symbol, date, gain)
## # A tibble: 5,716 × 3
##    symbol date          gain
##    <chr>  <date>       <dbl>
##  1 NVDA   2020-01-02  0.0290
##  2 NVDA   2020-01-03  0.0243
##  3 NVDA   2020-01-06  0.118 
##  4 NVDA   2020-01-07  0.0433
##  5 NVDA   2020-01-08  0.0155
##  6 NVDA   2020-01-09 -0.0208
##  7 NVDA   2020-01-10 -0.0753
##  8 NVDA   2020-01-13  0.108 
##  9 NVDA   2020-01-14 -0.0745
## 10 NVDA   2020-01-15 -0.0555
## # ℹ 5,706 more rows
transmute(stocks,
       gap = high - low)
## # A tibble: 5,716 × 1
##       gap
##     <dbl>
##  1 0.0797
##  2 0.0933
##  3 0.150 
##  4 0.135 
##  5 0.0972
##  6 0.127 
##  7 0.120 
##  8 0.156 
##  9 0.113 
## 10 0.104 
## # ℹ 5,706 more rows
select(stocks, close) %>% 
    mutate(close_lag1 = lag(close))
## # A tibble: 5,716 × 2
##    close close_lag1
##    <dbl>      <dbl>
##  1  6.00      NA   
##  2  5.90       6.00
##  3  5.93       5.90
##  4  6.00       5.93
##  5  6.01       6.00
##  6  6.08       6.01
##  7  6.11       6.08
##  8  6.30       6.11
##  9  6.18       6.30
## 10  6.14       6.18
## # ℹ 5,706 more rows
select(stocks, close) %>% 
    mutate(close_cumsum = cumsum(close))
## # A tibble: 5,716 × 2
##    close close_cumsum
##    <dbl>        <dbl>
##  1  6.00         6.00
##  2  5.90        11.9 
##  3  5.93        17.8 
##  4  6.00        23.8 
##  5  6.01        29.8 
##  6  6.08        35.9 
##  7  6.11        42.0 
##  8  6.30        48.3 
##  9  6.18        54.5 
## 10  6.14        60.6 
## # ℹ 5,706 more rows

Summarise with groups

Collapsing data to a single row

stocks
## # A tibble: 5,716 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 NVDA   2020-01-02  5.97  6.00  5.92  6.00 237536000     5.97
##  2 NVDA   2020-01-03  5.88  5.95  5.85  5.90 205384000     5.88
##  3 NVDA   2020-01-06  5.81  5.93  5.78  5.93 262636000     5.90
##  4 NVDA   2020-01-07  5.95  6.04  5.91  6.00 314856000     5.97
##  5 NVDA   2020-01-08  5.99  6.05  5.95  6.01 277108000     5.98
##  6 NVDA   2020-01-09  6.10  6.15  6.02  6.08 255112000     6.05
##  7 NVDA   2020-01-10  6.18  6.21  6.09  6.11 316296000     6.08
##  8 NVDA   2020-01-13  6.19  6.32  6.17  6.30 319840000     6.27
##  9 NVDA   2020-01-14  6.26  6.28  6.17  6.18 359088000     6.16
## 10 NVDA   2020-01-15  6.19  6.22  6.11  6.14 263104000     6.11
## # ℹ 5,706 more rows
# average closing price
summarise(stocks, closure = mean(close, na.rm = TRUE))
## # A tibble: 1 × 1
##   closure
##     <dbl>
## 1    149.

Summarize by groups

stocks
## # A tibble: 5,716 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 NVDA   2020-01-02  5.97  6.00  5.92  6.00 237536000     5.97
##  2 NVDA   2020-01-03  5.88  5.95  5.85  5.90 205384000     5.88
##  3 NVDA   2020-01-06  5.81  5.93  5.78  5.93 262636000     5.90
##  4 NVDA   2020-01-07  5.95  6.04  5.91  6.00 314856000     5.97
##  5 NVDA   2020-01-08  5.99  6.05  5.95  6.01 277108000     5.98
##  6 NVDA   2020-01-09  6.10  6.15  6.02  6.08 255112000     6.05
##  7 NVDA   2020-01-10  6.18  6.21  6.09  6.11 316296000     6.08
##  8 NVDA   2020-01-13  6.19  6.32  6.17  6.30 319840000     6.27
##  9 NVDA   2020-01-14  6.26  6.28  6.17  6.18 359088000     6.16
## 10 NVDA   2020-01-15  6.19  6.22  6.11  6.14 263104000     6.11
## # ℹ 5,706 more rows
# average closing
summarise(stocks, gain = mean(close, na.rm = TRUE))
## # A tibble: 1 × 1
##    gain
##   <dbl>
## 1  149.
stocks %>%
    
    # Group by symbol
    group_by(symbol) %>%
    
    # Calculate average gain
    summarize(gain = mean(close, na.rm = TRUE)) %>%
    
    # Sort it
    arrange(gain) %>%
    ungroup()
## # A tibble: 4 × 2
##   symbol  gain
##   <chr>  <dbl>
## 1 NVDA    50.6
## 2 AMD    106. 
## 3 GOOGL  126. 
## 4 MSFT   313.

Gain per stock increases with difference between closing and opening

stocks %>%
    mutate(gain = close - open) %>%
    group_by(gain) %>%
    summarise(count = n(),
              closing = mean(close, na.rm = TRUE),
              opening = mean(open, na.rm = TRUE)) %>%
    # Plot
    ggplot(mapping = aes(x = closing, y = opening)) + 
    geom_point(aes(size = count), alpha = 0.3) +
    geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Missing values

stocks %>%
    
    # Remove missing values
    # ! is negative
    filter(!is.na(close))
## # A tibble: 5,716 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 NVDA   2020-01-02  5.97  6.00  5.92  6.00 237536000     5.97
##  2 NVDA   2020-01-03  5.88  5.95  5.85  5.90 205384000     5.88
##  3 NVDA   2020-01-06  5.81  5.93  5.78  5.93 262636000     5.90
##  4 NVDA   2020-01-07  5.95  6.04  5.91  6.00 314856000     5.97
##  5 NVDA   2020-01-08  5.99  6.05  5.95  6.01 277108000     5.98
##  6 NVDA   2020-01-09  6.10  6.15  6.02  6.08 255112000     6.05
##  7 NVDA   2020-01-10  6.18  6.21  6.09  6.11 316296000     6.08
##  8 NVDA   2020-01-13  6.19  6.32  6.17  6.30 319840000     6.27
##  9 NVDA   2020-01-14  6.26  6.28  6.17  6.18 359088000     6.16
## 10 NVDA   2020-01-15  6.19  6.22  6.11  6.14 263104000     6.11
## # ℹ 5,706 more rows

Count

By loss

stocks <- stocks %>%
    mutate(gain = close - open, 
           no_profit = gain <= 0)

loss <- stocks %>% 
  group_by(gain) %>% 
  summarise(loss = mean(gain))

ggplot(data = loss, mapping = aes(x = loss)) + 
  geom_freqpoly(binwidth = 1)

Grouping multiple variable

stocks %>%
    group_by(date, symbol, open, close) %>%
    summarise(count = n()) %>%
    # Every time you group, you need to ungroup it
    ungroup()
## `summarise()` has grouped output by 'date', 'symbol', 'open'. You can override
## using the `.groups` argument.
## # A tibble: 5,716 × 5
##    date       symbol   open  close count
##    <date>     <chr>   <dbl>  <dbl> <int>
##  1 2020-01-02 AMD     46.9   49.1      1
##  2 2020-01-02 GOOGL   67.4   68.4      1
##  3 2020-01-02 MSFT   159.   161.       1
##  4 2020-01-02 NVDA     5.97   6.00     1
##  5 2020-01-03 AMD     48.0   48.6      1
##  6 2020-01-03 GOOGL   67.4   68.1      1
##  7 2020-01-03 MSFT   158.   159.       1
##  8 2020-01-03 NVDA     5.88   5.90     1
##  9 2020-01-06 AMD     48.0   48.4      1
## 10 2020-01-06 GOOGL   67.6   69.9      1
## # ℹ 5,706 more rows