Import stock prices

stocks <- tq_get(c("GOOG", "GME", "NVDA", "V"),
                 get = "stock.prices",
                 from = "2017-01-01")
stocks
## # A tibble: 7,732 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 GOOG   2017-01-03  38.9  39.5  38.8  39.3 33146000     39.2
##  2 GOOG   2017-01-04  39.4  39.6  39.2  39.3 21460000     39.2
##  3 GOOG   2017-01-05  39.3  39.7  39.3  39.7 26704000     39.6
##  4 GOOG   2017-01-06  39.8  40.4  39.6  40.3 32804000     40.2
##  5 GOOG   2017-01-09  40.3  40.5  40.1  40.3 25492000     40.2
##  6 GOOG   2017-01-10  40.4  40.5  40.2  40.2 23536000     40.1
##  7 GOOG   2017-01-11  40.2  40.4  40.1  40.4 21318000     40.3
##  8 GOOG   2017-01-12  40.4  40.4  40.0  40.3 27062000     40.2
##  9 GOOG   2017-01-13  40.4  40.6  40.3  40.4 21984000     40.3
## 10 GOOG   2017-01-17  40.4  40.4  40.0  40.2 27242000     40.1
## # ℹ 7,722 more rows

Plot stock prices

stocks %>%
    
    ggplot(aes(x = date, y = adjusted, color = symbol)) +
    geom_line()

Apply the dplyr verbs learned in chapter 5

Filter rows

# Filter by adjusted
stocks %>% filter(adjusted > 50.5)
## # A tibble: 3,883 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 GOOG   2017-10-27  50.5  52.4  50.4  51.0 103354000     50.8
##  2 GOOG   2017-10-30  50.7  51.2  50.4  50.9  41702000     50.7
##  3 GOOG   2017-10-31  50.8  51.2  50.5  50.8  26614000     50.7
##  4 GOOG   2017-11-01  50.9  51.5  50.8  51.3  27468000     51.1
##  5 GOOG   2017-11-02  51.1  51.4  50.7  51.3  20980000     51.2
##  6 GOOG   2017-11-03  51.1  51.6  51.0  51.6  21528000     51.5
##  7 GOOG   2017-11-06  51.4  51.7  51.2  51.3  22504000     51.2
##  8 GOOG   2017-11-07  51.4  51.7  51.3  51.7  22246000     51.5
##  9 GOOG   2017-11-08  51.5  52.2  51.4  52.0  21774000     51.9
## 10 GOOG   2017-11-09  51.7  51.7  51.0  51.6  24904000     51.4
## # ℹ 3,873 more rows
# Filter by volume
stocks %>% filter(volume > 100000000)
## # A tibble: 1,996 × 8
##    symbol date         open  high    low  close    volume adjusted
##    <chr>  <date>      <dbl> <dbl>  <dbl>  <dbl>     <dbl>    <dbl>
##  1 GOOG   2017-10-27 50.5   52.4  50.4   51.0   103354000   50.8  
##  2 GOOG   2019-04-30 59.2   59.6  58.8   59.4   124140000   59.3  
##  3 GOOG   2019-06-03 53.3   53.3  51.2   51.8   102612000   51.7  
##  4 GME    2018-03-29  3.16   3.25  3.05   3.15  101847200    2.82 
##  5 GME    2018-06-01  3.20   3.43  3.20   3.43  103296000    3.07 
##  6 GME    2019-01-29  3.10   3.12  2.78   2.82  131481200    2.73 
##  7 GME    2019-04-03  2.21   2.51  2.20   2.41  106391600    2.41 
##  8 GME    2019-06-05  1.37   1.40  1.18   1.26  157416800    1.26 
##  9 GME    2019-08-22  0.993  1.06  0.930  0.967 116709200    0.967
## 10 GME    2019-09-11  1.05   1.24  0.993  1.15  136020000    1.15 
## # ℹ 1,986 more rows
# Filter by symbol
stocks %>% filter(symbol == "GOOG" | symbol == "NVDA")
## # A tibble: 3,866 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 GOOG   2017-01-03  38.9  39.5  38.8  39.3 33146000     39.2
##  2 GOOG   2017-01-04  39.4  39.6  39.2  39.3 21460000     39.2
##  3 GOOG   2017-01-05  39.3  39.7  39.3  39.7 26704000     39.6
##  4 GOOG   2017-01-06  39.8  40.4  39.6  40.3 32804000     40.2
##  5 GOOG   2017-01-09  40.3  40.5  40.1  40.3 25492000     40.2
##  6 GOOG   2017-01-10  40.4  40.5  40.2  40.2 23536000     40.1
##  7 GOOG   2017-01-11  40.2  40.4  40.1  40.4 21318000     40.3
##  8 GOOG   2017-01-12  40.4  40.4  40.0  40.3 27062000     40.2
##  9 GOOG   2017-01-13  40.4  40.6  40.3  40.4 21984000     40.3
## 10 GOOG   2017-01-17  40.4  40.4  40.0  40.2 27242000     40.1
## # ℹ 3,856 more rows

Arrange rows

arrange(stocks, desc(date))
## # A tibble: 7,732 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 GOOG   2024-09-09 154.  155.  148.  150.   28031400    150. 
##  2 GME    2024-09-09  23.2  25.0  23.2  24.2  13998800     24.2
##  3 NVDA   2024-09-09 105.  107.  104.  106.  272998900    106. 
##  4 V      2024-09-09 282.  287.  282.  286.   10932300    286. 
##  5 GOOG   2024-09-06 159.  159.  152.  152.   24999100    152. 
##  6 GME    2024-09-06  22.4  24.5  21.8  23.9  23884000     23.9
##  7 NVDA   2024-09-06 108.  108.  101.  103.  413638100    103. 
##  8 V      2024-09-06 278.  280.  277.  279.    7187700    279. 
##  9 GOOG   2024-09-05 158.  161.  158.  159.   14139500    158. 
## 10 GME    2024-09-05  22    22.7  21.7  22.4   5101200     22.4
## # ℹ 7,722 more rows
arrange(stocks, desc(volume))
## # A tibble: 7,732 × 8
##    symbol date        open  high   low close     volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>      <dbl>    <dbl>
##  1 NVDA   2017-06-09  4.12  4.21  3.57  3.74 3692928000     3.69
##  2 NVDA   2019-01-28  3.41  3.54  3.28  3.45 2511528000     3.42
##  3 NVDA   2017-05-10  2.86  3.05  2.85  3.03 2129096000     2.99
##  4 NVDA   2018-11-16  4.08  4.27  4.04  4.11 1963520000     4.07
##  5 NVDA   2017-05-11  3.00  3.26  3.00  3.16 1939792000     3.12
##  6 NVDA   2018-11-19  4.04  4.05  3.62  3.62 1697820000     3.59
##  7 NVDA   2017-06-12  3.65  3.79  3.55  3.75 1697532000     3.70
##  8 NVDA   2018-11-20  3.35  3.86  3.33  3.73 1692032000     3.69
##  9 NVDA   2017-09-18  4.63  4.78  4.62  4.69 1684460000     4.64
## 10 NVDA   2018-02-09  5.96  5.97  5.44  5.80 1674604000     5.74
## # ℹ 7,722 more rows
arrange(stocks, desc(open))
## # A tibble: 7,732 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 V      2024-03-22  290   290.  283.  283. 8743000     282.
##  2 V      2024-03-21  290.  291.  288.  290. 5057600     289.
##  3 V      2024-03-14  289.  289.  285.  286. 5484500     285.
##  4 V      2024-03-20  288.  290.  287.  289. 6105000     288.
##  5 V      2024-03-19  287.  288.  285.  287. 6595200     286.
##  6 V      2024-02-29  286.  286.  282.  283. 6633700     282.
##  7 V      2024-02-23  285.  286.  284.  284. 5107700     283.
##  8 V      2024-03-13  285.  286.  284.  285. 5783800     284.
##  9 V      2024-02-27  284.  284.  282.  283. 4145200     282.
## 10 V      2024-03-26  284.  284.  278.  281. 8752400     280.
## # ℹ 7,722 more rows
arrange(stocks, desc(close))
## # A tibble: 7,732 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 V      2024-03-21  290.  291.  288.  290.  5057600     289.
##  2 V      2024-03-20  288.  290.  287.  289.  6105000     288.
##  3 V      2024-03-19  287.  288.  285.  287.  6595200     286.
##  4 V      2024-03-14  289.  289.  285.  286.  5484500     285.
##  5 V      2024-02-28  282.  286.  282.  286.  4358800     285.
##  6 V      2024-09-09  282.  287.  282.  286. 10932300     286.
##  7 V      2024-03-13  285.  286.  284.  285.  5783800     284.
##  8 V      2024-03-18  284.  286.  283.  285.  4095600     284.
##  9 V      2024-02-26  283.  285.  283.  284.  3856900     283.
## 10 V      2024-03-12  281.  284.  280.  284.  7988700     283.
## # ℹ 7,722 more rows

Select columns

select(stocks, symbol:open, close)
## # A tibble: 7,732 × 4
##    symbol date        open close
##    <chr>  <date>     <dbl> <dbl>
##  1 GOOG   2017-01-03  38.9  39.3
##  2 GOOG   2017-01-04  39.4  39.3
##  3 GOOG   2017-01-05  39.3  39.7
##  4 GOOG   2017-01-06  39.8  40.3
##  5 GOOG   2017-01-09  40.3  40.3
##  6 GOOG   2017-01-10  40.4  40.2
##  7 GOOG   2017-01-11  40.2  40.4
##  8 GOOG   2017-01-12  40.4  40.3
##  9 GOOG   2017-01-13  40.4  40.4
## 10 GOOG   2017-01-17  40.4  40.2
## # ℹ 7,722 more rows
select(stocks, symbol, adjusted)
## # A tibble: 7,732 × 2
##    symbol adjusted
##    <chr>     <dbl>
##  1 GOOG       39.2
##  2 GOOG       39.2
##  3 GOOG       39.6
##  4 GOOG       40.2
##  5 GOOG       40.2
##  6 GOOG       40.1
##  7 GOOG       40.3
##  8 GOOG       40.2
##  9 GOOG       40.3
## 10 GOOG       40.1
## # ℹ 7,722 more rows
select(stocks, symbol, date, high, low)
## # A tibble: 7,732 × 4
##    symbol date        high   low
##    <chr>  <date>     <dbl> <dbl>
##  1 GOOG   2017-01-03  39.5  38.8
##  2 GOOG   2017-01-04  39.6  39.2
##  3 GOOG   2017-01-05  39.7  39.3
##  4 GOOG   2017-01-06  40.4  39.6
##  5 GOOG   2017-01-09  40.5  40.1
##  6 GOOG   2017-01-10  40.5  40.2
##  7 GOOG   2017-01-11  40.4  40.1
##  8 GOOG   2017-01-12  40.4  40.0
##  9 GOOG   2017-01-13  40.6  40.3
## 10 GOOG   2017-01-17  40.4  40.0
## # ℹ 7,722 more rows
select(stocks, symbol, everything())
## # A tibble: 7,732 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 GOOG   2017-01-03  38.9  39.5  38.8  39.3 33146000     39.2
##  2 GOOG   2017-01-04  39.4  39.6  39.2  39.3 21460000     39.2
##  3 GOOG   2017-01-05  39.3  39.7  39.3  39.7 26704000     39.6
##  4 GOOG   2017-01-06  39.8  40.4  39.6  40.3 32804000     40.2
##  5 GOOG   2017-01-09  40.3  40.5  40.1  40.3 25492000     40.2
##  6 GOOG   2017-01-10  40.4  40.5  40.2  40.2 23536000     40.1
##  7 GOOG   2017-01-11  40.2  40.4  40.1  40.4 21318000     40.3
##  8 GOOG   2017-01-12  40.4  40.4  40.0  40.3 27062000     40.2
##  9 GOOG   2017-01-13  40.4  40.6  40.3  40.4 21984000     40.3
## 10 GOOG   2017-01-17  40.4  40.4  40.0  40.2 27242000     40.1
## # ℹ 7,722 more rows

Add columns

# Using mutate
mutate(stocks,
       gain = high - low) %>%
    select(symbol, gain)
## # A tibble: 7,732 × 2
##    symbol  gain
##    <chr>  <dbl>
##  1 GOOG   0.691
##  2 GOOG   0.409
##  3 GOOG   0.473
##  4 GOOG   0.785
##  5 GOOG   0.357
##  6 GOOG   0.281
##  7 GOOG   0.339
##  8 GOOG   0.411
##  9 GOOG   0.227
## 10 GOOG   0.338
## # ℹ 7,722 more rows
mutate(stocks,
       gain = high - low) %>%
    select(gain)
## # A tibble: 7,732 × 1
##     gain
##    <dbl>
##  1 0.691
##  2 0.409
##  3 0.473
##  4 0.785
##  5 0.357
##  6 0.281
##  7 0.339
##  8 0.411
##  9 0.227
## 10 0.338
## # ℹ 7,722 more rows
# Using transmute
transmute(stocks, 
          gain = high - low)
## # A tibble: 7,732 × 1
##     gain
##    <dbl>
##  1 0.691
##  2 0.409
##  3 0.473
##  4 0.785
##  5 0.357
##  6 0.281
##  7 0.339
##  8 0.411
##  9 0.227
## 10 0.338
## # ℹ 7,722 more rows
# Using lag
select(stocks, volume) %>%
    mutate(volume_lag1 = lag(volume))
## # A tibble: 7,732 × 2
##      volume volume_lag1
##       <dbl>       <dbl>
##  1 33146000          NA
##  2 21460000    33146000
##  3 26704000    21460000
##  4 32804000    26704000
##  5 25492000    32804000
##  6 23536000    25492000
##  7 21318000    23536000
##  8 27062000    21318000
##  9 21984000    27062000
## 10 27242000    21984000
## # ℹ 7,722 more rows
# Using cumsum
select(stocks, open) %>%
    mutate(open_cumsum = cumsum(open))
## # A tibble: 7,732 × 2
##     open open_cumsum
##    <dbl>       <dbl>
##  1  38.9        38.9
##  2  39.4        78.4
##  3  39.3       118. 
##  4  39.8       157. 
##  5  40.3       198. 
##  6  40.4       238. 
##  7  40.2       278. 
##  8  40.4       319. 
##  9  40.4       359. 
## 10  40.4       399. 
## # ℹ 7,722 more rows

Summarize with groups

stocks %>% 
    
    # Group by stockholder
    group_by(symbol) %>%
    
    # Calculate average volume
    summarize(volume = mean(volume, na.rm = TRUE)) %>%
    
    # Sort it
    arrange(volume)
## # A tibble: 4 × 2
##   symbol     volume
##   <chr>       <dbl>
## 1 V        7892862.
## 2 GME     21444741.
## 3 GOOG    29124573.
## 4 NVDA   500866680.

Collapsing data into a single row

stocks
## # A tibble: 7,732 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 GOOG   2017-01-03  38.9  39.5  38.8  39.3 33146000     39.2
##  2 GOOG   2017-01-04  39.4  39.6  39.2  39.3 21460000     39.2
##  3 GOOG   2017-01-05  39.3  39.7  39.3  39.7 26704000     39.6
##  4 GOOG   2017-01-06  39.8  40.4  39.6  40.3 32804000     40.2
##  5 GOOG   2017-01-09  40.3  40.5  40.1  40.3 25492000     40.2
##  6 GOOG   2017-01-10  40.4  40.5  40.2  40.2 23536000     40.1
##  7 GOOG   2017-01-11  40.2  40.4  40.1  40.4 21318000     40.3
##  8 GOOG   2017-01-12  40.4  40.4  40.0  40.3 27062000     40.2
##  9 GOOG   2017-01-13  40.4  40.6  40.3  40.4 21984000     40.3
## 10 GOOG   2017-01-17  40.4  40.4  40.0  40.2 27242000     40.1
## # ℹ 7,722 more rows
# average volume for all stocks 
summarize(stocks, volume = mean(volume, na.rm = TRUE))
## # A tibble: 1 × 1
##       volume
##        <dbl>
## 1 139832214.

Grouping multiple values

stocks %>%
    group_by(symbol, date, volume) %>%
    summarize(count = n()) %>%
    ungroup()
## `summarise()` has grouped output by 'symbol', 'date'. You can override using
## the `.groups` argument.
## # A tibble: 7,732 × 4
##    symbol date         volume count
##    <chr>  <date>        <dbl> <int>
##  1 GME    2017-01-03  7765600     1
##  2 GME    2017-01-04  8977200     1
##  3 GME    2017-01-05 12150800     1
##  4 GME    2017-01-06  9334400     1
##  5 GME    2017-01-09  6586800     1
##  6 GME    2017-01-10  8972400     1
##  7 GME    2017-01-11 10173600     1
##  8 GME    2017-01-12 10572800     1
##  9 GME    2017-01-13 59295600     1
## 10 GME    2017-01-17 19933600     1
## # ℹ 7,722 more rows