Import stock prices

stocks <- tq_get(c("VOOG", "NVDA"),
                 get = "stock.prices",
                 from = "2019-01-01",
                 to = "2022-01-01")
stocks
## # A tibble: 1,514 × 8
##    symbol date        open  high   low close volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>  <dbl>    <dbl>
##  1 VOOG   2019-01-02  133.  135.  133.  135. 239200     129.
##  2 VOOG   2019-01-03  134.  134.  131.  131. 177400     126.
##  3 VOOG   2019-01-04  133.  137.  133.  136. 198400     130.
##  4 VOOG   2019-01-07  136.  138.  136.  137. 262000     131.
##  5 VOOG   2019-01-08  139.  139.  137.  139. 178600     133.
##  6 VOOG   2019-01-09  139.  140.  138.  139. 207000     133.
##  7 VOOG   2019-01-10  139.  140.  138.  140. 156200     134.
##  8 VOOG   2019-01-11  139.  140.  139.  140. 123400     134.
##  9 VOOG   2019-01-14  139.  139.  138.  139.  75500     133.
## 10 VOOG   2019-01-15  139.  141.  139.  141. 128500     135.
## # ℹ 1,504 more rows

Filter rows

filter(stocks, high == 1, low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>

Arrange rows

arrange(stocks, desc(high), desc(date()))
## # A tibble: 1,514 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 NVDA   2021-11-22  335.  346.  319   320. 75433500     319.
##  2 NVDA   2021-11-29  324.  334.  320.  334. 45496400     333.
##  3 NVDA   2021-11-30  332.  334.  319.  327. 62206600     326.
##  4 NVDA   2021-12-01  332.  333.  314.  314. 48436800     314.
##  5 NVDA   2021-11-19  322.  331.  319.  330. 53386700     329.
##  6 NVDA   2021-11-24  315.  329.  309.  327. 43516200     326.
##  7 NVDA   2021-11-18  324.  328.  313.  317. 78171100     316.
##  8 NVDA   2021-11-26  326   327.  314.  315. 28306900     315.
##  9 NVDA   2021-12-02  312.  325.  310.  321. 47289000     321.
## 10 NVDA   2021-12-07  310.  324.  307.  324. 59305300     324.
## # ℹ 1,504 more rows

Select columns

select(stocks, low:volume)
## # A tibble: 1,514 × 3
##      low close volume
##    <dbl> <dbl>  <dbl>
##  1  133.  135. 239200
##  2  131.  131. 177400
##  3  133.  136. 198400
##  4  136.  137. 262000
##  5  137.  139. 178600
##  6  138.  139. 207000
##  7  138.  140. 156200
##  8  139.  140. 123400
##  9  138.  139.  75500
## 10  139.  141. 128500
## # ℹ 1,504 more rows
select(stocks, high, volume)
## # A tibble: 1,514 × 2
##     high volume
##    <dbl>  <dbl>
##  1  135. 239200
##  2  134. 177400
##  3  137. 198400
##  4  138. 262000
##  5  139. 178600
##  6  140. 207000
##  7  140. 156200
##  8  140. 123400
##  9  139.  75500
## 10  141. 128500
## # ℹ 1,504 more rows
select(stocks,date,open)
## # A tibble: 1,514 × 2
##    date        open
##    <date>     <dbl>
##  1 2019-01-02  133.
##  2 2019-01-03  134.
##  3 2019-01-04  133.
##  4 2019-01-07  136.
##  5 2019-01-08  139.
##  6 2019-01-09  139.
##  7 2019-01-10  139.
##  8 2019-01-11  139.
##  9 2019-01-14  139.
## 10 2019-01-15  139.
## # ℹ 1,504 more rows
select(stocks,low,high, starts_with("dep"))
## # A tibble: 1,514 × 2
##      low  high
##    <dbl> <dbl>
##  1  133.  135.
##  2  131.  134.
##  3  133.  137.
##  4  136.  138.
##  5  137.  139.
##  6  138.  140.
##  7  138.  140.
##  8  139.  140.
##  9  138.  139.
## 10  139.  141.
## # ℹ 1,504 more rows

Add columns

mutate(stocks,
       high = date - volume) %>%
    
    # Select year, month, day, and high
    select(symbol:date, high)
## # A tibble: 1,514 × 3
##    symbol date       high      
##    <chr>  <date>     <date>    
##  1 VOOG   2019-01-02 1364-02-05
##  2 VOOG   2019-01-03 1533-04-21
##  3 VOOG   2019-01-04 1475-10-23
##  4 VOOG   2019-01-07 1301-09-08
##  5 VOOG   2019-01-08 1530-01-12
##  6 VOOG   2019-01-09 1452-04-11
##  7 VOOG   2019-01-10 1591-05-14
##  8 VOOG   2019-01-11 1681-03-03
##  9 VOOG   2019-01-14 1812-04-29
## 10 VOOG   2019-01-15 1667-03-21
## # ℹ 1,504 more rows
# Just keep high
mutate(stocks,
       high = date - low) %>%
    
    # Select year, month, day, and gain
    select(low)
## # A tibble: 1,514 × 1
##      low
##    <dbl>
##  1  133.
##  2  131.
##  3  133.
##  4  136.
##  5  137.
##  6  138.
##  7  138.
##  8  139.
##  9  138.
## 10  139.
## # ℹ 1,504 more rows
# alternative using transmute()
transmute(stocks,
          high = date- low)
## # A tibble: 1,514 × 1
##    high      
##    <date>    
##  1 2018-08-22
##  2 2018-08-24
##  3 2018-08-23
##  4 2018-08-24
##  5 2018-08-23
##  6 2018-08-23
##  7 2018-08-25
##  8 2018-08-25
##  9 2018-08-28
## 10 2018-08-28
## # ℹ 1,504 more rows

Summarize by groups

Collapsing data to a single row

stocks
## # A tibble: 1,514 × 8
##    symbol date        open  high   low close volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>  <dbl>    <dbl>
##  1 VOOG   2019-01-02  133.  135.  133.  135. 239200     129.
##  2 VOOG   2019-01-03  134.  134.  131.  131. 177400     126.
##  3 VOOG   2019-01-04  133.  137.  133.  136. 198400     130.
##  4 VOOG   2019-01-07  136.  138.  136.  137. 262000     131.
##  5 VOOG   2019-01-08  139.  139.  137.  139. 178600     133.
##  6 VOOG   2019-01-09  139.  140.  138.  139. 207000     133.
##  7 VOOG   2019-01-10  139.  140.  138.  140. 156200     134.
##  8 VOOG   2019-01-11  139.  140.  139.  140. 123400     134.
##  9 VOOG   2019-01-14  139.  139.  138.  139.  75500     133.
## 10 VOOG   2019-01-15  139.  141.  139.  141. 128500     135.
## # ℹ 1,504 more rows
# average stock price
summarise(stocks, date = mean(high, na.rm = TRUE))
## # A tibble: 1 × 1
##    date
##   <dbl>
## 1  160.

Summarize by group

stocks %>%
    
    # Group by stock
    group_by(symbol) %>%
    
    # Calculate average high
    summarise(high = mean(date, na.rm = TRUE)) %>%
    
    # Sort it
    arrange(high)
## # A tibble: 2 × 2
##   symbol high      
##   <chr>  <date>    
## 1 NVDA   2020-07-02
## 2 VOOG   2020-07-02

grouping the mean high and 52 week low

stocks %>%
    group_by(low) %>%
    summarise(count = n(),
              l = mean(low, na.rm = TRUE),
              hi= mean(high, na.rm = TRUE)) %>%
    
    # Plot
    ggplot(mapping = aes(x = l, y = hi)) +
    geom_point(aes(size = count), alpha = 0.3) +
    geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Missing values

stocks %>%
    
    # Remove missing values
    filter(!is.na(high))
## # A tibble: 1,514 × 8
##    symbol date        open  high   low close volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>  <dbl>    <dbl>
##  1 VOOG   2019-01-02  133.  135.  133.  135. 239200     129.
##  2 VOOG   2019-01-03  134.  134.  131.  131. 177400     126.
##  3 VOOG   2019-01-04  133.  137.  133.  136. 198400     130.
##  4 VOOG   2019-01-07  136.  138.  136.  137. 262000     131.
##  5 VOOG   2019-01-08  139.  139.  137.  139. 178600     133.
##  6 VOOG   2019-01-09  139.  140.  138.  139. 207000     133.
##  7 VOOG   2019-01-10  139.  140.  138.  140. 156200     134.
##  8 VOOG   2019-01-11  139.  140.  139.  140. 123400     134.
##  9 VOOG   2019-01-14  139.  139.  138.  139.  75500     133.
## 10 VOOG   2019-01-15  139.  141.  139.  141. 128500     135.
## # ℹ 1,504 more rows

useful summary functions

grouping multiple variables

stocks %>%
    group_by(date, high, low) %>%
    summarise(count = n()) %>%
    ungroup()
## `summarise()` has grouped output by 'date', 'high'. You can override using the
## `.groups` argument.
## # A tibble: 1,514 × 4
##    date        high   low count
##    <date>     <dbl> <dbl> <int>
##  1 2019-01-02  34.6  32.5     1
##  2 2019-01-02 135.  133.      1
##  3 2019-01-03  33.8  31.9     1
##  4 2019-01-03 134.  131.      1
##  5 2019-01-04  34.4  32.4     1
##  6 2019-01-04 137.  133.      1
##  7 2019-01-07  36.2  34.1     1
##  8 2019-01-07 138.  136.      1
##  9 2019-01-08  36.7  34.2     1
## 10 2019-01-08 139.  137.      1
## # ℹ 1,504 more rows

ungrouping