Import stock prices

# Download the price history for the three tickers with tidyquant and store
# it in `stocks`; the bare name on the last line prints the result.
stocks <- tq_get(
  x    = c("NVDA", "AMD", "MSFT"),
  get  = "stock.prices",
  from = "2019-09-10",
  to   = "2023-09-10"
)
stocks
## # A tibble: 3,021 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 NVDA   2019-09-10  44.8  46.1  44.7  45.8 35281200     45.6
##  2 NVDA   2019-09-11  45.9  46.6  45.7  46.1 36151200     45.9
##  3 NVDA   2019-09-12  46.5  47.1  46.0  46.1 32784000     45.9
##  4 NVDA   2019-09-13  45.4  45.8  45.0  45.5 32459200     45.3
##  5 NVDA   2019-09-16  44.7  45.4  44.6  45.1 23183200     44.8
##  6 NVDA   2019-09-17  45.1  45.3  44.7  45.3 22304800     45.1
##  7 NVDA   2019-09-18  45.2  45.4  44.1  45.0 25815200     44.8
##  8 NVDA   2019-09-19  45.1  45.2  44.1  44.2 24547600     44.0
##  9 NVDA   2019-09-20  44.2  44.5  43.1  43.2 35024400     43.0
## 10 NVDA   2019-09-23  43.2  44.2  43.1  43.7 24971200     43.5
## # ℹ 3,011 more rows

Plot stock prices

# Line chart of the adjusted price over time, one coloured line per symbol.
ggplot(stocks, aes(x = date, y = adjusted, colour = symbol)) +
  geom_line()

Apply the dplyr verbs I learned in Chapter 5

Filter rows

# Keep only the rows whose adjusted price is below 24.
filter(stocks, adjusted < 24)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>
# Combine two conditions (comma-separated conditions are ANDed). `symbol` is a
# character column and `date` is a Date column, so they are compared against a
# ticker string and a Date value; comparing them with the number 1 can never
# match a row.
filter(stocks, symbol == "NVDA", date == as.Date("2019-09-10"))
## # A tibble: 1 × 8
##   symbol date        open  high   low close   volume adjusted
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
## 1 NVDA   2019-09-10  44.8  46.1  44.7  45.8 35281200     45.6

Arrange rows

# Sort rows in ascending order of open, breaking ties by low and then high.
stocks %>%
  arrange(open, low, high)
## # A tibble: 3,021 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 AMD    2019-10-03  28.3  28.8  27.4  28.7 56325200     28.7
##  2 AMD    2019-10-02  28.5  28.9  28.0  28.3 43640700     28.3
##  3 AMD    2019-10-10  28.5  28.9  28    28.4 40660100     28.4
##  4 AMD    2019-10-08  28.5  28.6  27.9  28.2 44265700     28.2
##  5 AMD    2019-10-09  28.8  29.0  28.4  28.5 35661500     28.5
##  6 AMD    2019-10-04  28.8  29.1  28.3  29.0 34446700     29.0
##  7 AMD    2019-10-11  28.8  30.2  28.8  29.8 66256900     29.8
##  8 AMD    2019-09-30  28.9  29.0  28.4  29.0 37295600     29.0
##  9 AMD    2019-10-07  28.9  29.6  28.8  28.9 39337200     28.9
## 10 AMD    2019-10-01  29.0  29.5  28.6  28.8 37081800     28.8
## # ℹ 3,011 more rows
# Sort rows in ascending order of low, breaking ties by high and then open.
stocks %>%
  arrange(low, high, open)
## # A tibble: 3,021 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 AMD    2019-10-03  28.3  28.8  27.4  28.7 56325200     28.7
##  2 AMD    2019-10-08  28.5  28.6  27.9  28.2 44265700     28.2
##  3 AMD    2019-10-10  28.5  28.9  28    28.4 40660100     28.4
##  4 AMD    2019-10-02  28.5  28.9  28.0  28.3 43640700     28.3
##  5 AMD    2019-10-04  28.8  29.1  28.3  29.0 34446700     29.0
##  6 AMD    2019-10-09  28.8  29.0  28.4  28.5 35661500     28.5
##  7 AMD    2019-09-27  29.2  29.8  28.4  28.7 54091100     28.7
##  8 AMD    2019-09-30  28.9  29.0  28.4  29.0 37295600     29.0
##  9 AMD    2019-09-25  29.5  29.6  28.5  29.5 58234800     29.5
## 10 AMD    2019-10-01  29.0  29.5  28.6  28.8 37081800     28.8
## # ℹ 3,011 more rows

Select columns

# Keep just these three columns, in the order they are listed.
stocks %>%
  select(open, low, high)
## # A tibble: 3,021 × 3
##     open   low  high
##    <dbl> <dbl> <dbl>
##  1  44.8  44.7  46.1
##  2  45.9  45.7  46.6
##  3  46.5  46.0  47.1
##  4  45.4  45.0  45.8
##  5  44.7  44.6  45.4
##  6  45.1  44.7  45.3
##  7  45.2  44.1  45.4
##  8  45.1  44.1  45.2
##  9  44.2  43.1  44.5
## 10  43.2  43.1  44.2
## # ℹ 3,011 more rows
# Select the run of adjacent columns spanned by `low` and `high`.
stocks %>%
  select(low:high)
## # A tibble: 3,021 × 2
##      low  high
##    <dbl> <dbl>
##  1  44.7  46.1
##  2  45.7  46.6
##  3  46.0  47.1
##  4  45.0  45.8
##  5  44.6  45.4
##  6  44.7  45.3
##  7  44.1  45.4
##  8  44.1  45.2
##  9  43.1  44.5
## 10  43.1  44.2
## # ℹ 3,011 more rows
# Drop the columns spanned by `low` and `high`, keeping everything else.
stocks %>%
  select(-(low:high))
## # A tibble: 3,021 × 6
##    symbol date        open close   volume adjusted
##    <chr>  <date>     <dbl> <dbl>    <dbl>    <dbl>
##  1 NVDA   2019-09-10  44.8  45.8 35281200     45.6
##  2 NVDA   2019-09-11  45.9  46.1 36151200     45.9
##  3 NVDA   2019-09-12  46.5  46.1 32784000     45.9
##  4 NVDA   2019-09-13  45.4  45.5 32459200     45.3
##  5 NVDA   2019-09-16  44.7  45.1 23183200     44.8
##  6 NVDA   2019-09-17  45.1  45.3 22304800     45.1
##  7 NVDA   2019-09-18  45.2  45.0 25815200     44.8
##  8 NVDA   2019-09-19  45.1  44.2 24547600     44.0
##  9 NVDA   2019-09-20  44.2  43.2 35024400     43.0
## 10 NVDA   2019-09-23  43.2  43.7 24971200     43.5
## # ℹ 3,011 more rows

Add columns

# Build a smaller table holding only the columns needed for the new variable.
# ends_with("close") picks up the `close` column; the misspelling "colse"
# matches no column, so `close` would be silently left out.
stocks_sml <- select(
  stocks,
  low:high,
  ends_with("close"),
  date,
  volume
)

# Midpoint of the daily high and low. The parentheses are required:
# `high + low / 2` evaluates as high + (low / 2) because `/` binds tighter
# than `+`, which gives values far above either price.
# NOTE: any rendered output printed below this chunk that was produced before
# this fix must be regenerated by re-running the document.
mutate(
  stocks_sml,
  average = (high + low) / 2
)
## # A tibble: 3,021 × 5
##      low  high date         volume average
##    <dbl> <dbl> <date>        <dbl>   <dbl>
##  1  44.7  46.1 2019-09-10 35281200    68.4
##  2  45.7  46.6 2019-09-11 36151200    69.4
##  3  46.0  47.1 2019-09-12 32784000    70.1
##  4  45.0  45.8 2019-09-13 32459200    68.3
##  5  44.6  45.4 2019-09-16 23183200    67.7
##  6  44.7  45.3 2019-09-17 22304800    67.6
##  7  44.1  45.4 2019-09-18 25815200    67.4
##  8  44.1  45.2 2019-09-19 24547600    67.3
##  9  43.1  44.5 2019-09-20 35024400    66.0
## 10  43.1  44.2 2019-09-23 24971200    65.7
## # ℹ 3,011 more rows

Summarise with groups

# For each (open, close) group compute the row count, mean high and mean low,
# then plot mean low against mean high with point size showing the group size.
# NOTE(review): grouping on two continuous price columns tends to give one row
# per group; grouping by `symbol` may be what was intended -- confirm.
stocks %>%
  group_by(open, close) %>%
  summarise(
    count = n(),
    # The argument must be spelled `na.rm`; a misspelling such as `nr.rm` is
    # swallowed by mean()'s `...` and missing values are then NOT removed.
    high = mean(high, na.rm = TRUE),
    low = mean(low, na.rm = TRUE)
  ) %>%
  # Plot
  ggplot(mapping = aes(x = high, y = low)) +
  geom_point(aes(size = count), alpha = 0.3) +
  geom_smooth(se = FALSE)
## `summarise()` has grouped output by 'open'. You can override using the
## `.groups` argument.
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Drop the rows where the closing price is missing.
filter(stocks, !is.na(close))
## # A tibble: 3,021 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 NVDA   2019-09-10  44.8  46.1  44.7  45.8 35281200     45.6
##  2 NVDA   2019-09-11  45.9  46.6  45.7  46.1 36151200     45.9
##  3 NVDA   2019-09-12  46.5  47.1  46.0  46.1 32784000     45.9
##  4 NVDA   2019-09-13  45.4  45.8  45.0  45.5 32459200     45.3
##  5 NVDA   2019-09-16  44.7  45.4  44.6  45.1 23183200     44.8
##  6 NVDA   2019-09-17  45.1  45.3  44.7  45.3 22304800     45.1
##  7 NVDA   2019-09-18  45.2  45.4  44.1  45.0 25815200     44.8
##  8 NVDA   2019-09-19  45.1  45.2  44.1  44.2 24547600     44.0
##  9 NVDA   2019-09-20  44.2  44.5  43.1  43.2 35024400     43.0
## 10 NVDA   2019-09-23  43.2  44.2  43.1  43.7 24971200     43.5
## # ℹ 3,011 more rows
# Count the rows in each (open, close) combination, then remove the grouping
# that summarise() leaves on the result.
group_by(stocks, open, close) %>%
  summarise(count = n()) %>%
  ungroup()
## `summarise()` has grouped output by 'open'. You can override using the
## `.groups` argument.
## # A tibble: 3,021 × 3
##     open close count
##    <dbl> <dbl> <int>
##  1  28.3  28.7     1
##  2  28.5  28.3     1
##  3  28.5  28.4     1
##  4  28.5  28.2     1
##  5  28.8  28.5     1
##  6  28.8  29.0     1
##  7  28.8  29.8     1
##  8  28.9  29.0     1
##  9  28.9  28.9     1
## 10  29.0  28.8     1
## # ℹ 3,011 more rows