Import stock prices

stocks <- tq_get(c("AAPL", "MSFT","META"),
                 get = "stock.prices",
                 from = "2018-01-01",
                 to = "2021-01-01")
stocks
## # A tibble: 2,268 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AAPL   2018-01-02  42.5  43.1  42.3  43.1 102223600     41.0
##  2 AAPL   2018-01-03  43.1  43.6  43.0  43.1 118071600     41.0
##  3 AAPL   2018-01-04  43.1  43.4  43.0  43.3  89738400     41.2
##  4 AAPL   2018-01-05  43.4  43.8  43.3  43.8  94640000     41.7
##  5 AAPL   2018-01-08  43.6  43.9  43.5  43.6  82271200     41.5
##  6 AAPL   2018-01-09  43.6  43.8  43.4  43.6  86336000     41.5
##  7 AAPL   2018-01-10  43.3  43.6  43.2  43.6  95839600     41.5
##  8 AAPL   2018-01-11  43.6  43.9  43.6  43.8  74670800     41.7
##  9 AAPL   2018-01-12  44.0  44.3  43.9  44.3 101672400     42.2
## 10 AAPL   2018-01-16  44.5  44.8  44.0  44.0 118263600     42.0
## # … with 2,258 more rows

Plot stock prices

stocks %>%
    
    ggplot(aes(x = date, y = adjusted, color = symbol)) +
    geom_line()

Filter rows

stocks %>% filter(adjusted < 41)
## # A tibble: 71 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AAPL   2018-01-25  43.6  43.7  42.6  42.8 166116000     40.7
##  2 AAPL   2018-01-26  43    43    42.5  42.9 156572000     40.8
##  3 AAPL   2018-01-29  42.5  42.5  41.8  42.0 202561600     40.0
##  4 AAPL   2018-01-30  41.4  41.8  41.2  41.7 184192800     39.8
##  5 AAPL   2018-01-31  41.7  42.1  41.6  41.9 129915600     39.9
##  6 AAPL   2018-02-01  41.8  42.2  41.7  41.9 188923200     40.0
##  7 AAPL   2018-02-02  41.5  41.7  40.0  40.1 346375200     38.2
##  8 AAPL   2018-02-05  39.8  41.0  39    39.1 290954000     37.3
##  9 AAPL   2018-02-06  38.7  40.9  38.5  40.8 272975200     38.8
## 10 AAPL   2018-02-07  40.8  40.8  39.8  39.9 206434400     38.0
## # … with 61 more rows
stocks %>% filter(adjusted < 42)
## # A tibble: 114 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AAPL   2018-01-02  42.5  43.1  42.3  43.1 102223600     41.0
##  2 AAPL   2018-01-03  43.1  43.6  43.0  43.1 118071600     41.0
##  3 AAPL   2018-01-04  43.1  43.4  43.0  43.3  89738400     41.2
##  4 AAPL   2018-01-05  43.4  43.8  43.3  43.8  94640000     41.7
##  5 AAPL   2018-01-08  43.6  43.9  43.5  43.6  82271200     41.5
##  6 AAPL   2018-01-09  43.6  43.8  43.4  43.6  86336000     41.5
##  7 AAPL   2018-01-10  43.3  43.6  43.2  43.6  95839600     41.5
##  8 AAPL   2018-01-11  43.6  43.9  43.6  43.8  74670800     41.7
##  9 AAPL   2018-01-16  44.5  44.8  44.0  44.0 118263600     42.0
## 10 AAPL   2018-01-24  44.3  44.3  43.3  43.6 204420400     41.5
## # … with 104 more rows

Arrange rows

arrange(stocks, desc(open), desc(close))
## # A tibble: 2,268 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 META   2020-08-27  300.  301.  292.  293. 30301300     293.
##  2 META   2020-09-02  299.  304.  293.  302. 24341400     302.
##  3 META   2020-09-03  296.  298.  284.  291. 32294100     291.
##  4 META   2020-08-28  295   297.  291.  294. 17157400     294.
##  5 META   2020-09-01  295.  301.  293.  295. 17295900     295.
##  6 META   2020-11-06  294.  295.  288.  293. 13891000     293.
##  7 META   2020-08-31  294.  297.  292.  293. 17345100     293.
##  8 META   2020-11-05  292.  297.  289.  295. 23823600     295.
##  9 META   2020-11-09  290.  293.  279.  279. 25117700     279.
## 10 META   2020-09-04  287.  289   271.  283. 30333700     283.
## # … with 2,258 more rows
arrange(stocks, desc(low), desc(high))
## # A tibble: 2,268 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 META   2020-09-02  299.  304.  293.  302. 24341400     302.
##  2 META   2020-09-01  295.  301.  293.  295. 17295900     295.
##  3 META   2020-08-27  300.  301.  292.  293. 30301300     293.
##  4 META   2020-08-31  294.  297.  292.  293. 17345100     293.
##  5 META   2020-08-28  295   297.  291.  294. 17157400     294.
##  6 META   2020-11-05  292.  297.  289.  295. 23823600     295.
##  7 META   2020-11-06  294.  295.  288.  293. 13891000     293.
##  8 META   2020-08-26  284   305.  284   304. 69015200     304.
##  9 META   2020-09-03  296.  298.  284.  291. 32294100     291.
## 10 META   2020-12-08  286.  286.  282.  283. 10747700     283.
## # … with 2,258 more rows

Select columns

select(stocks, open:close) 
## # A tibble: 2,268 × 4
##     open  high   low close
##    <dbl> <dbl> <dbl> <dbl>
##  1  42.5  43.1  42.3  43.1
##  2  43.1  43.6  43.0  43.1
##  3  43.1  43.4  43.0  43.3
##  4  43.4  43.8  43.3  43.8
##  5  43.6  43.9  43.5  43.6
##  6  43.6  43.8  43.4  43.6
##  7  43.3  43.6  43.2  43.6
##  8  43.6  43.9  43.6  43.8
##  9  44.0  44.3  43.9  44.3
## 10  44.5  44.8  44.0  44.0
## # … with 2,258 more rows
select(stocks, open, high, low, close)
## # A tibble: 2,268 × 4
##     open  high   low close
##    <dbl> <dbl> <dbl> <dbl>
##  1  42.5  43.1  42.3  43.1
##  2  43.1  43.6  43.0  43.1
##  3  43.1  43.4  43.0  43.3
##  4  43.4  43.8  43.3  43.8
##  5  43.6  43.9  43.5  43.6
##  6  43.6  43.8  43.4  43.6
##  7  43.3  43.6  43.2  43.6
##  8  43.6  43.9  43.6  43.8
##  9  44.0  44.3  43.9  44.3
## 10  44.5  44.8  44.0  44.0
## # … with 2,258 more rows
select(stocks, open, high, low, close, adjusted)
## # A tibble: 2,268 × 5
##     open  high   low close adjusted
##    <dbl> <dbl> <dbl> <dbl>    <dbl>
##  1  42.5  43.1  42.3  43.1     41.0
##  2  43.1  43.6  43.0  43.1     41.0
##  3  43.1  43.4  43.0  43.3     41.2
##  4  43.4  43.8  43.3  43.8     41.7
##  5  43.6  43.9  43.5  43.6     41.5
##  6  43.6  43.8  43.4  43.6     41.5
##  7  43.3  43.6  43.2  43.6     41.5
##  8  43.6  43.9  43.6  43.8     41.7
##  9  44.0  44.3  43.9  44.3     42.2
## 10  44.5  44.8  44.0  44.0     42.0
## # … with 2,258 more rows
select(stocks, open, high, low, starts_with("open"))
## # A tibble: 2,268 × 3
##     open  high   low
##    <dbl> <dbl> <dbl>
##  1  42.5  43.1  42.3
##  2  43.1  43.6  43.0
##  3  43.1  43.4  43.0
##  4  43.4  43.8  43.3
##  5  43.6  43.9  43.5
##  6  43.6  43.8  43.4
##  7  43.3  43.6  43.2
##  8  43.6  43.9  43.6
##  9  44.0  44.3  43.9
## 10  44.5  44.8  44.0
## # … with 2,258 more rows
select(stocks, open, high, low, contains("time"))
## # A tibble: 2,268 × 3
##     open  high   low
##    <dbl> <dbl> <dbl>
##  1  42.5  43.1  42.3
##  2  43.1  43.6  43.0
##  3  43.1  43.4  43.0
##  4  43.4  43.8  43.3
##  5  43.6  43.9  43.5
##  6  43.6  43.8  43.4
##  7  43.3  43.6  43.2
##  8  43.6  43.9  43.6
##  9  44.0  44.3  43.9
## 10  44.5  44.8  44.0
## # … with 2,258 more rows
select(stocks, open, high, low, ends_with("time"))
## # A tibble: 2,268 × 3
##     open  high   low
##    <dbl> <dbl> <dbl>
##  1  42.5  43.1  42.3
##  2  43.1  43.6  43.0
##  3  43.1  43.4  43.0
##  4  43.4  43.8  43.3
##  5  43.6  43.9  43.5
##  6  43.6  43.8  43.4
##  7  43.3  43.6  43.2
##  8  43.6  43.9  43.6
##  9  44.0  44.3  43.9
## 10  44.5  44.8  44.0
## # … with 2,258 more rows
select(stocks, open, high, low, ends_with("time"))
## # A tibble: 2,268 × 3
##     open  high   low
##    <dbl> <dbl> <dbl>
##  1  42.5  43.1  42.3
##  2  43.1  43.6  43.0
##  3  43.1  43.4  43.0
##  4  43.4  43.8  43.3
##  5  43.6  43.9  43.5
##  6  43.6  43.8  43.4
##  7  43.3  43.6  43.2
##  8  43.6  43.9  43.6
##  9  44.0  44.3  43.9
## 10  44.5  44.8  44.0
## # … with 2,258 more rows

Add Column

mutate(stocks,
       gain = high - low) %>%
   
    # Select year, month, day, and gain
    select(open:close, gain)
## # A tibble: 2,268 × 5
##     open  high   low close  gain
##    <dbl> <dbl> <dbl> <dbl> <dbl>
##  1  42.5  43.1  42.3  43.1 0.760
##  2  43.1  43.6  43.0  43.1 0.647
##  3  43.1  43.4  43.0  43.3 0.347
##  4  43.4  43.8  43.3  43.8 0.580
##  5  43.6  43.9  43.5  43.6 0.420
##  6  43.6  43.8  43.4  43.6 0.412
##  7  43.3  43.6  43.2  43.6 0.325
##  8  43.6  43.9  43.6  43.8 0.25 
##  9  44.0  44.3  43.9  44.3 0.428
## 10  44.5  44.8  44.0  44.0 0.812
## # … with 2,258 more rows
# Just keep gain.
mutate(stocks,
       gain = high - low) %>%
   
    # Select year, month, day, and gain
    select(gain)
## # A tibble: 2,268 × 1
##     gain
##    <dbl>
##  1 0.760
##  2 0.647
##  3 0.347
##  4 0.580
##  5 0.420
##  6 0.412
##  7 0.325
##  8 0.25 
##  9 0.428
## 10 0.812
## # … with 2,258 more rows
# Alternative using transmute()
transmute(stocks,
          gain= high - low)
## # A tibble: 2,268 × 1
##     gain
##    <dbl>
##  1 0.760
##  2 0.647
##  3 0.347
##  4 0.580
##  5 0.420
##  6 0.412
##  7 0.325
##  8 0.25 
##  9 0.428
## 10 0.812
## # … with 2,258 more rows

Summarise with groups

Collapsing data to a single row

stocks
## # A tibble: 2,268 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AAPL   2018-01-02  42.5  43.1  42.3  43.1 102223600     41.0
##  2 AAPL   2018-01-03  43.1  43.6  43.0  43.1 118071600     41.0
##  3 AAPL   2018-01-04  43.1  43.4  43.0  43.3  89738400     41.2
##  4 AAPL   2018-01-05  43.4  43.8  43.3  43.8  94640000     41.7
##  5 AAPL   2018-01-08  43.6  43.9  43.5  43.6  82271200     41.5
##  6 AAPL   2018-01-09  43.6  43.8  43.4  43.6  86336000     41.5
##  7 AAPL   2018-01-10  43.3  43.6  43.2  43.6  95839600     41.5
##  8 AAPL   2018-01-11  43.6  43.9  43.6  43.8  74670800     41.7
##  9 AAPL   2018-01-12  44.0  44.3  43.9  44.3 101672400     42.2
## 10 AAPL   2018-01-16  44.5  44.8  44.0  44.0 118263600     42.0
## # … with 2,258 more rows
# average high of APPL, MSFT, META 
summarise(stocks, high = mean(high, na.rm = TRUE))
## # A tibble: 1 × 1
##    high
##   <dbl>
## 1  136.

Missing values

stocks %>%
    
    # Remove missing values
    filter(!is.na(high))
## # A tibble: 2,268 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AAPL   2018-01-02  42.5  43.1  42.3  43.1 102223600     41.0
##  2 AAPL   2018-01-03  43.1  43.6  43.0  43.1 118071600     41.0
##  3 AAPL   2018-01-04  43.1  43.4  43.0  43.3  89738400     41.2
##  4 AAPL   2018-01-05  43.4  43.8  43.3  43.8  94640000     41.7
##  5 AAPL   2018-01-08  43.6  43.9  43.5  43.6  82271200     41.5
##  6 AAPL   2018-01-09  43.6  43.8  43.4  43.6  86336000     41.5
##  7 AAPL   2018-01-10  43.3  43.6  43.2  43.6  95839600     41.5
##  8 AAPL   2018-01-11  43.6  43.9  43.6  43.8  74670800     41.7
##  9 AAPL   2018-01-12  44.0  44.3  43.9  44.3 101672400     42.2
## 10 AAPL   2018-01-16  44.5  44.8  44.0  44.0 118263600     42.0
## # … with 2,258 more rows

Grouping multiple variables

stocks %>%
    group_by(open, high, low) %>%
    summarise(count = n()) %>%
    ungroup()
## `summarise()` has grouped output by 'open', 'high'. You can override using the
## `.groups` argument.
## # A tibble: 2,268 × 4
##     open  high   low count
##    <dbl> <dbl> <dbl> <int>
##  1  36.0  36.4  35.5     1
##  2  36.1  37.1  36.0     1
##  3  37.0  37.9  36.6     1
##  4  37.1  39.3  36.7     1
##  5  37.2  37.2  36.5     1
##  6  37.4  38.0  37.1     1
##  7  37.6  38.3  37.5     1
##  8  37.7  37.8  37.3     1
##  9  37.8  38.6  37.4     1
## 10  38.1  38.5  37.7     1
## # … with 2,258 more rows

Mutate by groups

Find the worst members of each group

stocks %>% 
  group_by(open, high, low) %>%
  filter(rank(desc(low)) < 10)
## # A tibble: 2,268 × 8
## # Groups:   open, high, low [2,268]
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AAPL   2018-01-02  42.5  43.1  42.3  43.1 102223600     41.0
##  2 AAPL   2018-01-03  43.1  43.6  43.0  43.1 118071600     41.0
##  3 AAPL   2018-01-04  43.1  43.4  43.0  43.3  89738400     41.2
##  4 AAPL   2018-01-05  43.4  43.8  43.3  43.8  94640000     41.7
##  5 AAPL   2018-01-08  43.6  43.9  43.5  43.6  82271200     41.5
##  6 AAPL   2018-01-09  43.6  43.8  43.4  43.6  86336000     41.5
##  7 AAPL   2018-01-10  43.3  43.6  43.2  43.6  95839600     41.5
##  8 AAPL   2018-01-11  43.6  43.9  43.6  43.8  74670800     41.7
##  9 AAPL   2018-01-12  44.0  44.3  43.9  44.3 101672400     42.2
## 10 AAPL   2018-01-16  44.5  44.8  44.0  44.0 118263600     42.0
## # … with 2,258 more rows

Standardise to compute per group metrics

stocks %>% 
  filter(high > 0) %>% 
  mutate(close = high / sum(high)) %>% 
  select(open:low, volume, high, close)
## # A tibble: 2,268 × 5
##     open  high   low    volume    close
##    <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1  42.5  43.1  42.3 102223600 0.000140
##  2  43.1  43.6  43.0 118071600 0.000142
##  3  43.1  43.4  43.0  89738400 0.000141
##  4  43.4  43.8  43.3  94640000 0.000142
##  5  43.6  43.9  43.5  82271200 0.000143
##  6  43.6  43.8  43.4  86336000 0.000142
##  7  43.3  43.6  43.2  95839600 0.000142
##  8  43.6  43.9  43.6  74670800 0.000143
##  9  44.0  44.3  43.9 101672400 0.000144
## 10  44.5  44.8  44.0 118263600 0.000146
## # … with 2,258 more rows