Import stock prices

# Request stock prices for three ticker symbols, using the
# from/to dates below as the request window, then print the result
stocks <- tq_get(
    c("AAPL", "MSFT", "DPZ"),
    get  = "stock.prices",
    from = "2016-01-01",
    to   = "2019-01-01"
)
stocks
## # A tibble: 2,262 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AAPL   2016-01-04  25.7  26.3  25.5  26.3 270597600     23.8
##  2 AAPL   2016-01-05  26.4  26.5  25.6  25.7 223164000     23.2
##  3 AAPL   2016-01-06  25.1  25.6  25.0  25.2 273829600     22.8
##  4 AAPL   2016-01-07  24.7  25.0  24.1  24.1 324377600     21.8
##  5 AAPL   2016-01-08  24.6  24.8  24.2  24.2 283192000     21.9
##  6 AAPL   2016-01-11  24.7  24.8  24.3  24.6 198957600     22.3
##  7 AAPL   2016-01-12  25.1  25.2  24.7  25.0 196616800     22.6
##  8 AAPL   2016-01-13  25.1  25.3  24.3  24.3 249758400     22.0
##  9 AAPL   2016-01-14  24.5  25.1  23.9  24.9 252680400     22.5
## 10 AAPL   2016-01-15  24.0  24.4  23.8  24.3 319335600     21.9
## # ℹ 2,252 more rows

Plot stock prices

# Line chart of adjusted price over time, one coloured line per symbol
ggplot(
    data = stocks,
    mapping = aes(x = date, y = adjusted, colour = symbol)
) +
    geom_line()

Apply the dplyr verbs you learned in chapter 5

Filter rows

# Keep only the rows whose adjusted price is above 24
filter(stocks, adjusted > 24)
## # A tibble: 2,141 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AAPL   2016-03-16  26.2  26.6  26.1  26.5 153214000     24.1
##  2 AAPL   2016-03-17  26.4  26.6  26.2  26.5 137682800     24.0
##  3 AAPL   2016-03-18  26.6  26.6  26.3  26.5 176820800     24.1
##  4 AAPL   2016-03-21  26.5  26.9  26.3  26.5 142010800     24.1
##  5 AAPL   2016-03-22  26.3  26.8  26.3  26.7 129777600     24.2
##  6 AAPL   2016-03-23  26.6  26.8  26.5  26.5 102814000     24.1
##  7 AAPL   2016-03-24  26.4  26.6  26.2  26.4 104532000     24.0
##  8 AAPL   2016-03-29  26.2  26.9  26.2  26.9 124760400     24.5
##  9 AAPL   2016-03-30  27.2  27.6  27.1  27.4 182404400     24.9
## 10 AAPL   2016-03-31  27.4  27.5  27.2  27.2 103553600     24.8
## # ℹ 2,131 more rows

Arrange rows

# Newest dates first; ties are broken by symbol, then open, then close,
# each in descending order
stocks %>%
    arrange(
        desc(date),
        desc(symbol),
        desc(open),
        desc(close)
    )
## # A tibble: 2,262 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 MSFT   2018-12-31 101.  102.  100.  102.   33173800     95.4
##  2 DPZ    2018-12-31 252.  254.  246.  248.     443200    231. 
##  3 AAPL   2018-12-31  39.6  39.8  39.1  39.4 140014000     37.6
##  4 MSFT   2018-12-28 102.  102.   99.5 100.   38196300     94.3
##  5 DPZ    2018-12-28 250.  253.  247.  250.     404200    233. 
##  6 AAPL   2018-12-28  39.4  39.6  38.6  39.1 169165600     37.2
##  7 MSFT   2018-12-27  99.3 101.   96.4 101.   49498500     95.0
##  8 DPZ    2018-12-27 243.  249.  241.  248.     413000    232. 
##  9 AAPL   2018-12-27  39.0  39.2  37.5  39.0 212468400     37.2
## 10 MSFT   2018-12-26  95.1 101.   94.0 101.   51634800     94.4
## # ℹ 2,252 more rows

Select columns

# Keep the identifying columns plus the high and close prices
stocks %>%
    select(symbol, date, high, close)
## # A tibble: 2,262 × 4
##    symbol date        high close
##    <chr>  <date>     <dbl> <dbl>
##  1 AAPL   2016-01-04  26.3  26.3
##  2 AAPL   2016-01-05  26.5  25.7
##  3 AAPL   2016-01-06  25.6  25.2
##  4 AAPL   2016-01-07  25.0  24.1
##  5 AAPL   2016-01-08  24.8  24.2
##  6 AAPL   2016-01-11  24.8  24.6
##  7 AAPL   2016-01-12  25.2  25.0
##  8 AAPL   2016-01-13  25.3  24.3
##  9 AAPL   2016-01-14  25.1  24.9
## 10 AAPL   2016-01-15  24.4  24.3
## # ℹ 2,252 more rows

Add columns

# gain = close minus open for each row
stocks %>%
    mutate(gain = close - open) %>%
    # symbol and date are the first two columns of stocks, so naming
    # them individually selects the same columns as symbol:date
    select(symbol, date, gain)
## # A tibble: 2,262 × 3
##    symbol date          gain
##    <chr>  <date>       <dbl>
##  1 AAPL   2016-01-04  0.685 
##  2 AAPL   2016-01-05 -0.760 
##  3 AAPL   2016-01-06  0.0350
##  4 AAPL   2016-01-07 -0.558 
##  5 AAPL   2016-01-08 -0.398 
##  6 AAPL   2016-01-11 -0.110 
##  7 AAPL   2016-01-12 -0.148 
##  8 AAPL   2016-01-13 -0.733 
##  9 AAPL   2016-01-14  0.390 
## 10 AAPL   2016-01-15  0.233 
## # ℹ 2,252 more rows
# Just keep gain
stocks %>%
    mutate(gain = close - open) %>%
    # Drop every column except the newly computed gain
    select(gain)
## # A tibble: 2,262 × 1
##       gain
##      <dbl>
##  1  0.685 
##  2 -0.760 
##  3  0.0350
##  4 -0.558 
##  5 -0.398 
##  6 -0.110 
##  7 -0.148 
##  8 -0.733 
##  9  0.390 
## 10  0.233 
## # ℹ 2,252 more rows
# Alternative using transmute(): compute gain and keep only that column
stocks %>%
    transmute(gain = close - open)
## # A tibble: 2,262 × 1
##       gain
##      <dbl>
##  1  0.685 
##  2 -0.760 
##  3  0.0350
##  4 -0.558 
##  5 -0.398 
##  6 -0.110 
##  7 -0.148 
##  8 -0.733 
##  9  0.390 
## 10  0.233 
## # ℹ 2,252 more rows
# lag()
stocks %>%
    # Lag within each company: stocks stacks all symbols in one table,
    # so an ungrouped lag() would hand the last open of one symbol to
    # the first row of the next symbol instead of NA
    group_by(symbol) %>%
    # open_lag1 = the open from the previous row of the same symbol
    mutate(open_lag1 = lag(open)) %>%
    ungroup() %>%
    select(open, open_lag1)
## # A tibble: 2,262 × 2
##     open open_lag1
##    <dbl>     <dbl>
##  1  25.7      NA  
##  2  26.4      25.7
##  3  25.1      26.4
##  4  24.7      25.1
##  5  24.6      24.7
##  6  24.7      24.6
##  7  25.1      24.7
##  8  25.1      25.1
##  9  24.5      25.1
## 10  24.0      24.5
## # ℹ 2,252 more rows
# cumsum()
stocks %>%
    # Accumulate per company: stocks stacks all symbols in one table,
    # so an ungrouped cumsum() would carry one symbol's running total
    # into the next symbol's rows
    group_by(symbol) %>%
    # high_cumsum = running total of high within the symbol
    mutate(high_cumsum = cumsum(high)) %>%
    ungroup() %>%
    select(high, high_cumsum)
## # A tibble: 2,262 × 2
##     high high_cumsum
##    <dbl>       <dbl>
##  1  26.3        26.3
##  2  26.5        52.8
##  3  25.6        78.4
##  4  25.0       103. 
##  5  24.8       128. 
##  6  24.8       153. 
##  7  25.2       178. 
##  8  25.3       203. 
##  9  25.1       229. 
## 10  24.4       253. 
## # ℹ 2,252 more rows

Summarise with groups

# Mean high per symbol (missing values ignored), sorted from the
# lowest average to the highest
stocks %>%
    group_by(symbol) %>%
    summarise(high = mean(high, na.rm = TRUE)) %>%
    arrange(high)
## # A tibble: 3 × 2
##   symbol  high
##   <chr>  <dbl>
## 1 AAPL    37.3
## 2 MSFT    76.7
## 3 DPZ    196.