Import stock prices

stocks <- tq_get(c("CRM", "PYPL", "ABNB"),
                 get = "stock.prices",
                 from = "2016-01-01")
stocks
## # A tibble: 4,111 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 CRM    2016-01-04  77.1  77.2  75.6  76.7 4919200     76.7
##  2 CRM    2016-01-05  77.1  78.0  76.7  77.1 2656800     77.1
##  3 CRM    2016-01-06  75.7  77.3  75.6  76.3 3484400     76.3
##  4 CRM    2016-01-07  75.1  75.6  73.5  74.3 6972200     74.3
##  5 CRM    2016-01-08  74.8  75.3  72.9  73.2 3673800     73.2
##  6 CRM    2016-01-11  73.7  73.8  71.9  73.2 3685100     73.2
##  7 CRM    2016-01-12  74.0  75.3  73.1  74.5 4108100     74.5
##  8 CRM    2016-01-13  75.0  75.7  71.8  72.3 4727500     72.3
##  9 CRM    2016-01-14  72.4  73.6  70.9  73.1 4738200     73.1
## 10 CRM    2016-01-15  69.9  71.1  68.2  70.9 9572100     70.9
## # … with 4,101 more rows

Plot stock prices

stocks %>%
    
    ggplot(aes(x = date, y = adjusted, color = symbol)) +
    geom_line()

Apply the dplyr verbs I learned in Chapter 5

Filter Rows

stocks %>% filter(close > 70)
## # A tibble: 3,606 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 CRM    2016-01-04  77.1  77.2  75.6  76.7 4919200     76.7
##  2 CRM    2016-01-05  77.1  78.0  76.7  77.1 2656800     77.1
##  3 CRM    2016-01-06  75.7  77.3  75.6  76.3 3484400     76.3
##  4 CRM    2016-01-07  75.1  75.6  73.5  74.3 6972200     74.3
##  5 CRM    2016-01-08  74.8  75.3  72.9  73.2 3673800     73.2
##  6 CRM    2016-01-11  73.7  73.8  71.9  73.2 3685100     73.2
##  7 CRM    2016-01-12  74.0  75.3  73.1  74.5 4108100     74.5
##  8 CRM    2016-01-13  75.0  75.7  71.8  72.3 4727500     72.3
##  9 CRM    2016-01-14  72.4  73.6  70.9  73.1 4738200     73.1
## 10 CRM    2016-01-15  69.9  71.1  68.2  70.9 9572100     70.9
## # … with 3,596 more rows
stocks %>% filter(close > 70 & low > 65)
## # A tibble: 3,606 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 CRM    2016-01-04  77.1  77.2  75.6  76.7 4919200     76.7
##  2 CRM    2016-01-05  77.1  78.0  76.7  77.1 2656800     77.1
##  3 CRM    2016-01-06  75.7  77.3  75.6  76.3 3484400     76.3
##  4 CRM    2016-01-07  75.1  75.6  73.5  74.3 6972200     74.3
##  5 CRM    2016-01-08  74.8  75.3  72.9  73.2 3673800     73.2
##  6 CRM    2016-01-11  73.7  73.8  71.9  73.2 3685100     73.2
##  7 CRM    2016-01-12  74.0  75.3  73.1  74.5 4108100     74.5
##  8 CRM    2016-01-13  75.0  75.7  71.8  72.3 4727500     72.3
##  9 CRM    2016-01-14  72.4  73.6  70.9  73.1 4738200     73.1
## 10 CRM    2016-01-15  69.9  71.1  68.2  70.9 9572100     70.9
## # … with 3,596 more rows

Arrange Rows

stocks %>% arrange(stocks, desc(close), desc(adjusted))
## # A tibble: 4,111 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 ABNB   2020-12-10  146   165   141.  145. 70447500     145.
##  2 ABNB   2020-12-11  147.  152.  135.  139. 26980800     139.
##  3 ABNB   2020-12-14  135   135.  125.  130  16966100     130 
##  4 ABNB   2020-12-15  127.  128.  122.  125. 10914400     125.
##  5 ABNB   2020-12-16  126.  142   125.  138. 20409600     138.
##  6 ABNB   2020-12-17  143   152.  143.  147. 15054700     147.
##  7 ABNB   2020-12-18  150.  159   150.  157. 15954200     157.
##  8 ABNB   2020-12-21  155.  172   145.  163. 17788100     163.
##  9 ABNB   2020-12-22  170   175.  161.  163.  9872600     163.
## 10 ABNB   2020-12-23  163.  168.  156.  158.  5852500     158.
## # … with 4,101 more rows

Select Comumns

stocks %>% select(symbol, date, high:close)
## # A tibble: 4,111 × 5
##    symbol date        high   low close
##    <chr>  <date>     <dbl> <dbl> <dbl>
##  1 CRM    2016-01-04  77.2  75.6  76.7
##  2 CRM    2016-01-05  78.0  76.7  77.1
##  3 CRM    2016-01-06  77.3  75.6  76.3
##  4 CRM    2016-01-07  75.6  73.5  74.3
##  5 CRM    2016-01-08  75.3  72.9  73.2
##  6 CRM    2016-01-11  73.8  71.9  73.2
##  7 CRM    2016-01-12  75.3  73.1  74.5
##  8 CRM    2016-01-13  75.7  71.8  72.3
##  9 CRM    2016-01-14  73.6  70.9  73.1
## 10 CRM    2016-01-15  71.1  68.2  70.9
## # … with 4,101 more rows
stocks %>% select(symbol, date, contains("a"))
## # A tibble: 4,111 × 3
##    symbol date       adjusted
##    <chr>  <date>        <dbl>
##  1 CRM    2016-01-04     76.7
##  2 CRM    2016-01-05     77.1
##  3 CRM    2016-01-06     76.3
##  4 CRM    2016-01-07     74.3
##  5 CRM    2016-01-08     73.2
##  6 CRM    2016-01-11     73.2
##  7 CRM    2016-01-12     74.5
##  8 CRM    2016-01-13     72.3
##  9 CRM    2016-01-14     73.1
## 10 CRM    2016-01-15     70.9
## # … with 4,101 more rows

Add Columns

mutate(stocks,
       High_Low_Difference = high - low) %>%
    
    # Selecting which columns to show
    select(symbol, date, High_Low_Difference)
## # A tibble: 4,111 × 3
##    symbol date       High_Low_Difference
##    <chr>  <date>                   <dbl>
##  1 CRM    2016-01-04                1.57
##  2 CRM    2016-01-05                1.30
##  3 CRM    2016-01-06                1.70
##  4 CRM    2016-01-07                2.13
##  5 CRM    2016-01-08                2.36
##  6 CRM    2016-01-11                1.97
##  7 CRM    2016-01-12                2.23
##  8 CRM    2016-01-13                3.91
##  9 CRM    2016-01-14                2.75
## 10 CRM    2016-01-15                2.87
## # … with 4,101 more rows
# Using the transmute()
transmute(stocks,
         HL_Difference_Transmute = high - low) %>%

    # Selecting just new HL after using the transmute function
    select(HL_Difference_Transmute)
## # A tibble: 4,111 × 1
##    HL_Difference_Transmute
##                      <dbl>
##  1                    1.57
##  2                    1.30
##  3                    1.70
##  4                    2.13
##  5                    2.36
##  6                    1.97
##  7                    2.23
##  8                    3.91
##  9                    2.75
## 10                    2.87
## # … with 4,101 more rows

Summarise with groups

Collapsing data to a single row

stocks
## # A tibble: 4,111 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 CRM    2016-01-04  77.1  77.2  75.6  76.7 4919200     76.7
##  2 CRM    2016-01-05  77.1  78.0  76.7  77.1 2656800     77.1
##  3 CRM    2016-01-06  75.7  77.3  75.6  76.3 3484400     76.3
##  4 CRM    2016-01-07  75.1  75.6  73.5  74.3 6972200     74.3
##  5 CRM    2016-01-08  74.8  75.3  72.9  73.2 3673800     73.2
##  6 CRM    2016-01-11  73.7  73.8  71.9  73.2 3685100     73.2
##  7 CRM    2016-01-12  74.0  75.3  73.1  74.5 4108100     74.5
##  8 CRM    2016-01-13  75.0  75.7  71.8  72.3 4727500     72.3
##  9 CRM    2016-01-14  72.4  73.6  70.9  73.1 4738200     73.1
## 10 CRM    2016-01-15  69.9  71.1  68.2  70.9 9572100     70.9
## # … with 4,101 more rows
# average closing price
summarise(stocks, close = mean(close, na.rm = TRUE))
## # A tibble: 1 × 1
##   close
##   <dbl>
## 1  135.

Summarize by group

stocks %>% 
    
    #Group by Symbol
    group_by(symbol) %>%
    
    # Calculate average adjusted price
    summarise(adjusted = mean(adjusted, na.rm = TRUE)) %>% 
    
    #Sort it
    arrange(adjusted)
## # A tibble: 3 × 2
##   symbol adjusted
##   <chr>     <dbl>
## 1 PYPL       113.
## 2 ABNB       145.
## 3 CRM        154.
stocks %>% 
    group_by(symbol) %>%
    summarise(count = n(),
              adjusted = mean(adjusted, na.rm = TRUE),
              close = mean(close, na.rm = TRUE)) %>%
    # Plot
    ggplot(mapping = aes(x = symbol, y = adjusted)) + 
    geom_point(aes(size = count), alpha = 0.3) +
    geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Missing values

stocks %>%
    
    #remove missing values
    filter(!is.na(adjusted))
## # A tibble: 4,111 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 CRM    2016-01-04  77.1  77.2  75.6  76.7 4919200     76.7
##  2 CRM    2016-01-05  77.1  78.0  76.7  77.1 2656800     77.1
##  3 CRM    2016-01-06  75.7  77.3  75.6  76.3 3484400     76.3
##  4 CRM    2016-01-07  75.1  75.6  73.5  74.3 6972200     74.3
##  5 CRM    2016-01-08  74.8  75.3  72.9  73.2 3673800     73.2
##  6 CRM    2016-01-11  73.7  73.8  71.9  73.2 3685100     73.2
##  7 CRM    2016-01-12  74.0  75.3  73.1  74.5 4108100     74.5
##  8 CRM    2016-01-13  75.0  75.7  71.8  72.3 4727500     72.3
##  9 CRM    2016-01-14  72.4  73.6  70.9  73.1 4738200     73.1
## 10 CRM    2016-01-15  69.9  71.1  68.2  70.9 9572100     70.9
## # … with 4,101 more rows

Grouping multiple variables

stocks %>%
    group_by(symbol, open, adjusted) %>%
    summarise(count = n()) %>%
    ungroup()
## `summarise()` has grouped output by 'symbol', 'open'. You can override using
## the `.groups` argument.
## # A tibble: 4,111 × 4
##    symbol  open adjusted count
##    <chr>  <dbl>    <dbl> <int>
##  1 ABNB    83.0     85.2     1
##  2 ABNB    83.2     82.5     1
##  3 ABNB    83.6     85.5     1
##  4 ABNB    84.7     85.2     1
##  5 ABNB    84.8     83.5     1
##  6 ABNB    85.5     84.9     1
##  7 ABNB    85.9     88.7     1
##  8 ABNB    86.7     87.6     1
##  9 ABNB    87.4     84.9     1
## 10 ABNB    87.5     87.1     1
## # … with 4,101 more rows