Import stock prices

stocks <- tq_get(c("AMZN", "TSLA", "TM"),
                 get = "stock.prices",
                 from = "2016-01-01")
stocks
## # A tibble: 5,910 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AMZN   2016-01-04  32.8  32.9  31.4  31.8 186290000     31.8
##  2 AMZN   2016-01-05  32.3  32.3  31.4  31.7 116452000     31.7
##  3 AMZN   2016-01-06  31.1  32.0  31.0  31.6 106584000     31.6
##  4 AMZN   2016-01-07  31.1  31.5  30.3  30.4 141498000     30.4
##  5 AMZN   2016-01-08  31.0  31.2  30.3  30.4 110258000     30.4
##  6 AMZN   2016-01-11  30.6  31.0  29.9  30.9  97832000     30.9
##  7 AMZN   2016-01-12  31.3  31.3  30.6  30.9  94482000     30.9
##  8 AMZN   2016-01-13  31.0  31.0  29.0  29.1 153104000     29.1
##  9 AMZN   2016-01-14  29.0  30.1  28.5  29.6 144760000     29.6
## 10 AMZN   2016-01-15  28.6  29.2  28.3  28.5 155690000     28.5
## # ℹ 5,900 more rows

Plot stock prices

stocks %>%
    
    ggplot(aes(x = date, y = adjusted, color = symbol)) +
    geom_line()

Filter rows

stocks %>% filter(close > 70) 
## # A tibble: 4,251 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AMZN   2018-01-26  69.6  70.1  69.0  70.1  97146000     70.1
##  2 AMZN   2018-01-29  70.5  71.6  70.0  70.9 114038000     70.9
##  3 AMZN   2018-01-30  70.2  72.0  69.6  71.9 117438000     71.9
##  4 AMZN   2018-01-31  72.6  73.6  72.5  72.5 128494000     72.5
##  5 AMZN   2018-02-02  73.9  74.9  70.7  71.5 222514000     71.5
##  6 AMZN   2018-02-06  68.1  72.2  67.6  72.1 221336000     72.1
##  7 AMZN   2018-02-07  72.4  73.0  70.8  70.8 143254000     70.8
##  8 AMZN   2018-02-13  69.3  71.0  69.2  70.7 118358000     70.7
##  9 AMZN   2018-02-14  70.3  72.6  70.2  72.6 119184000     72.6
## 10 AMZN   2018-02-15  73.3  73.4  71.8  73.1 113054000     73.1
## # ℹ 4,241 more rows
stocks %>% filter(close > 70 & low > 65)
## # A tibble: 4,251 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AMZN   2018-01-26  69.6  70.1  69.0  70.1  97146000     70.1
##  2 AMZN   2018-01-29  70.5  71.6  70.0  70.9 114038000     70.9
##  3 AMZN   2018-01-30  70.2  72.0  69.6  71.9 117438000     71.9
##  4 AMZN   2018-01-31  72.6  73.6  72.5  72.5 128494000     72.5
##  5 AMZN   2018-02-02  73.9  74.9  70.7  71.5 222514000     71.5
##  6 AMZN   2018-02-06  68.1  72.2  67.6  72.1 221336000     72.1
##  7 AMZN   2018-02-07  72.4  73.0  70.8  70.8 143254000     70.8
##  8 AMZN   2018-02-13  69.3  71.0  69.2  70.7 118358000     70.7
##  9 AMZN   2018-02-14  70.3  72.6  70.2  72.6 119184000     72.6
## 10 AMZN   2018-02-15  73.3  73.4  71.8  73.1 113054000     73.1
## # ℹ 4,241 more rows

Arrange rows

stocks %>% arrange(stocks, desc(close), desc(adjusted))
## # A tibble: 5,910 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AMZN   2016-01-04  32.8  32.9  31.4  31.8 186290000     31.8
##  2 AMZN   2016-01-05  32.3  32.3  31.4  31.7 116452000     31.7
##  3 AMZN   2016-01-06  31.1  32.0  31.0  31.6 106584000     31.6
##  4 AMZN   2016-01-07  31.1  31.5  30.3  30.4 141498000     30.4
##  5 AMZN   2016-01-08  31.0  31.2  30.3  30.4 110258000     30.4
##  6 AMZN   2016-01-11  30.6  31.0  29.9  30.9  97832000     30.9
##  7 AMZN   2016-01-12  31.3  31.3  30.6  30.9  94482000     30.9
##  8 AMZN   2016-01-13  31.0  31.0  29.0  29.1 153104000     29.1
##  9 AMZN   2016-01-14  29.0  30.1  28.5  29.6 144760000     29.6
## 10 AMZN   2016-01-15  28.6  29.2  28.3  28.5 155690000     28.5
## # ℹ 5,900 more rows

#select columns

stocks %>% select(symbol, date, high:close)
## # A tibble: 5,910 × 5
##    symbol date        high   low close
##    <chr>  <date>     <dbl> <dbl> <dbl>
##  1 AMZN   2016-01-04  32.9  31.4  31.8
##  2 AMZN   2016-01-05  32.3  31.4  31.7
##  3 AMZN   2016-01-06  32.0  31.0  31.6
##  4 AMZN   2016-01-07  31.5  30.3  30.4
##  5 AMZN   2016-01-08  31.2  30.3  30.4
##  6 AMZN   2016-01-11  31.0  29.9  30.9
##  7 AMZN   2016-01-12  31.3  30.6  30.9
##  8 AMZN   2016-01-13  31.0  29.0  29.1
##  9 AMZN   2016-01-14  30.1  28.5  29.6
## 10 AMZN   2016-01-15  29.2  28.3  28.5
## # ℹ 5,900 more rows
stocks %>% select(symbol, date, contains("a"))
## # A tibble: 5,910 × 3
##    symbol date       adjusted
##    <chr>  <date>        <dbl>
##  1 AMZN   2016-01-04     31.8
##  2 AMZN   2016-01-05     31.7
##  3 AMZN   2016-01-06     31.6
##  4 AMZN   2016-01-07     30.4
##  5 AMZN   2016-01-08     30.4
##  6 AMZN   2016-01-11     30.9
##  7 AMZN   2016-01-12     30.9
##  8 AMZN   2016-01-13     29.1
##  9 AMZN   2016-01-14     29.6
## 10 AMZN   2016-01-15     28.5
## # ℹ 5,900 more rows

Add columns

mutate(stocks,
       High_Low_Difference = high - low) %>%
    
    # Selecting which columns to show
    select(symbol, date, High_Low_Difference)
## # A tibble: 5,910 × 3
##    symbol date       High_Low_Difference
##    <chr>  <date>                   <dbl>
##  1 AMZN   2016-01-04               1.51 
##  2 AMZN   2016-01-05               0.963
##  3 AMZN   2016-01-06               0.974
##  4 AMZN   2016-01-07               1.24 
##  5 AMZN   2016-01-08               0.907
##  6 AMZN   2016-01-11               1.06 
##  7 AMZN   2016-01-12               0.688
##  8 AMZN   2016-01-13               2.09 
##  9 AMZN   2016-01-14               1.62 
## 10 AMZN   2016-01-15               0.966
## # ℹ 5,900 more rows
# Using the transmute()
transmute(stocks,
         HL_Difference_Transmute = high - low) %>%

    # Selecting just new HL after using the transmute function
    select(HL_Difference_Transmute)
## # A tibble: 5,910 × 1
##    HL_Difference_Transmute
##                      <dbl>
##  1                   1.51 
##  2                   0.963
##  3                   0.974
##  4                   1.24 
##  5                   0.907
##  6                   1.06 
##  7                   0.688
##  8                   2.09 
##  9                   1.62 
## 10                   0.966
## # ℹ 5,900 more rows

summarize by groups

stocks
## # A tibble: 5,910 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AMZN   2016-01-04  32.8  32.9  31.4  31.8 186290000     31.8
##  2 AMZN   2016-01-05  32.3  32.3  31.4  31.7 116452000     31.7
##  3 AMZN   2016-01-06  31.1  32.0  31.0  31.6 106584000     31.6
##  4 AMZN   2016-01-07  31.1  31.5  30.3  30.4 141498000     30.4
##  5 AMZN   2016-01-08  31.0  31.2  30.3  30.4 110258000     30.4
##  6 AMZN   2016-01-11  30.6  31.0  29.9  30.9  97832000     30.9
##  7 AMZN   2016-01-12  31.3  31.3  30.6  30.9  94482000     30.9
##  8 AMZN   2016-01-13  31.0  31.0  29.0  29.1 153104000     29.1
##  9 AMZN   2016-01-14  29.0  30.1  28.5  29.6 144760000     29.6
## 10 AMZN   2016-01-15  28.6  29.2  28.3  28.5 155690000     28.5
## # ℹ 5,900 more rows
# average closing price
summarise(stocks, close = mean(close, na.rm = TRUE))
## # A tibble: 1 × 1
##   close
##   <dbl>
## 1  116.

summarize by group

stocks %>% 
    
    #Group by Symbol
    group_by(symbol) %>%
    
    # Calculate average adjusted price
    summarise(adjusted = mean(adjusted, na.rm = TRUE)) %>% 
    
    #Sort it
    arrange(adjusted)
## # A tibble: 3 × 2
##   symbol adjusted
##   <chr>     <dbl>
## 1 AMZN       99.5
## 2 TSLA      111. 
## 3 TM        137.
stocks %>% 
    group_by(symbol) %>%
    summarise(count = n(),
              adjusted = mean(adjusted, na.rm = TRUE),
              close = mean(close, na.rm = TRUE)) %>%
    # Plot
    ggplot(mapping = aes(x = symbol, y = adjusted)) + 
    geom_point(aes(size = count), alpha = 0.3) +
    geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Missing Value

stocks %>%
    
    #remove missing values
    filter(!is.na(adjusted))
## # A tibble: 5,910 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 AMZN   2016-01-04  32.8  32.9  31.4  31.8 186290000     31.8
##  2 AMZN   2016-01-05  32.3  32.3  31.4  31.7 116452000     31.7
##  3 AMZN   2016-01-06  31.1  32.0  31.0  31.6 106584000     31.6
##  4 AMZN   2016-01-07  31.1  31.5  30.3  30.4 141498000     30.4
##  5 AMZN   2016-01-08  31.0  31.2  30.3  30.4 110258000     30.4
##  6 AMZN   2016-01-11  30.6  31.0  29.9  30.9  97832000     30.9
##  7 AMZN   2016-01-12  31.3  31.3  30.6  30.9  94482000     30.9
##  8 AMZN   2016-01-13  31.0  31.0  29.0  29.1 153104000     29.1
##  9 AMZN   2016-01-14  29.0  30.1  28.5  29.6 144760000     29.6
## 10 AMZN   2016-01-15  28.6  29.2  28.3  28.5 155690000     28.5
## # ℹ 5,900 more rows

Grouping Multiple Variable

stocks %>%
    group_by(symbol, open, adjusted) %>%
    summarise(count = n()) %>%
    ungroup()
## `summarise()` has grouped output by 'symbol', 'open'. You can override using
## the `.groups` argument.
## # A tibble: 5,907 × 4
##    symbol  open adjusted count
##    <chr>  <dbl>    <dbl> <int>
##  1 AMZN    23.9     24.1     1
##  2 AMZN    24.3     24.4     1
##  3 AMZN    24.6     25.2     1
##  4 AMZN    24.6     24.5     1
##  5 AMZN    25.5     25.4     1
##  6 AMZN    26.0     26.1     1
##  7 AMZN    26.0     26.7     1
##  8 AMZN    26.2     26.8     1
##  9 AMZN    26.4     26.7     1
## 10 AMZN    26.5     25.1     1
## # ℹ 5,897 more rows