Import stock prices

# Download daily stock prices for the AAL and OXY symbols,
# requesting the window from 2016-01-01 to 2017-01-01.
stocks <- tq_get(
  x = c("AAL", "OXY"),
  get = "stock.prices",
  from = "2016-01-01",
  to = "2017-01-01"
)

# Show the downloaded tibble.
stocks
## # A tibble: 504 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 AAL    2016-01-04  41.3  41.3  40.3  40.9 12037200     39.1
##  2 AAL    2016-01-05  41.2  41.5  40.0  40.5 10514200     38.7
##  3 AAL    2016-01-06  40.0  41.7  40    41.2 12393800     39.4
##  4 AAL    2016-01-07  41.0  41.5  39.7  40.5 11312900     38.7
##  5 AAL    2016-01-08  40.9  41.4  40.3  40.4  8853100     38.6
##  6 AAL    2016-01-11  40.6  41.2  39.9  41.1 15877500     39.3
##  7 AAL    2016-01-12  41.2  42.4  40.5  42   12636000     40.1
##  8 AAL    2016-01-13  42.2  42.7  39.7  40.1 12718600     38.3
##  9 AAL    2016-01-14  40.2  40.8  38.9  40.5 11756500     38.8
## 10 AAL    2016-01-15  39.0  39.9  38.4  38.8 18924800     37.0
## # … with 494 more rows

Plot stock prices

# Line chart of the adjusted price over time, one colored line per symbol.
ggplot(data = stocks, mapping = aes(x = date, y = adjusted, color = symbol)) +
  geom_line()

Apply the dplyr verbs you learned in Ch 5

Filter Rows

# Keep only the rows whose adjusted price is above 24.
filter(stocks, adjusted > 24)
## # A tibble: 504 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 AAL    2016-01-04  41.3  41.3  40.3  40.9 12037200     39.1
##  2 AAL    2016-01-05  41.2  41.5  40.0  40.5 10514200     38.7
##  3 AAL    2016-01-06  40.0  41.7  40    41.2 12393800     39.4
##  4 AAL    2016-01-07  41.0  41.5  39.7  40.5 11312900     38.7
##  5 AAL    2016-01-08  40.9  41.4  40.3  40.4  8853100     38.6
##  6 AAL    2016-01-11  40.6  41.2  39.9  41.1 15877500     39.3
##  7 AAL    2016-01-12  41.2  42.4  40.5  42   12636000     40.1
##  8 AAL    2016-01-13  42.2  42.7  39.7  40.1 12718600     38.3
##  9 AAL    2016-01-14  40.2  40.8  38.9  40.5 11756500     38.8
## 10 AAL    2016-01-15  39.0  39.9  38.4  38.8 18924800     37.0
## # … with 494 more rows

Arrange Rows

# Sort by daily low, highest first; ties are broken by the highest daily high.
stocks %>%
  arrange(desc(low), desc(high))
## # A tibble: 504 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 OXY    2016-08-30  78.5  78.5  77.6  77.8 2309600     62.5
##  2 OXY    2016-09-06  77.6  78.2  77.4  78   3978800     62.7
##  3 OXY    2016-06-23  77.8  78.3  77.3  78.3 4104000     62.9
##  4 OXY    2016-08-25  77.7  77.7  77    77.4 3228100     62.2
##  5 OXY    2016-09-07  77.4  77.8  76.9  77.1 3881700     62.6
##  6 OXY    2016-07-14  77.6  77.7  76.9  77.2 4158200     62.0
##  7 OXY    2016-07-15  77.3  77.6  76.9  77.3 3886400     62.1
##  8 OXY    2016-08-26  77.5  77.9  76.8  77.2 3517400     62.0
##  9 OXY    2016-09-08  77.6  77.9  76.8  77.8 5019200     63.1
## 10 OXY    2016-06-22  77.3  77.7  76.8  77.1 3564000     62.0
## # … with 494 more rows

Select Columns

# Keep the contiguous range of columns from `date` through `open`.
stocks %>%
  select(date:open)
## # A tibble: 504 × 2
##    date        open
##    <date>     <dbl>
##  1 2016-01-04  41.3
##  2 2016-01-05  41.2
##  3 2016-01-06  40.0
##  4 2016-01-07  41.0
##  5 2016-01-08  40.9
##  6 2016-01-11  40.6
##  7 2016-01-12  41.2
##  8 2016-01-13  42.2
##  9 2016-01-14  40.2
## 10 2016-01-15  39.0
## # … with 494 more rows
# Keep four columns, listed individually by name.
stocks %>%
  select(date, open, volume, adjusted)
## # A tibble: 504 × 4
##    date        open   volume adjusted
##    <date>     <dbl>    <dbl>    <dbl>
##  1 2016-01-04  41.3 12037200     39.1
##  2 2016-01-05  41.2 10514200     38.7
##  3 2016-01-06  40.0 12393800     39.4
##  4 2016-01-07  41.0 11312900     38.7
##  5 2016-01-08  40.9  8853100     38.6
##  6 2016-01-11  40.6 15877500     39.3
##  7 2016-01-12  41.2 12636000     40.1
##  8 2016-01-13  42.2 12718600     38.3
##  9 2016-01-14  40.2 11756500     38.8
## 10 2016-01-15  39.0 18924800     37.0
## # … with 494 more rows

Add Columns

# Add `change`: the gap between each row's high and low price.
stocks %>%
  mutate(change = high - low)
## # A tibble: 504 × 9
##    symbol date        open  high   low close   volume adjusted change
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>  <dbl>
##  1 AAL    2016-01-04  41.3  41.3  40.3  40.9 12037200     39.1   1.05
##  2 AAL    2016-01-05  41.2  41.5  40.0  40.5 10514200     38.7   1.54
##  3 AAL    2016-01-06  40.0  41.7  40    41.2 12393800     39.4   1.70
##  4 AAL    2016-01-07  41.0  41.5  39.7  40.5 11312900     38.7   1.84
##  5 AAL    2016-01-08  40.9  41.4  40.3  40.4  8853100     38.6   1.10
##  6 AAL    2016-01-11  40.6  41.2  39.9  41.1 15877500     39.3   1.30
##  7 AAL    2016-01-12  41.2  42.4  40.5  42   12636000     40.1   1.88
##  8 AAL    2016-01-13  42.2  42.7  39.7  40.1 12718600     38.3   3.08
##  9 AAL    2016-01-14  40.2  40.8  38.9  40.5 11756500     38.8   1.92
## 10 AAL    2016-01-15  39.0  39.9  38.4  38.8 18924800     37.0   1.52
## # … with 494 more rows

Summarize with Groups

Collapsing data to a single row

# Print the data set that the summaries below are computed from.
stocks
## # A tibble: 504 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 AAL    2016-01-04  41.3  41.3  40.3  40.9 12037200     39.1
##  2 AAL    2016-01-05  41.2  41.5  40.0  40.5 10514200     38.7
##  3 AAL    2016-01-06  40.0  41.7  40    41.2 12393800     39.4
##  4 AAL    2016-01-07  41.0  41.5  39.7  40.5 11312900     38.7
##  5 AAL    2016-01-08  40.9  41.4  40.3  40.4  8853100     38.6
##  6 AAL    2016-01-11  40.6  41.2  39.9  41.1 15877500     39.3
##  7 AAL    2016-01-12  41.2  42.4  40.5  42   12636000     40.1
##  8 AAL    2016-01-13  42.2  42.7  39.7  40.1 12718600     38.3
##  9 AAL    2016-01-14  40.2  40.8  38.9  40.5 11756500     38.8
## 10 AAL    2016-01-15  39.0  39.9  38.4  38.8 18924800     37.0
## # … with 494 more rows
# Mean of `high` over every row (both symbols together), skipping NAs.
stocks %>%
  summarise(avg_high = mean(high, na.rm = TRUE))
## # A tibble: 1 × 1
##   avg_high
##      <dbl>
## 1     55.7

Summarize by Group

# Average daily high per symbol, listed from lowest to highest.
stocks %>%
  group_by(symbol) %>%
  summarise(avg_high = mean(high, na.rm = TRUE)) %>%
  arrange(avg_high)
## # A tibble: 2 × 2
##   symbol avg_high
##   <chr>     <dbl>
## 1 AAL        38.7
## 2 OXY        72.7

Relationship between closing and opening prices

# Scatter of mean open against close, with point size showing how many rows
# share each close value, plus a smoothed trend line (no confidence band).
# NOTE(review): grouping by `close` creates one group per distinct closing
# price, and `close` is then recomputed as its own group mean -- confirm this
# grouping is intended.
stocks %>%
  group_by(close) %>%
  summarise(
    count = n(),
    close = mean(close, na.rm = TRUE),
    open = mean(open, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = close, y = open)) +
  geom_point(aes(size = count), alpha = 0.3) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Missing Values

# Drop every row whose `volume` is missing.
filter(stocks, !is.na(volume))
## # A tibble: 504 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 AAL    2016-01-04  41.3  41.3  40.3  40.9 12037200     39.1
##  2 AAL    2016-01-05  41.2  41.5  40.0  40.5 10514200     38.7
##  3 AAL    2016-01-06  40.0  41.7  40    41.2 12393800     39.4
##  4 AAL    2016-01-07  41.0  41.5  39.7  40.5 11312900     38.7
##  5 AAL    2016-01-08  40.9  41.4  40.3  40.4  8853100     38.6
##  6 AAL    2016-01-11  40.6  41.2  39.9  41.1 15877500     39.3
##  7 AAL    2016-01-12  41.2  42.4  40.5  42   12636000     40.1
##  8 AAL    2016-01-13  42.2  42.7  39.7  40.1 12718600     38.3
##  9 AAL    2016-01-14  40.2  40.8  38.9  40.5 11756500     38.8
## 10 AAL    2016-01-15  39.0  39.9  38.4  38.8 18924800     37.0
## # … with 494 more rows