Import stock prices

stocks <- tq_get(c("APPL", "NKE"),
                 get = "stock.prices",
                 from = "2016-01-01",
                 to = "2017-01-01")
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `data.. = purrr::map(...)`.
## Caused by warning:
## ! x = 'APPL', get = 'stock.prices': Error in getSymbols.yahoo(Symbols = "APPL", env = <environment>, verbose = FALSE, : Unable to import "APPL".
## attempt to set an attribute on NULL
##  Removing APPL.
stocks
## # A tibble: 252 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 NKE    2016-01-04  61.1  61.9  60.9  61.5 11626800     56.6
##  2 NKE    2016-01-05  61.7  62.6  61.7  62.4  9220600     57.4
##  3 NKE    2016-01-06  61.3  62.0  61.2  61.5  6551600     56.5
##  4 NKE    2016-01-07  60.4  61.3  59.8  59.8 10881300     55.0
##  5 NKE    2016-01-08  60.1  60.8  58.7  58.9 11191300     54.1
##  6 NKE    2016-01-11  59.0  60.0  58.5  59.5 12825000     54.7
##  7 NKE    2016-01-12  60.3  60.8  59.6  59.9  8292200     55.1
##  8 NKE    2016-01-13  60.5  60.5  58.7  58.8  9944300     54.0
##  9 NKE    2016-01-14  59    59.3  57.3  58.5  9989000     53.8
## 10 NKE    2016-01-15  56.9  58.1  56.6  57.6 12208300     52.9
## # ℹ 242 more rows

Plot stock prices

stocks %>%
    
    ggplot(aes(x = date, y = adjusted, color = symbol)) +
    geom_line()

Apply the dplyr verbs you learned in chapter 5

Filter rows

filter(stocks, high ==1, low ==1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>
filter(stocks, high ==1 & low ==1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>
filter(stocks, high ==1 | low ==1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>

Arrange rows

arrange(stocks, desc(high), desc(date()))
## # A tibble: 252 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 NKE    2016-03-22  64.8  65.4  64.2  64.9 18212900     59.8
##  2 NKE    2016-03-21  63.5  65.3  63.4  64.7 13635400     59.7
##  3 NKE    2016-03-18  63.5  64.0  62.8  63.0 13192900     58.1
##  4 NKE    2016-02-02  63.0  63.5  62.3  62.5  9458500     57.5
##  5 NKE    2016-02-26  62.6  63.5  62.5  62.6  8896100     57.6
##  6 NKE    2016-02-01  61.8  63.5  61.7  63.2  8247800     58.1
##  7 NKE    2016-03-23  61.3  63.5  61.2  62.4 26987000     57.6
##  8 NKE    2016-03-17  61.8  63.4  61.7  63.2  9926800     58.2
##  9 NKE    2016-03-01  62.0  63.1  61.8  62.9  7438300     57.8
## 10 NKE    2016-02-03  62.9  62.9  61.3  62.5  7213500     57.5
## # ℹ 242 more rows

Select columns

select(stocks, high)
## # A tibble: 252 × 1
##     high
##    <dbl>
##  1  61.9
##  2  62.6
##  3  62.0
##  4  61.3
##  5  60.8
##  6  60.0
##  7  60.8
##  8  60.5
##  9  59.3
## 10  58.1
## # ℹ 242 more rows
select(stocks, high, low, date)
## # A tibble: 252 × 3
##     high   low date      
##    <dbl> <dbl> <date>    
##  1  61.9  60.9 2016-01-04
##  2  62.6  61.7 2016-01-05
##  3  62.0  61.2 2016-01-06
##  4  61.3  59.8 2016-01-07
##  5  60.8  58.7 2016-01-08
##  6  60.0  58.5 2016-01-11
##  7  60.8  59.6 2016-01-12
##  8  60.5  58.7 2016-01-13
##  9  59.3  57.3 2016-01-14
## 10  58.1  56.6 2016-01-15
## # ℹ 242 more rows
select(stocks, high, low, date)
## # A tibble: 252 × 3
##     high   low date      
##    <dbl> <dbl> <date>    
##  1  61.9  60.9 2016-01-04
##  2  62.6  61.7 2016-01-05
##  3  62.0  61.2 2016-01-06
##  4  61.3  59.8 2016-01-07
##  5  60.8  58.7 2016-01-08
##  6  60.0  58.5 2016-01-11
##  7  60.8  59.6 2016-01-12
##  8  60.5  58.7 2016-01-13
##  9  59.3  57.3 2016-01-14
## 10  58.1  56.6 2016-01-15
## # ℹ 242 more rows

Add columns

mutate(stocks,
       high = date - volume) %>%
    
    #select year, month, day, and gain
    select(symbol:date, high)
## # A tibble: 252 × 3
##    symbol date       high        
##    <chr>  <date>     <date>      
##  1 NKE    2016-01-04 -29818-11-29
##  2 NKE    2016-01-05 -23230-11-13
##  3 NKE    2016-01-06 -15922-05-05
##  4 NKE    2016-01-07 -27776-01-12
##  5 NKE    2016-01-08 -28625-04-13
##  6 NKE    2016-01-11 -33098-05-16
##  7 NKE    2016-01-12 -20688-10-04
##  8 NKE    2016-01-13 -25211-06-19
##  9 NKE    2016-01-14 -25333-01-31
## 10 NKE    2016-01-15 -31410-11-06
## # ℹ 242 more rows
#just keep gain
mutate(stocks,
       high = date - low) %>%
    
    #select year, month, day, and gain
    select(low)
## # A tibble: 252 × 1
##      low
##    <dbl>
##  1  60.9
##  2  61.7
##  3  61.2
##  4  59.8
##  5  58.7
##  6  58.5
##  7  59.6
##  8  58.7
##  9  57.3
## 10  56.6
## # ℹ 242 more rows
#alternative using transmute()
transmute(stocks,
          high = date - low)
## # A tibble: 252 × 1
##    high      
##    <date>    
##  1 2015-11-04
##  2 2015-11-04
##  3 2015-11-05
##  4 2015-11-08
##  5 2015-11-10
##  6 2015-11-13
##  7 2015-11-13
##  8 2015-11-15
##  9 2015-11-17
## 10 2015-11-19
## # ℹ 242 more rows

Summarize by groups

Collapsing data to a single row

stocks
## # A tibble: 252 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 NKE    2016-01-04  61.1  61.9  60.9  61.5 11626800     56.6
##  2 NKE    2016-01-05  61.7  62.6  61.7  62.4  9220600     57.4
##  3 NKE    2016-01-06  61.3  62.0  61.2  61.5  6551600     56.5
##  4 NKE    2016-01-07  60.4  61.3  59.8  59.8 10881300     55.0
##  5 NKE    2016-01-08  60.1  60.8  58.7  58.9 11191300     54.1
##  6 NKE    2016-01-11  59.0  60.0  58.5  59.5 12825000     54.7
##  7 NKE    2016-01-12  60.3  60.8  59.6  59.9  8292200     55.1
##  8 NKE    2016-01-13  60.5  60.5  58.7  58.8  9944300     54.0
##  9 NKE    2016-01-14  59    59.3  57.3  58.5  9989000     53.8
## 10 NKE    2016-01-15  56.9  58.1  56.6  57.6 12208300     52.9
## # ℹ 242 more rows
# average stock price
summarise(stocks, date = mean(low, na.rm = TRUE))
## # A tibble: 1 × 1
##    date
##   <dbl>
## 1  55.7

Summarize by group

stocks %>%
    
    # Group by Stock
    group_by(symbol) %>%
    
    # Calculate average Stock
    summarise(symbol = mean(low, na.rm = TRUE)) %>%
    
    # Sort it
    arrange(symbol)
## # A tibble: 1 × 1
##   symbol
##    <dbl>
## 1   55.7

Group by stock week 52 high and low

stocks %>%
    group_by(high) %>%
    summarise(count = n(),
              high = mean(adjusted, na.rm = TRUE),
              low = mean(adjusted, na.rm = TRUE)) %>%
   
     # plot
    ggplot(mapping = aes(x = high, y = low)) + 
    geom_point(aes(size = count), alpha = 0.3) +
    geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Missing values

stocks %>%
    
    # Remove missing values
    filter(!is.na(high))
## # A tibble: 252 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 NKE    2016-01-04  61.1  61.9  60.9  61.5 11626800     56.6
##  2 NKE    2016-01-05  61.7  62.6  61.7  62.4  9220600     57.4
##  3 NKE    2016-01-06  61.3  62.0  61.2  61.5  6551600     56.5
##  4 NKE    2016-01-07  60.4  61.3  59.8  59.8 10881300     55.0
##  5 NKE    2016-01-08  60.1  60.8  58.7  58.9 11191300     54.1
##  6 NKE    2016-01-11  59.0  60.0  58.5  59.5 12825000     54.7
##  7 NKE    2016-01-12  60.3  60.8  59.6  59.9  8292200     55.1
##  8 NKE    2016-01-13  60.5  60.5  58.7  58.8  9944300     54.0
##  9 NKE    2016-01-14  59    59.3  57.3  58.5  9989000     53.8
## 10 NKE    2016-01-15  56.9  58.1  56.6  57.6 12208300     52.9
## # ℹ 242 more rows

grouping multiple variables

stocks %>%
    #group_by(date, high, low) %>%
    summarise(count = n()) %>%
    ungroup()
## # A tibble: 1 × 1
##   count
##   <int>
## 1   252