Import stock prices

# Download price history for the three tickers, starting 2016-01-01,
# then print the resulting tibble.
stocks <- tq_get(
    x    = c("MSFT", "JPM", "NVDA"),
    from = "2016-01-01",
    get  = "stock.prices"
)
stocks
## # A tibble: 5,964 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 MSFT   2016-01-04  54.3  54.8  53.4  54.8 53778000     48.7
##  2 MSFT   2016-01-05  54.9  55.4  54.5  55.0 34079700     48.9
##  3 MSFT   2016-01-06  54.3  54.4  53.6  54.0 39518900     48.0
##  4 MSFT   2016-01-07  52.7  53.5  52.1  52.2 56564900     46.4
##  5 MSFT   2016-01-08  52.4  53.3  52.2  52.3 48754000     46.5
##  6 MSFT   2016-01-11  52.5  52.8  51.5  52.3 36943800     46.5
##  7 MSFT   2016-01-12  52.8  53.1  52.1  52.8 36095500     46.9
##  8 MSFT   2016-01-13  53.8  54.1  51.3  51.6 66883600     45.9
##  9 MSFT   2016-01-14  52    53.4  51.6  53.1 52381900     47.2
## 10 MSFT   2016-01-15  51.3  52.0  50.3  51.0 71820700     45.3
## # ℹ 5,954 more rows

Plot stock prices

# Line chart of the adjusted price over time, one coloured line per symbol
ggplot(stocks, aes(date, adjusted, colour = symbol)) +
    geom_line()

Apply the dplyr verbs you learned in Chapter 5

Filter rows

# Comma-separated conditions are combined with AND
stocks %>% filter(high == 1, low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>
# Explicit AND: a row is kept only when both conditions hold
stocks %>% filter(high == 1 & low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>
# OR: a row is kept when either condition holds
stocks %>% filter(high == 1 | low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>

Arrange Rows

# Sort by daily high, largest first; break ties with the most recent date.
# `date` must be the bare column name: `date()` would call base R's clock
# function, which yields a single constant string and never affects the order.
arrange(stocks, desc(high), desc(date))
## # A tibble: 5,964 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 NVDA   2023-11-20  493.  505.  492.  504.  41412000     504.
##  2 NVDA   2023-11-21  501.  505.  492.  499.  56574700     499.
##  3 NVDA   2023-11-22  499.  503.  477.  487.  89942000     487.
##  4 NVDA   2023-08-24  502.  503.  472.  472. 115604400     472.
##  5 NVDA   2023-11-15  499.  500.  482   489.  47549700     489.
##  6 NVDA   2023-08-30  490.  499.  484.  493.  73520600     493.
##  7 NVDA   2023-11-14  497.  498.  490.  497.  41695400     497.
##  8 NVDA   2023-09-01  498.  498   481.  485.  46319100     485.
##  9 NVDA   2023-08-31  494.  497.  490.  494.  52857000     494.
## 10 NVDA   2023-11-17  495.  497.  490.  493.  32520500     493.
## # ℹ 5,954 more rows

Select Columns

# Keep a single column
stocks %>% select(high)
## # A tibble: 5,964 × 1
##     high
##    <dbl>
##  1  54.8
##  2  55.4
##  3  54.4
##  4  53.5
##  5  53.3
##  6  52.8
##  7  53.1
##  8  54.1
##  9  53.4
## 10  52.0
## # ℹ 5,954 more rows
# Keep several columns, in the order listed
stocks %>% select(high, low, date)
## # A tibble: 5,964 × 3
##     high   low date      
##    <dbl> <dbl> <date>    
##  1  54.8  53.4 2016-01-04
##  2  55.4  54.5 2016-01-05
##  3  54.4  53.6 2016-01-06
##  4  53.5  52.1 2016-01-07
##  5  53.3  52.2 2016-01-08
##  6  52.8  51.5 2016-01-11
##  7  53.1  52.1 2016-01-12
##  8  54.1  51.3 2016-01-13
##  9  53.4  51.6 2016-01-14
## 10  52.0  50.3 2016-01-15
## # ℹ 5,954 more rows
# `high` and `low` sit next to each other in `stocks`, so the range
# `high:low` picks exactly those two columns; `date` is appended after them
stocks %>% select(high:low, date)
## # A tibble: 5,964 × 3
##     high   low date      
##    <dbl> <dbl> <date>    
##  1  54.8  53.4 2016-01-04
##  2  55.4  54.5 2016-01-05
##  3  54.4  53.6 2016-01-06
##  4  53.5  52.1 2016-01-07
##  5  53.3  52.2 2016-01-08
##  6  52.8  51.5 2016-01-11
##  7  53.1  52.1 2016-01-12
##  8  54.1  51.3 2016-01-13
##  9  53.4  51.6 2016-01-14
## 10  52.0  50.3 2016-01-15
## # ℹ 5,954 more rows

Add Columns

# Overwrite `high` with `date - volume`, then keep only the columns from
# `symbol` through `date` plus the recomputed `high`.
# NOTE(review): subtracting a number from a Date moves it back by that many
# days, which is why the printed values are far-off dates; a difference
# between two price columns was probably intended — confirm.
stocks %>%
    mutate(high = date - volume) %>%
    select(symbol:date, high)
## # A tibble: 5,964 × 3
##    symbol date       high         
##    <chr>  <date>     <date>       
##  1 MSFT   2016-01-04 -145224-11-05
##  2 MSFT   2016-01-05 -91292-12-18 
##  3 MSFT   2016-01-06 -106184-12-10
##  4 MSFT   2016-01-07 -152854-08-01
##  5 MSFT   2016-01-08 -131468-02-07
##  6 MSFT   2016-01-11 -99133-05-04 
##  7 MSFT   2016-01-12 -96811-11-28 
##  8 MSFT   2016-01-13 -181106-12-15
##  9 MSFT   2016-01-14 -141401-04-07
## 10 MSFT   2016-01-15 -194623-08-22
## # ℹ 5,954 more rows
# Keep only the newly computed column.
# NOTE(review): `date - low` moves each date back by `low` days; a difference
# between two price columns may have been intended — confirm.
stocks %>%
    mutate(high = date - low) %>%
    # select the column created above, not the untouched `low`
    select(high)
## # A tibble: 5,964 × 1
##    high      
##    <date>    
##  1 2015-11-11
##  2 2015-11-11
##  3 2015-11-13
##  4 2015-11-15
##  5 2015-11-16
##  6 2015-11-20
##  7 2015-11-20
##  8 2015-11-22
##  9 2015-11-23
## 10 2015-11-25
## # ℹ 5,954 more rows
# transmute() computes the new column and drops every other column
stocks %>%
    transmute(high = date - low)
## # A tibble: 5,964 × 1
##    high      
##    <date>    
##  1 2015-11-11
##  2 2015-11-11
##  3 2015-11-13
##  4 2015-11-15
##  5 2015-11-16
##  6 2015-11-20
##  7 2015-11-20
##  8 2015-11-22
##  9 2015-11-23
## 10 2015-11-25
## # ℹ 5,954 more rows

Summarize with groups

# Print the full tibble again as a reference point before summarising
stocks
## # A tibble: 5,964 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 MSFT   2016-01-04  54.3  54.8  53.4  54.8 53778000     48.7
##  2 MSFT   2016-01-05  54.9  55.4  54.5  55.0 34079700     48.9
##  3 MSFT   2016-01-06  54.3  54.4  53.6  54.0 39518900     48.0
##  4 MSFT   2016-01-07  52.7  53.5  52.1  52.2 56564900     46.4
##  5 MSFT   2016-01-08  52.4  53.3  52.2  52.3 48754000     46.5
##  6 MSFT   2016-01-11  52.5  52.8  51.5  52.3 36943800     46.5
##  7 MSFT   2016-01-12  52.8  53.1  52.1  52.8 36095500     46.9
##  8 MSFT   2016-01-13  53.8  54.1  51.3  51.6 66883600     45.9
##  9 MSFT   2016-01-14  52    53.4  51.6  53.1 52381900     47.2
## 10 MSFT   2016-01-15  51.3  52.0  50.3  51.0 71820700     45.3
## # ℹ 5,954 more rows
# Mean of the daily low over every row, with all symbols pooled together.
# The result column is named for what it holds, so the printed summary is
# not mistaken for a date.
summarise(stocks, avg_low = mean(low, na.rm = TRUE))
## # A tibble: 1 × 1
##   avg_low
##     <dbl>
## 1    134.
# Mean daily low per symbol, sorted from smallest to largest.
# NOTE(review): the summary is stored under the name `symbol`, which replaces
# the grouping column, so the printed result no longer shows which stock each
# mean belongs to. A distinct name (e.g. `avg_low`) would keep the labels —
# confirm intent before changing.
stocks %>%
    group_by(symbol) %>%
    summarise(symbol = mean(low, na.rm = TRUE)) %>%
    arrange(symbol)
## # A tibble: 3 × 1
##   symbol
##    <dbl>
## 1   113.
## 2   118.
## 3   172.

Group by stock: 52-week high and low

# Collapse to one row per distinct `high` value, then scatter the two summary
# columns, sizing points by group size and overlaying a smoother without its
# confidence band.
# NOTE(review): `high` and `low` are both set to mean(adjusted), so every
# point has x equal to y — confirm this is intended.
stocks %>%
    group_by(high) %>%
    summarise(
        count = n(),
        high  = mean(adjusted, na.rm = TRUE),
        low   = mean(adjusted, na.rm = TRUE)
    ) %>%
    ggplot(aes(high, low)) +
    geom_point(mapping = aes(size = count), alpha = 0.3) +
    geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Missing Values

# Keep only the rows where `high` is not missing
filter(stocks, !is.na(high))
## # A tibble: 5,964 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 MSFT   2016-01-04  54.3  54.8  53.4  54.8 53778000     48.7
##  2 MSFT   2016-01-05  54.9  55.4  54.5  55.0 34079700     48.9
##  3 MSFT   2016-01-06  54.3  54.4  53.6  54.0 39518900     48.0
##  4 MSFT   2016-01-07  52.7  53.5  52.1  52.2 56564900     46.4
##  5 MSFT   2016-01-08  52.4  53.3  52.2  52.3 48754000     46.5
##  6 MSFT   2016-01-11  52.5  52.8  51.5  52.3 36943800     46.5
##  7 MSFT   2016-01-12  52.8  53.1  52.1  52.8 36095500     46.9
##  8 MSFT   2016-01-13  53.8  54.1  51.3  51.6 66883600     45.9
##  9 MSFT   2016-01-14  52    53.4  51.6  53.1 52381900     47.2
## 10 MSFT   2016-01-15  51.3  52.0  50.3  51.0 71820700     45.3
## # ℹ 5,954 more rows

Grouping multiple variables

# Count the rows for each (date, high, low) combination.
# `.groups = "drop"` returns an ungrouped tibble directly, so no separate
# ungroup() step is needed and summarise() prints no regrouping message.
stocks %>%
    group_by(date, high, low) %>%
    summarise(count = n(), .groups = "drop")
## # A tibble: 5,964 × 4
##    date        high   low count
##    <date>     <dbl> <dbl> <int>
##  1 2016-01-04  8.15  8.01     1
##  2 2016-01-04 54.8  53.4      1
##  3 2016-01-04 64.1  63.0      1
##  4 2016-01-05  8.36  8.12     1
##  5 2016-01-05 55.4  54.5      1
##  6 2016-01-05 64.1  63.0      1
##  7 2016-01-06  8.12  7.79     1
##  8 2016-01-06 54.4  53.6      1
##  9 2016-01-06 63.1  62.3      1
## 10 2016-01-07  7.74  7.47     1
## # ℹ 5,954 more rows