Import stock prices

# Fetch daily price history for both tickers, starting 2016-01-01.
# NOTE(review): no `to` date is supplied, so the end of the range is left to
# tq_get()'s default; the row count printed below presumably changes when the
# document is re-knit.
stocks <- tq_get(
    x = c("TSLA", "AMZN"),
    get = "stock.prices",
    from = "2016-01-01"
)
# Print the tibble to check what was imported.
print(stocks)
## # A tibble: 3,878 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 TSLA   2016-01-04  15.4  15.4  14.6  14.9 102406500     14.9
##  2 TSLA   2016-01-05  15.1  15.1  14.7  14.9  47802000     14.9
##  3 TSLA   2016-01-06  14.7  14.7  14.4  14.6  56686500     14.6
##  4 TSLA   2016-01-07  14.3  14.6  14.2  14.4  53314500     14.4
##  5 TSLA   2016-01-08  14.5  14.7  14.1  14.1  54421500     14.1
##  6 TSLA   2016-01-11  14.3  14.3  13.5  13.9  61371000     13.9
##  7 TSLA   2016-01-12  14.1  14.2  13.7  14.0  46378500     14.0
##  8 TSLA   2016-01-13  14.1  14.2  13.3  13.4  61896000     13.4
##  9 TSLA   2016-01-14  13.5  14    12.9  13.7  97360500     13.7
## 10 TSLA   2016-01-15  13.3  13.7  13.1  13.7  83679000     13.7
## # ℹ 3,868 more rows

Plot stock prices

# Line chart of the `adjusted` price over time, one coloured line per ticker.
ggplot(data = stocks, mapping = aes(x = date, y = adjusted, color = symbol)) +
    geom_line()

Apply the dplyr verbs you learned in Chapter 5

Filter rows

# Keep rows where both `high` and `low` are exactly 1. No row in the data
# satisfies this, so the result is an empty tibble.
stocks %>%
    filter(high == 1 & low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>

Arrange rows

# Sort by `low` from largest to smallest; ties are broken by the most recent
# date first.
stocks %>%
    arrange(desc(low), desc(date))
## # A tibble: 3,878 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 TSLA   2021-11-04  411.  414.  406.  410.  76192200     410.
##  2 TSLA   2021-11-05  409.  413.  403.  407.  64886400     407.
##  3 TSLA   2021-11-03  392.  405.  384.  405. 103885500     405.
##  4 TSLA   2021-11-02  386.  403.  382   391. 128213400     391.
##  5 TSLA   2022-01-03  383.  400.  379.  400. 103931400     400.
##  6 TSLA   2021-11-08  383.  399   378.  388. 100337100     388.
##  7 TSLA   2021-11-22  387.  401.  377.  386.  99217500     386.
##  8 TSLA   2022-01-04  397.  403.  374.  383. 100248300     383.
##  9 TSLA   2021-11-01  382.  403.  373.  403. 168146100     403.
## 10 TSLA   2021-11-30  381.  389.  373.  382.  81276000     382.
## # ℹ 3,868 more rows

Select columns

# Keep only the `high` column.
stocks %>%
    select(high)
## # A tibble: 3,878 × 1
##     high
##    <dbl>
##  1  15.4
##  2  15.1
##  3  14.7
##  4  14.6
##  5  14.7
##  6  14.3
##  7  14.2
##  8  14.2
##  9  14  
## 10  13.7
## # ℹ 3,868 more rows
# Keep `high`, `low` and `date`, in that order.
stocks %>%
    select(high, low, date)
## # A tibble: 3,878 × 3
##     high   low date      
##    <dbl> <dbl> <date>    
##  1  15.4  14.6 2016-01-04
##  2  15.1  14.7 2016-01-05
##  3  14.7  14.4 2016-01-06
##  4  14.6  14.2 2016-01-07
##  5  14.7  14.1 2016-01-08
##  6  14.3  13.5 2016-01-11
##  7  14.2  13.7 2016-01-12
##  8  14.2  13.3 2016-01-13
##  9  14    12.9 2016-01-14
## 10  13.7  13.1 2016-01-15
## # ℹ 3,868 more rows

Add columns

# Overwrite `high` with `date - low`. Subtracting a number from a Date moves
# the date back by that many days, which is why `high` prints as <date> below.
# NOTE(review): a date minus a price is presumably not the intended new column.
stocks %>%
    mutate(high = date - low) %>%
    # Keep the recomputed `high`, then `open` and `date`.
    select(high, open, date)
## # A tibble: 3,878 × 3
##    high        open date      
##    <date>     <dbl> <date>    
##  1 2015-12-20  15.4 2016-01-04
##  2 2015-12-21  15.1 2016-01-05
##  3 2015-12-22  14.7 2016-01-06
##  4 2015-12-23  14.3 2016-01-07
##  5 2015-12-24  14.5 2016-01-08
##  6 2015-12-28  14.3 2016-01-11
##  7 2015-12-29  14.1 2016-01-12
##  8 2015-12-30  14.1 2016-01-13
##  9 2016-01-01  13.5 2016-01-14
## 10 2016-01-01  13.3 2016-01-15
## # ℹ 3,868 more rows

Keep only the adjusted column

# Keep only the `adjusted` column. No mutate() step is needed first: a column
# that is created and then not selected never reaches the result.
select(stocks, adjusted)
## # A tibble: 3,878 × 1
##    adjusted
##       <dbl>
##  1     14.9
##  2     14.9
##  3     14.6
##  4     14.4
##  5     14.1
##  6     13.9
##  7     14.0
##  8     13.4
##  9     13.7
## 10     13.7
## # ℹ 3,868 more rows

Alternative using transmute()

# transmute() builds the new column and drops every other column in one step.
# `high` here is `date - low`, i.e. a Date shifted back by `low` days.
stocks %>%
    transmute(high = date - low)
## # A tibble: 3,878 × 1
##    high      
##    <date>    
##  1 2015-12-20
##  2 2015-12-21
##  3 2015-12-22
##  4 2015-12-23
##  5 2015-12-24
##  6 2015-12-28
##  7 2015-12-29
##  8 2015-12-30
##  9 2016-01-01
## 10 2016-01-01
## # ℹ 3,868 more rows

Summarise with groups

# Show the full data set before it is summarised.
print(stocks)
## # A tibble: 3,878 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 TSLA   2016-01-04  15.4  15.4  14.6  14.9 102406500     14.9
##  2 TSLA   2016-01-05  15.1  15.1  14.7  14.9  47802000     14.9
##  3 TSLA   2016-01-06  14.7  14.7  14.4  14.6  56686500     14.6
##  4 TSLA   2016-01-07  14.3  14.6  14.2  14.4  53314500     14.4
##  5 TSLA   2016-01-08  14.5  14.7  14.1  14.1  54421500     14.1
##  6 TSLA   2016-01-11  14.3  14.3  13.5  13.9  61371000     13.9
##  7 TSLA   2016-01-12  14.1  14.2  13.7  14.0  46378500     14.0
##  8 TSLA   2016-01-13  14.1  14.2  13.3  13.4  61896000     13.4
##  9 TSLA   2016-01-14  13.5  14    12.9  13.7  97360500     13.7
## 10 TSLA   2016-01-15  13.3  13.7  13.1  13.7  83679000     13.7
## # ℹ 3,868 more rows
# Average of the daily `low` price over all rows (both tickers pooled).
# The result column is named for what it holds: labelling a price average
# `date` would be misleading.
summarise(stocks, avg_low = mean(low, na.rm = TRUE))
## # A tibble: 1 × 1
##   avg_low
##     <dbl>
## 1    102.

Collapsing data to a single row

# Show the full data set before collapsing it to one row.
print(stocks)
## # A tibble: 3,878 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 TSLA   2016-01-04  15.4  15.4  14.6  14.9 102406500     14.9
##  2 TSLA   2016-01-05  15.1  15.1  14.7  14.9  47802000     14.9
##  3 TSLA   2016-01-06  14.7  14.7  14.4  14.6  56686500     14.6
##  4 TSLA   2016-01-07  14.3  14.6  14.2  14.4  53314500     14.4
##  5 TSLA   2016-01-08  14.5  14.7  14.1  14.1  54421500     14.1
##  6 TSLA   2016-01-11  14.3  14.3  13.5  13.9  61371000     13.9
##  7 TSLA   2016-01-12  14.1  14.2  13.7  14.0  46378500     14.0
##  8 TSLA   2016-01-13  14.1  14.2  13.3  13.4  61896000     13.4
##  9 TSLA   2016-01-14  13.5  14    12.9  13.7  97360500     13.7
## 10 TSLA   2016-01-15  13.3  13.7  13.1  13.7  83679000     13.7
## # ℹ 3,868 more rows
# Collapse the whole table to one row: the mean daily `low` across both
# tickers. The column is called `avg_low` so that the name matches the value
# (a price average, not a date).
summarise(stocks, avg_low = mean(low, na.rm = TRUE))
## # A tibble: 1 × 1
##   avg_low
##     <dbl>
## 1    102.

Summarise by group

# Mean daily `low` per ticker, sorted from smallest to largest.
# NOTE(review): the summary is stored under the name `symbol`, which replaces
# the grouping column, so the printed result no longer shows which ticker each
# mean belongs to.
stocks %>%
    group_by(symbol) %>%
    summarise(symbol = mean(low, na.rm = TRUE)) %>%
    arrange(symbol)
## # A tibble: 2 × 1
##   symbol
##    <dbl>
## 1   97.8
## 2  106.

Group by stock: 52-week high and low

# One group per distinct value of `high`; for each group, count the rows and
# average `adjusted`.
# NOTE(review): `high` and `low` are both set to mean(adjusted), so every
# point in the plot has x equal to y.
stocks %>%
    group_by(high) %>%
    summarise(
        count = n(),
        high = mean(adjusted, na.rm = TRUE),
        low = mean(adjusted, na.rm = TRUE)
    ) %>%
    # Scatter sized by group size, plus a smoothed trend without its
    # confidence band.
    ggplot(aes(x = high, y = low)) +
    geom_point(aes(size = count), alpha = 0.3) +
    geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Missing values

# Drop rows whose `high` is NA. Every row survives here, so the column has no
# missing values.
filter(stocks, !is.na(high))
## # A tibble: 3,878 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 TSLA   2016-01-04  15.4  15.4  14.6  14.9 102406500     14.9
##  2 TSLA   2016-01-05  15.1  15.1  14.7  14.9  47802000     14.9
##  3 TSLA   2016-01-06  14.7  14.7  14.4  14.6  56686500     14.6
##  4 TSLA   2016-01-07  14.3  14.6  14.2  14.4  53314500     14.4
##  5 TSLA   2016-01-08  14.5  14.7  14.1  14.1  54421500     14.1
##  6 TSLA   2016-01-11  14.3  14.3  13.5  13.9  61371000     13.9
##  7 TSLA   2016-01-12  14.1  14.2  13.7  14.0  46378500     14.0
##  8 TSLA   2016-01-13  14.1  14.2  13.3  13.4  61896000     13.4
##  9 TSLA   2016-01-14  13.5  14    12.9  13.7  97360500     13.7
## 10 TSLA   2016-01-15  13.3  13.7  13.1  13.7  83679000     13.7
## # ℹ 3,868 more rows

Grouping by multiple variables

# Count rows. The grouping step is disabled, so summarise() returns a single
# row and ungroup() has nothing to undo.
# Disabled grouping step: group_by(date, high, low)
stocks %>%
    summarise(count = n()) %>%
    ungroup()
## # A tibble: 1 × 1
##   count
##   <int>
## 1  3878