Import stock prices

# Download daily price history for the tickers HD and LOW from 2016-01-01
# onward, then display the resulting tibble.
stocks <- c("HD", "LOW") %>%
    tq_get(get = "stock.prices", from = "2016-01-01")
stocks
## # A tibble: 3,878 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 HD     2016-01-04  130.  131.  129.  131.  8050900    109. 
##  2 HD     2016-01-05  131.  132.  130.  130.  5320900    109. 
##  3 HD     2016-01-06  129   130.  129.  129.  8199600    108. 
##  4 HD     2016-01-07  127.  128.  125.  125. 12551000    104. 
##  5 HD     2016-01-08  126.  127.  124.  124.  7104500    103. 
##  6 HD     2016-01-11  125.  126.  124.  126.  6811600    105. 
##  7 HD     2016-01-12  127.  129.  126.  127.  5695700    106. 
##  8 HD     2016-01-13  127.  127.  121.  121.  8932200    101. 
##  9 HD     2016-01-14  121.  122.  118.  120. 13389100     99.7
## 10 HD     2016-01-15  117.  121.  116.  119. 10515000     99.3
## # ℹ 3,868 more rows

Plot stock prices

# Adjusted price over time, drawn as one colored line per ticker symbol.
ggplot(stocks, aes(x = date, y = adjusted, color = symbol)) +
    geom_line()

Apply the dplyr verbs you learned in chapter 5

Filter rows

# Keep only rows whose daily high and daily low are both exactly 1.
# NOTE(review): no row in this data set matches, so the result is empty —
# confirm the intended thresholds.
stocks %>%
    filter(high == 1 & low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## #   close <dbl>, volume <dbl>, adjusted <dbl>

Arrange rows

# Sort by daily low, highest first; ties are broken by the most recent date.
stocks %>%
    arrange(desc(low), desc(date))
## # A tibble: 3,878 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 HD     2021-12-07  417   419.  413.  416. 4204500     398.
##  2 HD     2021-12-06  414.  421.  411.  416. 4624500     397.
##  3 HD     2021-12-10  412.  416.  410.  415. 2625200     397.
##  4 HD     2021-12-31  410.  417.  410.  415. 2386100     397.
##  5 HD     2021-11-22  410.  417.  409.  409. 4006400     389.
##  6 HD     2021-12-30  412.  413.  408.  410. 1804400     392.
##  7 HD     2021-12-09  411.  415.  408.  411. 3822900     393.
##  8 HD     2021-12-08  416.  420.  408.  411. 4079400     393.
##  9 HD     2022-01-04  409.  414.  408.  413. 3266500     394.
## 10 HD     2022-01-05  414   414.  407.  407. 4443200     389.
## # ℹ 3,868 more rows

Select columns

# Keep only the daily-high column.
stocks %>%
    select(high)
## # A tibble: 3,878 × 1
##     high
##    <dbl>
##  1  131.
##  2  132.
##  3  130.
##  4  128.
##  5  127.
##  6  126.
##  7  129.
##  8  127.
##  9  122.
## 10  121.
## # ℹ 3,868 more rows
# Keep the daily high, daily low and date columns, in that order.
stocks %>%
    select(high, low, date)
## # A tibble: 3,878 × 3
##     high   low date      
##    <dbl> <dbl> <date>    
##  1  131.  129. 2016-01-04
##  2  132.  130. 2016-01-05
##  3  130.  129. 2016-01-06
##  4  128.  125. 2016-01-07
##  5  127.  124. 2016-01-08
##  6  126.  124. 2016-01-11
##  7  129.  126. 2016-01-12
##  8  127.  121. 2016-01-13
##  9  122.  118. 2016-01-14
## 10  121.  116. 2016-01-15
## # ℹ 3,868 more rows

Add columns

# Add a `gain` column: the price change over the trading day (close - open).
# A new column name is used so the real `high` prices are left untouched.
stocks %>%
    mutate(gain = close - open) %>%

    # Keep symbol, date, and gain
    select(symbol:date, gain)

Just keep gain

# Compute the daily gain (close - open) and keep only that column.
# The column must be created under the name `gain` for select(gain) to find it.
stocks %>%
    mutate(gain = close - open) %>%

    # Keep only gain
    select(gain)

Alternative using transmute()

# transmute() creates the new column and drops all others in one step, so the
# result holds only the daily gain (close - open).
transmute(stocks, gain = close - open)

# Summarize by groups

Collapsing data to a single row

# Show the full data set again for reference before it is collapsed
stocks
## # A tibble: 3,878 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 HD     2016-01-04  130.  131.  129.  131.  8050900    109. 
##  2 HD     2016-01-05  131.  132.  130.  130.  5320900    109. 
##  3 HD     2016-01-06  129   130.  129.  129.  8199600    108. 
##  4 HD     2016-01-07  127.  128.  125.  125. 12551000    104. 
##  5 HD     2016-01-08  126.  127.  124.  124.  7104500    103. 
##  6 HD     2016-01-11  125.  126.  124.  126.  6811600    105. 
##  7 HD     2016-01-12  127.  129.  126.  127.  5695700    106. 
##  8 HD     2016-01-13  127.  127.  121.  121.  8932200    101. 
##  9 HD     2016-01-14  121.  122.  118.  120. 13389100     99.7
## 10 HD     2016-01-15  117.  121.  116.  119. 10515000     99.3
## # ℹ 3,868 more rows
# Average daily low over every row (both tickers pooled), ignoring NAs.
# The result is named avg_low so the column label says what it holds.
summarise(stocks, avg_low = mean(low, na.rm = TRUE))
## # A tibble: 1 × 1
##   avg_low
##     <dbl>
## 1    182.

Summarize by group

stocks %>%

    # One group per ticker
    group_by(symbol) %>%

    # Average daily low per ticker, written to its own column so the ticker
    # label in `symbol` is kept in the result
    summarise(avg_low = mean(low, na.rm = TRUE)) %>%

    # Sort from lowest to highest average
    arrange(avg_low)
## # A tibble: 2 × 2
##   symbol avg_low
##   <chr>    <dbl>
## 1 LOW       134.
## 2 HD        229.

Group by stock: 52-week high and low

stocks %>%

    # One group per ticker and calendar year; the calendar year stands in for
    # the 52-week window
    group_by(symbol, year = format(date, "%Y")) %>%

    # Trading days in the group, highest daily high and lowest daily low
    summarise(count = n(),
              high = max(high, na.rm = TRUE),
              low = min(low, na.rm = TRUE),
              .groups = "drop") %>%

    # Plot yearly low against yearly high, one point per ticker-year
    ggplot(mapping = aes(x = high, y = low)) +
    geom_point(aes(size = count, color = symbol), alpha = 0.3) +
    geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Missing values

# Drop any rows where the daily high is missing
filter(stocks, !is.na(high))
## # A tibble: 3,878 × 8
##    symbol date        open  high   low close   volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>    <dbl>    <dbl>
##  1 HD     2016-01-04  130.  131.  129.  131.  8050900    109. 
##  2 HD     2016-01-05  131.  132.  130.  130.  5320900    109. 
##  3 HD     2016-01-06  129   130.  129.  129.  8199600    108. 
##  4 HD     2016-01-07  127.  128.  125.  125. 12551000    104. 
##  5 HD     2016-01-08  126.  127.  124.  124.  7104500    103. 
##  6 HD     2016-01-11  125.  126.  124.  126.  6811600    105. 
##  7 HD     2016-01-12  127.  129.  126.  127.  5695700    106. 
##  8 HD     2016-01-13  127.  127.  121.  121.  8932200    101. 
##  9 HD     2016-01-14  121.  122.  118.  120. 13389100     99.7
## 10 HD     2016-01-15  117.  121.  116.  119. 10515000     99.3
## # ℹ 3,868 more rows

Grouping multiple variables

# Count the rows of the whole table.
# The multi-column grouping is still disabled:
#   group_by(date, high, low)
# so summarise() sees a single implicit group and ungroup() changes nothing.
ungroup(
    summarise(stocks, count = n())
)
## # A tibble: 1 × 1
##   count
##   <int>
## 1  3878