Import stock prices

stocks <- tq_get(c("TSLA", "AMZN", "AAPL", "NVDA", "PG"),
                 get = "stock.prices",
                 from = "2016-01-01")
stocks
## # A tibble: 12,165 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 TSLA   2016-01-04  15.4  15.4  14.6  14.9 102406500     14.9
##  2 TSLA   2016-01-05  15.1  15.1  14.7  14.9  47802000     14.9
##  3 TSLA   2016-01-06  14.7  14.7  14.4  14.6  56686500     14.6
##  4 TSLA   2016-01-07  14.3  14.6  14.2  14.4  53314500     14.4
##  5 TSLA   2016-01-08  14.5  14.7  14.1  14.1  54421500     14.1
##  6 TSLA   2016-01-11  14.3  14.3  13.5  13.9  61371000     13.9
##  7 TSLA   2016-01-12  14.1  14.2  13.7  14.0  46378500     14.0
##  8 TSLA   2016-01-13  14.1  14.2  13.3  13.4  61896000     13.4
##  9 TSLA   2016-01-14  13.5  14    12.9  13.7  97360500     13.7
## 10 TSLA   2016-01-15  13.3  13.7  13.1  13.7  83679000     13.7
## # ℹ 12,155 more rows

Plot stock prices

stocks %>%
    
    ggplot(aes(x = date, y = adjusted, color = symbol)) +
    geom_line()

Apply the dplyr verbs you learned in chapter 5

Filter rows

filtered_stocks <- stocks %>% 
    filter(date >= "2022-01-01" | open - close <= 2)
filtered_stocks
## # A tibble: 11,794 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 TSLA   2016-01-04  15.4  15.4  14.6  14.9 102406500     14.9
##  2 TSLA   2016-01-05  15.1  15.1  14.7  14.9  47802000     14.9
##  3 TSLA   2016-01-06  14.7  14.7  14.4  14.6  56686500     14.6
##  4 TSLA   2016-01-07  14.3  14.6  14.2  14.4  53314500     14.4
##  5 TSLA   2016-01-08  14.5  14.7  14.1  14.1  54421500     14.1
##  6 TSLA   2016-01-11  14.3  14.3  13.5  13.9  61371000     13.9
##  7 TSLA   2016-01-12  14.1  14.2  13.7  14.0  46378500     14.0
##  8 TSLA   2016-01-13  14.1  14.2  13.3  13.4  61896000     13.4
##  9 TSLA   2016-01-14  13.5  14    12.9  13.7  97360500     13.7
## 10 TSLA   2016-01-15  13.3  13.7  13.1  13.7  83679000     13.7
## # ℹ 11,784 more rows

Arrange Rows

arrange(stocks, desc(date), desc(high))
## # A tibble: 12,165 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 TSLA   2025-09-05  348   356.  345.  351. 108989800     351.
##  2 AAPL   2025-09-05  240   241.  238.  240.  54870400     240.
##  3 AMZN   2025-09-05  235.  236   232.  232.  36721800     232.
##  4 NVDA   2025-09-05  168.  169.  164.  167. 224441400     167.
##  5 PG     2025-09-05  159.  161.  158.  160.   6436200     160.
##  6 TSLA   2025-09-04  336.  339.  331.  339.  60711000     339.
##  7 AAPL   2025-09-04  238.  240.  237.  240.  47549400     240.
##  8 AMZN   2025-09-04  231.  236.  231.  236.  59391800     236.
##  9 NVDA   2025-09-04  171.  172.  169.  172. 141670100     172.
## 10 PG     2025-09-04  159.  160.  158.  159.   6934900     159.
## # ℹ 12,155 more rows

Select columns

select(stocks, date, symbol, adjusted)
## # A tibble: 12,165 × 3
##    date       symbol adjusted
##    <date>     <chr>     <dbl>
##  1 2016-01-04 TSLA       14.9
##  2 2016-01-05 TSLA       14.9
##  3 2016-01-06 TSLA       14.6
##  4 2016-01-07 TSLA       14.4
##  5 2016-01-08 TSLA       14.1
##  6 2016-01-11 TSLA       13.9
##  7 2016-01-12 TSLA       14.0
##  8 2016-01-13 TSLA       13.4
##  9 2016-01-14 TSLA       13.7
## 10 2016-01-15 TSLA       13.7
## # ℹ 12,155 more rows

Add columns

mutate(stocks, 
       daily_returns = (adjusted - lag(adjusted))/lag(adjusted)) %>%
    
    # Select symbol, date, daily_returns
    select(symbol, date, daily_returns)
## # A tibble: 12,165 × 3
##    symbol date       daily_returns
##    <chr>  <date>             <dbl>
##  1 TSLA   2016-01-04    NA        
##  2 TSLA   2016-01-05     0.0000895
##  3 TSLA   2016-01-06    -0.0196   
##  4 TSLA   2016-01-07    -0.0155   
##  5 TSLA   2016-01-08    -0.0216   
##  6 TSLA   2016-01-11    -0.0149   
##  7 TSLA   2016-01-12     0.0102   
##  8 TSLA   2016-01-13    -0.0460   
##  9 TSLA   2016-01-14     0.0293   
## 10 TSLA   2016-01-15    -0.00577  
## # ℹ 12,155 more rows
# Using transmute() to only show daily_returns
transmute(stocks, daily_returns = (adjusted - lag(adjusted))/lag(adjusted))
## # A tibble: 12,165 × 1
##    daily_returns
##            <dbl>
##  1    NA        
##  2     0.0000895
##  3    -0.0196   
##  4    -0.0155   
##  5    -0.0216   
##  6    -0.0149   
##  7     0.0102   
##  8    -0.0460   
##  9     0.0293   
## 10    -0.00577  
## # ℹ 12,155 more rows

Summarize with groups

stocks %>%
    group_by(symbol) %>%
    summarise(count = n(), avg_monthly_return = mean((adjusted - lag(adjusted))/lag(adjusted), na.rm = TRUE))
## # A tibble: 5 × 3
##   symbol count avg_monthly_return
##   <chr>  <int>              <dbl>
## 1 AAPL    2433           0.00112 
## 2 AMZN    2433           0.00103 
## 3 NVDA    2433           0.00270 
## 4 PG      2433           0.000474
## 5 TSLA    2433           0.00200

Plot my findings

stocks %>%
  group_by(symbol) %>%
  summarise(avg_monthly_return = mean((adjusted - lag(adjusted))/lag(adjusted), na.rm = TRUE)) %>%
  ggplot(aes(x = symbol, y = avg_monthly_return)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(title = "Average Monthly Return by Stock", x = "Stock Symbol", y = "Average Monthly Return")