Import stock prices

stocks <- tq_get(c("TSLA", "META", "XOM", "AAPL", "PG", "AMZN"),
                 get = "stock.prices",
                 from = "2016-01-01")
stocks
## # A tibble: 11,118 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 TSLA   2016-01-04  15.4  15.4  14.6  14.9 102406500     14.9
##  2 TSLA   2016-01-05  15.1  15.1  14.7  14.9  47802000     14.9
##  3 TSLA   2016-01-06  14.7  14.7  14.4  14.6  56686500     14.6
##  4 TSLA   2016-01-07  14.3  14.6  14.2  14.4  53314500     14.4
##  5 TSLA   2016-01-08  14.5  14.7  14.1  14.1  54421500     14.1
##  6 TSLA   2016-01-11  14.3  14.3  13.5  13.9  61371000     13.9
##  7 TSLA   2016-01-12  14.1  14.2  13.7  14.0  46378500     14.0
##  8 TSLA   2016-01-13  14.1  14.2  13.3  13.4  61896000     13.4
##  9 TSLA   2016-01-14  13.5  14    12.9  13.7  97360500     13.7
## 10 TSLA   2016-01-15  13.3  13.7  13.1  13.7  83679000     13.7
## # ℹ 11,108 more rows

Plot stock prices

stocks %>%
    
    ggplot(aes(x = date, y = adjusted, color = symbol)) +
    geom_line() +
    labs(title = "Stock Prices", x = "Date", y = "Adjusted Prices")

Apply the dplyr verbs you learned in chapter 5

Filter rows

filtered_stocks <- stocks %>% 
    filter(date >= "2022-01-01" | open - close <= 2)
filtered_stocks
## # A tibble: 10,451 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 TSLA   2016-01-04  15.4  15.4  14.6  14.9 102406500     14.9
##  2 TSLA   2016-01-05  15.1  15.1  14.7  14.9  47802000     14.9
##  3 TSLA   2016-01-06  14.7  14.7  14.4  14.6  56686500     14.6
##  4 TSLA   2016-01-07  14.3  14.6  14.2  14.4  53314500     14.4
##  5 TSLA   2016-01-08  14.5  14.7  14.1  14.1  54421500     14.1
##  6 TSLA   2016-01-11  14.3  14.3  13.5  13.9  61371000     13.9
##  7 TSLA   2016-01-12  14.1  14.2  13.7  14.0  46378500     14.0
##  8 TSLA   2016-01-13  14.1  14.2  13.3  13.4  61896000     13.4
##  9 TSLA   2016-01-14  13.5  14    12.9  13.7  97360500     13.7
## 10 TSLA   2016-01-15  13.3  13.7  13.1  13.7  83679000     13.7
## # ℹ 10,441 more rows

Arrange rows

arrange(stocks, desc(date), desc(high))
## # A tibble: 11,118 × 8
##    symbol date        open  high   low close    volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
##  1 META   2023-05-12  237.  237.  231.  234.  16155300     234.
##  2 TSLA   2023-05-12  176.  177.  167.  168. 157577100     168.
##  3 AAPL   2023-05-12  174.  174.  171   173.  45497800     173.
##  4 PG     2023-05-12  155.  156.  154.  156.   5252200     156.
##  5 AMZN   2023-05-12  112.  113.  109.  110.  49810100     110.
##  6 XOM    2023-05-12  106.  107.  105.  106.  12608300     106.
##  7 META   2023-05-11  233.  238.  232.  236.  20449000     236.
##  8 AAPL   2023-05-11  174.  175.  172.  174.  49514700     174.
##  9 TSLA   2023-05-11  169.  174.  167.  172. 103889900     172.
## 10 PG     2023-05-11  155.  155.  153.  154.   3495300     154.
## # ℹ 11,108 more rows

Select columns

select(stocks, date, symbol, adjusted)
## # A tibble: 11,118 × 3
##    date       symbol adjusted
##    <date>     <chr>     <dbl>
##  1 2016-01-04 TSLA       14.9
##  2 2016-01-05 TSLA       14.9
##  3 2016-01-06 TSLA       14.6
##  4 2016-01-07 TSLA       14.4
##  5 2016-01-08 TSLA       14.1
##  6 2016-01-11 TSLA       13.9
##  7 2016-01-12 TSLA       14.0
##  8 2016-01-13 TSLA       13.4
##  9 2016-01-14 TSLA       13.7
## 10 2016-01-15 TSLA       13.7
## # ℹ 11,108 more rows

Add columns

mutate(stocks, 
       daily_returns = (adjusted - lag(adjusted))/lag(adjusted)) %>%
    
    # Select symbol, date, daily_returns
    select(symbol, date, daily_returns)
## # A tibble: 11,118 × 3
##    symbol date       daily_returns
##    <chr>  <date>             <dbl>
##  1 TSLA   2016-01-04    NA        
##  2 TSLA   2016-01-05     0.0000895
##  3 TSLA   2016-01-06    -0.0196   
##  4 TSLA   2016-01-07    -0.0155   
##  5 TSLA   2016-01-08    -0.0216   
##  6 TSLA   2016-01-11    -0.0149   
##  7 TSLA   2016-01-12     0.0102   
##  8 TSLA   2016-01-13    -0.0460   
##  9 TSLA   2016-01-14     0.0293   
## 10 TSLA   2016-01-15    -0.00577  
## # ℹ 11,108 more rows
# Using transmute() to only show daily_returns
transmute(stocks, daily_returns = (adjusted - lag(adjusted))/lag(adjusted))
## # A tibble: 11,118 × 1
##    daily_returns
##            <dbl>
##  1    NA        
##  2     0.0000895
##  3    -0.0196   
##  4    -0.0155   
##  5    -0.0216   
##  6    -0.0149   
##  7     0.0102   
##  8    -0.0460   
##  9     0.0293   
## 10    -0.00577  
## # ℹ 11,108 more rows

Summarise with groups

stocks %>%
    group_by(symbol) %>%
    summarise(count = n(), avg_monthly_return = mean((adjusted - lag(adjusted))/lag(adjusted), na.rm = TRUE))
## # A tibble: 6 × 3
##   symbol count avg_monthly_return
##   <chr>  <int>              <dbl>
## 1 AAPL    1853           0.00124 
## 2 AMZN    1853           0.000892
## 3 META    1853           0.000763
## 4 PG      1853           0.000561
## 5 TSLA    1853           0.00200 
## 6 XOM     1853           0.000529

Plotting my findings

stocks %>%
  group_by(symbol) %>%
  summarise(avg_monthly_return = mean((adjusted - lag(adjusted))/lag(adjusted), na.rm = TRUE)) %>%
  ggplot(aes(x = symbol, y = avg_monthly_return)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(title = "Average Monthly Return by Stock", x = "Stock Symbol", y = "Average Monthly Return")