Import stock prices
stocks <- tq_get(c("TSLA", "AMZN", "AAPL", "NVDA", "PG"),
get = "stock.prices",
from = "2016-01-01")
stocks
## # A tibble: 12,165 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TSLA 2016-01-04 15.4 15.4 14.6 14.9 102406500 14.9
## 2 TSLA 2016-01-05 15.1 15.1 14.7 14.9 47802000 14.9
## 3 TSLA 2016-01-06 14.7 14.7 14.4 14.6 56686500 14.6
## 4 TSLA 2016-01-07 14.3 14.6 14.2 14.4 53314500 14.4
## 5 TSLA 2016-01-08 14.5 14.7 14.1 14.1 54421500 14.1
## 6 TSLA 2016-01-11 14.3 14.3 13.5 13.9 61371000 13.9
## 7 TSLA 2016-01-12 14.1 14.2 13.7 14.0 46378500 14.0
## 8 TSLA 2016-01-13 14.1 14.2 13.3 13.4 61896000 13.4
## 9 TSLA 2016-01-14 13.5 14 12.9 13.7 97360500 13.7
## 10 TSLA 2016-01-15 13.3 13.7 13.1 13.7 83679000 13.7
## # ℹ 12,155 more rows
Apply the dplyr verbs you learned in chapter 5
Filter rows
filtered_stocks <- stocks %>%
filter(date >= "2022-01-01" | open - close <= 2)
filtered_stocks
## # A tibble: 11,794 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TSLA 2016-01-04 15.4 15.4 14.6 14.9 102406500 14.9
## 2 TSLA 2016-01-05 15.1 15.1 14.7 14.9 47802000 14.9
## 3 TSLA 2016-01-06 14.7 14.7 14.4 14.6 56686500 14.6
## 4 TSLA 2016-01-07 14.3 14.6 14.2 14.4 53314500 14.4
## 5 TSLA 2016-01-08 14.5 14.7 14.1 14.1 54421500 14.1
## 6 TSLA 2016-01-11 14.3 14.3 13.5 13.9 61371000 13.9
## 7 TSLA 2016-01-12 14.1 14.2 13.7 14.0 46378500 14.0
## 8 TSLA 2016-01-13 14.1 14.2 13.3 13.4 61896000 13.4
## 9 TSLA 2016-01-14 13.5 14 12.9 13.7 97360500 13.7
## 10 TSLA 2016-01-15 13.3 13.7 13.1 13.7 83679000 13.7
## # ℹ 11,784 more rows
Arrange Rows
arrange(stocks, desc(date), desc(high))
## # A tibble: 12,165 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TSLA 2025-09-05 348 356. 345. 351. 108989800 351.
## 2 AAPL 2025-09-05 240 241. 238. 240. 54870400 240.
## 3 AMZN 2025-09-05 235. 236 232. 232. 36721800 232.
## 4 NVDA 2025-09-05 168. 169. 164. 167. 224441400 167.
## 5 PG 2025-09-05 159. 161. 158. 160. 6436200 160.
## 6 TSLA 2025-09-04 336. 339. 331. 339. 60711000 339.
## 7 AAPL 2025-09-04 238. 240. 237. 240. 47549400 240.
## 8 AMZN 2025-09-04 231. 236. 231. 236. 59391800 236.
## 9 NVDA 2025-09-04 171. 172. 169. 172. 141670100 172.
## 10 PG 2025-09-04 159. 160. 158. 159. 6934900 159.
## # ℹ 12,155 more rows
Select columns
select(stocks, date, symbol, adjusted)
## # A tibble: 12,165 × 3
## date symbol adjusted
## <date> <chr> <dbl>
## 1 2016-01-04 TSLA 14.9
## 2 2016-01-05 TSLA 14.9
## 3 2016-01-06 TSLA 14.6
## 4 2016-01-07 TSLA 14.4
## 5 2016-01-08 TSLA 14.1
## 6 2016-01-11 TSLA 13.9
## 7 2016-01-12 TSLA 14.0
## 8 2016-01-13 TSLA 13.4
## 9 2016-01-14 TSLA 13.7
## 10 2016-01-15 TSLA 13.7
## # ℹ 12,155 more rows
Add columns
mutate(stocks,
daily_returns = (adjusted - lag(adjusted))/lag(adjusted)) %>%
# Select symbol, date, daily_returns
select(symbol, date, daily_returns)
## # A tibble: 12,165 × 3
## symbol date daily_returns
## <chr> <date> <dbl>
## 1 TSLA 2016-01-04 NA
## 2 TSLA 2016-01-05 0.0000895
## 3 TSLA 2016-01-06 -0.0196
## 4 TSLA 2016-01-07 -0.0155
## 5 TSLA 2016-01-08 -0.0216
## 6 TSLA 2016-01-11 -0.0149
## 7 TSLA 2016-01-12 0.0102
## 8 TSLA 2016-01-13 -0.0460
## 9 TSLA 2016-01-14 0.0293
## 10 TSLA 2016-01-15 -0.00577
## # ℹ 12,155 more rows
# Using transmute() to only show daily_returns
transmute(stocks, daily_returns = (adjusted - lag(adjusted))/lag(adjusted))
## # A tibble: 12,165 × 1
## daily_returns
## <dbl>
## 1 NA
## 2 0.0000895
## 3 -0.0196
## 4 -0.0155
## 5 -0.0216
## 6 -0.0149
## 7 0.0102
## 8 -0.0460
## 9 0.0293
## 10 -0.00577
## # ℹ 12,155 more rows
Summarize with groups
stocks %>%
group_by(symbol) %>%
summarise(count = n(), avg_monthly_return = mean((adjusted - lag(adjusted))/lag(adjusted), na.rm = TRUE))
## # A tibble: 5 × 3
## symbol count avg_monthly_return
## <chr> <int> <dbl>
## 1 AAPL 2433 0.00112
## 2 AMZN 2433 0.00103
## 3 NVDA 2433 0.00270
## 4 PG 2433 0.000474
## 5 TSLA 2433 0.00200
Plot my findings
stocks %>%
group_by(symbol) %>%
summarise(avg_monthly_return = mean((adjusted - lag(adjusted))/lag(adjusted), na.rm = TRUE)) %>%
ggplot(aes(x = symbol, y = avg_monthly_return)) +
geom_bar(stat = "identity", fill = "steelblue") +
labs(title = "Average Monthly Return by Stock", x = "Stock Symbol", y = "Average Monthly Return")
