Import stock prices
# Fetch "stock.prices" data for the TSLA and AMZN tickers from 2016-01-01
# onward with tq_get(), store the result in `stocks`, then print it.
stocks <- tq_get(
  x = c("TSLA", "AMZN"),
  get = "stock.prices",
  from = "2016-01-01"
)
stocks
## # A tibble: 3,878 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TSLA 2016-01-04 15.4 15.4 14.6 14.9 102406500 14.9
## 2 TSLA 2016-01-05 15.1 15.1 14.7 14.9 47802000 14.9
## 3 TSLA 2016-01-06 14.7 14.7 14.4 14.6 56686500 14.6
## 4 TSLA 2016-01-07 14.3 14.6 14.2 14.4 53314500 14.4
## 5 TSLA 2016-01-08 14.5 14.7 14.1 14.1 54421500 14.1
## 6 TSLA 2016-01-11 14.3 14.3 13.5 13.9 61371000 13.9
## 7 TSLA 2016-01-12 14.1 14.2 13.7 14.0 46378500 14.0
## 8 TSLA 2016-01-13 14.1 14.2 13.3 13.4 61896000 13.4
## 9 TSLA 2016-01-14 13.5 14 12.9 13.7 97360500 13.7
## 10 TSLA 2016-01-15 13.3 13.7 13.1 13.7 83679000 13.7
## # ℹ 3,868 more rows
Apply the dplyr verbs you learned in chapter 5
Filter rows
# Keep only the rows whose daily high and daily low both equal exactly 1.
# NOTE(review): this matches no rows in `stocks`; exact `==` on doubles is
# also fragile, so consider a range test or dplyr::near() if a real subset
# is wanted.
stocks %>%
  filter(high == 1 & low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## # close <dbl>, volume <dbl>, adjusted <dbl>
Arrange rows
# Sort by daily low from largest to smallest; rows with equal lows are
# ordered with the most recent date first.
stocks %>%
  arrange(desc(low), desc(date))
## # A tibble: 3,878 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TSLA 2021-11-04 411. 414. 406. 410. 76192200 410.
## 2 TSLA 2021-11-05 409. 413. 403. 407. 64886400 407.
## 3 TSLA 2021-11-03 392. 405. 384. 405. 103885500 405.
## 4 TSLA 2021-11-02 386. 403. 382 391. 128213400 391.
## 5 TSLA 2022-01-03 383. 400. 379. 400. 103931400 400.
## 6 TSLA 2021-11-08 383. 399 378. 388. 100337100 388.
## 7 TSLA 2021-11-22 387. 401. 377. 386. 99217500 386.
## 8 TSLA 2022-01-04 397. 403. 374. 383. 100248300 383.
## 9 TSLA 2021-11-01 382. 403. 373. 403. 168146100 403.
## 10 TSLA 2021-11-30 381. 389. 373. 382. 81276000 382.
## # ℹ 3,868 more rows
Select columns
# Keep only the `high` column.
stocks %>%
  select(high)
## # A tibble: 3,878 × 1
## high
## <dbl>
## 1 15.4
## 2 15.1
## 3 14.7
## 4 14.6
## 5 14.7
## 6 14.3
## 7 14.2
## 8 14.2
## 9 14
## 10 13.7
## # ℹ 3,868 more rows
# Keep `high`, `low` and `date`, in that order.
stocks %>%
  select(high, low, date)
## # A tibble: 3,878 × 3
## high low date
## <dbl> <dbl> <date>
## 1 15.4 14.6 2016-01-04
## 2 15.1 14.7 2016-01-05
## 3 14.7 14.4 2016-01-06
## 4 14.6 14.2 2016-01-07
## 5 14.7 14.1 2016-01-08
## 6 14.3 13.5 2016-01-11
## 7 14.2 13.7 2016-01-12
## 8 14.2 13.3 2016-01-13
## 9 14 12.9 2016-01-14
## 10 13.7 13.1 2016-01-15
## # ℹ 3,868 more rows
Add columns
# Overwrite `high` with `date - low`, i.e. the date shifted back by `low`
# days, so the column becomes a Date.
# `high:date` then walks the column order from `high` back to `date`, which
# keeps high, open and date.
# NOTE(review): replacing a price column with a date offset looks
# unintentional; confirm the intended calculation.
stocks %>%
  mutate(high = date - low) %>%
  select(high:date)
## # A tibble: 3,878 × 3
## high open date
## <date> <dbl> <date>
## 1 2015-12-20 15.4 2016-01-04
## 2 2015-12-21 15.1 2016-01-05
## 3 2015-12-22 14.7 2016-01-06
## 4 2015-12-23 14.3 2016-01-07
## 5 2015-12-24 14.5 2016-01-08
## 6 2015-12-28 14.3 2016-01-11
## 7 2015-12-29 14.1 2016-01-12
## 8 2015-12-30 14.1 2016-01-13
## 9 2016-01-01 13.5 2016-01-14
## 10 2016-01-01 13.3 2016-01-15
## # ℹ 3,868 more rows
Just keep gain
# Recompute `high` as `date - low`, then keep only the `adjusted` column.
# NOTE(review): select() discards the freshly mutated `high`, so the result
# is just the untouched `adjusted` column; confirm which column was meant
# to be kept.
stocks %>%
  mutate(high = date - low) %>%
  select(adjusted)
## # A tibble: 3,878 × 1
## adjusted
## <dbl>
## 1 14.9
## 2 14.9
## 3 14.6
## 4 14.4
## 5 14.1
## 6 13.9
## 7 14.0
## 8 13.4
## 9 13.7
## 10 13.7
## # ℹ 3,868 more rows
alternative using transmute()
# transmute() computes `high = date - low` and drops every other column in
# a single step.
stocks %>%
  transmute(high = date - low)
## # A tibble: 3,878 × 1
## high
## <date>
## 1 2015-12-20
## 2 2015-12-21
## 3 2015-12-22
## 4 2015-12-23
## 5 2015-12-24
## 6 2015-12-28
## 7 2015-12-29
## 8 2015-12-30
## 9 2016-01-01
## 10 2016-01-01
## # ℹ 3,868 more rows
Summarise with groups
# Print the tibble again for reference before summarising it.
stocks
## # A tibble: 3,878 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TSLA 2016-01-04 15.4 15.4 14.6 14.9 102406500 14.9
## 2 TSLA 2016-01-05 15.1 15.1 14.7 14.9 47802000 14.9
## 3 TSLA 2016-01-06 14.7 14.7 14.4 14.6 56686500 14.6
## 4 TSLA 2016-01-07 14.3 14.6 14.2 14.4 53314500 14.4
## 5 TSLA 2016-01-08 14.5 14.7 14.1 14.1 54421500 14.1
## 6 TSLA 2016-01-11 14.3 14.3 13.5 13.9 61371000 13.9
## 7 TSLA 2016-01-12 14.1 14.2 13.7 14.0 46378500 14.0
## 8 TSLA 2016-01-13 14.1 14.2 13.3 13.4 61896000 13.4
## 9 TSLA 2016-01-14 13.5 14 12.9 13.7 97360500 13.7
## 10 TSLA 2016-01-15 13.3 13.7 13.1 13.7 83679000 13.7
## # ℹ 3,868 more rows
# Average daily low across every row of `stocks` (all symbols pooled, since
# no grouping is applied). `na.rm = TRUE` drops missing lows before
# averaging. The result column is named `avg_low` so the one-row summary is
# labelled by what it holds instead of reusing the name of the `date` column.
stocks %>%
  summarise(avg_low = mean(low, na.rm = TRUE))
## # A tibble: 1 × 1
## avg_low
## <dbl>
## 1 102.
collapsing data to a single row
# Print the tibble again for reference before collapsing it to one row.
stocks
## # A tibble: 3,878 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TSLA 2016-01-04 15.4 15.4 14.6 14.9 102406500 14.9
## 2 TSLA 2016-01-05 15.1 15.1 14.7 14.9 47802000 14.9
## 3 TSLA 2016-01-06 14.7 14.7 14.4 14.6 56686500 14.6
## 4 TSLA 2016-01-07 14.3 14.6 14.2 14.4 53314500 14.4
## 5 TSLA 2016-01-08 14.5 14.7 14.1 14.1 54421500 14.1
## 6 TSLA 2016-01-11 14.3 14.3 13.5 13.9 61371000 13.9
## 7 TSLA 2016-01-12 14.1 14.2 13.7 14.0 46378500 14.0
## 8 TSLA 2016-01-13 14.1 14.2 13.3 13.4 61896000 13.4
## 9 TSLA 2016-01-14 13.5 14 12.9 13.7 97360500 13.7
## 10 TSLA 2016-01-15 13.3 13.7 13.1 13.7 83679000 13.7
## # ℹ 3,868 more rows
# Collapse `stocks` to a single row holding the average daily low over all
# rows (no grouping, so every symbol is pooled). `na.rm = TRUE` drops
# missing lows before averaging. The result column is named `avg_low` so it
# is labelled by what it holds instead of reusing the name of the `date`
# column.
stocks %>%
  summarise(avg_low = mean(low, na.rm = TRUE))
## # A tibble: 1 × 1
## avg_low
## <dbl>
## 1 102.
summarise by group
# Mean daily low per ticker, sorted from smallest to largest.
# NOTE(review): the summary is assigned to `symbol`, which replaces the
# grouping key. The result is therefore a single numeric column and no
# longer shows which ticker each mean belongs to; a separate output name
# (e.g. `avg_low`) would keep the labels.
stocks %>%
  # one group per ticker
  group_by(symbol) %>%
  # per-group mean of `low`, ignoring missing values
  summarise(symbol = mean(low, na.rm = TRUE)) %>%
  # ascending by the computed mean
  arrange(symbol)
## # A tibble: 2 × 1
## symbol
## <dbl>
## 1 97.8
## 2 106.
Group by stock: 52-week high and low
# Group rows by their `high` value; for each group record its row count and
# the mean adjusted price, then scatter-plot the summary with point size
# mapped to the count and a smoothed trend line (no confidence band).
# NOTE(review): `high` and `low` are computed by the same expression
# (mean of `adjusted`), so x equals y for every point, and `high` also
# overwrites the grouping key. Confirm the intended per-group statistics.
stocks %>%
  group_by(high) %>%
  summarise(
    count = n(),
    high = mean(adjusted, na.rm = TRUE),
    low = mean(adjusted, na.rm = TRUE)
  ) %>%
  ggplot(aes(x = high, y = low)) +
  geom_point(mapping = aes(size = count), alpha = 0.3) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

missing values
# Drop any row whose `high` is missing; all other rows pass through
# unchanged.
filter(stocks, !is.na(high))
## # A tibble: 3,878 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 TSLA 2016-01-04 15.4 15.4 14.6 14.9 102406500 14.9
## 2 TSLA 2016-01-05 15.1 15.1 14.7 14.9 47802000 14.9
## 3 TSLA 2016-01-06 14.7 14.7 14.4 14.6 56686500 14.6
## 4 TSLA 2016-01-07 14.3 14.6 14.2 14.4 53314500 14.4
## 5 TSLA 2016-01-08 14.5 14.7 14.1 14.1 54421500 14.1
## 6 TSLA 2016-01-11 14.3 14.3 13.5 13.9 61371000 13.9
## 7 TSLA 2016-01-12 14.1 14.2 13.7 14.0 46378500 14.0
## 8 TSLA 2016-01-13 14.1 14.2 13.3 13.4 61896000 13.4
## 9 TSLA 2016-01-14 13.5 14 12.9 13.7 97360500 13.7
## 10 TSLA 2016-01-15 13.3 13.7 13.1 13.7 83679000 13.7
## # ℹ 3,868 more rows
grouping multiple variables
# Count rows of `stocks`.
# NOTE(review): the multi-variable grouping below is commented out, so
# summarise() sees ungrouped data and returns a single total rather than
# one count per (date, high, low) combination.
stocks %>%
  # group_by(date, high, low) %>%
  summarise(count = n()) %>%
  # no-op while the data are ungrouped
  ungroup()
## # A tibble: 1 × 1
## count
## <int>
## 1 3878