Apply the dplyr verbs you learned in Chapter 5
Filter rows
# Keep rows where both conditions hold; comma-separated conditions are ANDed.
stocks %>%
  filter(high == 1, low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## # close <dbl>, volume <dbl>, adjusted <dbl>
# Same AND filter, written with an explicit `&`.
stocks %>%
  filter(high == 1 & low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## # close <dbl>, volume <dbl>, adjusted <dbl>
# Keep rows where either condition holds (`|` is the element-wise OR).
stocks %>%
  filter(high == 1 | low == 1)
## # A tibble: 0 × 8
## # ℹ 8 variables: symbol <chr>, date <date>, open <dbl>, high <dbl>, low <dbl>,
## # close <dbl>, volume <dbl>, adjusted <dbl>
Arrange Rows
# Sort by the daily high, largest first; break ties with the most recent date.
# The tie-breaker must be the `date` column: `date()` with parentheses calls
# base R's date(), which returns the current timestamp as a single string and
# therefore has no effect on the ordering.
arrange(stocks, desc(high), desc(date))
## # A tibble: 5,964 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NVDA 2023-11-20 493. 505. 492. 504. 41412000 504.
## 2 NVDA 2023-11-21 501. 505. 492. 499. 56574700 499.
## 3 NVDA 2023-11-22 499. 503. 477. 487. 89942000 487.
## 4 NVDA 2023-08-24 502. 503. 472. 472. 115604400 472.
## 5 NVDA 2023-11-15 499. 500. 482 489. 47549700 489.
## 6 NVDA 2023-08-30 490. 499. 484. 493. 73520600 493.
## 7 NVDA 2023-11-14 497. 498. 490. 497. 41695400 497.
## 8 NVDA 2023-09-01 498. 498 481. 485. 46319100 485.
## 9 NVDA 2023-08-31 494. 497. 490. 494. 52857000 494.
## 10 NVDA 2023-11-17 495. 497. 490. 493. 32520500 493.
## # ℹ 5,954 more rows
Select Columns
# Keep only the `high` column.
stocks %>%
  select(high)
## # A tibble: 5,964 × 1
## high
## <dbl>
## 1 54.8
## 2 55.4
## 3 54.4
## 4 53.5
## 5 53.3
## 6 52.8
## 7 53.1
## 8 54.1
## 9 53.4
## 10 52.0
## # ℹ 5,954 more rows
# Keep three columns, in the order listed.
stocks %>%
  select(high, low, date)
## # A tibble: 5,964 × 3
## high low date
## <dbl> <dbl> <date>
## 1 54.8 53.4 2016-01-04
## 2 55.4 54.5 2016-01-05
## 3 54.4 53.6 2016-01-06
## 4 53.5 52.1 2016-01-07
## 5 53.3 52.2 2016-01-08
## 6 52.8 51.5 2016-01-11
## 7 53.1 52.1 2016-01-12
## 8 54.1 51.3 2016-01-13
## 9 53.4 51.6 2016-01-14
## 10 52.0 50.3 2016-01-15
## # ℹ 5,954 more rows
# Same three-column selection, with the columns passed as one c() vector.
select(stocks, c(high, low, date))
## # A tibble: 5,964 × 3
## high low date
## <dbl> <dbl> <date>
## 1 54.8 53.4 2016-01-04
## 2 55.4 54.5 2016-01-05
## 3 54.4 53.6 2016-01-06
## 4 53.5 52.1 2016-01-07
## 5 53.3 52.2 2016-01-08
## 6 52.8 51.5 2016-01-11
## 7 53.1 52.1 2016-01-12
## 8 54.1 51.3 2016-01-13
## 9 53.4 51.6 2016-01-14
## 10 52.0 50.3 2016-01-15
## # ℹ 5,954 more rows
Add Columns
# Overwrite `high` with `date - volume`. Subtracting a number from a Date
# shifts it back by that many days, which is why the output shows dates far
# in the past.
stocks %>%
  mutate(high = date - volume) %>%
  # Keep symbol, date, and the recomputed high.
  select(symbol:date, high)
## # A tibble: 5,964 × 3
## symbol date high
## <chr> <date> <date>
## 1 MSFT 2016-01-04 -145224-11-05
## 2 MSFT 2016-01-05 -91292-12-18
## 3 MSFT 2016-01-06 -106184-12-10
## 4 MSFT 2016-01-07 -152854-08-01
## 5 MSFT 2016-01-08 -131468-02-07
## 6 MSFT 2016-01-11 -99133-05-04
## 7 MSFT 2016-01-12 -96811-11-28
## 8 MSFT 2016-01-13 -181106-12-15
## 9 MSFT 2016-01-14 -141401-04-07
## 10 MSFT 2016-01-15 -194623-08-22
## # ℹ 5,954 more rows
# Recompute `high` as `date - low`, then keep a single column.
# NOTE(review): select(low) returns the original `low` column, so the `high`
# computed in mutate() is dropped from the result -- confirm whether
# select(high) was intended.
stocks %>%
  mutate(high = date - low) %>%
  select(low)
## # A tibble: 5,964 × 1
## low
## <dbl>
## 1 53.4
## 2 54.5
## 3 53.6
## 4 52.1
## 5 52.2
## 6 51.5
## 7 52.1
## 8 51.3
## 9 51.6
## 10 50.3
## # ℹ 5,954 more rows
# transmute() creates the new column and drops all the others in one step.
stocks %>%
  transmute(high = date - low)
## # A tibble: 5,964 × 1
## high
## <date>
## 1 2015-11-11
## 2 2015-11-11
## 3 2015-11-13
## 4 2015-11-15
## 5 2015-11-16
## 6 2015-11-20
## 7 2015-11-20
## 8 2015-11-22
## 9 2015-11-23
## 10 2015-11-25
## # ℹ 5,954 more rows
Summarize with groups
# Print the whole tibble as a reference point before summarising.
stocks
## # A tibble: 5,964 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MSFT 2016-01-04 54.3 54.8 53.4 54.8 53778000 48.7
## 2 MSFT 2016-01-05 54.9 55.4 54.5 55.0 34079700 48.9
## 3 MSFT 2016-01-06 54.3 54.4 53.6 54.0 39518900 48.0
## 4 MSFT 2016-01-07 52.7 53.5 52.1 52.2 56564900 46.4
## 5 MSFT 2016-01-08 52.4 53.3 52.2 52.3 48754000 46.5
## 6 MSFT 2016-01-11 52.5 52.8 51.5 52.3 36943800 46.5
## 7 MSFT 2016-01-12 52.8 53.1 52.1 52.8 36095500 46.9
## 8 MSFT 2016-01-13 53.8 54.1 51.3 51.6 66883600 45.9
## 9 MSFT 2016-01-14 52 53.4 51.6 53.1 52381900 47.2
## 10 MSFT 2016-01-15 51.3 52.0 50.3 51.0 71820700 45.3
## # ℹ 5,954 more rows
# Mean of `low` over every row (all symbols together), ignoring NAs.
# NOTE(review): the result column is named `date` although it holds a mean
# price, not a date.
stocks %>%
  summarise(date = mean(low, na.rm = TRUE))
## # A tibble: 1 × 1
## date
## <dbl>
## 1 134.
# Mean of `low` per stock, sorted from smallest to largest.
# NOTE(review): the summary is stored under the name `symbol`, so the ticker
# labels are replaced by the averages in the one-column result.
stocks %>%
  # One group per ticker
  group_by(symbol) %>%
  # Average low within each group, ignoring NAs
  summarise(symbol = mean(low, na.rm = TRUE)) %>%
  # Ascending order of the averages
  arrange(symbol)
## # A tibble: 3 × 1
## symbol
## <dbl>
## 1 113.
## 2 118.
## 3 172.
Group by stock 52-week high and low
# Summarise per distinct `high` value, then plot the summary.
# NOTE(review): `high` and `low` are both set to mean(adjusted), so the x and
# y values of every plotted point are identical -- confirm this is intended.
stocks %>%
  group_by(high) %>%
  summarise(
    count = n(),
    high = mean(adjusted, na.rm = TRUE),
    low = mean(adjusted, na.rm = TRUE)
  ) %>%
  # Scatter plot sized by group count, with a smoothed trend line (no band)
  ggplot(mapping = aes(x = high, y = low)) +
  geom_point(aes(size = count), alpha = 0.3) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Missing Values
# Drop rows whose `high` is NA; the row count is unchanged in the output, so
# no row has a missing `high`.
filter(stocks, !is.na(high))
## # A tibble: 5,964 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MSFT 2016-01-04 54.3 54.8 53.4 54.8 53778000 48.7
## 2 MSFT 2016-01-05 54.9 55.4 54.5 55.0 34079700 48.9
## 3 MSFT 2016-01-06 54.3 54.4 53.6 54.0 39518900 48.0
## 4 MSFT 2016-01-07 52.7 53.5 52.1 52.2 56564900 46.4
## 5 MSFT 2016-01-08 52.4 53.3 52.2 52.3 48754000 46.5
## 6 MSFT 2016-01-11 52.5 52.8 51.5 52.3 36943800 46.5
## 7 MSFT 2016-01-12 52.8 53.1 52.1 52.8 36095500 46.9
## 8 MSFT 2016-01-13 53.8 54.1 51.3 51.6 66883600 45.9
## 9 MSFT 2016-01-14 52 53.4 51.6 53.1 52381900 47.2
## 10 MSFT 2016-01-15 51.3 52.0 50.3 51.0 71820700 45.3
## # ℹ 5,954 more rows
Grouping multiple variables
# Count rows for every (date, high, low) combination.
stocks %>%
  group_by(date, high, low) %>%
  # summarise() peels off only the last grouping level (see the message in
  # the output), so the remaining groups are removed explicitly afterwards.
  summarise(count = n()) %>%
  ungroup()
## `summarise()` has grouped output by 'date', 'high'. You can override using the
## `.groups` argument.
## # A tibble: 5,964 × 4
## date high low count
## <date> <dbl> <dbl> <int>
## 1 2016-01-04 8.15 8.01 1
## 2 2016-01-04 54.8 53.4 1
## 3 2016-01-04 64.1 63.0 1
## 4 2016-01-05 8.36 8.12 1
## 5 2016-01-05 55.4 54.5 1
## 6 2016-01-05 64.1 63.0 1
## 7 2016-01-06 8.12 7.79 1
## 8 2016-01-06 54.4 53.6 1
## 9 2016-01-06 63.1 62.3 1
## 10 2016-01-07 7.74 7.47 1
## # ℹ 5,954 more rows