Apply the dplyr verbs you learned in chapter 5
Filter rows
# Preview the rows of `stocks` whose symbol is "GD".
stocks %>%
  filter(symbol == "GD")
## # A tibble: 2,516 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 GD 2010-01-04 68.5 69.2 68.2 69.2 1198500 51.1
## 2 GD 2010-01-05 69.3 69.8 68.8 69.3 1535400 51.2
## 3 GD 2010-01-06 69.0 69.4 68.8 69.2 1149700 51.1
## 4 GD 2010-01-07 69.1 69.5 68.7 69.4 1262300 51.3
## 5 GD 2010-01-08 69.4 69.7 68.9 69.4 1582000 51.3
## 6 GD 2010-01-11 69.8 70.8 69.7 70.7 1508400 52.2
## 7 GD 2010-01-12 70.3 70.7 69.8 70.3 1677200 51.9
## 8 GD 2010-01-13 70.2 71.5 70.0 71.1 1797800 52.8
## 9 GD 2010-01-14 70.9 71.2 70.3 71.1 2332600 52.8
## 10 GD 2010-01-15 71.1 71.1 69.8 70.6 1829400 52.4
## # … with 2,506 more rows
# Store the "GD" subset under its own name so later steps can reuse it.
GD <- stocks %>%
  filter(symbol == "GD")
# Preview the rows of `stocks` whose symbol is "RTX".
stocks %>%
  filter(symbol == "RTX")
## # A tibble: 2,516 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 RTX 2010-01-04 44.2 45.2 44.0 45.1 9699415 33.3
## 2 RTX 2010-01-05 45.1 45.1 44.2 44.4 7952786 32.8
## 3 RTX 2010-01-06 44.4 44.6 44.1 44.2 7511044 32.6
## 4 RTX 2010-01-07 44.1 44.4 43.8 44.4 6776132 32.7
## 5 RTX 2010-01-08 44.2 44.5 44.2 44.4 7396477 32.8
## 6 RTX 2010-01-11 44.5 45.5 44.5 45.4 8787170 33.5
## 7 RTX 2010-01-12 45.2 45.3 44.7 45.1 7423808 33.3
## 8 RTX 2010-01-13 45.2 45.7 45.2 45.7 7749712 33.7
## 9 RTX 2010-01-14 45.5 45.8 45.4 45.8 5643016 33.8
## 10 RTX 2010-01-15 45.8 45.9 45.1 45.3 8490504 33.4
## # … with 2,506 more rows
# Store the "RTX" subset under its own name so later steps can reuse it.
RTX <- stocks %>%
  filter(symbol == "RTX")
# Preview the rows of `stocks` whose symbol is "LMT".
stocks %>%
  filter(symbol == "LMT")
## # A tibble: 2,516 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 LMT 2010-01-04 75.9 76.9 75.5 76.8 2408300 51.1
## 2 LMT 2010-01-05 76.6 77.8 76.6 77.7 2356500 51.6
## 3 LMT 2010-01-06 76.1 77.2 75.8 76.5 5208100 50.9
## 4 LMT 2010-01-07 76.2 76.2 73.6 74.4 7038300 49.5
## 5 LMT 2010-01-08 74.3 75.9 74.3 75.6 3629400 50.3
## 6 LMT 2010-01-11 75.8 76.2 75.2 76.0 1973500 50.5
## 7 LMT 2010-01-12 75.8 76.5 75.3 76.0 2119000 50.5
## 8 LMT 2010-01-13 76.2 77.0 75.7 76.9 2245100 51.1
## 9 LMT 2010-01-14 76.8 77.0 76.2 76.8 1954500 51.1
## 10 LMT 2010-01-15 77.0 77.2 76.3 76.8 2041900 51.0
## # … with 2,506 more rows
# Store the "LMT" subset under its own name so later steps can reuse it.
LMT <- stocks %>%
  filter(symbol == "LMT")
Arrange rows
# Show GD ordered by date, most recent row first.
GD %>%
  arrange(desc(date))
## # A tibble: 2,516 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 GD 2019-12-31 176. 177. 175. 176. 726300 164.
## 2 GD 2019-12-30 178. 178. 176. 176. 886600 164.
## 3 GD 2019-12-27 178. 179. 177. 177. 781100 165.
## 4 GD 2019-12-26 179. 179. 177. 178. 664600 166.
## 5 GD 2019-12-24 180. 180. 178. 179. 321400 166.
## 6 GD 2019-12-23 179. 181. 179. 180. 1404400 167.
## 7 GD 2019-12-20 179. 180. 178. 178. 1961900 166.
## 8 GD 2019-12-19 179. 180. 178. 179. 1161900 166.
## 9 GD 2019-12-18 181. 181. 178. 178. 995100 166.
## 10 GD 2019-12-17 181. 182. 181. 181. 857500 169.
## # … with 2,506 more rows
# Show RTX ordered by date, most recent row first.
RTX %>%
  arrange(desc(date))
## # A tibble: 2,516 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 RTX 2019-12-31 94.3 94.5 93.8 94.2 3227100 87.9
## 2 RTX 2019-12-30 94.7 94.9 94.1 94.3 2674605 88.0
## 3 RTX 2019-12-27 95.0 95.1 94.4 94.6 2928050 88.2
## 4 RTX 2019-12-26 94.4 94.9 94.2 94.8 2231274 88.5
## 5 RTX 2019-12-24 94.4 94.6 94.0 94.1 1596786 87.8
## 6 RTX 2019-12-23 94.4 94.7 94.1 94.3 4361328 88.0
## 7 RTX 2019-12-20 95.0 95.0 93.9 94.1 7252673 87.8
## 8 RTX 2019-12-19 93.3 93.9 92.6 93.6 4755241 87.3
## 9 RTX 2019-12-18 93.2 93.5 92.6 93.0 6036929 86.7
## 10 RTX 2019-12-17 93.9 94.0 93.1 93.5 5759172 87.3
## # … with 2,506 more rows
# Show LMT ordered by date, most recent row first.
LMT %>%
  arrange(desc(date))
## # A tibble: 2,516 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 LMT 2019-12-31 392. 392. 387. 389. 775400 361.
## 2 LMT 2019-12-30 394. 394. 391. 392. 776900 364.
## 3 LMT 2019-12-27 394. 394. 392. 394. 728800 365.
## 4 LMT 2019-12-26 391. 392. 390. 392. 711600 364.
## 5 LMT 2019-12-24 392. 392. 390. 390. 516300 362.
## 6 LMT 2019-12-23 388. 395. 388. 392. 1050400 364.
## 7 LMT 2019-12-20 385. 387. 383 386. 1599600 359.
## 8 LMT 2019-12-19 381. 385. 380. 383. 766300 356.
## 9 LMT 2019-12-18 387. 388. 380. 381. 1526000 353.
## 10 LMT 2019-12-17 385 388 385 387. 730000 359.
## # … with 2,506 more rows
Select columns
# Preview GD reduced to the symbol, date, open and close columns.
GD %>%
  select(symbol, date, open, close)
## # A tibble: 2,516 × 4
## symbol date open close
## <chr> <date> <dbl> <dbl>
## 1 GD 2010-01-04 68.5 69.2
## 2 GD 2010-01-05 69.3 69.3
## 3 GD 2010-01-06 69.0 69.2
## 4 GD 2010-01-07 69.1 69.4
## 5 GD 2010-01-08 69.4 69.4
## 6 GD 2010-01-11 69.8 70.7
## 7 GD 2010-01-12 70.3 70.3
## 8 GD 2010-01-13 70.2 71.1
## 9 GD 2010-01-14 70.9 71.1
## 10 GD 2010-01-15 71.1 70.6
## # … with 2,506 more rows
# Preview RTX reduced to the symbol, date, open and close columns.
RTX %>%
  select(symbol, date, open, close)
## # A tibble: 2,516 × 4
## symbol date open close
## <chr> <date> <dbl> <dbl>
## 1 RTX 2010-01-04 44.2 45.1
## 2 RTX 2010-01-05 45.1 44.4
## 3 RTX 2010-01-06 44.4 44.2
## 4 RTX 2010-01-07 44.1 44.4
## 5 RTX 2010-01-08 44.2 44.4
## 6 RTX 2010-01-11 44.5 45.4
## 7 RTX 2010-01-12 45.2 45.1
## 8 RTX 2010-01-13 45.2 45.7
## 9 RTX 2010-01-14 45.5 45.8
## 10 RTX 2010-01-15 45.8 45.3
## # … with 2,506 more rows
# Preview LMT reduced to the symbol, date, open and close columns.
LMT %>%
  select(symbol, date, open, close)
## # A tibble: 2,516 × 4
## symbol date open close
## <chr> <date> <dbl> <dbl>
## 1 LMT 2010-01-04 75.9 76.8
## 2 LMT 2010-01-05 76.6 77.7
## 3 LMT 2010-01-06 76.1 76.5
## 4 LMT 2010-01-07 76.2 74.4
## 5 LMT 2010-01-08 74.3 75.6
## 6 LMT 2010-01-11 75.8 76.0
## 7 LMT 2010-01-12 75.8 76.0
## 8 LMT 2010-01-13 76.2 76.9
## 9 LMT 2010-01-14 76.8 76.8
## 10 LMT 2010-01-15 77.0 76.8
## # … with 2,506 more rows
# Store the four-column version (symbol, date, open, close) of each
# per-symbol table for the following steps.
GD_2 <- GD %>%
  select(symbol, date, open, close)
RTX_2 <- RTX %>%
  select(symbol, date, open, close)
LMT_2 <- LMT %>%
  select(symbol, date, open, close)
Add columns
# Preview GD_2 with an extra column holding open minus close.
# Sign convention: a positive value means the close was below the open,
# a negative value means the close was above the open.
GD_2 %>%
  mutate(gain_or_loss = open - close)
## # A tibble: 2,516 × 5
## symbol date open close gain_or_loss
## <chr> <date> <dbl> <dbl> <dbl>
## 1 GD 2010-01-04 68.5 69.2 -0.680
## 2 GD 2010-01-05 69.3 69.3 -0.0100
## 3 GD 2010-01-06 69.0 69.2 -0.210
## 4 GD 2010-01-07 69.1 69.4 -0.350
## 5 GD 2010-01-08 69.4 69.4 -0.0100
## 6 GD 2010-01-11 69.8 70.7 -0.900
## 7 GD 2010-01-12 70.3 70.3 -0.0300
## 8 GD 2010-01-13 70.2 71.1 -0.900
## 9 GD 2010-01-14 70.9 71.1 -0.230
## 10 GD 2010-01-15 71.1 70.6 0.470
## # … with 2,506 more rows
# Preview RTX_2 with an extra column holding open minus close.
# Sign convention: a positive value means the close was below the open,
# a negative value means the close was above the open.
RTX_2 %>%
  mutate(gain_or_loss = open - close)
## # A tibble: 2,516 × 5
## symbol date open close gain_or_loss
## <chr> <date> <dbl> <dbl> <dbl>
## 1 RTX 2010-01-04 44.2 45.1 -0.894
## 2 RTX 2010-01-05 45.1 44.4 0.680
## 3 RTX 2010-01-06 44.4 44.2 0.233
## 4 RTX 2010-01-07 44.1 44.4 -0.302
## 5 RTX 2010-01-08 44.2 44.4 -0.233
## 6 RTX 2010-01-11 44.5 45.4 -0.900
## 7 RTX 2010-01-12 45.2 45.1 0.0378
## 8 RTX 2010-01-13 45.2 45.7 -0.466
## 9 RTX 2010-01-14 45.5 45.8 -0.359
## 10 RTX 2010-01-15 45.8 45.3 0.529
## # … with 2,506 more rows
# Preview LMT_2 with an extra column holding open minus close.
# Sign convention: a positive value means the close was below the open,
# a negative value means the close was above the open.
LMT_2 %>%
  mutate(gain_or_loss = open - close)
## # A tibble: 2,516 × 5
## symbol date open close gain_or_loss
## <chr> <date> <dbl> <dbl> <dbl>
## 1 LMT 2010-01-04 75.9 76.8 -0.980
## 2 LMT 2010-01-05 76.6 77.7 -1.03
## 3 LMT 2010-01-06 76.1 76.5 -0.410
## 4 LMT 2010-01-07 76.2 74.4 1.76
## 5 LMT 2010-01-08 74.3 75.6 -1.26
## 6 LMT 2010-01-11 75.8 76.0 -0.210
## 7 LMT 2010-01-12 75.8 76.0 -0.210
## 8 LMT 2010-01-13 76.2 76.9 -0.720
## 9 LMT 2010-01-14 76.8 76.8 -0.0500
## 10 LMT 2010-01-15 77.0 76.8 0.200
## # … with 2,506 more rows
# Store each table with the added gain_or_loss column (open minus close;
# positive means the close was below the open).
GD_3 <- GD_2 %>%
  mutate(gain_or_loss = open - close)
RTX_3 <- RTX_2 %>%
  mutate(gain_or_loss = open - close)
LMT_3 <- LMT_2 %>%
  mutate(gain_or_loss = open - close)
Summarize with groups
# For every row of GD_3, show the symbol, whether gain_or_loss is
# non-negative, and n(). No grouping is applied, so every input row is kept
# and `count` is the row count of the whole table repeated on each row.
# NOTE(review): for one row per symbol / flag combination, count() or
# group_by() before summarise() is presumably what was intended - confirm.
GD_3 %>%
  summarise(symbol, gain_or_loss >= 0, count = n())
## # A tibble: 2,516 × 3
## symbol `gain_or_loss >= 0` count
## <chr> <lgl> <int>
## 1 GD FALSE 2516
## 2 GD FALSE 2516
## 3 GD FALSE 2516
## 4 GD FALSE 2516
## 5 GD FALSE 2516
## 6 GD FALSE 2516
## 7 GD FALSE 2516
## 8 GD FALSE 2516
## 9 GD FALSE 2516
## 10 GD TRUE 2516
## # … with 2,506 more rows
# For every row of RTX_3, show the symbol, whether gain_or_loss is
# non-negative, and n(). No grouping is applied, so every input row is kept
# and `count` is the row count of the whole table repeated on each row.
# NOTE(review): for one row per symbol / flag combination, count() or
# group_by() before summarise() is presumably what was intended - confirm.
RTX_3 %>%
  summarise(symbol, gain_or_loss >= 0, count = n())
## # A tibble: 2,516 × 3
## symbol `gain_or_loss >= 0` count
## <chr> <lgl> <int>
## 1 RTX FALSE 2516
## 2 RTX TRUE 2516
## 3 RTX TRUE 2516
## 4 RTX FALSE 2516
## 5 RTX FALSE 2516
## 6 RTX FALSE 2516
## 7 RTX TRUE 2516
## 8 RTX FALSE 2516
## 9 RTX FALSE 2516
## 10 RTX TRUE 2516
## # … with 2,506 more rows
# For every row of LMT_3, show the symbol, whether gain_or_loss is
# non-negative, and n(). No grouping is applied, so every input row is kept
# and `count` is the row count of the whole table repeated on each row.
# NOTE(review): for one row per symbol / flag combination, count() or
# group_by() before summarise() is presumably what was intended - confirm.
LMT_3 %>%
  summarise(symbol, gain_or_loss >= 0, count = n())
## # A tibble: 2,516 × 3
## symbol `gain_or_loss >= 0` count
## <chr> <lgl> <int>
## 1 LMT FALSE 2516
## 2 LMT FALSE 2516
## 3 LMT FALSE 2516
## 4 LMT TRUE 2516
## 5 LMT FALSE 2516
## 6 LMT FALSE 2516
## 7 LMT FALSE 2516
## 8 LMT FALSE 2516
## 9 LMT FALSE 2516
## 10 LMT TRUE 2516
## # … with 2,506 more rows
# Keep only the two columns the scatter plots need: date and gain_or_loss.
# All three tables go through select(): summarise() is meant to collapse
# rows, and using it to pass columns through unchanged (one output row per
# input row) is deprecated since dplyr 1.1.0. The resulting tables are the
# same as before.
GD_4 <- select(GD_3, date, gain_or_loss)
RTX_4 <- select(RTX_3, date, gain_or_loss)
LMT_4 <- select(LMT_3, date, gain_or_loss)
# Scatter plot of gain_or_loss against date for GD_4, drawn in red.
ggplot(GD_4, aes(x = date, y = gain_or_loss)) +
  geom_point(color = "red")

# Scatter plot of gain_or_loss against date for RTX_4, drawn in blue.
ggplot(RTX_4, aes(x = date, y = gain_or_loss)) +
  geom_point(color = "blue")

# Scatter plot of gain_or_loss against date for LMT_4, drawn in green.
ggplot(LMT_4, aes(x = date, y = gain_or_loss)) +
  geom_point(color = "green")
