Import stock prices

# Fetch "stock.prices" data for the three tickers over the requested date
# range, then print the resulting tibble (one row per symbol and date).
stocks <- c("GD", "RTX", "LMT") %>%
  tq_get(get = "stock.prices", from = "2010-01-01", to = "2020-01-01")
stocks
## # A tibble: 7,548 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 GD     2010-01-04  68.5  69.2  68.2  69.2 1198500     51.1
##  2 GD     2010-01-05  69.3  69.8  68.8  69.3 1535400     51.2
##  3 GD     2010-01-06  69.0  69.4  68.8  69.2 1149700     51.1
##  4 GD     2010-01-07  69.1  69.5  68.7  69.4 1262300     51.3
##  5 GD     2010-01-08  69.4  69.7  68.9  69.4 1582000     51.3
##  6 GD     2010-01-11  69.8  70.8  69.7  70.7 1508400     52.2
##  7 GD     2010-01-12  70.3  70.7  69.8  70.3 1677200     51.9
##  8 GD     2010-01-13  70.2  71.5  70.0  71.1 1797800     52.8
##  9 GD     2010-01-14  70.9  71.2  70.3  71.1 2332600     52.8
## 10 GD     2010-01-15  71.1  71.1  69.8  70.6 1829400     52.4
## # … with 7,538 more rows

Plot stock prices

# Line chart of the adjusted price over time, one colored line per symbol.
ggplot(stocks, aes(x = date, y = adjusted, color = symbol)) +
  geom_line()

Apply the dplyr verbs you learned in chapter 5

Filter rows

# Preview only the General Dynamics (symbol "GD") rows.
stocks %>% filter(symbol == "GD")
## # A tibble: 2,516 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 GD     2010-01-04  68.5  69.2  68.2  69.2 1198500     51.1
##  2 GD     2010-01-05  69.3  69.8  68.8  69.3 1535400     51.2
##  3 GD     2010-01-06  69.0  69.4  68.8  69.2 1149700     51.1
##  4 GD     2010-01-07  69.1  69.5  68.7  69.4 1262300     51.3
##  5 GD     2010-01-08  69.4  69.7  68.9  69.4 1582000     51.3
##  6 GD     2010-01-11  69.8  70.8  69.7  70.7 1508400     52.2
##  7 GD     2010-01-12  70.3  70.7  69.8  70.3 1677200     51.9
##  8 GD     2010-01-13  70.2  71.5  70.0  71.1 1797800     52.8
##  9 GD     2010-01-14  70.9  71.2  70.3  71.1 2332600     52.8
## 10 GD     2010-01-15  71.1  71.1  69.8  70.6 1829400     52.4
## # … with 2,506 more rows
# Keep the GD rows in their own tibble for the later steps.
GD <- stocks %>% filter(symbol == "GD")
# Preview only the rows whose symbol is "RTX".
stocks %>% filter(symbol == "RTX")
## # A tibble: 2,516 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 RTX    2010-01-04  44.2  45.2  44.0  45.1 9699415     33.3
##  2 RTX    2010-01-05  45.1  45.1  44.2  44.4 7952786     32.8
##  3 RTX    2010-01-06  44.4  44.6  44.1  44.2 7511044     32.6
##  4 RTX    2010-01-07  44.1  44.4  43.8  44.4 6776132     32.7
##  5 RTX    2010-01-08  44.2  44.5  44.2  44.4 7396477     32.8
##  6 RTX    2010-01-11  44.5  45.5  44.5  45.4 8787170     33.5
##  7 RTX    2010-01-12  45.2  45.3  44.7  45.1 7423808     33.3
##  8 RTX    2010-01-13  45.2  45.7  45.2  45.7 7749712     33.7
##  9 RTX    2010-01-14  45.5  45.8  45.4  45.8 5643016     33.8
## 10 RTX    2010-01-15  45.8  45.9  45.1  45.3 8490504     33.4
## # … with 2,506 more rows
# Keep the RTX rows in their own tibble for the later steps.
RTX <- stocks %>% filter(symbol == "RTX")
# Preview only the rows whose symbol is "LMT".
stocks %>% filter(symbol == "LMT")
## # A tibble: 2,516 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 LMT    2010-01-04  75.9  76.9  75.5  76.8 2408300     51.1
##  2 LMT    2010-01-05  76.6  77.8  76.6  77.7 2356500     51.6
##  3 LMT    2010-01-06  76.1  77.2  75.8  76.5 5208100     50.9
##  4 LMT    2010-01-07  76.2  76.2  73.6  74.4 7038300     49.5
##  5 LMT    2010-01-08  74.3  75.9  74.3  75.6 3629400     50.3
##  6 LMT    2010-01-11  75.8  76.2  75.2  76.0 1973500     50.5
##  7 LMT    2010-01-12  75.8  76.5  75.3  76.0 2119000     50.5
##  8 LMT    2010-01-13  76.2  77.0  75.7  76.9 2245100     51.1
##  9 LMT    2010-01-14  76.8  77.0  76.2  76.8 1954500     51.1
## 10 LMT    2010-01-15  77.0  77.2  76.3  76.8 2041900     51.0
## # … with 2,506 more rows
# Keep the LMT rows in their own tibble for the later steps.
LMT <- stocks %>% filter(symbol == "LMT")

Arrange rows

# Show the GD rows ordered from the most recent date to the oldest.
GD %>% arrange(desc(date))
## # A tibble: 2,516 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 GD     2019-12-31  176.  177.  175.  176.  726300     164.
##  2 GD     2019-12-30  178.  178.  176.  176.  886600     164.
##  3 GD     2019-12-27  178.  179.  177.  177.  781100     165.
##  4 GD     2019-12-26  179.  179.  177.  178.  664600     166.
##  5 GD     2019-12-24  180.  180.  178.  179.  321400     166.
##  6 GD     2019-12-23  179.  181.  179.  180. 1404400     167.
##  7 GD     2019-12-20  179.  180.  178.  178. 1961900     166.
##  8 GD     2019-12-19  179.  180.  178.  179. 1161900     166.
##  9 GD     2019-12-18  181.  181.  178.  178.  995100     166.
## 10 GD     2019-12-17  181.  182.  181.  181.  857500     169.
## # … with 2,506 more rows
# Show the RTX rows ordered from the most recent date to the oldest.
RTX %>% arrange(desc(date))
## # A tibble: 2,516 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 RTX    2019-12-31  94.3  94.5  93.8  94.2 3227100     87.9
##  2 RTX    2019-12-30  94.7  94.9  94.1  94.3 2674605     88.0
##  3 RTX    2019-12-27  95.0  95.1  94.4  94.6 2928050     88.2
##  4 RTX    2019-12-26  94.4  94.9  94.2  94.8 2231274     88.5
##  5 RTX    2019-12-24  94.4  94.6  94.0  94.1 1596786     87.8
##  6 RTX    2019-12-23  94.4  94.7  94.1  94.3 4361328     88.0
##  7 RTX    2019-12-20  95.0  95.0  93.9  94.1 7252673     87.8
##  8 RTX    2019-12-19  93.3  93.9  92.6  93.6 4755241     87.3
##  9 RTX    2019-12-18  93.2  93.5  92.6  93.0 6036929     86.7
## 10 RTX    2019-12-17  93.9  94.0  93.1  93.5 5759172     87.3
## # … with 2,506 more rows
# Show the LMT rows ordered from the most recent date to the oldest.
LMT %>% arrange(desc(date))
## # A tibble: 2,516 × 8
##    symbol date        open  high   low close  volume adjusted
##    <chr>  <date>     <dbl> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 LMT    2019-12-31  392.  392.  387.  389.  775400     361.
##  2 LMT    2019-12-30  394.  394.  391.  392.  776900     364.
##  3 LMT    2019-12-27  394.  394.  392.  394.  728800     365.
##  4 LMT    2019-12-26  391.  392.  390.  392.  711600     364.
##  5 LMT    2019-12-24  392.  392.  390.  390.  516300     362.
##  6 LMT    2019-12-23  388.  395.  388.  392. 1050400     364.
##  7 LMT    2019-12-20  385.  387.  383   386. 1599600     359.
##  8 LMT    2019-12-19  381.  385.  380.  383.  766300     356.
##  9 LMT    2019-12-18  387.  388.  380.  381. 1526000     353.
## 10 LMT    2019-12-17  385   388   385   387.  730000     359.
## # … with 2,506 more rows

Select columns

# Preview GD with only the symbol, date, open and close columns.
GD %>% select(symbol, date, open, close)
## # A tibble: 2,516 × 4
##    symbol date        open close
##    <chr>  <date>     <dbl> <dbl>
##  1 GD     2010-01-04  68.5  69.2
##  2 GD     2010-01-05  69.3  69.3
##  3 GD     2010-01-06  69.0  69.2
##  4 GD     2010-01-07  69.1  69.4
##  5 GD     2010-01-08  69.4  69.4
##  6 GD     2010-01-11  69.8  70.7
##  7 GD     2010-01-12  70.3  70.3
##  8 GD     2010-01-13  70.2  71.1
##  9 GD     2010-01-14  70.9  71.1
## 10 GD     2010-01-15  71.1  70.6
## # … with 2,506 more rows
# Preview RTX with only the symbol, date, open and close columns.
RTX %>% select(symbol, date, open, close)
## # A tibble: 2,516 × 4
##    symbol date        open close
##    <chr>  <date>     <dbl> <dbl>
##  1 RTX    2010-01-04  44.2  45.1
##  2 RTX    2010-01-05  45.1  44.4
##  3 RTX    2010-01-06  44.4  44.2
##  4 RTX    2010-01-07  44.1  44.4
##  5 RTX    2010-01-08  44.2  44.4
##  6 RTX    2010-01-11  44.5  45.4
##  7 RTX    2010-01-12  45.2  45.1
##  8 RTX    2010-01-13  45.2  45.7
##  9 RTX    2010-01-14  45.5  45.8
## 10 RTX    2010-01-15  45.8  45.3
## # … with 2,506 more rows
# Preview LMT with only the symbol, date, open and close columns.
LMT %>% select(symbol, date, open, close)
## # A tibble: 2,516 × 4
##    symbol date        open close
##    <chr>  <date>     <dbl> <dbl>
##  1 LMT    2010-01-04  75.9  76.8
##  2 LMT    2010-01-05  76.6  77.7
##  3 LMT    2010-01-06  76.1  76.5
##  4 LMT    2010-01-07  76.2  74.4
##  5 LMT    2010-01-08  74.3  75.6
##  6 LMT    2010-01-11  75.8  76.0
##  7 LMT    2010-01-12  75.8  76.0
##  8 LMT    2010-01-13  76.2  76.9
##  9 LMT    2010-01-14  76.8  76.8
## 10 LMT    2010-01-15  77.0  76.8
## # … with 2,506 more rows
# Store the four-column (symbol, date, open, close) version of each
# per-symbol tibble for the next step.
GD_2 <- GD %>% select(symbol, date, open, close)
RTX_2 <- RTX %>% select(symbol, date, open, close)
LMT_2 <- LMT %>% select(symbol, date, open, close)

Add columns

# Preview GD with a gain_or_loss column computed as open minus close.
# With this sign, a negative value means the close was above the open.
GD_2 %>% mutate(gain_or_loss = open - close)
## # A tibble: 2,516 × 5
##    symbol date        open close gain_or_loss
##    <chr>  <date>     <dbl> <dbl>        <dbl>
##  1 GD     2010-01-04  68.5  69.2      -0.680 
##  2 GD     2010-01-05  69.3  69.3      -0.0100
##  3 GD     2010-01-06  69.0  69.2      -0.210 
##  4 GD     2010-01-07  69.1  69.4      -0.350 
##  5 GD     2010-01-08  69.4  69.4      -0.0100
##  6 GD     2010-01-11  69.8  70.7      -0.900 
##  7 GD     2010-01-12  70.3  70.3      -0.0300
##  8 GD     2010-01-13  70.2  71.1      -0.900 
##  9 GD     2010-01-14  70.9  71.1      -0.230 
## 10 GD     2010-01-15  71.1  70.6       0.470 
## # … with 2,506 more rows
# Preview RTX with a gain_or_loss column computed as open minus close.
# With this sign, a negative value means the close was above the open.
RTX_2 %>% mutate(gain_or_loss = open - close)
## # A tibble: 2,516 × 5
##    symbol date        open close gain_or_loss
##    <chr>  <date>     <dbl> <dbl>        <dbl>
##  1 RTX    2010-01-04  44.2  45.1      -0.894 
##  2 RTX    2010-01-05  45.1  44.4       0.680 
##  3 RTX    2010-01-06  44.4  44.2       0.233 
##  4 RTX    2010-01-07  44.1  44.4      -0.302 
##  5 RTX    2010-01-08  44.2  44.4      -0.233 
##  6 RTX    2010-01-11  44.5  45.4      -0.900 
##  7 RTX    2010-01-12  45.2  45.1       0.0378
##  8 RTX    2010-01-13  45.2  45.7      -0.466 
##  9 RTX    2010-01-14  45.5  45.8      -0.359 
## 10 RTX    2010-01-15  45.8  45.3       0.529 
## # … with 2,506 more rows
# Preview LMT with a gain_or_loss column computed as open minus close.
# With this sign, a negative value means the close was above the open.
LMT_2 %>% mutate(gain_or_loss = open - close)
## # A tibble: 2,516 × 5
##    symbol date        open close gain_or_loss
##    <chr>  <date>     <dbl> <dbl>        <dbl>
##  1 LMT    2010-01-04  75.9  76.8      -0.980 
##  2 LMT    2010-01-05  76.6  77.7      -1.03  
##  3 LMT    2010-01-06  76.1  76.5      -0.410 
##  4 LMT    2010-01-07  76.2  74.4       1.76  
##  5 LMT    2010-01-08  74.3  75.6      -1.26  
##  6 LMT    2010-01-11  75.8  76.0      -0.210 
##  7 LMT    2010-01-12  75.8  76.0      -0.210 
##  8 LMT    2010-01-13  76.2  76.9      -0.720 
##  9 LMT    2010-01-14  76.8  76.8      -0.0500
## 10 LMT    2010-01-15  77.0  76.8       0.200 
## # … with 2,506 more rows
# Store each per-symbol tibble with the extra gain_or_loss column
# (open minus close: positive when the close is below the open).
# NOTE(review): the name suggests positive = gain, but the formula yields
# the opposite sign; confirm whether close - open was intended.
GD_3 <- GD_2 %>% mutate(gain_or_loss = open - close)
RTX_3 <- RTX_2 %>% mutate(gain_or_loss = open - close)
LMT_3 <- LMT_2 %>% mutate(gain_or_loss = open - close)

Summarize with groups

# For every GD row, show the symbol, whether gain_or_loss is >= 0, and n().
# No grouping is applied, so n() is the total row count repeated on each
# row and the result keeps one row per input row.
# NOTE(review): the section is titled "with groups"; if a tally of days per
# outcome was intended, group by the logical first -- confirm intent.
GD_3 %>% summarise(symbol, gain_or_loss >= 0, count = n())
## # A tibble: 2,516 × 3
##    symbol `gain_or_loss >= 0` count
##    <chr>  <lgl>               <int>
##  1 GD     FALSE                2516
##  2 GD     FALSE                2516
##  3 GD     FALSE                2516
##  4 GD     FALSE                2516
##  5 GD     FALSE                2516
##  6 GD     FALSE                2516
##  7 GD     FALSE                2516
##  8 GD     FALSE                2516
##  9 GD     FALSE                2516
## 10 GD     TRUE                 2516
## # … with 2,506 more rows
# For every RTX row, show the symbol, whether gain_or_loss is >= 0, and n().
# No grouping is applied, so n() is the total row count repeated on each
# row and the result keeps one row per input row.
# NOTE(review): the section is titled "with groups"; if a tally of days per
# outcome was intended, group by the logical first -- confirm intent.
RTX_3 %>% summarise(symbol, gain_or_loss >= 0, count = n())
## # A tibble: 2,516 × 3
##    symbol `gain_or_loss >= 0` count
##    <chr>  <lgl>               <int>
##  1 RTX    FALSE                2516
##  2 RTX    TRUE                 2516
##  3 RTX    TRUE                 2516
##  4 RTX    FALSE                2516
##  5 RTX    FALSE                2516
##  6 RTX    FALSE                2516
##  7 RTX    TRUE                 2516
##  8 RTX    FALSE                2516
##  9 RTX    FALSE                2516
## 10 RTX    TRUE                 2516
## # … with 2,506 more rows
# For every LMT row, show the symbol, whether gain_or_loss is >= 0, and n().
# No grouping is applied, so n() is the total row count repeated on each
# row and the result keeps one row per input row.
# NOTE(review): the section is titled "with groups"; if a tally of days per
# outcome was intended, group by the logical first -- confirm intent.
LMT_3 %>% summarise(symbol, gain_or_loss >= 0, count = n())
## # A tibble: 2,516 × 3
##    symbol `gain_or_loss >= 0` count
##    <chr>  <lgl>               <int>
##  1 LMT    FALSE                2516
##  2 LMT    FALSE                2516
##  3 LMT    FALSE                2516
##  4 LMT    TRUE                 2516
##  5 LMT    FALSE                2516
##  6 LMT    FALSE                2516
##  7 LMT    FALSE                2516
##  8 LMT    FALSE                2516
##  9 LMT    FALSE                2516
## 10 LMT    TRUE                 2516
## # … with 2,506 more rows
# Keep only the two columns needed for plotting (date and gain_or_loss).
# select() is used for all three symbols: summarise() is an aggregation
# verb, and using it to pass whole columns through returns one row per
# input row, which dplyr deprecated in 1.1.0.
GD_4 <- select(GD_3, date, gain_or_loss)
RTX_4 <- select(RTX_3, date, gain_or_loss)
LMT_4 <- select(LMT_3, date, gain_or_loss)
# Scatter plots of gain_or_loss (open minus close) against date, one chart
# per symbol, each drawn in a single fixed color.
ggplot(GD_4, aes(x = date, y = gain_or_loss)) +
  geom_point(color = "red")

ggplot(RTX_4, aes(x = date, y = gain_or_loss)) +
  geom_point(color = "blue")

ggplot(LMT_4, aes(x = date, y = gain_or_loss)) +
  geom_point(color = "green")