stocks <- tq_get(c("CRM", "PYPL", "ABNB"),
get = "stock.prices",
from = "2016-01-01")
stocks
## # A tibble: 4,111 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CRM 2016-01-04 77.1 77.2 75.6 76.7 4919200 76.7
## 2 CRM 2016-01-05 77.1 78.0 76.7 77.1 2656800 77.1
## 3 CRM 2016-01-06 75.7 77.3 75.6 76.3 3484400 76.3
## 4 CRM 2016-01-07 75.1 75.6 73.5 74.3 6972200 74.3
## 5 CRM 2016-01-08 74.8 75.3 72.9 73.2 3673800 73.2
## 6 CRM 2016-01-11 73.7 73.8 71.9 73.2 3685100 73.2
## 7 CRM 2016-01-12 74.0 75.3 73.1 74.5 4108100 74.5
## 8 CRM 2016-01-13 75.0 75.7 71.8 72.3 4727500 72.3
## 9 CRM 2016-01-14 72.4 73.6 70.9 73.1 4738200 73.1
## 10 CRM 2016-01-15 69.9 71.1 68.2 70.9 9572100 70.9
## # … with 4,101 more rows
stocks %>%
ggplot(aes(x = date, y = adjusted, color = symbol)) +
geom_line()
stocks %>% filter(close > 70)
## # A tibble: 3,606 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CRM 2016-01-04 77.1 77.2 75.6 76.7 4919200 76.7
## 2 CRM 2016-01-05 77.1 78.0 76.7 77.1 2656800 77.1
## 3 CRM 2016-01-06 75.7 77.3 75.6 76.3 3484400 76.3
## 4 CRM 2016-01-07 75.1 75.6 73.5 74.3 6972200 74.3
## 5 CRM 2016-01-08 74.8 75.3 72.9 73.2 3673800 73.2
## 6 CRM 2016-01-11 73.7 73.8 71.9 73.2 3685100 73.2
## 7 CRM 2016-01-12 74.0 75.3 73.1 74.5 4108100 74.5
## 8 CRM 2016-01-13 75.0 75.7 71.8 72.3 4727500 72.3
## 9 CRM 2016-01-14 72.4 73.6 70.9 73.1 4738200 73.1
## 10 CRM 2016-01-15 69.9 71.1 68.2 70.9 9572100 70.9
## # … with 3,596 more rows
stocks %>% filter(close > 70 & low > 65)
## # A tibble: 3,606 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CRM 2016-01-04 77.1 77.2 75.6 76.7 4919200 76.7
## 2 CRM 2016-01-05 77.1 78.0 76.7 77.1 2656800 77.1
## 3 CRM 2016-01-06 75.7 77.3 75.6 76.3 3484400 76.3
## 4 CRM 2016-01-07 75.1 75.6 73.5 74.3 6972200 74.3
## 5 CRM 2016-01-08 74.8 75.3 72.9 73.2 3673800 73.2
## 6 CRM 2016-01-11 73.7 73.8 71.9 73.2 3685100 73.2
## 7 CRM 2016-01-12 74.0 75.3 73.1 74.5 4108100 74.5
## 8 CRM 2016-01-13 75.0 75.7 71.8 72.3 4727500 72.3
## 9 CRM 2016-01-14 72.4 73.6 70.9 73.1 4738200 73.1
## 10 CRM 2016-01-15 69.9 71.1 68.2 70.9 9572100 70.9
## # … with 3,596 more rows
stocks %>% arrange(stocks, desc(close), desc(adjusted))
## # A tibble: 4,111 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ABNB 2020-12-10 146 165 141. 145. 70447500 145.
## 2 ABNB 2020-12-11 147. 152. 135. 139. 26980800 139.
## 3 ABNB 2020-12-14 135 135. 125. 130 16966100 130
## 4 ABNB 2020-12-15 127. 128. 122. 125. 10914400 125.
## 5 ABNB 2020-12-16 126. 142 125. 138. 20409600 138.
## 6 ABNB 2020-12-17 143 152. 143. 147. 15054700 147.
## 7 ABNB 2020-12-18 150. 159 150. 157. 15954200 157.
## 8 ABNB 2020-12-21 155. 172 145. 163. 17788100 163.
## 9 ABNB 2020-12-22 170 175. 161. 163. 9872600 163.
## 10 ABNB 2020-12-23 163. 168. 156. 158. 5852500 158.
## # … with 4,101 more rows
stocks %>% select(symbol, date, high:close)
## # A tibble: 4,111 × 5
## symbol date high low close
## <chr> <date> <dbl> <dbl> <dbl>
## 1 CRM 2016-01-04 77.2 75.6 76.7
## 2 CRM 2016-01-05 78.0 76.7 77.1
## 3 CRM 2016-01-06 77.3 75.6 76.3
## 4 CRM 2016-01-07 75.6 73.5 74.3
## 5 CRM 2016-01-08 75.3 72.9 73.2
## 6 CRM 2016-01-11 73.8 71.9 73.2
## 7 CRM 2016-01-12 75.3 73.1 74.5
## 8 CRM 2016-01-13 75.7 71.8 72.3
## 9 CRM 2016-01-14 73.6 70.9 73.1
## 10 CRM 2016-01-15 71.1 68.2 70.9
## # … with 4,101 more rows
stocks %>% select(symbol, date, contains("a"))
## # A tibble: 4,111 × 3
## symbol date adjusted
## <chr> <date> <dbl>
## 1 CRM 2016-01-04 76.7
## 2 CRM 2016-01-05 77.1
## 3 CRM 2016-01-06 76.3
## 4 CRM 2016-01-07 74.3
## 5 CRM 2016-01-08 73.2
## 6 CRM 2016-01-11 73.2
## 7 CRM 2016-01-12 74.5
## 8 CRM 2016-01-13 72.3
## 9 CRM 2016-01-14 73.1
## 10 CRM 2016-01-15 70.9
## # … with 4,101 more rows
mutate(stocks,
High_Low_Difference = high - low) %>%
# Selecting which columns to show
select(symbol, date, High_Low_Difference)
## # A tibble: 4,111 × 3
## symbol date High_Low_Difference
## <chr> <date> <dbl>
## 1 CRM 2016-01-04 1.57
## 2 CRM 2016-01-05 1.30
## 3 CRM 2016-01-06 1.70
## 4 CRM 2016-01-07 2.13
## 5 CRM 2016-01-08 2.36
## 6 CRM 2016-01-11 1.97
## 7 CRM 2016-01-12 2.23
## 8 CRM 2016-01-13 3.91
## 9 CRM 2016-01-14 2.75
## 10 CRM 2016-01-15 2.87
## # … with 4,101 more rows
# Using the transmute()
transmute(stocks,
HL_Difference_Transmute = high - low) %>%
# Selecting just new HL after using the transmute function
select(HL_Difference_Transmute)
## # A tibble: 4,111 × 1
## HL_Difference_Transmute
## <dbl>
## 1 1.57
## 2 1.30
## 3 1.70
## 4 2.13
## 5 2.36
## 6 1.97
## 7 2.23
## 8 3.91
## 9 2.75
## 10 2.87
## # … with 4,101 more rows
Collapsing data to a single row
stocks
## # A tibble: 4,111 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CRM 2016-01-04 77.1 77.2 75.6 76.7 4919200 76.7
## 2 CRM 2016-01-05 77.1 78.0 76.7 77.1 2656800 77.1
## 3 CRM 2016-01-06 75.7 77.3 75.6 76.3 3484400 76.3
## 4 CRM 2016-01-07 75.1 75.6 73.5 74.3 6972200 74.3
## 5 CRM 2016-01-08 74.8 75.3 72.9 73.2 3673800 73.2
## 6 CRM 2016-01-11 73.7 73.8 71.9 73.2 3685100 73.2
## 7 CRM 2016-01-12 74.0 75.3 73.1 74.5 4108100 74.5
## 8 CRM 2016-01-13 75.0 75.7 71.8 72.3 4727500 72.3
## 9 CRM 2016-01-14 72.4 73.6 70.9 73.1 4738200 73.1
## 10 CRM 2016-01-15 69.9 71.1 68.2 70.9 9572100 70.9
## # … with 4,101 more rows
# average closing price
summarise(stocks, close = mean(close, na.rm = TRUE))
## # A tibble: 1 × 1
## close
## <dbl>
## 1 135.
Summarize by group
stocks %>%
#Group by Symbol
group_by(symbol) %>%
# Calculate average adjusted price
summarise(adjusted = mean(adjusted, na.rm = TRUE)) %>%
#Sort it
arrange(adjusted)
## # A tibble: 3 × 2
## symbol adjusted
## <chr> <dbl>
## 1 PYPL 113.
## 2 ABNB 145.
## 3 CRM 154.
stocks %>%
group_by(symbol) %>%
summarise(count = n(),
adjusted = mean(adjusted, na.rm = TRUE),
close = mean(close, na.rm = TRUE)) %>%
# Plot
ggplot(mapping = aes(x = symbol, y = adjusted)) +
geom_point(aes(size = count), alpha = 0.3) +
geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Missing values
stocks %>%
#remove missing values
filter(!is.na(adjusted))
## # A tibble: 4,111 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CRM 2016-01-04 77.1 77.2 75.6 76.7 4919200 76.7
## 2 CRM 2016-01-05 77.1 78.0 76.7 77.1 2656800 77.1
## 3 CRM 2016-01-06 75.7 77.3 75.6 76.3 3484400 76.3
## 4 CRM 2016-01-07 75.1 75.6 73.5 74.3 6972200 74.3
## 5 CRM 2016-01-08 74.8 75.3 72.9 73.2 3673800 73.2
## 6 CRM 2016-01-11 73.7 73.8 71.9 73.2 3685100 73.2
## 7 CRM 2016-01-12 74.0 75.3 73.1 74.5 4108100 74.5
## 8 CRM 2016-01-13 75.0 75.7 71.8 72.3 4727500 72.3
## 9 CRM 2016-01-14 72.4 73.6 70.9 73.1 4738200 73.1
## 10 CRM 2016-01-15 69.9 71.1 68.2 70.9 9572100 70.9
## # … with 4,101 more rows
Grouping multiple variables
stocks %>%
group_by(symbol, open, adjusted) %>%
summarise(count = n()) %>%
ungroup()
## `summarise()` has grouped output by 'symbol', 'open'. You can override using
## the `.groups` argument.
## # A tibble: 4,111 × 4
## symbol open adjusted count
## <chr> <dbl> <dbl> <int>
## 1 ABNB 83.0 85.2 1
## 2 ABNB 83.2 82.5 1
## 3 ABNB 83.6 85.5 1
## 4 ABNB 84.7 85.2 1
## 5 ABNB 84.8 83.5 1
## 6 ABNB 85.5 84.9 1
## 7 ABNB 85.9 88.7 1
## 8 ABNB 86.7 87.6 1
## 9 ABNB 87.4 84.9 1
## 10 ABNB 87.5 87.1 1
## # … with 4,101 more rows