# Download daily stock prices for the AAL and OXY tickers, covering
# 2016-01-01 through 2017-01-01, into one long tibble.
stocks <- tq_get(
  c("AAL", "OXY"),
  get  = "stock.prices",
  from = "2016-01-01",
  to   = "2017-01-01"
)

# Print the downloaded data.
stocks
## # A tibble: 504 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAL 2016-01-04 41.3 41.3 40.3 40.9 12037200 39.1
## 2 AAL 2016-01-05 41.2 41.5 40.0 40.5 10514200 38.7
## 3 AAL 2016-01-06 40.0 41.7 40 41.2 12393800 39.4
## 4 AAL 2016-01-07 41.0 41.5 39.7 40.5 11312900 38.7
## 5 AAL 2016-01-08 40.9 41.4 40.3 40.4 8853100 38.6
## 6 AAL 2016-01-11 40.6 41.2 39.9 41.1 15877500 39.3
## 7 AAL 2016-01-12 41.2 42.4 40.5 42 12636000 40.1
## 8 AAL 2016-01-13 42.2 42.7 39.7 40.1 12718600 38.3
## 9 AAL 2016-01-14 40.2 40.8 38.9 40.5 11756500 38.8
## 10 AAL 2016-01-15 39.0 39.9 38.4 38.8 18924800 37.0
## # … with 494 more rows
# Line chart of the adjusted price over time, one coloured line per symbol.
ggplot(stocks, aes(x = date, y = adjusted, color = symbol)) +
  geom_line()
# Keep only the rows whose adjusted price is above 24.
filter(stocks, adjusted > 24)
## # A tibble: 504 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAL 2016-01-04 41.3 41.3 40.3 40.9 12037200 39.1
## 2 AAL 2016-01-05 41.2 41.5 40.0 40.5 10514200 38.7
## 3 AAL 2016-01-06 40.0 41.7 40 41.2 12393800 39.4
## 4 AAL 2016-01-07 41.0 41.5 39.7 40.5 11312900 38.7
## 5 AAL 2016-01-08 40.9 41.4 40.3 40.4 8853100 38.6
## 6 AAL 2016-01-11 40.6 41.2 39.9 41.1 15877500 39.3
## 7 AAL 2016-01-12 41.2 42.4 40.5 42 12636000 40.1
## 8 AAL 2016-01-13 42.2 42.7 39.7 40.1 12718600 38.3
## 9 AAL 2016-01-14 40.2 40.8 38.9 40.5 11756500 38.8
## 10 AAL 2016-01-15 39.0 39.9 38.4 38.8 18924800 37.0
## # … with 494 more rows
# Sort by the daily low, highest first; ties are broken by the daily high,
# also highest first.
stocks %>%
  arrange(desc(low), desc(high))
## # A tibble: 504 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 OXY 2016-08-30 78.5 78.5 77.6 77.8 2309600 62.5
## 2 OXY 2016-09-06 77.6 78.2 77.4 78 3978800 62.7
## 3 OXY 2016-06-23 77.8 78.3 77.3 78.3 4104000 62.9
## 4 OXY 2016-08-25 77.7 77.7 77 77.4 3228100 62.2
## 5 OXY 2016-09-07 77.4 77.8 76.9 77.1 3881700 62.6
## 6 OXY 2016-07-14 77.6 77.7 76.9 77.2 4158200 62.0
## 7 OXY 2016-07-15 77.3 77.6 76.9 77.3 3886400 62.1
## 8 OXY 2016-08-26 77.5 77.9 76.8 77.2 3517400 62.0
## 9 OXY 2016-09-08 77.6 77.9 76.8 77.8 5019200 63.1
## 10 OXY 2016-06-22 77.3 77.7 76.8 77.1 3564000 62.0
## # … with 494 more rows
# Keep the contiguous run of columns from `date` through `open`.
stocks %>%
  select(date:open)
## # A tibble: 504 × 2
## date open
## <date> <dbl>
## 1 2016-01-04 41.3
## 2 2016-01-05 41.2
## 3 2016-01-06 40.0
## 4 2016-01-07 41.0
## 5 2016-01-08 40.9
## 6 2016-01-11 40.6
## 7 2016-01-12 41.2
## 8 2016-01-13 42.2
## 9 2016-01-14 40.2
## 10 2016-01-15 39.0
## # … with 494 more rows
# Keep four named columns, in the order listed.
stocks %>%
  select(date, open, volume, adjusted)
## # A tibble: 504 × 4
## date open volume adjusted
## <date> <dbl> <dbl> <dbl>
## 1 2016-01-04 41.3 12037200 39.1
## 2 2016-01-05 41.2 10514200 38.7
## 3 2016-01-06 40.0 12393800 39.4
## 4 2016-01-07 41.0 11312900 38.7
## 5 2016-01-08 40.9 8853100 38.6
## 6 2016-01-11 40.6 15877500 39.3
## 7 2016-01-12 41.2 12636000 40.1
## 8 2016-01-13 42.2 12718600 38.3
## 9 2016-01-14 40.2 11756500 38.8
## 10 2016-01-15 39.0 18924800 37.0
## # … with 494 more rows
# Add `change`: the difference between each row's high and low price.
stocks %>%
  mutate(change = high - low)
## # A tibble: 504 × 9
## symbol date open high low close volume adjusted change
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAL 2016-01-04 41.3 41.3 40.3 40.9 12037200 39.1 1.05
## 2 AAL 2016-01-05 41.2 41.5 40.0 40.5 10514200 38.7 1.54
## 3 AAL 2016-01-06 40.0 41.7 40 41.2 12393800 39.4 1.70
## 4 AAL 2016-01-07 41.0 41.5 39.7 40.5 11312900 38.7 1.84
## 5 AAL 2016-01-08 40.9 41.4 40.3 40.4 8853100 38.6 1.10
## 6 AAL 2016-01-11 40.6 41.2 39.9 41.1 15877500 39.3 1.30
## 7 AAL 2016-01-12 41.2 42.4 40.5 42 12636000 40.1 1.88
## 8 AAL 2016-01-13 42.2 42.7 39.7 40.1 12718600 38.3 3.08
## 9 AAL 2016-01-14 40.2 40.8 38.9 40.5 11756500 38.8 1.92
## 10 AAL 2016-01-15 39.0 39.9 38.4 38.8 18924800 37.0 1.52
## # … with 494 more rows
Collapsing data to a single row
# Print the row-level data for reference before it is summarised below.
stocks
## # A tibble: 504 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAL 2016-01-04 41.3 41.3 40.3 40.9 12037200 39.1
## 2 AAL 2016-01-05 41.2 41.5 40.0 40.5 10514200 38.7
## 3 AAL 2016-01-06 40.0 41.7 40 41.2 12393800 39.4
## 4 AAL 2016-01-07 41.0 41.5 39.7 40.5 11312900 38.7
## 5 AAL 2016-01-08 40.9 41.4 40.3 40.4 8853100 38.6
## 6 AAL 2016-01-11 40.6 41.2 39.9 41.1 15877500 39.3
## 7 AAL 2016-01-12 41.2 42.4 40.5 42 12636000 40.1
## 8 AAL 2016-01-13 42.2 42.7 39.7 40.1 12718600 38.3
## 9 AAL 2016-01-14 40.2 40.8 38.9 40.5 11756500 38.8
## 10 AAL 2016-01-15 39.0 39.9 38.4 38.8 18924800 37.0
## # … with 494 more rows
# Collapse the whole table to one row: the mean of `high` over every row
# (all symbols together), ignoring missing values.
stocks %>%
  summarise(avg_high = mean(high, na.rm = TRUE))
## # A tibble: 1 × 1
## avg_high
## <dbl>
## 1 55.7
Summarize by Group
# Mean daily high per symbol (missing values ignored), ordered from the
# lowest average to the highest.
group_by(stocks, symbol) %>%
  summarise(avg_high = mean(high, na.rm = TRUE)) %>%
  arrange(avg_high)
## # A tibble: 2 × 2
## symbol avg_high
## <chr> <dbl>
## 1 AAL 38.7
## 2 OXY 72.7
Mean opening price plotted against closing price (point size shows how many rows share that closing price)
# For every distinct closing price, count the rows that share it and average
# their opening prices, then plot the mean open against the close.
stocks %>%
  group_by(close) %>%
  # `close` is the grouping key, so it is constant within each group and is
  # carried into the summary automatically; it is not recomputed here.
  summarise(
    count = n(),
    open = mean(open, na.rm = TRUE)
  ) %>%
  ggplot(mapping = aes(x = close, y = open)) +
  # Point size encodes how many rows share the closing price.
  geom_point(aes(size = count), alpha = 0.3) +
  # Smoothed trend line drawn without its confidence band.
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Missing Values
# Drop every row whose `volume` is missing.
filter(stocks, !is.na(volume))
## # A tibble: 504 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAL 2016-01-04 41.3 41.3 40.3 40.9 12037200 39.1
## 2 AAL 2016-01-05 41.2 41.5 40.0 40.5 10514200 38.7
## 3 AAL 2016-01-06 40.0 41.7 40 41.2 12393800 39.4
## 4 AAL 2016-01-07 41.0 41.5 39.7 40.5 11312900 38.7
## 5 AAL 2016-01-08 40.9 41.4 40.3 40.4 8853100 38.6
## 6 AAL 2016-01-11 40.6 41.2 39.9 41.1 15877500 39.3
## 7 AAL 2016-01-12 41.2 42.4 40.5 42 12636000 40.1
## 8 AAL 2016-01-13 42.2 42.7 39.7 40.1 12718600 38.3
## 9 AAL 2016-01-14 40.2 40.8 38.9 40.5 11756500 38.8
## 10 AAL 2016-01-15 39.0 39.9 38.4 38.8 18924800 37.0
## # … with 494 more rows