# 3D scatter of the x / y / z columns, coloured by price.
# Only a random 10% of the rows is drawn.
plot_ly(
  data = sample_frac(diamonds, size = 0.1),
  x = ~x,
  y = ~y,
  z = ~z,
  color = ~price
) %>%
  add_markers()
ばらつきや価格との関係など、x軸が重要っぽい
# Candidate bin widths offered by the slider: 1, 2, ..., 1000.
binwidths <- seq_len(1000)
# Bin width the histogram is drawn with before the slider is touched.
initial_binwidth <- 100

# One slider step per bin width; each step restyles the trace's `xbins.size`.
steps <- binwidths %>%
  map(~ list(
    args = list("xbins.size", .x),
    label = .x,
    method = "restyle",
    value = .x
  ))

# Histogram of price whose bin width can be changed interactively.
diamonds %>%
  plot_ly() %>%
  add_histogram(x = ~price, xbins = list(size = initial_binwidth)) %>%
  layout(sliders = list(
    list(
      # `active` is a zero-based step index. It has to point at the step that
      # matches the initial bin width; otherwise the slider label disagrees
      # with the histogram that is actually drawn.
      active = match(initial_binwidth, binwidths) - 1,
      currentvalue = list(prefix = "binwidth: "),
      steps = steps
    )
  ))
# Frequency polygon of carat with a fine 0.01 bin width.
ggplot(diamonds, aes(x = carat)) +
  geom_freqpoly(binwidth = 0.01)
0.99や0.49のように、切りの良い値をわずかに下回るカラットは極端に少ない。逆に、1.01のようにわずかに超える値は多い。
# Frequency polygon of carat, restricting the x scale itself to [0, 1].
ggplot(diamonds, aes(x = carat)) +
  geom_freqpoly() +
  xlim(0, 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Same frequency polygon, but only the visible x range is set to [0, 1]
# through the coordinate system.
ggplot(diamonds, aes(x = carat)) +
  geom_freqpoly() +
  coord_cartesian(xlim = c(0, 1))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
xlim()は範囲外の値を取り除いてからビンを計算するので、自動で決まるbinwidthが変わる。coord_cartesian()はビンを計算した後に表示範囲を絞るだけなので、binwidthは変わらない。
# Copy of diamonds in which y is kept only inside [3, 20];
# everything outside that range becomes NA.
diamonds2 <- mutate(
  diamonds,
  y = ifelse(y >= 3 & y <= 20, y, NA)
)

# Histogram of the cleaned y values.
ggplot(diamonds2, aes(x = y)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 9 rows containing non-finite values (stat_bin).
# Bar chart of the same column, for comparison with the histogram.
ggplot(diamonds2, aes(x = y)) +
  geom_bar()
## Warning: Removed 9 rows containing non-finite values (stat_count).
わからん
na.rmをする
# Scheduled departure time, split by whether air_time is missing.
ggplot(flights, aes(x = is.na(air_time), y = sched_dep_time)) +
  geom_boxplot()
library(ggstance)
##
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
##
## geom_errorbarh, GeomErrorbarh
# Horizontal boxplot, variant 1: vertical boxplot flipped with coord_flip().
ggplot(diamonds, aes(x = color, y = price)) +
  geom_boxplot() +
  coord_flip()

# Horizontal boxplot, variant 2: ggstance's geom_boxploth() with the
# aesthetics already given in their final orientation.
ggplot(diamonds, aes(x = price, y = color)) +
  geom_boxploth()
便利か?
library(lvplot)
# Letter-value plot of price by cut; boxes are filled by the computed LV level.
ggplot(diamonds, aes(x = cut, y = price)) +
  geom_lv(aes(fill = ..LV..))
芋虫みたいでキモい。外れ値という括りは大きすぎて不適切な場合があるので、もっと細かく見ようという動き。 この論文 (Letter-Value Plots: Boxplots for Large Data)を読むといいらしい。
# Price histogram, one panel per cut.
ggplot(diamonds, aes(x = price)) +
  geom_histogram() +
  facet_wrap(~ cut)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Price frequency polygons overlaid in one panel, one coloured line per cut.
ggplot(diamonds, aes(x = price, color = cut)) +
  geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Violin plot of price for each cut, side by side.
ggplot(diamonds, aes(x = cut, y = price)) +
  geom_violin()
並べてみやすい。
# Number of diamonds for every (color, cut) pair.
count1 <- count(diamonds, color, cut)

## cut within color
# Proportions are computed inside each color, so they sum to 1 per color.
count1 %>%
  group_by(color) %>%
  mutate(cut_p = n / sum(n)) %>%
  ggplot(aes(x = color, y = cut, fill = cut_p)) +
  geom_tile() +
  ggtitle("cut within color")

## color within cut
# Proportions are computed inside each cut, so they sum to 1 per cut.
count1 %>%
  group_by(cut) %>%
  mutate(color_p = n / sum(n)) %>%
  ggplot(aes(x = color, y = cut, fill = color_p)) +
  geom_tile() +
  ggtitle("color within cut")
# Mean departure delay for every (month, dest) pair, ignoring missing delays.
delay_count <- flights %>%
  group_by(month, dest) %>%
  summarise(d = mean(dep_delay, na.rm = TRUE)) %>%
  ungroup()

# Heatmap of the mean delay by month and destination.
ggplot(delay_count, aes(x = month, y = dest, fill = d)) +
  geom_tile()
歯抜けが多くて見辛いデータなので、存在しない組み合わせを0で埋める。
# Same heatmap, after adding every missing (month, dest) pair with d = 0.
delay_count %>%
  complete(month, dest, fill = list(d = 0)) %>%
  ggplot(aes(x = month, y = dest, fill = d)) +
  geom_tile()
???
legendのnのパターンと似た向きにした方が良いから?
# Density of carat, one line per price bin of width 10000.
ggplot(diamonds, aes(x = carat, color = cut_width(price, 10000))) +
  geom_freqpoly(aes(y = ..density..))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
上でやったプロットの逆
# Density of price, one line per carat group (two groups from cut_number()).
ggplot(diamonds, aes(x = price, color = cut_number(carat, 2))) +
  geom_freqpoly(aes(y = ..density..))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Tile plot over cut and 20 carat groups. `n` is the price-weighted count,
# i.e. the total price in each cell.
diamonds %>%
  count(cut, carat = cut_number(carat, 20), wt = price) %>%
  ggplot(aes(x = cut, y = carat, fill = n)) +
  geom_tile()

# Scatter plot of x against y, zoomed to [4, 11] on both axes.
ggplot(diamonds, aes(x = x, y = y)) +
  geom_point() +
  coord_cartesian(xlim = c(4, 11), ylim = c(4, 11))

# The same view with the points binned into 2D rectangles.
ggplot(diamonds, aes(x = x, y = y)) +
  geom_bin2d() +
  coord_cartesian(xlim = c(4, 11), ylim = c(4, 11))
outliersのような細かい特徴を、ノイズとして排除したい場合にgeom_bin2dは適切。