rev()# ライブラリ
pacman::p_load(knitr, ggplot2, dplyr, tidyr, qtl, RColorBrewer)
# サンプルデータ読み込み
data("listeria")
dat <- listeria$geno[[1]][[1]]
d <- data.frame(F2_indivisuals = factor(1:nrow(dat), levels = rev(1:nrow(dat))), dat)
d[1:5,1:5]## F2_indivisuals D10M44 D1M3 D1M75 D1M215
## 1 1 3 3 3 2
## 2 2 NA 3 3 3
## 3 3 NA 2 2 2
## 4 4 3 3 2 2
## 5 5 2 2 2 2
# 行持ちデータに変換
gd <- tidyr::gather(data = d, key = marker, value = allele_type, -F2_indivisuals) %>%
mutate(marker = factor(marker, levels = dimnames(dat)[[2]]),
allele_type = as.character(allele_type))
head(gd)## F2_indivisuals marker allele_type
## 1 1 D10M44 3
## 2 2 D10M44 <NA>
## 3 3 D10M44 <NA>
## 4 4 D10M44 3
## 5 5 D10M44 2
## 6 6 D10M44 2
# geom_tileを使ってそのままplot
ggplot(gd, aes(marker, F2_indivisuals)) +
geom_tile(aes(fill = allele_type),colour = "white") +
scale_fill_manual(values = c("steelblue", "darkseagreen1","violetred2")) +
theme(axis.text.x = element_text(angle = 90, size = 6, vjust = 0.5),
axis.text.y = element_text(size = 5))gplots::heatmap.2を用いた作図は自作関数を使った。library(rsko)
# マーカーのクラスタリング
cden <- d[-1] %>%
mutate_if(is.numeric, as.character) %>%
rsko::siml_mat(., method = "jaccard", vorder = T) %>%
{as.dendrogram(hclust(as.dist(.$jaccard_distance), "average"))}
# F2集団のクラスタリング
rden <- data.frame(t(d[-1]), check.names = F) %>%
mutate_if(is.numeric, as.character) %>%
rsko::siml_mat(., method = "jaccard", vorder = T) %>%
{as.dendrogram(hclust(as.dist(.$jaccard_distance), "average"))}
# geom_tileを使ってplot
cl.gd <- gd %>% mutate(marker = factor(marker, levels = labels(cden)),
F2_indivisuals = factor(F2_indivisuals, levels = rev(labels(rden))))
ggplot(cl.gd, aes(marker, F2_indivisuals)) +
geom_tile(aes(fill = allele_type),colour = "white") +
scale_fill_manual(values = c("steelblue", "darkseagreen1","violetred2")) +
theme(axis.text.x = element_text(angle = 90, size = 6, vjust = 0.5),
axis.text.y = element_text(size = 5))# gplots::heatmap.2を使ったヒートマップ(geom_tileで書いた場合と比べて行が入れ替わっていることに注意)
col <- c("steelblue", "darkseagreen1","violetred2")
rsko::myheat(d[-1], categorical = T, draw = "both", cden = cden, rden = rden, color = col)
legend("topleft", legend = c(levels(factor(as.matrix(d[-1]))),"null"),
col = c(col,NA), pch = 15, cex = 0.8)colors()の657色の色名のタイル表示# データ
d <- data.frame(matrix(c(1:length(colors()), rep(NA,3)), nrow = 60)) %>%
setNames(., 1:ncol(.)) %>%
tibble::rownames_to_column("Row") %>%
tidyr::gather(., key = k, value = v, -Row) %>%
dplyr::mutate(Row = factor(Row, levels = 60:1),
k = factor(k, levels = 1:11),
v = factor(v))
# テキストの色を見やすいように変える
cex.cols <- ifelse(1:660 %in% c(grep("dark",colors()), 24,61,153:199,260:309,477,490,491), "white", "black")
# geom_tile
ggplot2::ggplot(d, ggplot2::aes(k, Row)) +
ggplot2::geom_tile(ggplot2::aes(fill = v), colour = "white") +
ggplot2::scale_fill_manual(values = colors()) +
ggplot2::theme(legend.position = "none") +
ggplot2::labs(x = "", y = "") +
ggplot2::geom_text(ggplot2::aes(label = paste0(1:660, ":",c(colors(), rep("",3)))),
col = factor(cex.cols), size = 1.8)ggplot2::geom_tilescale(x, center=min(x), scale=(max(x) - min(x)))(x-min(x))/(max(x)-min(x)) * (M-m) + mcutを用いて離散値化(factorが返る), findIntervalを用いて離散値化(ベクトルが返る)findInterval(x, vec, all.inside, left.open), 開いているのが右か左かleft.open=T/F、その場合最小もしくは最大を含めるか否かall.inside=T/Fgeom_tileを使ってtile表示# サンプルデータ
nba <- read.csv("http://datasets.flowingdata.com/ppg2008.csv", check.names = F)
kableExtra::kable(head(nba), "html", align = "lr", caption = "head(nba)") %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left") | Name | G | MIN | PTS | FGM | FGA | FGP | FTM | FTA | FTP | 3PM | 3PA | 3PP | ORB | DRB | TRB | AST | STL | BLK | TO | PF |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Dwyane Wade | 79 | 38.6 | 30.2 | 10.8 | 22.0 | 0.491 | 7.5 | 9.8 | 0.765 | 1.1 | 3.5 | 0.317 | 1.1 | 3.9 | 5.0 | 7.5 | 2.2 | 1.3 | 3.4 | 2.3 |
| LeBron James | 81 | 37.7 | 28.4 | 9.7 | 19.9 | 0.489 | 7.3 | 9.4 | 0.780 | 1.6 | 4.7 | 0.344 | 1.3 | 6.3 | 7.6 | 7.2 | 1.7 | 1.1 | 3.0 | 1.7 |
| Kobe Bryant | 82 | 36.2 | 26.8 | 9.8 | 20.9 | 0.467 | 5.9 | 6.9 | 0.856 | 1.4 | 4.1 | 0.351 | 1.1 | 4.1 | 5.2 | 4.9 | 1.5 | 0.5 | 2.6 | 2.3 |
| Dirk Nowitzki | 81 | 37.7 | 25.9 | 9.6 | 20.0 | 0.479 | 6.0 | 6.7 | 0.890 | 0.8 | 2.1 | 0.359 | 1.1 | 7.3 | 8.4 | 2.4 | 0.8 | 0.8 | 1.9 | 2.2 |
| Danny Granger | 67 | 36.2 | 25.8 | 8.5 | 19.1 | 0.447 | 6.0 | 6.9 | 0.878 | 2.7 | 6.7 | 0.404 | 0.7 | 4.4 | 5.1 | 2.7 | 1.0 | 1.4 | 2.5 | 3.1 |
| Kevin Durant | 74 | 39.0 | 25.3 | 8.9 | 18.8 | 0.476 | 6.1 | 7.1 | 0.863 | 1.3 | 3.1 | 0.422 | 1.0 | 5.5 | 6.5 | 2.8 | 1.3 | 0.7 | 3.0 | 1.8 |
# 変数ごとに数値を0~1に標準化
scaled.mat <- as.matrix(
sapply(nba[-1], function(x){scale(x, center = min(x), scale = (max(x) - min(x)))}))
# findIntervalを用いて離散値化
br <- c(0, 0.2, 0.4, 0.6, 0.8, 1.0) # ブレークポイント(ここでは5段階)
disc.mat <- matrix(findInterval(scaled.mat, br, all.inside = TRUE), nrow = nrow(nba))
# 離散値変換したデータフレーム 行持ちデータに変換
nba_dat <- data.frame(nba["Name"], disc.mat, stringsAsFactors = F) %>%
setNames(., names(nba)) %>%
tidyr::gather(., key = k, value = v, -Name) %>%
mutate(Name = factor(Name, levels = rev(nba$Name)),
k = factor(k, levels = unique(k)))
# 5段階に離散値化したカラーコードとレジェンドラベル
v.lab <- sapply(1:5, function(i) paste(seq(0, 1, 0.2)[i:(i + 2 - 1)], collapse = " : "))
v.pal <- RColorBrewer::brewer.pal(5, "Blues")
# geom_tile
ggplot(nba_dat, aes(k, Name)) +
geom_tile(aes(fill = as.character(v)), colour = "white") +
scale_fill_manual(values = v.pal, name = "", labels = v.lab) +
theme(axis.text.x = element_text(angle = 90, size = 8, vjust = 0.5)) +
labs(x = NULL, y = NULL)ggplot2::geom_tileggplot2::scale_fill_gradient(low, high)でグラジエントカラーを自動的に割り振る。# 変数ごとに0~1に標準化したデータを行持ちデータに変換
scaled.mat2 <- as.matrix(
sapply(nba[-1], function(x){scale(x, center = min(x), scale = (max(x) - min(x)))}))
# # 0 ~ 2 に標準化
# M = 2; m = 0
# scaled.mat2 <- as.matrix(
# sapply(nba[-1], function(x){(x-min(x))/(max(x)-min(x)) * (M-m) + m}))
nba_dat2 <- data.frame(nba["Name"], scaled.mat2) %>%
setNames(names(nba)) %>%
tidyr::gather(., key = k, value = v, -Name) %>%
mutate(Name = factor(Name, levels = rev(nba$Name)),
k = factor(k, levels = unique(k)))
kableExtra::kable(head(nba_dat2), "html", align = "lr", caption = "head(nba)") %>%
kableExtra::kable_styling(bootstrap_options = "striped", full_width = F, position = "left") | Name | k | v |
|---|---|---|
| Dwyane Wade | G | 0.9473684 |
| LeBron James | G | 0.9824561 |
| Kobe Bryant | G | 1.0000000 |
| Dirk Nowitzki | G | 0.9824561 |
| Danny Granger | G | 0.7368421 |
| Kevin Durant | G | 0.8596491 |
# 連続値データのplot
ggplot(nba_dat2, aes(k, Name)) +
geom_tile(aes(fill = v), colour = "white") +
labs(x = "", fill = "") +
scale_fill_gradient(low = "white", high = "steelblue") +
theme(axis.text.x = element_text(angle = 90, size = 8, vjust = 0.5))hclust(dist(x))した結果を反映させた図ggplot2::geom_tile# (1-相関係数)を距離と定義してクラスタリング
cden <- nba[-1] %>%
{as.dist(1 - cor(.))} %>%
{as.dendrogram(hclust(., "average"))}
rden <- nba[-1] %>%
{as.dist(1 - cor(t(.)))} %>%
{as.dendrogram(hclust(., "average"))}
# クラスタリングの結果を元に因子の水準を指定する
cl.nba2 <- nba_dat2 %>%
mutate(k = factor(k, levels = labels(cden)),
Name = factor(Name, levels = rev(nba_dat2$Name[labels(rden)])))
# geom_tile
ggplot(cl.nba2, aes(k, Name)) +
geom_tile(aes(fill = v), colour = "white") +
scale_fill_gradient(low = "white", high = "steelblue") +
theme(axis.text.x = element_text(angle = 90, size = 8, vjust = 0.5)) +
labs(x = NULL, y = NULL, fill = NULL)gplots::heatmap.2を使ってクラスタリングとヒートマップgplots::colorpanel(n,low,mid,high)を使う。gplots::redgreen, gplots::greenred,gplots::bluered,gplots::redblueから選ぶ。scaleで指定した方向(“row”, “column”, “none”)に対して、平均0、分散1に標準化している。# add row names
dat <- tibble::column_to_rownames(nba, var = "Name")
# color code
cm250 <- gplots::colorpanel(n = 256, low = "deepskyblue", mid = "white", high = "violetred")
# heatmap.2
gplots::heatmap.2(
as.matrix(dat), # matrix
col = cm250, # gplots::bluered(256)
scale = "column", # "none", "row", "column"
dendrogram = "both", # "none", "row", "column", "both"
Colv = cden, # Column dendrogram object
Rowv = rden, # Row dendrogram object
key = TRUE,
keysize = 1,
key.title = NA,
symkey = FALSE,
density.info = "none",
trace = "none",
margin = c(6,6),
cexRow = 0.8,
cexCol = 0.8
#labRow = labels(cden),
#labCol = labels(rden)
)image()を用いてプロットfields::image.plot(legend.only=T)# データ
m <- disc.mat
# カラーコード
library(RColorBrewer)
col5 <- brewer.pal(5, "Blues")
# imageの出力は左下が起点なのでもとのデータの順番を変える
m <- t(m)[,nrow(m):1]
# イメージ関数を使ってmatrixをplotする
library(fields)
par(mar = c(4,2,2,6))
image(m, col = col5, xaxt = "n", yaxt = "n" )
par(new = T)
image.plot(m, legend.only = T, zlim = range(c(20,40,60,80,100)),
lab.breaks = seq(0,100, 20),
col = col5 )# 相関マトリックス
cor.mat <- cor(mtcars)
# 相関マトリックスを行持ちデータに変換
cor.dat <- data.frame(x = dimnames(cor.mat)[[1]], cor.mat) %>%
tidyr::gather(., key = keys, value = values, -x) %>%
mutate(x = factor(x, levels = unique(x)),
keys = factor(keys, levels = rev(unique(keys))))
knitr::kable(cor.mat[1:6,1:6], align = 'c', caption = "correlation matrix(6x6)")| mpg | cyl | disp | hp | drat | wt | |
|---|---|---|---|---|---|---|
| mpg | 1.0000000 | -0.8521620 | -0.8475514 | -0.7761684 | 0.6811719 | -0.8676594 |
| cyl | -0.8521620 | 1.0000000 | 0.9020329 | 0.8324475 | -0.6999381 | 0.7824958 |
| disp | -0.8475514 | 0.9020329 | 1.0000000 | 0.7909486 | -0.7102139 | 0.8879799 |
| hp | -0.7761684 | 0.8324475 | 0.7909486 | 1.0000000 | -0.4487591 | 0.6587479 |
| drat | 0.6811719 | -0.6999381 | -0.7102139 | -0.4487591 | 1.0000000 | -0.7124406 |
| wt | -0.8676594 | 0.7824958 | 0.8879799 | 0.6587479 | -0.7124406 | 1.0000000 |
| x | keys | values |
|---|---|---|
| mpg | mpg | 1.0000000 |
| cyl | mpg | -0.8521620 |
| disp | mpg | -0.8475514 |
| hp | mpg | -0.7761684 |
| drat | mpg | 0.6811719 |
| wt | mpg | -0.8676594 |
ggplot2::geom_tilescale_fill_gradientを使ってグラジエントカラーを割り当てggplot(cor.dat,
aes(x, keys, colour=values)) +
geom_tile(aes(fill = values), colour = "white") +
geom_text(aes(label=round(values,1)), size = 3, colour="black", show.legend = F) +
scale_fill_gradient(low = "white", high = "deeppink") +
labs(x="", y="") +
theme(axis.text.x=element_text(angle=90, size=8, vjust=0.5))gplots::heatmap.2dist及びhclustのデフォルトの引数を用いている(ユークリッド距離で距離定義, 完全連結法でクラスタリング)。# dendrogram
den <- as.dendrogram(hclust(as.dist(1-cor.mat)))
# heatmap
gplots::heatmap.2(
x = as.matrix(cor.mat),
Rowv = den,
Colv = den,
dendrogram = "both",
symm = T,
revC = T,
col = rev(heat.colors(256)),
scale = "none",
key = TRUE,
keysize = 1,
symkey = FALSE,
density.info = "none",
trace = "none",
margin = c(6,5),
cellnote = as.matrix(round(cor.mat, 1)),
notecol = "black", # cell label colour
notecex = 1, # cell label cex
cexCol = 0.8 # Column label cex
)## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] ja_JP.UTF-8/ja_JP.UTF-8/ja_JP.UTF-8/C/ja_JP.UTF-8/ja_JP.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] fields_9.6 maps_3.3.0 spam_2.2-1
## [4] dotCall64_1.0-0 rsko_0.1.0 bindrcpp_0.2.2
## [7] RColorBrewer_1.1-2 qtl_1.42-8 tidyr_0.8.2
## [10] dplyr_0.7.8 ggplot2_3.1.0 knitr_1.21
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-6 bit64_0.9-7 webshot_0.5.1
## [4] httr_1.4.0 prabclus_2.2-7 Rgraphviz_2.24.0
## [7] dynamicTreeCut_1.63-1 tools_3.5.1 R6_2.3.0
## [10] KernSmooth_2.23-15 DBI_1.0.0 lazyeval_0.2.1
## [13] BiocGenerics_0.26.0 colorspace_1.4-0 trimcluster_0.1-2.1
## [16] nnet_7.3-12 withr_2.1.2 tidyselect_0.2.5
## [19] gridExtra_2.3 bit_1.1-14 compiler_3.5.1
## [22] rvest_0.3.2 graph_1.58.2 Biobase_2.40.0
## [25] pacman_0.5.0 xml2_1.2.0 labeling_0.3
## [28] diptest_0.75-7 caTools_1.17.1.1 KEGGgraph_1.40.0
## [31] scales_1.0.0 DEoptimR_1.0-8 mvtnorm_1.0-8
## [34] robustbase_0.93-3 readr_1.3.1 stringr_1.3.1
## [37] digest_0.6.18 rmarkdown_1.11 XVector_0.20.0
## [40] pkgconfig_2.0.2 htmltools_0.3.6 highr_0.7
## [43] rlang_0.3.1 rstudioapi_0.9.0 RSQLite_2.1.1
## [46] bindr_0.1.1 zoo_1.8-4 mclust_5.4.2
## [49] gtools_3.8.1 dendextend_1.9.0 magrittr_1.5
## [52] modeltools_0.2-22 kableExtra_1.0.0 Rcpp_1.0.0
## [55] munsell_0.5.0 S4Vectors_0.18.3 viridis_0.5.1
## [58] pathview_1.20.0 stringi_1.2.4 whisker_0.3-2
## [61] yaml_2.2.0 MASS_7.3-51.1 zlibbioc_1.26.0
## [64] flexmix_2.3-14 gplots_3.0.1 plyr_1.8.4
## [67] blob_1.1.1 parallel_3.5.1 gdata_2.18.0
## [70] ggrepel_0.8.0 crayon_1.3.4 lattice_0.20-38
## [73] Biostrings_2.48.0 hms_0.4.2 KEGGREST_1.20.2
## [76] pillar_1.3.1 fpc_2.1-11.1 stats4_3.5.1
## [79] XML_3.98-1.16 glue_1.3.0 evaluate_0.12
## [82] png_0.1-7 gtable_0.2.0 purrr_0.2.5
## [85] kernlab_0.9-27 amap_0.8-16 assertthat_0.2.0
## [88] xfun_0.4 class_7.3-15 viridisLite_0.3.0
## [91] tibble_2.0.1 AnnotationDbi_1.42.1 memoise_1.1.0
## [94] IRanges_2.14.12 cluster_2.0.7-1