WGCNAのチュートリアルを写経しながら、メモを書いていく
# 作業ディレクトリを設定
wd = "~/pub/sampledata/wgcna_demo/"
setwd(wd)
# ライブラリロード
library(WGCNA)
packageVersion("WGCNA")## [1] '1.70.3'
# データ読み込みの際のファクター化を抑制
options(stringsAsFactors = FALSE)
# データ読み込み set 135サンプル 3600遺伝子
femData = read.csv("./dat/LiverFemale3600.csv", header = T )
# dim(femData) femData[1:3,1:10]
# データの変換.
datExpr0 = as.data.frame(t(femData[, -c(1:8)])) # 発現データのみにして転置行列
names(datExpr0) = femData$substanceBXH # 列ラベルに遺伝子ID
# rownames(datExpr0) = names(femData)[-c(1:8)];
head(datExpr0[1:4,1:4])## MMT00000044 MMT00000046 MMT00000051 MMT00000076
## F2_2 -1.81e-02 -0.0773 -0.0226 -0.00924
## F2_3 6.42e-02 -0.0297 0.0617 -0.14500
## F2_14 6.44e-05 0.1120 -0.1290 0.02870
## F2_15 -5.80e-02 -0.0589 0.0871 -0.04390
WGCNA::goodSamplesGenes## Flagging genes and samples with too many missing values...
## ..step 1
## List of 3
## $ goodGenes : logi [1:3600] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ goodSamples: logi [1:135] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ allOK : logi TRUE
# 上のチェックをパスしなかったサンプル及び遺伝子を除く
if (!gsg$allOK)
{
# Optionally, print the gene and sample names that were removed:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
}WGCNA::cutreeStaticcutreeStaticはhclustオブジェクトを入力として数値ベクトルを返す。クラスタメンバーの大きい順に1,2, … の順に割り当てられる。0は外れサンプルcutHeightの指標はないのか。このサンプルでは視覚的に判断できるが。# サンプルのクラスタリング
sampleTree = hclust(dist(datExpr0), method = "average")
# デンドログラム猫画
par(cex = 0.6, mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers",
sub = "", xlab = "", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
abline(h = 15, col = "red")# treeの枝の高さでカットする。数値ベクトルが返る。
clust = WGCNA::cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)## clust
## 0 1
## 1 134
# 形質データの読み込み
traitData = read.csv("./dat/ClinicalTraits.csv", stringsAsFactors = F)
# dim(traitData); names(traitData)
traitData[1:6,1:6]## X Mice Number Mouse_ID Strain sex
## 1 1 F2_290 290 306-4 BxH ApoE-/-, F2 2
## 2 2 F2_291 291 307-1 BxH ApoE-/-, F2 2
## 3 3 F2_292 292 307-2 BxH ApoE-/-, F2 1
## 4 4 F2_293 293 307-3 BxH ApoE-/-, F2 1
## 5 5 F2_294 294 307-4 BxH ApoE-/-, F2 1
## 6 6 F2_295 295 308-1 BxH ApoE-/-, F2 1
# 不用なカラムを除く(なぜ不用かは調べていない)
allTraits = traitData[, c(2, 11:15, 17:30, 32:36) ]
# dim(allTraits);names(allTraits)
# 発現データと対応する形質データを取り出す(行ラベルにサンプルIDを入れる)。
femaleSamples = rownames(datExpr)
traitRows = match(femaleSamples, allTraits$Mice)
datTraits = allTraits[traitRows, -1]
rownames(datTraits) = allTraits[traitRows, 1]
# ガベージコレクション. gc();gc()ではだめなのか。
WGCNA::collectGarbage()numbers2colors 形質データに対応したカラ-マップを作る。形質データは全て数値ベクトルになっている。質的データを前項で除いたのか?# Re-cluster samples
sampleTree2 = hclust(dist(datExpr), method = "average")
# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = WGCNA::numbers2colors(datTraits, signed = FALSE);
#
WGCNA::plotDendroAndColors(sampleTree2, traitColors,
groupLabels = names(datTraits),
main = "Sample dendrogram and trait heatmap")## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] ja_JP.UTF-8/ja_JP.UTF-8/ja_JP.UTF-8/C/ja_JP.UTF-8/ja_JP.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] WGCNA_1.70-3 fastcluster_1.2.3 dynamicTreeCut_1.63-1
##
## loaded via a namespace (and not attached):
## [1] Biobase_2.42.0 sass_0.4.0 bit64_4.0.5
## [4] jsonlite_1.7.2 splines_3.5.2 foreach_1.5.1
## [7] bslib_0.2.5.1 Formula_1.2-4 assertthat_0.2.1
## [10] highr_0.9 stats4_3.5.2 latticeExtra_0.6-28
## [13] blob_1.2.2 yaml_2.2.1 impute_1.56.0
## [16] pillar_1.6.1 RSQLite_2.2.7 backports_1.2.1
## [19] lattice_0.20-44 glue_1.4.2 digest_0.6.27
## [22] RColorBrewer_1.1-2 checkmate_2.0.0 colorspace_2.0-2
## [25] htmltools_0.5.1.1 preprocessCore_1.44.0 Matrix_1.3-4
## [28] pkgconfig_2.0.3 purrr_0.3.4 GO.db_3.7.0
## [31] scales_1.1.1 tibble_3.1.3 htmlTable_2.2.1
## [34] IRanges_2.16.0 ggplot2_3.3.5 ellipsis_0.3.2
## [37] cachem_1.0.5 nnet_7.3-16 BiocGenerics_0.28.0
## [40] survival_3.2-11 magrittr_2.0.1 crayon_1.4.1
## [43] memoise_2.0.0 evaluate_0.14 fansi_0.5.0
## [46] doParallel_1.0.16 foreign_0.8-72 tools_3.5.2
## [49] data.table_1.14.0 lifecycle_1.0.0 matrixStats_0.60.0
## [52] stringr_1.4.0 S4Vectors_0.20.1 munsell_0.5.0
## [55] cluster_2.1.2 AnnotationDbi_1.44.0 compiler_3.5.2
## [58] jquerylib_0.1.4 rlang_0.4.11 grid_3.5.2
## [61] iterators_1.0.13 rstudioapi_0.13 htmlwidgets_1.5.3
## [64] base64enc_0.1-3 rmarkdown_2.9 gtable_0.3.0
## [67] codetools_0.2-18 DBI_1.1.1 R6_2.5.0
## [70] gridExtra_2.3 knitr_1.33 dplyr_0.8.5
## [73] fastmap_1.1.0 bit_4.0.4 utf8_1.2.2
## [76] Hmisc_4.5-0 stringi_1.7.3 parallel_3.5.2
## [79] Rcpp_1.0.7 vctrs_0.3.8 rpart_4.1-15
## [82] tidyselect_1.1.1 xfun_0.24