WGCNAのチュートリアルを写経しながら、メモを書いていく

1 data読み込み

# 作業ディレクトリを設定
wd = "~/pub/sampledata/wgcna_demo/"
setwd(wd)

# ライブラリロード
library(WGCNA)
packageVersion("WGCNA")

## [1] '1.70.3'

# データ読み込みの際のファクター化を抑制
options(stringsAsFactors = FALSE)

# データ読み込み set 135サンプル 3600遺伝子
femData = read.csv("./dat/LiverFemale3600.csv", header = T )
# dim(femData)　femData[1:3,1:10]

# データの変換. 
datExpr0 = as.data.frame(t(femData[, -c(1:8)])) # 発現データのみにして転置行列
names(datExpr0) = femData$substanceBXH  # 列ラベルに遺伝子ID
# rownames(datExpr0) = names(femData)[-c(1:8)];

head(datExpr0[1:4,1:4])

##       MMT00000044 MMT00000046 MMT00000051 MMT00000076
## F2_2    -1.81e-02     -0.0773     -0.0226    -0.00924
## F2_3     6.42e-02     -0.0297      0.0617    -0.14500
## F2_14    6.44e-05      0.1120     -0.1290     0.02870
## F2_15   -5.80e-02     -0.0589      0.0871    -0.04390

2 サンプル及び遺伝子のチェック`WGCNA::goodSamplesGenes`

# 欠失値の多いサンプル及び遺伝子、分散が0の遺伝子を検出
gsg <- WGCNA::goodSamplesGenes(datExpr0, verbose = 3)

##  Flagging genes and samples with too many missing values...
##   ..step 1

# 結果のオプジェクトを確認
str(gsg)

## List of 3
##  $ goodGenes  : logi [1:3600] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ goodSamples: logi [1:135] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ allOK      : logi TRUE

# 上のチェックをパスしなかったサンプル及び遺伝子を除く
if (!gsg$allOK)
{
  # Optionally, print the gene and sample names that were removed:
  if (sum(!gsg$goodGenes)>0) 
     printFlush(paste("Removing genes:", paste(names(datExpr0)[!gsg$goodGenes], collapse = ", ")));
  if (sum(!gsg$goodSamples)>0) 
     printFlush(paste("Removing samples:", paste(rownames(datExpr0)[!gsg$goodSamples], collapse = ", ")));
  # Remove the offending genes and samples from the data:
  datExpr0 = datExpr0[gsg$goodSamples, gsg$goodGenes]
}

3 サンプルのクラスタリングして外れザンプル除く`WGCNA::cutreeStatic`

cutreeStaticはhclustオブジェクトを入力として数値ベクトルを返す。クラスタメンバーの大きい順に1,2, … の順に割り当てられる。0は外れサンプル
cutHeightの指標はないのか。このサンプルでは視覚的に判断できるが。

# サンプルのクラスタリング
sampleTree = hclust(dist(datExpr0), method = "average")

# デンドログラム猫画
par(cex = 0.6, mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", 
     sub = "", xlab = "", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
abline(h = 15, col = "red")

# treeの枝の高さでカットする。数値ベクトルが返る。
clust = WGCNA::cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)

## clust
##   0   1 
##   1 134

# 外れサンンプルを除外したデータを作る
keepSamples = (clust != 0)
datExpr = datExpr0[keepSamples, ]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)

4 形質データの読み込み

# 形質データの読み込み
traitData = read.csv("./dat/ClinicalTraits.csv", stringsAsFactors = F)
# dim(traitData); names(traitData)
traitData[1:6,1:6]

##   X   Mice Number Mouse_ID          Strain sex
## 1 1 F2_290    290    306-4 BxH ApoE-/-, F2   2
## 2 2 F2_291    291    307-1 BxH ApoE-/-, F2   2
## 3 3 F2_292    292    307-2 BxH ApoE-/-, F2   1
## 4 4 F2_293    293    307-3 BxH ApoE-/-, F2   1
## 5 5 F2_294    294    307-4 BxH ApoE-/-, F2   1
## 6 6 F2_295    295    308-1 BxH ApoE-/-, F2   1

# 不用なカラムを除く(なぜ不用かは調べていない)
allTraits = traitData[, c(2, 11:15, 17:30, 32:36) ]
# dim(allTraits);names(allTraits)

# 発現データと対応する形質データを取り出す(行ラベルにサンプルIDを入れる)。
femaleSamples = rownames(datExpr)
traitRows = match(femaleSamples, allTraits$Mice)
datTraits = allTraits[traitRows, -1]
rownames(datTraits) = allTraits[traitRows, 1]

# ガベージコレクション. gc();gc()ではだめなのか。
WGCNA::collectGarbage()

5 再クラスタリング

numbers2colors 形質データに対応したカラ-マップを作る。形質データは全て数値ベクトルになっている。質的データを前項で除いたのか?

# Re-cluster samples
sampleTree2 = hclust(dist(datExpr), method = "average")

# Convert traits to a color representation: white means low, red means high, grey means missing entry
traitColors = WGCNA::numbers2colors(datTraits, signed = FALSE);

# 
WGCNA::plotDendroAndColors(sampleTree2, traitColors,
                    groupLabels = names(datTraits), 
                    main = "Sample dendrogram and trait heatmap")

6 環境.

sessionInfo()

## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] ja_JP.UTF-8/ja_JP.UTF-8/ja_JP.UTF-8/C/ja_JP.UTF-8/ja_JP.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] WGCNA_1.70-3          fastcluster_1.2.3     dynamicTreeCut_1.63-1
## 
## loaded via a namespace (and not attached):
##  [1] Biobase_2.42.0        sass_0.4.0            bit64_4.0.5          
##  [4] jsonlite_1.7.2        splines_3.5.2         foreach_1.5.1        
##  [7] bslib_0.2.5.1         Formula_1.2-4         assertthat_0.2.1     
## [10] highr_0.9             stats4_3.5.2          latticeExtra_0.6-28  
## [13] blob_1.2.2            yaml_2.2.1            impute_1.56.0        
## [16] pillar_1.6.1          RSQLite_2.2.7         backports_1.2.1      
## [19] lattice_0.20-44       glue_1.4.2            digest_0.6.27        
## [22] RColorBrewer_1.1-2    checkmate_2.0.0       colorspace_2.0-2     
## [25] htmltools_0.5.1.1     preprocessCore_1.44.0 Matrix_1.3-4         
## [28] pkgconfig_2.0.3       purrr_0.3.4           GO.db_3.7.0          
## [31] scales_1.1.1          tibble_3.1.3          htmlTable_2.2.1      
## [34] IRanges_2.16.0        ggplot2_3.3.5         ellipsis_0.3.2       
## [37] cachem_1.0.5          nnet_7.3-16           BiocGenerics_0.28.0  
## [40] survival_3.2-11       magrittr_2.0.1        crayon_1.4.1         
## [43] memoise_2.0.0         evaluate_0.14         fansi_0.5.0          
## [46] doParallel_1.0.16     foreign_0.8-72        tools_3.5.2          
## [49] data.table_1.14.0     lifecycle_1.0.0       matrixStats_0.60.0   
## [52] stringr_1.4.0         S4Vectors_0.20.1      munsell_0.5.0        
## [55] cluster_2.1.2         AnnotationDbi_1.44.0  compiler_3.5.2       
## [58] jquerylib_0.1.4       rlang_0.4.11          grid_3.5.2           
## [61] iterators_1.0.13      rstudioapi_0.13       htmlwidgets_1.5.3    
## [64] base64enc_0.1-3       rmarkdown_2.9         gtable_0.3.0         
## [67] codetools_0.2-18      DBI_1.1.1             R6_2.5.0             
## [70] gridExtra_2.3         knitr_1.33            dplyr_0.8.5          
## [73] fastmap_1.1.0         bit_4.0.4             utf8_1.2.2           
## [76] Hmisc_4.5-0           stringi_1.7.3         parallel_3.5.2       
## [79] Rcpp_1.0.7            vctrs_0.3.8           rpart_4.1-15         
## [82] tidyselect_1.1.1      xfun_0.24

WGCNA_demo

S.Konishi

2021-08-30

1 data読み込み

2 サンプル及び遺伝子のチェック`WGCNA::goodSamplesGenes`

3 サンプルのクラスタリングして外れザンプル除く`WGCNA::cutreeStatic`

4 形質データの読み込み

5 再クラスタリング

6 環境.

WGCNA_demo

S.Konishi

2021-08-30

1 data読み込み

2 サンプル及び遺伝子のチェックWGCNA::goodSamplesGenes

3 サンプルのクラスタリングして外れザンプル除くWGCNA::cutreeStatic

4 形質データの読み込み

5 再クラスタリング

6 環境.

2 サンプル及び遺伝子のチェック`WGCNA::goodSamplesGenes`

3 サンプルのクラスタリングして外れザンプル除く`WGCNA::cutreeStatic`