count标准化及过滤函数

#' Normalize a raw count matrix, optionally filtering low-expression rows.
#'
#' Builds an `edgeR::DGEList` from `counts`, computes normalization factors
#' with `edgeR::calcNormFactors()` (default method), optionally drops rows
#' whose CPM is too low, and returns the `E` component of `limma::voom()`
#' (normalized log2-scale expression, per the limma documentation).
#'
#' @param counts Raw counts accepted by `edgeR::DGEList()`. The filter sums
#'   over rows and scales the threshold by `ncol(counts)`, so rows are treated
#'   as genes and columns as samples.
#' @param filter Single logical. When `TRUE`, keep only rows with CPM > 1 in
#'   at least `percent * ncol(counts)` columns and print the gene totals.
#' @param percent Fraction of columns in which a row must exceed CPM > 1 to
#'   be kept. Only used when `filter` is `TRUE`.
#' @return The `E` matrix produced by `limma::voom()` for the (possibly
#'   filtered) data.
count_norm <- function(counts, filter = TRUE, percent = 0.5) {
  # Coerce so values the old `filter == TRUE` / `filter == FALSE` comparisons
  # accepted keep working, but reject anything ambiguous with a clear error
  # instead of failing later with "object 'v' not found".
  filter <- as.logical(filter)
  if (length(filter) != 1L || is.na(filter)) {
    stop("`filter` must be a single TRUE or FALSE value", call. = FALSE)
  }

  # GDCRNATools is not used by any computation below. It is attached only when
  # already installed, to preserve the previous side effect for existing
  # scripts; it is never installed at call time (it is a Bioconductor package,
  # so install.packages() against the default CRAN repositories would not
  # provide it).
  if (requireNamespace("GDCRNATools", quietly = TRUE)) {
    library(GDCRNATools)
  }
  # Attaching edgeR also attaches limma, which provides voom().
  library(edgeR)

  expr <- edgeR::DGEList(counts = counts)
  expr <- edgeR::calcNormFactors(expr)

  if (filter) {
    keep <- rowSums(edgeR::cpm(expr) > 1) >= percent * ncol(counts)
    # length()/sum() stay correct when every row passes or every row fails;
    # indexing summary(keep) by position yields NA in those cases.
    cat(paste0("Total Number of genes: ", length(keep), "\n"))
    cat(paste0("Number of genes for downstream analysis: ", sum(keep), "\n"))
    # keep.lib.sizes = TRUE: library sizes are not recomputed after filtering.
    expr <- expr[keep, , keep.lib.sizes = TRUE]
  }

  limma::voom(expr, design = NULL, plot = FALSE)$E
}

# Load the saved workspace file; the next step reads `mirCounts` from it.
load(file = "BRCA_mirCounts.Rdata")

## Normalize, keeping rows with CPM > 1 in at least 10% of the columns.
mir_norm <- count_norm(counts = mirCounts, filter = TRUE, percent = 0.1)
# Console output recorded from the original run (package startup messages
# may differ between installations):
## Loading required package: GDCRNATools
## 
## 
## Registered S3 method overwritten by 'enrichplot':
##   method               from
##   fortify.enrichResult DOSE
## ##############################################################################
## Pathview is an open source software package distributed under GNU General
## Public License version 3 (GPLv3). Details of GPLv3 is available at
## http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
## formally cite the original Pathview paper (not just mention it) in publications
## or products. For details, do citation("pathview") within R.
## 
## The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
## license agreement (details at http://www.kegg.jp/kegg/legal.html).
## ##############################################################################
## Loading required package: limma
## Total Number of genes: 2588
## Number of genes for downstream analysis: 653

# Dimensions of the normalized matrix.
dim(mir_norm)
## [1]  653 1182

# Preview the top-left 5 x 5 corner.
mir_norm[seq_len(5), seq_len(5)]
##                 TCGA-3C-AAAU-01 TCGA-3C-AALI-01 TCGA-3C-AALJ-01
## hsa-let-7a-5p         15.170973      15.4940258       15.453850
## hsa-let-7a-3p          6.167795       5.1528795        4.846815
## hsa-let-7a-2-3p        3.027508       0.8748948        1.433066
## hsa-let-7b-5p         14.631798      15.0857437       13.852763
## hsa-let-7b-3p          5.856256       5.4315286        3.835165
##                 TCGA-3C-AALK-01 TCGA-4H-AAAK-01
## hsa-let-7a-5p         15.592174       15.966968
## hsa-let-7a-3p          5.222530        5.380424
## hsa-let-7a-2-3p        2.773889        2.071671
## hsa-let-7b-5p         15.109550       14.981025
## hsa-let-7b-3p          5.362788        5.457730

## Optional: save the normalized matrix (left disabled).
#save(mir_norm,file = "BRCA_mir_norm_filter_10.Rdata")

参考资料

参考文献：GDCRNATools