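count 标准化及过滤函数
- count_norm 函数：可选地先按 CPM 过滤低表达基因，再用 voom 进行标准化，返回标准化后的表达矩阵
- counts：输入的 raw count 矩阵（行为基因/miRNA，列为样本）
- filter：是否过滤低表达基因，默认为 TRUE
- percent：过滤比率，仅保留在至少 percent 比例的样本中 CPM > 1 的基因，默认为 0.5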
#' Normalise raw counts with edgeR normalisation factors and limma voom,
#' optionally removing low-expression features first.
#'
#' @param counts Raw count table passed to `edgeR::DGEList()`; features
#'   (genes / miRNAs) in rows and samples in columns.
#' @param filter If `TRUE` (default), keep only features whose CPM is above 1
#'   in at least `percent * ncol(counts)` samples before running voom.
#' @param percent Fraction of samples that must exceed CPM > 1 for a feature
#'   to be kept. Only used when `filter` is `TRUE`.
#' @return The `E` component of the `limma::voom()` result for the (optionally
#'   filtered) features.
count_norm <- function(counts, filter = TRUE, percent = 0.5) {
  # Reject anything that is not a single TRUE/FALSE up front; previously such
  # values skipped both branches and failed later with "object 'v' not found".
  filter <- as.logical(filter)
  if (length(filter) != 1L || is.na(filter)) {
    stop("`filter` must be a single TRUE or FALSE value.", call. = FALSE)
  }

  # Only edgeR and limma functions are used below. GDCRNATools is a
  # Bioconductor package, so the former install.packages() fallback (CRAN)
  # could not install it; it is now attached only when already installed.
  # NOTE(review): the attach is kept in case later code relies on it - confirm.
  if (requireNamespace("GDCRNATools", quietly = TRUE)) {
    library(GDCRNATools)
  }
  library(edgeR)
  library(limma)

  expr <- edgeR::DGEList(counts = counts)
  expr <- edgeR::calcNormFactors(expr)

  if (filter) {
    if (!is.numeric(percent) || length(percent) != 1L || is.na(percent)) {
      stop("`percent` must be a single non-missing number.", call. = FALSE)
    }

    keep <- rowSums(edgeR::cpm(expr) > 1) >= percent * ncol(counts)

    # Count directly instead of indexing summary(keep): the summary of a
    # logical vector has no third element when all values are TRUE or all are
    # FALSE, which made both reported numbers NA.
    n_genes <- length(keep)
    n_keep <- sum(keep)
    cat("Total Number of genes: ", n_genes, "\n", sep = "")
    cat("Number of genes for downstream analysis: ", n_keep, "\n", sep = "")

    # keep.lib.sizes = TRUE leaves the library sizes as computed on all rows.
    expr <- expr[keep, , keep.lib.sizes = TRUE]
  }

  limma::voom(expr, design = NULL, plot = FALSE)$E
}
# Load the saved workspace. It is expected to provide `mirCounts`, the raw
# count table used below - TODO confirm against the contents of the .Rdata file.
load("BRCA_mirCounts.Rdata")
# Normalise the counts, keeping only features that pass the CPM filter in at
# least 10% of the samples.
mir_norm <- count_norm(counts = mirCounts, filter = TRUE, percent = 0.1)
## Loading required package: GDCRNATools
##
##
## Registered S3 method overwritten by 'enrichplot':
## method from
## fortify.enrichResult DOSE
## ##############################################################################
## Pathview is an open source software package distributed under GNU General
## Public License version 3 (GPLv3). Details of GPLv3 is available at
## http://www.gnu.org/licenses/gpl-3.0.html. Particullary, users are required to
## formally cite the original Pathview paper (not just mention it) in publications
## or products. For details, do citation("pathview") within R.
##
## The pathview downloads and uses KEGG data. Non-academic uses may require a KEGG
## license agreement (details at http://www.kegg.jp/kegg/legal.html).
## ##############################################################################
## Loading required package: limma
## Total Number of genes: 2588
## Number of genes for downstream analysis: 653
# Report the dimensions of the normalised matrix (rows x columns).
dim(mir_norm)
## [1] 653 1182
# Preview the first five rows and first five columns.
mir_norm[seq_len(5), seq_len(5)]
## TCGA-3C-AAAU-01 TCGA-3C-AALI-01 TCGA-3C-AALJ-01
## hsa-let-7a-5p 15.170973 15.4940258 15.453850
## hsa-let-7a-3p 6.167795 5.1528795 4.846815
## hsa-let-7a-2-3p 3.027508 0.8748948 1.433066
## hsa-let-7b-5p 14.631798 15.0857437 13.852763
## hsa-let-7b-3p 5.856256 5.4315286 3.835165
## TCGA-3C-AALK-01 TCGA-4H-AAAK-01
## hsa-let-7a-5p 15.592174 15.966968
## hsa-let-7a-3p 5.222530 5.380424
## hsa-let-7a-2-3p 2.773889 2.071671
## hsa-let-7b-5p 15.109550 14.981025
## hsa-let-7b-3p 5.362788 5.457730
## save
#save(mir_norm,file = "BRCA_mir_norm_filter_10.Rdata")