rm(list = ls()) ## 魔幻操作,一键清空~
options(stringsAsFactors = F)
load(file = '../input.Rdata')
a[1:4,1:4]
## SS2_15_0048_A3 SS2_15_0048_A6 SS2_15_0048_A5 SS2_15_0048_A4
## 0610005C13Rik 0 0 0 1
## 0610007P14Rik 0 0 18 11
## 0610009B22Rik 0 0 0 0
## 0610009L18Rik 0 0 0 0
head(df)
## g plate n_g all
## SS2_15_0048_A3 1 0048 2624 all
## SS2_15_0048_A6 1 0048 2664 all
## SS2_15_0048_A5 1 0048 3319 all
## SS2_15_0048_A4 2 0048 4447 all
## SS2_15_0048_A1 2 0048 4725 all
## SS2_15_0048_A2 2 0048 5263 all
## 载入第0步准备好的表达矩阵,及细胞的一些属性(hclust分群,plate批次,检测到的细胞基因)
# 注意 变量a是原始的counts矩阵,变量 dat是logCPM后的表达量矩阵。
group_list=df$g
plate=df$plate
table(plate)
## plate
## 0048 0049
## 384 384
rownames(dat)=toupper(rownames(dat)) ##toupper()函数,把小写字符转换成大写字符
dat[1:4,1:4]
## SS2_15_0048_A3 SS2_15_0048_A6 SS2_15_0048_A5 SS2_15_0048_A4
## 0610007P14RIK 0 0 8.091727 7.701429
## 0610009B22RIK 0 0 0.000000 0.000000
## 0610009L18RIK 0 0 0.000000 0.000000
## 0610009O20RIK 0 0 6.755828 6.647482
library(genefu)
## Loading required package: survcomp
## Loading required package: survival
## Loading required package: prodlim
## Loading required package: mclust
## Package 'mclust' version 5.4.2
## Type 'citation("mclust")' for citing this R package in publications.
## Loading required package: limma
## Loading required package: biomaRt
## Loading required package: iC10
## Loading required package: pamr
## Loading required package: cluster
## Loading required package: impute
## Loading required package: iC10TrainingData
## Loading required package: AIMS
## Loading required package: e1071
## Loading required package: Biobase
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colMeans, colnames, colSums, dirname, do.call, duplicated,
## eval, evalq, Filter, Find, get, grep, grepl, intersect,
## is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
## paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
## Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which, which.max,
## which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
if(T){
ddata=t(dat)
ddata[1:4,1:4]
s=colnames(ddata);head(s);tail(s) ##把实验检测到的基因赋值给S
##ERCC指的是spikes in
library(org.Hs.eg.db) ##人类基因信息的包
s2g=toTable(org.Hs.egSYMBOL)
g=s2g[match(s,s2g$symbol),1];head(g) ##取出实验检测到的基因所对应的基因名
# match(x, y)返回的是vector x中每个元素在vector y中对映的位置(positions in y),
# 如果vector x中存在不在vector y中的元素,该元素处返回的是NA
# probe Gene.symbol Gene.ID
dannot=data.frame(probe=s,
"Gene.Symbol" =s,
"EntrezGene.ID"=g)
head(dannot)
#s向量是实验检测基因的基因名字,g向量是标准基因ID
# 这里s应该和g是一一对应的,制作一个数据框
ddata=ddata[,!is.na(dannot$EntrezGene.ID)] #ID转换,仅保留ID不为NA的
dim(ddata)
#制作行为样本,列为实验检测基因(这里的剩下的实验检测基因都有标准基因ID对应)的矩阵。
#即剔除无基因ID对应的列
# !is.na去除dannot数据框EntrezGene.ID列为NA的行(去除NA值即去除没有标准基因ID对应的实验检测基因名))
dannot=dannot[!is.na(dannot$EntrezGene.ID),] #去除有NA的行,即剔除无对应的基因
head(dannot)
ddata[1:4,1:4]
library(genefu)
# c("scmgene", "scmod1", "scmod2","pam50", "ssp2006", "ssp2003", "intClust", "AIMS","claudinLow")
s<-molecular.subtyping(sbt.model = "pam50",data=ddata,
annot=dannot,do.mapping=TRUE)
table(s$subtype)
tmp=as.data.frame(s$subtype)
subtypes=as.character(s$subtype)
}
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
##
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
## Warning in cor(x = x, y = y, method = method.cor, use = "complete.obs"): 标
## 准差为零
head(df)
## g plate n_g all
## SS2_15_0048_A3 1 0048 2624 all
## SS2_15_0048_A6 1 0048 2664 all
## SS2_15_0048_A5 1 0048 3319 all
## SS2_15_0048_A4 2 0048 4447 all
## SS2_15_0048_A1 2 0048 4725 all
## SS2_15_0048_A2 2 0048 5263 all
df$subtypes=subtypes
table(df[,c(1,5)])
## subtypes
## g Basal Her2 LumA LumB Normal
## 1 6 17 251 14 23
## 2 5 26 225 25 30
## 3 32 18 54 8 15
## 4 1 0 5 0 13
library(genefu)
pam50genes=pam50$centroids.map[c(1,3)]
pam50genes[pam50genes$probe=='CDCA1',1]='NUF2'
pam50genes[pam50genes$probe=='KNTC2',1]='NDC80'
pam50genes[pam50genes$probe=='ORC6L',1]='ORC6'
x=dat
dim(x)
## [1] 12198 768
x=x[pam50genes$probe[pam50genes$probe %in% rownames(x)] ,]
table(group_list)
## group_list
## 1 2 3 4
## 311 311 127 19
tmp=data.frame(group=group_list,
subtypes=subtypes)
rownames(tmp)=colnames(x)
library(pheatmap)
pheatmap(x,show_rownames = T,show_colnames = F,
annotation_col = tmp
)

x=t(scale(t(x)))
x[x>1.6]=1.6
x[x< -1.6]= -1.6
pheatmap(x,show_rownames = T,show_colnames = F,
annotation_col = tmp
)
