if(T){
require(tidyverse)
AS_gene<-read.table("F:/Bioinfor_project/Breast/AS_research/AS/result/BRCA_marker_cox_single_all.txt",header =T,sep = "\t")
AS_gene[1:5,1:5]
final_DEmRNA<-read.csv("F:/Bioinfor_project/Breast/AS_research/AS/result/TNBC_DEmRNA.csv",header=T,row.names = 1)
DEG_2<-final_DEmRNA %>%
rownames_to_column("genename")
hubgene<-intersect(AS_gene$symbol,DEG_2$genename)
}
## Loading required package: tidyverse
## -- Attaching packages ------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.2.0 √ purrr 0.3.2
## √ tibble 2.1.3 √ dplyr 0.8.3
## √ tidyr 0.8.3 √ stringr 1.4.0
## √ readr 1.3.1 √ forcats 0.4.0
## Warning: package 'dplyr' was built under R version 3.6.1
## -- Conflicts ---------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
if(T){
require(dplyr)
## 提取已标准化的数据
mRNA_exp<-read.csv(file="F:/Bioinfor_project/Breast/AS_research/AS/result/TNBC_mRNA_exp_log.csv",header = T,row.names = 1)
load("F:/Bioinfor_project/Breast/TCGA/RNA-seq-model/BRCA_all.Rdata")
BRCA_all[1:5,1:5]
dim(BRCA_all)## 24491 1200
head(phe)
colnames(phe)
##
mRNA_exp[1:5,1:5]
dim(mRNA_exp)
data_exp<-mRNA_exp[hubgene,]
## 提取匹配的临床信息
data_phe<-phe %>%
filter(ID %in% colnames(mRNA_exp))
## 合并临床信息与表达数据
data<-data_exp %>% t() %>% as.data.frame() %>%
##行名变列名
rownames_to_column(var = "ID") %>%
as_tibble() %>%
## 合并临床信息与表达数据
inner_join(data_phe,by="ID") %>%
## 增加分组信息
mutate(group=ifelse(substring(colnames(mRNA_exp),14,15)=="01","TNBC","Normal")) %>%
## 分组提前
dplyr::select(ID,group,everything())
group=factor(c(rep(1,115),rep(0,113)))
head(data)
#save(data,file = "F:/Bioinfor_project/Breast/AS_research/AS/result/hubgene.Rdata")
}
## # A tibble: 6 x 22
## ID group CCL14 HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct> <int> <int> <int>
## 1 TCGA~ TNBC 3.37 0 0 5.94 Basal 967 1 NA
## 2 TCGA~ TNBC 4.97 3.39 0 6.00 Basal 584 0 NA
## 3 TCGA~ TNBC 4.77 0 0 5.04 Basal 2654 0 NA
## 4 TCGA~ TNBC 4.19 3.74 2.84 5.13 Basal 754 1 NA
## 5 TCGA~ TNBC 0 0 0 5.65 Basal 2048 0 2048
## 6 TCGA~ TNBC 5.34 0 4.07 6.04 Basal 1027 0 1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## # PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## # M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
load(file = "F:/Bioinfor_project/Breast/AS_research/AS/result/hubgene.Rdata")
head(data)
## # A tibble: 6 x 22
## ID group CCL14 HBA1 CCL16 TUBB3 PAM50 Os_time OS_event RFS_time
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <fct> <int> <int> <int>
## 1 TCGA~ TNBC 3.37 0 0 5.94 Basal 967 1 NA
## 2 TCGA~ TNBC 4.97 3.39 0 6.00 Basal 584 0 NA
## 3 TCGA~ TNBC 4.77 0 0 5.04 Basal 2654 0 NA
## 4 TCGA~ TNBC 4.19 3.74 2.84 5.13 Basal 754 1 NA
## 5 TCGA~ TNBC 0 0 0 5.65 Basal 2048 0 2048
## 6 TCGA~ TNBC 5.34 0 4.07 6.04 Basal 1027 0 1027
## # ... with 12 more variables: RFS_event <int>, age <int>, ER <fct>,
## # PR <fct>, gender <fct>, HER2 <fct>, Margin_status <fct>, Node <int>,
## # M_stage <fct>, N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
require(cowplot)
## Loading required package: cowplot
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
##
## ggsave
require(tidyverse)
require(ggplot2)
require(ggsci)
## Loading required package: ggsci
mydata<-data %>%
## 基因表达数据gather,gather的范围应调整
gather(key="gene",value="Expression",CCL14:TUBB3) %>%
##
dplyr::select(ID,gene,Expression,everything())
head(mydata)
## # A tibble: 6 x 20
## ID gene Expression group PAM50 Os_time OS_event RFS_time RFS_event
## <chr> <chr> <dbl> <chr> <fct> <int> <int> <int> <int>
## 1 TCGA~ CCL14 3.37 TNBC Basal 967 1 NA NA
## 2 TCGA~ CCL14 4.97 TNBC Basal 584 0 NA NA
## 3 TCGA~ CCL14 4.77 TNBC Basal 2654 0 NA NA
## 4 TCGA~ CCL14 4.19 TNBC Basal 754 1 NA NA
## 5 TCGA~ CCL14 0 TNBC Basal 2048 0 2048 0
## 6 TCGA~ CCL14 5.34 TNBC Basal 1027 0 1027 0
## # ... with 11 more variables: age <int>, ER <fct>, PR <fct>, gender <fct>,
## # HER2 <fct>, Margin_status <fct>, Node <int>, M_stage <fct>,
## # N_stage <fct>, T_stage <fct>, `Pathologic stage` <fct>
P<-mydata %>%
## 确定x,y
ggplot(aes(x = gene, y = Expression, fill = group)) +
geom_boxplot(alpha=0.7) +
scale_y_continuous(name = "Expression")+
scale_x_discrete(name = "Gene") +
ggtitle("Boxplot of hub gene") +
theme_bw() +
theme(plot.title = element_text(size = 14, face = "bold"),
text = element_text(size = 12),
axis.title = element_text(face="bold"),
axis.text.x=element_text(size = 11))
P
p2<-P+scale_fill_lancet()
p2
#ggsave(p2,filename = "hub_gene_boxplot.pdf",width = 5,height = 5)
P<-mydata %>%
## 确定x,y
ggplot(aes(x = group, y = Expression, fill = group)) +
geom_boxplot(alpha=0.7) +
scale_y_continuous(name = "Expression")+
scale_x_discrete(name = "group") +
ggtitle("Boxplot of hub gene") +
theme_bw() +
theme(plot.title = element_text(size = 14, face = "bold"),
text = element_text(size = 12),
axis.title = element_text(face="bold"),
axis.text.x=element_text(size = 11))
P
p3<-P+scale_fill_lancet()+facet_wrap(~gene)
p3
#ggsave(p3,filename = "hub_gene_boxplot_facet.pdf",width = 5,height = 5)
p<-mydata %>%
## 筛选数据,选定x,y
filter(group=="TNBC" & PAM50!="NA") %>%
ggplot(aes(x = PAM50, y = Expression, fill = PAM50)) +
geom_boxplot(alpha=0.7) +
scale_y_continuous(name = "Expression")+
scale_x_discrete(name = "PAM50") +
ggtitle("Boxplot of hub gene") +
theme_bw() +
theme(plot.title = element_text(size = 14, face = "bold"),
text = element_text(size = 12),
axis.title = element_text(face="bold"),
axis.text.x=element_text(size = 11))
p+scale_fill_lancet()+
facet_wrap(~gene)##分面变量
for循环:命名空对象list 配合do.call函数
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(ggplot2)
p <- list()
genename<-hubgene
for (j in genename) {
p[[j]] <- ggplot(data=data, aes_string(x=group, y=j)) +
geom_boxplot(alpha=0.7,aes(fill=group)) +
scale_y_continuous(name = "Expression")+
scale_x_discrete(name = j) +
ggtitle(paste0("boxlot of ",j) ) +##ggtitle指定
theme_bw() +
theme(plot.title = element_text(size = 14, face = "bold"),
text = element_text(size = 12),
axis.title = element_text(face="bold"),
axis.text.x=element_text(size = 11))+
scale_fill_lancet()
#ggsave(sprintf("boxplot%s.pdf",j),width=5,height=5,onefile=T)
}
do.call(grid.arrange, c(p, ncol=2))
输入感兴趣的基因,即绘制出箱线图
if(F){
## 参数1:genename为基因名
## 参数2:data为清洁数据框,列为观测,行为sample
## 参数3:group为分组
## function()中data=data表示data默认为data,如果在该位置出现其它数据则为其它
head(data)
colnames(data)
###
gene_plot<-function(gene,data){
require(ggstatsplot)
ggbetweenstats(data = data,
x = group,
y = gene)
}
gene_plot("CCL14",data)
###
gene_plot<-function(gene="CCL14"){
ggplot(data=data, aes(x=group, y=gene,fill=group)) +
geom_boxplot(alpha=0.7) +
scale_y_continuous(name = "Expression")+
scale_x_discrete(name = "group") +
ggtitle(sprintf("Boxplot of %s ",genename)) +
theme_bw() +
theme(plot.title = element_text(size = 14, face = "bold"),
text = element_text(size = 12),
axis.title = element_text(face="bold"),
axis.text.x=element_text(size = 11))+
scale_fill_lancet()
}
gene_plot(gene = CCL14)
##
p<-function(genename="CCL14",data){
ggplot(data=data, aes(x=group, y=genename,fill=group)) +
geom_boxplot(alpha=0.7) +
#scale_y_continuous(name = "Expression")+
#scale_x_discrete(name = "genename") +
ggtitle(sprintf("Boxplot of %s ",genename)) +
theme_bw() +
theme(plot.title = element_text(size = 14, face = "bold"),
text = element_text(size = 12),
axis.title = element_text(face="bold"),
axis.text.x=element_text(size = 11))+
scale_fill_lancet()
}
p("CCL14",data)
}