setwd("D:/R/R-4.0.5/bin/project_visual/visualization/scrip")
rm(list=ls())
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(ggplot2))
suppressMessages(library(tidyverse))
suppressMessages(library(clusterProfiler))
load("D:/R/R-4.0.5/bin/project_practise/SXtree/SXtree_beforeGEO/R-20/expr_pd_sCLLex.Rdata")
#箱型图
boxplot(expr_sCLLex[,1])
#ggplot()绘图数据准备
exprl <- melt(expr_sCLLex)
colnames(exprl) <- c("gene","sample","expr")
exprl$group <- rep(pd_sCLLex$Disease,each=nrow(expr_sCLLex))
exprl_CLL11 <- exprl %>% filter(sample == "CLL11.CEL")
#直方图和密度图
ggplot(exprl_CLL11,aes(x=expr))+geom_histogram()+theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(exprl_CLL11,aes(x=expr))+geom_density()+theme_bw()
#数据准备
table(pd_sCLLex$Disease)
##
## progres. stable
## 14 8
pd_sCLLex$group <- as.factor(pd_sCLLex$Disease )
#分组箱型图
ggplot(exprl,aes(x=sample,y=expr,fill=group))+geom_boxplot()+
theme(text = element_text(size=13),
axis.text.x = element_text(angle=135, hjust=1))
#直方图
ggplot(exprl,aes(x=expr,fill=group))+geom_histogram()+facet_wrap(exprl$sample)+theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#密度图
ggplot(exprl,aes(x=expr,col=group))+geom_density()+facet_wrap(exprl$sample)+theme_bw()
ggplot(exprl,aes(expr,col=group))+geom_density()
suppressMessages(library(ggpubr))
#小提琴图
ggviolin(exprl,x="sample",y="expr",fill = "group")+
theme(text = element_text(size=10),
axis.text.x = element_text(angle = 90,hjust = 1))
gpairs——多变量的相关关系,即多个变量两两画散点图
#install.packages("gpairs")
library(gpairs)
expr_pairs <- expr_sCLLex[,1:6]
gpairs(expr_pairs)
expr_cluster <- expr_sCLLex
colnames(expr_cluster) <- pd_sCLLex$group
#out.dist <- dist(t(expr_cluster),method = "euclidean")
#out.hclust <- hclust(out.dist,method = "complete")
hc <- hclust(dist(t(expr_cluster)))#这个代码只是把上面的代码简并了一下,是一样的,method默认是"complete"
plot(hc)
hc <- hclust(dist(t(expr_sCLLex),method = "euclidean"),
method = "ward.D")
suppressPackageStartupMessages(library(factoextra))#高级可视化的包
fviz_dend(hc, k = 4 ,cex = 0.5,k_colors = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"),color_labels_by_k = T, rect = TRUE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
参数说明:
最终结果:样本名+分组+分析结果
使用运算函数:prcomp()结果输出一个对象,其中的x为主成分分析结果
pc <- prcomp(t(expr_sCLLex),scale=T)
class(pc)#pc是一个封装好的对象,其中的x是主成分 分析的结果
## [1] "prcomp"
pcx <- data.frame(pc$x)
head(pcx,3)
## PC1 PC2 PC3 PC4 PC5 PC6
## CLL11.CEL -46.85689 14.463746 17.26379 -1.77114 6.923367 -9.006708
## CLL12.CEL -22.03909 8.439319 -31.50484 -48.12062 35.350428 -2.473449
## CLL13.CEL -61.72712 -10.386196 28.51325 12.07738 -26.940805 -3.192176
## PC7 PC8 PC9 PC10 PC11 PC12
## CLL11.CEL 14.73949 -6.934635 13.581109 -4.859836 4.268705 -1.111283
## CLL12.CEL -5.60240 -9.688525 5.002439 8.983478 -6.131755 -8.681008
## CLL13.CEL 13.45881 1.909465 4.927676 -17.793132 -23.149501 -17.621971
## PC13 PC14 PC15 PC16 PC17 PC18
## CLL11.CEL -6.7781881 17.37202 -4.35242499 18.496281 0.178317 17.884427
## CLL12.CEL 0.6323493 -25.15555 -0.07925103 22.019632 -13.905205 -7.864069
## CLL13.CEL -8.8274219 -19.33089 -23.03844446 -2.220767 9.475638 -13.662748
## PC19 PC20 PC21 PC22
## CLL11.CEL 27.642539 -20.105829 3.584657 1.376243e-13
## CLL12.CEL 2.206418 5.190389 -2.154347 1.009271e-13
## CLL13.CEL 4.941798 5.728124 -3.498910 6.398181e-14
pcr <- cbind(samples=rownames(pcx),pd_sCLLex$Disease,pcx)#加上样本和分组信息
head(pcr)
## samples pd_sCLLex$Disease PC1 PC2 PC3
## CLL11.CEL CLL11.CEL progres. -46.856885 14.463746 17.263794
## CLL12.CEL CLL12.CEL stable -22.039089 8.439319 -31.504845
## CLL13.CEL CLL13.CEL progres. -61.727121 -10.386196 28.513253
## CLL14.CEL CLL14.CEL progres. -49.054707 -10.202203 -16.721321
## CLL15.CEL CLL15.CEL progres. 1.141835 18.773784 28.301385
## CLL16.CEL CLL16.CEL progres. 32.653663 51.063530 -6.182675
## PC4 PC5 PC6 PC7 PC8 PC9
## CLL11.CEL -1.771140 6.923367 -9.006708 14.739488 -6.934635 13.581109
## CLL12.CEL -48.120623 35.350428 -2.473449 -5.602400 -9.688525 5.002439
## CLL13.CEL 12.077384 -26.940805 -3.192176 13.458808 1.909465 4.927676
## CLL14.CEL 7.546233 1.589274 14.297332 -1.719354 -28.400677 -21.958378
## CLL15.CEL 14.248706 -12.115247 19.570496 4.796330 -3.067718 -34.120146
## CLL16.CEL 34.931700 -32.103064 1.710500 -20.257911 2.909489 -4.537663
## PC10 PC11 PC12 PC13 PC14 PC15
## CLL11.CEL -4.859836 4.268705 -1.1112834 -6.7781881 17.372020 -4.35242499
## CLL12.CEL 8.983478 -6.131755 -8.6810079 0.6323493 -25.155551 -0.07925103
## CLL13.CEL -17.793132 -23.149501 -17.6219714 -8.8274219 -19.330890 -23.03844446
## CLL14.CEL 15.760874 12.899864 -0.6006821 -15.6392494 -1.658030 17.14739249
## CLL15.CEL -24.816528 8.868848 23.4662184 14.1284253 -14.504122 6.88664102
## CLL16.CEL 30.083816 -3.387800 -25.7013128 18.6407371 -1.587664 1.86653584
## PC16 PC17 PC18 PC19 PC20 PC21
## CLL11.CEL 18.496281 0.178317 17.884427 27.642539 -20.105829 3.584657
## CLL12.CEL 22.019632 -13.905205 -7.864069 2.206418 5.190389 -2.154347
## CLL13.CEL -2.220767 9.475638 -13.662748 4.941798 5.728124 -3.498910
## CLL14.CEL -15.622991 14.835333 -13.188203 19.668413 3.264009 -4.113575
## CLL15.CEL 13.827600 -14.098071 3.963714 3.284001 5.522894 -1.377382
## CLL16.CEL 7.880383 -6.792640 1.371190 2.861844 -6.285913 -3.505023
## PC22
## CLL11.CEL 1.376243e-13
## CLL12.CEL 1.009271e-13
## CLL13.CEL 6.398181e-14
## CLL14.CEL 1.254760e-13
## CLL15.CEL 1.155465e-13
## CLL16.CEL 6.083935e-14
PC2~PC1,color ~ group
ggplot(pcr,aes(PC1,PC2))+geom_point(size=3,aes(color=pd_sCLLex$Disease))+
geom_text(aes(label=samples),hjust=-0.1,vjust=-0.3,size=2.5)
缺点在于没办法标注
#BiocManager::install("ggfortify")
suppressMessages(library(ggfortify))
df <- as.data.frame(t(expr_sCLLex))
df$group=pd_sCLLex$group#df就是转了一下再加上group
autoplot(prcomp(df[,1:(ncol(df)-1)]),data=df,colour='group')#也是使用prcomp
library("FactoMineR")#画主成分分析图需要加载这两个包
library("factoextra")
exprSet <- expr_sCLLex
group_list <- as.factor(pd_sCLLex$Disease)
df=as.data.frame(t(exprSet))
dat.pca <- PCA(df, graph = FALSE)#现在dat最后一列是group_list,需要重新赋值给一个dat.pca,这个矩阵是不含有分组信息的
fviz_pca_ind(dat.pca,
geom.ind = "point", # show points only (nbut not "text")
col.ind = group_list, # color by groups
# palette = c("#00AFBB", "#E7B800"),
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups")
## UpsetR集合交集可视化
STATS_FUN <- function(median,expr_sCLLex,n){
a <- apply(expr_sCLLex,1,median)
b <- as.data.frame(cbind(rownames(expr_sCLLex),a))
b <- arrange(b,desc(a))
Topgene <- head(b$V1,n)
return(Topgene)
}
e_median <- STATS_FUN(median,expr_sCLLex,50)
e_mad <- STATS_FUN(mad,expr_sCLLex,50)
e_sd <- STATS_FUN(sd,expr_sCLLex,50)
e_var <- STATS_FUN(var,expr_sCLLex,50)
e_mean <- STATS_FUN(mean,expr_sCLLex,50)
e_min <- STATS_FUN(min,expr_sCLLex,50)
e_max <- STATS_FUN(max,expr_sCLLex,50)
all <- unique(c(e_mean,e_mad,e_max,e_min,e_sd,e_var,e_median,fromLast=T))#取所有集合的交集
e_all <- data.frame(all ,
e_mean=ifelse(all %in% e_mean,1,0),
e_median=ifelse(all %in% e_median,1,0),
e_max=ifelse(all %in% e_max,1,0),
e_min=ifelse(all %in% e_min,1,0),
e_sd=ifelse(all %in% e_sd,1,0),
e_var=ifelse(all %in% e_var,1,0),
e_mad=ifelse(all %in% e_mad,1,0))##相当于在总体里面分别绘制每个集合
head(e_all)#总集+每个个体的坐标
## all e_mean e_median e_max e_min e_sd e_var e_mad
## 1 ADAR 1 1 0 1 0 0 0
## 2 SEPTIN7 1 1 0 0 0 0 0
## 3 PCBP1 1 0 0 0 0 0 0
## 4 SYPL1 1 1 0 0 0 0 0
## 5 CCT2 1 1 0 0 0 0 0
## 6 DYNLL1 1 0 0 0 0 0 0
#install.packages("UpSetR")
suppressPackageStartupMessages(library(UpSetR))
suppressPackageStartupMessages(library(magrittr))
upset(e_all,nsets = 7,sets.bar.color = "#BD1111")#绘制韦恩图,查看数据的交集