重要信息:

Web url of this page: R_plot_challenges
R version: R4.1.0 on CentOS7.9

Good colors

1. single cell 40 colors

# Palette of 40 colours (used here for single-cell cluster plots)
allcolour <- c(
  "#DC143C", "#0000FF", "#20B2AA", "#FFA500", "#9370DB", "#98FB98", "#F08080", "#1E90FF",
  "#7CFC00", "#FFFF00", "#808000", "#FF00FF", "#FA8072", "#7B68EE", "#9400D3", "#800080",
  "#A0522D", "#D2B48C", "#D2691E", "#87CEEB", "#40E0D0", "#5F9EA0", "#FF1493", "#0000CD",
  "#008B8B", "#FFE4B5", "#8A2BE2", "#228B22", "#E9967A", "#4682B4", "#32CD32", "#F0E68C",
  "#FFFFE0", "#EE82EE", "#FF6347", "#6A5ACD", "#9932CC", "#8B008B", "#8B4513", "#DEB887"
)
# length(allcolour) is 40

# Preview: one unit-height bar per colour, no borders, no gaps, no axes,
# with all figure margins removed
par(mar = rep(0, 4))
barplot(
  rep(1, length(allcolour)),
  col = allcolour, border = NA, space = 0,
  ann = FALSE, axes = FALSE
)

2. Feature plot

library(RColorBrewer)
# Preview the 9-step "YlOrRd" palette as borderless, gap-free bars
# with all figure margins removed
par(mar = rep(0, 4))
barplot(
  rep(1, 9),
  col = brewer.pal(9, "YlOrRd"),
  border = NA, space = 0, axes = FALSE
)

3. ggplot2 使用色板

library(ggplot2)
# Box plot of sepal length per iris species; boxes are filled by species and
# the legend sits above the panel
bp <- ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot(aes(fill = Species)) +
  theme_minimal() +
  theme(legend.position = "top")
# Same plot with the ColorBrewer "Dark2" palette for the fill colours
bp + scale_fill_brewer(palette = "Dark2")

单个变量

1. barplot：自定义填充色、标签字体颜色

# Preview the two bar fill colours used below
par(mar = rep(0, 4))
barplot(
  c(1, 1),
  col = c("steelblue", "darkred"),
  axes = FALSE, border = NA, space = 0
)

# 1. Simulated data: ten labelled groups with fractions from 0.001 to 0.29
dat.tmp <- data.frame(
  index.name = c(
    "KIR+EOMES+ NK-like", "Tn", "Temra", "KIR+TXK+ NK-like",
    "NME1+ T", "Tc17",
    paste0("XX_prefix_", 1:4)
  ),
  value = c(seq(0.1, 2, 0.5), 7:10, 15, 29) / 100
)
str(dat.tmp)

## 'data.frame':    10 obs. of  2 variables:
##  $ index.name: chr  "KIR+EOMES+ NK-like" "Tn" "Temra" "KIR+TXK+ NK-like" ...
##  $ value     : num  0.001 0.006 0.011 0.016 0.07 0.08 0.09 0.1 0.15 0.29

# Turn the labels into a factor whose levels follow the current row order;
# ggplot2 only keeps this order on the axis when the column is a factor
dat.tmp$index.name <- factor(dat.tmp$index.name, levels = dat.tmp$index.name)
# str(dat.tmp)

# library("ggpubr")
# Randomly assign one of three label colours to each of the 10 categories
set.seed(2021)  # fixed seed so the assignment is reproducible
colors2 <- c("#BBBCBF", "#73709F", "#78B478")
# rnbinom() has no upper bound, so cap the 1-based index at the palette
# length; an index past the end would silently produce NA colours
colors2 <- colors2[pmin(rnbinom(10, 3, 0.8) + 1, length(colors2))]
colors2

##  [1] "#BBBCBF" "#73709F" "#BBBCBF" "#73709F" "#73709F" "#73709F" "#BBBCBF"
##  [8] "#78B478" "#78B478" "#73709F"

library(ggplot2)
# Bars below the 0.1 threshold are steel blue, the others dark red; a faint
# dashed line marks the threshold and each x label takes its colour from colors2
ggplot(dat.tmp, aes(x = index.name, y = value)) +
  geom_col(
    fill = ifelse(dat.tmp$value < 0.1, "steelblue", "darkred"),
    col = NA, width = 0.8
  ) +
  geom_hline(yintercept = 0.1, linetype = "dashed", color = "black", alpha = 0.2) +
  labs(x = "", y = sprintf("pTrans Of %s", "Tex")) +
  theme_classic(base_size = 12) +
  # scale_x_discrete(fill = colors2) +
  theme(
    axis.text.x = element_text(
      angle = 60, hjust = 1, vjust = 1, face = "bold",
      # Warning: passing a vector here is discouraged by ggplot2 and the
      # feature may be removed in a future version
      color = colors2
    )
  )

## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.

# https://www.jianshu.com/p/1bacaedb58b0
# 看来未来要用原生R画图来实现标签文字染色


# The same figure drawn with base R graphics
par(mar = c(8, 4, 0.5, 0.5))
posX <- barplot(
  dat.tmp$value,
  col = ifelse(dat.tmp$value < 0.1, "steelblue", "darkred"),
  ylim = c(0, 0.3),
  border = NA,  # no outline around the bars
  space = 0.1,
  yaxs = "r",   # leave padding at both ends of the y range
  yaxt = "n"    # suppress the default y axis; a custom one is added below
)
box(bty = "l", lwd = 2)

# y axis: ticks every 0.1 with horizontal labels, plus a title in the margin
cell.ident <- c("Tex")
axis(
  side = 2, at = seq(0, 0.3, 0.1), labels = seq(0, 0.3, 0.1),
  tck = -0.02, las = 2, mgp = c(1.5, 0.5, 0)
)
mtext(sprintf("pTrans of %s", cell.ident[1]), side = 2, line = 2)

# x axis: tick marks only; the category labels are drawn with text() so that
# each one can be rotated by 60 degrees and given its own colour
axis(side = 1, at = posX, labels = NA, tck = -0.02)
text(
  x = posX, y = par("usr")[3] - 0.02,
  labels = dat.tmp$index.name,
  col = colors2,
  adj = 1, srt = 60, xpd = TRUE
)

2个变量

两个变量的情况:

两个平等的维度: 散点图
一个是数值，另一个是分类变量: boxplot, violin plot

1. 堆叠密度图 Stacked density plot

使用 ggplot2 内置数据 diamonds ，画出各种价位的钻石中，每种切割效果的比例。

library(ggplot2)
library(dplyr)

head(diamonds)

## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48

# v0: density outlines only, one curve per cut
ggplot(data = diamonds, aes(x = price, color = cut)) +
  geom_density() +
  theme_classic()

# v1: mapping `fill` gives filled density areas, but they still cover each other
# (after_stat(density) replaces the deprecated ..density.. notation)
diamonds %>%
  select(price, cut) %>%
  # head(n = 100) %>%
  ggplot(aes(x = price, y = after_stat(density), fill = cut)) +
  geom_density(alpha = 0.2)  # semi-transparent

# v2: stacked, shown as proportions
diamonds %>%
  select(price, cut) %>%
  # head(n = 1000) %>%
  ggplot(aes(x = price, y = after_stat(density), fill = cut)) +
  geom_density(position = "fill")  # "fill" stacks the curves and rescales the total to 1

# v3: polish the theme
# This is about as far as it goes.
ggplot(diamonds, aes(x = price, y = after_stat(density), fill = cut)) +
  geom_density(position = "fill") +
  theme_classic() +
  scale_y_continuous(expand = c(0, 0)) +  # removes the gap next to the x axis
  scale_x_continuous(expand = c(0, 0))    # removes the gap next to the y axis

v3: 看答案

# Stacked density plot: https://www.r-graph-gallery.com/135-stacked-density-graph.html#stacked
ggplot(diamonds, aes(x = price, group = cut, fill = cut)) +
  geom_density(adjust = 1.5, position = "fill") +
  theme_minimal()

# The version above sets `group`; what happens without it? Apparently nothing changes
ggplot(diamonds, aes(x = price, fill = cut)) +
  geom_density(adjust = 1.5, position = "fill") +
  theme_minimal()

# What does `adjust` mean? It is the multiple of the default bandwidth (bw) to use.
# The default is 1; smaller values are finer and capture more local variation
ggplot(diamonds, aes(x = price, fill = cut)) +
  geom_density(adjust = 0.1, position = "fill") +
  theme_minimal()

# v4: custom colours, matched one-to-one with the categories
ggplot(
  diamonds,
  aes(
    x = price,
    fill = factor(cut, levels = c("Fair", "Good", "Very Good", "Premium", "Ideal"))
  )
) +
  geom_density(position = "fill") +
  theme_minimal() +
  # scale_fill_manual("Cut effect", values = c("red", "blue", "#FFC125", "#AB82FF", "#CD8500"))
  scale_fill_manual("Cut effect", values = c("#FF3030", "#009ACD", "#FFC125", "#AB82FF", "#CD8500"))

# v5: how to draw the density curves in facets?
ggplot(diamonds, aes(x = price, fill = cut)) +
  geom_density() +
  facet_wrap(~cut) +  # one panel per cut
  theme_minimal() +
  theme(
    legend.position = "none",              # no legend
    panel.spacing = unit(0.5, "lines"),    # spacing between the panels
    axis.text.x = element_text(angle = 60) # rotate the x-axis text by 60 degrees
    # axis.ticks.x = element_line(color = "red")  # x-axis tick marks (disabled)
  )

2. 水平条状图 horizontal barplot(单个，批量)

目的：展示不同气缸数(cyl)的自动/手动档车型的比例。

# opar <- par(no.readonly = TRUE)

# Counts of cars by transmission type (rows, am) and cylinder count (columns, cyl)
par(mar = c(1, 2, 1, 1))
df0 <- table(mtcars$am, mtcars$cyl)
df0

##    
##      4  6  8
##   0  3  4 12
##   1  8  3  2

# Convert to proportions within each column
df0 <- apply(df0, 2, function(counts) counts / sum(counts))
df0

##    
##             4         6         8
##   0 0.2727273 0.5714286 0.8571429
##   1 0.7272727 0.4285714 0.1428571

# v1: plot with explicit fill colours
colors <- c("#83C57E", "#3A5F88")
barplot(df0, col = colors)

# v2: horizontal bars
barplot(df0, col = colors, horiz = TRUE)

# v3: polish
barplot(
  df0,
  col = colors,
  horiz = TRUE,     # horizontal bars
  las = 2,          # axis labels perpendicular to the axis
  space = 0,        # no gap between the bars
  border = "white", # white outlines
  xaxt = "n"        # drop the x axis and its labels
)

精细画法

#' Horizontal stacked bar plot
#'
#' Draws one horizontal bar per column of `df1`; the rows of each column are
#' stacked as coloured segments. The columns are reversed before plotting, so
#' the first column of `df1` ends up as the top bar.
#'
#' @param df1 Numeric matrix, two-way table or data frame: one column per bar,
#'   one row per stacked segment.
#' @param colors Fill colours of the segments, by row of `df1` (row 1, row 2, ...).
#' @param labels Optional text labels, one per bar, given in the original
#'   column order of `df1`. Defaults to the column indices.
#' @param v Optional x position(s) of vertical dotted reference line(s).
#' @param adj.x x coordinate (in plot units) at which the bar labels are drawn.
#' @param scale If `TRUE`, divide every column by its sum so each bar has
#'   total length 1.
#'
#' @return The bar positions returned by `barplot()`, invisibly.
#' @export
#'
#' @examples
#' barplot_h(table(mtcars$am, mtcars$cyl), c("#83C57E", "#3A5F88"))
barplot_h <- function(df1, colors, labels = NULL, v = NULL, adj.x = -0.04, scale = TRUE) {
  df1 <- as.matrix(df1)

  # Reverse the column order; drop = FALSE keeps a single-column input a matrix
  bar_order <- rev(seq_len(ncol(df1)))
  df1 <- df1[, bar_order, drop = FALSE]

  # Normalise by column; sweep() keeps the matrix shape even for a single row
  if (scale) {
    df1 <- sweep(df1, 2, colSums(df1), "/")
  }

  # Main plot: colours follow the rows, one bar per column
  posY <- barplot(
    df1,
    col = colors,
    horiz = TRUE,                          # horizontal bars
    ann = FALSE, xaxt = "n", yaxt = "n",   # no axes or annotation
    # las = 2,
    border = "white", space = 0
  )

  # Bar labels, reversed to match the reversed columns
  labels <- if (is.null(labels)) bar_order else labels[bar_order]
  text(
    adj.x,           # x coordinate of the labels
    posY,            # y coordinates: the bar positions
    labels = labels,
    col = "black",
    xpd = TRUE       # allow drawing outside the plot region
  )

  # Optional vertical dotted line(s)
  if (!is.null(v)) {
    abline(v = v, lty = 3, lwd = 2, xpd = TRUE)
  }

  invisible(posY)
}

# Pass in a matrix: one column per bar, each row coloured differently

# test1: the input can be the table returned by table(para1, para2);
# para1 determines the colours, para2 determines the bars
# Data: transmission type (am) by number of cylinders (cyl)
df0 <- table(mtcars$am, mtcars$cyl)
df0

##    
##      4  6  8
##   0  3  4 12
##   1  8  3  2

#    4  6  8
# 0  3  4 12 (No)
# 1  8  3  2 (Yes)

# Five stacked panels, one barplot_h() variant in each
par(mfrow = c(5, 1), mar = c(1, 2, 0.5, 0))
colors <- c("#83C57E", "#3A5F88")
barplot_h(df0, colors)
barplot_h(df0, colors, scale = FALSE, adj.x = -0.3)
barplot_h(df0, colors, labels = c("dog", "cat", "goose"), adj.x = -0.07)
barplot_h(df0, colors, labels = colnames(df0))
barplot_h(df0, colors, v = 0.2)

# test2 模拟数据
# Simulate a 2 x n matrix with rows "normal" and "tumor" that sum to 1 in
# every column.
#   n      number of columns
#   adjust constant added to the draws before taking the maximum used as divisor
#   seed   optional RNG seed; when given, set.seed() is called first
getDf <- function(n = 7, adjust = 10, seed = NULL) {
  if (!is.null(seed)) {
    set.seed(seed)
  }

  draws <- rnorm(n, mean = 100, sd = 30)
  normal_frac <- draws / max(draws + adjust)
  fractions <- data.frame(
    normal = normal_frac,
    tumor = 1 - normal_frac
  )
  t(fractions)
}
# test
getDf()

##             [,1]       [,2]       [,3]      [,4]      [,5]      [,6]      [,7]
## normal 0.6688673 0.92943432 0.93564975 0.4615631 0.6076559 0.4309296 0.8767301
## tumor  0.3311327 0.07056568 0.06435025 0.5384369 0.3923441 0.5690704 0.1232699

colors2 <- c("#5AC9FF", "#0182D5", "#FE8C72", "#C90C12")

# plot
par(mar = rep(0, 4), oma = rep(0, 4))
# Split the canvas into 6 rows and 2 columns, numbered column by column
mat <- matrix(1:12, nrow = 6, byrow = FALSE)
mat

##      [,1] [,2]
## [1,]    1    7
## [2,]    2    8
## [3,]    3    9
## [4,]    4   10
## [5,]    5   11
## [6,]    6   12

# layout(mat)
# Relative column widths and row heights
layout(mat, widths = c(1, 3), heights = c(5, 7, 6, 2, 7, 7))

# Left column: one text-only panel per row
for (i in seq_len(6)) {
  plot(c(1), type = "n", ann = FALSE, axes = FALSE)
  # box(which = "figure", col = "red")
  text(
    1.4, 1,
    labels = c("", "Alveolar", "Endothelial", "Epithelial", "Fibroblast", "B cell")[i],
    cex = 1.1, adj = 1
  )
  if (i == 1) {
    text(1.4, 1, adj = 1, labels = expression(italic("Fake_Data")), col = "red", cex = 1.2)
  }
}

# Right column
# Panel 7: legend and title
plot(c(1), type = "n", axes = FALSE)
legend(
  0.6, 1.4,
  legend = c("Non-malignant", "Tumor"),
  fill = colors2[1:2],
  # adj = 0,      # text position relative to the swatch
  # xjust = 0.5,  # x position of the whole legend relative to the given point
  border = NA,
  ncol = 2,
  x.intersp = 0.4,   # gap between swatch and text
  text.width = 0.4,  # horizontal spacing between the legend entries
  cex = 1.2,
  pt.cex = 0.5,
  inset = .02,
  bty = "n"
)
text(
  1, 0.8,
  labels = "Anticipated fraction\ntumor versus non-malignant",
  cex = 1.2, adj = 0.5
)

# Panels 8-12: one horizontal bar plot per row, each from seeded simulated data
par(mar = c(0, 1, 0.5, 0))
barplot_h(getDf(7, 0, 1), colors2, v = 0.3)
barplot_h(getDf(6, 40, 3), colors2, v = 0.3)
barplot_h(getDf(2, 60, 4), colors2, v = 0.3)
barplot_h(getDf(7, 80, 5), colors2, v = 0.3)
barplot_h(getDf(7, 120, 7), colors2, v = 0.3)

3. 右侧带层次聚类的水平 barplot

目的: 随机从 iris 数据集的三个子类中各抽取3个样本，画每个样本中三个指标的百分比水平barplot，并在右侧添加层次聚类效果。

有关聚类的R包可以参考cluster包和ape包
要用到 ape 包: http://ape-package.ird.fr/
R语言最全谱系图 http://rstudio-pubs-static.s3.amazonaws.com/1876_df0bf890dd54461f98719b461d987c3d.html

# Simulated data: three random rows from each of the index ranges
# 1-50, 51-100 and 101-150 of iris
set.seed(2021)
df0 <- iris[c(sample(1:50, 3), sample(51:100, 3), sample(101:150, 3)), ]
rownames(df0) <- paste0(df0$Species, "_", 1:9)
# head(df0)
# table(df0$Species)
# Normalise by observation: the four measurements of each row sum to 1
df1 <- data.frame(t(apply(df0[, 1:4], 1, function(obs) obs / sum(obs))))
head(df1)

##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## setosa_1        0.4742268   0.3505155    0.1443299  0.03092784
## setosa_2        0.4900000   0.3600000    0.1400000  0.01000000
## setosa_3        0.5052632   0.3157895    0.1473684  0.03157895
## versicolor_4    0.4000000   0.2142857    0.2928571  0.09285714
## versicolor_5    0.4041096   0.2054795    0.2876712  0.10273973
## versicolor_6    0.3986014   0.1958042    0.3146853  0.09090909

# Step 1: pairwise distances between the normalised observations
out.dist <- dist(df1)
out.dist

##                setosa_1   setosa_2   setosa_3 versicolor_4 versicolor_5
## setosa_2     0.02820412                                                
## setosa_3     0.04667768 0.05203344                                     
## versicolor_4 0.22352571 0.24405587 0.21518674                          
## versicolor_5 0.22727858 0.24831676 0.21713839   0.01479858             
## versicolor_6 0.24954658 0.26902641 0.23934955   0.02870168   0.03152262
## virginica_7  0.27489956 0.29535002 0.26607942   0.05142526   0.05071139
## virginica_8  0.27331825 0.29452500 0.26606333   0.05140095   0.05043403
## virginica_9  0.27149932 0.29358160 0.26042938   0.05880821   0.04849519
##              versicolor_6 virginica_7 virginica_8
## setosa_2                                         
## setosa_3                                         
## versicolor_4                                     
## versicolor_5                                     
## versicolor_6                                     
## virginica_7    0.02938451                        
## virginica_8    0.03556237  0.01234903            
## virginica_9    0.05025099  0.03885795  0.03618163

# Step 2: hierarchical clustering of the distance matrix (method "ward.D2")
out.hclust <- hclust(out.dist, method = "ward.D2")
out.hclust

## 
## Call:
## hclust(d = out.dist, method = "ward.D2")
## 
## Cluster method   : ward.D2 
## Distance         : euclidean 
## Number of objects: 9

# Step 3: plot the tree; base R cannot rotate it
par(mar = rep(0, 4))
plot(
  out.hclust,
  hang = 0.1,   # length of the outermost (leaf) branches
  ann = FALSE, axes = FALSE,
  cex = 0.7,
  col = "darkred"
)

step4. 精修图

# Use the ape package to draw the cluster tree on the right, and the
# barplot_h() function defined in section 2 above to draw the bars
# plot 253*310

# Reorder the observations according to the clustering result (reversed);
# this order is used for the bar plot
df1 <- df1[rev(out.hclust$order), ]
df1

##              Sepal.Length Sepal.Width Petal.Length Petal.Width
## virginica_8     0.3734177   0.1898734    0.3227848  0.11392405
## virginica_7     0.3809524   0.1845238    0.3273810  0.10714286
## virginica_9     0.3895349   0.1744186    0.3023256  0.13372093
## versicolor_5    0.4041096   0.2054795    0.2876712  0.10273973
## versicolor_4    0.4000000   0.2142857    0.2928571  0.09285714
## versicolor_6    0.3986014   0.1958042    0.3146853  0.09090909
## setosa_2        0.4900000   0.3600000    0.1400000  0.01000000
## setosa_1        0.4742268   0.3505155    0.1443299  0.03092784
## setosa_3        0.5052632   0.3157895    0.1473684  0.03157895

# set colors
colors2 <- c("#A5CDE1", "#1F78B3", "#349F48", "#F47E20")

# pdf("bar_plot_with_tree.pdf", width=2.53, height=3.1)
par(mar = rep(0, 4), oma = rep(0, 4))
# Split the canvas into 2 rows and 2 columns; panel 1 spans the whole top row
mat <- matrix(c(1, 1, 2, 3), nrow = 2, byrow = TRUE)
mat

##      [,1] [,2]
## [1,]    1    1
## [2,]    2    3

# layout(mat)
# Relative column widths and row heights
layout(mat, widths = c(16, 4), heights = c(1, 6))


# 1 top panel: legend
# Caution: the R Markdown figure and the PDF figure usually agree, while the
# figure shown directly in the R terminal is slightly misaligned
plot(c(1), type = "n", axes = FALSE)
legend(
  0.7, 1.4,
  legend = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"),
  # c("BP c-kit+Gr1", "BM Gr1", "PB Gr1", "SP Gr1"),

  fill = colors2,
  # adj = 0,      # text position relative to the swatch
  # xjust = 0.5,  # x position of the whole legend relative to the given point
  border = NA,
  ncol = 2,
  x.intersp = 0.2,    # gap between swatch and text
  y.intersp = 1.2,    # vertical spacing between the entries
  text.width = 0.25,  # horizontal spacing between the legend entries
  cex = 0.9,
  pt.cex = 0.5,
  inset = .02,
  bty = "n"
)

# 2 bottom left: horizontal bars, labelled with the last four characters of
# each row name
# par("mar") # 5.1 4.1 4.1 2.1
par(mar = c(3, 2.5, 0, 0))
barplot_h(
  t(df1),
  colors = colors2,
  labels = substring(rownames(df1), nchar(rownames(df1)) - 3),
  adj.x = -0.12
)
axis(side = 1, at = seq(0, 1, 0.25), labels = seq(0, 1, 0.25), mgp = c(1, 0.5, 0))
box(bty = "l", col = "black", lwd = 2)


# 3 bottom right: the cluster tree
par(mar = c(3.8, 0.1, 0.8, 1))
library(ape)
# ape::plot.phylo()  # press F1 for more help
plot(
  as.phylo(out.hclust),
  direction = "leftwards",
  edge.width = 2,           # line thickness
  use.edge.length = FALSE,  # whether to use the distances between clusters
  show.tip.label = FALSE,   # do not show the tip labels
  cex = 2                   # text magnification
)

特殊需求

旋转 ggplot2 图形

library(ggplot2)
# Box plot of sepal length per iris species with the "Dark2" fill palette
bp <- ggplot(iris, aes(Species, Sepal.Length)) +
  geom_boxplot(aes(fill = Species)) +
  theme_minimal() +
  theme(legend.position = "top")
bp2 <- bp + scale_fill_brewer(palette = "Dark2")
# bp2

library(grid)
grid.newpage()
# Viewport anchored by its bottom-right corner at (1, 0.4), rotated by 30 degrees
vp1 <- viewport(
  x = 1, y = 0.4, width = 0.8, height = 0.7,
  just = c("right", "bottom"), angle = 30
)
pushViewport(vp1)
# grid.rect()
# newpage = FALSE draws the plot inside the current (rotated) viewport
print(bp2, newpage = FALSE)
# Leave the rotated viewport again so that later grid drawing on this page
# is not rotated as well
popViewport()

## Warning in grid.Call.graphics(C_setviewport, vp, TRUE): cannot clip to rotated
## viewport

## Warning in grid.Call.graphics(C_setviewport, vp, TRUE): cannot clip to rotated
## viewport

ggplot2 图例放左侧，图例上移

# Scatter plot coloured by gear, with a compact colour bar on the left.
# brewer.pal() comes from RColorBrewer, which is attached earlier in this file.
g1 <- ggplot(mtcars, aes(x = wt, y = mpg, color = gear)) +
  geom_point(size = 1) +
  scale_color_gradientn(name = "Z Score", colors = brewer.pal(9, "YlOrRd")) +
  theme_classic(base_size = 12) +
  theme(
    legend.position = "left",
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8),
    # control legend key width and height
    legend.key.width = unit(4, "mm"),
    legend.key.height = unit(4, "mm"),
    # the 70 pt bottom margin moves the legend up a little
    legend.margin = margin(t = 0, r = 0, b = 70, l = 0, unit = "pt")
  )
print(g1)

# the end of this file ==
# This file is updated from time to time.

## [1] "0.18 minutes elapsed."

xx 画图挑战赛

BioMooc.com

2021-12-26 20:13:32

Good colors

1. single cell 40 colors

2. Feature plot

3. ggplot2 使用色板

单个变量

1. barplot：自定义填充色、标签字体颜色

2个变量

1. 堆叠密度图 Stacked density plot

v3: 看答案

2. 水平条状图 horizontal barplot(单个，批量)

精细画法

3. 右侧带层次聚类的水平 barplot

step4. 精修图

特殊需求

旋转 ggplot2 图形

ggplot2 图例放左侧，图例上移

xx 画图挑战赛

BioMooc.com

2021-12-26 20:13:32

Good colors

1. single cell 40 colors

2. Feature plot

3. ggplot2 使用色板

单个变量

1. barplot：自定义填充色、标签字体颜色

2个变量

1. 堆叠密度图 Stacked density plot

v3: 看答案

2. 水平条状图 horizontal barplot(单个，批量)

精细画法

3. 右侧带层次聚类的水平 barplot

step4. 精修图

特殊需求

旋转 ggplot2 图形

ggplot2 图例放左侧，图例上移

3. ggplot2 使用色板