连续型变量-绘图之道

library(tidyverse) library(ggsci) library(cowplot)

knitr::opts_chunk$set(echo = TRUE)

#绘图代码
#连续型变量可视化
#盒型图&小提琴图
#install.packages(c('tidyverse','ggsci','cowplot'))
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(ggsci)
library(cowplot)

p1 <- ggplot(mtcars,aes(x = 1,y = qsec))+
  geom_boxplot(fill = 'forestgreen')+
  theme_classic()

p2 <- ggplot(mtcars,aes(x = 1, y = qsec))+
  geom_violin(fill = 'deeppink')+
  theme_classic()

plot_grid(p1,p2,ncol=2,labels = c('A','B'),align = c('v','h'))

#直方图&密度曲线
dat <- tibble(x = rnorm(100,5,2))

p1 <- ggplot(dat,aes(x))+
  geom_histogram(fill = 'forestgreen',color = 'black')+
  theme_classic()

p2 <- ggplot(dat,aes(x))+
  geom_density(fill = 'deeppink',color = NA)+
  theme_classic()

plot_grid(p1,p2,ncol=2,labels = c('A','B'),align = c('v','h'))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#多组盒型图&多组小提琴图&多组密度曲线
p1 <- ggplot(mtcars,aes(x = factor(cyl),y = qsec,fill = factor(cyl)))+
  geom_boxplot()+
  theme_classic()+
  scale_fill_lancet()+
  theme(legend.position = c(.8,.8))

p2 <- ggplot(mtcars,aes(x = factor(cyl),y = qsec,fill = factor(cyl)))+
  geom_violin()+
  theme_classic()+
  scale_fill_lancet()+
  theme(legend.position = c(.8,.8))

p3 <- ggplot(mtcars,aes(qsec,fill = factor(cyl)))+
  geom_density(alpha = .5,color = NA)+
  theme_classic()+
  scale_fill_npg()+
  theme(legend.position = c(.8,.8))

plot_grid(p1,p2,p3,ncol=3,labels = c('A','B','C'),align = c('v','h'))

#密度曲线的三种不同排列方式
#library(RColorBrewer)
p1 <- ggplot(mtcars,aes(qsec,fill = factor(cyl)))+
  geom_density(alpha = .5,color = NA)+
  theme_classic()+
  scale_fill_brewer(palette="Accent")+
  theme(legend.position = c(.8,.8))

p2 <- ggplot(mtcars,aes(qsec,fill = factor(cyl)))+
  geom_density(alpha = .5,color = NA,position = 'stack')+
  theme_classic()+
  scale_fill_brewer(palette="Accent")+
  theme(legend.position = c(.8,.8))

p3 <- ggplot(mtcars,aes(qsec,fill = factor(cyl)))+
  geom_density(alpha = .5,color = NA,position = 'fill')+
  theme_classic()+
  scale_fill_brewer(palette="Accent")+
  theme(legend.position = 'none')
plot_grid(p1,p2,p3,ncol=3,labels = c('A','B','C'),align = c('v','h'))

#山脊图
set.seed(2019)
dsamp <- diamonds[sample(nrow(diamonds), 1000), ]

ggplot(dsamp,aes(carat,fill = clarity))+
  geom_density(alpha = .5,color = NA)+
  scale_fill_brewer(palette = 'Set2')+
  theme_classic()+
  theme(legend.position = c(.85,.65))

# install.packages('ggridges')
library(ggridges)

ggplot(dsamp,aes(x = carat,y = clarity, fill = clarity))+
  geom_density_ridges(alpha = .5)+
  scale_fill_brewer(palette = 'Set3')+
  theme_classic()

## Picking joint bandwidth of 0.117

#最简单的散点图
ggplot(mtcars,aes(mpg,qsec))+
  geom_point(size = 2,color = '#00468b')+
  theme_classic()

#散点的颜色：边框色与填充色

mtcars <- mtcars %>% as_tibble() %>% mutate(cyl = factor(cyl))
p1 <- ggplot(mtcars,aes(mpg,qsec,color = cyl))+
  geom_point(size = 2,shape = 1, fill = 'gray')+
  theme_classic()+
  scale_color_jama()+
  theme(legend.position = c(.1,.8))

p2 <- ggplot(mtcars,aes(mpg,qsec,color = cyl))+
  geom_point(size = 2,shape = 21, fill = 'gray')+
  theme_classic()+
  scale_color_aaas()+
  theme(legend.position = c(.1,.8))

plot_grid(p1,p2,ncol=2,labels = c('A','B'),align = c('v','h'))

#25种不同的点型
plot(rep(1:5,times = 5), rep(1:5,each = 5), pch = 1:25, cex = 1.5, 
     bty = 'l', xlim = c(1,5.4),xlab = '',ylab = '',
     main = '25 different point shapes')
text(rep(1:5,times = 5)+0.15,rep(1:5,each = 5),labels = 1:25,
     col = 'red')

#散点颜色的设置：21号点型的应用
# 1号点只能有边框色，不能有填充色，21号点既能有边框色，也能有填充色
mtcars <- mtcars %>% as_tibble() %>% mutate(cyl = factor(cyl),
                                            carb = factor(carb))
p1 <- ggplot(mtcars,aes(mpg,qsec,fill = cyl))+
  geom_point(size = 3,shape = 21, color = 'black')+
  theme_classic()+
  theme(legend.position = c(.1,.8),
        legend.background = element_rect(color = 'black'))

p2 <- ggplot(mtcars,aes(mpg,qsec,fill = carb))+
  geom_point(size = 3,shape = 21, color = 'black')+
  theme_classic()+
  theme(legend.position = c(.1,.7),
        legend.background = element_rect(color = 'black'))

plot_grid(p1,p2,ncol=2,labels = c('A','B'),align = c('v','h'))

#画个甜甜圈：圆圈的粗细
mtcars <- mtcars %>% as_tibble() %>% mutate(cyl = factor(cyl))

ggplot(mtcars,aes(mpg,qsec,color = cyl))+
  geom_point(size = 2,shape = 1, stroke = 1.5)+ # stroke表示圆圈的粗细
  theme_classic()+
  scale_color_lancet()+
  theme(legend.position = c(.1,.8))

#散点的渐变色
mtcars <- mtcars %>% as_tibble() %>% mutate(cyl = 
                                              as.numeric(as.character(cyl)))
ggplot(mtcars,aes(mpg,qsec,color = cyl))+
  geom_point(size = 2)+
  theme_classic()+
  theme(legend.position = c(.1,.8))

ggplot(mtcars,aes(mpg,qsec,color = cyl))+
  geom_point(size = 2)+
  theme_classic()+
  theme(legend.position = c(.1,.8))+
  scale_color_gradient(low = '#fee0d2',high = '#de2d26')

#散点的渐变色：学会使用双色梯度表示正负数
set.seed(2019)
dsamp <- diamonds[sample(nrow(diamonds), 1000), ]
ggplot(dsamp, aes(carat, price)) +
  geom_point(aes(color = depth))+
  scale_color_gradient(low = '#fde0dd',high = '#c51b8a')+
  theme(legend.position = c(.85,.25))

dsamp <- dsamp %>% mutate(depth = depth - median(depth))

p1 <- ggplot(dsamp, aes(carat, price)) +
  geom_point(aes(fill = depth),shape=21, color = 'black',size = 2)+
  scale_fill_gradient2(low = '#41ab5d',
                       mid = 'white',
                       high = '#e31a1c',
                       midpoint = 0)+
  theme(legend.position = c(.1,.8))

p2 <- ggplot(dsamp, aes(carat, price)) +
  geom_point(aes(fill = depth),shape=21, color = 'black',size = 2)+
  scale_fill_gsea()+
  theme(legend.position = c(.1,.8))

plot_grid(p1,p2,ncol=2,labels = c('A','B'),align = c('v','h'))

#气泡图：学会合理使用散点的大小属性
set.seed(2019)
dsamp <- diamonds[sample(nrow(diamonds), 500), ]

ggplot(dsamp, aes(carat, price)) +
  geom_point(aes(size = x,fill = cut),shape=21, 
             color = 'black')+
  scale_fill_brewer(palette = 'Set2')

#散点大小的设置
set.seed(2019)
dsamp <- diamonds[sample(nrow(diamonds), 500), ]

ggplot(dsamp, aes(carat, price)) +
  geom_point(aes(size = x,fill = cut),shape=21, 
             color = 'black')+
  scale_fill_brewer(palette = 'Set2')+
  scale_size_continuous(name = 'Size',
                        breaks = c(5,7,9),
                        labels = c('Small','Middle','Large'))

#合理变换数据，使散点大小分布更均匀
set.seed(2019)
dsamp <- diamonds[sample(nrow(diamonds), 500), ]
dsamp <-  dsamp %>% mutate(x = exp(x))

p1 <- ggplot(dsamp, aes(carat, price)) +
  geom_point(aes(size = x,fill = cut),shape=21, 
             color = 'black')+
  scale_fill_brewer(palette = 'Set2')

p2 <- ggplot(dsamp, aes(carat, price)) +
  geom_point(aes(size = log(x),fill = cut),shape=21, 
             color = 'black')+
  scale_fill_brewer(palette = 'Set2')

plot_grid(p1,p2,ncol=2,labels = c('A','B'),align = c('v','h'))


#散点图遇上密度图
install.packages("ggExtra")

## 
## 下载的二进制程序包在
##  /var/folders/wp/w6gnsqsd21lckpb6065r29c00000gn/T//Rtmp96mIpj/downloaded_packages里

library(ggExtra)

piris <- ggplot(iris, aes(Sepal.Length, Sepal.Width, 
                          color = Species)) +
  geom_point(shape=21, size = 3,stroke = 1.2)+
  scale_color_npg()+
  theme_bw()+
  theme(legend.position = c(.1,.86))
ggMarginal(piris, type= 'density',
           groupFill = TRUE)

#相关矩阵图
# install.packages("ggcorrplot")
library(ggcorrplot)

mtcars2 <- mtcars %>% select(c('mpg','disp','hp',
                              'drat','wt','qsec'))
corr <- round(cor(mtcars2), 1)
corr

##       mpg disp   hp drat   wt qsec
## mpg   1.0 -0.8 -0.8  0.7 -0.9  0.4
## disp -0.8  1.0  0.8 -0.7  0.9 -0.4
## hp   -0.8  0.8  1.0 -0.4  0.7 -0.7
## drat  0.7 -0.7 -0.4  1.0 -0.7  0.1
## wt   -0.9  0.9  0.7 -0.7  1.0 -0.2
## qsec  0.4 -0.4 -0.7  0.1 -0.2  1.0

p1 <- ggcorrplot(corr, method = 'square')

p2 <- ggcorrplot(corr, method = "circle")

## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

plot_grid(p1,p2,ncol=2,labels = LETTERS[1:2],align = c('v','h'))

#相关矩阵图的不同表现形式

p1 <- ggcorrplot(
  corr,
  type = "lower",
  outline.color = "white",
  colors = c("#6D9EC1", "white", "#E46726")
)

p2 <- ggcorrplot(
  corr,
  type = "upper",
  outline.color = "white",
  colors = c("#084594", "white", "#ef3b2c")
)

p3 <- ggcorrplot(corr,
                 type = "lower",
                 lab = TRUE)

p.mat <- cor_pmat(mtcars2) # 增加显著性P值的显示

p4 <- ggcorrplot(corr,
                 type = "lower",
                 p.mat = p.mat) # 增加显著性P值的显示

plot_grid(p1,p2,p3,p4,ncol=2,labels = LETTERS[1:4],align = c('v','h'))

#简单的折线图
set.seed(2019)
x <- 1:8
dat <- tibble(x = x,
              y = 1.2*x+5+rnorm(8,0,3))

ggplot(dat,aes(x, y))+
  geom_line(color = '#41b6c4',size = .8)+
  theme_classic()

#散点+折线，颜值立马变

set.seed(2019)
x <- 1:8
dat <- tibble(x = rep(x,2),
              y = 1.2*x+5+rnorm(16,0,2),
              group = rep(c('Group1','Group2'),each = 8))

p1 <- ggplot(dat,aes(x, y, color = group))+
  geom_line(size = .8)+
  scale_color_d3()+
  theme_classic()+
  theme(legend.position = c(.85,.15))


p2 <- ggplot(dat,aes(x, y, color = group))+
  geom_line(size = .8)+
  geom_point(shape = 21,color = 'black',size=3,fill = 'white')+
  scale_color_d3()+
  theme_classic()+
  theme(legend.position = c(.85,.15))

plot_grid(p1,p2,ncol=2,labels = LETTERS[1:2],align = c('v','h'))

#合理处理差距过大的数据，让折线更协调
set.seed(2020)
id <- 1:8
dat <- tibble(x = rep(id,2),
              y = c(2*id+rnorm(8,0,1),
                    6*id+rnorm(8,0,2)),
              group = rep(c('Group1','Group2'),each = 8))

p1 <- ggplot(dat,aes(x, y, color = group))+
  geom_line(size = .8)+
  geom_point(shape = 21,color = 'black',size=3,fill = 'white')+
  scale_color_d3()+
  theme_classic()+
  theme(legend.position = c(.15,.85))

p2 <- ggplot(dat,aes(x, log(y), color = group))+
  geom_line(size = .8)+
  geom_point(shape = 21,color = 'black',size=3,fill = 'white')+
  scale_color_d3()+
  theme_classic()+
  theme(legend.position = c(.15,.85))

p3 <- ggplot(dat,aes(x, y))+
  geom_line(size = .8,color = '#2b8cbe')+
  geom_point(shape = 21,color = 'black',size=3,fill = 'white')+
  theme_classic()+
  facet_wrap(~group,ncol = 2,scales = 'free_y')+
  theme(legend.position = c(.15,.85),
        strip.background = element_blank(),
        strip.text = element_text(size = 12))

p4 <- plot_grid(p1,p2,ncol=2,labels = LETTERS[1:2],align = c('v','h'))
plot_grid(p4,p3,ncol=1,labels = c('','C'),align = c('v','h'))

## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.

#配对坡度图
data <- structure(list(
  Date = structure(c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2), 
                   .Label = c("11-May-19", "18-May-19"), 
                   class = "factor"), 
  Party = structure(rep(1:5,2), 
                    .Label = c("Green", "Liberal", "NDP", "Others", "PC"), 
                    class = "factor"), 
  Pct = c(42.3, 28.4, 22.1, 8.4, 1.8, 37.9, 33.3, 27.3, 5, 8.4)), 
  class = "data.frame", 
  row.names = c(NA, -10L))

ggplot(data = data, aes(x = Date, y = Pct, 
                        group = Party,color = Party)) +
  geom_line(size = 2) +
  geom_point(size = 4) +
  annotate('text',x = 2.1,y = c(37.9, 33.3, 27.3, 5, 8.4),
           label = c("Green", "Liberal", "NDP", "Others", "PC"))+
  scale_x_discrete(position = "top") +
  scale_color_brewer(palette = 'Paired')+
  theme_bw() +
  theme(legend.position = 'none') +
  theme(panel.border = element_blank())

#面积图
bmi <- tibble(year = rep(1975:2016,5),
              group = rep(c('BMI < -2SD','-2SD<=BMI< -1SD','-1SD<=BMI<=1SD',
                            '1SD<BMI<=2SD','BMI>2SD'),each = 42),
              prevalence = c(sample(1:10,42,replace = TRUE),
                             sample(5:20,42,replace = TRUE),
                             sample(30:50,42,replace = TRUE),
                             sample(1:10,42,replace = TRUE),
                             sample(1:10,42,replace = TRUE)))
bmi <- bmi %>% 
  mutate(group = factor(group,levels = c('BMI < -2SD','-2SD<=BMI< -1SD',
                                         '-1SD<=BMI<=1SD','1SD<BMI<=2SD',
                                         'BMI>2SD')))

ggplot(bmi,aes(year, prevalence, fill = group))+
  geom_area(color = NA,position = 'fill')+
  scale_fill_lancet(name = '')+
  scale_x_continuous(expand = c(0,0))+
  scale_y_continuous(expand = c(0,0),labels = scales::percent)+
  xlab('Year')+
  ylab('Prevalence')+
  guides(fill = guide_legend(ncol = 2))+ 
  theme(legend.position = 'bottom')

#平滑曲线
set.seed(2019)
dsamp <- diamonds[sample(nrow(diamonds), 1000), ]

p1 <- ggplot(dsamp,aes(carat, price, fill = cut))+
  geom_point(shape = 21, color = 'black', size = 3)+
  geom_smooth(se = .8)+
  theme(legend.position = c(.1,.8))

##巧用数据叠加
p2 <- ggplot()+
  geom_point(data = dsamp,aes(carat, price, fill = cut),
             shape = 21, color = 'black', size = 3)+
  geom_smooth(data = dsamp,aes(carat, price),se = .8)+
  theme(legend.position = c(.1,.8))

plot_grid(p1,p2,ncol=2,labels = LETTERS[1:2],align = c('v','h'))

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#ggplot2内置了很多平滑方法

p <- ggplot(mtcars, aes(x = hp, y = mpg)) + 
  geom_point(size = 3,shape = 21,fill = '#5dc863',color = 'black')

p1 <- p + geom_smooth(method = "lm", formula = y ~ x, size = 1)+
  ggtitle(label = 'liner model')

p2 <- p + geom_smooth(method = "lm", formula = y ~ poly(x, 2), size = 1)+
  ggtitle(label = 'polynomial regression')

p3 <- p + geom_smooth(method = "gam", formula = y ~ x, size = 1)+
  ggtitle(label = 'GAM model')

p4 <- p + geom_smooth(method = "gam", formula = y ~ s(x), size = 1)+
  ggtitle(label = 'GAM model with spline')

plot_grid(p1,p2,p3,p4,ncol=2,labels = LETTERS[1:4],align = c('v','h'))

#生存曲线

# install.packages("survminer")
library(survminer)

## 载入需要的程辑包：ggpubr

## 
## 载入程辑包：'ggpubr'

## The following object is masked from 'package:cowplot':
## 
##     get_legend

library(survival)

## 
## 载入程辑包：'survival'

## The following object is masked from 'package:survminer':
## 
##     myeloma

fit <- survfit(Surv(time, status) ~ sex, data = lung)
p1 <- ggsurvplot(fit, data = lung,
                 legend.title = 'Sex',
                 legend = c(.85,.8))

p2 <- ggsurvplot(
  fit, 
  data = lung, 
  size = 1,                
  palette = c("#E7B800", "#2E9FDF"), #设置颜色
  conf.int = TRUE,          
  pval = TRUE,   #设置P值     
  risk.table = TRUE,        
  risk.table.col = "strata",
  legend.labs = 
    c("Male", "Female"),    
  risk.table.height = 0.25, 
  ggtheme = theme_bw(),
  legend.title = 'Sex',
  legend = c(.85,.8)
)

arrange_ggsurvplots(list(p1,p2),ncol = 2,nrow = 1)

#三个连续型变量的“三角恋”
# install.packages('ggtern')
library(ggtern)

## Registered S3 methods overwritten by 'ggtern':
##   method           from   
##   grid.draw.ggplot ggplot2
##   plot.ggplot      ggplot2
##   print.ggplot     ggplot2

## --
## Remember to cite, run citation(package = 'ggtern') for further info.
## --

## 
## 载入程辑包：'ggtern'

## The following objects are masked from 'package:ggplot2':
## 
##     aes, annotate, ggplot, ggplot_build, ggplot_gtable, ggplotGrob,
##     ggsave, layer_data, theme_bw, theme_classic, theme_dark,
##     theme_gray, theme_light, theme_linedraw, theme_minimal, theme_void

library(ggsci)

set.seed(2019)
x  <- data.frame(
  x1 = c( 0.1, 0.2, 0.6, 0.1, 0.6, 0.2 ),
  x2 = c( 0.1, 0.1, 0.5, 0.3, 0.2, 0.8 ),
  x3 = c( 0.1, 0.3, 0.4, 0.6, 0.2, 0.1 ),
  label = LETTERS[1:6]
)

ggtern(data=x,aes(x1,x2,x3)) + 
  geom_point(fill="forestgreen",shape=21,size = 4)+ 
  geom_text(aes(label = label),vjust = 1.7)+
  theme_bw()

#继续三元图
ggtern(data=iris,aes(x = Sepal.Length,
                     y = Sepal.Width,
                     z = Petal.Length,
                     fill = Species,
                     size = Petal.Width))+
  Tlab("") + Llab("") + Rlab("") + 
  Tarrowlab("Sepal.Width")+
  Larrowlab("Sepal.Length")+
  Rarrowlab("Petal.Length")+
  geom_point(shape=21)+
  scale_fill_npg()+
  theme_showarrows()

#三元密度图
set.seed(2019)
a <- tibble(x=rnorm(20000, 80, 30), 
            y=rnorm(20000, 100, 30), 
            z = rnorm(20000, 60, 20))

ggtern(data=a,aes(x,y,z))+
  stat_density_tern(aes(fill = ..level..,alpha = ..level..),
                    geom = 'polygon')+
  scale_fill_gradient(low = '#fcbba1',high = '#ef3b2c')+
  theme_showarrows()+
  guides(color = "none", fill = "none", alpha = "none")

## Warning: Removed 121 rows containing non-finite values (StatDensityTern).

## Warning: stat_density_tern: You have not specified a below-detection-limit (bdl) value (Ref. 'bdl' and 'bdl.val' arguments in ?stat_density_tern). Presently you have 19x value/s below a detection limit of 0.010, which acounts for 0.096% of your data. Density values at fringes may appear abnormally high attributed to the mathematics of the ILR transformation. 
## You can either:
## 1. Ignore this warning,
## 2. Set the bdl value appropriately so that fringe values are omitted from the ILR calculation, or
## 3. Accept the high density values if they exist, and manually set the 'breaks' argument 
##    so that the countours at lower densities are represented appropriately.

#群魔乱舞之雷达图
# devtools::install_github("ricardo-bion/ggradar", 
#                          dependencies = TRUE)
library(ggradar)
library(dplyr)
library(scales)

## 
## 载入程辑包：'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

## The following object is masked from 'package:readr':
## 
##     col_factor

mtcars_radar <- mtcars%>% 
  tail(4) %>% 
  select(1:10)%>%
  mutate_all(rescale)

group <- paste0('Group',LETTERS[1:4])
mtcars_radar <- cbind(group,mtcars_radar)

ggradar(mtcars_radar,group.point.size = 4,
        group.line.width = 1.2,
        legend.text.size = 8,
        legend.position = 'right')

#拉一曲和弦
# install.packages('circlize')
library(circlize)

## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================

set.seed(2019)

numbers <- sample(c(1:1000), 100, replace = T)
data <- matrix( numbers, ncol=5)
rownames(data) <- paste0("Set-", seq(1,20))
colnames(data) <- paste0("Pair-", seq(1,5))

chordDiagram(data, transparency = 0.5)

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

连续型变量-绘图之道

2022-06-07

Including Plots