热图(Heatmap)

# Install the spatial packages only when they are not already available, so
# that re-running the script does not reinstall them every time.
# NOTE(review): neither package is used in the visible part of this script;
# presumably a section outside this view needs them - confirm.
if (!requireNamespace("sf", quietly = TRUE)) {
  install.packages("sf")
}
if (!requireNamespace("rnaturalearth", quietly = TRUE)) {
  install.packages("rnaturalearth")
}
# Heatmap input: the built-in mtcars data frame coerced to a matrix.
data <- as.matrix(mtcars)

library(RColorBrewer)
# brewer.pal(9, "RdBu") returns the 9 colours of the red-to-blue "RdBu"
# palette. colorRampPalette() turns them into an interpolation function,
# which is then asked for 25 gradient colours (printed below).
coul <- colorRampPalette(colors = brewer.pal(n = 9, name = "RdBu"))(n = 25)
coul
##  [1] "#B2182B" "#BE3036" "#CA4841" "#D6604D" "#E0775E" "#EA8E70" "#F4A582"
##  [8] "#F7B698" "#FAC9B0" "#FDDBC7" "#FBE4D6" "#F9EDE7" "#F7F7F7" "#EAF1F4"
## [15] "#DDEBF2" "#D1E5F0" "#BCDAEA" "#A7CFE4" "#92C5DE" "#77B4D5" "#5DA3CC"
## [22] "#4393C3" "#3784BB" "#2C75B3" "#2166AC"
# Draw the heatmap with the custom palette; scale = "column" centres and
# scales the values column by column before they are mapped to colours.
heatmap(x = data, col = coul, scale = "column")

词云图

# Word cloud drawn with the wordcloud2 package.
library(wordcloud2)
# Preview the demo data: one row per word with its frequency.
head(demoFreq)
##          word freq
## oil       oil   85
## said     said   73
## prices prices   48
## opec     opec   42
## mln       mln   31
## the       the   26
# Render the cloud; size = 1.6 sets the font-size factor for the words.
wordcloud2(demoFreq, size = 1.6)

箱线图(Boxplot)

library(tidyverse)
library(hrbrthemes)
library(viridis)
# Simulated long-format data: one row per observation, holding its group
# label (`variable`) and a normally distributed `value`.
# Group "B" is made of two samples centred on 13 and 18, so its distribution
# has two modes.
data <- data.frame(
  variable = rep(
    c("A", "B", "B", "C", "D"),
    times = c(500, 500, 500, 20, 100)
  ),
  value = c(
    rnorm(500, mean = 10, sd = 5),
    rnorm(500, mean = 13, sd = 1),
    rnorm(500, mean = 18, sd = 1),
    rnorm(20, mean = 25, sd = 4),
    rnorm(100, mean = 12, sd = 1)
  )
)

** Boxplot basic **

# Basic boxplot: `variable` (categorical) on the x axis, `value` (numeric) on
# the y axis, and one fill colour per level of `variable`.
ggplot(data, aes(x = variable, y = value, fill = variable)) +
  geom_boxplot() + # one box per group
  # Discrete viridis fill scale: option "A" selects the "magma" colour map,
  # alpha = 0.6 makes the fill semi-transparent.
  scale_fill_viridis(option = "A", alpha = 0.6, discrete = TRUE) +
  theme_ipsum() + # hrbrthemes theme
  theme(
    plot.title = element_text(size = 11), # title font size
    legend.position = "none" # hide the legend
  ) +
  labs(title = "Basic boxplot", x = "") # plot title, empty x-axis label

** Violin basic **

# Violin chart of the same data: one violin per level of `variable`.
ggplot(data, aes(x = variable, y = value, fill = variable)) +
  geom_violin() + # draws the violins
  # Same fill scale as the basic boxplot ("magma" map, semi-transparent).
  scale_fill_viridis(option = "A", alpha = 0.6, discrete = TRUE) +
  theme_ipsum() +
  theme(
    plot.title = element_text(size = 11),
    legend.position = "none"
  ) +
  labs(title = "Violin chart", x = "")

** Boxplot with individual data points **

# Boxplot with the individual observations drawn on top of the boxes.
ggplot(data, aes(x = variable, y = value, fill = variable)) +
  geom_boxplot() +
  # geom_jitter() adds the points with a small random displacement so that
  # observations at the same position do not fully overlap.
  geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
  # Discrete viridis fill with the scale's default colour map (no `option`).
  scale_fill_viridis(alpha = 0.6, discrete = TRUE) +
  theme_ipsum() +
  theme(
    plot.title = element_text(size = 11),
    legend.position = "none"
  ) +
  labs(title = "A boxplot with jitter", x = "")

聚类图(Highlight clusters)

rect.dendrogram() 函数甚至允许用矩形高亮显示一个或多个特定的聚类。

library(tidyverse)
library(dendextend)
# Color in function of the cluster
# par() sets graphical parameters; mar is the margin size given as
# c(bottom, left, top, right). The right margin is the wide one here,
# presumably to leave room for the leaf labels of the horizontal plot.
par(mar = c(1, 1, 1, 7))

# Data
head(mtcars)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Clusterisation using 3 variables.
# The result is bound with `<-` at the head of the pipeline, so the name being
# created is visible up front rather than hidden after the last step.
dend <- mtcars %>%
  select(mpg, cyl, disp) %>% # keep the three clustering variables
  dist() %>% # pairwise distance matrix between the cars
  # Hierarchical clustering. "complete" linkage is hclust()'s default method;
  # it is spelled out so the linkage in use is explicit.
  hclust(method = "complete") %>%
  as.dendrogram() # convert the hclust result to a dendrogram for later use
 
# Colour the leaf labels and the branches according to the 3 clusters (k = 3),
# then draw the dendrogram horizontally without axes.
dend %>%
  set(
    "labels_col",
    value = c("skyblue", "orange", "grey"),
    k = 3
  ) %>%
  set(
    "branches_k_color",
    value = c("skyblue", "orange", "grey"),
    k = 3
  ) %>%
  plot(axes = FALSE, horiz = TRUE)
# Dashed (lty = 2) vertical guide line at x = 350; with horiz = TRUE the
# height axis runs horizontally, so this marks a height of 350.
abline(v = 350, lty = 2)

# Highlight a cluster with rectangle
# Large bottom margin this time (mar is c(bottom, left, top, right)); the tree
# is drawn vertically, so the labels sit below it.
par(mar = c(9, 1, 1, 1))
dend %>%
  set(
    "labels_col",
    value = c("skyblue", "orange", "grey"),
    k = 3
  ) %>%
  set(
    "branches_k_color",
    value = c("skyblue", "orange", "grey"),
    k = 3
  ) %>%
  plot(axes = FALSE)
# Draw a rectangle over the tree cut into k = 3 clusters.
# NOTE(review): per the rect.dendrogram() documentation, `x` selects the
# cluster(s) containing that horizontal coordinate, so x = 1 should highlight
# only the cluster containing the first leaf - confirm on the rendered plot.
# lty = 5 is the "longdash" line type and lwd = 0 the border width; col is an
# RGBA colour (red 0.1, green 0.2, blue 0.4, alpha 0.1), presumably forwarded
# to rect() as the fill.
rect.dendrogram(
  dend,
  k = 3,
  x = 1,
  col = rgb(0.1, 0.2, 0.4, 0.1),
  lty = 5,
  lwd = 0
)

# Comparing 2 dendrograms with tanglegram()
# Build two dendrograms from the same USArrests distance computation; only the
# hclust() linkage method differs ("average" vs "complete").
d1 <- USArrests %>%
  dist() %>%
  hclust(method = "average") %>%
  as.dendrogram()
d2 <- USArrests %>%
  dist() %>%
  hclust(method = "complete") %>%
  as.dendrogram()
 
# Customize the two dendrograms and store them in a dendlist.
# Both get the same treatment: labels and branches coloured by the 3 clusters
# (k = 3) and branch line type 1 (solid).
dl <- dendlist(
  d1 %>%
    set(
      "labels_col",
      value = c("skyblue", "orange", "grey"),
      k = 3
    ) %>%
    set("branches_lty", 1) %>%
    set(
      "branches_k_color",
      value = c("skyblue", "orange", "grey"),
      k = 3
    ),
  d2 %>%
    set(
      "labels_col",
      value = c("skyblue", "orange", "grey"),
      k = 3
    ) %>%
    set("branches_lty", 1) %>%
    set(
      "branches_k_color",
      value = c("skyblue", "orange", "grey"),
      k = 3
    )
)
 
# Draw the two dendrograms face to face, joined by connecting lines.
# The option names are dendextend's own; their exact visual effect is best
# checked in the tanglegram() documentation.
tanglegram(
  dl,
  lwd = 2,
  margin_inner = 7,
  highlight_distinct_edges = TRUE,
  highlight_branches_lwd = FALSE,
  common_subtrees_color_lines = FALSE
)

平行坐标图(Parallel coordinates chart)

# Libraries
library(hrbrthemes)
library(GGally)
library(viridis)
# The iris data set ships with R.
data <- iris

# Parallel coordinates plot of the four measurement columns, with the lines
# grouped (coloured) by column 5, Species.
ggparcoord(
  data,
  columns = 1:4, # columns used as the parallel axes
  groupColumn = 5, # grouping column
  # "anyClass": per the GGally documentation, axes are ordered by how well
  # each variable separates any one class from the rest.
  order = "anyClass",
  # "globalminmax": no scaling is applied; the axis range is defined by the
  # global minimum and maximum over all plotted variables.
  scale = "globalminmax",
  title = "No scaling",
  showPoints = TRUE, # also draw the data points
  alphaLines = 0.3 # line transparency
) +
  scale_color_viridis(discrete = TRUE) + # discrete viridis colour scale
  theme_ipsum() +
  theme(
    plot.title = element_text(size = 13),
    legend.position = "none" # hide the legend
  ) +
  labs(x = "")

# Same plot with per-variable min-max scaling.
ggparcoord(
  data,
  columns = 1:4,
  groupColumn = 5,
  order = "anyClass",
  # "uniminmax": each variable is rescaled on its own so that its minimum
  # becomes 0 and its maximum becomes 1.
  scale = "uniminmax",
  title = "Standardize to Min = 0 and Max = 1",
  showPoints = TRUE,
  alphaLines = 0.3
) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(
    plot.title = element_text(size = 13),
    legend.position = "none"
  ) +
  labs(x = "")

# Same plot with per-variable standardisation.
ggparcoord(
  data,
  columns = 1:4,
  groupColumn = 5,
  order = "anyClass",
  # "std": each variable has its mean subtracted and is divided by its
  # standard deviation.
  scale = "std",
  title = "Normalize univariately (substract mean & divide by sd)",
  showPoints = TRUE,
  alphaLines = 0.3
) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(
    plot.title = element_text(size = 13),
    legend.position = "none"
  ) +
  labs(x = "")

# Same plot with the "center" scaling.
ggparcoord(
  data,
  columns = 1:4,
  groupColumn = 5,
  order = "anyClass",
  # "center": per the GGally documentation, variables are first scaled as
  # with "uniminmax" and then centred on a summary value chosen through the
  # scaleSummary argument (not set here, so its default applies).
  scale = "center",
  title = "Standardize and center variables",
  showPoints = TRUE,
  alphaLines = 0.3
) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(
    plot.title = element_text(size = 13),
    legend.position = "none"
  ) +
  labs(x = "")

#Highlight a group
# Libraries
library(GGally)
library(dplyr)

# Data set is provided by R natively
data <- iris

# Plot: highlight one species and grey out the two others.
data %>%
  # Sort rows by Species in descending order, so the first factor level
  # (setosa) comes last - presumably so that its lines end up drawn on top.
  arrange(desc(Species)) %>%
  # No `scale` argument is given, so ggparcoord()'s default scaling applies
  # (NOTE(review): the documented default is "std", although the title says
  # "Original" - confirm this is intended).
  ggparcoord(
    columns = 1:4, groupColumn = 5, order = "anyClass",
    showPoints = TRUE,
    title = "Original",
    alphaLines = 1
  ) +
  # One colour per Species level, in level order: the first level is green,
  # the two others light grey.
  scale_color_manual(values = c("#69b3a2", "#E8E8E8", "#E8E8E8")) +
  theme_ipsum() +
  theme(
    # "Default" is not a value documented for legend.position; "right" is the
    # documented default placement and shows which group is highlighted.
    legend.position = "right",
    plot.title = element_text(size = 10)
  ) +
  xlab("")

和弦图(Chord diagram)

# Data: a 20 x 5 matrix of random flows between origins (rows) and
# destinations (columns).
# Draw 100 integers from 1..1000 with replacement. TRUE is spelled out because
# the shorthand T is an ordinary variable that can be reassigned. The draw is
# not seeded, so call set.seed() beforehand if a reproducible figure is needed.
numbers <- sample(1:1000, size = 100, replace = TRUE)
data <- matrix(numbers, ncol = 5) # filled column by column: 20 rows
rownames(data) <- paste0("orig-", seq_len(nrow(data))) # "orig-1".."orig-20"
colnames(data) <- paste0("dest-", seq_len(ncol(data))) # "dest-1".."dest-5"

library(circlize)

# Draw the chord diagram of the origin -> destination matrix; the links are
# drawn with a transparency of 0.5.
chordDiagram(x = data, transparency = 0.5)

### 参考: https://r-graph-gallery.com/