# Install the packages only when they are not already available, so that
# re-running the script does not download and reinstall them every time.
required_pkgs <- c("sf", "rnaturalearth")
is_installed <- vapply(
  required_pkgs,
  # system.file() returns "" when the package cannot be found.
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}
library(RColorBrewer)

# Heatmap of the built-in mtcars data set, converted to a matrix first.
data <- as.matrix(mtcars)

# Build the colour ramp in two steps: brewer.pal() returns 9 colours of the
# "RdBu" (red-to-blue) palette, and colorRampPalette() turns them into an
# interpolation function that is called here to produce 25 gradient colours.
rd_bu_base <- brewer.pal(n = 9, name = "RdBu")
coul <- colorRampPalette(rd_bu_base)(25)
coul
## [1] "#B2182B" "#BE3036" "#CA4841" "#D6604D" "#E0775E" "#EA8E70" "#F4A582"
## [8] "#F7B698" "#FAC9B0" "#FDDBC7" "#FBE4D6" "#F9EDE7" "#F7F7F7" "#EAF1F4"
## [15] "#DDEBF2" "#D1E5F0" "#BCDAEA" "#A7CFE4" "#92C5DE" "#77B4D5" "#5DA3CC"
## [22] "#4393C3" "#3784BB" "#2C75B3" "#2166AC"

# Draw the heatmap with the 25-colour ramp; values are scaled per column.
heatmap(x = data, scale = "column", col = coul)
# Word cloud drawn from demoFreq, a word/frequency table used here right
# after attaching wordcloud2.
library(wordcloud2)

# Preview the first six rows of the frequency table.
head(demoFreq, n = 6L)
## word freq
## oil oil 85
## said said 73
## prices prices 48
## opec opec 42
## mln mln 31
## the the 26

# Draw the word cloud; `size` controls the font size of the words.
wordcloud2(demoFreq, size = 1.6)
library(tidyverse)
library(hrbrthemes)
library(viridis)
# Simulated data set with one categorical column (`variable`) and one numeric
# column (`value`). Each position below describes one batch of normal draws.
# Label "B" is used for two consecutive batches (means 13 and 18), so group B
# holds 1000 values coming from two different normal distributions.
group_labels <- c("A", "B", "B", "C", "D")
group_sizes <- c(500, 500, 500, 20, 100)
group_means <- c(10, 13, 18, 25, 12)
group_sds <- c(5, 1, 1, 4, 1)

data <- data.frame(
  variable = rep(group_labels, times = group_sizes),
  # Map() draws the batches one after another, in the order listed above.
  value = unlist(Map(rnorm, group_sizes, group_means, group_sds))
)
# ---- Boxplot basic ----
# `%>%` is the pipe operator: it passes `data` as the first argument of
# ggplot().
data %>%
  # ggplot() initialises the plot; aes() maps the categorical column
  # `variable` to the x axis and fill colour, and the numeric column `value`
  # to the y axis.
  ggplot(aes(x = variable, y = value, fill = variable)) +
  # Draw one box per category.
  geom_boxplot() +
  # Discrete viridis fill colours; alpha sets the transparency and
  # option = "A" selects the "magma" colour map.
  scale_fill_viridis(discrete = TRUE, alpha = 0.6, option = "A") +
  # hrbrthemes theme.
  theme_ipsum() +
  theme(
    legend.position = "none",             # hide the legend
    plot.title = element_text(size = 11)  # title font size
  ) +
  ggtitle("Basic boxplot") +              # add the title
  xlab("")                                # no x-axis label
# ---- Violin basic ----
# Same mapping as the basic boxplot, drawn as a violin chart instead.
data %>%
  ggplot(aes(x = variable, y = value, fill = variable)) +
  # Draw one violin per category.
  geom_violin() +
  # Discrete viridis fill colours; option = "A" selects the "magma" colour map.
  scale_fill_viridis(discrete = TRUE, alpha = 0.6, option = "A") +
  theme_ipsum() +
  theme(
    legend.position = "none",             # hide the legend
    plot.title = element_text(size = 11)  # title font size
  ) +
  ggtitle("Violin chart") +
  xlab("")
# ---- Boxplot with individual data points ----
data %>%
  ggplot(aes(x = variable, y = value, fill = variable)) +
  geom_boxplot() +
  # Discrete viridis fill colours with the package's default colour map.
  scale_fill_viridis(discrete = TRUE, alpha = 0.6) +
  # Overlay the individual observations as small points; geom_jitter() adds a
  # little random displacement so that points do not sit exactly on top of
  # each other.
  geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
  theme_ipsum() +
  theme(
    legend.position = "none",             # hide the legend
    plot.title = element_text(size = 11)  # title font size
  ) +
  ggtitle("A boxplot with jitter") +
  xlab("")
# ---- Dendrograms with dendextend ----
# The rect.dendrogram() function even allows highlighting one or more specific
# clusters with a rectangle.
library(tidyverse)
library(dendextend)

# Color in function of the cluster
# par() sets or queries graphical parameters; `mar` gives the plot margins in
# the order c(bottom, left, top, right).
par(mar = c(1, 1, 1, 7))

# Data
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Clusterisation using 3 variables
# The result is stored in `dend` for the plots that follow.
dend <- mtcars %>%
  select(mpg, cyl, disp) %>%  # keep the three variables used for clustering
  dist() %>%                  # distance matrix between the rows
  hclust() %>%                # hierarchical clustering; the default method is
                              # "complete" linkage (not single linkage)
  as.dendrogram()             # convert the hclust result to a dendrogram
# One colour per cluster, shared by the labels and the branches.
cluster_palette <- c("skyblue", "orange", "grey")

dend %>%
  # Colour the leaf labels according to a cut of the tree into k = 3 clusters.
  set("labels_col", value = cluster_palette, k = 3) %>%
  # Colour the branches according to the same 3 clusters.
  set("branches_k_color", value = cluster_palette, k = 3) %>%
  # Draw the tree horizontally, without the axis.
  plot(horiz = TRUE, axes = FALSE)

# Add a vertical dashed line (lty = 2) at x = 350 as a visual guide.
abline(v = 350, lty = 2)
# Highlight a cluster with rectangle
# Margins are c(bottom, left, top, right); the bottom margin is the large one.
par(mar = c(9, 1, 1, 1))

highlight_palette <- c("skyblue", "orange", "grey")

dend %>%
  set("labels_col", value = highlight_palette, k = 3) %>%
  set("branches_k_color", value = highlight_palette, k = 3) %>%
  plot(axes = FALSE)

rect.dendrogram(
  dend,
  k = 3,    # cut the tree into 3 clusters
  lty = 5,  # border line type 5 ("longdash")
  lwd = 0,  # border line width
  # NOTE(review): per the dendextend documentation, `x` selects the cluster
  # that contains this horizontal coordinate, so a single rectangle is drawn
  # rather than one per cluster - confirm against the rendered plot.
  x = 1,
  # Fill colour as red, green, blue and alpha, each in the range 0-1.
  col = rgb(0.1, 0.2, 0.4, 0.1)
)
# Comparing 2 dendrograms with tanglegram()
# Make 2 dendrograms from the same distance matrix, using 2 different
# clustering methods: average linkage (d1) and complete linkage (d2).
arrests_dist <- USArrests %>% dist()

d1 <- arrests_dist %>%
  hclust(method = "average") %>%
  as.dendrogram()

d2 <- arrests_dist %>%
  hclust(method = "complete") %>%
  as.dendrogram()
# Customise both dendrograms in the same way and place them in a dendlist.
# The helper applies the shared styling: label colours by 3 clusters, solid
# branch lines (lty = 1) and branch colours by the same 3 clusters.
style_by_cluster <- function(dend_obj) {
  tri_palette <- c("skyblue", "orange", "grey")
  dend_obj %>%
    set("labels_col", value = tri_palette, k = 3) %>%
    set("branches_lty", 1) %>%
    set("branches_k_color", value = tri_palette, k = 3)
}

dl <- dendlist(style_by_cluster(d1), style_by_cluster(d2))
# Plot the two dendrograms face to face, with lines connecting equal labels.
tanglegram(
  dl,
  common_subtrees_color_lines = FALSE,  # do not colour lines by common subtree
  highlight_distinct_edges = TRUE,      # mark edges that differ between trees
  highlight_branches_lwd = FALSE,       # do not vary branch line width
  margin_inner = 7,                     # inner margin, between the two trees
  lwd = 2                               # line width
)
# Libraries
library(hrbrthemes)
library(GGally)
library(viridis)
# Data set is provided by R natively
data <- iris

# Theme tweaks for this plot: no legend, title font size 13.
raw_theme <- theme(
  legend.position = "none",
  plot.title = element_text(size = 13)
)

# ggparcoord() draws a parallel coordinates plot.
ggparcoord(
  data,
  columns = 1:4,           # columns 1 to 4 are the plotted variables
  groupColumn = 5,         # column 5 is the grouping variable
  order = "anyClass",      # order axes by separation of any class vs the rest
  scale = "globalminmax",  # no scaling: the axis range is the global min/max
  showPoints = TRUE,       # also draw the data points
  title = "No scaling",    # plot title
  alphaLines = 0.3         # line transparency
) +
  scale_color_viridis(discrete = TRUE) +  # discrete viridis line colours
  theme_ipsum() +
  raw_theme +
  xlab("")
# Theme tweaks for this plot: no legend, title font size 13.
minmax_theme <- theme(
  legend.position = "none",
  plot.title = element_text(size = 13)
)

# Same plot with per-variable min-max scaling.
ggparcoord(
  data,
  columns = 1:4,
  groupColumn = 5,
  order = "anyClass",
  # Min-max scaling: each variable is rescaled so that its minimum is 0 and
  # its maximum is 1.
  scale = "uniminmax",
  showPoints = TRUE,
  title = "Standardize to Min = 0 and Max = 1",
  alphaLines = 0.3
) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  minmax_theme +
  xlab("")
# Theme tweaks for this plot: no legend, title font size 13.
std_theme <- theme(
  legend.position = "none",
  plot.title = element_text(size = 13)
)

# Same plot with per-variable standardisation.
ggparcoord(
  data,
  columns = 1:4,
  groupColumn = 5,
  order = "anyClass",
  # Standardisation: for each variable, subtract the mean and divide by the
  # standard deviation (mean 0, standard deviation 1).
  scale = "std",
  showPoints = TRUE,
  title = "Normalize univariately (substract mean & divide by sd)",
  alphaLines = 0.3
) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  std_theme +
  xlab("")
# Theme tweaks for this plot: no legend, title font size 13.
center_theme <- theme(
  legend.position = "none",
  plot.title = element_text(size = 13)
)

# Same plot with the "center" scaling option.
ggparcoord(
  data,
  columns = 1:4,
  groupColumn = 5,
  order = "anyClass",
  # NOTE(review): per the GGally documentation, "center" first applies the
  # "uniminmax" scaling and then centres each variable on a summary value
  # (scaleSummary) - it is not a plain subtraction of the mean. Confirm.
  scale = "center",
  showPoints = TRUE,
  title = "Standardize and center variables",
  alphaLines = 0.3
) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  center_theme +
  xlab("")
# Highlight a group
# Libraries
library(GGally)
library(dplyr)

# Data set is provided by R natively
data <- iris

# Plot
data %>%
  # Sort the rows by Species in descending order before plotting
  # (presumably so that the highlighted group is drawn last, on top - confirm).
  arrange(desc(Species)) %>%
  ggparcoord(
    columns = 1:4, groupColumn = 5, order = "anyClass",
    showPoints = TRUE,
    title = "Original",
    alphaLines = 1
  ) +
  # One colour per group: the first gets the accent colour, the other two a
  # light grey so that they fade into the background.
  scale_color_manual(values = c("#69b3a2", "#E8E8E8", "#E8E8E8")) +
  theme_ipsum() +
  theme(
    # "Default" is not a valid legend position; "right" is ggplot2's default
    # placement and keeps the legend that identifies the highlighted group.
    legend.position = "right",
    plot.title = element_text(size = 10)
  ) +
  xlab("")
# Data
# Draw 100 integers from 1 to 1000 at random, with replacement.
numbers <- sample(1:1000, size = 100, replace = TRUE)
# Arrange them in a matrix with 5 columns (and therefore 20 rows).
data <- matrix(numbers, ncol = 5)
rownames(data) <- paste0("orig-", seq_len(20))  # row names: origins
colnames(data) <- paste0("dest-", seq_len(5))   # column names: destinations
library(circlize)

# Draw the chord diagram (circular plot) of the origin/destination matrix,
# with half-transparent links.
chordDiagram(x = data, transparency = 0.5)

# Reference: https://r-graph-gallery.com/