## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## 载入程序包:'gridExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## combine
##
##
## ------------------------------------------------------------------------------
##
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
##
## ------------------------------------------------------------------------------
##
##
## 载入程序包:'plyr'
##
##
## The following object is masked from 'package:ggstats':
##
## round_any
##
##
## The following object is masked from 'package:ggpubr':
##
## mutate
##
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
##
## The following object is masked from 'package:purrr':
##
## compact
faithful是R语言中自带的一个经典数据集,它记录了美国黄石国家公园老忠实间歇泉(Old
Faithful
geyser)的喷发数据。这个数据集经常被用于统计教学和数据分析示例。
faithful数据集包含两个变量,共有272个观测值。
data = faithful
datatable(data,rownames = FALSE)
eruptions:
喷发持续时间,连续数值变量,以分钟为单位,范围:1.6分钟到5.1分钟。
waiting:
两次喷发之间的等待时间,连续数值变量,以分钟为单位,范围:43分钟到96分钟。
利用geom_histogram(aes(y=..density..))绘制eruptions的直方图,使用预设主题:mytheme;
利用geom_rug()为直方图添加地毯图;
利用geom_density()为直方图添加核密度曲线;
利用annotate()在直方图标注峰度和偏度信息;
利用geom_vline()
为直方图添加一条垂直的均值参考线;
利用geom_point()在横轴上添加一个中位数参考点,并在点上方添加文字注释
library(e1071) # 用于计算偏度系数和峰度系数
df <- data
# 作初始直方图,纵轴默认为频数
ggplot(data=df,aes(x=eruptions))+mytheme+ # 绘制直方图
geom_histogram(aes(y=..density..),fill="lightgreen",color="gray50")+
geom_rug(size=0.2,color="blue3")+
geom_density(color="blue2",size=0.7)+ # 添加核密度曲线
annotate("text",x=2.5,y=0.7,label=paste0("偏度系数 = ",round(skewness(df$eruptions),4)),size=3)+ # 添加注释文本
annotate("text",x=2.5,y=0.6,label=paste0("峰度系数 = ",round(kurtosis(df$eruptions),4)),size=3)+ # 添加注释文本
geom_vline(xintercept=mean(df$eruptions),linetype="twodash",size=0.6,color="red")+ # 添加均值垂线,并设置线形、线宽和颜色
annotate("text",x=mean(df$eruptions),y=0.7,label=paste0("均值线 = ",round(mean(df$eruptions),2)),size=3)+ # 添加注释文本
geom_point(x=median(df$eruptions),y=0,shape=21,size=4,fill="yellow")+# 添加中位数点
annotate("text",x=median(df$eruptions),y=0.05,label="中位数",size=3,color="red3") # 添加注释文本
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
绘制eruptions和
waiting两个变量的叠加直方图和镜像直方图,使用预设主题:mytheme。
将数据转化为长型数据再作叠加直方图,利用scale_fill_brewer()将叠加直方图配色方案改为set3
。
镜像直方图中eruptions在正方向,waiting在负方向,直方数bins=30,并添加文字标签作标签。
两种图都需要针对原始数据作图和标准标准化数据作图,可以使用scale()函数对变量标准化,分类标准化可以使用plyr::ddply()函数。
df <- data |>
gather(eruptions,waiting,key=指标,value=指标值) %>% # 融合数据
ddply("指标",transform,标准化值=scale(指标值))
p1<-ggplot(df)+aes(x=指标值,y=..density..,fill=指标)+
geom_histogram(position="identity",color="gray60",alpha=0.5)+
scale_fill_brewer(palette = "set3")+
theme(legend.position=c(0.8,0.8),# 设置图例位置
legend.background=element_rect(fill="grey90",color="grey"))+
# 设置图例背景色和边框颜色
ggtitle("(a) 原始数据叠加直方图")
## Warning: Unknown palette: "set3"
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
p2<-ggplot(df)+aes(x=标准化值,y=..density..,fill=指标)+
geom_histogram(position="identity",color="gray60",alpha=0.5)+
scale_fill_brewer(palette = "set3")+
theme(legend.position=c(0.8,0.8),# 设置图例位置
legend.background=element_rect(fill="grey90",color="grey"))+
# 设置图例背景色和边框颜色
ggtitle("(b) 标准化数据叠加直方图")
## Warning: Unknown palette: "set3"
grid.arrange(p1,p2,ncol=2) # 组合图形
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
df <- data |>
mutate(
std.eruptions=scale(eruptions),
std.waiting=scale(waiting)
)
p1<-ggplot(df)+aes(x=x)+
geom_histogram(aes(x=eruptions,y=..density..),bins=30,color="grey50",fill="red",alpha=0.3)+ # 绘制AQI的直方图(上图)
geom_label(aes(x=30,y=0.2),label="eruptions",color="red")+ # 添加标签
geom_histogram(aes(x=waiting,y=-..density..),bins=30,color="grey50",fill="blue",alpha=0.3)+ # 绘制PM2.5的直方图(下图)
geom_label(aes(x=60,y=-0.1),label="waiting",color="blue")+ # 添加标签
xlab("指标值")+ggtitle("(a) 原始数据镜像直方图")
p2<-ggplot(df)+aes(x=x)+
geom_histogram(aes(x=std.eruptions,y=..density..),bins=30,color="grey50",fill="red",alpha=0.3)+ # 绘制AQI的直方图(上图)
geom_label(aes(x=-0.5,y=0.3),label="eruptions",color="red")+ # 添加标签
geom_histogram(aes(x=std.waiting,y=-..density..),bins=30,color="grey50",fill="blue",alpha=0.3)+ # 绘制PM2.5的直方图(下图)
geom_label(aes(x=-0.5,y=-0.3),label="waiting",color="blue")+ # 添加标签
xlab("指标值")+ggtitle("(b) 标准化数据镜像直方图")
grid.arrange(p1,p2,ncol=2) # 组合图形
## Warning in geom_label(aes(x = 30, y = 0.2), label = "eruptions", color = "red"): All aesthetics have length 1, but the data has 272 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_label(aes(x = 60, y = -0.1), label = "waiting", color = "blue"): All aesthetics have length 1, but the data has 272 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_label(aes(x = -0.5, y = 0.3), label = "eruptions", color = "red"): All aesthetics have length 1, but the data has 272 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_label(aes(x = -0.5, y = -0.3), label = "waiting", color = "blue"): All aesthetics have length 1, but the data has 272 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
绘制eruptions和 waiting两个变量的分组核密度图、分面核密度图和镜像核密度图。
分组核密度图,采用geom_density(position="identity")
。
分面核密度图,采用geom_density()+facet_wrap(~xx,scale="free")
。
镜像核密度图中eruptions在正方向,waiting在负方向,直方数bins=30,并添加文字标签作标签。
分组核密度图和镜像核密度图需要针对原始数据作图和标准标准化数据作图。
df <- data |>
gather(eruptions,waiting,key=指标,value=指标值) %>% # 融合数据
ddply("指标",transform,标准化值=scale(指标值))
p1<-ggplot(df)+aes(x=指标值,y=..density..,fill=指标)+
geom_density(position="identity",color="gray60",alpha=0.5)+
scale_fill_brewer(palette = "set3")+
theme(legend.position=c(0.8,0.8),# 设置图例位置
legend.background=element_rect(fill="grey90",color="grey"))+
# 设置图例背景色和边框颜色
ggtitle("(a) 原始数据叠加直方图")
## Warning: Unknown palette: "set3"
p2<-ggplot(df)+aes(x=标准化值,y=..density..,fill=指标)+
geom_density(position="identity",color="gray60",alpha=0.5)+
scale_fill_brewer(palette = "set3")+
theme(legend.position=c(0.8,0.8),# 设置图例位置
legend.background=element_rect(fill="grey90",color="grey"))+
# 设置图例背景色和边框颜色
ggtitle("(b) 标准化数据叠加直方图")
## Warning: Unknown palette: "set3"
grid.arrange(p1,p2,ncol=2) # 组合图形
ggplot(df)+aes(x=指标值,fill=指标)+
geom_density(color="gray50")+
scale_fill_brewer(palette ="Set3")+
guides(fill="none")+
facet_wrap(~指标,ncol = 2,scale="free")
df <- data |>
mutate(
std.eruptions=scale(eruptions),
std.waiting=scale(waiting)
)
p1<-ggplot(df)+aes(x=x)+
geom_density(aes(x=eruptions,y=..density..),bins=30,color="grey50",fill="red",alpha=0.3)+ # 绘制AQI的直方图(上图)
geom_label(aes(x=30,y=0.2),label="eruptions",color="red")+ # 添加标签
geom_density(aes(x=waiting,y=-..density..),bins=30,color="grey50",fill="blue",alpha=0.3)+ # 绘制PM2.5的直方图(下图)
geom_label(aes(x=60,y=-0.1),label="waiting",color="blue")+ # 添加标签
xlab("指标值")+ggtitle("(a) 原始数据镜像直方图")
## Warning in geom_density(aes(x = eruptions, y = ..density..), bins = 30, :
## Ignoring unknown parameters: `bins`
## Warning in geom_density(aes(x = waiting, y = -..density..), bins = 30, color =
## "grey50", : Ignoring unknown parameters: `bins`
p2<-ggplot(df)+aes(x=x)+
geom_density(aes(x=std.eruptions,y=..density..),bins=30,color="grey50",fill="red",alpha=0.3)+ # 绘制AQI的直方图(上图)
geom_label(aes(x=-0.5,y=0.3),label="eruptions",color="red")+ # 添加标签
geom_density(aes(x=std.waiting,y=-..density..),bins=30,color="grey50",fill="blue",alpha=0.3)+ # 绘制PM2.5的直方图(下图)
geom_label(aes(x=-0.5,y=-0.3),label="waiting",color="blue")+ # 添加标签
xlab("指标值")+ggtitle("(b) 标准化数据镜像直方图")
## Warning in geom_density(aes(x = std.eruptions, y = ..density..), bins = 30, :
## Ignoring unknown parameters: `bins`
## Warning in geom_density(aes(x = std.waiting, y = -..density..), bins = 30, :
## Ignoring unknown parameters: `bins`
grid.arrange(p1,p2,ncol=2) # 组合图形
## Warning in geom_label(aes(x = 30, y = 0.2), label = "eruptions", color = "red"): All aesthetics have length 1, but the data has 272 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_label(aes(x = 60, y = -0.1), label = "waiting", color = "blue"): All aesthetics have length 1, but the data has 272 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_label(aes(x = -0.5, y = 0.3), label = "eruptions", color = "red"): All aesthetics have length 1, but the data has 272 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_label(aes(x = -0.5, y = -0.3), label = "waiting", color = "blue"): All aesthetics have length 1, but the data has 272 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
根据实际数据和标准化后的数据绘制eruptions和waiting两个变量的箱线图geom_boxplot和小提琴图geom_violin。
采用stat_summary(fun="mean",geom="point")在箱线图和均值图中要添加均值点。
小提琴图中要加入点图和箱线图
采用调色板前两种颜色,brewer.pal(6,"Set2")[1:2]
,作为箱体填充颜色。
"#66C2A5" "#FC8D62" "#8DA0CB" "#E78AC3" "#A6D854" "#FFD92F"
df <- data |>
gather(everything(),key=指标,value=指标值) %>%
mutate(指标=fct_inorder(指标)) %>%
ddply("指标",transform,标准化值=scale(指标值)) # 计算标准化值并返回数据框
palette<-RColorBrewer::brewer.pal(6,"Set2")[1:2] # 设置离散型调色板
p1 <- ggplot(df,aes(x=指标,y=指标值))+
geom_boxplot(fill=palette)+ # 绘制箱线图并设置填充颜色
stat_summary(fun="mean",geom="point",shape=21,size=2.5,fill="white")
p2 <- ggplot(df,aes(x=指标,y=标准化值))+
geom_boxplot(fill=palette)+ # 绘制箱线图并设置填充颜色
stat_summary(fun="mean",geom="point",shape=21,size=2.5,fill="white")
gridExtra::grid.arrange(p1,p2,ncol=2) # 组合图形
d3r::d3_nest将数据框转化为层次数据“d3.js”作为绘图输入df <- data |>
gather(everything(),key=指标,value=指标值) %>%
mutate(指标=fct_inorder(指标)) %>%
ddply("指标",transform,标准化值=scale(指标值)) # 计算标准化值并返回数据框
# 图(a)原始数据小提琴图
p1<-ggplot(df,aes(x=指标,y=指标值,fill=指标))+
geom_violin(scale="width",trim=FALSE)+
geom_point(color="black",size=0.8)+ # 添加点
geom_boxplot(outlier.size=0.7,outlier.color="white",size=0.3,
width=0.2,fill="white")+ # 添加并设置箱线图和离群点参数
scale_fill_brewer(palette="Set2")+
stat_summary(fun=mean,geom="point",shape=21,size=2)+# 添加均值点
guides(fill="none")+
ggtitle("(a) 原始数据小提琴图")
# 图(b)数据标准化后的小提琴图
p2<-ggplot(df,aes(x=指标,y=标准化值,fill=指标))+
geom_violin(scale="width")+
geom_point(color="black",size=1)+
geom_boxplot(,outlier.size=0.7,outlier.color="black",size=0.3,
width=0.2,fill="white")+
scale_fill_brewer(palette="Set2")+
guides(fill="none")+
ggtitle("(b) 标准化小提琴图")
gridExtra::grid.arrange(p1,p2,ncol=2) # 组合图形p1和p2
绘制eruptions和 waiting
两个变量的威尔金森点图、蜂群图和云雨图。
三种图形均采用标准化数据作图
威尔金森点图采用geom_dotplot(binaxis="y",bins=30,dotsize = 0.3)
,要求作出居中堆叠和向上堆叠两种情况的图。
蜂群图采用geom_beeswarm(cex=0.8,shape=21,size=0.8),要求作出不带箱线图和带有箱线图两种情况的图。
云雨图采用geom_violindot(dots_size=0.7,binwidth=0.07)
,要求作出横向和纵向图两种情况的图。
分别作矩形热图和极坐标热图
mytheme<-theme_bw()+theme(legend.position="none")
df <- data |>
gather(everything(),key=指标,value=指标值) %>%
mutate(指标=fct_inorder(指标)) %>%
ddply("指标",transform,标准化值=scale(指标值)) # 计算标准化值并返回数据框
mytheme<-theme_bw()+theme(legend.position="none")
p<-ggplot(df,aes(x=指标,y=标准化值,fill=指标))
p1<-p+geom_dotplot(binaxis="y",binwidth=0.05,stackdir="center")+ # 绘制点图
mytheme+ggtitle("(a) 居中堆叠")
p2<-p+geom_dotplot(binaxis="y",binwidth=0.05)+ # 绘制点图
mytheme+ggtitle("(b) 向上堆叠")
gridExtra::grid.arrange(p1,p2,ncol=2) # 按2列组合图形
library(ggbeeswarm)
# 处理数据
df1<-data |> select(eruptions,waiting) |>
gather(everything(),key=指标,value=指标值) # 将数据转化成长格式
# 图(a)5项指标的蜂群图
mytheme<-theme_bw()+theme(legend.position="none")
p<-ggplot(df1,aes(x=指标,y=指标值))
p1<-p+geom_beeswarm(cex=0.8,shape=21,fill="black",size=0.8,aes(color=指标))+# 设置蜂群的宽度、点的形状、大小和填充颜色
mytheme+ggtitle("(a) 蜂群图")
# 图(b)箱线图+蜂群图
p2<-p+geom_boxplot(size=0.8,outlier.size=0.8,aes(color=指标))+
geom_beeswarm(shape=21,cex=0.8,size=0.8,aes(color=指标))+
mytheme+ggtitle("(b) 箱线图+蜂群图")
grid.arrange(p1,p2,ncol=2) # 组合图形
library(see) # 提供主题函数theme_modern
##
## 载入程序包:'see'
## The following object is masked from 'package:ggiraphExtra':
##
## coord_radar
mytheme<-theme_modern()+
theme(legend.position="none",
plot.title=element_text(size=14,hjust=0.5)) # 调整标题位置
p1<-ggplot(df,aes(x=指标,y=标准化值,fill=指标))+
geom_violindot(dots_size=1,binwidth=0.07)+ # 绘制云雨图并设置点的大小和箱宽
mytheme+ggtitle("(a) 垂直排列(默认)")
p2<-ggplot(df,aes(x=指标,y=标准化值,fill=指标))+
geom_violindot(dots_size=1,binwidth=0.06)+
coord_flip()+mytheme+ggtitle("(b) 水平排列")
gridExtra::grid.arrange(p1,p2,ncol=2) # 按2列组合图形p1和p2