使用 geom_boxplot() 创建箱线图,并了解其各项参数,以多种方式自定义箱线图。
Boxplot theory
library(ggplot2); library(dplyr);
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(viridis); library(ggExtra)
## Loading required package: viridisLite
# Simulate three samples with different shapes: a right-skewed beta sample
# stretched to [0, 4] and two normal samples with different centre/spread.
group_sizes <- c(red = 200, green = 1000, orange = 1000)
red <- 4 * rbeta(200, shape1 = 0.3, shape2 = 2)
green <- rnorm(1000, mean = 2)
orange <- rnorm(1000, mean = 3.5, sd = 0.5)

# Long-format table: one row per observation. `box` is a per-group constant
# that is later used as the vertical position of that group's boxplot.
xx <- data.frame(
  group = rep(names(group_sizes), times = group_sizes),
  value = c(red, green, orange),
  box = rep(c(0.4, 0.7, 1.0), times = group_sizes)
)

# Manual palette handed to the colour and fill scales of the plot.
col_vector <- c("#72F281", "#357BF0", "#F0624D")
# Overlay each group's density curve with a boxplot of the same values,
# placed at the height stored in `box`, to show how a boxplot summarises a
# distribution. Axes, labels and legend are removed.
xx %>%
  ggplot(aes(x = value, color = group, fill = group)) +
  geom_density(alpha = 0.4) +
  # alpha is a fixed setting, so it goes outside aes(): inside aes() the
  # constant is treated as data and run through an alpha scale, which means
  # the boxes are not drawn at 0.8.
  geom_boxplot(aes(x = value, y = box), color = "black", alpha = 0.8) +
  theme_void() +
  theme(legend.position = "none") +
  labs(x = "", y = "") +
  scale_color_manual(values = col_vector) +
  scale_fill_manual(values = col_vector)

# Draw a reproducible standard-normal sample of 1000 values.
set.seed(3)
rnorm_1000 <- rnorm(n = 1000, mean = 0, sd = 1)

# Show the raw values and their boxplot side by side. par() returns the
# previous settings, which are restored afterwards so the two-panel layout
# does not leak into later base-graphics plots.
old_par <- par(mfrow = c(1, 2))
plot(rnorm_1000, las = 1)
boxplot(rnorm_1000, las = 1)
par(old_par)

# Minimum, quartiles, median, mean and maximum of the sample.
summary(rnorm_1000)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.056328 -0.684539 0.032338 0.006397 0.676673 3.519299
# Count the observations lying strictly between the (rounded) quartiles.
table(-0.68 < rnorm_1000 & rnorm_1000 < 0.67)
##
## FALSE TRUE
## 504 496
names(summary(rnorm_1000))
## [1] "Min." "1st Qu." "Median" "Mean" "3rd Qu." "Max."
# Box height: third quartile minus first quartile.
IQR <- diff(summary(rnorm_1000)[c("1st Qu.", "3rd Qu.")])
IQR
## 3rd Qu.
## 1.361212
# Third quartile (upper edge of the box).
upper_quartile <- summary(rnorm_1000)["3rd Qu."]
upper_quartile
## 3rd Qu.
## 0.6766734
# Third quartile plus 1.5 box heights; values above this limit are treated
# as outliers.
upper_quartile + 1.5 * IQR # 2.718491
## 3rd Qu.
## 2.718491
# The six largest observations, in increasing order.
tail(sort(rnorm_1000), n = 6)
## [1] 2.536236 2.595481 2.635045 2.676632 3.200590 3.519299
# Zoom in on the upper tail of the boxplot and mark the outlier rule.
# The reference lines are computed from the sample rather than copied from
# printed output, so they stay correct if the sample changes.
quartiles <- quantile(rnorm_1000, probs = c(0.25, 0.75), names = FALSE)
# Upper fence: third quartile plus 1.5 box heights.
upper_fence <- quartiles[2] + 1.5 * diff(quartiles)
# Largest observation that is still inside the fence.
upper_whisker <- max(rnorm_1000[rnorm_1000 <= upper_fence])

boxplot(rnorm_1000, las = 1, ylim = c(2.2, 3.6))
abline(h = upper_fence, lty = 2, lwd = 1, col = "red")
abline(h = upper_whisker, lty = 2, lwd = 1, col = "black")
# Overlay the individual observations, jittered horizontally around x = 1.
points(
  x = jitter(rep(1, length(rnorm_1000)), factor = 4),
  y = rnorm_1000,
  col = "red"
)
# The six smallest observations, in increasing order.
head(sort(rnorm_1000))
## [1] -3.056328 -3.053300 -2.665698 -2.545858 -2.403664 -2.398453
# Lower fence: first quartile minus 1.5 box heights. It is stored so the
# plot below uses the computed value instead of a copied constant.
(lower_fence <- summary(rnorm_1000)["1st Qu."] - 1.5 * IQR)
## 1st Qu.
## -2.726356
# Smallest observation that is still inside the fence.
lower_whisker <- min(rnorm_1000[rnorm_1000 >= lower_fence])

# Zoom in on the lower tail of the boxplot and mark both limits.
boxplot(rnorm_1000, las = 1, ylim = c(-3.1, -1.7))
abline(h = lower_fence, lty = 2, lwd = 1, col = "red")
abline(h = lower_whisker, lty = 2, lwd = 1, col = "black")
# Overlay the individual observations, jittered horizontally around x = 1.
points(
  x = jitter(rep(1, length(rnorm_1000)), factor = 4),
  y = rnorm_1000,
  col = "red"
)

Basic ggplot2 boxplot
# First rows of the built-in mtcars data set.
head(mtcars, n = 6)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Default boxplot of mpg per cylinder count; cyl is converted to a factor so
# that each value gets its own box.
mtcars %>%
  ggplot(aes(x = as.factor(cyl), y = mpg)) +
  geom_boxplot()

# Same plot with one fixed, semi-transparent fill and a shorter x-axis title.
ggplot(mtcars, aes(x = as.factor(cyl), y = mpg)) +
  geom_boxplot(fill = "navyblue", alpha = 0.4) +
  labs(x = "cyl")

# Fill mapped to the cylinder count, so every box gets its own colour.
ggplot(mtcars, aes(x = as.factor(cyl), y = mpg, fill = as.factor(cyl))) +
  geom_boxplot(alpha = 0.4) +
  labs(x = "cyl")

geom_boxplot() parameters
# ?geom_boxplot()
# First rows of the mpg data set used in this section.
head(mpg, n = 6)
## # A tibble: 6 × 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa…
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa…
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa…
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa…
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa…
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa…
# Shared canvas for this section: highway mileage by vehicle class.
hwy_by_class <- ggplot(mpg, aes(x = class, y = hwy))

# Fixed outline colour, fill colour and transparency.
hwy_by_class +
  geom_boxplot(color = "blue", fill = "yellow", alpha = 0.5)

# Notched boxes; notchwidth sets the notch width relative to the box.
hwy_by_class +
  geom_boxplot(
    color = "blue", fill = "yellow", alpha = 0.5,
    notch = TRUE, notchwidth = 0.5
  )
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

# Custom outlier markers: red, shape 19, size 3.
hwy_by_class +
  geom_boxplot(
    color = "blue", fill = "yellow", alpha = 0.5,
    notch = TRUE, notchwidth = 0.5,
    outlier.colour = "red", outlier.shape = 19, outlier.size = 3
  )
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

# outlier.shape = NA hides the outlier markers.
hwy_by_class +
  geom_boxplot(
    color = "blue", fill = "yellow", alpha = 0.5,
    notch = TRUE, notchwidth = 0.5,
    outlier.colour = "red", outlier.shape = NA
  )
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

Width for sample size
# Four groups of very different sizes (20, 5, 30 and 100 observations).
set.seed(10)
names <- c(rep("A", 20), rep("B", 5), rep("C", 30), rep("D", 100))
value <- c(
  sample(2:5, 20, replace = TRUE),
  sample(4:10, 5, replace = TRUE),
  sample(1:7, 30, replace = TRUE),
  sample(3:8, 100, replace = TRUE)
)
mydata <- data.frame(names, value)

# Axis labels of the form "<group>\n(N=<count>)". The group names are taken
# from the count table itself: `mydata$names` is a character column, so
# levels() on it returns NULL and the labels would lose their group names.
group_sizes <- table(mydata$names)
my_xlab <- paste0(names(group_sizes), "\n(N=", group_sizes, ")")
my_xlab
## [1] "A\n(N=20)" "B\n(N=5)" "C\n(N=30)" "D\n(N=100)"
# Boxplots with varwidth enabled, so box width varies with group size.
size_scaled_boxes <- ggplot(mydata, aes(x = names, y = value, fill = names)) +
  geom_boxplot(varwidth = TRUE, alpha = 0.2)
size_scaled_boxes

# Same plot without the legend.
size_scaled_boxes +
  theme(legend.position = "none")

# Same plot with the x-axis tick labels replaced by `my_xlab`.
size_scaled_boxes +
  theme(legend.position = "none") +
  scale_x_discrete(labels = my_xlab)

Control ggplot2 boxplot colors
# p1: fill mapped to class, using the default discrete palette.
p1 <- ggplot(mpg, aes(x = class, y = hwy, fill = class)) +
  geom_boxplot() +
  theme(legend.position = "none")
p1

# p2: one fixed outline/fill colour for every box.
p2 <- ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot(color = "red", fill = "orange", alpha = 0.3) +
  theme(legend.position = "none")
p2

# p3: p1 with the ColorBrewer "BuPu" palette.
p3 <- p1 + scale_fill_brewer(palette = "BuPu")
p3

# p4: p1 with the ColorBrewer "Dark2" palette.
p4 <- p1 + scale_fill_brewer(palette = "Dark2")
p4

# Arrange the four variants in a 2 x 2 grid with patchwork.
library(patchwork)
(p1 | p2) / (p3 | p4)

Highlight one boxplot
# Flag the class to emphasise; every other class is labelled "Normal".
mpg_flagged <- mpg %>%
  mutate(type = ifelse(class == "subcompact", "Highlighted", "Normal"))

# Fill by the flag, using the default colours and legend.
ggplot(mpg_flagged, aes(x = class, y = hwy, fill = type)) +
  geom_boxplot()

# Strong colour and high opacity for the highlighted class, faint grey for
# the rest; the legend is dropped.
ggplot(mpg_flagged, aes(x = class, y = hwy, fill = type, alpha = type)) +
  geom_boxplot() +
  scale_fill_manual(values = c(Highlighted = "#69b3a2", Normal = "grey")) +
  scale_alpha_manual(values = c(Highlighted = 0.8, Normal = 0.2)) +
  theme(legend.position = "none")

Grouping boxplot
# Six varieties with 40 observations each; within a variety the first 20
# rows are "high" treatment and the last 20 are "low" (the 40-element
# treatment vector is recycled across varieties by data.frame()).
variety <- rep(LETTERS[1:6], each = 40)
treatment <- rep(c("high", "low"), each = 20)
# Rising trend (the row index) plus a random integer between 1 and 150.
value <- seq_len(240) + sample(1:150, size = 240, replace = TRUE)
mydata <- data.frame(variety, treatment, value)
mydata
## variety treatment value
## 1 A high 103
## 2 A high 22
## 3 A high 69
## 4 A high 33
## 5 A high 54
## 6 A high 34
## 7 A high 56
## 8 A high 148
## 9 A high 113
## 10 A high 127
## 11 A high 38
## 12 A high 72
## 13 A high 87
## 14 A high 78
## 15 A high 87
## 16 A high 149
## 17 A high 122
## 18 A high 110
## 19 A high 164
## 20 A high 75
## 21 A low 35
## 22 A low 91
## 23 A low 93
## 24 A low 119
## 25 A low 174
## 26 A low 109
## 27 A low 58
## 28 A low 36
## 29 A low 67
## 30 A low 60
## 31 A low 60
## 32 A low 141
## 33 A low 55
## 34 A low 138
## 35 A low 167
## 36 A low 107
## 37 A low 64
## 38 A low 144
## 39 A low 173
## 40 A low 44
## 41 B high 93
## 42 B high 132
## 43 B high 96
## 44 B high 90
## 45 B high 141
## 46 B high 131
## 47 B high 153
## 48 B high 150
## 49 B high 81
## 50 B high 161
## 51 B high 65
## 52 B high 64
## 53 B high 117
## 54 B high 154
## 55 B high 199
## 56 B high 165
## 57 B high 95
## 58 B high 80
## 59 B high 198
## 60 B high 95
## 61 B low 184
## 62 B low 155
## 63 B low 161
## 64 B low 129
## 65 B low 137
## 66 B low 178
## 67 B low 132
## 68 B low 197
## 69 B low 216
## 70 B low 132
## 71 B low 118
## 72 B low 143
## 73 B low 166
## 74 B low 172
## 75 B low 111
## 76 B low 191
## 77 B low 120
## 78 B low 106
## 79 B low 198
## 80 B low 178
## 81 C high 133
## 82 C high 106
## 83 C high 113
## 84 C high 147
## 85 C high 214
## 86 C high 179
## 87 C high 187
## 88 C high 135
## 89 C high 162
## 90 C high 206
## 91 C high 109
## 92 C high 153
## 93 C high 230
## 94 C high 204
## 95 C high 105
## 96 C high 111
## 97 C high 157
## 98 C high 175
## 99 C high 232
## 100 C high 190
## 101 C low 225
## 102 C low 195
## 103 C low 236
## 104 C low 129
## 105 C low 223
## 106 C low 119
## 107 C low 150
## 108 C low 194
## 109 C low 238
## 110 C low 170
## 111 C low 194
## 112 C low 247
## 113 C low 192
## 114 C low 240
## 115 C low 163
## 116 C low 171
## 117 C low 163
## 118 C low 266
## 119 C low 226
## 120 C low 243
## 121 D high 264
## 122 D high 170
## 123 D high 228
## 124 D high 213
## 125 D high 133
## 126 D high 239
## 127 D high 241
## 128 D high 181
## 129 D high 257
## 130 D high 197
## 131 D high 259
## 132 D high 202
## 133 D high 268
## 134 D high 178
## 135 D high 184
## 136 D high 154
## 137 D high 168
## 138 D high 171
## 139 D high 267
## 140 D high 200
## 141 D low 160
## 142 D low 169
## 143 D low 200
## 144 D low 250
## 145 D low 203
## 146 D low 247
## 147 D low 204
## 148 D low 274
## 149 D low 174
## 150 D low 220
## 151 D low 285
## 152 D low 172
## 153 D low 273
## 154 D low 223
## 155 D low 304
## 156 D low 287
## 157 D low 206
## 158 D low 232
## 159 D low 164
## 160 D low 285
## 161 E high 191
## 162 E high 304
## 163 E high 228
## 164 E high 249
## 165 E high 225
## 166 E high 256
## 167 E high 299
## 168 E high 190
## 169 E high 283
## 170 E high 310
## 171 E high 227
## 172 E high 184
## 173 E high 206
## 174 E high 180
## 175 E high 179
## 176 E high 280
## 177 E high 290
## 178 E high 198
## 179 E high 314
## 180 E high 200
## 181 E low 226
## 182 E low 238
## 183 E low 324
## 184 E low 273
## 185 E low 203
## 186 E low 195
## 187 E low 204
## 188 E low 225
## 189 E low 327
## 190 E low 329
## 191 E low 231
## 192 E low 253
## 193 E low 241
## 194 E low 230
## 195 E low 238
## 196 E low 285
## 197 E low 292
## 198 E low 303
## 199 E low 242
## 200 E low 343
## 201 F high 345
## 202 F high 223
## 203 F high 221
## 204 F high 350
## 205 F high 353
## 206 F high 277
## 207 F high 329
## 208 F high 223
## 209 F high 275
## 210 F high 227
## 211 F high 344
## 212 F high 323
## 213 F high 360
## 214 F high 357
## 215 F high 273
## 216 F high 219
## 217 F high 338
## 218 F high 220
## 219 F high 344
## 220 F high 255
## 221 F low 292
## 222 F low 232
## 223 F low 246
## 224 F low 303
## 225 F low 317
## 226 F low 336
## 227 F low 347
## 228 F low 362
## 229 F low 303
## 230 F low 248
## 231 F low 364
## 232 F low 338
## 233 F low 308
## 234 F low 293
## 235 F low 247
## 236 F low 326
## 237 F low 354
## 238 F low 306
## 239 F low 290
## 240 F low 255
# Grouped boxplot: one box per variety and treatment, filled by treatment.
treatment_boxes <- ggplot(
  mydata,
  aes(x = variety, y = value, fill = treatment)
) +
  geom_boxplot()
treatment_boxes

# One panel per treatment.
treatment_boxes + facet_wrap(~treatment)

# One panel per variety, with shared axes.
treatment_boxes + facet_wrap(~variety)

# One panel per variety, each with its own x axis.
treatment_boxes + facet_wrap(~variety, scales = "free_x")

# One panel per variety, with both axes free.
treatment_boxes + facet_wrap(~variety, scales = "free")

Adding the mean with stat_summary()
# Four groups (A-D) with 20, 8, 30 and 80 observations, each drawn with
# replacement from its own integer range.
names <- rep(c("A", "B", "C", "D"), times = c(20, 8, 30, 80))
value <- c(
  sample(2:5, size = 20, replace = TRUE),
  sample(4:10, size = 8, replace = TRUE),
  sample(1:7, size = 30, replace = TRUE),
  sample(3:8, size = 80, replace = TRUE)
)
mydata <- data.frame(names, value)
# Plain boxplot, one fill colour per group.
group_boxes <- ggplot(mydata, aes(x = names, y = value, fill = names)) +
  geom_boxplot(alpha = 0.7)
group_boxes

# Add each group's mean as a large yellow point.
with_means <- group_boxes +
  stat_summary(
    fun = mean, geom = "point",
    shape = 20, size = 10, color = "yellow"
  )
with_means

# Drop the legend and switch to the ColorBrewer "Set1" palette.
with_means_styled <- with_means +
  theme(legend.position = "none") +
  scale_fill_brewer(palette = "Set1")
with_means_styled

# Add an error-bar layer computed by the boxplot statistic.
with_means_styled +
  stat_boxplot(geom = "errorbar", width = 0.5)

Adding geom_point, geom_jitter, geom_dotplot
# Four normally distributed groups that differ in size, centre and spread.
mydata <- data.frame(
  name = rep(c("A", "B", "C", "D"), times = c(500, 500, 20, 100)),
  value = c(
    rnorm(500, mean = 10, sd = 5),
    rnorm(500, mean = 13, sd = 1),
    rnorm(20, mean = 25, sd = 4),
    rnorm(100, mean = 12, sd = 1)
  )
)
# Raw observations drawn on top of the boxes with geom_point().
mydata %>%
  ggplot(aes(x = name, y = value, fill = name)) +
  geom_boxplot() +
  geom_point() +
  theme(legend.position = "none", plot.title = element_text(size = 11)) +
  ggtitle("A boxplot with geom_point") +
  xlab("")

# Observations drawn with geom_jitter() on top of the boxes. The title names
# the layer actually used; it previously repeated the geom_point title.
mydata %>%
  ggplot(aes(x = name, y = value, fill = name)) +
  geom_boxplot() +
  geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
  theme(legend.position = "none", plot.title = element_text(size = 11)) +
  ggtitle("A boxplot with geom_jitter") +
  xlab("")

# Jittered observations drawn first, with semi-transparent boxes on top.
mydata %>%
  ggplot(aes(x = name, y = value, fill = name)) +
  geom_jitter(color = "black", size = 0.8, alpha = 0.6) +
  geom_boxplot(alpha = 0.5) +
  theme(legend.position = "none", plot.title = element_text(size = 11)) +
  ggtitle("A boxplot with geom_jitter") +
  xlab("")

Boxplot with dot plot
# dose stays numeric on the x axis; grouping by factor(dose) gives one box
# per dose level.
tooth_boxes <- ggplot(
  ToothGrowth,
  aes(x = dose, y = len, group = factor(dose))
) +
  geom_boxplot()
tooth_boxes

# Add the individual observations as jittered points.
tooth_boxes + geom_jitter()

# Add the observations as a dot plot binned along y and stacked outward from
# the centre of each box.
tooth_boxes +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.8)
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

Boxplots in the margins with ggMarginal
library(ggExtra)
# Scatter plot of weight against mileage, coloured by cylinder count.
# size is a fixed setting, so it is given to geom_point() rather than aes():
# inside aes() the constant is treated as data and run through a size scale,
# which means the points are not drawn at size 2.
p <- ggplot(mtcars, aes(x = wt, y = mpg, color = cyl)) +
  geom_point(size = 2) +
  theme(legend.position = "none")

# Add marginal distributions of x and y around the scatter plot, one call
# per marginal type.
ggMarginal(p, type = "boxplot")
## Warning: Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
## Continuous x aesthetic
## ℹ did you forget `aes(group = ...)`?
ggMarginal(p, type = "density")
ggMarginal(p, type = "histogram")
ggMarginal(p, type = "violin")
ggMarginal(p, type = "densigram")
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## ℹ The deprecated feature was likely used in the ggExtra package.
## Please report the issue at <https://github.com/daattali/ggExtra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Putting it all together
# Four normal groups; the group names are deliberately not in alphabetical
# order in the data (A, D, C, B).
set.seed(1)
names <- c(rep("A", 40), rep("D", 20), rep("C", 60), rep("B", 200))
value <- c(rnorm(40, 3.5), rnorm(20, 7), rnorm(60, 4, sd = 2), rnorm(200, 6))
mydata <- data.frame(names, value)

# Axis labels of the form "<group>\n(N=<count>)". Names and counts both come
# from the same table, so they cannot get out of step: unique() returns the
# groups in order of appearance (A, D, C, B) while table() sorts them
# (A, B, C, D), which would pair names with the wrong counts.
group_sizes <- table(mydata$names)
my_xlab <- paste0(names(group_sizes), "\n(N=", group_sizes, ")")
# Flag group "B" so it can be filled differently from the other groups.
mydata_flagged <- mydata %>%
  mutate(type = ifelse(names == "B", "Highlighted", "Normal"))

# Jittered observations underneath notched, variable-width boxes with
# custom outlier markers.
combined_boxes <- ggplot(mydata_flagged, aes(x = names, y = value, fill = type)) +
  geom_jitter(size = 1, color = "black", alpha = 0.3, width = 0.2) +
  geom_boxplot(
    alpha = 0.7, varwidth = TRUE,
    notch = TRUE, notchwidth = 0.4,
    outlier.color = "red", outlier.fill = "orange", outlier.size = 2
  )
combined_boxes

# Final version: manual fill colours, no legend, `my_xlab` as x-axis tick
# labels, empty axis titles, and each group's mean drawn as a diamond.
combined_boxes +
  scale_fill_manual(values = c("#DE6B45", "#45DEBA")) +
  theme(legend.position = "none") +
  scale_x_discrete(labels = my_xlab) +
  labs(x = "", y = "") +
  stat_summary(
    fun = mean, geom = "point",
    shape = 23, size = 3, color = "darkgreen", fill = "orange"
  )
