library(ggplot2)
library(gcookbook) # Load gcookbook for the pg_mean data set
pg_mean group weight
1 ctrl 5.032
2 trt1 4.661
3 trt2 5.526
ggplot(pg_mean, aes(x = group, y = weight)) +
geom_col()使用ggplot()和geom_col(),并指定你想要在 x 轴和 y 轴上显示的变量:
library(ggplot2)
library(gcookbook) # Load gcookbook for the pg_mean data set
pg_mean group weight
1 ctrl 5.032
2 trt1 4.661
3 trt2 5.526
ggplot(pg_mean, aes(x = group, y = weight)) +
geom_col()使用factor()函数转换连续变量为离散变量:
# There's no entry for Time == 6
BOD Time demand
1 1 8.3
2 2 10.3
3 3 19.0
4 4 16.0
5 5 15.6
6 7 19.8
# Time is numeric (continuous)
str(BOD)'data.frame': 6 obs. of 2 variables:
$ Time : num 1 2 3 4 5 7
$ demand: num 8.3 10.3 19 16 15.6 19.8
- attr(*, "reference")= chr "A1.4, p. 270"
ggplot(BOD, aes(x = Time, y = demand)) +
geom_col()# Convert Time to a discrete (categorical) variable with factor()
ggplot(BOD, aes(x = factor(Time), y = demand)) +
geom_col()转换颜色
ggplot(pg_mean, aes(x = group, y = weight)) +
geom_col(fill = "lightblue", colour = "black")数据形式如下:
library(gcookbook) # Load gcookbook for the cabbage_exp data set
cabbage_exp Cultivar Date Weight sd n se
1 c39 d16 3.18 0.9566144 10 0.30250803
2 c39 d20 2.80 0.2788867 10 0.08819171
3 c39 d21 2.74 0.9834181 10 0.31098410
4 c52 d16 2.26 0.4452215 10 0.14079141
5 c52 d20 3.11 0.7908505 10 0.25008887
6 c52 d21 1.47 0.2110819 10 0.06674995
ggplot(cabbage_exp, aes(x = Date, y = Weight)) +
geom_col()需要进行分组,把分组信息映射到填充颜色上。
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col(position = "dodge")最基本的条形图在 x 轴上有一个分类变量,在 y 轴上有一个连续变量。有时,除了 x 轴上的变量之外,您还想使用另一个分类变量来划分数据。您可以通过将该变量映射到填充颜色来生成分组条形图,该颜色表示条形的填充颜色。您还必须使用position = “dodge”,它告诉条形图在水平方向上”躲避”彼此;如果不这样做,您将得到一个堆叠的条形图。
美化:
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col(position = "dodge", colour = "black") +
scale_fill_brewer(palette = "Pastel1")数据结构:
head(diamonds)# A tibble: 6 × 10
carat cut color clarity depth table price x y z
<dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
4 0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63
5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
需要对不同cut品质的钻石进行计数。
使用 geom_bar() 函数而不映射任何变量到 y:
# Equivalent to using geom_bar(stat = "bin")
ggplot(diamonds, aes(x = cut)) +
geom_bar()美化:
ggplot(diamonds, aes(x = cut)) +
geom_bar(colour = "black", fill = "lightblue")数据结构:
library(gcookbook) # Load gcookbook for the uspopchange data set
library(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
upc <- uspopchange %>%
arrange(desc(Change)) %>%
slice(1:10)
upc State Abb Region Change
1 Nevada NV West 35.1
2 Arizona AZ West 24.6
3 Utah UT West 23.8
4 Idaho ID West 21.1
5 Texas TX South 20.6
6 North Carolina NC South 18.5
7 Georgia GA South 18.3
8 Florida FL South 17.6
9 Colorado CO West 16.9
10 South Carolina SC South 15.3
对Region变量进行颜色区分:
ggplot(upc, aes(x = Abb, y = Change, fill = Region)) +
geom_col()默认的颜色不是最吸引人的,所以你可能想用scale_fill_brewer()或scale_fill_manual()来设置它们。在这个例子中,我们将使用后者,并且用colour="black"把条形的轮廓颜色设置为黑色。注意,设置是在aes()外面进行的,而映射是在aes()里面进行的。
ggplot(upc, aes(x = reorder(Abb, Change), y = Change, fill = Region)) +
geom_col(colour = "black") +
scale_fill_manual(values = c("#669933", "#FFCC66")) +
xlab("State")这个例子还使用了reorder()函数,根据Change的值重新排序Abb因子的级别。在这种情况下,按高度排序条形图比按字母顺序排序更有意义。
首先给是否为负数打上标签:
library(gcookbook) # Load gcookbook for the climate data set
library(dplyr)
climate_sub <- climate %>%
filter(Source == "Berkeley" & Year >= 1900) %>%
mutate(pos = Anomaly10y >= 0)
head(climate_sub) Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y pos
1 Berkeley 1900 NA NA -0.171 0.108 FALSE
2 Berkeley 1901 NA NA -0.162 0.109 FALSE
3 Berkeley 1902 NA NA -0.177 0.108 FALSE
4 Berkeley 1903 NA NA -0.199 0.104 FALSE
5 Berkeley 1904 NA NA -0.223 0.105 FALSE
6 Berkeley 1905 NA NA -0.241 0.107 FALSE
ggplot(climate_sub, aes(x = Year, y = Anomaly10y, fill = pos)) +
geom_col(position = "identity")使用position = "identity"来避免一些不必要的警告。
library(gcookbook) # Load gcookbook for the pg_mean data set
ggplot(pg_mean, aes(x = group, y = weight)) +
geom_col()ggplot(pg_mean, aes(x = group, y = weight)) +
geom_col(width = 0.5)ggplot(pg_mean, aes(x = group, y = weight)) +
geom_col(width = 1)ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col(width = 0.5, position = "dodge")ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col(width = 0.5, position = position_dodge(0.7))library(gcookbook) # Load gcookbook for the cabbage_exp data set
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col()反转图例:
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col() +
guides(fill = guide_legend(reverse = TRUE))反转条:
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col(position = position_stack(reverse = TRUE)) +
guides(fill = guide_legend(reverse = TRUE))美化:
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col(
position = position_stack(reverse = TRUE),
colour = "black"
) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_fill_brewer(palette = "Pastel1")使用geom_col(position = "fill")
library(gcookbook) # Load gcookbook for the cabbage_exp data set
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col(position = "fill")替换y轴为百分比:
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col(position = "fill") +
scale_y_continuous(labels = scales::percent)美化:
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col(colour = "black", position = "fill") +
scale_y_continuous(labels = scales::percent) +
scale_fill_brewer(palette = "Pastel1")不用让ggplot2自动计算比例,你也可以自己计算比例值。如果你想在其他计算中使用这些值,这样做会很有用。
要做到这一点,首先要把数据按每个堆叠的比例缩放到 100%。这可以通过使用dplyr包中的group_by()和mutate()函数来实现。
library(gcookbook)
library(dplyr)
cabbage_exp Cultivar Date Weight sd n se
1 c39 d16 3.18 0.9566144 10 0.30250803
2 c39 d20 2.80 0.2788867 10 0.08819171
3 c39 d21 2.74 0.9834181 10 0.31098410
4 c52 d16 2.26 0.4452215 10 0.14079141
5 c52 d20 3.11 0.7908505 10 0.25008887
6 c52 d21 1.47 0.2110819 10 0.06674995
# Do a group-wise transform(), splitting on "Date"
ce <- cabbage_exp %>%
group_by(Date) %>%
mutate(percent_weight = Weight / sum(Weight) * 100)
ce# A tibble: 6 × 7
# Groups: Date [3]
Cultivar Date Weight sd n se percent_weight
<fct> <fct> <dbl> <dbl> <int> <dbl> <dbl>
1 c39 d16 3.18 0.957 10 0.303 58.5
2 c39 d20 2.8 0.279 10 0.0882 47.4
3 c39 d21 2.74 0.983 10 0.311 65.1
4 c52 d16 2.26 0.445 10 0.141 41.5
5 c52 d20 3.11 0.791 10 0.250 52.6
6 c52 d21 1.47 0.211 10 0.0667 34.9
要计算每个Weight组内的百分比,我们使用了dplyr的group_by()和mutate()函数。在这里的例子中,group_by()函数告诉dplyr,未来的操作应该像是在Date列上分组的数据框一样进行。mutate()函数告诉它计算一个新的列,把每一行的Weight值除以每个组内的Weight列的总和。
ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
geom_col()library(gcookbook) # Load gcookbook for the cabbage_exp data set
# Below the top
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
geom_col() +
geom_text(aes(label = Weight), vjust = 1.5, colour = "white")# Above the top
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
geom_col() +
geom_text(aes(label = Weight), vjust = -0.2)另一种常见的情况是给柱状图添加计数的标签,而不是数值。要做到这一点,使用geom_bar(),它会添加高度与行数成比例的柱子,然后使用geom_text()和计数:
ggplot(mtcars, aes(x = factor(cyl))) +
geom_bar() +
geom_text(
aes(label = after_stat(count)),
stat = "count",
vjust = 1.5, colour = "white"
)通过设置垂直对齐(vjust),标签可以出现在柱子的上方或下方。这样做的一个缺点是,当标签在柱子的顶部时,它可能会超出绘图区域的上边界。要解决这个问题,你可以手动设置y轴的限制,或者你可以设置文本在柱子上方的y位置,而不改变垂直对齐。改变文本的y位置的一个缺点是,如果你想把文本完全放在柱子的上方或下方,你需要添加的值将取决于数据的y范围;相反,改变vjust的值将总是让文本相对于柱子的高度移动相同的距离:
# Adjust y limits to be a little higher
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
geom_col() +
geom_text(aes(label = Weight), vjust = -0.2) +
ylim(0, max(cabbage_exp$Weight) * 1.05)# Map y positions slightly above bar top - y range of plot will auto-adjust
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
geom_col() +
geom_text(aes(y = Weight + 0.1, label = Weight))给堆叠柱状图添加标签需要找到每个堆叠的累积和。要做到这一点,首先要确保数据按正确的顺序排序——如果没有,累积和可能会按错误的顺序计算。我们将使用dplyr包中的arrange()函数。注意,我们必须使用rev()函数来反转Cultivar的顺序(如果c39组堆叠在上,需要改变其标签的高度)。
library(dplyr)
# Sort by the Date and Cultivar columns
ce <- cabbage_exp %>%
arrange(Date, rev(Cultivar))# Get the cumulative sum
ce <- ce %>%
group_by(Date) %>%
mutate(label_y = cumsum(Weight))
ce# A tibble: 6 × 7
# Groups: Date [3]
Cultivar Date Weight sd n se label_y
<fct> <fct> <dbl> <dbl> <int> <dbl> <dbl>
1 c52 d16 2.26 0.445 10 0.141 2.26
2 c39 d16 3.18 0.957 10 0.303 5.44
3 c52 d20 3.11 0.791 10 0.250 3.11
4 c39 d20 2.8 0.279 10 0.0882 5.91
5 c52 d21 1.47 0.211 10 0.0667 1.47
6 c39 d21 2.74 0.983 10 0.311 4.21
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col() +
geom_text(aes(y = label_y, label = Weight), vjust = 1.5, colour = "white")如果想在中间标签,同理:
ce <- cabbage_exp %>%
arrange(Date, rev(Cultivar))
# Calculate y position, placing it in the middle
ce <- ce %>%
group_by(Date) %>%
mutate(label_y = cumsum(Weight) - 0.5 * Weight)
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col() +
geom_text(aes(y = label_y, label = Weight), colour = "white")我们将改变颜色,用size设置较小的字体在中间添加标签,用paste添加"kg”后缀,并用format()确保小数点后总是有两位数字。
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
geom_col(colour = "black") +
geom_text(aes(y = label_y, label = paste(format(Weight, nsmall = 2), "kg")), size = 4) +
scale_fill_brewer(palette = "Pastel1")library(gcookbook) # Load gcookbook for the tophitters2001 data set
tophit <- tophitters2001[1:25, ] # Take the top 25 from the tophitters data set
ggplot(tophit, aes(x = avg, y = name)) +
geom_point()筛选数据:
tophit[, c("name", "lg", "avg")] name lg avg
1 Larry Walker NL 0.3501
2 Ichiro Suzuki AL 0.3497
3 Jason Giambi AL 0.3423
4 Roberto Alomar AL 0.3357
5 Todd Helton NL 0.3356
6 Moises Alou NL 0.3314
7 Lance Berkman NL 0.3310
8 Bret Boone AL 0.3307
9 Frank Catalanotto AL 0.3305
10 Chipper Jones NL 0.3304
11 Albert Pujols NL 0.3288
12 Barry Bonds NL 0.3277
13 Sammy Sosa NL 0.3276
14 Juan Pierre NL 0.3274
15 Juan Gonzalez AL 0.3252
16 Luis Gonzalez NL 0.3251
17 Rich Aurilia NL 0.3239
18 Paul Lo Duca NL 0.3196
19 Jose Vidro NL 0.3189
20 Alex Rodriguez AL 0.3180
21 Cliff Floyd NL 0.3171
22 Shannon Stewart AL 0.3156
23 Jeff Cirillo NL 0.3125
24 Jeff Conine AL 0.3111
25 Derek Jeter AL 0.3111
排序 + 换绘图风格:
ggplot(tophit, aes(x = avg, y = reorder(name, avg))) +
geom_point(size = 3) + # Use a larger dot
theme_bw() +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
)交换变量:
ggplot(tophit, aes(x = reorder(name, avg), y = avg)) +
geom_point(size = 3) + # Use a larger dot
theme_bw() +
theme(
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed"),
axis.text.x = element_text(angle = 60, hjust = 1)
)分组排序:
# Get the names, sorted first by lg, then by avg
nameorder <- tophit$name[order(tophit$lg, tophit$avg)]
# Turn name into a factor, with levels in the order of nameorder
tophit$name <- factor(tophit$name, levels = nameorder)ggplot(tophit, aes(x = avg, y = name)) +
geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
geom_point(size = 3, aes(colour = lg)) +
scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
theme_bw() +
theme(
panel.grid.major.y = element_blank(), # No horizontal grid lines
legend.position = c(1, 0.55), # Put legend inside plot area
legend.justification = c(1, 0.5)
)ggplot(tophit, aes(x = avg, y = name)) +
geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
geom_point(size = 3, aes(colour = lg)) +
scale_colour_brewer(palette = "Set1", limits = c("NL", "AL"), guide = "none") +
theme_bw() +
theme(panel.grid.major.y = element_blank()) +
facet_grid(lg ~ ., scales = "free_y", space = "free_y")devtools::session_info()─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.3.1 (2023-06-16 ucrt)
os Windows 11 x64 (build 22621)
system x86_64, mingw32
ui RTerm
language (EN)
collate Chinese (Simplified)_China.utf8
ctype Chinese (Simplified)_China.utf8
tz Asia/Hong_Kong
date 2023-11-04
pandoc 3.1.9 @ C:/Users/HANWAN~1/AppData/Local/Pandoc/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
cachem 1.0.8 2023-05-01 [1] CRAN (R 4.3.1)
callr 3.7.3 2022-11-02 [1] CRAN (R 4.3.1)
cli 3.6.1 2023-03-23 [1] CRAN (R 4.3.1)
colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.1)
crayon 1.5.2 2022-09-29 [1] CRAN (R 4.3.1)
devtools 2.4.5 2022-10-11 [1] CRAN (R 4.3.1)
digest 0.6.33 2023-07-07 [1] CRAN (R 4.3.1)
dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.3.1)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.3.1)
evaluate 0.21 2023-05-05 [1] CRAN (R 4.3.1)
fansi 1.0.4 2023-01-22 [1] CRAN (R 4.3.1)
farver 2.1.1 2022-07-06 [1] CRAN (R 4.3.1)
fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.1)
fs 1.6.3 2023-07-20 [1] CRAN (R 4.3.1)
gcookbook * 2.0 2018-11-07 [1] CRAN (R 4.3.1)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.3.1)
ggplot2 * 3.4.2 2023-04-03 [1] CRAN (R 4.3.1)
glue 1.6.2 2022-02-24 [1] CRAN (R 4.3.1)
gtable 0.3.3 2023-03-21 [1] CRAN (R 4.3.1)
htmltools 0.5.5 2023-03-23 [1] CRAN (R 4.3.1)
htmlwidgets 1.6.2 2023-03-17 [1] CRAN (R 4.3.1)
httpuv 1.6.11 2023-05-11 [1] CRAN (R 4.3.1)
jsonlite 1.8.7 2023-06-29 [1] CRAN (R 4.3.1)
knitr 1.43 2023-05-25 [1] CRAN (R 4.3.1)
labeling 0.4.2 2020-10-20 [1] CRAN (R 4.3.0)
later 1.3.1 2023-05-02 [1] CRAN (R 4.3.1)
lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.3.1)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.1)
memoise 2.0.1 2021-11-26 [1] CRAN (R 4.3.1)
mime 0.12 2021-09-28 [1] CRAN (R 4.3.0)
miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.3.1)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.3.1)
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.3.1)
pkgbuild 1.4.2 2023-06-26 [1] CRAN (R 4.3.1)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.1)
pkgload 1.3.2.1 2023-07-08 [1] CRAN (R 4.3.1)
prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.3.1)
processx 3.8.2 2023-06-30 [1] CRAN (R 4.3.1)
profvis 0.3.8 2023-05-02 [1] CRAN (R 4.3.1)
promises 1.2.0.1 2021-02-11 [1] CRAN (R 4.3.1)
ps 1.7.5 2023-04-18 [1] CRAN (R 4.3.1)
purrr 1.0.1 2023-01-10 [1] CRAN (R 4.3.1)
R6 2.5.1 2021-08-19 [1] CRAN (R 4.3.1)
RColorBrewer 1.1-3 2022-04-03 [1] CRAN (R 4.3.0)
Rcpp 1.0.11 2023-07-06 [1] CRAN (R 4.3.1)
remotes 2.4.2.1 2023-07-18 [1] CRAN (R 4.3.1)
rlang 1.1.1 2023-04-28 [1] CRAN (R 4.3.1)
rmarkdown 2.23 2023-07-01 [1] CRAN (R 4.3.1)
rstudioapi 0.15.0 2023-07-07 [1] CRAN (R 4.3.1)
scales 1.2.1 2022-08-20 [1] CRAN (R 4.3.1)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.1)
shiny 1.7.4.1 2023-07-06 [1] CRAN (R 4.3.1)
stringi 1.7.12 2023-01-11 [1] CRAN (R 4.3.0)
stringr 1.5.0 2022-12-02 [1] CRAN (R 4.3.1)
tibble 3.2.1 2023-03-20 [1] CRAN (R 4.3.1)
tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.3.1)
urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.3.1)
usethis 2.2.2 2023-07-06 [1] CRAN (R 4.3.1)
utf8 1.2.3 2023-01-31 [1] CRAN (R 4.3.1)
vctrs 0.6.3 2023-06-14 [1] CRAN (R 4.3.1)
withr 2.5.0 2022-03-03 [1] CRAN (R 4.3.1)
xfun 0.39 2023-04-20 [1] CRAN (R 4.3.1)
xtable 1.8-4 2019-04-21 [1] CRAN (R 4.3.1)
yaml 2.3.7 2023-01-23 [1] CRAN (R 4.3.0)
[1] C:/Users/Han Wang/AppData/Local/R/win-library/4.3
[2] C:/Program Files/R/R-4.3.1/library
──────────────────────────────────────────────────────────────────────────────