安装ggplot2包、安装gcookbook包,加载ggplot2包、加载本书的数据包

# install.packages('ggplot2') install.packages('gcookbook')
library(ggplot2)
library(gcookbook)

第三章 条形图

3.1 绘制基本条形图

fill:设置条形图的填充色,colour:设置条形图的边框线颜色

# Group means used for the first bar chart.
pg_mean
##   group weight
## 1  ctrl  5.032
## 2  trt1  4.661
## 3  trt2  5.526
# One bar per group; stat = "identity" uses the weight values as bar heights.
ggplot(pg_mean, aes(group, weight)) +
  geom_bar(stat = "identity")

# BOD has a numeric Time column (see the str() output below).
BOD
##   Time demand
## 1    1    8.3
## 2    2   10.3
## 3    3   19.0
## 4    4   16.0
## 5    5   15.6
## 6    7   19.8
str(BOD)
## 'data.frame':    6 obs. of  2 variables:
##  $ Time  : num  1 2 3 4 5 7
##  $ demand: num  8.3 10.3 19 16 15.6 19.8
##  - attr(*, "reference")= chr "A1.4, p. 270"
# Time mapped as-is, i.e. as a numeric x variable.
ggplot(BOD, aes(Time, demand)) +
  geom_bar(stat = "identity")

# Time converted with factor(), so x becomes a discrete variable.
ggplot(BOD, aes(factor(Time), demand)) +
  geom_bar(stat = "identity")

# Light blue fill with a black outline around each bar.
ggplot(pg_mean, aes(group, weight)) +
  geom_bar(stat = "identity", colour = "black", fill = "lightblue")

3.2 绘制簇状条形图

默认值position="stack"设置堆积条形图,position="dodge"设置为并排的条形图

# Cabbage data: one row per Cultivar/Date combination.
cabbage_exp
##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c39  d20   2.80 0.2788867 10 0.08819171
## 3      c39  d21   2.74 0.9834181 10 0.31098410
## 4      c52  d16   2.26 0.4452215 10 0.14079141
## 5      c52  d20   3.11 0.7908505 10 0.25008887
## 6      c52  d21   1.47 0.2110819 10 0.06674995
# Bars for the two cultivars placed side by side within each Date.
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity")

# Same mapping with the bars stacked instead.
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "stack", stat = "identity")

# Dodged bars with a black outline and the Brewer "Pastel1" palette.
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")

# Keep rows 1 to 5 only, which drops the c52/d21 combination.
ce <- cabbage_exp[seq_len(5), ]
ce
##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c39  d20   2.80 0.2788867 10 0.08819171
## 3      c39  d21   2.74 0.9834181 10 0.31098410
## 4      c52  d16   2.26 0.4452215 10 0.14079141
## 5      c52  d20   3.11 0.7908505 10 0.25008887
# Same dodged plot on the reduced data.
ggplot(ce, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(position = "dodge", stat = "identity", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")

3.3 绘制频数条形图

cut为离散型变量,得到频数条形图

# First rows of the diamonds data.
head(diamonds)
##   carat       cut color clarity depth table price    x    y    z
## 1  0.23     Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
## 2  0.21   Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
## 3  0.23      Good     E     VS1  56.9    65   327 4.05 4.07 2.31
## 4  0.29   Premium     I     VS2  62.4    58   334 4.20 4.23 2.63
## 5  0.31      Good     J     SI2  63.3    58   335 4.34 4.35 2.75
## 6  0.24 Very Good     J    VVS2  62.8    57   336 3.94 3.96 2.48
# No y aesthetic and no stat argument: geom_bar() is left on its defaults.
ggplot(diamonds, aes(cut)) +
  geom_bar()

carat为连续型变量,得到直方图

# carat is binned into intervals of width 0.25 and the counts are drawn.
# geom_histogram() is used because geom_bar() no longer accepts `binwidth`
# in ggplot2 >= 2.0.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram(binwidth = 0.25)

3.4 给条形图着色

# Keep the rows whose Change value has a rank above 40.
upc <- subset(uspopchange, rank(Change) > 40)
head(upc)
##       State Abb Region Change
## 3   Arizona  AZ   West   24.6
## 6  Colorado  CO   West   16.9
## 10  Florida  FL  South   17.6
## 11  Georgia  GA  South   18.3
## 13    Idaho  ID   West   21.1
## 29   Nevada  NV   West   35.1
# Bars ordered by Change via reorder() and filled by Region.
ggplot(upc, aes(reorder(Abb, Change), Change, fill = Region)) +
  geom_bar(stat = "identity")

# Same plot with a black outline, two hand-picked fill colours and an
# explicit x-axis label.
ggplot(upc, aes(reorder(Abb, Change), Change, fill = Region)) +
  geom_bar(stat = "identity", colour = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")

3.5 对正负条形图分别着色

# Berkeley records from 1900 onwards.
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
# Flag each row by the sign of its 10-year anomaly; used as the fill variable.
csub$pos <- csub$Anomaly10y >= 0
head(csub)
##       Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y   pos
## 101 Berkeley 1900        NA        NA     -0.171  0.108 FALSE
## 102 Berkeley 1901        NA        NA     -0.162  0.109 FALSE
## 103 Berkeley 1902        NA        NA     -0.177  0.108 FALSE
## 104 Berkeley 1903        NA        NA     -0.199  0.104 FALSE
## 105 Berkeley 1904        NA        NA     -0.223  0.105 FALSE
## 106 Berkeley 1905        NA        NA     -0.241  0.107 FALSE
# position = "identity" is set explicitly so the bars are not stacked.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", position = "identity")

# Thin black outline, custom fills for FALSE/TRUE, and no legend.
# guide = "none" replaces the deprecated guide = FALSE.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# outlines; `size` is kept so older versions still work -- confirm target.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(
    stat = "identity", position = "identity",
    colour = "black", size = 0.25
  ) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

3.6 调整条形宽度和条形间距

3.6.1 通过设定geom_bar()函数的参数width可以使条形变得更宽或者更窄,默认值为0.9,数值越大,条形越宽

# Shared base plot: group on x, weight as bar height.
p <- ggplot(pg_mean, aes(group, weight))

# width is left unset here.
p +
  geom_bar(stat = "identity") +
  ggtitle("条形默认宽度width=0.9")

# Narrower bars.
p +
  geom_bar(width = 0.5, stat = "identity") +
  ggtitle("条形宽度width=0.5")

# width = 1.
p +
  geom_bar(width = 1, stat = "identity") +
  ggtitle("条形宽度width=1")

# width = 1.5, drawn as unfilled black outlines.
p +
  geom_bar(width = 1.5, stat = "identity", colour = "black", fill = NA) +
  ggtitle("条形宽度width=1.5")

3.6.2 簇状条形图默认的组内间距为0,可通过将width设定小一些,并令position_dodge的取值大于width来增加组内条形的间距

下面的4个命令是等价的
  • geom_bar(position="dodge")
  • geom_bar(width=0.9,position=position_dodge())
  • geom_bar(position=position_dodge(0.9))
  • geom_bar(width=0.9,position=position_dodge(width=0.9))
# Shared base plot for the dodge experiments: Date on x, Weight as height,
# Cultivar as fill. Each plot's title records the bar width and the dodge
# width used, so the title must match the arguments.
p <- ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar))

p + geom_bar(stat = "identity", position = "dodge") +
  ggtitle("width=0.9")

p + geom_bar(stat = "identity", width = 0.9, position = position_dodge()) +
  ggtitle("width=0.9,position=position_dodge(width=0.9)")

p + geom_bar(stat = "identity", width = 0.5, position = "dodge") +
  ggtitle("width=0.5")

p + geom_bar(stat = "identity", width = 0.5, position = position_dodge()) +
  ggtitle("width=0.5,position=position_dodge(width=0.9)")

# Dodge width larger than the bar width.
p + geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 1.2)) +
  ggtitle("width=0.5,position=position_dodge(width=1.2)")

p + geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0.7)) +
  ggtitle("width=0.5,position=position_dodge(width=0.7)")

# Dodge width equal to the bar width.
p + geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0.5)) +
  ggtitle("width=0.5,position=position_dodge(width=0.5)")

# Dodge width smaller than the bar width.
p + geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0.3)) +
  ggtitle("width=0.5,position=position_dodge(width=0.3)")

# Dodge width 0 with three different bar widths.
p + geom_bar(stat = "identity", width = 0.5, position = position_dodge(width = 0)) +
  ggtitle("width=0.5,position=position_dodge(width=0)")

p + geom_bar(stat = "identity", width = 0.9, position = position_dodge(width = 0)) +
  ggtitle("width=0.9,position=position_dodge(width=0)")

p + geom_bar(stat = "identity", width = 1.2, position = position_dodge(width = 0)) +
  ggtitle("width=1.2,position=position_dodge(width=0)")

# Thin bars with the 0.9 dodge width. The title previously repeated the
# one from the plot above and did not describe these arguments.
p + geom_bar(stat = "identity", width = 0.2, position = position_dodge(width = 0.9)) +
  ggtitle("width=0.2,position=position_dodge(width=0.9)")

3.7 绘制堆积条形图

默认的图例顺序与条形不一致

# Bars filled by Cultivar with no position argument given.
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

guides(fill=guide_legend(reverse=TRUE))调整图例顺序,取默认顺序的逆序

# Same stacked plot with the fill legend drawn in reverse order.
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))

# plyr is loaded immediately before the call that uses desc().
library(plyr)
# NOTE(review): the `order` aesthetic appears to have been dropped in
# ggplot2 2.0.0, so this call presumably targets an older ggplot2 -- confirm
# which version these notes are meant to run on.
ggplot(
  cabbage_exp,
  aes(Date, Weight, fill = Cultivar, order = desc(Cultivar))
) +
  geom_bar(stat = "identity")

增加边框,指定调色板

# Black outline, Brewer "Pastel1" fills and a reversed fill legend.
ggplot(cabbage_exp, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(colour = "black", stat = "identity") +
  scale_fill_brewer(palette = "Pastel1") +
  guides(fill = guide_legend(reverse = TRUE))

3.8 绘制百分比堆积条形图

首先,通过plyr包中的ddply()函数和transform()函数将每组条形对应的数据标准化为100%的格式,之后按得到的结果绘制堆积条形图

# Within each Date, express every Weight as a percentage of that Date's
# total, stored in the new column percent_weight.
ce <- ddply(
  .data = cabbage_exp, .variables = "Date", .fun = transform,
  percent_weight = Weight / sum(Weight) * 100
)
head(cabbage_exp)
##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c39  d20   2.80 0.2788867 10 0.08819171
## 3      c39  d21   2.74 0.9834181 10 0.31098410
## 4      c52  d16   2.26 0.4452215 10 0.14079141
## 5      c52  d20   3.11 0.7908505 10 0.25008887
## 6      c52  d21   1.47 0.2110819 10 0.06674995
head(ce)
##   Cultivar Date Weight        sd  n         se percent_weight
## 1      c39  d16   3.18 0.9566144 10 0.30250803       58.45588
## 2      c52  d16   2.26 0.4452215 10 0.14079141       41.54412
## 3      c39  d20   2.80 0.2788867 10 0.08819171       47.37733
## 4      c52  d20   3.11 0.7908505 10 0.25008887       52.62267
## 5      c39  d21   2.74 0.9834181 10 0.31098410       65.08314
## 6      c52  d21   1.47 0.2110819 10 0.06674995       34.91686
# Stacking percent_weight makes every Date's bar total 100.
ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

# Styled version: outline, reversed legend, pastel palette. y is
# percent_weight, not the raw Weight, so this is still a 100% chart.
ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")

3.9 添加数据标签

在绘图命令中加上geom_text()即可为条形图添加数据标签,需要分别指定一个变量映射给x、y和标签,通过设定vjust(调整数据标签垂直位置)可以将标签位置移动至条形图顶端的上方或者下方

3.9.1 给简单条形图添加数据标签

数据标签在条形顶端下方,vjust取正值,数据标签在条形顶端上方,vjust取负值

# One bar per Date/Cultivar combination, built with interaction().
# vjust = 1.5 with white text for the first plot.
ggplot(cabbage_exp, aes(interaction(Date, Cultivar), Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), colour = "white", vjust = 1.5)

# vjust = -0.5 with black text for the second plot.
ggplot(cabbage_exp, aes(interaction(Date, Cultivar), Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), colour = "black", vjust = -0.5)

3.9.2 给簇状条形图添加数据标签
geom_bar和geom_text需同时加position=position_dodge(0.9)
# Dodged bars with value labels. geom_bar() and geom_text() are given the
# same position_dodge(0.9) so each label is shifted together with its bar.
# A stray positional `Cultivar` argument was removed from aes(): x, y and
# fill are all named, so it only created an unnamed aesthetic.

# vjust = -0.5 for the first plot.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_dodge(0.9)) +
  geom_text(
    aes(label = Weight),
    vjust = -0.5, colour = "black", position = position_dodge(0.9)
  )

# vjust = 1.5 for the second plot.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_dodge(0.9)) +
  geom_text(
    aes(label = Weight),
    vjust = 1.5, colour = "black", position = position_dodge(0.9)
  )

# Labels at a fixed height: y is overridden to 0.1 for the text layer.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_dodge(0.9)) +
  geom_text(
    aes(label = Weight, y = 0.1),
    colour = "black", position = position_dodge(0.9)
  )

3.9.3 给堆积条形图添加数据标签
向堆积条形图添加数据标签前,需要先对每组条形的数据进行累积求和
library(plyr)
# Sort by Date, then Cultivar, so the running sum below follows that order.
ce <- arrange(cabbage_exp, Date, Cultivar)
ce
##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c52  d16   2.26 0.4452215 10 0.14079141
## 3      c39  d20   2.80 0.2788867 10 0.08819171
## 4      c52  d20   3.11 0.7908505 10 0.25008887
## 5      c39  d21   2.74 0.9834181 10 0.31098410
## 6      c52  d21   1.47 0.2110819 10 0.06674995
# Original (unsorted) data for comparison.
cabbage_exp
##   Cultivar Date Weight        sd  n         se
## 1      c39  d16   3.18 0.9566144 10 0.30250803
## 2      c39  d20   2.80 0.2788867 10 0.08819171
## 3      c39  d21   2.74 0.9834181 10 0.31098410
## 4      c52  d16   2.26 0.4452215 10 0.14079141
## 5      c52  d20   3.11 0.7908505 10 0.25008887
## 6      c52  d21   1.47 0.2110819 10 0.06674995
# label_y is the running total of Weight within each Date.
ce1 <- ddply(ce, "Date", transform, label_y = cumsum(Weight))
ce1
##   Cultivar Date Weight        sd  n         se label_y
## 1      c39  d16   3.18 0.9566144 10 0.30250803    3.18
## 2      c52  d16   2.26 0.4452215 10 0.14079141    5.44
## 3      c39  d20   2.80 0.2788867 10 0.08819171    2.80
## 4      c52  d20   3.11 0.7908505 10 0.25008887    5.91
## 5      c39  d21   2.74 0.9834181 10 0.31098410    2.74
## 6      c52  d21   1.47 0.2110819 10 0.06674995    4.21
# NOTE(review): label_y only lines up with the segments if the bars stack in
# this row order; ggplot2 >= 2.2.0 is believed to have reversed the default
# stacking order -- verify the labels land in the right segments.
ggplot(ce1, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight, y = label_y), colour = "white", vjust = 1.5)

修改填充色、边框,数据标签加单位,调整图例顺序

# Outlined stacked bars with labels formatted to at least two decimals plus
# a "kg" suffix, a pastel palette and a reversed fill legend.
ggplot(ce1, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(colour = "black", stat = "identity") +
  geom_text(
    aes(label = paste(format(Weight, nsmall = 2), "kg"), y = label_y),
    colour = "black", vjust = 1.5
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")

修改堆叠顺序,最好通过在计算累积和之前修改因子的水平顺序

# Sort by Date with Cultivar descending, so c52 precedes c39 in each Date.
ce2 <- arrange(cabbage_exp, Date, desc(Cultivar))
ce2
##   Cultivar Date Weight        sd  n         se
## 1      c52  d16   2.26 0.4452215 10 0.14079141
## 2      c39  d16   3.18 0.9566144 10 0.30250803
## 3      c52  d20   3.11 0.7908505 10 0.25008887
## 4      c39  d20   2.80 0.2788867 10 0.08819171
## 5      c52  d21   1.47 0.2110819 10 0.06674995
## 6      c39  d21   2.74 0.9834181 10 0.31098410
# Running total of Weight within each Date, in the new row order.
ce3 <- ddply(ce2, "Date", transform, label_y = cumsum(Weight))
ce3
##   Cultivar Date Weight        sd  n         se label_y
## 1      c52  d16   2.26 0.4452215 10 0.14079141    2.26
## 2      c39  d16   3.18 0.9566144 10 0.30250803    5.44
## 3      c52  d20   3.11 0.7908505 10 0.25008887    3.11
## 4      c39  d20   2.80 0.2788867 10 0.08819171    5.91
## 5      c52  d21   1.47 0.2110819 10 0.06674995    1.47
## 6      c39  d21   2.74 0.9834181 10 0.31098410    4.21
# Stacked bars labelled at the cumulative heights computed above.
ggplot(ce3, aes(Date, Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight, y = label_y), colour = "white", vjust = 1.5)

将数据标签添加到条形中部,须调整累计求和的结果

# label_y is the midpoint of each segment: the running total minus half of
# the segment's own Weight.
ce4 <- ddply(ce2, "Date", transform, label_y = cumsum(Weight) - Weight / 2)
ce4
##   Cultivar Date Weight        sd  n         se label_y
## 1      c52  d16   2.26 0.4452215 10 0.14079141   1.130
## 2      c39  d16   3.18 0.9566144 10 0.30250803   3.850
## 3      c52  d20   3.11 0.7908505 10 0.25008887   1.555
## 4      c39  d20   2.80 0.2788867 10 0.08819171   4.510
## 5      c52  d21   1.47 0.2110819 10 0.06674995   0.735
## 6      c39  d21   2.74 0.9834181 10 0.31098410   2.840
# No vjust here: label_y already marks the segment midpoint, so the default
# vertical justification centres the text. The earlier vjust = 1.5 pushed
# the labels below the midpoint.
ggplot(ce4, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "white")

3.10 绘制Cleveland点图

使用geom_point()
# Work with the first 25 rows of tophitters2001.
tophit <- tophitters2001[seq_len(25), ]
head(tophit[, c("name", "lg", "avg")])
##             name lg    avg
## 1   Larry Walker NL 0.3501
## 2  Ichiro Suzuki AL 0.3497
## 3   Jason Giambi AL 0.3423
## 4 Roberto Alomar AL 0.3357
## 5    Todd Helton NL 0.3356
## 6    Moises Alou NL 0.3314
# Dot plot with avg on x and the player name on y.
ggplot(tophit, aes(avg, name)) +
  geom_point()

# Names ordered by avg via reorder(), drawn with larger points.
ggplot(tophit, aes(avg, reorder(name, avg))) +
  geom_point(size = 6)