Topic: 柱状图

library(ggplot2)

# Draw a reproducible random sample of 1000 rows from the diamonds data set
set.seed(100)
# seq_len() is the safe way to build the row index vector
keep <- sample(seq_len(nrow(diamonds)), 1000)
dat <- diamonds[keep, ]
head(dat)
## # A tibble: 6 × 10
##   carat cut     color clarity depth table price     x     y     z
##   <dbl> <ord>   <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  1.26 Ideal   G     SI1      59.6    57  6738  7.08  7.04  4.21
## 2  0.7  Ideal   D     VS2      62.7    57  3448  5.65  5.67  3.55
## 3  0.36 Ideal   F     SI1      62      56   770  4.59  4.54  2.83
## 4  2.1  Premium J     SI2      59.1    58 12494  8.46  8.4   4.98
## 5  1.21 Premium D     SI2      59.7    58  4946  7.06  6.96  4.19
## 6  2    Good    E     SI2      64.7    57 15393  7.75  7.86  5.05
# One-dimensional count data: number of sampled diamonds per cut level
df1 <- setNames(as.data.frame(table(dat$cut)), c("cut", "count"))
df1
##         cut count
## 1      Fair    30
## 2      Good    78
## 3 Very Good   242
## 4   Premium   243
## 5     Ideal   407

一维数据的bar图

geom_col() 绘制柱状图: 直接以数据中已统计好的数值(这里是频数 count)作为柱高

# Bar heights are taken directly from the pre-computed count column
ggplot(data = df1, mapping = aes(x = cut, y = count)) +
  geom_col()

geom_bar() 默认先统计频数,再绘图

# geom_bar() counts the rows of each cut level itself before drawing
ggplot(data = dat, mapping = aes(x = cut)) +
  geom_bar()

# With stat = "identity", geom_bar() plots the y values as given,
# which matches what geom_col() does
ggplot(data = df1, mapping = aes(x = cut, y = count)) +
  geom_bar(stat = "identity")

后面我们直接使用已统计好的频数数据(df1)绘图。

控制bar的宽度: width=0.5

# Narrower bars: width = 0.5
ggplot(data = df1, mapping = aes(x = cut, y = count)) +
  geom_col(width = 0.5)

修改主题

# Switch to the classic theme with a 14 pt base font size
ggplot(data = df1, mapping = aes(x = cut, y = count)) +
  geom_col(width = 0.5) +
  theme_classic(base_size = 14)

x 轴文字旋转 60 度

# Rotate the x-axis tick labels by 60 degrees, right-justified (hjust = 1)
ggplot(data = df1, mapping = aes(x = cut, y = count)) +
  geom_col(width = 0.5) +
  theme_classic(base_size = 14) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Reusable theme helper for the plots below: tilt the x-axis tick labels.
#   angle: rotation of the labels in degrees (default 60)
#   ...:   further theme settings, forwarded unchanged to theme()
# Returns the result of theme(), to be added to a plot with `+`.
RotatedAxis2 <- function(angle = 60, ...) {
  tilted_labels <- element_text(angle = angle, hjust = 1)
  theme(axis.text.x = tilted_labels, ...)
}

去掉x轴标签并修改y轴文字标签

# Blank out the x-axis title and set the y-axis title to "Count"
ggplot(data = df1, mapping = aes(x = cut, y = count)) +
  geom_col(width = 0.5) +
  theme_classic(base_size = 14) +
  RotatedAxis2() +
  labs(x = "", y = "Count")

指定bar颜色: 单色

# One fixed fill colour for every bar
ggplot(data = df1, mapping = aes(x = cut, y = count)) +
  geom_col(fill = "steelblue", width = 0.5) +
  theme_classic(base_size = 14) +
  RotatedAxis2()

柱子底部和x轴紧密结合

ggplot(df1, aes(x = cut, y = count)) +
  # count data drawn with geom_bar() needs stat = "identity"
  geom_bar(stat = "identity", width = 0.7, fill = "orange2") +
  theme_classic(base_size = 14) +
  RotatedAxis2() +
  # no padding on the y scale, so the bars sit directly on the x axis
  scale_y_continuous(expand = c(0, 0))

柱子顶部添加文字

ggplot(df1, aes(x = cut, y = count)) +
  geom_bar(stat = "identity", fill = "orange2") +
  # negative vjust places each count label just above the top of its bar
  geom_text(aes(label = count), size = 3.5, vjust = -0.3) +
  theme_classic(base_size = 14) +
  RotatedAxis2()

柱子内部添加文字

ggplot(df1, aes(x = cut, y = count)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  # vjust > 1 pushes each label below the bar top, i.e. inside the bar
  geom_text(aes(label = count), color = "white", size = 3.5, vjust = 1.6) +
  theme_classic(base_size = 14) +
  RotatedAxis2()

指定每个bar的颜色: 每个bar一个颜色

# Automatic colours: map fill to cut so every bar gets its own colour
ggplot(data = df1, mapping = aes(x = cut, y = count, fill = cut)) +
  geom_col(width = 0.7) +
  theme_classic(base_size = 14) +
  RotatedAxis2()

# Hide the legend (the x axis already names every bar)
ggplot(df1, aes(x = cut, y = count, fill = cut)) +
  # spell out FALSE: F is an ordinary variable that can be reassigned
  geom_col(width = 0.7, show.legend = FALSE) +
  theme_classic(base_size = 14) +
  RotatedAxis2()

# Custom colours, one per bar
ggplot(df1, aes(x = cut, y = count, fill = cut)) +
  # spell out FALSE: F is an ordinary variable that can be reassigned
  geom_col(width = 0.7, show.legend = FALSE) +
  # six colours are listed while df1 has five cut levels;
  # presumably the surplus colour is simply left unused
  scale_fill_manual(
    values = c("#FF3030", "#009ACD", "#FFC125", "#AB82FF", "#CD8500", "#548B54")
  ) +
  theme_classic(base_size = 14) +
  RotatedAxis2()

水平bar: 旋转坐标轴

# Horizontal bars from the count table: coord_flip() swaps the two axes
ggplot(df1, aes(x = cut, y = count)) +
  # spell out FALSE: F is an ordinary variable that can be reassigned
  geom_col(width = 0.8, show.legend = FALSE, fill = "orange2") +
  theme_classic(base_size = 14) +
  RotatedAxis2() +
  labs(x = "") +
  coord_flip()

# Same horizontal chart from the raw sample: geom_bar() does the counting
ggplot(dat, aes(x = cut, fill = cut)) +
  # spell out FALSE: F is an ordinary variable that can be reassigned
  geom_bar(width = 0.8, show.legend = FALSE) +
  theme_classic(base_size = 14) +
  RotatedAxis2() +
  labs(x = "") +
  coord_flip()

使用极坐标: 半径表示原bar的高度

# Polar coordinates: the radius of each wedge shows what was the bar height
ggplot(dat, aes(x = cut, fill = cut)) +
  # width = 1 leaves no gap between neighbouring bars;
  # spell out FALSE: F is an ordinary variable that can be reassigned
  geom_bar(width = 1, show.legend = FALSE) +
  theme_bw(base_size = 14) +
  labs(x = "") +
  coord_polar()

二维数据(带分类变量)的barplot

# Cross-tabulate carb (rows) against am (columns) for the mtcars data
df1 <- table(mtcars$carb, mtcars$am)
df1
##    
##     0 1
##   1 3 4
##   2 6 4
##   3 3 0
##   4 7 3
##   6 0 1
##   8 0 1
# Flatten the table to long format: one row per carb/am combination
df2 <- setNames(as.data.frame(df1), c("carb", "am", "count"))
df2
##    carb am count
## 1     1  0     3
## 2     2  0     6
## 3     3  0     3
## 4     4  0     7
## 5     6  0     0
## 6     8  0     0
## 7     1  1     4
## 8     2  1     4
## 9     3  1     0
## 10    4  1     3
## 11    6  1     1
## 12    8  1     1

基础版: 堆叠频数,默认是 position="stack"

library(ggplot2)
# Basic version: the am categories are stacked on top of each other
ggplot(data = df2, mapping = aes(x = carb, y = count, fill = am)) +
  # stat = "identity" plots the count values as given
  geom_bar(stat = "identity") +
  theme_classic() +
  ggtitle("Fig 1")

# Equivalent stacked chart written with geom_col()
ggplot(data = df2, mapping = aes(x = carb, y = count, fill = am)) +
  geom_col() +
  theme_classic()

指定颜色

# Stacked bars with hand-picked fill colours for the two am levels
ggplot(data = df2, mapping = aes(x = carb, y = count, fill = am)) +
  geom_col() +
  scale_fill_manual(values = c("orange2", "steelblue")) +
  theme_classic()

y轴标准化为1(每根柱子显示各类别的占比): position="fill"

# position = "fill" rescales every stack to a total height of 1
ggplot(data = df2, mapping = aes(x = carb, y = count, fill = am)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = c("orange2", "steelblue")) +
  theme_classic()

并列显示: position="dodge"

# position = "dodge" draws the am categories side by side
ggplot(data = df2, mapping = aes(x = carb, y = count, fill = am)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("orange2", "steelblue")) +
  theme_classic()

控制柱子的宽度

# Side-by-side bars, made narrower with width = 0.4
ggplot(data = df2, mapping = aes(x = carb, y = count, fill = am)) +
  geom_col(width = 0.4, position = "dodge") +
  scale_fill_manual(values = c("orange2", "steelblue")) +
  theme_classic()

控制同一组内柱子之间的间距

ggplot(data = df2, mapping = aes(x = carb, y = count, fill = am)) +
  # The gap inside a group comes from the difference between the two widths,
  # shared among the bars of the group; with two bars per group that is
  # (0.7 - 0.5) / 2 = 0.1 x-axis units between neighbouring bars.
  geom_col(
    width = 0.5, # combined width of a group's bars before dodging
    position = position_dodge(width = 0.7) # width the group is spread over
  ) +
  scale_fill_manual(values = c("orange2", "steelblue")) +
  theme_classic() +
  scale_y_continuous(expand = c(0, 0)) # no gap between bar bottoms and the x axis

综合实例

1. 统计不同化油器数量(carb)的汽车中,自动挡和手动挡的数量

# Prepare the data: cross-tabulate carb (rows) against am (columns)
df1 <- table(mtcars$carb, mtcars$am)
df1
##    
##     0 1
##   1 3 4
##   2 6 4
##   3 3 0
##   4 7 3
##   6 0 1
##   8 0 1
# Long format: one row per carb/am combination with its count
df2 <- setNames(as.data.frame(df1), c("carb", "am", "count"))
df2
##    carb am count
## 1     1  0     3
## 2     2  0     6
## 3     3  0     3
## 4     4  0     7
## 5     6  0     0
## 6     8  0     0
## 7     1  1     4
## 8     2  1     4
## 9     3  1     0
## 10    4  1     3
## 11    6  1     1
## 12    8  1     1
# Polished grouped bar chart of car counts by carburetor number and transmission
ggplot(df2, aes(x = carb, y = count, fill = am)) +
  geom_bar(
    stat = "identity",
    width = 0.6, # combined width of a group's bars before dodging
    # width each group is spread over; zero-count rows in df2 still keep a slot
    position = position_dodge(width = 0.75)
  ) +
  # Colours and legend; am is the transmission (0 = automatic, 1 = manual)
  scale_fill_manual(
    name = "Type", # legend title
    breaks = c("0", "1"), # order of the legend entries
    labels = c("Automatic", "Manual"), # legend labels, paired one-to-one with breaks
    values = c("#1874B7", "#E3B338") # fill colours, listed in the same order as breaks
  ) +
  labs(
    x = "Number of carburetors", y = "Count",
    title = "Car types", caption = "Data source: R help"
  ) +
  theme_classic(base_size = 12) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"), # centred, bold title
    legend.position = "top", # one of "top", "bottom", "left", "right"
    legend.justification = "left", # align the legend to the left

    legend.key.height = unit(2, "mm"), # height of each legend key
    legend.key.width = unit(5, "mm"), # width of each legend key
    # caption text (not a subtitle): small grey italic, right-aligned
    plot.caption = element_text(color = "grey", face = "italic", size = 8, hjust = 1)
  ) +
  scale_y_continuous(expand = c(0, 0)) # no gap between bar bottoms and the x axis