#' Draw several plot objects on a single page
#'
#' Plots can be supplied individually through `...`, as a list through
#' `plotlist`, or both; the two sources are concatenated in that order.
#'
#' @param ... Plot objects (e.g. ggplot objects) to draw.
#' @param plotlist Optional list of further plot objects.
#' @param file Not used by the function body; kept so that existing calls
#'   which pass it keep working.
#' @param cols Number of columns in the automatically generated layout.
#' @param layout Optional matrix of plot indices describing the page grid.
#'   When supplied, `cols` is ignored. For example,
#'   `matrix(c(1, 2, 3, 3), nrow = 2, byrow = TRUE)` puts plot 1 in the upper
#'   left, plot 2 in the upper right and stretches plot 3 across the bottom.
#' @return Called for its drawing side effect. With exactly one plot the value
#'   of `print()` on that plot is returned; otherwise `NULL`, invisibly.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
    # Still attached (not just namespaced) in case calling code relies on grid
    # being on the search path after multiplot() has run.
    library(grid)

    # Make a single list from the ... arguments and plotlist
    plots <- c(list(...), plotlist)
    num_plots <- length(plots)

    # Nothing to draw. Without this guard 1:0 iterates over c(1, 0) and the
    # first plots[[1]] lookup fails with "subscript out of bounds".
    if (num_plots == 0) {
        return(invisible(NULL))
    }

    # If layout is NULL, derive a grid from 'cols': as many rows as are needed
    # to hold every plot.
    if (is.null(layout)) {
        n_rows <- ceiling(num_plots / cols)
        layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
    }

    if (num_plots == 1) {
        # A single plot is printed directly; the layout is not consulted.
        print(plots[[1]])
    } else {
        # Set up the page
        grid.newpage()
        pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

        # Draw each plot in the layout cells that carry its index
        for (i in seq_len(num_plots)) {
            # Row/column positions of the cells assigned to plot i
            matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

            print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                layout.pos.col = matchidx$col))
        }
    }
}
# multiplot(p1, p2, p3, p4, cols=2) gridExtra
# install.packages('gcookbook')
library(ggplot2)
library(gcookbook)
library(plyr)

6.1 绘制简单直方图

应用geom_histogram()函数并映射一个连续型变量到参数x,直方图默认的填充色是黑色,且无边框线。

# Same variable drawn twice: once via geom_histogram(), once via stat_bin().
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_histogram()
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    stat_bin()
multiplot(p1, p2, cols = 2)

绘制一个变量的直方图,将data设置为NULL,向量映射到参数x即可

# Histogram of a bare vector: no data frame is passed, the vector itself is
# mapped to x.
x <- faithful$waiting
ggplot(data = NULL, mapping = aes(x = x)) +
    geom_histogram()

默认情况下,数据将被切分为30组,还可以通过设置组距(binwidth)参数来调整数据的分组数目或者将数据切分为指定的分组数目。

1、设定组距为5,并设置填充色和边框:

# Bin width of 5, white bars outlined in black.
ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_histogram(binwidth = 5, fill = "white", colour = "black")

2、将x的取值切分为15组:可以根据数据范围计算组距(binwidth),也可以直接设置bins参数:

# Two routes to 15 bins: derive the bin width from the data range (p1), or
# request the bin count directly through `bins` (p2).
binsize <- (max(faithful$waiting) - min(faithful$waiting)) / 15
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_histogram(binwidth = binsize, fill = "white", colour = "black")
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_histogram(bins = 15, fill = "white", colour = "black")
multiplot(p1, p2, cols = 2)

3、设置组距(binwidth)以及分组原点(boundary)

# Base plot kept in `p`; here drawn with bins of width 5 aligned so that a bin
# edge falls on 40.
p <- ggplot(data = faithful, mapping = aes(x = waiting))
p +
    geom_histogram(
        binwidth = 5, boundary = 40,
        fill = "white", colour = "black"
    )

4、可通过参数breaks设置直方图的边界,并应用scale_x_continuous()设置x轴的刻度与直方图的边界保持一致。

# Explicit, unevenly spaced bin edges; the x-axis ticks are set to the same
# values so they line up with the bars.
p +
    geom_histogram(
        breaks = c(40, 50, 70, 80, 100),
        fill = "white", colour = "black"
    ) +
    scale_x_continuous(breaks = c(40, 50, 70, 80, 100))

6.2 绘制分组直方图

# birthwt ships with MASS; the `##` lines are the recorded output of head().
library(MASS)
head(birthwt)
##    low age lwt race smoke ptl ht ui ftv  bwt
## 85   0  19 182    2     0   0  0  1   0 2523
## 86   0  33 155    3     0   0  0  0   3 2551
## 87   0  20 105    1     1   0  0  0   1 2557
## 88   0  21 108    1     1   0  0  1   2 2594
## 89   0  18 107    1     1   0  0  1   0 2600
## 91   0  21 124    3     0   0  0  0   0 2622
# One histogram of birth weight per value of smoke, faceted by row.
ggplot(data = birthwt, mapping = aes(x = bwt)) +
    geom_histogram(fill = "white", colour = "black") +
    facet_grid(smoke ~ .)

修改分面标签,需要修改因子水平的名称

# Work on a copy so the facet labels can be changed without touching birthwt.
birthwt1 <- birthwt
# Convert smoke to a factor and rename its levels in one step; revalue() comes
# from plyr.
birthwt1$smoke <- revalue(
    factor(birthwt1$smoke),
    c(`0` = "No smoke", `1` = "Smoke")
)
head(birthwt1)
##    low age lwt race    smoke ptl ht ui ftv  bwt
## 85   0  19 182    2 No smoke   0  0  1   0 2523
## 86   0  33 155    3 No smoke   0  0  0   3 2551
## 87   0  20 105    1    Smoke   0  0  0   1 2557
## 88   0  21 108    1    Smoke   0  0  1   2 2594
## 89   0  18 107    1    Smoke   0  0  1   0 2600
## 91   0  21 124    3 No smoke   0  0  0   0 2622
# Same faceted histogram as before, now with the renamed levels as labels.
ggplot(data = birthwt1, mapping = aes(x = bwt)) +
    geom_histogram(fill = "white", colour = "black") +
    facet_grid(smoke ~ .)

6.3 绘制密度曲线

应用geom_density()函数,并映射一个连续型变量到x:

# p1: geom_density(). p2: the same estimate drawn as a plain line, with the
# y-axis forced to include 0.
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_density()
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_line(stat = "density") +
    expand_limits(y = 0)
multiplot(p1, p2, cols = 2)

核密度曲线是基于样本数据对总体分布的一个估计,曲线的光滑程度取决于核函数的带宽:带宽越大,曲线越光滑,带宽可通过adjust参数来设置,其默认值为1。

# Three density lines on one plot, differing only in `adjust`:
# 0.25 (red), the default (no colour set) and 2 (blue).
ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_line(stat = "density", adjust = 0.25, colour = "red") +
    geom_line(stat = "density") +
    geom_line(stat = "density", adjust = 2, colour = "blue")

x轴的坐标范围是自动设定的,以使其能包含相应的数据,但这会导致曲线的边缘被裁剪,想要展示曲线的更多部分,可以通过xlim()设定x轴的范围:

# Filled density with the automatic x range (p1) versus an explicit, wider
# x range of 35 to 105 (p2).
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_density(fill = "blue", alpha = 0.2)
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_density(fill = "blue", alpha = 0.2) +
    xlim(35, 105)
multiplot(p1, p2, cols = 2)

移除底部的横线,可将geom_density()中的colour参数设置为NA,并使用geom_line(stat = "density")添加一条密度曲线:

# p1: filled density with its default outline.
# p2: outline switched off (colour = NA) and the curve added back as a
#     separate density line.
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_density(fill = "blue", alpha = 0.2) +
    xlim(35, 105)
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_density(fill = "blue", colour = NA, alpha = 0.2) +
    geom_line(stat = "density") +
    xlim(35, 105)
multiplot(p1, p2, cols = 2)

在直方图中添加密度曲线:

p1:绘制的频数直方图

p2:绘制的是频率直方图,需要设置y=..density..

p3:在频数直方图上添加密度曲线,由于密度曲线的y值相比频数很小,在图中与x轴的横线重合

p4:在频率直方图添加密度曲线

# Four variants on a 2-column page:
#   p1  histogram on the default (count) scale
#   p2  histogram with y mapped to ..density..
#   p3  p1-style histogram plus geom_density()
#   p4  p2-style histogram plus geom_density()
# NOTE(review): `..density..` and `size` for outline width are deprecated
# spellings in recent ggplot2 (after_stat(density) / linewidth) - confirm the
# minimum ggplot2 version before switching.
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_histogram(fill = "cornsilk", colour = "grey60", size = 0.2) +
    ggtitle("p1")
p2 <- ggplot(data = faithful, mapping = aes(x = waiting, y = ..density..)) +
    geom_histogram(fill = "cornsilk", colour = "grey60", size = 0.2) +
    ggtitle("p2")
p3 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_histogram(fill = "cornsilk", colour = "grey60", size = 0.2) +
    geom_density() +
    xlim(35, 105) +
    ggtitle("p3")
p4 <- ggplot(data = faithful, mapping = aes(x = waiting, y = ..density..)) +
    geom_histogram(fill = "cornsilk", colour = "grey60", size = 0.2) +
    geom_density() +
    xlim(35, 105) +
    ggtitle("p4")
multiplot(p1, p2, p3, p4, cols = 2)

在频数直方图上添加密度曲线,由于密度曲线的y值相比频数很小,在图中与x轴的横线重合

# Same comparison using a plain density line instead of geom_density():
# p1 keeps the default histogram scale, p2 maps y to ..density..
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_histogram(fill = "cornsilk", colour = "grey60", size = 0.2) +
    geom_line(stat = "density") +
    xlim(35, 105)
p2 <- ggplot(data = faithful, mapping = aes(x = waiting, y = ..density..)) +
    geom_histogram(fill = "cornsilk", colour = "grey60", size = 0.2) +
    geom_line(stat = "density") +
    xlim(35, 105)
multiplot(p1, p2, cols = 2)

### 6.4 绘制分组密度曲线

# Fresh copy of birthwt with smoke as a factor so it can serve as a grouping
# aesthetic.
birthwt1 <- birthwt
birthwt1$smoke <- as.factor(birthwt1$smoke)
# p1 distinguishes the groups by line colour, p2 by a semi-transparent fill.
p1 <- ggplot(data = birthwt1, mapping = aes(x = bwt, colour = smoke)) +
    geom_density()
p2 <- ggplot(data = birthwt1, mapping = aes(x = bwt, fill = smoke)) +
    geom_density(alpha = 0.2)
multiplot(p1, p2, cols = 2)

还可以使用分面绘制分组密度曲线:

# One density panel per smoke level, faceted by column.
ggplot(data = birthwt1, mapping = aes(x = bwt)) +
    geom_density() +
    facet_grid(. ~ smoke)

在分组直方图上添加分组密度曲线:

# Rename the factor levels (plyr::revalue) so the facet strips read as text.
birthwt1$smoke <- revalue(birthwt1$smoke, c(`0` = "No Smoke", `1` = "Smoke"))
# Histogram with y mapped to ..density.. (bin width 200) and a density curve,
# one panel per smoke level.
ggplot(data = birthwt1, mapping = aes(x = bwt, y = ..density..)) +
    geom_histogram(
        binwidth = 200,
        fill = "cornsilk", colour = "grey60", size = 0.2
    ) +
    geom_density() +
    facet_grid(. ~ smoke)

6.5 绘制频数多边形

使用geom_freqpoly()函数即可:

# Frequency polygons: default binning (p1) versus a bin width of 4 (p2).
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_freqpoly()
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
    geom_freqpoly(binwidth = 4)
multiplot(p1, p2, cols = 2)

6.6 绘制箱线图

1、绘制基本箱线图:使用geom_boxplot()函数,分别映射一个连续型变量和一个离散型变量到y和x即可:

# Birth weight by race (race converted to a factor inline): default box width
# (p1) versus width = 0.5 (p2).
p1 <- ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
    geom_boxplot()
p2 <- ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
    geom_boxplot(width = 0.5)
multiplot(p1, p2, cols = 2)

2、向箱线图添加槽口

# Notched box plot; only stored in p1 here, not drawn by this statement.
p1 <- ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
    geom_boxplot(width = 0.5, notch = TRUE) +
    ggtitle("p1:notch=TRUE")

3、向箱线图添加均值

# Box plot with the group mean overlaid as a red-filled point (shape 21).
# NOTE(review): `fun.y` is the older spelling of stat_summary()'s `fun`
# argument - confirm the minimum ggplot2 version before switching.
p2 <- ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
    geom_boxplot() +
    stat_summary(
        fun.y = "mean", geom = "point",
        shape = 21, size = 3, fill = "red"
    ) +
    ggtitle("p2:stat_summary")
multiplot(p1, p2, cols = 2)

### 6.7 绘制小提琴图

p1:绘制基本小提琴图;p2:向小提琴图添加箱线图;p3:添加中位数

# Violin plots built up in layers; each step reuses the previous plot and
# replaces its title.
p <- ggplot(data = heightweight, mapping = aes(x = sex, y = heightIn))
# p1: plain violins
p1 <- p +
    geom_violin() +
    ggtitle("p1")
# p2: narrow black box plot drawn on top of the violins
p2 <- p1 +
    geom_boxplot(width = 0.1, fill = "black") +
    ggtitle("p2")
# p3: median added as a white-filled point
# NOTE(review): `fun.y` is the older spelling of stat_summary()'s `fun`
# argument - confirm the minimum ggplot2 version before switching.
p3 <- p2 +
    stat_summary(
        fun.y = median, geom = "point",
        fill = "white", shape = 21, size = 2.5
    ) +
    ggtitle("p3")
multiplot(p1, p2, p3, cols = 3)

### 6.8 绘制二维数据的密度图

1、使用stat_density2d()函数,系统默认使用的是等高线:

# 2D density of eruptions against waiting.
p <- ggplot(data = faithful, mapping = aes(x = eruptions, y = waiting))
# p1: scatter plot with the density estimate layered on top
p1 <- p +
    geom_point() +
    stat_density2d()
# p2: density estimate only, with line colour mapped to ..level..
p2 <- p +
    stat_density_2d(aes(colour = ..level..))
multiplot(p1, p2, cols = 2)

将密度估计映射到填充色或者瓦片图的透明度。p1:将密度估计映射到填充色;p2:将密度估计映射到瓦片图的透明度;p3:将密度估计映射到填充色,并设置使用更小的带宽,以使密度估计对数据的拟合程度更高

# Contours switched off; the density estimate itself is drawn instead.
# p1: raster geom, density mapped to fill
p1 <- p +
    stat_density2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
    ggtitle("p1:raster")
# p2: tile geom, density mapped to alpha
p2 <- p +
    stat_density2d(aes(alpha = ..density..), geom = "tile", contour = FALSE) +
    ggtitle("p2:tile")
multiplot(p1, p2, cols = 2)

# p3: same raster as p1 but with explicit bandwidths h = c(0.5, 5) for the
# x and y directions; shown next to p1 for comparison.
p3 <- p +
    stat_density2d(
        aes(fill = ..density..),
        geom = "raster", contour = FALSE,
        h = c(0.5, 5)
    ) +
    ggtitle("p3:raster")
multiplot(p1, p3, cols = 2)