# Multiple plot function
#
# Draws several ggplot objects on a single page using a grid layout.
#
# Arguments:
#   ...      : ggplot objects to draw.
#   plotlist : optional list of further ggplot objects, appended after `...`.
#   file     : never used by the function; kept in the signature so existing
#              calls keep working.
#   cols     : number of columns in the layout (used only when `layout` is
#              NULL).
#   layout   : a matrix specifying the layout. If present, `cols` is ignored.
#              If the layout is something like
#              matrix(c(1, 2, 3, 3), nrow = 2, byrow = TRUE), then plot 1 goes
#              in the upper left, plot 2 in the upper right, and plot 3 goes
#              all the way across the bottom.
#
# A single plot is printed directly and the layout is not consulted. When no
# plots are supplied nothing is drawn and NULL is returned invisibly.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
    library(grid)

    # Make a list from the ... arguments and plotlist
    plots <- c(list(...), plotlist)
    numPlots <- length(plots)

    # Nothing to draw: return before opening a page. Without this guard,
    # 1:numPlots would iterate over c(1, 0) and plots[[1]] would fail.
    if (numPlots == 0) {
        return(invisible(NULL))
    }

    # If layout is NULL, then use 'cols' to determine layout
    if (is.null(layout)) {
        # ncol: number of columns of plots; nrow: number of rows needed,
        # calculated from the number of columns. Cells are numbered
        # column-wise (the matrix() default).
        rows <- ceiling(numPlots / cols)
        layout <- matrix(seq_len(cols * rows), ncol = cols, nrow = rows)
    }

    if (numPlots == 1) {
        print(plots[[1]])
    } else {
        # Set up the page
        grid.newpage()
        pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

        # Make each plot, in the correct location
        for (i in seq_len(numPlots)) {
            # Get the row/column positions of the layout cells that contain
            # this subplot
            matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

            print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                layout.pos.col = matchidx$col))
        }
    }
}
# 用法示例: multiplot(p1, p2, p3, p4, cols = 2);类似的多图排版也可以使用gridExtra包(如grid.arrange())
# install.packages('gcookbook')
library(ggplot2)
library(gcookbook)
library(plyr)

5.1 绘制基本散点图

使用geom_point()

# Preview the first five rows of the two columns plotted in this section.
head(heightweight[c("ageYear", "heightIn")], n = 5)
##   ageYear heightIn
## 1   11.92     56.3
## 2   12.92     62.3
## 3   12.75     63.3
## 4   13.42     59.0
## 5   15.92     62.5
# Height against age drawn three ways: default points, point shape 21, and
# points of size 0.5.
p1 <- ggplot(heightweight, aes(ageYear, heightIn)) +
  geom_point()
p2 <- ggplot(heightweight, aes(ageYear, heightIn)) +
  geom_point(shape = 21)
p3 <- ggplot(heightweight, aes(ageYear, heightIn)) +
  geom_point(size = 0.5)
# Show the three variants side by side.
multiplot(plotlist = list(p1, p2, p3), cols = 3)

5.2 使用点形和颜色属性,绘制分组散点图

# Preview the full dataset, including the grouping column `sex`.
head(heightweight)
##   sex ageYear ageMonth heightIn weightLb
## 1   f   11.92      143     56.3     85.0
## 2   f   12.92      155     62.3    105.0
## 3   f   12.75      153     63.3    108.0
## 4   f   13.42      161     59.0     92.0
## 5   f   15.92      191     62.5    112.5
## 6   f   14.25      171     62.5    112.0
# Distinguish the groups by colour only, by point shape only, and by both.
p1 <- ggplot(heightweight, aes(ageYear, heightIn, colour = sex)) +
  geom_point()
p2 <- ggplot(heightweight, aes(ageYear, heightIn, shape = sex)) +
  geom_point()
p3 <- ggplot(
  heightweight,
  aes(ageYear, heightIn, shape = sex, colour = sex)
) +
  geom_point()
multiplot(plotlist = list(p1, p2, p3), cols = 3)

5.4 将连续型变量映射到点的颜色或大小属性

# Map the continuous variable weightLb to point colour (p1) and to point
# size (p2).
p1 <- ggplot(heightweight, aes(ageYear, heightIn, colour = weightLb)) +
  geom_point()
p2 <- ggplot(heightweight, aes(ageYear, heightIn, size = weightLb)) +
  geom_point()
multiplot(plotlist = list(p1, p2), cols = 2)

# Map weightLb to the fill of shape-21 points with a white-to-black gradient.
# p4 additionally sets explicit legend breaks and uses guide_legend().
p3 <- ggplot(heightweight, aes(ageYear, heightIn, fill = weightLb)) +
  geom_point(shape = 21, size = 2.5) +
  scale_fill_gradient(low = "white", high = "black")
p4 <- ggplot(heightweight, aes(ageYear, heightIn, fill = weightLb)) +
  geom_point(shape = 21, size = 2.5) +
  scale_fill_gradient(
    low = "white",
    high = "black",
    breaks = seq(70, 130, by = 20),
    guide = guide_legend()
  )
multiplot(plotlist = list(p3, p4), cols = 2)

# Combine a continuous mapping (size) with a discrete one (colour), using
# semi-transparent points, an area-based size scale and the "Set1" palette.
ggplot(
  heightweight,
  aes(ageYear, heightIn, size = weightLb, colour = sex)
) +
  geom_point(alpha = 0.5) +
  scale_size_area() +
  scale_colour_brewer(palette = "Set1")

5.5 处理图形重叠

# Base plot of price against carat for the diamonds data; reused by the
# statements that follow.
sp <- ggplot(diamonds, aes(carat, price))

# Opaque points versus half-transparent points.
p1 <- sp +
  geom_point()
p2 <- sp +
  geom_point(alpha = 0.5)
multiplot(plotlist = list(p1, p2), cols = 2)

# Progressively more transparent points: alpha 0.1 and alpha 0.01.
p3 <- sp +
  geom_point(alpha = 0.1)
p4 <- sp +
  geom_point(alpha = 0.01)
multiplot(plotlist = list(p3, p4), cols = 2)

将数据分箱并以矩形来表示,同时将数据点的密度映射为矩形的填充色

# Bin the data and draw the bins as rectangles (default settings).
p1 <- sp +
  stat_bin2d()
# Same with bins = 50 and a light-blue-to-red fill scale limited to 0..6000.
p2 <- sp +
  stat_bin2d(bins = 50) +
  scale_fill_gradient(
    low = "lightblue",
    high = "red",
    limits = c(0, 6000)
  )
multiplot(plotlist = list(p1, p2), cols = 2)

将数据分箱并以六边形表示

# install.packages('hexbin')
library(hexbin)
# Hexagonal bins with a light-blue-to-red fill scale limited to 0..8000.
p1 <- sp +
  stat_binhex() +
  scale_fill_gradient(
    low = "lightblue",
    high = "red",
    limits = c(0, 8000)
  )
# Same geometry, but with explicit legend breaks and limits of 0..6000.
p2 <- sp +
  stat_binhex() +
  scale_fill_gradient(
    low = "lightblue",
    high = "red",
    breaks = c(0, 250, 500, 1000, 2000, 4000, 6000),
    limits = c(0, 6000)
  )
multiplot(plotlist = list(p1, p2), cols = 2)

当散点图的数据轴对应离散数据时,可使用position_jitter()函数给数据点增加随机扰动

# Base plot of weight against Time for the ChickWeight data; reused by the
# statements that follow.
sp1 <- ggplot(ChickWeight, aes(Time, weight))
# Plain points.
p1 <- sp1 +
  geom_point()
# Default jitter; equivalent to sp1 + geom_jitter().
p2 <- sp1 +
  geom_point(position = "jitter")
# Jitter horizontally only (width 0.5, height 0).
p3 <- sp1 +
  geom_point(position = position_jitter(width = 0.5, height = 0))
multiplot(plotlist = list(p1, p2, p3), cols = 3)

使用箱线图

# Box plots with an explicit grouping by Time (p1) and without any explicit
# grouping (p2), shown side by side for comparison.
p1 <- sp1 +
  geom_boxplot(aes(group = Time))
p2 <- sp1 +
  geom_boxplot()
multiplot(plotlist = list(p1, p2), cols = 2)

5.6 添加回归模型拟合线

使用stat_smooth(method = lm)即可添加线性回归拟合线;置信域的置信水平可通过level参数设置(默认为0.95),设置se = FALSE则不绘制置信域;还可以设置拟合线的颜色(colour)、粗细(size)、线型(linetype),以及置信域的填充色(fill)和填充色的透明度(alpha)等

# Base plot of height against age; each statement below prints one variant.
sp <- ggplot(heightweight, aes(ageYear, heightIn))

# Linear-model fit with the default confidence band.
sp +
  geom_point() +
  stat_smooth(method = lm)

# Linear-model fit without the confidence band.
sp +
  geom_point() +
  stat_smooth(method = lm, se = FALSE)

# Red line at a 99% confidence level.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line width in favour
# of `linewidth`; `size` is kept here so older versions behave the same.
sp +
  geom_point() +
  stat_smooth(method = lm, colour = "red", size = 1, level = 0.99)

# As above, with a half-transparent blue confidence band.
sp +
  geom_point() +
  stat_smooth(
    method = lm,
    colour = "red",
    size = 1,
    level = 0.99,
    fill = "blue",
    alpha = 0.5
  )

# Loess fit instead of a linear model.
sp +
  geom_point() +
  stat_smooth(method = loess)

对分组数据绘制拟合线

# Scatter plot coloured by sex; adding geom_smooth() then draws one fitted
# line per colour group.
sps <- ggplot(heightweight, aes(ageYear, heightIn, colour = sex)) +
  geom_point()
sps +
  geom_smooth()

拟合逻辑回归

MASS包中的数据集biopsy,包含9个与乳腺癌活检组织相关的指标以及肿瘤的分类,包括良性(benign)和恶性(malignant)

数据点重叠严重,需要向数据点添加一些扰动,将数据点设置为半透明、点形设置为空心圆,并使用略小的数据点

library(MASS)  # for dataset biopsy
b <- biopsy
# Numeric copy of the class label: "benign" -> 0, "malignant" -> 1; any other
# value is left as NA.
b$classn <- unname(c(benign = 0, malignant = 1)[as.character(b$class)])
head(b)
##        ID V1 V2 V3 V4 V5 V6 V7 V8 V9     class classn
## 1 1000025  5  1  1  1  2  1  3  1  1    benign      0
## 2 1002945  5  4  4  5  7 10  3  2  1    benign      0
## 3 1015425  3  1  1  1  2  2  3  1  1    benign      0
## 4 1016277  6  8  8  1  3  4  3  7  1    benign      0
## 5 1017023  4  1  1  3  2  1  3  1  1    benign      0
## 6 1017122  8 10 10  8  7 10  9  7  1 malignant      1
# Jittered, semi-transparent, small shape-21 points plus a logistic
# regression curve (glm with a binomial family).
ggplot(b, aes(V1, classn)) +
  geom_point(
    position = position_jitter(width = 0.3, height = 0.06),
    alpha = 0.4,
    shape = 21,
    size = 1.5
  ) +
  stat_smooth(method = glm, method.args = list(family = binomial))