# Draw several ggplot objects on a single page using a grid layout.
#
# Arguments:
#   ...       ggplot objects to draw.
#   plotlist  Optional list of further ggplot objects; appended after `...`.
#   file      Unused by this function; kept so existing calls that pass it
#             keep working.
#   cols      Number of columns in the automatically generated layout.
#   layout    Optional matrix specifying the layout. If present, `cols` is
#             ignored. For example, with
#             matrix(c(1, 2, 3, 3), nrow = 2, byrow = TRUE)
#             plot 1 goes in the upper left, plot 2 in the upper right, and
#             plot 3 spans the whole bottom row.
#
# Called for its drawing side effect. With no plots at all nothing is drawn
# and NULL is returned invisibly; a single plot is printed directly without
# setting up a grid layout.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  library(grid)

  # Combine the plots passed via ... with those passed via plotlist.
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: leave the current page untouched instead of failing on
  # plots[[1]] further down.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # Without an explicit layout, fill a matrix with `cols` columns and as many
  # rows as are needed to hold every plot.
  if (is.null(layout)) {
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page with one grid cell per layout entry.
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    for (i in seq_len(num_plots)) {
      # Row/column positions of every layout cell assigned to plot i.
      cell <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(
        plots[[i]],
        vp = viewport(layout.pos.row = cell$row, layout.pos.col = cell$col)
      )
    }
  }
}
# multiplot(p1, p2, p3, p4, cols=2) gridExtra包
# install.packages('gcookbook')
library(ggplot2)
library(gcookbook)
library(plyr)
# 使用 geom_point()
# Preview the two columns used by the basic scatter plots.
head(heightweight[, c("ageYear", "heightIn")], 5)
## ageYear heightIn
## 1 11.92 56.3
## 2 12.92 62.3
## 3 12.75 63.3
## 4 13.42 59.0
## 5 15.92 62.5

# The same scatter plot three ways: default points, point shape 21, and
# points drawn at size 0.5.
p1 <- ggplot(heightweight, aes(ageYear, heightIn)) +
  geom_point()
p2 <- ggplot(heightweight, aes(ageYear, heightIn)) +
  geom_point(shape = 21)
p3 <- ggplot(heightweight, aes(ageYear, heightIn)) +
  geom_point(size = 0.5)

# Show the three variants side by side.
multiplot(p1, p2, p3, cols = 3)
# Look at every column of the data set, including the grouping variable sex.
head(heightweight)
## sex ageYear ageMonth heightIn weightLb
## 1 f 11.92 143 56.3 85.0
## 2 f 12.92 155 62.3 105.0
## 3 f 12.75 153 63.3 108.0
## 4 f 13.42 161 59.0 92.0
## 5 f 15.92 191 62.5 112.5
## 6 f 14.25 171 62.5 112.0

# Map sex to colour only, to shape only, and to both at once.
p1 <- ggplot(heightweight, aes(ageYear, heightIn, colour = sex)) +
  geom_point()
p2 <- ggplot(heightweight, aes(ageYear, heightIn, shape = sex)) +
  geom_point()
p3 <- ggplot(
  heightweight,
  aes(ageYear, heightIn, shape = sex, colour = sex)
) +
  geom_point()

multiplot(p1, p2, p3, cols = 3)
# Map the continuous variable weightLb to colour and to point size.
p1 <- ggplot(heightweight, aes(ageYear, heightIn, colour = weightLb)) +
  geom_point()
p2 <- ggplot(heightweight, aes(ageYear, heightIn, size = weightLb)) +
  geom_point()
multiplot(p1, p2, cols = 2)

# Map weightLb to the fill of shape-21 points on a white-to-black gradient.
p3 <- ggplot(heightweight, aes(ageYear, heightIn, fill = weightLb)) +
  geom_point(shape = 21, size = 2.5) +
  scale_fill_gradient(low = "white", high = "black")

# Same plot, but with explicit breaks (70, 90, 110, 130) and
# guide_legend() passed as the guide for the fill scale.
p4 <- ggplot(heightweight, aes(ageYear, heightIn, fill = weightLb)) +
  geom_point(shape = 21, size = 2.5) +
  scale_fill_gradient(
    low = "white",
    high = "black",
    breaks = seq(70, 130, by = 20),
    guide = guide_legend()
  )
multiplot(p3, p4, cols = 2)

# Combine a continuous mapping (size = weightLb) with a discrete one
# (colour = sex) on semi-transparent points.
ggplot(heightweight, aes(ageYear, heightIn, size = weightLb, colour = sex)) +
  geom_point(alpha = 0.5) +
  scale_size_area() +
  scale_colour_brewer(palette = "Set1")
# Base plot of price against carat for the diamonds data; `sp` is reused by
# the binning examples that follow.
sp <- ggplot(diamonds, aes(carat, price))

# Opaque points versus alpha = 0.5.
p1 <- sp + geom_point()
p2 <- sp + geom_point(alpha = 0.5)
multiplot(p1, p2, cols = 2)

# Progressively more transparent points: alpha = 0.1 and alpha = 0.01.
p3 <- sp + geom_point(alpha = 0.1)
p4 <- sp + geom_point(alpha = 0.01)
multiplot(p3, p4, cols = 2)
# 将数据分箱并以矩形来表示,同时将数据点的密度映射为矩形的填充色
# Bin the data and draw the bins as rectangles (default settings).
p1 <- sp + stat_bin2d()

# Same with bins = 50 and a light-blue-to-red fill scale limited to 0-6000.
p2 <- sp +
  stat_bin2d(bins = 50) +
  scale_fill_gradient(low = "lightblue", high = "red", limits = c(0, 6000))

multiplot(p1, p2, cols = 2)
# 将数据分箱并以六边形表示
# The hexbin package is loaded for the hexagonal binning examples below.
# install.packages("hexbin")
library(hexbin)

# Hexagonal bins with the fill scale limited to 0-8000.
p1 <- sp +
  stat_binhex() +
  scale_fill_gradient(low = "lightblue", high = "red", limits = c(0, 8000))

# Hexagonal bins with explicit legend breaks and the scale limited to 0-6000.
p2 <- sp +
  stat_binhex() +
  scale_fill_gradient(
    low = "lightblue",
    high = "red",
    breaks = c(0, 250, 500, 1000, 2000, 4000, 6000),
    limits = c(0, 6000)
  )

multiplot(p1, p2, cols = 2)
# 当散点图的数据轴对应离散数据时,可使用 position_jitter() 函数给数据点增加随机扰动
# Base plot of weight against Time for ChickWeight; `sp1` is reused by the
# box plot example that follows.
sp1 <- ggplot(ChickWeight, aes(Time, weight))

# No jitter.
p1 <- sp1 + geom_point()
# Default jitter; equivalent to sp1 + geom_jitter().
p2 <- sp1 + geom_point(position = "jitter")
# Jitter horizontally only (width = 0.5, height = 0).
p3 <- sp1 +
  geom_point(position = position_jitter(width = 0.5, height = 0))

multiplot(p1, p2, p3, cols = 3)
# 使用箱线图
# Box plots grouped by Time, next to the same call without the group
# aesthetic for comparison.
p1 <- sp1 +
  geom_boxplot(aes(group = Time))
p2 <- sp1 +
  geom_boxplot()

multiplot(p1, p2, cols = 2)
# 使用 stat_smooth(method = lm) 即可添加拟合线。置信域的置信水平可通过 level 参数设置(例如 level = 0.95),
# se = FALSE 时不会对拟合线添加置信域。还可以设置拟合线的颜色(colour)、粗细(size)、线型(linetype),
# 以及置信域的填充色(fill)和置信域填充色的透明度(alpha)等。
# Rebind `sp` to the height-vs-age base plot for the fitted-line examples.
sp <- ggplot(heightweight, aes(ageYear, heightIn))

# Linear fit with the default confidence band.
sp +
  geom_point() +
  stat_smooth(method = lm)

# Linear fit without a confidence band.
sp +
  geom_point() +
  stat_smooth(method = lm, se = FALSE)

# Red fitted line with a 99% confidence level.
# NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
# line width -- confirm against the installed version before changing.
sp +
  geom_point() +
  stat_smooth(method = lm, colour = "red", size = 1, level = 0.99)

# As above, with a half-transparent blue confidence band.
sp +
  geom_point() +
  stat_smooth(
    method = lm,
    colour = "red",
    size = 1,
    level = 0.99,
    fill = "blue",
    alpha = 0.5
  )

# Loess fit instead of a linear model.
sp +
  geom_point() +
  stat_smooth(method = loess)
# 对分组数据绘制拟合线
# Colour the points by sex, then add geom_smooth() with its default settings
# on top of that grouped plot.
sps <- ggplot(heightweight, aes(ageYear, heightIn, colour = sex)) +
  geom_point()

sps +
  geom_smooth()
# 拟合逻辑回归
# MASS 包中的数据集 biopsy 包含 9 个与乳腺癌活检组织相关的指标以及肿瘤的分类,分类包括良性(benign)和恶性(malignant)
# 数据点重叠严重,需要向数据点添加一些扰动,并将数据点设置为半透明、点形设置为空心圆,同时使用略小的数据点
# MASS provides the biopsy data set.
library(MASS)

# Work on a copy and add a numeric class code: 0 for benign, 1 for malignant.
b <- biopsy
b$classn[b$class == "benign"] <- 0
b$classn[b$class == "malignant"] <- 1

head(b)
## ID V1 V2 V3 V4 V5 V6 V7 V8 V9 class classn
## 1 1000025 5 1 1 1 2 1 3 1 1 benign 0
## 2 1002945 5 4 4 5 7 10 3 2 1 benign 0
## 3 1015425 3 1 1 1 2 2 3 1 1 benign 0
## 4 1016277 6 8 8 1 3 4 3 7 1 benign 0
## 5 1017023 4 1 1 3 2 1 3 1 1 benign 0
## 6 1017122 8 10 10 8 7 10 9 7 1 malignant 1

# Jittered, semi-transparent, small shape-21 points, overlaid with a binomial
# GLM (logistic regression) fit of classn on V1.
ggplot(b, aes(V1, classn)) +
  geom_point(
    position = position_jitter(width = 0.3, height = 0.06),
    alpha = 0.4,
    shape = 21,
    size = 1.5
  ) +
  stat_smooth(method = glm, method.args = list(family = binomial))