#' Draw several plot objects on one page using a grid layout.
#'
#' Plots can be passed in `...`, in `plotlist` (a list of plot objects), or
#' both; the two are concatenated in that order.
#'
#' @param ... Plot objects to draw.
#' @param plotlist Optional list of further plot objects.
#' @param file Not used by this function; kept so existing calls that pass it
#'   keep working.
#' @param cols Number of columns in the layout. Ignored when `layout` is given.
#' @param layout Optional matrix specifying the layout. With something like
#'   `matrix(c(1, 2, 3, 3), nrow = 2, byrow = TRUE)`, plot 1 goes in the upper
#'   left, plot 2 in the upper right, and plot 3 spans the whole bottom row.
#' @return Called for its drawing side effect. Returns `NULL` invisibly when
#'   no plots are supplied.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Attached inside the function, as before, so callers that relied on grid
  # being on the search path after the first call are unaffected.
  library(grid)

  # Combine the ... arguments and plotlist into a single list
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw. Without this guard the sequences below would count
  # down (1, 0) and the loop would fail on plots[[1]].
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, derive it from 'cols': as many rows as are needed to
  # hold every plot, cells numbered column by column.
  if (is.null(layout)) {
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Draw each plot in the layout cells that carry its index
    for (i in seq_len(num_plots)) {
      # Row/column positions of the regions that contain this subplot
      cells <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(
        plots[[i]],
        vp = viewport(layout.pos.row = cells$row, layout.pos.col = cells$col)
      )
    }
  }
}
# multiplot(p1, p2, p3, p4, cols=2) gridExtra
# install.packages('gcookbook')
library(ggplot2)
library(gcookbook)
library(plyr)
应用geom_histogram()函数并映射一个连续型变量到参数x,直方图默认的填充色是黑色,且无边框线。
# Histogram of waiting times, built once through geom_histogram() and once
# through stat_bin(), then drawn side by side.
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram()
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  stat_bin()
multiplot(p1, p2, cols = 2)
绘制一个变量的直方图,将data设置为NULL,向量映射到参数x即可
# Histogram of a bare vector: no data frame is supplied and the vector itself
# is mapped to x.
x <- faithful[["waiting"]]
ggplot(mapping = aes(x = x), data = NULL) +
  geom_histogram()
默认情况下,数据将被切分为30组,还可以通过设置组距(binwidth)参数来调整数据的分组数目或者将数据切分为指定的分组数目。
1、设定组距为5,并设置填充色和边框:
# Bin width of 5, white bars with a black outline.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram(colour = "black", fill = "white", binwidth = 5)
2、将x的取值切分为15组,可通过设置bins参数:
# p1 uses a bin width equal to one fifteenth of the data range;
# p2 requests 15 bins directly through the bins argument.
binsize <- diff(range(faithful[["waiting"]])) / 15
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram(colour = "black", fill = "white", binwidth = binsize)
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram(colour = "black", fill = "white", bins = 15)
multiplot(p1, p2, cols = 2)
3、设置组距(binwidth)以及分组原点(boundary)
# Base plot reused by the following examples.
p <- ggplot(data = faithful, mapping = aes(x = waiting))
# Bin width of 5 with bin edges anchored at 40.
p +
  geom_histogram(boundary = 40, binwidth = 5, colour = "black", fill = "white")
4、可通过参数breaks设置直方图的边界,并应用scale_x_continuous()设置x轴的刻度与直方图的边界保持一致。
# Explicit, unevenly spaced bin edges; the x-axis ticks are set to the same
# values so they line up with the bars.
p +
  geom_histogram(
    breaks = c(40, 50, 70, 80, 100),
    colour = "black",
    fill = "white"
  ) +
  scale_x_continuous(breaks = c(40, 50, 70, 80, 100))
# birthwt comes from the MASS package; preview its first rows.
library(MASS)
head(birthwt)
## low age lwt race smoke ptl ht ui ftv bwt
## 85 0 19 182 2 0 0 0 1 0 2523
## 86 0 33 155 3 0 0 0 0 3 2551
## 87 0 20 105 1 1 0 0 0 1 2557
## 88 0 21 108 1 1 0 0 1 2 2594
## 89 0 18 107 1 1 0 0 1 0 2600
## 91 0 21 124 3 0 0 0 0 0 2622
# Histogram of birth weight, one facet row per value of smoke.
ggplot(data = birthwt, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(smoke ~ .)
修改分面标签,需要修改因子水平的名称
# Work on a copy: turn smoke into a factor and rename its levels, since the
# facet labels are taken from the level names.
birthwt1 <- birthwt
birthwt1$smoke <- revalue(
  factor(birthwt1$smoke),
  c("0" = "No smoke", "1" = "Smoke")
) # revalue() is provided by plyr
head(birthwt1)
## low age lwt race smoke ptl ht ui ftv bwt
## 85 0 19 182 2 No smoke 0 0 1 0 2523
## 86 0 33 155 3 No smoke 0 0 0 3 2551
## 87 0 20 105 1 Smoke 0 0 0 1 2557
## 88 0 21 108 1 Smoke 0 0 1 2 2594
## 89 0 18 107 1 Smoke 0 0 1 0 2600
## 91 0 21 124 3 No smoke 0 0 0 0 2622
# Same faceted histogram as before, now using the relabelled factor.
ggplot(data = birthwt1, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(smoke ~ .)
应用geom_density()函数,并映射一个连续型变量到x:
# p1: density curve drawn with geom_density().
# p2: the density drawn as a plain line, with the y axis extended to include 0.
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density()
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_line(stat = "density") +
  expand_limits(y = 0)
multiplot(p1, p2, cols = 2)
核密度曲线是基于样本数据对总体分布的一个估计,曲线的光滑程度取决于核函数的带宽:带宽越大,曲线越光滑,带宽可通过adjust参数来设置,其默认值为1。
# Three density lines on one plot: adjust = 0.25 in red, the default
# adjustment in the default colour, and adjust = 2 in blue.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_line(stat = "density", colour = "red", adjust = 0.25) +
  geom_line(stat = "density") +
  geom_line(stat = "density", colour = "blue", adjust = 2)
x轴的坐标范围是自动设定的,以使其能包含相应的数据,但这会导致曲线的边缘被裁剪。想要展示曲线的更多部分,可以通过xlim()设定x轴的范围:
# p1: filled density with the automatic x range.
# p2: the same plot with the x axis limits set to 35-105.
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density(alpha = 0.2, fill = "blue")
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density(alpha = 0.2, fill = "blue") +
  xlim(35, 105)
multiplot(p1, p2, cols = 2)
移除底部的横线:可将geom_density()中的colour参数设置为NA,再使用geom_line(stat = "density")添加一条密度曲线:
# p1: filled density with its default outline.
# p2: outline switched off (colour = NA) and the curve added back as a
#     separate density line.
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density(alpha = 0.2, fill = "blue") +
  xlim(35, 105)
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density(alpha = 0.2, colour = NA, fill = "blue") +
  geom_line(stat = "density") +
  xlim(35, 105)
multiplot(p1, p2, cols = 2)
在直方图中添加密度曲线:
p1:绘制的频数直方图
p2:绘制的是频率直方图,需要设置y=..density..
p3:在频数直方图上添加密度曲线,由于密度曲线的y值相比频数很小,在图中与x轴的横线重合
p4:在频率直方图添加密度曲线
# Four variants in a 2-column grid:
#   p1 - histogram with the default y mapping
#   p2 - histogram with y mapped to ..density..
#   p3 - p1's histogram plus a density curve
#   p4 - p2's histogram plus a density curve
# NOTE(review): recent ggplot2 releases reportedly deprecate ..density.. and
# size on outlines - confirm against the installed version before changing.
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram(colour = "grey60", fill = "cornsilk", size = 0.2) +
  ggtitle("p1")
p2 <- ggplot(data = faithful, mapping = aes(x = waiting, y = ..density..)) +
  geom_histogram(colour = "grey60", fill = "cornsilk", size = 0.2) +
  ggtitle("p2")
p3 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram(colour = "grey60", fill = "cornsilk", size = 0.2) +
  geom_density() +
  xlim(35, 105) +
  ggtitle("p3")
p4 <- ggplot(data = faithful, mapping = aes(x = waiting, y = ..density..)) +
  geom_histogram(colour = "grey60", fill = "cornsilk", size = 0.2) +
  geom_density() +
  xlim(35, 105) +
  ggtitle("p4")
multiplot(p1, p2, p3, p4, cols = 2)
在频数直方图上添加密度曲线,由于密度曲线的y值相比频数很小,在图中与x轴的横线重合
# Same comparison, with the density added as a plain line instead of
# geom_density(): p1 keeps the default y mapping, p2 maps y to ..density...
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram(colour = "grey60", fill = "cornsilk", size = 0.2) +
  geom_line(stat = "density") +
  xlim(35, 105)
p2 <- ggplot(data = faithful, mapping = aes(x = waiting, y = ..density..)) +
  geom_histogram(colour = "grey60", fill = "cornsilk", size = 0.2) +
  geom_line(stat = "density") +
  xlim(35, 105)
multiplot(p1, p2, cols = 2)
### 6.4 绘制分组密度曲线
# Fresh copy of birthwt with smoke converted to a factor so it can be used
# as a grouping aesthetic.
birthwt1 <- birthwt
birthwt1[["smoke"]] <- as.factor(birthwt1[["smoke"]])
# p1: one density line per smoke level, distinguished by line colour.
# p2: one semi-transparent filled density per smoke level.
p1 <- ggplot(data = birthwt1, mapping = aes(x = bwt, colour = smoke)) +
  geom_density()
p2 <- ggplot(data = birthwt1, mapping = aes(x = bwt, fill = smoke)) +
  geom_density(alpha = 0.2)
multiplot(p1, p2, cols = 2)
还可以使用分面绘制分组密度曲线:
# One density panel per smoke level, laid out as facet columns.
ggplot(data = birthwt1, mapping = aes(x = bwt)) +
  geom_density() +
  facet_grid(. ~ smoke)
在分组直方图上添加分组密度曲线:
# Rename the smoke levels so the facet strips read as labels, then overlay a
# density curve on a density-scaled histogram within each facet.
birthwt1$smoke <- revalue(birthwt1$smoke, c("0" = "No Smoke", "1" = "Smoke"))
ggplot(data = birthwt1, mapping = aes(x = bwt, y = ..density..)) +
  geom_histogram(
    binwidth = 200,
    colour = "grey60",
    fill = "cornsilk",
    size = 0.2
  ) +
  geom_density() +
  facet_grid(. ~ smoke)
使用geom_freqpoly()函数即可:
# Frequency polygons: default binning (p1) versus a bin width of 4 (p2).
p1 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_freqpoly()
p2 <- ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_freqpoly(binwidth = 4)
multiplot(p1, p2, cols = 2)
1、绘制基本箱线图:使用geom_boxplot()函数,分别映射一个连续型变量和一个离散型变量到y和x即可:
# Box plots of birth weight by race (race converted to a factor for the
# x axis): default box width (p1) versus width = 0.5 (p2).
p1 <- ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
  geom_boxplot()
p2 <- ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
  geom_boxplot(width = 0.5)
multiplot(p1, p2, cols = 2)
2、向箱线图添加槽口
# Notched box plot; the title records the setting used.
p1 <- ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
  geom_boxplot(notch = TRUE, width = 0.5) +
  ggtitle("p1:notch=TRUE")
3、向箱线图添加均值
# Box plot with the group mean overlaid as a red-filled point, drawn next to
# the notched plot p1 defined earlier.
# NOTE(review): fun.y is reportedly deprecated in favour of fun in newer
# ggplot2 releases - confirm the installed version before switching.
p2 <- ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
  geom_boxplot() +
  stat_summary(
    fun.y = "mean",
    geom = "point",
    fill = "red",
    shape = 21,
    size = 3
  ) +
  ggtitle("p2:stat_summary")
multiplot(p1, p2, cols = 2)
### 6.7 绘制小提琴图
p1:绘制基本小提琴图 p2:向小提琴图添加箱线图 p3:添加中位数
# Violin plots of heightIn by sex, built up incrementally:
#   p1 - plain violins
#   p2 - p1 plus a narrow black box plot
#   p3 - p2 plus the median as a white-filled point
# NOTE(review): fun.y is reportedly deprecated in favour of fun in newer
# ggplot2 releases - confirm the installed version before switching.
p <- ggplot(data = heightweight, mapping = aes(x = sex, y = heightIn))
p1 <- p +
  geom_violin() +
  ggtitle("p1")
p2 <- p1 +
  geom_boxplot(fill = "black", width = 0.1) +
  ggtitle("p2")
p3 <- p2 +
  stat_summary(
    fun.y = median,
    geom = "point",
    shape = 21,
    size = 2.5,
    fill = "white"
  ) +
  ggtitle("p3")
multiplot(p1, p2, p3, cols = 3)
### 6.8 绘制二维数据的密度图 1、使用stat_density2d()函数,系统默认使用的是等高线:
# Base plot of eruptions against waiting, reused by the 2D density examples.
p <- ggplot(data = faithful, mapping = aes(x = eruptions, y = waiting))
# p1: points with 2D density contours on top.
# p2: contours only, with line colour mapped to ..level...
p1 <- p +
  geom_point() +
  stat_density2d()
p2 <- p +
  stat_density_2d(mapping = aes(colour = ..level..))
multiplot(p1, p2, cols = 2)
将密度估计映射到填充色或者瓦片图的透明度 p1:将密度估计映射到填充色 p2:将密度估计映射到瓦片图的透明度 p3:将密度估计映射到填充色,并设置使用更小的带宽,以使密度估计对数据的拟合程度更高
# p1: density estimate mapped to fill, drawn as a raster.
# p2: density estimate mapped to alpha, drawn as tiles.
p1 <- p +
  stat_density2d(
    mapping = aes(fill = ..density..),
    contour = FALSE,
    geom = "raster"
  ) +
  ggtitle("p1:raster")
p2 <- p +
  stat_density2d(
    mapping = aes(alpha = ..density..),
    contour = FALSE,
    geom = "tile"
  ) +
  ggtitle("p2:tile")
multiplot(p1, p2, cols = 2)
# p3: same as p1 but with the bandwidths set explicitly through h,
# compared against p1 on a second page.
p3 <- p +
  stat_density2d(
    mapping = aes(fill = ..density..),
    contour = FALSE,
    geom = "raster",
    h = c(0.5, 5)
  ) +
  ggtitle("p3:raster")
multiplot(p1, p3, cols = 2)