587*(23234+899)/9
## [1] 1574008
example(boxplot)
##
## boxplt> ## boxplot on a formula:
## boxplt> boxplot(count ~ spray, data = InsectSprays, col = "lightgray")
##
## boxplt> # *add* notches (somewhat funny here <--> warning "notches .. outside hinges"):
## boxplt> boxplot(count ~ spray, data = InsectSprays,
## boxplt+ notch = TRUE, add = TRUE, col = "blue")
## Warning in (function (z, notch = FALSE, width = NULL, varwidth = FALSE, : some
## notches went outside hinges ('box'): maybe set notch=FALSE
##
## boxplt> boxplot(decrease ~ treatment, data = OrchardSprays, col = "bisque",
## boxplt+ log = "y")
##
## boxplt> ## horizontal=TRUE, switching y <--> x :
## boxplt> boxplot(decrease ~ treatment, data = OrchardSprays, col = "bisque",
## boxplt+ log = "x", horizontal=TRUE)
##
## boxplt> rb <- boxplot(decrease ~ treatment, data = OrchardSprays, col = "bisque")
##
## boxplt> title("Comparing boxplot()s and non-robust mean +/- SD")
##
## boxplt> mn.t <- tapply(OrchardSprays$decrease, OrchardSprays$treatment, mean)
##
## boxplt> sd.t <- tapply(OrchardSprays$decrease, OrchardSprays$treatment, sd)
##
## boxplt> xi <- 0.3 + seq(rb$n)
##
## boxplt> points(xi, mn.t, col = "orange", pch = 18)
##
## boxplt> arrows(xi, mn.t - sd.t, xi, mn.t + sd.t,
## boxplt+ code = 3, col = "pink", angle = 75, length = .1)
##
## boxplt> ## boxplot on a matrix:
## boxplt> mat <- cbind(Uni05 = (1:100)/21, Norm = rnorm(100),
## boxplt+ `5T` = rt(100, df = 5), Gam2 = rgamma(100, shape = 2))
##
## boxplt> boxplot(mat) # directly, calling boxplot.matrix()
##
## boxplt> ## boxplot on a data frame:
## boxplt> df. <- as.data.frame(mat)
##
## boxplt> par(las = 1) # all axis labels horizontal
##
## boxplt> boxplot(df., main = "boxplot(*, horizontal = TRUE)", horizontal = TRUE)
##
## boxplt> ## Using 'at = ' and adding boxplots -- example idea by Roger Bivand :
## boxplt> boxplot(len ~ dose, data = ToothGrowth,
## boxplt+ boxwex = 0.25, at = 1:3 - 0.2,
## boxplt+ subset = supp == "VC", col = "yellow",
## boxplt+ main = "Guinea Pigs' Tooth Growth",
## boxplt+ xlab = "Vitamin C dose mg",
## boxplt+ ylab = "tooth length",
## boxplt+ xlim = c(0.5, 3.5), ylim = c(0, 35), yaxs = "i")
##
## boxplt> boxplot(len ~ dose, data = ToothGrowth, add = TRUE,
## boxplt+ boxwex = 0.25, at = 1:3 + 0.2,
## boxplt+ subset = supp == "OJ", col = "orange")
##
## boxplt> legend(2, 9, c("Ascorbic acid", "Orange juice"),
## boxplt+ fill = c("yellow", "orange"))
##
## boxplt> ## With less effort (slightly different) using factor *interaction*:
## boxplt> boxplot(len ~ dose:supp, data = ToothGrowth,
## boxplt+ boxwex = 0.5, col = c("orange", "yellow"),
## boxplt+ main = "Guinea Pigs' Tooth Growth",
## boxplt+ xlab = "Vitamin C dose mg", ylab = "tooth length",
## boxplt+ sep = ":", lex.order = TRUE, ylim = c(0, 35), yaxs = "i")
##
## boxplt> ## more examples in help(bxp)
## boxplt>
## boxplt>
## boxplt>
example(hist)
##
## hist> op <- par(mfrow = c(2, 2))
##
## hist> hist(islands)
##
## hist> utils::str(hist(islands, col = "gray", labels = TRUE))
## List of 6
## $ breaks : num [1:10] 0 2000 4000 6000 8000 10000 12000 14000 16000 18000
## $ counts : int [1:9] 41 2 1 1 1 1 0 0 1
## $ density : num [1:9] 4.27e-04 2.08e-05 1.04e-05 1.04e-05 1.04e-05 ...
## $ mids : num [1:9] 1000 3000 5000 7000 9000 11000 13000 15000 17000
## $ xname : chr "islands"
## $ equidist: logi TRUE
## - attr(*, "class")= chr "histogram"
##
## hist> hist(sqrt(islands), breaks = 12, col = "lightblue", border = "pink")
##
## hist> ##-- For non-equidistant breaks, counts should NOT be graphed unscaled:
## hist> r <- hist(sqrt(islands), breaks = c(4*0:5, 10*3:5, 70, 100, 140),
## hist+ col = "blue1")
##
## hist> text(r$mids, r$density, r$counts, adj = c(.5, -.5), col = "blue3")
##
## hist> sapply(r[2:3], sum)
## counts density
## 48.000000 0.215625
##
## hist> sum(r$density * diff(r$breaks)) # == 1
## [1] 1
##
## hist> lines(r, lty = 3, border = "purple") # -> lines.histogram(*)
##
## hist> par(op)
##
## hist> require(utils) # for str
##
## hist> str(hist(islands, breaks = 12, plot = FALSE)) #-> 10 (~= 12) breaks
## List of 6
## $ breaks : num [1:10] 0 2000 4000 6000 8000 10000 12000 14000 16000 18000
## $ counts : int [1:9] 41 2 1 1 1 1 0 0 1
## $ density : num [1:9] 4.27e-04 2.08e-05 1.04e-05 1.04e-05 1.04e-05 ...
## $ mids : num [1:9] 1000 3000 5000 7000 9000 11000 13000 15000 17000
## $ xname : chr "islands"
## $ equidist: logi TRUE
## - attr(*, "class")= chr "histogram"
##
## hist> str(hist(islands, breaks = c(12,20,36,80,200,1000,17000), plot = FALSE))
## List of 6
## $ breaks : num [1:7] 12 20 36 80 200 1000 17000
## $ counts : int [1:6] 12 11 8 6 4 7
## $ density : num [1:6] 0.03125 0.014323 0.003788 0.001042 0.000104 ...
## $ mids : num [1:6] 16 28 58 140 600 9000
## $ xname : chr "islands"
## $ equidist: logi FALSE
## - attr(*, "class")= chr "histogram"
##
## hist> hist(islands, breaks = c(12,20,36,80,200,1000,17000), freq = TRUE,
## hist+ main = "WRONG histogram") # and warning
## Warning in plot.histogram(r, freq = freq1, col = col, border = border, angle =
## angle, : the AREAS in the plot are wrong -- rather use 'freq = FALSE'
##
## hist> ## No test: ##D
## hist> ##D ## Extreme outliers; the "FD" rule would take very large number of 'breaks':
## hist> ##D XXL <- c(1:9, c(-1,1)*1e300)
## hist> ##D hh <- hist(XXL, "FD") # did not work in R <= 3.4.1; now gives warning
## hist> ##D ## pretty() determines how many counts are used (platform dependently!):
## hist> ##D length(hh$breaks) ## typically 1 million -- though 1e6 was "a suggestion only"
## hist> ## End(No test)
## hist> require(stats)
##
## hist> set.seed(14)
##
## hist> x <- rchisq(100, df = 4)
##
## hist> ## Don't show:
## hist> op <- par(mfrow = 2:1, mgp = c(1.5, 0.6, 0), mar = .1 + c(3,3:1))
##
## hist> ## End(Don't show)
## hist> ## Comparing data with a model distribution should be done with qqplot()!
## hist> qqplot(x, qchisq(ppoints(x), df = 4)); abline(0, 1, col = 2, lty = 2)
##
## hist> ## if you really insist on using hist() ... :
## hist> hist(x, freq = FALSE, ylim = c(0, 0.2))
##
## hist> curve(dchisq(x, df = 4), col = 2, lty = 2, lwd = 2, add = TRUE)
##
## hist> ## Don't show:
## hist> par(op)
##
## hist> ## End(Don't show)
## hist>
## hist>
## hist>
example(heatmap)
##
## heatmp> require(graphics); require(grDevices)
##
## heatmp> x <- as.matrix(mtcars)
##
## heatmp> rc <- rainbow(nrow(x), start = 0, end = .3)
##
## heatmp> cc <- rainbow(ncol(x), start = 0, end = .3)
##
## heatmp> hv <- heatmap(x, col = cm.colors(256), scale = "column",
## heatmp+ RowSideColors = rc, ColSideColors = cc, margins = c(5,10),
## heatmp+ xlab = "specification variables", ylab = "Car Models",
## heatmp+ main = "heatmap(<Mtcars data>, ..., scale = \"column\")")
##
## heatmp> utils::str(hv) # the two re-ordering index vectors
## List of 4
## $ rowInd: int [1:32] 31 17 16 15 5 25 29 24 7 6 ...
## $ colInd: int [1:11] 2 9 8 11 6 5 10 7 1 4 ...
## $ Rowv : NULL
## $ Colv : NULL
##
## heatmp> ## no column dendrogram (nor reordering) at all:
## heatmp> heatmap(x, Colv = NA, col = cm.colors(256), scale = "column",
## heatmp+ RowSideColors = rc, margins = c(5,10),
## heatmp+ xlab = "specification variables", ylab = "Car Models",
## heatmp+ main = "heatmap(<Mtcars data>, ..., scale = \"column\")")
##
## heatmp> ## Don't show:
## heatmp> ## no row dendrogram (nor reordering) at all:
## heatmp> heatmap(x, Rowv = NA, col = cm.colors(256), scale = "column",
## heatmp+ ColSideColors = cc, margins = c(5,10),
## heatmp+ xlab = "xlab", ylab = "ylab") # no main
##
## heatmp> ## End(Don't show)
## heatmp> ## "no nothing"
## heatmp> heatmap(x, Rowv = NA, Colv = NA, scale = "column",
## heatmp+ main = "heatmap(*, NA, NA) ~= image(t(x))")
##
## heatmp> round(Ca <- cor(attitude), 2)
## rating complaints privileges learning raises critical advance
## rating 1.00 0.83 0.43 0.62 0.59 0.16 0.16
## complaints 0.83 1.00 0.56 0.60 0.67 0.19 0.22
## privileges 0.43 0.56 1.00 0.49 0.45 0.15 0.34
## learning 0.62 0.60 0.49 1.00 0.64 0.12 0.53
## raises 0.59 0.67 0.45 0.64 1.00 0.38 0.57
## critical 0.16 0.19 0.15 0.12 0.38 1.00 0.28
## advance 0.16 0.22 0.34 0.53 0.57 0.28 1.00
##
## heatmp> symnum(Ca) # simple graphic
## rt cm p l rs cr a
## rating 1
## complaints + 1
## privileges . . 1
## learning , . . 1
## raises . , . , 1
## critical . 1
## advance . . . 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
##
## heatmp> heatmap(Ca, symm = TRUE, margins = c(6,6)) # with reorder()
##
## heatmp> heatmap(Ca, Rowv = FALSE, symm = TRUE, margins = c(6,6)) # _NO_ reorder()
##
## heatmp> ## slightly artificial with color bar, without and with ordering:
## heatmp> cc <- rainbow(nrow(Ca))
##
## heatmp> heatmap(Ca, Rowv = FALSE, symm = TRUE, RowSideColors = cc, ColSideColors = cc,
## heatmp+ margins = c(6,6))
##
## heatmp> heatmap(Ca, symm = TRUE, RowSideColors = cc, ColSideColors = cc,
## heatmp+ margins = c(6,6))
##
## heatmp> ## For variable clustering, rather use distance based on cor():
## heatmp> symnum( cU <- cor(USJudgeRatings) )
## CO I DM DI CF DE PR F O W PH R
## CONT 1
## INTG 1
## DMNR B 1
## DILG + + 1
## CFMG + + B 1
## DECI + + B B 1
## PREP + + B B B 1
## FAMI + + B * * B 1
## ORAL * * B B * B B 1
## WRIT * + B * * B B B 1
## PHYS , , + + + + + + + 1
## RTEN * * * * * B * B B * 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
##
## heatmp> hU <- heatmap(cU, Rowv = FALSE, symm = TRUE, col = topo.colors(16),
## heatmp+ distfun = function(c) as.dist(1 - c), keep.dendro = TRUE)
##
## heatmp> ## The Correlation matrix with same reordering:
## heatmp> round(100 * cU[hU[[1]], hU[[2]]])
## CONT INTG DMNR PHYS DILG CFMG DECI RTEN ORAL WRIT PREP FAMI
## CONT 100 -13 -15 5 1 14 9 -3 -1 -4 1 -3
## INTG -13 100 96 74 87 81 80 94 91 91 88 87
## DMNR -15 96 100 79 84 81 80 94 91 89 86 84
## PHYS 5 74 79 100 81 88 87 91 89 86 85 84
## DILG 1 87 84 81 100 96 96 93 95 96 98 96
## CFMG 14 81 81 88 96 100 98 93 95 94 96 94
## DECI 9 80 80 87 96 98 100 92 95 95 96 94
## RTEN -3 94 94 91 93 93 92 100 98 97 95 94
## ORAL -1 91 91 89 95 95 95 98 100 99 98 98
## WRIT -4 91 89 86 96 94 95 97 99 100 99 99
## PREP 1 88 86 85 98 96 96 95 98 99 100 99
## FAMI -3 87 84 84 96 94 94 94 98 99 99 100
##
## heatmp> ## The column dendrogram:
## heatmp> utils::str(hU$Colv)
## --[dendrogram w/ 2 branches and 12 members at h = 1.15]
## |--leaf "CONT"
## `--[dendrogram w/ 2 branches and 11 members at h = 0.258]
## |--[dendrogram w/ 2 branches and 2 members at h = 0.0354]
## | |--leaf "INTG"
## | `--leaf "DMNR"
## `--[dendrogram w/ 2 branches and 9 members at h = 0.187]
## |--leaf "PHYS"
## `--[dendrogram w/ 2 branches and 8 members at h = 0.075]
## |--[dendrogram w/ 2 branches and 3 members at h = 0.0438]
## | |--leaf "DILG"
## | `--[dendrogram w/ 2 branches and 2 members at h = 0.0189]
## | |--leaf "CFMG"
## | `--leaf "DECI"
## `--[dendrogram w/ 2 branches and 5 members at h = 0.0584]
## |--leaf "RTEN"
## `--[dendrogram w/ 2 branches and 4 members at h = 0.0187]
## |--[dendrogram w/ 2 branches and 2 members at h = 0.00657]
## | |--leaf "ORAL"
## | `--leaf "WRIT"
## `--[dendrogram w/ 2 branches and 2 members at h = 0.0101]
## |--leaf "PREP"
## `--leaf "FAMI"
# ?boxplot
# ?t.test 将详细的帮助文档参数设置等打印到屏幕
t.test(rnorm(n=10,mean=10,sd=1), rnorm(n=10,mean=0,sd=1))
##
## Welch Two Sample t-test
##
## data: rnorm(n = 10, mean = 10, sd = 1) and rnorm(n = 10, mean = 0, sd = 1)
## t = 21.664, df = 17.808, p-value = 3.035e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 9.29991 11.29905
## sample estimates:
## mean of x mean of y
## 10.0456926 -0.2537863
# help.start() 自动打开Chrome等浏览器,可以按照包列表,关键词等浏览搜索帮助文档
mydata <-
read.table("../dataset/Planning.csv", header = T) ###指定文件的相对或绝对路径
head(mydata) ###显示数据的前几行
## id age relig ped income am reason bps bpd wt ht
## 1 1 37 1 2 2 20 1 120 80 57.0 153
## 2 2 32 1 5 4 19 1 120 80 40.7 141
## 3 3 24 1 7 3 23 2 110 70 55.1 156
## 4 4 33 1 2 5 23 1 120 80 43.0 154
## 5 5 31 1 2 2 22 3 130 70 58.5 154
## 6 6 30 1 2 3 17 1 120 90 40.0 154
summary(mydata) ###显示数据的摘要信息
## id age relig ped
## Min. : 1.00 Min. :18.00 Min. :1.000 Min. :0.000
## 1st Qu.: 63.25 1st Qu.:24.00 1st Qu.:1.000 1st Qu.:2.000
## Median :125.50 Median :27.00 Median :1.000 Median :2.000
## Mean :125.64 Mean :27.43 Mean :1.108 Mean :3.283
## 3rd Qu.:187.75 3rd Qu.:30.00 3rd Qu.:1.000 3rd Qu.:5.000
## Max. :251.00 Max. :41.00 Max. :2.000 Max. :7.000
## NA's :1 NA's :24
## income am reason bps bpd
## Min. :1.000 Min. :15.00 Min. :1.000 Min. : 0 Min. : 0.00
## 1st Qu.:1.000 1st Qu.:18.00 1st Qu.:1.000 1st Qu.:110 1st Qu.: 60.00
## Median :2.000 Median :20.00 Median :1.000 Median :110 Median : 70.00
## Mean :2.129 Mean :20.35 Mean :1.484 Mean :113 Mean : 71.69
## 3rd Qu.:3.000 3rd Qu.:22.00 3rd Qu.:2.000 3rd Qu.:120 3rd Qu.: 80.00
## Max. :6.000 Max. :31.00 Max. :3.000 Max. :170 Max. :110.00
## NA's :26 NA's :1 NA's :2 NA's :7 NA's :7
## wt ht
## Min. : 0.00 Min. :141.0
## 1st Qu.:46.50 1st Qu.:150.0
## Median :51.40 Median :153.0
## Mean :51.89 Mean :153.5
## 3rd Qu.:57.00 3rd Qu.:156.0
## Max. :73.80 Max. :175.0
## NA's :5 NA's :7
boxplot(mydata$bps ~ mydata$income, col = "tomato") ###绘制箱线图
mydata.omit <- na.omit(mydata) ###删除缺失值
dim(mydata.omit) ###显示数据的维度
## [1] 210 11
dim(mydata) ###显示数据的维度
## [1] 250 11
plot(mydata.omit$ht, mydata.omit$wt) ###绘制散点图
plot(
mydata.omit$ht,
mydata.omit$wt,
pch = 20,
col = "rosybrown",
xlab = "Height",
ylab = "Weight"
)
lm_line <- lm(mydata.omit$wt ~ mydata.omit$ht)
# 增加线
abline(lm_line)
# 增加文本
text(158, 65, "Weight=0.53*Height-29.36")
# 增加图例
legend("topright", "A legend", col = "rosybrown", pch = 20)
cars <- c(1, 3, 6, 4, 9)
plot(
cars,
type = "o",
col = "blue",
pch = 19,
ylim = c(0, 10),
xaxt = "n",
ann = FALSE
) # axes=F不绘制坐标轴,xaxt=''n"不绘制横轴; ann=F不标注坐标轴的注释(标题)
axis(1,
at = 1:5,
lab = c("Mon", "Tue", "Wed", "Thu", "Fri"))
box()
plot(
rep(1:5, 5),
rep(1:5, each = 5),
pch = 1:25,
cex = 2,
col = "blue",
bg = "yellow"
)
set.seed(1234)
sample(colors(),20)
## [1] "grey23" "darksalmon" "tan3" "violetred4"
## [5] "lightblue1" "darkorchid3" "darkseagreen1" "slategray3"
## [9] "grey65" "darkgoldenrod4" "grey9" "khaki"
## [13] "gray31" "seagreen" "antiquewhite1" "red"
## [17] "gray59" "gray42" "orchid3" "mistyrose"
plot(
x = 1:10,
y = rep(5, 10),
pch = 19,
cex = 7,
col = sample(colors () , 10)
)
points(
x = 1:10 ,
y = rep (6 , 10),
pch = 19,
cex = 7,
col = sample(colors () , 10)
)
points(
x = 1:10 ,
y = rep (4 , 10),
pch = 19,
cex = 7,
col = sample(colors () , 10)
)
library(RColorBrewer)
display.brewer.all()
library(fishualize)
fishualize(n = 8, option = 'Hypsypops_rubicundus', end = 0.9)
fishualize(n = 8, option = 'Zanclus_cornutus', end = 0.9)
fish_pal(
alpha = 1,
begin = 0,
end = 1,
direction = 1,
option = "Centropyge_loricula"
)(9)
## [1] "#8F1D1EFF" "#B80029FF" "#EA3228FF" "#FF9A0FFF" "#FFC737FF" "#BA9E80FF"
## [7] "#3E52A1FF" "#001E7CFF" "#000000FF"
library("ggsci")
library("ggplot2")
library("gridExtra")
data("diamonds")
p1 <- ggplot(
subset(diamonds, carat >= 2.2),
aes(x = table, y = price, colour = cut)
) +
geom_point(alpha = 0.7) +
geom_smooth(method = "loess", alpha = 0.05, linewidth = 1, span = 1) +
theme_bw()
p2 <- ggplot(
subset(diamonds, carat > 2.2 & depth > 55 & depth < 70),
aes(x = depth, fill = cut)
) +
geom_histogram(colour = "black", binwidth = 1, position = "dodge") +
theme_bw()
p1_npg <- p1 + scale_color_npg()
p2_npg <- p2 + scale_fill_npg()
grid.arrange(p1_npg, p2_npg, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
p1_lancet <- p1 + scale_color_lancet()
p2_lancet <- p2 + scale_fill_lancet()
grid.arrange(p1_lancet, p2_lancet, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
split.screen(c(1,2))
## [1] 1 2
split.screen(c(2,1), screen=2)
## [1] 3 4
screen(1);plot(1)
screen(3); plot(1)
screen(4); plot(1)
close.screen(all = TRUE)
cowplot 包针对ggplot2的扩展包,用于改进图形的组合与布局,使多图拼接更简洁、美观。cowplot 适用于创建复杂多面板图形。
ggpubr 是一个基于 ggplot2的R包,科研作图更简单、美观、直接达到发表要求。它提供了很多快捷函数,如ggboxpolt, ggbarplot, ggdotplot,ggviolin, ggballoonplot 不用掌握 ggplot2 的复杂语法就能快速生成专业图表,特别适合做生物统计、生信、医学科研可视化。
library(ggplot2)
library(ggpubr)
library(cowplot)
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
bxp <- ggboxplot(ToothGrowth, x = "dose", y = "len",
color = "dose", palette = "jco")
bxp
dp <- ggdotplot(ToothGrowth, x = "dose", y = "len",
color = "dose", palette = "jco", binwidth = 1)
dp
data("mtcars")
mtcars$name <- rownames(mtcars)
mtcars$cyl <- as.factor(mtcars$cyl)
bp <- ggbarplot(mtcars, x = "name", y = "mpg",
fill = "cyl",
color = "white",
palette = "jco",
sort.val = "asc",
sort.by.groups = TRUE,
x.text.angle = 90 )
bp
library("cowplot")
plot_grid(bxp, dp, bp + rremove("x.text"),
labels = c("A", "B", "C"),
ncol = 2, nrow = 2)
library(tidyverse) # for ggplot2
library(magrittr) # for pipes and %<>%
library(ggpubr) # for theme_pubr()
library(cowplot)
# 非洲八哥物种平均颜色变量、黑素体、栖息地和社会系统特征数据。
df <- read_csv("../dataset/traitdata.csv")
df %<>% mutate(melano = factor(melano), melano = forcats::fct_recode(melano, "derived" = "1", "rod-shaped" = "0"))
# create the plot
p1 <- df %>%
ggplot(aes(x = male.meanB, y = fema.meanB)) +
geom_point(aes(color = melano)) +
scale_color_manual(values = c("#BC3C29FF", "#0072B5FF")) +
labs(x = "male mean brightness", y = "female mean brightness", title = "relationship between male and female brightness") +
theme_pubr()
p1
p_xhist <- axis_canvas(p1, axis = "x") +
geom_histogram(data = df, aes(x = male.meanB, fill = melano)) + scale_fill_manual(values = c("#BC3C29FF", "#0072B5FF"))
p_y_box <- axis_canvas(p1, axis = "y") +
geom_boxplot(data = df, aes(x = 0, y = fema.meanB, fill = melano)) +
scale_fill_manual(values = c("#BC3C29FF", "#0072B5FF"))
p_y_density <- axis_canvas(p1, axis = "y", coord_flip = TRUE) + geom_density(data = df, aes(x = fema.meanB,fill = melano), color = NA, alpha = 0.5) +
scale_fill_manual(values = c("#BC3C29FF", "#0072B5FF")) + coord_flip()
combined_plot <- insert_xaxis_grob(p1 + theme(legend.position="top"), p_xhist, position = "bottom")
combined_plot %<>% insert_yaxis_grob(., p_y_box, position = "right")
combined_plot %<>% insert_yaxis_grob(., p_y_density, position = "right")
ggdraw(combined_plot)
par 设置图形参数,mfrow控制绘图的布局
mydata <- read.table("../dataset/svmvsrpart10times.csv",header=T)
mydata
## diags1 diags2 kappas1 kappas2 sensitivity1 sensitivity2
## 1 0.8148148 0.7870370 0.5749705 0.5083135 0.6153846 0.5641026
## 2 0.8148148 0.8240741 0.5847751 0.6032483 0.6666667 0.6666667
## 3 0.7592593 0.7314815 0.4840132 0.4212860 0.6923077 0.6410256
## 4 0.7962963 0.6574074 0.5379230 0.2776573 0.6153846 0.5897436
## 5 0.6944444 0.6666667 0.3109049 0.3157339 0.4871795 0.6666667
## 6 0.7500000 0.7685185 0.4361949 0.4955157 0.5641026 0.6666667
## 7 0.7870370 0.6851852 0.5306122 0.3100338 0.6666667 0.5384615
## 8 0.7500000 0.7314815 0.4551570 0.4212860 0.6410256 0.6410256
## 9 0.7962963 0.7500000 0.5379230 0.4295775 0.6153846 0.5384615
## 10 0.7685185 0.7500000 0.4779582 0.4551570 0.5897436 0.6410256
## specificity1 specificity2
## 1 0.9275362 0.9130435
## 2 0.8985507 0.9130435
## 3 0.7971014 0.7826087
## 4 0.8985507 0.6956522
## 5 0.8115942 0.6666667
## 6 0.8550725 0.8260870
## 7 0.8550725 0.7681159
## 8 0.8115942 0.7826087
## 9 0.8985507 0.8695652
## 10 0.8695652 0.8115942
plot(mydata$diags1,pch=19,xlab="",ylab="",ylim=c(0,1),cex=0.8,col="blue",xaxt="n",main="Accuracy")
axis(side=1,at=c(1:10),line=NA)
lines(mydata$diags1,col="blue")
points(mydata$diags2,col="red",pch=17,cex=0.8)
lines(mydata$diags2,col="red",lty=3)
legend("bottomright",c("svm","rpart"),col=c("blue","red"),pch=c(18,17),lty=c(1,3),cex=0.8)
par(mfrow=c(2,2))
plot(mydata$diags1,pch=19,xlab="",ylab="",ylim=c(0,1),cex=0.8,col="blue",xaxt="n",main="Accuracy")
axis(side=1,at=c(1:10),line=NA)
lines(mydata$diags1,col="blue")
points(mydata$diags2,col="red",pch=17,cex=0.8)
lines(mydata$diags2,col="red",lty=3)
legend("bottomright",c("svm","rpart"),col=c("blue","red"),pch=c(18,17),lty=c(1,3),cex=0.8)
plot(mydata$kappas1,pch=19,xlab="",ylab="",ylim=c(0,1),cex=0.8,col="blue",xaxt="n",main="Kappa")
axis(side=1,at=c(1:10),line=NA)
lines(mydata$kappas1,col="blue")
points(mydata$kappas2,col="red",pch=17,cex=0.8)
lines(mydata$kappas2,col="red",lty=3)
legend("bottomright",c("svm","rpart"),col=c("blue","red"),pch=c(18,17),lty=c(1,3),cex=0.8)
plot(mydata$sensitivity1,pch=19,xlab="",ylab="",ylim=c(0,1),cex=0.8,col="blue",xaxt="n",main="Sensitivity")
axis(side=1,at=c(1:10),line=NA)
lines(mydata$sensitivity1,col="blue")
points(mydata$sensitivity2,col="red",pch=17,cex=0.8)
lines(mydata$sensitivity2,col="red",lty=3)
legend("bottomright",c("svm","rpart"),col=c("blue","red"),pch=c(18,17),lty=c(1,3),cex=0.8)
plot(mydata$specificity1,pch=19,xlab="",ylab="",ylim=c(0,1),cex=0.8,col="blue",xaxt="n",main="Specificity")
axis(side=1,at=c(1:10),line=NA)
lines(mydata$specificity1,col="blue")
points(mydata$specificity2,col="red",pch=17,cex=0.8)
lines(mydata$specificity2,col="red",lty=3)
legend("bottomright",c("svm","rpart"),col=c("blue","red"),pch=c(18,17),lty=c(1,3),cex=0.8)
ggplot2
是 R
语言中最流行和强大的数据可视化包之一。它基于 Leland Wilkinson 于 1999
年提出的《The Grammar of Graphics》(图形语法)理论,并由 Hadley Wickham
于 2006 年在 R 中实现为 ggplot
包。ggplot2
的核心理念是允许用户通过添加或移除不同的图层来构建复杂的、高度定制化的图表。
ggplot2
的优势ggplot2
可以用于创建散点图、折线图、柱状图、箱线图、直方图、密度图等各种常见的图表类型,并且能够通过扩展包支持更多高级图表。ggplot2
生成的图表具有清晰、美观的特点,可以轻松地用于出版物、报告和演示文稿等场景。ggplot2
可以很好地与 R 语言中的其他数据处理包(如
dplyr
)配合使用,实现数据清洗、转换和可视化的流畅工作流程。ggplot2
的核心概念ggplot2
的核心是基于图形语法,将图表分解为不同的组成部分,用户可以通过组合这些部分来创建所需的图表:
geom_point
)、线
(geom_line
)、柱 (geom_bar
) 等。library(ggplot2)
library(palmerpenguins)
ggplot(data = penguins, mapping = aes(x = flipper_length_mm, y = body_mass_g, color = species)) + geom_point()
ggplot(data = penguins, aes(x = flipper_length_mm, y = body_mass_g, color = species, shape = species)) + geom_point()
ggplot(data = penguins, aes(x = flipper_length_mm, y = body_mass_g, color = species, shape = species, size = body_mass_g)) + geom_point(alpha=.6)
ggplot(data = penguins, mapping = aes(x = flipper_length_mm, y = body_mass_g)) +
geom_point(alpha = .6) + geom_smooth()
ggplot(data = penguins, mapping = aes(x = species, y = flipper_length_mm)) +
geom_violin()
ggplot(data = penguins) +
geom_point(mapping = aes(x = flipper_length_mm, y = body_mass_g, color = species)) +
labs(
title = "Palmer Penguins: 体重与鳍长的关系",
subtitle = "按物种分组的散点图",
caption = "数据由 Dr. Kristen Gorman 收集",
x = "Bill Length (mm)",# 设置 x 轴标签
y = "Bill Depth (mm)",# 设置 y 轴标签
color = "Species" # 设置图例的标题
) +
annotate(
"text",
x = 220,
y = 3500,
label = "Gentoo 企鹅最大",
fontface = "bold",
size = 4.5,
angle = 25
)
ggplot(data = penguins, mapping = aes(x = flipper_length_mm, y = body_mass_g, color = species)) + geom_point() + facet_wrap(~species)
ggplot(data = penguins, mapping = aes(x = species, y = body_mass_g, color = species)) + geom_boxplot() + facet_wrap(~island) + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
library(ggplot2)
library(RColorBrewer)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
mydata <- read.table("../dataset/DatasaurusDozen.tsv", header=T, stringsAsFactors=F)
head(mydata)
## dataset x y
## 1 dino 55.3846 97.1795
## 2 dino 51.5385 96.0256
## 3 dino 46.1538 94.4872
## 4 dino 42.8205 91.4103
## 5 dino 40.7692 88.3333
## 6 dino 38.7179 84.8718
table(mydata$dataset)
##
## away bullseye circle dino dots h_lines high_lines
## 142 142 142 142 142 142 142
## slant_down slant_up star v_lines wide_lines x_shape
## 142 142 142 142 142 142
summary(subset(mydata, dataset=="away"))
## dataset x y
## Length:142 Min. :15.56 Min. : 0.01512
## Class :character 1st Qu.:39.72 1st Qu.:24.62589
## Mode :character Median :53.34 Median :47.53527
## Mean :54.27 Mean :47.83472
## 3rd Qu.:69.15 3rd Qu.:71.80315
## Max. :91.64 Max. :97.47577
summary(subset(mydata, dataset=="dino"))
## dataset x y
## Length:142 Min. :22.31 Min. : 2.949
## Class :character 1st Qu.:44.10 1st Qu.:25.288
## Mode :character Median :53.33 Median :46.026
## Mean :54.26 Mean :47.832
## 3rd Qu.:64.74 3rd Qu.:68.526
## Max. :98.21 Max. :99.487
describeBy(mydata, group="dataset")
##
## Descriptive statistics by group
## dataset: away
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.27 16.77 53.34 54.36 21.22 15.56 91.64 76.08 -0.02
## y 3 142 47.83 26.94 47.54 48.23 35.89 0.02 97.48 97.46 -0.09
## kurtosis se
## dataset* NaN 0.00
## x -1.24 1.41
## y -1.28 2.26
## ------------------------------------------------------------
## dataset: bullseye
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.27 16.77 53.84 53.86 17.49 19.29 91.74 72.45 0.17
## y 3 142 47.83 26.94 47.38 47.83 36.83 9.69 85.88 76.18 0.01
## kurtosis se
## dataset* NaN 0.00
## x -0.27 1.41
## y -1.45 2.26
## ------------------------------------------------------------
## dataset: circle
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.27 16.76 54.02 54.08 16.29 21.86 85.66 63.80 0.03
## y 3 142 47.84 26.93 51.03 47.71 41.72 16.33 85.58 69.25 0.00
## kurtosis se
## dataset* NaN 0.00
## x -0.67 1.41
## y -1.80 2.26
## ------------------------------------------------------------
## dataset: dino
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.26 16.77 53.33 53.69 15.97 22.31 98.21 75.90 0.28
## y 3 142 47.83 26.94 46.03 46.90 30.79 2.95 99.49 96.54 0.25
## kurtosis se
## dataset* NaN 0.00
## x -0.29 1.41
## y -1.06 2.26
## ------------------------------------------------------------
## dataset: dots
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.26 16.77 50.98 54.88 2.76 25.44 77.95 52.51 -0.12
## y 3 142 47.84 26.93 51.30 46.88 46.99 15.77 94.25 78.48 0.15
## kurtosis se
## dataset* NaN 0.00
## x -0.87 1.41
## y -1.41 2.26
## ------------------------------------------------------------
## dataset: h_lines
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.26 16.77 53.07 53.87 17.90 22.00 98.29 76.28 0.24
## y 3 142 47.83 26.94 50.47 47.23 29.62 10.46 90.46 80.00 0.15
## kurtosis se
## dataset* NaN 0.00
## x -0.35 1.41
## y -1.16 2.26
## ------------------------------------------------------------
## dataset: high_lines
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.27 16.77 54.17 53.91 16.00 17.89 96.08 78.19 0.19
## y 3 142 47.84 26.94 32.50 47.15 22.77 14.91 87.15 72.24 0.20
## kurtosis se
## dataset* NaN 0.00
## x -0.28 1.41
## y -1.80 2.26
## ------------------------------------------------------------
## dataset: slant_down
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.27 16.77 53.14 53.83 16.24 18.11 95.59 77.48 0.25
## y 3 142 47.84 26.94 46.40 47.31 28.65 0.30 99.64 99.34 0.19
## kurtosis se
## dataset* NaN 0.00
## x -0.26 1.41
## y -0.95 2.26
## ------------------------------------------------------------
## dataset: slant_up
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.27 16.77 54.26 53.69 16.13 20.21 95.26 75.05 0.23
## y 3 142 47.83 26.94 45.29 46.83 33.80 5.65 99.58 93.93 0.26
## kurtosis se
## dataset* NaN 0.00
## x -0.38 1.41
## y -1.09 2.26
## ------------------------------------------------------------
## dataset: star
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.27 16.77 56.53 54.33 22.16 27.02 86.44 59.41 0.08
## y 3 142 47.84 26.93 50.11 46.56 37.30 14.37 92.21 77.85 0.24
## kurtosis se
## dataset* NaN 0.00
## x -1.19 1.41
## y -1.36 2.26
## ------------------------------------------------------------
## dataset: v_lines
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.27 16.77 50.36 53.57 28.37 30.45 89.50 59.06 0.26
## y 3 142 47.84 26.94 47.11 46.83 33.00 2.73 99.69 96.96 0.23
## kurtosis se
## dataset* NaN 0.00
## x -0.58 1.41
## y -1.05 2.26
## ------------------------------------------------------------
## dataset: wide_lines
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.27 16.77 64.55 54.70 11.68 27.44 77.92 50.48 -0.32
## y 3 142 47.83 26.94 46.28 46.97 32.59 0.22 99.28 99.07 0.25
## kurtosis se
## dataset* NaN 0.00
## x -1.72 1.41
## y -1.09 2.26
## ------------------------------------------------------------
## dataset: x_shape
## vars n mean sd median trimmed mad min max range skew
## dataset* 1 142 1.00 0.00 1.00 1.00 0.00 1.00 1.00 0.00 NaN
## x 2 142 54.26 16.77 47.14 53.27 14.76 31.11 85.45 54.34 0.48
## y 3 142 47.84 26.93 39.88 46.98 33.05 4.58 97.84 93.26 0.23
## kurtosis se
## dataset* NaN 0.00
## x -1.37 1.41
## y -1.44 2.26
ggplot(mydata, aes(dataset,y, color=dataset))+geom_boxplot()
ggplot(mydata, aes(dataset,y, color=dataset))+geom_violin()
ggplot(mydata, aes(x,y, color=dataset))+geom_point()
p <- ggplot(mydata, aes(x,y, color=dataset))+geom_point()+facet_wrap(~dataset, nrow=3) + scale_color_manual(values=c(brewer.pal(12,"Paired"), "black")) +theme(legend.position="none")
print(p)