快速入门

当计算器

587*(23234+899)/9

## [1] 1574008

使用example快速学习

example(boxplot)

  example(boxplot)

  ## 
  ## boxplt> ## boxplot on a formula:
  ## boxplt> boxplot(count ~ spray, data = InsectSprays, col = "lightgray")

  ## 
  ## boxplt> # *add* notches (somewhat funny here <--> warning "notches .. outside hinges"):
  ## boxplt> boxplot(count ~ spray, data = InsectSprays,
  ## boxplt+         notch = TRUE, add = TRUE, col = "blue")

  ## Warning in (function (z, notch = FALSE, width = NULL, varwidth = FALSE, : some
  ## notches went outside hinges ('box'): maybe set notch=FALSE

  ## 
  ## boxplt> boxplot(decrease ~ treatment, data = OrchardSprays, col = "bisque",
  ## boxplt+         log = "y")

  ## 
  ## boxplt> ## horizontal=TRUE, switching  y <--> x :
  ## boxplt> boxplot(decrease ~ treatment, data = OrchardSprays, col = "bisque",
  ## boxplt+         log = "x", horizontal=TRUE)

  ## 
  ## boxplt> rb <- boxplot(decrease ~ treatment, data = OrchardSprays, col = "bisque")

  ## 
  ## boxplt> title("Comparing boxplot()s and non-robust mean +/- SD")
  ## 
  ## boxplt> mn.t <- tapply(OrchardSprays$decrease, OrchardSprays$treatment, mean)
  ## 
  ## boxplt> sd.t <- tapply(OrchardSprays$decrease, OrchardSprays$treatment, sd)
  ## 
  ## boxplt> xi <- 0.3 + seq(rb$n)
  ## 
  ## boxplt> points(xi, mn.t, col = "orange", pch = 18)
  ## 
  ## boxplt> arrows(xi, mn.t - sd.t, xi, mn.t + sd.t,
  ## boxplt+        code = 3, col = "pink", angle = 75, length = .1)
  ## 
  ## boxplt> ## boxplot on a matrix:
  ## boxplt> mat <- cbind(Uni05 = (1:100)/21, Norm = rnorm(100),
  ## boxplt+              `5T` = rt(100, df = 5), Gam2 = rgamma(100, shape = 2))
  ## 
  ## boxplt> boxplot(mat) # directly, calling boxplot.matrix()

  ## 
  ## boxplt> ## boxplot on a data frame:
  ## boxplt> df. <- as.data.frame(mat)
  ## 
  ## boxplt> par(las = 1) # all axis labels horizontal
  ## 
  ## boxplt> boxplot(df., main = "boxplot(*, horizontal = TRUE)", horizontal = TRUE)

  ## 
  ## boxplt> ## Using 'at = ' and adding boxplots -- example idea by Roger Bivand :
  ## boxplt> boxplot(len ~ dose, data = ToothGrowth,
  ## boxplt+         boxwex = 0.25, at = 1:3 - 0.2,
  ## boxplt+         subset = supp == "VC", col = "yellow",
  ## boxplt+         main = "Guinea Pigs' Tooth Growth",
  ## boxplt+         xlab = "Vitamin C dose mg",
  ## boxplt+         ylab = "tooth length",
  ## boxplt+         xlim = c(0.5, 3.5), ylim = c(0, 35), yaxs = "i")

  ## 
  ## boxplt> boxplot(len ~ dose, data = ToothGrowth, add = TRUE,
  ## boxplt+         boxwex = 0.25, at = 1:3 + 0.2,
  ## boxplt+         subset = supp == "OJ", col = "orange")
  ## 
  ## boxplt> legend(2, 9, c("Ascorbic acid", "Orange juice"),
  ## boxplt+        fill = c("yellow", "orange"))
  ## 
  ## boxplt> ## With less effort (slightly different) using factor *interaction*:
  ## boxplt> boxplot(len ~ dose:supp, data = ToothGrowth,
  ## boxplt+         boxwex = 0.5, col = c("orange", "yellow"),
  ## boxplt+         main = "Guinea Pigs' Tooth Growth",
  ## boxplt+         xlab = "Vitamin C dose mg", ylab = "tooth length",
  ## boxplt+         sep = ":", lex.order = TRUE, ylim = c(0, 35), yaxs = "i")

  ## 
  ## boxplt> ## more examples in  help(bxp)
  ## boxplt> 
  ## boxplt> 
  ## boxplt>

example(hist)

  example(hist)

  ## 
  ## hist> op <- par(mfrow = c(2, 2))
  ## 
  ## hist> hist(islands)

  ## 
  ## hist> utils::str(hist(islands, col = "gray", labels = TRUE))

  ## List of 6
  ##  $ breaks  : num [1:10] 0 2000 4000 6000 8000 10000 12000 14000 16000 18000
  ##  $ counts  : int [1:9] 41 2 1 1 1 1 0 0 1
  ##  $ density : num [1:9] 4.27e-04 2.08e-05 1.04e-05 1.04e-05 1.04e-05 ...
  ##  $ mids    : num [1:9] 1000 3000 5000 7000 9000 11000 13000 15000 17000
  ##  $ xname   : chr "islands"
  ##  $ equidist: logi TRUE
  ##  - attr(*, "class")= chr "histogram"
  ## 
  ## hist> hist(sqrt(islands), breaks = 12, col = "lightblue", border = "pink")

  ## 
  ## hist> ##-- For non-equidistant breaks, counts should NOT be graphed unscaled:
  ## hist> r <- hist(sqrt(islands), breaks = c(4*0:5, 10*3:5, 70, 100, 140),
  ## hist+           col = "blue1")

  ## 
  ## hist> text(r$mids, r$density, r$counts, adj = c(.5, -.5), col = "blue3")
  ## 
  ## hist> sapply(r[2:3], sum)
  ##    counts   density 
  ## 48.000000  0.215625 
  ## 
  ## hist> sum(r$density * diff(r$breaks)) # == 1
  ## [1] 1
  ## 
  ## hist> lines(r, lty = 3, border = "purple") # -> lines.histogram(*)
  ## 
  ## hist> par(op)
  ## 
  ## hist> require(utils) # for str
  ## 
  ## hist> str(hist(islands, breaks = 12, plot =  FALSE)) #-> 10 (~= 12) breaks
  ## List of 6
  ##  $ breaks  : num [1:10] 0 2000 4000 6000 8000 10000 12000 14000 16000 18000
  ##  $ counts  : int [1:9] 41 2 1 1 1 1 0 0 1
  ##  $ density : num [1:9] 4.27e-04 2.08e-05 1.04e-05 1.04e-05 1.04e-05 ...
  ##  $ mids    : num [1:9] 1000 3000 5000 7000 9000 11000 13000 15000 17000
  ##  $ xname   : chr "islands"
  ##  $ equidist: logi TRUE
  ##  - attr(*, "class")= chr "histogram"
  ## 
  ## hist> str(hist(islands, breaks = c(12,20,36,80,200,1000,17000), plot = FALSE))
  ## List of 6
  ##  $ breaks  : num [1:7] 12 20 36 80 200 1000 17000
  ##  $ counts  : int [1:6] 12 11 8 6 4 7
  ##  $ density : num [1:6] 0.03125 0.014323 0.003788 0.001042 0.000104 ...
  ##  $ mids    : num [1:6] 16 28 58 140 600 9000
  ##  $ xname   : chr "islands"
  ##  $ equidist: logi FALSE
  ##  - attr(*, "class")= chr "histogram"
  ## 
  ## hist> hist(islands, breaks = c(12,20,36,80,200,1000,17000), freq = TRUE,
  ## hist+      main = "WRONG histogram") # and warning

  ## Warning in plot.histogram(r, freq = freq1, col = col, border = border, angle =
  ## angle, : the AREAS in the plot are wrong -- rather use 'freq = FALSE'

  ## 
  ## hist> ## No test: ##D 
  ## hist> ##D ## Extreme outliers; the "FD" rule would take very large number of 'breaks':
  ## hist> ##D XXL <- c(1:9, c(-1,1)*1e300)
  ## hist> ##D hh <- hist(XXL, "FD") # did not work in R <= 3.4.1; now gives warning
  ## hist> ##D ## pretty() determines how many counts are used (platform dependently!):
  ## hist> ##D length(hh$breaks) ## typically 1 million -- though 1e6 was "a suggestion only"
  ## hist> ## End(No test)
  ## hist> require(stats)
  ## 
  ## hist> set.seed(14)
  ## 
  ## hist> x <- rchisq(100, df = 4)
  ## 
  ## hist> ## Don't show: 
  ## hist> op <- par(mfrow = 2:1, mgp = c(1.5, 0.6, 0), mar = .1 + c(3,3:1))
  ## 
  ## hist> ## End(Don't show)
  ## hist> ## Comparing data with a model distribution should be done with qqplot()!
  ## hist> qqplot(x, qchisq(ppoints(x), df = 4)); abline(0, 1, col = 2, lty = 2)

  ## 
  ## hist> ## if you really insist on using hist() ... :
  ## hist> hist(x, freq = FALSE, ylim = c(0, 0.2))

  ## 
  ## hist> curve(dchisq(x, df = 4), col = 2, lty = 2, lwd = 2, add = TRUE)
  ## 
  ## hist> ## Don't show: 
  ## hist> par(op)
  ## 
  ## hist> ## End(Don't show)
  ## hist> 
  ## hist> 
  ## hist>

example(heatmap)

  example(heatmap)

  ## 
  ## heatmp> require(graphics); require(grDevices)
  ## 
  ## heatmp> x  <- as.matrix(mtcars)
  ## 
  ## heatmp> rc <- rainbow(nrow(x), start = 0, end = .3)
  ## 
  ## heatmp> cc <- rainbow(ncol(x), start = 0, end = .3)
  ## 
  ## heatmp> hv <- heatmap(x, col = cm.colors(256), scale = "column",
  ## heatmp+               RowSideColors = rc, ColSideColors = cc, margins = c(5,10),
  ## heatmp+               xlab = "specification variables", ylab =  "Car Models",
  ## heatmp+               main = "heatmap(<Mtcars data>, ..., scale = \"column\")")

  ## 
  ## heatmp> utils::str(hv) # the two re-ordering index vectors
  ## List of 4
  ##  $ rowInd: int [1:32] 31 17 16 15 5 25 29 24 7 6 ...
  ##  $ colInd: int [1:11] 2 9 8 11 6 5 10 7 1 4 ...
  ##  $ Rowv  : NULL
  ##  $ Colv  : NULL
  ## 
  ## heatmp> ## no column dendrogram (nor reordering) at all:
  ## heatmp> heatmap(x, Colv = NA, col = cm.colors(256), scale = "column",
  ## heatmp+         RowSideColors = rc, margins = c(5,10),
  ## heatmp+         xlab = "specification variables", ylab =  "Car Models",
  ## heatmp+         main = "heatmap(<Mtcars data>, ..., scale = \"column\")")

  ## 
  ## heatmp> ## Don't show: 
  ## heatmp> ## no row dendrogram (nor reordering) at all:
  ## heatmp> heatmap(x, Rowv = NA, col = cm.colors(256), scale = "column",
  ## heatmp+         ColSideColors = cc, margins = c(5,10),
  ## heatmp+         xlab = "xlab", ylab =  "ylab")  # no main

  ## 
  ## heatmp> ## End(Don't show)
  ## heatmp> ## "no nothing"
  ## heatmp> heatmap(x, Rowv = NA, Colv = NA, scale = "column",
  ## heatmp+         main = "heatmap(*, NA, NA) ~= image(t(x))")

  ## 
  ## heatmp> round(Ca <- cor(attitude), 2)
  ##            rating complaints privileges learning raises critical advance
  ## rating       1.00       0.83       0.43     0.62   0.59     0.16    0.16
  ## complaints   0.83       1.00       0.56     0.60   0.67     0.19    0.22
  ## privileges   0.43       0.56       1.00     0.49   0.45     0.15    0.34
  ## learning     0.62       0.60       0.49     1.00   0.64     0.12    0.53
  ## raises       0.59       0.67       0.45     0.64   1.00     0.38    0.57
  ## critical     0.16       0.19       0.15     0.12   0.38     1.00    0.28
  ## advance      0.16       0.22       0.34     0.53   0.57     0.28    1.00
  ## 
  ## heatmp> symnum(Ca) # simple graphic
  ##            rt cm p l rs cr a
  ## rating     1                
  ## complaints +  1             
  ## privileges .  .  1          
  ## learning   ,  .  . 1        
  ## raises     .  ,  . , 1      
  ## critical             .  1   
  ## advance          . . .     1
  ## attr(,"legend")
  ## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
  ## 
  ## heatmp> heatmap(Ca,               symm = TRUE, margins = c(6,6)) # with reorder()

  ## 
  ## heatmp> heatmap(Ca, Rowv = FALSE, symm = TRUE, margins = c(6,6)) # _NO_ reorder()

  ## 
  ## heatmp> ## slightly artificial with color bar, without and with ordering:
  ## heatmp> cc <- rainbow(nrow(Ca))
  ## 
  ## heatmp> heatmap(Ca, Rowv = FALSE, symm = TRUE, RowSideColors = cc, ColSideColors = cc,
  ## heatmp+    margins = c(6,6))

  ## 
  ## heatmp> heatmap(Ca,        symm = TRUE, RowSideColors = cc, ColSideColors = cc,
  ## heatmp+    margins = c(6,6))

  ## 
  ## heatmp> ## For variable clustering, rather use distance based on cor():
  ## heatmp> symnum( cU <- cor(USJudgeRatings) )
  ##      CO I DM DI CF DE PR F O W PH R
  ## CONT 1                             
  ## INTG    1                          
  ## DMNR    B 1                        
  ## DILG    + +  1                     
  ## CFMG    + +  B  1                  
  ## DECI    + +  B  B  1               
  ## PREP    + +  B  B  B  1            
  ## FAMI    + +  B  *  *  B  1         
  ## ORAL    * *  B  B  *  B  B 1       
  ## WRIT    * +  B  *  *  B  B B 1     
  ## PHYS    , ,  +  +  +  +  + + + 1   
  ## RTEN    * *  *  *  *  B  * B B *  1
  ## attr(,"legend")
  ## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
  ## 
  ## heatmp> hU <- heatmap(cU, Rowv = FALSE, symm = TRUE, col = topo.colors(16),
  ## heatmp+              distfun = function(c) as.dist(1 - c), keep.dendro = TRUE)

  ## 
  ## heatmp> ## The Correlation matrix with same reordering:
  ## heatmp> round(100 * cU[hU[[1]], hU[[2]]])
  ##      CONT INTG DMNR PHYS DILG CFMG DECI RTEN ORAL WRIT PREP FAMI
  ## CONT  100  -13  -15    5    1   14    9   -3   -1   -4    1   -3
  ## INTG  -13  100   96   74   87   81   80   94   91   91   88   87
  ## DMNR  -15   96  100   79   84   81   80   94   91   89   86   84
  ## PHYS    5   74   79  100   81   88   87   91   89   86   85   84
  ## DILG    1   87   84   81  100   96   96   93   95   96   98   96
  ## CFMG   14   81   81   88   96  100   98   93   95   94   96   94
  ## DECI    9   80   80   87   96   98  100   92   95   95   96   94
  ## RTEN   -3   94   94   91   93   93   92  100   98   97   95   94
  ## ORAL   -1   91   91   89   95   95   95   98  100   99   98   98
  ## WRIT   -4   91   89   86   96   94   95   97   99  100   99   99
  ## PREP    1   88   86   85   98   96   96   95   98   99  100   99
  ## FAMI   -3   87   84   84   96   94   94   94   98   99   99  100
  ## 
  ## heatmp> ## The column dendrogram:
  ## heatmp> utils::str(hU$Colv)
  ## --[dendrogram w/ 2 branches and 12 members at h = 1.15]
  ##   |--leaf "CONT" 
  ##   `--[dendrogram w/ 2 branches and 11 members at h = 0.258]
  ##      |--[dendrogram w/ 2 branches and 2 members at h = 0.0354]
  ##      |  |--leaf "INTG" 
  ##      |  `--leaf "DMNR" 
  ##      `--[dendrogram w/ 2 branches and 9 members at h = 0.187]
  ##         |--leaf "PHYS" 
  ##         `--[dendrogram w/ 2 branches and 8 members at h = 0.075]
  ##            |--[dendrogram w/ 2 branches and 3 members at h = 0.0438]
  ##            |  |--leaf "DILG" 
  ##            |  `--[dendrogram w/ 2 branches and 2 members at h = 0.0189]
  ##            |     |--leaf "CFMG" 
  ##            |     `--leaf "DECI" 
  ##            `--[dendrogram w/ 2 branches and 5 members at h = 0.0584]
  ##               |--leaf "RTEN" 
  ##               `--[dendrogram w/ 2 branches and 4 members at h = 0.0187]
  ##                  |--[dendrogram w/ 2 branches and 2 members at h = 0.00657]
  ##                  |  |--leaf "ORAL" 
  ##                  |  `--leaf "WRIT" 
  ##                  `--[dendrogram w/ 2 branches and 2 members at h = 0.0101]
  ##                     |--leaf "PREP" 
  ##                     `--leaf "FAMI"

使用？，使用help.start()查看帮助文档

# ?boxplot 
# ?t.test 将详细的帮助文档参数设置等打印到屏幕
t.test(rnorm(n=10,mean=10,sd=1), rnorm(n=10,mean=0,sd=1))

## 
##  Welch Two Sample t-test
## 
## data:  rnorm(n = 10, mean = 10, sd = 1) and rnorm(n = 10, mean = 0, sd = 1)
## t = 21.664, df = 17.808, p-value = 3.035e-14
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   9.29991 11.29905
## sample estimates:
##  mean of x  mean of y 
## 10.0456926 -0.2537863

# help.start() 自动打开Chrome等浏览器，可以按照包列表，关键词等浏览搜索帮助文档

R 读取数据

mydata <-
  read.table("../dataset/Planning.csv", header = T) ###指定文件的相对或绝对路径
head(mydata)　###显示数据的前几行

##   id age relig ped income am reason bps bpd   wt  ht
## 1  1  37     1   2      2 20      1 120  80 57.0 153
## 2  2  32     1   5      4 19      1 120  80 40.7 141
## 3  3  24     1   7      3 23      2 110  70 55.1 156
## 4  4  33     1   2      5 23      1 120  80 43.0 154
## 5  5  31     1   2      2 22      3 130  70 58.5 154
## 6  6  30     1   2      3 17      1 120  90 40.0 154

summary(mydata)　###显示数据的摘要信息

##        id              age            relig            ped       
##  Min.   :  1.00   Min.   :18.00   Min.   :1.000   Min.   :0.000  
##  1st Qu.: 63.25   1st Qu.:24.00   1st Qu.:1.000   1st Qu.:2.000  
##  Median :125.50   Median :27.00   Median :1.000   Median :2.000  
##  Mean   :125.64   Mean   :27.43   Mean   :1.108   Mean   :3.283  
##  3rd Qu.:187.75   3rd Qu.:30.00   3rd Qu.:1.000   3rd Qu.:5.000  
##  Max.   :251.00   Max.   :41.00   Max.   :2.000   Max.   :7.000  
##                                   NA's   :1       NA's   :24     
##      income            am            reason           bps           bpd        
##  Min.   :1.000   Min.   :15.00   Min.   :1.000   Min.   :  0   Min.   :  0.00  
##  1st Qu.:1.000   1st Qu.:18.00   1st Qu.:1.000   1st Qu.:110   1st Qu.: 60.00  
##  Median :2.000   Median :20.00   Median :1.000   Median :110   Median : 70.00  
##  Mean   :2.129   Mean   :20.35   Mean   :1.484   Mean   :113   Mean   : 71.69  
##  3rd Qu.:3.000   3rd Qu.:22.00   3rd Qu.:2.000   3rd Qu.:120   3rd Qu.: 80.00  
##  Max.   :6.000   Max.   :31.00   Max.   :3.000   Max.   :170   Max.   :110.00  
##  NA's   :26      NA's   :1       NA's   :2       NA's   :7     NA's   :7       
##        wt              ht       
##  Min.   : 0.00   Min.   :141.0  
##  1st Qu.:46.50   1st Qu.:150.0  
##  Median :51.40   Median :153.0  
##  Mean   :51.89   Mean   :153.5  
##  3rd Qu.:57.00   3rd Qu.:156.0  
##  Max.   :73.80   Max.   :175.0  
##  NA's   :5       NA's   :7

boxplot(mydata$bps ~ mydata$income, col = "tomato")　###绘制箱线图

mydata.omit <- na.omit(mydata)　###删除缺失值
dim(mydata.omit)　###显示数据的维度

## [1] 210  11

dim(mydata)　###显示数据的维度

## [1] 250  11

高级绘图命令，产生一个图

低级绘图命令增加点，线，文本，图例等

plot(mydata.omit$ht, mydata.omit$wt)　###绘制散点图

plot(
  mydata.omit$ht,
  mydata.omit$wt,
  pch = 20,
  col = "rosybrown",
  xlab = "Height",
  ylab = "Weight"
)

lm_line <- lm(mydata.omit$wt ~ mydata.omit$ht)
# 增加线
abline(lm_line)
# 增加文本 
text(158, 65, "Weight=0.53*Height-29.36")
# 增加图例
legend("topright", "A legend", col = "rosybrown", pch = 20)

坐标轴的控制

cars  <-  c(1, 3, 6, 4, 9)
plot(
  cars,
  type = "o",
  col = "blue",
  pch = 19,
  ylim = c(0, 10),
  xaxt = "n",
  ann = FALSE
) # axes＝F不绘制坐标轴，xaxt=''n"不绘制横轴;  ann=F不标注坐标轴的注释（标题）
axis(1,
     at = 1:5,
     lab = c("Mon", "Tue", "Wed", "Thu", "Fri"))
box()

R的符号形状

plot(
  rep(1:5, 5),
  rep(1:5, each = 5),
  pch = 1:25,
  cex = 2,
  col = "blue",
  bg = "yellow"
)

R的颜色

set.seed(1234)
sample(colors(),20)

##  [1] "grey23"         "darksalmon"     "tan3"           "violetred4"    
##  [5] "lightblue1"     "darkorchid3"    "darkseagreen1"  "slategray3"    
##  [9] "grey65"         "darkgoldenrod4" "grey9"          "khaki"         
## [13] "gray31"         "seagreen"       "antiquewhite1"  "red"           
## [17] "gray59"         "gray42"         "orchid3"        "mistyrose"

plot(
  x = 1:10,
  y = rep(5, 10),
  pch = 19,
  cex = 7,
  col = sample(colors () , 10)
)
points(
  x = 1:10 ,
  y = rep (6 , 10),
  pch = 19,
  cex = 7,
  col = sample(colors () , 10)
)
points(
  x = 1:10 ,
  y = rep (4 , 10),
  pch = 19,
  cex = 7,
  col = sample(colors () , 10)
)

library(RColorBrewer)
display.brewer.all()

library(fishualize)
fishualize(n = 8, option = 'Hypsypops_rubicundus', end = 0.9)

fishualize(n = 8, option = 'Zanclus_cornutus', end = 0.9)

fish_pal(
       alpha = 1,
       begin = 0,
       end = 1,
       direction = 1,
       option = "Centropyge_loricula"
     )(9)

## [1] "#8F1D1EFF" "#B80029FF" "#EA3228FF" "#FF9A0FFF" "#FFC737FF" "#BA9E80FF"
## [7] "#3E52A1FF" "#001E7CFF" "#000000FF"

ggsci Nature Publication Group & Lacent

library("ggsci")
library("ggplot2")
library("gridExtra")

data("diamonds")

p1 <- ggplot(
  subset(diamonds, carat >= 2.2),
  aes(x = table, y = price, colour = cut)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "loess", alpha = 0.05, linewidth = 1, span = 1) +
  theme_bw()

p2 <- ggplot(
  subset(diamonds, carat > 2.2 & depth > 55 & depth < 70),
  aes(x = depth, fill = cut)
) +
  geom_histogram(colour = "black", binwidth = 1, position = "dodge") +
  theme_bw()
p1_npg <- p1 + scale_color_npg()
p2_npg <- p2 + scale_fill_npg()
grid.arrange(p1_npg, p2_npg, ncol = 2)

## `geom_smooth()` using formula = 'y ~ x'

p1_lancet <- p1 + scale_color_lancet()
p2_lancet <- p2 + scale_fill_lancet()
grid.arrange(p1_lancet, p2_lancet, ncol = 2)

## `geom_smooth()` using formula = 'y ~ x'

多图（使用split.screen实现）

split.screen(c(1,2))

## [1] 1 2

split.screen(c(2,1), screen=2)

## [1] 3 4

screen(1);plot(1)
screen(3); plot(1)
screen(4); plot(1)

close.screen(all = TRUE)

使用cowplot包实现多绘图区域的对齐

http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/81-ggplot2-easy-way-to-mix-multiple-graphs-on-the-same-page/#use-cowplot-r-package

cowplot 包针对ggplot2的扩展包，用于改进图形的组合与布局，使多图拼接更简洁、美观。cowplot 适用于创建复杂多面板图形。

ggpubr 是一个基于 ggplot2的R包，科研作图更简单、美观、直接达到发表要求。它提供了很多快捷函数，如ggboxpolt, ggbarplot, ggdotplot，ggviolin, ggballoonplot 不用掌握 ggplot2 的复杂语法就能快速生成专业图表，特别适合做生物统计、生信、医学科研可视化。

library(ggplot2)
library(ggpubr)
library(cowplot)

## 
## Attaching package: 'cowplot'

## The following object is masked from 'package:ggpubr':
## 
##     get_legend

bxp <- ggboxplot(ToothGrowth, x = "dose", y = "len",
                 color = "dose", palette = "jco")
bxp

dp <- ggdotplot(ToothGrowth, x = "dose", y = "len",
                 color = "dose", palette = "jco", binwidth = 1)
dp

data("mtcars")
mtcars$name <- rownames(mtcars)
mtcars$cyl <- as.factor(mtcars$cyl)
bp <- ggbarplot(mtcars, x = "name", y = "mpg",
          fill = "cyl", 
          color = "white", 
          palette = "jco", 
          sort.val = "asc", 
          sort.by.groups = TRUE,
          x.text.angle = 90 )
bp

library("cowplot")
plot_grid(bxp, dp, bp + rremove("x.text"), 
          labels = c("A", "B", "C"),
          ncol = 2, nrow = 2)

Marginal plots（边际图）with cowplot

library(tidyverse) # for ggplot2
library(magrittr) # for pipes and %<>%
library(ggpubr) # for theme_pubr()
library(cowplot)
# 非洲八哥物种平均颜色变量、黑素体、栖息地和社会系统特征数据。
df <- read_csv("../dataset/traitdata.csv")
df %<>% mutate(melano = factor(melano), melano = forcats::fct_recode(melano, "derived" = "1", "rod-shaped" = "0"))

# create the plot
p1 <- df %>% 
  ggplot(aes(x = male.meanB, y = fema.meanB)) + 
  geom_point(aes(color = melano)) +
  scale_color_manual(values = c("#BC3C29FF", "#0072B5FF")) +
  labs(x = "male mean brightness", y = "female mean brightness", title = "relationship between male and female brightness") +
  theme_pubr()

p1

p_xhist <- axis_canvas(p1, axis = "x") +
  geom_histogram(data = df, aes(x = male.meanB, fill = melano)) + scale_fill_manual(values = c("#BC3C29FF", "#0072B5FF"))

p_y_box <- axis_canvas(p1, axis = "y") +
  geom_boxplot(data = df, aes(x = 0, y = fema.meanB, fill = melano)) +
  scale_fill_manual(values = c("#BC3C29FF", "#0072B5FF"))
p_y_density <- axis_canvas(p1, axis = "y", coord_flip = TRUE) +  geom_density(data = df, aes(x = fema.meanB,fill = melano), color = NA, alpha = 0.5) +
  scale_fill_manual(values = c("#BC3C29FF", "#0072B5FF")) +  coord_flip()

combined_plot <- insert_xaxis_grob(p1 + theme(legend.position="top"), p_xhist, position = "bottom")
combined_plot %<>% insert_yaxis_grob(., p_y_box, position = "right")
combined_plot %<>% insert_yaxis_grob(., p_y_density, position = "right")

ggdraw(combined_plot)

使用par(mfrow=c(2,2))实现四图并列

par 设置图形参数，mfrow控制绘图的布局

mydata <- read.table("../dataset/svmvsrpart10times.csv",header=T)
mydata

##       diags1    diags2   kappas1   kappas2 sensitivity1 sensitivity2
## 1  0.8148148 0.7870370 0.5749705 0.5083135    0.6153846    0.5641026
## 2  0.8148148 0.8240741 0.5847751 0.6032483    0.6666667    0.6666667
## 3  0.7592593 0.7314815 0.4840132 0.4212860    0.6923077    0.6410256
## 4  0.7962963 0.6574074 0.5379230 0.2776573    0.6153846    0.5897436
## 5  0.6944444 0.6666667 0.3109049 0.3157339    0.4871795    0.6666667
## 6  0.7500000 0.7685185 0.4361949 0.4955157    0.5641026    0.6666667
## 7  0.7870370 0.6851852 0.5306122 0.3100338    0.6666667    0.5384615
## 8  0.7500000 0.7314815 0.4551570 0.4212860    0.6410256    0.6410256
## 9  0.7962963 0.7500000 0.5379230 0.4295775    0.6153846    0.5384615
## 10 0.7685185 0.7500000 0.4779582 0.4551570    0.5897436    0.6410256
##    specificity1 specificity2
## 1     0.9275362    0.9130435
## 2     0.8985507    0.9130435
## 3     0.7971014    0.7826087
## 4     0.8985507    0.6956522
## 5     0.8115942    0.6666667
## 6     0.8550725    0.8260870
## 7     0.8550725    0.7681159
## 8     0.8115942    0.7826087
## 9     0.8985507    0.8695652
## 10    0.8695652    0.8115942

plot(mydata$diags1,pch=19,xlab="",ylab="",ylim=c(0,1),cex=0.8,col="blue",xaxt="n",main="Accuracy")
axis(side=1,at=c(1:10),line=NA)
lines(mydata$diags1,col="blue")
points(mydata$diags2,col="red",pch=17,cex=0.8)
lines(mydata$diags2,col="red",lty=3)
legend("bottomright",c("svm","rpart"),col=c("blue","red"),pch=c(18,17),lty=c(1,3),cex=0.8)

par(mfrow=c(2,2))
plot(mydata$diags1,pch=19,xlab="",ylab="",ylim=c(0,1),cex=0.8,col="blue",xaxt="n",main="Accuracy")
axis(side=1,at=c(1:10),line=NA)
lines(mydata$diags1,col="blue")
points(mydata$diags2,col="red",pch=17,cex=0.8)
lines(mydata$diags2,col="red",lty=3)
legend("bottomright",c("svm","rpart"),col=c("blue","red"),pch=c(18,17),lty=c(1,3),cex=0.8)

plot(mydata$kappas1,pch=19,xlab="",ylab="",ylim=c(0,1),cex=0.8,col="blue",xaxt="n",main="Kappa")
axis(side=1,at=c(1:10),line=NA)
lines(mydata$kappas1,col="blue")
points(mydata$kappas2,col="red",pch=17,cex=0.8)
lines(mydata$kappas2,col="red",lty=3)
legend("bottomright",c("svm","rpart"),col=c("blue","red"),pch=c(18,17),lty=c(1,3),cex=0.8)

plot(mydata$sensitivity1,pch=19,xlab="",ylab="",ylim=c(0,1),cex=0.8,col="blue",xaxt="n",main="Sensitivity")
axis(side=1,at=c(1:10),line=NA)
lines(mydata$sensitivity1,col="blue")
points(mydata$sensitivity2,col="red",pch=17,cex=0.8)
lines(mydata$sensitivity2,col="red",lty=3)
legend("bottomright",c("svm","rpart"),col=c("blue","red"),pch=c(18,17),lty=c(1,3),cex=0.8)

plot(mydata$specificity1,pch=19,xlab="",ylab="",ylim=c(0,1),cex=0.8,col="blue",xaxt="n",main="Specificity")
axis(side=1,at=c(1:10),line=NA)
lines(mydata$specificity1,col="blue")
points(mydata$specificity2,col="red",pch=17,cex=0.8)
lines(mydata$specificity2,col="red",lty=3)
legend("bottomright",c("svm","rpart"),col=c("blue","red"),pch=c(18,17),lty=c(1,3),cex=0.8)

ggplot2 入门, 强大而灵活的数据可视化R包

ggplot2 是 R 语言中最流行和强大的数据可视化包之一。它基于 Leland Wilkinson 于 1999 年提出的《The Grammar of Graphics》（图形语法）理论，并由 Hadley Wickham 于 2006 年在 R 中实现为 ggplot 包。ggplot2 的核心理念是允许用户通过添加或移除不同的图层来构建复杂的、高度定制化的图表。

`ggplot2` 的优势

创建各种类型的图表： ggplot2 可以用于创建散点图、折线图、柱状图、箱线图、直方图、密度图等各种常见的图表类型，并且能够通过扩展包支持更多高级图表。
高度自定义的视觉效果： 用户可以精细地控制图表的每一个视觉属性，例如颜色、形状、大小、标签、标题、图例、坐标轴刻度等等，从而创建符合特定需求的专业级图表。
生成高质量的视觉效果： ggplot2 生成的图表具有清晰、美观的特点，可以轻松地用于出版物、报告和演示文稿等场景。
数据操作与可视化无缝结合： ggplot2 可以很好地与 R 语言中的其他数据处理包（如 dplyr）配合使用，实现数据清洗、转换和可视化的流畅工作流程。

`ggplot2` 的核心概念

ggplot2 的核心是基于图形语法，将图表分解为不同的组成部分，用户可以通过组合这些部分来创建所需的图表：

美学 (Aesthetics)： 图表中对象的视觉属性，例如位置（x 和 y 轴）、颜色、形状、大小、透明度等。
几何对象 (Geoms)： 用于在图表中实际呈现数据的几何形状，例如点 (geom_point)、线 (geom_line)、柱 (geom_bar) 等。
分面 (Facets)： 一种将数据划分为多个子集并在不同的小图中显示的方法，用于比较不同组的数据。
标签和注释 (Labels and annotations)： 用于自定义图表的标题、副标题、轴标签、图例标题以及添加额外的文本或图形注释。

美学参数

x
y
颜色（Color）
形状（Shape）
大小（Size）
透明度（Alpha）

library(ggplot2)
library(palmerpenguins)
ggplot(data = penguins, mapping = aes(x = flipper_length_mm, y = body_mass_g, color = species)) + geom_point()

ggplot(data = penguins, aes(x = flipper_length_mm, y = body_mass_g, color = species, shape = species)) + geom_point()

ggplot(data = penguins, aes(x = flipper_length_mm, y = body_mass_g, color = species, shape = species, size = body_mass_g)) + geom_point(alpha=.6)

几何对象（Geom functions）

geom_point
geom_bar
geom_line
geom_smooth
geom_boxplot
geom_violin
geom_histogram
geom_density
geom_text

ggplot(data = penguins, mapping = aes(x = flipper_length_mm, y = body_mass_g)) + 
  geom_point(alpha = .6) + geom_smooth()

ggplot(data = penguins, mapping = aes(x = species, y = flipper_length_mm)) + 
  geom_violin()

注释（Annotate）

ggplot(data = penguins) +
  geom_point(mapping = aes(x = flipper_length_mm, y = body_mass_g, color = species)) +
  labs(
    title = "Palmer Penguins: 体重与鳍长的关系",
    subtitle = "按物种分组的散点图",
    caption = "数据由 Dr. Kristen Gorman 收集",
    x = "Bill Length (mm)",# 设置 x 轴标签
    y = "Bill Depth (mm)",# 设置 y 轴标签
    color = "Species" # 设置图例的标题
  ) +  
  annotate(
    "text",
    x = 220,
    y = 3500,
    label = "Gentoo 企鹅最大",
    fontface = "bold",
    size = 4.5,
    angle = 25
  )

绘图区域分面（Facets）

ggplot(data = penguins, mapping = aes(x = flipper_length_mm, y = body_mass_g, color = species)) + geom_point() + facet_wrap(~species)

ggplot(data = penguins, mapping = aes(x = species, y = body_mass_g, color = species)) + geom_boxplot() + facet_wrap(~island) + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

library(ggplot2)
library(RColorBrewer)
library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

mydata <- read.table("../dataset/DatasaurusDozen.tsv", header=T, stringsAsFactors=F)
head(mydata)

##   dataset       x       y
## 1    dino 55.3846 97.1795
## 2    dino 51.5385 96.0256
## 3    dino 46.1538 94.4872
## 4    dino 42.8205 91.4103
## 5    dino 40.7692 88.3333
## 6    dino 38.7179 84.8718

table(mydata$dataset)

## 
##       away   bullseye     circle       dino       dots    h_lines high_lines 
##        142        142        142        142        142        142        142 
## slant_down   slant_up       star    v_lines wide_lines    x_shape 
##        142        142        142        142        142        142

summary(subset(mydata, dataset=="away"))

##    dataset                x               y           
##  Length:142         Min.   :15.56   Min.   : 0.01512  
##  Class :character   1st Qu.:39.72   1st Qu.:24.62589  
##  Mode  :character   Median :53.34   Median :47.53527  
##                     Mean   :54.27   Mean   :47.83472  
##                     3rd Qu.:69.15   3rd Qu.:71.80315  
##                     Max.   :91.64   Max.   :97.47577

summary(subset(mydata, dataset=="dino"))

##    dataset                x               y         
##  Length:142         Min.   :22.31   Min.   : 2.949  
##  Class :character   1st Qu.:44.10   1st Qu.:25.288  
##  Mode  :character   Median :53.33   Median :46.026  
##                     Mean   :54.26   Mean   :47.832  
##                     3rd Qu.:64.74   3rd Qu.:68.526  
##                     Max.   :98.21   Max.   :99.487

describeBy(mydata, group="dataset")

## 
##  Descriptive statistics by group 
## dataset: away
##          vars   n  mean    sd median trimmed   mad   min   max range  skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00   NaN
## x           2 142 54.27 16.77  53.34   54.36 21.22 15.56 91.64 76.08 -0.02
## y           3 142 47.83 26.94  47.54   48.23 35.89  0.02 97.48 97.46 -0.09
##          kurtosis   se
## dataset*      NaN 0.00
## x           -1.24 1.41
## y           -1.28 2.26
## ------------------------------------------------------------ 
## dataset: bullseye
##          vars   n  mean    sd median trimmed   mad   min   max range skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN
## x           2 142 54.27 16.77  53.84   53.86 17.49 19.29 91.74 72.45 0.17
## y           3 142 47.83 26.94  47.38   47.83 36.83  9.69 85.88 76.18 0.01
##          kurtosis   se
## dataset*      NaN 0.00
## x           -0.27 1.41
## y           -1.45 2.26
## ------------------------------------------------------------ 
## dataset: circle
##          vars   n  mean    sd median trimmed   mad   min   max range skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN
## x           2 142 54.27 16.76  54.02   54.08 16.29 21.86 85.66 63.80 0.03
## y           3 142 47.84 26.93  51.03   47.71 41.72 16.33 85.58 69.25 0.00
##          kurtosis   se
## dataset*      NaN 0.00
## x           -0.67 1.41
## y           -1.80 2.26
## ------------------------------------------------------------ 
## dataset: dino
##          vars   n  mean    sd median trimmed   mad   min   max range skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN
## x           2 142 54.26 16.77  53.33   53.69 15.97 22.31 98.21 75.90 0.28
## y           3 142 47.83 26.94  46.03   46.90 30.79  2.95 99.49 96.54 0.25
##          kurtosis   se
## dataset*      NaN 0.00
## x           -0.29 1.41
## y           -1.06 2.26
## ------------------------------------------------------------ 
## dataset: dots
##          vars   n  mean    sd median trimmed   mad   min   max range  skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00   NaN
## x           2 142 54.26 16.77  50.98   54.88  2.76 25.44 77.95 52.51 -0.12
## y           3 142 47.84 26.93  51.30   46.88 46.99 15.77 94.25 78.48  0.15
##          kurtosis   se
## dataset*      NaN 0.00
## x           -0.87 1.41
## y           -1.41 2.26
## ------------------------------------------------------------ 
## dataset: h_lines
##          vars   n  mean    sd median trimmed   mad   min   max range skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN
## x           2 142 54.26 16.77  53.07   53.87 17.90 22.00 98.29 76.28 0.24
## y           3 142 47.83 26.94  50.47   47.23 29.62 10.46 90.46 80.00 0.15
##          kurtosis   se
## dataset*      NaN 0.00
## x           -0.35 1.41
## y           -1.16 2.26
## ------------------------------------------------------------ 
## dataset: high_lines
##          vars   n  mean    sd median trimmed   mad   min   max range skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN
## x           2 142 54.27 16.77  54.17   53.91 16.00 17.89 96.08 78.19 0.19
## y           3 142 47.84 26.94  32.50   47.15 22.77 14.91 87.15 72.24 0.20
##          kurtosis   se
## dataset*      NaN 0.00
## x           -0.28 1.41
## y           -1.80 2.26
## ------------------------------------------------------------ 
## dataset: slant_down
##          vars   n  mean    sd median trimmed   mad   min   max range skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN
## x           2 142 54.27 16.77  53.14   53.83 16.24 18.11 95.59 77.48 0.25
## y           3 142 47.84 26.94  46.40   47.31 28.65  0.30 99.64 99.34 0.19
##          kurtosis   se
## dataset*      NaN 0.00
## x           -0.26 1.41
## y           -0.95 2.26
## ------------------------------------------------------------ 
## dataset: slant_up
##          vars   n  mean    sd median trimmed   mad   min   max range skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN
## x           2 142 54.27 16.77  54.26   53.69 16.13 20.21 95.26 75.05 0.23
## y           3 142 47.83 26.94  45.29   46.83 33.80  5.65 99.58 93.93 0.26
##          kurtosis   se
## dataset*      NaN 0.00
## x           -0.38 1.41
## y           -1.09 2.26
## ------------------------------------------------------------ 
## dataset: star
##          vars   n  mean    sd median trimmed   mad   min   max range skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN
## x           2 142 54.27 16.77  56.53   54.33 22.16 27.02 86.44 59.41 0.08
## y           3 142 47.84 26.93  50.11   46.56 37.30 14.37 92.21 77.85 0.24
##          kurtosis   se
## dataset*      NaN 0.00
## x           -1.19 1.41
## y           -1.36 2.26
## ------------------------------------------------------------ 
## dataset: v_lines
##          vars   n  mean    sd median trimmed   mad   min   max range skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN
## x           2 142 54.27 16.77  50.36   53.57 28.37 30.45 89.50 59.06 0.26
## y           3 142 47.84 26.94  47.11   46.83 33.00  2.73 99.69 96.96 0.23
##          kurtosis   se
## dataset*      NaN 0.00
## x           -0.58 1.41
## y           -1.05 2.26
## ------------------------------------------------------------ 
## dataset: wide_lines
##          vars   n  mean    sd median trimmed   mad   min   max range  skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00   NaN
## x           2 142 54.27 16.77  64.55   54.70 11.68 27.44 77.92 50.48 -0.32
## y           3 142 47.83 26.94  46.28   46.97 32.59  0.22 99.28 99.07  0.25
##          kurtosis   se
## dataset*      NaN 0.00
## x           -1.72 1.41
## y           -1.09 2.26
## ------------------------------------------------------------ 
## dataset: x_shape
##          vars   n  mean    sd median trimmed   mad   min   max range skew
## dataset*    1 142  1.00  0.00   1.00    1.00  0.00  1.00  1.00  0.00  NaN
## x           2 142 54.26 16.77  47.14   53.27 14.76 31.11 85.45 54.34 0.48
## y           3 142 47.84 26.93  39.88   46.98 33.05  4.58 97.84 93.26 0.23
##          kurtosis   se
## dataset*      NaN 0.00
## x           -1.37 1.41
## y           -1.44 2.26

ggplot(mydata, aes(dataset,y, color=dataset))+geom_boxplot()

ggplot(mydata, aes(dataset,y, color=dataset))+geom_violin()

ggplot(mydata, aes(x,y, color=dataset))+geom_point()

p <- ggplot(mydata, aes(x,y, color=dataset))+geom_point()+facet_wrap(~dataset, nrow=3) + scale_color_manual(values=c(brewer.pal(12,"Paired"), "black")) +theme(legend.position="none")
print(p)

R data visualization

Wang Zhigang, wangzg.pumc@gmail.com

2025-04-23

快速入门

当计算器

使用example快速学习

使用？，使用help.start()查看帮助文档

R 读取数据

高级绘图命令，产生一个图

低级绘图命令增加点，线，文本，图例等

坐标轴的控制

R的符号形状

R的颜色

ggsci Nature Publication Group & Lacent

多图（使用split.screen实现）

使用cowplot包实现多绘图区域的对齐

Marginal plots（边际图）with cowplot

使用par(mfrow=c(2,2))实现四图并列

ggplot2 入门, 强大而灵活的数据可视化R包

`ggplot2` 的优势

`ggplot2` 的核心概念

美学参数

几何对象（Geom functions）

注释（Annotate）

绘图区域分面（Facets）

R data visualization

Wang Zhigang, wangzg.pumc@gmail.com

2025-04-23

快速入门

当计算器

使用example快速学习

使用？，使用help.start()查看帮助文档

R 读取数据

高级绘图命令，产生一个图

低级绘图命令增加点，线，文本，图例等

坐标轴的控制

R的符号形状

R的颜色

ggsci Nature Publication Group & Lacent

多图（使用split.screen实现）

使用cowplot包实现多绘图区域的对齐

Marginal plots（边际图）with cowplot

使用par(mfrow=c(2,2))实现四图并列

ggplot2 入门, 强大而灵活的数据可视化R包

ggplot2 的优势

ggplot2 的核心概念

美学参数

几何对象（Geom functions）

注释（Annotate）

绘图区域分面（Facets）

`ggplot2` 的优势

`ggplot2` 的核心概念