R引言绘图示例

# 绘图示例： GDP数据

# install.packages("usethis")
# install.packages("devtools")
# install.packages("voronoiTreemap")

library("usethis")
library("devtools")
library("voronoiTreemap")

data <- vt_input_from_df(ExampleGDP)

gdp_json <- vt_export_json(data)

vt_d3(gdp_json,
      legend=TRUE, width = 800, height=500,
      size_circle = "5px",
      size_border = "2px", 
      size_border_hover = "5px")

GDP数据

#csv: Comma-Separated Values
#file<-read.csv("./data/export2020.csv", header=TRUE, sep = ",", stringsAsFactors = F) 

# file <- "http://162.105.145.16/rs/R_data_samples/export2020.csv"
# pop<-read.csv(file, header=TRUE, sep = ",", stringsAsFactors = F, fileEncoding = "GB2312") 
# 
# data<-vt_input_from_df(pop)
# gdp_json<-vt_export_json(data)
# vt_d3(gdp_json,
#       legend=TRUE, width = 800, height=500,
#       size_circle = "5px",
#       size_border = "2px", 
#       size_border_hover = "5px")

1 R入门

rm(list=ls())

#创建一个名为y的向量对象，它包含50个来自标准正态分布的随机偏差。
y <- rnorm(50)

#创建一个名为x的向量对象，它包含1~50共50个整数。
x  <- c(1:50)

plot (x,   y)

##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50

##  [1]  0.04672860 -0.50530575  0.29831725 -0.65306209 -0.81164885  0.29620891
##  [7]  1.72089226  0.23961060  0.49191893 -0.22515998  0.57693681 -0.57220944
## [13]  0.15345285  1.07378005 -0.47728744 -1.93109108  1.88413127 -1.21435029
## [19] -0.64251071 -0.61748440 -1.36309981  1.98841315  0.44390043  1.33636769
## [25]  2.15152962 -1.40061650  0.18201961  0.19631750 -0.63610197  0.92186519
## [31] -1.07570892  0.09612166 -0.09585022 -1.30867486  0.13458338 -1.95839092
## [37] -0.93267133 -0.40837518 -0.02897932 -1.10444052 -1.09334793  2.56723743
## [43] -0.28883785 -0.11020391 -1.44266360  0.38403116  0.41710666  0.23118942
## [49] -1.43608902 -0.52316254

if(!isTRUE(getOption('knitr.in.progress')) )
  {
  help(lm)

  help("lm")

  ?lm

  ?"lm"

  help(rlm, package="MASS")

  ??lm

  demo("nlm")
}

The workspace

#设置工作目录
#在Windows系统中，正斜杠/表示除法；反斜杠\用来表示目录。 #在Unix系统中，/表示目录；\表示转义字符。 

#setwd("D:\R_learning")  #会出错
#用两个反斜杠，第一个表示转义，第二个表示目录
setwd("D:\\R_learning")
getwd()

## [1] "D:/R_learning"

setwd("D:/R_learning")
getwd()

## [1] "D:/R_learning"

#显示当前的选项设置
options()

## $add.smooth
## [1] TRUE
## 
## $browserNLdisabled
## [1] FALSE
## 
## $callr.condition_handler_cli_message
## function (msg) 
## {
##     custom_handler <- getOption("cli.default_handler")
##     if (is.function(custom_handler)) {
##         custom_handler(msg)
##     }
##     else {
##         cli_server_default(msg)
##     }
## }
## <bytecode: 0x0000025e04477e50>
## <environment: namespace:cli>
## 
## $CBoundsCheck
## [1] FALSE
## 
## $check.bounds
## [1] FALSE
## 
## $citation.bibtex.max
## [1] 1
## 
## $continue
## [1] "+ "
## 
## $contrasts
##         unordered           ordered 
## "contr.treatment"      "contr.poly" 
## 
## $defaultPackages
## [1] "datasets"  "utils"     "grDevices" "graphics"  "stats"     "methods"  
## 
## $demo.ask
## [1] "default"
## 
## $deparse.cutoff
## [1] 60
## 
## $device
## function (width = 7, height = 7, ...) 
## {
##     grDevices::pdf(NULL, width, height, ...)
## }
## <bytecode: 0x0000025e064b73c0>
## <environment: namespace:knitr>
## 
## $device.ask.default
## [1] FALSE
## 
## $devtools.ellipsis_action
## function (message = NULL, class = NULL, ..., body = NULL, footer = NULL, 
##     use_cli_format = NULL, .frequency = c("always", "regularly", 
##         "once"), .frequency_id = NULL, .subclass = deprecated()) 
## {
##     message <- validate_signal_args(message, class, NULL, .subclass, 
##         "warn")
##     message_info <- cnd_message_info(message, body, footer, caller_env(), 
##         use_cli_format = use_cli_format)
##     message <- message_info$message
##     extra_fields <- message_info$extra_fields
##     use_cli_format <- message_info$use_cli_format
##     .frequency <- arg_match0(.frequency, c("always", "regularly", 
##         "once"))
##     if (!needs_signal(.frequency, .frequency_id, warning_freq_env, 
##         "rlib_warning_verbosity")) {
##         return(invisible(NULL))
##     }
##     cnd <- warning_cnd(class, message = message, !!!extra_fields, 
##         use_cli_format = use_cli_format, ...)
##     cnd$footer <- c(cnd$footer, message_freq(message, .frequency, 
##         "warning"))
##     local_long_messages()
##     warning(cnd)
## }
## <bytecode: 0x0000025e08ba7bf0>
## <environment: namespace:rlang>
## 
## $devtools.install.args
## [1] ""
## 
## $devtools.path
## [1] "~/R-dev"
## 
## $digits
## [1] 7
## 
## $echo
## [1] FALSE
## 
## $editor
## [1] "notepad"
## 
## $encoding
## [1] "native.enc"
## 
## $example.ask
## [1] "default"
## 
## $expressions
## [1] 5000
## 
## $help.search.types
## [1] "vignette" "demo"     "help"    
## 
## $help.try.all.packages
## [1] FALSE
## 
## $help_type
## [1] "html"
## 
## $htmltools.preserve.raw
## [1] TRUE
## 
## $HTTPUserAgent
## [1] "R (4.2.2 x86_64-w64-mingw32 x86_64 mingw32)"
## 
## $install.packages.compile.from.source
## [1] "interactive"
## 
## $internet.info
## [1] 2
## 
## $keep.parse.data
## [1] TRUE
## 
## $keep.parse.data.pkgs
## [1] FALSE
## 
## $keep.source
## [1] FALSE
## 
## $keep.source.pkgs
## [1] FALSE
## 
## $knitr.in.progress
## [1] TRUE
## 
## $locatorBell
## [1] TRUE
## 
## $mailer
## [1] "mailto"
## 
## $matprod
## [1] "default"
## 
## $max.print
## [1] 99999
## 
## $menu.graphics
## [1] TRUE
## 
## $na.action
## [1] "na.omit"
## 
## $nwarnings
## [1] 50
## 
## $OutDec
## [1] "."
## 
## $pager
## [1] "internal"
## 
## $papersize
## [1] "a4"
## 
## $PCRE_limit_recursion
## [1] NA
## 
## $PCRE_study
## [1] FALSE
## 
## $PCRE_use_JIT
## [1] TRUE
## 
## $pdfviewer
## [1] "C:/PROGRA~1/R/R-42~1.2/bin/x64/open.exe"
## 
## $pkgType
## [1] "both"
## 
## $prompt
## [1] "> "
## 
## $repos
##     CRAN 
## "@CRAN@" 
## 
## $scipen
## [1] 0
## 
## $show.coef.Pvalues
## [1] TRUE
## 
## $show.error.messages
## [1] TRUE
## 
## $show.signif.stars
## [1] TRUE
## 
## $showErrorCalls
## [1] TRUE
## 
## $str
## $str$strict.width
## [1] "no"
## 
## $str$digits.d
## [1] 3
## 
## $str$vec.len
## [1] 4
## 
## $str$list.len
## [1] 99
## 
## $str$deparse.lines
## NULL
## 
## $str$drop.deparse.attr
## [1] TRUE
## 
## $str$formatNum
## function (x, ...) 
## format(x, trim = TRUE, drop0trailing = TRUE, ...)
## <environment: 0x0000025e053d2198>
## 
## 
## $str.dendrogram.last
## [1] "`"
## 
## $stringsAsFactors
## [1] FALSE
## 
## $tikzMetricsDictionary
## [1] "Ch01-R入门介绍-tikzDictionary"
## 
## $timeout
## [1] 60
## 
## $try.outFile
## A connection with                            
## description "output"        
## class       "textConnection"
## mode        "wr"            
## text        "text"          
## opened      "opened"        
## can read    "no"            
## can write   "yes"           
## 
## $ts.eps
## [1] 1e-05
## 
## $ts.S.compat
## [1] FALSE
## 
## $unzip
## [1] "internal"
## 
## $useFancyQuotes
## $useFancyQuotes$useFancyQuotes
## [1] FALSE
## 
## 
## $verbose
## [1] FALSE
## 
## $warn
## [1] 0
## 
## $warning.length
## [1] 1000
## 
## $width
## [1] 80
## 
## $windowsTimeouts
## [1] 100 500

#数字被格式化为小数点后3位有效数字的格式; 
options(digits=3)

#创建一个包含20个均匀分布随机变量的向量; 
#生成了此数据的摘要统计量和直方图; 
x <- runif(20)
summary(x)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.093   0.191   0.503   0.483   0.744   0.942

hist(x)

#命令的历史记录保存到文件.Rhistory中；
#工作空间（包含向量x）保存到文件.RData中。

if(isTRUE(getOption('knitr.in.progress')) == FALSE){
  savehistory()
}
save.image()

#输入输出

#执行外部脚本(程序)
source("Ch01_test1.R")

## .. 2 .... 3 .... 5 .... 7 .... 11 ..
## .. 13 .... 17 .... 19 .... 23 .... 29 ..
## .. 31 .... 37 .... 41 .... 43 .... 47 ..
## .. 53 .... 59 .... 61 .... 67 .... 71 ..
## .. 73 .... 79 .... 83 .... 89 .... 97 ..

#文本输出
#输出到文件
sink("Ch01_demo2.txt", append=TRUE, split=TRUE)
source("Ch01_test1.R")

## .. 2 .... 3 .... 5 .... 7 .... 11 ..
## .. 13 .... 17 .... 19 .... 23 .... 29 ..
## .. 31 .... 37 .... 41 .... 43 .... 47 ..
## .. 53 .... 59 .... 61 .... 67 .... 71 ..
## .. 73 .... 79 .... 83 .... 89 .... 97 ..

sink()


#图形输出
pdf("Ch01_demo1.pdf")

y <- rnorm(50)
x  <- c(1:50)

plot(x,   y)

dev.off()

## png 
##   2

R包

#显示库所在的位置
.libPaths()

## [1] "C:/Users/Jack/AppData/Local/R/win-library/4.2"
## [2] "C:/Program Files/R/R-4.2.2/library"

#显示库中安装有哪些包
library()

#哪些包已加载
search()

##  [1] ".GlobalEnv"             "package:voronoiTreemap" "package:devtools"      
##  [4] "package:usethis"        "package:stats"          "package:graphics"      
##  [7] "package:grDevices"      "package:utils"          "package:datasets"      
## [10] "package:methods"        "Autoloads"              "package:base"

#包的安装：

if(!isTRUE(getOption('knitr.in.progress')) )
  {

  install.packages()  #无参数，交互方式

#repos参数可表示镜像地址
  #install.packages("gclus", repos = "http://cran.us.r-project.org")

  install.packages("gclus", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")

  update.packages("gclus", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")

  installed.packages()
}

#载入包
library(gclus)

## 载入需要的程辑包：cluster

#test1 <- library("abcd")
#Error in library("abcd") : there is no package called 'abcd'
#test1
#library没有返回值
#Error: object 'test' not found

test <- require("abc")

## 载入需要的程辑包：abc

## 载入需要的程辑包：abc.data

## 载入需要的程辑包：nnet

## 载入需要的程辑包：quantreg

## 载入需要的程辑包：SparseM

## 
## 载入程辑包：'SparseM'

## The following object is masked from 'package:base':
## 
##     backsolve

## 载入需要的程辑包：MASS

## 载入需要的程辑包：locfit

## locfit 1.5-9.7    2023-01-02

# Loading required package: abc
# Warning message:
#   In library(package, lib.loc = lib.loc, character.only = TRUE, logical.return = TRUE,  :
#                there is no package called 'abc'

test     #require有返回值

## [1] TRUE

#[1] FALSE
             
             
if(require("lme4")){
  print("lme4 is loaded correctly")
} else {
  print("trying to install lme4")
  install.packages("lme4", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
  if(require(lme4)){
    print("lme4 installed and loaded")
  } else {
    stop("could not install lme4")
  }
}

## 载入需要的程辑包：lme4

## 载入需要的程辑包：Matrix

## [1] "lme4 is loaded correctly"

help(package="lme4")
#卸载（从当前运行环境）
detach("package:lme4")

if(!isTRUE(getOption('knitr.in.progress')) )
  {
#卸除（从硬盘）
  remove.packages( c('lme4') )
#重新安装
  install.packages("lme4", repos = "http://cran.us.r-project.org")
}

2 创建数据集

2.1 基本数据类型

# 数值型
a<- -1
is.numeric(a)

## [1] TRUE

# 字符型
b<- "peter"
nchar(b)

## [1] 5

# 日期型
c<- as.Date("2021-03-08")
class(c)

## [1] "Date"

d<- as.POSIXct("2021-03-08 13:00")   #含日期和时间
class(d)

## [1] "POSIXct" "POSIXt"

## [1] "2021-03-08 13:00:00 CST"

# [1] "2021-03-08 13:00:00 CST"


# 逻辑型
e <- TRUE; f<- FALSE
e

## [1] TRUE

# [1] TRUE
f

## [1] FALSE

# [1] FALSE

2.2 基本数据结构

# 向量： Vectors，存储数值型、字符型或逻辑性的一维数组。

a <- c(1, 2, 5, 3, 6, -2, 4)
b <- c("one", "two", "three")
c <- c(TRUE, TRUE, TRUE, FALSE, TRUE, FALSE)

# 注意：同一向量中无法混杂不同模式的数据。


# 向量的访问：
a <- c(1, 2, 5, 3, 6, -2, 4)
a[3]

## [1] 5

a[c(1, 3, 5)]

## [1] 1 5 6

a[2:6]

## [1]  2  5  3  6 -2

# 矩阵：Matrices，二维数组，只是每个元素都拥有相同的类型（数值型、字符型或逻辑型）。可通过函数matrix创建矩阵：
# mymatrix <- matrix(vector, nrow=number_of_rows, ncol=number_of_columns, byrow=logical_value, dimnames = list(char_vector_rownames, char_vector_colnames))

y <- matrix(1:20, nrow=5, ncol=4) 
y

##      [,1] [,2] [,3] [,4]
## [1,]    1    6   11   16
## [2,]    2    7   12   17
## [3,]    3    8   13   18
## [4,]    4    9   14   19
## [5,]    5   10   15   20

cells <- c(1,26,24,68)
rnames <- c("R1", "R2")
cnames <- c("C1", "C2") 
mymatrix <- matrix(cells, nrow=2, ncol=2, byrow=TRUE,
                   dimnames=list(rnames, cnames))
mymatrix

##    C1 C2
## R1  1 26
## R2 24 68

mymatrix <- matrix(cells, nrow=2, ncol=2, byrow=FALSE,
                     dimnames=list(rnames, cnames))
mymatrix

##    C1 C2
## R1  1 24
## R2 26 68

#数组：Arrays，与矩阵类似，但是维度可以大于2。数组可通过array函数创建，形式如下：
# myarray <- array(vector, dimensions,  dimnames)
dim1 <- c("A1", "A2")
dim2 <- c("B1", "B2", "B3")
dim3 <- c("C1", "C2", "C3", "C4")
z <- array(1:24,  c(2, 3, 4),  dimnames=list(dim1, dim2, dim3))
z

## , , C1
## 
##    B1 B2 B3
## A1  1  3  5
## A2  2  4  6
## 
## , , C2
## 
##    B1 B2 B3
## A1  7  9 11
## A2  8 10 12
## 
## , , C3
## 
##    B1 B2 B3
## A1 13 15 17
## A2 14 16 18
## 
## , , C4
## 
##    B1 B2 B3
## A1 19 21 23
## A2 20 22 24

# 数据框：Data frames, 是R中最常处理的数据结构，不同的列可以包含不同的数据类型（数值型、字符型等），通过函数data.frame()创建：
# mydata <- data.frame(col1,col2,col3, ? )


patientID <- c(1, 2, 3, 4)
age <- c(25, 34, 28, 52)
diabetes <- c("Type1", "Type2", "Type1", "Type1")
status <- c("Poor", "Improved", "Excellent", "Poor")
patientdata <- data.frame(patientID, age, diabetes, status)

patientdata

##   patientID age diabetes    status
## 1         1  25    Type1      Poor
## 2         2  34    Type2  Improved
## 3         3  28    Type1 Excellent
## 4         4  52    Type1      Poor

patientdata[1:2]

##   patientID age
## 1         1  25
## 2         2  34
## 3         3  28
## 4         4  52

patientdata[c("diabetes","status")]

##   diabetes    status
## 1    Type1      Poor
## 2    Type2  Improved
## 3    Type1 Excellent
## 4    Type1      Poor

patientdata$age

## [1] 25 34 28 52

# 数据框变量访问： attach()、detach()和with()用于简化代码
summary(mtcars$mpg)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    10.4    15.4    19.2    20.1    22.8    33.9

plot(mtcars$mpg, mtcars$disp)

plot(mtcars$mpg, mtcars$wt)

# 利用attach():
attach(mtcars)
summary(mpg)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    10.4    15.4    19.2    20.1    22.8    33.9

plot(mpg, disp)

plot(mpg, wt)

detach(mtcars)
# 缺陷是数据框变量与全局环境变量重复时出现混淆。


with (mtcars, {
  print ( summary(mpg) )
  plot(mpg, disp)
  plot(mpg, wt)
  stats <- summary(mpg)
  stats 
} )

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    10.4    15.4    19.2    20.1    22.8    33.9

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    10.4    15.4    19.2    20.1    22.8    33.9

#查看stats会出错，它不是全程环境变量
# stats
# # Error: object ‘stats’ not found
# # 缺陷是赋值仅在括号内生效，如果需要赋值给全局变量，需要在括号内使用特殊赋值符 “<<-” 替代标准赋值符”<-” :  

with (mtcars, {
  print ( summary(mpg) )
  plot(mpg, disp)
  plot(mpg, wt)
  stats <<- summary(mpg)
  stats 
} )

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    10.4    15.4    19.2    20.1    22.8    33.9

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    10.4    15.4    19.2    20.1    22.8    33.9

stats

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    10.4    15.4    19.2    20.1    22.8    33.9

2.3 因子变量

# 因子：factors, 用于处理名义型变量（糖尿病类型Type1、Type2）和有序型变量（病人状态poor、improved、excellent）

diabetes <- c("Type1", "Type2", "Type1", "Type1")
diabetes <- factor(diabetes)

# 将此名义向量存储为(1, 2, 1, 1)，并在内部将其关联为1=Type1和2=Type2。  ordered=TRUE选项用于有序变量：
status <- c("Poor", "Improved", "Excellent", "Poor")
status <- factor(status, ordered=TRUE) 

# 将向量编码为(3, 2, 1, 3)，并在内部将这些值关联为1=Excellent、2=Improved以及3=Poor。另外，针对此向量进行的任何分析都会将其作为有序型变量对待，并自动选择合适的统计方法。可以通过指定levels选项来覆盖默认排序
status <- factor(status, order=TRUE, levels=c("Poor", "Improved", "Excellent"))
# 将指定1=Poor、2=Improved、3=Excellent

2.4 列表

# 列表：lists,  是R数据类型中最为复杂的一种, 是一些对象（或成分，components）的有序集合。它整合若干对象为单个对象名下。某个列表中可能是若干向量、矩阵、数据框，甚至其他列表的组合，其中的对象可以是目前为止讲到的任何结构：
# mylist <- list(object1, object2, …)          
# mylist <- list(name1=object1, name2=object2, …)

g <- "My First List"
h <- c(25, 26, 18, 39)
j <- matrix(1:10, nrow=5)
k <- c("one", "two", "three")
mylist <- list(title=g, ages=h, j, k)

mylist

## $title
## [1] "My First List"
## 
## $ages
## [1] 25 26 18 39
## 
## [[3]]
##      [,1] [,2]
## [1,]    1    6
## [2,]    2    7
## [3,]    3    8
## [4,]    4    9
## [5,]    5   10
## 
## [[4]]
## [1] "one"   "two"   "three"

mylist[[2]]

## [1] 25 26 18 39

mylist[["ages"]]

## [1] 25 26 18 39

2.5 数据输入方法

# 键盘：
mydata <- data.frame(age=numeric(0), gender=character(0), weight=numeric(0))
mydata <- edit(mydata)
mydata

## [1] age    gender weight
## <0 行> (或0-长度的row.names)

mydatatxt <-   "
      age  gender  weight
       25   m       166
       30   f         115
       18   f         120
"

mydatatxt

## [1] "\n      age  gender  weight\n       25   m       166\n       30   f         115\n       18   f         120\n"

Mydata <-  read.table(header=TRUE, text=mydatatxt)
Mydata

##   age gender weight
## 1  25      m    166
## 2  30      f    115
## 3  18      f    120

# 数据输入：从带分隔符的文本文件导入
grades <- read.table("../data/studentgrades.csv", header=TRUE, sep=",", row.names="STUDENTID",fileEncoding = "GB2312")

pop <- read.csv("../data/export2020.csv", header=TRUE, sep=",",fileEncoding = "GB2312")

# 注意中文文本的具体编码格式
pop1 <- read.csv("../data/export2020.csv", header=TRUE, sep=",", fileEncoding = "GB2312")

#export2020-1.csv为UTF-8格式
pop2 <- read.csv("../data/export2020-1.csv", header=TRUE, sep=",", fileEncoding = "UTF-8")

#写入文件时按缺省（GB2312）

unlink("../data/export2020-2.csv")

write.table(pop, "../data/export2020-2.csv", row.names=FALSE,   col.names=TRUE,  sep="," )
pop1<-read.table("../data/export2020-2.csv", header = TRUE,  sep="," )

file <- "http://162.105.145.16/rs/R_data_samples/export2020.csv"
pop<-read.csv(file, header=TRUE, sep = ",", stringsAsFactors = F, fileEncoding = "GB2312") 

file <- "http://162.105.145.16/rs/R_data_samples/export2020-1.csv"
pop<-read.csv(file, header=TRUE, sep = ",", stringsAsFactors = F, fileEncoding = "GB2312")  #查看结果，发现中文出错

## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## 输入链结'http://162.105.145.16/rs/R_data_samples/export2020-1.csv'内的输入不对

## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on
## 'http://162.105.145.16/rs/R_data_samples/export2020-1.csv'

pop<-read.csv(file, header=TRUE, sep = ",", stringsAsFactors = F, fileEncoding = "UTF-8")  #加编码格式

pop<-read.csv(file, fileEncoding = "UTF-8")  #加编码格式

3 图形初阶

attach(mtcars)
plot(wt, mpg)
abline(lm(mpg~wt))
title("Regression of MPG on Weight")

detach(mtcars)

pdf("Ch01_demo3.pdf")

attach(mtcars)
plot(wt, mpg)
abline(lm(mpg~wt))
title("Regression of MPG on Weight")
detach(mtcars)
dev.off()

## png 
##   2

#注意类似上述代码块，需要一起调试

3.1 plot图形参数

dose <- c(20, 30, 40, 45, 60)
drugA <- c(16, 20, 27, 40, 60)
drugB <- c(15, 18, 25, 31, 40)

plot(dose, drugA, type="b")

par(lty=2, pch=17)  #设置虚线及三角图标
plot(dose, drugA, type="b")

# or:  
plot(dose, drugA, type="b", lty=3, lwd=3, pch=15, cex=1.2)

#色彩：
n <- 10
mycolors <- rainbow(n)

pie(rep(2, n), labels=mycolors, col=mycolors, cex=0.8)

mygrays <- gray(0:n/n)
pie(rep(1, n), labels=mygrays, col=mygrays, cex=0.7)

mycolors

##  [1] "#FF0000" "#FF9900" "#CCFF00" "#33FF00" "#00FF66" "#00FFFF" "#0066FF"
##  [8] "#3300FF" "#CC00FF" "#FF0099"

mygrays

##  [1] "#000000" "#1A1A1A" "#333333" "#4D4D4D" "#666666" "#808080" "#999999"
##  [8] "#B3B3B3" "#CCCCCC" "#E6E6E6" "#FFFFFF"

dose <- c(20, 30, 40, 45, 60)
drugA <- c(16, 20, 27, 40, 60)
drugB <- c(15, 18, 25, 31, 40)

opar <- par(no.readonly=TRUE)

par(pin=c(4, 3))
par(lwd=2, cex=1.5)
par(cex.axis=.75, font.axis=3)
plot(dose, drugA, type="b", pch=19, lty=2, col="red")

plot(dose, drugB, type="b", pch=23, lty=6, col="blue", bg="green")

par(opar)

3.2 坐标轴和标题设置

plot(dose, drugA, type="b",
     col="red", lty=2, pch=2, lwd=2,
     main="Clinical Trials for Drug A",
     sub="This is hypothetical data",
     xlab="Dosage", ylab="Drug Response",
     xlim=c(10, 60), ylim=c(10, 70))

# 标题
plot(dose, drugA, type="b",
     col="red", lty=2, pch=2, lwd=2,
     main="", sub="", xlab="", ylab="",
     xlim=c(10, 60), ylim=c(10, 70))
title(main="My Title", col.main="red",
      sub="My Sub-title", col.sub="blue",
      xlab="My X label", ylab="My Y label",
      col.lab="green", cex.lab=0.75)

# 注意title和plot同时选中，然后Ctrl+Enter调试运行

x <- c(1:10)
y <- x
z <- 10/x

#存储原有绘图参数
opar <- par(no.readonly=TRUE)

par(mar=c(5, 4, 4, 8) + 0.1)
plot(x, y, type="b", pch=21, col="red",yaxt="n", lty=3, ann=FALSE)
lines(x, z, type="b", pch=22, col="blue", lty=2)
axis(2, at=x, labels=x, col.axis="red", las=2)
axis(4, at=z, labels=round(z, digits=2),
     col.axis="blue", las=2, cex.axis=0.7, tck=-.01)
mtext("y=1/x", side=4, line=3, cex.lab=1, las=2, col="blue")
title("An Example of Creative Axes", xlab="X values", ylab="Y=X")

3.3 次要刻度线

if(require("Hmisc")){
  print("Hmisc is loaded correctly")
} else {
  print("trying to install Hmisc")
  install.packages("Hmisc")
}

## 载入需要的程辑包：Hmisc

## 载入需要的程辑包：lattice

## 载入需要的程辑包：survival

## 
## 载入程辑包：'survival'

## The following object is masked from 'package:quantreg':
## 
##     untangle.specials

## 载入需要的程辑包：Formula

## 载入需要的程辑包：ggplot2

## 
## 载入程辑包：'Hmisc'

## The following object is masked from 'package:quantreg':
## 
##     latex

## The following objects are masked from 'package:base':
## 
##     format.pval, units

## [1] "Hmisc is loaded correctly"

library(Hmisc)

#在X轴的每两条主刻度线之间添加1条次要刻度线，并在Y轴的每两条主刻度线之间添加3条次要刻度线。

plot(x, y, type="b", pch=21, col="red", lty=3, ann=FALSE)
minor.tick(nx=2, ny=4, tick.ratio=0.5)

#恢复原有绘图参数
par(opar)

plot(runif(20), runif(20))
minor.tick()

# Plot with arguments passed to axis()
plot(c(0,1), c(0,1), type = 'n', axes = FALSE, ann = FALSE)
# setting up a plot without axes and annotation
points(runif(20), runif(20))                       # plotting data
axis(1, pos = 0.5, lwd = 2)                        # showing X-axis at Y = 0.5 with formatting
axis(2, col = 2, lwd=2)                                   # formatted Y-axis
minor.tick( nx = 4, ny = 4, tick.ratio = 0.3,
            x.args = list(pos = 0.5, lwd = 2),     # X-minor tick format argumnets
            y.args = list(col = 2))                # Y-minor tick format arguments

3.4 参考线和图例

函数abline()可以用来为图形添加参考线。 abline(h=yvalues, v=xvalues) 函数abline()中也可以指定其他图形参数（如线条类型、颜色和宽度）。举例来说： abline(h=c(1,5,7)) 在y为1、5、7的位置添加了水平实线，而代码： abline(v=seq(1, 10, 2), lty=2, col=“blue”) 则在x为1、3、5、7、9的位置添加了垂直的蓝色虚线。

dose <- c(20, 30, 40, 45, 60)
drugA <- c(16, 20, 27, 40, 60)
drugB <- c(15, 18, 25, 31, 40)

opar <- par(no.readonly=TRUE)

par(lwd=2, cex=0.8, font.lab=2)
plot(dose, drugA, type="b", 
      pch=15, lty=1, col="red", ylim=c(0, 60), 
      main="Drug A vs. Drug B", xlab="Drug Dosage", 
      ylab="Drug Response")
lines(dose, drugB, type="b", 
       pch=17, lty=2, col="blue")
abline(h=c(30), lwd=1.5, lty=2, col="gray")

lmpa <- lm(drugA~dose)
abline(lmpa$coefficients[1], lmpa$coefficients[2], lty =3, col = "red")

library(Hmisc)
minor.tick(nx=3, ny=3, tick.ratio=0.5)
legend("topleft", inset=.05, title="Drug Type", 
        c("A","B"), lty=c(1, 2), pch=c(15, 17), col=c("red", "blue"))

par(opar)

3.5 文本标注

attach(mtcars)

## The following object is masked from package:ggplot2:
## 
##     mpg

plot(wt, mpg, main="Mileage vs. Car Weight", xlab="Weight", ylab="Mileage", pch=18, col="blue")
text(wt, mpg, row.names(mtcars), cex=0.6, pos=4, col="red")

detach(mtcars)

opar <- par(no.readonly=TRUE)
par(cex=0.8)
plot(1:7,1:7,type="n")
text(3,3,"Example of default text")
text(4,4,family="mono", 
     "Example of mono-spaced text")
text(5,5,family="serif","Example of serif text")

par(opar)

3.6 数学标注

x <- seq(0, 4, 0.01)
y <- sqrt(x)
text <- expression(y == sqrt(x))
plot(x, y, type = 'l', main = text, las = 1)

plot(1:10, 1:10)
text(4, 9, expression(hat(beta) == (X^t * X)^{-1} * X^t * y), cex=2.0)
text(4, 8.0, "expression(hat(beta) == (X^t * X)^{-1} * X^t * y)",
     cex = 1.5)
text(4, 6.5, expression(bar(x) == sum(frac(x[i], n), i==1, n)), cex=2.0)
text(4, 5.4, "expression(bar(x) == sum(frac(x[i], n), i==1, n))",
     cex = 1.5)
text(8, 2.5, expression(paste(frac(1, sigma*sqrt(2*pi)), " ",
     plain(e)^{frac(-(x-mu)^2, 2*sigma^2)})), cex = 3.0)

plot(rnorm(100), rnorm(100), xlab = expression(hat(mu)[0]), ylab = expression(alpha^beta), 
     main = expression(paste("Plot of ", alpha^beta, " versus ", hat(mu)[0])))

#使用latex2exp包的Tex函数形成：

#install.packages("latex2exp")
library(latex2exp)

dev.off()

## null device 
##           1

x <- seq(0, 4, length.out = 100)
alpha <- 1:5
plot(x, xlim = c(0, 4), ylim = c(0, 10), xlab = "x", ylab = TeX("$\\alpha  x^\\alpha$, where $\\alpha \\in 1\\ldots 5$"), 
     type = "n", main = TeX("Using $\\LaTeX$ for plotting in base graphics!"), lwd=2)

text(2, 5, TeX("$\\alpha  x^\\alpha$, where $\\alpha \\in 1\\ldots 5$"), cex=4.0)
     
invisible(sapply(alpha, function(a) lines(x, a * x^a, col = a)))
legend("topleft", legend = TeX(sprintf("$\\alpha = %d$", alpha)), lwd = 1, col = alpha, cex=0.8)

3.7 图形组合和布局

attach(mtcars)

## The following object is masked from package:ggplot2:
## 
##     mpg

opar <- par(no.readonly=TRUE)
par(mfrow=c(2,2))
#par(mfcol=c(2,2))
plot(wt,mpg, main="Scatterplot of wt vs. mpg")
plot(wt,disp, main="Scatterplot of wt vs disp")
hist(wt, main="Histogram of wt")
boxplot(wt, main="Boxplot of wt")

par(opar)
detach(mtcars)

attach(mtcars)

## The following object is masked from package:ggplot2:
## 
##     mpg

opar <- par(no.readonly=TRUE)
par(mfrow=c(3,1))
hist(wt)
hist(mpg)
hist(disp)
par(opar)
detach(mtcars)

# layout
attach(mtcars)

## The following object is masked from package:ggplot2:
## 
##     mpg

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
hist(wt)
hist(mpg)
hist(disp)
detach(mtcars)

attach(mtcars)

## The following object is masked from package:ggplot2:
## 
##     mpg

layout(matrix(c(1, 1, 2, 3), 2, 2, byrow = TRUE),
       widths=c(3, 1), heights=c(1, 2))
hist(wt)
hist(mpg)
hist(disp)

detach(mtcars)

opar <- par(no.readonly=TRUE)
par(fig=c(0.05, 0.8, 0.05, 0.8))
plot(mtcars$wt, mtcars$mpg, xlab="Miles Per Gallon", ylab="Car Weight")
par(fig=c(0.05, 0.8, 0.55, 0.95), new=TRUE)
boxplot(mtcars$wt, horizontal=TRUE, axes=FALSE)
par(fig=c(0.65, 0.95, 0.05, 0.8), new=TRUE)
boxplot(mtcars$mpg, axes=FALSE)
mtext("Enhanced Scatterplot", side=3, outer=TRUE, line=-3)

par(opar)

The end of Chapter 1

4 作业

4.1 读取数据

定义函数 read_data() 以读取所需数据, 注意缺省目录、skip行数（与文件适合）、是否有header

如果数据文件*.csv 前几行有 ‘#’ 注释, 会被 read.table() 自动跳过，不必计算skip

read_data <- function(file_name, path='../data/', skip=3, header=TRUE, sep=',') {
  return (read.table(paste(path, file_name, sep=''), skip=skip, header=header, sep=sep))
}

利用上述函数，读出温度文件数据，选定数据源==GTS_METAR的数据。

#temp <- read_data('A_TEMP.csv', skip=3, header=TRUE)

temp <- read_data('A_TEMP.csv', path='http://162.105.145.16/rs/R_data_samples/', skip=3, header=TRUE)
colnames(temp) <- c('Time', 'Height', 'Temp', 'Source', 'Status',   'Details')
temp <- temp[temp$Source == 'GTS_METAR',]

rh <- read_data('A_RH.csv', path='http://162.105.145.16/rs/R_data_samples/', skip=3, header=TRUE)
colnames(rh) <- c('Time', 'Height', 'RH', 'Source', 'Status',   'Details')
rh <- rh[rh$Source == 'GTS_METAR', ]

定义两个函数 get_hours(), get_date() 对时间加以处理

get_hours <- function(datetime) {
  return (difftime(datetime, datetime[1], unit='hours'))
  }

4.2 绘制地面气温图像示例1

temp$Time <- as.POSIXlt( temp$Time )

temp$hours <- get_hours(temp$Time)
temp$date <- as.Date(temp$Time)


half_month <- round((temp$hours-temp$hours[1]) /(15*24*4))
xtick_mask <- (abs((temp$hours-temp$hours[1]) - half_month*15*24*4) < 0.3)

opar <- par(no.readonly=TRUE)

plot(temp$hours, temp$Temp, xaxt="n", yaxt="n", pch=20, cex=0.5,type="b", lty=1,col="blue", xlab="Date", ylab = "Temperature (Degree Celsius)")

axis(1, at=temp$hours[xtick_mask], labels=temp$date[xtick_mask], col.axis="black", las=1, cex.axis=0.8, tck=-0.02)

axis(2, at=10*(-2:10), col.axis="black", las=1, cex.axis=0.8, tck=-0.02)

abline(h=c(0), lty=2, col="red")

title(main="Surface Air Temperature in Beijing (2019/01/01 - 2023/02/18)")

par(opar)

4.3 绘制地面气温图像示例2

横坐标利用日期，采用不计算小时数的方式

temp$date <- as.Date(temp$Time)

#如果原数据有缺失，就不能用这个label:
x_label<-seq(from=as.Date("2019-01-01"), to=as.Date("2023-02-18"), by=180)

plot(temp$date, temp$Temp, xaxt="n", yaxt="n", pch=20, cex=0.1, type="b", lty=1,col="red", xlab='', ylab = "Temperature")

axis(2, at=seq(-20, 40, 5), col.axis="black", las=1, cex.axis=0.7, tck=-0.03)

axis(1,x_label,format(x_label,"%Y-%m-%d"),las=1, col.axis="black",cex.axis=0.7, tck=-0.03)

abline(h=c(0), lty=2, col="black")


title(main="Surface Air Temperature in Beijing (2019/01/01 - 2023/02/18)")

R语言介绍、创建数据集、图形初阶

Chengcai LI

2023-02-27