R引言绘图示例
# 绘图示例: GDP数据
# install.packages("usethis")
# install.packages("devtools")
# install.packages("voronoiTreemap")
library("usethis")
library("devtools")
library("voronoiTreemap")
<- vt_input_from_df(ExampleGDP)
data
<- vt_export_json(data)
gdp_json
vt_d3(gdp_json,
legend=TRUE, width = 800, height=500,
size_circle = "5px",
size_border = "2px",
size_border_hover = "5px")
GDP数据
#csv: Comma-Separated Values
#file<-read.csv("./data/export2020.csv", header=TRUE, sep = ",", stringsAsFactors = F)
# file <- "http://162.105.145.16/rs/R_data_samples/export2020.csv"
# pop<-read.csv(file, header=TRUE, sep = ",", stringsAsFactors = F, fileEncoding = "GB2312")
#
# data<-vt_input_from_df(pop)
# gdp_json<-vt_export_json(data)
# vt_d3(gdp_json,
# legend=TRUE, width = 800, height=500,
# size_circle = "5px",
# size_border = "2px",
# size_border_hover = "5px")
1 R入门
rm(list=ls())
#创建一个名为y的向量对象,它包含50个来自标准正态分布的随机偏差。
<- rnorm(50)
y
#创建一个名为x的向量对象,它包含1~50共50个整数。
<- c(1:50)
x
plot (x, y)
x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
y
## [1] 0.04672860 -0.50530575 0.29831725 -0.65306209 -0.81164885 0.29620891
## [7] 1.72089226 0.23961060 0.49191893 -0.22515998 0.57693681 -0.57220944
## [13] 0.15345285 1.07378005 -0.47728744 -1.93109108 1.88413127 -1.21435029
## [19] -0.64251071 -0.61748440 -1.36309981 1.98841315 0.44390043 1.33636769
## [25] 2.15152962 -1.40061650 0.18201961 0.19631750 -0.63610197 0.92186519
## [31] -1.07570892 0.09612166 -0.09585022 -1.30867486 0.13458338 -1.95839092
## [37] -0.93267133 -0.40837518 -0.02897932 -1.10444052 -1.09334793 2.56723743
## [43] -0.28883785 -0.11020391 -1.44266360 0.38403116 0.41710666 0.23118942
## [49] -1.43608902 -0.52316254
if(!isTRUE(getOption('knitr.in.progress')) )
{help(lm)
help("lm")
?lm
"lm"
?
help(rlm, package="MASS")
??lm
demo("nlm")
}
The workspace
#设置工作目录
#在Windows系统中,正斜杠/表示除法;反斜杠\用来表示目录。 #在Unix系统中,/表示目录;\表示转义字符。
#setwd("D:\R_learning") #会出错
#用两个反斜杠,第一个表示转义,第二个表示目录
setwd("D:\\R_learning")
getwd()
## [1] "D:/R_learning"
setwd("D:/R_learning")
getwd()
## [1] "D:/R_learning"
#显示当前的选项设置
options()
## $add.smooth
## [1] TRUE
##
## $browserNLdisabled
## [1] FALSE
##
## $callr.condition_handler_cli_message
## function (msg)
## {
## custom_handler <- getOption("cli.default_handler")
## if (is.function(custom_handler)) {
## custom_handler(msg)
## }
## else {
## cli_server_default(msg)
## }
## }
## <bytecode: 0x0000025e04477e50>
## <environment: namespace:cli>
##
## $CBoundsCheck
## [1] FALSE
##
## $check.bounds
## [1] FALSE
##
## $citation.bibtex.max
## [1] 1
##
## $continue
## [1] "+ "
##
## $contrasts
## unordered ordered
## "contr.treatment" "contr.poly"
##
## $defaultPackages
## [1] "datasets" "utils" "grDevices" "graphics" "stats" "methods"
##
## $demo.ask
## [1] "default"
##
## $deparse.cutoff
## [1] 60
##
## $device
## function (width = 7, height = 7, ...)
## {
## grDevices::pdf(NULL, width, height, ...)
## }
## <bytecode: 0x0000025e064b73c0>
## <environment: namespace:knitr>
##
## $device.ask.default
## [1] FALSE
##
## $devtools.ellipsis_action
## function (message = NULL, class = NULL, ..., body = NULL, footer = NULL,
## use_cli_format = NULL, .frequency = c("always", "regularly",
## "once"), .frequency_id = NULL, .subclass = deprecated())
## {
## message <- validate_signal_args(message, class, NULL, .subclass,
## "warn")
## message_info <- cnd_message_info(message, body, footer, caller_env(),
## use_cli_format = use_cli_format)
## message <- message_info$message
## extra_fields <- message_info$extra_fields
## use_cli_format <- message_info$use_cli_format
## .frequency <- arg_match0(.frequency, c("always", "regularly",
## "once"))
## if (!needs_signal(.frequency, .frequency_id, warning_freq_env,
## "rlib_warning_verbosity")) {
## return(invisible(NULL))
## }
## cnd <- warning_cnd(class, message = message, !!!extra_fields,
## use_cli_format = use_cli_format, ...)
## cnd$footer <- c(cnd$footer, message_freq(message, .frequency,
## "warning"))
## local_long_messages()
## warning(cnd)
## }
## <bytecode: 0x0000025e08ba7bf0>
## <environment: namespace:rlang>
##
## $devtools.install.args
## [1] ""
##
## $devtools.path
## [1] "~/R-dev"
##
## $digits
## [1] 7
##
## $echo
## [1] FALSE
##
## $editor
## [1] "notepad"
##
## $encoding
## [1] "native.enc"
##
## $example.ask
## [1] "default"
##
## $expressions
## [1] 5000
##
## $help.search.types
## [1] "vignette" "demo" "help"
##
## $help.try.all.packages
## [1] FALSE
##
## $help_type
## [1] "html"
##
## $htmltools.preserve.raw
## [1] TRUE
##
## $HTTPUserAgent
## [1] "R (4.2.2 x86_64-w64-mingw32 x86_64 mingw32)"
##
## $install.packages.compile.from.source
## [1] "interactive"
##
## $internet.info
## [1] 2
##
## $keep.parse.data
## [1] TRUE
##
## $keep.parse.data.pkgs
## [1] FALSE
##
## $keep.source
## [1] FALSE
##
## $keep.source.pkgs
## [1] FALSE
##
## $knitr.in.progress
## [1] TRUE
##
## $locatorBell
## [1] TRUE
##
## $mailer
## [1] "mailto"
##
## $matprod
## [1] "default"
##
## $max.print
## [1] 99999
##
## $menu.graphics
## [1] TRUE
##
## $na.action
## [1] "na.omit"
##
## $nwarnings
## [1] 50
##
## $OutDec
## [1] "."
##
## $pager
## [1] "internal"
##
## $papersize
## [1] "a4"
##
## $PCRE_limit_recursion
## [1] NA
##
## $PCRE_study
## [1] FALSE
##
## $PCRE_use_JIT
## [1] TRUE
##
## $pdfviewer
## [1] "C:/PROGRA~1/R/R-42~1.2/bin/x64/open.exe"
##
## $pkgType
## [1] "both"
##
## $prompt
## [1] "> "
##
## $repos
## CRAN
## "@CRAN@"
##
## $scipen
## [1] 0
##
## $show.coef.Pvalues
## [1] TRUE
##
## $show.error.messages
## [1] TRUE
##
## $show.signif.stars
## [1] TRUE
##
## $showErrorCalls
## [1] TRUE
##
## $str
## $str$strict.width
## [1] "no"
##
## $str$digits.d
## [1] 3
##
## $str$vec.len
## [1] 4
##
## $str$list.len
## [1] 99
##
## $str$deparse.lines
## NULL
##
## $str$drop.deparse.attr
## [1] TRUE
##
## $str$formatNum
## function (x, ...)
## format(x, trim = TRUE, drop0trailing = TRUE, ...)
## <environment: 0x0000025e053d2198>
##
##
## $str.dendrogram.last
## [1] "`"
##
## $stringsAsFactors
## [1] FALSE
##
## $tikzMetricsDictionary
## [1] "Ch01-R入门介绍-tikzDictionary"
##
## $timeout
## [1] 60
##
## $try.outFile
## A connection with
## description "output"
## class "textConnection"
## mode "wr"
## text "text"
## opened "opened"
## can read "no"
## can write "yes"
##
## $ts.eps
## [1] 1e-05
##
## $ts.S.compat
## [1] FALSE
##
## $unzip
## [1] "internal"
##
## $useFancyQuotes
## $useFancyQuotes$useFancyQuotes
## [1] FALSE
##
##
## $verbose
## [1] FALSE
##
## $warn
## [1] 0
##
## $warning.length
## [1] 1000
##
## $width
## [1] 80
##
## $windowsTimeouts
## [1] 100 500
#数字被格式化为小数点后3位有效数字的格式;
options(digits=3)
#创建一个包含20个均匀分布随机变量的向量;
#生成了此数据的摘要统计量和直方图;
<- runif(20)
x summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.093 0.191 0.503 0.483 0.744 0.942
hist(x)
#命令的历史记录保存到文件.Rhistory中;
#工作空间(包含向量x)保存到文件.RData中。
if(isTRUE(getOption('knitr.in.progress')) == FALSE){
savehistory()
}save.image()
#输入输出
#执行外部脚本(程序)
source("Ch01_test1.R")
## .. 2 .... 3 .... 5 .... 7 .... 11 ..
## .. 13 .... 17 .... 19 .... 23 .... 29 ..
## .. 31 .... 37 .... 41 .... 43 .... 47 ..
## .. 53 .... 59 .... 61 .... 67 .... 71 ..
## .. 73 .... 79 .... 83 .... 89 .... 97 ..
#文本输出
#输出到文件
sink("Ch01_demo2.txt", append=TRUE, split=TRUE)
source("Ch01_test1.R")
## .. 2 .... 3 .... 5 .... 7 .... 11 ..
## .. 13 .... 17 .... 19 .... 23 .... 29 ..
## .. 31 .... 37 .... 41 .... 43 .... 47 ..
## .. 53 .... 59 .... 61 .... 67 .... 71 ..
## .. 73 .... 79 .... 83 .... 89 .... 97 ..
sink()
#图形输出
pdf("Ch01_demo1.pdf")
<- rnorm(50)
y <- c(1:50)
x
plot(x, y)
dev.off()
## png
## 2
R包
#显示库所在的位置
.libPaths()
## [1] "C:/Users/Jack/AppData/Local/R/win-library/4.2"
## [2] "C:/Program Files/R/R-4.2.2/library"
#显示库中安装有哪些包
library()
#哪些包已加载
search()
## [1] ".GlobalEnv" "package:voronoiTreemap" "package:devtools"
## [4] "package:usethis" "package:stats" "package:graphics"
## [7] "package:grDevices" "package:utils" "package:datasets"
## [10] "package:methods" "Autoloads" "package:base"
#包的安装:
if(!isTRUE(getOption('knitr.in.progress')) )
{
install.packages() #无参数,交互方式
#repos参数可表示镜像地址
#install.packages("gclus", repos = "http://cran.us.r-project.org")
install.packages("gclus", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
update.packages("gclus", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
installed.packages()
}
#载入包
library(gclus)
## 载入需要的程辑包:cluster
#test1 <- library("abcd")
#Error in library("abcd") : there is no package called 'abcd'
#test1
#library没有返回值
#Error: object 'test' not found
<- require("abc") test
## 载入需要的程辑包:abc
## 载入需要的程辑包:abc.data
## 载入需要的程辑包:nnet
## 载入需要的程辑包:quantreg
## 载入需要的程辑包:SparseM
##
## 载入程辑包:'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
## 载入需要的程辑包:MASS
## 载入需要的程辑包:locfit
## locfit 1.5-9.7 2023-01-02
# Loading required package: abc
# Warning message:
# In library(package, lib.loc = lib.loc, character.only = TRUE, logical.return = TRUE, :
# there is no package called 'abc'
#require有返回值 test
## [1] TRUE
#[1] FALSE
if(require("lme4")){
print("lme4 is loaded correctly")
else {
} print("trying to install lme4")
install.packages("lme4", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
if(require(lme4)){
print("lme4 installed and loaded")
else {
} stop("could not install lme4")
} }
## 载入需要的程辑包:lme4
## 载入需要的程辑包:Matrix
## [1] "lme4 is loaded correctly"
help(package="lme4")
#卸载(从当前运行环境)
detach("package:lme4")
if(!isTRUE(getOption('knitr.in.progress')) )
{#卸除(从硬盘)
remove.packages( c('lme4') )
#重新安装
install.packages("lme4", repos = "http://cran.us.r-project.org")
}
2 创建数据集
2.1 基本数据类型
# 数值型
<- -1
ais.numeric(a)
## [1] TRUE
# 字符型
<- "peter"
bnchar(b)
## [1] 5
# 日期型
<- as.Date("2021-03-08")
cclass(c)
## [1] "Date"
<- as.POSIXct("2021-03-08 13:00") #含日期和时间
dclass(d)
## [1] "POSIXct" "POSIXt"
d
## [1] "2021-03-08 13:00:00 CST"
# [1] "2021-03-08 13:00:00 CST"
# 逻辑型
<- TRUE; f<- FALSE
e e
## [1] TRUE
# [1] TRUE
f
## [1] FALSE
# [1] FALSE
2.2 基本数据结构
# 向量: Vectors,存储数值型、字符型或逻辑性的一维数组。
<- c(1, 2, 5, 3, 6, -2, 4)
a <- c("one", "two", "three")
b <- c(TRUE, TRUE, TRUE, FALSE, TRUE, FALSE)
c
# 注意:同一向量中无法混杂不同模式的数据。
# 向量的访问:
<- c(1, 2, 5, 3, 6, -2, 4)
a 3] a[
## [1] 5
c(1, 3, 5)] a[
## [1] 1 5 6
2:6] a[
## [1] 2 5 3 6 -2
# 矩阵:Matrices,二维数组,只是每个元素都拥有相同的类型(数值型、字符型或逻辑型)。可通过函数matrix创建矩阵:
# mymatrix <- matrix(vector, nrow=number_of_rows, ncol=number_of_columns, byrow=logical_value, dimnames = list(char_vector_rownames, char_vector_colnames))
<- matrix(1:20, nrow=5, ncol=4)
y y
## [,1] [,2] [,3] [,4]
## [1,] 1 6 11 16
## [2,] 2 7 12 17
## [3,] 3 8 13 18
## [4,] 4 9 14 19
## [5,] 5 10 15 20
<- c(1,26,24,68)
cells <- c("R1", "R2")
rnames <- c("C1", "C2")
cnames <- matrix(cells, nrow=2, ncol=2, byrow=TRUE,
mymatrix dimnames=list(rnames, cnames))
mymatrix
## C1 C2
## R1 1 26
## R2 24 68
<- matrix(cells, nrow=2, ncol=2, byrow=FALSE,
mymatrix dimnames=list(rnames, cnames))
mymatrix
## C1 C2
## R1 1 24
## R2 26 68
#数组:Arrays,与矩阵类似,但是维度可以大于2。数组可通过array函数创建,形式如下:
# myarray <- array(vector, dimensions, dimnames)
<- c("A1", "A2")
dim1 <- c("B1", "B2", "B3")
dim2 <- c("C1", "C2", "C3", "C4")
dim3 <- array(1:24, c(2, 3, 4), dimnames=list(dim1, dim2, dim3))
z z
## , , C1
##
## B1 B2 B3
## A1 1 3 5
## A2 2 4 6
##
## , , C2
##
## B1 B2 B3
## A1 7 9 11
## A2 8 10 12
##
## , , C3
##
## B1 B2 B3
## A1 13 15 17
## A2 14 16 18
##
## , , C4
##
## B1 B2 B3
## A1 19 21 23
## A2 20 22 24
# 数据框:Data frames, 是R中最常处理的数据结构,不同的列可以包含不同的数据类型(数值型、字符型等),通过函数data.frame()创建:
# mydata <- data.frame(col1,col2,col3, ? )
<- c(1, 2, 3, 4)
patientID <- c(25, 34, 28, 52)
age <- c("Type1", "Type2", "Type1", "Type1")
diabetes <- c("Poor", "Improved", "Excellent", "Poor")
status <- data.frame(patientID, age, diabetes, status)
patientdata
patientdata
## patientID age diabetes status
## 1 1 25 Type1 Poor
## 2 2 34 Type2 Improved
## 3 3 28 Type1 Excellent
## 4 4 52 Type1 Poor
1:2] patientdata[
## patientID age
## 1 1 25
## 2 2 34
## 3 3 28
## 4 4 52
c("diabetes","status")] patientdata[
## diabetes status
## 1 Type1 Poor
## 2 Type2 Improved
## 3 Type1 Excellent
## 4 Type1 Poor
$age patientdata
## [1] 25 34 28 52
# 数据框变量访问: attach()、detach()和with()用于简化代码
summary(mtcars$mpg)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.4 15.4 19.2 20.1 22.8 33.9
plot(mtcars$mpg, mtcars$disp)
plot(mtcars$mpg, mtcars$wt)
# 利用attach():
attach(mtcars)
summary(mpg)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.4 15.4 19.2 20.1 22.8 33.9
plot(mpg, disp)
plot(mpg, wt)
detach(mtcars)
# 缺陷是数据框变量与全局环境变量重复时出现混淆。
with (mtcars, {
print ( summary(mpg) )
plot(mpg, disp)
plot(mpg, wt)
<- summary(mpg)
stats
stats } )
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.4 15.4 19.2 20.1 22.8 33.9
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.4 15.4 19.2 20.1 22.8 33.9
#查看stats会出错,它不是全程环境变量
# stats
# # Error: object ‘stats’ not found
# # 缺陷是赋值仅在括号内生效,如果需要赋值给全局变量,需要在括号内使用特殊赋值符 “<<-” 替代标准赋值符”<-” :
with (mtcars, {
print ( summary(mpg) )
plot(mpg, disp)
plot(mpg, wt)
<<- summary(mpg)
stats
stats } )
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.4 15.4 19.2 20.1 22.8 33.9
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.4 15.4 19.2 20.1 22.8 33.9
stats
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.4 15.4 19.2 20.1 22.8 33.9
2.3 因子变量
# 因子:factors, 用于处理名义型变量(糖尿病类型Type1、Type2)和有序型变量(病人状态poor、improved、excellent)
<- c("Type1", "Type2", "Type1", "Type1")
diabetes <- factor(diabetes)
diabetes
# 将此名义向量存储为(1, 2, 1, 1),并在内部将其关联为1=Type1和2=Type2。 ordered=TRUE选项用于有序变量:
<- c("Poor", "Improved", "Excellent", "Poor")
status <- factor(status, ordered=TRUE)
status
# 将向量编码为(3, 2, 1, 3),并在内部将这些值关联为1=Excellent、2=Improved以及3=Poor。另外,针对此向量进行的任何分析都会将其作为有序型变量对待,并自动选择合适的统计方法。可以通过指定levels选项来覆盖默认排序
<- factor(status, order=TRUE, levels=c("Poor", "Improved", "Excellent"))
status # 将指定1=Poor、2=Improved、3=Excellent
2.4 列表
# 列表:lists, 是R数据类型中最为复杂的一种, 是一些对象(或成分,components)的有序集合。它整合若干对象为单个对象名下。某个列表中可能是若干向量、矩阵、数据框,甚至其他列表的组合,其中的对象可以是目前为止讲到的任何结构:
# mylist <- list(object1, object2, …)
# mylist <- list(name1=object1, name2=object2, …)
<- "My First List"
g <- c(25, 26, 18, 39)
h <- matrix(1:10, nrow=5)
j <- c("one", "two", "three")
k <- list(title=g, ages=h, j, k)
mylist
mylist
## $title
## [1] "My First List"
##
## $ages
## [1] 25 26 18 39
##
## [[3]]
## [,1] [,2]
## [1,] 1 6
## [2,] 2 7
## [3,] 3 8
## [4,] 4 9
## [5,] 5 10
##
## [[4]]
## [1] "one" "two" "three"
2]] mylist[[
## [1] 25 26 18 39
"ages"]] mylist[[
## [1] 25 26 18 39
2.5 数据输入方法
# 键盘:
<- data.frame(age=numeric(0), gender=character(0), weight=numeric(0))
mydata <- edit(mydata)
mydata mydata
## [1] age gender weight
## <0 行> (或0-长度的row.names)
<- "
mydatatxt age gender weight
25 m 166
30 f 115
18 f 120
"
mydatatxt
## [1] "\n age gender weight\n 25 m 166\n 30 f 115\n 18 f 120\n"
<- read.table(header=TRUE, text=mydatatxt)
Mydata Mydata
## age gender weight
## 1 25 m 166
## 2 30 f 115
## 3 18 f 120
# 数据输入:从带分隔符的文本文件导入
<- read.table("../data/studentgrades.csv", header=TRUE, sep=",", row.names="STUDENTID",fileEncoding = "GB2312")
grades
<- read.csv("../data/export2020.csv", header=TRUE, sep=",",fileEncoding = "GB2312")
pop
# 注意中文文本的具体编码格式
<- read.csv("../data/export2020.csv", header=TRUE, sep=",", fileEncoding = "GB2312")
pop1
#export2020-1.csv为UTF-8格式
<- read.csv("../data/export2020-1.csv", header=TRUE, sep=",", fileEncoding = "UTF-8")
pop2
#写入文件时按缺省(GB2312)
unlink("../data/export2020-2.csv")
write.table(pop, "../data/export2020-2.csv", row.names=FALSE, col.names=TRUE, sep="," )
<-read.table("../data/export2020-2.csv", header = TRUE, sep="," ) pop1
<- "http://162.105.145.16/rs/R_data_samples/export2020.csv"
file <-read.csv(file, header=TRUE, sep = ",", stringsAsFactors = F, fileEncoding = "GB2312")
pop
<- "http://162.105.145.16/rs/R_data_samples/export2020-1.csv"
file <-read.csv(file, header=TRUE, sep = ",", stringsAsFactors = F, fileEncoding = "GB2312") #查看结果,发现中文出错 pop
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## 输入链结'http://162.105.145.16/rs/R_data_samples/export2020-1.csv'内的输入不对
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on
## 'http://162.105.145.16/rs/R_data_samples/export2020-1.csv'
<-read.csv(file, header=TRUE, sep = ",", stringsAsFactors = F, fileEncoding = "UTF-8") #加编码格式
pop
<-read.csv(file, fileEncoding = "UTF-8") #加编码格式 pop
3 图形初阶
attach(mtcars)
plot(wt, mpg)
abline(lm(mpg~wt))
title("Regression of MPG on Weight")
detach(mtcars)
pdf("Ch01_demo3.pdf")
attach(mtcars)
plot(wt, mpg)
abline(lm(mpg~wt))
title("Regression of MPG on Weight")
detach(mtcars)
dev.off()
## png
## 2
#注意类似上述代码块,需要一起调试
3.1 plot图形参数
<- c(20, 30, 40, 45, 60)
dose <- c(16, 20, 27, 40, 60)
drugA <- c(15, 18, 25, 31, 40)
drugB
plot(dose, drugA, type="b")
par(lty=2, pch=17) #设置虚线及三角图标
plot(dose, drugA, type="b")
# or:
plot(dose, drugA, type="b", lty=3, lwd=3, pch=15, cex=1.2)
#色彩:
<- 10
n <- rainbow(n)
mycolors
pie(rep(2, n), labels=mycolors, col=mycolors, cex=0.8)
<- gray(0:n/n)
mygrays pie(rep(1, n), labels=mygrays, col=mygrays, cex=0.7)
mycolors
## [1] "#FF0000" "#FF9900" "#CCFF00" "#33FF00" "#00FF66" "#00FFFF" "#0066FF"
## [8] "#3300FF" "#CC00FF" "#FF0099"
mygrays
## [1] "#000000" "#1A1A1A" "#333333" "#4D4D4D" "#666666" "#808080" "#999999"
## [8] "#B3B3B3" "#CCCCCC" "#E6E6E6" "#FFFFFF"
<- c(20, 30, 40, 45, 60)
dose <- c(16, 20, 27, 40, 60)
drugA <- c(15, 18, 25, 31, 40)
drugB
<- par(no.readonly=TRUE)
opar
par(pin=c(4, 3))
par(lwd=2, cex=1.5)
par(cex.axis=.75, font.axis=3)
plot(dose, drugA, type="b", pch=19, lty=2, col="red")
plot(dose, drugB, type="b", pch=23, lty=6, col="blue", bg="green")
par(opar)
3.2 坐标轴和标题设置
plot(dose, drugA, type="b",
col="red", lty=2, pch=2, lwd=2,
main="Clinical Trials for Drug A",
sub="This is hypothetical data",
xlab="Dosage", ylab="Drug Response",
xlim=c(10, 60), ylim=c(10, 70))
# 标题
plot(dose, drugA, type="b",
col="red", lty=2, pch=2, lwd=2,
main="", sub="", xlab="", ylab="",
xlim=c(10, 60), ylim=c(10, 70))
title(main="My Title", col.main="red",
sub="My Sub-title", col.sub="blue",
xlab="My X label", ylab="My Y label",
col.lab="green", cex.lab=0.75)
# 注意title和plot同时选中,然后Ctrl+Enter调试运行
<- c(1:10)
x <- x
y <- 10/x
z
#存储原有绘图参数
<- par(no.readonly=TRUE)
opar
par(mar=c(5, 4, 4, 8) + 0.1)
plot(x, y, type="b", pch=21, col="red",yaxt="n", lty=3, ann=FALSE)
lines(x, z, type="b", pch=22, col="blue", lty=2)
axis(2, at=x, labels=x, col.axis="red", las=2)
axis(4, at=z, labels=round(z, digits=2),
col.axis="blue", las=2, cex.axis=0.7, tck=-.01)
mtext("y=1/x", side=4, line=3, cex.lab=1, las=2, col="blue")
title("An Example of Creative Axes", xlab="X values", ylab="Y=X")
3.3 次要刻度线
if(require("Hmisc")){
print("Hmisc is loaded correctly")
else {
} print("trying to install Hmisc")
install.packages("Hmisc")
}
## 载入需要的程辑包:Hmisc
## 载入需要的程辑包:lattice
## 载入需要的程辑包:survival
##
## 载入程辑包:'survival'
## The following object is masked from 'package:quantreg':
##
## untangle.specials
## 载入需要的程辑包:Formula
## 载入需要的程辑包:ggplot2
##
## 载入程辑包:'Hmisc'
## The following object is masked from 'package:quantreg':
##
## latex
## The following objects are masked from 'package:base':
##
## format.pval, units
## [1] "Hmisc is loaded correctly"
library(Hmisc)
#在X轴的每两条主刻度线之间添加1条次要刻度线,并在Y轴的每两条主刻度线之间添加3条次要刻度线。
plot(x, y, type="b", pch=21, col="red", lty=3, ann=FALSE)
minor.tick(nx=2, ny=4, tick.ratio=0.5)
#恢复原有绘图参数
par(opar)
plot(runif(20), runif(20))
minor.tick()
# Plot with arguments passed to axis()
plot(c(0,1), c(0,1), type = 'n', axes = FALSE, ann = FALSE)
# setting up a plot without axes and annotation
points(runif(20), runif(20)) # plotting data
axis(1, pos = 0.5, lwd = 2) # showing X-axis at Y = 0.5 with formatting
axis(2, col = 2, lwd=2) # formatted Y-axis
minor.tick( nx = 4, ny = 4, tick.ratio = 0.3,
x.args = list(pos = 0.5, lwd = 2), # X-minor tick format argumnets
y.args = list(col = 2)) # Y-minor tick format arguments
3.4 参考线和图例
函数abline()可以用来为图形添加参考线。 abline(h=yvalues, v=xvalues) 函数abline()中也可以指定其他图形参数(如线条类型、颜色和宽度)。举例来说: abline(h=c(1,5,7)) 在y为1、5、7的位置添加了水平实线,而代码: abline(v=seq(1, 10, 2), lty=2, col=“blue”) 则在x为1、3、5、7、9的位置添加了垂直的蓝色虚线。
<- c(20, 30, 40, 45, 60)
dose <- c(16, 20, 27, 40, 60)
drugA <- c(15, 18, 25, 31, 40)
drugB
<- par(no.readonly=TRUE)
opar
par(lwd=2, cex=0.8, font.lab=2)
plot(dose, drugA, type="b",
pch=15, lty=1, col="red", ylim=c(0, 60),
main="Drug A vs. Drug B", xlab="Drug Dosage",
ylab="Drug Response")
lines(dose, drugB, type="b",
pch=17, lty=2, col="blue")
abline(h=c(30), lwd=1.5, lty=2, col="gray")
<- lm(drugA~dose)
lmpa abline(lmpa$coefficients[1], lmpa$coefficients[2], lty =3, col = "red")
library(Hmisc)
minor.tick(nx=3, ny=3, tick.ratio=0.5)
legend("topleft", inset=.05, title="Drug Type",
c("A","B"), lty=c(1, 2), pch=c(15, 17), col=c("red", "blue"))
par(opar)
3.5 文本标注
attach(mtcars)
## The following object is masked from package:ggplot2:
##
## mpg
plot(wt, mpg, main="Mileage vs. Car Weight", xlab="Weight", ylab="Mileage", pch=18, col="blue")
text(wt, mpg, row.names(mtcars), cex=0.6, pos=4, col="red")
detach(mtcars)
<- par(no.readonly=TRUE)
opar par(cex=0.8)
plot(1:7,1:7,type="n")
text(3,3,"Example of default text")
text(4,4,family="mono",
"Example of mono-spaced text")
text(5,5,family="serif","Example of serif text")
par(opar)
3.6 数学标注
<- seq(0, 4, 0.01)
x <- sqrt(x)
y <- expression(y == sqrt(x))
text plot(x, y, type = 'l', main = text, las = 1)
plot(1:10, 1:10)
text(4, 9, expression(hat(beta) == (X^t * X)^{-1} * X^t * y), cex=2.0)
text(4, 8.0, "expression(hat(beta) == (X^t * X)^{-1} * X^t * y)",
cex = 1.5)
text(4, 6.5, expression(bar(x) == sum(frac(x[i], n), i==1, n)), cex=2.0)
text(4, 5.4, "expression(bar(x) == sum(frac(x[i], n), i==1, n))",
cex = 1.5)
text(8, 2.5, expression(paste(frac(1, sigma*sqrt(2*pi)), " ",
plain(e)^{frac(-(x-mu)^2, 2*sigma^2)})), cex = 3.0)
plot(rnorm(100), rnorm(100), xlab = expression(hat(mu)[0]), ylab = expression(alpha^beta),
main = expression(paste("Plot of ", alpha^beta, " versus ", hat(mu)[0])))
#使用latex2exp包的Tex函数形成:
#install.packages("latex2exp")
library(latex2exp)
dev.off()
## null device
## 1
<- seq(0, 4, length.out = 100)
x <- 1:5
alpha plot(x, xlim = c(0, 4), ylim = c(0, 10), xlab = "x", ylab = TeX("$\\alpha x^\\alpha$, where $\\alpha \\in 1\\ldots 5$"),
type = "n", main = TeX("Using $\\LaTeX$ for plotting in base graphics!"), lwd=2)
text(2, 5, TeX("$\\alpha x^\\alpha$, where $\\alpha \\in 1\\ldots 5$"), cex=4.0)
invisible(sapply(alpha, function(a) lines(x, a * x^a, col = a)))
legend("topleft", legend = TeX(sprintf("$\\alpha = %d$", alpha)), lwd = 1, col = alpha, cex=0.8)
3.7 图形组合和布局
attach(mtcars)
## The following object is masked from package:ggplot2:
##
## mpg
<- par(no.readonly=TRUE)
opar par(mfrow=c(2,2))
#par(mfcol=c(2,2))
plot(wt,mpg, main="Scatterplot of wt vs. mpg")
plot(wt,disp, main="Scatterplot of wt vs disp")
hist(wt, main="Histogram of wt")
boxplot(wt, main="Boxplot of wt")
par(opar)
detach(mtcars)
attach(mtcars)
## The following object is masked from package:ggplot2:
##
## mpg
<- par(no.readonly=TRUE)
opar par(mfrow=c(3,1))
hist(wt)
hist(mpg)
hist(disp)
par(opar)
detach(mtcars)
# layout
attach(mtcars)
## The following object is masked from package:ggplot2:
##
## mpg
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
hist(wt)
hist(mpg)
hist(disp)
detach(mtcars)
attach(mtcars)
## The following object is masked from package:ggplot2:
##
## mpg
layout(matrix(c(1, 1, 2, 3), 2, 2, byrow = TRUE),
widths=c(3, 1), heights=c(1, 2))
hist(wt)
hist(mpg)
hist(disp)
detach(mtcars)
<- par(no.readonly=TRUE)
opar par(fig=c(0.05, 0.8, 0.05, 0.8))
plot(mtcars$wt, mtcars$mpg, xlab="Miles Per Gallon", ylab="Car Weight")
par(fig=c(0.05, 0.8, 0.55, 0.95), new=TRUE)
boxplot(mtcars$wt, horizontal=TRUE, axes=FALSE)
par(fig=c(0.65, 0.95, 0.05, 0.8), new=TRUE)
boxplot(mtcars$mpg, axes=FALSE)
mtext("Enhanced Scatterplot", side=3, outer=TRUE, line=-3)
par(opar)
The end of Chapter 1
4 作业
4.1 读取数据
定义函数 read_data()
以读取所需数据,
注意缺省目录、skip行数(与文件适合)、是否有header
如果数据文件*.csv
前几行有 ‘#’ 注释, 会被
read.table()
自动跳过,不必计算skip
<- function(file_name, path='../data/', skip=3, header=TRUE, sep=',') {
read_data return (read.table(paste(path, file_name, sep=''), skip=skip, header=header, sep=sep))
}
利用上述函数,读出温度文件数据,选定数据源==GTS_METAR的数据。
#temp <- read_data('A_TEMP.csv', skip=3, header=TRUE)
<- read_data('A_TEMP.csv', path='http://162.105.145.16/rs/R_data_samples/', skip=3, header=TRUE)
temp colnames(temp) <- c('Time', 'Height', 'Temp', 'Source', 'Status', 'Details')
<- temp[temp$Source == 'GTS_METAR',]
temp
<- read_data('A_RH.csv', path='http://162.105.145.16/rs/R_data_samples/', skip=3, header=TRUE)
rh colnames(rh) <- c('Time', 'Height', 'RH', 'Source', 'Status', 'Details')
<- rh[rh$Source == 'GTS_METAR', ] rh
定义两个函数 get_hours()
, get_date()
对时间加以处理
<- function(datetime) {
get_hours return (difftime(datetime, datetime[1], unit='hours'))
}
4.2 绘制地面气温图像示例1
$Time <- as.POSIXlt( temp$Time )
temp
$hours <- get_hours(temp$Time)
temp$date <- as.Date(temp$Time)
temp
<- round((temp$hours-temp$hours[1]) /(15*24*4))
half_month <- (abs((temp$hours-temp$hours[1]) - half_month*15*24*4) < 0.3)
xtick_mask
<- par(no.readonly=TRUE)
opar
plot(temp$hours, temp$Temp, xaxt="n", yaxt="n", pch=20, cex=0.5,type="b", lty=1,col="blue", xlab="Date", ylab = "Temperature (Degree Celsius)")
axis(1, at=temp$hours[xtick_mask], labels=temp$date[xtick_mask], col.axis="black", las=1, cex.axis=0.8, tck=-0.02)
axis(2, at=10*(-2:10), col.axis="black", las=1, cex.axis=0.8, tck=-0.02)
abline(h=c(0), lty=2, col="red")
title(main="Surface Air Temperature in Beijing (2019/01/01 - 2023/02/18)")
par(opar)
4.3 绘制地面气温图像示例2
横坐标利用日期,采用不计算小时数的方式
$date <- as.Date(temp$Time)
temp
#如果原数据有缺失,就不能用这个label:
<-seq(from=as.Date("2019-01-01"), to=as.Date("2023-02-18"), by=180)
x_label
plot(temp$date, temp$Temp, xaxt="n", yaxt="n", pch=20, cex=0.1, type="b", lty=1,col="red", xlab='', ylab = "Temperature")
axis(2, at=seq(-20, 40, 5), col.axis="black", las=1, cex.axis=0.7, tck=-0.03)
axis(1,x_label,format(x_label,"%Y-%m-%d"),las=1, col.axis="black",cex.axis=0.7, tck=-0.03)
abline(h=c(0), lty=2, col="black")
title(main="Surface Air Temperature in Beijing (2019/01/01 - 2023/02/18)")