Vocabulary

Basic

library(knitr)
opts_chunk$set(message = F,eval = F)
func = function(x();y){
  if(missing(y)){
    y = x
  }
  return(y)
}

func(1)
func(1();2)
for(number_loop in 1:100){
  if(number_loop%%5 == 0){
    break
  }
  print(number_loop)
  ifelse(test = 1==2();"123"();"234")
}
# The first functions to learn
?
str

# Important operators and assignment
%in%(); match
=(); <-(); <<-
$(); [(); [[(); head(); tail(); subset
with
assign(); get

# Comparison 
all.equal(); identical
!=(); ==(); >(); >=(); <(); <=
is.na(); complete.cases
is.finite

# Basic math
*(); +(); -(); /(); ^(); %%(); %/%
abs(); sign
acos(); asin(); atan(); atan2
sin(); cos(); tan
ceiling(); floor(); round(); trunc(); signif
exp(); log(); log10(); log2(); sqrt

max(); min(); prod(); sum
cummax(); cummin(); cumprod(); cumsum(); diff
pmax(); pmin
range
mean(); median(); cor(); sd(); var
rle

# Functions to do with functions
function
missing()
on.exit
return(); invisible

# Logical & sets 
&(); |(); !(); xor
all(); any
intersect(); union(); setdiff(); setequal
which

# Vectors and matrices
c(); matrix
# automatic coercion rules character > numeric > logical
length(); dim(); ncol(); nrow
cbind(); rbind
names(); colnames(); rownames
t
diag
sweep
as.matrix(); data.matrix

# Making vectors 
c
rep(); rep_len
seq(); seq_len(); seq_along
rev
sample
choose(); factorial(); combn
(is/as).(character/numeric/logical/...)

# Lists & data.frames 
list(); unlist
data.frame(); as.data.frame
split
expand.grid

# Control flow 
if(); &&(); || (short circuiting)
for(); while
next(); break
switch
ifelse
for(i in 1:10){
  if(i ==3){
    next()
  }
  if(i ==6){
    break()
  }
  print(i)
}

# Apply & friends
lapply(); sapply(); vapply
apply
tapply
replicate

Common data structures

Date

# Date Time
typeof(ISOdate(year = 1994,month = 12,day = 12,hour = 1,min = 1,sec = 1))
class(ISOdate(year = 1994,month = 12,day = 12,hour = 1,min = 1,sec = 1))
typeof(ISOdatetime(year = 1994,month = 12,day = 12,hour = 1,min = 1,sec = 1))
class(ISOdatetime(year = 1994,month = 12,day = 12,hour = 1,min = 1,sec = 1))
ISOdatetime(year = 1994,month = 12,day = 12,hour = 1,min = 1,sec = 1)
ISOdate(year = 1994,month = 12,day = 12,hour = 1,min = 1,sec = 1)

time_example = ISOdate(year = 1994,month = 12,day = 12,hour = 1,min = 1,sec = 1)

# strftime
strftime(time_example)
typeof(strftime(time_example))
class(strftime(time_example))

# strptime
strptime(strftime(time_example),format = "")

date()
julian()

Character

# 对比字符串
grep("huo",c("huozhiji","huoyuanjia","huoqubing"))

# 复杂对比字符串。。略复杂
agrep("huo",c("huozhiji","huoyuanjia","huoqubing"))
agrep("lasy", "1 lazy 2")
agrep("lasy", c(" 1 lazy 2", "1 lasy 2"), max = list(sub = 0))
agrep("laysy", c("1 lazy", "1", "1 LAZY"), max = 2)
agrep("laysy", c("1 lazy", "1", "1 LAZY"), max = 2, value = TRUE)
agrep("laysy", c("1 lazy", "1", "1 LAZY"), max = 2, ignore.case = TRUE)

# 替换
gsub(pattern = "huozhiji",replacement = "chenyahui",x = "huozhijikecongmingle")

# 分割
strsplit(x = "huozhiji",split = "o")

# 多个,按照字母替换
x <- "MiXeD cAsE 123"
chartr("iXs", "why", x)
chartr("a-cX", "D-Fw", x)
tolower(x)
toupper(x)

# 长度
x <- c("asfef", "qwerty", "yuiop[", "b", "stuff.blah.yech")
nchar(x)
nchar(deparse(mean))

x[3] <- NA; x
nchar(x, keepNA= TRUE) #  5  6 NA  1 15
nchar(x, keepNA=FALSE) #  5  6  2  1 15
stopifnot(identical(nchar(x     ), nchar(x, keepNA= TRUE)),
          identical(nchar(x, "w"), nchar(x, keepNA=FALSE)),
          identical(is.na(x), is.na(nchar(x))))
stopifnot(1==1,T == TRUE,"huozhiji" == "genius")
is.na(x)
x = "123"
length(x)

# subset of the str
substr(x = "huozhiji",start = 1,2)

# paste
paste

#stringr
library(stringr)
length(sentences)
sentences[1:5]

length(fruit)
fruit[1:5]

length(words)
words[1:5]

pattern <- "a.b"
strings <- c("abb", "a.b")
str_detect(strings, pattern)
str_detect(strings, fixed(pattern))
str_detect(strings, coll(pattern))

# coll() is useful for locale-aware case-insensitive matching
i <- c("I", "\u0130", "i")
i
str_detect(i, fixed("i", TRUE))
str_detect(i, coll("i", TRUE))
str_detect(i, coll("i", TRUE, locale = "tr"))

# Word boundaries
words <- c("These are   some words.")
str_count(words, boundary("word"))
str_split(words, " ")[[1]]
str_split(words, boundary("word"))[[1]]

# Regular expression variations
str_extract_all("The Cat in the Hat", "[a-z]+")
str_extract_all("The Cat in the Hat", regex("[a-z]+", TRUE))

str_extract_all("a\nb\nc", "^.")
str_extract_all("a\nb\nc", regex("^.", multiline = TRUE))

str_extract_all("a\nb\nc", "a.")
str_extract_all("a\nb\nc", regex("a.", dotall = TRUE))

Factor

#
factor, levels, nlevels
reorder, relevel
cut, findInterval
interaction
options(stringsAsFactors = FALSE)

#
test = as.factor(c(1,2,3,1,2))
test
nlevels(test)
warpbreaks$tension
relevel(warpbreaks$tension, ref = "M")
relevel(warpbreaks$tension, ref = c("H","L","M"))
?cut
typeof(cut(rep(1,5), 4))
str(cut(rep(1,5), 4))
class(cut(rep(1,5), 4)[1])
# 膜拜?
cut(mtcars$mpg,5)

# 膜拜第二次?
?findInterval
x <- 2:18
v <- c(5, 10, 15) # create two bins [5,10) and [10,15)
cbind(x, findInterval(x, v))

# interaction
?interaction
a <- gl(2, 4, 8)
b <- gl(2, 2, 8, labels = c("ctrl", "treat"))
s <- gl(2, 1, 8, labels = c("M", "F"))
a;b;s
gl(n = 10,k = 2)
interaction(a, b)
interaction(a, b, s, sep = ":")
stopifnot(identical(a:s,
                    interaction(a, s, sep = ":", lex.order = TRUE)),
          identical(a:s:b,
                    interaction(a, s, b, sep = ":", lex.order = TRUE)))
a
s
a:s

array

# dimnames(mtcars)
# names(mtcars)
x  <- array(1:24, 2:4)
xt <- aperm(x, c(2,1,3))
stopifnot(t(xt[,,2]) == x[,,2],
          t(xt[,,3]) == x[,,3],
          t(xt[,,4]) == x[,,4])
x
xt

library(abind)
# Five different ways of binding together two matrices
x <- matrix(1:12,3,4)
y <- x+100
abind(x,y,along=0)     # binds on new dimension before first
dim(abind(x,y,along=1))     # binds on first dimension
dim(abind(x,y,along=1.5))
dim(abind(x,y,along=2))
dim(abind(x,y,along=3))
dim(abind(x,y,rev.along=1)) # binds on last dimension
dim(abind(x,y,rev.along=0)) # binds on new dimension after last
x
y

Statictics

Ordering and tabulating

# 重复
test = gl(2,1,10,labels = c("boy","girl"))
duplicated(test)
test
duplicated(c(1,2,3,4,2))
unique(c(1,2,3,4,1))

# 合并,有选择合并
merge

# 顺序
order,rank,qunatile
cut()
table()
ftable()
table(mtcars$drat)
table(c(1,2,3,1,2,3))
class(ftable(c(1,2,3,1,2,3)))
dim(ftable(c(1,2,3,1,2,3)))
str(ftable(c(1,2,3,1,2,3)))
is.list(ftable(c(1,2,3,1,2,3)))
is.vector(ftable(c(1,2,3,1,2,3)))
dim(ftable(c(1,2,3,1,2,3)))
is.factor(ftable(c(1,2,3,1,2,3)))

Linear models

fitted, predict, resid, rstandard
lm, glm
hat, influence.measures
logLik, df, deviance
formula, ~, I
anova, coef, confint, vcov
contrasts

Miscellaneous tests

apropos("\\.test$")

Random variables

(q, p, d, r) * (beta, binom, cauchy, chisq, exp, f, gamma, geom, 
                hyper, lnorm, logis, multinom, nbinom, norm, pois, signrank, t, 
                unif, weibull, wilcox, birthday, tukey)

Matrix algebra

crossprod, tcrossprod
eigen, qr, svd
%*%, %o%, outer
rcond
solve

Work with R

# workspace
ls()
exists(x = "huozhiji")
rm("a")
getwd()
setwd()
q....
source()
require(?)
?require
a = library(ggplot2)
a = require(ggplot2)
a
test_function = function(){
  return(1)
}
test_function()


# HELP
?
help()
help.search(package = ggplot2)
RSiteSearch("ggplot2")
citation(package = "base")
demo
example(topic = "ggplot2")
a = example(topic = "ls")
a
vignette()
?vignette


# Debugging
traceback(max.lines = 10)
browser()
recover()
options(error = )
stop()
warning("test")
message("123")
tryCatch(expr = a == TRUE)
a
?tryCatch

tryCatch(1, finally = print("Hello"))
e <- simpleError("test error")
## Not run: 
stop(e)
tryCatch(stop(e), finally = print("Hello"))
tryCatch(stop("fred"), finally = print("Hello"))

## End(Not run)
tryCatch(stop(e), error = function(e) e, finally = print("Hello"))
tryCatch(stop("fred"),  error = function(e) e, finally = print("Hello"))
withCallingHandlers({ warning("A"); 1+2 }, warning = function(w) {})
## Not run: 
 { withRestarts(stop("A"), abort = function() {}); 1 }

## End(Not run)
withRestarts(invokeRestart("foo", 1, 2), foo = function(x, y) {x + y})

##--> More examples are part of
##-->   demo(error.catching)

IO

# Output
print("huozhiji")


?cat
iter <- stats::rpois(1, lambda = 10)
cat("iteration = ", iter <- iter + 1, "\n")
paste("iteration = ", iter <- iter + 1, "\n")
print(paste("iteration = ", iter <- iter + 1, "\n"))
a = cat(paste(letters, 100* 1:26), fill = TRUE, labels = paste0("{", 1:10, "}:"))
a
str(class(cat(paste(letters, 100* 1:26), fill = TRUE, labels = paste0("{", 1:10, "}:"))))
typeof(class(cat(paste(letters, 100* 1:26), fill = TRUE, labels = paste0("{", 1:10, "}:"))))
typeof(paste0("{", 1:10, "}:"))
paste("{", 1:10, "}:",sep = "")
?cat
cat("huozhiji","feichangcongm","\n","danshi","chenyahui","gengkeai")
print("huozhiji","feichangcongm","\n","danshi","chenyahui","gengkeai")
cat("huozhiji","feichangcongm","\n","danshi","chenyahui","gengkeai",file = "FileOutput/cat.txt")
cat("huozhiji","feichangcongm","\n","danshi","chenyahui","gengkeai",file = "FileOutput/cat.txt",append = T)


message();warning()


dput(number_loop)
ls()


format(x = "")
?format()
format(1:10)
format(1:10, trim = TRUE)
1:10
cat(1:10)
print(1:10)
zz <- data.frame("(row names)"= c("aaaaa", "b"), check.names = FALSE)
format(zz)
zz
format(zz, justify = "left")

format(13.7)
format(13.7, nsmall = 3)
format(c(6.0, 13.1), digits = 2)
format(c(6.0, 13.1), digits = 2, nsmall = 1)
print(c(1.234),digits = 1)
print(c(1.234),digits = 2)
print(c(1.234),digits = 3)
format(2^31-1)
format(2^31-1, scientific = TRUE)
z <- list(a = letters[1:3], b = (-pi+0i)^((-2:2)/2), c = c(1,10,100,1000),
          d = c("a", "longer", "character", "string"),
          q = quote( a + b ), e = expression(1+x))
## can you find the "2" small differences?
(f1 <- format(z, digits = 2))
(f2 <- format(z, digits = 2, justify = "left", trim = FALSE))
f1 == f2 ## 2 FALSE, 4 TRUE

?sink()
sink("FileOutput/sink-examp.txt",append = T)
sink("FileOutput/sink-examp.txt")
i <- 1:10
outer(i, i, "*")
sink()
file(open = "FileOutput/sink-examp.txt")
## capture all the output to a file.
zz <- file("all.Rout", open = "wt")
sink(zz)
sink(zz, type = "message")
try(log("a"))
## revert output back to the console -- only then access the file!
sink(type = "message")
sink()
file.show("all.Rout")
sink.number()
open(con = "FileOutput/sink-examp.txt")
print(1:10)


require(stats)
glmout <- capture.output(summary(glm(case ~ spontaneous+induced,
                                     data = infert, family = binomial())))
glmout[1:5]
capture.output(1+1, 2+2)
capture.output({1+1; 2+2})

## Not run: ## on Unix-alike with a2ps available
op <- options(useFancyQuotes=FALSE)
pdf <- pipe("a2ps -o - | ps2pdf - tempout.pdf", "w")
capture.output(example(glm), file = pdf)
close(pdf); options(op) ; system("evince tempout.pdf &")

## End(Not run)






# Reading and Writing data
?data()
?count.fields
cat("NAME", "1:John", "2:Paul", file = "FileOutput/foo.txt", sep = "\n")
count.fields("FileOutput/foo.txt", sep = ":")
unlink("FileOutput/foo.txt")
?saveRDS


# 
dir()
file.path("","p1","p2","p3", c("file1", "file2"))
basename(file.path("","p1","p2","p3", c("file1", "file2")))
dirname(file.path("","p1","p2","p3","filename"))

# tools::
?file_ext()
library(tools)
R.home()
dir <- file.path(R.home(), "library", "stats")
dir
list_files_with_exts(file.path(dir, "demo"), "R")
list_files_with_type(file.path(dir, "demo"), "demo") # the same
file_path_sans_ext(list.files(file.path(R.home("modules"))))

#
file.path(R.home(),"huozhiji")
path.expand(R.home())
file.choose(new = "FileOutput/cat.txt")
file.copy(from = "FileOutput/cat.txt",to = "FileOutput/cat2.txt")
file.create("FileOutput/create.txt")
file.remove("FileOutput/create.txt")
file.rename("FileOutput/cat.txt","FileOutput/dog.txt")
dir.create(path = "FileOutput/createpath")
file.exists("FileOutput/cat2.txt")
file.exists("FileOutput/dog2.txt")
file.info("FileOutput/cat2.txt")
tempdir("FileOutput/tempdir")
?tempdir
tempfile(c("ab", "a b c"))   # give file name with spaces in!

tempfile("plot", fileext = c(".ps", ".pdf"))

tempdir() # works on all platforms with a platform-dependent result
tempfile()
download.file()

Style Guide

Notation and naming

Good coding style is like useing correct punctuation. You can manage without it, but it sure makes things easier to read.

As with styles of punctuation, there are many possible variations.

good style is ~

library(formatR)
## comments are retained;
# a comment block will be reflowed if it contains long comments;
#' roxygen comments will not be wrapped in any case
1+1

if(TRUE){
x=1  # inline comments
}else{
x=2;print('Oh no... ask the right bracket to go away!')}
1*3 # one space before this comment will become two!
2+2+2    # only 'single quotes' are allowed in comments

lm(y~x1+x2, data=data.frame(y=rnorm(100),x1=rnorm(100),x2=rnorm(100)))  ### a linear model
1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1  ## comments after a long line
## here is a long long long long long long long long long long long long long comment which will be wrapped
## comments are retained; a comment block will be reflowed if it
## contains long comments;
#' roxygen comments will not be wrapped in any case
1 + 1

if (TRUE) {
    x = 1  # inline comments
} else {
    x = 2
    print("Oh no... ask the right bracket to go away!")
}
1 * 3  # one space before this comment will become two!
2 + 2 + 2  # only 'single quotes' are allowed in comments

lm(y ~ x1 + x2, data = data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100)))  ### a linear model
1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 
    1 + 1 + 1 + 1 + 1  ## comments after a long line
## here is a long long long long long long long long long long long long
## long comment which will be wrapped

## comments are retained; a comment block will be reflowed if it contains long comments;
#' roxygen comments will not be wrapped in any case
1 + 1

if (TRUE) {
    x = 1  # inline comments
} else {
    x = 2
    print("Oh no... ask the right bracket to go away!")
}
1 * 3  # one space before this comment will become two!
2 + 2 + 2  # only 'single quotes' are allowed in comments

lm(y ~ x1 + x2, data = data.frame(y = rnorm(100), x1 = rnorm(100), x2 = rnorm(100)))  ### a linear model
1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1  ## comments after a long line
## here is a long long long long long long long long long long long long long comment which will be wrapped
usage(glm, width = 100)  # can set arbitrary width here
## glm(formula, family = gaussian, data, weights, subset, na.action, start = NULL, 
##     etastart, mustart, offset, control = list(...), model = TRUE, method = "glm.fit", 
##     x = FALSE, y = TRUE, contrasts = NULL, ...)
args(glm)
## function (formula, family = gaussian, data, weights, subset, 
##     na.action, start = NULL, etastart, mustart, offset, control = list(...), 
##     model = TRUE, method = "glm.fit", x = FALSE, y = TRUE, contrasts = NULL, 
##     ...) 
## NULL
formatR::tidy_app()

tidy_eval()
tidy_source()

File names

# Good
fit-models.R
utility-functions.R

# Bad
foo.r
stuff.r

0-download.R
1-parse.R
2-explore.R

Object names

concise and short, it is not easy

# Good
day_one
day_1

# Bad
first_day_of_the_month
DayOne
dayone
djm1

# Bad
T <- FALSE
c <- 10
mean <- function(x) sum(x)

Syntax

Spacing

# Good
average <- mean(feet / 12 + inches, na.rm = TRUE)

# Bad
average <- mean(feet / 12 + inches, na.rm=TRUE)

# Good
x <- 1:10
base::get

# Bad
x <- 1 : 10
base :: get

# Good
if (debug) do(x)
plot(x, y)

# Bad
if(debug)do(x)
plot (x, y)

list(
  total = a + b + c, 
  mean  = (a + b + c) / n
)

# Good
if (debug) do(x)
diamonds[5, ]

# Bad
if ( debug ) do(x)  # No spaces around debug
x[1,]   # Needs a space after the comma
x[1 ,]  # Space goes after comma not before

Function

Function components

body(fun = "mean")
formals()
environment()

f = function(x) x^2
f
formals(f)
body(f)
environment(f)

attributes(mean)
attributes(f)
# For example, you can set the class() and add a custom print() method.

Primitive functions

formals(sum)
#> NULL
body(sum)
#> NULL
environment(sum)
#> NULL

私有化函数直接调用C

Primitive functions are only found in the base package, and since they operate at a low level, they can be more efficient (primitive replacement functions don’t have to make copies), and can have different rules for argument matching (e.g., switch and call). This, however, comes at a cost of behaving differently from all other functions in R. Hence the R core team generally avoids creating them unless there is no other option.

Exercises

objs <- mget(ls("package:base"), inherits = TRUE)
length(objs)
names(objs)
class(objs)
funs <- Filter(is.function, objs)
length(funs)
class(funs)
names(funs)
library(ggplot2)
ggplot
body(ggplot)
formals(ggplot)
environment(ggplot)

Lexical scoping

lexical scoping?

x = 10

the implementation of lexical scoping

Name masking

f <- function() {
  x <- 1
  y <- 2
  c(x, y)
}
f()
rm(f)
h = function() {
  c("huozhiji")
}
h()

if (y == 0) {
  log(x)
} else {
  y ^ x
}

x <- 2
g <- function() {
  y <- 1
  c(x, y)
}
g()
rm(x, g)

x <- 1
h <- function() {
  y <- 2
  i <- function() {
    z <- 3
    c(x, y, z)
  }
  i()
}
h()
rm(x, h)

j <- function(x) {
  y <- 2
  function() {
    c(x, y)
  }
}
k <- j(1)
k()
rm(j, k)
j(1)

because the enviroment includes the value of Y

Function and value

l <- function(x) x + 1
m <- function() {
  l <- function(x) x * 2
  l(10)
}
m()
#> [1] 20
l
rm(l, m)
n <- function(x) x / 2
o <- function() {
  n <- 10
  n(n)
}
o()
#> [1] 5
rm(n, o)

A fresh start

j <- function() {
  if (!exists("a")) {
    a <- 1
  } else {
    a <- a + 1
  }
  print(a)
}
j()
rm(j)
a
exists("a")
a = a + 1
a
class(a)
typeof(a)
a = 1
a

Dynamic lookup

f <- function() x + 1
f = function() huozhiji + 1
codetools::findGlobals(f)

`(` <- function(e1) {
  if (is.numeric(e1) && runif(1) < 0.1) {
    e1 + 1
  } else {
    e1
  }
}
replicate(50, (1 + 2))
#>  [1] 3 3 3 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
#> [36] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
rm("(")

exercises

c <- 10
c(c = c)

rm(c)

f <- function(x) {
  f <- function(x) {
    f <- function(x) {
      x ^ 2
    }
    f(x) + 1
  }
  f(x) * 2
}
f(10)

Every operation is a function call

x <- 10; y <- 5
x + y
#> [1] 15
`+`(x, y)

Function arguments

Calling functions

ggplot(d = mtcars)

Calling a function given a list of arguments

do.call(mean,list(listofparameter))

default and missing arguments

g <- function(a = 1, b = a * 2) {
  c(a, b)
}
g()
#> [1] 1 2
g(10)
#> [1] 10 20

i <- function(a, b) {
  c(missing(a), missing(b))
}
i()
#> [1] TRUE TRUE
i(a = 1)
#> [1] FALSE  TRUE
i(b = 2)
#> [1]  TRUE FALSE
i(1, 2)
#> [1] FALSE FALSE

Instead, I usually set the default value to NULL and use is.null() to check if the argument was supplied.

Lazy evaluation

f <- function(x) {
  force(x)
  10
}
f(stop("This is an error!"))

add <- function(x) {
  function(y) x + y
}
adders <- lapply(1:10, add)
adders
?lapply
adders[[1]](10)
#> [1] 11
adders[[10]](10)
#> [1] 20
add <- function(x) {
  force(x)
  function(y) x + y
}
adders2 <- lapply(1:10, add)
adders2[[1]](10)
#> [1] 11
adders2[[10]](10)
#> [1] 20
x = NULL
 x > 0
 if(x>0){
   print(1)
 }
 
 `&&` <- function(x, y) {
  if (!x) return(FALSE)
  if (!y) return(FALSE)

  TRUE
}
a <- NULL
!is.null(a) && a > 0
#> [1] FALSE

"&&"(!is.null(a),a > 0)
'&&'

special argument called …

get all the other thing

Exercises

x <- sample(replace = TRUE, 20, x = c(1:10, NA))
y <- runif(min = 0, max = 1, 20)
cor(m = "k", y = y, u = "p", x = x)
f1 <- function(x = {y <- 1; 2}, y = 0) {
  x + y
}
f1()

{1;2}?
?{}
{c(1,2,3)}
{1,2,3}
{1;2;3}
f2 <- function(x = z) {
  z <- 100
  x
}
f2()

Special calls

Infix functions

paste(1,2,3)
paste0(1,2,3)
"new" %+% " string"
#> [1] "new string"
`%+%`("new", " string")
#> [1] "new string"1 + 5
#> [1] 6
`+`(1, 5)
#> [1] 6

Replacement functions

`second<-` <- function(x, value) {
  x[2] <- value
  x
}
x <- 1:10
second(x) <- 5L
x
`second<-`(x,3)
x
second(x) <- 3L
x
library(pryr)
x <- 1:10
address(x)
second(x) <- 6L
address(x)
#> [1] "0x397efe0"
`modify<-` <- function(x, position, value) {
  x[position] <- value
  x
}
x = 1:10
address(x)
modify(x, 1) <- 10
#>  [1] 10  6  3  4  5  6  7  8  9 10
x
address(x)

modify = function(x, position, value){
   x[position] <- value
   x
}
x
modify(x,2,10)
x
get("x")
exists("x")
assign("x",1)
x

(Yes, it really does create a local variable named tmp, which is removed afterwards.)

Return values

return one value in the case of error

the functions always made a copy

(There are two important exceptions to the copy-on-modify rule: environments and reference classes. These can be modified in place, so extra care is needed when working with them.)

library() which loads a package, and hence modifies the search path.

setwd(), Sys.setenv(), Sys.setlocale() which change the working directory, environment variables, and the locale, respectively.

plot() and friends which produce graphical output.

write(), write.csv(), saveRDS(), etc. which save output to disk.

options() and par() which modify global settings.

S4 related functions which modify global tables of classes and methods.

Random number generators which produce different numbers each time you run them.

f1 <- function() 1
f2 <- function() invisible(1)

a = f1()
#> [1] 1
f2()
f1() == 1
#> [1] TRUE
f2() == 1
#> [1] TRUE

On exit

getwd()
a  = setwd("~")
a
getwd()
par()
dput(par())
?on.exit()
opar <- par(mai = c(1,1,1,1))
on.exit(par(opar))
par(opar)

capture.output

OO Field guide

Base types

  1. R的类型来源于C,每个R类底层都是C类。
  2. base type 由R core Team组件,其他人不能自主构建
  3. data structures讲解了很多类型,但是还有很多其他的类型。比如:names calls promises
  4. 比较麻烦的事情是,有一些type比较混淆。比如function这个东西,不同的函数类型不同。
  5. 泛型函数的逻辑基本来源于C。
  6. S3 ,S4,RC 都建立在base 上面

S3

  1. first simplest OO
  2. only system used in the base and stat
  3. most common
  4. informal and ad hoc
  5. certain elegance minimalism

Recognising objects, generic functions, and methods

  1. most objects
  2. you cant test if it is
  3. is.object()
library(pryr)

df <- data.frame(x = 1:10, y = letters[1:10])
otype(df)    # A data frame is an S3 class
#> [1] "S3"
otype(df$x)  # A numeric vector isn't
#> [1] "base"
otype(df$y)  # A factor is
#> [1] "S3"
  1. method == generic functions in functions
  2. methods dont belong to objects + this is diff from other OO style
  3. Some S3 generics, like [, sum(), and cbind(), don’t call UseMethod() because they are implemented in C
  4. given a class, we can find the right method of the class.like mean.Date(),mean.default mean.difftime and so on
mean
#> function (x, ...) 
#> UseMethod("mean")
#> <bytecode: 0x21b6ea8>
#> <environment: namespace:base>
ftype(mean)
#> [1] "s3"      "generic"
cbind
# function (..., deparse.level = 1) 
# .Internal(cbind(deparse.level, ...))
# <bytecode: 0x000000000a4066f0>
# <environment: namespace:base>
ftype(cbind)
# "internal" "generic" 
methods("mean")
# [1] mean.Date     mean.default  mean.difftime
# [4] mean.POSIXct  mean.POSIXlt 
# see '?methods' for accessing help and source code
methods("sum")
methods("rbind")
methods(class = "ts")
#  [1] [             [<-           aggregate    
#  [4] as.data.frame cbind         coerce       
#  [7] cycle         diff          diffinv      
# [10] initialize    kernapply     lines        
# [13] Math          Math2         monthplot    
# [16] na.omit       Ops           plot         
# [19] print         show          slotsFromS3  
# [22] t             time          window       
# [25] window<-     
# see '?methods' for accessing help and source code
  1. you can get all the methods of a class

Defining classes and creating objects

  1. we can create a “foo” class.and Dont know why
# Create and assign class in one step
foo <- structure(list(), class = "foo")

# Create, then set class
foo <- list()
class(foo) <- "foo"
  1. 大部分对象建立在list
  2. 可以查看特定变量的类,和继承
  3. 存在对象具有父类,爷类。。c(glm,lm)
class(foo)
#> [1] "foo"
inherits(foo, "foo")
#> [1] TRUE
  1. 大多数类有构建方法
# 构建一个变量
foo <- function(x) {
  if (!is.numeric(x)) stop("X must be numeric")
  structure(list(x), class = "foo")
}
  1. 可以强制改变一个变量的类型。但是有时候结果可能出乎意料。
# Create a linear model
mod <- lm(log(mpg) ~ log(disp), data = mtcars)
class(mod)
#> [1] "lm"
print(mod)
#> 
#> Call:
#> lm(formula = log(mpg) ~ log(disp), data = mtcars)
#> 
#> Coefficients:
#> (Intercept)    log(disp)  
#>      5.3810      -0.4586



# Turn it into a data frame (?!)
class(mod) <- "data.frame"
# But unsurprisingly this doesn't work very well
mod
print(mod)
#>  [1] coefficients  residuals     effects       rank          fitted.values
#>  [6] assign        qr            df.residual   xlevels       call         
#> [11] terms         model        
#> <0 rows> (or 0-length row.names)
# However, the data is still there
mod$coefficients
#> (Intercept)   log(disp) 
#>   5.3809725  -0.4585683

Creating new methods and generics

f = function(x) UseMethod("f")
mean.a <- function(x) "Class a"
f
f.a
a <- structure(list(), class = "a")
class(a)
#> [1] "a"
mean(a)
#> [1] "Class a"
rm(mean.a)
  1. 没人保证调用的方法是你想要的。所以你得确保你的函数是针对这个类的。

Method dispatch

# install.packages("rvest")
# install.packages("leafletCN")
# windows:
  # Sys.setlocale("LC_CTYPE", "eng")
## GAME begin~
library(rvest)
library(leafletCN)
# Sys.setlocale("LC_CTYPE", "eng")
doc = read_html("http://www.pm25s.com/cn/rank/")
## http://flukeout.github.io/
cities = doc %>% html_nodes(".cityrank a") %>%
  html_text()
# windows:
#   cities = iconv(cities, "UTF-8", "UTF-8")
AQI = doc %>% html_nodes("span[class^='lv']") %>%
  html_text() %>% .[c(F,F,T)] %>% as.numeric
dat = data.frame(city = cities, AQI = AQI)
geojsonMap(dat, "city",
           popup =  paste0(dat$city,":",dat$AQI),
           palette = "Reds", legendTitle = "AQI")
%>%