I dump the memory address with the following C code:
#define USE_RINTERNALS
#include <R.h>
#include <Rdefines.h>
/*
 * dump_address: print where an R object's header union and data live.
 *
 * Writes one line to the R console containing:
 *   1. the address of the `u` member of the SEXP (&(src->u)),
 *   2. the data pointer returned by INTEGER(src),
 *   3. the distance between the two, counted in units of `int`.
 *
 * No type check is performed on src; the data pointer is always obtained
 * through INTEGER(). Accessing src->u requires the SEXPREC definition to be
 * visible to this translation unit.
 *
 * Returns R_NilValue.
 */
SEXP dump_address(SEXP src) {
    /* %p requires a `void *` argument, so cast both pointers explicitly. */
    void *header_addr = (void *) &(src->u);
    void *data_addr = (void *) INTEGER(src);
    /* Pointer subtraction yields ptrdiff_t, which is wider than int on
     * 64-bit platforms; narrow it explicitly so it matches %d. */
    int offset = (int) (INTEGER(src) - (int *) &(src->u));

    Rprintf("%16p %16p %d\n", header_addr, data_addr, offset);
    return R_NilValue;
}
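It prints two addresses, the address of the `u` member of the `SEXP` and the address of the `integer` data, followed by the distance between them in units of `int`: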
# Compile dump_address.c into a shared library with Rcpp's (unexported) SHLIB
# helper, then load it so that dump_address can be reached through .Call().
Rcpp:::SHLIB("dump_address.c")
# Use the platform's shared-library extension (".so" on Linux, ".dll" on
# Windows) instead of hard-coding ".so".
dyn.load(paste0("dump_address", .Platform$dynlib.ext))
Here is the sessionInfo of the testing environment.
# Record the R version, platform, locale and loaded packages that produced
# the addresses and timings shown in the rest of this document.
sessionInfo()
## R version 2.15.3 (2013-03-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=C LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.2
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.3 evaluate_0.4.3 formatR_0.7 Rcpp_0.10.3
## [5] stringr_0.6.2 tools_2.15.3
First I test the copy-on-write property, which means that R copies an object only when it is modified.
# Bind a to an integer value, then bind b to the same value.
a <- 1L
b <- a
# Both names report identical addresses: no copy has been made yet.
invisible(.Call("dump_address", a))
## 0x3bcc788 0x3bcc790 2
invisible(.Call("dump_address", b))
## 0x3bcc788 0x3bcc790 2
# After the update, b reports a different address from a.
# NOTE(review): `1` is a double literal, so b + 1 is a double vector here;
# b + 1L would keep b an integer.
b <- b + 1
invisible(.Call("dump_address", b))
## 0x410e0c8 0x410e0d0 2
The object b is copied from a only at the point where it is modified. R does implement the copy-on-write property.
Then I test if R will copy the object when we modify an element of a vector/matrix.
# Repeatedly assign to a length-one integer vector and print its address
# after every step; each step below reports a different address.
a <- 1L
invisible(.Call("dump_address", a))
## 0x3731798 0x37317a0 2
# Re-assigning the same literal value.
a <- 1L
invisible(.Call("dump_address", a))
## 0x2d898c8 0x2d898d0 2
# Replacing the first (only) element through subassignment.
a[1] <- 1L
invisible(.Call("dump_address", a))
## 0x3f47918 0x3f47920 2
# Assigning a different literal value.
a <- 2L
invisible(.Call("dump_address", a))
## 0x409efc8 0x409efd0 2
The address changes every time, which means that R does not reuse the memory.
# Same experiment on a long vector (10^7 integers), timing every step.
system.time(a <- rep(1L, 10^7))
## user system elapsed
## 0.060 0.024 0.087
invisible(.Call("dump_address", a))
## 0x7fe0996cf030 0x7fe0996cf038 2
# First element update: the address changes and the elapsed time is about
# the same as creating the vector.
system.time(a[1] <- 1L)
## user system elapsed
## 0.056 0.028 0.081
invisible(.Call("dump_address", a))
## 0x7fe0970a9030 0x7fe0970a9038 2
# Second update: the address is unchanged and the time is reported as 0.
system.time(a[1] <- 1L)
## user system elapsed
## 0 0 0
invisible(.Call("dump_address", a))
## 0x7fe0970a9030 0x7fe0970a9038 2
# Writing a different value: the address is still unchanged.
system.time(a[1] <- 2L)
## user system elapsed
## 0 0 0
invisible(.Call("dump_address", a))
## 0x7fe0970a9030 0x7fe0970a9038 2
For a long vector, R reuses the memory after the first modification.
Moreover, the above example also shows that “modify in place” does affect the performance when the object is huge.
# Repeat the experiment with a 3162 x 3162 integer matrix (about 10^7 cells).
system.time(a <- matrix(0L, 3162, 3162))
## user system elapsed
## 0.028 0.024 0.051
invisible(.Call("dump_address", a))
## 0x7fe0996d1030 0x7fe0996d1038 2
# First cell update via [row, column] indexing: the address changes.
system.time(a[1, 1] <- 0L)
## user system elapsed
## 0.016 0.020 0.033
invisible(.Call("dump_address", a))
## 0x7fe0970ad030 0x7fe0970ad038 2
# Second cell update: the address is unchanged and the time is reported as 0.
system.time(a[1, 1] <- 1L)
## user system elapsed
## 0 0 0
invisible(.Call("dump_address", a))
## 0x7fe0970ad030 0x7fe0970ad038 2
# Updating through a single (linear) index also leaves the address unchanged.
system.time(a[1] <- 2L)
## user system elapsed
## 0 0 0
invisible(.Call("dump_address", a))
## 0x7fe0970ad030 0x7fe0970ad038 2
# Repeating the same update: still the same address.
system.time(a[1] <- 2L)
## user system elapsed
## 0 0 0
invisible(.Call("dump_address", a))
## 0x7fe0970ad030 0x7fe0970ad038 2
It seems that R copies the object at the first modification only.
I don't know why.
# Same experiment for an attribute update: set names on a 100-element
# integer vector and print the address after every step.
system.time(a <- vector("integer", 10^2))
## user system elapsed
## 0 0 0
invisible(.Call("dump_address", a))
## 0x38d8370 0x38d8378 2
# First names<- call: the address changes.
system.time(names(a) <- paste(1:(10^2)))
## user system elapsed
## 0 0 0
invisible(.Call("dump_address", a))
## 0x2912020 0x2912028 2
# Assigning the same names again: the address is unchanged.
system.time(names(a) <- paste(1:(10^2)))
## user system elapsed
## 0 0 0
invisible(.Call("dump_address", a))
## 0x2912020 0x2912028 2
# Assigning different names: `:` binds tighter than `+`, so these are
# "2" .. "101". The address is still unchanged.
system.time(names(a) <- paste(1:(10^2) + 1))
## user system elapsed
## 0 0 0
invisible(.Call("dump_address", a))
## 0x2912020 0x2912028 2
The result is the same. R only copies the object at the first modification.