Merge tutorial
small data set
“?merge”
set.seed(123) # set random seed
x <- data.frame(id = c(1,1,2,2,3,3),
time = c("w1","w2","w1","w2","w1","w2"),
weight=c(sample(1:6)))
y <- data.frame(id = c(1,1,2,2,3,3),
time = c("w1","w2","w1","w2","w1","w2"),
height=rnorm(6))
y <- y[sample(nrow(y)),]
merge(x=x, y=y, by=c("id","time"))
## id time weight height
## 1 1 w1 2 0.07051
## 2 1 w2 4 0.12929
## 3 2 w1 6 1.71506
## 4 2 w2 3 0.46092
## 5 3 w1 5 -1.26506
## 6 3 w2 1 -0.68685
x <- data.frame(k1 = c(NA,NA,3,4,5), k2 = c(1,NA,NA,4,5), data = 1:5)
y <- data.frame(k1 = c(NA,2,NA,4,5), k2 = c(NA,NA,3,4,5), data = 1:5)
merge(x, y, by = c("k1","k2")) # NA's match
## k1 k2 data.x data.y
## 1 4 4 4 4
## 2 5 5 5 5
## 3 NA NA 2 1
merge(x, y, by = "data") # NA's match, so 6 rows
## data k1.x k2.x k1.y k2.y
## 1 1 NA 1 NA NA
## 2 2 NA NA 2 NA
## 3 3 3 NA NA 3
## 4 4 4 4 4 4
## 5 5 5 5 5 5
merge(x, y, by = "k1") # NA's match, so 6 rows
## k1 k2.x data.x k2.y data.y
## 1 4 4 4 4 4
## 2 5 5 5 5 5
## 3 NA 1 1 NA 1
## 4 NA 1 1 3 3
## 5 NA NA 2 NA 1
## 6 NA NA 2 3 3
merge(x, y, by = "k2") # NA's match, so 6 rows
## k2 k1.x data.x k1.y data.y
## 1 4 4 4 4 4
## 2 5 5 5 5 5
## 3 NA NA 2 NA 1
## 4 NA NA 2 2 2
## 5 NA 3 3 NA 1
## 6 NA 3 3 2 2
merge(x, y, by = c("data", "k1","k2")) # NA's match
## data k1 k2
## 1 4 4 4
## 2 5 5 5
big data set
use data.table package
install.packages("data.table")
## Error: trying to use CRAN without setting a mirror
library(data.table)
n=1e6
DT = data.table( a=sample(1:1000,n,replace=TRUE),
b=sample(1:1000,n,replace=TRUE),
c=rnorm(n),
d=sample(c("foo","bar","baz","qux","quux"),n,replace=TRUE),
e=rnorm(n),
f=1:n)
DT2 = data.table(f=n:1, g=rnorm(n))
merge(DT, DT2, by="f")
## f a b c d e g
## 1e+00: 1 656 839 -0.9460 foo -0.4253 1.28413
## 2e+00: 2 709 968 -0.9979 baz -0.8791 -0.52712
## 3e+00: 3 545 336 -0.1102 baz 2.0788 -0.64525
## 4e+00: 4 595 979 -0.4677 baz -1.0269 0.63584
## 5e+00: 5 290 358 0.8173 quux -1.9013 -1.57476
## ---
## 1e+06: 999996 399 991 0.4975 foo 0.8475 -2.03451
## 1e+06: 999997 855 654 -0.4182 foo -0.9286 0.46599
## 1e+06: 999998 581 16 1.4037 qux -0.7189 -1.20245
## 1e+06: 999999 512 53 1.1885 bar -1.7627 0.07735
## 1e+06: 1000000 870 464 -1.0909 qux -1.6692 -1.11044