In mass spectrometry you frequently need to match points in RT and MZ with some measurement error.
tmp1 <- cbind(sample(letters,1000,replace=TRUE),sample(letters,1000,replace=TRUE),sample(letters,1000,replace=TRUE))
tmp2 <- cbind(sample(letters,1000,replace=TRUE),sample(letters,1000,replace=TRUE),sample(letters,1000,replace=TRUE))
tmp1<-apply(tmp1, 1, paste ,collapse="")
tmp2<-apply(tmp2, 1, paste ,collapse="")
nrseq1 <- 1:1000
nrseq2 <- (rnorm(1000, mean=1000,sd=100))
tab1 <- data.frame(tmp1, nrseq1 + rnorm(1000,0,1.5), nrseq2 + rnorm(1000,0,0.4))
tab2 <- data.frame(tmp2, nrseq1 + rnorm(1000,0,1.5), nrseq2 + rnorm(1000,0,0.4) )
colnames(tab1) <- c("name","rt","mz")
tab2 <- data.frame(tmp2, nrseq1 + rnorm(1000,0,1.5), nrseq2 + rnorm(1000,0,0.4) )
colnames(tab2) <- c("name","rt","mz")
Add 4 columns with error range for mz and rt to one of the tables
mzerror <- 1
rterror <- 2.5
tab2<-data.frame(tab2, rtmi = tab2$rt - rterror , rtma = tab2$rt + rterror , mzmi = tab2$mz - mzerror, mzma = tab2$mz + mzerror )
suppressMessages(library(dplyr))
suppressMessages(library(DBI))
dbfile <- "my_mqdb.sqlite3"
if(file.exists(dbfile)){
file.remove(dbfile)
}
## [1] TRUE
my_mqdb <- src_sqlite(dbfile, create=TRUE)
copy_to(my_mqdb, tab1, temporary = FALSE)
## Source: sqlite 3.8.6 [my_mqdb.sqlite3]
## From: tab1 [1,000 x 3]
##
## name rt mz
## (chr) (dbl) (dbl)
## 1 gfc 2.460138 1047.6260
## 2 rnx 3.214221 878.4476
## 3 gaj 4.190572 1019.8799
## 4 tfu 5.032405 999.7834
## 5 nms 5.064113 904.9872
## 6 rtv 5.417524 829.2131
## 7 hmj 8.062583 1055.5779
## 8 kxb 5.765629 1000.4393
## 9 mvv 7.816920 937.1018
## 10 loc 8.177781 1070.6675
## .. ... ... ...
copy_to(my_mqdb, tab2, temporary = FALSE)
## Source: sqlite 3.8.6 [my_mqdb.sqlite3]
## From: tab2 [1,000 x 7]
##
## name rt mz rtmi rtma mzmi mzma
## (chr) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl)
## 1 ssc 2.786813 1047.8346 0.2868130 5.286813 1046.8346 1048.8346
## 2 mgl 1.606113 878.4946 -0.8938866 4.106113 877.4946 879.4946
## 3 qjz 4.836084 1020.3323 2.3360841 7.336084 1019.3323 1021.3323
## 4 kit 3.795568 999.9929 1.2955676 6.295568 998.9929 1000.9929
## 5 ihk 5.152855 903.6745 2.6528545 7.652855 902.6745 904.6745
## 6 gao 5.978470 828.9304 3.4784703 8.478470 827.9304 829.9304
## 7 dnb 8.959492 1055.9336 6.4594919 11.459492 1054.9336 1056.9336
## 8 idg 9.276355 1001.0328 6.7763554 11.776355 1000.0328 1002.0328
## 9 hxy 7.677646 936.6617 5.1776462 10.177646 935.6617 937.6617
## 10 qzr 10.077482 1071.2388 7.5774821 12.577482 1070.2388 1072.2388
## .. ... ... ... ... ... ... ...
dbDisconnect(my_mqdb$con)
## [1] TRUE
my_mqdb <- src_sqlite(dbfile)
dbListTables(my_mqdb$con)
## [1] "sqlite_stat1" "tab1" "tab2"
res <-dbSendQuery(my_mqdb$con , "select tab1.mz, tab1.rt, tab2.mz as tab2mz, tab2.rt as tab2rt from tab1
inner join tab2 on tab1.mz < tab2.mzma and tab1.mz > tab2.mzmi
and tab1.rt < tab2.rtma and tab1.rt > tab2.rtmi")
resData <- dbFetch(res,n=-1)
dbClearResult(res)
## [1] TRUE
dim(resData)
## [1] 722 4
head(resData)
## mz rt tab2mz tab2rt
## 1 1047.6260 2.460138 1047.8346 2.786813
## 2 878.4476 3.214221 878.4946 1.606113
## 3 1019.8799 4.190572 1020.3323 4.836084
## 4 999.7834 5.032405 999.9929 3.795568
## 5 829.2131 5.417524 828.9304 5.978470
## 6 1055.5779 8.062583 1055.9336 8.959492
plot(resData$rt, resData$mz, xlab="RT", ylab="mz",pch="x" )
points(resData$tab2rt, resData$tab2mz,col=2 )
plot((resData$rt + resData$tab2rt)/2, resData$mz - resData$tab2mz, xlab="rt")
plot(resData$rt - resData$tab2rt, (resData$mz + resData$tab2mz)/2, ylab="mz")
dbDisconnect(my_mqdb$con)
## [1] TRUE
AFAIK you can’t do it as easily with any of the R join methods as base::merge or dplyr::innter_join. But maybee sqldf does it?
suppressMessages(library(sqldf))
res2 <- sqldf("select tab1.mz, tab1.rt, tab2.mz as tab2mz, tab2.rt as tab2rt from tab1
inner join tab2 on tab1.mz < tab2.mzma and tab1.mz > tab2.mzmi
and tab1.rt < tab2.rtma and tab1.rt > tab2.rtmi")
## Loading required package: tcltk
dim(res2)
## [1] 722 4
head(res2)
## mz rt tab2mz tab2rt
## 1 1047.6260 2.460138 1047.8346 2.786813
## 2 878.4476 3.214221 878.4946 1.606113
## 3 1019.8799 4.190572 1020.3323 4.836084
## 4 999.7834 5.032405 999.9929 3.795568
## 5 829.2131 5.417524 828.9304 5.978470
## 6 1055.5779 8.062583 1055.9336 8.959492
plot(res2$rt, res2$mz, xlab="RT", ylab="mz",pch="x" )
points(res2$tab2rt, res2$tab2mz,col=2 )