Maxquant produces quite convoluted output in tab delimited files. To get a first idea obout those files I would like to read them into a sql database and explore them using sqlitestudio.
To populate the database I am going to use R and the package dplyr.
suppressMessages(library(dplyr))
path <- "output/20150930/combined/txt/"
files <- dir(path)
filesf <- dir(path, full.names = T)
files <- data_frame(files,filesf)
info <- file.info(files$filesf)
stopifnot(files$filesf == rownames(info))
files <- tbl_df(data.frame(files, info))
glimpse(files)
## Observations: 20
## Variables: 9
## $ files (chr) "aifMsms.txt", "allPeptides.txt", "Arg10_StandardSites....
## $ filesf (chr) "output/20150930/combined/txt/aifMsms.txt", "output/201...
## $ size (dbl) 0, 107412695, 129699, 3179471, 0, 186883, 0, 251804, 0,...
## $ isdir (lgl) FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,...
## $ mode (octm) 666, 666, 666, 666, 666, 666, 666, 666, 666, 666, 666,...
## $ mtime (time) 2015-09-30 16:52:38, 2015-09-30 16:56:51, 2015-09-30 1...
## $ ctime (time) 2015-09-30 11:51:04, 2015-09-30 11:52:29, 2015-09-30 1...
## $ atime (time) 2015-09-30 11:51:04, 2015-09-30 16:54:06, 2015-09-30 1...
## $ exe (chr) "no", "no", "no", "no", "no", "no", "no", "no", "no", "...
Select text files with size greater than 0.
files <- filter(files, size > 0)
files <- files[ grep("\\.txt",files$files),]
mqdata <-list()
for(file in files$filesf){
mqdata[[file]] <- read.csv(file,sep="\t",stringsAsFactors = FALSE)
}
stopifnot(names(mqdata) == files$filesf)
Before doing it you need to fix the database files which we derive from the file names. As a collegue of mine says if you see a file name such as “Oxidation (M)Sites.txt” you know where they came from. It is a lonely place.
names <- files$files
names <- gsub("\\.txt", "",files$files)
names <- gsub("[[:space:]]", "", names)
names <- gsub("[[:punct:]]", "", names)
names(mqdata) <- names
library(DBI)
if(file.exists(file.path(path,"my_mqdb.sqlite3"))){
file.remove(file.path(path,"my_mqdb.sqlite3"))
}
my_mqdb <- src_sqlite(file.path(path,"my_mqdb.sqlite3"), create=TRUE)
for( i in 1:length(mqdata)){
tmp<-copy_to(my_mqdb, mqdata[[i]], name=names(mqdata)[i], temporary = FALSE)
}
dbDisconnect(my_mqdb$con)
Now you are ready to explore the data with SQLiteStudio.
For more interesting information about Data Science in Mass Spectrometry visit http://aggrivet.blogspot.ch/.