R Markdown

Simple script to read in the tabular report from MaxQuant on Galaxy and find the top 10 non-contaminant proteins.

library(dplyr, warn.conflicts = FALSE)
#Step 1: download .tabular protein groups file after running MaxQuant on Galaxy

#Step 2: upload file

#Step 3: read in file from Galaxy
# The file is tab-separated with a header row.
# quote = "" and comment.char = "" switch off read.table()'s default handling
# of ' " and # so that such characters inside a field (for example an
# apostrophe in a FASTA header) cannot merge or truncate columns.
myDF <- read.table(
  "Galaxy3-[MaxQuant_Protein_Groups_for_data_1_and_data_2].tabular",
  sep = "\t",
  header = TRUE,
  quote = "",
  comment.char = ""
)

#Step 4: remove rows with contaminants
# remove rows with decoy/reverse sequences
# only keep FASTA header and peptide count columns
# MaxQuant marks contaminant entries with the tag "CON__" and decoy/reverse
# entries with "REV__". Match the whole tag literally (fixed = TRUE) so that an
# ordinary identifier that merely contains the letters "CON" or "REV" is kept.
# A protein group is dropped if the tag appears anywhere in its Protein.IDs.
isContaminant <- grepl("CON__", myDF$Protein.IDs, fixed = TRUE)
myDF2 <- myDF[!isContaminant, ]

isDecoy <- grepl("REV__", myDF2$Protein.IDs, fixed = TRUE)
# Columns 6 and 8 are selected by position; they are expected to be the FASTA
# header and peptide count columns of this export -- NOTE(review): confirm the
# positions if the column layout of the input file changes.
myDF3 <- myDF2[!isDecoy, c(6, 8)]

#Step 5: Use dplyr to make list of top ten proteins
# Rank explicitly by the Peptides column. Without the second argument top_n()
# falls back to the last column of the data frame (and prints
# "Selecting by Peptides"), so the result would depend on column order.
topProteins <- myDF3 %>% top_n(10, Peptides)
#Note in this example, I get 11 proteins
#because the last two both have 37 peptides
#and dplyr won't break a tie

#Step 6: write to file
# Save the selected proteins as a CSV file; row names are left out.
write.csv(
  x = topProteins,
  file = "topProteins.csv",
  row.names = FALSE
)