motifStack

A tool for graphic representation of multiple motifs

author: Jianhong Ou, Julie Zhu

date: July 30, 2014

From single motif to multiple motifs

  • single motif: the seqLogo package, or WebLogo

  • Multiple motifs: motifStack

motifStack workflow

Demo: draw single motif

## Load motifStack, read the bundled file(s) in its extdata directory that
## match the pattern "bin_SOLEXA.pcm", and plot the first motif returned.
library(motifStack)
pcm <- readPCM(
    file.path(find.package("motifStack"), "extdata"),
    pattern="bin_SOLEXA.pcm"
)
## font is passed straight through to the plot method
plot(pcm[[1]], font="mono,Courier")

plot of chunk unnamed-chunk-1

Demo: draw Amino-Acid sequence logo

## Build a "pfm" object named "CAP" from the cap.txt table shipped in
## motifStack's extdata directory, coloured with the "AA" alphabet and the
## "chemistry" colour scheme, then plot it.
protein <- read.table(
    file.path(find.package("motifStack"), "extdata", "cap.txt")
)
## keep columns 1-20 and transpose, so those columns become the matrix rows
protein <- t(protein[, 1:20])
## convert the counts with pcm2pfm() and wrap the result in the pfm class
motif <- new(
    "pfm",
    mat=pcm2pfm(protein),
    name="CAP",
    color=colorset(alphabet="AA", colorScheme="chemistry")
)
plot(motif)

plot of chunk unnamed-chunk-2

Demo: read multiple position count matrices

## Read every file in motifStack's extdata directory matching the pattern
## "pcm$", then convert each element of the result with pcm2pfm().
pcms <- readPCM(
    file.path(find.package("motifStack"), "extdata"),
    pattern="pcm$"
)
pfms <- lapply(X=pcms, FUN=pcm2pfm)

Demo: draw motif stacks

## Draw the motifs in `pfms` as a stack using the "tree" layout.
motifStack(
    pfms,
    layout="tree",
    trueDist=TRUE
)

plot of chunk unnamed-chunk-4

## Same "tree" stack as before, this time with ic.scale switched off.
## NOTE(review): presumably ic.scale controls information-content scaling of
## the letter heights -- confirm against ?motifStack.
motifStack(
    pfms,
    layout="tree",
    trueDist=TRUE,
    ic.scale=FALSE
)

plot of chunk unnamed-chunk-5

Demo: draw motifs as circular dendrogram

## Pull every MotifDb entry matching "Dmelanogaster" into a plain list.
library("MotifDb")
matrix.fly <- query(MotifDb, "Dmelanogaster")
motifs2 <- as.list(matrix.fly)
## use data from FlyFactorSurvey: keep entries whose name contains the tag
motifs2 <- motifs2[grep("Dmelanogaster\\-FlyFactorSurvey\\-", names(motifs2))]
## format the names, one substitution per step
## 1. drop any trailing run of "_<digits>" groups
names(motifs2) <- gsub("(_\\d+)+$", "", names(motifs2))
## 2. replace every non-alphanumeric character with "_"
names(motifs2) <- gsub("[^a-zA-Z0-9]","_", names(motifs2))
## 3. drop a trailing "_FBgn<digits>" identifier
names(motifs2) <- gsub("_FBgn\\d+$", "", names(motifs2))
## 4. remove the literal text "Dmelanogaster_FlyFactorSurvey_"
names(motifs2) <- gsub("Dmelanogaster_FlyFactorSurvey_", "", names(motifs2))
## 5. drop one remaining trailing "_<digits>" suffix
names(motifs2) <- gsub("_\\d+$", "", names(motifs2))

Calculate motif distance

## keep one entry per distinct name (indexing by name picks the first match)
motifs2 <- motifs2[unique(names(motifs2))]
## draw 50 entries at random
## NOTE(review): no seed is set in the visible code, so the selection (and
## every plot derived from it) differs between runs.
pfms <- sample(motifs2, size=50)
## create a list of pfm objects, one per sampled matrix, named after its entry
motifs2 <- lapply(
    names(pfms),
    function(motif_name) new("pfm",mat=pfms[[motif_name]], name=motif_name)
)
## trim the motifs (t=0.4 is handed to trimMotif for every element)
motifs2 <- lapply(motifs2, function(one_motif) trimMotif(one_motif, t=0.4))
## load the score table bundled with MotIV
## NOTE(review): jaspar.scores is not referenced again in the visible code --
## confirm whether it is still needed.
jaspar.scores <- MotIV::readDBScores(
    file.path(find.package("MotIV"), "extdata", "jaspar2010_PCC_SWU.scores")
)
## use MotIV to calculate the distances of motifs, then cluster them;
## the distances are computed from `pfms` (the sampled matrices), not from the
## trimmed pfm objects held in `motifs2`
d <- MotIV::motifDistances(pfms)
hc <- MotIV::motifHclust(d)

Re-sort the motifs by the order of hclust

## turn the hclust result into a phylog object
phylog <- hclust2phylog(hc)
## put the matrices into the order of the phylog leaves
leaves <- names(phylog$leaves)
pfms <- pfms[leaves]
## replace each matrix by a pfm object named after its list entry
## (the result of lapply over a character vector is an unnamed list)
pfms <- lapply(
    names(pfms),
    function(motif_name) new("pfm",mat=pfms[[motif_name]], name=motif_name)
)

Merge close motifs to motif signatures

## extract the motif signatures
motifSig <- motifSignature(pfms, phylog, groupDistance=0.01, min.freq=1)
## assign groups for motifs: a leaf name ending in one of the listed method
## tags is reduced to that tag; any other name is left unchanged by gsub()
methods <- gsub("^.*?_(SANGER|FlyReg|SOLEXA|Cell|NAR)$", "\\1", leaves)
names(methods) <- leaves
## assign group colors
## seq_along() replaces 1:length(), which would yield c(1, 0) for empty input
methods.col <- seq_along(methods)
## NOTE(review): methods.col has one entry per leaf, but only its first
## length(unique(methods)) entries receive a name here (the rest get NA).
## Indexing it by method name, methods.col[methods], therefore yields one
## palette index per leaf that is shared by all leaves of the same method.
names(methods.col) <- unique(methods)
## get the signatures from object of motifSignature
sig <- signatures(motifSig)
## set the inner-circle color for each signature
gpCol <- sigColor(motifSig)

Plot the motifs as circular dendrogram

## Draw the signatures around the phylog as a circular dendrogram.
## col.bg: look the palette up by each leaf's method so that leaves sharing a
##   method share a background colour; passing methods.col unindexed would
##   hand every leaf its own running index instead of its group colour.
## col.leaves: cycle through colours 1-6 for as many entries as gpCol has;
##   rep_len() stays NA-free however long gpCol is.
plotMotifStackWithRadialPhylog(phylog=phylog, pfms=sig,
    circle=0.6, cleaves = 0.5,
    clabel.leaves = 0.7,
    col.bg=methods.col[methods], col.bg.alpha=0.3,
    col.leaves=rep_len(1:6, length(gpCol)),
    col.inner.label.circle=gpCol,
    inner.label.circle.width=0.03,
    angle=350, circle.motif=1.2,
    motifScale="logarithmic")

plot of chunk unnamed-chunk-11

motif cloud

## Draw the motif signatures as a cloud with the "rectangles" layout and a
## legend. TRUE is spelled out because T is an ordinary variable that can be
## reassigned.
motifCloud(motifSig, scale=c(6, .5),
           layout="rectangles",
           draw.legend=TRUE)

plot of chunk unnamed-chunk-12

## Draw the same signatures with the "cloud" layout and ic.scale switched off.
motifCloud(
    motifSig,
    layout="cloud",
    ic.scale=FALSE
)

plot of chunk unnamed-chunk-13