Splicing In DSCAM

Results

# Required libraries
library(ggplot2)
library(ggforce)
library(gridExtra)
# Load the splice-junction table written by the previous analysis.
dscamSJs <- read.csv(file = "dscam_sjs.csv")
# Keep the five columns used below (selected by position) and give them
# short names to work with.
graf <- dscamSJs[, c(3, 4, 8, 11, 12)]
# Running row index, used as the x axis of the plots. It is derived from the
# table itself rather than hard-coded as 1:63, so the script keeps working
# when the input gains or loses junctions.
id <- seq_len(nrow(graf))
graf <- cbind(graf, id)
colnames(graf) <- c("start", "end", "reads", "Experiment", "Novel", "ID")
# Each junction's share of the total read count over all junctions.
sumofreads <- sum(graf$reads)
norm_reads <- graf$reads / sumofreads
graf2 <- cbind(graf, norm_reads)
# Read depth graph: log-scaled read count per junction, coloured by
# experiment, with the point shape showing the Novel flag.
# The 0.0001 pseudocount sits inside log() so that a junction with zero reads
# gets a finite value; added after the log it only shifted every point and
# left log(0) = -Inf.
ggplot(graf, aes(x = ID, y = log(reads + 0.0001))) +
  geom_point(aes(colour = Experiment, shape = Novel), size = 3) +
  scale_shape_manual(values = c(16, 17)) +
  ggtitle("Read Depth of All Splice Junctions") +
  theme(plot.title = element_text(hjust = 0.5))

# Read depth graph with a zoom panel on the novel junctions (ID below 16).
# The 0.0001 pseudocount sits inside log() so that a junction with zero reads
# gets a finite value instead of log(0) = -Inf.
ggplot(graf, aes(x = ID, y = log(reads + 0.0001))) +
  geom_point(aes(colour = Experiment, shape = Novel), size = 3) +
  scale_shape_manual(values = c(16, 17)) +
  ggtitle("Read Depth of All Splice Junctions") +
  # Zoom condition refers to the ID column of graf; the lower-case `id` used
  # before is not a column and only resolved through a global vector.
  facet_zoom(x = ID < 16) +
  theme(plot.title = element_text(hjust = 0.5))

Or:

# Split the table by position: the first 15 rows are plotted as the novel
# junctions, all remaining rows as the known ones. head()/tail() avoid
# hard-coding the last row number.
nov <- head(graf, 15)
indb <- tail(graf, -15)
# Known junctions on a log scale. The 0.0001 pseudocount sits inside log() so
# that zero-read junctions get a finite value instead of log(0) = -Inf.
g1 <- ggplot(indb, aes(x = ID, y = log(reads + 0.0001))) +
  geom_point(aes(colour = Experiment), size = 3) +
  ggtitle("Read Depth of Known Splice Junctions(log scale)") +
  theme(plot.title = element_text(hjust = 0.5))
# Novel junctions on the raw read-count scale.
g2 <- ggplot(nov, aes(x = ID, y = reads)) +
  geom_point(aes(colour = Experiment), size = 3) +
  ggtitle("Read Depth of Novel Splice Junctions") +
  theme(plot.title = element_text(hjust = 0.5))
# Stack the two panels in a single column.
grid.arrange(g1, g2, ncol = 1)

With lines instead:

# Same two panels drawn as lines, one line per experiment. Uses the indb and
# nov subsets created earlier in the script.
# Known junctions on a log scale. The 0.0001 pseudocount sits inside log() so
# that zero-read junctions get a finite value instead of log(0) = -Inf.
g1 <- ggplot(indb, aes(x = ID, y = log(reads + 0.0001))) +
  geom_line(aes(colour = Experiment)) +
  ggtitle("Read Depth of Known Splice Junctions(log scale)") +
  theme(plot.title = element_text(hjust = 0.5))
# Novel junctions on the raw read-count scale.
g2 <- ggplot(nov, aes(x = ID, y = reads)) +
  geom_line(aes(colour = Experiment)) +
  ggtitle("Read Depth of Novel Splice Junctions") +
  theme(plot.title = element_text(hjust = 0.5))
# Stack the two panels in a single column.
grid.arrange(g1, g2, ncol = 1)

# Build the table behind the intron-motif bar chart: motif code and Novel
# flag, taken from columns 6 and 12 of the junction table.
int_mot <- dscamSJs[, c(6, 12)]
names(int_mot) <- c("Motif", "Novel")
# Key: Intron Motif (0:  non-canonical;  1:  GT/AG,  2:  CT/AC,  3:  GC/AG,  4:  CT/GC,  5:AT/AC, 6:  GT/AT)
# Only codes 2, 4 and 6 are given text labels; every other code is kept as
# its number, converted to text.
# NOTE(review): code 2 is labelled "GT/AG" although the key above lists it as
# CT/AC - presumably a deliberate strand conversion; confirm, and check
# whether codes 4 and 6 should be converted the same way.
motif_labels <- c("2" = "GT/AG", "4" = "CT/GC", "6" = "GT/AT")
motif_codes <- as.character(int_mot$Motif)
has_label <- motif_codes %in% names(motif_labels)
motif_codes[has_label] <- unname(motif_labels[motif_codes[has_label]])
int_mot$Motif <- motif_codes
# Intron motif count: one bar per motif, dodged by the Novel flag.
motif_plot <- ggplot(int_mot, aes(x = Motif, colour = Novel, fill = Novel))
motif_plot +
  geom_bar(position = "dodge") +
  ggtitle("Occurence of Intron Motif") +
  theme(plot.title = element_text(hjust = 0.5))

Here you can see that only the novel splice junctions in our data have intron motifs other than GT/AG.


Preceding pipeline: Finding novel splice junctions in RNA-seq data from the D. melanogaster DSCAM gene