This is an R script that can read in rpm coverage data files and make an average gene plot that shows the coverage 1500 bp flanking both ends of the gene, as well as the average normalized gene (coverage across gene, broken into 500 bins). This then takes any list of genes and saves an average gene plot (png) for each list. The final output should look like the following image:
library(GenomicFeatures)
library(GenomicRanges)
library(tidyr)
library(dplyr)
library(ggplot2)
## Load in Files
rpm.files <- list.files(path="R", pattern="IP.*rpm.RData$", full.names=T)
for (i in 1:length(rpm.files)) {
load(rpm.files[i])
}
rpm.names <- ls(pattern="rpm$")
rpm.names
dre.NS.list <- sapply(grep("dre4_NS", rpm.names, value=T), get)
dre.dre.list <- sapply(grep("dre4_dre", rpm.names, value=T), get)
ssrp.NS.list <- sapply(grep("SSRP_NS", rpm.names, value=T), get)
ssrp.ssrp.list <- sapply(grep("SSRP_SSRP", rpm.names, value=T), get)
igg.list <- sapply(grep("IgG", rpm.names, value=T), get)
collapse.cov <- function(cov, genes.gr) {
cov.new <- Reduce('+', cov)
cov.new <- cov.new/length(cov)
names(cov.new) <- gsub("chr", "", names(cov.new))
keep <- match(seqlevels(genes.gr), names(cov.new))
cov.new <- cov.new[keep]
return(cov.new)
}
dre.NS.cov <- collapse.cov(cov=dre.NS.list, genes.gr = ssrp)
dre.dre.cov <- collapse.cov(cov=dre.dre.list, genes.gr = ssrp)
ssrp.NS.cov <- collapse.cov(cov=ssrp.NS.list, genes.gr = ssrp)
ssrp.ssrp.cov <- collapse.cov(cov=ssrp.ssrp.list, genes.gr = ssrp)
igg.cov <- collapse.cov(cov=igg.list, genes.gr =ssrp)
####Average Gene
get.cov.from.gr <- function(gr, gene, covs, bins) {
ch <- as.character(unique(seqnames(gr[gene])))
view <- Views(covs[ch][[1]], ranges(gr[gene]))
cov <- as.numeric(view[1][[1]])
if (as.logical(strand(gr[gene]) == "-")) {
cov <- rev(cov)
}
b <- length(cov)
st <- trunc(seq(1,b,by=(b/bins)) + .001)
end <- trunc(seq(trunc(b/bins), b, by=(b/bins)) + .001)
bin.mtx <- mapply(function(x,y) {cov[x:y]}, st, end)
cov.mtx <- colMeans(bin.mtx)
cov.mtx <- t(matrix(cov.mtx))
row.names(cov.mtx) <- names(gr[gene])
cov.mtx
}
get.cov.mtx <- function(gr.var, covs.var, n.cores, bins.var) {
cov.list <- mclapply(1:length(gr.var),
FUN=get.cov.from.gr,
gr = gr.var,
covs = covs.var,
bins = bins.var,
mc.cores = n.cores)
cov.mtx <- do.call(rbind, cov.list)
cov.mtx
}
colstd <- function(x) {
sapply(1:ncol(x), function(y) { sd(x[,y])})
}
ggplot_avg.gene.4 <- function(w, x, y, z, title) {
df <- data.frame(cbind(colMeans(w), colMeans(x), colMeans(y), colMeans(z)))
colnames(df) <- c("dre4.NS", "dre4.dre4", "ssrp.NS", "ssrp.ssrp")
df.gg <- df %>% gather(Expression, RPM, dre4.NS:ssrp.ssrp)
l <- ncol(x)
df.gg$Index <- rep(1:l, 4)
df.gg$se <- c(colstd(w)/sqrt(nrow(w)), colstd(x)/sqrt(nrow(x)), colstd(y)/sqrt(nrow(y)), colstd(z)/sqrt(nrow(z)))
ggplot(df.gg, aes(x=Index, y=RPM, Group=factor(Expression))) +
# geom_ribbon(aes(ymin=RPM-se, ymax=RPM+se), alpha=0.2) +
geom_line(aes(colour=factor(Expression)), size=1) +
#scale_colour_manual(labels = c("NS", "dre4", "ssrp"),
# values = c("#C77CFF","#F8766D", "#7CAE00", "#00BFC4")) +
scale_x_continuous(breaks = c(0, 150, 650, 800),
labels = c("-1500", "TSS", "pA", "+1500")) +
geom_vline(xintercept=150,
colour="red",
linetype="longdash") +
geom_vline(xintercept=650,
colour="red",
linetype="longdash") +
labs(colour = "Expiriment (ChIP.Knockdown)") +
xlab("Position") +
ggtitle(paste0("Average Gene Plot of ", title)) +
theme_bw()
}
ggplot_avg.gene <- function(w, x, y) {
df <- data.frame(cbind(colMeans(w), colMeans(x), colMeans(y)))
colnames(df) <- c("dre4", "NS", "ssrp")
df.gg <- df %>% gather(Expression, RPM, dre4:ssrp)
l <- ncol(x)
df.gg$Index <- rep(1:l, 3)
df.gg$se <- c(colstd(w)/sqrt(nrow(w)), colstd(x)/sqrt(nrow(x)), colstd(y)/sqrt(nrow(y)))
ggplot(df.gg, aes(x=Index, y=RPM, Group=factor(Expression))) +
# geom_ribbon(aes(ymin=RPM-se, ymax=RPM+se), alpha=0.2) +
geom_line(aes(colour=factor(Expression)), size=1) +
#scale_colour_manual(labels = c("NS", "dre4", "ssrp"),
# values = c("#C77CFF","#F8766D", "#7CAE00", "#00BFC4")) +
scale_x_continuous(breaks = c(0, 150, 650, 800),
labels = c("-1500", "TSS", "TES", "+1500")) +
geom_vline(xintercept=150,
colour="red",
linetype="longdash") +
geom_vline(xintercept=650,
colour="red",
linetype="longdash") +
labs(colour = "Expression") +
xlab("Position") +
ggtitle("Average Gene Plot") +
theme_bw()
}
###Load in granges
genes.all <- readRDS(file="/n/core/Genomics/Analysis/Conaway/theo_tettey/tht1/goi.RDS")
txdb <- loadDb("/home/adc/txdb/Dmel.txdb")
tx <- transcriptsBy(txdb, 'gene')
### Run Function
getplots <- function(geneset) {
unlist.tx <- unlist(tx)
gen <- unlist.tx[geneset[[1]]]
genes.gr <- gen[width(gen) > 1000]
genes.gr <- keepSeqlevels(genes.gr, names(dre.NS.cov))
chr.len <- seqlengths(seqinfo(genes.gr))
is.close.end <- end(genes.gr) + 1500 > chr.len[as.character(seqnames(genes.gr))]
genes.gr <- genes.gr[start(genes.gr) > 1500 & !(is.close.end)]
label <- paste0("img/average.gene/", names(geneset), "_cov.averagegene.png")
## Average Gene
dre.NS.mtx.gen <- get.cov.mtx(genes.gr, dre.NS.cov, 22, 500)
dre.dre.mtx.gen <- get.cov.mtx(genes.gr, dre.dre.cov, 22, 500)
ssrp.NS.mtx.gen <- get.cov.mtx(genes.gr, ssrp.NS.cov, 22, 500)
ssrp.ssrp.mtx.gen <- get.cov.mtx(genes.gr, ssrp.ssrp.cov, 22, 500)
##1500 upstream of Gene
pre <- flank(genes.gr, 1500, start=TRUE)
dre.NS.mtx.pre <- get.cov.mtx(pre, dre.NS.cov, 22, 150)
dre.dre.mtx.pre <- get.cov.mtx(pre, dre.dre.cov, 22, 150)
ssrp.NS.mtx.pre <- get.cov.mtx(pre, ssrp.NS.cov, 22, 150)
ssrp.ssrp.mtx.pre <- get.cov.mtx(pre, ssrp.ssrp.cov, 22, 150)
##1500 downstream of gene
post <- flank(genes.gr, 1500, start=FALSE)
dre.NS.mtx.post <- get.cov.mtx(post, dre.NS.cov, 22, 150)
dre.dre.mtx.post <- get.cov.mtx(post, dre.dre.cov, 22, 150)
ssrp.NS.mtx.post <- get.cov.mtx(post, ssrp.NS.cov, 22, 150)
ssrp.ssrp.mtx.post <- get.cov.mtx(post, ssrp.ssrp.cov, 22, 150)
##Combine them together
dre.NS.avg.gene <- cbind(dre.NS.mtx.pre, dre.NS.mtx.gen, dre.NS.mtx.post)
dre.dre.avg.gene <- cbind(dre.dre.mtx.pre, dre.dre.mtx.gen, dre.dre.mtx.post)
ssrp.ssrp.avg.gene <- cbind(ssrp.ssrp.mtx.pre, ssrp.ssrp.mtx.gen, ssrp.ssrp.mtx.post)
ssrp.NS.avg.gene <- cbind(ssrp.NS.mtx.pre, ssrp.NS.mtx.gen, ssrp.NS.mtx.post)
png(file=label, width=1080, height=720)
p <- ggplot_avg.gene.4(dre.NS.avg.gene, dre.dre.avg.gene, ssrp.NS.avg.gene, ssrp.ssrp.avg.gene, title = paste0(names(geneset), " (", length(geneset[[1]]), " Genes)"))
print(p)
dev.off()
}
for (i in 1:length(genes.all)) {
getplots(genes.all[i])
}