First step is to load DESeq2 and the necessary packages
source("https://bioconductor.org/biocLite.R")
biocLite("DESeq2")
library(DESeq2)
OK great had to do a few things to get it to load: Do not compile from source (when it asks say n). I think this fixed problems.
Next step: I need to load the data. In this case I am loading the HTSeq count tables that I downloaded from trimming, mapping and counting on galaxy. First I have to load the sample table so that the object knows the conditions, then I can make the HTSeq count object.
sample_table_RNAi_ku217 = read.table("sample_table_RNAi_ku217.txt", header = TRUE, sep = "\t")
HTSeq_RNAi_ku217 <- DESeqDataSetFromHTSeqCount(sampleTable = sample_table_RNAi_ku217, directory = "HTseq_count_galaxy/", design = ~ Condition)
Next step: I wanter filter reads with low counts. I am going to filter out those reads that have less than 2 for total FPKM across all conditions. This is a proxy for saying that across biological replicates, there should be close to 1 FPKM in each condition. This is slightly more stringent than I did before, but I think it is good.
HTSeq_RNAi_ku217 <- HTSeq_RNAi_ku217[rowSums(counts(HTSeq_RNAi_ku217))>2]
Next for the intial analyses, I want to perform the read normalization and variance stabilization. Just so that I can take a look at the data. I am concerned right now it did not read in correctly
HTSeq_RNAi_ku217_vsd <- vst(HTSeq_RNAi_ku217, blind = FALSE)
HTSeq_RNAi_ku217_rlog <- rlog(HTSeq_RNAi_ku217, blind = FALSE)
head(assay(HTSeq_RNAi_ku217_vsd))
head(assay(HTSeq_RNAi_ku217_rlog))
OK I think it looks ok… itersting the values are so different depending on the assay type. Now I want to take a look at the PCA analysis.
plotPCA(HTSeq_RNAi_ku217_vsd, intgroup=c("Condition", "Replicate"))
plotPCA(HTSeq_RNAi_ku217_rlog, intgroup=c("Condition", "Replicate"))
OK few pieces here… there is still more varaince across the WTs, which is annoying, but PC2 clearly separates along the condition, so that is good. Also, looks like the file loaded correctly.
HTSeq_RNAi_ku217 <- DESeq(HTSeq_RNAi_ku217)
RNAi_N2_res <- results(HTSeq_RNAi_ku217, contrast = c("Condition", "nhr-25(RNAi)", "N2_L4440"))
ku217_N2_res <- results(HTSeq_RNAi_ku217, contrast = c("Condition", "nhr-25(ku217)", "N2_L4440"))
ku217_RNAi_res <- results(HTSeq_RNAi_ku217, contrast = c("Condition", "nhr-25(ku217)","nhr-25(RNAi)" ))
RNAi_N2_res
ku217_N2_res
ku217_RNAi_res
summary(RNAi_N2_res)
summary(ku217_N2_res)
summary(ku217_RNAi_res)
plotCounts(dds = HTSeq_RNAi_ku217, gene = "lin-14", intgroup = c("Condition"))
plotMA(RNAi_N2_res)
plotMA(ku217_N2_res)
plotMA(ku217_RNAi_res)
Ordering the results tables by the smallest pvalue, then subset for only those padj values that are less than .05, I could likely even go as low as .01, but I will start with .05.I am going to write the tables to files, then I can use these in a python analysis.
RNAi_N2_res_ordered <- RNAi_N2_res[order(RNAi_N2_res$pvalue),]
ku217_N2_res_ordered <- ku217_N2_res[order(ku217_N2_res$pvalue),]
ku217_RNAi_res_ordered <- ku217_RNAi_res[order(ku217_RNAi_res$pvalue),]
RNAi_N2_res_sig <- subset(RNAi_N2_res_ordered, padj < .05)
ku217_N2_res_sig <- subset(ku217_N2_res_ordered, padj < .05)
ku217_RNAi_res_sig <- subset(ku217_RNAi_res_ordered, padj < .05)
write.table(as.data.frame(RNAi_N2_res_sig), file="RNAi_N2_dea.txt", sep = "\t")
write.table(as.data.frame(ku217_N2_res_sig), file="ku217_N2_dea.txt", sep = "\t")
write.table(as.data.frame(ku217_RNAi_res_sig), file="ku217_RNAi_dea.txt", sep = "\t")
I wrote a python script that correlates the chip peaks with diff reg geens in the rna-seq against N2. I am going to read that file in here and then do some visualization.
peak_to_gene <- read.table('peak_to_drg.txt', header = TRUE, sep = '\t')
Try and plot all the distances as a histogram
hist(peak_to_gene$Peak.to.DRG.distance, breaks = 50, main = 'ChIP peak to diff reg gene', xlab = 'Peak to gene distance (basepairs)', xlim = c(-60000, 60000), col = '#2e6bf1')
Now I want to make a violin plot sep, by the types of peaks and then hopefully the types of chromatin. Question: do the distance change based on peak features?
peak_to_gene$motif.class <- as.factor(peak_to_gene$motif.class)
library(ggplot2)
violin <- ggplot(peak_to_gene, aes(x=peak_to_gene$motif.class, y=peak_to_gene$Peak.to.DRG.distance, fill = peak_to_gene$motif.class)) + geom_violin(trim = FALSE)
violin + stat_summary(fun.data=mean_sdl, mult = 1, geom="pointrange", width=.1)
peak_to_gene$chromatin.class <- as.factor(peak_to_gene$chromatin.class)
violin <- ggplot(peak_to_gene, aes(x=peak_to_gene$chromatin.class, y=peak_to_gene$Peak.to.DRG.distance, fill = peak_to_gene$chromatin.class)) + geom_violin(trim = FALSE)
violin + stat_summary(fun.data=mean_sdl, mult = 1, geom="pointrange", width=.1)
LS0tCnRpdGxlOiAiUk5BaSBhbmQga3UyMTcgREVzZXEyIGFuZCBjb3JyZWxhdGlvbiB0byBDaElQIHBlYWtzIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAogIHBkZl9kb2N1bWVudDogZGVmYXVsdAogIGh0bWxfZG9jdW1lbnQ6IGRlZmF1bHQKLS0tCgpGaXJzdCBzdGVwIGlzIHRvIGxvYWQgREVTZXEyIGFuZCB0aGUgbmVjZXNzYXJ5IHBhY2thZ2VzCgpgYGB7cn0Kc291cmNlKCJodHRwczovL2Jpb2NvbmR1Y3Rvci5vcmcvYmlvY0xpdGUuUiIpCmJpb2NMaXRlKCJERVNlcTIiKQpsaWJyYXJ5KERFU2VxMikKYGBgCk9LIGdyZWF0IGhhZCB0byBkbyBhIGZldyB0aGluZ3MgdG8gZ2V0IGl0IHRvIGxvYWQ6IERvIG5vdCBjb21waWxlIGZyb20gc291cmNlICh3aGVuIGl0IGFza3Mgc2F5IG4pLiBJIHRoaW5rIHRoaXMgZml4ZWQgcHJvYmxlbXMuCgpOZXh0IHN0ZXA6IEkgbmVlZCB0byBsb2FkIHRoZSBkYXRhLiBJbiB0aGlzIGNhc2UgSSBhbSBsb2FkaW5nIHRoZSBIVFNlcSBjb3VudCB0YWJsZXMgdGhhdCBJIGRvd25sb2FkZWQgZnJvbSB0cmltbWluZywgbWFwcGluZyBhbmQgY291bnRpbmcgb24gZ2FsYXh5LiBGaXJzdCBJIGhhdmUgdG8gbG9hZCB0aGUgc2FtcGxlIHRhYmxlIHNvIHRoYXQgdGhlIG9iamVjdCBrbm93cyB0aGUgY29uZGl0aW9ucywgdGhlbiBJIGNhbiBtYWtlIHRoZSBIVFNlcSBjb3VudCBvYmplY3QuIApgYGB7cn0Kc2FtcGxlX3RhYmxlX1JOQWlfa3UyMTcgPSByZWFkLnRhYmxlKCJzYW1wbGVfdGFibGVfUk5BaV9rdTIxNy50eHQiLCBoZWFkZXIgPSBUUlVFLCBzZXAgPSAiXHQiKQpIVFNlcV9STkFpX2t1MjE3IDwtIERFU2VxRGF0YVNldEZyb21IVFNlcUNvdW50KHNhbXBsZVRhYmxlID0gc2FtcGxlX3RhYmxlX1JOQWlfa3UyMTcsIGRpcmVjdG9yeSA9ICJIVHNlcV9jb3VudF9nYWxheHkvIiwgZGVzaWduID0gfiBDb25kaXRpb24pCmBgYAoKTmV4dCBzdGVwOiBJIHdhbnRlciBmaWx0ZXIgcmVhZHMgd2l0aCBsb3cgY291bnRzLiBJIGFtIGdvaW5nIHRvIGZpbHRlciBvdXQgdGhvc2UgcmVhZHMgdGhhdCBoYXZlIGxlc3MgdGhhbiAyIGZvciB0b3RhbCBGUEtNIGFjcm9zcyBhbGwgY29uZGl0aW9ucy4gVGhpcyBpcyBhIHByb3h5IGZvciBzYXlpbmcgdGhhdCBhY3Jvc3MgYmlvbG9naWNhbCByZXBsaWNhdGVzLCB0aGVyZSBzaG91bGQgYmUgY2xvc2UgdG8gMSBGUEtNIGluIGVhY2ggY29uZGl0aW9uLiBUaGlzIGlzIHNsaWdodGx5IG1vcmUgc3RyaW5nZW50IHRoYW4gSSBkaWQgYmVmb3JlLCBidXQgSSB0aGluayBpdCBpcyBnb29kLgpgYGB7cn0KSFRTZXFfUk5BaV9rdTIxNyA8LSBIVFNlcV9STkFpX2t1MjE3W3Jvd1N1bXMoY291bnRzKEhUU2VxX1JOQWlfa3UyMTcpKT4yXQpgYGAKCk5leHQgZm9yIHRoZSBpbnRpYWwgYW5hbHlzZXMsIEkgd2FudCB0byBwZXJmb3JtIHRoZSByZWFkIG5vcm1hbGl6YXRpb24gYW5kIHZhcmlhbmNlIHN0YWJpbGl6YXRpb24uIEp1c3Qgc28gdGhhdCBJIGNhbiB0YWtlIGEgbG9vayBhdCB0aGUgZGF0YS4gSSBhbSBjb25jZXJuZWQgcmlnaHQgbm93IGl0IGRpZCBub3QgcmVhZCBpbiBjb3JyZWN0bHkKYGBge3J9CkhUU2VxX1JOQWlfa3UyMTdfdnNkIDwtIHZzdChIVFNlcV9STkFpX2t1MjE3LCBibGluZCA9IEZBTFNFKQpIVFNlcV9STkFpX2t1MjE3X3Jsb2cgPC0gcmxvZyhIVFNlcV9STkFpX2t1MjE3LCBibGluZCA9IEZBTFNFKQpoZWFkKGFzc2F5KEhUU2VxX1JOQWlfa3UyMTdfdnNkKSkKaGVhZChhc3NheShIVFNlcV9STkFpX2t1MjE3X3Jsb2cpKQpgYGAKCk9LIEkgdGhpbmsgaXQgbG9va3Mgb2suLi4gaXRlcnN0aW5nIHRoZSB2YWx1ZXMgYXJlIHNvIGRpZmZlcmVudCBkZXBlbmRpbmcgb24gdGhlIGFzc2F5IHR5cGUuIE5vdyBJIHdhbnQgdG8gdGFrZSBhIGxvb2sgYXQgdGhlIFBDQSBhbmFseXNpcy4KYGBge3J9CnBsb3RQQ0EoSFRTZXFfUk5BaV9rdTIxN192c2QsIGludGdyb3VwPWMoIkNvbmRpdGlvbiIsICJSZXBsaWNhdGUiKSkKcGxvdFBDQShIVFNlcV9STkFpX2t1MjE3X3Jsb2csIGludGdyb3VwPWMoIkNvbmRpdGlvbiIsICJSZXBsaWNhdGUiKSkKCmBgYApPSyBmZXcgcGllY2VzIGhlcmUuLi4gdGhlcmUgaXMgc3RpbGwgbW9yZSB2YXJhaW5jZSBhY3Jvc3MgdGhlIFdUcywgd2hpY2ggaXMgYW5ub3lpbmcsIGJ1dCBQQzIgY2xlYXJseSBzZXBhcmF0ZXMgYWxvbmcgdGhlIGNvbmRpdGlvbiwgc28gdGhhdCBpcyBnb29kLiBBbHNvLCBsb29rcyBsaWtlIHRoZSBmaWxlIGxvYWRlZCBjb3JyZWN0bHkuIApgYGB7cn0KSFRTZXFfUk5BaV9rdTIxNyA8LSBERVNlcShIVFNlcV9STkFpX2t1MjE3KQpSTkFpX04yX3JlcyA8LSByZXN1bHRzKEhUU2VxX1JOQWlfa3UyMTcsIGNvbnRyYXN0ID0gYygiQ29uZGl0aW9uIiwgIm5oci0yNShSTkFpKSIsICJOMl9MNDQ0MCIpKQprdTIxN19OMl9yZXMgPC0gcmVzdWx0cyhIVFNlcV9STkFpX2t1MjE3LCBjb250cmFzdCA9IGMoIkNvbmRpdGlvbiIsICJuaHItMjUoa3UyMTcpIiwgIk4yX0w0NDQwIikpCmt1MjE3X1JOQWlfcmVzIDwtIHJlc3VsdHMoSFRTZXFfUk5BaV9rdTIxNywgY29udHJhc3QgPSBjKCJDb25kaXRpb24iLCAibmhyLTI1KGt1MjE3KSIsIm5oci0yNShSTkFpKSIgKSkKUk5BaV9OMl9yZXMKa3UyMTdfTjJfcmVzCmt1MjE3X1JOQWlfcmVzCmBgYApgYGB7cn0Kc3VtbWFyeShSTkFpX04yX3JlcykKc3VtbWFyeShrdTIxN19OMl9yZXMpCnN1bW1hcnkoa3UyMTdfUk5BaV9yZXMpCmBgYApgYGB7cn0KcGxvdENvdW50cyhkZHMgPSBIVFNlcV9STkFpX2t1MjE3LCBnZW5lID0gImxpbi0xNCIsIGludGdyb3VwID0gYygiQ29uZGl0aW9uIikpCmBgYApgYGB7cn0KcGxvdE1BKFJOQWlfTjJfcmVzKQpwbG90TUEoa3UyMTdfTjJfcmVzKQpwbG90TUEoa3UyMTdfUk5BaV9yZXMpCmBgYAoKT3JkZXJpbmcgdGhlIHJlc3VsdHMgdGFibGVzIGJ5IHRoZSBzbWFsbGVzdCBwdmFsdWUsIHRoZW4gc3Vic2V0IGZvciBvbmx5IHRob3NlIHBhZGogdmFsdWVzIHRoYXQgYXJlIGxlc3MgdGhhbiAuMDUsIEkgY291bGQgbGlrZWx5IGV2ZW4gZ28gYXMgbG93IGFzIC4wMSwgYnV0IEkgd2lsbCBzdGFydCB3aXRoIC4wNS5JIGFtIGdvaW5nIHRvIHdyaXRlIHRoZSB0YWJsZXMgdG8gZmlsZXMsIHRoZW4gSSBjYW4gdXNlIHRoZXNlIGluIGEgcHl0aG9uIGFuYWx5c2lzLgoKYGBge3J9ClJOQWlfTjJfcmVzX29yZGVyZWQgPC0gUk5BaV9OMl9yZXNbb3JkZXIoUk5BaV9OMl9yZXMkcHZhbHVlKSxdCmt1MjE3X04yX3Jlc19vcmRlcmVkIDwtIGt1MjE3X04yX3Jlc1tvcmRlcihrdTIxN19OMl9yZXMkcHZhbHVlKSxdCmt1MjE3X1JOQWlfcmVzX29yZGVyZWQgPC0ga3UyMTdfUk5BaV9yZXNbb3JkZXIoa3UyMTdfUk5BaV9yZXMkcHZhbHVlKSxdClJOQWlfTjJfcmVzX3NpZyA8LSBzdWJzZXQoUk5BaV9OMl9yZXNfb3JkZXJlZCwgcGFkaiA8IC4wNSkKa3UyMTdfTjJfcmVzX3NpZyA8LSBzdWJzZXQoa3UyMTdfTjJfcmVzX29yZGVyZWQsIHBhZGogPCAuMDUpCmt1MjE3X1JOQWlfcmVzX3NpZyA8LSBzdWJzZXQoa3UyMTdfUk5BaV9yZXNfb3JkZXJlZCwgcGFkaiA8IC4wNSkKd3JpdGUudGFibGUoYXMuZGF0YS5mcmFtZShSTkFpX04yX3Jlc19zaWcpLCBmaWxlPSJSTkFpX04yX2RlYS50eHQiLCBzZXAgPSAiXHQiKQp3cml0ZS50YWJsZShhcy5kYXRhLmZyYW1lKGt1MjE3X04yX3Jlc19zaWcpLCBmaWxlPSJrdTIxN19OMl9kZWEudHh0Iiwgc2VwID0gIlx0IikKd3JpdGUudGFibGUoYXMuZGF0YS5mcmFtZShrdTIxN19STkFpX3Jlc19zaWcpLCBmaWxlPSJrdTIxN19STkFpX2RlYS50eHQiLCBzZXAgPSAiXHQiKQpgYGAKCkkgd3JvdGUgYSBweXRob24gc2NyaXB0IHRoYXQgY29ycmVsYXRlcyB0aGUgY2hpcCBwZWFrcyB3aXRoIGRpZmYgcmVnIGdlZW5zIGluIHRoZSBybmEtc2VxIGFnYWluc3QgTjIuIEkgYW0gZ29pbmcgdG8gcmVhZCB0aGF0IGZpbGUgaW4gaGVyZSBhbmQgdGhlbiBkbyBzb21lIHZpc3VhbGl6YXRpb24uCmBgYHtyfQpwZWFrX3RvX2dlbmUgPC0gcmVhZC50YWJsZSgncGVha190b19kcmcudHh0JywgaGVhZGVyID0gVFJVRSwgc2VwID0gJ1x0JykKYGBgCgpUcnkgYW5kIHBsb3QgYWxsIHRoZSBkaXN0YW5jZXMgYXMgYSBoaXN0b2dyYW0KCmBgYHtyfQpoaXN0KHBlYWtfdG9fZ2VuZSRQZWFrLnRvLkRSRy5kaXN0YW5jZSwgYnJlYWtzID0gNTAsIG1haW4gPSAnQ2hJUCBwZWFrIHRvIGRpZmYgcmVnIGdlbmUnLCB4bGFiID0gJ1BlYWsgdG8gZ2VuZSBkaXN0YW5jZSAoYmFzZXBhaXJzKScsIHhsaW0gPSBjKC02MDAwMCwgNjAwMDApLCBjb2wgPSAnIzJlNmJmMScpCmBgYApOb3cgSSB3YW50IHRvIG1ha2UgYSB2aW9saW4gcGxvdCBzZXAsIGJ5IHRoZSB0eXBlcyBvZiBwZWFrcyBhbmQgdGhlbiBob3BlZnVsbHkgdGhlIHR5cGVzIG9mIGNocm9tYXRpbi4gUXVlc3Rpb246IGRvIHRoZSBkaXN0YW5jZSBjaGFuZ2UgYmFzZWQgb24gcGVhayBmZWF0dXJlcz8KCmBgYHtyfQpwZWFrX3RvX2dlbmUkbW90aWYuY2xhc3MgPC0gYXMuZmFjdG9yKHBlYWtfdG9fZ2VuZSRtb3RpZi5jbGFzcykKbGlicmFyeShnZ3Bsb3QyKQp2aW9saW4gPC0gZ2dwbG90KHBlYWtfdG9fZ2VuZSwgYWVzKHg9cGVha190b19nZW5lJG1vdGlmLmNsYXNzLCB5PXBlYWtfdG9fZ2VuZSRQZWFrLnRvLkRSRy5kaXN0YW5jZSwgZmlsbCA9IHBlYWtfdG9fZ2VuZSRtb3RpZi5jbGFzcykpICsgIGdlb21fdmlvbGluKHRyaW0gPSBGQUxTRSkgCnZpb2xpbiArIHN0YXRfc3VtbWFyeShmdW4uZGF0YT1tZWFuX3NkbCwgbXVsdCA9IDEsIGdlb209InBvaW50cmFuZ2UiLCB3aWR0aD0uMSkKYGBgCmBgYHtyfQpwZWFrX3RvX2dlbmUkY2hyb21hdGluLmNsYXNzIDwtIGFzLmZhY3RvcihwZWFrX3RvX2dlbmUkY2hyb21hdGluLmNsYXNzKQp2aW9saW4gPC0gZ2dwbG90KHBlYWtfdG9fZ2VuZSwgYWVzKHg9cGVha190b19nZW5lJGNocm9tYXRpbi5jbGFzcywgeT1wZWFrX3RvX2dlbmUkUGVhay50by5EUkcuZGlzdGFuY2UsIGZpbGwgPSBwZWFrX3RvX2dlbmUkY2hyb21hdGluLmNsYXNzKSkgKyAgZ2VvbV92aW9saW4odHJpbSA9IEZBTFNFKSAKdmlvbGluICsgc3RhdF9zdW1tYXJ5KGZ1bi5kYXRhPW1lYW5fc2RsLCBtdWx0ID0gMSwgZ2VvbT0icG9pbnRyYW5nZSIsIHdpZHRoPS4xKQpgYGAKCg==