library("edgeR")
## Loading required package: limma
library(ggplot2)
files <- c("265_1A.count", "265_2B.count", "265_3C.count", "265_1A_2.count",
"265_2B_2.count", "265_4A.count", "265_1B.count", "265_3A.count", "265_4B.count",
"265_2A.count", "265_3B.count", "265_4C.count", "400_1A.count", "400_2B.count",
"400_3C.count", "400_1A_2.count", "400_2B_2.count", "400_4A.count", "400_1B.count",
"400_3A.count", "400_4C.count", "400_2A.count", "400_3B.count")
Counts files are from HTseq analysis of STAR alignment to genome
setwd("/Users/jri/projects/dolores/teo_plastic/results/")
counts = readDGE(files)$counts
metadat <- data.frame(files)
metadat$shortname = colnames(counts)
metadat$co2 = c(rep(265, 12), rep(400, 11))
noint = rownames(counts) %in% c("__no_feature", "__ambiguous", "__too_low_aQual",
"__not_aligned", "__alignment_not_unique")
cpms <- cpm(counts)
Keep only genes with >= 3 hits and calculate normalization factors
keep = rowSums(cpms > 1) >= 3 & !noint
new.counts <- counts[keep, ]
# head(new.counts[,order(metadat$co2)],5)
d = DGEList(counts = new.counts, group = metadat$co2)
d2 = calcNormFactors(d)
Plot MDS of data
plotMDS(d2, pch = 16, labels = metadat$shortname, cex = 0.75, col = c("green",
"blue")[factor(metadat$co2)])
ML analysis of neg. binomial
d3 = estimateCommonDisp(d2)
d4 = estimateTagwiseDisp(d3)
Plot mean, variance and CoV
plotMeanVar(d4, show.tagwise.vars = TRUE, NBline = TRUE)
plotBCV(d4)
EdgeR analyses of expression diffs.
de = exactTest(d4)
tt = topTags(de, n = nrow(d4))
# head(tt$table) head(nc[rn,order(metadat$co2)],5)
Plot tag mean vs variance
# counts per million of d4
nc = cpm(d4, normalized.lib.sizes = TRUE)
rn = rownames(tt$table)
# highlight stuff with log diff >2 or with FDR < 0.01
deg = rn[tt$table$FDR < 0.01]
# deg=rn[tt$table$logFC>2]
plotSmear(d4, de.tags = deg, ylab = "log fold counts")
abline(h = c(2, -2), col = "grey", lwd = 2, lty = 2)
# set alpha and color al=rep(0,length(tt$table$FDR));
# al[which(tt$table$FDR>0.01)]=0.5 cl=rep(1,length(tt$table$FDR));
# cl[which(tt$table$FDR>0.01)]=0.2
# plot(tt$table$logFC~tt$table$logCPM,pch=16,cex=0.5,col=rgb(cl,.2,.2,al),ylab='log2
# difference in expression 400-265',xlab='average log counts per million');
# points(tt$table$logFC~tt$table$logCPM,pch=16,cex=0.5,col=rgb(cl,0,0,1-al))
write.csv(tt$table, file = "toptags_edgeR.csv")