# Clear the console
rm(list = ls())
# Load the DESeq2 package
library("DESeq2")
# Load the limma package
library("limma")
# Set working directory for the datafile
setwd("~/Documents/git_repos/Oyster_DEseq2/")
# Load the file
data <- read.csv("data_51_full.txt", sep = "\t")
# Remove the "X" that gets prepended to the colnames
names(data) <- sub("X", "", names(data))
# Add the first column as rownames to the data
row.names(data) <- data[,1]
# Remove the first column that contains gene id's
data <- data[,-1]
# Create a df with sample, condition, population and replicate information
samples <- data.frame(
sample = names(data),
cond = sub("(5|33)_(OP|M|LL)_([1-6])","\\1",names(data)),
pop = sub("(5|33)_(OP|M|LL)_([1-6])","\\2",names(data)),
rep = sub("(5|33)_(OP|M|LL)_([1-6])","\\3",names(data))
)
samples
## sample cond pop rep
## 1 5_OP_1 5 OP 1
## 2 5_OP_2 5 OP 2
## 3 5_OP_3 5 OP 3
## 4 5_OP_4 5 OP 4
## 5 5_OP_5 5 OP 5
## 6 5_OP_6 5 OP 6
## 7 33_OP_1 33 OP 1
## 8 33_OP_2 33 OP 2
## 9 33_OP_3 33 OP 3
## 10 33_OP_4 33 OP 4
## 11 33_OP_5 33 OP 5
## 12 33_OP_6 33 OP 6
## 13 5_M_1 5 M 1
## 14 5_M_2 5 M 2
## 15 5_M_3 5 M 3
## 16 5_M_4 5 M 4
## 17 5_M_5 5 M 5
## 18 5_M_6 5 M 6
## 19 33_M_1 33 M 1
## 20 33_M_2 33 M 2
## 21 33_M_3 33 M 3
## 22 33_M_4 33 M 4
## 23 33_M_5 33 M 5
## 24 33_M_6 33 M 6
## 25 5_LL_1 5 LL 1
## 26 5_LL_2 5 LL 2
## 27 5_LL_3 5 LL 3
## 28 5_LL_4 5 LL 4
## 29 5_LL_5 5 LL 5
## 30 5_LL_6 5 LL 6
## 31 33_LL_1 33 LL 1
## 32 33_LL_2 33 LL 2
## 33 33_LL_3 33 LL 3
## 34 33_LL_4 33 LL 4
## 35 33_LL_5 33 LL 5
## 36 33_LL_6 33 LL 6
# Count matrix input (no design here)
dds <- DESeqDataSetFromMatrix(countData = data,
colData = samples,
design = ~ cond + cond:pop) #This is essentially the same design as # the ~cond + pop + cond:pop, but
# makes it easier to perform the contrasts
dds
## class: DESeqDataSet
## dim: 51574 36
## metadata(0):
## assays(1): counts
## rownames(51574): oystercontig_1 oystercontig_2 ...
## oystercontig_51573 oystercontig_51574
## rowRanges metadata column names(0):
## colnames(36): 1 2 ... 35 36
## colData names(4): sample cond pop rep
# Add feature data (gene id's as column)
featureData <- data.frame(gene=rownames(data))
(mcols(dds) <- DataFrame(mcols(dds), featureData))
## DataFrame with 51574 rows and 1 column
## gene
## <factor>
## 1 oystercontig_1
## 2 oystercontig_2
## 3 oystercontig_3
## 4 oystercontig_4
## 5 oystercontig_5
## ... ...
## 51570 oystercontig_51570
## 51571 oystercontig_51571
## 51572 oystercontig_51572
## 51573 oystercontig_51573
## 51574 oystercontig_51574
# Filtering to remove rows with 0 reads
dds <- dds[ rowSums(counts(dds)) > 1, ]
# First generate the dispersions
dds <- DESeq(dds)
# Dropping condition column
dds <- nbinomLRT(dds, full = design(dds), reduced = ~ cond)
# Check the coefficients
resultsNames(dds)
## [1] "Intercept" "cond_5_vs_33" "cond33.popM" "cond5.popM"
## [5] "cond33.popOP" "cond5.popOP"
# nbinormLRT
# For analyses using the likelihood ratio test (using nbinomLRT), the p-values are determined solely # by the difference in deviance between the full and reduced model formula. A log2 fold change is # included, which # can be controlled using the name argument, or by default this will be the # estimated coefficient for the # last element of resultsNames(object)
# Question 1: There is not a single contrast which gives the answer of DE of e.g. (OP vs LL) or (M # vs LL), but you can accomplish this by taking the union of the FDR limited sets from the above two # pairwise comparisons.
# You can compare OP vs LL for treatment 5 with the following
res_OPvsLL.5 <- results(dds, test = "Wald", name="cond5.popOP")
res_OPvsLL.5 <- subset(res_OPvsLL.5, res_OPvsLL.5$padj <= 0.05)
res_OPvsLL.5.genes <- row.names(res_OPvsLL.5)
# You can compare M vs LL for treatment 5 with the following
res_MvsLL.5 <- results(dds, test = "Wald", name = "cond5.popM")
res_MvsLL.5 <- subset(res_MvsLL.5, res_MvsLL.5$padj <= 0.05)
res_MvsLL.5.genes <- row.names(res_MvsLL.5)
# Combining the two above..
comb <- c(res_OPvsLL.5.genes,res_MvsLL.5.genes)
# Comparing comb with the above two
res_OPvsLL.5.genes.2 <- comb %in% res_OPvsLL.5.genes
res_MvsLL.5.genes.2 <- comb %in% res_MvsLL.5.genes
# Generating venn counts to plot venn diagram
counts.5 <- cbind(res_OPvsLL.5.genes.2, res_MvsLL.5.genes.2)
results.5 <- vennCounts(counts.5)
vennDiagram(results.5, cex = 1,names = c("OPvsLL.5","MvsLL.5"), circle.col = c("red", "blue"))

# You can compare OP vs LL for treatment 33 with the following
res_OPvsLL.33 <- results(dds, test = "Wald", name="cond33.popOP")
res_OPvsLL.33 <- subset(res_OPvsLL.33, res_OPvsLL.33$padj <= 0.05)
res_OPvsLL.33.genes <- row.names(res_OPvsLL.33)
# You can compare M vs LL for treatment 33 with the following
res_MvsLL.33 <- results(dds, test = "Wald", name = "cond33.popM")
res_MvsLL.33 <- subset(res_MvsLL.33, res_MvsLL.33$padj <= 0.05)
res_MvsLL.33.genes <- row.names(res_MvsLL.33)
# Combining the two above..
comb <- c(res_OPvsLL.33.genes,res_MvsLL.33.genes)
# Comparing the comb with the above two
res_OPvsLL.33.genes.2 <- comb %in% res_OPvsLL.33.genes
res_MvsLL.33.genes.2 <- comb %in% res_MvsLL.33.genes
# Generating venn counts to plot venn diagram
counts.33 <- cbind(res_OPvsLL.33.genes.2, res_MvsLL.33.genes.2)
results.33 <- vennCounts(counts.33)
vennDiagram(results.33, cex = 1,names = c("OPvsLL.33","MvsLL.33"), circle.col = c("red", "blue"))

# Question 2:
# If you want to ask if the OP vs LL effect is different in treatment 33 vs treatment 5, you form the contrast:
dds <- DESeq(dds)
res.1 <- results(dds, contrast=list("cond33.popOP", "cond5.popOP"))
res.1 <- subset(res.1, res.1$padj <= 0.05)
dim(res.1) # Very few genes
## [1] 16 6
# If you want to ask if the M vs LL effect is different in treatment 33 vs treatment 5, you form the contrast:
res.2 <- results(dds, contrast=list("cond33.popM", "cond5.popM"))
res.2 <- subset(res.2, res.2$padj <= 0.05)
dim(res.2)
## [1] 173 6
# Then you extract the genes in the union of venn diagram. Not doing union here since there are very few gens in the the first contrast