An important analysis question is the quantification and statistical inference of systematic changes between conditions, as compared to within-condition variability. The package DESeq2 provides methods to test for differential expression by use of negative binomial generalized linear models; the estimates of dispersion and logarithmic fold changes incorporate data-driven prior distributions. Analyzing RNA-seq data with DESeq2 following the vignette http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html
library(DESeq2) # DE analysis
library(ashr) # lFC shinkage estimator
library(ggplot2) # plots
library(BiocParallel) # multi-core computations
library(pheatmap) # heatmaps
library(RColorBrewer) # colour palettes
library(dplyr) # list to dataframe
library(ggrepel) # nice arrangement of labels for ggplot
library(apeglm) #for shrinkage estimators for effect sizes for a variety of GLM models, using approximation of the posterior for individual coefficients.
#library(limma) #Linear Models for Microarray Data
library(tidyverse)
DESeq2 analysis first on the data set of OB-phytotron, with quantification files generated by StringTie with the Reference genome v2.
The file named
OB-phyto_basev2_expression-gene-counts_raw.tsv containing
only the raw counts was used (is the same as the csv, but in tsv format,
to make column, the gene names). The file name
Rosa-SP_basev2_expression-gene-counts_raw.txt was used for
the Patio datasets.
The count data are presented as a table which reports, for each sample, the number of sequence fragments that have been assigned to each gene.
counts_data_Phyto<- read.table("OB-phyto_basev2_expression-gene-counts_raw.txt", header=T, sep= "\t", dec=".", check.names = FALSE)
head(counts_data_Phyto, 4)
## ST1s1 ST1s2 ST1s3 ST1s4 ST2s1 ST2s2 ST2s3 ST2s4 ST3s1
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0 0 0 0
## ST3s2 ST3s3 ST3s4 ST4s1 ST4s2 ST4s3 ST4s4 ST5s1 ST5s2
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0 0 0 0
## ST5s3 ST5s4 ST6s1 ST6s2 ST6s3 ST6s4
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
counts_data_Patio <- read.table("Rosa-SP_basev2_expression-gene-counts_raw.txt", header=T, sep= "\t", dec=".", check.names = FALSE)
head(counts_data_Patio, 4)
## FE_LF_R1 FE_LF_R2 FE_LF_R3 FE_S1_R1 FE_S1_R2 FE_S1_R3
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## FE_S1_R4 FE_S1_R5 FE_S2_R1 FE_S2_R2 FE_S2_R3 FE_S2_R4
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## FE_S2_R5 FE_S3_R1 FE_S3_R2 FE_S3_R3 FE_S3_R4 FE_S3_R5
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## FE_S4_R1 FE_S4_R2 FE_S4_R3 FE_S4_R4 FE_S4_R5 FE_S5_R1
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## FE_S5_R2 FE_S5_R3 FE_S5_R4 FE_S5_R5 OB_LF_R1 OB_LF_R2
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 1 0 0
## OB_LF_R3 OB_S1_R1 OB_S1_R2 OB_S1_R3 OB_S1_R4 OB_S1_R5
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## OB_S2_R1 OB_S2_R2 OB_S2_R3 OB_S2_R4 OB_S2_R5 OB_S3_R1
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## OB_S3_R2 OB_S3_R3 OB_S3_R4 OB_S3_R5 OB_S4_R1 OB_S4_R2
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## OB_S4_R3 OB_S4_R4 OB_S4_R5 OB_S5_R1 OB_S5_R2 OB_S5_R3
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 1 0
## OB_S5_R4 OB_S5_R5 PM_LF_R1 PM_LF_R2 PM_LF_R3 PM_S1_R1
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## PM_S1_R2 PM_S1_R3 PM_S1_R4 PM_S1_R5 PM_S2_R1 PM_S2_R2
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## PM_S2_R3 PM_S2_R4 PM_S2_R5 PM_S3_R1 PM_S3_R2 PM_S3_R3
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## PM_S3_R4 PM_S3_R5 PM_S4_R1 PM_S4_R2 PM_S4_R3 PM_S4_R4
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## PM_S4_R5 PM_S5_R1 PM_S5_R2 PM_S5_R3 PM_S5_R4 PM_S5_R5
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## RM_LF_R1 RM_LF_R2 RM_LF_R3 RM_S1_R1 RM_S1_R2 RM_S1_R3
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## RM_S1_R4 RM_S1_R5 RM_S2_R1 RM_S2_R2 RM_S2_R3 RM_S2_R4
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## RM_S2_R5 RM_S3_R1 RM_S3_R2 RM_S3_R3 RM_S3_R4 RM_S3_R5
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## RM_S4_R1 RM_S4_R2 RM_S4_R3 RM_S4_R4 RM_S4_R5 RM_S5_R1
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## RM_S5_R2 RM_S5_R3 RM_S5_R4 RM_S5_R5
## RchiOBHmChr0c01g0497671 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0
### subset to discard RM samples
counts_data_Patio = dplyr::select(counts_data_Patio, -contains("RM"))
head(counts_data_Patio, 4)
## FE_LF_R1 FE_LF_R2 FE_LF_R3 FE_S1_R1 FE_S1_R2 FE_S1_R3
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## FE_S1_R4 FE_S1_R5 FE_S2_R1 FE_S2_R2 FE_S2_R3 FE_S2_R4
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## FE_S2_R5 FE_S3_R1 FE_S3_R2 FE_S3_R3 FE_S3_R4 FE_S3_R5
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## FE_S4_R1 FE_S4_R2 FE_S4_R3 FE_S4_R4 FE_S4_R5 FE_S5_R1
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## FE_S5_R2 FE_S5_R3 FE_S5_R4 FE_S5_R5 OB_LF_R1 OB_LF_R2
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 1 0 0
## OB_LF_R3 OB_S1_R1 OB_S1_R2 OB_S1_R3 OB_S1_R4 OB_S1_R5
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## OB_S2_R1 OB_S2_R2 OB_S2_R3 OB_S2_R4 OB_S2_R5 OB_S3_R1
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## OB_S3_R2 OB_S3_R3 OB_S3_R4 OB_S3_R5 OB_S4_R1 OB_S4_R2
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## OB_S4_R3 OB_S4_R4 OB_S4_R5 OB_S5_R1 OB_S5_R2 OB_S5_R3
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 1 0
## OB_S5_R4 OB_S5_R5 PM_LF_R1 PM_LF_R2 PM_LF_R3 PM_S1_R1
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## PM_S1_R2 PM_S1_R3 PM_S1_R4 PM_S1_R5 PM_S2_R1 PM_S2_R2
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## PM_S2_R3 PM_S2_R4 PM_S2_R5 PM_S3_R1 PM_S3_R2 PM_S3_R3
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## PM_S3_R4 PM_S3_R5 PM_S4_R1 PM_S4_R2 PM_S4_R3 PM_S4_R4
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
## PM_S4_R5 PM_S5_R1 PM_S5_R2 PM_S5_R3 PM_S5_R4 PM_S5_R5
## RchiOBHmChr0c01g0497671 0 0 0 0 0 0
## RchiOBHmChr0c01g0497681 0 0 0 0 0 0
## RchiOBHmChr0c01g0497691 0 0 0 0 0 0
## RchiOBHmChr0c01g0497701 0 0 0 0 0 0
The column data is the same file with sample and condition called
samples_description.tsv for the phytotron data and
Rosa-SP_sample-list.csv for patio dataset
col_data_Phyto<- read.table("samples_description.tsv", header=T, sep= "\t", dec=",")
head(col_data_Phyto, 4)
## sample condition
## 1 ST1s1 ST1
## 2 ST1s2 ST1
## 3 ST1s3 ST1
## 4 ST1s4 ST1
col_data_Patio<- read.table("Rosa-SP_sample-list.csv", header=T, sep= ";", dec=",")
head(col_data_Patio, 4)
## sample condition variety stage replicate fragrance
## 1 FE_LF_R1 FE_LF FE LF R1 NP
## 2 FE_LF_R2 FE_LF FE LF R2 NP
## 3 FE_LF_R3 FE_LF FE LF R3 NP
## 4 FE_S1_R1 FE_S1 FE S1 R1 NP
col_data_Patio = col_data_Patio[col_data_Patio$variety != "RM",]
head(col_data_Patio, 4)
## sample condition variety stage replicate fragrance
## 1 FE_LF_R1 FE_LF FE LF R1 NP
## 2 FE_LF_R2 FE_LF FE LF R2 NP
## 3 FE_LF_R3 FE_LF FE LF R3 NP
## 4 FE_S1_R1 FE_S1 FE S1 R1 NP
We check that row names matches with the column - Both dim of 24 variables , 6 developmental stages, 4 replicates each for phytotron - Both dim of 112 variables, 4 varieties, 5 developmental stages + Leaves, 4 replicates for each stage 3 for leaves.
ncol(counts_data_Phyto) == nrow(col_data_Phyto)
## [1] TRUE
ncol(counts_data_Patio) == nrow(col_data_Patio)
## [1] TRUE
For the Phytotron dataset the design is only the condition, described
in col_data_Phyto
dds_Phyto <- DESeqDataSetFromMatrix(countData = counts_data_Phyto,
colData = col_data_Phyto,
design = ~ condition)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
dds_Phyto
## class: DESeqDataSet
## dim: 50594 24
## metadata(1): version
## assays(1): counts
## rownames(50594): RchiOBHmChr0c01g0497671 RchiOBHmChr0c01g0497681 ...
## RchiOBHmMTg0499291 RchiOBHmMTg0499301
## rowData names(0):
## colnames(24): ST1s1 ST1s2 ... ST6s3 ST6s4
## colData names(2): sample condition
For the Patio dataset we need to establish interaction terms to be able to compare between variety and stages.
dds_Patio <- DESeqDataSetFromMatrix(countData = counts_data_Patio,
colData = col_data_Patio,
design = ~ variety +stage + variety:stage)
dds_Patio
## class: DESeqDataSet
## dim: 50594 84
## metadata(1): version
## assays(1): counts
## rownames(50594): RchiOBHmChr0c01g0497671 RchiOBHmChr0c01g0497681 ...
## RchiOBHmMTg0499291 RchiOBHmMTg0499301
## rowData names(0):
## colnames(84): FE_LF_R1 FE_LF_R2 ... PM_S5_R4 PM_S5_R5
## colData names(6): sample condition ... replicate fragrance
Removing rows (genes) with low gene counts, keeping rows that have at
least 10 reads. By removing rows in which there are very few reads, we
reduce the memory size of the dds data object, and we
increase the speed of the transformation and testing functions within
DESeq2.
keep <-rowSums(counts(dds_Phyto)) >= 10
dds_Phyto <- dds_Phyto[keep,]
keep <-rowSums(counts(dds_Patio)) >= 10
dds_Patio <- dds_Patio[keep,]
For Phytotron Developmental stages dataset For the transcript quantification based on the merged gft using stringtie the dimension went from 53 926 to 41 927 in dimension after the filtering = 11 999 excluded genes < 10 For the transcript quantification based on the Reference genome v2 the dim went from 50 594 to 38 670 in dimension after the filtering = 11 924 excluded genes < 10
For Patio dataset dim went from 50594 to 43560 genes = 7 034 excluded genes < 10 dim went from 50594 to 42990 genes = 7 604 excluded genes < 10 when we delete RM
The levels are organized based on alphabetical order, therefore in
this case we dont have to change it, but if it is necessary we use the
function relevel.
For Phytotron we have 6 levels (the 6 different developmental stages). For Patio, levels in stage the reference is LF (leaf) and for variety is FE (Fetera).
dds_Phyto$condition
## [1] ST1 ST1 ST1 ST1 ST2 ST2 ST2 ST2 ST3 ST3 ST3 ST3 ST4 ST4 ST4 ST4 ST5 ST5 ST5
## [20] ST5 ST6 ST6 ST6 ST6
## Levels: ST1 ST2 ST3 ST4 ST5 ST6
dds_Phyto$condition<-relevel(dds_Phyto$condition, ref = "ST1")
dds_Patio$variety
## [1] FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE FE
## [26] FE FE FE OB OB OB OB OB OB OB OB OB OB OB OB OB OB OB OB OB OB OB OB OB OB
## [51] OB OB OB OB OB OB PM PM PM PM PM PM PM PM PM PM PM PM PM PM PM PM PM PM PM
## [76] PM PM PM PM PM PM PM PM PM
## Levels: FE OB PM
dds_Patio$stage
## [1] LF LF LF S1 S1 S1 S1 S1 S2 S2 S2 S2 S2 S3 S3 S3 S3 S3 S4 S4 S4 S4 S4 S5 S5
## [26] S5 S5 S5 LF LF LF S1 S1 S1 S1 S1 S2 S2 S2 S2 S2 S3 S3 S3 S3 S3 S4 S4 S4 S4
## [51] S4 S5 S5 S5 S5 S5 LF LF LF S1 S1 S1 S1 S1 S2 S2 S2 S2 S2 S3 S3 S3 S3 S3 S4
## [76] S4 S4 S4 S4 S5 S5 S5 S5 S5
## Levels: LF S1 S2 S3 S4 S5
dds_Patio$variety<-relevel(dds_Patio$variety, ref = "FE")
dds_Patio$stage<-relevel(dds_Patio$stage, ref = "LF")
dds_Phyto <-DESeq(dds_Phyto)
save(dds_Phyto,file = "data_Renviron/dds_Phyto.RData")
load(file = "data_Renviron/dds_Phyto.RData")
dds_Patio <-DESeq(dds_Patio)
save(dds_Patio,file = "data_Renviron/dds_Patio.RData")
load(file = "data_Renviron/dds_Patio.RData")
dds_Phyto_counts <- counts(dds_Phyto_counts, normalized = TRUE)
#NUDX1.1a genes Expression Patters in Patio Dataset
dds_Patio_counts <- counts(dds_Patio, normalized = TRUE)
we specify the contrast we want to build a results table for, using
either of the following equivalent commands. For the first one, we only
specify the conditions. For the second we use the name or contrast
arguments of resultsNames(dds_Phyto). By default the
argument alpha is set to 0.1. The adjusted p value cutoff
will be set to 0.01.
resultsNames(dds_Phyto)
## [1] "Intercept" "condition_ST2_vs_ST1" "condition_ST3_vs_ST1"
## [4] "condition_ST4_vs_ST1" "condition_ST5_vs_ST1" "condition_ST6_vs_ST1"
results_Phyto_ST3vsST1 <-results(dds_Phyto, contrast = c("condition", "ST3", "ST1"), alpha = 0.01)
results_Phyto_ST3vsST1 <-results(dds_Phyto, name="condition_ST3_vs_ST1", alpha = 0.01)
We can summarize some basic tallies using the summary function. How many p-values were less than 0.01?
summary(results_Phyto_ST3vsST1)
##
## out of 38670 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 8933, 23%
## LFC < 0 (down) : 9697, 25%
## outliers [1] : 11, 0.028%
## low counts [2] : 1500, 3.9%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
sum(results_Phyto_ST3vsST1$pvalue < 0.01, na.rm=TRUE)
## [1] 19713
Also the BH-adjusted p values, are given in the column padj of the results object.If we consider a fraction of 10% false positives acceptable, we can consider all genes with an adjusted p value below 10%=0.1 as significant. How many such genes are there?
sum(results_Phyto_ST3vsST1$padj < 0.1, na.rm=TRUE)
## [1] 23774
###Log fold change shrinkage for visualization and ranking
Shrinkage of effect size (LFC estimates) is useful for visualization
and ranking of genes. To shrink the LFC, we pass the
dds_Phyto object to the function lfcShrink.
Below we specify to use the apeglm and ashr method for effect size
shrinkage
In DESeq2, the function plotMA shows the log2 fold changes attributable to a given variable over the mean of normalized counts for all the samples in the DESeqDataSet. Points will be colored red if the adjusted p value is less than 0.1. Points which fall out of the window are plotted as open triangles pointing either up or down.
Because we are interested in ST3 vs ST1, we set coef=3
or establish condition condition_ST3_vs_ST1
We specify the gene which had the smallest p value from the results condition_ST3_vs_ST1. NOTE: same gene as with the transcripts counts obtained from Stringtie merge
After the save as a csv, the DESeq results from this contrast only genes with pvalue < 0.01 as OBS3vsOBS1.csv
## [1] "RchiOBHmChr4g0421251"
We extract the Differential Expressed Genes obtained from the contrast
# We sort them based on pvalue
results_dds_Phyto<- results_Phyto_ST3vsST1[order(results_Phyto_ST3vsST1$pvalue),]
#Explore results already ordered from lower padj
results_Phyto_ST3vsST1 #dim 38670
## log2 fold change (MLE): condition ST3 vs ST1
## Wald test p-value: condition ST3 vs ST1
## DataFrame with 38670 rows and 6 columns
## baseMean log2FoldChange lfcSE stat
## <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr0c01g0497711 143.31101 1.141313 0.122949 9.282840
## RchiOBHmChr0c03g0497851 0.74533 -0.939800 1.266407 -0.742099
## RchiOBHmChr0c03g0497861 1.43375 -0.908053 1.078587 -0.841891
## RchiOBHmChr0c08g0498091 33.06813 0.633345 0.269838 2.347133
## RchiOBHmChr0c08g0498101 19.50143 1.001496 0.344192 2.909699
## ... ... ... ... ...
## RchiOBHmMTg0499261 0.525285 0.508683 1.534735 0.331447
## RchiOBHmMTg0499271 3.787748 0.820382 0.651760 1.258718
## RchiOBHmMTg0499281 1.653035 2.107668 1.066626 1.976014
## RchiOBHmMTg0499291 11.084627 0.196925 0.484654 0.406320
## RchiOBHmMTg0499301 3.688173 1.209328 0.768003 1.574641
## pvalue padj
## <numeric> <numeric>
## RchiOBHmChr0c01g0497711 1.65021e-20 2.24431e-19
## RchiOBHmChr0c03g0497851 4.58027e-01 5.42205e-01
## RchiOBHmChr0c03g0497861 3.99849e-01 4.86334e-01
## RchiOBHmChr0c08g0498091 1.89185e-02 3.36376e-02
## RchiOBHmChr0c08g0498101 3.61777e-03 7.39821e-03
## ... ... ...
## RchiOBHmMTg0499261 0.7403072 NA
## RchiOBHmMTg0499271 0.2081323 0.279868
## RchiOBHmMTg0499281 0.0481532 0.077601
## RchiOBHmMTg0499291 0.6845075 0.750048
## RchiOBHmMTg0499301 0.1153394 0.168081
#We subset the results table to these genes and then sort it by the log2 fold change estimate and log2FoldChange >= 1
results_dds_Phyto_sub <- subset(results_Phyto_ST3vsST1, pvalue < 0.01)
results_dds_Phyto_sub #is 19713 dimension with p = 0.01
## log2 fold change (MLE): condition ST3 vs ST1
## Wald test p-value: condition ST3 vs ST1
## DataFrame with 19713 rows and 6 columns
## baseMean log2FoldChange lfcSE stat
## <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr0c01g0497711 143.31101 1.14131 0.122949 9.28284
## RchiOBHmChr0c08g0498101 19.50143 1.00150 0.344192 2.90970
## RchiOBHmChr0c11g0499351 12.11157 -0.93489 0.305011 -3.06510
## RchiOBHmChr0c11g0499371 33.50252 -2.38514 0.224736 -10.61307
## RchiOBHmChr0c11g0499461 9.53917 -1.97017 0.409894 -4.80654
## ... ... ... ... ...
## RchiOBHmMTg0498961 3.52976 3.64576 1.319508 2.76297
## RchiOBHmMTg0498981 3917.38439 1.01093 0.315190 3.20736
## RchiOBHmMTg0498991 28.88154 3.65262 0.805213 4.53622
## RchiOBHmMTg0499071 16.66074 2.05360 0.546384 3.75852
## RchiOBHmMTg0499081 4.63467 1.91000 0.711882 2.68303
## pvalue padj
## <numeric> <numeric>
## RchiOBHmChr0c01g0497711 1.65021e-20 2.24431e-19
## RchiOBHmChr0c08g0498101 3.61777e-03 7.39821e-03
## RchiOBHmChr0c11g0499351 2.17594e-03 4.62509e-03
## RchiOBHmChr0c11g0499371 2.59107e-26 5.78616e-25
## RchiOBHmChr0c11g0499461 1.53564e-06 5.08809e-06
## ... ... ...
## RchiOBHmMTg0498961 5.72782e-03 1.12990e-02
## RchiOBHmMTg0498981 1.33957e-03 2.94592e-03
## RchiOBHmMTg0498991 5.72712e-06 1.77094e-05
## RchiOBHmMTg0499071 1.70919e-04 4.31554e-04
## RchiOBHmMTg0499081 7.29575e-03 1.41016e-02
OBS3vsOBS1_phyto <- as.data.frame(results_dds_Phyto_sub) #dim 19713
We want to stablish the genes that are up or down regulated if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(OBS3vsOBS1_phyto, file = "OBS3vsOBS1.csv") #is 19713
resultsNames(dds_Phyto)
## [1] "Intercept" "condition_ST2_vs_ST1" "condition_ST3_vs_ST1"
## [4] "condition_ST4_vs_ST1" "condition_ST5_vs_ST1" "condition_ST6_vs_ST1"
results_Phyto_ST2vsST1 <-results(dds_Phyto, contrast = c("condition", "ST2", "ST1"), alpha = 0.01)
results_Phyto_ST2vsST1 <-results(dds_Phyto, name="condition_ST2_vs_ST1", alpha = 0.01)
We can summarize some basic tallies using the summary function. How many p-values were less than 0.01?
summary(results_Phyto_ST2vsST1)
##
## out of 38670 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 6631, 17%
## LFC < 0 (down) : 7946, 21%
## outliers [1] : 11, 0.028%
## low counts [2] : 2250, 5.8%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
sum(results_Phyto_ST2vsST1$pvalue < 0.01, na.rm=TRUE)
## [1] 16166
Also the BH-adjusted p values, How many genes with an adjusted p value below 10%=0.1 are significant?
sum(results_Phyto_ST2vsST1$padj < 0.1, na.rm=TRUE)
## [1] 20015
###Log fold change shrinkage for visualization and ranking
We specify the gene which had the smallest p value from the results condition_ST2_vs_ST1. After the save as a csv, the DESeq results from this contrast only genes with pvalue < 0.01 as OBS2vsOBS1.csv
## [1] "RchiOBHmChr1g0332511"
## log2 fold change (MLE): condition ST2 vs ST1
## Wald test p-value: condition ST2 vs ST1
## DataFrame with 38670 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr1g0332511 87217.91 4.66083 0.175594 26.5432 3.07606e-155
## RchiOBHmChr2g0119291 367351.82 6.16375 0.233310 26.4188 8.34476e-154
## RchiOBHmChr4g0421251 22860.87 7.06974 0.272769 25.9185 4.12599e-148
## RchiOBHmChr4g0426001 9229.31 5.33511 0.210638 25.3283 1.55717e-141
## RchiOBHmChr5g0038101 191329.87 7.32087 0.290993 25.1582 1.14802e-139
## ... ... ... ... ... ...
## RchiOBHmMTg0499061 0.757284 -0.449664 1.68741 -0.266482 0.789868
## RchiOBHmMTg0499171 0.519663 -0.422201 1.75567 -0.240478 0.809960
## RchiOBHmMTg0499191 0.698619 1.013688 1.75578 0.577345 0.563706
## RchiOBHmMTg0499241 0.476576 1.548717 2.04756 0.756374 0.449425
## RchiOBHmMTg0499261 0.525285 -1.009303 1.63870 -0.615915 0.537950
## padj
## <numeric>
## RchiOBHmChr1g0332511 1.11996e-150
## RchiOBHmChr2g0119291 1.51912e-149
## RchiOBHmChr4g0421251 5.00744e-144
## RchiOBHmChr4g0426001 1.41738e-137
## RchiOBHmChr5g0038101 8.35965e-136
## ... ...
## RchiOBHmMTg0499061 NA
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499241 NA
## RchiOBHmMTg0499261 NA
## log2 fold change (MLE): condition ST2 vs ST1
## Wald test p-value: condition ST2 vs ST1
## DataFrame with 16166 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr1g0332511 87217.91 4.66083 0.175594 26.5432 3.07606e-155
## RchiOBHmChr2g0119291 367351.82 6.16375 0.233310 26.4188 8.34476e-154
## RchiOBHmChr4g0421251 22860.87 7.06974 0.272769 25.9185 4.12599e-148
## RchiOBHmChr4g0426001 9229.31 5.33511 0.210638 25.3283 1.55717e-141
## RchiOBHmChr5g0038101 191329.87 7.32087 0.290993 25.1582 1.14802e-139
## ... ... ... ... ... ...
## RchiOBHmChr5g0005241 1.229177 -3.429264 1.331036 -2.57639 0.00998385
## RchiOBHmChr1g0343371 559.345641 -0.619601 0.240532 -2.57596 0.00999625
## RchiOBHmChr5g0039501 0.775709 -4.575215 1.705965 -2.68189 0.00732068
## RchiOBHmChr6g0261041 0.757843 4.381281 1.586478 2.76164 0.00575120
## RchiOBHmChr7g0231071 0.785730 4.070956 1.401133 2.90547 0.00366697
## padj
## <numeric>
## RchiOBHmChr1g0332511 1.11996e-150
## RchiOBHmChr2g0119291 1.51912e-149
## RchiOBHmChr4g0421251 5.00744e-144
## RchiOBHmChr4g0426001 1.41738e-137
## RchiOBHmChr5g0038101 8.35965e-136
## ... ...
## RchiOBHmChr5g0005241 0.0224912
## RchiOBHmChr1g0343371 0.0225177
## RchiOBHmChr5g0039501 NA
## RchiOBHmChr6g0261041 NA
## RchiOBHmChr7g0231071 NA
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(OBS2vsOBS1_phyto, file = "OBS2vsOBS1.csv") #is 16166
As we want to make several comparison between the varieties and stages we will use Interaction terms:
resultsNames(dds_Patio)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_S1_vs_LF" "stage_S2_vs_LF" "stage_S3_vs_LF"
## [7] "stage_S4_vs_LF" "stage_S5_vs_LF" "varietyOB.stageS1"
## [10] "varietyPM.stageS1" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_ST3vsLF <-results(dds_Patio, list( c("stage_S3_vs_LF", "varietyOB.stageS3") ), alpha = 0.01)
summary(results_Patio_ST3vsLF)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 10339, 24%
## LFC < 0 (down) : 11992, 28%
## outliers [1] : 18, 0.042%
## low counts [2] : 3334, 7.8%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_ST3vsLF<- results_Patio_ST3vsLF[order(results_Patio_ST3vsLF$padj),] # sort
#Explore results already ordered from lower padj
results_Patio_ST3vsLF #dim 43560
## log2 fold change (MLE): stage_S3_vs_LF+varietyOB.stageS3 effect
## Wald test p-value: stage_S3_vs_LF+varietyOB.stageS3 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr5g0063441 5508.78 -7.60757 0.239353 -31.7839 1.07911e-221
## RchiOBHmChr5g0063141 4183.77 -7.56574 0.244365 -30.9609 1.81421e-210
## RchiOBHmChr2g0100071 14335.16 12.23206 0.399197 30.6416 3.41636e-206
## RchiOBHmChr3g0453451 14060.12 6.10206 0.202089 30.1949 2.76125e-200
## RchiOBHmChr3g0449291 1240.51 7.46498 0.250520 29.7980 4.14812e-195
## ... ... ... ... ... ...
## RchiOBHmMTg0499121 0.360404 -3.064356 2.80517 -1.092397 0.2746587
## RchiOBHmMTg0499171 0.393024 -0.701932 1.87927 -0.373512 0.7087670
## RchiOBHmMTg0499191 0.390878 -2.848983 1.76649 -1.612791 0.1067899
## RchiOBHmMTg0499201 0.117237 -2.105031 9.73138 -0.216314 0.8287431
## RchiOBHmMTg0499261 0.378783 -3.242838 1.73226 -1.872030 0.0612025
## padj
## <numeric>
## RchiOBHmChr5g0063441 4.27736e-217
## RchiOBHmChr5g0063141 3.59558e-206
## RchiOBHmChr2g0100071 4.51392e-202
## RchiOBHmChr3g0453451 2.73626e-196
## RchiOBHmChr3g0449291 3.28846e-191
## ... ...
## RchiOBHmMTg0499121 NA
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499201 NA
## RchiOBHmMTg0499261 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST3vsLF_sub<- subset(results_Patio_ST3vsLF, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST3vsLF_sub #is 22095 dimension with p = 0.01
## log2 fold change (MLE): stage_S3_vs_LF+varietyOB.stageS3 effect
## Wald test p-value: stage_S3_vs_LF+varietyOB.stageS3 effect
## DataFrame with 22331 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr5g0063441 5508.78 -7.60757 0.239353 -31.7839 1.07911e-221
## RchiOBHmChr5g0063141 4183.77 -7.56574 0.244365 -30.9609 1.81421e-210
## RchiOBHmChr2g0100071 14335.16 12.23206 0.399197 30.6416 3.41636e-206
## RchiOBHmChr3g0453451 14060.12 6.10206 0.202089 30.1949 2.76125e-200
## RchiOBHmChr3g0449291 1240.51 7.46498 0.250520 29.7980 4.14812e-195
## ... ... ... ... ... ...
## RchiOBHmChr3g0475421 708.52570 0.540430 0.195172 2.76899 0.00562308
## RchiOBHmChr1g0382591 11.34797 -1.144300 0.413293 -2.76873 0.00562745
## RchiOBHmChr5g0025751 529.56420 -0.478300 0.172755 -2.76866 0.00562868
## RchiOBHmChr1g0325661 1.86255 -2.232181 0.806258 -2.76857 0.00563030
## RchiOBHmChr1g0382821 16.88331 -0.992702 0.358580 -2.76843 0.00563279
## padj
## <numeric>
## RchiOBHmChr5g0063441 4.27736e-217
## RchiOBHmChr5g0063141 3.59558e-206
## RchiOBHmChr2g0100071 4.51392e-202
## RchiOBHmChr3g0453451 2.73626e-196
## RchiOBHmChr3g0449291 3.28846e-191
## ... ...
## RchiOBHmChr3g0475421 0.00998287
## RchiOBHmChr1g0382591 0.00999018
## RchiOBHmChr5g0025751 0.00999192
## RchiOBHmChr1g0325661 0.00999435
## RchiOBHmChr1g0382821 0.00999832
OBS3vsOBLF <- as.data.frame(results_Patio_ST3vsLF_sub)
We specify the gene which had the smallest p value from the results ST3 vs LF of OB.
## [1] "RchiOBHmChr5g0063441"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(OBS3vsOBLF, file = "OBS3vsOBLF.csv") #22331
resultsNames(dds_Patio)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_S1_vs_LF" "stage_S2_vs_LF" "stage_S3_vs_LF"
## [7] "stage_S4_vs_LF" "stage_S5_vs_LF" "varietyOB.stageS1"
## [10] "varietyPM.stageS1" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_ST2vsLF <-results(dds_Patio, list( c("stage_S2_vs_LF", "varietyOB.stageS2") ), alpha = 0.01)
summary(results_Patio_ST2vsLF)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 9081, 21%
## LFC < 0 (down) : 10940, 25%
## outliers [1] : 18, 0.042%
## low counts [2] : 3334, 7.8%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_ST2vsLF<- results_Patio_ST2vsLF[order(results_Patio_ST2vsLF$padj),] # sort
#Explore results already ordered from lower padj
results_Patio_ST2vsLF #dim 43560
## log2 fold change (MLE): stage_S2_vs_LF+varietyOB.stageS2 effect
## Wald test p-value: stage_S2_vs_LF+varietyOB.stageS2 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr5g0063441 5508.78 -7.27511 0.237819 -30.5910 1.61465e-205
## RchiOBHmChr2g0100071 14335.16 12.02149 0.399199 30.1141 3.17159e-199
## RchiOBHmChr5g0063141 4183.77 -7.27470 0.241843 -30.0803 8.76305e-199
## RchiOBHmChr3g0453451 14060.12 6.01596 0.202087 29.7692 9.79107e-195
## RchiOBHmChr3g0449291 1240.51 7.19435 0.250541 28.7152 2.46401e-181
## ... ... ... ... ... ...
## RchiOBHmMTg0499121 0.360404 -2.683172 2.80225 -0.9575064 0.3383117
## RchiOBHmMTg0499171 0.393024 -0.138818 1.80347 -0.0769729 0.9386451
## RchiOBHmMTg0499191 0.390878 -1.853203 1.76054 -1.0526335 0.2925090
## RchiOBHmMTg0499201 0.117237 -1.703113 9.73138 -0.1750125 0.8610698
## RchiOBHmMTg0499261 0.378783 -3.995091 1.73226 -2.3062909 0.0210944
## padj
## <numeric>
## RchiOBHmChr5g0063441 6.40016e-201
## RchiOBHmChr2g0100071 6.28578e-195
## RchiOBHmChr5g0063141 1.15783e-194
## RchiOBHmChr3g0453451 9.70246e-191
## RchiOBHmChr3g0449291 1.95337e-177
## ... ...
## RchiOBHmMTg0499121 NA
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499201 NA
## RchiOBHmMTg0499261 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST2vsLF_sub<- subset(results_Patio_ST2vsLF, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST2vsLF_sub #is 19767 dimension with p = 0.01
## log2 fold change (MLE): stage_S2_vs_LF+varietyOB.stageS2 effect
## Wald test p-value: stage_S2_vs_LF+varietyOB.stageS2 effect
## DataFrame with 20021 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr5g0063441 5508.78 -7.27511 0.237819 -30.5910 1.61465e-205
## RchiOBHmChr2g0100071 14335.16 12.02149 0.399199 30.1141 3.17159e-199
## RchiOBHmChr5g0063141 4183.77 -7.27470 0.241843 -30.0803 8.76305e-199
## RchiOBHmChr3g0453451 14060.12 6.01596 0.202087 29.7692 9.79107e-195
## RchiOBHmChr3g0449291 1240.51 7.19435 0.250541 28.7152 2.46401e-181
## ... ... ... ... ... ...
## RchiOBHmChr7g0200731 1.59547 3.746190 1.335622 2.80483 0.00503435
## RchiOBHmChr5g0057741 5924.98480 0.567457 0.202340 2.80447 0.00503996
## RchiOBHmChr2g0135031 47.56492 0.773478 0.275831 2.80417 0.00504457
## RchiOBHmChr2g0100841 17.20056 -3.433876 1.224630 -2.80401 0.00504713
## RchiOBHmChr2g0171691 386.73921 -1.109346 0.395648 -2.80387 0.00504929
## padj
## <numeric>
## RchiOBHmChr5g0063441 6.40016e-201
## RchiOBHmChr2g0100071 6.28578e-195
## RchiOBHmChr5g0063141 1.15783e-194
## RchiOBHmChr3g0453451 9.70246e-191
## RchiOBHmChr3g0449291 1.95337e-177
## ... ...
## RchiOBHmChr7g0200731 0.00996910
## RchiOBHmChr5g0057741 0.00997972
## RchiOBHmChr2g0135031 0.00998835
## RchiOBHmChr2g0100841 0.00999290
## RchiOBHmChr2g0171691 0.00999669
OBS2vsOBLF <- as.data.frame(results_Patio_ST2vsLF_sub)
We specify the gene which had the smallest p value from the results ST2 vs LF of OB.
## [1] "RchiOBHmChr5g0063441"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(OBS2vsOBLF, file = "OBS2vsOBLF.csv") #20021
resultsNames(dds_Patio)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_S1_vs_LF" "stage_S2_vs_LF" "stage_S3_vs_LF"
## [7] "stage_S4_vs_LF" "stage_S5_vs_LF" "varietyOB.stageS1"
## [10] "varietyPM.stageS1" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_OB3vsFE3 <-results(dds_Patio, list( c("variety_OB_vs_FE", "varietyOB.stageS3") ), alpha = 0.01)
summary(results_Patio_OB3vsFE3)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 11043, 26%
## LFC < 0 (down) : 11631, 27%
## outliers [1] : 18, 0.042%
## low counts [2] : 3334, 7.8%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_OB3vsFE3<- results_Patio_OB3vsFE3[order(results_Patio_OB3vsFE3$padj),] # sort
#Explore results already ordered from lower padj
results_Patio_OB3vsFE3 #dim 43560
## log2 fold change (MLE): variety_OB_vs_FE+varietyOB.stageS3 effect
## Wald test p-value: variety_OB_vs_FE+varietyOB.stageS3 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr1g0362711 737.532 4.97158 0.112400 44.2312 0.00000e+00
## RchiOBHmChr7g0182191 1545.456 7.67894 0.201162 38.1729 7.92284e-319
## RchiOBHmChr2g0102701 294.488 4.57999 0.124119 36.9001 4.60678e-298
## RchiOBHmChr6g0278171 1695.884 -7.51749 0.220620 -34.0743 1.77070e-254
## RchiOBHmChr2g0167291 5869.416 12.97605 0.412919 31.4251 9.17997e-217
## ... ... ... ... ... ...
## RchiOBHmMTg0499121 0.360404 0.921371 2.60157 0.3541591 0.723220
## RchiOBHmMTg0499171 0.393024 1.452649 1.80063 0.8067451 0.419813
## RchiOBHmMTg0499191 0.390878 -0.800490 1.75157 -0.4570131 0.647662
## RchiOBHmMTg0499201 0.117237 0.344365 8.46072 0.0407017 0.967534
## RchiOBHmMTg0499261 0.378783 -0.244244 1.76728 -0.1382032 0.890080
## padj
## <numeric>
## RchiOBHmChr1g0362711 0.00000e+00
## RchiOBHmChr7g0182191 1.57023e-314
## RchiOBHmChr2g0102701 6.08678e-294
## RchiOBHmChr6g0278171 1.75468e-250
## RchiOBHmChr2g0167291 7.27751e-213
## ... ...
## RchiOBHmMTg0499121 NA
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499201 NA
## RchiOBHmMTg0499261 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_OB3vsFE3_sub<- subset(results_Patio_OB3vsFE3, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_OB3vsFE3_sub #is 22394 dimension with p = 0.01
## log2 fold change (MLE): variety_OB_vs_FE+varietyOB.stageS3 effect
## Wald test p-value: variety_OB_vs_FE+varietyOB.stageS3 effect
## DataFrame with 22674 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr1g0362711 737.532 4.97158 0.112400 44.2312 0.00000e+00
## RchiOBHmChr7g0182191 1545.456 7.67894 0.201162 38.1729 7.92284e-319
## RchiOBHmChr2g0102701 294.488 4.57999 0.124119 36.9001 4.60678e-298
## RchiOBHmChr6g0278171 1695.884 -7.51749 0.220620 -34.0743 1.77070e-254
## RchiOBHmChr2g0167291 5869.416 12.97605 0.412919 31.4251 9.17997e-217
## ... ... ... ... ... ...
## RchiOBHmChr7g0229781 48.06208 0.654093 0.236675 2.76368 0.00571540
## RchiOBHmChr7g0241261 4.72554 -1.864169 0.674551 -2.76357 0.00571727
## RchiOBHmChr3g0464851 6.87959 3.188972 1.153944 2.76354 0.00571778
## RchiOBHmChr5g0078421 205.39296 1.575436 0.570087 2.76350 0.00571848
## RchiOBHmChr6g0292471 214.59570 0.564002 0.204095 2.76343 0.00571973
## padj
## <numeric>
## RchiOBHmChr1g0362711 0.00000e+00
## RchiOBHmChr7g0182191 1.57023e-314
## RchiOBHmChr2g0102701 6.08678e-294
## RchiOBHmChr6g0278171 1.75468e-250
## RchiOBHmChr2g0167291 7.27751e-213
## ... ...
## RchiOBHmChr7g0229781 0.00999326
## RchiOBHmChr7g0241261 0.00999608
## RchiOBHmChr3g0464851 0.00999653
## RchiOBHmChr5g0078421 0.00999732
## RchiOBHmChr6g0292471 0.00999906
OB3vsFE3 <- as.data.frame(results_Patio_OB3vsFE3_sub)
We specify the gene which had the smallest p value from the results OB vs FE at ST3.
## [1] "RchiOBHmChr1g0362711"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(OB3vsFE3 , file = "OB3vsFE3.csv") #22674
resultsNames(dds_Patio)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_S1_vs_LF" "stage_S2_vs_LF" "stage_S3_vs_LF"
## [7] "stage_S4_vs_LF" "stage_S5_vs_LF" "varietyOB.stageS1"
## [10] "varietyPM.stageS1" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_OB2vsFE2 <-results(dds_Patio, list( c("variety_OB_vs_FE", "varietyOB.stageS2") ), alpha = 0.01)
summary(results_Patio_OB2vsFE2)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 9460, 22%
## LFC < 0 (down) : 10249, 24%
## outliers [1] : 18, 0.042%
## low counts [2] : 3334, 7.8%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_OB2vsFE2<- results_Patio_OB2vsFE2[order(results_Patio_OB2vsFE2$padj),] # sort
#Explore results already ordered from lower padj
results_Patio_OB2vsFE2 #dim 43560
## log2 fold change (MLE): variety_OB_vs_FE+varietyOB.stageS2 effect
## Wald test p-value: variety_OB_vs_FE+varietyOB.stageS2 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr6g0278171 1695.884 -6.83483 0.178722 -38.2427 5.50488e-320
## RchiOBHmChr2g0102701 294.488 4.14274 0.114906 36.0532 1.22664e-284
## RchiOBHmChr1g0362711 737.532 3.64479 0.105914 34.4128 1.62105e-259
## RchiOBHmChr7g0182191 1545.456 7.44986 0.216484 34.4130 1.61030e-259
## RchiOBHmChr4g0445201 2582.186 -8.20944 0.254125 -32.3048 5.99540e-229
## ... ... ... ... ... ...
## RchiOBHmMTg0499121 0.360404 1.0434693 2.59843 0.4015774 0.687995
## RchiOBHmMTg0499171 0.393024 1.1379757 1.70811 0.6662175 0.505272
## RchiOBHmMTg0499191 0.390878 0.5038930 1.74890 0.2881206 0.773254
## RchiOBHmMTg0499201 0.117237 -0.0897912 8.46072 -0.0106127 0.991532
## RchiOBHmMTg0499261 0.378783 -0.6670608 1.77124 -0.3766065 0.706466
## padj
## <numeric>
## RchiOBHmChr6g0278171 2.18202e-315
## RchiOBHmChr2g0102701 2.43108e-280
## RchiOBHmChr1g0362711 1.60638e-255
## RchiOBHmChr7g0182191 1.60638e-255
## RchiOBHmChr4g0445201 4.75291e-225
## ... ...
## RchiOBHmMTg0499121 NA
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499201 NA
## RchiOBHmMTg0499261 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_OB2vsFE2_sub<- subset(results_Patio_OB2vsFE2, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_OB2vsFE2_sub #is 19413 dimension with p = 0.01
## log2 fold change (MLE): variety_OB_vs_FE+varietyOB.stageS2 effect
## Wald test p-value: variety_OB_vs_FE+varietyOB.stageS2 effect
## DataFrame with 19709 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr6g0278171 1695.884 -6.83483 0.178722 -38.2427 5.50488e-320
## RchiOBHmChr2g0102701 294.488 4.14274 0.114906 36.0532 1.22664e-284
## RchiOBHmChr1g0362711 737.532 3.64479 0.105914 34.4128 1.62105e-259
## RchiOBHmChr7g0182191 1545.456 7.44986 0.216484 34.4130 1.61030e-259
## RchiOBHmChr4g0445201 2582.186 -8.20944 0.254125 -32.3048 5.99540e-229
## ... ... ... ... ... ...
## RchiOBHmChr2g0128891 9.92784 -3.447901 1.226908 -2.81024 0.00495051
## RchiOBHmChr3g0459441 163.05079 -0.506124 0.180128 -2.80981 0.00495715
## RchiOBHmChr7g0239451 71.80038 3.525050 1.254569 2.80977 0.00495769
## RchiOBHmChr7g0220041 1.08967 -3.342069 1.189482 -2.80968 0.00495902
## RchiOBHmChr1g0367631 2.95390 -3.119652 1.110383 -2.80953 0.00496141
## padj
## <numeric>
## RchiOBHmChr6g0278171 2.18202e-315
## RchiOBHmChr2g0102701 2.43108e-280
## RchiOBHmChr1g0362711 1.60638e-255
## RchiOBHmChr7g0182191 1.60638e-255
## RchiOBHmChr4g0445201 4.75291e-225
## ... ...
## RchiOBHmChr2g0128891 0.00995830
## RchiOBHmChr3g0459441 0.00997116
## RchiOBHmChr7g0239451 0.00997174
## RchiOBHmChr7g0220041 0.00997390
## RchiOBHmChr1g0367631 0.00997821
OB2vsFE2 <- as.data.frame(results_Patio_OB2vsFE2_sub)
We specify the gene which had the smallest p value from the results OB vs FE at ST2.
## [1] "RchiOBHmChr6g0278171"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(OB2vsFE2 , file = "OB2vsFE2.csv") #19709
As we want to make several comparison between the varieties and stages we will use Interaction terms:
resultsNames(dds_Patio)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_S1_vs_LF" "stage_S2_vs_LF" "stage_S3_vs_LF"
## [7] "stage_S4_vs_LF" "stage_S5_vs_LF" "varietyOB.stageS1"
## [10] "varietyPM.stageS1" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_ST3vsLF <-results(dds_Patio, list( c("stage_S3_vs_LF", "varietyPM.stageS3") ), alpha = 0.01)
summary(results_Patio_ST3vsLF)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 9834, 23%
## LFC < 0 (down) : 10693, 25%
## outliers [1] : 18, 0.042%
## low counts [2] : 3334, 7.8%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_ST3vsLF<- results_Patio_ST3vsLF[order(results_Patio_ST3vsLF$padj),] # sort
#Explore results already ordered from lower padj
results_Patio_ST3vsLF #dim 43560
## log2 fold change (MLE): stage_S3_vs_LF+varietyPM.stageS3 effect
## Wald test p-value: stage_S3_vs_LF+varietyPM.stageS3 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr2g0100071 14335.16 9.96285 0.205551 48.4690 0.00000e+00
## RchiOBHmChr4g0429931 26780.71 11.85533 0.346859 34.1791 4.94526e-256
## RchiOBHmChr5g0063141 4183.77 -8.00487 0.248502 -32.2125 1.18103e-227
## RchiOBHmChr3g0453451 14060.12 6.18595 0.201389 30.7164 3.43638e-207
## RchiOBHmChr5g0063441 5508.78 -6.33921 0.237703 -26.6686 1.08973e-156
## ... ... ... ... ... ...
## RchiOBHmMTg0499121 0.360404 0.638367 3.00399 0.2125067 0.831712
## RchiOBHmMTg0499171 0.393024 -1.233264 1.94904 -0.6327550 0.526894
## RchiOBHmMTg0499191 0.390878 0.638367 2.02636 0.3150321 0.752737
## RchiOBHmMTg0499201 0.117237 0.638190 9.76789 0.0653355 0.947907
## RchiOBHmMTg0499261 0.378783 -1.244816 1.95401 -0.6370559 0.524088
## padj
## <numeric>
## RchiOBHmChr2g0100071 0.00000e+00
## RchiOBHmChr4g0429931 9.80101e-252
## RchiOBHmChr5g0063141 1.56046e-223
## RchiOBHmChr3g0453451 3.40528e-203
## RchiOBHmChr5g0063441 8.63894e-153
## ... ...
## RchiOBHmMTg0499121 NA
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499201 NA
## RchiOBHmMTg0499261 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST3vsLF_sub<- subset(results_Patio_ST3vsLF, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST3vsLF_sub #is 20487 dimension with p = 0.01
## log2 fold change (MLE): stage_S3_vs_LF+varietyPM.stageS3 effect
## Wald test p-value: stage_S3_vs_LF+varietyPM.stageS3 effect
## DataFrame with 20527 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr2g0100071 14335.16 9.96285 0.205551 48.4690 0.00000e+00
## RchiOBHmChr4g0429931 26780.71 11.85533 0.346859 34.1791 4.94526e-256
## RchiOBHmChr5g0063141 4183.77 -8.00487 0.248502 -32.2125 1.18103e-227
## RchiOBHmChr3g0453451 14060.12 6.18595 0.201389 30.7164 3.43638e-207
## RchiOBHmChr5g0063441 5508.78 -6.33921 0.237703 -26.6686 1.08973e-156
## ... ... ... ... ... ...
## RchiOBHmChr5g0027041 280.3939 0.543563 0.194258 2.79816 0.00513953
## RchiOBHmChr6g0311371 876.0839 0.532231 0.190218 2.79800 0.00514200
## RchiOBHmChr1g0339971 30.5532 -3.027904 1.082458 -2.79725 0.00515399
## RchiOBHmChr2g0170951 808.9832 -0.439402 0.157119 -2.79663 0.00516393
## RchiOBHmChr2g0119561 88.4966 0.809475 0.289506 2.79605 0.00517306
## padj
## <numeric>
## RchiOBHmChr2g0100071 0.00000e+00
## RchiOBHmChr4g0429931 9.80101e-252
## RchiOBHmChr5g0063141 1.56046e-223
## RchiOBHmChr3g0453451 3.40528e-203
## RchiOBHmChr5g0063441 8.63894e-153
## ... ...
## RchiOBHmChr5g0027041 0.00992646
## RchiOBHmChr6g0311371 0.00993075
## RchiOBHmChr1g0339971 0.00995341
## RchiOBHmChr2g0170951 0.00997212
## RchiOBHmChr2g0119561 0.00998928
PMS3vsPMLF <- as.data.frame(results_Patio_ST3vsLF_sub)
We specify the gene which had the smallest p value from the results ST3 vs LF of PM.
## [1] "RchiOBHmChr2g0100071"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(PMS3vsPMLF, file = "PMS3vsPMLF.csv") #20527
resultsNames(dds_Patio)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_S1_vs_LF" "stage_S2_vs_LF" "stage_S3_vs_LF"
## [7] "stage_S4_vs_LF" "stage_S5_vs_LF" "varietyOB.stageS1"
## [10] "varietyPM.stageS1" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_ST2vsLF <-results(dds_Patio, list( c("stage_S2_vs_LF", "varietyPM.stageS2") ), alpha = 0.01)
summary(results_Patio_ST2vsLF)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 6610, 15%
## LFC < 0 (down) : 8584, 20%
## outliers [1] : 18, 0.042%
## low counts [2] : 4168, 9.7%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_ST2vsLF<- results_Patio_ST2vsLF[order(results_Patio_ST2vsLF$padj),] # sort
#Explore results already ordered from lower padj
results_Patio_ST2vsLF #dim 43560
## log2 fold change (MLE): stage_S2_vs_LF+varietyPM.stageS2 effect
## Wald test p-value: stage_S2_vs_LF+varietyPM.stageS2 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr2g0100071 14335.16 9.56039 0.205552 46.5109 0.00000e+00
## RchiOBHmChr4g0429931 26780.71 12.58403 0.346843 36.2816 3.15391e-288
## RchiOBHmChr5g0063141 4183.77 -7.28005 0.241286 -30.1718 5.54758e-200
## RchiOBHmChr3g0453451 14060.12 5.54521 0.201397 27.5337 6.92803e-167
## RchiOBHmChr7g0186051 696.84 -6.56702 0.241139 -27.2333 2.61913e-163
## ... ... ... ... ... ...
## RchiOBHmMTg0499171 0.393024 -0.407838 1.90298 -0.2143157 0.830301
## RchiOBHmMTg0499191 0.390878 0.839925 2.02636 0.4145005 0.678508
## RchiOBHmMTg0499201 0.117237 0.262715 9.76789 0.0268957 0.978543
## RchiOBHmMTg0499241 0.546183 0.108746 1.50575 0.0722207 0.942426
## RchiOBHmMTg0499261 0.378783 -1.043258 1.95401 -0.5339050 0.593407
## padj
## <numeric>
## RchiOBHmChr2g0100071 0.00000e+00
## RchiOBHmChr4g0429931 6.11921e-284
## RchiOBHmChr5g0063141 7.17560e-196
## RchiOBHmChr3g0453451 6.72088e-163
## RchiOBHmChr7g0186051 2.03265e-159
## ... ...
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499201 NA
## RchiOBHmMTg0499241 NA
## RchiOBHmMTg0499261 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST2vsLF_sub<- subset(results_Patio_ST2vsLF, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST2vsLF_sub #is 15006 dimension with p = 0.01
## log2 fold change (MLE): stage_S2_vs_LF+varietyPM.stageS2 effect
## Wald test p-value: stage_S2_vs_LF+varietyPM.stageS2 effect
## DataFrame with 15194 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr2g0100071 14335.16 9.56039 0.205552 46.5109 0.00000e+00
## RchiOBHmChr4g0429931 26780.71 12.58403 0.346843 36.2816 3.15391e-288
## RchiOBHmChr5g0063141 4183.77 -7.28005 0.241286 -30.1718 5.54758e-200
## RchiOBHmChr3g0453451 14060.12 5.54521 0.201397 27.5337 6.92803e-167
## RchiOBHmChr7g0186051 696.84 -6.56702 0.241139 -27.2333 2.61913e-163
## ... ... ... ... ... ...
## RchiOBHmChr7g0197161 134.8014 0.405427 0.140500 2.88560 0.00390672
## RchiOBHmChr6g0278781 57.5137 -0.850526 0.294769 -2.88540 0.00390914
## RchiOBHmChr1g0372481 1001.7202 1.487079 0.515408 2.88524 0.00391110
## RchiOBHmChr1g0369221 72.8999 -0.729631 0.252903 -2.88502 0.00391388
## RchiOBHmChr3g0492261 823.6492 -0.328981 0.114033 -2.88498 0.00391443
## padj
## <numeric>
## RchiOBHmChr2g0100071 0.00000e+00
## RchiOBHmChr4g0429931 6.11921e-284
## RchiOBHmChr5g0063141 7.17560e-196
## RchiOBHmChr3g0453451 6.72088e-163
## RchiOBHmChr7g0186051 2.03265e-159
## ... ...
## RchiOBHmChr7g0197161 0.00998001
## RchiOBHmChr6g0278781 0.00998553
## RchiOBHmChr1g0372481 0.00998988
## RchiOBHmChr1g0369221 0.00999633
## RchiOBHmChr3g0492261 0.00999707
PMS2vsPMLF <- as.data.frame(results_Patio_ST2vsLF_sub)
We specify the gene which had the smallest p value from the results ST2 vs LF of PM.
## [1] "RchiOBHmChr2g0100071"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(PMS2vsPMLF, file = "PMS2vsPMLF.csv") #15194
resultsNames(dds_Patio)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_S1_vs_LF" "stage_S2_vs_LF" "stage_S3_vs_LF"
## [7] "stage_S4_vs_LF" "stage_S5_vs_LF" "varietyOB.stageS1"
## [10] "varietyPM.stageS1" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_PM3vsFE3 <-results(dds_Patio, list( c("variety_PM_vs_FE", "varietyPM.stageS3") ), alpha = 0.01)
summary(results_Patio_PM3vsFE3)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 8241, 19%
## LFC < 0 (down) : 8862, 21%
## outliers [1] : 18, 0.042%
## low counts [2] : 4168, 9.7%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_PM3vsFE3<- results_Patio_PM3vsFE3[order(results_Patio_PM3vsFE3$padj),] # sort
head(results_Patio_OB3vsFE3)
## log2 fold change (MLE): variety_OB_vs_FE+varietyOB.stageS3 effect
## Wald test p-value: variety_OB_vs_FE+varietyOB.stageS3 effect
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr1g0362711 737.532 4.97158 0.112400 44.2312 0.00000e+00
## RchiOBHmChr7g0182191 1545.456 7.67894 0.201162 38.1729 7.92284e-319
## RchiOBHmChr2g0102701 294.488 4.57999 0.124119 36.9001 4.60678e-298
## RchiOBHmChr6g0278171 1695.884 -7.51749 0.220620 -34.0743 1.77070e-254
## RchiOBHmChr2g0167291 5869.416 12.97605 0.412919 31.4251 9.17997e-217
## RchiOBHmChr2g0137141 371.747 5.89051 0.191463 30.7658 7.51063e-208
## padj
## <numeric>
## RchiOBHmChr1g0362711 0.00000e+00
## RchiOBHmChr7g0182191 1.57023e-314
## RchiOBHmChr2g0102701 6.08678e-294
## RchiOBHmChr6g0278171 1.75468e-250
## RchiOBHmChr2g0167291 7.27751e-213
## RchiOBHmChr2g0137141 4.96177e-204
#Explore results already ordered from lower padj
results_Patio_PM3vsFE3 #dim 43560
## log2 fold change (MLE): variety_PM_vs_FE+varietyPM.stageS3 effect
## Wald test p-value: variety_PM_vs_FE+varietyPM.stageS3 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr7g0214271 707.781 -4.80020 0.163877 -29.2916 1.32839e-188
## RchiOBHmChr5g0079591 631.286 4.20084 0.143628 29.2481 4.74340e-188
## RchiOBHmChr6g0252011 276.520 -3.56966 0.135183 -26.4061 1.16559e-153
## RchiOBHmChr6g0273951 209.977 4.03306 0.157219 25.6525 3.96422e-145
## RchiOBHmChr4g0392991 889.854 -3.98633 0.158075 -25.2180 2.54257e-140
## ... ... ... ... ... ...
## RchiOBHmMTg0499171 0.393024 0.462892 1.83604 0.2521149 0.800952
## RchiOBHmMTg0499191 0.390878 -0.681888 1.75157 -0.3893014 0.697053
## RchiOBHmMTg0499201 0.117237 0.462874 8.46072 0.0547086 0.956371
## RchiOBHmMTg0499241 0.546183 1.615994 1.42965 1.1303447 0.258331
## RchiOBHmMTg0499261 0.378783 -0.702718 1.76728 -0.3976264 0.690906
## padj
## <numeric>
## RchiOBHmChr7g0214271 5.15467e-184
## RchiOBHmChr5g0079591 9.20315e-184
## RchiOBHmChr6g0252011 1.50766e-149
## RchiOBHmChr6g0273951 3.84569e-141
## RchiOBHmChr4g0392991 1.97324e-136
## ... ...
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499201 NA
## RchiOBHmMTg0499241 NA
## RchiOBHmMTg0499261 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_PM3vsFE3_sub<- subset(results_Patio_PM3vsFE3, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_PM3vsFE3_sub #is 16888 dimension with p = 0.01
## log2 fold change (MLE): variety_PM_vs_FE+varietyPM.stageS3 effect
## Wald test p-value: variety_PM_vs_FE+varietyPM.stageS3 effect
## DataFrame with 17103 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr7g0214271 707.781 -4.80020 0.163877 -29.2916 1.32839e-188
## RchiOBHmChr5g0079591 631.286 4.20084 0.143628 29.2481 4.74340e-188
## RchiOBHmChr6g0252011 276.520 -3.56966 0.135183 -26.4061 1.16559e-153
## RchiOBHmChr6g0273951 209.977 4.03306 0.157219 25.6525 3.96422e-145
## RchiOBHmChr4g0392991 889.854 -3.98633 0.158075 -25.2180 2.54257e-140
## ... ... ... ... ... ...
## RchiOBHmChr5g0009371 22.07412 -2.061760 0.723929 -2.84802 0.00439928
## RchiOBHmChr6g0246731 57.03372 0.511944 0.179762 2.84790 0.00440086
## RchiOBHmChr2g0156961 3.23183 1.345432 0.472436 2.84786 0.00440145
## RchiOBHmChr2g0121801 12.48153 1.808211 0.634984 2.84765 0.00440438
## RchiOBHmChr2g0175941 128.10124 0.534450 0.187693 2.84747 0.00440681
## padj
## <numeric>
## RchiOBHmChr7g0214271 5.15467e-184
## RchiOBHmChr5g0079591 9.20315e-184
## RchiOBHmChr6g0252011 1.50766e-149
## RchiOBHmChr6g0273951 3.84569e-141
## RchiOBHmChr4g0392991 1.97324e-136
## ... ...
## RchiOBHmChr5g0009371 0.00998360
## RchiOBHmChr6g0246731 0.00998661
## RchiOBHmChr2g0156961 0.00998736
## RchiOBHmChr2g0121801 0.00999341
## RchiOBHmChr2g0175941 0.00999836
PM3vsFE3 <- as.data.frame(results_Patio_PM3vsFE3_sub)
We specify the gene which had the smallest p value from the results PM vs FE at ST3.
## [1] "RchiOBHmChr7g0214271"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(PM3vsFE3 , file = "PM3vsFE3.csv") #17103
resultsNames(dds_Patio)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_S1_vs_LF" "stage_S2_vs_LF" "stage_S3_vs_LF"
## [7] "stage_S4_vs_LF" "stage_S5_vs_LF" "varietyOB.stageS1"
## [10] "varietyPM.stageS1" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_PM2vsFE2 <-results(dds_Patio, list( c("variety_PM_vs_FE", "varietyPM.stageS2") ), alpha = 0.01)
summary(results_Patio_PM2vsFE2)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 5064, 12%
## LFC < 0 (down) : 6189, 14%
## outliers [1] : 18, 0.042%
## low counts [2] : 3334, 7.8%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_PM2vsFE2<- results_Patio_PM2vsFE2[order(results_Patio_PM2vsFE2$padj),] # sort
#Explore results already ordered from lower padj
results_Patio_PM2vsFE2 #dim 43560
## log2 fold change (MLE): variety_PM_vs_FE+varietyPM.stageS2 effect
## Wald test p-value: variety_PM_vs_FE+varietyPM.stageS2 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr5g0079591 631.286 5.02831 0.153084 32.8466 1.27181e-236
## RchiOBHmChr7g0214271 707.781 -4.85608 0.152011 -31.9456 6.21530e-224
## RchiOBHmChr6g0252011 276.520 -4.14150 0.138283 -29.9495 4.46594e-197
## RchiOBHmChr4g0394391 609.182 4.43317 0.161751 27.4074 2.23603e-165
## RchiOBHmChr4g0392991 889.854 -4.17416 0.155899 -26.7748 6.34731e-158
## ... ... ... ... ... ...
## RchiOBHmMTg0499121 0.360404 0.405362 2.60157 0.1558141 0.876180
## RchiOBHmMTg0499171 0.393024 0.410531 1.77431 0.2313754 0.817023
## RchiOBHmMTg0499191 0.390878 -0.171727 1.75489 -0.0978561 0.922047
## RchiOBHmMTg0499201 0.117237 -0.748676 8.46072 -0.0884885 0.929488
## RchiOBHmMTg0499261 0.378783 -0.171724 1.77124 -0.0969511 0.922765
## padj
## <numeric>
## RchiOBHmChr5g0079591 5.04122e-232
## RchiOBHmChr7g0214271 1.23181e-219
## RchiOBHmChr6g0252011 5.90070e-193
## RchiOBHmChr4g0394391 2.21580e-161
## RchiOBHmChr4g0392991 5.03189e-154
## ... ...
## RchiOBHmMTg0499121 NA
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499201 NA
## RchiOBHmMTg0499261 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_PM2vsFE2_sub<- subset(results_Patio_PM2vsFE2, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_PM2vsFE2_sub #is 11043 dimension with p = 0.01
## log2 fold change (MLE): variety_PM_vs_FE+varietyPM.stageS2 effect
## Wald test p-value: variety_PM_vs_FE+varietyPM.stageS2 effect
## DataFrame with 11253 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr5g0079591 631.286 5.02831 0.153084 32.8466 1.27181e-236
## RchiOBHmChr7g0214271 707.781 -4.85608 0.152011 -31.9456 6.21530e-224
## RchiOBHmChr6g0252011 276.520 -4.14150 0.138283 -29.9495 4.46594e-197
## RchiOBHmChr4g0394391 609.182 4.43317 0.161751 27.4074 2.23603e-165
## RchiOBHmChr4g0392991 889.854 -4.17416 0.155899 -26.7748 6.34731e-158
## ... ... ... ... ... ...
## RchiOBHmChr1g0370151 7.66056 -1.70618 0.571453 -2.98568 0.00282945
## RchiOBHmChr7g0192871 2.04820 3.37719 1.131174 2.98556 0.00283058
## RchiOBHmChr4g0387401 14.21931 -1.25674 0.420989 -2.98522 0.00283375
## RchiOBHmChr1g0359511 22.78329 -1.77025 0.593024 -2.98512 0.00283463
## RchiOBHmChr7g0223251 4.40903 2.93312 0.982713 2.98472 0.00283841
## padj
## <numeric>
## RchiOBHmChr5g0079591 5.04122e-232
## RchiOBHmChr7g0214271 1.23181e-219
## RchiOBHmChr6g0252011 5.90070e-193
## RchiOBHmChr4g0394391 2.21580e-161
## RchiOBHmChr4g0392991 5.03189e-154
## ... ...
## RchiOBHmChr1g0370151 0.00997009
## RchiOBHmChr7g0192871 0.00997320
## RchiOBHmChr4g0387401 0.00998347
## RchiOBHmChr1g0359511 0.00998571
## RchiOBHmChr7g0223251 0.00999812
PM2vsFE2 <- as.data.frame(results_Patio_PM2vsFE2_sub)
We specify the gene which had the smallest p value from the results PM vs FE at ST2.
## [1] "RchiOBHmChr5g0079591"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(PM2vsFE2 , file = "PM2vsFE2.csv") #11253
For this comparisons we need to change the reference level from LF to S1
dds_Patio$stage<-relevel(dds_Patio$stage, ref = "S1")
dds_Patio$stage
## [1] LF LF LF S1 S1 S1 S1 S1 S2 S2 S2 S2 S2 S3 S3 S3 S3 S3 S4 S4 S4 S4 S4 S5 S5
## [26] S5 S5 S5 LF LF LF S1 S1 S1 S1 S1 S2 S2 S2 S2 S2 S3 S3 S3 S3 S3 S4 S4 S4 S4
## [51] S4 S5 S5 S5 S5 S5 LF LF LF S1 S1 S1 S1 S1 S2 S2 S2 S2 S2 S3 S3 S3 S3 S3 S4
## [76] S4 S4 S4 S4 S5 S5 S5 S5 S5
## Levels: S1 LF S2 S3 S4 S5
dds_Patio_S1 <-DESeq(dds_Patio)
save(dds_Patio_S1,file = "data_Renviron/dds_Patio_S1.RData")
load(file = "data_Renviron/dds_Patio_S1.RData")
As we want to make several comparison between the varieties and stages we will use Interaction terms:
resultsNames(dds_Patio_S1)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_LF_vs_S1" "stage_S2_vs_S1" "stage_S3_vs_S1"
## [7] "stage_S4_vs_S1" "stage_S5_vs_S1" "varietyOB.stageLF"
## [10] "varietyPM.stageLF" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_ST3vsST1 <-results(dds_Patio_S1, list( c("stage_S3_vs_S1", "varietyOB.stageS3") ), alpha = 0.01)
summary(results_Patio_ST3vsST1)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 10318, 24%
## LFC < 0 (down) : 11832, 28%
## outliers [1] : 18, 0.042%
## low counts [2] : 3334, 7.8%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_ST3vsST1<- results_Patio_ST3vsST1[order(results_Patio_ST3vsST1$padj),] # sort
#Explore results already ordered from lower padj
results_Patio_ST3vsST1 #dim 42990
## log2 fold change (MLE): stage_S3_vs_S1+varietyOB.stageS3 effect
## Wald test p-value: stage_S3_vs_S1+varietyOB.stageS3 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr1g0332511 28544.72 12.91066 0.373408 34.5752 5.95183e-262
## RchiOBHmChr6g0245301 1702.81 3.84820 0.132476 29.0484 1.61427e-185
## RchiOBHmChr2g0172331 18397.06 14.95889 0.532191 28.1081 7.79802e-174
## RchiOBHmChr7g0241141 81251.61 11.25842 0.409198 27.5134 1.21414e-166
## RchiOBHmChr5g0010861 4913.05 8.03446 0.294796 27.2543 1.47795e-163
## ... ... ... ... ... ...
## RchiOBHmMTg0499121 0.360404 0.523459 2.60156 0.2012099 0.840534
## RchiOBHmMTg0499171 0.393024 0.571368 1.76699 0.3233571 0.746425
## RchiOBHmMTg0499191 0.390878 0.523471 1.75488 0.2982935 0.765479
## RchiOBHmMTg0499201 0.117237 0.523307 8.46026 0.0618547 0.950679
## RchiOBHmMTg0499261 0.378783 1.100530 1.77124 0.6213342 0.534380
## padj
## <numeric>
## RchiOBHmChr1g0332511 2.35918e-257
## RchiOBHmChr6g0245301 3.19933e-181
## RchiOBHmChr2g0172331 1.03033e-169
## RchiOBHmChr7g0241141 1.20315e-162
## RchiOBHmChr5g0010861 1.17166e-159
## ... ...
## RchiOBHmMTg0499121 NA
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499201 NA
## RchiOBHmMTg0499261 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST3vsST1_sub<- subset(results_Patio_ST3vsST1, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST3vsST1_sub #is 22150 dimension with p = 0.01
## log2 fold change (MLE): stage_S3_vs_S1+varietyOB.stageS3 effect
## Wald test p-value: stage_S3_vs_S1+varietyOB.stageS3 effect
## DataFrame with 22150 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr1g0332511 28544.72 12.91066 0.373408 34.5752 5.95183e-262
## RchiOBHmChr6g0245301 1702.81 3.84820 0.132476 29.0484 1.61427e-185
## RchiOBHmChr2g0172331 18397.06 14.95889 0.532191 28.1081 7.79802e-174
## RchiOBHmChr7g0241141 81251.61 11.25842 0.409198 27.5134 1.21414e-166
## RchiOBHmChr5g0010861 4913.05 8.03446 0.294796 27.2543 1.47795e-163
## ... ... ... ... ... ...
## RchiOBHmChr3g0475171 1479.54335 -1.13170 0.408189 -2.77249 0.00556287
## RchiOBHmChr2g0167811 1.78613 -2.88441 1.040412 -2.77237 0.00556502
## RchiOBHmChr6g0262301 3.94990 -1.48871 0.537116 -2.77167 0.00557699
## RchiOBHmChr1g0384271 4.35858 -5.58079 2.013596 -2.77155 0.00557894
## RchiOBHmChr6g0264431 20.36662 1.05733 0.381530 2.77129 0.00558343
## padj
## <numeric>
## RchiOBHmChr1g0332511 2.35918e-257
## RchiOBHmChr6g0245301 3.19933e-181
## RchiOBHmChr2g0172331 1.03033e-169
## RchiOBHmChr7g0241141 1.20315e-162
## RchiOBHmChr5g0010861 1.17166e-159
## ... ...
## RchiOBHmChr3g0475171 0.00995669
## RchiOBHmChr2g0167811 0.00996010
## RchiOBHmChr6g0262301 0.00998107
## RchiOBHmChr1g0384271 0.00998411
## RchiOBHmChr6g0264431 0.00999170
OBS3vsOBST1_patio <- as.data.frame(results_Patio_ST3vsST1_sub)
We specify the gene which had the smallest p value from the results ST3 vs S1 of OB.
## [1] "RchiOBHmChr1g0332511"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(OBS3vsOBST1_patio, file = "OBS3vsOBST1_patio.csv") #22150
resultsNames(dds_Patio_S1)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_LF_vs_S1" "stage_S2_vs_S1" "stage_S3_vs_S1"
## [7] "stage_S4_vs_S1" "stage_S5_vs_S1" "varietyOB.stageLF"
## [10] "varietyPM.stageLF" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_ST2vsST1 <-results(dds_Patio_S1, list( c("stage_S2_vs_S1", "varietyOB.stageS2") ), alpha = 0.01)
summary(results_Patio_ST2vsST1)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 7187, 17%
## LFC < 0 (down) : 8387, 20%
## outliers [1] : 18, 0.042%
## low counts [2] : 4168, 9.7%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_ST2vsST1<- results_Patio_ST2vsST1[order(results_Patio_ST2vsST1$padj),] # sort
#Explore results already ordered from lower padj
results_Patio_ST2vsST1 #dim 42990
## log2 fold change (MLE): stage_S2_vs_S1+varietyOB.stageS2 effect
## Wald test p-value: stage_S2_vs_S1+varietyOB.stageS2 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr1g0332511 28544.72 11.60229 0.373411 31.0711 5.92240e-212
## RchiOBHmChr2g0167291 5869.42 14.81809 0.582102 25.4562 6.03333e-143
## RchiOBHmChr2g0172331 18397.06 13.38596 0.532206 25.1518 1.35003e-139
## RchiOBHmChr1g0321181 15420.34 11.39933 0.456023 24.9973 6.54259e-138
## RchiOBHmChr6g0245301 1702.81 3.14846 0.132686 23.7286 1.82648e-124
## ... ... ... ... ... ...
## RchiOBHmMTg0499171 0.393024 1.134483 1.68614 0.672828 0.501057
## RchiOBHmMTg0499191 0.390878 1.519250 1.74889 0.868693 0.385015
## RchiOBHmMTg0499201 0.117237 0.925225 8.46026 0.109361 0.912916
## RchiOBHmMTg0499241 0.546183 0.359932 1.33794 0.269020 0.787914
## RchiOBHmMTg0499261 0.378783 0.348277 1.77124 0.196629 0.844118
## padj
## <numeric>
## RchiOBHmChr1g0332511 2.29813e-207
## RchiOBHmChr2g0167291 1.17059e-138
## RchiOBHmChr2g0172331 1.74622e-135
## RchiOBHmChr1g0321181 6.34697e-134
## RchiOBHmChr6g0245301 1.41750e-120
## ... ...
## RchiOBHmMTg0499171 NA
## RchiOBHmMTg0499191 NA
## RchiOBHmMTg0499201 NA
## RchiOBHmMTg0499241 NA
## RchiOBHmMTg0499261 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST2vsST1_sub<- subset(results_Patio_ST2vsST1, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST2vsST1_sub #is 15574 dimension with p = 0.01
## log2 fold change (MLE): stage_S2_vs_S1+varietyOB.stageS2 effect
## Wald test p-value: stage_S2_vs_S1+varietyOB.stageS2 effect
## DataFrame with 15574 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr1g0332511 28544.72 11.60229 0.373411 31.0711 5.92240e-212
## RchiOBHmChr2g0167291 5869.42 14.81809 0.582102 25.4562 6.03333e-143
## RchiOBHmChr2g0172331 18397.06 13.38596 0.532206 25.1518 1.35003e-139
## RchiOBHmChr1g0321181 15420.34 11.39933 0.456023 24.9973 6.54259e-138
## RchiOBHmChr6g0245301 1702.81 3.14846 0.132686 23.7286 1.82648e-124
## ... ... ... ... ... ...
## RchiOBHmChr5g0077971 175.98138 4.020661 1.396929 2.87821 0.00399933
## RchiOBHmChr6g0279391 1.38580 -3.378523 1.173880 -2.87808 0.00400100
## RchiOBHmChr4g0414901 12.03440 0.824864 0.286684 2.87726 0.00401148
## RchiOBHmChr6g0257001 3.31705 -3.066117 1.065644 -2.87724 0.00401165
## RchiOBHmChr1g0354841 41.78249 1.574983 0.547410 2.87715 0.00401281
## padj
## <numeric>
## RchiOBHmChr1g0332511 2.29813e-207
## RchiOBHmChr2g0167291 1.17059e-138
## RchiOBHmChr2g0172331 1.74622e-135
## RchiOBHmChr1g0321181 6.34697e-134
## RchiOBHmChr6g0245301 1.41750e-120
## ... ...
## RchiOBHmChr5g0077971 0.00996725
## RchiOBHmChr6g0279391 0.00997077
## RchiOBHmChr4g0414901 0.00999602
## RchiOBHmChr6g0257001 0.00999602
## RchiOBHmChr1g0354841 0.00999828
OBS2vsOBST1_patio <- as.data.frame(results_Patio_ST2vsST1_sub)
We specify the gene which had the smallest p value from the results ST2 vs ST1 of OB.
## [1] "RchiOBHmChr1g0332511"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(OBS2vsOBST1_patio, file = "OBS2vsOBST1_patio.csv") #15574
resultsNames(dds_Patio_S1)
## [1] "Intercept" "variety_OB_vs_FE" "variety_PM_vs_FE"
## [4] "stage_LF_vs_S1" "stage_S2_vs_S1" "stage_S3_vs_S1"
## [7] "stage_S4_vs_S1" "stage_S5_vs_S1" "varietyOB.stageLF"
## [10] "varietyPM.stageLF" "varietyOB.stageS2" "varietyPM.stageS2"
## [13] "varietyOB.stageS3" "varietyPM.stageS3" "varietyOB.stageS4"
## [16] "varietyPM.stageS4" "varietyOB.stageS5" "varietyPM.stageS5"
results_Patio_ST3vsST1 <-results(dds_Patio_S1, list( c("stage_S3_vs_S1", "varietyPM.stageS3") ), alpha = 0.01)
summary(results_Patio_ST3vsST1)
##
## out of 42990 with nonzero total read count
## adjusted p-value < 0.01
## LFC > 0 (up) : 10623, 25%
## LFC < 0 (down) : 10864, 25%
## outliers [1] : 18, 0.042%
## low counts [2] : 2501, 5.8%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
results_Patio_ST3vsST1<- results_Patio_ST3vsST1[order(results_Patio_ST3vsST1$padj),] # sort
#Explore results already ordered from lower padj
results_Patio_ST3vsST1 #dim 42990
## log2 fold change (MLE): stage_S3_vs_S1+varietyPM.stageS3 effect
## Wald test p-value: stage_S3_vs_S1+varietyPM.stageS3 effect
## DataFrame with 42990 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr5g0063141 4183.77 -7.59856 0.219026 -34.6926 1.02017e-263
## RchiOBHmChr5g0007561 9543.51 10.71416 0.310936 34.4578 3.44456e-260
## RchiOBHmChr2g0172331 18397.06 13.57994 0.433791 31.3053 3.95435e-215
## RchiOBHmChr5g0010861 4913.05 10.01319 0.328123 30.5166 1.57083e-204
## RchiOBHmChr5g0051201 1010.07 -4.29997 0.141699 -30.3458 2.85009e-202
## ... ... ... ... ... ...
## RchiOBHmMTg0499011 0.157409 -1.091765 8.44680 -0.1292519 0.897158
## RchiOBHmMTg0499051 0.114853 0.843163 8.46026 0.0996615 0.920613
## RchiOBHmMTg0499061 0.170776 0.843085 6.47338 0.1302386 0.896378
## RchiOBHmMTg0499121 0.360404 -0.877613 2.57256 -0.3411434 0.732996
## RchiOBHmMTg0499201 0.117237 0.266066 8.46026 0.0314489 0.974912
## padj
## <numeric>
## RchiOBHmChr5g0063141 4.12875e-259
## RchiOBHmChr5g0007561 6.97025e-256
## RchiOBHmChr2g0172331 5.33455e-211
## RchiOBHmChr5g0010861 1.58933e-200
## RchiOBHmChr5g0051201 2.30692e-198
## ... ...
## RchiOBHmMTg0499011 NA
## RchiOBHmMTg0499051 NA
## RchiOBHmMTg0499061 NA
## RchiOBHmMTg0499121 NA
## RchiOBHmMTg0499201 NA
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST3vsST1_sub<- subset(results_Patio_ST3vsST1, padj < 0.01)
#We subset the results table to these genes and then sort it by the log2 fold change estimate
results_Patio_ST3vsST1_sub #is 21487 dimension with p = 0.01
## log2 fold change (MLE): stage_S3_vs_S1+varietyPM.stageS3 effect
## Wald test p-value: stage_S3_vs_S1+varietyPM.stageS3 effect
## DataFrame with 21487 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## RchiOBHmChr5g0063141 4183.77 -7.59856 0.219026 -34.6926 1.02017e-263
## RchiOBHmChr5g0007561 9543.51 10.71416 0.310936 34.4578 3.44456e-260
## RchiOBHmChr2g0172331 18397.06 13.57994 0.433791 31.3053 3.95435e-215
## RchiOBHmChr5g0010861 4913.05 10.01319 0.328123 30.5166 1.57083e-204
## RchiOBHmChr5g0051201 1010.07 -4.29997 0.141699 -30.3458 2.85009e-202
## ... ... ... ... ... ...
## RchiOBHmChr2g0091791 1.41641 3.860544 1.384493 2.78842 0.00529663
## RchiOBHmChr1g0319931 4.55465 1.745510 0.626016 2.78828 0.00529880
## RchiOBHmChr6g0257441 14.90724 -2.240743 0.803679 -2.78811 0.00530172
## RchiOBHmChr2g0124651 875.70678 -1.742807 0.625126 -2.78793 0.00530462
## RchiOBHmChr4g0395581 139.73988 0.998375 0.358142 2.78765 0.00530917
## padj
## <numeric>
## RchiOBHmChr5g0063141 4.12875e-259
## RchiOBHmChr5g0007561 6.97025e-256
## RchiOBHmChr2g0172331 5.33455e-211
## RchiOBHmChr5g0010861 1.58933e-200
## RchiOBHmChr5g0051201 2.30692e-198
## ... ...
## RchiOBHmChr2g0091791 0.00997812
## RchiOBHmChr1g0319931 0.00998174
## RchiOBHmChr6g0257441 0.00998678
## RchiOBHmChr2g0124651 0.00999177
## RchiOBHmChr4g0395581 0.00999989
PMS3vsPMST1_patio <- as.data.frame(results_Patio_ST3vsST1_sub)
We specify the gene which had the smallest p value from the results ST3 vs S1 of PM.
## [1] "RchiOBHmChr5g0063141"
if log2Foldchange > 1 and pvalue < 0.01, set as “Up-Regulated” if log2Foldchange < -1 and pvalue < 0.01, set as “Down-Regulated”
We save the results of the Differential Expressed Genes
write.csv(PMS3vsPMST1_patio, file = "PMS3vsPMST1_patio.csv") #21487
#Venn Diagramms To be able to determinate the genes that all the different contrast have in common, Venn diagramms are used for visualizing the overlap.
We have a final list of 98 genes that are shared between all the
different contrast.
If we take a look of the 98 genes that ovelap the different contrast. NUDX1.1a genes that are present in overlap genes are RchiOBHm_Chr2g0142111,RchiOBHm_Chr2g0142071,RchiOBHm_Chr2g0142081.
From the 9 genes: 9 genes have Transcription annotation or GO annotation related (6 annotated in V2 and 2 have a transcription related description in the GO annotation) 6 genes were predicted as TF with DeepTFactor bioinformatic tool 5 genes are annotated in TF database
There was no category found of TF in this contrast.
We have a final list of 202 genes that are shared between all the
different contrast.
From the 202 genes: 39 genes have Transcription annotation or GO annotation related (36 TF in the V2 annotation) 29 genes were predicted as TF with DeepTFactor bioinformatic tool
There was no category found of TF in this contrast.
Petals at 6 different developmental stages of Old Blush
Petals at 5 different developmental stages and leaf of Old Blush,
Fetera, PapaMeilland and RougeMeilland
#Correlation of Up-Regulated Genes
For this analysis we used the Phytotron DataSet, normalized vst
counts. We calculate their correlation using the
cor(data, method = "pearson") command. We also calculated
How many correlations are > Pearson 0.5? Being False lower than
0.5.
##
## FALSE TRUE
## 2584 7020
In the correlation plot we can see the genes that are more correlated
in green and the less correlated in grey.
For this analysis we used the Phytotron DataSet. We calculate their
correlation using the cor(data, method = "pearson")
command. We also calculated How many correlations are > Pearson 0.5?
Being False lower than 0.5.
##
## FALSE TRUE
## 42 183
For this analysis we used the Phytotron DataSet, normalized vst
counts. We calculate their correlation using the
cor(data, method = "pearson") command. We also calculated
How many correlations are > Pearson 0.5? Being False lower than
0.5.
##
## FALSE TRUE
## 6752 34864
For this analysis we used the Phytotron DataSet. We calculate their
correlation using the cor(data, method = "pearson")
command. We also calculated How many correlations are > Pearson 0.5?
Being False lower than 0.5.
##
## FALSE TRUE
## 656 1108
HOMER (Hypergeometric Optimization of Motif EnRichment) is a suite of tools for Motif Discovery. HOMER contains a novel motif discovery algorithm that was designed for regulatory element analysis in genomics applications.
De novo motif discovery allows you to directly query the sequence to discover which motifs are the MOST enriched sequences in your target set. Known motif discovery will simply tell you which of the known motifs is most enriched in your target set.
Results of de Novo and Known Motifs on promoter sequences of NUDX1.1a genes
The promoter sequence was extracted from the Genome V2 Rosa chinensis Old Blush available in Ensemble Plants. It was checked manually that the 1500pb upstream the TSS contained the box38 and the 138bp.
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: Europe/Paris
## tzcode source: internal
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] igraph_2.2.1 corrplot_0.95
## [3] visNetwork_2.1.4 VennDiagram_1.7.3
## [5] futile.logger_1.4.3 cowplot_1.2.0
## [7] biomaRt_2.64.0 clusterProfiler_4.16.0
## [9] enrichplot_1.28.4 lubridate_1.9.4
## [11] forcats_1.0.1 stringr_1.5.2
## [13] purrr_1.1.0 readr_2.1.5
## [15] tidyr_1.3.1 tibble_3.3.0
## [17] tidyverse_2.0.0 apeglm_1.30.0
## [19] ggrepel_0.9.6 dplyr_1.1.4
## [21] RColorBrewer_1.1-3 pheatmap_1.0.13
## [23] BiocParallel_1.42.2 ggplot2_4.0.0
## [25] ashr_2.2-63 DESeq2_1.48.2
## [27] SummarizedExperiment_1.38.1 Biobase_2.68.0
## [29] MatrixGenerics_1.20.0 matrixStats_1.5.0
## [31] GenomicRanges_1.60.0 GenomeInfoDb_1.44.3
## [33] IRanges_2.42.0 S4Vectors_0.46.0
## [35] BiocGenerics_0.54.1 generics_0.1.4
##
## loaded via a namespace (and not attached):
## [1] rstudioapi_0.17.1 jsonlite_2.0.0 magrittr_2.0.4
## [4] ggtangle_0.1.1 farver_2.1.2 rmarkdown_2.30
## [7] ragg_1.5.0 fs_1.6.6 vctrs_0.6.5
## [10] memoise_2.0.1 ggtree_3.16.3 SQUAREM_2021.1
## [13] mixsqp_0.3-54 progress_1.2.3 htmltools_0.5.8.1
## [16] S4Arrays_1.8.1 lambda.r_1.2.4 curl_7.0.0
## [19] truncnorm_1.0-9 SparseArray_1.8.1 gridGraphics_0.5-1
## [22] sass_0.4.10 bslib_0.9.0 htmlwidgets_1.6.4
## [25] httr2_1.2.1 plyr_1.8.9 futile.options_1.0.1
## [28] cachem_1.1.0 lifecycle_1.0.4 pkgconfig_2.0.3
## [31] gson_0.1.0 Matrix_1.7-4 R6_2.6.1
## [34] fastmap_1.2.0 GenomeInfoDbData_1.2.14 digest_0.6.37
## [37] numDeriv_2016.8-1.1 aplot_0.2.9 colorspace_2.1-2
## [40] patchwork_1.3.2 AnnotationDbi_1.70.0 irlba_2.3.5.1
## [43] textshaping_1.0.4 crosstalk_1.2.2 RSQLite_2.4.3
## [46] filelock_1.0.3 invgamma_1.2 labeling_0.4.3
## [49] timechange_0.3.0 httr_1.4.7 abind_1.4-8
## [52] compiler_4.5.1 bit64_4.6.0-1 withr_3.0.2
## [55] S7_0.2.0 DBI_1.2.3 R.utils_2.13.0
## [58] MASS_7.3-65 rappdirs_0.3.3 DelayedArray_0.34.1
## [61] tools_4.5.1 ape_5.8-1 R.oo_1.27.1
## [64] glue_1.8.0 nlme_3.1-168 GOSemSim_2.34.0
## [67] reshape2_1.4.4 fgsea_1.34.2 gtable_0.3.6
## [70] tzdb_0.5.0 R.methodsS3_1.8.2 ggVennDiagram_1.5.4
## [73] data.table_1.17.8 hms_1.1.4 xml2_1.4.0
## [76] XVector_0.48.0 pillar_1.11.1 yulab.utils_0.2.3
## [79] emdbook_1.3.14 splines_4.5.1 BiocFileCache_2.16.2
## [82] treeio_1.32.0 lattice_0.22-7 bit_4.6.0
## [85] tidyselect_1.2.1 GO.db_3.21.0 locfit_1.5-9.12
## [88] Biostrings_2.76.0 knitr_1.50 svglite_2.2.2
## [91] xfun_0.53 DT_0.34.0 stringi_1.8.7
## [94] UCSC.utils_1.4.0 lazyeval_0.2.2 ggfun_0.2.0
## [97] yaml_2.3.10 evaluate_1.0.5 codetools_0.2-20
## [100] bbmle_1.0.25.1 qvalue_2.40.0 ggplotify_0.1.3
## [103] cli_3.6.5 systemfonts_1.3.1 jquerylib_0.1.4
## [106] Rcpp_1.1.0 dbplyr_2.5.1 coda_0.19-4.1
## [109] png_0.1-8 bdsmatrix_1.3-7 parallel_4.5.1
## [112] blob_1.2.4 prettyunits_1.2.0 DOSE_4.2.0
## [115] mvtnorm_1.3-3 tidytree_0.4.7 scales_1.4.0
## [118] crayon_1.5.3 rlang_1.1.6 formatR_1.14
## [121] fastmatch_1.1-8 KEGGREST_1.48.1