Library Used
library("edgeR")
## Loading required package: limma
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("AnnotationDbi")
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:grDevices':
##
## windows
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
library("org.Mm.eg.db")
##
Data Preparation
rawdata = read.csv("GLDS-102_rna_seq_Normalized_Counts.csv")
group <- factor(c('1', '1', '1', '1', '1', '1', '2', '2', '2', '2', '2', '2'))
dgeGlm <- DGEList(counts = rawdata, group = group)
## Setting first column of `counts` as gene annotation.
str(dgeGlm)
## Formal class 'DGEList' [package "edgeR"] with 1 slot
## ..@ .Data:List of 3
## .. ..$ : num [1:24035, 1:12] 2976.8 59.8 21.2 40.1 0 ...
## .. .. ..- attr(*, "dimnames")=List of 2
## .. .. .. ..$ : chr [1:24035] "1" "2" "3" "4" ...
## .. .. .. ..$ : chr [1:12] "Mmus_C57.6J_KDN_FLT_Rep1_M23" "Mmus_C57.6J_KDN_FLT_Rep2_M24" "Mmus_C57.6J_KDN_FLT_Rep3_M25" "Mmus_C57.6J_KDN_FLT_Rep4_M26" ...
## .. ..$ :'data.frame': 12 obs. of 3 variables:
## .. .. ..$ group : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 2 2 2 2 ...
## .. .. ..$ lib.size : num [1:12] 40266365 40740336 37739541 39196969 36820645 ...
## .. .. ..$ norm.factors: num [1:12] 1 1 1 1 1 1 1 1 1 1 ...
## .. ..$ :'data.frame': 24035 obs. of 1 variable:
## .. .. ..$ X: chr [1:24035] "ENSMUSG00000000001" "ENSMUSG00000000028" "ENSMUSG00000000031" "ENSMUSG00000000037" ...
## ..$ names: chr [1:3] "counts" "samples" "genes"
keep <- rowSums(cpm(dgeGlm)>2) >=4
dgeGlm <- dgeGlm[keep,]
names(dgeGlm)
## [1] "counts" "samples" "genes"
dgeGlm[["samples"]]
## group lib.size norm.factors
## Mmus_C57.6J_KDN_FLT_Rep1_M23 1 40266365 1
## Mmus_C57.6J_KDN_FLT_Rep2_M24 1 40740336 1
## Mmus_C57.6J_KDN_FLT_Rep3_M25 1 37739541 1
## Mmus_C57.6J_KDN_FLT_Rep4_M26 1 39196969 1
## Mmus_C57.6J_KDN_FLT_Rep5_M27 1 36820645 1
## Mmus_C57.6J_KDN_FLT_Rep6_M28 1 36475638 1
## Mmus_C57.6J_KDN_GC_Rep1_M33 2 36585661 1
## Mmus_C57.6J_KDN_GC_Rep2_M34 2 36550490 1
## Mmus_C57.6J_KDN_GC_Rep3_M35 2 39573830 1
## Mmus_C57.6J_KDN_GC_Rep4_M36 2 36346170 1
## Mmus_C57.6J_KDN_GC_Rep5_M37 2 37663039 1
## Mmus_C57.6J_KDN_GC_Rep6_M38 2 34662629 1
design <- model.matrix(~group)
design
## (Intercept) group2
## 1 1 0
## 2 1 0
## 3 1 0
## 4 1 0
## 5 1 0
## 6 1 0
## 7 1 1
## 8 1 1
## 9 1 1
## 10 1 1
## 11 1 1
## 12 1 1
## attr(,"assign")
## [1] 0 1
## attr(,"contrasts")
## attr(,"contrasts")$group
## [1] "contr.treatment"
dgeGlmComDisp <- estimateGLMCommonDisp(dgeGlm, design, verbose = TRUE)
## Disp = 0.02648 , BCV = 0.1627
dgeGlmTrendDisp <- estimateGLMTrendedDisp(dgeGlmComDisp, design)
dgeGlmTagDisp <- estimateGLMTagwiseDisp(dgeGlmTrendDisp, design)
Data Analysis
plotBCV(dgeGlmTagDisp)

fit <- glmFit(dgeGlmTagDisp, design)
colnames(coef(fit))
## [1] "(Intercept)" "group2"
lrt <- glmLRT(fit, coef =2)
topTags(lrt)
## Coefficient: group2
## X logFC logCPM LR PValue FDR
## 4902 ENSMUSG00000026077 -1.3621448 3.576328 79.72497 4.303282e-19 5.858058e-15
## 1062 ENSMUSG00000007872 0.8863833 5.525394 76.76969 1.921032e-18 1.307551e-14
## 3038 ENSMUSG00000021775 0.9537055 6.241961 63.05270 2.012493e-15 9.132021e-12
## 364 ENSMUSG00000002250 -0.8282806 5.266124 62.46754 2.708731e-15 9.218490e-12
## 14467 ENSMUSG00000059824 2.2593713 4.561763 57.98741 2.638001e-14 7.182221e-11
## 16306 ENSMUSG00000074715 -1.9895168 3.805293 47.03450 6.974820e-12 1.582470e-08
## 5812 ENSMUSG00000027875 1.7237129 3.994952 38.26066 6.189788e-10 8.622217e-07
## 10133 ENSMUSG00000038393 0.7704989 9.396674 38.23907 6.258678e-10 8.622217e-07
## 2581 ENSMUSG00000020889 0.7898922 6.346773 38.23375 6.275752e-10 8.622217e-07
## 10066 ENSMUSG00000038224 -0.6928777 5.185635 38.21578 6.333811e-10 8.622217e-07
ttGlm <- topTags(lrt, n = Inf)
class(ttGlm)
## [1] "TopTags"
## attr(,"package")
## [1] "edgeR"
Summary
summary(deGlm <- decideTestsDGE(lrt, p = 0.1, adjust = "fdr"))
## Warning in decideTestsDGE(lrt, p = 0.1, adjust = "fdr"): 'decideTestsDGE' is deprecated.
## Use 'decideTests' instead.
## See help("Deprecated")
## group2
## Down 64
## NotSig 13390
## Up 159
tagsGlm <- rownames(dgeGlmTagDisp)[as.logical(deGlm)]
plotSmear(lrt, de.tags = tagsGlm)
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "panel.first" is not a
## graphical parameter

hist2 <- ttGlm$table[ttGlm$table$FDR < 0.1, ]
write.csv(hist2, "Mouse_EdgeR_Results_Zero_vs_1.csv")