library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, as.vector, cbind,
## colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
## grep, grepl, intersect, is.unsorted, lapply, lengths, Map,
## mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
## setdiff, sort, table, tapply, union, unique, unlist, unsplit
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: Rcpp
## Loading required package: RcppArmadillo
curData <- read.csv (file= "C:/Users/Jim/P68 Ensembl counts/P68A Ensembl defNASH.csv")
dim(curData)
## [1] 56300 96
curData[1:5,1:5]
## geneName lib2009 lib2010 lib2011 lib2012
## 1 defNASH 0 0 1 0
## 2 ENSG00000000003 198 363 675 290
## 3 ENSG00000000005 0 2 0 2
## 4 ENSG00000000419 58 89 163 93
## 5 ENSG00000000457 62 89 167 99
rawCounts<-curData[2:nrow(curData), 2:ncol(curData)]
rownames(rawCounts)<-as.character(curData[2:nrow(curData),1])
designData<-t(curData[1,2:ncol(curData)])
colnames(designData)<-as.character(curData[1,1])
table(designData[,1])
##
## 0 1
## 43 52
designData<-as.data.frame(designData)
rawCounts<-as.matrix(rawCounts)
remRows<-which(apply(rawCounts, 1, sum)==0)
rawCountsSub<-rawCounts[-remRows,]
length(remRows)
## [1] 17749
countsCompare1<-rawCountsSub
designData1<-designData
colnames(designData1)
## [1] "defNASH"
colnames(designData1)[1]<-"defNASH"
firstDESeq<-DESeqDataSetFromMatrix(countData=countsCompare1, colData=designData1, design=~defNASH)
## the design formula contains a numeric variable with integer values,
## specifying a model with increasing fold change for higher values.
## did you mean for this to be a factor? if so, first convert
## this variable to a factor using the factor() function
firstDESeq$defNASH<-factor(firstDESeq$defNASH, levels=c(0,1))
firstDESeq<-DESeq(firstDESeq)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 383 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
res1<-results(firstDESeq)
plotMA(res1, main="DESeq2 defNASH", ylim=c(-2,2))
sum(res1$padj < 0.05, na.rm=T)
## [1] 534
res1Ordered<-res1[order(res1$padj),]
head (res1Ordered)
## log2 fold change (MAP): defNASH 1 vs 0
## Wald test p-value: defNASH 1 vs 0
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE stat
## <numeric> <numeric> <numeric> <numeric>
## ENSG00000213606 9.110225 0.6712725 0.09498844 7.066886
## ENSG00000083857 954.096228 0.5364576 0.07841993 6.840832
## ENSG00000198074 83.225461 0.6216194 0.09071975 6.852084
## ENSG00000173391 4.615674 0.6804876 0.10132357 6.715985
## ENSG00000165912 65.513518 -0.4965216 0.07805611 -6.361086
## ENSG00000115009 2.762327 0.6071290 0.09649355 6.291913
## pvalue padj
## <numeric> <numeric>
## ENSG00000213606 1.584485e-12 3.501078e-08
## ENSG00000083857 7.873440e-12 5.799051e-08
## ENSG00000198074 7.278158e-12 5.799051e-08
## ENSG00000173391 1.867996e-11 1.031881e-07
## ENSG00000165912 2.003329e-10 8.853110e-07
## ENSG00000115009 3.135777e-10 1.154802e-06
write.csv(res1Ordered, file="C:/Users/Jim/P68 Ensembl counts/P68A Ensembl defNASH DESeq2 results.csv")