script for nominal data

load DESeq2

library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     grep, grepl, intersect, is.unsorted, lapply, lengths, Map,
##     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unlist, unsplit
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: Rcpp
## Loading required package: RcppArmadillo

Read in csv Ensembl count file

curData <- read.csv (file= "C:/Users/Jim/P68 Ensembl counts/P68A Ensembl defNASH.csv")
dim(curData)
## [1] 56300    96
curData[1:5,1:5]
##          geneName lib2009 lib2010 lib2011 lib2012
## 1         defNASH       0       0       1       0
## 2 ENSG00000000003     198     363     675     290
## 3 ENSG00000000005       0       2       0       2
## 4 ENSG00000000419      58      89     163      93
## 5 ENSG00000000457      62      89     167      99

Create count matrix, assign design data

rawCounts<-curData[2:nrow(curData), 2:ncol(curData)]
rownames(rawCounts)<-as.character(curData[2:nrow(curData),1])
designData<-t(curData[1,2:ncol(curData)])
colnames(designData)<-as.character(curData[1,1])
table(designData[,1])
## 
##  0  1 
## 43 52
designData<-as.data.frame(designData)

rawCounts<-as.matrix(rawCounts)

remove rows with null values

remRows<-which(apply(rawCounts, 1, sum)==0)

rawCountsSub<-rawCounts[-remRows,]

length(remRows)
## [1] 17749
countsCompare1<-rawCountsSub
designData1<-designData
colnames(designData1)
## [1] "defNASH"
colnames(designData1)[1]<-"defNASH"

Run algorithim

firstDESeq<-DESeqDataSetFromMatrix(countData=countsCompare1, colData=designData1, design=~defNASH)
## the design formula contains a numeric variable with integer values,
##   specifying a model with increasing fold change for higher values.
##   did you mean for this to be a factor? if so, first convert
##   this variable to a factor using the factor() function
firstDESeq$defNASH<-factor(firstDESeq$defNASH, levels=c(0,1))
firstDESeq<-DESeq(firstDESeq)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 383 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
res1<-results(firstDESeq)

ANALYSIS

MA plot (log2 FC over normalized mean expression)

plotMA(res1, main="DESeq2 defNASH", ylim=c(-2,2))

adjust P values, order and write results file

sum(res1$padj < 0.05, na.rm=T)
## [1] 534
res1Ordered<-res1[order(res1$padj),]
head (res1Ordered)
## log2 fold change (MAP): defNASH 1 vs 0 
## Wald test p-value: defNASH 1 vs 0 
## DataFrame with 6 rows and 6 columns
##                   baseMean log2FoldChange      lfcSE      stat
##                  <numeric>      <numeric>  <numeric> <numeric>
## ENSG00000213606   9.110225      0.6712725 0.09498844  7.066886
## ENSG00000083857 954.096228      0.5364576 0.07841993  6.840832
## ENSG00000198074  83.225461      0.6216194 0.09071975  6.852084
## ENSG00000173391   4.615674      0.6804876 0.10132357  6.715985
## ENSG00000165912  65.513518     -0.4965216 0.07805611 -6.361086
## ENSG00000115009   2.762327      0.6071290 0.09649355  6.291913
##                       pvalue         padj
##                    <numeric>    <numeric>
## ENSG00000213606 1.584485e-12 3.501078e-08
## ENSG00000083857 7.873440e-12 5.799051e-08
## ENSG00000198074 7.278158e-12 5.799051e-08
## ENSG00000173391 1.867996e-11 1.031881e-07
## ENSG00000165912 2.003329e-10 8.853110e-07
## ENSG00000115009 3.135777e-10 1.154802e-06
write.csv(res1Ordered, file="C:/Users/Jim/P68 Ensembl counts/P68A Ensembl defNASH DESeq2 results.csv")