fdata6 = read.delim("jun10_2016_fdata6.txt",
    stringsAsFactors = F,
    check.names = F)
    
  noness = filter(fdata6, fdata6$essential == 0)
  noness = noness %>% arrange(strain)

preprocessing for low cnts – all cnts less than 50 mymin50; removes rows that are < 50 in ALL control conditions

function for edgeR

function for DESeq2

compare DESeq2, edgeR

preprocessing 1. count matrix 2. filter count matrix for ALL rows < 50 3. filter count matrix for all CTRLS < 50

example, synthetic dropouts hom samples

xbar= as.matrix(read.delim('aug6_2016_hom_barseq.txt',header = T,stringsAsFactors =F,check.names = F,strip.white = T))

p11 = read.delim("oct2_phsbar.txt",header = T,stringsAsFactors = F,check.names = F)

w11 = which(p11$type == 'ctrl')
lp11 = p11[-w11,]
#filter out essential strains
wnebar = which(noness$strain %in% rownames(xbar))

hsbar = myall_less50(xbar[noness$strain[wnebar],p11$name],w11)

print('dim after filtering all cnts less 50')

## [1] "dim after filtering all cnts less 50"

print(dim(hsbar))

## [1] 9412   11

hsbar = mymin50(hsbar,w11)

print('dim after filtering cnts less 50 only in contrl')

## [1] "dim after filtering cnts less 50 only in contrl"

print(dim(hsbar))

## [1] 9038   11

retrieve normalized counts from edgeR and DESeq2

group = model factor

ref = reference condition

hsbar = hsbar[,p11$name]

p11$cond = factor(p11$cond)
p11$cond = relevel(p11$cond,ref='sc')
hdds = mydds(hsbar,groups = p11$cond)

## Loading required package: DESeq2

## Loading required package: S4Vectors

## Loading required package: stats4

## Loading required package: BiocGenerics

## Loading required package: parallel

## 
## Attaching package: 'BiocGenerics'

## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB

## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, as.vector, cbind,
##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
##     grep, grepl, intersect, is.unsorted, lapply, lengths, Map,
##     mapply, match, mget, order, paste, pmax, pmax.int, pmin,
##     pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
##     setdiff, sort, table, tapply, union, unique, unlist, unsplit

## 
## Attaching package: 'S4Vectors'

## The following object is masked from 'package:dplyr':
## 
##     rename

## Loading required package: IRanges

## 
## Attaching package: 'IRanges'

## The following object is masked from 'package:nlme':
## 
##     collapse

## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice

## Loading required package: GenomicRanges

## Loading required package: GenomeInfoDb

## Loading required package: SummarizedExperiment

## Loading required package: Biobase

## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.

## Loading required package: Rcpp

## Loading required package: RcppArmadillo

## estimating size factors

## estimating dispersions

## gene-wise dispersion estimates

## mean-dispersion relationship

## final dispersion estimates

## fitting model and testing

hedge = myedgeR(hsbar,group = p11$cond,ref = 'sc')

## Loading required package: limma

## 
## Attaching package: 'limma'

## The following object is masked from 'package:DESeq2':
## 
##     plotMA

## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA

## Disp = 0.04536 , BCV = 0.213

##    sc arg lys trp
## 1   1   0   0   0
## 2   1   0   0   0
## 3   1   0   0   0
## 4   0   1   0   0
## 5   0   1   0   0
## 6   0   1   0   0
## 7   0   0   1   0
## 8   0   0   1   0
## 9   0   0   1   0
## 10  0   0   0   1
## 11  0   0   0   1
## attr(,"assign")
## [1] 1 1 1 1
## attr(,"contrasts")
## attr(,"contrasts")$fac
## [1] "contr.treatment"
## 
##    arg lys trp
## 1   -1  -1  -1
## 4    1   0   0
## 7    0   1   0
## 10   0   0   1

mysumtags = sum counts from up and down tags

mysumcond = sum counts from each condition

post processing normalized count matrix collapse matrix dimensions by:

sum up and downtags
summing replicate condtions

hdds2 = myproc_normcounts(hdds,p11$cond)

## [1] 9038   11
## [1] 4718   11

hedge2 = myproc_normcounts(hedge,p11$cond)

## [1] 9038   11
## [1] 4718   11

compare this to same data generated by arrays

x11= as.matrix(read.delim('x11.txt',header = T,stringsAsFactors =F,check.names = F,strip.white = T))

# x11 = x11[noness$strain,p11$celfile]
# colnames(x11)=p11$name
x111 = mymixEMctrl(x11,w11)

## Loading required package: reshape2

## Loading required package: mixtools

## mixtools package, version 1.0.4, Released 2016-01-11
## This package is based upon work supported by the National Science Foundation under Grant No. SES-0518772.

## number of iterations= 98

btst = myBesttag(x111,bgThresh = 6.3)

## [1] min value of ctrl medians =  7.60542128303907            
## [1] 4352   11

hbat = ComBat(btst,mod = model.matrix(~cond,p11),batch = p11$pcr)

## Found 3 batches
## Adjusting for 3 covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data

tsw = mysweep(hbat)

hsum = mysumm(tsw[,-w11],lp11$cond)

include comparison to something REALLY trivial like normalizing by counts/experiment and NO R packages!!!

tst = mybarnorm(hsbar)

tst2 = myproc_normcounts(tst,p11$cond)

## [1] 9038   11
## [1] 4718   11

noRpkg = tst2[,colnames(hsum)]

runs to here

runs to here compare this to same ypd data generated by arrays

xochip= as.matrix(read.delim(
"xochip.txt",header = T,stringsAsFactors = F,check.names = F))

omix = xochip[,poybar$name]
omix = mymixEMctrl(omix,woybar)

## number of iterations= 211

omix = medShift.updn(omix)
omin = min(apply(omix[,woybar],1,median))


hmix = xochip[,phybar$name]
hmix = mymixEMctrl(hmix,whybar)

## number of iterations= 115

hmin = min(apply(hmix[,whybar],1,median))

bomix = myBesttag(omix,bgThresh=omin)

## [1] min value of ctrl medians =  7.38834850891002            
## [1] 4423    7

bhmix = myBesttag(hmix,bgThresh=hmin)

## [1] min value of ctrl medians =  7.17596949818443            
## [1] 4264   15

mydendy(bhmix,phybar$pcr)

## 'dendrogram' with 2 branches and 15 members total, at height 46.95969

bhybat = ComBat(bhmix,mod=model.matrix(~cond,phybar),batch=phybar$pcr)

## Found 7 batches
## Note: one batch has only one sample, setting mean.only=TRUE
## Adjusting for 2 covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data

mydendy(bhybat,phybar$cond)

## 'dendrogram' with 2 branches and 15 members total, at height 29.75203

mydendy(bomix,poybar$pcr)

## 'dendrogram' with 2 branches and 7 members total, at height 34.75801

mydendy(bomix,poybar$cond)

## 'dendrogram' with 2 branches and 7 members total, at height 34.75801

boybat = ComBat(bomix,mod=model.matrix(~cond,poybar),batch=poybar$pcr)

## Found 4 batches
## Note: one batch has only one sample, setting mean.only=TRUE
## Adjusting for 1 covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data

mydendy(boybat,poybar$cond)

## 'dendrogram' with 2 branches and 7 members total, at height 26.96797

hysw= mysweep(bhybat)
lphybar = phybar[-whybar,]
hysum = mysumm(hysw[,lphybar$name],lphybar$cond)

oysw= mysweep(boybat)
lpoybar = poybar[-woybar,]
oysum = mysumm(oysw[,lpoybar$name],lpoybar$cond)

runs to here

include normalizing by counts/experiment and NO R packages!!!

noRhy = mybarnorm(hybar)


noRhy3 = myproc_normcounts(noRhy,phybar$cond)

## [1] 8222   15
## [1] 4444   15

noRhy3 = noRhy3[,colnames(hysum)]

noRoy = mybarnorm(oybar)


noRoy3 = myproc_normcounts(noRoy,poybar$cond)

## [1] 7760    7
## [1] 4427    7

noRoy3 = noRoy3[,colnames(oysum),drop=F]

hom YPD

opar <- par(pch=19,mfrow=c(1,2),mar=c(2.2,2,1.4,1),cex=1.2)
for (i in 1:ncol(hysum)) { 
  
  p1(hysum,i)
  p1(noRhy3,i)
  p1(hyj,i)
  p1(hyd,i)
  
}

oliver YPD

opar <- par(pch=19,mfrow=c(1,2),mar=c(2.2,2,1.4,1),cex=1.2)
for (i in 1:ncol(oysum)) { 
  
  p1(oysum,i)
  p1(noRoy3,i,sig =0.7)
  p1(oyj,i,sig =0.7)
  p1(oyd,i,sig =0.7)
  
}

maurico_seq.Rmd

ggiaever

02/10/2016

include comparison to something REALLY trivial like normalizing by counts/experiment and NO R packages!!!

runs to here

runs to here

include normalizing by counts/experiment and NO R packages!!!