chip_barseq.Rmd

example, SC dropouts hom samples, erica

# Read in the barcode count matrix.
# check.names = FALSE keeps the column names exactly as written in the file;
# strip.white = TRUE trims leading/trailing whitespace from unquoted fields.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
xbar <- as.matrix(read.delim(
  "aug6_2016_hom_barseq.txt",
  header = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE,
  strip.white = TRUE
))

# Read in the annotation for the count matrix.
p11 <- read.delim(
  "oct2_phsbar.txt",
  header = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE
)

# In this file, controls are SC; experiments are SC in dropout media lacking a
# single amino acid.
# Row indices of the control samples in the annotation table.
w11 <- which(p11$type == "ctrl")

# Annotation rows for the non-control samples. A logical mask is used instead
# of p11[-w11, ]: when there are no controls, -integer(0) selects zero rows
# rather than all of them. %in% (unlike ==) never yields NA, so rows with a
# missing type are kept, exactly as the negative index kept them.
lp11 <- p11[!(p11$type %in% "ctrl"), ]

# Filter out essential strains: keep the entries of noness$strain that occur
# among the row names of the count matrix.
# NOTE(review): noness is defined outside this section; presumably the table
# of non-essential strains -- confirm.
wnebar <- which(noness$strain %in% rownames(xbar))

# Count matrix restricted to those strains, with columns ordered as in the
# annotation. as.character() guards against noness$strain being a factor
# (a factor index would select rows by integer code, not by name);
# drop = FALSE keeps the result a matrix even for a single row or column.
hsbar <- xbar[as.character(noness$strain[wnebar]), p11$name, drop = FALSE]

retrieve normalized counts from edgeR

ref = reference condition

# Define conditions as a factor, using SC ("sc") as the reference level.
# (The former `hsbar = hsbar[,p11$name]` was dropped here: its result was
# overwritten by the filtering step below before ever being read.)
p11$cond <- relevel(factor(p11$cond), ref = "sc")

# Positions of the control samples in the annotation table. The count matrix
# below takes its columns in p11$name order, so these positions also index
# its control columns.
w11 <- which(p11$type == "ctrl")

# Removes low counts that are < 50 for each gene across all samples.
# The subset is rebuilt from xbar; as.character() guards against
# noness$strain being a factor and drop = FALSE keeps a matrix even for a
# single row or column.
hsbar <- myall_less50(
  xbar[as.character(noness$strain[wnebar]), p11$name, drop = FALSE]
)

# Removes low counts that are < 50 in any of the controls.
hsbar <- mymin50(hsbar, w11)

# Function that returns normalized counts (edgeR-based helper defined
# elsewhere), using the condition factor as groups and SC as the reference.
hedge <- mynorm_EdgeR(hsbar, group = p11$cond, ref = "sc")

## Loading required package: limma

## Disp = 0.04536 , BCV = 0.213

mysumtags = sum counts from up and down tags

mysumcond = sum counts from each condition

post-processing of the normalized count matrix: collapse the matrix dimensions by

summing up- and downtags
summing replicate conditions

# Post-process the normalized counts with myproc_normcounts (defined
# elsewhere). Per the author's notes, this helper:
#   - sums uptags and downtags into one value
#   - sums all replicate conditions into one value
#   - uses all data to define a median value; each experiment is subtracted
#     from this value to get a log ratio
hedge2 <- myproc_normcounts(hedge, p11$cond)

## [1] 9038   11
## [1] 4718   11

plot results

## R version 3.3.1 (2016-06-21)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.6 (Final)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] edgeR_3.12.1       limma_3.26.9       RColorBrewer_1.1-2
## [4] sva_3.18.0         genefilter_1.52.1  mgcv_1.8-15       
## [7] nlme_3.1-128       dplyr_0.5.0        knitr_1.15.1      
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.8          tools_3.3.1          digest_0.6.10       
##  [4] annotate_1.48.0      evaluate_0.10        RSQLite_1.0.0       
##  [7] tibble_1.2           lattice_0.20-34      Matrix_1.2-7.1      
## [10] DBI_0.5-1            yaml_2.1.14          parallel_3.3.1      
## [13] stringr_1.1.0        S4Vectors_0.8.11     IRanges_2.4.8       
## [16] stats4_3.3.1         grid_3.3.1           Biobase_2.30.0      
## [19] R6_2.2.0             AnnotationDbi_1.32.3 XML_3.98-1.4        
## [22] survival_2.39-5      rmarkdown_1.0        magrittr_1.5        
## [25] htmltools_0.3.5      BiocGenerics_0.16.1  splines_3.3.1       
## [28] assertthat_0.1       xtable_1.8-2         stringi_1.1.2       
## [31] lazyeval_0.2.0

chip_barseq.Rmd

ggiaever

02/10/2016

input data count matrix

example, SC dropouts hom samples, erica