# Load the strain annotation table, the barcode count matrix and the sample
# annotation, then subset the counts to nonessential strains.
# Requires dplyr to be attached (filter, arrange, %>%).

# fdata6 is a file that includes ORFs, genes, essential flag, etc.
fdata6 = read.delim("jun10_2016_fdata6.txt",
                    stringsAsFactors = FALSE,
                    check.names = FALSE)

# Select nonessential strains only (essential == 0), ordered by strain.
noness = fdata6 %>%
  filter(essential == 0) %>%
  arrange(strain)

# Read in the count matrix (row names are matched against strain names below).
xbar = as.matrix(read.delim('aug6_2016_hom_barseq.txt',
                            header = TRUE,
                            stringsAsFactors = FALSE,
                            check.names = FALSE,
                            strip.white = TRUE))

# Read in the sample annotation for the count matrix.
# In this file, controls are SC; experiments are SC in dropout media lacking
# a single amino acid.
p11 = read.delim("oct2_phsbar.txt",
                 header = TRUE,
                 stringsAsFactors = FALSE,
                 check.names = FALSE)

# Indices of the control samples, and the annotation with the controls removed.
# Logical %in% indexing is used instead of p11[-w11, ]: negative indexing with
# an empty w11 would return zero rows instead of all rows. %in% never yields
# NA, so rows with a missing type are kept, as which() did.
w11 = which(p11$type == 'ctrl')
lp11 = p11[!(p11$type %in% 'ctrl'), ]

# Filter out essential strains: keep nonessential strains present in the count
# matrix, with columns ordered as in the annotation. drop = FALSE keeps a
# matrix even when a single strain or sample is selected.
wnebar = which(noness$strain %in% rownames(xbar))
hsbar = xbar[noness$strain[wnebar], p11$name, drop = FALSE]
# retrieve normalized counts from edgeR
# ref = the reference condition
# Filter low-count strains and normalize the nonessential count matrix.
# myall_less50, mymin50 and mynorm_EdgeR are helpers defined elsewhere; the
# descriptions below follow the original author's notes.
# (A redundant `hsbar = hsbar[,p11$name]` was removed here: hsbar is rebuilt
# from xbar below, with the same column selection.)

# Define conditions as a factor, using SC ('sc') as the reference level.
p11$cond = relevel(factor(p11$cond), ref = 'sc')

# Positions of the control samples in the annotation table. Columns of the
# count matrix are selected in p11$name order, so these presumably double as
# column indices for mymin50 -- confirm against the helper.
w11 = which(p11$type == 'ctrl')

# Intended to remove strains whose counts are < 50 across all samples.
# drop = FALSE keeps a matrix even if only one strain or sample is selected.
hsbar = myall_less50(xbar[noness$strain[wnebar], p11$name, drop = FALSE])

# Intended to remove strains whose counts are < 50 in any of the controls.
hsbar = mymin50(hsbar, w11)

# Returns normalized counts, using the condition factor with 'sc' as reference.
hedge = mynorm_EdgeR(hsbar, group = p11$cond, ref = 'sc')
## Loading required package: limma
## Disp = 0.04536 , BCV = 0.213
# mysumtags: sums counts from the up and down tags
# mysumcond: sums counts from each condition
# Post-processing of the normalized count matrix; collapses the matrix dimensions by:
# Post-process the normalized counts with myproc_normcounts (defined elsewhere).
# Per the original author's notes, the helper:
#  - sums uptags and downtags into one value
#  - sums all replicate conditions into one value
#  - uses all data to define a median value; each experiment is subtracted
#    from this value to get a log ratio
# NOTE(review): this description comes from the notes, not from the helper's
# source, which is not visible here -- confirm.
hedge2 = myproc_normcounts(hedge,p11$cond)
## [1] 9038 11
## [1] 4718 11
# plot results
## R version 3.3.1 (2016-06-21)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.6 (Final)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] edgeR_3.12.1 limma_3.26.9 RColorBrewer_1.1-2
## [4] sva_3.18.0 genefilter_1.52.1 mgcv_1.8-15
## [7] nlme_3.1-128 dplyr_0.5.0 knitr_1.15.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.8 tools_3.3.1 digest_0.6.10
## [4] annotate_1.48.0 evaluate_0.10 RSQLite_1.0.0
## [7] tibble_1.2 lattice_0.20-34 Matrix_1.2-7.1
## [10] DBI_0.5-1 yaml_2.1.14 parallel_3.3.1
## [13] stringr_1.1.0 S4Vectors_0.8.11 IRanges_2.4.8
## [16] stats4_3.3.1 grid_3.3.1 Biobase_2.30.0
## [19] R6_2.2.0 AnnotationDbi_1.32.3 XML_3.98-1.4
## [22] survival_2.39-5 rmarkdown_1.0 magrittr_1.5
## [25] htmltools_0.3.5 BiocGenerics_0.16.1 splines_3.3.1
## [28] assertthat_0.1 xtable_1.8-2 stringi_1.1.2
## [31] lazyeval_0.2.0