# Read in the strain-to-gene translation table
# fdata6 is the strain annotation table (ORFs, genes, essential flag, etc.).
# check.names = FALSE keeps the column headers exactly as written in the file.
fdata6 = read.delim("jun10_2016_fdata6.txt",
                    stringsAsFactors = FALSE,
                    check.names = FALSE)
# Keep only the non-essential strains (essential == 0) for this dataset,
# ordered by strain. The column is referenced by its bare name inside
# filter() rather than through fdata6$..., which is the dplyr idiom.
noness = fdata6 %>%
  filter(essential == 0) %>%
  arrange(strain)
# Input data: count matrix
# Example: SC dropouts, hom samples (Erica).
# We also have some new barseq data.
# Read in the barcode count matrix.
# NOTE(review): the row names are assumed to be strain identifiers, since the
# matrix is indexed by noness$strain below -- confirm against the input file.
xbar = as.matrix(read.delim('aug6_2016_hom_barseq.txt',
                            header = TRUE,
                            stringsAsFactors = FALSE,
                            check.names = FALSE,
                            strip.white = TRUE))
# Read in the sample annotation for the count matrix. Its `name` column is used
# below to select the matrix columns and its `type` column marks the controls.
p11 = read.delim("oct2_phsbar.txt",
                 header = TRUE,
                 stringsAsFactors = FALSE,
                 check.names = FALSE)
# In this file, controls are SC; experiments are SC dropout media lacking a
# single amino acid.
w11 = which(p11$type == 'ctrl')
# Annotation rows that are not controls. A logical mask is used instead of
# p11[-w11, ]: a negative index built from an empty w11 selects zero rows, so
# the original form would silently drop every sample if no control is present.
# %in% never yields NA, so rows with a missing type are kept, as before.
lp11 = p11[!(p11$type %in% 'ctrl'), ]
# Filter out essential strains: keep the non-essential strains that are present
# in the count matrix, with columns ordered as in the sample annotation.
wnebar = which(noness$strain %in% rownames(xbar))
# drop = FALSE keeps the result a matrix even if one row or column is selected.
hsbar = xbar[noness$strain[wnebar], p11$name, drop = FALSE]
# Retrieve normalized counts from edgeR
# Select the columns of hsbar in the order given by the sample annotation.
hsbar = hsbar[, p11$name]
# Encode the conditions as a factor with SC ('sc') as the reference level.
p11$cond = relevel(factor(p11$cond), ref = 'sc')
# Positions of the control samples in the annotation table.
w11 = which(p11$type == 'ctrl')
# myall_less50 (defined elsewhere) is described as removing low counts that are
# < 50 for each gene across all samples. It is applied to the non-essential
# strains, with columns ordered as in p11.
hsbar1 = myall_less50(xbar[noness$strain[wnebar], p11$name])
# mymin50 (defined elsewhere) is described as removing low counts that are < 50
# in any of the controls indexed by w11.
hsbar2 = mymin50(hsbar1, w11)
# mynorm_EdgeR (defined elsewhere) returns the normalized counts; the condition
# factor is passed as the grouping, with 'sc' as the reference.
hedge = mynorm_EdgeR(hsbar2, group = p11$cond, ref = 'sc')
## Loading required package: limma
## Disp = 0.04536 , BCV = 0.213

# Post-processing of normalized counts
# Post-process the normalized counts with myproc_normcounts (defined elsewhere).
# As described by the author, this function:
#   - sums uptags and downtags into one value;
#   - sums all replicate conditions into one value;
#   - uses all data to define a median value; each experiment is subtracted
#     from this value to get a log ratio.
hedge2 = myproc_normcounts(hedge, p11$cond)
## [1] 9038 11
## [1] 4718 11
# Plot results


## R version 3.5.2 (2018-12-20)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.10 (Final)
##
## Matrix products: default
## BLAS: /usr/lib64/R/lib/libRblas.so
## LAPACK: /usr/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] edgeR_3.24.3 limma_3.38.3 RColorBrewer_1.1-2
## [4] sva_3.30.1 BiocParallel_1.16.6 genefilter_1.64.0
## [7] mgcv_1.8-27 nlme_3.1-137 dplyr_0.8.0.1
## [10] knitr_1.22
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 pillar_1.3.1 compiler_3.5.2
## [4] bitops_1.0-6 tools_3.5.2 bit_1.1-14
## [7] digest_0.6.18 memoise_1.1.0 RSQLite_2.1.1
## [10] annotate_1.60.1 evaluate_0.13 tibble_2.1.1
## [13] lattice_0.20-38 pkgconfig_2.0.2 rlang_0.3.3
## [16] Matrix_1.2-16 DBI_1.0.0 yaml_2.2.0
## [19] parallel_3.5.2 xfun_0.6 stringr_1.4.0
## [22] IRanges_2.16.0 S4Vectors_0.20.1 locfit_1.5-9.1
## [25] bit64_0.9-7 stats4_3.5.2 grid_3.5.2
## [28] tidyselect_0.2.5 glue_1.3.1 Biobase_2.42.0
## [31] R6_2.4.0 AnnotationDbi_1.44.0 survival_2.43-3
## [34] XML_3.98-1.19 rmarkdown_1.12 blob_1.1.1
## [37] purrr_0.3.2 magrittr_1.5 matrixStats_0.54.0
## [40] htmltools_0.3.6 splines_3.5.2 BiocGenerics_0.28.0
## [43] assertthat_0.2.1 xtable_1.8-3 stringi_1.4.3
## [46] RCurl_1.95-4.12 crayon_1.3.4