GNI_barseq.Rmd

read in strain gene translation table

# fdata6 is the strain annotation table (ORFs, genes, essential flag, etc.).
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned elsewhere in a session.
fdata6 <- read.delim(
  "jun10_2016_fdata6.txt",
  stringsAsFactors = FALSE,
  check.names = FALSE
)

# Keep only the non-essential strains (essential == 0) for this dataset,
# ordered by strain. Columns are referenced by bare name inside the dplyr
# verbs so the condition always refers to the table being filtered, and the
# verbs are namespace-qualified so stats::filter can never be picked up by
# accident.
noness <- fdata6 %>%
  dplyr::filter(essential == 0) %>%
  dplyr::arrange(strain)

input data count matrix

example, SC dropouts hom samples, erica

we also have some new barseq data

# Read in the count matrix (strains in rows, samples in columns, as implied
# by the row/column indexing below).
xbar <- as.matrix(read.delim(
  "aug6_2016_hom_barseq.txt",
  header = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE,
  strip.white = TRUE
))

# Read in the sample annotation for the count matrix.
p11 <- read.delim(
  "oct2_phsbar.txt",
  header = TRUE,
  stringsAsFactors = FALSE,
  check.names = FALSE
)

# In this file, controls are SC; experiments are SC dropout media lacking a
# single amino acid.
w11 <- which(p11$type == "ctrl")

# Annotation rows for the non-control samples. A logical mask is used instead
# of p11[-w11, ]: with no control rows, w11 is integer(0) and negative
# indexing by it would return zero rows rather than all rows. %in% never
# yields NA, so rows with a missing type are kept, as they were before.
lp11 <- p11[!(p11$type %in% "ctrl"), ]

# Filter out essential strains: positions in noness of the strains that are
# actually present in the count matrix.
wnebar <- which(noness$strain %in% rownames(xbar))

# Counts for those non-essential strains, with columns in annotation order.
# drop = FALSE keeps the result a matrix even if only one strain or one
# sample is selected.
hsbar <- xbar[noness$strain[wnebar], p11$name, drop = FALSE]

retrieve normalized counts from edgeR

# Re-select the sample columns of hsbar by the names in the annotation table.
hsbar <- hsbar[, p11$name]

# Encode the condition as a factor with "sc" as the reference level.
p11$cond <- relevel(factor(p11$cond), ref = "sc")

# Row positions of the control samples in the annotation table.
w11 <- which(p11$type == "ctrl")

# Low-count filtering and normalization are done by helpers defined elsewhere;
# the descriptions below follow the original author's notes.
# NOTE(review): confirm against the helper definitions.

# Removes counts that are < 50 for a gene across all samples.
hsbar1 <- myall_less50(xbar[noness$strain[wnebar], p11$name])

# Removes counts that are < 50 in any of the controls.
hsbar2 <- mymin50(hsbar1, w11)

# Returns the normalized counts, grouped by condition with "sc" as reference.
hedge <- mynorm_EdgeR(hsbar2, group = p11$cond, ref = "sc")

## Loading required package: limma

## Disp = 0.04536 , BCV = 0.213

post processing of normalized counts

# Post-process the normalized counts with myproc_normcounts (defined
# elsewhere). Per the original author's notes, the helper:
#   - sums uptags and downtags into one value,
#   - sums all replicates of a condition into one value,
#   - uses all data to define a median value; each experiment is subtracted
#     from this value to get a log ratio.
# NOTE(review): confirm these steps against the helper's definition.
hedge2 <- myproc_normcounts(hedge, p11$cond)

## [1] 9038   11
## [1] 4718   11

plot results

## R version 3.5.2 (2018-12-20)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS release 6.10 (Final)
## 
## Matrix products: default
## BLAS: /usr/lib64/R/lib/libRblas.so
## LAPACK: /usr/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] edgeR_3.24.3        limma_3.38.3        RColorBrewer_1.1-2 
##  [4] sva_3.30.1          BiocParallel_1.16.6 genefilter_1.64.0  
##  [7] mgcv_1.8-27         nlme_3.1-137        dplyr_0.8.0.1      
## [10] knitr_1.22         
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.1           pillar_1.3.1         compiler_3.5.2      
##  [4] bitops_1.0-6         tools_3.5.2          bit_1.1-14          
##  [7] digest_0.6.18        memoise_1.1.0        RSQLite_2.1.1       
## [10] annotate_1.60.1      evaluate_0.13        tibble_2.1.1        
## [13] lattice_0.20-38      pkgconfig_2.0.2      rlang_0.3.3         
## [16] Matrix_1.2-16        DBI_1.0.0            yaml_2.2.0          
## [19] parallel_3.5.2       xfun_0.6             stringr_1.4.0       
## [22] IRanges_2.16.0       S4Vectors_0.20.1     locfit_1.5-9.1      
## [25] bit64_0.9-7          stats4_3.5.2         grid_3.5.2          
## [28] tidyselect_0.2.5     glue_1.3.1           Biobase_2.42.0      
## [31] R6_2.4.0             AnnotationDbi_1.44.0 survival_2.43-3     
## [34] XML_3.98-1.19        rmarkdown_1.12       blob_1.1.1          
## [37] purrr_0.3.2          magrittr_1.5         matrixStats_0.54.0  
## [40] htmltools_0.3.6      splines_3.5.2        BiocGenerics_0.28.0 
## [43] assertthat_0.2.1     xtable_1.8-3         stringi_1.4.3       
## [46] RCurl_1.95-4.12      crayon_1.3.4

GNI_barseq.Rmd

ggiaever

02/11/2017

read in strain gene translation table

input data count matrix

example, SC dropouts hom samples, erica

we also have some new barseq data

retrieve normalized counts from edgeR

post processing of normalized counts

plot results