# Load metadata information
library(readr)
library(DESeq2)
library(dplyr)
MetaData_File <- read_csv("~/Downloads/MetaData_File.csv",col_types = readr::cols())
DT::datatable(MetaData_File,
filter = 'top',
class = 'cell-border stripe',
rownames = FALSE,
extensions = 'Buttons',
options = list(scrollX = TRUE))
# Load count matrix
all_cell_lines_ordered <- read_csv("~/Documents/GENAVi/all_cell_lines_ordered.csv",col_types = readr::cols())
DT::datatable(all_cell_lines_ordered,
filter = 'top',
class = 'cell-border stripe',
rownames = FALSE,
extensions = 'Buttons',
options = list(scrollX = TRUE))
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
gene <- "ST13P12"
gene.id <- "ENSG00000248400.2" # "ST13P12" gene ID
aux <- t(all_cell_lines_ordered[all_cell_lines_ordered$Genename == gene,-c(1:7)])
aux <- as.data.frame(merge(aux,MetaData_File, by.x = 0,by.y = "samples"))
library(dplyr)
aux %>%
group_by(groups2) %>%
summarise(nb.samples=n(),
sum.counts=sum(V1),
avg.count=mean(V1,na.rm=TRUE),
max.count=max(V1, na.rm=TRUE))
## # A tibble: 7 x 5
## groups2 nb.samples sum.counts avg.count max.count
## <chr> <int> <int> <dbl> <dbl>
## 1 A 4 0 0 0
## 2 B 3 3 1 1
## 3 C 2 1 0.5 1
## 4 D 4 1 0.25 1
## 5 E 3 643 214. 642
## 6 F 3 3 1 3
## 7 X 1 0 0 0
cts <- as.matrix(all_cell_lines_ordered[,-c(1:7)])
rownames(cts) <- all_cell_lines_ordered$Geneid
dds <- DESeqDataSetFromMatrix(countData = cts,
colData = MetaData_File,
design = formula("~ groups2"))
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## [,1] [,2]
## IOSE11 "0" "F"
## IOSE4 "0" "D"
## FT246 "0" "X"
## FT33 "0" "B"
## EEC16 "0" "E"
## EFO27 "1" "A"
## GTFR230 "1" "A"
## MCAS "1" "B"
## OAW42 "0" "D"
## VOA1056 "1" "A"
## CaOV3 "0" "A"
## HEY "0" "E"
## UWB1_289 "1" "B"
## Kuramochi "0" "F"
## ES2 "1" "F"
## JHOC5 "642" "C"
## RMGII "0" "E"
## BT549 "3" "D"
## MCF10A "0" "C"
## MCF7 "0" "D"
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
ref <- "E"
dds[["groups2"]] <- relevel(dds[["groups2"]], ref = ref)
dds.ref.E <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## log2 fold change (MLE): groups2 A vs E
## Wald test p-value: groups2 A vs E
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE
## <numeric> <numeric> <numeric>
## ENSG00000248400.2 25.002057999415 13.9216827751186 2.55812781347369
## stat pvalue
## <numeric> <numeric>
## ENSG00000248400.2 5.44213729345066 5.26450749598908e-08
## padj
## <numeric>
## ENSG00000248400.2 6.0835144478651e-05
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## log2 fold change (MLE): groups2 E vs A
## Wald test p-value: groups2 E vs A
## DataFrame with 1 row and 6 columns
## baseMean log2FoldChange lfcSE
## <numeric> <numeric> <numeric>
## ENSG00000248400.2 25.002057999415 -29.7213619263519 2.55819943569283
## stat pvalue
## <numeric> <numeric>
## ENSG00000248400.2 -11.618078524946 3.33553730190726e-31
## padj
## <numeric>
## ENSG00000248400.2 2.69811612351278e-27
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 dplyr_0.7.5
## [3] DESeq2_1.20.0 SummarizedExperiment_1.10.1
## [5] DelayedArray_0.6.0 BiocParallel_1.14.1
## [7] matrixStats_0.53.1 Biobase_2.40.0
## [9] GenomicRanges_1.32.3 GenomeInfoDb_1.16.0
## [11] IRanges_2.14.10 S4Vectors_0.18.2
## [13] BiocGenerics_0.26.0 readr_1.1.1
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-6 bit64_0.9-7 RColorBrewer_1.1-2
## [4] rprojroot_1.3-2 tools_3.5.0 backports_1.1.2
## [7] utf8_1.1.3 R6_2.2.2 DT_0.4
## [10] rpart_4.1-13 Hmisc_4.1-1 DBI_1.0.0
## [13] lazyeval_0.2.1 colorspace_1.3-2 nnet_7.3-12
## [16] tidyselect_0.2.4 gridExtra_2.3 bit_1.1-13
## [19] compiler_3.5.0 cli_1.0.0 htmlTable_1.11.2
## [22] scales_0.5.0 checkmate_1.8.5 genefilter_1.62.0
## [25] stringr_1.3.1 digest_0.6.15 foreign_0.8-70
## [28] rmarkdown_1.9 XVector_0.20.0 base64enc_0.1-3
## [31] pkgconfig_2.0.1 htmltools_0.3.6 htmlwidgets_1.2
## [34] rlang_0.2.0 rstudioapi_0.7 RSQLite_2.1.1
## [37] shiny_1.0.5 bindr_0.1.1 jsonlite_1.5
## [40] crosstalk_1.0.0 acepack_1.4.1 RCurl_1.95-4.10
## [43] magrittr_1.5 GenomeInfoDbData_1.1.0 Formula_1.2-3
## [46] Matrix_1.2-14 Rcpp_0.12.17 munsell_0.4.3
## [49] stringi_1.2.2 yaml_2.1.19 zlibbioc_1.26.0
## [52] plyr_1.8.4 grid_3.5.0 blob_1.1.1
## [55] promises_1.0.1 crayon_1.3.4 lattice_0.20-35
## [58] splines_3.5.0 annotate_1.58.0 hms_0.4.2
## [61] locfit_1.5-9.1 knitr_1.20 pillar_1.2.2
## [64] geneplotter_1.58.0 XML_3.98-1.11 glue_1.2.0
## [67] evaluate_0.10.1 latticeExtra_0.6-28 data.table_1.11.2
## [70] httpuv_1.4.3 gtable_0.2.0 purrr_0.2.4
## [73] assertthat_0.2.0 ggplot2_2.2.1 mime_0.5
## [76] xtable_1.8-2 later_0.7.2 survival_2.42-3
## [79] tibble_1.4.2 AnnotationDbi_1.42.1 memoise_1.1.0
## [82] cluster_2.0.7-1