# One-time dependency bootstrap: install BiocManager (from CRAN) and DESeq2
# (via BiocManager) only when they are not already available, so that
# re-running the script does not trigger a new installation attempt each time.
# requireNamespace() is used for the availability check because it loads the
# namespace without attaching the package to the search path.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("DESeq2", quietly = TRUE)) {
  BiocManager::install("DESeq2")
}
library(Matrix)
library(data.table)
library(dplyr)
library(DESeq2)
# Select the L'Ecuyer-CMRG random number generator for this session.
# NOTE(review): presumably chosen for reproducible parallel streams -- confirm.
RNGkind("L'Ecuyer-CMRG")

# Root directory that the input files below are read from.
data.dir = "/scratch/as5916/Projects/Rutgers_asthma_smc_scSeq/Data"

# ------------------------------------------------------------------------
# read in cell information
# ------------------------------------------------------------------------
# Per-cell metadata table; character columns are converted to factors.
cell_info_path = file.path(data.dir, "meta_lung.tsv")
cell_info = fread(cell_info_path, stringsAsFactors = TRUE)

# Quick look at the table: dimensions and the first two rows.
dim(cell_info)
cell_info[seq_len(2), ]
# ------------------------------------------------------------------------
# read in processed count data as a single matrix
# (the combined count matrix, not a per-celltype file)
# ------------------------------------------------------------------------
# Console output from a previous run is kept below as comments so that the
# script can be sourced; the values are a record of that run, not assertions.
dat = readRDS(file.path(data.dir, "ct_mtx/combinedCountMatrix.rds"))
dim(dat)
# [1]  25146 173551
class(dat)
# [1] "dgCMatrix"
# attr(,"package")
# [1] "Matrix"
dat[1:5, 1:4]
# 5 x 4 sparse Matrix of class "dgCMatrix"
#                 AAACCAAAGAACCTAT-1_1 AAACCAAAGATTGCAT-1_1 AAACCAAAGATTGCGC-1_1 AAACCAAAGGCTACTA-1_1
# ENSG00000238009                    .                    .                    .                    .
# ENSG00000241860                    .                    .                    .                    .
# ENSG00000290385                    .                    .                    .                    .
# ENSG00000291215                    .                    .                    .                    .
# LINC01409                          .                    .                    .                    .

# Run garbage collection and report memory use after loading the matrix.
gc()
#             used   (Mb) gc trigger   (Mb)   max used    (Mb)
# Ncells   7520060  401.7   11612667  620.2   11612667   620.2
# Vcells 879478856 6709.9 1269052289 9682.2 3738572678 28523.1
# ------------------------------------------------------------------------
# subset cell information
# ------------------------------------------------------------------------
# Console output from a previous run is kept below as comments so that the
# script can be sourced.

# Tabulate how many column names of `dat` are present in cell_info$cell.
table(colnames(dat) %in% cell_info$cell)
#   TRUE
# 173551

# Align the metadata to the count matrix: row i of `meta` is the cell_info
# row whose `cell` value equals the i-th column name of `dat`.
cell_idx = match(colnames(dat), cell_info$cell)
# match() returns NA for names with no metadata entry, which would otherwise
# produce all-NA metadata rows without any error; stop instead.
stopifnot(!anyNA(cell_idx))
meta = cell_info[cell_idx, ]
dim(meta) # 173551 19
# [1] 173551     19
meta[1:2, ]