suppressPackageStartupMessages(
  { library(flowWorkspace)
    library(SingleCellExperiment)
  })

## Warning: replacing previous import 'ncdfFlow::filter' by 'dplyr::filter' when
## loading 'flowWorkspace'

gs_to_sce

Start with a GatingSet (`gs`, assumed to be loaded beforehand) whose cytoframes carry cell IDs as row names

cf <- gs_get_cytoframe(gs, 1)
cf

## cytoframe object '12828_1_Bcell_C01.fcs'
## with 81638 cells and 10 observables:
##             name        desc     range  minRange  maxRange
## $P1        FSC-A          NA 262142.00    0.0000 262142.00
## $P2        SSC-A          NA 262142.00 -111.0000 262142.00
## $P3         Live      B515-A   4102.68 -666.1160   4102.68
## $P4          CD3      V450-A   4102.09  -15.8156   4102.09
## $P5         CD19      B710-A   4102.33  160.9434   4102.33
## $P6         CD20      R780-A   4098.99  600.5175   4098.99
## $P7          IgD      V545-A   4102.68 -860.4089   4102.68
## $P8         CD27      G780-A   4100.33  392.9639   4100.33
## $P9         CD38      R660-A   4098.41  491.7829   4098.41
## $P10        CD24      G560-A   4102.51 -525.8096   4102.51
## 214 keywords are stored in the 'description' slot
## row names(81638): cell id #1 cell id #2 ... cell id #81637 cell id
##   #81638

head(rownames(cf))

## [1] "cell id #1" "cell id #2" "cell id #3" "cell id #4" "cell id #5"
## [6] "cell id #6"

Convert the GatingSet to a SingleCellExperiment

sce <- gs_to_sce(gs, pop = "CD19andCD20")

The cell-ID row names are carried over (they become the column names of the SCE)

sce

## class: gsexperiment 
## dim: 10 31832 
## metadata(0):
## assays(1): intensity
## rownames(10): FSC-A SSC-A ... CD38 CD24
## rowData names(1): desc
## colnames(31832): cell id #23 cell id #27 ... cell id #148201 cell id
##   #148212
## colData names(2): pop sample
## reducedDimNames(0):
## altExpNames(0):

colData(sce)

## DataFrame with 31832 rows and 2 columns
##                       pop                sample
##                  <factor>              <factor>
## cell id #23     IgD-CD27- 12828_1_Bcell_C01.fcs
## cell id #27     IgD+CD27- 12828_1_Bcell_C01.fcs
## cell id #29     IgD+CD27- 12828_1_Bcell_C01.fcs
## cell id #32     IgD+CD27- 12828_1_Bcell_C01.fcs
## cell id #37     IgD-CD27+ 12828_1_Bcell_C01.fcs
## ...                   ...                   ...
## cell id #148194 IgD+CD27- 12828_2_Bcell_C02.fcs
## cell id #148199 IgD+CD27- 12828_2_Bcell_C02.fcs
## cell id #148200 IgD+CD27- 12828_2_Bcell_C02.fcs
## cell id #148201 IgD-CD27+ 12828_2_Bcell_C02.fcs
## cell id #148212 IgD+CD27- 12828_2_Bcell_C02.fcs

head(colnames(sce))

## [1] "cell id #23" "cell id #27" "cell id #29" "cell id #32" "cell id #37"
## [6] "cell id #38"

sce_to_gs

Mimic a real, native SCE by converting its assay to a conventional array

## keep four of the rows and only the IgD+CD27+ / IgD-CD27+ cells (columns)
sce <- sce[
  c("CD19", "CD20", "CD27", "IgD"),
  sce$pop %in% c("IgD+CD27+", "IgD-CD27+")
]
## convert the assay to a plain matrix, then wrap it in a DelayedArray
assay(sce) <- DelayedArray(as.matrix(assay(sce)))
## drop the pop labels for now
colData(sce)$pop <- NULL
## rename the rowData column to "marker"
colnames(rowData(sce)) <- "marker"

Now it is a conventional SCE object whose underlying data is an in-memory matrix

sce

## class: gsexperiment 
## dim: 4 3064 
## metadata(0):
## assays(1): intensity
## rownames(4): CD19 CD20 CD27 IgD
## rowData names(1): marker
## colnames(3064): cell id #37 cell id #526 ... cell id #148031 cell id
##   #148201
## colData names(1): sample
## reducedDimNames(0):
## altExpNames(0):

ar <- assay(sce)

colData holds the sample name of each cell; its row names are the cell IDs

colData(sce)

## DataFrame with 3064 rows and 1 column
##                                sample
##                              <factor>
## cell id #37     12828_1_Bcell_C01.fcs
## cell id #526    12828_1_Bcell_C01.fcs
## cell id #813    12828_1_Bcell_C01.fcs
## cell id #852    12828_1_Bcell_C01.fcs
## cell id #926    12828_1_Bcell_C01.fcs
## ...                               ...
## cell id #147732 12828_2_Bcell_C02.fcs
## cell id #147933 12828_2_Bcell_C02.fcs
## cell id #148011 12828_2_Bcell_C02.fcs
## cell id #148031 12828_2_Bcell_C02.fcs
## cell id #148201 12828_2_Bcell_C02.fcs

rowData: the row names will be used as the channel names, and the ‘marker’ column contains the marker information

rowData(sce)

## DataFrame with 4 rows and 1 column
##           marker
##      <character>
## CD19      B710-A
## CD20      R780-A
## CD27      G780-A
## IgD       V545-A

Convert it to a GatingSet

gs <- sce_to_gs(
  sce,
  assay_type = "intensity",
  sample = "sample", # column used to split the data into samples
  channel = NA, # channel column; NA means channels are parsed from rownames
  marker = "marker" # column holding the marker info
)

The data is split into samples according to the sample column

gs

## A GatingSet with 2 samples

sampleNames(gs)

## [1] "12828_1_Bcell_C01.fcs" "12828_2_Bcell_C02.fcs"

nrow(gs)

## $`12828_1_Bcell_C01.fcs`
## [1] 1019
## 
## $`12828_2_Bcell_C02.fcs`
## [1] 2045

Channel and marker names are parsed from rowData(sce)

colnames(gs)

## [1] "CD19" "CD20" "CD27" "IgD"

markernames(gs)

##     CD19     CD20     CD27      IgD 
## "B710-A" "R780-A" "G780-A" "V545-A"

no gates yet

gs_get_pop_paths(gs)

## [1] "root"

Cell IDs are also preserved as the row names of each cytoframe

cf <- gs_get_cytoframe(gs, 1)
cf

## cytoframe object 'file100d6a5ba971'
## with 1019 cells and 4 observables:
##            name        desc     range  minRange  maxRange
## $P1        CD19      B710-A      3453         0      3453
## $P2        CD20      R780-A      4002         0      4002
## $P3        CD27      G780-A      3418         0      3418
## $P4         IgD      V545-A      2882         0      2882
## 46 keywords are stored in the 'description' slot
## row names(1019): cell id #37 cell id #526 ... cell id #81530 cell id
##   #81584

head(rownames(cf))

## [1] "cell id #37"  "cell id #526" "cell id #813" "cell id #852" "cell id #926"
## [6] "cell id #935"

sce_to_gs.R

wjiang2

2020-09-14

gs_to_sce

A gs with cell id present

convert gatingset to singlecellexperiment

rownames are carried over

sce_to_gs

mimic a real native SCE case by making it conventional array

Now it is a conventional SCE object with the underlying data as in-mem matrix

colData has sample names info, rownames is cell id

rowData : rownames will be used as channel, ‘marker’ col contains marker info

convert it to a GatingSet

data is split by sample column

channel and marker are parsed from rowData(sce)

no gates yet

CELL id is also preserved as rownames