suppressPackageStartupMessages(library(flowWorkspace))
suppressPackageStartupMessages(library(SingleCellExperiment))
library(scran)
library(scater)
## Loading required package: ggplot2

Load gs

dataDir <- system.file("extdata",package="flowWorkspaceData")
gs_file <- list.files(dataDir, "gs_bcell", full.names = TRUE)[1]
gs <- load_gs(gs_file)
gs
## A GatingSet with 2 samples

construct a SingleCellExperiment from a particular population node

sce <- gsexperiment(gs, pop = "CD19andCD20")

two samples are cbind together and appear as a single DelayedArray

assay(sce)
## <10 x 31832> matrix of class DelayedMatrix and type "double":
##              [,1]        [,2]        [,3] ...   [,31831]   [,31832]
## FSC-A  87248.0703  83856.1484 102181.5859   . 89586.0000 85701.4219
## SSC-A  25449.2383  26181.9590  46257.9180   . 58667.2969 37851.5195
## Live    1241.3336    729.1126   1188.9646   .  1120.2526   964.9685
## CD3      558.0569    548.0437    554.4562   .   640.0275   565.8510
## CD19    2311.8428   2394.4863   2504.9692   .  2355.9600  2421.0378
## CD20    3257.5906   3358.4663   3639.8579   .  3481.9807  3554.6306
## IgD     1640.0588   2305.2375   2408.0701   .  1528.8494  2288.5474
## CD27     -30.2827    243.1002   -217.7404   .  2527.2271   825.1616
## CD38    1387.7577   2552.9001   2815.7805   .  1701.7698  1909.4937
## CD24     501.1392   1873.3600   2288.0195   .  2504.6697  1677.6904

The underlying data of two samples are still physically separated

assay(sce)@seed
## 10x31832 double: Abind (along=2)
## ├─ 10x11028 double: [seed] CytoArraySeed object
## └─ 10x20804 double: [seed] CytoArraySeed object

cells are annotated by sample names and leaf nodes

colData(sce)
## DataFrame with 31832 rows and 2 columns
##             pop                sample
##        <factor>              <factor>
## 1     IgD-CD27- 12828_1_Bcell_C01.fcs
## 2     IgD+CD27- 12828_1_Bcell_C01.fcs
## 3     IgD+CD27- 12828_1_Bcell_C01.fcs
## 4     IgD+CD27- 12828_1_Bcell_C01.fcs
## 5     IgD-CD27+ 12828_1_Bcell_C01.fcs
## ...         ...                   ...
## 31828 IgD+CD27- 12828_2_Bcell_C02.fcs
## 31829 IgD+CD27- 12828_2_Bcell_C02.fcs
## 31830 IgD+CD27- 12828_2_Bcell_C02.fcs
## 31831 IgD-CD27+ 12828_2_Bcell_C02.fcs
## 31832 IgD+CD27- 12828_2_Bcell_C02.fcs
levels(colData(sce)$pop)
## [1] "IgD-CD27-"    "IgD+CD27-"    "IgD-CD27+"    "IgD+CD27+"    "Transitional"
rowData(sce)
## DataFrame with 10 rows and 1 column
##              desc
##       <character>
## FSC-A          NA
## SSC-A          NA
## Live       B515-A
## CD3        V450-A
## CD19       B710-A
## CD20       R780-A
## IgD        V545-A
## CD27       G780-A
## CD38       R660-A
## CD24       G560-A

subset row and cols

sce <- sce[c("CD19", "CD20", "CD27", "IgD"), sce$pop != "Transitional"]
sce
## class: gsexperiment 
## dim: 4 28701 
## metadata(0):
## assays(1): intensity
## rownames(4): CD19 CD20 CD27 IgD
## rowData names(1): desc
## colnames: NULL
## colData names(2): pop sample
## reducedDimNames(0):
## altExpNames(0):
dim(sce)
## [1]     4 28701

run PCA

sce1 <- runPCA(sce, ncomponents=10, exprs_values = "intensity")
## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available
reducedDimNames(sce1)
## [1] "PCA"
head(reducedDim(sce1))
##             PC1       PC2       PC3        PC4
## [1,]   976.7533  934.0675 126.18329  -67.61197
## [2,]   902.3101  211.4034  47.77809  -84.17175
## [3,]  1369.0751  159.1839 307.67458   77.67294
## [4,]   260.3435  273.0880  65.90192 -207.29212
## [5,] -1165.3494  564.2825 -14.28582   64.68810
## [6,]  1476.9334 1088.3304 565.97320   74.27193
plotPCA(sce1, colour_by="pop", text_by="pop")
## Warning: Removed 1 rows containing missing values (geom_text).

run graph clustering

clusters <- clusterSNNGraph(sce1, use.dimred="PCA"
                            # , k = 4
                            , use.kmeans=TRUE, full.stats=TRUE)
## Warning: did not converge in 10 iterations
head(clusters)
## DataFrame with 6 rows and 2 columns
##      kmeans   igraph
##   <integer> <factor>
## 1       125        3
## 2       147        4
## 3         4        4
## 4         6        3
## 5         7        1
## 6       125        3
metadata(clusters)$graph
## IGRAPH c7d0c4b U-W- 170 3469 -- 
## + attr: weight (e/n)
## + edges from c7d0c4b:
##  [1] 1-- 10 1-- 12 1-- 13 1-- 18 1-- 25 1-- 35 1-- 41 1-- 51 1-- 57 1-- 59
## [11] 1-- 61 1-- 66 1-- 75 1-- 78 1-- 82 1-- 85 1-- 92 1-- 96 1--101 1--111
## [21] 1--113 1--114 1--129 1--130 1--132 1--151 1--157 1--161 2--  5 2--  6
## [31] 2-- 15 2-- 17 2-- 19 2-- 20 2-- 21 2-- 26 2-- 33 2-- 37 2-- 47 2-- 52
## [41] 2-- 62 2-- 63 2-- 65 2-- 67 2-- 71 2-- 77 2-- 80 2-- 81 2-- 86 2-- 93
## [51] 2-- 99 2--104 2--105 2--112 2--115 2--119 2--121 2--126 2--131 2--136
## [61] 2--140 2--145 2--152 2--154 2--155 2--162 2--163 2--164 2--166 2--170
## [71] 3-- 15 3-- 19 3-- 20 3-- 21 3-- 33 3-- 37 3-- 47 3-- 48 3-- 49 3-- 50
## + ... omitted several edges
# plot(metadata(clusters)$graph)

run tsne

sce2 <- runTSNE(sce1, dimred="PCA")
reducedDimNames(sce2)
## [1] "PCA"  "TSNE"
# sce2$cluster <- factor(clusters$kmeans)
# plotTSNE(sce2, colour_by="cluster", text_by="cluster")
plotTSNE(sce2, colour_by="pop", text_by="pop")
## Warning: Removed 1 rows containing missing values (geom_text).