Download an Example Tabula Sapiens AnnData Object and Manipulate It
!wget https://figshare.com/ndownloader/files/34701991 # not working with Anaconda terminal, has to be wsl terminal
The following block downloads the zip archive and extracts the TS_lung.h5ad file into the current
directory
import requests
from zipfile import ZipFile
from io import BytesIO
# Download the Tabula Sapiens lung archive from figshare.
url = "https://figshare.com/ndownloader/files/34701991"
# A timeout keeps the cell from hanging forever on a stalled connection;
# it bounds the connect phase and each socket read, not the whole download.
response = requests.get(url, timeout=(10, 60))
response.raise_for_status()  # stop here on an HTTP error status
# Open the downloaded bytes as an in-memory zip archive and extract its
# contents into the current working directory.
with ZipFile(BytesIO(response.content)) as z:
    z.extractall()
import scanpy as sc
adata = sc.read_h5ad("TS_lung.h5ad")
adata
adata.obs
|
|
organ_tissue
|
method
|
donor
|
anatomical_information
|
n_counts_UMIs
|
n_genes
|
cell_ontology_class
|
free_annotation
|
manually_annotated
|
compartment
|
gender
|
|
cell_id
|
|
|
|
|
|
|
|
|
|
|
|
|
AAACCCAAGAACTCCT_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
44221.0
|
6970
|
type ii pneumocyte
|
type ii pneumocyte
|
True
|
epithelial
|
male
|
|
AAACGAAAGGAGCTGT_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
28563.0
|
5638
|
type ii pneumocyte
|
type ii pneumocyte
|
True
|
epithelial
|
male
|
|
AAACGAACACGCGTGT_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
4099.0
|
1929
|
type ii pneumocyte
|
type ii pneumocyte
|
True
|
epithelial
|
male
|
|
AAACGAACATGCCGCA_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
24619.0
|
4021
|
type ii pneumocyte
|
type ii pneumocyte
|
True
|
epithelial
|
male
|
|
AAAGAACGTTAAGGGC_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
28129.0
|
5278
|
type ii pneumocyte
|
type ii pneumocyte
|
True
|
epithelial
|
male
|
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
|
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P1_S361
|
Lung
|
smartseq2
|
TSP2
|
proxmedialdistal
|
1788070.0
|
1325
|
macrophage
|
macrophage
|
True
|
immune
|
female
|
|
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P2_S362
|
Lung
|
smartseq2
|
TSP2
|
proxmedialdistal
|
2897053.0
|
3320
|
plasma cell
|
plasma cell
|
True
|
immune
|
female
|
|
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P6_S366
|
Lung
|
smartseq2
|
TSP2
|
proxmedialdistal
|
7151.0
|
617
|
lung microvascular endothelial cell
|
lung microvascular endothelial cell
|
True
|
endothelial
|
female
|
|
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P8_S368
|
Lung
|
smartseq2
|
TSP2
|
proxmedialdistal
|
14444.0
|
404
|
neutrophil
|
neutrophil
|
True
|
immune
|
female
|
|
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P9_S369
|
Lung
|
smartseq2
|
TSP2
|
proxmedialdistal
|
689104.0
|
6964
|
type ii pneumocyte
|
type ii pneumocyte
|
True
|
epithelial
|
female
|
35682 rows × 11 columns
adata.X
<35682x58870 sparse matrix of type '<class 'numpy.float32'>'
with 119787345 stored elements in Compressed Sparse Row format>
adata.var_names
Index(['DDX11L1', 'WASH7P', 'MIR6859-1', 'MIR1302-2HG', 'MIR1302-2', 'FAM138A',
'OR4G4P', 'OR4G11P', 'OR4F5', 'AL627309.1',
...
'MT-ND4', 'MT-TH', 'MT-TS2', 'MT-TL2', 'MT-ND5', 'MT-ND6', 'MT-TE',
'MT-CYB', 'MT-TT', 'MT-TP'],
dtype='object', length=58870)
adata.obs.cell_ontology_class.unique().tolist()[0:10]
['type ii pneumocyte',
'neutrophil',
'cd4-positive alpha-beta t cell',
'cd8-positive alpha-beta t cell',
'nk cell',
'bronchial vessel endothelial cell',
'smooth muscle cell',
'adventitial cell',
'macrophage',
'respiratory mucous cell']
# Keep only the cells whose cell_ontology_class is exactly 'neutrophil'.
bdata = adata[adata.obs.cell_ontology_class == 'neutrophil']
adata.obs.head(1)
|
|
organ_tissue
|
method
|
donor
|
anatomical_information
|
n_counts_UMIs
|
n_genes
|
cell_ontology_class
|
free_annotation
|
manually_annotated
|
compartment
|
gender
|
|
cell_id
|
|
|
|
|
|
|
|
|
|
|
|
|
AAACCCAAGAACTCCT_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
44221.0
|
6970
|
type ii pneumocyte
|
type ii pneumocyte
|
True
|
epithelial
|
male
|
adata[(adata.obs.cell_ontology_class == 'neutrophil') & (adata.obs.method == '10X')]
View of AnnData object with n_obs × n_vars = 138 × 58870
obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
layers: 'decontXcounts', 'raw_counts'
obsp: 'connectivities', 'distances'
# Neutrophils that were either profiled with 10X or come from donor TSP2.
adata[
    (adata.obs.cell_ontology_class == 'neutrophil')
    & ((adata.obs.method == '10X') | (adata.obs.donor == 'TSP2'))
]
View of AnnData object with n_obs × n_vars = 172 × 58870
obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
layers: 'decontXcounts', 'raw_counts'
obsp: 'connectivities', 'distances'
l = ['neutrophil', 'basophil']
adata[adata.obs.cell_ontology_class.isin(l)]
# adata[~adata.obs.cell_ontology_class.isin(l)]
View of AnnData object with n_obs × n_vars = 860 × 58870
obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
layers: 'decontXcounts', 'raw_counts'
obsp: 'connectivities', 'distances'
adata.obs.cell_ontology_class.str.contains('t cell')
cell_id
AAACCCAAGAACTCCT_TSP14_Lung_Distal_10X_1_1 False
AAACGAAAGGAGCTGT_TSP14_Lung_Distal_10X_1_1 False
AAACGAACACGCGTGT_TSP14_Lung_Distal_10X_1_1 False
AAACGAACATGCCGCA_TSP14_Lung_Distal_10X_1_1 False
AAAGAACGTTAAGGGC_TSP14_Lung_Distal_10X_1_1 False
...
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P1_S361 False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P2_S362 False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P6_S366 False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P8_S368 False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P9_S369 False
Name: cell_ontology_class, Length: 35682, dtype: bool
adata[adata.obs.cell_ontology_class.str.contains('t cell')]
View of AnnData object with n_obs × n_vars = 2071 × 58870
obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
layers: 'decontXcounts', 'raw_counts'
obsp: 'connectivities', 'distances'
adata[adata.obs.cell_ontology_class.str.contains('t cell')].obs
|
|
organ_tissue
|
method
|
donor
|
anatomical_information
|
n_counts_UMIs
|
n_genes
|
cell_ontology_class
|
free_annotation
|
manually_annotated
|
compartment
|
gender
|
|
cell_id
|
|
|
|
|
|
|
|
|
|
|
|
|
AAAGGGCGTCGGAAAC_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
3052.0
|
1166
|
cd4-positive alpha-beta t cell
|
cd4-positive alpha-beta t cell
|
True
|
immune
|
male
|
|
AACAAGAGTGTATACC_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
4181.0
|
1659
|
cd8-positive alpha-beta t cell
|
cd8-positive alpha-beta t cell
|
True
|
immune
|
male
|
|
AAGGAATGTGTGGTCC_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
8710.0
|
2606
|
cd8-positive alpha-beta t cell
|
cd8-positive alpha-beta t cell
|
True
|
immune
|
male
|
|
ACACGCGGTTGCCGAC_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
13060.0
|
3888
|
cd4-positive alpha-beta t cell
|
cd4-positive alpha-beta t cell
|
True
|
immune
|
male
|
|
ACAGGGATCTTCTCAA_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
3023.0
|
1181
|
cd8-positive alpha-beta t cell
|
cd8-positive alpha-beta t cell
|
True
|
immune
|
male
|
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
|
TSP2_LungNeuron_proximal_SS2_B113240_B134457_MixedStromalNeuron_K8_S56
|
Lung
|
smartseq2
|
TSP2
|
Neuron
|
1947811.0
|
5807
|
respiratory goblet cell
|
respiratory goblet cell
|
True
|
epithelial
|
female
|
|
TSP2_LungNeuron_proximal_SS2_B113240_B134457_MixedStromalNeuron_M12_S108
|
Lung
|
smartseq2
|
TSP2
|
Neuron
|
1368759.0
|
4310
|
respiratory goblet cell
|
respiratory goblet cell
|
True
|
epithelial
|
female
|
|
TSP2_LungNeuron_proximal_SS2_B113453_B133092_MixedStromalNeuron_A5_S125
|
Lung
|
smartseq2
|
TSP2
|
Neuron
|
2427549.0
|
4968
|
respiratory goblet cell
|
respiratory goblet cell
|
True
|
epithelial
|
female
|
|
TSP2_LungNeuron_proximal_SS2_B113453_B133092_MixedStromalNeuron_G16_S280
|
Lung
|
smartseq2
|
TSP2
|
Neuron
|
129219.0
|
960
|
cd4-positive, alpha-beta t cell
|
cd4-positive, alpha-beta t cell
|
True
|
immune
|
female
|
|
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Empty_G12_S156
|
Lung
|
smartseq2
|
TSP2
|
proxmedialdistal
|
499949.0
|
5598
|
respiratory goblet cell
|
respiratory goblet cell
|
True
|
epithelial
|
female
|
2071 rows × 11 columns
# The substring 't cell' also matches 'respiratory goblet cell', so that
# class is excluded explicitly before looking at the per-cell annotations.
adata[
    (adata.obs.cell_ontology_class.str.contains('t cell'))
    & (adata.obs.cell_ontology_class != 'respiratory goblet cell')
].obs
|
|
organ_tissue
|
method
|
donor
|
anatomical_information
|
n_counts_UMIs
|
n_genes
|
cell_ontology_class
|
free_annotation
|
manually_annotated
|
compartment
|
gender
|
|
cell_id
|
|
|
|
|
|
|
|
|
|
|
|
|
AAAGGGCGTCGGAAAC_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
3052.0
|
1166
|
cd4-positive alpha-beta t cell
|
cd4-positive alpha-beta t cell
|
True
|
immune
|
male
|
|
AACAAGAGTGTATACC_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
4181.0
|
1659
|
cd8-positive alpha-beta t cell
|
cd8-positive alpha-beta t cell
|
True
|
immune
|
male
|
|
AAGGAATGTGTGGTCC_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
8710.0
|
2606
|
cd8-positive alpha-beta t cell
|
cd8-positive alpha-beta t cell
|
True
|
immune
|
male
|
|
ACACGCGGTTGCCGAC_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
13060.0
|
3888
|
cd4-positive alpha-beta t cell
|
cd4-positive alpha-beta t cell
|
True
|
immune
|
male
|
|
ACAGGGATCTTCTCAA_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
3023.0
|
1181
|
cd8-positive alpha-beta t cell
|
cd8-positive alpha-beta t cell
|
True
|
immune
|
male
|
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
…
|
|
TTTGTTGTCAAGCCCG_TSP2_Lung_proxmedialdistal_10X_1_2
|
Lung
|
10X
|
TSP2
|
proxmedialdistal
|
2892.0
|
1349
|
cd4-positive, alpha-beta t cell
|
cd4-positive, alpha-beta t cell
|
True
|
immune
|
female
|
|
TTTGTTGTCTACCACC_TSP2_Lung_proxmedialdistal_10X_1_2
|
Lung
|
10X
|
TSP2
|
proxmedialdistal
|
3234.0
|
1566
|
cd8-positive, alpha-beta t cell
|
cd8-positive, alpha-beta t cell
|
True
|
immune
|
female
|
|
TSP2_Lung_proxmedialdistal_SS2_B113378_B104862_Endothelial_N21_S333
|
Lung
|
smartseq2
|
TSP2
|
nan
|
29807.0
|
224
|
cd4-positive, alpha-beta t cell
|
cd4-positive, alpha-beta t cell
|
True
|
immune
|
female
|
|
TSP2_LungNeuron_proximal_SS2_B113240_B134457_MixedStromalNeuron_C22_S178
|
Lung
|
smartseq2
|
TSP2
|
Neuron
|
94079.0
|
1065
|
cd4-positive, alpha-beta t cell
|
cd4-positive, alpha-beta t cell
|
True
|
immune
|
female
|
|
TSP2_LungNeuron_proximal_SS2_B113453_B133092_MixedStromalNeuron_G16_S280
|
Lung
|
smartseq2
|
TSP2
|
Neuron
|
129219.0
|
960
|
cd4-positive, alpha-beta t cell
|
cd4-positive, alpha-beta t cell
|
True
|
immune
|
female
|
1317 rows × 11 columns
Advanced cell filtering and subsetting
def fancy_filter(x):
    """Return True when ``x / 5`` is strictly greater than 1000.

    Intended to be mapped element-wise over a numeric column (the notebook
    applies it to ``adata.obs.n_genes``) to build a boolean mask.

    Args:
        x: A single numeric value.

    Returns:
        bool: True if ``x / 5 > 1000``, otherwise False.
    """
    # bool() keeps the result a plain Python bool even for NumPy scalars.
    return bool(x / 5 > 1000)
adata.obs.n_genes.map(fancy_filter)
cell_id
AAACCCAAGAACTCCT_TSP14_Lung_Distal_10X_1_1 True
AAACGAAAGGAGCTGT_TSP14_Lung_Distal_10X_1_1 True
AAACGAACACGCGTGT_TSP14_Lung_Distal_10X_1_1 False
AAACGAACATGCCGCA_TSP14_Lung_Distal_10X_1_1 False
AAAGAACGTTAAGGGC_TSP14_Lung_Distal_10X_1_1 True
...
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P1_S361 False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P2_S362 False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P6_S366 False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P8_S368 False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P9_S369 True
Name: n_genes, Length: 35682, dtype: bool
adata[adata.obs.n_genes.map(fancy_filter)].obs.head(1)
|
|
organ_tissue
|
method
|
donor
|
anatomical_information
|
n_counts_UMIs
|
n_genes
|
cell_ontology_class
|
free_annotation
|
manually_annotated
|
compartment
|
gender
|
|
cell_id
|
|
|
|
|
|
|
|
|
|
|
|
|
AAACCCAAGAACTCCT_TSP14_Lung_Distal_10X_1_1
|
Lung
|
10X
|
TSP14
|
Distal
|
44221.0
|
6970
|
type ii pneumocyte
|
type ii pneumocyte
|
True
|
epithelial
|
male
|
def very_fancy_filter(x):
    """Return True for donor-"...14" rows with more than 10 UMIs per gene.

    Intended for row-wise use, e.g. ``DataFrame.apply(..., axis=1)`` over the
    columns ``['donor', 'n_counts_UMIs', 'n_genes']`` in that order.

    Args:
        x: A three-item sequence ``(donor, n_counts_UMIs, n_genes)`` where
            ``donor`` is a string and the other two are numbers.

    Returns:
        bool: True if ``donor`` ends with ``'14'`` and
        ``n_counts_UMIs / n_genes > 10``. Rows with ``n_genes == 0`` have no
        defined ratio and yield False instead of raising ZeroDivisionError.
    """
    donor, n_counts_UMIs, n_genes = x
    # Check the donor first so other donors never reach the division.
    if not donor.endswith('14'):
        return False
    # Guard against division by zero for rows with no detected genes.
    if n_genes == 0:
        return False
    return bool(n_counts_UMIs / n_genes > 10)
adata[adata.obs[['donor', 'n_counts_UMIs', 'n_genes']].apply(very_fancy_filter, axis = 1)]
View of AnnData object with n_obs × n_vars = 32 × 58870
obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
layers: 'decontXcounts', 'raw_counts'
obsp: 'connectivities', 'distances'
Gene-based cell filtering and subsetting
import numpy as np
adata.var_names == 'CDKN2A'
array([False, False, False, ..., False, False, False])
np.where(adata.var_names == 'CDKN2A')
(array([26368], dtype=int64),)
gene_loc = np.where(adata.var_names == 'CDKN2A')[0][0]
gene_loc
26368
adata.X
<35682x58870 sparse matrix of type '<class 'numpy.float32'>'
with 119787345 stored elements in Compressed Sparse Row format>
adata.X[:, gene_loc]
<35682x1 sparse matrix of type '<class 'numpy.float32'>'
with 1625 stored elements in Compressed Sparse Row format>
adata.X[:, gene_loc].toarray()
array([[0.],
[0.],
[0.],
...,
[0.],
[0.],
[0.]], dtype=float32)
adata[adata.X[:, gene_loc].toarray() > 0]
View of AnnData object with n_obs × n_vars = 1563 × 58870
obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
layers: 'decontXcounts', 'raw_counts'
obsp: 'connectivities', 'distances'