Downloading and Manipulating an Example Tabula Sapiens AnnData Object

!wget https://figshare.com/ndownloader/files/34701991 # does not work from the Anaconda terminal; run it from a WSL terminal instead

The following block downloads the zipped archive and extracts the TS_lung.h5ad file into the current directory.

import requests
from zipfile import ZipFile
from io import BytesIO

# Download the zipped dataset from figshare.
# The timeout is a (connect, read) pair in seconds, so an unreachable or
# stalled server raises an exception instead of blocking the notebook forever.
url = "https://figshare.com/ndownloader/files/34701991"
response = requests.get(url, timeout=(10, 300))
# Fail loudly on an HTTP error status rather than trying to unzip an error page.
response.raise_for_status()

# The whole archive is held in memory (response.content) and wrapped in
# BytesIO because ZipFile needs a seekable file object. All members are
# extracted into the current working directory.
with ZipFile(BytesIO(response.content)) as z:
    z.extractall()
import scanpy as sc
adata = sc.read_h5ad("TS_lung.h5ad")
adata
adata.obs
organ_tissue method donor anatomical_information n_counts_UMIs n_genes cell_ontology_class free_annotation manually_annotated compartment gender
cell_id
AAACCCAAGAACTCCT_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 44221.0 6970 type ii pneumocyte type ii pneumocyte True epithelial male
AAACGAAAGGAGCTGT_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 28563.0 5638 type ii pneumocyte type ii pneumocyte True epithelial male
AAACGAACACGCGTGT_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 4099.0 1929 type ii pneumocyte type ii pneumocyte True epithelial male
AAACGAACATGCCGCA_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 24619.0 4021 type ii pneumocyte type ii pneumocyte True epithelial male
AAAGAACGTTAAGGGC_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 28129.0 5278 type ii pneumocyte type ii pneumocyte True epithelial male
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P1_S361 Lung smartseq2 TSP2 proxmedialdistal 1788070.0 1325 macrophage macrophage True immune female
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P2_S362 Lung smartseq2 TSP2 proxmedialdistal 2897053.0 3320 plasma cell plasma cell True immune female
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P6_S366 Lung smartseq2 TSP2 proxmedialdistal 7151.0 617 lung microvascular endothelial cell lung microvascular endothelial cell True endothelial female
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P8_S368 Lung smartseq2 TSP2 proxmedialdistal 14444.0 404 neutrophil neutrophil True immune female
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P9_S369 Lung smartseq2 TSP2 proxmedialdistal 689104.0 6964 type ii pneumocyte type ii pneumocyte True epithelial female

35682 rows × 11 columns

adata.X
<35682x58870 sparse matrix of type '<class 'numpy.float32'>'
    with 119787345 stored elements in Compressed Sparse Row format>
adata.var_names
Index(['DDX11L1', 'WASH7P', 'MIR6859-1', 'MIR1302-2HG', 'MIR1302-2', 'FAM138A',
       'OR4G4P', 'OR4G11P', 'OR4F5', 'AL627309.1',
       ...
       'MT-ND4', 'MT-TH', 'MT-TS2', 'MT-TL2', 'MT-ND5', 'MT-ND6', 'MT-TE',
       'MT-CYB', 'MT-TT', 'MT-TP'],
      dtype='object', length=58870)
adata.obs.cell_ontology_class.unique().tolist()[0:10]
['type ii pneumocyte',
 'neutrophil',
 'cd4-positive alpha-beta t cell',
 'cd8-positive alpha-beta t cell',
 'nk cell',
 'bronchial vessel endothelial cell',
 'smooth muscle cell',
 'adventitial cell',
 'macrophage',
 'respiratory mucous cell']
bdata = adata[adata.obs.cell_ontology_class == 'neutrophil']
adata.obs.head(1)
organ_tissue method donor anatomical_information n_counts_UMIs n_genes cell_ontology_class free_annotation manually_annotated compartment gender
cell_id
AAACCCAAGAACTCCT_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 44221.0 6970 type ii pneumocyte type ii pneumocyte True epithelial male
adata[(adata.obs.cell_ontology_class == 'neutrophil') & (adata.obs.method == '10X')]
View of AnnData object with n_obs × n_vars = 138 × 58870
    obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
    var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
    obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
    layers: 'decontXcounts', 'raw_counts'
    obsp: 'connectivities', 'distances'
adata[(adata.obs.cell_ontology_class == 'neutrophil') &\
      ((adata.obs.method == '10X') | (adata.obs.donor == 'TSP2'))]
View of AnnData object with n_obs × n_vars = 172 × 58870
    obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
    var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
    obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
    layers: 'decontXcounts', 'raw_counts'
    obsp: 'connectivities', 'distances'
l = ['neutrophil', 'basophil']
adata[adata.obs.cell_ontology_class.isin(l)]
# adata[~adata.obs.cell_ontology_class.isin(l)]
View of AnnData object with n_obs × n_vars = 860 × 58870
    obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
    var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
    obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
    layers: 'decontXcounts', 'raw_counts'
    obsp: 'connectivities', 'distances'
adata.obs.cell_ontology_class.str.contains('t cell')
cell_id
AAACCCAAGAACTCCT_TSP14_Lung_Distal_10X_1_1                        False
AAACGAAAGGAGCTGT_TSP14_Lung_Distal_10X_1_1                        False
AAACGAACACGCGTGT_TSP14_Lung_Distal_10X_1_1                        False
AAACGAACATGCCGCA_TSP14_Lung_Distal_10X_1_1                        False
AAAGAACGTTAAGGGC_TSP14_Lung_Distal_10X_1_1                        False
                                                                  ...  
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P1_S361    False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P2_S362    False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P6_S366    False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P8_S368    False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P9_S369    False
Name: cell_ontology_class, Length: 35682, dtype: bool
adata[adata.obs.cell_ontology_class.str.contains('t cell')]
View of AnnData object with n_obs × n_vars = 2071 × 58870
    obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
    var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
    obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
    layers: 'decontXcounts', 'raw_counts'
    obsp: 'connectivities', 'distances'
adata[adata.obs.cell_ontology_class.str.contains('t cell')].obs
organ_tissue method donor anatomical_information n_counts_UMIs n_genes cell_ontology_class free_annotation manually_annotated compartment gender
cell_id
AAAGGGCGTCGGAAAC_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 3052.0 1166 cd4-positive alpha-beta t cell cd4-positive alpha-beta t cell True immune male
AACAAGAGTGTATACC_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 4181.0 1659 cd8-positive alpha-beta t cell cd8-positive alpha-beta t cell True immune male
AAGGAATGTGTGGTCC_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 8710.0 2606 cd8-positive alpha-beta t cell cd8-positive alpha-beta t cell True immune male
ACACGCGGTTGCCGAC_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 13060.0 3888 cd4-positive alpha-beta t cell cd4-positive alpha-beta t cell True immune male
ACAGGGATCTTCTCAA_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 3023.0 1181 cd8-positive alpha-beta t cell cd8-positive alpha-beta t cell True immune male
TSP2_LungNeuron_proximal_SS2_B113240_B134457_MixedStromalNeuron_K8_S56 Lung smartseq2 TSP2 Neuron 1947811.0 5807 respiratory goblet cell respiratory goblet cell True epithelial female
TSP2_LungNeuron_proximal_SS2_B113240_B134457_MixedStromalNeuron_M12_S108 Lung smartseq2 TSP2 Neuron 1368759.0 4310 respiratory goblet cell respiratory goblet cell True epithelial female
TSP2_LungNeuron_proximal_SS2_B113453_B133092_MixedStromalNeuron_A5_S125 Lung smartseq2 TSP2 Neuron 2427549.0 4968 respiratory goblet cell respiratory goblet cell True epithelial female
TSP2_LungNeuron_proximal_SS2_B113453_B133092_MixedStromalNeuron_G16_S280 Lung smartseq2 TSP2 Neuron 129219.0 960 cd4-positive, alpha-beta t cell cd4-positive, alpha-beta t cell True immune female
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Empty_G12_S156 Lung smartseq2 TSP2 proxmedialdistal 499949.0 5598 respiratory goblet cell respiratory goblet cell True epithelial female

2071 rows × 11 columns

adata[(adata.obs.cell_ontology_class.str.contains('t cell')) &\
     (adata.obs.cell_ontology_class != 'respiratory goblet cell')].obs
organ_tissue method donor anatomical_information n_counts_UMIs n_genes cell_ontology_class free_annotation manually_annotated compartment gender
cell_id
AAAGGGCGTCGGAAAC_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 3052.0 1166 cd4-positive alpha-beta t cell cd4-positive alpha-beta t cell True immune male
AACAAGAGTGTATACC_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 4181.0 1659 cd8-positive alpha-beta t cell cd8-positive alpha-beta t cell True immune male
AAGGAATGTGTGGTCC_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 8710.0 2606 cd8-positive alpha-beta t cell cd8-positive alpha-beta t cell True immune male
ACACGCGGTTGCCGAC_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 13060.0 3888 cd4-positive alpha-beta t cell cd4-positive alpha-beta t cell True immune male
ACAGGGATCTTCTCAA_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 3023.0 1181 cd8-positive alpha-beta t cell cd8-positive alpha-beta t cell True immune male
TTTGTTGTCAAGCCCG_TSP2_Lung_proxmedialdistal_10X_1_2 Lung 10X TSP2 proxmedialdistal 2892.0 1349 cd4-positive, alpha-beta t cell cd4-positive, alpha-beta t cell True immune female
TTTGTTGTCTACCACC_TSP2_Lung_proxmedialdistal_10X_1_2 Lung 10X TSP2 proxmedialdistal 3234.0 1566 cd8-positive, alpha-beta t cell cd8-positive, alpha-beta t cell True immune female
TSP2_Lung_proxmedialdistal_SS2_B113378_B104862_Endothelial_N21_S333 Lung smartseq2 TSP2 nan 29807.0 224 cd4-positive, alpha-beta t cell cd4-positive, alpha-beta t cell True immune female
TSP2_LungNeuron_proximal_SS2_B113240_B134457_MixedStromalNeuron_C22_S178 Lung smartseq2 TSP2 Neuron 94079.0 1065 cd4-positive, alpha-beta t cell cd4-positive, alpha-beta t cell True immune female
TSP2_LungNeuron_proximal_SS2_B113453_B133092_MixedStromalNeuron_G16_S280 Lung smartseq2 TSP2 Neuron 129219.0 960 cd4-positive, alpha-beta t cell cd4-positive, alpha-beta t cell True immune female

1317 rows × 11 columns

Advanced cell filtering and subsetting

def fancy_filter(x):
    """Return True when one fifth of ``x`` is strictly greater than 1000.

    Written as an element-wise predicate, e.g. for use with ``Series.map``.
    """
    scaled = x / 5
    # bool() mirrors the truthiness test the comparison would get in an `if`.
    return bool(scaled > 1000)
adata.obs.n_genes.map(fancy_filter)
cell_id
AAACCCAAGAACTCCT_TSP14_Lung_Distal_10X_1_1                         True
AAACGAAAGGAGCTGT_TSP14_Lung_Distal_10X_1_1                         True
AAACGAACACGCGTGT_TSP14_Lung_Distal_10X_1_1                        False
AAACGAACATGCCGCA_TSP14_Lung_Distal_10X_1_1                        False
AAAGAACGTTAAGGGC_TSP14_Lung_Distal_10X_1_1                         True
                                                                  ...  
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P1_S361    False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P2_S362    False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P6_S366    False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P8_S368    False
TSP2_Lung_proxmedialdistal_SS2_B113373_B134458_Stromal_P9_S369     True
Name: n_genes, Length: 35682, dtype: bool
adata[adata.obs.n_genes.map(fancy_filter)].obs.head(1)
organ_tissue method donor anatomical_information n_counts_UMIs n_genes cell_ontology_class free_annotation manually_annotated compartment gender
cell_id
AAACCCAAGAACTCCT_TSP14_Lung_Distal_10X_1_1 Lung 10X TSP14 Distal 44221.0 6970 type ii pneumocyte type ii pneumocyte True epithelial male
def very_fancy_filter(x):
    """Row-wise predicate over a (donor, n_counts_UMIs, n_genes) triple.

    Returns True only when the donor label ends with '14' and the ratio
    n_counts_UMIs / n_genes is strictly greater than 10; False otherwise.
    """
    donor, n_counts_UMIs, n_genes = x
    # Guard clause: the ratio is never computed for non-matching donors.
    if not donor.endswith('14'):
        return False
    return bool(n_counts_UMIs / n_genes > 10)
adata[adata.obs[['donor', 'n_counts_UMIs', 'n_genes']].apply(very_fancy_filter, axis = 1)]
View of AnnData object with n_obs × n_vars = 32 × 58870
    obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
    var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
    obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
    layers: 'decontXcounts', 'raw_counts'
    obsp: 'connectivities', 'distances'

Gene-based cell filtering and subsetting

import numpy as np
adata.var_names == 'CDKN2A'
array([False, False, False, ..., False, False, False])
np.where(adata.var_names == 'CDKN2A')
(array([26368], dtype=int64),)
gene_loc = np.where(adata.var_names == 'CDKN2A')[0][0]
gene_loc
26368
adata.X
<35682x58870 sparse matrix of type '<class 'numpy.float32'>'
    with 119787345 stored elements in Compressed Sparse Row format>
adata.X[:, gene_loc]
<35682x1 sparse matrix of type '<class 'numpy.float32'>'
    with 1625 stored elements in Compressed Sparse Row format>
adata.X[:, gene_loc].toarray()
array([[0.],
       [0.],
       [0.],
       ...,
       [0.],
       [0.],
       [0.]], dtype=float32)
adata[adata.X[:, gene_loc].toarray() > 0]
View of AnnData object with n_obs × n_vars = 1563 × 58870
    obs: 'organ_tissue', 'method', 'donor', 'anatomical_information', 'n_counts_UMIs', 'n_genes', 'cell_ontology_class', 'free_annotation', 'manually_annotated', 'compartment', 'gender'
    var: 'gene_symbol', 'feature_type', 'ensemblid', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: '_scvi', '_training_mode', 'cell_ontology_class_colors', 'dendrogram_cell_type_tissue', 'dendrogram_computational_compartment_assignment', 'dendrogram_consensus_prediction', 'dendrogram_tissue_cell_type', 'donor_colors', 'donor_method_colors', 'hvg', 'method_colors', 'neighbors', 'organ_tissue_colors', 'sex_colors', 'tissue_colors', 'umap'
    obsm: 'X_pca', 'X_scvi', 'X_scvi_umap', 'X_umap'
    layers: 'decontXcounts', 'raw_counts'
    obsp: 'connectivities', 'distances'