Query artifacts#
Here, we’ll query artifacts and inspect their metadata.
This guide can be skipped if you are only interested in how to leverage the overall collection.
import lamindb as ln
import bionty as bt
💡 connected lamindb: testuser1/test-scrna
ln.settings.transform.stem_uid = "agayZTonayqA"
ln.settings.transform.version = "1"
ln.track()
💡 notebook imports: bionty==0.42.9 lamindb==0.71.0
💡 saved: Transform(uid='agayZTonayqA5zKv', name='Query artifacts', key='scrna3', version='1', type='notebook', updated_at=2024-05-06 19:34:35 UTC, created_by_id=1)
💡 saved: Run(uid='3K3D9lwlg90WXXhcijui', transform_id=3, created_by_id=1)
Query artifacts by provenance metadata#
users = ln.User.lookup()
ln.Transform.filter(created_by=users.testuser1).search("scrna")
uid | score | |
---|---|---|
name | ||
scRNA-seq | Nv48yAceNSh85zKv | 90.0 |
Standardize and append a batch of data | ManDYgmftZ8C5zKv | 45.0 |
Query artifacts | agayZTonayqA5zKv | 36.0 |
transform = ln.Transform.filter(uid="Nv48yAceNSh85zKv").one()
ln.Artifact.filter(transform=transform).df()
uid | storage_id | key | suffix | accessor | description | version | size | hash | hash_type | n_objects | n_observations | transform_id | run_id | visibility | key_is_virtual | created_at | updated_at | created_by_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | |||||||||||||||||||
1 | z2bLVYwezFXkexWYCD38 | 1 | None | .h5ad | AnnData | Human immune cells from Conde22 | None | 57612943 | 9sXda5E7BYiVoDOQkTC0KB | sha1-fl | None | 1648 | 1 | 1 | 1 | True | 2024-05-06 19:33:48.298981+00:00 | 2024-05-06 19:33:51.831588+00:00 | 1 |
Query artifacts by biological metadata#
organism = bt.Organism.lookup()
tissues = bt.Tissue.lookup()
query = ln.Artifact.filter(
organism=organism.human,
tissues=tissues.bone_marrow,
)
query.df()
uid | key | suffix | accessor | description | version | size | hash | hash_type | n_objects | n_observations | visibility | key_is_virtual | created_at | updated_at | storage_id | transform_id | run_id | created_by_id | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id |
Inspect artifact metadata#
query_set = ln.Artifact.filter().all()
artifact1, artifact2 = query_set[0], query_set[1]
artifact1.describe()
Artifact(uid='z2bLVYwezFXkexWYCD38', suffix='.h5ad', accessor='AnnData', description='Human immune cells from Conde22', size=57612943, hash='9sXda5E7BYiVoDOQkTC0KB', hash_type='sha1-fl', n_observations=1648, visibility=1, key_is_virtual=True, updated_at=2024-05-06 19:33:51 UTC)
Provenance:
📎 storage: Storage(uid='OiRpS2Y9tkQs', root='/home/runner/work/lamin-usecases/lamin-usecases/docs/test-scrna', type='local', instance_uid='5ZP9QR2HPILj')
📎 transform: Transform(uid='Nv48yAceNSh85zKv', name='scRNA-seq', key='scrna', version='1', type='notebook')
📎 run: Run(uid='9uRlmMsOoszauSoXZHDe', started_at=2024-05-06 19:30:58 UTC, is_consecutive=True)
📎 created_by: User(uid='DzTjkKse', handle='testuser1', name='Test User1')
📎 input_of (core.Run): ['2024-05-06 19:33:59 UTC']
Features:
var: FeatureSet(uid='52qw0VKAcPSfmY0R3cwA', n=36503, type='number', registry='bionty.Gene')
'CAMK2N2', 'NDUFA3', 'PHEX', 'PIK3IP1-DT', 'CDK5RAP1', 'RAI14-DT', 'NANOS3', 'ALG5', 'FGL1', 'DOK7', 'SPINT2', 'CGB5', 'DYNC1LI2-DT', 'ATP5F1A', 'SOST', 'HPN', 'NPIPB3', 'KLHL11', 'AQP4', 'LINC02199', ...
obs: FeatureSet(uid='fsbbypYLWPZMyFa8AQE1', n=4, registry='core.Feature')
🔗 donor (12, core.ULabel): 'D503', 'A31', 'A35', 'A36', '640C', '582C', '637C', 'A52', 'A29', '621B', ...
🔗 tissue (17, bionty.Tissue): 'sigmoid colon', 'spleen', 'liver', 'ileum', 'lamina propria', 'duodenum', 'thymus', 'skeletal muscle tissue', 'lung', 'omentum', ...
🔗 cell_type (32, bionty.CellType): 'progenitor cell', 'CD16-positive, CD56-dim natural killer cell, human', 'mast cell', 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated', 'regulatory T cell', 'germinal center B cell', 'mucosal invariant T cell', 'naive thymus-derived CD8-positive, alpha-beta T cell', 'effector memory CD4-positive, alpha-beta T cell', 'animal cell', ...
🔗 assay (3, bionty.ExperimentalFactor): '10x 5' v2', '10x 3' v3', '10x 5' v1'
Labels:
📎 tissues (17, bionty.Tissue): 'sigmoid colon', 'spleen', 'liver', 'ileum', 'lamina propria', 'duodenum', 'thymus', 'skeletal muscle tissue', 'lung', 'omentum', ...
📎 cell_types (32, bionty.CellType): 'progenitor cell', 'CD16-positive, CD56-dim natural killer cell, human', 'mast cell', 'effector memory CD8-positive, alpha-beta T cell, terminally differentiated', 'regulatory T cell', 'germinal center B cell', 'mucosal invariant T cell', 'naive thymus-derived CD8-positive, alpha-beta T cell', 'effector memory CD4-positive, alpha-beta T cell', 'animal cell', ...
📎 experimental_factors (3, bionty.ExperimentalFactor): '10x 5' v2', '10x 3' v3', '10x 5' v1'
📎 ulabels (12, core.ULabel): 'D503', 'A31', 'A35', 'A36', '640C', '582C', '637C', 'A52', 'A29', '621B', ...
artifact1.view_lineage()
artifact2.describe()
Artifact(uid='5aVhAlxvcEvmnw92HW4y', suffix='.h5ad', accessor='AnnData', description='10x reference adata', size=857752, hash='0Fozmib89XWbFoD6hSq5yA', hash_type='md5', n_observations=70, visibility=1, key_is_virtual=True, updated_at=2024-05-06 19:34:27 UTC)
Provenance:
📎 storage: Storage(uid='OiRpS2Y9tkQs', root='/home/runner/work/lamin-usecases/lamin-usecases/docs/test-scrna', type='local', instance_uid='5ZP9QR2HPILj')
📎 transform: Transform(uid='ManDYgmftZ8C5zKv', name='Standardize and append a batch of data', key='scrna2', version='1', type='notebook')
📎 run: Run(uid='z81MRZ4LN68TXmVRvvDI', started_at=2024-05-06 19:33:59 UTC, is_consecutive=True)
📎 created_by: User(uid='DzTjkKse', handle='testuser1', name='Test User1')
Features:
var: FeatureSet(uid='hEynm1GOtKA9ynMsBjfn', n=754, type='number', registry='bionty.Gene')
'PPP2R5C', 'SLCO3A1', 'SH3YL1', 'BLK', 'FGR', 'BEX3', 'DDAH2', 'PSMC3', 'MARCKSL1', 'ALKBH7', 'CD74', 'NDFIP1', 'RNF130', 'CALM3', 'PKM', 'GBP2', 'SF3B2', 'PAXBP1', 'HSPD1', 'S100A4', ...
obs: FeatureSet(uid='LonQue6vqGYGTED5Na7U', n=1, registry='core.Feature')
🔗 cell_type (9, bionty.CellType): 'cytotoxic T cell', 'CD8-positive, CD25-positive, alpha-beta regulatory T cell', 'CD16-positive, CD56-dim natural killer cell, human', 'CD4-positive, alpha-beta T cell', 'effector memory CD4-positive, alpha-beta T cell, terminally differentiated', 'CD14-positive, CD16-negative classical monocyte', 'dendritic cell', 'CD38-positive naive B cell', 'B cell, CD19-positive'
Labels:
📎 cell_types (9, bionty.CellType): 'cytotoxic T cell', 'CD8-positive, CD25-positive, alpha-beta regulatory T cell', 'CD16-positive, CD56-dim natural killer cell, human', 'CD4-positive, alpha-beta T cell', 'effector memory CD4-positive, alpha-beta T cell, terminally differentiated', 'CD14-positive, CD16-negative classical monocyte', 'dendritic cell', 'CD38-positive naive B cell', 'B cell, CD19-positive'
artifact2.view_lineage()
Compare features#
Here we compute shared genes:
artifact1_genes = artifact1.features["var"]
artifact2_genes = artifact2.features["var"]
shared_genes = artifact1_genes & artifact2_genes
len(shared_genes)
749
shared_genes.list("symbol")[:10]
['HES4',
'TNFRSF4',
'SSU72',
'PARK7',
'RBP7',
'SRM',
'MAD2L2',
'AGTRAP',
'TNFRSF1B',
'EFHD2']
Compare cell types#
artifact1_celltypes = artifact1.cell_types.all()
artifact2_celltypes = artifact2.cell_types.all()
shared_celltypes = artifact1_celltypes & artifact2_celltypes
shared_celltypes_names = shared_celltypes.list("name")
shared_celltypes_names
['CD16-positive, CD56-dim natural killer cell, human']
Load the individual artifacts#
We could either load the artifacts into memory or access them in backed mode through `.backed()` to lazily load their content.
Let’s load them into memory:
adata1 = artifact1.load()
adata2 = artifact2.load()
We can now subset the two AnnData objects by shared cell types:
adata1_subset = adata1[adata1.obs["cell_type"].isin(shared_celltypes_names)]
adata2_subset = adata2[adata2.obs["cell_type"].isin(shared_celltypes_names)]