{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# pymmdb tutorial" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from pymmdb import MMDB" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 1. Create an MMDB object that provides access to the datasets in the scMMDB database." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MMDB(storage_path=./, server_address=https://mmdb.piaqia.com/)\n" ] } ], "source": [ "mmdb = MMDB('./') # creates a new MMDB object with the current directory as the root\n", "print(mmdb) # prints the configuration of the MMDB object" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 2. Check the details of the scMMDB database." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MMDB Information: \n", "Species: Homo sapiens, Mus musculus, Macaca mulatta, Sus scrofa\n", "Tissue: human cell line , human cell line, human blood, human bone marrow, human kidney, human primary motor cortex, human intra-abdominal lymph node tumor, human brain, human achilles tendon, mouse retina, mouse colon, mouse brain cortex, mouse cell line, mouse forebrain, mouse kidney, mouse primary motor cortex, mouse brain, human brain cortex, human jejunum, mouse thymic epithelium, human lung, bone marrow, human liver, human glioblastoma, human blood/skin, human tumor, macaca vaginal, mouse submandibular gland, mouse aorta, mouse tumor, mouse bone marrow, mouse mesenteric lymph nodes, mouse glioblastoma, mouse spleen/lymph nodes, mouse spleen, mouse epididymal adipose, mouse liver, pig liver\n", "Disease: none, cancer, diffuse small lymphocytic lymphoma of the lymph node, tendinopathy, pearson syndrome, alzheimer's disease, non-small-cell lung cancer, acute myeloid leukemia, atherosclerosis, acute lymphoblastic leukemia, 
glioblastoma, COVID-19, B cell acute lymphoblastic leukemia, HIV, obese, epilepsy, multisystem inflammatory syndrome; COVID-19, multiple sclerosis, peruvian tuberculosis disease, cutaneous T cell lymphoma, melanoma, SHIV infection, salivary gland squamous cell carcinoma, aortic aneurysm, breast cancer, nonalcoholic fatty liver disease\n", "Technology: SNARE-seq, Paired-seq, Novaseq, DOGMA-seq, SHARE-seq, NEAT-seq, sci-CAR-seq, HiSeq, ASAP-seq, CITE-seq, ECCITE-seq, Perturb-CITE-seq, REAP-seq, TEA-seq\n", "Technology Type: ATAC_RNA, ATAC_PROTEIN, RNA_PROTEIN, ATAC_RNA_PROTEIN\n" ] } ], "source": [ "mmdb.list_mmdb_info()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Species: Homo sapiens, Mus musculus, Macaca mulatta, Sus scrofa\n" ] } ], "source": [ "mmdb.list_species() # list all species in the database" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Disease: none, cancer, diffuse small lymphocytic lymphoma of the lymph node, tendinopathy, pearson syndrome, alzheimer's disease, non-small-cell lung cancer, acute myeloid leukemia, atherosclerosis, acute lymphoblastic leukemia, glioblastoma, COVID-19, B cell acute lymphoblastic leukemia, HIV, obese, epilepsy, multisystem inflammatory syndrome; COVID-19, multiple sclerosis, peruvian tuberculosis disease, cutaneous T cell lymphoma, melanoma, SHIV infection, salivary gland squamous cell carcinoma, aortic aneurysm, breast cancer, nonalcoholic fatty liver disease\n" ] } ], "source": [ "mmdb.list_disease() # list all diseases in the database" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Tissue: human cell line , human cell line, human blood, human bone marrow, human kidney, human primary motor cortex, human intra-abdominal lymph node tumor, human brain, human 
achilles tendon, mouse retina, mouse colon, mouse brain cortex, mouse cell line, mouse forebrain, mouse kidney, mouse primary motor cortex, mouse brain, human brain cortex, human jejunum, mouse thymic epithelium, human lung, bone marrow, human liver, human glioblastoma, human blood/skin, human tumor, macaca vaginal, mouse submandibular gland, mouse aorta, mouse tumor, mouse bone marrow, mouse mesenteric lymph nodes, mouse glioblastoma, mouse spleen/lymph nodes, mouse spleen, mouse epididymal adipose, mouse liver, pig liver\n" ] } ], "source": [ "mmdb.list_tissue() # list all tissues in the database" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Technology: SNARE-seq, Paired-seq, Novaseq, DOGMA-seq, SHARE-seq, NEAT-seq, sci-CAR-seq, HiSeq, ASAP-seq, CITE-seq, ECCITE-seq, Perturb-CITE-seq, REAP-seq, TEA-seq\n" ] } ], "source": [ "mmdb.list_technology() # list all technologies in the database" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Technology Type: ATAC_RNA, ATAC_PROTEIN, RNA_PROTEIN, ATAC_RNA_PROTEIN\n" ] } ], "source": [ "mmdb.list_technology_type() # list all technology types in the database" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "##### 3. Load a dataset from the scMMDB database" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDSpeciesTissueDiseaseTechnology_typeTechnologyCell_numTitle
82Dataset_C_004Homo sapienshuman bloodatherosclerosisRNA_PROTEINCITE-seq5232Single-cell immune landscape of human atherosc...
83Dataset_C_005Homo sapienshuman bloodacute lymphoblastic leukemiaRNA_PROTEINCITE-seq16450Single-cell antigen-specific landscape of CAR ...
84Dataset_C_006Homo sapienshuman bloodacute lymphoblastic leukemiaRNA_PROTEINCITE-seq23287Single-cell antigen-specific landscape of CAR ...
87Dataset_C_009Homo sapienshuman bloodacute lymphoblastic leukemiaRNA_PROTEINCITE-seq30484Single-cell antigen-specific landscape of CAR ...
88Dataset_C_010Homo sapienshuman bloodacute lymphoblastic leukemiaRNA_PROTEINCITE-seq31105Single-cell antigen-specific landscape of CAR ...
\n", "
" ], "text/plain": [ " ID Species Tissue Disease \\\n", "82 Dataset_C_004 Homo sapiens human blood atherosclerosis \n", "83 Dataset_C_005 Homo sapiens human blood acute lymphoblastic leukemia \n", "84 Dataset_C_006 Homo sapiens human blood acute lymphoblastic leukemia \n", "87 Dataset_C_009 Homo sapiens human blood acute lymphoblastic leukemia \n", "88 Dataset_C_010 Homo sapiens human blood acute lymphoblastic leukemia \n", "\n", " Technology_type Technology Cell_num \\\n", "82 RNA_PROTEIN CITE-seq 5232 \n", "83 RNA_PROTEIN CITE-seq 16450 \n", "84 RNA_PROTEIN CITE-seq 23287 \n", "87 RNA_PROTEIN CITE-seq 30484 \n", "88 RNA_PROTEIN CITE-seq 31105 \n", "\n", " Title \n", "82 Single-cell immune landscape of human atherosc... \n", "83 Single-cell antigen-specific landscape of CAR ... \n", "84 Single-cell antigen-specific landscape of CAR ... \n", "87 Single-cell antigen-specific landscape of CAR ... \n", "88 Single-cell antigen-specific landscape of CAR ... " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mmdb.list_dataset(species='Homo sapiens', tissue='human blood', disease=None, technology_type='RNA_PROTEIN').head(5) # show the first 5 datasets matching the given filters" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Load dataset: [Dataset_C_004]\n" ] } ], "source": [ "dataset = mmdb.load_dataset('Dataset_C_004') # load the dataset with the given dataset ID" 
] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'RNA': AnnData object with n_obs × n_vars = 5232 × 100\n", " obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_ADT', 'nFeature_ADT', 'sample', 'tissue', 'celltype', 'ident', 'RNA.weight', 'ADT.weight', 'wsnn_res.1', 'seurat_clusters'\n", " var: 'features'\n", " uns: 'neighbors'\n", " obsm: 'X_pca_rna', 'X_umap_adt', 'X_umap_rna', 'X_wnnUMAP'\n", " varm: 'PCA_RNA'\n", " obsp: 'distances',\n", " 'PROTEIN': AnnData object with n_obs × n_vars = 5232 × 21\n", " obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_ADT', 'nFeature_ADT', 'sample', 'tissue', 'celltype', 'ident', 'RNA.weight', 'ADT.weight', 'wsnn_res.1', 'seurat_clusters'\n", " var: 'features'\n", " obsm: 'X_pca_adt', 'X_umap_adt', 'X_umap_rna', 'X_wnnUMAP'\n", " varm: 'PCA_ADT'}" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset # inspect the loaded dataset: one AnnData object per modality" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 5232 × 100\n", " obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_ADT', 'nFeature_ADT', 'sample', 'tissue', 'celltype', 'ident', 'RNA.weight', 'ADT.weight', 'wsnn_res.1', 'seurat_clusters'\n", " var: 'features'\n", " uns: 'neighbors'\n", " obsm: 'X_pca_rna', 'X_umap_adt', 'X_umap_rna', 'X_wnnUMAP'\n", " varm: 'PCA_RNA'\n", " obsp: 'distances'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset['RNA'] # RNA modality" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 5232 × 21\n", " obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_ADT', 'nFeature_ADT', 'sample', 'tissue', 'celltype', 'ident', 'RNA.weight', 'ADT.weight', 'wsnn_res.1', 'seurat_clusters'\n", " var: 'features'\n", " 
obsm: 'X_pca_adt', 'X_umap_adt', 'X_umap_rna', 'X_wnnUMAP'\n", " varm: 'PCA_ADT'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset['PROTEIN'] # protein modality" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.19" } }, "nbformat": 4, "nbformat_minor": 2 }