{ "cells": [
 { "cell_type": "markdown", "id": "84931453", "metadata": {}, "source": [ "## Single-cell transcriptomics\n", "\n", "Assuming you have completed the [3k PBMCs tutorial](https://scanpy-tutorials.readthedocs.io/en/latest/pbmc3k.html) of scanpy, this example illustrates how to depict marker gene information on KEGG pathways." ] },
 { "cell_type": "code", "execution_count": 1, "id": "ee93cf67", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "\n", "import matplotlib\n", "import numpy as np\n", "import pandas as pd\n", "import scanpy as sc\n", "\n", "# NOTE(review): this silences every warning for the whole session; consider narrowing the filter.\n", "warnings.filterwarnings('ignore')\n", "matplotlib.rcParams['figure.figsize'] = (6, 6)  # default figure size" ] },
 { "cell_type": "code", "execution_count": 2, "id": "02cf5b8b", "metadata": {}, "outputs": [], "source": [ "# Load the 10x count matrix, using gene symbols as variable names.\n", "adata = sc.read_10x_mtx(\n", "    '../filtered_gene_bc_matrices/hg19',  # the directory with the `.mtx` file\n", "    var_names='gene_symbols')" ] },
 { "cell_type": "code", "execution_count": 3, "id": "fe6bf58e", "metadata": {}, "outputs": [], "source": [ "# De-duplicate the variable (gene) names in place.\n", "adata.var_names_make_unique()" ] },
 { "cell_type": "code", "execution_count": 4, "id": "dc0c2fff", "metadata": {}, "outputs": [], "source": [ "# Basic filtering: cells need at least 200 genes, genes need at least 3 cells.\n", "sc.pp.filter_cells(adata, min_genes=200)\n", "sc.pp.filter_genes(adata, min_cells=3)" ] },
 { "cell_type": "code", "execution_count": 5, "id": "7e495ef7", "metadata": {}, "outputs": [], "source": [ "adata.var['mt'] = adata.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'\n", "# Adds the QC columns (e.g. `n_genes_by_counts`, `pct_counts_mt`) used for filtering below.\n", "sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)" ] },
 { "cell_type": "code", "execution_count": 6, "id": "b13dac9c", "metadata": {}, "outputs": [], "source": [ "# Keep cells with fewer than 2500 detected genes and less than 5% mitochondrial counts.\n", "# `.copy()` materialises the subset so that the in-place steps below modify a real\n", "# AnnData object instead of a view.\n", "keep_cells = (adata.obs.n_genes_by_counts < 2500) & (adata.obs.pct_counts_mt < 5)\n", "adata = adata[keep_cells, :].copy()" ] },
 { "cell_type": "code", "execution_count": 7, "id": "9017ae3a", "metadata": {}, "outputs": [], "source": [ "# Normalise the total counts of every cell to 10,000.\n", "sc.pp.normalize_total(adata, target_sum=1e4)" ] },
 { "cell_type": "code", "execution_count": 8, "id": "135c5c79", "metadata": {}, "outputs": [], "source": [ "# Log-transform the normalised counts (log1p).\n", "sc.pp.log1p(adata)" ] },
 { "cell_type": "code", "execution_count": 9, "id": "6f4dfe14", "metadata": {}, "outputs": [], "source": [ "# Flag highly variable genes; the result is stored in `adata.var.highly_variable`.\n", "sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)" ] },
 { "cell_type": "code", "execution_count": 10, "id": "a7ee6049", "metadata": {}, "outputs": [], "source": [ "# Restrict to the highly variable genes; copy so `adata` is not left as a view.\n", "adata = adata[:, adata.var.highly_variable].copy()" ] },
 { "cell_type": "code", "execution_count": 11, "id": "95007565", "metadata": {}, "outputs": [], "source": [ "# Regress out total counts per cell and the mitochondrial percentage.\n", "sc.pp.regress_out(adata, ['total_counts', 'pct_counts_mt'])" ] },
 { "cell_type": "code", "execution_count": 12, "id": "15e85440", "metadata": {}, "outputs": [], "source": [ "# Scale the data and clip values at 10.\n", "sc.pp.scale(adata, max_value=10)" ] },
 { "cell_type": "code", "execution_count": 13, "id": "7f604976", "metadata": {}, "outputs": [], "source": [ "# Principal component analysis.\n", "sc.tl.pca(adata, svd_solver='arpack')" ] },
 { "cell_type": "code", "execution_count": 14, "id": "bffc16f9", "metadata": {}, "outputs": [], "source": [ "# Neighbourhood graph on the first 40 principal components.\n", "sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)" ] },
 { "cell_type": "code", "execution_count": 15, "id": "94b84e0d", "metadata": {}, "outputs": [], "source": [ "# Leiden clustering; labels are written to `adata.obs['leiden']`.\n", "sc.tl.leiden(adata)" ] },
 { "cell_type": "code", "execution_count": 16, "id": "0f72fc73", "metadata": {}, "outputs": [], "source": [ "# Rank marker genes per Leiden cluster with a t-test.\n", "# NOTE(review): `adata.raw` is never set in the cells above, so, unlike in the linked\n", "# tutorial, this ranks genes on the scaled, HVG-only matrix in `adata.X`. Confirm this is intended.\n", "sc.tl.rank_genes_groups(adata, 'leiden', method='t-test')" ] },
 { "cell_type": "code", "execution_count": 17, "id": "a70ec155", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 0 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "LTB | \n", "CST3 | \n", "HLA-DPB1 | \n", "CCL5 | \n", "FCER1G | \n", "NKG7 | \n", "HLA-DPA1 | \n", "PF4 | \n", "
1 | \n", "IL32 | \n", "TYROBP | \n", "CD79A | \n", "NKG7 | \n", "AIF1 | \n", "CTSW | \n", "HLA-DPB1 | \n", "PPBP | \n", "
2 | \n", "CD2 | \n", "S100A8 | \n", "HLA-DRB1 | \n", "IL32 | \n", "LST1 | \n", "GZMB | \n", "HLA-DRB1 | \n", "SDPR | \n", "
3 | \n", "MAL | \n", "FCN1 | \n", "HLA-DPA1 | \n", "CST7 | \n", "TYROBP | \n", "GNLY | \n", "HLA-DMA | \n", "GNG11 | \n", "
4 | \n", "AQP3 | \n", "LGALS1 | \n", "CD79B | \n", "GZMA | \n", "CST3 | \n", "PRF1 | \n", "HLA-DQA1 | \n", "NRGN | \n", "
5 | \n", "HINT1 | \n", "S100A6 | \n", "HLA-DQA1 | \n", "CTSW | \n", "SAT1 | \n", "CST7 | \n", "HLA-DQB1 | \n", "SPARC | \n", "
6 | \n", "GIMAP7 | \n", "LGALS2 | \n", "MS4A1 | \n", "GZMK | \n", "COTL1 | \n", "GZMA | \n", "CST3 | \n", "CCL5 | \n", "
7 | \n", "GIMAP5 | \n", "AIF1 | \n", "HLA-DQB1 | \n", "PTPRCAP | \n", "IFITM3 | \n", "FGFBP2 | \n", "FCER1A | \n", "RGS18 | \n", "
8 | \n", "LDLRAP1 | \n", "GPX1 | \n", "CD37 | \n", "LYAR | \n", "CTSS | \n", "FCGR3A | \n", "TACR2 | \n", "GPX1 | \n", "
9 | \n", "TRABD2A | \n", "LST1 | \n", "TCL1A | \n", "KLRG1 | \n", "LGALS1 | \n", "SRGN | \n", "FSTL1 | \n", "TPM4 | \n", "
10 | \n", "ACAP1 | \n", "GSTP1 | \n", "HLA-DMA | \n", "GZMH | \n", "FCGR3A | \n", "CD247 | \n", "SYTL4 | \n", "GP9 | \n", "
11 | \n", "FYB | \n", "TYMP | \n", "LTB | \n", "PRF1 | \n", "S100A11 | \n", "SPON2 | \n", "RP11-324I22.4 | \n", "HIST1H2AC | \n", "
12 | \n", "TRAF3IP3 | \n", "CTSS | \n", "LINC00926 | \n", "CD2 | \n", "HLA-DPA1 | \n", "UBB | \n", "FAXDC2 | \n", "CD9 | \n", "
13 | \n", "ITM2A | \n", "FCER1G | \n", "HLA-DMB | \n", "SAMD3 | \n", "TYMP | \n", "FCER1G | \n", "HLA-DMB | \n", "AP001189.4 | \n", "
14 | \n", "PASK | \n", "COTL1 | \n", "HVCN1 | \n", "HOPX | \n", "NPC2 | \n", "ID2 | \n", "HP | \n", "TUBB1 | \n", "
15 | \n", "SCGB3A1 | \n", "S100A11 | \n", "EAF2 | \n", "APOBEC3G | \n", "CEBPB | \n", "CCL4 | \n", "RP4-781K5.2 | \n", "ITGA2B | \n", "
16 | \n", "TNFRSF4 | \n", "SAT1 | \n", "IRF8 | \n", "TIGIT | \n", "HLA-DRB1 | \n", "TYROBP | \n", "PPT2-EGFL8 | \n", "MPP1 | \n", "
17 | \n", "GIMAP4 | \n", "CYBA | \n", "FCRLA | \n", "NCR3 | \n", "S100A6 | \n", "HOPX | \n", "HIST1H2AH | \n", "CLU | \n", "
18 | \n", "RP11-18H21.1 | \n", "NPC2 | \n", "HLA-DOB | \n", "GIMAP7 | \n", "CYBA | \n", "GZMH | \n", "GPX1 | \n", "TMEM40 | \n", "
19 | \n", "CISH | \n", "GRN | \n", "PKIG | \n", "CCL4 | \n", "SRGN | \n", "CLIC3 | \n", "SAMD14 | \n", "CA2 | \n", "
20 | \n", "RP11-291B21.2 | \n", "MS4A6A | \n", "SMIM14 | \n", "CD247 | \n", "PYCARD | \n", "XCL2 | \n", "CLEC10A | \n", "ARPC1B | \n", "
21 | \n", "PTPRCAP | \n", "LGALS3 | \n", "CD72 | \n", "ID2 | \n", "IFI30 | \n", "CCL5 | \n", "PROK2 | \n", "NCOA4 | \n", "
22 | \n", "GPR183 | \n", "PYCARD | \n", "P2RX5 | \n", "FGFBP2 | \n", "HLA-DPB1 | \n", "PTPRCAP | \n", "PVALB | \n", "PTCRA | \n", "
23 | \n", "SELL | \n", "TALDO1 | \n", "SPIB | \n", "ZAP70 | \n", "ARPC1B | \n", "IGFBP7 | \n", "GSTP1 | \n", "TREML1 | \n", "
24 | \n", "SRSF5 | \n", "FCGRT | \n", "PTPRCAP | \n", "ARPC5L | \n", "ABI3 | \n", "AKR1C3 | \n", "HGD | \n", "FERMT3 | \n", "
25 | \n", "ATP6V0E2 | \n", "SRGN | \n", "PNOC | \n", "C9orf142 | \n", "FCN1 | \n", "APOBEC3G | \n", "ITGA2B | \n", "PGRMC1 | \n", "
26 | \n", "IL23A | \n", "NCF2 | \n", "BLNK | \n", "GIMAP4 | \n", "APOBEC3A | \n", "TTC38 | \n", "LINC00957 | \n", "SAT1 | \n", "
27 | \n", "ICOS | \n", "ARPC1B | \n", "KIAA0125 | \n", "LITAF | \n", "C1orf162 | \n", "CYBA | \n", "TDRP | \n", "LAMTOR1 | \n", "
28 | \n", "NAP1L4 | \n", "HLA-DRB1 | \n", "SWAP70 | \n", "HSP90AA1 | \n", "CDA | \n", "PRSS23 | \n", "LGALS2 | \n", "ODC1 | \n", "
29 | \n", "TNFRSF25 | \n", "CDA | \n", "CD19 | \n", "FCRL6 | \n", "C5AR1 | \n", "ARPC5L | \n", "RP11-252A24.3 | \n", "ACRBP | \n", "
30 | \n", "SIRPG | \n", "IFI30 | \n", "IGLL5 | \n", "KLRB1 | \n", "MAFB | \n", "PDIA3 | \n", "TGFB1I1 | \n", "SEPT5 | \n", "
31 | \n", "AKTIP | \n", "C1orf162 | \n", "LIMD2 | \n", "PRR5 | \n", "FGL2 | \n", "EFHD2 | \n", "GRN | \n", "MMD | \n", "
32 | \n", "CD96 | \n", "IFITM3 | \n", "ADAM28 | \n", "GYG1 | \n", "TNFRSF1B | \n", "S1PR5 | \n", "RAB6B | \n", "F13A1 | \n", "
33 | \n", "ILF3-AS1 | \n", "ALDH2 | \n", "SNX3 | \n", "JAKMIP1 | \n", "BLVRA | \n", "LITAF | \n", "S100P | \n", "SNCA | \n", "
34 | \n", "RIC3 | \n", "TNFSF13B | \n", "SNX29P2 | \n", "STK17A | \n", "BLOC1S1 | \n", "XCL1 | \n", "PBLD | \n", "SRGN | \n", "
35 | \n", "PPP1R2 | \n", "FPR1 | \n", "LAT2 | \n", "S1PR5 | \n", "TNFSF10 | \n", "GPR56 | \n", "PDZK1IP1 | \n", "LY6G6F | \n", "
36 | \n", "GPR171 | \n", "CEBPB | \n", "ARHGAP24 | \n", "SRGN | \n", "LGALS3 | \n", "ABI3 | \n", "DNAJC27 | \n", "CMTM5 | \n", "
37 | \n", "RORA | \n", "ASGR1 | \n", "FCGR2B | \n", "ARL6IP5 | \n", "ARRB2 | \n", "PLEKHF1 | \n", "RP11-390B4.5 | \n", "PTGS1 | \n", "
38 | \n", "CCDC66 | \n", "LINC00936 | \n", "MZB1 | \n", "GZMB | \n", "NCF2 | \n", "HAVCR2 | \n", "CCDC122 | \n", "GP1BA | \n", "
39 | \n", "RP11-589C21.6 | \n", "IGSF6 | \n", "PLD4 | \n", "PSME1 | \n", "TNFSF13B | \n", "ZAP70 | \n", "ZNF594 | \n", "MYL9 | \n", "
40 | \n", "DENND2D | \n", "FOLR3 | \n", "BTK | \n", "TMBIM6 | \n", "STX11 | \n", "NCR3 | \n", "SUOX | \n", "RP11-367G6.3 | \n", "
41 | \n", "SSBP1 | \n", "CNPY3 | \n", "PRKCB | \n", "RORA | \n", "FCGR2A | \n", "PTGDR | \n", "ITGB3 | \n", "DYNLL1 | \n", "
42 | \n", "SUCLG2 | \n", "SERPINB1 | \n", "IGJ | \n", "GIMAP5 | \n", "OAS1 | \n", "SAMD3 | \n", "SEPT4 | \n", "TPM1 | \n", "
43 | \n", "AP3M2 | \n", "APOBEC3A | \n", "C16orf74 | \n", "CD160 | \n", "FCGRT | \n", "C5orf56 | \n", "CYBA | \n", "SNX3 | \n", "
44 | \n", "STK17A | \n", "PLBD1 | \n", "PPP1R14A | \n", "SNRPB | \n", "CTD-2006K23.1 | \n", "PSMB8 | \n", "SYP | \n", "TALDO1 | \n", "
45 | \n", "MDS2 | \n", "BLVRA | \n", "RPL22L1 | \n", "PTGDR | \n", "SYTL4 | \n", "CCL3 | \n", "CD1C | \n", "MARCH2 | \n", "
46 | \n", "CORO1B | \n", "S100A12 | \n", "FCRL2 | \n", "TERF2IP | \n", "RGS19 | \n", "PSME1 | \n", "AP001189.4 | \n", "C2orf88 | \n", "
47 | \n", "MAGEH1 | \n", "CCL3 | \n", "PARP1 | \n", "BUB3 | \n", "SMCO4 | \n", "FCRL6 | \n", "PHACTR1 | \n", "CLEC1B | \n", "
48 | \n", "SNHG8 | \n", "VAMP8 | \n", "GUCD1 | \n", "ORAI1 | \n", "C20orf27 | \n", "C9orf142 | \n", "DRAXIN | \n", "AC147651.3 | \n", "
49 | \n", "CCNG1 | \n", "SMCO4 | \n", "RP5-887A10.1 | \n", "CDC42EP3 | \n", "BTK | \n", "RAMP1 | \n", "COLGALT2 | \n", "GAS2L1 | \n", "