{ "cells": [ { "cell_type": "markdown", "id": "55e261bb", "metadata": {}, "source": [ "## Example usage with PyDESeq2\n", "\n", "The code is adapted from the [Step-by-step PyDESeq2 workflow](https://pydeseq2.readthedocs.io/en/latest/auto_examples/plot_step_by_step.html).\n", "The dataset comes from a paper investigating BK polyomavirus infection in urothelial cells ([Baker et al., Oncogene, 2022](https://www.nature.com/articles/s41388-022-02235-8))." ] }, { "cell_type": "code", "execution_count": 1, "id": "2743dfe7", "metadata": {}, "outputs": [], "source": [ "import os\n", "import pickle as pkl\n", "\n", "from pydeseq2.dds import DeseqDataSet\n", "from pydeseq2.ds import DeseqStats\n", "from pydeseq2.utils import load_example_data" ] }, { "cell_type": "code", "execution_count": 2, "id": "61388295", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | A1BG | \n", "A1BG-AS1 | \n", "A1CF | \n", "A2M | \n", "A2M-AS1 | \n", "A2ML1 | \n", "A2MP1 | \n", "A3GALT2 | \n", "A4GALT | \n", "A4GNT | \n", "... | \n", "ZWILCH | \n", "ZWINT | \n", "ZXDA | \n", "ZXDB | \n", "ZXDC | \n", "ZYG11A | \n", "ZYG11B | \n", "ZYX | \n", "ZZEF1 | \n", "ZZZ3 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SRR14509882 | \n", "49 | \n", "57 | \n", "0 | \n", "0 | \n", "0 | \n", "718 | \n", "1 | \n", "0 | \n", "3682 | \n", "0 | \n", "... | \n", "574 | \n", "793 | \n", "223 | \n", "1320 | \n", "882 | \n", "1 | \n", "1791 | \n", "2787 | \n", "2276 | \n", "1576 | \n", "
SRR14509883 | \n", "106 | \n", "82 | \n", "0 | \n", "12 | \n", "6 | \n", "83 | \n", "0 | \n", "0 | \n", "1715 | \n", "1 | \n", "... | \n", "731 | \n", "1266 | \n", "44 | \n", "471 | \n", "389 | \n", "5 | \n", "632 | \n", "2127 | \n", "871 | \n", "676 | \n", "
SRR14509884 | \n", "67 | \n", "36 | \n", "2 | \n", "25 | \n", "5 | \n", "499 | \n", "0 | \n", "0 | \n", "2188 | \n", "0 | \n", "... | \n", "1172 | \n", "2256 | \n", "122 | \n", "741 | \n", "732 | \n", "14 | \n", "915 | \n", "3463 | \n", "1419 | \n", "1009 | \n", "
SRR14509885 | \n", "85 | \n", "67 | \n", "0 | \n", "2 | \n", "3 | \n", "393 | \n", "0 | \n", "0 | \n", "2155 | \n", "2 | \n", "... | \n", "1348 | \n", "3155 | \n", "187 | \n", "1566 | \n", "738 | \n", "26 | \n", "1198 | \n", "3738 | \n", "2044 | \n", "1051 | \n", "
SRR14509886 | \n", "29 | \n", "42 | \n", "0 | \n", "2 | \n", "2 | \n", "76 | \n", "0 | \n", "0 | \n", "2834 | \n", "0 | \n", "... | \n", "298 | \n", "137 | \n", "205 | \n", "743 | \n", "852 | \n", "0 | \n", "1548 | \n", "2878 | \n", "1952 | \n", "1297 | \n", "
5 rows × 29744 columns
\n", "\n", " | Run | \n", "Assay Type | \n", "AvgSpotLen | \n", "Bases | \n", "BioProject | \n", "BioSample | \n", "Bytes | \n", "Center Name | \n", "Consent | \n", "DATASTORE filetype | \n", "... | \n", "LibrarySelection | \n", "LibrarySource | \n", "Organism | \n", "Platform | \n", "ReleaseDate | \n", "Sample Name | \n", "source_name | \n", "SRA Study | \n", "Tissue | \n", "viral_infection | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SRR14509882 | \n", "SRR14509882 | \n", "RNA-Seq | \n", "300 | \n", "11066499600 | \n", "PRJNA728925 | \n", "SAMN19107552 | \n", "3333844788 | \n", "GEO | \n", "public | \n", "sra,fastq | \n", "... | \n", "cDNA | \n", "TRANSCRIPTOMIC | \n", "Homo sapiens | \n", "ILLUMINA | \n", "2022-02-23T00:00:00Z | \n", "GSM5289794 | \n", "Normal human urothelial cells | \n", "SRP319465 | \n", "Ureter | \n", "BKPyV (Dunlop) MOI=1 | \n", "
SRR14509883 | \n", "SRR14509883 | \n", "RNA-Seq | \n", "302 | \n", "8436386308 | \n", "PRJNA728925 | \n", "SAMN19107551 | \n", "2801216097 | \n", "GEO | \n", "public | \n", "fastq,sra | \n", "... | \n", "cDNA | \n", "TRANSCRIPTOMIC | \n", "Homo sapiens | \n", "ILLUMINA | \n", "2022-02-23T00:00:00Z | \n", "GSM5289795 | \n", "Normal human urothelial cells | \n", "SRP319465 | \n", "Ureter | \n", "BKPyV (Dunlop) MOI=1 | \n", "
SRR14509884 | \n", "SRR14509884 | \n", "RNA-Seq | \n", "300 | \n", "9742943700 | \n", "PRJNA728925 | \n", "SAMN19107550 | \n", "3188119940 | \n", "GEO | \n", "public | \n", "fastq,sra | \n", "... | \n", "cDNA | \n", "TRANSCRIPTOMIC | \n", "Homo sapiens | \n", "ILLUMINA | \n", "2022-02-23T00:00:00Z | \n", "GSM5289796 | \n", "Normal human urothelial cells | \n", "SRP319465 | \n", "Ureter | \n", "BKPyV (Dunlop) MOI=1 | \n", "
SRR14509885 | \n", "SRR14509885 | \n", "RNA-Seq | \n", "300 | \n", "11410353600 | \n", "PRJNA728925 | \n", "SAMN19107549 | \n", "3722953816 | \n", "GEO | \n", "public | \n", "sra,fastq | \n", "... | \n", "cDNA | \n", "TRANSCRIPTOMIC | \n", "Homo sapiens | \n", "ILLUMINA | \n", "2022-02-23T00:00:00Z | \n", "GSM5289797 | \n", "Normal human urothelial cells | \n", "SRP319465 | \n", "Ureter | \n", "BKPyV (Dunlop) MOI=1 | \n", "
SRR14509886 | \n", "SRR14509886 | \n", "RNA-Seq | \n", "300 | \n", "9985769400 | \n", "PRJNA728925 | \n", "SAMN19107548 | \n", "3153799143 | \n", "GEO | \n", "public | \n", "sra,fastq | \n", "... | \n", "cDNA | \n", "TRANSCRIPTOMIC | \n", "Homo sapiens | \n", "ILLUMINA | \n", "2022-02-23T00:00:00Z | \n", "GSM5289798 | \n", "Normal human urothelial cells | \n", "SRP319465 | \n", "Ureter | \n", "No infection | \n", "
5 rows × 27 columns
\n", "\n", " | baseMean | \n", "log2FoldChange | \n", "lfcSE | \n", "stat | \n", "pvalue | \n", "padj | \n", "
---|---|---|---|---|---|---|
A1BG | \n", "65.042617 | \n", "-0.174495 | \n", "0.274393 | \n", "-0.635932 | \n", "0.524821 | \n", "0.998845 | \n", "
A1BG-AS1 | \n", "60.814662 | \n", "-0.097512 | \n", "0.260997 | \n", "-0.373613 | \n", "0.708692 | \n", "0.998845 | \n", "
A1CF | \n", "0.234785 | \n", "-0.438579 | \n", "2.539843 | \n", "-0.172680 | \n", "0.862903 | \n", "NaN | \n", "
A2M | \n", "3.841866 | \n", "0.917285 | \n", "0.822768 | \n", "1.114876 | \n", "0.264904 | \n", "0.965674 | \n", "
A2M-AS1 | \n", "3.520622 | \n", "0.412318 | \n", "0.492133 | \n", "0.837817 | \n", "0.402133 | \n", "0.994437 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
ZYG11A | \n", "4.020418 | \n", "0.989967 | \n", "0.716537 | \n", "1.381599 | \n", "0.167095 | \n", "0.906121 | \n", "
ZYG11B | \n", "1387.374886 | \n", "-0.189671 | \n", "0.126651 | \n", "-1.497585 | \n", "0.134241 | \n", "0.873862 | \n", "
ZYX | \n", "2956.789644 | \n", "-0.056822 | \n", "0.104073 | \n", "-0.545980 | \n", "0.585079 | \n", "0.998845 | \n", "
ZZEF1 | \n", "1916.557304 | \n", "-0.129015 | \n", "0.110549 | \n", "-1.167041 | \n", "0.243194 | \n", "0.963318 | \n", "
ZZZ3 | \n", "1102.880698 | \n", "-0.019320 | \n", "0.093984 | \n", "-0.205573 | \n", "0.837124 | \n", "0.998845 | \n", "
29744 rows × 6 columns
\n", "