From 012ce1e09f273d5d27cd3d9ef8deadc5fe8734c8 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 24 Mar 2023 11:27:45 -0700 Subject: [PATCH 1/4] feat: allow user to provide own chain file for pyliftover (#136) - Can set via UTADatabase parameters, environment variables (LIFTOVER_CHAIN_37_TO_38, LIFTOVER_CHAIN_38_TO_37), else will use pyliftover default methods to download from UCSC --- cool_seq_tool/data_sources/uta_database.py | 38 +++++++++++++++++++--- cool_seq_tool/version.py | 2 +- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/cool_seq_tool/data_sources/uta_database.py b/cool_seq_tool/data_sources/uta_database.py index faa453d2..089b0a3e 100644 --- a/cool_seq_tool/data_sources/uta_database.py +++ b/cool_seq_tool/data_sources/uta_database.py @@ -21,25 +21,53 @@ # use `bound` to upper-bound UTADatabase or child classes UTADatabaseType = TypeVar("UTADatabaseType", bound="UTADatabase") +# Environment variables for paths to chain files for pyliftover +LIFTOVER_CHAIN_37_TO_38 = environ.get("LIFTOVER_CHAIN_37_TO_38") +LIFTOVER_CHAIN_38_TO_37 = environ.get("LIFTOVER_CHAIN_38_TO_37") + class UTADatabase: """Class for connecting and querying UTA database.""" - def __init__(self, db_url: str = UTA_DB_URL, db_pwd: str = "") -> None: + def __init__( + self, + db_url: str = UTA_DB_URL, + db_pwd: str = "", + chain_file_37_to_38: Optional[str] = None, + chain_file_38_to_37: Optional[str] = None + ) -> None: """Initialize DB class. Downstream libraries should use the create() method to construct a new instance: await UTADatabase.create() - :param str db_url: PostgreSQL connection URL + :param db_url: PostgreSQL connection URL Format: `driver://user:pass@host/database/schema` - :param str db_pwd: User's password for uta database + :param db_pwd: User's password for uta database + :param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly. + This is used for pyliftover. If this is not provided, will check to see if + LIFTOVER_CHAIN_37_TO_38 env var is set. If neither is provided, will allow + pyliftover to download a chain file from UCSC + :param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly. + This is used for pyliftover. If this is not provided, will check to see if + LIFTOVER_CHAIN_38_TO_37 env var is set. If neither is provided, will allow + pyliftover to download a chain file from UCSC """ self.schema = None self.db_url = db_url self.db_pwd = db_pwd self._connection_pool = None self.args = self._get_conn_args() - self.liftover_37_to_38 = LiftOver("hg19", "hg38") - self.liftover_38_to_37 = LiftOver("hg38", "hg19") + + chain_file_37_to_38 = chain_file_37_to_38 or LIFTOVER_CHAIN_37_TO_38 + if chain_file_37_to_38: + self.liftover_37_to_38 = LiftOver(chain_file_37_to_38) + else: + self.liftover_37_to_38 = LiftOver("hg19", "hg38") + + chain_file_38_to_37 = chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37 + if chain_file_38_to_37: + self.liftover_38_to_37 = LiftOver(chain_file_38_to_37) + else: + self.liftover_38_to_37 = LiftOver("hg38", "hg19") @staticmethod def _update_db_url(db_pwd: str, db_url: str) -> str: diff --git a/cool_seq_tool/version.py b/cool_seq_tool/version.py index 9cb17e79..c11f861a 100644 --- a/cool_seq_tool/version.py +++ b/cool_seq_tool/version.py @@ -1 +1 @@ -__version__ = "0.1.8" +__version__ = "0.1.9" From ab8a276e9e941d7dd4e0fc425a7aa50e1686800e Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 31 Mar 2023 08:17:30 -0700 Subject: [PATCH 2/4] refactor!: SeqRepoAccess extends vrs-python's SeqRepoDataProxy class (#138) - SeqRepoAccess translate_identifier method: parameter changed from target_namespace --> target_namespaces - SeqRepoAccess aliases method: renamed to translate_alias --- README.md | 2 +- cool_seq_tool/__init__.py | 3 +- cool_seq_tool/cool_seq_tool.py | 29 +++++++------- cool_seq_tool/data_sources/seqrepo_access.py | 41 +++++++++----------- cool_seq_tool/version.py | 2 +- tests/unit/conftest.py | 12 ++++++ tests/unit/test_alignment_mapper.py | 7 ++-- tests/unit/test_mane_transcript.py | 4 +- tests/unit/test_seqrepo_access.py | 16 ++++---- 9 files changed, 63 insertions(+), 53 deletions(-) create mode 100644 tests/unit/conftest.py diff --git a/README.md b/README.md index 3f6fe996..854bdfab 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ If you do not wish to use the default, you must set the environment variable `UT #### SeqRepo `cool-seq-tool` relies on [seqrepo](https://github.com/biocommons/biocommons.seqrepo), which you must download yourself. -Use the `SEQREPO_DATA_PATH` environment variable to set the path of an already existing SeqRepo directory. The default is `/usr/local/share/seqrepo/latest`. +Use the `SEQREPO_ROOT_DIR` environment variable to set the path of an already existing SeqRepo directory. The default is `/usr/local/share/seqrepo/latest`. From the _root_ directory: ``` diff --git a/cool_seq_tool/__init__.py b/cool_seq_tool/__init__.py index e7cb917f..8641a4e4 100644 --- a/cool_seq_tool/__init__.py +++ b/cool_seq_tool/__init__.py @@ -16,8 +16,7 @@ UTA_DB_URL = environ.get("UTA_DB_URL", "postgresql://uta_admin@localhost:5433/uta/uta_20210129") -SEQREPO_DATA_PATH = Path(environ.get("SEQREPO_DATA_PATH", - "/usr/local/share/seqrepo/latest")) +SEQREPO_ROOT_DIR = environ.get("SEQREPO_ROOT_DIR", "/usr/local/share/seqrepo/latest") TRANSCRIPT_MAPPINGS_PATH = Path(environ.get("TRANSCRIPT_MAPPINGS_PATH", f"{APP_ROOT}/data/transcript_mapping.tsv")) diff --git a/cool_seq_tool/cool_seq_tool.py b/cool_seq_tool/cool_seq_tool.py index 90a67235..b75553b3 100644 --- a/cool_seq_tool/cool_seq_tool.py +++ b/cool_seq_tool/cool_seq_tool.py @@ -3,17 +3,17 @@ from typing import Optional, Union, List, Tuple, Dict from pathlib import Path +from biocommons.seqrepo import SeqRepo from gene.query import QueryHandler as GeneQueryHandler -from cool_seq_tool import logger +from cool_seq_tool import logger, SEQREPO_ROOT_DIR from cool_seq_tool.data_sources.alignment_mapper import AlignmentMapper from cool_seq_tool.schemas import Assembly, GenomicData, TranscriptExonData, \ ResidueMode, GenomicDataResponse, ServiceMeta, TranscriptExonDataResponse from cool_seq_tool.data_sources import MANETranscript, MANETranscriptMappings,\ SeqRepoAccess, TranscriptMappings, UTADatabase, GeneNormalizer -from cool_seq_tool import SEQREPO_DATA_PATH, \ - TRANSCRIPT_MAPPINGS_PATH, LRG_REFSEQGENE_PATH, MANE_SUMMARY_PATH, \ - UTA_DB_URL +from cool_seq_tool import TRANSCRIPT_MAPPINGS_PATH, LRG_REFSEQGENE_PATH, \ + MANE_SUMMARY_PATH, UTA_DB_URL from cool_seq_tool.version import __version__ @@ -21,33 +21,36 @@ class CoolSeqTool: """Class to initialize data sources.""" def __init__( - self, seqrepo_data_path: Path = SEQREPO_DATA_PATH, + self, transcript_file_path: Path = TRANSCRIPT_MAPPINGS_PATH, lrg_refseqgene_path: Path = LRG_REFSEQGENE_PATH, mane_data_path: Path = MANE_SUMMARY_PATH, db_url: str = UTA_DB_URL, db_pwd: str = "", - gene_query_handler: GeneQueryHandler = None, - gene_db_url: str = "", gene_db_region: str = "us-east-2" + gene_query_handler: Optional[GeneQueryHandler] = None, + gene_db_url: str = "", gene_db_region: str = "us-east-2", + sr: Optional[SeqRepo] = None ) -> None: """Initialize CoolSeqTool class - :param Path seqrepo_data_path: The path to the seqrepo directory. :param Path transcript_file_path: The path to transcript_mappings.tsv :param Path lrg_refseqgene_path: The path to LRG_RefSeqGene :param Path mane_data_path: Path to RefSeq MANE summary data :param str db_url: PostgreSQL connection URL Format: `driver://user:pass@host/database/schema` :param str db_pwd: User's password for uta database - :param GeneQueryHandler gene_query_handler: Gene normalizer query handler - instance. If this is provided, will use a current instance. If this is not - provided, will create a new instance. + :param Optional[GeneQueryHandler] gene_query_handler: Gene normalizer query + handler instance. If this is provided, will use a current instance. If this + is not provided, will create a new instance. :param str gene_db_url: URL to gene normalizer dynamodb. Only used when `gene_query_handler` is `None`. :param str gene_db_region: AWS region for gene normalizer db. Only used when `gene_query_handler` is `None`. + :param Optional[SeqRepo] sr: SeqRepo instance. If this is not provided, will + create a new instance. """ - self.seqrepo_access = SeqRepoAccess( - seqrepo_data_path=seqrepo_data_path) + if not sr: + sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR) + self.seqrepo_access = SeqRepoAccess(sr) self.transcript_mappings = TranscriptMappings( transcript_file_path=transcript_file_path, lrg_refseqgene_path=lrg_refseqgene_path) diff --git a/cool_seq_tool/data_sources/seqrepo_access.py b/cool_seq_tool/data_sources/seqrepo_access.py index 4e3fc83b..ef9e0f05 100644 --- a/cool_seq_tool/data_sources/seqrepo_access.py +++ b/cool_seq_tool/data_sources/seqrepo_access.py @@ -1,28 +1,22 @@ """A module for accessing SeqRepo.""" from typing import Optional, List, Tuple, Union from os import environ -from pathlib import Path -from biocommons.seqrepo import SeqRepo +from ga4gh.vrs.dataproxy import SeqRepoDataProxy from cool_seq_tool.schemas import ResidueMode -from cool_seq_tool import SEQREPO_DATA_PATH, logger +from cool_seq_tool import logger from cool_seq_tool.data_sources.residue_mode import get_inter_residue_pos -class SeqRepoAccess: +class SeqRepoAccess(SeqRepoDataProxy): """The SeqRepoAccess class.""" - def __init__(self, seqrepo_data_path: Path = SEQREPO_DATA_PATH) -> None: - """Initialize the SeqRepoAccess class. - :param Path seqrepo_data_path: The path to the seqrepo directory. - """ - environ["SEQREPO_LRU_CACHE_MAXSIZE"] = "none" - self.seqrepo_client = SeqRepo(seqrepo_data_path) + environ["SEQREPO_LRU_CACHE_MAXSIZE"] = "none" def get_reference_sequence( - self, ac: str, start: Optional[int] = None, end: Optional[int] = None, - residue_mode: str = ResidueMode.RESIDUE + self, ac: str, start: Optional[int] = None, end: Optional[int] = None, + residue_mode: str = ResidueMode.RESIDUE ) -> Tuple[str, Optional[str]]: """Get reference sequence for an accession given a start and end position. If `start` and `end` are not given, it will return the entire reference sequence @@ -45,7 +39,7 @@ def get_reference_sequence( if start == end: end += 1 try: - sequence = self.seqrepo_client.fetch(ac, start=start, end=end) + sequence = self.sr.fetch(ac, start=start, end=end) except KeyError: msg = f"Accession, {ac}, not found in SeqRepo" logger.warning(msg) @@ -77,7 +71,7 @@ def get_reference_sequence( return sequence, None def translate_identifier( - self, ac: str, target_namespace: Optional[Union[str, List[str]]] = None + self, ac: str, target_namespaces: Optional[Union[str, List[str]]] = None ) -> Tuple[List[str], Optional[str]]: """Return list of identifiers for accession. @@ -86,8 +80,8 @@ def translate_identifier( :return: List of identifiers, warning """ try: - ga4gh_identifiers = self.seqrepo_client.translate_identifier( - ac, target_namespaces=target_namespace) + ga4gh_identifiers = self.sr.translate_identifier( + ac, target_namespaces=target_namespaces) except KeyError: msg = f"SeqRepo unable to get translated identifiers for {ac}" logger.warning(msg) @@ -95,22 +89,23 @@ def translate_identifier( else: return ga4gh_identifiers, None - def aliases(self, - input_str: str) -> Tuple[List[Optional[str]], Optional[str]]: + def translate_alias( + self, input_str: str + ) -> Tuple[List[Optional[str]], Optional[str]]: """Get aliases for a given input. :param str input_str: Input to get aliases for :return: List of aliases, warning """ try: - return self.seqrepo_client.translate_alias(input_str), None + return self.sr.translate_alias(input_str), None except KeyError: msg = f"SeqRepo could not translate alias {input_str}" logger.warning(msg) return [], msg def chromosome_to_acs( - self, chromosome: str + self, chromosome: str ) -> Tuple[Optional[List[str]], Optional[str]]: """Get accessions for a chromosome @@ -119,8 +114,8 @@ def chromosome_to_acs( """ acs = [] for assembly in ["GRCh38", "GRCh37"]: - tmp_acs = self.translate_identifier(f"{assembly}:chr{chromosome}", - target_namespace="refseq")[0] + tmp_acs, _ = self.translate_identifier(f"{assembly}:chr{chromosome}", + target_namespaces="refseq") for ac in tmp_acs: acs.append(ac.split("refseq:")[-1]) if acs: @@ -134,7 +129,7 @@ def ac_to_chromosome(self, ac: str) -> Tuple[Optional[str], Optional[str]]: :param str ac: Accession :return: Chromosome, warning """ - aliases, warning = self.aliases(ac) + aliases, _ = self.translate_alias(ac) aliases = ([a.split(":")[-1] for a in aliases if a.startswith("GRCh") and "." not in a and "chr" not in a] or [None])[0] # noqa: E501 if aliases is None: diff --git a/cool_seq_tool/version.py b/cool_seq_tool/version.py index c11f861a..569b1212 100644 --- a/cool_seq_tool/version.py +++ b/cool_seq_tool/version.py @@ -1 +1 @@ -__version__ = "0.1.9" +__version__ = "0.1.10" diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 00000000..4a7bd3b1 --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,12 @@ +"""Provide utilities for test cases.""" +from biocommons.seqrepo import SeqRepo +import pytest + +from cool_seq_tool import SEQREPO_ROOT_DIR +from cool_seq_tool.data_sources import SeqRepoAccess + + +@pytest.fixture(scope="session") +def test_seqrepo_access(): + """Create SeqRepoAccess test fixture""" + return SeqRepoAccess(SeqRepo(root_dir=SEQREPO_ROOT_DIR)) diff --git a/tests/unit/test_alignment_mapper.py b/tests/unit/test_alignment_mapper.py index d01c03b6..6d155b25 100644 --- a/tests/unit/test_alignment_mapper.py +++ b/tests/unit/test_alignment_mapper.py @@ -1,15 +1,14 @@ """Module for testing the Alignment Mapper class""" import pytest -from cool_seq_tool.data_sources import AlignmentMapper, SeqRepoAccess, \ - TranscriptMappings, UTADatabase +from cool_seq_tool.data_sources import AlignmentMapper, TranscriptMappings, UTADatabase from cool_seq_tool.schemas import Assembly, ResidueMode @pytest.fixture(scope="module") -def test_alignment_mapper(): +def test_alignment_mapper(test_seqrepo_access): """Build AlignmentMapper test fixture""" - return AlignmentMapper(SeqRepoAccess(), TranscriptMappings(), UTADatabase()) + return AlignmentMapper(test_seqrepo_access, TranscriptMappings(), UTADatabase()) @pytest.fixture(scope="module") diff --git a/tests/unit/test_mane_transcript.py b/tests/unit/test_mane_transcript.py index f08ced33..e9639910 100644 --- a/tests/unit/test_mane_transcript.py +++ b/tests/unit/test_mane_transcript.py @@ -12,9 +12,9 @@ @pytest.fixture(scope="module") -def test_mane_transcript(): +def test_mane_transcript(test_seqrepo_access): """Build mane transcript test fixture.""" - return MANETranscript(SeqRepoAccess(), TranscriptMappings(), + return MANETranscript(test_seqrepo_access, TranscriptMappings(), MANETranscriptMappings(), UTADatabase(), GeneNormalizer()) diff --git a/tests/unit/test_seqrepo_access.py b/tests/unit/test_seqrepo_access.py index dd886bb1..d1a31f25 100644 --- a/tests/unit/test_seqrepo_access.py +++ b/tests/unit/test_seqrepo_access.py @@ -1,13 +1,15 @@ """Module for testing seqrepo access class""" import pytest +from biocommons.seqrepo import SeqRepo +from cool_seq_tool import SEQREPO_ROOT_DIR from cool_seq_tool.data_sources import SeqRepoAccess @pytest.fixture(scope="module") def test_seqrepo_access(): """Create SeqRepoAccess test fixture""" - return SeqRepoAccess() + return SeqRepoAccess(SeqRepo(root_dir=SEQREPO_ROOT_DIR)) def test_get_reference_sequence(test_seqrepo_access): @@ -51,11 +53,11 @@ def test_translate_identifier(test_seqrepo_access): """Test that translate_identifier method works correctly""" expected = (["ga4gh:SQ.ijXOSP3XSsuLWZhXQ7_TJ5JXu4RJO6VT"], None) resp = test_seqrepo_access.translate_identifier( - "NM_152263.3", target_namespace="ga4gh") + "NM_152263.3", target_namespaces="ga4gh") assert resp == expected resp = test_seqrepo_access.translate_identifier( - "refseq:NM_152263.3", target_namespace="ga4gh") + "refseq:NM_152263.3", target_namespaces="ga4gh") assert resp == expected resp = test_seqrepo_access.translate_identifier("refseq:NM_152263.3") @@ -81,20 +83,20 @@ def test_translate_identifier(test_seqrepo_access): def test_aliases(test_seqrepo_access): """Test that aliases method works correctly""" expected = (["ga4gh:SQ.ijXOSP3XSsuLWZhXQ7_TJ5JXu4RJO6VT"], None) - resp = test_seqrepo_access.aliases("NM_152263.3") + resp = test_seqrepo_access.translate_alias("NM_152263.3") assert len(resp[0]) > 0 assert resp[1] is None assert expected[0][0] in resp[0] - resp = test_seqrepo_access.aliases("NC_000002.12") + resp = test_seqrepo_access.translate_alias("NC_000002.12") assert len(resp[0]) > 0 assert resp[1] is None assert "GRCh38:2" in resp[0] - resp = test_seqrepo_access.aliases("refseq_152263.3") + resp = test_seqrepo_access.translate_alias("refseq_152263.3") assert resp == ([], "SeqRepo could not translate alias refseq_152263.3") - resp = test_seqrepo_access.aliases("GRCh38:2") + resp = test_seqrepo_access.translate_alias("GRCh38:2") assert resp == ([], "SeqRepo could not translate alias GRCh38:2") From fe798ef8542f4a46b538cd5d47199da03c8af276 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Fri, 31 Mar 2023 08:30:54 -0700 Subject: [PATCH 3/4] build: update dependencies (#139) - Add ga4gh.vrs - Separate out dev/tests dependencies in setup.cfg --- Pipfile | 5 +- README.md | 4 +- requirements-dev.txt | 193 +++++++++++++++++-------------------------- requirements.txt | 104 +++++++++++------------ setup.cfg | 21 +++-- 5 files changed, 144 insertions(+), 183 deletions(-) diff --git a/Pipfile b/Pipfile index 792cffe5..3046fffb 100644 --- a/Pipfile +++ b/Pipfile @@ -15,6 +15,7 @@ pydantic = "*" fastapi = "*" uvicorn = "*" gene-normalizer = "*" +"ga4gh.vrs" = "*" [dev-packages] cool_seq_tool = {editable = true, path = "."} @@ -25,11 +26,9 @@ flake8-docstrings = "*" flake8-annotations = "*" flake8-quotes = "*" flake8-import-order = "*" -coverage = "*" pytest-cov = "*" -coveralls = "*" -jupyterlab = "*" pytest-asyncio = "==0.18.3" ipython = "*" +ipykernel = "*" psycopg2-binary = "*" mock = "*" diff --git a/README.md b/README.md index 854bdfab..04860f54 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ The **cool-seq-tool** provides: ### pip ```commandline -pip install cool-seq-tool +pip install cool-seq-tool[dev,tests] ``` ### Development @@ -30,7 +30,7 @@ Install backend dependencies and enter Pipenv environment: ```commandline pipenv shell -pipenv lock && pipenv sync +pipenv update pipenv install --dev ``` diff --git a/requirements-dev.txt b/requirements-dev.txt index fc552ac8..c8f885e3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,164 +1,125 @@ -i https://pypi.org/simple -aiofiles==22.1.0 -anyio==3.6.1 ; python_full_version >= '3.6.2' +aiofiles==23.1.0 +anyio==3.6.2 ; python_full_version >= '3.6.2' appdirs==1.4.4 appnope==0.1.3 ; sys_platform == 'darwin' -argcomplete==2.0.0 ; python_version >= '3.6' -argh==0.26.2 -argon2-cffi==21.3.0 ; python_version >= '3.6' -argon2-cffi-bindings==21.2.0 ; python_version >= '3.6' -asttokens==2.0.8 -asyncpg==0.26.0 -attrs==22.1.0 ; python_version >= '3.5' -babel==2.10.3 ; python_version >= '3.6' +asttokens==2.2.1 +asyncpg==0.27.0 +attrs==22.2.0 ; python_version >= '3.6' backcall==0.2.0 -beautifulsoup4==4.11.1 ; python_full_version >= '3.6.0' +beautifulsoup4==4.12.0 ; python_full_version >= '3.6.0' biocommons.seqrepo==0.6.5 bioutils==0.5.7 ; python_version >= '3.6' -bleach==5.0.1 ; python_version >= '3.7' -boto3==1.24.90 -botocore==1.27.90 ; python_version >= '3.7' +boto3==1.26.103 +botocore==1.29.103 ; python_version >= '3.7' bs4==0.0.1 -canonicaljson==1.6.3 ; python_version >= '3.7' -certifi==2022.9.24 ; python_version >= '3.6' -cffi==1.15.1 +canonicaljson==2.0.0 ; python_version >= '3.7' +certifi==2022.12.7 ; python_version >= '3.6' cfgv==3.3.1 ; python_full_version >= '3.6.1' -charset-normalizer==2.1.1 ; python_full_version >= '3.6.0' +charset-normalizer==3.1.0 ; python_full_version >= '3.7.0' click==8.1.3 ; python_version >= '3.7' coloredlogs==15.0.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' +comm==0.1.3 ; python_version >= '3.6' configparser==5.3.0 ; python_version >= '3.7' -e . -coverage==6.5.0 -coveralls==3.3.1 -cssselect==1.1.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' -debugpy==1.6.3 ; python_version >= '3.7' +coverage[toml]==7.2.2 ; python_version >= '3.7' +cssselect==1.2.0 ; python_version >= '3.7' +debugpy==1.6.6 ; python_version >= '3.7' decorator==5.1.1 ; python_version >= '3.5' -defusedxml==0.7.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' distlib==0.3.6 -docopt==0.6.2 -entrypoints==0.4 ; python_version >= '3.6' -executing==1.1.1 -fake-useragent==0.1.11 -fastapi==0.85.0 -fastjsonschema==2.16.2 -filelock==3.8.0 ; python_version >= '3.7' -flake8==5.0.4 -flake8-annotations==2.9.1 -flake8-docstrings==1.6.0 -flake8-import-order==0.18.1 -flake8-quotes==3.3.1 -ga4gh.vrs[extras]==0.8.7.dev0 ; python_version >= '3.6' -ga4gh.vrsatile.pydantic==0.1.dev3 ; python_version >= '3.8' -gene-normalizer==0.2.3 -gffutils==0.11.1 +exceptiongroup==1.1.1 ; python_version < '3.11' +executing==1.2.0 +fake-useragent==1.1.3 +fastapi==0.95.0 +filelock==3.10.7 ; python_version >= '3.7' +flake8==6.0.0 +flake8-annotations==3.0.0 +flake8-docstrings==1.7.0 +flake8-import-order==0.18.2 +flake8-quotes==3.3.2 +ga4gh.vrs==0.9.0.dev0 +ga4gh.vrsatile.pydantic==0.1.0.dev7 ; python_version >= '3.8' +gene-normalizer==0.2.8 h11==0.14.0 ; python_version >= '3.7' -hgvs==1.5.2 +hgvs==1.5.4 humanfriendly==10.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' -identify==2.5.6 ; python_version >= '3.7' +identify==2.5.22 ; python_version >= '3.7' idna==3.4 ; python_version >= '3.5' -importlib-metadata==5.0.0 ; python_version >= '3.7' +importlib-metadata==6.1.0 ; python_version >= '3.7' inflection==0.5.1 ; python_version >= '3.5' -iniconfig==1.1.1 -ipykernel==6.16.0 ; python_version >= '3.7' -ipython==8.5.0 ; python_version >= '3.8' -ipython-genutils==0.2.0 -jedi==0.18.1 ; python_version >= '3.6' -jinja2==3.1.2 ; python_version >= '3.7' +iniconfig==2.0.0 ; python_version >= '3.7' +ipykernel==6.22.0 +ipython==8.12.0 ; python_version >= '3.8' +jedi==0.18.2 ; python_version >= '3.6' jmespath==1.0.1 ; python_version >= '3.7' -json5==0.9.10 jsonschema==3.2.0 -jupyter-client==7.4.2 ; python_version >= '3.7' -jupyter-core==4.11.1 ; python_version >= '3.7' -jupyter-server==1.21.0 ; python_version >= '3.7' -jupyterlab==3.4.8 -jupyterlab-pygments==0.2.2 ; python_version >= '3.7' -jupyterlab-server==2.16.0 ; python_version >= '3.7' -lxml==4.9.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' -markdown==3.4.1 ; python_version >= '3.7' -markupsafe==2.1.1 ; python_version >= '3.7' +jupyter-client==8.1.0 ; python_version >= '3.8' +jupyter-core==5.3.0 ; python_version >= '3.8' +lxml==4.9.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' +markdown==3.4.3 ; python_version >= '3.7' matplotlib-inline==0.1.6 ; python_version >= '3.5' mccabe==0.7.0 ; python_version >= '3.6' -mistune==2.0.4 -mock==4.0.3 -nbclassic==0.4.5 ; python_version >= '3.7' -nbclient==0.7.0 ; python_full_version >= '3.7.0' -nbconvert==7.2.1 ; python_version >= '3.7' -nbformat==5.7.0 ; python_version >= '3.7' +mock==5.0.1 nest-asyncio==1.5.6 ; python_version >= '3.5' nodeenv==1.7.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6' -notebook==6.5.1 ; python_version >= '3.7' -notebook-shim==0.1.0 ; python_version >= '3.7' -numpy==1.23.4 ; python_version >= '3.10' -packaging==21.3 ; python_version >= '3.6' -pandas==1.5.0 -pandocfilters==1.5.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +numpy==1.24.2 ; python_version >= '3.8' +packaging==23.0 ; python_version >= '3.7' +pandas==1.5.3 parse==1.19.0 parsley==1.3 parso==0.8.3 ; python_version >= '3.6' pexpect==4.8.0 ; sys_platform != 'win32' pickleshare==0.7.5 -platformdirs==2.5.2 ; python_version >= '3.7' +platformdirs==3.2.0 ; python_version >= '3.7' pluggy==1.0.0 ; python_version >= '3.6' -pre-commit==2.20.0 -prometheus-client==0.15.0 ; python_version >= '3.6' -prompt-toolkit==3.0.31 ; python_full_version >= '3.6.2' -psutil==5.9.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' -psycopg2==2.9.4 ; python_version >= '3.6' -psycopg2-binary==2.9.4 +pre-commit==3.2.1 +prompt-toolkit==3.0.38 ; python_full_version >= '3.7.0' +psutil==5.9.4 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +psycopg2==2.9.5 ; python_version >= '3.6' +psycopg2-binary==2.9.5 ptyprocess==0.7.0 pure-eval==0.2.2 -py==1.11.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' -pycodestyle==2.9.1 ; python_version >= '3.6' -pycparser==2.21 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' -pydantic==1.10.2 -pydocstyle==6.1.1 ; python_version >= '3.6' +pycodestyle==2.10.0 ; python_version >= '3.6' +pydantic==1.10.7 +pydocstyle==6.3.0 ; python_version >= '3.6' pyee==8.2.2 -pyfaidx==0.7.1 -pyflakes==2.5.0 ; python_version >= '3.6' -pygments==2.13.0 ; python_version >= '3.6' +pyflakes==3.0.1 ; python_version >= '3.6' +pygments==2.14.0 ; python_version >= '3.6' pyliftover==0.4 -pyparsing==3.0.9 ; python_full_version >= '3.6.8' pyppeteer==1.0.2 ; python_version >= '3.7' and python_version < '4.0' -pyquery==1.4.3 -pyrsistent==0.18.1 ; python_version >= '3.7' -pysam==0.19.1 -pytest==7.1.3 +pyquery==2.0.0 +pyrsistent==0.19.3 ; python_version >= '3.7' +pysam==0.20.0 +pytest==7.2.2 pytest-asyncio==0.18.3 pytest-cov==4.0.0 python-dateutil==2.8.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' python-jsonschema-objects==0.4.1 -pytz==2022.4 +pytz==2023.3 pyyaml==6.0 ; python_version >= '3.6' -pyzmq==24.0.1 ; python_version >= '3.6' -requests==2.28.1 ; python_version >= '3.7' and python_version < '4' +pyzmq==25.0.2 ; python_version >= '3.6' +requests==2.28.2 ; python_version >= '3.7' and python_version < '4' requests-html==0.10.0 ; python_full_version >= '3.6.0' s3transfer==0.6.0 ; python_version >= '3.7' -send2trash==1.8.0 -setuptools==65.5.0 ; python_version >= '3.7' -simplejson==3.17.6 ; python_version >= '2.5' and python_version not in '3.0, 3.1, 3.2, 3.3' +setuptools==67.6.1 ; python_version >= '3.7' six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' sniffio==1.3.0 ; python_version >= '3.7' snowballstemmer==2.2.0 -soupsieve==2.3.2.post1 ; python_version >= '3.6' +soupsieve==2.4 ; python_version >= '3.7' sqlparse==0.4.3 ; python_version >= '3.5' -stack-data==0.5.1 -starlette==0.20.4 ; python_version >= '3.7' +stack-data==0.6.2 +starlette==0.26.1 ; python_version >= '3.7' tabulate==0.9.0 ; python_version >= '3.7' -terminado==0.16.0 ; python_version >= '3.7' -tinycss2==1.1.1 ; python_version >= '3.6' -toml==0.10.2 ; python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3' -tomli==2.0.1 ; python_version >= '3.7' +tomli==2.0.1 ; python_version < '3.11' tornado==6.2 ; python_version >= '3.7' -tqdm==4.64.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' -traitlets==5.4.0 ; python_version >= '3.7' -typing-extensions==4.4.0 ; python_version >= '3.7' -urllib3==1.26.12 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4' -uvicorn==0.18.3 -virtualenv==20.16.5 ; python_version >= '3.6' -w3lib==2.0.1 ; python_version >= '3.6' -wcwidth==0.2.5 -webencodings==0.5.1 -websocket-client==1.4.1 ; python_version >= '3.7' -websockets==10.3 ; python_version >= '3.7' -yoyo-migrations==8.0.0 -zipp==3.9.0 ; python_version >= '3.7' +tqdm==4.65.0 ; python_version >= '3.7' +traitlets==5.9.0 ; python_version >= '3.7' +typing-extensions==4.5.0 ; python_version >= '3.7' +urllib3==1.26.15 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' +uvicorn==0.21.1 +virtualenv==20.21.0 ; python_version >= '3.7' +w3lib==2.1.1 ; python_version >= '3.7' +wcwidth==0.2.6 +websockets==10.4 ; python_version >= '3.7' +yoyo-migrations==8.2.0 +zipp==3.15.0 ; python_version >= '3.7' diff --git a/requirements.txt b/requirements.txt index f1a7b124..5eaf8560 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,92 +1,86 @@ -i https://pypi.org/simple -aiofiles==22.1.0 -anyio==3.6.1 ; python_full_version >= '3.6.2' +aiofiles==23.1.0 +anyio==3.6.2 ; python_full_version >= '3.6.2' appdirs==1.4.4 appnope==0.1.3 ; sys_platform == 'darwin' -argcomplete==2.0.0 ; python_version >= '3.6' -argh==0.26.2 -asttokens==2.0.8 -asyncpg==0.26.0 -attrs==22.1.0 ; python_version >= '3.5' +asttokens==2.2.1 +asyncpg==0.27.0 +attrs==22.2.0 ; python_version >= '3.6' backcall==0.2.0 -beautifulsoup4==4.11.1 ; python_full_version >= '3.6.0' +beautifulsoup4==4.12.0 ; python_full_version >= '3.6.0' biocommons.seqrepo==0.6.5 bioutils==0.5.7 ; python_version >= '3.6' -boto3==1.24.90 -botocore==1.27.90 ; python_version >= '3.7' +boto3==1.26.103 +botocore==1.29.103 ; python_version >= '3.7' bs4==0.0.1 -canonicaljson==1.6.3 ; python_version >= '3.7' -certifi==2022.9.24 ; python_version >= '3.6' -charset-normalizer==2.1.1 ; python_full_version >= '3.6.0' +canonicaljson==2.0.0 ; python_version >= '3.7' +certifi==2022.12.7 ; python_version >= '3.6' +charset-normalizer==3.1.0 ; python_full_version >= '3.7.0' click==8.1.3 ; python_version >= '3.7' coloredlogs==15.0.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' configparser==5.3.0 ; python_version >= '3.7' -cssselect==1.1.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' +cssselect==1.2.0 ; python_version >= '3.7' decorator==5.1.1 ; python_version >= '3.5' -executing==1.1.1 -fake-useragent==0.1.11 -fastapi==0.85.0 -ga4gh.vrs[extras]==0.8.7.dev0 ; python_version >= '3.6' -ga4gh.vrsatile.pydantic==0.1.dev3 ; python_version >= '3.8' -gene-normalizer==0.2.3 -gffutils==0.11.1 +executing==1.2.0 +fake-useragent==1.1.3 +fastapi==0.95.0 +ga4gh.vrs==0.9.0.dev0 +ga4gh.vrsatile.pydantic==0.1.0.dev7 ; python_version >= '3.8' +gene-normalizer==0.2.8 h11==0.14.0 ; python_version >= '3.7' -hgvs==1.5.2 +hgvs==1.5.4 humanfriendly==10.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' idna==3.4 ; python_version >= '3.5' -importlib-metadata==5.0.0 ; python_version >= '3.7' +importlib-metadata==6.1.0 ; python_version >= '3.7' inflection==0.5.1 ; python_version >= '3.5' -ipython==8.5.0 ; python_version >= '3.8' -jedi==0.18.1 ; python_version >= '3.6' +ipython==8.12.0 ; python_version >= '3.8' +jedi==0.18.2 ; python_version >= '3.6' jmespath==1.0.1 ; python_version >= '3.7' jsonschema==3.2.0 -lxml==4.9.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' -markdown==3.4.1 ; python_version >= '3.7' +lxml==4.9.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' +markdown==3.4.3 ; python_version >= '3.7' matplotlib-inline==0.1.6 ; python_version >= '3.5' -numpy==1.23.4 ; python_version >= '3.10' -pandas==1.5.0 +numpy==1.24.2 ; python_version >= '3.8' +pandas==1.5.3 parse==1.19.0 parsley==1.3 parso==0.8.3 ; python_version >= '3.6' pexpect==4.8.0 ; sys_platform != 'win32' pickleshare==0.7.5 -prompt-toolkit==3.0.31 ; python_full_version >= '3.6.2' -psycopg2==2.9.4 ; python_version >= '3.6' -psycopg2-binary==2.9.4 +prompt-toolkit==3.0.38 ; python_full_version >= '3.7.0' +psycopg2==2.9.5 ; python_version >= '3.6' ptyprocess==0.7.0 pure-eval==0.2.2 -pydantic==1.10.2 +pydantic==1.10.7 pyee==8.2.2 -pyfaidx==0.7.1 -pygments==2.13.0 ; python_version >= '3.6' +pygments==2.14.0 ; python_version >= '3.6' pyliftover==0.4 pyppeteer==1.0.2 ; python_version >= '3.7' and python_version < '4.0' -pyquery==1.4.3 -pyrsistent==0.18.1 ; python_version >= '3.7' -pysam==0.19.1 +pyquery==2.0.0 +pyrsistent==0.19.3 ; python_version >= '3.7' +pysam==0.20.0 python-dateutil==2.8.2 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' python-jsonschema-objects==0.4.1 -pytz==2022.4 +pytz==2023.3 pyyaml==6.0 ; python_version >= '3.6' -requests==2.28.1 ; python_version >= '3.7' and python_version < '4' +requests==2.28.2 ; python_version >= '3.7' and python_version < '4' requests-html==0.10.0 ; python_full_version >= '3.6.0' s3transfer==0.6.0 ; python_version >= '3.7' -setuptools==65.5.0 ; python_version >= '3.7' -simplejson==3.17.6 ; python_version >= '2.5' and python_version not in '3.0, 3.1, 3.2, 3.3' +setuptools==67.6.1 ; python_version >= '3.7' six==1.16.0 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' sniffio==1.3.0 ; python_version >= '3.7' -soupsieve==2.3.2.post1 ; python_version >= '3.6' +soupsieve==2.4 ; python_version >= '3.7' sqlparse==0.4.3 ; python_version >= '3.5' -stack-data==0.5.1 -starlette==0.20.4 ; python_version >= '3.7' +stack-data==0.6.2 +starlette==0.26.1 ; python_version >= '3.7' tabulate==0.9.0 ; python_version >= '3.7' -tqdm==4.64.1 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' -traitlets==5.4.0 ; python_version >= '3.7' -typing-extensions==4.4.0 ; python_version >= '3.7' -urllib3==1.26.12 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4' -uvicorn==0.18.3 -w3lib==2.0.1 ; python_version >= '3.6' -wcwidth==0.2.5 -websockets==10.3 ; python_version >= '3.7' -yoyo-migrations==8.0.0 -zipp==3.9.0 ; python_version >= '3.7' +tqdm==4.65.0 ; python_version >= '3.7' +traitlets==5.9.0 ; python_version >= '3.7' +typing-extensions==4.5.0 ; python_version >= '3.7' +urllib3==1.26.15 ; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' +uvicorn==0.21.1 +w3lib==2.1.1 ; python_version >= '3.7' +wcwidth==0.2.6 +websockets==10.4 ; python_version >= '3.7' +yoyo-migrations==8.2.0 +zipp==3.15.0 ; python_version >= '3.7' diff --git a/setup.cfg b/setup.cfg index 43fddde1..5a26ad00 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,12 +24,7 @@ install_requires = uvicorn fastapi gene-normalizer - -tests_require = - pytest - pytest-cov - pytest-asyncio == 0.18.3 - mock + ga4gh.vrs [options.package_data] cool_seq_tool = @@ -37,9 +32,21 @@ cool_seq_tool = [options.extras_require] dev = + pre-commit + flake8 + flake8-docstrings + flake8-annotations + flake8-quotes + flake8-import-order + ipython + ipykernel + psycopg2-binary + +tests = pytest pytest-cov - psycopg2-binary + pytest-asyncio == 0.18.3 + mock [tool:pytest] addopts = --ignore setup.py --ignore codebuild/ --doctest-modules --cov-report term-missing --disable-warnings --cov . From 5a8c3bdd46d7a52a30fcb8b98e22b012ab97ae89 Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Tue, 4 Apr 2023 05:53:26 -0700 Subject: [PATCH 4/4] refactor: remove unused instance variables (#141) Removes `MANETranscript.hgvs_parser` and `UTADatabase.liftover_38_to_37` --- cool_seq_tool/data_sources/mane_transcript.py | 2 -- cool_seq_tool/data_sources/uta_database.py | 16 +--------------- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/cool_seq_tool/data_sources/mane_transcript.py b/cool_seq_tool/data_sources/mane_transcript.py index d98b17da..62655e08 100644 --- a/cool_seq_tool/data_sources/mane_transcript.py +++ b/cool_seq_tool/data_sources/mane_transcript.py @@ -10,7 +10,6 @@ import math from typing import Optional, Set, Tuple, Dict, List, Union -import hgvs.parser import pandas as pd from cool_seq_tool.schemas import AnnotationLayer, Assembly, MappedManeData, \ @@ -47,7 +46,6 @@ def __init__(self, seqrepo_access: SeqRepoAccess, :param GeneNormalizer gene_normalizer: Access to Gene Normalizer """ self.seqrepo_access = seqrepo_access - self.hgvs_parser = hgvs.parser.Parser() self.transcript_mappings = transcript_mappings self.mane_transcript_mappings = mane_transcript_mappings self.uta_db = uta_db diff --git a/cool_seq_tool/data_sources/uta_database.py b/cool_seq_tool/data_sources/uta_database.py index 089b0a3e..a4b1c7b9 100644 --- a/cool_seq_tool/data_sources/uta_database.py +++ b/cool_seq_tool/data_sources/uta_database.py @@ -23,7 +23,6 @@ # Environment variables for paths to chain files for pyliftover LIFTOVER_CHAIN_37_TO_38 = environ.get("LIFTOVER_CHAIN_37_TO_38") -LIFTOVER_CHAIN_38_TO_37 = environ.get("LIFTOVER_CHAIN_38_TO_37") class UTADatabase: @@ -33,8 +32,7 @@ def __init__( self, db_url: str = UTA_DB_URL, db_pwd: str = "", - chain_file_37_to_38: Optional[str] = None, - chain_file_38_to_37: Optional[str] = None + chain_file_37_to_38: Optional[str] = None ) -> None: """Initialize DB class. Downstream libraries should use the create() method to construct a new instance: await UTADatabase.create() @@ -46,10 +44,6 @@ def __init__( This is used for pyliftover. If this is not provided, will check to see if LIFTOVER_CHAIN_37_TO_38 env var is set. If neither is provided, will allow pyliftover to download a chain file from UCSC - :param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly. - This is used for pyliftover. If this is not provided, will check to see if - LIFTOVER_CHAIN_38_TO_37 env var is set. If neither is provided, will allow - pyliftover to download a chain file from UCSC """ self.schema = None self.db_url = db_url @@ -63,12 +57,6 @@ def __init__( else: self.liftover_37_to_38 = LiftOver("hg19", "hg38") - chain_file_38_to_37 = chain_file_38_to_37 or LIFTOVER_CHAIN_38_TO_37 - if chain_file_38_to_37: - self.liftover_38_to_37 = LiftOver(chain_file_38_to_37) - else: - self.liftover_38_to_37 = LiftOver("hg38", "hg19") - @staticmethod def _update_db_url(db_pwd: str, db_url: str) -> str: """Return new db_url containing password. @@ -1022,8 +1010,6 @@ def get_liftover(self, chromosome: str, pos: int, if liftover_to_assembly == Assembly.GRCH38: liftover = self.liftover_37_to_38.convert_coordinate(chromosome, pos) - elif liftover_to_assembly == Assembly.GRCH37: - liftover = self.liftover_38_to_37.convert_coordinate(chromosome, pos) else: logger.warning(f"{liftover_to_assembly} assembly not supported") liftover = None