Skip to content

Commit

Permalink
feat!: rename residue mode to coordinate type (#337)
Browse files Browse the repository at this point in the history
close #335

Note: A few positions are still adjusted by +/- 1 in places; I will revisit these in #224.
  • Loading branch information
korikuzma committed Aug 21, 2024
1 parent 3759edc commit 16db94e
Show file tree
Hide file tree
Showing 13 changed files with 217 additions and 197 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ All CoolSeqTool resources can be initialized by way of a top-level class instanc

```pycon
>>> from cool_seq_tool import CoolSeqTool
>>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
>>> from cool_seq_tool.schemas import AnnotationLayer, CoordinateType
>>> cst = CoolSeqTool()
>>> result = await cst.mane_transcript.get_mane_transcript(
... "NP_004324.2",
... 599,
... AnnotationLayer.PROTEIN,
... residue_mode=ResidueMode.INTER_RESIDUE,
... coordinate_type=CoordinateType.INTER_RESIDUE,
... )
>>> result.gene, result.refseq, result.status
('EGFR', 'NM_005228.5', <TranscriptPriority.MANE_SELECT: 'mane_select'>)
Expand Down
6 changes: 3 additions & 3 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,16 @@ def linkcode_resolve(domain, info):


def _clip_rst_tables(app: Sphinx, what: str, name: str, obj: ModuleType, options: Options, lines: List[str]):
"""The ResidueMode docstring contains an RST table and an ASCII table because
"""The CoordinateType docstring contains an RST table and an ASCII table because
the former gets omitted in IDEs like VSCode and the latter won't render properly in
Sphinx docs. This chops out the ASCII table when rendering autodocs.
"""
if what == "class" and name == "cool_seq_tool.schemas.ResidueMode":
if what == "class" and name == "cool_seq_tool.schemas.CoordinateType":
for i in range(len(lines) -1, -1, -1):
line = lines[i]
if line.count("|") >= 8:
del lines[i]
print("Running preprocessing on ResidueMode docstring...")
print("Running preprocessing on CoordinateType docstring...")

def setup(app: Sphinx):
app.connect("autodoc-process-docstring", _clip_rst_tables)
10 changes: 5 additions & 5 deletions src/cool_seq_tool/handlers/seqrepo_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from ga4gh.vrs.dataproxy import SeqRepoDataProxy

from cool_seq_tool.schemas import Assembly, ResidueMode
from cool_seq_tool.schemas import Assembly, CoordinateType
from cool_seq_tool.utils import get_inter_residue_pos, process_chromosome_input

_logger = logging.getLogger(__name__)
Expand All @@ -29,7 +29,7 @@ def get_reference_sequence(
ac: str,
start: int | None = None,
end: int | None = None,
residue_mode: ResidueMode = ResidueMode.RESIDUE,
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
) -> tuple[str, str | None]:
"""Get reference sequence for an accession given a start and end position. If
``start`` and ``end`` are not given, returns the entire reference sequence.
Expand All @@ -46,7 +46,7 @@ def get_reference_sequence(
:param start: Start pos change
:param end: End pos change. If ``None`` assumes both ``start`` and ``end`` have
same values, if ``start`` exists.
:param residue_mode: Residue mode for ``start`` and ``end``
:param coordinate_type: Coordinate type for ``start`` and ``end``
:return: Sequence at position (if accession and positions actually
exist, else return empty string), warning if any
"""
Expand All @@ -55,11 +55,11 @@ def get_reference_sequence(
msg = f"start ({start}) cannot be greater than end ({end})"
return "", msg

start, end = get_inter_residue_pos(start, end, residue_mode)
start, end = get_inter_residue_pos(start, end, coordinate_type)
if start == end:
end += 1
else:
if start is not None and residue_mode == ResidueMode.RESIDUE:
if start is not None and coordinate_type == CoordinateType.RESIDUE:
start -= 1

try:
Expand Down
28 changes: 14 additions & 14 deletions src/cool_seq_tool/mappers/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
from cool_seq_tool.schemas import AnnotationLayer, Assembly, ResidueMode
from cool_seq_tool.schemas import AnnotationLayer, Assembly, CoordinateType
from cool_seq_tool.sources import TranscriptMappings, UtaDatabase


Expand Down Expand Up @@ -32,14 +32,14 @@ async def p_to_c(
p_ac: str,
p_start_pos: int,
p_end_pos: int,
residue_mode: ResidueMode = ResidueMode.RESIDUE,
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
) -> tuple[dict | None, str | None]:
"""Translate protein representation to cDNA representation.
:param p_ac: Protein RefSeq accession
:param p_start_pos: Protein start position
:param p_end_pos: Protein end position
:param residue_mode: Residue mode for ``p_start_pos`` and ``p_end_pos``
:param coordinate_type: Coordinate type for ``p_start_pos`` and ``p_end_pos``
:return: Tuple containing:
* cDNA representation (accession, codon range positions for corresponding
Expand All @@ -66,7 +66,7 @@ async def p_to_c(
# 1 amino acid maps to 3 nucleotides in the codon
# Since we have the end of the codon, we will subtract 2 to get the start of the
# codon. We want to return inter-residue (0-based), so we subtract 1 from this.
if residue_mode == ResidueMode.RESIDUE:
if coordinate_type == CoordinateType.RESIDUE:
c_pos = (p_start_pos * 3) - 3, p_end_pos * 3
else:
if p_start_pos == p_end_pos:
Expand All @@ -79,7 +79,7 @@ async def p_to_c(
"c_start_pos": c_pos[0],
"c_end_pos": c_pos[1],
"cds_start": cds_start,
"residue_mode": ResidueMode.INTER_RESIDUE.value,
"coordinate_type": CoordinateType.INTER_RESIDUE.value,
}, None

async def _get_cds_start(self, c_ac: str) -> tuple[int | None, str | None]:
Expand All @@ -105,7 +105,7 @@ async def c_to_g(
c_start_pos: int,
c_end_pos: int,
cds_start: int | None = None,
residue_mode: ResidueMode = ResidueMode.RESIDUE,
coordinate_type: CoordinateType = CoordinateType.RESIDUE,
target_genome_assembly: bool = Assembly.GRCH38,
) -> tuple[dict | None, str | None]:
"""Translate cDNA representation to genomic representation
Expand All @@ -125,9 +125,9 @@ async def c_to_g(
if any(
(
c_start_pos == c_end_pos,
(residue_mode == ResidueMode.INTER_RESIDUE)
(coordinate_type == CoordinateType.INTER_RESIDUE)
and ((c_end_pos - c_start_pos) % 3 != 0),
(residue_mode == ResidueMode.RESIDUE)
(coordinate_type == CoordinateType.RESIDUE)
and ((c_end_pos - (c_start_pos - 1)) % 3 != 0),
)
):
Expand All @@ -146,7 +146,7 @@ async def c_to_g(
return None, warning

# Change to inter-residue
if residue_mode == ResidueMode.RESIDUE:
if coordinate_type == CoordinateType.RESIDUE:
c_start_pos -= 1

# Get aligned genomic and transcript data
Expand Down Expand Up @@ -194,7 +194,7 @@ async def c_to_g(
"g_ac": alt_ac,
"g_start_pos": g_start_pos,
"g_end_pos": g_end_pos,
"residue_mode": ResidueMode.INTER_RESIDUE.value,
"coordinate_type": CoordinateType.INTER_RESIDUE.value,
}
else:
warning = (
Expand All @@ -209,7 +209,7 @@ async def p_to_g(
p_ac: str,
p_start_pos: int,
p_end_pos: int,
residue_mode: ResidueMode = ResidueMode.INTER_RESIDUE,
coordinate_type: CoordinateType = CoordinateType.INTER_RESIDUE,
target_genome_assembly: Assembly = Assembly.GRCH38,
) -> tuple[dict | None, str | None]:
"""Translate protein representation to genomic representation, by way of
Expand All @@ -218,7 +218,7 @@ async def p_to_g(
:param p_ac: Protein RefSeq accession
:param p_start_pos: Protein start position
:param p_end_pos: Protein end position
:param residue_mode: Residue mode for ``p_start_pos`` and ``p_end_pos``.
:param coordinate_type: Coordinate type for ``p_start_pos`` and ``p_end_pos``.
:param target_genome_assembly: Genome assembly to get genomic data for
:return: Tuple containing:
Expand All @@ -227,7 +227,7 @@ async def p_to_g(
* Warnings, if conversion to cDNA or genomic coordinates fails.
"""
c_data, warning = await self.p_to_c(
p_ac, p_start_pos, p_end_pos, residue_mode=residue_mode
p_ac, p_start_pos, p_end_pos, coordinate_type=coordinate_type
)
if not c_data:
return None, warning
Expand All @@ -238,7 +238,7 @@ async def p_to_g(
c_data["c_start_pos"],
c_data["c_end_pos"],
c_data["cds_start"],
residue_mode=ResidueMode.INTER_RESIDUE,
coordinate_type=CoordinateType.INTER_RESIDUE,
target_genome_assembly=target_genome_assembly,
)
return g_data, warning
25 changes: 11 additions & 14 deletions src/cool_seq_tool/mappers/exon_genomic_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@
from cool_seq_tool.schemas import (
AnnotationLayer,
Assembly,
CoordinateType,
GenomicData,
GenomicDataResponse,
ResidueMode,
Strand,
TranscriptExonData,
TranscriptExonDataResponse,
)
from cool_seq_tool.sources.mane_transcript_mappings import ManeTranscriptMappings
from cool_seq_tool.sources.uta_database import GenesGenomicAcs, UtaDatabase
from cool_seq_tool.utils import get_inter_residue_pos, service_meta
from cool_seq_tool.utils import service_meta

CoordinatesResponseType = TypeVar(
"CoordinatesResponseType", GenomicDataResponse, TranscriptExonDataResponse
Expand Down Expand Up @@ -243,8 +243,8 @@ async def genomic_to_transcript_exon_coordinates(
transcript: str | None = None,
get_nearest_transcript_junction: bool = False,
gene: str | None = None,
residue_mode: Literal[ResidueMode.INTER_RESIDUE]
| Literal[ResidueMode.RESIDUE] = ResidueMode.RESIDUE,
coordinate_type: Literal[CoordinateType.INTER_RESIDUE]
| Literal[CoordinateType.RESIDUE] = CoordinateType.RESIDUE,
) -> GenomicDataResponse:
"""Get transcript data for genomic data, lifted over to GRCh38.
Expand Down Expand Up @@ -288,7 +288,7 @@ async def genomic_to_transcript_exon_coordinates(
breakpoint for the 3' end.
:param gene: gene name. Ideally, HGNC symbol. Must be given if no ``transcript``
value is provided.
:param residue_mode: Residue mode for ``start`` and ``end``
:param coordinate_type: Coordinate type for ``start`` and ``end``
:return: Genomic data (inter-residue coordinates)
"""
resp = GenomicDataResponse(
Expand All @@ -309,10 +309,9 @@ async def genomic_to_transcript_exon_coordinates(
gene = gene.upper().strip()

if start:
if residue_mode == ResidueMode.RESIDUE:
# zero-based for UTA
if coordinate_type == CoordinateType.RESIDUE:
# inter-residue based for UTA
start -= 1
residue_mode = ResidueMode.ZERO
start_data = await self._genomic_to_transcript_exon_coordinate(
start,
chromosome=chromosome,
Expand All @@ -332,7 +331,6 @@ async def genomic_to_transcript_exon_coordinates(

if end:
end -= 1
residue_mode = ResidueMode.ZERO
end_data = await self._genomic_to_transcript_exon_coordinate(
end,
chromosome=chromosome,
Expand Down Expand Up @@ -500,7 +498,7 @@ async def _genomic_to_transcript_exon_coordinate(
) -> TranscriptExonDataResponse:
"""Convert individual genomic data to transcript data
:param pos: Genomic position (zero-based)
:param pos: Genomic position (inter-residue based)
:param chromosome: Chromosome. Must give chromosome without a prefix
(i.e. ``1`` or ``X``). If not provided, must provide ``alt_ac``.
If ``alt_ac`` is also provided, ``alt_ac`` will be used.
Expand Down Expand Up @@ -723,17 +721,16 @@ async def _set_mane_genomic_data(
position.
:return: Warnings if found
"""
start, end = get_inter_residue_pos(pos, pos, residue_mode=ResidueMode.ZERO)
mane_data: (
CdnaRepresentation | None
) = await self.mane_transcript.get_mane_transcript(
alt_ac,
start,
end,
pos,
pos + 1,
AnnotationLayer.GENOMIC,
gene=gene,
try_longest_compatible=True,
residue_mode=ResidueMode.INTER_RESIDUE,
coordinate_type=CoordinateType.INTER_RESIDUE,
)
if not mane_data:
msg = f"Unable to find mane data for {alt_ac} with position {pos}"
Expand Down
Loading

0 comments on commit 16db94e

Please sign in to comment.