Skip to content

Commit 90ba34b

Browse files
authored
Merge pull request #109 from icbi-lab/10x-io-test
Merge read_10x_vdj and read_10x_vdj_csv.
2 parents 80446d6 + 502f08b commit 90ba34b

File tree

5 files changed

+48
-47
lines changed

5 files changed

+48
-47
lines changed

docs/api.rst

-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ Input/Output
1717

1818
read_h5ad
1919
read_10x_vdj
20-
read_10x_vdj_csv
2120
read_tracer
2221

2322

scirpy/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
__author__ = ", ".join(["Gregor Sturm", "Tamas Szabo"])
88

99
from scanpy import AnnData, read_h5ad
10-
from ._io import read_10x_vdj, read_tracer, read_10x_vdj_csv
10+
from ._io import read_10x_vdj, read_tracer
1111
from . import _preprocessing as pp
1212
from . import _tools as tl
1313
from . import _plotting as pl

scirpy/_io/_io.py

+36-41
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,11 @@ def _process_tcr_cell(tcr_obj: TcrCell) -> dict:
7474
res_dict["cell_id"] = tcr_obj.cell_id
7575
chain_dict = dict()
7676
for c in ["TRA", "TRB"]:
77+
# sorting subordinately by raw and cdr3 ensures consistency
78+
# between load from json and load from csv.
7779
tmp_chains = sorted(
7880
[x for x in tcr_obj.chains if x.chain_type == c and x.is_productive],
79-
key=lambda x: x.expr,
81+
key=lambda x: (x.expr, x.expr_raw, x.cdr3),
8082
reverse=True,
8183
)
8284
res_dict["multi_chain"] = res_dict.get("multi_chain", False) | (
@@ -126,23 +128,8 @@ def _process_tcr_cell(tcr_obj: TcrCell) -> dict:
126128
return res_dict
127129

128130

129-
@_doc_params(doc_working_model=doc_working_model)
130-
def read_10x_vdj(path: str, filtered: bool = True) -> AnnData:
131-
"""Read TCR data from a 10x genomics sample.
132-
133-
{doc_working_model}
134-
135-
Parameters
136-
----------
137-
path
138-
Path to all_contig_annotations.json
139-
filtered
140-
Only keep filtered contig annotations (= is_cell and high_confidence)
141-
142-
Returns
143-
-------
144-
AnnData object with TCR data in `obs` for each cell.
145-
"""
131+
def _read_10x_vdj_json(path: str, filtered: bool = True) -> AnnData:
132+
"""Read TCR data from a 10x genomics `all_contig_annotations.json` file"""
146133
with open(path, "r") as f:
147134
cells = json.load(f)
148135

@@ -230,29 +217,8 @@ def read_10x_vdj(path: str, filtered: bool = True) -> AnnData:
230217
return _tcr_objs_to_anndata(tcr_objs.values())
231218

232219

233-
@_doc_params(doc_working_model=doc_working_model)
234-
def read_10x_vdj_csv(path: str, filtered: bool = True) -> AnnData:
235-
"""Read TCR data from a 10x genomics `_contig_annotations.csv` file
236-
237-
If the `all_contig_annotations.json` file is available it is perferable!
238-
For instance, the `csv` file does not contain information about
239-
junctions.
240-
241-
{doc_working_model}
242-
243-
Parameters
244-
----------
245-
path
246-
Path to filterd_contig_annotations.csv or all_contig_annotationgs.csv
247-
filtered
248-
Only keep filtered contig annotations (= is_cell and high_confidence)
249-
If using `filtered_contig_annotations.csv` already, this option
250-
is futile.
251-
252-
Returns
253-
-------
254-
AnnData object with TCR data in `obs` for each cell.
255-
"""
220+
def _read_10x_vdj_csv(path: str, filtered: bool = True) -> AnnData:
221+
"""Read TCR data from a 10x genomics `_contig_annotations.csv` file """
256222
df = pd.read_csv(path)
257223

258224
tcr_objs = {}
@@ -283,6 +249,35 @@ def read_10x_vdj_csv(path: str, filtered: bool = True) -> AnnData:
283249
return _tcr_objs_to_anndata(tcr_objs.values())
284250

285251

252+
@_doc_params(doc_working_model=doc_working_model)
253+
def read_10x_vdj(path: str, filtered: bool = True) -> AnnData:
254+
"""Read TCR data from 10x Genomics cell-ranger output.
255+
256+
Supports `all_contig_annotations.json` and `{{all,filtered}}_contig_annotations.csv`.
257+
If the json file is available, it is preferable as it additionally
258+
contains information about VDJ-junction insertions.
259+
260+
{doc_working_model}
261+
262+
Parameters
263+
----------
264+
path
265+
Path to `all_contig_annotations.json`, `filtered_contig_annotations.csv` or `all_contig_annotations.csv`
266+
filtered
267+
Only keep filtered contig annotations (= is_cell and high_confidence)
268+
If using `filtered_contig_annotations.csv` already, this option
269+
is futile.
270+
271+
Returns
272+
-------
273+
AnnData object with TCR data in `obs` for each cell.
274+
"""
275+
if path.endswith("json"):
276+
return _read_10x_vdj_json(path, filtered)
277+
else:
278+
return _read_10x_vdj_csv(path, filtered)
279+
280+
286281
@_doc_params(doc_working_model=doc_working_model)
287282
def read_tracer(path: str) -> AnnData:
288283
"""Read data from TraCeR.
Binary file not shown.

tests/test_io.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,26 @@
1-
from scirpy import read_10x_vdj, read_tracer, read_10x_vdj_csv
1+
from scirpy import read_10x_vdj, read_tracer
22
from scirpy._util import _is_na, _is_false
33
import numpy as np
44
import pytest
55

66

7-
# def test_read_10x_example():
8-
# anndata = read_10x_vdj("tutorial/example_data/10x/all_contig_annotations.json")
7+
def test_read_10x_example():
8+
"""Test that a full 10x CSV table can be imported without errors.
9+
10+
Test-dataset from https://support.10xgenomics.com/single-cell-vdj/datasets/3.1.0/vdj_nextgem_hs_pbmc3
11+
under CC-BY-4.0
12+
"""
13+
anndata = read_10x_vdj(
14+
"tests/data/10x/vdj_nextgem_hs_pbmc3_t_filtered_contig_annotations.csv.gz"
15+
)
916

1017

1118
# def test_read_tracer_example():
1219
# anndata = read_tracer("tutorial/example_data/tracer/tracer_100")
1320

1421

1522
def test_read_10x_csv():
16-
anndata = read_10x_vdj_csv("tests/data/10x/filtered_contig_annotations.csv")
23+
anndata = read_10x_vdj("tests/data/10x/filtered_contig_annotations.csv")
1724
obs = anndata.obs
1825
assert obs.shape[0] == 4
1926
cell1 = obs.iloc[1, :]

0 commit comments

Comments
 (0)