@@ -74,9 +74,11 @@ def _process_tcr_cell(tcr_obj: TcrCell) -> dict:
74
74
res_dict ["cell_id" ] = tcr_obj .cell_id
75
75
chain_dict = dict ()
76
76
for c in ["TRA" , "TRB" ]:
77
+ # sorting subordinately by raw and cdr3 ensures consistency
78
+ # between load from json and load from csv.
77
79
tmp_chains = sorted (
78
80
[x for x in tcr_obj .chains if x .chain_type == c and x .is_productive ],
79
- key = lambda x : x .expr ,
81
+ key = lambda x : ( x .expr , x . expr_raw , x . cdr3 ) ,
80
82
reverse = True ,
81
83
)
82
84
res_dict ["multi_chain" ] = res_dict .get ("multi_chain" , False ) | (
@@ -126,23 +128,8 @@ def _process_tcr_cell(tcr_obj: TcrCell) -> dict:
126
128
return res_dict
127
129
128
130
129
- @_doc_params (doc_working_model = doc_working_model )
130
- def read_10x_vdj (path : str , filtered : bool = True ) -> AnnData :
131
- """Read TCR data from a 10x genomics sample.
132
-
133
- {doc_working_model}
134
-
135
- Parameters
136
- ----------
137
- path
138
- Path to all_contig_annotations.json
139
- filtered
140
- Only keep filtered contig annotations (= is_cell and high_confidence)
141
-
142
- Returns
143
- -------
144
- AnnData object with TCR data in `obs` for each cell.
145
- """
131
+ def _read_10x_vdj_json (path : str , filtered : bool = True ) -> AnnData :
132
+ """Read TCR data from a 10x genomics `all_contig_annotations.json` file"""
146
133
with open (path , "r" ) as f :
147
134
cells = json .load (f )
148
135
@@ -230,29 +217,8 @@ def read_10x_vdj(path: str, filtered: bool = True) -> AnnData:
230
217
return _tcr_objs_to_anndata (tcr_objs .values ())
231
218
232
219
233
- @_doc_params (doc_working_model = doc_working_model )
234
- def read_10x_vdj_csv (path : str , filtered : bool = True ) -> AnnData :
235
- """Read TCR data from a 10x genomics `_contig_annotations.csv` file
236
-
237
- If the `all_contig_annotations.json` file is available it is perferable!
238
- For instance, the `csv` file does not contain information about
239
- junctions.
240
-
241
- {doc_working_model}
242
-
243
- Parameters
244
- ----------
245
- path
246
- Path to filterd_contig_annotations.csv or all_contig_annotationgs.csv
247
- filtered
248
- Only keep filtered contig annotations (= is_cell and high_confidence)
249
- If using `filtered_contig_annotations.csv` already, this option
250
- is futile.
251
-
252
- Returns
253
- -------
254
- AnnData object with TCR data in `obs` for each cell.
255
- """
220
+ def _read_10x_vdj_csv (path : str , filtered : bool = True ) -> AnnData :
221
+ """Read TCR data from a 10x genomics `_contig_annotations.csv` file """
256
222
df = pd .read_csv (path )
257
223
258
224
tcr_objs = {}
@@ -283,6 +249,35 @@ def read_10x_vdj_csv(path: str, filtered: bool = True) -> AnnData:
283
249
return _tcr_objs_to_anndata (tcr_objs .values ())
284
250
285
251
252
@_doc_params(doc_working_model=doc_working_model)
def read_10x_vdj(path: str, filtered: bool = True) -> AnnData:
    """Read TCR data from 10x Genomics cell-ranger output.

    Supports `all_contig_annotations.json` and `{{all,filtered}}_contig_annotations.csv`.
    If the json file is available, it is preferable as it additionally
    contains information about VDJ-junction insertions.

    {doc_working_model}

    Parameters
    ----------
    path
        Path to `all_contig_annotations.json`, `filtered_contig_annotations.csv`
        or `all_contig_annotations.csv`. The reader is chosen from the file
        name: names ending in `json` (case-insensitive) are read as json,
        everything else is read as csv.
    filtered
        Only keep filtered contig annotations (= is_cell and high_confidence)
        If using `filtered_contig_annotations.csv` already, this option
        is futile.

    Returns
    -------
    AnnData object with TCR data in `obs` for each cell.
    """
    # Dispatch on the stringified, lower-cased path so that `pathlib.Path`
    # objects (which have no `endswith`) and upper-case extensions such as
    # `.JSON` are routed to the json reader instead of failing or being
    # silently parsed as csv.
    if str(path).lower().endswith("json"):
        return _read_10x_vdj_json(path, filtered)
    return _read_10x_vdj_csv(path, filtered)
279
+
280
+
286
281
@_doc_params (doc_working_model = doc_working_model )
287
282
def read_tracer (path : str ) -> AnnData :
288
283
"""Read data from TraCeR.
0 commit comments