Skip to content

Commit fb21600

Browse files
authored
Merge pull request #283 from zktuong/master
Fix an issue where serialized AIRR chains were not filtered for `include_fields`.
2 parents f258588 + b3b1106 commit fb21600

File tree

3 files changed

+29
-12
lines changed

3 files changed

+29
-12
lines changed

docs/references.bib

+6-6
Original file line numberDiff line numberDiff line change
@@ -411,12 +411,12 @@ @article{Lindeman2018
411411
}
412412

413413
@article{Stephenson2021,
414-
doi = {10.1101/2021.01.13.21249725},
415-
url = {https://doi.org/10.1101/2021.01.13.21249725},
414+
doi = {10.1038/s41591-021-01329-2},
415+
url = {https://www.nature.com/articles/s41591-021-01329-2},
416416
year = {2021},
417-
month = jan,
418-
publisher = {Cold Spring Harbor Laboratory},
417+
month = apr,
418+
publisher = {Springer Science and Business Media {LLC}},
419419
author = {Emily Stephenson and Gary Reynolds and Rachel A Botting and Fernando J Calero-Nieto and Michael Morgan and Zewen Kelvin Tuong and Karsten Bach and Waradon Sungnak and Kaylee B Worlock and Masahiro Yoshida and Natsuhiko Kumasaka and Katarzyna Kania and Justin Engelbert and Bayanne Olabi and Jarmila Stremenova Spegarova and Nicola K Wilson and Nicole Mende and Laura Jardine and Louis CS Gardner and Issac Goh and Dave Horsfall and Jim McGrath and Simone Webb and Michael W. Mather and Rik GH Lindeboom and Emma Dann and Ni Huang and Krzysztof Polanski and Elena Prigmore and Florian Gothe and Jonathan Scott and Rebecca P Payne and Kenneth F Baker and Aidan T Hanrath and Ina CD Schim van der Loeff and Andrew S Barr and Amada Sanchez-Gonzalez and Laura Bergamaschi and Federica Mescia and Josephine L Barnes and Eliz Kilich and Angus de Wilton and Anita Saigal and Aarash Saleh and Sam M Janes and Claire M Smith and Nusayhah Gopee and Caroline Wilson and Paul Coupland and Jonathan M Coxhead and Vladimir Y Kiselev and Stijn van Dongen and Jaume Bacardit and Hamish W King and Anthony J Rostron and A John Simpson and Sophie Hambleton and Elisa Laurenti and Paul A Lyons and Kerstin B Meyer and Marko Z Nikolic and Christopher JA Duncan and Ken Smith and Sarah A Teichmann and Menna R Clatworthy and John C Marioni and Berthold Gottgens and Muzlifah Haniffa},
420-
title = {The cellular immune response to {COVID}-19 deciphered by single cell multi-omics across three {UK} centres},
421-
journal = {BioRxiv}
420+
title = {Single-cell multi-omics analysis of the immune response in {COVID}-19},
421+
journal = {Nature Medicine}
422422
}

scirpy/io/_datastructures.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,9 @@ def add_serialized_chains(self, serialized_chains: str) -> None:
163163
if not _is_na2(serialized_chains):
164164
tmp_chains = json.loads(serialized_chains)
165165
for chain in tmp_chains:
166-
self.add_chain(chain)
166+
tmp_chain = AirrCell.empty_chain_dict()
167+
tmp_chain.update(chain)
168+
self.add_chain(tmp_chain)
167169

168170
def _split_chains(self) -> Tuple[bool, dict]:
169171
"""
@@ -234,7 +236,9 @@ def _key_sort_chains(chain) -> Tuple:
234236
return tuple(-1 if x is None else x for x in sort_tuple)
235237

236238
@staticmethod
237-
def _serialize_chains(chains: List[MutableMapping]) -> str:
239+
def _serialize_chains(
240+
chains: List[MutableMapping], include_fields: Optional[Collection[str]] = None
241+
) -> str:
238242
"""Serialize chains into a JSON object. This is useful for storing
239243
an arbitrary number of extra chains in a single column of a dataframe."""
240244
# convert numpy dtypes to python types
@@ -245,7 +249,13 @@ def _serialize_chains(chains: List[MutableMapping]) -> str:
245249
chain[k] = chain[k].item()
246250
except AttributeError:
247251
pass
248-
return json.dumps(chains)
252+
253+
# Filter chains for `include_fields`
254+
chains_filtered = [
255+
{k: v for k, v in chain.items() if k in include_fields} for chain in chains
256+
]
257+
258+
return json.dumps(chains_filtered)
249259

250260
def to_airr_records(self) -> Iterable[dict]:
251261
"""Iterate over chains as AIRR-Rearrangement compliant dictionaries.
@@ -292,7 +302,9 @@ def to_scirpy_record(
292302
include_fields.add("cell_id")
293303

294304
res_dict["multi_chain"], chain_dict = self._split_chains()
295-
res_dict["extra_chains"] = self._serialize_chains(chain_dict.pop("extra"))
305+
res_dict["extra_chains"] = self._serialize_chains(
306+
chain_dict.pop("extra"), include_fields=include_fields
307+
)
296308

297309
# add cell-level attributes
298310
for key in self:

scirpy/io/_io.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -663,10 +663,12 @@ def to_dandelion(adata: AnnData):
663663

664664

665665
@_doc_params(doc_working_model=doc_working_model)
666-
def from_dandelion(dandelion, transfer=False) -> AnnData:
666+
def from_dandelion(dandelion, transfer: bool = False, **kwargs) -> AnnData:
667667
"""\
668668
Import data from `Dandelion <https://github.com/zktuong/dandelion>`_ (:cite:`Stephenson2021`).
669669
670+
Internally calls :func:`scirpy.io.read_airr`.
671+
670672
{doc_working_model}
671673
672674
Parameters
@@ -676,6 +678,8 @@ def from_dandelion(dandelion, transfer=False) -> AnnData:
676678
transfer
677679
Whether to execute `dandelion.tl.transfer` to transfer all data
678680
to the :class:`anndata.AnnData` instance.
681+
**kwargs
682+
Additional arguments passed to :func:`scirpy.io.read_airr`.
679683
680684
Returns
681685
-------
@@ -691,7 +695,8 @@ def from_dandelion(dandelion, transfer=False) -> AnnData:
691695
for col in dandelion_df.columns:
692696
dandelion_df.loc[dandelion_df[col] == "unassigned", col] = None
693697

694-
adata = read_airr(dandelion_df)
698+
adata = read_airr(dandelion_df, **kwargs)
699+
695700
if transfer:
696701
ddl.tl.transfer(
697702
adata, dandelion

0 commit comments

Comments
 (0)