Skip to content

Commit

Permalink
fix: OAI-PMH crash for document containing control chars
Browse files Browse the repository at this point in the history
* Adds new `safety` exceptions.
* Removes control chars when the Dublin Core XML file is produced.
* Closes #867.

Co-Authored-by: Johnny Mariéthoz <[email protected]>
  • Loading branch information
jma committed Nov 22, 2022
1 parent feda000 commit ac86d20
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 2 deletions.
5 changes: 4 additions & 1 deletion scripts/test
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ fi
# | package | installed | affected | ID |
# +============================+===========+==========================+==========+
# | wtforms | 2.3.3 | <3.0.0a1 | 42852 |
# | wheel | 0.37.1 | <0.38.0 | 51499 |
# | sqlalchemy-utils | 0.35.0 | >=0.27.0 | 42194 |
# | safety | 1.10.3 | <2.2.0 | 51358 |
# | py | 1.11.0 | <=1.11.0 | 51457 |
# | nbconvert | 6.4.5 | <6.5.1 | 50792 |
# | lxml | 4.7.0 | <4.9.1 | 50748 |
# | flask-security | 3.0.0 | <3.1.0 | 45183 |
Expand All @@ -43,7 +46,7 @@ fi
# | celery | 5.1.2 | <5.2.0 | 42498 |
# | celery | 5.1.2 | <5.2.2 | 43738 |
# +==============================================================================+
safety check -i 42852 -i 42050 -i 42194 -i 45183 -i 42498 -i 44501 -i 43738 -i 47833 -i 50748 -i 50792
safety check -i 42852 -i 51499 -i 42194 -i 51358 -i 51457 -i 50792 -i 50748 -i 45183 -i 44501 -i 47833 -i 42498 -i 43738
pydocstyle sonar tests docs
isort --check-only --diff "${SCRIPT_PATH}/.."
autoflake -c -r --remove-all-unused-imports --ignore-init-module-imports . &> /dev/null || {
Expand Down
3 changes: 2 additions & 1 deletion sonar/modules/documents/serializers/dc.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"""Dublin Core serializer."""

from flask_resources.serializers import SerializerMixin
from invenio_oaiserver.utils import sanitize_unicode
from lxml import etree

from sonar.modules.documents.serializers.schemas.dc import DublinCoreSchema
Expand Down Expand Up @@ -102,7 +103,7 @@ def serialize_dict_to_etree(self, data):
f'{{http://purl.org/dc/elements/1.1/}}{elements[key]}',
attrs
)
field.text = val
field.text = sanitize_unicode(val)
return root


Expand Down
9 changes: 9 additions & 0 deletions tests/ui/documents/test_dc_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,15 @@ def test_descriptions_attributes(minimal_document):
}
]


def test_descriptions_xml_control_char(minimal_document):
    """Test XML serialization of an abstract containing a control char."""
    # The abstract value embeds the \x02 control character.
    abstract = {'language': 'fre', 'value': 'sous\x02évalués'}
    minimal_document['abstracts'] = [abstract]

    serializer = SonarDublinCoreXMLSerializer()
    record = dict(_source=minimal_document)
    assert serializer.serialize_object_xml(record)


def test_formats(minimal_document):
result = SonarDublinCoreXMLSerializer().transform_record(minimal_document)
assert result['formats'] == []
Expand Down

0 comments on commit ac86d20

Please sign in to comment.