Skip to content

Commit

Permalink
fix: handle identifiers with slash (#367)
Browse files: browse the repository at this point in the history
  • Loading branch information
wilson-nomic authored Dec 12, 2024
1 parent f102513 commit 119aab8
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion nomic/dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -773,10 +773,13 @@ def __init__(
assert identifier is not None or dataset_id is not None, "You must pass a dataset identifier"
# Normalize identifier.
if identifier is not None:
identifier = unicodedata.normalize("NFD", identifier) # normalize accents
s = identifier.split("/", 1)
identifier = unicodedata.normalize("NFD", s[-1]) # normalize accents
identifier = identifier.lower().replace(" ", "-").replace("_", "-")
identifier = re.sub(r"[^a-z0-9-]", "", identifier)
identifier = re.sub(r"-+", "-", identifier)
if len(s) == 2:
identifier = f"{s[0]}/{identifier}"

super().__init__()

Expand Down

0 comments on commit 119aab8

Please sign in to comment.