Skip to content

Commit

Permalink
Add Spacy utils
Browse files Browse the repository at this point in the history
  • Loading branch information
ajdapretnar committed Jul 5, 2024
1 parent 61f7885 commit 4a3d360
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 0 deletions.
4 changes: 4 additions & 0 deletions orangecontrib/text/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,10 @@ def has_tokens(self):
""" Return whether corpus is preprocessed or not. """
return self._tokens is not None

def has_tags(self):
""" Return True if the corpus has POS tags, i.e. `_pos_tags` is not None. """
return self._pos_tags is not None

def _base_tokens(self):
from orangecontrib.text.preprocess import BASE_TRANSFORMER, \
BASE_TOKENIZER, PreprocessorList
Expand Down
2 changes: 2 additions & 0 deletions orangecontrib/text/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@
"vi": "Vietnamese",
"zh": "Chinese",
"zh_char": "Chinese - Chinese Characters",
# spaCy's language code for its multi-language models
"xx": "Multi-language",
None: None,
}
LANG2ISO = {lang: code for code, lang in ISO2LANG.items()}
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ serverfiles
simhash >=1.11
shapely >=2.0
six
spacy
tweepy >=4.0.0
ufal.udpipe >=1.2.0.3
trimesh >=3.9.8 # required by alphashape
Expand Down

0 comments on commit 4a3d360

Please sign in to comment.