File tree 2 files changed +6
-13
lines changed
2 files changed +6
-13
lines changed Original file line number Diff line number Diff line change 42
42
it's recommended to specify model paths in config.yaml.
43
43
"""
44
44
45
- model_list = [
46
- # {"model_path": "model_name/G_9000.pth", "config_path": "model_name/config.json"},
47
- ]
48
-
49
45
50
46
@dataclass
51
47
class AsDictMixin :
@@ -391,6 +387,9 @@ class LanguageIdentification(AsDictMixin):
391
387
espeak_library : str = r"C:/Program Files/eSpeak NG/libespeak-ng.dll" if "win" in sys .platform else ""
392
388
# Languages to detect automatically, e.g. zh ja ko en. If left empty, the languages are determined from the text_cleaners specified in config.json.
393
389
language_automatic_detect : list = field (default_factory = list )
390
+ split_pattern : str = r'[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\>\=\?\@\[\]\{\}\\\\\^\_\`' \
391
+ r'\!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」' \
392
+ r'『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘\'\‛\“\”\„\‟…‧﹏.]+'
394
393
395
394
396
395
@dataclass
@@ -476,9 +475,6 @@ def load_config():
476
475
else :
477
476
logging .info ("config.yaml is empty, initializing config.yaml..." )
478
477
479
- # Load default models from config.py.
480
- # config.update_config(model_list)
481
-
482
478
# If parameters are incomplete, they will be automatically filled in upon saving.
483
479
Config .save_config (config )
484
480
Original file line number Diff line number Diff line change 2
2
3
3
import regex as re
4
4
5
+ from contants import config
5
6
from utils .data_utils import check_is_none
6
7
from utils .classify_language import classify_language , split_alpha_nonalpha
7
8
@@ -18,9 +19,7 @@ def _expand_hyphens(text):
18
19
19
20
20
21
def markup_language (text : str , target_languages : list = None ) -> str :
21
- pattern = r'[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\>\=\?\@\[\]\{\}\\\\\^\_\`' \
22
- r'\!?。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」' \
23
- r'『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘\'\‛\“\”\„\‟…‧﹏.]+'
22
+ pattern = config .LanguageIdentification .split_pattern
24
23
sentences = re .split (pattern , text )
25
24
26
25
pre_lang = ""
@@ -51,9 +50,7 @@ def markup_language(text: str, target_languages: list = None) -> str:
51
50
52
51
def split_languages (text : str , target_languages : list = None , segment_size : int = 50 ,
53
52
expand_abbreviations : bool = False , expand_hyphens : bool = False ) -> list :
54
- pattern = r'[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\>\=\?\@\[\]\{\}\\\\\^\_\`' \
55
- r'\!?\。"#$%&'()*+,-/:;<=>@[\]^_`{|}~⦅⦆「」、、〃》「」' \
56
- r'『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘\'\‛\“\”\„\‟…‧﹏.]+'
53
+ pattern = config .language_identification .split_pattern
57
54
sentences = re .split (pattern , text )
58
55
59
56
pre_lang = ""
You can’t perform that action at this time.
0 commit comments