def load_custom_phonemes(self, file_path):
    '''Load custom grapheme (spelling) to phoneme (pronunciation) entries.

    The file has the same line format as nltk's cmudict, i.e.
    ``WORD VARIANT_NUMBER PHONEME [PHONEME ...]``:
        A 1 AH0
        A. 1 EY1
        A 2 EY1
        A42128 1 EY1 F AO1 R T UW1 W AH1 N T UW1 EY1 T
        AAA 1 T R IH2 P AH0 L EY1

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped. Fields may be separated by any run of whitespace. Words are
    lower-cased before being stored. The variant number is not interpreted;
    variants of one word are kept in the order they appear in the file.

    Every word found in the file replaces the entry already stored under
    that word in ``self.cmu`` (if any) with the list of pronunciations
    read from the file. ``self.cmu`` is only modified once the whole file
    has been parsed successfully.

    Args:
        file_path: path of the custom dictionary file (read as UTF-8).

    Raises:
        ValueError: if a non-comment line has fewer than three fields
            (word, variant number, at least one phoneme).
        OSError: if the file cannot be opened or read.
    '''
    custom = {}
    with open(file_path, encoding='utf-8') as fin:
        for line_no, line in enumerate(fin, start=1):
            fields = line.split()
            # Skip blank lines and comments (also when the '#' is indented).
            if not fields or fields[0].startswith('#'):
                continue
            if len(fields) < 3:
                raise ValueError(
                    '{}:{}: expected "WORD N PHONEME ...", got {!r}'.format(
                        file_path, line_no, line.rstrip('\n')))
            word = fields[0].lower()
            # fields[1] is the variant number; the phonemes follow it.
            # Collect all variants of a word instead of keeping only the last.
            custom.setdefault(word, []).append(fields[2:])

    # Just replace the entry if the word is already in cmudict.
    for word, pronunciations in custom.items():
        self.cmu[word] = pronunciations