-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
check word sets against lexemes file
- Loading branch information
1 parent
e700398
commit 09c94e1
Showing
3 changed files
with
45 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import sys | ||
|
||
import yaml | ||
|
||
from morphgnt.utils import load_wordset | ||
|
||
|
||
def is_word_a_lexeme_in_lexemes_file(f, word, lexemes):
    """Report whether *word* is absent from *lexemes*.

    Returns 0 when ``word in lexemes`` holds. Otherwise a one-line notice
    naming the word is printed to the file-like object *f* and 1 is
    returned.
    """
    missing = word not in lexemes
    if missing:
        notice = "Word {} shall be removed from the set because not among lexemes in lexemes file.".format(word)
        print(notice, file=f)
    # ``not in`` always yields a bool, so this is exactly 0 or 1.
    return int(missing)
|
||
|
||
# Script body: check every word of a word set against the lexemes file and
# exit with status 1 if at least one word is not a lexeme, 0 otherwise.

f = sys.stderr  # notices about offending words go to stderr
e = 0  # exit status

argparser = argparse.ArgumentParser()
argparser.add_argument("wordset", help="word set file")
argparser.add_argument("lexemes", type=argparse.FileType('r'), help="lexemes file")

args = argparser.parse_args()
wordset = load_wordset(args.wordset)

# yaml.safe_load instead of yaml.load: calling yaml.load without a Loader is
# deprecated since PyYAML 5.1 and raises TypeError on PyYAML >= 6; the safe
# loader also refuses to construct arbitrary Python objects from the file.
# The "with" block closes the handle that argparse.FileType opened.
with args.lexemes as lexemes_file:
    # An empty YAML document loads as None; treat it as "no lexemes" so the
    # membership tests below do not fail with a TypeError.
    lexemes = yaml.safe_load(lexemes_file) or {}

for word in wordset:
    # Keep checking after the first failure so every offending word is
    # reported; "or e" makes the status stick at 1 once any check fails.
    e = is_word_a_lexeme_in_lexemes_file(f, word, lexemes) or e

sys.exit(e)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/sh
# Run the word-set/lexemes check on every missing_*.txt file found under a
# root directory; exit 1 if the check fails for any file, 0 otherwise.
#
# Arguments:
#   $1  root directory (searched for missing_*.txt; also holds ci.d/)
#   $2  lexemes file passed through to the checker

RootDir="${1:?}"
LexemesFile="${2:?}"

S=0
# Let find hand the paths to an inline sh loop instead of iterating over
# $(find ...): command substitution word-splits and glob-expands its output,
# which breaks on paths containing whitespace or wildcard characters.
# find exits non-zero when any "-exec ... {} +" invocation does (or when the
# traversal itself fails), so a single "|| S=1" captures every failure.
find "${RootDir}" -name "missing_*.txt" -exec sh -c '
    root=$1
    lexemes=$2
    shift 2
    status=0
    for f in "$@"; do
        printf "%s\n" "Checking that word set in $f contains only lexemes from the lexemes file $lexemes..."
        "$root"/ci.d/word_set_with_only_lexemes_from_lexemes_files "$f" "$lexemes" || status=1
    done
    exit "$status"
' sh "${RootDir}" "${LexemesFile}" {} + || S=1
exit "${S}"