Skip to content

Commit

Permalink
check word sets against lexemes file
Browse files Browse the repository at this point in the history
  • Loading branch information
lucafavatella committed Dec 27, 2016
1 parent e700398 commit 09c94e1
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 0 deletions.
1 change: 1 addition & 0 deletions ci
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ case "${1:?}" in
ci.d/lexemes_file_with_only_pysblgnt_lemmas "${RootDir:?}"/lexemes.yaml || S=1
ci.d/lexemes_file_unicode_normalized "${RootDir:?}"/lexemes.yaml || S=1
ci.d/word_sets_unicode_normalized "${RootDir:?}" || S=1
ci.d/word_sets_with_only_lexemes_from_lexemes_files "${RootDir:?}" "${RootDir:?}"/lexemes.yaml || S=1
exit ${S:?}
;;
esac
33 changes: 33 additions & 0 deletions ci.d/word_set_with_only_lexemes_from_lexemes_files
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env python3

import argparse
import sys

import yaml

from morphgnt.utils import load_wordset


def is_word_a_lexeme_in_lexemes_file(f, word, lexemes):
    """Report whether *word* is one of the *lexemes*.

    Returns 0 when ``word in lexemes`` holds.  Otherwise a diagnostic
    line naming the word is printed to the file object *f* and 1 is
    returned, so the result can be folded into a process exit status.
    """
    if word not in lexemes:
        message = "Word {} shall be removed from the set because not among lexemes in lexemes file.".format(word)
        print(message, file=f)
        return 1
    return 0


# Script body: check that every word of a word set file is a lexeme listed in
# the lexemes file.  Diagnostics go to stderr; the process exits with 0 when
# all words are known lexemes and 1 when at least one is not.
f = sys.stderr
e = 0  # exit status

argparser = argparse.ArgumentParser()
argparser.add_argument("wordset", help="word set file")
argparser.add_argument("lexemes", type=argparse.FileType('r'), help="lexemes file")

args = argparser.parse_args()
wordset = load_wordset(args.wordset)

# safe_load only builds plain YAML data and never arbitrary Python objects.
# A bare yaml.load() without an explicit Loader is unsafe on older PyYAML and
# is rejected with a TypeError by PyYAML >= 6.
# The "with" block closes the file that argparse opened.
with args.lexemes as lexemes_file:
    # An empty YAML document loads as None; treat it as "no lexemes" so that
    # the membership test below reports each word instead of raising TypeError.
    lexemes = yaml.safe_load(lexemes_file) or {}

for word in wordset:
    # Keep checking after a failure so that every offending word is reported.
    e = is_word_a_lexeme_in_lexemes_file(f, word, lexemes) or e

sys.exit(e)
11 changes: 11 additions & 0 deletions ci.d/word_sets_with_only_lexemes_from_lexemes_files
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/sh
#
# Run the single-word-set check on every missing_*.txt file found under a
# directory tree.
#
# Usage: word_sets_with_only_lexemes_from_lexemes_files ROOT_DIR LEXEMES_FILE
# Exit status: 0 when every check succeeded, 1 otherwise.

RootDir="${1:?}"
LexemesFile="${2:?}"

S=0
# The paths are handed to the inner shell as positional parameters by
# "-exec ... {} +" instead of being expanded from $(find ...), so paths that
# contain whitespace or glob characters are passed through intact.
# find exits non-zero when any "-exec ... +" invocation does (or when find
# itself fails), which is folded into S below.
find "${RootDir:?}" -name "missing_*.txt" -exec sh -c '
	Checker="${1:?}"
	LexemesFile="${2:?}"
	shift 2
	S=0
	for F in "$@"; do
		echo Checking that word set in "${F:?}" contains only lexemes from the lexemes file "${LexemesFile:?}"...
		"${Checker:?}" "${F:?}" "${LexemesFile:?}" || S=1
	done
	exit ${S:?}
' sh "${RootDir}"/ci.d/word_set_with_only_lexemes_from_lexemes_files "${LexemesFile:?}" {} + || S=1
exit ${S:?}

0 comments on commit 09c94e1

Please sign in to comment.