@@ -10,6 +10,7 @@ void PrintUsage(const char** argv) {
10
10
std::cerr << " train alphabet.txt dataset.txt resultModel.bin - train model" << std::endl;
11
11
std::cerr << " score model.bin - input sentences and get score" << std::endl;
12
12
std::cerr << " correct model.bin - input sentences and get corrected one" << std::endl;
13
+ std::cerr << " fix model.bin input.txt output.txt - automatically fix txt file" << std::endl;
13
14
}
14
15
15
16
int Train (const std::string& alphabetFile,
@@ -39,6 +40,26 @@ int Score(const std::string& modelFile) {
39
40
return 0 ;
40
41
}
41
42
43
+ int Fix (const std::string& modelFile,
44
+ const std::string& inputFile,
45
+ const std::string& outFile)
46
+ {
47
+ TSpellCorrector corrector;
48
+ std::cerr << " [info] loading model" << std::endl;
49
+ if (!corrector.LoadLangModel (modelFile)) {
50
+ std::cerr << " [error] failed to load model" << std::endl;
51
+ return 42 ;
52
+ }
53
+ std::cerr << " [info] loaded" << std::endl;
54
+ std::wstring text = UTF8ToWide (LoadFile (inputFile));
55
+ uint64_t startTime = GetCurrentTimeMs ();
56
+ std::wstring result = corrector.FixFragment (text);
57
+ uint64_t finishTime = GetCurrentTimeMs ();
58
+ SaveFile (outFile, WideToUTF8 (result));
59
+ std::cerr << " [info] process time: " << finishTime - startTime << " ms" << std::endl;
60
+ return 0 ;
61
+ }
62
+
42
63
int Correct (const std::string& modelFile) {
43
64
TSpellCorrector corrector;
44
65
std::cerr << " [info] loading model" << std::endl;
@@ -86,6 +107,15 @@ int main(int argc, const char** argv) {
86
107
}
87
108
std::string modelFile = argv[2 ];
88
109
return Correct (modelFile);
110
+ } else if (mode == " fix" ) {
111
+ if (argc < 5 ) {
112
+ PrintUsage (argv);
113
+ return 42 ;
114
+ }
115
+ std::string modelFile = argv[2 ];
116
+ std::string inFile = argv[3 ];
117
+ std::string outFile = argv[4 ];
118
+ return Fix (modelFile, inFile, outFile);
89
119
}
90
120
91
121
PrintUsage (argv);
0 commit comments