Skip to content

Commit c9ba256

Browse files
committed
Added fixFragment command-line option
1 parent 41ccef0 commit c9ba256

File tree

3 files changed

+36
-0
lines changed

3 files changed

+36
-0
lines changed

jamspell/utils.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ std::string LoadFile(const std::string& fileName) {
2626
return out.str();
2727
}
2828

29+
// Stores `data` verbatim in the file named `fileName`.
// The file is opened through std::ofstream in binary mode, so the bytes are
// written without newline translation. Nothing is returned: a stream that
// failed to open or write is simply dropped when the function exits.
void SaveFile(const std::string& fileName, const std::string& data) {
    std::ofstream sink(fileName, std::ios::binary);
    // Unformatted write of the whole buffer in one call.
    sink.write(data.data(), static_cast<std::streamsize>(data.size()));
}
33+
2934
TTokenizer::TTokenizer()
3035
: Locale("en_US.utf-8")
3136
{

jamspell/utils.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class TTokenizer {
5454
};
5555

5656
std::string LoadFile(const std::string& fileName);
57+
// Writes `data` to `fileName` via a binary-mode std::ofstream; returns nothing, so failures are not reported to the caller.
void SaveFile(const std::string& fileName, const std::string& data);
5758
std::wstring UTF8ToWide(const std::string& text);
5859
std::string WideToUTF8(const std::wstring& text);
5960
uint64_t GetCurrentTimeMs();

main/main.cpp

+30
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ void PrintUsage(const char** argv) {
1010
std::cerr << " train alphabet.txt dataset.txt resultModel.bin - train model" << std::endl;
1111
std::cerr << " score model.bin - input sentences and get score" << std::endl;
1212
std::cerr << " correct model.bin - input sentences and get corrected one" << std::endl;
13+
std::cerr << " fix model.bin input.txt output.txt - automatically fix txt file" << std::endl;
1314
}
1415

1516
int Train(const std::string& alphabetFile,
@@ -39,6 +40,26 @@ int Score(const std::string& modelFile) {
3940
return 0;
4041
}
4142

43+
int Fix(const std::string& modelFile,
44+
const std::string& inputFile,
45+
const std::string& outFile)
46+
{
47+
TSpellCorrector corrector;
48+
std::cerr << "[info] loading model" << std::endl;
49+
if (!corrector.LoadLangModel(modelFile)) {
50+
std::cerr << "[error] failed to load model" << std::endl;
51+
return 42;
52+
}
53+
std::cerr << "[info] loaded" << std::endl;
54+
std::wstring text = UTF8ToWide(LoadFile(inputFile));
55+
uint64_t startTime = GetCurrentTimeMs();
56+
std::wstring result = corrector.FixFragment(text);
57+
uint64_t finishTime = GetCurrentTimeMs();
58+
SaveFile(outFile, WideToUTF8(result));
59+
std::cerr << "[info] process time: " << finishTime - startTime << "ms" << std::endl;
60+
return 0;
61+
}
62+
4263
int Correct(const std::string& modelFile) {
4364
TSpellCorrector corrector;
4465
std::cerr << "[info] loading model" << std::endl;
@@ -86,6 +107,15 @@ int main(int argc, const char** argv) {
86107
}
87108
std::string modelFile = argv[2];
88109
return Correct(modelFile);
110+
} else if (mode == "fix") {
111+
if (argc < 5) {
112+
PrintUsage(argv);
113+
return 42;
114+
}
115+
std::string modelFile = argv[2];
116+
std::string inFile = argv[3];
117+
std::string outFile = argv[4];
118+
return Fix(modelFile, inFile, outFile);
89119
}
90120

91121
PrintUsage(argv);

0 commit comments

Comments
 (0)