Skip to content

Commit 47c1810

Browse files
author
Boris Filippov
committed
FIX #6 by encoding UTF-8 into UTF-16LE
1 parent a496927 commit 47c1810

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

jamspell/lang_model.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,7 @@ bool TLangModel::Train(const std::string& fileName, const std::string& alphabetF
127127
std::unordered_map<TGram2Key, TCount, TGram2KeyHash> grams2;
128128
std::unordered_map<TGram3Key, TCount, TGram3KeyHash> grams3;
129129

130-
std::cerr << "[info] generating N-grams " << sentences.size() << std::endl;
130+
std::cerr << "[info] generating N-grams " << sentenceIds.size() << std::endl;
131131
uint64_t lastTime = GetCurrentTimeMs();
132132
size_t total = sentenceIds.size();
133133
for (size_t i = 0; i < total; ++i) {

jamspell/utils.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,7 @@ std::wstring UTF8ToWide(const std::string& text) {
113113
using boost::locale::conv::utf_to_utf;
114114
return utf_to_utf<wchar_t>(text.c_str(), text.c_str() + text.size());
115115
#else
116-
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
116+
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t, 0x10ffff, std::little_endian>> converter;
117117
return converter.from_bytes(text);
118118
#endif
119119
}
@@ -123,7 +123,7 @@ std::string WideToUTF8(const std::wstring& text) {
123123
using boost::locale::conv::utf_to_utf;
124124
return utf_to_utf<char>(text.c_str(), text.c_str() + text.size());
125125
#else
126-
using convert_type = std::codecvt_utf8<wchar_t>;
126+
using convert_type = std::codecvt_utf8<wchar_t, 0x10ffff, std::little_endian>;
127127
std::wstring_convert<convert_type, wchar_t> converter;
128128
return converter.to_bytes(text);
129129
#endif

0 commit comments

Comments
 (0)