@@ -74,7 +74,7 @@ namespace KB {
74
74
static Hashmap<unsigned , String> g_cached_sort_keys_for_letter_forms;
75
75
76
76
unsigned m_single_primaries_to_letter[256 ];
77
- bool m_is_known_lead_primary[256 ];
77
+ uint8_t m_is_known_lead_primary[256 ];
78
78
Hashmap<String, unsigned > m_sort_keys_to_letter_forms;
79
79
};
80
80
@@ -88,7 +88,7 @@ namespace KB {
88
88
89
89
void set_custom_capitalization ();
90
90
void move_capitalization_to_bits ();
91
- void parse_substitution_list (ReadOnlyDataFile& dat_file, unsigned offset, const String&, unsigned sort_key_length);
91
+ void parse_substitution_list (ReadOnlyDataFile& dat_file, unsigned offset, const String& string , unsigned sort_key_length);
92
92
String capitalized_string () const ;
93
93
94
94
Word (); // /< Create an empty word.
@@ -99,7 +99,7 @@ namespace KB {
99
99
/// <byte = h> [if h & 4, <byte = a if not flag & 512, a * 255 else>] [if h & 32, <byte = c>. If c & 1, cap_mask is set to 1]
100
100
/// [if h & 1, <unsigned = cap_mask>] [if h & 64, <unsigned = d>] [if not flag & 2048 or h & 8 or c & 48, <unsigned short = SSSSSS.....KKKKK> ]
101
101
/// [if h & 16, <null-terminated-string = string>, else if not h & 8, string = lettersForSortKey(sort_key), else parse_substitution_list(dat_file, "offset", ?, sort_key_length) ]
102
- Word (ReadOnlyDataFile& dat_file, unsigned x , const String& sort_key, const SortKeyByteConverter&, unsigned flag );
102
+ Word (ReadOnlyDataFile& dat_file, unsigned offset , const String& sort_key, const SortKeyByteConverter&, unsigned compilation_flag );
103
103
104
104
const String& string () const { return m_string; }
105
105
float probability () const { return word.probability ; }
@@ -181,14 +181,66 @@ namespace KB {
181
181
} freqNword;
182
182
unsigned char patricia_key_bytes[4 ];
183
183
184
- // void parse_trie_sibling_binary(const char* arg1, const char* arg2);
184
+ private:
185
+ const char * parse_trie_sibling_binary (const char * child, const char * data) // Per the sketch below: decodes the packed sibling record at child into this object and returns the address just past the record when its more_sibling bit is set, NULL otherwise. data is the base address that child offsets are measured from.
186
+ #if 0 // Pseudocode sketch only (never compiled); the declaration itself is closed by the ';' after #endif.
187
+ {
188
+ int cur_child_offset = child - data; // offset of this record from data; base for the relative child offsets below
189
+
190
+ this->sortNchild.v2bytes.flags_byte = *child++; // the first byte of the record is the flags byte
191
+
192
+ for (unsigned i = 0; i <= this->sortNchild.v2fields.patricia_key_size_1; ++ i) { // inclusive bound: copies patricia_key_size_1 + 1 key bytes
193
+ this->patricia_key_bytes[i] = *child++; // NOTE(review): patricia_key_bytes has 4 entries, so this assumes patricia_key_size_1 <= 3 -- confirm the field width
194
+ }
195
+
196
+ switch (this->sortNchild.v2fields.has_child_offset_type) { // selects the child-offset encoding; no case here writes child_offset for any other value
197
+ case 1: // 1 byte, relative to cur_child_offset
198
+ this->sortNchild.v2fields.child_offset = cur_child_offset + (*child++);
199
+ break;
200
+ case 2: // 2 bytes read with unsigned_short_at, relative to cur_child_offset
201
+ this->sortNchild.v2fields.child_offset = cur_child_offset + unsigned_short_at(child);
202
+ child += 2;
203
+ break;
204
+ case 3: // 3 bytes, stored as read
205
+ // note: no addition (cur_child_offset is not added in this case).
206
+ this->sortNchild.v2fields.child_offset = (read 3 bytes from child);
207
+ child += 3;
208
+ break;
209
+ }
210
+
211
+ if (has_freq bit is cleared) { // prose condition; the exact flag field is not spelled out in this sketch
212
+ this->freqNword.v2fields.compacted_freq = 0xff;
213
+ } else {
214
+ this->freqNword.v2fields.compacted_freq = 1 + *child++; // stored byte plus one
215
+ }
216
+
217
+ if (has_unigram_list_offset bit is cleared) {
218
+ if (has_word_termination_prob bit is set) {
219
+ this->freqNword.fields.word_offset = 1 + **child++ // NOTE(review): "**child" and the missing ';' look like typos for "1 + *child++;" (compare the compacted_freq assignment above) -- confirm
220
+ }
221
+ } else {
222
+ this->freqNword.fields.word_[offset,is_0freq] = (read 3 bytes from child); // shorthand; presumably the 3 bytes fill both word_offset and word_is_0freq -- confirm the bit split
223
+ child += 3;
224
+ }
225
+
226
+ if (more_sibling bit is set) {
227
+ return child; // child now points just past this record
228
+ } else
229
+ return NULL;
230
+ }
231
+ #endif
232
+ ;
185
233
186
- bool finishesWords () const {
234
+ bool finishesWords () const // Per the body kept below for reference: returns true when child_offset is 0, compacted_freq is nonzero and word_is_0freq is clear; otherwise returns whether has_word_termination_prob or has_unigram_list_offset is set.
235
+ #if 0 // Body disabled; together with the ';' after #endif this leaves a declaration only.
236
+ {
187
237
if (sortNchild.v2fields.child_offset != 0 || freqNword.fields.compacted_freq == 0 || freqNword.fields.word_is_0freq)
188
238
return sortNchild.v2fields.has_word_termination_prob || sortNchild.v2fields.has_unigram_list_offset;
189
239
else
190
240
return true;
191
241
}
242
+ #endif
243
+ ;
192
244
};
193
245
194
246
#pragma mark -
@@ -258,6 +310,78 @@ namespace KB {
258
310
char l, m;
259
311
};
260
312
313
+ class WordTrieNode { // Holds a PackedTrieSibling together with a ReadOnlyDataFile pointer; the advance() sketch below collects the children matching one input byte and chains them through m.
314
+ private: // NOTE(review): everything up to the second "private:" (constructors, advance, create, not_valid, trie_addr) is therefore private, so the static create() factories are not callable from outside; "public:" may have been intended for part of it -- confirm.
315
+ WordTrieNode (ReadOnlyDataFile* data_file, const char *, TrieSearchType type);
316
+ WordTrieNode (ReadOnlyDataFile* data_file, unsigned int root_offset);
317
+ WordTrieNode ();
318
+
319
+ WordTrieNode advance (unsigned char input, TrieSearchType search_type) const ; // Declaration only; the #if 0 block that follows is a non-compiled sketch of its behaviour. NOTE(review): declared to return by value, but the sketch returns a RefPtr<WordTrieNode> -- confirm the real return type.
320
+ #if 0
321
+ {
322
+ const char* child_addr = this->trie_addr(this->sibling.child_offset); // address of the record at this node's child offset
323
+ RefPtr<WordTrieNode> child_b = NULL; // head of the chain of matching children (the value returned)
324
+ RefPtr<WordTrieNode> child_refptr = NULL; // tail of that chain
325
+
326
+ // begin:
327
+ for (; child_addr != NULL && r6 != 256; ++ r6, child_addr = next_child_addr ) { // NOTE(review): r6 and next_child_addr are never declared in this sketch; the loop ends when child_addr is NULL or r6 reaches 256
328
+
329
+ PackedTrieSibling sibling;
330
+ next_child_addr = sibling.parse_trie_sibling_binary(child_addr, data_file->m_data); // decode this sibling and obtain the address of the next one (NULL when there is none)
331
+
332
+ char first_pat_char = sibling.pat_key[0]; // NOTE(review): no pat_key member is visible in this file; the struct declaring parse_trie_sibling_binary shows patricia_key_bytes -- confirm
333
+ bool matches;
334
+ TrieSearchType child_search_type;
335
+
336
+ if (search_type == Exact) { // exact search: the key byte must equal the input
337
+ matches = (first_pat_char == input);
338
+ child_search_type = search_type;
339
+ } else if (search_type == Fuzzy) {
340
+ child_search_type = this->trie_search_type;
341
+ if (this->trie_search_type != Fuzzy && first_pat_char == input) // equal byte while this node is not Fuzzy: the child keeps this node's search type
342
+ matches = true;
343
+ else {
344
+ matches = keyboard_sort_key_match(first_pat_char, input); // otherwise defer to keyboard_sort_key_match and mark the child Fuzzy
345
+ child_search_type = Fuzzy;
346
+ }
347
+ } else {
348
+ assert(false); // any other search type is unexpected; matches and child_search_type stay unset on this path
349
+ }
350
+
351
+ if (match) { // NOTE(review): the flag declared above is named "matches" -- likely a typo in the sketch
352
+ RefPtr<WordTrieNode> child_a = WordTrieNode::create(this->data_file, child_addr, child_search_type);
353
+ if (child_b == NULL) { // first match: becomes both head and tail of the chain
354
+ child_refptr = child_a;
355
+ child_b = child_refptr;
356
+ } else { // later match: link it after the current tail via m, then make it the new tail
357
+ child_refptr->m = child_a;
358
+ child_refptr = child_a;
359
+ }
360
+ // deref child_a.
361
+ }
362
+ }
363
+ if (child_b == NULL) // nothing matched: return a default-constructed node
364
+ child_b = new WordTrieNode;
365
+ return child_b;
366
+ // deref stuff.
367
+ }
368
+ #endif
369
+
370
+ static WordTrieNode* create (ReadOnlyDataFile* data_file, const char *, TrieSearchType type); ///< Just calls the constructor.
371
+ static WordTrieNode* create (ReadOnlyDataFile* data_file, unsigned int offset); ///< Just calls the constructor.
372
+ bool not_valid (void ) const ;
373
+ const char * trie_addr (unsigned int child_offset) const ; // Returns data_file->m_data + child_offset.
374
+
375
+ private:
376
+ int one;
377
+ char b;
378
+ ReadOnlyDataFile* data_file; // passed on to create() in the advance() sketch; its m_data is the base used by trie_addr()
379
+ const char * data_ptr;
380
+ int parse_result;
381
+ TrieSearchType trie_search_type; // read by the advance() sketch to pick the child's search type in the Fuzzy case
382
+ PackedTrieSibling sibling; // the advance() sketch reads sibling.child_offset to locate this node's children
383
+ WTF::RefPtr<WordTrieNode> m; // in the advance() sketch, links to the next matching node in the result chain
384
+ };
261
385
262
386
class WordTrie {
263
387
public:
@@ -305,11 +429,11 @@ namespace KB {
305
429
306
430
float probability_sum_for_V2_words_at (unsigned ) const ;
307
431
308
- void recurse_matching_words_for_node (Vector<Word>&, WordTrieNode*, unsigned , int , bool ) const ;
432
+ void recurse_matching_words_for_node (Vector<Word>& res , WordTrieNode* node , unsigned offset , int the_int , bool perform_match ) const ;
309
433
void fill_vector_with_trie_children_at (unsigned , Vector<DictionaryCursors>&) const ;
310
434
bool advance_static_cursor_to_next_patricia_node (DictionaryCursors& cursors, unsigned char tag, unsigned address) const ;
311
435
312
- Word word_at (unsigned , unsigned &, const String&) const ;
436
+ Word word_at (unsigned offset , unsigned & res_offset , const String& sort_key ) const ;
313
437
314
438
315
439
private:
@@ -327,7 +451,7 @@ namespace KB {
327
451
unsigned m_trie_root_offset; // 1ad4
328
452
float m_root_usage_sum; // 1ad8
329
453
SortKeyByteConverter m_sort_key_converter; // 1adc (sizeof = 524)
330
- }; // sizeof = 1ff0 .
454
+ }; // sizeof = 2000 .
331
455
}
332
456
333
457
#endif
0 commit comments