Skip to content

Commit 959937e

Browse files
authored
feat(translator_commons): add dictionary_exclude to exclude words (#1008)
Previously, the only way to exclude a word was to delete it from the dictionary itself.
1 parent 6e46361 commit 959937e

7 files changed

Lines changed: 50 additions & 20 deletions

File tree

src/rime/dict/dictionary.cc

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ void DictEntryIterator::AddFilter(DictEntryFilter filter) {
137137
// the introduced filter could invalidate the current or even all the
138138
// remaining entries
139139
while (!exhausted() && !filter_(Peek())) {
140+
entry_.reset();
140141
FindNextEntry();
141142
}
142143
}
@@ -181,15 +182,12 @@ bool DictEntryIterator::FindNextEntry() {
181182
}
182183

183184
bool DictEntryIterator::Next() {
184-
entry_.reset();
185-
if (!FindNextEntry()) {
186-
return false;
187-
}
188-
while (filter_ && !filter_(Peek())) {
185+
do {
186+
entry_.reset();
189187
if (!FindNextEntry()) {
190188
return false;
191189
}
192-
}
190+
} while (filter_ && !filter_(Peek()));
193191
return true;
194192
}
195193

@@ -262,6 +260,7 @@ static void lookup_table(Table* table,
262260

263261
an<DictEntryCollector> Dictionary::Lookup(const SyllableGraph& syllable_graph,
264262
size_t start_pos,
263+
const hash_set<string>* blacklist,
265264
bool predict_word,
266265
double initial_credibility) {
267266
if (!loaded())
@@ -275,17 +274,23 @@ an<DictEntryCollector> Dictionary::Lookup(const SyllableGraph& syllable_graph,
275274
}
276275
if (collector->empty())
277276
return nullptr;
278-
// sort each group of equal code length
277+
// for each group of equal code length, sort it and filter words
279278
for (auto& v : *collector) {
280279
v.second.Sort();
280+
if (blacklist && !blacklist->empty()) {
281+
v.second.AddFilter([blacklist](an<DictEntry> entry) {
282+
return entry && !blacklist->count(entry->text);
283+
});
284+
}
281285
}
282286
return collector;
283287
}
284288

285289
size_t Dictionary::LookupWords(DictEntryIterator* result,
286290
const string& str_code,
287291
bool predictive,
288-
size_t expand_search_limit) {
292+
size_t expand_search_limit,
293+
const hash_set<string>* blacklist) {
289294
DLOG(INFO) << "lookup: " << str_code;
290295
if (!loaded())
291296
return 0;
@@ -325,6 +330,11 @@ size_t Dictionary::LookupWords(DictEntryIterator* result,
325330
}
326331
}
327332
}
333+
if (blacklist && !blacklist->empty()) {
334+
result->AddFilter([blacklist](an<DictEntry> entry) {
335+
return entry && !blacklist->count(entry->text);
336+
});
337+
}
328338
return keys.size();
329339
}
330340

src/rime/dict/dictionary.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,20 @@ class Dictionary : public Class<Dictionary, const Ticket&> {
7171
RIME_DLL bool Remove();
7272
RIME_DLL bool Load();
7373

74-
RIME_DLL an<DictEntryCollector> Lookup(const SyllableGraph& syllable_graph,
75-
size_t start_pos,
76-
bool predict_word = false,
77-
double initial_credibility = 0.0);
74+
RIME_DLL an<DictEntryCollector> Lookup(
75+
const SyllableGraph& syllable_graph,
76+
size_t start_pos,
77+
const hash_set<string>* blacklist = nullptr,
78+
bool predict_word = false,
79+
double initial_credibility = 0.0);
7880
// if predictive is true, do an expand search with limit,
7981
// otherwise do an exact match.
8082
// return num of matching keys.
8183
RIME_DLL size_t LookupWords(DictEntryIterator* result,
8284
const string& str_code,
8385
bool predictive,
84-
size_t limit = 0);
86+
size_t limit = 0,
87+
const hash_set<string>* blacklist = nullptr);
8588
// translate syllable id sequence to string code
8689
RIME_DLL bool Decode(const Code& code, vector<string>* result);
8790

src/rime/gear/reverse_lookup_translator.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ an<Translation> ReverseLookupTranslator::Query(const string& input,
172172
bool quality = false;
173173
if (start < input.length()) {
174174
if (options_ && options_->enable_completion()) {
175-
dict_->LookupWords(&iter, code, true, 100);
175+
dict_->LookupWords(&iter, code, true, 100, nullptr);
176176
quality = !iter.exhausted() && (iter.Peek()->remaining_code_length == 0);
177177
} else {
178178
// 2012-04-08 gongchen: fetch multi-syllable words from rev-lookup table

src/rime/gear/script_translator.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
//
77
// 2011-07-10 GONG Chen <chen.sst@gmail.com>
88
//
9-
#include <algorithm>
109
#include <stack>
1110
#include <cmath>
1211
#include <boost/algorithm/string/join.hpp>
1312
#include <boost/range/adaptor/reversed.hpp>
13+
#include <rime/common.h>
1414
#include <rime/composition.h>
1515
#include <rime/candidate.h>
1616
#include <rime/config.h>
@@ -368,7 +368,8 @@ bool ScriptTranslation::Evaluate(Dictionary* dict, UserDictionary* user_dict) {
368368
bool predict_word = translator_->enable_word_completion() &&
369369
start_ + consumed == end_of_input_;
370370

371-
phrase_ = dict->Lookup(syllable_graph, 0, predict_word);
371+
phrase_ =
372+
dict->Lookup(syllable_graph, 0, &translator_->blacklist(), predict_word);
372373
if (user_dict) {
373374
const size_t kUnlimitedDepth = 0;
374375
const size_t kNumSyllablesToPredictWord = 4;
@@ -578,7 +579,8 @@ an<Sentence> ScriptTranslation::MakeSentence(Dictionary* dict,
578579
kMaxSyllablesForUserPhraseQuery));
579580
}
580581
// merge lookup results
581-
EnrollEntries(same_start_pos, dict->Lookup(syllable_graph, x.first));
582+
EnrollEntries(same_start_pos, dict->Lookup(syllable_graph, x.first,
583+
&translator_->blacklist()));
582584
}
583585
if (auto sentence =
584586
poet_->MakeSentence(graph, syllable_graph.interpreted_length,

src/rime/gear/table_translator.cc

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ class LazyTableTranslation : public TableTranslation {
131131

132132
private:
133133
Dictionary* dict_;
134+
const hash_set<string>* blacklist_;
134135
UserDictionary* user_dict_;
135136
size_t limit_;
136137
size_t user_dict_limit_;
@@ -150,6 +151,7 @@ LazyTableTranslation::LazyTableTranslation(TableTranslator* translator,
150151
end,
151152
preedit),
152153
dict_(translator->dict()),
154+
blacklist_(&translator->blacklist()),
153155
user_dict_(enable_user_dict ? translator->user_dict() : NULL),
154156
limit_(kInitialSearchLimit),
155157
user_dict_limit_(kInitialSearchLimit) {
@@ -191,7 +193,7 @@ bool LazyTableTranslation::FetchMoreTableEntries() {
191193
DLOG(INFO) << "fetching more table entries: limit = " << limit_
192194
<< ", count = " << previous_entry_count;
193195
DictEntryIterator more;
194-
if (dict_->LookupWords(&more, input_, true, limit_) < limit_) {
196+
if (dict_->LookupWords(&more, input_, true, limit_, blacklist_) < limit_) {
195197
DLOG(INFO) << "all table entries obtained.";
196198
limit_ = 0; // no more try
197199
} else {
@@ -263,7 +265,7 @@ an<Translation> TableTranslator::Query(const string& input,
263265
} else {
264266
DictEntryIterator iter;
265267
if (dict_ && dict_->loaded()) {
266-
dict_->LookupWords(&iter, code, false);
268+
dict_->LookupWords(&iter, code, false, 0, &blacklist());
267269
}
268270
UserDictEntryIterator uter;
269271
if (enable_user_dict) {
@@ -645,7 +647,8 @@ an<Translation> TableTranslator::MakeSentence(const string& input,
645647
if (homographs.size() >= max_homographs_)
646648
continue;
647649
DictEntryIterator iter;
648-
dict_->LookupWords(&iter, active_input.substr(0, m.length), false);
650+
dict_->LookupWords(&iter, active_input.substr(0, m.length), false, 0,
651+
&blacklist());
649652
if (filter_by_charset) {
650653
iter.AddFilter(CharsetFilter::FilterDictEntry);
651654
}

src/rime/gear/translator_commons.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,16 @@ TranslatorOptions::TranslatorOptions(const Ticket& ticket) {
145145
tags_.push_back(value->str());
146146
if (tags_.empty())
147147
tags_.push_back("abc");
148+
149+
// blacklist
150+
if (auto blacklist =
151+
config->GetList(ticket.name_space + "/dictionary_exclude")) {
152+
for (auto it = blacklist->begin(); it != blacklist->end(); ++it) {
153+
if (auto blackword = As<ConfigValue>(*it)) {
154+
blacklist_.insert(blackword->str());
155+
}
156+
}
157+
}
148158
}
149159
if (delimiters_.empty()) {
150160
delimiters_ = " ";

src/rime/gear/translator_commons.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ class TranslatorOptions {
167167
void set_initial_quality(double quality) { initial_quality_ = quality; }
168168
Projection& preedit_formatter() { return preedit_formatter_; }
169169
Projection& comment_formatter() { return comment_formatter_; }
170+
// Set of words to exclude from dictionary lookup results; populated from the
// `dictionary_exclude` config list of this translator's namespace.
const hash_set<string>& blacklist() { return blacklist_; }
170171

171172
protected:
172173
string delimiters_;
@@ -178,6 +179,7 @@ class TranslatorOptions {
178179
Projection preedit_formatter_;
179180
Projection comment_formatter_;
180181
Patterns user_dict_disabling_patterns_;
182+
hash_set<string> blacklist_;
181183
};
182184

183185
} // namespace rime

0 commit comments

Comments
 (0)