Skip to content

Commit 5c7fb64

Browse files
authored
feat(script_translator): word completion from 2nd place (#848)
* prefer exact match phrase on top * set word "completion" type
1 parent 9184ae6 commit 5c7fb64

5 files changed

Lines changed: 117 additions & 66 deletions

File tree

src/rime/dict/user_dictionary.cc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,21 @@ an<UserDictEntryCollector> UserDictionary::Lookup(
325325
for (auto& v : state.query_result) {
326326
v.second.Sort();
327327
}
328+
auto entries_with_word_completion =
329+
state.query_result.find(state.predict_word_from_depth);
330+
if (entries_with_word_completion != state.query_result.end()) {
331+
auto& entries = entries_with_word_completion->second;
332+
// if the top candidate is a predictive match,
333+
if (!entries.empty() && entries.front()->IsPredictiveMatch()) {
334+
auto found =
335+
std::find_if(entries.begin(), entries.end(),
336+
[](const auto& e) { return e->IsExactMatch(); });
337+
if (found != entries.end()) {
338+
// move the first exact match candidate to top.
339+
std::rotate(entries.begin(), found, found + 1);
340+
}
341+
}
342+
}
328343
return collect(&state.query_result);
329344
}
330345

src/rime/dict/vocabulary.cc

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,6 @@ string Code::ToString() const {
5757
return stream.str();
5858
}
5959

60-
inline ShortDictEntry DictEntry::ToShort() const {
61-
return {text, code, weight};
62-
}
63-
6460
bool ShortDictEntry::operator<(const ShortDictEntry& other) const {
6561
// Sort different entries sharing the same code by weight desc.
6662
if (weight != other.weight)

src/rime/dict/vocabulary.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,13 @@ struct DictEntry {
5555
int matching_code_size = 0;
5656

5757
DictEntry() = default;
58-
ShortDictEntry ToShort() const;
58+
// Builds a ShortDictEntry from this entry's text, code and weight.
ShortDictEntry ToShort() const { return {text, code, weight}; }
59+
bool IsExactMatch() const {
60+
return matching_code_size == 0 || matching_code_size == code.size();
61+
}
62+
bool IsPredictiveMatch() const {
63+
return matching_code_size != 0 && matching_code_size < code.size();
64+
}
5965
bool operator<(const DictEntry& other) const;
6066
};
6167

src/rime/gear/contextual_translation.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ bool ContextualTranslation::Replenish() {
1818
DLOG(INFO) << cand->text() << " cache/queue: " << cache_.size() << "/"
1919
<< queue.size();
2020
if (cand->type() == "phrase" || cand->type() == "user_phrase" ||
21-
cand->type() == "table" || cand->type() == "user_table") {
21+
cand->type() == "table" || cand->type() == "user_table" ||
22+
cand->type() == "completion") {
2223
if (end_pos != cand->end() || last_type != cand->type()) {
2324
end_pos = cand->end();
2425
last_type = cand->type();

src/rime/gear/script_translator.cc

Lines changed: 93 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ class ScriptTranslation : public Translation {
130130
protected:
131131
bool CheckEmpty();
132132
bool IsNormalSpelling() const;
133-
void PrepareCandidate();
133+
bool PrepareCandidate();
134134
template <class QueryResult>
135135
void EnrollEntries(map<int, DictEntryList>& entries_by_end_pos,
136136
const an<QueryResult>& query_result);
@@ -147,11 +147,19 @@ class ScriptTranslation : public Translation {
147147
an<Sentence> sentence_;
148148

149149
an<Phrase> candidate_ = nullptr;
150+
size_t candidate_index_ = 0;
151+
enum CandidateSource {
152+
kUninitialized,
153+
kUserPhrase,
154+
kSysPhrase,
155+
kSentence,
156+
};
157+
CandidateSource candidate_source_ = kUninitialized;
150158

151159
DictEntryCollector::reverse_iterator phrase_iter_;
152160
UserDictEntryCollector::reverse_iterator user_phrase_iter_;
153161

154-
size_t max_corrections_ = 4;
162+
const size_t max_corrections_ = 4;
155163
size_t correction_count_ = 0;
156164

157165
bool enable_correction_;
@@ -342,9 +350,10 @@ string ScriptSyllabifier::GetOriginalSpelling(const Phrase& cand) const {
342350
return string();
343351
}
344352

345-
static bool is_exact_match_phrase(const an<DictEntry>& entry) {
346-
return entry && entry->matching_code_size == 0 ||
347-
entry->matching_code_size == entry->code.size();
353+
// Tells whether the group of entries that `iter` points at both ends at
// position `consumed` and currently has an exact-match entry at its front.
//
//   ptr      - handle that is tested for truthiness first; when it is falsy
//              the function returns false without touching `iter`.
//   iter     - iterator to a (end position, entry iterator) pair. It is NOT
//              checked against the end of its container here, so whenever
//              `ptr` is truthy the caller must pass a dereferenceable one.
//   consumed - the end position the group has to match.
//
// Both handle and iterator are taken by const reference: copying a
// shared-pointer style handle just to test it costs a reference-count round
// trip on every call.
template <class Ptr, class Iter>
static bool has_exact_match_phrase(const Ptr& ptr,
                                   const Iter& iter,
                                   size_t consumed) {
  return ptr && iter->first == consumed && !iter->second.exhausted() &&
         iter->second.Peek()->IsExactMatch();
}
349358

350359
// ScriptTranslation implementation
@@ -372,15 +381,10 @@ bool ScriptTranslation::Evaluate(Dictionary* dict, UserDictionary* user_dict) {
372381
user_phrase_iter_ = user_phrase_->rbegin();
373382

374383
// make sentences when there is no exact-matching phrase candidate
375-
bool has_exact_match_phrase =
376-
phrase_ && phrase_iter_->first == consumed &&
377-
is_exact_match_phrase(phrase_iter_->second.Peek());
378-
bool has_exact_match_user_phrase =
379-
user_phrase_ && user_phrase_iter_->first == consumed &&
380-
is_exact_match_phrase(user_phrase_iter_->second.Peek());
381384
bool has_at_least_two_syllables = syllable_graph.edges.size() >= 2;
382-
if (!has_exact_match_phrase && !has_exact_match_user_phrase &&
383-
has_at_least_two_syllables) {
385+
if (has_at_least_two_syllables &&
386+
!has_exact_match_phrase(phrase_, phrase_iter_, consumed) &&
387+
!has_exact_match_phrase(user_phrase_, user_phrase_iter_, consumed)) {
384388
sentence_ = MakeSentence(dict, user_dict);
385389
}
386390

@@ -393,43 +397,42 @@ bool ScriptTranslation::Next() {
393397
is_correction = false;
394398
if (exhausted())
395399
return false;
396-
if (sentence_) {
397-
sentence_.reset();
398-
return !CheckEmpty();
400+
if (candidate_source_ == kUninitialized) {
401+
PrepareCandidate(); // to determine candidate_source_
399402
}
400-
int user_phrase_code_length = 0;
401-
if (user_phrase_ && user_phrase_iter_ != user_phrase_->rend()) {
402-
user_phrase_code_length = user_phrase_iter_->first;
403-
}
404-
int phrase_code_length = 0;
405-
if (phrase_ && phrase_iter_ != phrase_->rend()) {
406-
phrase_code_length = phrase_iter_->first;
407-
}
408-
if (user_phrase_code_length > 0 &&
409-
user_phrase_code_length >= phrase_code_length) {
410-
UserDictEntryIterator& uter(user_phrase_iter_->second);
411-
if (!uter.Next()) {
412-
++user_phrase_iter_;
413-
}
414-
} else if (phrase_code_length > 0) {
415-
DictEntryIterator& iter(phrase_iter_->second);
416-
if (!iter.Next()) {
417-
++phrase_iter_;
418-
}
403+
switch (candidate_source_) {
404+
case kUninitialized:
405+
break;
406+
case kSentence:
407+
sentence_.reset();
408+
break;
409+
case kUserPhrase: {
410+
UserDictEntryIterator& uter(user_phrase_iter_->second);
411+
if (!uter.Next()) {
412+
++user_phrase_iter_;
413+
}
414+
} break;
415+
case kSysPhrase: {
416+
DictEntryIterator& iter(phrase_iter_->second);
417+
if (!iter.Next()) {
418+
++phrase_iter_;
419+
}
420+
} break;
419421
}
422+
candidate_.reset();
423+
candidate_source_ = kUninitialized;
420424
if (enable_correction_) {
421-
PrepareCandidate();
422-
if (!candidate_) {
425+
// populate next candidate and skip it if it's a correction beyond max
426+
// numbers.
427+
if (!PrepareCandidate()) {
423428
break;
424429
}
425-
is_correction = syllabifier_->IsCandidateCorrection(*candidate_);
426430
}
427-
} while ( // limit the number of correction candidates
428-
enable_correction_ && is_correction &&
429-
correction_count_ > max_corrections_);
430-
if (is_correction) {
431-
++correction_count_;
432-
}
431+
} while (enable_correction_ &&
432+
syllabifier_->IsCandidateCorrection(*candidate_) &&
433+
// limit the number of correction candidates
434+
++correction_count_ > max_corrections_);
435+
++candidate_index_;
433436
return !CheckEmpty();
434437
}
435438

@@ -440,8 +443,7 @@ bool ScriptTranslation::IsNormalSpelling() const {
440443
}
441444

442445
an<Candidate> ScriptTranslation::Peek() {
443-
PrepareCandidate();
444-
if (!candidate_) {
446+
if (candidate_source_ == kUninitialized && !PrepareCandidate()) {
445447
return nullptr;
446448
}
447449
if (candidate_->preedit().empty()) {
@@ -458,14 +460,29 @@ an<Candidate> ScriptTranslation::Peek() {
458460
return candidate_;
459461
}
460462

461-
void ScriptTranslation::PrepareCandidate() {
463+
// Default tie-breaker for prefer_user_phrase(): ties go to the user phrase.
static bool always_true() {
  return true;
}

// Decides whether the user-dictionary side wins over the system side.
//
//   user_phrase_weight - value for the user phrase.
//   sys_phrase_weight  - value for the system phrase (same type).
//   compare_on_tie     - any callable returning something convertible to
//                        bool; invoked only when both values compare equal,
//                        and its result settles the tie.
//
// Returns true when the user value is strictly greater, or when the values
// are equal and `compare_on_tie()` is true.
//
// The tie-breaker is a deduced callable type rather than a type-erased
// function<bool()> wrapper, so passing a lambda builds no wrapper object and
// the call can be inlined. Call sites are unaffected: lambdas, function
// pointers and function<> objects are all still accepted, and omitting the
// argument still means "always true".
template <typename T, typename TieBreaker = bool (*)()>
inline static bool prefer_user_phrase(T user_phrase_weight,
                                      T sys_phrase_weight,
                                      TieBreaker compare_on_tie = always_true) {
  return user_phrase_weight > sys_phrase_weight ||
         (user_phrase_weight == sys_phrase_weight && compare_on_tie());
}
475+
476+
bool ScriptTranslation::PrepareCandidate() {
462477
if (exhausted()) {
478+
candidate_source_ = kUninitialized;
463479
candidate_ = nullptr;
464-
return;
480+
return false;
465481
}
466482
if (sentence_) {
483+
candidate_source_ = kSentence;
467484
candidate_ = sentence_;
468-
return;
485+
return true;
469486
}
470487
size_t user_phrase_code_length = 0;
471488
if (user_phrase_ && user_phrase_iter_ != user_phrase_->rend()) {
@@ -475,28 +492,44 @@ void ScriptTranslation::PrepareCandidate() {
475492
if (phrase_ && phrase_iter_ != phrase_->rend()) {
476493
phrase_code_length = phrase_iter_->first;
477494
}
478-
an<Phrase> cand;
479495
if (user_phrase_code_length > 0 &&
480-
user_phrase_code_length >= phrase_code_length) {
496+
prefer_user_phrase(user_phrase_code_length, phrase_code_length, [this]() {
497+
const int kNumExactMatchOnTop = 1;
498+
size_t full_code_length = end_of_input_ - start_;
499+
return candidate_index_ >= kNumExactMatchOnTop ||
500+
prefer_user_phrase(
501+
has_exact_match_phrase(user_phrase_, user_phrase_iter_,
502+
full_code_length),
503+
has_exact_match_phrase(phrase_, phrase_iter_,
504+
full_code_length));
505+
})) {
481506
UserDictEntryIterator& uter = user_phrase_iter_->second;
482507
const auto& entry = uter.Peek();
483508
DLOG(INFO) << "user phrase '" << entry->text
484509
<< "', code length: " << user_phrase_code_length;
485-
cand = New<Phrase>(translator_->language(), "user_phrase", start_,
486-
start_ + user_phrase_code_length, entry);
487-
cand->set_quality(std::exp(entry->weight) + translator_->initial_quality() +
488-
(IsNormalSpelling() ? 0.5 : -0.5));
510+
candidate_source_ = kUserPhrase;
511+
candidate_ =
512+
New<Phrase>(translator_->language(),
513+
entry->IsPredictiveMatch() ? "completion" : "user_phrase",
514+
start_, start_ + user_phrase_code_length, entry);
515+
candidate_->set_quality(std::exp(entry->weight) +
516+
translator_->initial_quality() +
517+
(IsNormalSpelling() ? 0.5 : -0.5));
489518
} else if (phrase_code_length > 0) {
490519
DictEntryIterator& iter = phrase_iter_->second;
491520
const auto& entry = iter.Peek();
492521
DLOG(INFO) << "phrase '" << entry->text
493522
<< "', code length: " << phrase_code_length;
494-
cand = New<Phrase>(translator_->language(), "phrase", start_,
495-
start_ + phrase_code_length, entry);
496-
cand->set_quality(std::exp(entry->weight) + translator_->initial_quality() +
497-
(IsNormalSpelling() ? 0 : -1));
523+
candidate_source_ = kSysPhrase;
524+
candidate_ =
525+
New<Phrase>(translator_->language(),
526+
entry->IsPredictiveMatch() ? "completion" : "phrase",
527+
start_, start_ + phrase_code_length, entry);
528+
candidate_->set_quality(std::exp(entry->weight) +
529+
translator_->initial_quality() +
530+
(IsNormalSpelling() ? 0 : -1));
498531
}
499-
candidate_ = cand;
532+
return true;
500533
}
501534

502535
bool ScriptTranslation::CheckEmpty() {

0 commit comments

Comments
 (0)