Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/compile_charsmap_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ std::string ToHexUInt64Array(
std::string ToHexData(absl::string_view data) {
const char *begin = data.data();
const char *end = data.data() + data.size();
constexpr char kHex[] = "0123456789ABCDEF";
constexpr absl::string_view kHex = "0123456789ABCDEF";
constexpr size_t kNumOfBytesOnOneLine = 20;

size_t output_count = 0;
Expand Down
18 changes: 8 additions & 10 deletions src/model_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,11 @@ absl::string_view ModelInterface::pad_piece() const {
#undef RETURN_PIECE

int ModelInterface::PieceToId(absl::string_view piece) const {
auto it = reserved_id_map_.find(piece);
if (it != reserved_id_map_.end()) {
if (auto it = reserved_id_map_.find(piece); it != reserved_id_map_.end()) {
return it->second;
}
auto it2 = pieces_.find(piece);
if (it2 != pieces_.end()) {
return it2->second;
if (auto it = pieces_.find(piece); it != pieces_.end()) {
return it->second;
}
return unk_id_;
}
Expand Down Expand Up @@ -160,7 +158,7 @@ std::vector<absl::string_view> SplitIntoWords(absl::string_view text,
const char *end = text.data() + text.size();

// Space symbol (U+2581)
const absl::string_view kSpaceSymbol = "\xe2\x96\x81";
constexpr absl::string_view kSpaceSymbol = "\xe2\x96\x81";
bool in_ws_sequence = false;

std::vector<absl::string_view> result;
Expand Down Expand Up @@ -223,12 +221,12 @@ int PieceToByte(absl::string_view piece) {
}
return m;
}();
const auto it = kMap->find(piece);
if (it == kMap->end()) {
return -1;
} else {

if (const auto it = kMap->find(piece); it != kMap->end()) {
return it->second;
}

return -1;
}

} // namespace sentencepiece
13 changes: 6 additions & 7 deletions src/normalizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ util::Status Normalizer::Normalize(absl::string_view input,
}

// Reserves the output buffer to avoid re-allocations.
const size_t kReservedSize = input.size() * 3;
const size_t kReservedSize = input.size() * 1.5;
normalized->reserve(kReservedSize);
if (norm_to_orig) norm_to_orig->reserve(kReservedSize);

Expand Down Expand Up @@ -191,14 +191,12 @@ std::string Normalizer::Normalize(absl::string_view input) const {

std::pair<absl::string_view, int> Normalizer::NormalizePrefix(
absl::string_view input) const {
std::pair<absl::string_view, int> result;

if (input.empty()) return result;
if (input.empty()) return {};

if (matcher_ != nullptr) {
bool found = false;
const int mblen = matcher_->PrefixMatch(input, &found);
if (found) return std::make_pair(input.substr(0, mblen), mblen);
if (found) return {input.substr(0, mblen), mblen};
}

size_t longest_length = 0;
Expand All @@ -225,6 +223,7 @@ std::pair<absl::string_view, int> Normalizer::NormalizePrefix(
}
}

std::pair<absl::string_view, int> result;
if (longest_length == 0 || longest_length > input.size() ||
longest_value >= normalized_.size()) {
size_t length = 0;
Expand All @@ -234,8 +233,8 @@ std::pair<absl::string_view, int> Normalizer::NormalizePrefix(
// which is a valid Unicode of three bytes in utf8,
// but here we only consume one byte.
result.second = 1;
static const char kReplacementChar[] = "\xEF\xBF\xBD";
result.first = absl::string_view(kReplacementChar);
static constexpr absl::string_view kReplacementChar = "\xEF\xBF\xBD";
result.first = kReplacementChar;
} else {
result.second = length;
result.first = absl::string_view(input.data(), result.second);
Expand Down
Loading
Loading