Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion data/data_generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
$case_folding_string = File.open("CaseFolding.txt", :encoding => 'utf-8').read
$case_folding = {}
$case_folding_string.chomp.split("\n").each do |line|
next unless line =~ /([0-9A-F]+); [CFS]; ([0-9A-F ]+);/i
next unless line =~ /([0-9A-F]+); [CF]; ([0-9A-F ]+);/i
$case_folding[$1.hex] = $2.split(" ").collect { |e| e.hex }
end

Expand Down
9 changes: 8 additions & 1 deletion utf8proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
if (!category) return UTF8PROC_ERROR_NOTASSIGNED;
}
if (options & UTF8PROC_IGNORE) {
if (property->ignorable) return 0;
if (!category || property->ignorable) return 0;
}
if (options & UTF8PROC_LUMP) {
if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
Expand Down Expand Up @@ -753,3 +753,10 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str)
UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
return retval;
}

/*
 * Convenience wrapper: run utf8proc_map over the null-terminated string `str`
 * with the option set used for NFKC_Casefold (compatibility composition,
 * case folding, and stripping of ignorable characters) and hand back the
 * buffer that utf8proc_map stored through its output pointer.
 *
 * NOTE(review): the status returned by utf8proc_map is discarded and the
 * local pointer is not initialized here, so this relies on utf8proc_map
 * writing the output pointer on every path -- confirm against its definition.
 */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_CF(const utf8proc_uint8_t *str) {
  utf8proc_uint8_t *folded;

  /* Length argument is 0 because UTF8PROC_NULLTERM is part of the options. */
  utf8proc_map(
    str, 0, &folded,
    UTF8PROC_NULLTERM | UTF8PROC_STABLE |
    UTF8PROC_COMPAT | UTF8PROC_COMPOSE |
    UTF8PROC_IGNORE | UTF8PROC_CASEFOLD);

  return folded;
}
14 changes: 11 additions & 3 deletions utf8proc.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,10 @@ typedef enum {
UTF8PROC_COMPOSE = (1<<3),
/** Return a result with decomposed characters. */
UTF8PROC_DECOMPOSE = (1<<4),
/** Strip "default ignorable characters" such as SOFT-HYPHEN or ZERO-WIDTH-SPACE. */
/**
* Strip "default ignorable characters" such as SOFT-HYPHEN or
* ZERO-WIDTH-SPACE, along with unassigned codepoints.
*/
UTF8PROC_IGNORE = (1<<5),
/** Return an error if the input contains unassigned codepoints. */
UTF8PROC_REJECTNA = (1<<6),
Expand Down Expand Up @@ -676,8 +679,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(

/** @name Unicode normalization
*
* Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC
* normalized version of the null-terminated string `str`. These
* Returns a pointer to newly allocated memory of a NFD, NFC, NFKD, NFKC or
* NFKC_Casefold normalized version of the null-terminated string `str`. These
* are shortcuts to calling @ref utf8proc_map with @ref UTF8PROC_NULLTERM
* combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
*/
Expand All @@ -690,6 +693,11 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
/**
 * NFKC_Casefold normalization (@ref UTF8PROC_COMPOSE, @ref UTF8PROC_COMPAT,
 * @ref UTF8PROC_CASEFOLD and @ref UTF8PROC_IGNORE).
 */
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_CF(const utf8proc_uint8_t *str);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably just call it NFKC_Casefold since that is the standard name.

/** @} */

#ifdef __cplusplus
Expand Down