JuliaStrings · nomoon · Mar 5, 2017 · Mar 5, 2017 · Mar 5, 2017 · stevengj
diff --git a/data/data_generator.rb b/data/data_generator.rb
@@ -104,7 +104,7 @@
 $case_folding_string = File.open("CaseFolding.txt", :encoding => 'utf-8').read
 $case_folding = {}
 $case_folding_string.chomp.split("\n").each do |line|
-  next unless line =~ /([0-9A-F]+); [CFS]; ([0-9A-F ]+);/i
+  next unless line =~ /([0-9A-F]+); [CF]; ([0-9A-F ]+);/i
   $case_folding[$1.hex] = $2.split(" ").collect { |e| e.hex }
 end
 

diff --git a/utf8proc.c b/utf8proc.c
@@ -422,7 +422,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
     if (!category) return UTF8PROC_ERROR_NOTASSIGNED;
   }
   if (options & UTF8PROC_IGNORE) {
-    if (property->ignorable) return 0;
+    if (!category || property->ignorable) return 0;
   }
   if (options & UTF8PROC_LUMP) {
     if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
@@ -753,3 +753,10 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str)
     UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
   return retval;
 }
+
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_CF(const utf8proc_uint8_t *str) {
+  utf8proc_uint8_t *retval;
+  utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
+    UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE);
+  return retval;
+}
diff --git a/utf8proc.h b/utf8proc.h
@@ -158,7 +158,10 @@ typedef enum {
   UTF8PROC_COMPOSE   = (1<<3),
   /** Return a result with decomposed characters. */
   UTF8PROC_DECOMPOSE = (1<<4),
-  /** Strip "default ignorable characters" such as SOFT-HYPHEN or ZERO-WIDTH-SPACE. */
+  /**
+   * Strip "default ignorable characters" such as SOFT-HYPHEN or
+   * ZERO-WIDTH-SPACE, along with unassigned codepoints.
+   */
   UTF8PROC_IGNORE    = (1<<5),
   /** Return an error, if the input contains unassigned codepoints. */
   UTF8PROC_REJECTNA  = (1<<6),
@@ -676,8 +679,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
 
 /** @name Unicode normalization
  *
- * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC
- * normalized version of the null-terminated string `str`.  These
+ * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD, NFKC or
+ * NFKC_Casefold normalized version of the null-terminated string `str`.  These
  * are shortcuts to calling @ref utf8proc_map with @ref UTF8PROC_NULLTERM
  * combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
  */
@@ -690,6 +693,11 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
 /** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
+/**
+ * NFKC_Casefold normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT
+ * and @ref UTF8PROC_CASEFOLD and @ref UTF8PROC_IGNORE).
+ **/
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_CF(const utf8proc_uint8_t *str);
 /** @} */
 
 #ifdef __cplusplus