Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ endif()
if(UTF8PROC_ENABLE_TESTING)
enable_testing()
file(MAKE_DIRECTORY data)
set(UNICODE_VERSION 16.0.0)
set(UNICODE_VERSION 17.0.0)
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt ${CMAKE_BINARY_DIR}/data/NormalizationTest.txt SHOW_PROGRESS)
file(DOWNLOAD https://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt ${CMAKE_BINARY_DIR}/data/GraphemeBreakTest.txt SHOW_PROGRESS)
add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ The C library is found in this directory after successful compilation
and is named `libutf8proc.a` (for the static library) and
`libutf8proc.so` (for the dynamic library).

The Unicode version supported is 16.0.0.
The Unicode version supported is 17.0.0.

For Unicode normalizations, the following options are used:

Expand Down
2 changes: 1 addition & 1 deletion data/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ utf8proc_data.c.new: data_generator.jl $(RAWDATA)
$(JULIA) --project=. data_generator.jl > $@

# Unicode data version (must also update utf8proc_unicode_version function)
UNICODE_VERSION=16.0.0
UNICODE_VERSION=17.0.0

UnicodeData.txt:
$(CURL) $(CURLFLAGS) -o $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
Expand Down
7 changes: 7 additions & 0 deletions data/download.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
# Download the Unicode Character Database files listed below into the
# current directory using wget.
#
# Usage: download.sh [UNICODE_VERSION]
#   UNICODE_VERSION  Unicode release to fetch (default: 17.0.0).
#                    NOTE(review): presumably this should stay in sync with
#                    UNICODE_VERSION in data/Makefile -- confirm.
#
# Exits with a non-zero status as soon as any download fails, instead of
# silently continuing with an incomplete set of data files.
set -eu

UNICODE_VERSION="${1:-17.0.0}"
BASE_URL="https://www.unicode.org/Public/${UNICODE_VERSION}/ucd"

# Paths are relative to the ucd/ directory of the chosen release; wget stores
# each file under its base name in the current directory.
for ucd_file in \
    CaseFolding.txt \
    CompositionExclusions.txt \
    DerivedCoreProperties.txt \
    EastAsianWidth.txt \
    UnicodeData.txt \
    auxiliary/GraphemeBreakProperty.txt \
    emoji/emoji-data.txt
do
    wget "${BASE_URL}/${ucd_file}"
done
Comment thread
eschnett marked this conversation as resolved.
Outdated
5 changes: 3 additions & 2 deletions test/graphemetest.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ void checkline(const char *_buf, bool verbose) {
bi += 1;
}
else { /* hex-encoded codepoint */
size_t len = encode((unsigned char*) (src + si), buf + bi) - 1;
while (src[si]) ++si; /* advance to NUL termination */
Comment thread
stevengj marked this conversation as resolved.
size_t dest_len;
size_t len = encode((unsigned char*) (src + si), &dest_len, buf + bi) - 1;
si += dest_len; /* advance to NUL termination */
bi += len;
}
}
Expand Down
5 changes: 3 additions & 2 deletions test/iscase.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ int read_range(FILE *f, utf8proc_int32_t *start, utf8proc_int32_t *end)
size_t len = simple_getline(buf, f);
size_t pos = skipspaces(buf, 0);
unsigned char s[16];
size_t s_len;
if (pos == len || buf[pos] == '#') return 0;
pos += encode(s, buf + pos) - 1;
pos += encode(s, &s_len, buf + pos) - 1;
check(s[0], "invalid line %s in data", buf);
utf8proc_iterate((utf8proc_uint8_t*) s, -1, start);
if (buf[pos] == '.' && buf[pos+1] == '.') {
encode(s, buf + pos + 2);
encode(s, &s_len, buf + pos + 2);
check(s[0], "invalid line %s in data", buf);
utf8proc_iterate((utf8proc_uint8_t*) s, -1, end);
}
Expand Down
11 changes: 6 additions & 5 deletions test/normtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ int main(int argc, char **argv)

if (buf[0] == '#') continue;

offset = encode(source, buf);
offset += encode(NFC, buf + offset);
offset += encode(NFD, buf + offset);
offset += encode(NFKC, buf + offset);
offset += encode(NFKD, buf + offset);
size_t len;
offset = encode(source, &len, buf);
offset += encode(NFC, &len, buf + offset);
offset += encode(NFD, &len, buf + offset);
offset += encode(NFKC, &len, buf + offset);
offset += encode(NFKD, &len, buf + offset);

CHECK_NORM(NFC, NFC, source);
CHECK_NORM(NFC, NFC, NFC);
Expand Down
3 changes: 2 additions & 1 deletion test/tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ size_t skipspaces(const unsigned char *buf, size_t i)
separated by whitespace, and terminated by any character not in
[0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
in dest and its length in bytes (excluding the NUL terminator) in
*dest_len, returning the number of bytes read from buf */
size_t encode(unsigned char *dest, const unsigned char *buf)
size_t encode(unsigned char *dest, size_t *dest_len, const unsigned char *buf)
{
size_t i = 0, j;
utf8proc_ssize_t d = 0;
Expand All @@ -38,6 +38,7 @@ size_t encode(unsigned char *dest, const unsigned char *buf)
; /* find end of hex input */
if (j == i) { /* no codepoint found */
dest[d] = 0; /* NUL-terminate destination string */
*dest_len = (size_t)d;
return i + 1;
}
check(sscanf((char *) (buf + i), "%x", (unsigned int *)&c) == 1, "invalid hex input %s", buf+i);
Expand Down
2 changes: 1 addition & 1 deletion test/tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ extern size_t lineno;

void check(int cond, const char *format, ...);
size_t skipspaces(const unsigned char *buf, size_t i);
size_t encode(unsigned char *dest, const unsigned char *buf);
size_t encode(unsigned char *dest, size_t *dest_len, const unsigned char *buf);
size_t simple_getline(unsigned char buf[8192], FILE *f);
2 changes: 1 addition & 1 deletion utf8proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
}

UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
return "16.0.0";
return "17.0.0";
}

UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
Expand Down
Loading
Loading