From 07c4a01f9fa55b620c9bf87a4e64e45de22b7e5f Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Fri, 29 Aug 2008 09:38:58 +0200 Subject: added "length" parameter to validUtf8String() At several places, we create temporary copies of non-null-terminated strings, just to use them in functions like validUtf8String(). We can save this temporary allocation and avoid heap fragmentation if we add a length parameter instead of expecting a null-terminated string. --- src/path.c | 2 +- src/tag.c | 2 +- src/utf8.c | 18 +++++++++++------- src/utf8.h | 4 +++- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/path.c b/src/path.c index 6aaff84cf..ceb00c5de 100644 --- a/src/path.c +++ b/src/path.c @@ -47,7 +47,7 @@ static char *path_conv_charset(char *dest, const char *to, char *fs_charset_to_utf8(char *dst, const char *str) { char *ret = path_conv_charset(dst, "UTF-8", fsCharset, str); - return (ret && !validUtf8String(ret)) ? NULL : ret; + return (ret && !validUtf8String(ret, strlen(ret))) ? NULL : ret; } char *utf8_to_fs_charset(char *dst, const char *str) diff --git a/src/tag.c b/src/tag.c index 927f1b789..b93456641 100644 --- a/src/tag.c +++ b/src/tag.c @@ -352,7 +352,7 @@ static inline char *fix_utf8(char *str) { assert(str != NULL); - if (validUtf8String(str)) + if (validUtf8String(str, strlen(str))) return str; DEBUG("not valid utf8 in tag: %s\n",str); diff --git a/src/utf8.c b/src/utf8.c index e8f3dbdde..1b03f5d20 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -69,10 +69,12 @@ static char utf8_to_latin1_char(const char *inUtf8) return (char)(c + utf8[1]); } -static unsigned int validateUtf8Char(const char *inUtf8Char) +static unsigned int validateUtf8Char(const char *inUtf8Char, size_t length) { const unsigned char *utf8Char = (const unsigned char *)inUtf8Char; + assert(length > 0); + if (utf8Char[0] < 0x80) return 1; @@ -84,7 +86,7 @@ static unsigned int validateUtf8Char(const char *inUtf8Char) t = (t >> 1); count++; } - if (count > 5) + if (count > 5 || (size_t)count > length) return 0; for (i = 1; i <= count; i++) { if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF) @@ -95,15 +97,17 @@ static unsigned int validateUtf8Char(const char *inUtf8Char) return 0; } -int validUtf8String(const char *string) +int validUtf8String(const char *string, size_t length) { unsigned int ret; - while (*string) { - ret = validateUtf8Char(string); + while (length > 0) { + ret = validateUtf8Char(string, length); + assert((size_t)ret <= length); if (0 == ret) return 0; string += ret; + length -= ret; } return 1; @@ -118,7 +122,7 @@ char *utf8StrToLatin1Dup(const char *utf8) size_t len = 0; while (*utf8) { - count = validateUtf8Char(utf8); + count = validateUtf8Char(utf8, INT_MAX); if (!count) { free(ret); return NULL; @@ -140,7 +144,7 @@ char *utf8_to_latin1(char *dest, const char *utf8) size_t len = 0; while (*utf8) { - count = validateUtf8Char(utf8); + count = validateUtf8Char(utf8, INT_MAX); if (count) { *(cp++) = utf8_to_latin1_char(utf8); utf8 += count; diff --git a/src/utf8.h b/src/utf8.h index 4a4983064..353977bef 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -19,11 +19,13 @@ #ifndef UTF_8_H #define UTF_8_H +#include + char *latin1StrToUtf8Dup(const char *latin1); char *utf8StrToLatin1Dup(const char *utf8); -int validUtf8String(const char *string); +int validUtf8String(const char *string, size_t length); char *utf8_to_latin1(char *dest, const char *utf8); -- cgit v1.2.3