diff options
author | Max Kellermann <max@duempel.org> | 2008-08-29 09:38:58 +0200 |
---|---|---|
committer | Max Kellermann <max@duempel.org> | 2008-08-29 09:38:58 +0200 |
commit | 43c389b961c609f9c705cfe14ed429082ac9115a (patch) | |
tree | 61e93ef93115a5f96c5981cae1fd7a36e789d9f7 | |
parent | 92b757674ebcf5cf90e8adb66e7583edf1bc604e (diff) | |
download | mpd-43c389b961c609f9c705cfe14ed429082ac9115a.tar.gz mpd-43c389b961c609f9c705cfe14ed429082ac9115a.tar.xz mpd-43c389b961c609f9c705cfe14ed429082ac9115a.zip |
added "length" parameter to validUtf8String()
In several places, we create temporary copies of non-null-terminated
strings just to pass them to functions like validUtf8String(). We can
avoid these temporary allocations, and the heap fragmentation they
cause, by adding a length parameter instead of expecting a
null-terminated string.
Diffstat (limited to '')
-rw-r--r-- | src/path.c | 2 | ||||
-rw-r--r-- | src/tag.c | 2 | ||||
-rw-r--r-- | src/utf8.c | 18 | ||||
-rw-r--r-- | src/utf8.h | 4 |
4 files changed, 16 insertions, 10 deletions
diff --git a/src/path.c b/src/path.c index 6aaff84cf..ceb00c5de 100644 --- a/src/path.c +++ b/src/path.c @@ -47,7 +47,7 @@ static char *path_conv_charset(char *dest, const char *to, char *fs_charset_to_utf8(char *dst, const char *str) { char *ret = path_conv_charset(dst, "UTF-8", fsCharset, str); - return (ret && !validUtf8String(ret)) ? NULL : ret; + return (ret && !validUtf8String(ret, strlen(ret))) ? NULL : ret; } char *utf8_to_fs_charset(char *dst, const char *str) diff --git a/src/tag.c b/src/tag.c --- a/src/tag.c +++ b/src/tag.c @@ -352,7 +352,7 @@ static inline char *fix_utf8(char *str) { assert(str != NULL); - if (validUtf8String(str)) + if (validUtf8String(str, strlen(str))) return str; DEBUG("not valid utf8 in tag: %s\n",str); diff --git a/src/utf8.c b/src/utf8.c index e8f3dbdde..1b03f5d20 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -69,10 +69,12 @@ static char utf8_to_latin1_char(const char *inUtf8) return (char)(c + utf8[1]); } -static unsigned int validateUtf8Char(const char *inUtf8Char) +static unsigned int validateUtf8Char(const char *inUtf8Char, size_t length) { const unsigned char *utf8Char = (const unsigned char *)inUtf8Char; + assert(length > 0); + if (utf8Char[0] < 0x80) return 1; @@ -84,7 +86,7 @@ static unsigned int validateUtf8Char(const char *inUtf8Char) t = (t >> 1); count++; } - if (count > 5) + if (count > 5 || (size_t)count > length) return 0; for (i = 1; i <= count; i++) { if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF) @@ -95,15 +97,17 @@ static unsigned int validateUtf8Char(const char *inUtf8Char) return 0; } -int validUtf8String(const char *string) +int validUtf8String(const char *string, size_t length) { unsigned int ret; - while (*string) { - ret = validateUtf8Char(string); + while (length > 0) { + ret = validateUtf8Char(string, length); + assert((size_t)ret <= length); if (0 == ret) return 0; string += ret; + length -= ret; } return 1; @@ -118,7 +122,7 @@ char *utf8StrToLatin1Dup(const char *utf8) size_t len = 0; while (*utf8) { - count = validateUtf8Char(utf8); + count = validateUtf8Char(utf8, 
INT_MAX); if (!count) { free(ret); return NULL; @@ -140,7 +144,7 @@ char *utf8_to_latin1(char *dest, const char *utf8) size_t len = 0; while (*utf8) { - count = validateUtf8Char(utf8); + count = validateUtf8Char(utf8, INT_MAX); if (count) { *(cp++) = utf8_to_latin1_char(utf8); utf8 += count; diff --git a/src/utf8.h b/src/utf8.h index 4a4983064..353977bef 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -19,11 +19,13 @@ #ifndef UTF_8_H #define UTF_8_H +#include <os_compat.h> + char *latin1StrToUtf8Dup(const char *latin1); char *utf8StrToLatin1Dup(const char *utf8); -int validUtf8String(const char *string); +int validUtf8String(const char *string, size_t length); char *utf8_to_latin1(char *dest, const char *utf8); |