diff options
Diffstat (limited to '')
-rw-r--r-- | src/tag/TagString.cxx | 97 |
1 files changed, 55 insertions, 42 deletions
diff --git a/src/tag/TagString.cxx b/src/tag/TagString.cxx index 9ab095249..4f07cd62a 100644 --- a/src/tag/TagString.cxx +++ b/src/tag/TagString.cxx @@ -1,5 +1,5 @@ /* - * Copyright (C) 2003-2013 The Music Player Daemon Project + * Copyright (C) 2003-2014 The Music Player Daemon Project * http://www.musicpd.org * * This program is free software; you can redistribute it and/or modify @@ -19,52 +19,69 @@ #include "config.h" #include "TagString.hxx" - -#include <glib.h> +#include "util/Alloc.hxx" +#include "util/WritableBuffer.hxx" +#include "util/UTF8.hxx" #include <assert.h> #include <string.h> +#include <stdlib.h> + +gcc_pure +static const char * +FindInvalidUTF8(const char *p, const char *const end) +{ + while (p < end) { + const size_t s = SequenceLengthUTF8(*p); + if (p + s > end) + /* partial sequence at end of string */ + return p; + + /* now call the other SequenceLengthUTF8() overload + which also validates the continuations */ + const size_t t = SequenceLengthUTF8(p); + assert(s == t); + if (t == 0) + return p; + + p += s; + } + + return nullptr; +} /** * Replace invalid sequences with the question mark. */ -static char * -patch_utf8(const char *src, size_t length, const gchar *end) +static WritableBuffer<char> +patch_utf8(const char *src, size_t length, const char *_invalid) { /* duplicate the string, and replace invalid bytes in that buffer */ - char *dest = g_strndup(src, length); + char *dest = (char *)xmemdup(src, length); + char *const end = dest + length; + char *invalid = dest + (_invalid - src); do { - dest[end - src] = '?'; - } while (!g_utf8_validate(end + 1, (src + length) - (end + 1), &end)); + *invalid = '?'; + + const char *__invalid = FindInvalidUTF8(invalid + 1, end); + invalid = const_cast<char *>(__invalid); + } while (invalid != nullptr); - return dest; + return { dest, length }; } -static char * +static WritableBuffer<char> fix_utf8(const char *str, size_t length) { - const gchar *end; - char *temp; - gsize written; - - assert(str != nullptr); - /* check if the string is already valid UTF-8 */ - if (g_utf8_validate(str, length, &end)) + const char *invalid = FindInvalidUTF8(str, str + length); + if (invalid == nullptr) return nullptr; - /* no, it's not - try to import it from ISO-Latin-1 */ - temp = g_convert(str, length, "utf-8", "iso-8859-1", - nullptr, &written, nullptr); - if (temp != nullptr) - /* success! */ - return temp; - - /* no, still broken - there's no medication, just patch - invalid sequences */ - return patch_utf8(str, length, end); + /* no, broken - patch invalid sequences */ + return patch_utf8(str, length, invalid); } static bool @@ -87,40 +104,36 @@ find_non_printable(const char *p, size_t length) * Clears all non-printable characters, convert them to space. * Returns nullptr if nothing needs to be cleared. */ -static char * +static WritableBuffer<char> clear_non_printable(const char *p, size_t length) { const char *first = find_non_printable(p, length); - char *dest; - if (first == nullptr) return nullptr; - dest = g_strndup(p, length); + char *dest = (char *)xmemdup(p, length); for (size_t i = first - p; i < length; ++i) if (char_is_non_printable(dest[i])) dest[i] = ' '; - return dest; + return { dest, length }; } -char * +WritableBuffer<char> FixTagString(const char *p, size_t length) { - char *utf8, *cleared; - - utf8 = fix_utf8(p, length); - if (utf8 != nullptr) { - p = utf8; - length = strlen(p); + auto utf8 = fix_utf8(p, length); + if (!utf8.IsNull()) { + p = utf8.data; + length = utf8.size; } - cleared = clear_non_printable(p, length); - if (cleared == nullptr) + WritableBuffer<char> cleared = clear_non_printable(p, length); + if (cleared.IsNull()) cleared = utf8; else - g_free(utf8); + free(utf8.data); return cleared; } |