diff options
Diffstat (limited to 'src/lib/icu')
-rw-r--r-- | src/lib/icu/Collate.cxx | 45 | ||||
-rw-r--r-- | src/lib/icu/Converter.cxx | 169 | ||||
-rw-r--r-- | src/lib/icu/Converter.hxx | 95 | ||||
-rw-r--r-- | src/lib/icu/Util.cxx | 72 | ||||
-rw-r--r-- | src/lib/icu/Util.hxx | 44 |
5 files changed, 381 insertions, 44 deletions
diff --git a/src/lib/icu/Collate.cxx b/src/lib/icu/Collate.cxx index b8560a4d8..1dde5d5e2 100644 --- a/src/lib/icu/Collate.cxx +++ b/src/lib/icu/Collate.cxx @@ -21,6 +21,7 @@ #include "Collate.hxx" #ifdef HAVE_ICU +#include "Util.hxx" #include "Error.hxx" #include "util/WritableBuffer.hxx" #include "util/ConstBuffer.hxx" @@ -71,50 +72,6 @@ IcuCollateFinish() ucol_close(collator); } -static WritableBuffer<UChar> -UCharFromUTF8(const char *src) -{ - assert(src != nullptr); - - const size_t src_length = strlen(src); - const size_t dest_capacity = src_length; - UChar *dest = new UChar[dest_capacity]; - - UErrorCode error_code = U_ZERO_ERROR; - int32_t dest_length; - u_strFromUTF8(dest, dest_capacity, &dest_length, - src, src_length, - &error_code); - if (U_FAILURE(error_code)) { - delete[] dest; - return nullptr; - } - - return { dest, size_t(dest_length) }; -} - -static WritableBuffer<char> -UCharToUTF8(ConstBuffer<UChar> src) -{ - assert(!src.IsNull()); - - /* worst-case estimate */ - size_t dest_capacity = 4 * src.size; - - char *dest = new char[dest_capacity]; - - UErrorCode error_code = U_ZERO_ERROR; - int32_t dest_length; - u_strToUTF8(dest, dest_capacity, &dest_length, src.data, src.size, - &error_code); - if (U_FAILURE(error_code)) { - delete[] dest; - return nullptr; - } - - return { dest, size_t(dest_length) }; -} - #endif gcc_pure diff --git a/src/lib/icu/Converter.cxx b/src/lib/icu/Converter.cxx new file mode 100644 index 000000000..bb170a071 --- /dev/null +++ b/src/lib/icu/Converter.cxx @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2003-2014 The Music Player Daemon Project + * http://www.musicpd.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "config.h" +#include "Converter.hxx" +#include "Error.hxx" +#include "util/Error.hxx" +#include "util/Macros.hxx" +#include "util/WritableBuffer.hxx" +#include "util/ConstBuffer.hxx" + +#include <string.h> + +#ifdef HAVE_ICU +#include "Util.hxx" +#include <unicode/ucnv.h> +#elif defined(HAVE_GLIB) +#include "util/Domain.hxx" +static constexpr Domain g_iconv_domain("g_iconv"); +#endif + +#ifdef HAVE_ICU + +IcuConverter::~IcuConverter() +{ + ucnv_close(converter); +} + +#endif + +#ifdef HAVE_ICU_CONVERTER + +IcuConverter * +IcuConverter::Create(const char *charset, Error &error) +{ +#ifdef HAVE_ICU + UErrorCode code = U_ZERO_ERROR; + UConverter *converter = ucnv_open(charset, &code); + if (converter == nullptr) { + error.Format(icu_domain, int(code), + "Failed to initialize charset '%s': %s", + charset, u_errorName(code)); + return nullptr; + } + + return new IcuConverter(converter); +#elif defined(HAVE_GLIB) + GIConv to = g_iconv_open("utf-8", charset); + GIConv from = g_iconv_open(charset, "utf-8"); + if (to == (GIConv)-1 || from == (GIConv)-1) { + if (to != (GIConv)-1) + g_iconv_close(to); + if (from != (GIConv)-1) + g_iconv_close(from); + error.Format(g_iconv_domain, + "Failed to initialize charset '%s'", charset); + return nullptr; + } + + return new IcuConverter(to, from); +#endif +} + +#ifdef HAVE_ICU +#elif defined(HAVE_GLIB) + +static std::string +DoConvert(GIConv conv, const char *src) +{ + // TODO: dynamic buffer? + char buffer[4096]; + char *in = const_cast<char *>(src); + char *out = buffer; + size_t in_left = strlen(src); + size_t out_left = sizeof(buffer); + + size_t n = g_iconv(conv, &in, &in_left, &out, &out_left); + + if (n == static_cast<size_t>(-1) || in_left > 0) + return std::string(); + + return std::string(buffer, sizeof(buffer) - out_left); +} + +#endif + +std::string +IcuConverter::ToUTF8(const char *s) const +{ +#ifdef HAVE_ICU + const ScopeLock protect(mutex); + + ucnv_resetToUnicode(converter); + + // TODO: dynamic buffer? + UChar buffer[4096], *target = buffer; + const char *source = s; + + UErrorCode code = U_ZERO_ERROR; + + ucnv_toUnicode(converter, &target, buffer + ARRAY_SIZE(buffer), + &source, source + strlen(source), + nullptr, true, &code); + if (code != U_ZERO_ERROR) + return std::string(); + + const size_t target_length = target - buffer; + const auto u = UCharToUTF8({buffer, target_length}); + if (u.IsNull()) + return std::string(); + + std::string result(u.data, u.size); + delete[] u.data; + return result; + +#elif defined(HAVE_GLIB) + return DoConvert(to_utf8, s); +#endif +} + +std::string +IcuConverter::FromUTF8(const char *s) const +{ +#ifdef HAVE_ICU + const ScopeLock protect(mutex); + + const auto u = UCharFromUTF8(s); + if (u.IsNull()) + return std::string(); + + ucnv_resetFromUnicode(converter); + + // TODO: dynamic buffer? + char buffer[4096], *target = buffer; + const UChar *source = u.data; + UErrorCode code = U_ZERO_ERROR; + + ucnv_fromUnicode(converter, &target, buffer + ARRAY_SIZE(buffer), + &source, u.end(), + nullptr, true, &code); + delete[] u.data; + + if (code != U_ZERO_ERROR) + return std::string(); + + return std::string(buffer, target); + +#elif defined(HAVE_GLIB) + return DoConvert(from_utf8, s); +#endif +} + +#endif diff --git a/src/lib/icu/Converter.hxx b/src/lib/icu/Converter.hxx new file mode 100644 index 000000000..26eccfe94 --- /dev/null +++ b/src/lib/icu/Converter.hxx @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2003-2014 The Music Player Daemon Project + * http://www.musicpd.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPD_ICU_CONVERTER_HXX +#define MPD_ICU_CONVERTER_HXX + +#include "check.h" +#include "Compiler.h" + +#ifdef HAVE_ICU +#include "thread/Mutex.hxx" +#define HAVE_ICU_CONVERTER +#elif defined(HAVE_GLIB) +#include <glib.h> +#define HAVE_ICU_CONVERTER +#endif + +#ifdef HAVE_ICU_CONVERTER + +#include <string> + +class Error; + +#ifdef HAVE_ICU +struct UConverter; +#endif + +/** + * This class can convert strings with a certain character set to and + * from UTF-8. + */ +class IcuConverter { +#ifdef HAVE_ICU + /** + * ICU's UConverter class is not thread-safe. This mutex + * serializes simultaneous calls. + */ + mutable Mutex mutex; + + UConverter *const converter; + + IcuConverter(UConverter *_converter):converter(_converter) {} +#elif defined(HAVE_GLIB) + const GIConv to_utf8, from_utf8; + + IcuConverter(GIConv _to, GIConv _from) + :to_utf8(_to), from_utf8(_from) {} +#endif + +public: +#ifdef HAVE_ICU + ~IcuConverter(); +#elif defined(HAVE_GLIB) + ~IcuConverter() { + g_iconv_close(to_utf8); + g_iconv_close(from_utf8); + } +#endif + + static IcuConverter *Create(const char *charset, Error &error); + + /** + * Convert the string to UTF-8. + * Returns empty string on error. + */ + gcc_pure gcc_nonnull_all + std::string ToUTF8(const char *s) const; + + /** + * Convert the string from UTF-8. + * Returns empty string on error. + */ + gcc_pure gcc_nonnull_all + std::string FromUTF8(const char *s) const; +}; + +#endif + +#endif diff --git a/src/lib/icu/Util.cxx b/src/lib/icu/Util.cxx new file mode 100644 index 000000000..a18043c03 --- /dev/null +++ b/src/lib/icu/Util.cxx @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2003-2014 The Music Player Daemon Project + * http://www.musicpd.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "config.h" +#include "Util.hxx" +#include "util/WritableBuffer.hxx" +#include "util/ConstBuffer.hxx" + +#include <unicode/ustring.h> + +#include <assert.h> +#include <string.h> + +WritableBuffer<UChar> +UCharFromUTF8(const char *src) +{ + assert(src != nullptr); + + const size_t src_length = strlen(src); + const size_t dest_capacity = src_length; + UChar *dest = new UChar[dest_capacity]; + + UErrorCode error_code = U_ZERO_ERROR; + int32_t dest_length; + u_strFromUTF8(dest, dest_capacity, &dest_length, + src, src_length, + &error_code); + if (U_FAILURE(error_code)) { + delete[] dest; + return nullptr; + } + + return { dest, size_t(dest_length) }; +} + +WritableBuffer<char> +UCharToUTF8(ConstBuffer<UChar> src) +{ + assert(!src.IsNull()); + + /* worst-case estimate */ + size_t dest_capacity = 4 * src.size; + + char *dest = new char[dest_capacity]; + + UErrorCode error_code = U_ZERO_ERROR; + int32_t dest_length; + u_strToUTF8(dest, dest_capacity, &dest_length, src.data, src.size, + &error_code); + if (U_FAILURE(error_code)) { + delete[] dest; + return nullptr; + } + + return { dest, size_t(dest_length) }; +} diff --git a/src/lib/icu/Util.hxx b/src/lib/icu/Util.hxx new file mode 100644 index 000000000..ce80bb3fd --- /dev/null +++ b/src/lib/icu/Util.hxx @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2003-2014 The Music Player Daemon Project + * http://www.musicpd.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPD_ICU_UTIL_HXX +#define MPD_ICU_UTIL_HXX + +#include "check.h" + +#include <unicode/utypes.h> + +template<typename T> struct WritableBuffer; +template<typename T> struct ConstBuffer; + +/** + * Wrapper for u_strFromUTF8(). The returned pointer must be freed + * with delete[]. + */ +WritableBuffer<UChar> +UCharFromUTF8(const char *src); + +/** + * Wrapper for u_strToUTF8(). The returned pointer must be freed with + * delete[]. + */ +WritableBuffer<char> +UCharToUTF8(ConstBuffer<UChar> src); + +#endif |