diff options
-rw-r--r-- | Makefile.am | 4 | ||||
-rw-r--r-- | src/fs/Charset.hxx | 2 | ||||
-rw-r--r-- | src/lib/icu/Converter.cxx | 92 | ||||
-rw-r--r-- | src/lib/icu/Converter.hxx | 25 |
4 files changed, 119 insertions, 4 deletions
diff --git a/Makefile.am b/Makefile.am index f126efeee..c6aab247b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -465,7 +465,11 @@ libicu_a_CPPFLAGS = $(AM_CPPFLAGS) \ $(ICU_CFLAGS) ICU_LDADD = libicu.a $(ICU_LIBS) + +if HAVE_ICU +else ICU_LDADD += $(GLIB_LIBS) +endif # PCM library diff --git a/src/fs/Charset.hxx b/src/fs/Charset.hxx index 80f510ce0..f1d5f3bbf 100644 --- a/src/fs/Charset.hxx +++ b/src/fs/Charset.hxx @@ -25,7 +25,7 @@ #include <string> -#ifdef HAVE_GLIB +#if defined(HAVE_ICU) || defined(HAVE_GLIB) #define HAVE_FS_CHARSET #endif diff --git a/src/lib/icu/Converter.cxx b/src/lib/icu/Converter.cxx index 1c6874065..bb170a071 100644 --- a/src/lib/icu/Converter.cxx +++ b/src/lib/icu/Converter.cxx @@ -19,20 +19,48 @@ #include "config.h" #include "Converter.hxx" +#include "Error.hxx" #include "util/Error.hxx" -#include "util/Domain.hxx" +#include "util/Macros.hxx" +#include "util/WritableBuffer.hxx" +#include "util/ConstBuffer.hxx" #include <string.h> -#ifdef HAVE_GLIB +#ifdef HAVE_ICU +#include "Util.hxx" +#include <unicode/ucnv.h> +#elif defined(HAVE_GLIB) +#include "util/Domain.hxx" static constexpr Domain g_iconv_domain("g_iconv"); #endif +#ifdef HAVE_ICU + +IcuConverter::~IcuConverter() +{ + ucnv_close(converter); +} + +#endif + #ifdef HAVE_ICU_CONVERTER IcuConverter * IcuConverter::Create(const char *charset, Error &error) { +#ifdef HAVE_ICU + UErrorCode code = U_ZERO_ERROR; + UConverter *converter = ucnv_open(charset, &code); + if (converter == nullptr) { + error.Format(icu_domain, int(code), + "Failed to initialize charset '%s': %s", + charset, u_errorName(code)); + return nullptr; + } + + return new IcuConverter(converter); +#elif defined(HAVE_GLIB) GIConv to = g_iconv_open("utf-8", charset); GIConv from = g_iconv_open(charset, "utf-8"); if (to == (GIConv)-1 || from == (GIConv)-1) { @@ -46,8 +74,12 @@ IcuConverter::Create(const char *charset, Error &error) } return new IcuConverter(to, from); +#endif } +#ifdef HAVE_ICU +#elif defined(HAVE_GLIB) + static std::string DoConvert(GIConv conv, const char *src) { @@ -66,16 +98,72 @@ DoConvert(GIConv conv, const char *src) return std::string(buffer, sizeof(buffer) - out_left); } +#endif + std::string IcuConverter::ToUTF8(const char *s) const { +#ifdef HAVE_ICU + const ScopeLock protect(mutex); + + ucnv_resetToUnicode(converter); + + // TODO: dynamic buffer? + UChar buffer[4096], *target = buffer; + const char *source = s; + + UErrorCode code = U_ZERO_ERROR; + + ucnv_toUnicode(converter, &target, buffer + ARRAY_SIZE(buffer), + &source, source + strlen(source), + nullptr, true, &code); + if (code != U_ZERO_ERROR) + return std::string(); + + const size_t target_length = target - buffer; + const auto u = UCharToUTF8({buffer, target_length}); + if (u.IsNull()) + return std::string(); + + std::string result(u.data, u.size); + delete[] u.data; + return result; + +#elif defined(HAVE_GLIB) return DoConvert(to_utf8, s); +#endif } std::string IcuConverter::FromUTF8(const char *s) const { +#ifdef HAVE_ICU + const ScopeLock protect(mutex); + + const auto u = UCharFromUTF8(s); + if (u.IsNull()) + return std::string(); + + ucnv_resetFromUnicode(converter); + + // TODO: dynamic buffer? + char buffer[4096], *target = buffer; + const UChar *source = u.data; + UErrorCode code = U_ZERO_ERROR; + + ucnv_fromUnicode(converter, &target, buffer + ARRAY_SIZE(buffer), + &source, u.end(), + nullptr, true, &code); + delete[] u.data; + + if (code != U_ZERO_ERROR) + return std::string(); + + return std::string(buffer, target); + +#elif defined(HAVE_GLIB) return DoConvert(from_utf8, s); +#endif } #endif diff --git a/src/lib/icu/Converter.hxx b/src/lib/icu/Converter.hxx index f20dc6f7c..26eccfe94 100644 --- a/src/lib/icu/Converter.hxx +++ b/src/lib/icu/Converter.hxx @@ -23,7 +23,10 @@ #include "check.h" #include "Compiler.h" -#ifdef HAVE_GLIB +#ifdef HAVE_ICU +#include "thread/Mutex.hxx" +#define HAVE_ICU_CONVERTER +#elif defined(HAVE_GLIB) #include <glib.h> #define HAVE_ICU_CONVERTER #endif @@ -34,21 +37,41 @@ class Error; +#ifdef HAVE_ICU +struct UConverter; +#endif + /** * This class can convert strings with a certain character set to and * from UTF-8. */ class IcuConverter { +#ifdef HAVE_ICU + /** + * ICU's UConverter class is not thread-safe. This mutex + * serializes simultaneous calls. + */ + mutable Mutex mutex; + + UConverter *const converter; + + IcuConverter(UConverter *_converter):converter(_converter) {} +#elif defined(HAVE_GLIB) const GIConv to_utf8, from_utf8; IcuConverter(GIConv _to, GIConv _from) :to_utf8(_to), from_utf8(_from) {} +#endif public: +#ifdef HAVE_ICU + ~IcuConverter(); +#elif defined(HAVE_GLIB) ~IcuConverter() { g_iconv_close(to_utf8); g_iconv_close(from_utf8); } +#endif static IcuConverter *Create(const char *charset, Error &error); |