aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- Makefile.am 4
-rw-r--r-- src/fs/Charset.hxx 2
-rw-r--r-- src/lib/icu/Converter.cxx 92
-rw-r--r-- src/lib/icu/Converter.hxx 25
4 files changed, 119 insertions, 4 deletions
diff --git a/Makefile.am b/Makefile.am
index f126efeee..c6aab247b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -465,7 +465,11 @@ libicu_a_CPPFLAGS = $(AM_CPPFLAGS) \
$(ICU_CFLAGS)
ICU_LDADD = libicu.a $(ICU_LIBS)
+
+if HAVE_ICU
+else
ICU_LDADD += $(GLIB_LIBS)
+endif
# PCM library
diff --git a/src/fs/Charset.hxx b/src/fs/Charset.hxx
index 80f510ce0..f1d5f3bbf 100644
--- a/src/fs/Charset.hxx
+++ b/src/fs/Charset.hxx
@@ -25,7 +25,7 @@
#include <string>
-#ifdef HAVE_GLIB
+#if defined(HAVE_ICU) || defined(HAVE_GLIB)
#define HAVE_FS_CHARSET
#endif
diff --git a/src/lib/icu/Converter.cxx b/src/lib/icu/Converter.cxx
index 1c6874065..bb170a071 100644
--- a/src/lib/icu/Converter.cxx
+++ b/src/lib/icu/Converter.cxx
@@ -19,20 +19,48 @@
#include "config.h"
#include "Converter.hxx"
+#include "Error.hxx"
#include "util/Error.hxx"
-#include "util/Domain.hxx"
+#include "util/Macros.hxx"
+#include "util/WritableBuffer.hxx"
+#include "util/ConstBuffer.hxx"
#include <string.h>
-#ifdef HAVE_GLIB
+#ifdef HAVE_ICU
+#include "Util.hxx"
+#include <unicode/ucnv.h>
+#elif defined(HAVE_GLIB)
+#include "util/Domain.hxx"
static constexpr Domain g_iconv_domain("g_iconv");
#endif
+#ifdef HAVE_ICU
+
+IcuConverter::~IcuConverter()
+{
+ ucnv_close(converter); // closes the UConverter that Create() opened with ucnv_open()
+}
+
+#endif
+
#ifdef HAVE_ICU_CONVERTER
IcuConverter *
IcuConverter::Create(const char *charset, Error &error)
{
+#ifdef HAVE_ICU
+ UErrorCode code = U_ZERO_ERROR;
+ UConverter *converter = ucnv_open(charset, &code);
+ if (converter == nullptr) {
+ error.Format(icu_domain, int(code),
+ "Failed to initialize charset '%s': %s",
+ charset, u_errorName(code));
+ return nullptr;
+ }
+
+ return new IcuConverter(converter);
+#elif defined(HAVE_GLIB)
GIConv to = g_iconv_open("utf-8", charset);
GIConv from = g_iconv_open(charset, "utf-8");
if (to == (GIConv)-1 || from == (GIConv)-1) {
@@ -46,8 +74,12 @@ IcuConverter::Create(const char *charset, Error &error)
}
return new IcuConverter(to, from);
+#endif
}
+#ifdef HAVE_ICU
+#elif defined(HAVE_GLIB)
+
static std::string
DoConvert(GIConv conv, const char *src)
{
@@ -66,16 +98,72 @@ DoConvert(GIConv conv, const char *src)
return std::string(buffer, sizeof(buffer) - out_left);
}
+#endif
+
+/**
+ * Convert the null-terminated string #s from this converter's
+ * character set to UTF-8.
+ *
+ * With ICU, the input is first decoded to UChar and then encoded as
+ * UTF-8; an empty string is returned if either step fails.
+ */
std::string
IcuConverter::ToUTF8(const char *s) const
{
+#ifdef HAVE_ICU
+ /* a UConverter must not be used by two threads at once, so all
+ calls on this instance are serialized */
+ const ScopeLock protect(mutex);
+
+ /* discard state left over from a previous conversion */
+ ucnv_resetToUnicode(converter);
+
+ // TODO: dynamic buffer?
+ /* the output is limited to 4096 UChars; per the ICU documentation,
+ ucnv_toUnicode() sets U_BUFFER_OVERFLOW_ERROR when the target is
+ full, which ends up as an empty result below */
+ UChar buffer[4096], *target = buffer;
+ const char *source = s;
+
+ UErrorCode code = U_ZERO_ERROR;
+
+ /* flush=true: #s is the complete input, no more data follows */
+ ucnv_toUnicode(converter, &target, buffer + ARRAY_SIZE(buffer),
+ &source, source + strlen(source),
+ nullptr, true, &code);
+ /* ICU warnings are non-zero UErrorCode values as well; only
+ U_FAILURE() codes mean that the conversion did not succeed */
+ if (U_FAILURE(code))
+ return std::string();
+
+ const size_t target_length = target - buffer;
+ const auto u = UCharToUTF8({buffer, target_length});
+ if (u.IsNull())
+ return std::string();
+
+ /* copy the UTF-8 bytes, then release the buffer returned by
+ UCharToUTF8() */
+ std::string result(u.data, u.size);
+ delete[] u.data;
+ return result;
+
+#elif defined(HAVE_GLIB)
return DoConvert(to_utf8, s);
+#endif
}
+/**
+ * Convert the UTF-8 string #s to this converter's character set.
+ *
+ * With ICU, the input is first decoded to UChar and then encoded
+ * with the converter; an empty string is returned if
+ * UCharFromUTF8() yields a null buffer or the conversion fails.
+ */
std::string
IcuConverter::FromUTF8(const char *s) const
{
+#ifdef HAVE_ICU
+ /* a UConverter must not be used by two threads at once, so all
+ calls on this instance are serialized */
+ const ScopeLock protect(mutex);
+
+ /* the buffer returned here is released with delete[] below */
+ const auto u = UCharFromUTF8(s);
+ if (u.IsNull())
+ return std::string();
+
+ /* discard state left over from a previous conversion */
+ ucnv_resetFromUnicode(converter);
+
+ // TODO: dynamic buffer?
+ /* the output is limited to 4096 bytes; per the ICU documentation,
+ ucnv_fromUnicode() sets U_BUFFER_OVERFLOW_ERROR when the target
+ is full, which ends up as an empty result below */
+ char buffer[4096], *target = buffer;
+ const UChar *source = u.data;
+ UErrorCode code = U_ZERO_ERROR;
+
+ /* flush=true: the UChar buffer is the complete input */
+ ucnv_fromUnicode(converter, &target, buffer + ARRAY_SIZE(buffer),
+ &source, u.end(),
+ nullptr, true, &code);
+ /* the intermediate buffer is no longer needed, whether or not the
+ conversion succeeded */
+ delete[] u.data;
+
+ /* ICU warnings are non-zero UErrorCode values as well; only
+ U_FAILURE() codes mean that the conversion did not succeed */
+ if (U_FAILURE(code))
+ return std::string();
+
+ /* "target" points just past the last byte written */
+ return std::string(buffer, target);
+
+#elif defined(HAVE_GLIB)
return DoConvert(from_utf8, s);
+#endif
}
#endif
diff --git a/src/lib/icu/Converter.hxx b/src/lib/icu/Converter.hxx
index f20dc6f7c..26eccfe94 100644
--- a/src/lib/icu/Converter.hxx
+++ b/src/lib/icu/Converter.hxx
@@ -23,7 +23,10 @@
#include "check.h"
#include "Compiler.h"
-#ifdef HAVE_GLIB
+#ifdef HAVE_ICU
+#include "thread/Mutex.hxx"
+#define HAVE_ICU_CONVERTER
+#elif defined(HAVE_GLIB)
#include <glib.h>
#define HAVE_ICU_CONVERTER
#endif
@@ -34,21 +37,41 @@
class Error;
+#ifdef HAVE_ICU
+struct UConverter;
+#endif
+
/**
* This class can convert strings with a certain character set to and
* from UTF-8.
*/
class IcuConverter {
+#ifdef HAVE_ICU
+ /**
+ * ICU's UConverter class is not thread-safe. This mutex
+ * serializes simultaneous calls.
+ */
+ mutable Mutex mutex;
+
+ UConverter *const converter;
+
+ IcuConverter(UConverter *_converter):converter(_converter) {}
+#elif defined(HAVE_GLIB)
const GIConv to_utf8, from_utf8;
IcuConverter(GIConv _to, GIConv _from)
:to_utf8(_to), from_utf8(_from) {}
+#endif
public:
+#ifdef HAVE_ICU
+ ~IcuConverter();
+#elif defined(HAVE_GLIB)
~IcuConverter() {
g_iconv_close(to_utf8);
g_iconv_close(from_utf8);
}
+#endif
static IcuConverter *Create(const char *charset, Error &error);