From 87c88fcb27692a21060b9824d3d14b9fa2d22e60 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Sat, 29 Nov 2014 23:59:37 +0100 Subject: fs/Charset: move code to wrapper class IcuConverter Prepare for an ICU-based backend without GLib. --- src/fs/Charset.cxx | 77 ++++++++------------------------------------- src/lib/icu/Converter.cxx | 80 +++++++++++++++++++++++++++++++++++++++++++++++ src/lib/icu/Converter.hxx | 72 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 165 insertions(+), 64 deletions(-) create mode 100644 src/lib/icu/Converter.cxx create mode 100644 src/lib/icu/Converter.hxx (limited to 'src') diff --git a/src/fs/Charset.cxx b/src/fs/Charset.cxx index fb7313a35..453962c1f 100644 --- a/src/fs/Charset.cxx +++ b/src/fs/Charset.cxx @@ -23,12 +23,8 @@ #include "Limits.hxx" #include "Log.hxx" #include "Traits.hxx" +#include "lib/icu/Converter.hxx" #include "util/Error.hxx" -#include "util/Domain.hxx" - -#ifdef HAVE_GLIB -#include <glib.h> -#endif #include <algorithm> @@ -37,49 +33,19 @@ #ifdef HAVE_FS_CHARSET -static constexpr Domain convert_domain("convert"); - -/** - * Maximal number of bytes required to represent path name in UTF-8 - * (including nul-terminator). - * This value is a rought estimate of upper bound. - * It's based on path name limit in bytes (MPD_PATH_MAX) - * and assumption that some weird encoding could represent some UTF-8 4 byte - * sequences with single byte. 
- */ -static constexpr size_t MPD_PATH_MAX_UTF8 = (MPD_PATH_MAX - 1) * 4 + 1; - static std::string fs_charset; -gcc_pure -static bool -CheckCharset(const char *charset, Error &error) -{ - /* convert a space to check if the charset is valid */ - GError *error2 = nullptr; - char *test = g_convert(" ", 1, charset, "UTF-8", nullptr, nullptr, &error2); - if (test == nullptr) { - error.Set(convert_domain, error2->code, error2->message); - g_error_free(error2); - return false; - } - - g_free(test); - return true; -} +static IcuConverter *fs_converter; bool SetFSCharset(const char *charset, Error &error) { assert(charset != nullptr); + assert(fs_converter == nullptr); - if (!CheckCharset(charset, error)) { - error.FormatPrefix("Failed to initialize filesystem charset '%s': ", - charset); + fs_converter = IcuConverter::Create(charset, error); + if (fs_converter == nullptr) return false; - } - - fs_charset = charset; FormatDebug(path_domain, "SetFSCharset: fs charset is: %s", fs_charset.c_str()); @@ -91,6 +57,10 @@ SetFSCharset(const char *charset, Error &error) void DeinitFSCharset() { +#ifdef HAVE_ICU_CONVERTER + delete fs_converter; + fs_converter = nullptr; +#endif } const char * @@ -122,7 +92,7 @@ PathToUTF8(const char *path_fs) assert(path_fs != nullptr); #ifdef HAVE_FS_CHARSET - if (fs_charset.empty()) { + if (fs_converter == nullptr) { #endif auto result = std::string(path_fs); FixSeparators(result); @@ -130,26 +100,7 @@ PathToUTF8(const char *path_fs) #ifdef HAVE_FS_CHARSET } - GIConv conv = g_iconv_open("utf-8", fs_charset.c_str()); - if (conv == reinterpret_cast<GIConv>(-1)) - return std::string(); - - // g_iconv() does not need nul-terminator, - // std::string could be created without it too. 
- char path_utf8[MPD_PATH_MAX_UTF8 - 1]; - char *in = const_cast<char *>(path_fs); - char *out = path_utf8; - size_t in_left = strlen(path_fs); - size_t out_left = sizeof(path_utf8); - - size_t ret = g_iconv(conv, &in, &in_left, &out, &out_left); - - g_iconv_close(conv); - - if (ret == static_cast<size_t>(-1) || in_left > 0) - return std::string(); - - auto result_path = std::string(path_utf8, sizeof(path_utf8) - out_left); + auto result_path = fs_converter->ToUTF8(path_fs); FixSeparators(result_path); return result_path; #endif @@ -162,12 +113,10 @@ PathFromUTF8(const char *path_utf8) { assert(path_utf8 != nullptr); - if (fs_charset.empty()) + if (fs_converter == nullptr) return path_utf8; - return g_convert(path_utf8, -1, - fs_charset.c_str(), "utf-8", - nullptr, nullptr, nullptr); + return fs_converter->FromUTF8(path_utf8); } #endif diff --git a/src/lib/icu/Converter.cxx b/src/lib/icu/Converter.cxx new file mode 100644 index 000000000..7378f8636 --- /dev/null +++ b/src/lib/icu/Converter.cxx @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2003-2014 The Music Player Daemon Project + * http://www.musicpd.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "config.h" +#include "Converter.hxx" +#include "util/Error.hxx" +#include "util/Domain.hxx" + +#include <string.h> + +#ifdef HAVE_GLIB +static constexpr Domain g_iconv_domain("g_iconv"); +#endif + +#ifdef HAVE_ICU_CONVERTER + +IcuConverter * +IcuConverter::Create(const char *charset, Error &error) +{ + GIConv to = g_iconv_open("utf-8", charset); + GIConv from = g_iconv_open(charset, "utf-8"); + if (to == (GIConv)-1 || from == (GIConv)-1) { + if (to != (GIConv)-1) + g_iconv_close(to); + if (from != (GIConv)-1) + g_iconv_close(from); + error.Format(g_iconv_domain, + "Failed to initialize charset '%s'", charset); + return nullptr; + } + + return new IcuConverter(to, from); +} + +static std::string +DoConvert(GIConv conv, const char *src) +{ + char buffer[4096]; + char *in = const_cast<char *>(src); + char *out = buffer; + size_t in_left = strlen(src); + size_t out_left = sizeof(buffer); + + size_t n = g_iconv(conv, &in, &in_left, &out, &out_left); + + if (n == static_cast<size_t>(-1) || in_left > 0) + return std::string(); + + return std::string(buffer, sizeof(buffer) - out_left); +} + +std::string +IcuConverter::ToUTF8(const char *s) const +{ + return DoConvert(to_utf8, s); +} + +std::string +IcuConverter::FromUTF8(const char *s) const +{ + return DoConvert(from_utf8, s); +} + +#endif diff --git a/src/lib/icu/Converter.hxx b/src/lib/icu/Converter.hxx new file mode 100644 index 000000000..f20dc6f7c --- /dev/null +++ b/src/lib/icu/Converter.hxx @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2003-2014 The Music Player Daemon Project + * http://www.musicpd.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MPD_ICU_CONVERTER_HXX +#define MPD_ICU_CONVERTER_HXX + +#include "check.h" +#include "Compiler.h" + +#ifdef HAVE_GLIB +#include <glib.h> +#define HAVE_ICU_CONVERTER +#endif + +#ifdef HAVE_ICU_CONVERTER + +#include <string> + +class Error; + +/** + * This class can convert strings with a certain character set to and + * from UTF-8. + */ +class IcuConverter { + const GIConv to_utf8, from_utf8; + + IcuConverter(GIConv _to, GIConv _from) + :to_utf8(_to), from_utf8(_from) {} + +public: + ~IcuConverter() { + g_iconv_close(to_utf8); + g_iconv_close(from_utf8); + } + + static IcuConverter *Create(const char *charset, Error &error); + + /** + * Convert the string to UTF-8. + * Returns empty string on error. + */ + gcc_pure gcc_nonnull_all + std::string ToUTF8(const char *s) const; + + /** + * Convert the string from UTF-8. + * Returns empty string on error. + */ + gcc_pure gcc_nonnull_all + std::string FromUTF8(const char *s) const; +}; + +#endif + +#endif -- cgit v1.2.3