aboutsummaryrefslogtreecommitdiffstats
path: root/trunk/src/utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/src/utf8.c')
-rw-r--r--trunk/src/utf8.c148
1 files changed, 148 insertions, 0 deletions
diff --git a/trunk/src/utf8.c b/trunk/src/utf8.c
new file mode 100644
index 000000000..2061a78de
--- /dev/null
+++ b/trunk/src/utf8.c
@@ -0,0 +1,148 @@
+/* the Music Player Daemon (MPD)
+ * Copyright (C) 2003-2007 by Warren Dukes (warren.dukes@gmail.com)
+ * This project's homepage is: http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "utf8.h"
+#include "utils.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+static char *latin1ToUtf8(char c)
+{
+ static unsigned char utf8[3];
+ unsigned char uc = c;
+
+ memset(utf8, 0, 3);
+
+ if (uc < 128)
+ utf8[0] = uc;
+ else if (uc < 192) {
+ utf8[0] = 194;
+ utf8[1] = uc;
+ } else {
+ utf8[0] = 195;
+ utf8[1] = uc - 64;
+ }
+
+ return (char *)utf8;
+}
+
+char *latin1StrToUtf8Dup(char *latin1)
+{
+ /* utf8 should have at most two char's per latin1 char */
+ int len = strlen(latin1) * 2 + 1;
+ char *ret = xmalloc(len);
+ char *cp = ret;
+ char *utf8;
+
+ memset(ret, 0, len);
+
+ len = 0;
+
+ while (*latin1) {
+ utf8 = latin1ToUtf8(*latin1);
+ while (*utf8) {
+ *(cp++) = *(utf8++);
+ len++;
+ }
+ latin1++;
+ }
+
+ return xrealloc(ret, len + 1);
+}
+
+static char utf8ToLatin1(char *inUtf8)
+{
+ unsigned char c = 0;
+ unsigned char *utf8 = (unsigned char *)inUtf8;
+
+ if (utf8[0] < 128)
+ return utf8[0];
+ else if (utf8[0] == 195)
+ c += 64;
+ else if (utf8[0] != 194)
+ return '?';
+ return (char)(c + utf8[1]);
+}
+
+static int validateUtf8Char(char *inUtf8Char)
+{
+ unsigned char *utf8Char = (unsigned char *)inUtf8Char;
+
+ if (utf8Char[0] < 0x80)
+ return 1;
+
+ if (utf8Char[0] >= 0xC0 && utf8Char[0] <= 0xFD) {
+ int count = 1;
+ char t = 1 << 5;
+ int i;
+ while (count < 6 && (t & utf8Char[0])) {
+ t = (t >> 1);
+ count++;
+ }
+ if (count > 5)
+ return 0;
+ for (i = 1; i <= count; i++) {
+ if (utf8Char[i] < 0x80 || utf8Char[i] > 0xBF)
+ return 0;
+ }
+ return count + 1;
+ } else
+ return 0;
+}
+
+int validUtf8String(char *string)
+{
+ int ret;
+
+ while (*string) {
+ ret = validateUtf8Char(string);
+ if (0 == ret)
+ return 0;
+ string += ret;
+ }
+
+ return 1;
+}
+
+char *utf8StrToLatin1Dup(char *utf8)
+{
+ /* utf8 should have at most two char's per latin1 char */
+ int len = strlen(utf8) + 1;
+ char *ret = xmalloc(len);
+ char *cp = ret;
+ int count;
+
+ memset(ret, 0, len);
+
+ len = 0;
+
+ while (*utf8) {
+ count = validateUtf8Char(utf8);
+ if (!count) {
+ free(ret);
+ return NULL;
+ }
+ *(cp++) = utf8ToLatin1(utf8);
+ utf8 += count;
+ len++;
+ }
+
+ return xrealloc(ret, len + 1);
+}