aboutsummaryrefslogtreecommitdiffstats
path: root/src/util/UTF8.hxx
diff options
context:
space:
mode:
Diffstat (limited to 'src/util/UTF8.hxx')
-rw-r--r--src/util/UTF8.hxx90
1 files changed, 90 insertions, 0 deletions
diff --git a/src/util/UTF8.hxx b/src/util/UTF8.hxx
new file mode 100644
index 000000000..82d324f3e
--- /dev/null
+++ b/src/util/UTF8.hxx
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2011-2014 Max Kellermann <max@duempel.org>
+ * http://www.musicpd.org
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UTF8_HXX
+#define UTF8_HXX
+
+#include "Compiler.h"
+
+#include <stddef.h>
+
+/**
+ * Is this a valid UTF-8 string?
+ */
+gcc_pure gcc_nonnull_all
+bool
+ValidateUTF8(const char *p);
+
+/**
+ * @return the number of the sequence beginning with the given
+ * character, or 0 if the character is not a valid start byte
+ */
+gcc_const
+size_t
+SequenceLengthUTF8(char ch);
+
+/**
+ * @return the number of the first sequence in the given string, or 0
+ * if the sequence is malformed
+ */
+gcc_pure
+size_t
+SequenceLengthUTF8(const char *p);
+
+/**
+ * Convert the specified string from ISO-8859-1 to UTF-8.
+ *
+ * @return the UTF-8 version of the source string; may return #src if
+ * there are no non-ASCII characters; returns nullptr if the destination
+ * buffer is too small
+ */
+gcc_pure gcc_nonnull_all
+const char *
+Latin1ToUTF8(const char *src, char *buffer, size_t buffer_size);
+
+/**
+ * Convert the specified Unicode character to UTF-8 and write it to
+ * the buffer. buffer must have a length of at least 6!
+ *
+ * @return a pointer to the buffer plus the added bytes(s)
+ */
+gcc_nonnull_all
+char *
+UnicodeToUTF8(unsigned ch, char *buffer);
+
+/**
+ * Returns the number of characters in the string. This is different
+ * from strlen(), which counts the number of bytes.
+ */
+gcc_pure gcc_nonnull_all
+size_t
+LengthUTF8(const char *p);
+
+#endif