about | summary | refs | log | tree | commit | diff | stats
path: root/src/util/Tokenizer.cxx
diff options
context:
space:
mode:
author: Max Kellermann <max@duempel.org> 2013-04-08 23:51:39 +0200
committer: Max Kellermann <max@duempel.org> 2013-04-08 23:51:39 +0200
commit: 450c26c4713a2de440296260e8d0be8e6b283d75 (patch)
tree: 21ad06ced3adbe9c79e47c7b759a9d6f64f6dd02 /src/util/Tokenizer.cxx
parent: 7ec1121cc832086f533dd0adfcb581e16c1e312d (diff)
download: mpd-450c26c4713a2de440296260e8d0be8e6b283d75.tar.gz
mpd-450c26c4713a2de440296260e8d0be8e6b283d75.tar.xz
mpd-450c26c4713a2de440296260e8d0be8e6b283d75.zip
tokenizer: convert to C++
Diffstat (limited to 'src/util/Tokenizer.cxx')
-rw-r--r-- src/util/Tokenizer.cxx 202
1 files changed, 202 insertions, 0 deletions
diff --git a/src/util/Tokenizer.cxx b/src/util/Tokenizer.cxx
new file mode 100644
index 000000000..9ade0d1b1
--- /dev/null
+++ b/src/util/Tokenizer.cxx
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2003-2013 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+#include "Tokenizer.hxx"
+#include "string_util.h"
+
+#include <glib.h>
+
+#include <stdbool.h>
+#include <assert.h>
+#include <string.h>
+
+/**
+ * Returns the GQuark that this file passes to g_set_error() as the
+ * error domain of every GError it creates.
+ */
+G_GNUC_CONST
+static GQuark
+tokenizer_quark()
+{
+	const GQuark domain = g_quark_from_static_string("tokenizer");
+	return domain;
+}
+
+/**
+ * Checks whether the given character may start a "word": it must
+ * satisfy g_ascii_isalpha().
+ */
+static inline bool
+valid_word_first_char(char ch)
+{
+	return g_ascii_isalpha(ch) != 0;
+}
+
+/**
+ * Checks whether the given character may appear inside a "word"
+ * (after the first character): an underscore, or anything accepted
+ * by g_ascii_isalnum().
+ */
+static inline bool
+valid_word_char(char ch)
+{
+	if (ch == '_')
+		return true;
+
+	return g_ascii_isalnum(ch) != 0;
+}
+
+/**
+ * Reads the next "word" from the input buffer.  The first character
+ * must pass valid_word_first_char(), all following ones
+ * valid_word_char().  The word is terminated in-place: the
+ * whitespace character that ends it is overwritten with a null
+ * byte, and the input pointer is moved behind the following spaces.
+ *
+ * @param error_r location to store the error, or nullptr to ignore it
+ * @return a pointer to the null-terminated word inside the input
+ * buffer; nullptr at the end of the input (no error is set) or on
+ * error (error_r is set)
+ */
+char *
+Tokenizer::NextWord(GError **error_r)
+{
+	char *const start = input;
+
+	if (*start == 0)
+		/* nothing left to read */
+		return nullptr;
+
+	if (!valid_word_first_char(*start)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Letter expected");
+		return nullptr;
+	}
+
+	/* scan the rest of the word; the loop ends either at the
+	   null terminator or at the first whitespace */
+
+	for (++input; *input != 0; ++input) {
+		const char ch = *input;
+
+		if (g_ascii_isspace(ch)) {
+			/* terminate the word here and skip the
+			   whitespace that follows it */
+			*input = 0;
+			input = strchug_fast(input + 1);
+			break;
+		}
+
+		if (!valid_word_char(ch)) {
+			/* "input" is left pointing at the offending
+			   character */
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Invalid word character");
+			return nullptr;
+		}
+	}
+
+	/* if the loop ran into the end of the string, the word is
+	   already null-terminated */
+
+	return start;
+}
+
+/**
+ * Checks whether the given character may appear in an unquoted
+ * parameter: its unsigned value must be above 0x20 (the space
+ * character), and it must be neither a double nor a single quote.
+ */
+static inline bool
+valid_unquoted_char(char ch)
+{
+	if (ch == '"' || ch == '\'')
+		return false;
+
+	/* cast to unsigned so bytes >= 0x80 are accepted even where
+	   plain "char" is signed */
+	return (unsigned char)ch > 0x20;
+}
+
+/**
+ * Reads the next unquoted parameter from the input buffer.  Every
+ * character must pass valid_unquoted_char().  The parameter is
+ * terminated in-place: the whitespace character that ends it is
+ * overwritten with a null byte, and the input pointer is moved
+ * behind the following spaces.
+ *
+ * @param error_r location to store the error, or nullptr to ignore it
+ * @return a pointer to the null-terminated parameter inside the
+ * input buffer; nullptr at the end of the input (no error is set)
+ * or on error (error_r is set)
+ */
+char *
+Tokenizer::NextUnquoted(GError **error_r)
+{
+	char *const start = input;
+
+	if (*start == 0)
+		/* nothing left to read */
+		return nullptr;
+
+	if (!valid_unquoted_char(*start)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Invalid unquoted character");
+		return nullptr;
+	}
+
+	/* scan the rest of the parameter; the loop ends either at
+	   the null terminator or at the first whitespace */
+
+	for (++input; *input != 0; ++input) {
+		const char ch = *input;
+
+		if (g_ascii_isspace(ch)) {
+			/* terminate the parameter here and skip the
+			   whitespace that follows it */
+			*input = 0;
+			input = strchug_fast(input + 1);
+			break;
+		}
+
+		if (!valid_unquoted_char(ch)) {
+			/* "input" is left pointing at the offending
+			   character */
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Invalid unquoted character");
+			return nullptr;
+		}
+	}
+
+	/* if the loop ran into the end of the string, the parameter
+	   is already null-terminated */
+
+	return start;
+}
+
+/**
+ * Reads the next double-quoted string from the input buffer.  The
+ * string is unescaped in-place: a backslash makes the following
+ * character literal, and the result is written back to the start of
+ * the token (overwriting the opening quote).  The closing quote
+ * must be followed by whitespace or the end of the input.
+ *
+ * Note that the buffer may already have been modified when an error
+ * is detected.
+ *
+ * @param error_r location to store the error, or nullptr to ignore it
+ * @return a pointer to the null-terminated, unescaped string inside
+ * the input buffer; nullptr at the end of the input (no error is
+ * set) or on error (error_r is set)
+ */
+char *
+Tokenizer::NextString(GError **error_r)
+{
+	char *const start = input;
+
+	if (*input == 0)
+		/* end of line */
+		return nullptr;
+
+	/* the token must begin with a double quote */
+
+	if (*input != '"') {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "'\"' expected");
+		return nullptr;
+	}
+
+	/* the write position always stays behind the read position
+	   ("input"), so copying within the same buffer is safe */
+	char *out = start;
+
+	/* skip the opening quote and copy until the closing one */
+
+	++input;
+
+	for (;;) {
+		char ch = *input;
+		if (ch == '"')
+			break;
+
+		if (ch == '\\')
+			/* escape: take the next character literally */
+			ch = *++input;
+
+		if (ch == 0) {
+			/* step back by one so "input" does not point
+			   at the null terminator; this lets the
+			   caller distinguish "error" from "end of
+			   line" */
+			--input;
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Missing closing '\"'");
+			return nullptr;
+		}
+
+		*out++ = ch;
+		++input;
+	}
+
+	/* step over the closing quote; only whitespace or the end of
+	   the line may follow */
+
+	const char after = *++input;
+	if (after != 0 && !g_ascii_isspace(after)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Space expected after closing '\"'");
+		return nullptr;
+	}
+
+	/* terminate the unescaped string and move on to the next
+	   token */
+
+	*out = 0;
+	input = strchug_fast(input);
+	return start;
+}
+
+/**
+ * Reads the next parameter, which is either a quoted string or an
+ * unquoted token.  If the input begins with a double quote, this
+ * delegates to NextString(), otherwise to NextUnquoted().
+ *
+ * @param error_r location to store the error, or nullptr to ignore it
+ * @return the return value of the method this call was delegated to
+ */
+char *
+Tokenizer::NextParam(GError **error_r)
+{
+	return *input == '"'
+		? NextString(error_r)
+		: NextUnquoted(error_r);
+}