aboutsummaryrefslogtreecommitdiffstats
path: root/src/util
diff options
context:
space:
mode:
author Max Kellermann <max@duempel.org> 2013-04-08 23:51:39 +0200
committer Max Kellermann <max@duempel.org> 2013-04-08 23:51:39 +0200
commit 450c26c4713a2de440296260e8d0be8e6b283d75 (patch)
tree 21ad06ced3adbe9c79e47c7b759a9d6f64f6dd02 /src/util
parent 7ec1121cc832086f533dd0adfcb581e16c1e312d (diff)
download:
mpd-450c26c4713a2de440296260e8d0be8e6b283d75.tar.gz
mpd-450c26c4713a2de440296260e8d0be8e6b283d75.tar.xz
mpd-450c26c4713a2de440296260e8d0be8e6b283d75.zip
tokenizer: convert to C++
Diffstat (limited to 'src/util')
-rw-r--r-- src/util/Tokenizer.cxx 202
-rw-r--r-- src/util/Tokenizer.hxx 99
2 files changed, 301 insertions, 0 deletions
diff --git a/src/util/Tokenizer.cxx b/src/util/Tokenizer.cxx
new file mode 100644
index 000000000..9ade0d1b1
--- /dev/null
+++ b/src/util/Tokenizer.cxx
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2003-2013 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+#include "Tokenizer.hxx"
+#include "string_util.h"
+
+#include <glib.h>
+
+#include <stdbool.h>
+#include <assert.h>
+#include <string.h>
+
+/**
+ * Returns the GQuark used as the error domain for every GError that
+ * the Tokenizer methods in this file create.
+ */
+G_GNUC_CONST
+static GQuark
+tokenizer_quark(void)
+{
+	const GQuark domain = g_quark_from_static_string("tokenizer");
+
+	return domain;
+}
+
+/**
+ * May a word start with this character?  Only ASCII letters are
+ * accepted.
+ */
+static inline bool
+valid_word_first_char(char ch)
+{
+	return g_ascii_isalpha(ch) != 0;
+}
+
+/**
+ * May this character appear after the first character of a word?
+ * ASCII letters, ASCII digits and the underscore are accepted.
+ */
+static inline bool
+valid_word_char(char ch)
+{
+	if (ch == '_')
+		return true;
+
+	return g_ascii_isalnum(ch) != 0;
+}
+
+/**
+ * Extracts the word at the current position: one ASCII letter
+ * followed by letters, digits or underscores.  The whitespace that
+ * terminates the word is overwritten with a null byte, and the
+ * position moves on past the whitespace that follows.
+ *
+ * @param error_r receives a GError when nullptr is returned because
+ * of a malformed word (not when the input was already at its end)
+ * @return the null-terminated word, or nullptr on error or end of
+ * input
+ */
+char *
+Tokenizer::NextWord(GError **error_r)
+{
+	char *const start = input;
+
+	/* nothing left to read: not an error, so no GError is set */
+	if (*input == '\0')
+		return nullptr;
+
+	/* a word must begin with a letter */
+	if (!valid_word_first_char(*input)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Letter expected");
+		return nullptr;
+	}
+
+	/* walk over the remaining characters of the word */
+	for (++input; *input != '\0'; ++input) {
+		const char ch = *input;
+
+		if (g_ascii_isspace(ch)) {
+			/* terminate the word here and continue
+			   behind the whitespace that follows */
+			*input = '\0';
+			input = strchug_fast(input + 1);
+			return start;
+		}
+
+		if (!valid_word_char(ch)) {
+			/* "input" stays on the offending character */
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Invalid word character");
+			return nullptr;
+		}
+	}
+
+	/* reached the end of the input: the word is already
+	   null-terminated by the input's own terminator */
+	return start;
+}
+
+/**
+ * May this character appear in an unquoted word?  Both quote
+ * characters are rejected, as is every byte that is not greater than
+ * 0x20 (the space character) when viewed as an unsigned value.
+ */
+static inline bool
+valid_unquoted_char(char ch)
+{
+	if (ch == '"' || ch == '\'')
+		return false;
+
+	return (unsigned char)ch > 0x20;
+}
+
+/**
+ * Extracts the unquoted word at the current position.  The
+ * whitespace that terminates the word is overwritten with a null
+ * byte, and the position moves on past the whitespace that follows.
+ *
+ * @param error_r receives a GError when nullptr is returned because
+ * of a character that is not allowed in an unquoted word (not when
+ * the input was already at its end)
+ * @return the null-terminated word, or nullptr on error or end of
+ * input
+ */
+char *
+Tokenizer::NextUnquoted(GError **error_r)
+{
+	char *const token = input;
+
+	/* end of input: report "no token" without an error */
+	if (*input == '\0')
+		return nullptr;
+
+	/* the very first character must already be acceptable */
+	if (!valid_unquoted_char(*input)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Invalid unquoted character");
+		return nullptr;
+	}
+
+	for (++input; *input != '\0'; ++input) {
+		/* whitespace is tested first: it ends the token
+		   instead of being reported as invalid */
+		if (g_ascii_isspace(*input)) {
+			*input = '\0';
+			input = strchug_fast(input + 1);
+			return token;
+		}
+
+		if (!valid_unquoted_char(*input)) {
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Invalid unquoted character");
+			return nullptr;
+		}
+	}
+
+	/* the token extends to the end of the input and shares its
+	   null terminator */
+	return token;
+}
+
+/**
+ * Extracts the double-quoted string at the current position and
+ * unescapes it in place: a backslash makes the following character
+ * literal.  The closing quote must be followed by whitespace or the
+ * end of the input.
+ *
+ * @param error_r receives a GError when nullptr is returned because
+ * of a malformed string (not when the input was already at its end)
+ * @return the null-terminated, unescaped string, or nullptr on error
+ * or end of input
+ */
+char *
+Tokenizer::NextString(GError **error_r)
+{
+	char *const result = input;
+	char *out = input;
+
+	if (*input == '\0')
+		/* end of line */
+		return nullptr;
+
+	/* a quoted string has to start with the opening quote */
+	if (*input != '"') {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "'\"' expected");
+		return nullptr;
+	}
+
+	/* step over the opening quote, then copy up to the closing
+	   one; "out" always trails "input", so the copy never
+	   clobbers characters that are still unread */
+	for (++input; *input != '"'; ++input) {
+		if (*input == '\\')
+			/* take the character behind the backslash
+			   literally */
+			++input;
+
+		if (*input == '\0') {
+			/* step back so "input" does not rest on the
+			   terminator: that lets the caller tell
+			   this error from a plain end of line */
+			--input;
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Missing closing '\"'");
+			return nullptr;
+		}
+
+		*out++ = *input;
+	}
+
+	/* "input" rests on the closing quote; look at what follows */
+	++input;
+
+	const char after = *input;
+	if (after != '\0' && !g_ascii_isspace(after)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Space expected after closing '\"'");
+		return nullptr;
+	}
+
+	/* terminate the unescaped copy and move to the next token */
+	*out = '\0';
+	input = strchug_fast(input);
+	return result;
+}
+
+/**
+ * Extracts the next parameter: a quoted string if the current
+ * character is a double quote, an unquoted word otherwise.
+ *
+ * @param error_r passed on to NextString() or NextUnquoted()
+ * @return whatever the selected method returns
+ */
+char *
+Tokenizer::NextParam(GError **error_r)
+{
+	return *input == '"'
+		? NextString(error_r)
+		: NextUnquoted(error_r);
+}
diff --git a/src/util/Tokenizer.hxx b/src/util/Tokenizer.hxx
new file mode 100644
index 000000000..da45348d4
--- /dev/null
+++ b/src/util/Tokenizer.hxx
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2003-2013 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPD_TOKENIZER_HXX
+#define MPD_TOKENIZER_HXX
+
+#include "gerror.h"
+/**
+ * Consumes a mutable, null-terminated input string token by token.
+ * The Next*() methods terminate each token in place and advance the
+ * current position.  Instances cannot be copied.
+ */
+class Tokenizer {
+ char *input; /**< current position: the not yet consumed rest of the input */
+
+public:
+ /**
+ * @param _input the input string; the contents will be
+ * modified by this class
+ */
+ constexpr Tokenizer(char *_input):input(_input) {}
+
+ Tokenizer(const Tokenizer &) = delete;
+ Tokenizer &operator=(const Tokenizer &) = delete;
+ /**
+ * Returns the part of the input string that has not been
+ * consumed yet.
+ */
+ char *Rest() {
+ return input;
+ }
+ /**
+ * Returns the character at the current position without
+ * consuming it.
+ */
+ char CurrentChar() const {
+ return *input;
+ }
+ /**
+ * Has the end of the input string (its null terminator) been
+ * reached?
+ */
+ bool IsEnd() const {
+ return CurrentChar() == 0;
+ }
+
+ /**
+ * Reads the next word: an ASCII letter followed by any number
+ * of ASCII letters, digits or underscores, terminated by
+ * whitespace or the end of the input.
+ *
+ * @param error_r if this function returns nullptr and the end
+ * of the input has not been reached (see IsEnd()), it
+ * optionally provides a GError object in this argument
+ * @return a pointer to the null-terminated word, or nullptr
+ * on error or end of line
+ */
+ char *NextWord(GError **error_r);
+
+ /**
+ * Reads the next unquoted word from the input string. The
+ * word may not contain quote characters (" or ') and ends at
+ * whitespace or at the end of the input.
+ *
+ * @param error_r if this function returns nullptr and the end
+ * of the input has not been reached (see IsEnd()), it
+ * optionally provides a GError object in this argument
+ * @return a pointer to the null-terminated word, or nullptr
+ * on error or end of line
+ */
+ char *NextUnquoted(GError **error_r);
+
+ /**
+ * Reads the next quoted string from the input string. A backslash
+ * escapes the following character. This function modifies the input
+ * string.
+ *
+ * On success, the whitespace after the closing quote is skipped, so
+ * the tokenizer is positioned at the following token.
+ *
+ * @param error_r if this function returns nullptr and the end of the
+ * input has not been reached (see IsEnd()), it optionally provides a
+ * GError object in this argument
+ * @return a pointer to the null-terminated string, or nullptr on error
+ * or end of line
+ */
+ char *NextString(GError **error_r);
+
+ /**
+ * Reads the next unquoted word or quoted string from the
+ * input. This is a wrapper for NextUnquoted() and
+ * NextString(): a leading double quote selects NextString().
+ *
+ * @param error_r if this function returns nullptr and the end
+ * of the input has not been reached (see IsEnd()), it
+ * optionally provides a GError object in this argument
+ * @return a pointer to the null-terminated string, or nullptr
+ * on error or end of line
+ */
+ char *NextParam(GError **error_r);
+};
+
+#endif