aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMax Kellermann <max@duempel.org>2009-07-19 15:11:36 +0200
committerMax Kellermann <max@duempel.org>2009-07-19 15:11:36 +0200
commit1745e68795b84aa84a2f148985e03d6fce6bddba (patch)
tree721dc4ad410b60b376c33cb740b53d8c8bc2713a /src
parent809c96b53f265fd59e9f660bac1990bf19089ba2 (diff)
downloadmpd-1745e68795b84aa84a2f148985e03d6fce6bddba.tar.gz
mpd-1745e68795b84aa84a2f148985e03d6fce6bddba.tar.xz
mpd-1745e68795b84aa84a2f148985e03d6fce6bddba.zip
tokenizer: new library replacing buffer2array()
The new code is more robust and more flexible. It provides detailed error information in GError objects.
Diffstat (limited to '')
-rw-r--r--src/tokenizer.c167
-rw-r--r--src/tokenizer.h68
2 files changed, 235 insertions, 0 deletions
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 000000000..635d507df
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2003-2009 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "tokenizer.h"
+
+#include <stdbool.h>
+#include <assert.h>
+#include <string.h>
+
+/** Returns the GError domain under which this file reports its errors. */
+G_GNUC_CONST static GQuark tokenizer_quark(void)
+{
+	const GQuark domain = g_quark_from_static_string("tokenizer");
+	return domain;
+}
+
+/** Tells whether a word may begin with this character (an ASCII letter). */
+static inline bool valid_word_first_char(char ch)
+{
+	return g_ascii_isalpha(ch) != 0;
+}
+
+/** Tells whether this character may continue a word: [A-Za-z0-9_]. */
+static inline bool valid_word_char(char ch)
+{
+	return ch == '_' || g_ascii_isalnum(ch) != 0;
+}
+
+/* Splits the next word off *input_p in place; see tokenizer.h for the
+   contract.  Whitespace after the word is skipped by advancing the
+   pointer, so the rest of the buffer is never moved. */
+char *
+tokenizer_next_word(char **input_p, GError **error_r)
+{
+	char *word, *input;
+
+	assert(input_p != NULL);
+	assert(*input_p != NULL);
+
+	word = input = *input_p;
+
+	if (*input == 0)
+		/* end of line: no more tokens, and not an error */
+		return NULL;
+
+	/* a word has to begin with a letter */
+	if (!valid_word_first_char(*input)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Letter expected");
+		return NULL;
+	}
+
+	/* scan the rest of the word up to whitespace or end-of-string */
+	while (*++input != 0) {
+		if (g_ascii_isspace(*input)) {
+			/* a whitespace: terminate the word here */
+			*input++ = 0;
+			/* step over the remaining whitespace; g_strchug()
+			   would memmove() the whole rest of the line */
+			while (g_ascii_isspace(*input))
+				++input;
+			break;
+		}
+
+		if (!valid_word_char(*input)) {
+			*input_p = input;
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Invalid word character");
+			return NULL;
+		}
+	}
+
+	/* if the loop hit end-of-string, the word is already terminated */
+	*input_p = input;
+	return word;
+}
+
+/* Unquotes the next "string" token in place; see tokenizer.h for the
+   contract.  Trailing whitespace is skipped by advancing the pointer,
+   so the rest of the buffer is never moved. */
+char *
+tokenizer_next_string(char **input_p, GError **error_r)
+{
+	char *word, *dest, *input;
+
+	assert(input_p != NULL);
+	assert(*input_p != NULL);
+
+	word = dest = input = *input_p;
+
+	if (*input == 0)
+		/* end of line */
+		return NULL;
+
+	/* the token has to start with the opening " */
+	if (*input != '"') {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "'\"' expected");
+		return NULL;
+	}
+
+	++input;
+
+	/* copy the contents down over the opening quote; dest always
+	   trails input, so the copy never overtakes unread text */
+	while (*input != '"') {
+		if (*input == '\\')
+			/* the backslash escapes the next character */
+			++input;
+
+		if (*input == 0) {
+			/* return input-1 so the caller can tell this
+			   error apart from a plain "end of line" */
+			*input_p = input - 1;
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Missing closing '\"'");
+			return NULL;
+		}
+
+		/* copy one character */
+		*dest++ = *input++;
+	}
+
+	/* the closing quote must be followed by whitespace or the end
+	   of the line */
+	++input;
+	if (*input != 0 && !g_ascii_isspace(*input)) {
+		*input_p = input;
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Space expected after closing '\"'");
+		return NULL;
+	}
+
+	/* finish the string and step over the whitespace behind it */
+	*dest = 0;
+	while (g_ascii_isspace(*input))
+		++input;
+	*input_p = input;
+	return word;
+}
+
+/* Picks the parser by the first character: '"' starts a quoted string. */
+char *
+tokenizer_next_word_or_string(char **input_p, GError **error_r)
+{
+	assert(input_p != NULL);
+	assert(*input_p != NULL);
+
+	return **input_p == '"'
+		? tokenizer_next_string(input_p, error_r)
+		: tokenizer_next_word(input_p, error_r);
+}
diff --git a/src/tokenizer.h b/src/tokenizer.h
new file mode 100644
index 000000000..e0238f0af
--- /dev/null
+++ b/src/tokenizer.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2003-2009 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPD_TOKENIZER_H
+#define MPD_TOKENIZER_H
+
+#include <glib.h>
+
+/**
+ * Reads the next word (a letter, then letters, digits or '_') from the
+ * input string.  This function modifies the input string.
+ *
+ * @param input_p in: the text to parse; out: on success, the first
+ * non-whitespace character of the following token
+ * @param error_r if this function returns NULL and **input_p!=0, it
+ * optionally provides a GError object in this argument
+ * @return a pointer to the null-terminated word, or NULL on error or
+ * end of line
+ */
+char *
+tokenizer_next_word(char **input_p, GError **error_r);
+
+/**
+ * Reads the next double-quoted string from the input string.  A
+ * backslash escapes the following character.  This function modifies
+ * the input string: the quotes and escaping backslashes are removed.
+ *
+ * @param input_p in: the text to parse; out: on success, the first
+ * non-whitespace character of the following token
+ * @param error_r if this function returns NULL and **input_p!=0, it
+ * optionally provides a GError object in this argument
+ * @return a pointer to the null-terminated string, or NULL on error
+ * or end of line
+ */
+char *
+tokenizer_next_string(char **input_p, GError **error_r);
+
+/**
+ * Reads the next token: a quoted string if the input starts with '"',
+ * else a word (see tokenizer_next_string(), tokenizer_next_word()).
+ *
+ * @param input_p in: the text to parse; out: on success, the first
+ * non-whitespace character of the following token
+ * @param error_r if this function returns NULL and **input_p!=0, it
+ * optionally provides a GError object in this argument
+ * @return a pointer to the null-terminated string, or NULL on error
+ * or end of line
+ */
+char *
+tokenizer_next_word_or_string(char **input_p, GError **error_r);
+
+#endif