diff options
author | Max Kellermann <max@duempel.org> | 2009-07-19 15:11:36 +0200 |
---|---|---|
committer | Max Kellermann <max@duempel.org> | 2009-07-19 15:11:36 +0200 |
commit | 1745e68795b84aa84a2f148985e03d6fce6bddba (patch) | |
tree | 721dc4ad410b60b376c33cb740b53d8c8bc2713a | |
parent | 809c96b53f265fd59e9f660bac1990bf19089ba2 (diff) | |
download | mpd-1745e68795b84aa84a2f148985e03d6fce6bddba.tar.gz mpd-1745e68795b84aa84a2f148985e03d6fce6bddba.tar.xz mpd-1745e68795b84aa84a2f148985e03d6fce6bddba.zip |
tokenizer: new library replacing buffer2array()
The new code is more robust and more flexible. It provides detailed
error information in GError objects.
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | src/tokenizer.c | 167 | ||||
-rw-r--r-- | src/tokenizer.h | 68 |
3 files changed, 237 insertions, 0 deletions
diff --git a/Makefile.am b/Makefile.am
index 5c53ca5c1..fa14c6a1d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -153,6 +153,7 @@ mpd_headers = \
 	src/tag_id3.h \
 	src/tag_print.h \
 	src/tag_save.h \
+	src/tokenizer.h \
 	src/strset.h \
 	src/uri.h \
 	src/utils.h \
@@ -249,6 +250,7 @@ src_mpd_SOURCES = \
 	src/tag_pool.c \
 	src/tag_print.c \
 	src/tag_save.c \
+	src/tokenizer.c \
 	src/strset.c \
 	src/uri.c \
 	src/utils.c \
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 000000000..635d507df
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2003-2009 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "tokenizer.h"
+
+#include <stdbool.h>
+#include <assert.h>
+#include <string.h>
+
+G_GNUC_CONST
+static GQuark
+tokenizer_quark(void)
+{
+	return g_quark_from_static_string("tokenizer");
+}
+
+static inline bool
+valid_word_first_char(char ch)
+{
+	return g_ascii_isalpha(ch);
+}
+
+static inline bool
+valid_word_char(char ch)
+{
+	return g_ascii_isalnum(ch) || ch == '_';
+}
+
+char *
+tokenizer_next_word(char **input_p, GError **error_r)
+{
+	char *word, *input;
+
+	assert(input_p != NULL);
+	assert(*input_p != NULL);
+
+	word = input = *input_p;
+
+	if (*input == 0)
+		return NULL;
+
+	/* check the first character */
+
+	if (!valid_word_first_char(*input)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Letter expected");
+		return NULL;
+	}
+
+	/* now iterate over the other characters until we find a
+	   whitespace or end-of-string */
+
+	while (*++input != 0) {
+		if (g_ascii_isspace(*input)) {
+			/* a whitespace: the word ends here */
+			*input = 0;
+			/* skip all following spaces, too */
+			input = g_strchug(input + 1);
+			break;
+		}
+
+		if (!valid_word_char(*input)) {
+			*input_p = input;
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Invalid word character");
+			return NULL;
+		}
+	}
+
+	/* end of string: the string is already null-terminated
+	   here */
+
+	*input_p = input;
+	return word;
+}
+
+char *
+tokenizer_next_string(char **input_p, GError **error_r)
+{
+	char *word, *dest, *input;
+
+	assert(input_p != NULL);
+	assert(*input_p != NULL);
+
+	word = dest = input = *input_p;
+
+	if (*input == 0)
+		/* end of line */
+		return NULL;
+
+	/* check for the opening " */
+
+	if (*input != '"') {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "'\"' expected");
+		return NULL;
+	}
+
+	++input;
+
+	/* copy all characters */
+
+	while (*input != '"') {
+		if (*input == '\\')
+			/* the backslash escapes the following
+			   character */
+			++input;
+
+		if (*input == 0) {
+			/* return input-1 so the caller can see the
+			   difference between "end of line" and
+			   "error" */
+			*input_p = input - 1;
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Missing closing '\"'");
+			return NULL;
+		}
+
+		/* copy one character */
+		*dest++ = *input++;
+	}
+
+	/* the following character must be a whitespace (or end of
+	   line) */
+
+	++input;
+	if (*input != 0 && !g_ascii_isspace(*input)) {
+		*input_p = input;
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Space expected after closing '\"'");
+		return NULL;
+	}
+
+	/* finish the string and return it */
+
+	*dest = 0;
+	*input_p = g_strchug(input);
+	return word;
+}
+
+char *
+tokenizer_next_word_or_string(char **input_p, GError **error_r)
+{
+	assert(input_p != NULL);
+	assert(*input_p != NULL);
+
+	if (**input_p == '"')
+		return tokenizer_next_string(input_p, error_r);
+	else
+		return tokenizer_next_word(input_p, error_r);
+}
diff --git a/src/tokenizer.h b/src/tokenizer.h
new file mode 100644
index 000000000..e0238f0af
--- /dev/null
+++ b/src/tokenizer.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2003-2009 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPD_TOKENIZER_H
+#define MPD_TOKENIZER_H
+
+#include <glib.h>
+
+/**
+ * Reads the next word from the input string. This function modifies
+ * the input string.
+ *
+ * @param input_p the input string; this function returns a pointer to
+ * the first non-whitespace character of the following token
+ * @param error_r if this function returns NULL and **input_p!=0, it
+ * optionally provides a GError object in this argument
+ * @return a pointer to the null-terminated word, or NULL on error or
+ * end of line
+ */
+char *
+tokenizer_next_word(char **input_p, GError **error_r);
+
+/**
+ * Reads the next quoted string from the input string. A backslash
+ * escapes the following character. This function modifies the input
+ * string.
+ *
+ * @param input_p the input string; this function returns a pointer to
+ * the first non-whitespace character of the following token
+ * @param error_r if this function returns NULL and **input_p!=0, it
+ * optionally provides a GError object in this argument
+ * @return a pointer to the null-terminated string, or NULL on error
+ * or end of line
+ */
+char *
+tokenizer_next_string(char **input_p, GError **error_r);
+
+/**
+ * Reads the next word or quoted string from the input. This is a
+ * wrapper for tokenizer_next_word() and tokenizer_next_string().
+ *
+ * @param input_p the input string; this function returns a pointer to
+ * the first non-whitespace character of the following token
+ * @param error_r if this function returns NULL and **input_p!=0, it
+ * optionally provides a GError object in this argument
+ * @return a pointer to the null-terminated string, or NULL on error
+ * or end of line
+ */
+char *
+tokenizer_next_word_or_string(char **input_p, GError **error_r);
+
+#endif
|