From 1745e68795b84aa84a2f148985e03d6fce6bddba Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Sun, 19 Jul 2009 15:11:36 +0200 Subject: tokenizer: new library replacing buffer2array() The new code is more robust and more flexible. It provides detailed error information in GError objects. --- src/tokenizer.c | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 src/tokenizer.c (limited to 'src/tokenizer.c') diff --git a/src/tokenizer.c b/src/tokenizer.c new file mode 100644 index 000000000..635d507df --- /dev/null +++ b/src/tokenizer.c @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2003-2009 The Music Player Daemon Project + * http://www.musicpd.org + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "tokenizer.h" + +#include +#include +#include + +G_GNUC_CONST +static GQuark +tokenizer_quark(void) +{ + return g_quark_from_static_string("tokenizer"); +} + +static inline bool +valid_word_first_char(char ch) +{ + return g_ascii_isalpha(ch); +} + +static inline bool +valid_word_char(char ch) +{ + return g_ascii_isalnum(ch) || ch == '_'; +} + +char * +tokenizer_next_word(char **input_p, GError **error_r) +{ + char *word, *input; + + assert(input_p != NULL); + assert(*input_p != NULL); + + word = input = *input_p; + + if (*input == 0) + return NULL; + + /* check the first character */ + + if (!valid_word_first_char(*input)) { + g_set_error(error_r, tokenizer_quark(), 0, + "Letter expected"); + return NULL; + } + + /* now iterate over the other characters until we find a + whitespace or end-of-string */ + + while (*++input != 0) { + if (g_ascii_isspace(*input)) { + /* a whitespace: the word ends here */ + *input = 0; + /* skip all following spaces, too */ + input = g_strchug(input + 1); + break; + } + + if (!valid_word_char(*input)) { + *input_p = input; + g_set_error(error_r, tokenizer_quark(), 0, + "Invalid word character"); + return NULL; + } + } + + /* end of string: the string is already null-terminated + here */ + + *input_p = input; + return word; +} + +char * +tokenizer_next_string(char **input_p, GError **error_r) +{ + char *word, *dest, *input; + + assert(input_p != NULL); + assert(*input_p != NULL); + + word = dest = input = *input_p; + + if (*input == 0) + /* end of line */ + return NULL; + + /* check for the opening " */ + + if (*input != '"') { + g_set_error(error_r, tokenizer_quark(), 0, + "'\"' expected"); + return NULL; + } + + ++input; + + /* copy all characters */ + + while (*input != '"') { + if (*input == '\\') + /* the backslash escapes the following + character */ + ++input; + + if (*input == 0) { + /* return input-1 so the caller can see the + difference between "end of line" and + "error" */ + *input_p = input - 1; + g_set_error(error_r, tokenizer_quark(), 0, + "Missing closing '\"'"); + return NULL; + } + + /* copy one character */ + *dest++ = *input++; + } + + /* the following character must be a whitespace (or end of + line) */ + + ++input; + if (*input != 0 && !g_ascii_isspace(*input)) { + *input_p = input; + g_set_error(error_r, tokenizer_quark(), 0, + "Space expected after closing '\"'"); + return NULL; + } + + /* finish the string and return it */ + + *dest = 0; + *input_p = g_strchug(input); + return word; +} + +char * +tokenizer_next_word_or_string(char **input_p, GError **error_r) +{ + assert(input_p != NULL); + assert(*input_p != NULL); + + if (**input_p == '"') + return tokenizer_next_string(input_p, error_r); + else + return tokenizer_next_word(input_p, error_r); +} -- cgit v1.2.3