From 450c26c4713a2de440296260e8d0be8e6b283d75 Mon Sep 17 00:00:00 2001
From: Max Kellermann <max@duempel.org>
Date: Mon, 8 Apr 2013 23:51:39 +0200
Subject: tokenizer: convert to C++

---
 src/AllCommands.cxx    |  14 ++-
 src/ConfigFile.cxx     |  30 ++++---
 src/tokenizer.c        | 225 -------------------------------------------------
 src/tokenizer.h        |  83 ------------------
 src/util/Tokenizer.cxx | 202 ++++++++++++++++++++++++++++++++++++++++++++
 src/util/Tokenizer.hxx |  99 ++++++++++++++++++++++
 6 files changed, 324 insertions(+), 329 deletions(-)
 delete mode 100644 src/tokenizer.c
 delete mode 100644 src/tokenizer.h
 create mode 100644 src/util/Tokenizer.cxx
 create mode 100644 src/util/Tokenizer.hxx

(limited to 'src')

diff --git a/src/AllCommands.cxx b/src/AllCommands.cxx
index 58dcf4dba..f3243915b 100644
--- a/src/AllCommands.cxx
+++ b/src/AllCommands.cxx
@@ -31,10 +31,7 @@
 #include "tag.h"
 #include "protocol/Result.hxx"
 #include "Client.hxx"
-
-extern "C" {
-#include "tokenizer.h"
-}
+#include "util/Tokenizer.hxx"
 
 #ifdef ENABLE_SQLITE
 #include "StickerCommands.hxx"
@@ -329,10 +326,11 @@ command_process(Client *client, unsigned num, char *line)
 
 	/* get the command name (first word on the line) */
 
-	argv[0] = tokenizer_next_word(&line, &error);
+	Tokenizer tokenizer(line);
+	argv[0] = tokenizer.NextWord(&error);
 	if (argv[0] == NULL) {
 		current_command = "";
-		if (*line == 0)
+		if (tokenizer.IsEnd())
 			command_error(client, ACK_ERROR_UNKNOWN,
 				      "No command given");
 		else {
@@ -351,7 +349,7 @@ command_process(Client *client, unsigned num, char *line)
 
 	while (argc < (int)G_N_ELEMENTS(argv) &&
 	       (argv[argc] =
-		tokenizer_next_param(&line, &error)) != NULL)
+		tokenizer.NextParam(&error)) != NULL)
 		++argc;
 
 	/* some error checks; we have to set current_command because
@@ -365,7 +363,7 @@ command_process(Client *client, unsigned num, char *line)
 		return COMMAND_RETURN_ERROR;
 	}
 
-	if (*line != 0) {
+	if (!tokenizer.IsEnd()) {
 		command_error(client, ACK_ERROR_ARG,
 			      "%s", error->message);
 		current_command = NULL;
diff --git a/src/ConfigFile.cxx b/src/ConfigFile.cxx
index e94f3f238..f7f525096 100644
--- a/src/ConfigFile.cxx
+++ b/src/ConfigFile.cxx
@@ -23,10 +23,10 @@
 #include "ConfigData.hxx"
 #include "ConfigTemplates.hxx"
 #include "conf.h"
+#include "util/Tokenizer.hxx"
 
 extern "C" {
 #include "string_util.h"
-#include "tokenizer.h"
 }
 
 #include "fs/Path.hxx"
@@ -50,15 +50,17 @@ static bool
 config_read_name_value(struct config_param *param, char *input, unsigned line,
 		       GError **error_r)
 {
-	const char *name = tokenizer_next_word(&input, error_r);
+	Tokenizer tokenizer(input);
+
+	const char *name = tokenizer.NextWord(error_r);
 	if (name == NULL) {
-		assert(*input != 0);
+		assert(!tokenizer.IsEnd());
 		return false;
 	}
 
-	const char *value = tokenizer_next_string(&input, error_r);
+	const char *value = tokenizer.NextString(error_r);
 	if (value == NULL) {
-		if (*input == 0) {
+		if (tokenizer.IsEnd()) {
 			assert(error_r == NULL || *error_r == NULL);
 			g_set_error(error_r, config_quark(), 0,
 				    "Value missing");
@@ -69,7 +71,7 @@ config_read_name_value(struct config_param *param, char *input, unsigned line,
 		return false;
 	}
 
-	if (*input != 0 && *input != CONF_COMMENT) {
+	if (!tokenizer.IsEnd() && tokenizer.CurrentChar() != CONF_COMMENT) {
 		g_set_error(error_r, config_quark(), 0,
 			    "Unknown tokens after value");
 		return false;
@@ -173,9 +175,10 @@ ReadConfigFile(ConfigData &config_data, FILE *fp, GError **error_r)
 		/* the first token in each line is the name, followed
 		   by either the value or '{' */
 
-		name = tokenizer_next_word(&line, &error);
+		Tokenizer tokenizer(line);
+		name = tokenizer.NextWord(&error);
 		if (name == NULL) {
-			assert(*line != 0);
+			assert(!tokenizer.IsEnd());
 			g_propagate_prefixed_error(error_r, error,
 						   "line %i: ", count);
 			return false;
@@ -210,13 +213,13 @@ ReadConfigFile(ConfigData &config_data, FILE *fp, GError **error_r)
 		if (option.block) {
 			/* it's a block, call config_read_block() */
 
-			if (*line != '{') {
+			if (tokenizer.CurrentChar() != '{') {
 				g_set_error(error_r, config_quark(), 0,
 					    "line %i: '{' expected", count);
 				return false;
 			}
 
-			line = strchug_fast(line + 1);
+			line = strchug_fast(tokenizer.Rest() + 1);
 			if (*line != 0 && *line != CONF_COMMENT) {
 				g_set_error(error_r, config_quark(), 0,
 					    "line %i: Unknown tokens after '{'",
@@ -231,9 +234,9 @@ ReadConfigFile(ConfigData &config_data, FILE *fp, GError **error_r)
 		} else {
 			/* a string value */
 
-			value = tokenizer_next_string(&line, &error);
+			value = tokenizer.NextString(&error);
 			if (value == NULL) {
-				if (*line == 0)
+				if (tokenizer.IsEnd())
 					g_set_error(error_r, config_quark(), 0,
 						    "line %i: Value missing",
 						    count);
@@ -247,7 +250,8 @@ ReadConfigFile(ConfigData &config_data, FILE *fp, GError **error_r)
 				return false;
 			}
 
-			if (*line != 0 && *line != CONF_COMMENT) {
+			if (!tokenizer.IsEnd() &&
+			    tokenizer.CurrentChar() != CONF_COMMENT) {
 				g_set_error(error_r, config_quark(), 0,
 					    "line %i: Unknown tokens after value",
 					    count);
diff --git a/src/tokenizer.c b/src/tokenizer.c
deleted file mode 100644
index 4a98e882f..000000000
--- a/src/tokenizer.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Copyright (C) 2003-2011 The Music Player Daemon Project
- * http://www.musicpd.org
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include "config.h"
-#include "tokenizer.h"
-#include "string_util.h"
-
-#include <glib.h>
-
-#include <stdbool.h>
-#include <assert.h>
-#include <string.h>
-
-G_GNUC_CONST
-static GQuark
-tokenizer_quark(void)
-{
-	return g_quark_from_static_string("tokenizer");
-}
-
-static inline bool
-valid_word_first_char(char ch)
-{
-	return g_ascii_isalpha(ch);
-}
-
-static inline bool
-valid_word_char(char ch)
-{
-	return g_ascii_isalnum(ch) || ch == '_';
-}
-
-char *
-tokenizer_next_word(char **input_p, GError **error_r)
-{
-	char *word, *input;
-
-	assert(input_p != NULL);
-	assert(*input_p != NULL);
-
-	word = input = *input_p;
-
-	if (*input == 0)
-		return NULL;
-
-	/* check the first character */
-
-	if (!valid_word_first_char(*input)) {
-		g_set_error(error_r, tokenizer_quark(), 0,
-			    "Letter expected");
-		return NULL;
-	}
-
-	/* now iterate over the other characters until we find a
-	   whitespace or end-of-string */
-
-	while (*++input != 0) {
-		if (g_ascii_isspace(*input)) {
-			/* a whitespace: the word ends here */
-			*input = 0;
-			/* skip all following spaces, too */
-			input = strchug_fast(input + 1);
-			break;
-		}
-
-		if (!valid_word_char(*input)) {
-			*input_p = input;
-			g_set_error(error_r, tokenizer_quark(), 0,
-				    "Invalid word character");
-			return NULL;
-		}
-	}
-
-	/* end of string: the string is already null-terminated
-	   here */
-
-	*input_p = input;
-	return word;
-}
-
-static inline bool
-valid_unquoted_char(char ch)
-{
-	return (unsigned char)ch > 0x20 && ch != '"' && ch != '\'';
-}
-
-char *
-tokenizer_next_unquoted(char **input_p, GError **error_r)
-{
-	char *word, *input;
-
-	assert(input_p != NULL);
-	assert(*input_p != NULL);
-
-	word = input = *input_p;
-
-	if (*input == 0)
-		return NULL;
-
-	/* check the first character */
-
-	if (!valid_unquoted_char(*input)) {
-		g_set_error(error_r, tokenizer_quark(), 0,
-			    "Invalid unquoted character");
-		return NULL;
-	}
-
-	/* now iterate over the other characters until we find a
-	   whitespace or end-of-string */
-
-	while (*++input != 0) {
-		if (g_ascii_isspace(*input)) {
-			/* a whitespace: the word ends here */
-			*input = 0;
-			/* skip all following spaces, too */
-			input = strchug_fast(input + 1);
-			break;
-		}
-
-		if (!valid_unquoted_char(*input)) {
-			*input_p = input;
-			g_set_error(error_r, tokenizer_quark(), 0,
-				    "Invalid unquoted character");
-			return NULL;
-		}
-	}
-
-	/* end of string: the string is already null-terminated
-	   here */
-
-	*input_p = input;
-	return word;
-}
-
-char *
-tokenizer_next_string(char **input_p, GError **error_r)
-{
-	char *word, *dest, *input;
-
-	assert(input_p != NULL);
-	assert(*input_p != NULL);
-
-	word = dest = input = *input_p;
-
-	if (*input == 0)
-		/* end of line */
-		return NULL;
-
-	/* check for the opening " */
-
-	if (*input != '"') {
-		g_set_error(error_r, tokenizer_quark(), 0,
-			    "'\"' expected");
-		return NULL;
-	}
-
-	++input;
-
-	/* copy all characters */
-
-	while (*input != '"') {
-		if (*input == '\\')
-			/* the backslash escapes the following
-			   character */
-			++input;
-
-		if (*input == 0) {
-			/* return input-1 so the caller can see the
-			   difference between "end of line" and
-			   "error" */
-			*input_p = input - 1;
-			g_set_error(error_r, tokenizer_quark(), 0,
-				    "Missing closing '\"'");
-			return NULL;
-		}
-
-		/* copy one character */
-		*dest++ = *input++;
-	}
-
-	/* the following character must be a whitespace (or end of
-	   line) */
-
-	++input;
-	if (*input != 0 && !g_ascii_isspace(*input)) {
-		*input_p = input;
-		g_set_error(error_r, tokenizer_quark(), 0,
-			    "Space expected after closing '\"'");
-		return NULL;
-	}
-
-	/* finish the string and return it */
-
-	*dest = 0;
-	*input_p = strchug_fast(input);
-	return word;
-}
-
-char *
-tokenizer_next_param(char **input_p, GError **error_r)
-{
-	assert(input_p != NULL);
-	assert(*input_p != NULL);
-
-	if (**input_p == '"')
-		return tokenizer_next_string(input_p, error_r);
-	else
-		return tokenizer_next_unquoted(input_p, error_r);
-}
diff --git a/src/tokenizer.h b/src/tokenizer.h
deleted file mode 100644
index 2026e5ad6..000000000
--- a/src/tokenizer.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (C) 2003-2011 The Music Player Daemon Project
- * http://www.musicpd.org
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef MPD_TOKENIZER_H
-#define MPD_TOKENIZER_H
-
-#include "gerror.h"
-
-/**
- * Reads the next word from the input string.  This function modifies
- * the input string.
- *
- * @param input_p the input string; this function returns a pointer to
- * the first non-whitespace character of the following token
- * @param error_r if this function returns NULL and **input_p!=0, it
- * optionally provides a GError object in this argument
- * @return a pointer to the null-terminated word, or NULL on error or
- * end of line
- */
-char *
-tokenizer_next_word(char **input_p, GError **error_r);
-
-/**
- * Reads the next unquoted word from the input string.  This function
- * modifies the input string.
- *
- * @param input_p the input string; this function returns a pointer to
- * the first non-whitespace character of the following token
- * @param error_r if this function returns NULL and **input_p!=0, it
- * optionally provides a GError object in this argument
- * @return a pointer to the null-terminated word, or NULL on error or
- * end of line
- */
-char *
-tokenizer_next_unquoted(char **input_p, GError **error_r);
-
-/**
- * Reads the next quoted string from the input string.  A backslash
- * escapes the following character.  This function modifies the input
- * string.
- *
- * @param input_p the input string; this function returns a pointer to
- * the first non-whitespace character of the following token
- * @param error_r if this function returns NULL and **input_p!=0, it
- * optionally provides a GError object in this argument
- * @return a pointer to the null-terminated string, or NULL on error
- * or end of line
- */
-char *
-tokenizer_next_string(char **input_p, GError **error_r);
-
-/**
- * Reads the next unquoted word or quoted string from the input.  This
- * is a wrapper for tokenizer_next_unquoted() and
- * tokenizer_next_string().
- *
- * @param input_p the input string; this function returns a pointer to
- * the first non-whitespace character of the following token
- * @param error_r if this function returns NULL and **input_p!=0, it
- * optionally provides a GError object in this argument
- * @return a pointer to the null-terminated string, or NULL on error
- * or end of line
- */
-char *
-tokenizer_next_param(char **input_p, GError **error_r);
-
-#endif
diff --git a/src/util/Tokenizer.cxx b/src/util/Tokenizer.cxx
new file mode 100644
index 000000000..9ade0d1b1
--- /dev/null
+++ b/src/util/Tokenizer.cxx
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2003-2013 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "config.h"
+#include "Tokenizer.hxx"
+#include "string_util.h"
+
+#include <glib.h>
+
+#include <stdbool.h>
+#include <assert.h>
+#include <string.h>
+
+G_GNUC_CONST
+static GQuark
+tokenizer_quark(void)
+{
+	return g_quark_from_static_string("tokenizer");
+}
+
+static inline bool
+valid_word_first_char(char ch)
+{
+	return g_ascii_isalpha(ch);
+}
+
+static inline bool
+valid_word_char(char ch)
+{
+	return g_ascii_isalnum(ch) || ch == '_';
+}
+
+char *
+Tokenizer::NextWord(GError **error_r)
+{
+	char *const word = input;
+
+	if (*input == 0)
+		return nullptr;
+
+	/* check the first character */
+
+	if (!valid_word_first_char(*input)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Letter expected");
+		return nullptr;
+	}
+
+	/* now iterate over the other characters until we find a
+	   whitespace or end-of-string */
+
+	while (*++input != 0) {
+		if (g_ascii_isspace(*input)) {
+			/* a whitespace: the word ends here */
+			*input = 0;
+			/* skip all following spaces, too */
+			input = strchug_fast(input + 1);
+			break;
+		}
+
+		if (!valid_word_char(*input)) {
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Invalid word character");
+			return nullptr;
+		}
+	}
+
+	/* end of string: the string is already null-terminated
+	   here */
+
+	return word;
+}
+
+static inline bool
+valid_unquoted_char(char ch)
+{
+	return (unsigned char)ch > 0x20 && ch != '"' && ch != '\'';
+}
+
+char *
+Tokenizer::NextUnquoted(GError **error_r)
+{
+	char *const word = input;
+
+	if (*input == 0)
+		return nullptr;
+
+	/* check the first character */
+
+	if (!valid_unquoted_char(*input)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Invalid unquoted character");
+		return nullptr;
+	}
+
+	/* now iterate over the other characters until we find a
+	   whitespace or end-of-string */
+
+	while (*++input != 0) {
+		if (g_ascii_isspace(*input)) {
+			/* a whitespace: the word ends here */
+			*input = 0;
+			/* skip all following spaces, too */
+			input = strchug_fast(input + 1);
+			break;
+		}
+
+		if (!valid_unquoted_char(*input)) {
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Invalid unquoted character");
+			return nullptr;
+		}
+	}
+
+	/* end of string: the string is already null-terminated
+	   here */
+
+	return word;
+}
+
+char *
+Tokenizer::NextString(GError **error_r)
+{
+	char *const word = input, *dest = input;
+
+	if (*input == 0)
+		/* end of line */
+		return nullptr;
+
+	/* check for the opening " */
+
+	if (*input != '"') {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "'\"' expected");
+		return nullptr;
+	}
+
+	++input;
+
+	/* copy in place, starting over the opening quote */
+
+	while (*input != '"') {
+		if (*input == '\\')
+			/* the backslash escapes the following
+			   character */
+			++input;
+
+		if (*input == 0) {
+			/* step back one character so IsEnd() is
+			   false and the caller can tell "error"
+			   apart from "end of line" */
+			--input;
+			g_set_error(error_r, tokenizer_quark(), 0,
+				    "Missing closing '\"'");
+			return nullptr;
+		}
+
+		/* copy one character */
+		*dest++ = *input++;
+	}
+
+	/* the following character must be a whitespace (or end of
+	   line) */
+
+	++input;
+	if (*input != 0 && !g_ascii_isspace(*input)) {
+		g_set_error(error_r, tokenizer_quark(), 0,
+			    "Space expected after closing '\"'");
+		return nullptr;
+	}
+
+	/* finish the string and return it */
+
+	*dest = 0;
+	input = strchug_fast(input);
+	return word;
+}
+
+char *
+Tokenizer::NextParam(GError **error_r)
+{
+	if (*input == '"')
+		return NextString(error_r);
+	else
+		return NextUnquoted(error_r);
+}
diff --git a/src/util/Tokenizer.hxx b/src/util/Tokenizer.hxx
new file mode 100644
index 000000000..da45348d4
--- /dev/null
+++ b/src/util/Tokenizer.hxx
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2003-2013 The Music Player Daemon Project
+ * http://www.musicpd.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MPD_TOKENIZER_HXX
+#define MPD_TOKENIZER_HXX
+
+#include "gerror.h"
+
+class Tokenizer {
+	char *input;
+
+public:
+	/**
+	 * @param _input the input string; the contents will be
+	 * modified by this class
+	 */
+	constexpr Tokenizer(char *_input):input(_input) {}
+
+	Tokenizer(const Tokenizer &) = delete;
+	Tokenizer &operator=(const Tokenizer &) = delete;
+
+	char *Rest() {
+		return input;
+	}
+
+	char CurrentChar() const {
+		return *input;
+	}
+
+	bool IsEnd() const {
+		return CurrentChar() == 0;
+	}
+
+	/**
+	 * Reads the next word.
+	 *
+	 * @param error_r if this function returns nullptr and
+	 * IsEnd() is false, it optionally provides a GError object in
+	 * this argument
+	 * @return a pointer to the null-terminated word, or nullptr
+	 * on error or end of line
+	 */
+	char *NextWord(GError **error_r);
+
+	/**
+	 * Reads the next unquoted word from the input string.
+	 *
+	 * @param error_r if this function returns nullptr and IsEnd() is false, it
+	 * optionally provides a GError object in this argument
+	 * @return a pointer to the null-terminated word, or nullptr
+	 * on error or end of line
+	 */
+	char *NextUnquoted(GError **error_r);
+
+	/**
+	 * Reads the next quoted string from the input string.  A backslash
+	 * escapes the following character.  This function modifies the input
+	 * string.
+	 *
+	 * On success, the tokenizer is advanced to the first
+	 * non-whitespace character of the following token.
+	 * @param error_r if this function returns nullptr and IsEnd() is false, it
+	 * optionally provides a GError object in this argument
+	 * @return a pointer to the null-terminated string, or nullptr on error
+	 * or end of line
+	 */
+	char *NextString(GError **error_r);
+
+	/**
+	 * Reads the next unquoted word or quoted string from the
+	 * input.  This is a wrapper for NextUnquoted() and
+	 * NextString().
+	 *
+	 * @param error_r if this function returns nullptr and
+	 * IsEnd() is false, it optionally provides a GError object in
+	 * this argument
+	 * @return a pointer to the null-terminated string, or nullptr
+	 * on error or end of line
+	 */
+	char *NextParam(GError **error_r);
+};
+
+#endif
-- 
cgit v1.2.3