From e7be81dd66bc4d19ff8552b81aa38821eeb9f457 Mon Sep 17 00:00:00 2001 From: yaworsky Date: Mon, 28 Nov 2005 14:03:41 +0000 Subject: Implemented charset conversion in each log path. Fixed bugs in configuration reader. --- TODO | 8 ----- daemon/conf.c | 72 +++++++++++++++++++++++++++++++-------------- daemon/syslogd.c | 75 +++++++++++++++++------------------------------ daemon/syslogd.h | 5 ++-- doc/src/configuration.xml | 48 +++++++++++++++--------------- doc/src/internals.xml | 21 ++++++------- doc/src/intro.xml | 5 ++++ 7 files changed, 121 insertions(+), 113 deletions(-) diff --git a/TODO b/TODO index dddc7d5..184b9ca 100644 --- a/TODO +++ b/TODO @@ -1,11 +1,3 @@ -Charset conversion between various sources and destinations. -Currently only global conversion is possible. -Add "encoding" attribute to "source" and "destination" elements. -Convert message when it has been assigned to destination. -If source encoding is not specified, assume UTF-8. -Similarly, assume UTF-8 for destination encoding if not specified. -But do not attempt to convert if both encodings are not specified. - GUI configuration utility GUI log viewer diff --git a/daemon/conf.c b/daemon/conf.c index 383673c..2255369 100644 --- a/daemon/conf.c +++ b/daemon/conf.c @@ -36,8 +36,6 @@ static char syslog_conf_dir[] = SYSLOG_CONF_DIR; /* options and their default values */ gboolean use_dns = TRUE; -gchar *source_encoding = NULL; -gchar *destination_encoding = NULL; int mark_interval = 0; gchar *mark_message = "-- MARK --"; int hold = 3; @@ -108,6 +106,8 @@ static void create_source( int line_number, } else if( strcmp( aname, "port" ) == 0 ) source->udp.sin_port = htons( strtoul( aval, NULL, 0 ) ); + else if( strcmp( aname, "encoding" ) == 0 ) + source->encoding = g_locale_from_utf8( aval, -1, NULL, NULL, NULL ); } if( !source->name ) @@ -255,29 +255,30 @@ static void create_destination( int line_number, const gchar** attribute_values ) { gboolean r = FALSE; + const gchar **attr_names; + const gchar **attr_values; const gchar *aname; struct destination *dest = g_malloc0( sizeof(struct destination) ); /* at first, we must determine destination type for selection of type-specific structure that we'll fill later; - also, look for 'name' attribute and set the name of destination */ + also, look for 'name' and 'encoding' attributes and set corresponding fields + in destination structure */ dest->type = DT_UNDEFINED; - for( ; (aname = *attribute_names) != NULL; attribute_names++, attribute_values++ ) + attr_names = attribute_names; + attr_values = attribute_values; + for( ; (aname = *attr_names) != NULL; attr_names++, attr_values++ ) { - const gchar *aval = *attribute_values; + const gchar *aval = *attr_values; if( strcmp( aname, "name" ) == 0 ) dest->name = g_locale_from_utf8( aval, -1, NULL, NULL, NULL ); + else if( strcmp( aname, "encoding" ) == 0 ) + dest->encoding = g_locale_from_utf8( aval, -1, NULL, NULL, NULL ); else if( strcmp( aname, "file" ) == 0 ) - { dest->type = DT_FILE; - break; - } else if( strcmp( aname, "collector" ) == 0 ) - { dest->type = DT_RELAY; - break; - } } if( !dest->name ) { @@ -500,10 +501,6 @@ static void read_options( int line_number, else ERR( "Invalid value \"%s\" of attribute \"%s\" at line %d\n", aval, aname, line_number ); } - else if( strcmp( aname, "source_encoding" ) == 0 ) - source_encoding = g_strdup( aval ); - else if( strcmp( aname, "destination_encoding" ) == 0 ) - destination_encoding = g_strdup( aval ); else if( strcmp( aname, "mark_interval" ) == 0 ) mark_interval = strtoul( aval, NULL, 0 ); else if( strcmp( aname, "mark_message" ) == 0 ) @@ -697,6 +694,8 @@ static void resolve_logpaths() g_free( logpath ); logpath = g_malloc( sizeof(struct logpath) ); + logpath->message_cd = (GIConv) -1; + /* find source */ for( item = sources; item; item = item->next ) { @@ -741,6 +740,32 @@ static void resolve_logpaths() else logpath->filter = NULL; } + + /* create message charset conversion descriptor */ + if( logpath->source->encoding || logpath->destination->encoding ) + { + char *source_encoding = logpath->source->encoding? + logpath->source->encoding : "UTF-8"; + char *destination_encoding = logpath->destination->encoding? + logpath->destination->encoding : "UTF-8"; + + if( strcasecmp( source_encoding, destination_encoding ) != 0 ) + { + logpath->message_cd = g_iconv_open( destination_encoding, source_encoding ); + if( logpath->message_cd == (GIConv) -1 ) + { + ERR( "Cannot convert messages from %s to %s\n", + source_encoding, destination_encoding ); + } + else + { + TRACE( "Log path %s-%s: converting messages from %s to %s\n", + logpath->source->name, logpath->destination->name, + source_encoding, destination_encoding ); + } + } + } + /* add item to paths */ paths = g_list_append( paths, logpath ); logpath = NULL; @@ -774,9 +799,10 @@ static void dump_configuration() for( item = sources; item; item = item->next ) { struct source *s = item->data; - TRACE( "\tname=%s\ttype=%s\tinterface=%d:%d:%d:%d\tport=%d\n", + TRACE( "\tname=%s\ttype=%s\tencoding=%s\tinterface=%d:%d:%d:%d\tport=%d\n", s->name, (s->type == ST_INTERNAL)? "internal" : ((s->type == ST_UDP)? "udp" : "undefined"), + s->encoding? s->encoding : "UTF-8", s->udp.sin_addr.S_un.S_un_b.s_b1, s->udp.sin_addr.S_un.S_un_b.s_b2, s->udp.sin_addr.S_un.S_un_b.s_b3, s->udp.sin_addr.S_un.S_un_b.s_b4, ntohs( s->udp.sin_port ) ); @@ -788,10 +814,12 @@ static void dump_configuration() switch( d->type ) { case DT_FILE: - TRACE( "\tname=%s\tfile=%s\n" + TRACE( "\tname=%s\tencoding=%s\tfile=%s\n" "\t\trotate=%s size=%d backlogs=%d ifempty=%s\n" "\t\tolddir=%s compresscmd=%s\n", - d->name, d->u.file.name_pattern, + d->name, + d->encoding? d->encoding : "UTF-8", + d->u.file.name_pattern, (d->u.file.rotate == RP_DAILY)? "daily" : (d->u.file.rotate == RP_WEEKLY)? "weekly" : (d->u.file.rotate == RP_MONTHLY)? "monthly" @@ -801,9 +829,11 @@ static void dump_configuration() d->u.file.compresscmd? d->u.file.compresscmd : "NULL" ); break; case DT_RELAY: - TRACE( "\tname=%s\tcollector=%s\n" + TRACE( "\tname=%s\tencoding=%s\tcollector=%s\n" "\t\tomit_hostname=%s\n", - d->name, d->u.file.name_pattern, + d->name, + d->encoding? d->encoding : "UTF-8", + d->u.relay.collector, d->u.relay.omit_hostname? "yes" : "no" ); break; default: @@ -840,8 +870,6 @@ static void dump_configuration() } TRACE( "Options:\n" ); TRACE( "\tuse_dns=%d\n", (int) use_dns ); - TRACE( "\tsource_encoding=%s\n", source_encoding? source_encoding : "NULL" ); - TRACE( "\tdestination_encoding=%s\n", destination_encoding? destination_encoding : "NULL" ); TRACE( "\tmark_interval=%d\n", mark_interval ); TRACE( "\tmark_message=%s\n", mark_message ); TRACE( "\thold=%d\n", hold ); diff --git a/daemon/syslogd.c b/daemon/syslogd.c index 99e9a92..e4467b3 100644 --- a/daemon/syslogd.c +++ b/daemon/syslogd.c @@ -46,8 +46,6 @@ static GList *hostnames = NULL; #define HOSTNAME_LIFETIME 60 /* seconds */ /* FIXME: is this value correct? maybe we should make it configurable? */ -static GIConv conversion_descriptor = (GIConv) -1; - char *str_month[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; @@ -150,36 +148,6 @@ void release_message( struct message* msg ) TRACE_LEAVE( "done\n" ); } -/****************************************************************************** - * convert_message_encoding - */ -static void convert_message_encoding( struct message* msg ) -{ - gchar *converted_msg; - - TRACE_ENTER( "message=%p\n", msg ); - - if( conversion_descriptor == (GIConv) -1 ) - { - TRACE_LEAVE( "nothing to do\n" ); - return; - } - - converted_msg = g_convert_with_iconv( msg->message->gstr->str, -1, - conversion_descriptor, NULL, NULL, NULL ); - if( !converted_msg ) - { - TRACE_LEAVE( "conversion error\n" ); - return; - } - - string_release( msg->message ); - msg->message = string_new( converted_msg ); - g_free( converted_msg ); - - TRACE_LEAVE( "done; %s\n", msg->message->gstr->str ); -} - /****************************************************************************** * filter_message * @@ -223,11 +191,10 @@ static void mux_message( struct message* msg ) TRACE_ENTER( "message=%p\n", msg ); - convert_message_encoding( msg ); - for( item = logpaths; item; item = item->next ) { struct logpath *logpath = item->data; + struct message *converted_msg; if( logpath->source != msg->source ) continue; @@ -235,7 +202,32 @@ static void mux_message( struct message* msg ) if( !filter_message( msg, logpath->filter ) ) continue; - logpath->destination->put( logpath->destination, msg ); + /* convert message encoding if needed */ + if( logpath->message_cd == (GIConv) -1 ) + { + converted_msg = msg; + reference_message( msg ); + } + else + { + gchar *c_message = g_convert_with_iconv( msg->message->gstr->str, -1, + logpath->message_cd, + NULL, NULL, NULL ); + if( !c_message ) + { + TRACE( "conversion error\n" ); + c_message = g_strdup( msg->message->gstr->str ); + } + + converted_msg = duplicate_message( msg ); + string_release( converted_msg->message ); + converted_msg->message = string_new( c_message ); + g_free( c_message ); + } + + /* put message to destination */ + logpath->destination->put( logpath->destination, converted_msg ); + release_message( converted_msg ); } release_message( msg ); @@ -825,16 +817,6 @@ void syslogd_main() if( !init_udp_listener() ) goto done; - if( source_encoding && destination_encoding ) - { - conversion_descriptor = g_iconv_open( destination_encoding, source_encoding ); - if( conversion_descriptor == (GIConv) -1 ) - { - ERR( "Cannot convert messages from %s to %s\n", - source_encoding, destination_encoding ); - } - } - log_internal( LOG_NOTICE, "Syslog daemon started" ); /* get messages from queues */ @@ -959,8 +941,5 @@ done: fini_purger(); free_hostnames(); - if( conversion_descriptor != (GIConv) -1 ) - g_iconv_close( conversion_descriptor ); - TRACE_LEAVE( "done\n" ); } diff --git a/daemon/syslogd.h b/daemon/syslogd.h index a70b326..d2bb056 100644 --- a/daemon/syslogd.h +++ b/daemon/syslogd.h @@ -66,8 +66,6 @@ extern void log_internal( int pri, char* fmt, ... ); /* options and their default values */ extern gboolean use_dns; -extern gchar *source_encoding; -extern gchar *destination_encoding; extern int mark_interval; extern gchar *mark_message; extern int hold; @@ -126,6 +124,7 @@ struct source { gchar *name; enum source_type type; + gchar *encoding; struct sockaddr_in udp; }; @@ -174,6 +173,7 @@ struct destination { gchar *name; enum destination_type type; + gchar *encoding; union { struct destination_file file; @@ -197,6 +197,7 @@ struct logpath struct source *source; struct filter *filter; struct destination *destination; + GIConv message_cd; }; extern GList *sources; diff --git a/doc/src/configuration.xml b/doc/src/configuration.xml index a29a210..dc99510 100644 --- a/doc/src/configuration.xml +++ b/doc/src/configuration.xml @@ -73,6 +73,18 @@ and udp defines a listening UDP socket. +encoding + + + +Optional. +Expect incoming messages in this encoding. +Default is UTF-8. + + + + + interface @@ -125,6 +137,19 @@ or collector respectively. +encoding + + + +Optional. +Messages must be converted to this encoding. +Conversion is not performed if both source and destination encodings are identical or not specified. +Default is UTF-8. + + + + + file @@ -526,29 +551,6 @@ Default is yes. -source_encoding - - - -Optional. -Convert incoming messages from specified encoding to -destination_encoding. -Default is do not convert. - - - - - -destination_encoding - - - -Required, if source_encoding is given. - - - - - mark_interval diff --git a/doc/src/internals.xml b/doc/src/internals.xml index dafa160..88da43a 100644 --- a/doc/src/internals.xml +++ b/doc/src/internals.xml @@ -68,16 +68,17 @@ or by the writing thread after file is closed. +------------+ +-----+ | +------+ | +--------------+ - +-----------------+ +------+ +-----------+ +-----------+ ---->|charset converter|--->|filter|--->|multiplexer|-+->|destination|+ - +-----------------+ +------+ +-----------+ +->+-----------+|+ - ^ +-> +-----------+| - | +-----------+ - +-------+ - |logpath|+ - +-------+|+ - +-------+| - +-------+ + +------+ +-----------------+ +-----------+ +--->|filter|--->| multiplexer |-+->|destination|+ + | | |charset converter| | | || + +------+ +-----------------+ +->+-----------+|+ + ^ +-> +-----------+| + | +-----------+ + +-------+ + |logpath|+ + +-------+|+ + +-------+| + +-------+ file +-----------+ +-----+ +--------------+ diff --git a/doc/src/intro.xml b/doc/src/intro.xml index 7c3d104..077b606 100644 --- a/doc/src/intro.xml +++ b/doc/src/intro.xml @@ -26,6 +26,11 @@ log rotation; +charset conversion between message sources and destinations; + + + + completely open-source. -- cgit v1.2.3