From 375e3ffed496c995383156f9675fa95f145e05bf Mon Sep 17 00:00:00 2001 From: Warren Dukes Date: Tue, 13 Apr 2004 02:20:46 +0000 Subject: add my own utf8/ascii converters and utf8 validator validate all mpd tags on import, if they are invalid, assume they are ascii and convert to utf8 git-svn-id: https://svn.musicpd.org/mpd/trunk@707 09075e82-0dd4-0310-85a5-a0d7c8717e4f --- src/Makefile.am | 4 +-- src/tag.c | 29 +++++++++++++++++++ src/utf8.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/utf8.h | 14 ++++++++++ 4 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 src/utf8.c create mode 100644 src/utf8.h (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index e3d7c0144..3b6717936 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -5,13 +5,13 @@ mpd_headers = buffer2array.h interface.h command.h playlist.h ls.h \ tag.h player.h listen.h conf.h ogg_decode.h volume.h flac_decode.h \ audio.h playerData.h stats.h myfprintf.h sig_handlers.h decode.h log.h \ audiofile_decode.h charConv.h permission.h mpd_types.h pcm_utils.h \ - mp4_decode.h aac_decode.h signal_check.h + mp4_decode.h aac_decode.h signal_check.h utf8.h mpd_SOURCES = main.c buffer2array.c interface.c command.c playlist.c ls.c \ song.c list.c directory.c tables.c utils.c path.c mp3_decode.c \ tag.c player.c listen.c conf.c ogg_decode.c volume.c flac_decode.c \ audio.c playerData.c stats.c myfprintf.c sig_handlers.c decode.c log.c \ audiofile_decode.c charConv.c permission.c pcm_utils.c mp4_decode.c \ - aac_decode.c signal_check.c $(mpd_headers) + aac_decode.c signal_check.c utf8.c $(mpd_headers) mpd_CFLAGS = $(MPD_CFLAGS) mpd_LDADD = $(MPD_LIBS) $(ID3_LIB) $(MAD_LIB) $(MP4FF_LIB) diff --git a/src/tag.c b/src/tag.c index bda1810be..2fabf5639 100644 --- a/src/tag.c +++ b/src/tag.c @@ -24,6 +24,7 @@ #include "mp4_decode.h" #include "aac_decode.h" #include "utils.h" +#include "utf8.h" #include #include @@ -57,6 +58,22 @@ void printMpdTag(FILE * fp, MpdTag * 
/*
 * Conversion between single-byte text and UTF-8, plus a structural UTF-8
 * validator.
 *
 * "ascii" in the function names means one byte per character, with byte
 * values 128-255 mapped one-to-one onto code points U+0080-U+00FF (the
 * Latin-1 layout).  That is exactly what the two-byte encodings produced
 * below represent.
 */

/*
 * Encode one byte as a NUL-terminated UTF-8 sequence.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, so the result must be copied before calling again.  The function
 * is therefore not reentrant.  For c == 0 the result is the empty string.
 */
unsigned char * asciiToUtf8(unsigned char c) {
	static unsigned char utf8[3];

	memset(utf8,0,sizeof(utf8));

	if(c < 128) utf8[0] = c;
	else if(c < 192) {
		/* U+0080-U+00BF: lead byte 0xC2, low six bits unchanged */
		utf8[0] = 194;
		utf8[1] = c;
	}
	else {
		/* U+00C0-U+00FF: lead byte 0xC3, trailing byte 0x80-0xBF */
		utf8[0] = 195;
		utf8[1] = c-64;
	}

	return utf8;
}

/*
 * Convert a NUL-terminated single-byte string to a newly allocated
 * NUL-terminated UTF-8 string.
 *
 * Returns the new string, which the caller must free(), or NULL if ascii
 * is NULL or memory could not be allocated.
 */
unsigned char * asciiStrToUtf8Dup(unsigned char * ascii) {
	size_t inLen;
	unsigned char * ret;
	unsigned char * cp;
	unsigned char * utf8;
	unsigned char * shrunk;

	if(!ascii) return NULL;

	inLen = strlen((char *)ascii);

	/* every input byte expands to at most two output bytes */
	if(inLen > ((size_t)-1 - 1) / 2) return NULL;

	ret = malloc(inLen*2+1);
	if(!ret) return NULL;

	cp = ret;
	for(; *ascii; ascii++) {
		utf8 = asciiToUtf8(*ascii);
		while(*utf8) *(cp++) = *(utf8++);
	}
	*cp = '\0';

	/* give back the unused tail; if shrinking fails the original block
	 * is still valid, so hand that one out instead of leaking it */
	shrunk = realloc(ret,(size_t)(cp-ret)+1);
	return shrunk ? shrunk : ret;
}

/*
 * Decode the UTF-8 sequence at utf8 into a single byte.
 *
 * Bytes below 0x80 are returned unchanged.  Two-byte sequences led by
 * 0xC2 or 0xC3 are decoded to 0x80-0xFF.  Anything else, including a
 * 0xC2/0xC3 lead byte that is not followed by a continuation byte
 * (0x80-0xBF), yields '?'.
 */
unsigned char utf8ToAscii(unsigned char * utf8) {
	unsigned char c = 0;

	if(utf8[0] < 128) return utf8[0];
	else if(utf8[0] == 195) c += 64;
	else if(utf8[0] != 194) return '?';

	/* also catches a string that ends right after the lead byte,
	 * because the terminating NUL is not a continuation byte */
	if(utf8[1] < 0x80 || utf8[1] > 0xBF) return '?';

	return c+utf8[1];
}

/*
 * Check the structure of the UTF-8 sequence that starts at utf8Char.
 *
 * Returns the total length of the sequence in bytes (1-6) so that callers
 * can advance past it, or 0 if the lead byte is not a valid start byte or
 * one of the expected continuation bytes is missing.
 *
 * Only the byte layout is checked: lead bytes 0xC0-0xFD are accepted, so
 * overlong forms and the historical 5- and 6-byte sequences pass.
 */
int validateUtf8Char(unsigned char * utf8Char) {
	int trailing = 1;
	unsigned char mask = 0x20;
	int i;

	if(utf8Char[0] < 0x80) return 1;

	if(utf8Char[0] < 0xC0 || utf8Char[0] > 0xFD) return 0;

	/* each additional leading one bit below 0x40 announces one more
	 * continuation byte; for lead bytes <= 0xFD the run of ones ends at
	 * bit 0x02 at the latest, so trailing never exceeds 5 */
	while(trailing < 5 && (mask & utf8Char[0])) {
		mask = (mask >> 1);
		trailing++;
	}

	/* a NUL terminator fails the range test, so this never reads past
	 * the end of a NUL-terminated string */
	for(i=1;i<=trailing;i++) {
		if(utf8Char[i] < 0x80 || utf8Char[i] > 0xBF) return 0;
	}

	/* lead byte plus continuation bytes */
	return trailing+1;
}

/*
 * Return 1 if the NUL-terminated string consists entirely of structurally
 * valid UTF-8 sequences (the empty string is valid), 0 otherwise.
 */
int validUtf8String(unsigned char * string) {
	int ret;

	while(*string) {
		ret = validateUtf8Char(string);
		if(!ret) return 0;
		string += ret;
	}

	return 1;
}