From 1004890e253453faba81126028986f075e5fc5e7 Mon Sep 17 00:00:00 2001 From: Warren Dukes Date: Tue, 13 Apr 2004 04:59:57 +0000 Subject: lots of fsCharset, utf8/ascii converting clean-up and robustness stuff Also, if fsCharsetToUtf8 can't convert to valid UTF-8, then don't add it to the db, this way clients don't have to worry about weirdness and it will force ppl to convert it. git-svn-id: https://svn.musicpd.org/mpd/trunk@711 09075e82-0dd4-0310-85a5-a0d7c8717e4f --- src/charConv.c | 107 +++++++++++++++++++++++++++++++++++++++++--------------- src/directory.c | 20 ++++++++--- src/ls.c | 6 ++-- src/path.c | 28 ++++++++++++--- src/playlist.c | 4 ++- src/utf8.c | 25 +++++++++++++ src/utf8.h | 2 ++ 7 files changed, 152 insertions(+), 40 deletions(-) (limited to 'src') diff --git a/src/charConv.c b/src/charConv.c index 6d905bccc..402683e18 100644 --- a/src/charConv.c +++ b/src/charConv.c @@ -17,6 +17,8 @@ */ #include "charConv.h" +#include "mpd_types.h" +#include "utf8.h" #include #include @@ -25,14 +27,21 @@ #ifdef HAVE_ICONV #include iconv_t char_conv_iconv; +#endif + char * char_conv_to = NULL; char * char_conv_from = NULL; -#endif +mpd_sint8 char_conv_same = 0; +mpd_sint8 char_conv_use_iconv = 0; + +/* 1 is to use asciiToUtf8 + 0 is not to use ascii/utf8 converter + -1 is to use utf8ToAscii*/ +mpd_sint8 char_conv_asciiToUtf8 = 0; #define BUFFER_SIZE 1024 int setCharSetConversion(char * to, char * from) { -#ifdef HAVE_ICONV if(char_conv_to && strcmp(to,char_conv_to)==0 && char_conv_from && strcmp(from,char_conv_from)==0) { @@ -41,60 +50,100 @@ int setCharSetConversion(char * to, char * from) { closeCharSetConversion(); + if(0==strcmp(to,from)) { + char_conv_same = 1; + char_conv_to = strdup(to); + char_conv_from = strdup(from); + return 0; + } + + if(strcmp(to,"UTF-8")==0 && strcmp(from,"ISO-8859-1")==0) { + char_conv_asciiToUtf8 = 1; + } + else if(strcmp(to,"ISO-8859-1")==0 && strcmp(from,"UTF-8")==0) { + char_conv_asciiToUtf8 = -1; + } + + if(char_conv_asciiToUtf8!=0) { + char_conv_to = strdup(to); + char_conv_from = strdup(from); + return 0; + } + +#ifdef HAVE_ICONV if((char_conv_iconv = iconv_open(to,from))==(iconv_t)(-1)) return -1; char_conv_to = strdup(to); char_conv_from = strdup(from); + char_conv_use_iconv = 1; return 0; #endif + return -1; } char * convStrDup(char * string) { -#ifdef HAVE_ICONV - char buffer[BUFFER_SIZE]; - size_t inleft = strlen(string); - char * ret; - size_t outleft; - size_t retlen = 0; - size_t err; - char * bufferPtr; - if(!char_conv_to) return NULL; - ret = malloc(1); - ret[0] = '\0'; + if(char_conv_same) return strdup(string); - while(inleft) { - bufferPtr = buffer; - outleft = BUFFER_SIZE; - err = iconv(char_conv_iconv,&string,&inleft,&bufferPtr, +#ifdef HAVE_ICONV + if(char_conv_use_iconv) { + char buffer[BUFFER_SIZE]; + size_t inleft = strlen(string); + char * ret; + size_t outleft; + size_t retlen = 0; + size_t err; + char * bufferPtr; + + ret = malloc(1); + ret[0] = '\0'; + + while(inleft) { + bufferPtr = buffer; + outleft = BUFFER_SIZE; + err = iconv(char_conv_iconv,&string,&inleft,&bufferPtr, &outleft); - if(outleft==BUFFER_SIZE || (err<0 && errno!=E2BIG)) { - free(ret); - return NULL; + if(outleft==BUFFER_SIZE || (err<0 && errno!=E2BIG)) { + free(ret); + return NULL; + } + + ret = realloc(ret,retlen+BUFFER_SIZE-outleft+1); + memcpy(ret+retlen,buffer,BUFFER_SIZE-outleft); + retlen+=BUFFER_SIZE-outleft; + ret[retlen] = '\0'; } - ret = realloc(ret,retlen+BUFFER_SIZE-outleft+1); - memcpy(ret+retlen,buffer,BUFFER_SIZE-outleft); - retlen+=BUFFER_SIZE-outleft; - ret[retlen] = '\0'; + return ret; } - - return ret; #endif + + switch(char_conv_asciiToUtf8) { + case 1: + return asciiStrToUtf8Dup(string); + break; + case -1: + return utf8StrToAsciiDup(string); + break; + } + return NULL; } void closeCharSetConversion() { -#ifdef HAVE_ICONV if(char_conv_to) { - iconv_close(char_conv_iconv); +#ifdef HAVE_ICONV + if(char_conv_use_iconv) iconv_close(char_conv_iconv); +#endif free(char_conv_to); free(char_conv_from); char_conv_to = NULL; char_conv_from = NULL; + char_conv_same = 0; + char_conv_asciiToUtf8 = 0; + char_conv_use_iconv = 0; } -#endif } diff --git a/src/directory.c b/src/directory.c index 6b1ed9441..b4634c42f 100644 --- a/src/directory.c +++ b/src/directory.c @@ -310,14 +310,18 @@ int removeDeletedFromDirectory(Directory * directory) { while((ent = readdir(dir))) { if(ent->d_name[0]=='.') continue; /* hide hidden stuff */ - utf8 = strdup(fsCharsetToUtf8(ent->d_name)); + utf8 = fsCharsetToUtf8(ent->d_name); + + if(!utf8) continue; + + utf8 = strdup(utf8); if(directory->utf8name) { s = malloc(strlen(directory->utf8name)+strlen(utf8)+2); sprintf(s,"%s/%s",directory->utf8name,utf8); } else s= strdup(utf8); - insertInList(entList,fsCharsetToUtf8(ent->d_name),s); + insertInList(entList,utf8,s); free(utf8); } @@ -377,7 +381,11 @@ int updateDirectory(Directory * directory) { while((ent = readdir(dir))) { if(ent->d_name[0]=='.') continue; /* hide hidden stuff */ - utf8 = strdup(fsCharsetToUtf8(ent->d_name)); + utf8 = fsCharsetToUtf8(ent->d_name); + + if(!utf8) continue; + + utf8 = strdup(utf8); if(directory->utf8name) { s = malloc(strlen(directory->utf8name)+strlen(utf8)+2); @@ -415,7 +423,11 @@ int exploreDirectory(Directory * directory) { while((ent = readdir(dir))) { if(ent->d_name[0]=='.') continue; /* hide hidden stuff */ - utf8 = strdup(fsCharsetToUtf8(ent->d_name)); + utf8 = fsCharsetToUtf8(ent->d_name); + + if(!utf8) continue; + + utf8 = strdup(utf8); DEBUG("explore: found: %s (%s)\n",ent->d_name,utf8); diff --git a/src/ls.c b/src/ls.c index a47f64d74..d461109d5 100644 --- a/src/ls.c +++ b/src/ls.c @@ -46,6 +46,7 @@ int lsPlaylists(FILE * fp, char * utf8path) { struct stat st; struct dirent * ent; char * dup; + char * utf8; char s[MAXPATHLEN+1]; List * list = NULL; ListNode * node = NULL; @@ -78,8 +79,9 @@ int lsPlaylists(FILE * fp, char * utf8path) { if(list==NULL) list = makeList(NULL); dup = strdup(ent->d_name); dup[suff] = '\0'; - insertInList(list, - fsCharsetToUtf8(dup),NULL); + if((utf8 = fsCharsetToUtf8(dup))) { + insertInList(list,utf8,NULL); + } free(dup); } } diff --git a/src/path.c b/src/path.c index e209b671d..db41ed968 100644 --- a/src/path.c +++ b/src/path.c @@ -20,6 +20,7 @@ #include "log.h" #include "charConv.h" #include "conf.h" +#include "utf8.h" #include #include @@ -46,24 +47,35 @@ char * pathConvCharset(char * to, char * from, char * str, char * ret) { ret = convStrDup(str); } - if(!ret) ret = strdup(str); - return ret; } char * fsCharsetToUtf8(char * str) { static char * ret = NULL; - return ret = pathConvCharset("UTF-8",fsCharset,str,ret); + ret = pathConvCharset("UTF-8",fsCharset,str,ret); + + if(ret && !validUtf8String(ret)) ret = NULL; + /*if(!ret) ret = asciiStrToUtf8Dup(str);*/ + + /* if all else fails, just strdup */ + + return ret; } char * utf8ToFsCharset(char * str) { static char * ret = NULL; - return ret = pathConvCharset(fsCharset,"UTF-8",str,ret); + ret = pathConvCharset(fsCharset,"UTF-8",str,ret); + + if(!ret) ret = strdup(str); + + return ret; } void setFsCharset(char * charset) { + int error = 0; + if(fsCharset) free(fsCharset); fsCharset = strdup(charset); @@ -74,11 +86,19 @@ void setFsCharset(char * charset) { ERROR("fs charset conversion problem: " "not able to convert from \"%s\" to \"%s\"\n", fsCharset,"UTF-8"); + error = 1; } if(setCharSetConversion(fsCharset,"UTF-8")!=0) { ERROR("fs charset conversion problem: " "not able to convert from \"%s\" to \"%s\"\n", "UTF-8",fsCharset); + error = 1; + } + + if(error) { + free(fsCharset); + ERROR("setting fs charset to ISO-8859-1!\n"); + fsCharset = strdup("ISO-8859-1"); } } diff --git a/src/playlist.c b/src/playlist.c index 471c1b08e..cf064047f 100644 --- a/src/playlist.c +++ b/src/playlist.c @@ -1122,7 +1122,9 @@ int loadPlaylist(FILE * fp, char * utf8file) { free(temp); } slength = 0; - temp = strdup(fsCharsetToUtf8(s)); + temp = fsCharsetToUtf8(s); + if(!temp) continue; + temp = strdup(temp); if(s[0]==PLAYLIST_COMMENT && !getSong(temp)) { free(temp); continue; diff --git a/src/utf8.c b/src/utf8.c index 140316150..4b8814a80 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -85,3 +85,28 @@ int validUtf8String(unsigned char * string) { return 1; } + +unsigned char * utf8StrToAsciiDup(unsigned char * utf8) { + /* utf8 should have at most two char's per ascii char */ + int len = strlen(utf8)+1; + unsigned char * ret = malloc(len); + unsigned char * cp = ret; + int count; + + memset(ret,0,len); + + len = 0; + + while(*utf8) { + count = validateUtf8Char(utf8); + if(!count) { + free(ret); + return NULL; + } + *(cp++) = utf8ToAscii(utf8); + utf8+= count; + len++; + } + + return realloc(ret,len+1); +} diff --git a/src/utf8.h b/src/utf8.h index 1928a8a81..bf8f1a9c9 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -5,6 +5,8 @@ unsigned char * asciiToUtf8(unsigned char c); unsigned char * asciiStrToUtf8Dup(unsigned char * ascii); +unsigned char * utf8StrToAsciiDup(unsigned char * utf8); + unsigned char utf8ToAscii(unsigned char * utf8); int validateUtf8Char(unsigned char * utf8Char); -- cgit v1.2.3