{* UltraStar Deluxe - Karaoke Game * * UltraStar Deluxe is the legal property of its developers, whose names * are too numerous to list here. Please refer to the COPYRIGHT * file distributed with this source distribution. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; see the file COPYING. If not, write to * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA. * * $URL$ * $Id$ *} unit UUnicodeUtils; interface {$IFDEF FPC} {$MODE Delphi} {$ENDIF} {$I switches.inc} uses SysUtils {$IFDEF MSWINDOWS} , Windows {$ENDIF} ; (* * Character classes *) function IsAlphaChar(ch: WideChar): boolean; overload; function IsAlphaChar(ch: UCS4Char): boolean; overload; function IsNumericChar(ch: WideChar): boolean; overload; function IsNumericChar(ch: UCS4Char): boolean; overload; function IsAlphaNumericChar(ch: WideChar): boolean; overload; function IsAlphaNumericChar(ch: UCS4Char): boolean; overload; function IsPunctuationChar(ch: WideChar): boolean; overload; function IsPunctuationChar(ch: UCS4Char): boolean; overload; function IsControlChar(ch: WideChar): boolean; overload; function IsControlChar(ch: UCS4Char): boolean; overload; {* * String format conversion *} function UTF8ToUCS4String(const str: UTF8String): UCS4String; function UCS4ToUTF8String(const str: UCS4String): UTF8String; overload; function UCS4ToUTF8String(ch: UCS4Char): UTF8String; overload; {** * Returns the number of characters (not bytes) in string str. *} function LengthUTF8(const str: UTF8String): integer; function UTF8CompareStr(const S1, S2: UTF8String): integer; function UTF8CompareText(const S1, S2: UTF8String): integer; function UTF8StartsText(const SubText, Text: UTF8String): boolean; function UTF8ContainsStr(const Text, SubText: UTF8String): boolean; function UTF8ContainsText(const Text, SubText: UTF8String): boolean; function UTF8UpperCase(const str: UTF8String): UTF8String; function UTF8LowerCase(const str: UTF8String): UTF8String; {** * Converts a UCS-4 char ch to its upper-case representation. *} function UCS4UpperCase(ch: UCS4Char): UCS4Char; overload; {** * Converts a UCS-4 string str to its upper-case representation. *} function UCS4UpperCase(const str: UCS4String): UCS4String; overload; {** * *} function UCS4CharToString(ch: UCS4Char): UCS4String; (* * Converts a WideString to its upper-case representation. * Wrapper for WideUpperCase. Needed because some plattforms have problems with * unicode support. * * Note that characters in UTF-16 might consist of one or two WideChar valus * (see surrogates). So instead of using WideStringUpperCase(ch)[1] for single * character access, convert to UCS-4 where each character is represented by * one UCS4Char. *) function WideStringUpperCase(const str: WideString) : WideString; overload; function WideStringUpperCase(ch: WideChar): WideString; overload; function StringReplaceW(const text : WideString; search, rep: WideChar): WideString; implementation function IsAlphaChar(ch: WideChar): boolean; begin {$IFDEF MSWINDOWS} Result := IsCharAlphaW(ch); {$ELSE} // TODO: add chars > 255 case ch of 'A'..'Z', // A-Z 'a'..'z', // a-z #170,#181,#186, #192..#214, #216..#246, #248..#255: Result := true; else Result := false; end; {$ENDIF} end; function IsAlphaChar(ch: UCS4Char): boolean; begin Result := IsAlphaChar(WideChar(Ord(ch))); end; function IsNumericChar(ch: WideChar): boolean; begin // ignore non-arabic numerals as we do not want to handle them case ch of '0'..'9': Result := true; else Result := false; end; end; function IsNumericChar(ch: UCS4Char): boolean; begin Result := IsNumericChar(WideChar(Ord(ch))); end; function IsAlphaNumericChar(ch: WideChar): boolean; begin Result := (IsAlphaChar(ch) or IsNumericChar(ch)); end; function IsAlphaNumericChar(ch: UCS4Char): boolean; begin Result := (IsAlphaChar(ch) or IsNumericChar(ch)); end; function IsPunctuationChar(ch: WideChar): boolean; begin // TODO: add chars > 255? case ch of ' '..'/',':'..'@','['..'`','{'..'~', #160..#191,#215,#247: Result := true; else Result := false; end; end; function IsPunctuationChar(ch: UCS4Char): boolean; begin Result := IsPunctuationChar(WideChar(Ord(ch))); end; function IsControlChar(ch: WideChar): boolean; begin case ch of #0..#31, #127..#159: Result := true; else Result := false; end; end; function IsControlChar(ch: UCS4Char): boolean; begin Result := IsControlChar(WideChar(Ord(ch))); end; function UTF8ToUCS4String(const str: UTF8String): UCS4String; begin Result := WideStringToUCS4String(UTF8Decode(str)); end; function UCS4ToUTF8String(const str: UCS4String): UTF8String; begin Result := UTF8Encode(UCS4StringToWideString(str)); end; function UCS4ToUTF8String(ch: UCS4Char): UTF8String; begin Result := UCS4ToUTF8String(UCS4CharToString(ch)); end; function LengthUTF8(const str: UTF8String): integer; begin Result := Length(UTF8ToUCS4String(str)); end; function UTF8CompareStr(const S1, S2: UTF8String): integer; begin // FIXME Result := WideCompareStr(UTF8Decode(S1), UTF8Decode(S2)); end; function UTF8CompareText(const S1, S2: UTF8String): integer; begin // FIXME Result := WideCompareText(UTF8Decode(S1), UTF8Decode(S2)); end; function UTF8StartsStr(const SubText, Text: UTF8String): boolean; begin // TODO: use WideSameStr ()? Result := (Pos(SubText, Text) = 1); end; function UTF8StartsText(const SubText, Text: UTF8String): boolean; begin // TODO: use WideSameText? Result := (Pos(UTF8UpperCase(SubText), UTF8UpperCase(Text)) = 1); end; function UTF8ContainsStr(const Text, SubText: UTF8String): boolean; begin Result := Pos(SubText, Text) > 0; end; function UTF8ContainsText(const Text, SubText: UTF8String): boolean; begin Result := Pos(UTF8UpperCase(SubText), UTF8UpperCase(Text)) > 0; end; function UTF8UpperCase(const str: UTF8String): UTF8String; begin Result := UTF8Encode(WideStringUpperCase(UTF8Decode(str))); end; function UTF8LowerCase(const str: UTF8String): UTF8String; begin // FIXME Result := UTF8Encode(WideLowerCase(UTF8Decode(str))); end; function UCS4UpperCase(ch: UCS4Char): UCS4Char; begin Result := UCS4UpperCase(UCS4CharToString(ch))[0]; end; function UCS4UpperCase(const str: UCS4String): UCS4String; begin // convert to upper-case as WideString and convert result back to UCS-4 Result := WideStringToUCS4String( WideStringUpperCase( UCS4StringToWideString(str))); end; function UCS4CharToString(ch: UCS4Char): UCS4String; begin SetLength(Result, 2); Result[0] := ch; Result[1] := 0; end; function WideStringUpperCase(ch: WideChar): WideString; begin // If WideChar #0 is converted to a WideString in Delphi, a string with // length 1 and a single char #0 is returned. In FPC an empty (length=0) // string will be returned. This will crash, if a non printable key was // pressed, its char code (#0) is translated to upper-case and the the first // character is accessed with Result[1]. // We cannot catch this error in the WideString parameter variant as the string // has length 0 already. // Force min. string length of 1 if (ch = #0) then Result := #0 else Result := WideStringUpperCase(WideString(ch)); end; function WideStringUpperCase(const str: WideString): WideString; begin // On Linux and MacOSX the cwstring unit is necessary for Unicode function-calls. // Otherwise you will get an EIntOverflow exception (thrown by unimplementedwidestring()). // The Unicode manager cwstring does not work with MacOSX at the moment because // of missing references to iconv. {.$IFNDEF DARWIN} {$IFDEF NOIGNORE} Result := WideUpperCase(str) {$ELSE} Result := UTF8Decode(UpperCase(UTF8Encode(str))); {$ENDIF} end; function StringReplaceW(const text : WideString; search, rep: WideChar) : WideString; var iPos : integer; // sTemp : WideString; begin (* result := text; iPos := Pos(search, result); while (iPos > 0) do begin sTemp := copy(result, iPos + length(search), length(result)); result := copy(result, 1, iPos - 1) + rep + sTEmp; iPos := Pos(search, result); end; *) result := text; if search = rep then exit; for iPos := 1 to length(result) do begin if result[iPos] = search then result[iPos] := rep; end; end; end.