diff options
Diffstat (limited to '')
-rw-r--r-- | unicode/src/base/UUnicodeUtils.pas | 87 |
1 files changed, 86 insertions, 1 deletions
diff --git a/unicode/src/base/UUnicodeUtils.pas b/unicode/src/base/UUnicodeUtils.pas
index 91c5966f..49be200f 100644
--- a/unicode/src/base/UUnicodeUtils.pas
+++ b/unicode/src/base/UUnicodeUtils.pas
@@ -47,11 +47,51 @@ function IsAlphaNumericChar(ch: WideChar): boolean;
 function IsPunctuationChar(ch: WideChar): boolean;
 function IsControlChar(ch: WideChar): boolean;
 
+{*
+ * String format conversion
+ *}
+
 function UTF8ToUCS4String(const str: UTF8String): UCS4String;
-function UCS4ToUTF8String(const str: UCS4String): UTF8String;
+function UCS4ToUTF8String(const str: UCS4String): UTF8String; overload;
+function UCS4ToUTF8String(ch: UCS4Char): UTF8String; overload;
+
+{**
+ * Returns the number of characters (not bytes) in string str.
+ *}
+function LengthUTF8(const str: UTF8String): integer;
+
+{**
+ * Converts a UCS-4 char ch to its upper-case representation.
+ *}
+function UCS4UpperCase(ch: UCS4Char): UCS4Char; overload;
+
+{**
+ * Converts a UCS-4 string str to its upper-case representation.
+ *}
+function UCS4UpperCase(const str: UCS4String): UCS4String; overload;
+
+{**
+ * Returns a zero-terminated UCS-4 string that contains only the char ch.
+ *}
+function UCS4CharToString(ch: UCS4Char): UCS4String;
+
+(*
+
+ * Converts a WideString to its upper-case representation.
+ * Wrapper for WideUpperCase. Needed because some platforms have problems with
+ * unicode support.
+ *
+ * Note that characters in UTF-16 might consist of one or two WideChar values
+ * (see surrogates). So instead of using WideStringUpperCase(ch)[1] for single
+ * character access, convert to UCS-4 where each character is represented by
+ * one UCS4Char.
+ *)
+function WideStringUpperCase(const str: WideString) : WideString;
+
 implementation
 
+
 function IsAlphaChar(ch: WideChar): boolean;
 begin
 {$IFDEF MSWINDOWS}
@@ -121,4 +161,49 @@ begin
   Result := UTF8Encode(UCS4StringToWideString(str));
 end;
 
+function UCS4ToUTF8String(ch: UCS4Char): UTF8String;
+begin
+  Result := UCS4ToUTF8String(UCS4CharToString(ch));
+end;
+
+function LengthUTF8(const str: UTF8String): integer;
+begin
+  Result := High(UTF8ToUCS4String(str)); // a UCS4String ends with a 0 terminator that Length() would count
+end;
+
+function UCS4UpperCase(ch: UCS4Char): UCS4Char;
+begin
+  Result := UCS4UpperCase(UCS4CharToString(ch))[0];
+end;
+
+function UCS4UpperCase(const str: UCS4String): UCS4String;
+begin
+  // convert to upper-case as WideString and convert result back to UCS-4
+  Result := WideStringToUCS4String(
+            WideStringUpperCase(
+            UCS4StringToWideString(str)));
+end;
+
+function UCS4CharToString(ch: UCS4Char): UCS4String;
+begin
+  SetLength(Result, 2);
+  Result[0] := ch;
+  Result[1] := 0;
+end;
+
+function WideStringUpperCase(const str: WideString): WideString;
+begin
+  // On Linux and MacOSX the cwstring unit is necessary for Unicode function-calls.
+  // Otherwise you will get an EIntOverflow exception (thrown by unimplementedwidestring()).
+  // The Unicode manager cwstring does not work with MacOSX at the moment because
+  // of missing references to iconv. So we have to use Ansi... for the moment.
+
+  {.$IFNDEF DARWIN}
+  {$IFDEF NOIGNORE}
+    Result := WideUpperCase(str)
+  {$ELSE}
+    Result := UTF8Decode(AnsiUpperCase(UTF8Encode(str)));
+  {$ENDIF}
+end;
+
 end.