aboutsummaryrefslogtreecommitdiffstats
path: root/unicode/src/base/UUnicodeUtils.pas
diff options
context:
space:
mode:
authortobigun <tobigun@b956fd51-792f-4845-bead-9b4dfca2ff2c>2008-11-07 20:49:01 +0000
committertobigun <tobigun@b956fd51-792f-4845-bead-9b4dfca2ff2c>2008-11-07 20:49:01 +0000
commit17614ea059162f432f7feba5f39329667a335fa6 (patch)
treeaa95668867bf58d6bd708bfcb5b3f9db38dc59f8 /unicode/src/base/UUnicodeUtils.pas
parente520f12663f97a3ca7e609d0d6f6bf91dc88e675 (diff)
downloadusdx-17614ea059162f432f7feba5f39329667a335fa6.tar.gz
usdx-17614ea059162f432f7feba5f39329667a335fa6.tar.xz
usdx-17614ea059162f432f7feba5f39329667a335fa6.zip
- WideStringUpperCase moved to UUnicodeUtils.pas
- WideCharUpperCase removed, as a single character (code point) can be represented by two WideChars (surrogates). Convert to UCS4 instead (one code point <-> one UCS4Char). - UCS4 functions added to UUnicodeUtils - string replaced with UTF8String (although it's just a typedef) to mark UTF8 strings. git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/branches/experimental@1507 b956fd51-792f-4845-bead-9b4dfca2ff2c
Diffstat (limited to 'unicode/src/base/UUnicodeUtils.pas')
-rw-r--r--unicode/src/base/UUnicodeUtils.pas87
1 files changed, 86 insertions, 1 deletions
diff --git a/unicode/src/base/UUnicodeUtils.pas b/unicode/src/base/UUnicodeUtils.pas
index 91c5966f..49be200f 100644
--- a/unicode/src/base/UUnicodeUtils.pas
+++ b/unicode/src/base/UUnicodeUtils.pas
@@ -47,11 +47,51 @@ function IsAlphaNumericChar(ch: WideChar): boolean;
function IsPunctuationChar(ch: WideChar): boolean;
function IsControlChar(ch: WideChar): boolean;
+{*
+ * String format conversion
+ *}
+
function UTF8ToUCS4String(const str: UTF8String): UCS4String;
-function UCS4ToUTF8String(const str: UCS4String): UTF8String;
+function UCS4ToUTF8String(const str: UCS4String): UTF8String; overload;
+function UCS4ToUTF8String(ch: UCS4Char): UTF8String; overload;
+
+{**
+ * Returns the number of characters (not bytes) in string str.
+ *}
+function LengthUTF8(const str: UTF8String): integer;
+
+{**
+ * Converts a UCS-4 char ch to its upper-case representation.
+ *}
+function UCS4UpperCase(ch: UCS4Char): UCS4Char; overload;
+
+{**
+ * Converts a UCS-4 string str to its upper-case representation.
+ *}
+function UCS4UpperCase(const str: UCS4String): UCS4String; overload;
+
+{**
+ * Converts a UCS-4 char ch to a zero-terminated UCS-4 string holding only ch.
+ *}
+function UCS4CharToString(ch: UCS4Char): UCS4String;
+
+(*
+
+ * Converts a WideString to its upper-case representation.
+ * Wrapper for WideUpperCase. Needed because some platforms have problems with
+ * unicode support.
+ *
+ * Note that characters in UTF-16 might consist of one or two WideChar values
+ * (see surrogates). So instead of using WideStringUpperCase(ch)[1] for single
+ * character access, convert to UCS-4 where each character is represented by
+ * one UCS4Char.
+ *)
+function WideStringUpperCase(const str: WideString) : WideString;
+
implementation
+
function IsAlphaChar(ch: WideChar): boolean;
begin
{$IFDEF MSWINDOWS}
@@ -121,4 +161,49 @@ begin
Result := UTF8Encode(UCS4StringToWideString(str));
end;
+{**
+ * Converts the single UCS-4 character ch to a UTF-8 string.
+ * The character is first wrapped into a UCS-4 string and then passed to the
+ * UCS4String overload of UCS4ToUTF8String.
+ *}
+function UCS4ToUTF8String(ch: UCS4Char): UTF8String;
+var
+  SingleChar: UCS4String;
+begin
+  // wrap the character so the string overload can be reused
+  SingleChar := UCS4CharToString(ch);
+  Result := UCS4ToUTF8String(SingleChar);
+end;
+
+{**
+ * Returns the number of characters (code points, not bytes) in the UTF-8
+ * string str.
+ *
+ * The string is converted to UCS-4, where every code point occupies exactly
+ * one element. A UCS4String is a zero-terminated dynamic array (compare
+ * UCS4CharToString, which allocates two elements for one character), so the
+ * terminator must not be counted as a character.
+ * NOTE(review): assumes UTF8ToUCS4String returns such a zero-terminated
+ * array, as the RTL's WideStringToUCS4String does - confirm.
+ *}
+function LengthUTF8(const str: UTF8String): integer;
+begin
+  // High() is the index of the last element, i.e. of the terminating zero,
+  // which equals the number of code points stored in front of it.
+  Result := High(UTF8ToUCS4String(str));
+  // An entirely empty array (no terminator) yields -1: report 0 characters.
+  if Result < 0 then
+    Result := 0;
+end;
+
+{**
+ * Returns the upper-case counterpart of the UCS-4 character ch.
+ * The character is wrapped into a UCS-4 string, converted with the string
+ * overload of UCS4UpperCase, and the first element of the result is returned.
+ *}
+function UCS4UpperCase(ch: UCS4Char): UCS4Char;
+var
+  Upper: UCS4String;
+begin
+  Upper := UCS4UpperCase(UCS4CharToString(ch));
+  // element 0 holds the converted character
+  Result := Upper[0];
+end;
+
+{**
+ * Returns the upper-case representation of the UCS-4 string str.
+ * The conversion is delegated to WideStringUpperCase, so the string makes a
+ * round trip UCS-4 -> WideString -> UCS-4.
+ *}
+function UCS4UpperCase(const str: UCS4String): UCS4String;
+var
+  Wide: WideString;
+begin
+  // step 1: switch to WideString, the format WideStringUpperCase works on
+  Wide := UCS4StringToWideString(str);
+  // step 2: upper-case it
+  Wide := WideStringUpperCase(Wide);
+  // step 3: convert the result back to UCS-4
+  Result := WideStringToUCS4String(Wide);
+end;
+
+{**
+ * Builds a UCS-4 string that consists of the single character ch.
+ * The result has two elements: ch itself followed by a terminating zero.
+ *}
+function UCS4CharToString(ch: UCS4Char): UCS4String;
+const
+  CharIndex       = 0; // position of the character
+  TerminatorIndex = 1; // position of the terminating zero
+begin
+  SetLength(Result, TerminatorIndex + 1);
+  Result[CharIndex] := ch;
+  Result[TerminatorIndex] := 0;
+end;
+
+function WideStringUpperCase(const str: WideString): WideString;
+begin
+ // Upper-cases str. On Linux and MacOSX the cwstring unit is necessary for Unicode function-calls;
+ // otherwise you will get an EIntOverflow exception (thrown by unimplementedwidestring()).
+ // The Unicode manager cwstring does not work with MacOSX at the moment because of missing
+ // references to iconv. So, unless NOIGNORE is defined, AnsiUpperCase is applied to the UTF-8 form.
+
+ {.$IFNDEF DARWIN}
+ {$IFDEF NOIGNORE}
+ Result := WideUpperCase(str)
+ {$ELSE}
+ Result := UTF8Decode(AnsiUpperCase(UTF8Encode(str)));
+ {$ENDIF}
+end;
+
end.