aboutsummaryrefslogtreecommitdiffstats
path: root/unicode
diff options
context:
space:
mode:
authortobigun <tobigun@b956fd51-792f-4845-bead-9b4dfca2ff2c>2009-03-14 22:10:00 +0000
committertobigun <tobigun@b956fd51-792f-4845-bead-9b4dfca2ff2c>2009-03-14 22:10:00 +0000
commitaf8fa664f71276fb857360531f04b1c7fb101d22 (patch)
tree715ce965fd3e32c0ef34a398e394e3d65a6bdf65 /unicode
parenta455c8ef53bd91fe4dfba1df7d266561b2a2ac49 (diff)
downloadusdx-af8fa664f71276fb857360531f04b1c7fb101d22.tar.gz
usdx-af8fa664f71276fb857360531f04b1c7fb101d22.tar.xz
usdx-af8fa664f71276fb857360531f04b1c7fb101d22.zip
- font-engine uses UCS4 internally
- more UTf-8 and UCS4 routines in UnicodeUtils git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/branches/experimental@1636 b956fd51-792f-4845-bead-9b4dfca2ff2c
Diffstat (limited to 'unicode')
-rw-r--r--unicode/src/base/UFont.pas206
-rw-r--r--unicode/src/base/UUnicodeUtils.pas133
2 files changed, 249 insertions, 90 deletions
diff --git a/unicode/src/base/UFont.pas b/unicode/src/base/UFont.pas
index 3d6e9be3..346f4a07 100644
--- a/unicode/src/base/UFont.pas
+++ b/unicode/src/base/UFont.pas
@@ -47,6 +47,7 @@ uses
glext,
glu,
sdl,
+ UUnicodeUtils,
{$IFDEF BITMAP_FONT}
UTexture,
{$ENDIF}
@@ -60,7 +61,7 @@ type
TGLubyteArray = array[0 .. (MaxInt div SizeOf(GLubyte))-1] of GLubyte;
TGLubyteDynArray = array of GLubyte;
- TWideStringArray = array of WideString;
+ TUCS4StringArray = array of UCS4String;
TGLColor = packed record
case byte of
@@ -127,33 +128,33 @@ type
{**
* Splits lines in Text seperated by newline (char-code #13).
* @param Text UTF-8 encoded string
- * @param Lines splitted WideString lines
+ * @param Lines splitted UCS4String lines
*}
- procedure SplitLines(const Text: UTF8String; var Lines: TWideStringArray);
+ procedure SplitLines(const Text: UCS4String; var Lines: TUCS4StringArray);
{**
- * Print an array of WideStrings. Each array-item is a line of text.
+ * Print an array of UCS4Strings. Each array-item is a line of text.
* Lines of text are seperated by the line-spacing.
* This is the base function for all text drawing.
*}
- procedure Print(const Text: TWideStringArray); overload; virtual;
+ procedure Print(const Text: TUCS4StringArray); overload; virtual;
{**
* Draws an underline.
*}
- procedure DrawUnderline(const Text: WideString); virtual;
+ procedure DrawUnderline(const Text: UCS4String); virtual;
{**
* Renders (one) line of text.
*}
- procedure Render(const Text: WideString); virtual; abstract;
+ procedure Render(const Text: UCS4String); virtual; abstract;
{**
* Returns the bounds of text-lines contained in Text.
* @param(Advance if true the right bound is set to the advance instead
* of the minimal right bound.)
*}
- function BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; overload; virtual; abstract;
+ function BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; overload; virtual; abstract;
{**
* Resets all user settings to default values.
@@ -188,6 +189,8 @@ type
{**
* Prints a text.
*}
+ procedure Print(const Text: UCS4String); overload;
+ {** UTF-16 version of @link(Print) }
procedure Print(const Text: WideString); overload;
{** UTF-8 version of @link(Print) }
procedure Print(const Text: UTF8String); overload;
@@ -203,6 +206,8 @@ type
* bigger than the text's width as it additionally contains the advance
* and glyph-spacing of the last character.
*}
+ function BBox(const Text: UCS4String; Advance: boolean = true): TBoundsDbl; overload;
+ {** UTF-16 version of @link(BBox) }
function BBox(const Text: WideString; Advance: boolean = true): TBoundsDbl; overload;
{** UTF-8 version of @link(BBox) }
function BBox(const Text: UTF8String; Advance: boolean = true): TBoundsDbl; overload;
@@ -249,9 +254,9 @@ type
/// Mipmap fonts (size[level+1] = size[level]/2)
fMipmapFonts: array[0..cMaxMipmapLevel] of TFont;
- procedure Render(const Text: WideString); override;
- procedure Print(const Text: TWideStringArray); override;
- function BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; override;
+ procedure Render(const Text: UCS4String); override;
+ procedure Print(const Text: TUCS4StringArray); override;
+ function BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; override;
{**
* Callback called for creation of each mipmap font.
@@ -322,7 +327,7 @@ type
{**
* Table for storage of max. 256 glyphs.
- * Used for the second cache level. Indexed by the LSB of the WideChar
+ * Used for the second cache level. Indexed by the LSB of the UCS4Char
* char-code.
*}
PGlyphTable = ^TGlyphTable;
@@ -332,7 +337,7 @@ type
* Cache for glyphs of a single font.
* The cached glyphs are stored inside a hash-list.
* Hashing is performed in two steps:
- * 1. the least significant byte (LSB) of the WideChar character code
+ * 1. the least significant byte (LSB) of the UCS4Char character code
* is removed (shr 8) and the result (we call it BaseCode here) looked up in
* the hash-list.
* 2. Each entry of the hash-list contains a table with max. 256 entries.
@@ -359,22 +364,22 @@ type
* Add glyph Glyph with char-code ch to the cache.
* @returns @true on success, @false otherwise
*}
- function AddGlyph(ch: WideChar; const Glyph: TGlyph): boolean;
+ function AddGlyph(ch: UCS4Char; const Glyph: TGlyph): boolean;
{**
* Removes the glyph with char-code ch from the cache.
*}
- procedure DeleteGlyph(ch: WideChar);
+ procedure DeleteGlyph(ch: UCS4Char);
{**
* Removes the glyph with char-code ch from the cache.
*}
- function GetGlyph(ch: WideChar): TGlyph;
+ function GetGlyph(ch: UCS4Char): TGlyph;
{**
* Checks if a glyph with char-code ch is cached.
*}
- function HasGlyph(ch: WideChar): boolean;
+ function HasGlyph(ch: UCS4Char): boolean;
{**
* Remove and free all cached glyphs. If KeepBaseSet is set to
@@ -408,13 +413,13 @@ type
* Retrieves a cached glyph with char-code ch from cache.
* If the glyph is not already cached, it is loaded with LoadGlyph().
*}
- function GetGlyph(ch: WideChar): TGlyph;
+ function GetGlyph(ch: UCS4Char): TGlyph;
{**
* Callback to create (load) a glyph with char-code ch.
* Implemented by subclasses.
*}
- function LoadGlyph(ch: WideChar): TGlyph; virtual; abstract;
+ function LoadGlyph(ch: UCS4Char): TGlyph; virtual; abstract;
public
constructor Create();
@@ -436,6 +441,7 @@ type
*}
TFTGlyph = class(TGlyph)
private
+ fCharCode: UCS4Char; //**< Char code
fCharIndex: FT_UInt; //**< Freetype specific char-index (<> char-code)
fDisplayList: GLuint; //**< Display-list ID
fTexture: GLuint; //**< Texture ID
@@ -477,7 +483,7 @@ type
* Creates a glyph with char-code ch from font Font.
* @param LoadFlags flags passed to FT_Load_Glyph()
*}
- constructor Create(Font: TFTFont; ch: WideChar; Outset: single;
+ constructor Create(Font: TFTFont; ch: UCS4Char; Outset: single;
LoadFlags: FT_Int32);
destructor Destroy(); override;
@@ -507,10 +513,10 @@ type
fUseDisplayLists: boolean; //**< true: use display-lists, false: direct drawing
{** @seealso TCachedFont.LoadGlyph }
- function LoadGlyph(ch: WideChar): TGlyph; override;
+ function LoadGlyph(ch: UCS4Char): TGlyph; override;
- procedure Render(const Text: WideString); override;
- function BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; override;
+ procedure Render(const Text: UCS4String); override;
+ function BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; override;
function GetHeight(): single; override;
function GetAscender(): single; override;
@@ -585,9 +591,9 @@ type
procedure ResetIntern();
protected
- procedure DrawUnderline(const Text: WideString); override;
- procedure Render(const Text: WideString); override;
- function BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; override;
+ procedure DrawUnderline(const Text: UCS4String); override;
+ procedure Render(const Text: UCS4String); override;
+ function BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; override;
function GetHeight(): single; override;
function GetAscender(): single; override;
@@ -672,7 +678,7 @@ type
procedure ResetIntern();
- procedure RenderChar(ch: WideChar; var AdvanceX: real);
+ procedure RenderChar(ch: UCS4Char; var AdvanceX: real);
{**
* Load font widths from an info file.
@@ -682,8 +688,8 @@ type
procedure LoadFontInfo(const InfoFile: AnsiString);
protected
- procedure Render(const Text: WideString); override;
- function BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; override;
+ procedure Render(const Text: UCS4String); override;
+ function BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; override;
function GetHeight(): single; override;
function GetAscender(): single; override;
@@ -801,37 +807,61 @@ begin
ResetIntern();
end;
-procedure TFont.SplitLines(const Text: UTF8String; var Lines: TWideStringArray);
+procedure TFont.SplitLines(const Text: UCS4String; var Lines: TUCS4StringArray);
var
- LineList: TStringList;
- LineIndex: integer;
+ CharIndex: integer;
+ LineStart: integer;
+ LineLength: integer;
+ EOT: boolean; // End-Of-Text
begin
- // split lines on newline (there is no WideString version of ExtractStrings)
- LineList := TStringList.Create();
- ExtractStrings([#13], [], PChar(Text), LineList);
+ // split lines on newline (there is no UCS4String version of ExtractStrings)
+ SetLength(Lines, 0);
+ EOT := false;
+ LineStart := 0;
+
+ for CharIndex := 0 to High(Text) do
+ begin
+ // check for end of text (UCS4Strings are zero-terminated)
+ if (CharIndex = High(Text)) then
+ EOT := true;
+
+ // check for newline (carriage return (#13)) or end of text
+ if (Text[CharIndex] = 13) or EOT then
+ begin
+ LineLength := CharIndex - LineStart;
+ // check if last character was a newline
+ if (EOT and (LineLength = 0)) then
+ Break;
+
+ // copy line (even if LineLength is 0)
+ SetLength(Lines, Length(Lines)+1);
+ Lines[High(Lines)] := UCS4Copy(Text, LineStart, LineLength);
- // create an array of WideStrins from the UTF-8 string-list
- SetLength(Lines, LineList.Count);
- for LineIndex := 0 to LineList.Count-1 do
- Lines[LineIndex] := UTF8Decode(LineList[LineIndex]);
- LineList.Free();
+ LineStart := CharIndex+1;
+ end;
+ end;
end;
-function TFont.BBox(const Text: UTF8String; Advance: boolean): TBoundsDbl;
+function TFont.BBox(const Text: UCS4String; Advance: boolean): TBoundsDbl;
var
- LineArray: TWideStringArray;
+ LineArray: TUCS4StringArray;
begin
SplitLines(Text, LineArray);
Result := BBox(LineArray, Advance);
SetLength(LineArray, 0);
end;
+function TFont.BBox(const Text: UTF8String; Advance: boolean): TBoundsDbl;
+begin
+ Result := BBox(UTF8Decode(Text), Advance);
+end;
+
function TFont.BBox(const Text: WideString; Advance: boolean): TBoundsDbl;
begin
- Result := BBox(UTF8Encode(Text), Advance);
+ Result := BBox(WideStringToUCS4String(Text), Advance);
end;
-procedure TFont.Print(const Text: TWideStringArray);
+procedure TFont.Print(const Text: TUCS4StringArray);
var
LineIndex: integer;
begin
@@ -912,21 +942,26 @@ begin
glPopAttrib();
end;
-procedure TFont.Print(const Text: UTF8String);
+procedure TFont.Print(const Text: UCS4String);
var
- LineArray: TWideStringArray;
+ LineArray: TUCS4StringArray;
begin
SplitLines(Text, LineArray);
Print(LineArray);
SetLength(LineArray, 0);
end;
+procedure TFont.Print(const Text: UTF8String);
+begin
+ Print(UTF8Decode(Text));
+end;
+
procedure TFont.Print(const Text: WideString);
begin
- Print(UTF8Encode(Text));
+ Print(WideStringToUCS4String(Text));
end;
-procedure TFont.DrawUnderline(const Text: WideString);
+procedure TFont.DrawUnderline(const Text: UCS4String);
var
UnderlineY1, UnderlineY2: single;
Bounds: TBoundsDbl;
@@ -1194,7 +1229,7 @@ begin
glScalef(MipmapScale, MipmapScale, 0);
end;
-procedure TScalableFont.Print(const Text: TWideStringArray);
+procedure TScalableFont.Print(const Text: TUCS4StringArray);
begin
glPushMatrix();
@@ -1210,12 +1245,12 @@ begin
glPopMatrix();
end;
-procedure TScalableFont.Render(const Text: WideString);
+procedure TScalableFont.Render(const Text: UCS4String);
begin
Assert(false, 'Unused TScalableFont.Render() was called');
end;
-function TScalableFont.BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl;
+function TScalableFont.BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl;
begin
Result := fBaseFont.BBox(Text, Advance);
Result.Left := Result.Left * fScale * fAspect;
@@ -1346,7 +1381,7 @@ begin
inherited;
end;
-function TCachedFont.GetGlyph(ch: WideChar): TGlyph;
+function TCachedFont.GetGlyph(ch: UCS4Char): TGlyph;
begin
Result := fCache.GetGlyph(ch);
if (Result = nil) then
@@ -1372,7 +1407,7 @@ constructor TFTFont.Create(
Size: integer; Outset: single;
LoadFlags: FT_Int32);
var
- i: WideChar;
+ ch: UCS4Char;
begin
inherited Create();
@@ -1400,8 +1435,8 @@ begin
ResetIntern();
// pre-cache some commonly used glyphs (' ' - '~')
- for i := #32 to #126 do
- fCache.AddGlyph(i, TFTGlyph.Create(Self, i, Outset, LoadFlags));
+ for ch := 32 to 126 do
+ fCache.AddGlyph(ch, TFTGlyph.Create(Self, ch, Outset, LoadFlags));
end;
destructor TFTFont.Destroy();
@@ -1424,15 +1459,15 @@ begin
ResetIntern();
end;
-function TFTFont.LoadGlyph(ch: WideChar): TGlyph;
+function TFTFont.LoadGlyph(ch: UCS4Char): TGlyph;
begin
Result := TFTGlyph.Create(Self, ch, Outset, fLoadFlags);
end;
-function TFTFont.BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl;
+function TFTFont.BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl;
var
Glyph, PrevGlyph: TFTGlyph;
- TextLine: WideString;
+ TextLine: UCS4String;
LineYOffset: single;
LineIndex, CharIndex: integer;
LineBounds: TBoundsDbl;
@@ -1462,7 +1497,7 @@ begin
LineBounds.Top := 0;
// for each glyph image, compute its bounding box
- for CharIndex := 1 to Length(TextLine) do
+ for CharIndex := 0 to LengthUCS4(TextLine)-1 do
begin
Glyph := TFTGlyph(GetGlyph(TextLine[CharIndex]));
if (Glyph <> nil) then
@@ -1480,9 +1515,9 @@ begin
LineBounds.Left := LineBounds.Right + Glyph.Bounds.Left;
// update right bound
- if (CharIndex < Length(TextLine)) or // not the last character
- (TextLine[CharIndex] = ' ') or // on space char (Bounds.Right = 0)
- Advance then // or in advance mode
+ if (CharIndex < LengthUCS4(TextLine)-1) or // not the last character
+ (TextLine[CharIndex] = Ord(' ')) or // on space char (Bounds.Right = 0)
+ Advance then // or in advance mode
begin
// add advance and glyph spacing
LineBounds.Right := LineBounds.Right + Glyph.Advance.x + GlyphSpacing
@@ -1540,7 +1575,7 @@ begin
Result.Bottom := 0.0;
end;
-procedure TFTFont.Render(const Text: WideString);
+procedure TFTFont.Render(const Text: UCS4String);
var
CharIndex: integer;
Glyph, PrevGlyph: TFTGlyph;
@@ -1550,7 +1585,7 @@ begin
PrevGlyph := nil;
// draw current line
- for CharIndex := 1 to Length(Text) do
+ for CharIndex := 0 to LengthUCS4(Text)-1 do
begin
Glyph := TFTGlyph(GetGlyph(Text[CharIndex]));
if (Assigned(Glyph)) then
@@ -1705,7 +1740,7 @@ begin
ResetIntern();
end;
-procedure TFTOutlineFont.DrawUnderline(const Text: WideString);
+procedure TFTOutlineFont.DrawUnderline(const Text: UCS4String);
var
CurrentColor: TGLColor;
OutlineColor: TGLColor;
@@ -1730,7 +1765,7 @@ begin
glPopMatrix();
end;
-procedure TFTOutlineFont.Render(const Text: WideString);
+procedure TFTOutlineFont.Render(const Text: UCS4String);
var
CurrentColor: TGLColor;
OutlineColor: TGLColor;
@@ -1770,7 +1805,7 @@ begin
fInnerFont.FlushCache(KeepBaseSet);
end;
-function TFTOutlineFont.BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl;
+function TFTOutlineFont.BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl;
begin
Result := fOutlineFont.BBox(Text, Advance);
end;
@@ -2151,13 +2186,14 @@ begin
FT_Done_Glyph(Glyph);
end;
-constructor TFTGlyph.Create(Font: TFTFont; ch: WideChar; Outset: single;
+constructor TFTGlyph.Create(Font: TFTFont; ch: UCS4Char; Outset: single;
LoadFlags: FT_Int32);
begin
inherited Create();
fFont := Font;
fOutset := Outset;
+ fCharCode := ch;
// get the Freetype char-index (use default UNICODE charmap)
fCharIndex := FT_Get_Char_Index(Font.fFace, FT_ULONG(ch));
@@ -2336,7 +2372,7 @@ begin
InsertPos := fHash.Count;
end;
-function TGlyphCache.AddGlyph(ch: WideChar; const Glyph: TGlyph): boolean;
+function TGlyphCache.AddGlyph(ch: UCS4Char; const Glyph: TGlyph): boolean;
var
BaseCode: cardinal;
GlyphCode: integer;
@@ -2346,7 +2382,7 @@ var
begin
Result := false;
- BaseCode := cardinal(ch) shr 8;
+ BaseCode := Ord(ch) shr 8;
GlyphTable := FindGlyphTable(BaseCode, InsertPos);
if (GlyphTable = nil) then
begin
@@ -2356,7 +2392,7 @@ begin
end;
// get glyph table offset
- GlyphCode := cardinal(ch) and $FF;
+ GlyphCode := Ord(ch) and $FF;
// insert glyph into table if not present
if (GlyphTable[GlyphCode] = nil) then
begin
@@ -2365,19 +2401,19 @@ begin
end;
end;
-procedure TGlyphCache.DeleteGlyph(ch: WideChar);
+procedure TGlyphCache.DeleteGlyph(ch: UCS4Char);
var
Table: PGlyphTable;
TableIndex, GlyphIndex: integer;
TableEmpty: boolean;
begin
// find table
- Table := FindGlyphTable(cardinal(ch) shr 8, TableIndex);
+ Table := FindGlyphTable(Ord(ch) shr 8, TableIndex);
if (Table = nil) then
Exit;
// find glyph
- GlyphIndex := cardinal(ch) and $FF;
+ GlyphIndex := Ord(ch) and $FF;
if (Table[GlyphIndex] <> nil) then
begin
// destroy glyph
@@ -2402,19 +2438,19 @@ begin
end;
end;
-function TGlyphCache.GetGlyph(ch: WideChar): TGlyph;
+function TGlyphCache.GetGlyph(ch: UCS4Char): TGlyph;
var
InsertPos: integer;
Table: PGlyphTable;
begin
- Table := FindGlyphTable(cardinal(ch) shr 8, InsertPos);
+ Table := FindGlyphTable(Ord(ch) shr 8, InsertPos);
if (Table = nil) then
Result := nil
else
- Result := Table[cardinal(ch) and $FF];
+ Result := Table[Ord(ch) and $FF];
end;
-function TGlyphCache.HasGlyph(ch: WideChar): boolean;
+function TGlyphCache.HasGlyph(ch: UCS4Char): boolean;
begin
Result := (GetGlyph(ch) <> nil);
end;
@@ -2540,11 +2576,11 @@ begin
Stream.Free;
end;
-function TBitmapFont.BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl;
+function TBitmapFont.BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl;
var
LineIndex, CharIndex: integer;
CharCode: cardinal;
- Line: WideString;
+ Line: UCS4String;
LineWidth: double;
begin
Result.Left := 0;
@@ -2556,7 +2592,7 @@ begin
begin
Line := Text[LineIndex];
LineWidth := 0;
- for CharIndex := 1 to Length(Line) do
+ for CharIndex := 0 to LengthUCS4(Line)-1 do
begin
CharCode := Ord(Line[CharIndex]);
if (CharCode < Length(fWidths)) then
@@ -2567,7 +2603,7 @@ begin
end;
end;
-procedure TBitmapFont.RenderChar(ch: WideChar; var AdvanceX: real);
+procedure TBitmapFont.RenderChar(ch: UCS4Char; var AdvanceX: real);
var
TexX, TexY: real;
TexR, TexB: real;
@@ -2659,20 +2695,20 @@ begin
AdvanceX := AdvanceX + GlyphWidth;
end;
-procedure TBitmapFont.Render(const Text: WideString);
+procedure TBitmapFont.Render(const Text: UCS4String);
var
CharIndex: integer;
AdvanceX: real;
begin
// if there is no text do nothing
- if (Text = '') then
+ if (Text = nil) or (Text[0] = 0) then
Exit;
//Save the current color and alpha (for reflection)
glGetFloatv(GL_CURRENT_COLOR, @fTempColor);
AdvanceX := 0;
- for CharIndex := 1 to Length(Text) do
+ for CharIndex := 0 to LengthUCS4(Text)-1 do
begin
RenderChar(Text[CharIndex], AdvanceX);
end;
diff --git a/unicode/src/base/UUnicodeUtils.pas b/unicode/src/base/UUnicodeUtils.pas
index 01c279bd..26f240a9 100644
--- a/unicode/src/base/UUnicodeUtils.pas
+++ b/unicode/src/base/UUnicodeUtils.pas
@@ -34,11 +34,11 @@ interface
{$I switches.inc}
uses
- SysUtils
{$IFDEF MSWINDOWS}
- , Windows
+ Windows,
{$ENDIF}
- ;
+ SysUtils;
+
(*
* Character classes
*)
@@ -58,6 +58,19 @@ function IsPunctuationChar(ch: UCS4Char): boolean; overload;
function IsControlChar(ch: WideChar): boolean; overload;
function IsControlChar(ch: UCS4Char): boolean; overload;
+{**
+ * Checks if the given string is a valid UTF-8 string.
+ * If an ANSI encoded string (with char codes >= 128) is passed, the
+ * function will most probably return false, as most ANSI strings sequences
+ * are illegal in UTF-8.
+ *}
+function IsUTF8String(const str: AnsiString): boolean;
+
+{**
+ * Checks if the string is composed of ASCII characters.
+ *}
+function IsASCIIString(const str: AnsiString): boolean;
+
{*
* String format conversion
*}
@@ -71,6 +84,12 @@ function UCS4ToUTF8String(ch: UCS4Char): UTF8String; overload;
*}
function LengthUTF8(const str: UTF8String): integer;
+{**
+ * Returns the length of an UCS4String. Note that Length(UCS4String) returns
+ * the length+1 as UCS4Strings are zero-terminated.
+ *}
+function LengthUCS4(const str: UCS4String): integer;
+
function UTF8CompareStr(const S1, S2: UTF8String): integer;
function UTF8CompareText(const S1, S2: UTF8String): integer;
@@ -93,12 +112,19 @@ function UCS4UpperCase(ch: UCS4Char): UCS4Char; overload;
function UCS4UpperCase(const str: UCS4String): UCS4String; overload;
{**
- *
+ * Converts a UCS4Char to an UCS4String.
+ * Note that UCS4Strings are zero-terminated dynamic arrays.
*}
function UCS4CharToString(ch: UCS4Char): UCS4String;
-(*
+{**
+ * Copies a segment of str starting with Index with Count characters.
+ * Note: Do not use Copy() to copy UCS4Strings as the result will not contain
+ * a trailing #0 character and hence is invalid.
+ *}
+function UCS4Copy(const str: UCS4String; Index: Integer = 0; Count: Integer = -1): UCS4String;
+(*
* Converts a WideString to its upper-case representation.
* Wrapper for WideUpperCase. Needed because some plattforms have problems with
* unicode support.
@@ -199,6 +225,78 @@ begin
Result := IsControlChar(WideChar(Ord(ch)));
end;
+
+function IsUTF8String(const str: AnsiString): boolean;
+
+ // find the most significant zero bit (Result: [7..-1])
+ function FindZeroMSB(b: byte): integer;
+ var
+ Mask: byte;
+ begin
+ Mask := $80;
+ Result := 7;
+ while (b and Mask <> 0) do
+ begin
+ Mask := Mask shr 1;
+ Dec(Result);
+ end;
+ end;
+
+var
+ I: integer;
+ ZeroBit: integer;
+ SeqCount: integer; // number of trailing bytes to follow
+begin
+ Result := false;
+ SeqCount := 0;
+
+ for I := 1 to Length(str) do
+ begin
+ if (str[I] >= #128) then
+ begin
+ ZeroBit := FindZeroMSB(Ord(str[I]));
+ // trailing byte expected
+ if (SeqCount > 0) then
+ begin
+ // check if trailing byte has pattern 10xxxxxx
+ if (ZeroBit <> 6) then
+ Exit;
+ Dec(SeqCount);
+ end
+ else // leading byte expected
+ begin
+ // check if pattern is one of 110xxxxx/1110xxxx/11110xxx
+ if (ZeroBit > 5) or (ZeroBit < 3) then
+ Exit;
+ // calculate number of trailing bytes (1, 2 or 3)
+ SeqCount := 6 - ZeroBit;
+ end;
+ end;
+ end;
+
+ // trailing bytes missing?
+ if (SeqCount > 0) then
+ Exit;
+
+ Result := true;
+end;
+
+function IsASCIIString(const str: AnsiString): boolean;
+var
+ I: integer;
+begin
+ for I := 1 to Length(str) do
+ begin
+ if (str[I] >= #128) then
+ begin
+ Result := false;
+ Exit;
+ end;
+ end;
+ Result := true;
+end;
+
+
function UTF8ToUCS4String(const str: UTF8String): UCS4String;
begin
Result := WideStringToUCS4String(UTF8Decode(str));
@@ -219,6 +317,11 @@ begin
Result := Length(UTF8ToUCS4String(str));
end;
+function LengthUCS4(const str: UCS4String): integer;
+begin
+ Result := High(str);
+end;
+
function UTF8CompareStr(const S1, S2: UTF8String): integer;
begin
// FIXME
@@ -284,6 +387,26 @@ begin
Result[1] := 0;
end;
+function UCS4Copy(const str: UCS4String; Index: Integer; Count: Integer): UCS4String;
+var
+ I: integer;
+ MaxCount: integer;
+begin
+ // calculate max. copy count
+ MaxCount := LengthUCS4(str)-Index;
+ if (MaxCount < 0) then
+ MaxCount := 0;
+ // adjust copy count
+ if (Count > MaxCount) or (Count < 0) then
+ Count := MaxCount;
+
+ // copy (and add zero terminator)
+ SetLength(Result, Count + 1);
+ for I := 0 to Count-1 do
+ Result[I] := str[Index+I];
+ Result[Count] := 0;
+end;
+
function WideStringUpperCase(ch: WideChar): WideString;
begin
// If WideChar #0 is converted to a WideString in Delphi, a string with