From af8fa664f71276fb857360531f04b1c7fb101d22 Mon Sep 17 00:00:00 2001 From: tobigun Date: Sat, 14 Mar 2009 22:10:00 +0000 Subject: - font-engine uses UCS4 internally - more UTf-8 and UCS4 routines in UnicodeUtils git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/branches/experimental@1636 b956fd51-792f-4845-bead-9b4dfca2ff2c --- unicode/src/base/UFont.pas | 206 ++++++++++++++++++++++--------------- unicode/src/base/UUnicodeUtils.pas | 133 +++++++++++++++++++++++- 2 files changed, 249 insertions(+), 90 deletions(-) diff --git a/unicode/src/base/UFont.pas b/unicode/src/base/UFont.pas index 3d6e9be3..346f4a07 100644 --- a/unicode/src/base/UFont.pas +++ b/unicode/src/base/UFont.pas @@ -47,6 +47,7 @@ uses glext, glu, sdl, + UUnicodeUtils, {$IFDEF BITMAP_FONT} UTexture, {$ENDIF} @@ -60,7 +61,7 @@ type TGLubyteArray = array[0 .. (MaxInt div SizeOf(GLubyte))-1] of GLubyte; TGLubyteDynArray = array of GLubyte; - TWideStringArray = array of WideString; + TUCS4StringArray = array of UCS4String; TGLColor = packed record case byte of @@ -127,33 +128,33 @@ type {** * Splits lines in Text seperated by newline (char-code #13). * @param Text UTF-8 encoded string - * @param Lines splitted WideString lines + * @param Lines splitted UCS4String lines *} - procedure SplitLines(const Text: UTF8String; var Lines: TWideStringArray); + procedure SplitLines(const Text: UCS4String; var Lines: TUCS4StringArray); {** - * Print an array of WideStrings. Each array-item is a line of text. + * Print an array of UCS4Strings. Each array-item is a line of text. * Lines of text are seperated by the line-spacing. * This is the base function for all text drawing. *} - procedure Print(const Text: TWideStringArray); overload; virtual; + procedure Print(const Text: TUCS4StringArray); overload; virtual; {** * Draws an underline. *} - procedure DrawUnderline(const Text: WideString); virtual; + procedure DrawUnderline(const Text: UCS4String); virtual; {** * Renders (one) line of text. *} - procedure Render(const Text: WideString); virtual; abstract; + procedure Render(const Text: UCS4String); virtual; abstract; {** * Returns the bounds of text-lines contained in Text. * @param(Advance if true the right bound is set to the advance instead * of the minimal right bound.) *} - function BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; overload; virtual; abstract; + function BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; overload; virtual; abstract; {** * Resets all user settings to default values. @@ -188,6 +189,8 @@ type {** * Prints a text. *} + procedure Print(const Text: UCS4String); overload; + {** UTF-16 version of @link(Print) } procedure Print(const Text: WideString); overload; {** UTF-8 version of @link(Print) } procedure Print(const Text: UTF8String); overload; @@ -203,6 +206,8 @@ type * bigger than the text's width as it additionally contains the advance * and glyph-spacing of the last character. *} + function BBox(const Text: UCS4String; Advance: boolean = true): TBoundsDbl; overload; + {** UTF-16 version of @link(BBox) } function BBox(const Text: WideString; Advance: boolean = true): TBoundsDbl; overload; {** UTF-8 version of @link(BBox) } function BBox(const Text: UTF8String; Advance: boolean = true): TBoundsDbl; overload; @@ -249,9 +254,9 @@ type /// Mipmap fonts (size[level+1] = size[level]/2) fMipmapFonts: array[0..cMaxMipmapLevel] of TFont; - procedure Render(const Text: WideString); override; - procedure Print(const Text: TWideStringArray); override; - function BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; override; + procedure Render(const Text: UCS4String); override; + procedure Print(const Text: TUCS4StringArray); override; + function BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; override; {** * Callback called for creation of each mipmap font. @@ -322,7 +327,7 @@ type {** * Table for storage of max. 256 glyphs. - * Used for the second cache level. Indexed by the LSB of the WideChar + * Used for the second cache level. Indexed by the LSB of the UCS4Char * char-code. *} PGlyphTable = ^TGlyphTable; @@ -332,7 +337,7 @@ type * Cache for glyphs of a single font. * The cached glyphs are stored inside a hash-list. * Hashing is performed in two steps: - * 1. the least significant byte (LSB) of the WideChar character code + * 1. the least significant byte (LSB) of the UCS4Char character code * is removed (shr 8) and the result (we call it BaseCode here) looked up in * the hash-list. * 2. Each entry of the hash-list contains a table with max. 256 entries. @@ -359,22 +364,22 @@ type * Add glyph Glyph with char-code ch to the cache. * @returns @true on success, @false otherwise *} - function AddGlyph(ch: WideChar; const Glyph: TGlyph): boolean; + function AddGlyph(ch: UCS4Char; const Glyph: TGlyph): boolean; {** * Removes the glyph with char-code ch from the cache. *} - procedure DeleteGlyph(ch: WideChar); + procedure DeleteGlyph(ch: UCS4Char); {** * Removes the glyph with char-code ch from the cache. *} - function GetGlyph(ch: WideChar): TGlyph; + function GetGlyph(ch: UCS4Char): TGlyph; {** * Checks if a glyph with char-code ch is cached. *} - function HasGlyph(ch: WideChar): boolean; + function HasGlyph(ch: UCS4Char): boolean; {** * Remove and free all cached glyphs. If KeepBaseSet is set to @@ -408,13 +413,13 @@ type * Retrieves a cached glyph with char-code ch from cache. * If the glyph is not already cached, it is loaded with LoadGlyph(). *} - function GetGlyph(ch: WideChar): TGlyph; + function GetGlyph(ch: UCS4Char): TGlyph; {** * Callback to create (load) a glyph with char-code ch. * Implemented by subclasses. *} - function LoadGlyph(ch: WideChar): TGlyph; virtual; abstract; + function LoadGlyph(ch: UCS4Char): TGlyph; virtual; abstract; public constructor Create(); @@ -436,6 +441,7 @@ type *} TFTGlyph = class(TGlyph) private + fCharCode: UCS4Char; //**< Char code fCharIndex: FT_UInt; //**< Freetype specific char-index (<> char-code) fDisplayList: GLuint; //**< Display-list ID fTexture: GLuint; //**< Texture ID @@ -477,7 +483,7 @@ type * Creates a glyph with char-code ch from font Font. * @param LoadFlags flags passed to FT_Load_Glyph() *} - constructor Create(Font: TFTFont; ch: WideChar; Outset: single; + constructor Create(Font: TFTFont; ch: UCS4Char; Outset: single; LoadFlags: FT_Int32); destructor Destroy(); override; @@ -507,10 +513,10 @@ type fUseDisplayLists: boolean; //**< true: use display-lists, false: direct drawing {** @seealso TCachedFont.LoadGlyph } - function LoadGlyph(ch: WideChar): TGlyph; override; + function LoadGlyph(ch: UCS4Char): TGlyph; override; - procedure Render(const Text: WideString); override; - function BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; override; + procedure Render(const Text: UCS4String); override; + function BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; override; function GetHeight(): single; override; function GetAscender(): single; override; @@ -585,9 +591,9 @@ type procedure ResetIntern(); protected - procedure DrawUnderline(const Text: WideString); override; - procedure Render(const Text: WideString); override; - function BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; override; + procedure DrawUnderline(const Text: UCS4String); override; + procedure Render(const Text: UCS4String); override; + function BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; override; function GetHeight(): single; override; function GetAscender(): single; override; @@ -672,7 +678,7 @@ type procedure ResetIntern(); - procedure RenderChar(ch: WideChar; var AdvanceX: real); + procedure RenderChar(ch: UCS4Char; var AdvanceX: real); {** * Load font widths from an info file. @@ -682,8 +688,8 @@ type procedure LoadFontInfo(const InfoFile: AnsiString); protected - procedure Render(const Text: WideString); override; - function BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; override; + procedure Render(const Text: UCS4String); override; + function BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; override; function GetHeight(): single; override; function GetAscender(): single; override; @@ -801,37 +807,61 @@ begin ResetIntern(); end; -procedure TFont.SplitLines(const Text: UTF8String; var Lines: TWideStringArray); +procedure TFont.SplitLines(const Text: UCS4String; var Lines: TUCS4StringArray); var - LineList: TStringList; - LineIndex: integer; + CharIndex: integer; + LineStart: integer; + LineLength: integer; + EOT: boolean; // End-Of-Text begin - // split lines on newline (there is no WideString version of ExtractStrings) - LineList := TStringList.Create(); - ExtractStrings([#13], [], PChar(Text), LineList); + // split lines on newline (there is no UCS4String version of ExtractStrings) + SetLength(Lines, 0); + EOT := false; + LineStart := 0; + + for CharIndex := 0 to High(Text) do + begin + // check for end of text (UCS4Strings are zero-terminated) + if (CharIndex = High(Text)) then + EOT := true; + + // check for newline (carriage return (#13)) or end of text + if (Text[CharIndex] = 13) or EOT then + begin + LineLength := CharIndex - LineStart; + // check if last character was a newline + if (EOT and (LineLength = 0)) then + Break; + + // copy line (even if LineLength is 0) + SetLength(Lines, Length(Lines)+1); + Lines[High(Lines)] := UCS4Copy(Text, LineStart, LineLength); - // create an array of WideStrins from the UTF-8 string-list - SetLength(Lines, LineList.Count); - for LineIndex := 0 to LineList.Count-1 do - Lines[LineIndex] := UTF8Decode(LineList[LineIndex]); - LineList.Free(); + LineStart := CharIndex+1; + end; + end; end; -function TFont.BBox(const Text: UTF8String; Advance: boolean): TBoundsDbl; +function TFont.BBox(const Text: UCS4String; Advance: boolean): TBoundsDbl; var - LineArray: TWideStringArray; + LineArray: TUCS4StringArray; begin SplitLines(Text, LineArray); Result := BBox(LineArray, Advance); SetLength(LineArray, 0); end; +function TFont.BBox(const Text: UTF8String; Advance: boolean): TBoundsDbl; +begin + Result := BBox(UTF8Decode(Text), Advance); +end; + function TFont.BBox(const Text: WideString; Advance: boolean): TBoundsDbl; begin - Result := BBox(UTF8Encode(Text), Advance); + Result := BBox(WideStringToUCS4String(Text), Advance); end; -procedure TFont.Print(const Text: TWideStringArray); +procedure TFont.Print(const Text: TUCS4StringArray); var LineIndex: integer; begin @@ -912,21 +942,26 @@ begin glPopAttrib(); end; -procedure TFont.Print(const Text: UTF8String); +procedure TFont.Print(const Text: UCS4String); var - LineArray: TWideStringArray; + LineArray: TUCS4StringArray; begin SplitLines(Text, LineArray); Print(LineArray); SetLength(LineArray, 0); end; +procedure TFont.Print(const Text: UTF8String); +begin + Print(UTF8Decode(Text)); +end; + procedure TFont.Print(const Text: WideString); begin - Print(UTF8Encode(Text)); + Print(WideStringToUCS4String(Text)); end; -procedure TFont.DrawUnderline(const Text: WideString); +procedure TFont.DrawUnderline(const Text: UCS4String); var UnderlineY1, UnderlineY2: single; Bounds: TBoundsDbl; @@ -1194,7 +1229,7 @@ begin glScalef(MipmapScale, MipmapScale, 0); end; -procedure TScalableFont.Print(const Text: TWideStringArray); +procedure TScalableFont.Print(const Text: TUCS4StringArray); begin glPushMatrix(); @@ -1210,12 +1245,12 @@ begin glPopMatrix(); end; -procedure TScalableFont.Render(const Text: WideString); +procedure TScalableFont.Render(const Text: UCS4String); begin Assert(false, 'Unused TScalableFont.Render() was called'); end; -function TScalableFont.BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; +function TScalableFont.BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; begin Result := fBaseFont.BBox(Text, Advance); Result.Left := Result.Left * fScale * fAspect; @@ -1346,7 +1381,7 @@ begin inherited; end; -function TCachedFont.GetGlyph(ch: WideChar): TGlyph; +function TCachedFont.GetGlyph(ch: UCS4Char): TGlyph; begin Result := fCache.GetGlyph(ch); if (Result = nil) then @@ -1372,7 +1407,7 @@ constructor TFTFont.Create( Size: integer; Outset: single; LoadFlags: FT_Int32); var - i: WideChar; + ch: UCS4Char; begin inherited Create(); @@ -1400,8 +1435,8 @@ begin ResetIntern(); // pre-cache some commonly used glyphs (' ' - '~') - for i := #32 to #126 do - fCache.AddGlyph(i, TFTGlyph.Create(Self, i, Outset, LoadFlags)); + for ch := 32 to 126 do + fCache.AddGlyph(ch, TFTGlyph.Create(Self, ch, Outset, LoadFlags)); end; destructor TFTFont.Destroy(); @@ -1424,15 +1459,15 @@ begin ResetIntern(); end; -function TFTFont.LoadGlyph(ch: WideChar): TGlyph; +function TFTFont.LoadGlyph(ch: UCS4Char): TGlyph; begin Result := TFTGlyph.Create(Self, ch, Outset, fLoadFlags); end; -function TFTFont.BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; +function TFTFont.BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; var Glyph, PrevGlyph: TFTGlyph; - TextLine: WideString; + TextLine: UCS4String; LineYOffset: single; LineIndex, CharIndex: integer; LineBounds: TBoundsDbl; @@ -1462,7 +1497,7 @@ begin LineBounds.Top := 0; // for each glyph image, compute its bounding box - for CharIndex := 1 to Length(TextLine) do + for CharIndex := 0 to LengthUCS4(TextLine)-1 do begin Glyph := TFTGlyph(GetGlyph(TextLine[CharIndex])); if (Glyph <> nil) then @@ -1480,9 +1515,9 @@ begin LineBounds.Left := LineBounds.Right + Glyph.Bounds.Left; // update right bound - if (CharIndex < Length(TextLine)) or // not the last character - (TextLine[CharIndex] = ' ') or // on space char (Bounds.Right = 0) - Advance then // or in advance mode + if (CharIndex < LengthUCS4(TextLine)-1) or // not the last character + (TextLine[CharIndex] = Ord(' ')) or // on space char (Bounds.Right = 0) + Advance then // or in advance mode begin // add advance and glyph spacing LineBounds.Right := LineBounds.Right + Glyph.Advance.x + GlyphSpacing @@ -1540,7 +1575,7 @@ begin Result.Bottom := 0.0; end; -procedure TFTFont.Render(const Text: WideString); +procedure TFTFont.Render(const Text: UCS4String); var CharIndex: integer; Glyph, PrevGlyph: TFTGlyph; @@ -1550,7 +1585,7 @@ begin PrevGlyph := nil; // draw current line - for CharIndex := 1 to Length(Text) do + for CharIndex := 0 to LengthUCS4(Text)-1 do begin Glyph := TFTGlyph(GetGlyph(Text[CharIndex])); if (Assigned(Glyph)) then @@ -1705,7 +1740,7 @@ begin ResetIntern(); end; -procedure TFTOutlineFont.DrawUnderline(const Text: WideString); +procedure TFTOutlineFont.DrawUnderline(const Text: UCS4String); var CurrentColor: TGLColor; OutlineColor: TGLColor; @@ -1730,7 +1765,7 @@ begin glPopMatrix(); end; -procedure TFTOutlineFont.Render(const Text: WideString); +procedure TFTOutlineFont.Render(const Text: UCS4String); var CurrentColor: TGLColor; OutlineColor: TGLColor; @@ -1770,7 +1805,7 @@ begin fInnerFont.FlushCache(KeepBaseSet); end; -function TFTOutlineFont.BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; +function TFTOutlineFont.BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; begin Result := fOutlineFont.BBox(Text, Advance); end; @@ -2151,13 +2186,14 @@ begin FT_Done_Glyph(Glyph); end; -constructor TFTGlyph.Create(Font: TFTFont; ch: WideChar; Outset: single; +constructor TFTGlyph.Create(Font: TFTFont; ch: UCS4Char; Outset: single; LoadFlags: FT_Int32); begin inherited Create(); fFont := Font; fOutset := Outset; + fCharCode := ch; // get the Freetype char-index (use default UNICODE charmap) fCharIndex := FT_Get_Char_Index(Font.fFace, FT_ULONG(ch)); @@ -2336,7 +2372,7 @@ begin InsertPos := fHash.Count; end; -function TGlyphCache.AddGlyph(ch: WideChar; const Glyph: TGlyph): boolean; +function TGlyphCache.AddGlyph(ch: UCS4Char; const Glyph: TGlyph): boolean; var BaseCode: cardinal; GlyphCode: integer; @@ -2346,7 +2382,7 @@ var begin Result := false; - BaseCode := cardinal(ch) shr 8; + BaseCode := Ord(ch) shr 8; GlyphTable := FindGlyphTable(BaseCode, InsertPos); if (GlyphTable = nil) then begin @@ -2356,7 +2392,7 @@ begin end; // get glyph table offset - GlyphCode := cardinal(ch) and $FF; + GlyphCode := Ord(ch) and $FF; // insert glyph into table if not present if (GlyphTable[GlyphCode] = nil) then begin @@ -2365,19 +2401,19 @@ begin end; end; -procedure TGlyphCache.DeleteGlyph(ch: WideChar); +procedure TGlyphCache.DeleteGlyph(ch: UCS4Char); var Table: PGlyphTable; TableIndex, GlyphIndex: integer; TableEmpty: boolean; begin // find table - Table := FindGlyphTable(cardinal(ch) shr 8, TableIndex); + Table := FindGlyphTable(Ord(ch) shr 8, TableIndex); if (Table = nil) then Exit; // find glyph - GlyphIndex := cardinal(ch) and $FF; + GlyphIndex := Ord(ch) and $FF; if (Table[GlyphIndex] <> nil) then begin // destroy glyph @@ -2402,19 +2438,19 @@ begin end; end; -function TGlyphCache.GetGlyph(ch: WideChar): TGlyph; +function TGlyphCache.GetGlyph(ch: UCS4Char): TGlyph; var InsertPos: integer; Table: PGlyphTable; begin - Table := FindGlyphTable(cardinal(ch) shr 8, InsertPos); + Table := FindGlyphTable(Ord(ch) shr 8, InsertPos); if (Table = nil) then Result := nil else - Result := Table[cardinal(ch) and $FF]; + Result := Table[Ord(ch) and $FF]; end; -function TGlyphCache.HasGlyph(ch: WideChar): boolean; +function TGlyphCache.HasGlyph(ch: UCS4Char): boolean; begin Result := (GetGlyph(ch) <> nil); end; @@ -2540,11 +2576,11 @@ begin Stream.Free; end; -function TBitmapFont.BBox(const Text: TWideStringArray; Advance: boolean): TBoundsDbl; +function TBitmapFont.BBox(const Text: TUCS4StringArray; Advance: boolean): TBoundsDbl; var LineIndex, CharIndex: integer; CharCode: cardinal; - Line: WideString; + Line: UCS4String; LineWidth: double; begin Result.Left := 0; @@ -2556,7 +2592,7 @@ begin begin Line := Text[LineIndex]; LineWidth := 0; - for CharIndex := 1 to Length(Line) do + for CharIndex := 0 to LengthUCS4(Line)-1 do begin CharCode := Ord(Line[CharIndex]); if (CharCode < Length(fWidths)) then @@ -2567,7 +2603,7 @@ begin end; end; -procedure TBitmapFont.RenderChar(ch: WideChar; var AdvanceX: real); +procedure TBitmapFont.RenderChar(ch: UCS4Char; var AdvanceX: real); var TexX, TexY: real; TexR, TexB: real; @@ -2659,20 +2695,20 @@ begin AdvanceX := AdvanceX + GlyphWidth; end; -procedure TBitmapFont.Render(const Text: WideString); +procedure TBitmapFont.Render(const Text: UCS4String); var CharIndex: integer; AdvanceX: real; begin // if there is no text do nothing - if (Text = '') then + if (Text = nil) or (Text[0] = 0) then Exit; //Save the current color and alpha (for reflection) glGetFloatv(GL_CURRENT_COLOR, @fTempColor); AdvanceX := 0; - for CharIndex := 1 to Length(Text) do + for CharIndex := 0 to LengthUCS4(Text)-1 do begin RenderChar(Text[CharIndex], AdvanceX); end; diff --git a/unicode/src/base/UUnicodeUtils.pas b/unicode/src/base/UUnicodeUtils.pas index 01c279bd..26f240a9 100644 --- a/unicode/src/base/UUnicodeUtils.pas +++ b/unicode/src/base/UUnicodeUtils.pas @@ -34,11 +34,11 @@ interface {$I switches.inc} uses - SysUtils {$IFDEF MSWINDOWS} - , Windows + Windows, {$ENDIF} - ; + SysUtils; + (* * Character classes *) @@ -58,6 +58,19 @@ function IsPunctuationChar(ch: UCS4Char): boolean; overload; function IsControlChar(ch: WideChar): boolean; overload; function IsControlChar(ch: UCS4Char): boolean; overload; +{** + * Checks if the given string is a valid UTF-8 string. + * If an ANSI encoded string (with char codes >= 128) is passed, the + * function will most probably return false, as most ANSI strings sequences + * are illegal in UTF-8. + *} +function IsUTF8String(const str: AnsiString): boolean; + +{** + * Checks if the string is composed of ASCII characters. + *} +function IsASCIIString(const str: AnsiString): boolean; + {* * String format conversion *} @@ -71,6 +84,12 @@ function UCS4ToUTF8String(ch: UCS4Char): UTF8String; overload; *} function LengthUTF8(const str: UTF8String): integer; +{** + * Returns the length of an UCS4String. Note that Length(UCS4String) returns + * the length+1 as UCS4Strings are zero-terminated. + *} +function LengthUCS4(const str: UCS4String): integer; + function UTF8CompareStr(const S1, S2: UTF8String): integer; function UTF8CompareText(const S1, S2: UTF8String): integer; @@ -93,12 +112,19 @@ function UCS4UpperCase(ch: UCS4Char): UCS4Char; overload; function UCS4UpperCase(const str: UCS4String): UCS4String; overload; {** - * + * Converts a UCS4Char to an UCS4String. + * Note that UCS4Strings are zero-terminated dynamic arrays. *} function UCS4CharToString(ch: UCS4Char): UCS4String; -(* +{** + * Copies a segment of str starting with Index with Count characters. + * Note: Do not use Copy() to copy UCS4Strings as the result will not contain + * a trailing #0 character and hence is invalid. + *} +function UCS4Copy(const str: UCS4String; Index: Integer = 0; Count: Integer = -1): UCS4String; +(* * Converts a WideString to its upper-case representation. * Wrapper for WideUpperCase. Needed because some plattforms have problems with * unicode support. @@ -199,6 +225,78 @@ begin Result := IsControlChar(WideChar(Ord(ch))); end; + +function IsUTF8String(const str: AnsiString): boolean; + + // find the most significant zero bit (Result: [7..-1]) + function FindZeroMSB(b: byte): integer; + var + Mask: byte; + begin + Mask := $80; + Result := 7; + while (b and Mask <> 0) do + begin + Mask := Mask shr 1; + Dec(Result); + end; + end; + +var + I: integer; + ZeroBit: integer; + SeqCount: integer; // number of trailing bytes to follow +begin + Result := false; + SeqCount := 0; + + for I := 1 to Length(str) do + begin + if (str[I] >= #128) then + begin + ZeroBit := FindZeroMSB(Ord(str[I])); + // trailing byte expected + if (SeqCount > 0) then + begin + // check if trailing byte has pattern 10xxxxxx + if (ZeroBit <> 6) then + Exit; + Dec(SeqCount); + end + else // leading byte expected + begin + // check if pattern is one of 110xxxxx/1110xxxx/11110xxx + if (ZeroBit > 5) or (ZeroBit < 3) then + Exit; + // calculate number of trailing bytes (1, 2 or 3) + SeqCount := 6 - ZeroBit; + end; + end; + end; + + // trailing bytes missing? + if (SeqCount > 0) then + Exit; + + Result := true; +end; + +function IsASCIIString(const str: AnsiString): boolean; +var + I: integer; +begin + for I := 1 to Length(str) do + begin + if (str[I] >= #128) then + begin + Result := false; + Exit; + end; + end; + Result := true; +end; + + function UTF8ToUCS4String(const str: UTF8String): UCS4String; begin Result := WideStringToUCS4String(UTF8Decode(str)); @@ -219,6 +317,11 @@ begin Result := Length(UTF8ToUCS4String(str)); end; +function LengthUCS4(const str: UCS4String): integer; +begin + Result := High(str); +end; + function UTF8CompareStr(const S1, S2: UTF8String): integer; begin // FIXME @@ -284,6 +387,26 @@ begin Result[1] := 0; end; +function UCS4Copy(const str: UCS4String; Index: Integer; Count: Integer): UCS4String; +var + I: integer; + MaxCount: integer; +begin + // calculate max. copy count + MaxCount := LengthUCS4(str)-Index; + if (MaxCount < 0) then + MaxCount := 0; + // adjust copy count + if (Count > MaxCount) or (Count < 0) then + Count := MaxCount; + + // copy (and add zero terminator) + SetLength(Result, Count + 1); + for I := 0 to Count-1 do + Result[I] := str[Index+I]; + Result[Count] := 0; +end; + function WideStringUpperCase(ch: WideChar): WideString; begin // If WideChar #0 is converted to a WideString in Delphi, a string with -- cgit v1.2.3