From 917901e8e33438c425aef50a0a7417f32d77b760 Mon Sep 17 00:00:00 2001
From: s_alexander
Date: Mon, 9 Nov 2009 00:27:55 +0000
Subject: merged unicode branch (r1931) into trunk

git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/trunk@1939 b956fd51-792f-4845-bead-9b4dfca2ff2c
---
 src/encoding/CP1250.inc | 236 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/encoding/CP1252.inc | 122 +++++++++++++++++++++++++
 src/encoding/Locale.inc | 55 +++++++++++
 src/encoding/UTF8.inc | 70 ++++++++++++++
 4 files changed, 483 insertions(+)
 create mode 100644 src/encoding/CP1250.inc
 create mode 100644 src/encoding/CP1252.inc
 create mode 100644 src/encoding/Locale.inc
 create mode 100644 src/encoding/UTF8.inc

(limited to 'src/encoding')

diff --git a/src/encoding/CP1250.inc b/src/encoding/CP1250.inc
new file mode 100644
index 00000000..5628156e
--- /dev/null
+++ b/src/encoding/CP1250.inc
@@ -0,0 +1,236 @@
+{* UltraStar Deluxe - Karaoke Game
+ *
+ * UltraStar Deluxe is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *}
+
+{*
+ * Windows-1250 Central/Eastern Europe
+ * (used by Ultrastar)
+ *}
+
+type
+  // Single-byte encoder for the Windows-1250 code page.
+  // Bytes below 128 are treated as ASCII; bytes 128..255 are translated
+  // through CP1250Table.
+  TEncoderCP1250 = class(TSingleByteEncoder)
+  public
+    function GetName(): AnsiString; override;
+    function DecodeChar(InChr: AnsiChar; out OutChr: UCS4Char): boolean; override;
+    function EncodeChar(InChr: UCS4Char; out OutChr: AnsiChar): boolean; override;
+  end;
+
+// Returns the identifier of this encoding ('CP1250').
+function TEncoderCP1250.GetName(): AnsiString;
+begin
+  Result := 'CP1250';
+end;
+
+const
+  // Code point for each CP1250 byte in the range 128..255.
+  // Positions marked as #0 are invalid.
+  // This table is the single source of truth for both DecodeChar and
+  // EncodeChar.
+  CP1250Table: array[128..255] of UCS4Char = (
+    { $80 }
+    $20AC, 0, $201A, 0, $201E, $2026, $2020, $2021,
+    0, $2030, $0160, $2039, $015A, $0164, $017D, $0179,
+    { $90 }
+    0, $2018, $2019, $201C, $201D, $2022, $2013, $2014,
+    0, $2122, $0161, $203A, $015B, $0165, $017E, $017A,
+    { $A0 }
+    $00A0, $02C7, $02D8, $0141, $00A4, $0104, $00A6, $00A7,
+    $00A8, $00A9, $015E, $00AB, $00AC, $00AD, $00AE, $017B,
+    { $B0 }
+    $00B0, $00B1, $02DB, $0142, $00B4, $00B5, $00B6, $00B7,
+    $00B8, $0105, $015F, $00BB, $013D, $02DD, $013E, $017C,
+    { $C0 }
+    $0154, $00C1, $00C2, $0102, $00C4, $0139, $0106, $00C7,
+    $010C, $00C9, $0118, $00CB, $011A, $00CD, $00CE, $010E,
+    { $D0 }
+    $0110, $0143, $0147, $00D3, $00D4, $0150, $00D6, $00D7,
+    $0158, $016E, $00DA, $0170, $00DC, $00DD, $0162, $00DF,
+    { $E0 }
+    $0155, $00E1, $00E2, $0103, $00E4, $013A, $0107, $00E7,
+    $010D, $00E9, $0119, $00EB, $011B, $00ED, $00EE, $010F,
+    { $F0 }
+    $0111, $0144, $0148, $00F3, $00F4, $0151, $00F6, $00F7,
+    $0159, $016F, $00FA, $0171, $00FC, $00FD, $0163, $02D9
+  );
+
+// Decodes one CP1250 byte into a UCS-4 code point.
+// Returns true on success. For a byte whose table entry is 0 (unassigned),
+// returns false and sets OutChr to Ord(ERROR_CHAR).
+function TEncoderCP1250.DecodeChar(InChr: AnsiChar; out OutChr: UCS4Char): boolean;
+begin
+  Result := true;
+  if (InChr < #128) then
+    OutChr := UCS4Char(Ord(InChr)) // use Ord() to avoid automatic conversion
+  else
+  begin
+    OutChr := CP1250Table[Ord(InChr)];
+    if (OutChr = 0) then
+    begin
+      Result := false;
+      OutChr := Ord(ERROR_CHAR);
+    end;
+  end;
+end;
+
+// Encodes one UCS-4 code point as a CP1250 byte.
+// Returns true on success. For a code point that has no CP1250 byte,
+// returns false and sets OutChr to ERROR_CHAR.
+// The byte is found by a reverse lookup in CP1250Table, so the mapping
+// cannot drift apart from the one used by DecodeChar.
+function TEncoderCP1250.EncodeChar(InChr: UCS4Char; out OutChr: AnsiChar): boolean;
+var
+  Idx: integer;
+begin
+  // code points below 128 map onto the byte with the same value
+  if (InChr < 128) then
+  begin
+    OutChr := AnsiChar(Ord(InChr));
+    Result := true;
+    Exit;
+  end;
+
+  // InChr is >= 128 here, so the 0 entries that mark unassigned bytes
+  // can never match.
+  for Idx := Low(CP1250Table) to High(CP1250Table) do
+  begin
+    if (CP1250Table[Idx] = InChr) then
+    begin
+      OutChr := AnsiChar(Idx);
+      Result := true;
+      Exit;
+    end;
+  end;
+
+  OutChr := ERROR_CHAR;
+  Result := false;
+end;
+
diff --git a/src/encoding/CP1252.inc b/src/encoding/CP1252.inc
new file mode 100644
index 00000000..f7d3f8ea
--- /dev/null
+++ b/src/encoding/CP1252.inc
@@ -0,0 +1,122 @@
+{* UltraStar Deluxe - Karaoke Game
+ *
+ * UltraStar Deluxe is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *}
+
+{*
+ * Windows-1252 Western Europe
+ * (used by UltraStar Deluxe < 1.1)
+ *}
+
+type
+  // Single-byte encoder for the Windows-1252 code page.
+  // Bytes below 128 and bytes 160..255 map onto the code point with the
+  // same value; bytes 128..159 are translated through CP1252Table.
+  TEncoderCP1252 = class(TSingleByteEncoder)
+  public
+    function GetName(): AnsiString; override;
+    function DecodeChar(InChr: AnsiChar; out OutChr: UCS4Char): boolean; override;
+    function EncodeChar(InChr: UCS4Char; out OutChr: AnsiChar): boolean; override;
+  end;
+
+// Returns the identifier of this encoding ('CP1252').
+function TEncoderCP1252.GetName(): AnsiString;
+begin
+  Result := 'CP1252';
+end;
+
+const
+  // Code point for each CP1252 byte in the range 128..159.
+  // Positions marked as #0 are invalid.
+  // This table is the single source of truth for both DecodeChar and
+  // EncodeChar.
+  CP1252Table: array[128..159] of UCS4Char = (
+    { $80 }
+    $20AC, 0, $201A, $0192, $201E, $2026, $2020, $2021,
+    $02C6, $2030, $0160, $2039, $0152, 0, $017D, 0,
+    { $90 }
+    0, $2018, $2019, $201C, $201D, $2022, $2013, $2014,
+    $02DC, $2122, $0161, $203A, $0153, 0, $017E, $0178
+  );
+
+// Decodes one CP1252 byte into a UCS-4 code point.
+// Returns true on success. For a byte whose table entry is 0 (unassigned),
+// returns false and sets OutChr to Ord(ERROR_CHAR).
+function TEncoderCP1252.DecodeChar(InChr: AnsiChar; out OutChr: UCS4Char): boolean;
+begin
+  Result := true;
+  if (InChr < #128) or (InChr >= #160) then
+    OutChr := UCS4Char(Ord(InChr)) // use Ord() to avoid automatic conversion
+  else
+  begin
+    OutChr := CP1252Table[Ord(InChr)];
+    if (OutChr = 0) then
+    begin
+      Result := false;
+      OutChr := Ord(ERROR_CHAR);
+    end;
+  end;
+end;
+
+// Encodes one UCS-4 code point as a CP1252 byte.
+// Returns true on success. For a code point that has no CP1252 byte,
+// returns false and sets OutChr to ERROR_CHAR.
+// Bytes 128..159 are found by a reverse lookup in CP1252Table, so the
+// mapping cannot drift apart from the one used by DecodeChar.
+function TEncoderCP1252.EncodeChar(InChr: UCS4Char; out OutChr: AnsiChar): boolean;
+var
+  Idx: integer;
+begin
+  // code points 0..127 and 160..255 map onto the byte with the same value
+  if (InChr < 128) or ((InChr >= 160) and (InChr <= 255)) then
+  begin
+    OutChr := AnsiChar(Ord(InChr));
+    Result := true;
+    Exit;
+  end;
+
+  // InChr is non-zero here, so the 0 entries that mark unassigned bytes
+  // can never match.
+  for Idx := Low(CP1252Table) to High(CP1252Table) do
+  begin
+    if (CP1252Table[Idx] = InChr) then
+    begin
+      OutChr := AnsiChar(Idx);
+      Result := true;
+      Exit;
+    end;
+  end;
+
+  OutChr := ERROR_CHAR;
+  Result := false;
+end;
+
diff --git a/src/encoding/Locale.inc b/src/encoding/Locale.inc
new file mode 100644
index 00000000..a3cdcebc
--- /dev/null
+++ b/src/encoding/Locale.inc
@@ -0,0 +1,55 @@
+{* UltraStar Deluxe -
Karaoke Game
+ *
+ * UltraStar Deluxe is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *}
+
+{*
+ * Locale
+ *}
+
+type
+  // Encoder that relies on the compiler's implicit AnsiString <-> WideString
+  // conversion.
+  // NOTE(review): presumably that conversion uses the system locale's code
+  // page (hence the name) - confirm for each supported compiler.
+  TEncoderLocale = class(TEncoder)
+  public
+    function GetName(): AnsiString; override;
+    function Encode(const InStr: UCS4String; out OutStr: AnsiString): boolean; override;
+    function Decode(const InStr: AnsiString; out OutStr: UCS4String): boolean; override;
+  end;
+
+// Returns the identifier of this encoding ('LOCALE').
+function TEncoderLocale.GetName(): AnsiString;
+begin
+  Result := 'LOCALE';
+end;
+
+// Converts InStr to UCS-4. Always returns true; conversion problems are
+// not detected.
+function TEncoderLocale.Decode(const InStr: AnsiString; out OutStr: UCS4String): boolean;
+begin
+  OutStr := WideStringToUCS4String(InStr); // use implicit conversion
+  Result := true;
+end;
+
+// Converts InStr from UCS-4 to an AnsiString. Always returns true;
+// conversion problems are not detected.
+function TEncoderLocale.Encode(const InStr: UCS4String; out OutStr: AnsiString): boolean;
+begin
+  OutStr := UCS4StringToWideString(InStr); // use implicit conversion
+  // any way to check for errors?
+  Result := true;
+end;
+
diff --git a/src/encoding/UTF8.inc b/src/encoding/UTF8.inc
new file mode 100644
index 00000000..43eacfbd
--- /dev/null
+++ b/src/encoding/UTF8.inc
@@ -0,0 +1,70 @@
+{* UltraStar Deluxe - Karaoke Game
+ *
+ * UltraStar Deluxe is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *}
+
+{*
+ * UTF-8
+ *}
+
+type
+  // Encoder for UTF-8 text.
+  TEncoderUTF8 = class(TEncoder)
+  public
+    function GetName(): AnsiString; override;
+    function Encode(const InStr: UCS4String; out OutStr: AnsiString): boolean; override;
+    function Decode(const InStr: AnsiString; out OutStr: UCS4String): boolean; override;
+  end;
+
+// Returns the identifier of this encoding ('UTF8').
+function TEncoderUTF8.GetName(): AnsiString;
+begin
+  Result := 'UTF8';
+end;
+
+// Decodes the UTF-8 bytes of InStr into a zero-terminated UCS4String.
+// Returns false if NextCharUTF8 reported a failure for at least one
+// character; decoding still continues up to the first #0 byte.
+function TEncoderUTF8.Decode(const InStr: AnsiString; out OutStr: UCS4String): boolean;
+var
+  I: integer;
+  StrPtr: PAnsiChar;
+begin
+  // UTF8Decode() may crash with FPC < 2.2.2 if the input string is not UTF-8
+  // encoded. Newer versions do not crash but do not signal errors either.
+  // So let's implement this stuff again.
+  Result := true;
+  // upper bound: one code point per input byte, plus the terminating 0
+  SetLength(OutStr, Length(InStr)+1);
+  I := 0;
+  // cast to PAnsiChar (not PChar) so the type matches StrPtr even where
+  // PChar is not an alias of PAnsiChar
+  StrPtr := PAnsiChar(InStr);
+  // NOTE(review): assumes NextCharUTF8 always advances StrPtr by at least
+  // one byte, even on failure - otherwise this loop would not terminate
+  // and I could run past the allocated length. Confirm against its
+  // implementation.
+  while (StrPtr^ <> #0) do
+  begin
+    if (not NextCharUTF8(StrPtr, OutStr[I])) then
+      Result := false;
+    Inc(I);
+  end;
+  // shrink to the number of decoded code points plus the terminating 0
+  SetLength(OutStr, I+1);
+  OutStr[High(OutStr)] := 0;
+end;
+
+// Encodes InStr as UTF-8. Always returns true.
+function TEncoderUTF8.Encode(const InStr: UCS4String; out OutStr: AnsiString): boolean;
+begin
+  OutStr := UCS4ToUTF8String(InStr);
+  Result := true;
+end;
+
-- cgit v1.2.3

From d589e6221ffcafc077eeefaa60cdc3e33a800558 Mon Sep 17 00:00:00 2001
From: s_alexander
Date: Sat, 5 Dec 2009 12:26:00 +0000
Subject: added autodetection of utf8 used w3c regex to match all song lines whether they are utf8 lines and decode it on match as utf8 and as latin1 otherwise

git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/trunk@1964 b956fd51-792f-4845-bead-9b4dfca2ff2c
---
 src/encoding/Auto.inc | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 src/encoding/Auto.inc

(limited to 'src/encoding')

diff --git a/src/encoding/Auto.inc b/src/encoding/Auto.inc
new file mode 100644
index 00000000..bf512f95
--- /dev/null
+++ b/src/encoding/Auto.inc
@@ -0,0 +1,127 @@
+{* UltraStar Deluxe - Karaoke Game
+ *
+ * UltraStar Deluxe is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, + * Boston, MA 02110-1301, USA. + * + * $URL$ + * $Id$ + *} + +// Auto +// try to match the w3c regex and decode as unicode on match and as fallback if not match +// (copied from http://www.w3.org/International/questions/qa-forms-utf-8.en.php) +// +// m/\A( +// [\x09\x0A\x0D\x20-\x7E] # ASCII +// | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte +// | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs +// | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte +// | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates +// | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 +// | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 +// | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 +// )*\z/x + +type + TEncoderAuto = class(TEncoder) + public + function GetName(): AnsiString; override; + function Encode(const InStr: UCS4String; out OutStr: AnsiString): boolean; override; + function Decode(const InStr: AnsiString; out OutStr: UCS4String): boolean; override; + + constructor Create(const UTF8Encoder, FallbackEncoder: IEncoder); + + private + FallbackEncoder: IEncoder; + UTF8Encoder: IEncoder; + Regex: PPCRE; + RegexExtra: PPCREExtra; + end; + +function PCREGetMem(Size: SizeInt): Pointer; cdecl; +begin + GetMem(Result, Size); +end; + +procedure PCREFreeMem(P: Pointer); cdecl; +begin + FreeMem(P); +end; + +constructor TEncoderAuto.Create(const UTF8Encoder, FallbackEncoder: IEncoder); +var + Error: PChar; + ErrorOffset: Integer; +begin + // NOTICE: Log.LogError() is not possible here because it isn't loaded + inherited Create(); + self.FallbackEncoder := FallbackEncoder; + self.UTF8Encoder := UTF8Encoder; + + // Load and initialize PCRE Library + LoadPCRE(); + SetPCREMallocCallback(PCREGetMem); + SetPCREFreeCallback(PCREFreeMem); + + // compile regex + self.Regex := 
pcre_compile('\A([\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*\z', 0, @Error, @ErrorOffset, nil); + + if self.Regex = Nil then + begin + writeln('ERROR: UTF8 Regex compilation failed: ', AnsiString(Error), ' at ', ErrorOffset); + end + else + begin + // if compiled successfull, try to get more informations the speed up the matching + self.RegexExtra := pcre_study(self.Regex, 0, @Error); + + if Error <> Nil then + begin + writeln('ERROR: UTF8 Regex study failed: ', AnsiString(Error)); + end; + end; +end; + +function TEncoderAuto.GetName(): AnsiString; +begin + Result := 'Auto'; +end; + +function TEncoderAuto.Decode(const InStr: AnsiString; out OutStr: UCS4String): boolean; +var + RegexResults: Integer; +begin + if (self.Regex <> Nil) then + begin + RegexResults := pcre_exec(Regex, RegexExtra, PChar(InStr), Length(InStr), 0, 0, Nil, 0); + + if RegexResults >= 0 then + begin + Result := UTF8Encoder.Decode(InStr, OutStr); + Exit; + end; + end; + + Result := FallbackEncoder.Decode(InStr, OutStr); +end; + +function TEncoderAuto.Encode(const InStr: UCS4String; out OutStr: AnsiString): boolean; +begin + Result := UTF8Encoder.Encode(InStr, OutStr); +end; -- cgit v1.2.3 From 4c927bd3abb27cb477db47b42c368d356c7002cb Mon Sep 17 00:00:00 2001 From: s_alexander Date: Sat, 5 Dec 2009 12:26:56 +0000 Subject: disable autodetection, if pcre-lib did not load if the pcre-lib could not be load, do not try to compile the regex and so do not execute the regex and allways use the fallback encoding git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/trunk@1966 b956fd51-792f-4845-bead-9b4dfca2ff2c --- src/encoding/Auto.inc | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'src/encoding') diff --git a/src/encoding/Auto.inc b/src/encoding/Auto.inc index 
bf512f95..3d415095 100644 --- a/src/encoding/Auto.inc +++ b/src/encoding/Auto.inc @@ -75,27 +75,28 @@ begin self.UTF8Encoder := UTF8Encoder; // Load and initialize PCRE Library - LoadPCRE(); - SetPCREMallocCallback(PCREGetMem); - SetPCREFreeCallback(PCREFreeMem); - - // compile regex - self.Regex := pcre_compile('\A([\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*\z', 0, @Error, @ErrorOffset, nil); - - if self.Regex = Nil then - begin - writeln('ERROR: UTF8 Regex compilation failed: ', AnsiString(Error), ' at ', ErrorOffset); - end - else + if LoadPCRE() then begin - // if compiled successfull, try to get more informations the speed up the matching - self.RegexExtra := pcre_study(self.Regex, 0, @Error); + // compile regex + self.Regex := pcre_compile('\A([\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*\z', 0, @Error, @ErrorOffset, nil); - if Error <> Nil then + if self.Regex = Nil then begin - writeln('ERROR: UTF8 Regex study failed: ', AnsiString(Error)); + writeln('ERROR: UTF8 Regex compilation failed: ', AnsiString(Error), ' at ', ErrorOffset); + end + else + begin + // if compiled successfull, try to get more informations the speed up the matching + self.RegexExtra := pcre_study(self.Regex, 0, @Error); + + if Error <> Nil then + begin + writeln('ERROR: UTF8 Regex study failed: ', AnsiString(Error)); + end; end; - end; + end + else + writeln('ERROR: pcre not loaded. 
utf-8 autodetection will not work.'); end; function TEncoderAuto.GetName(): AnsiString; -- cgit v1.2.3 From 35b181b5b78b5ce81409ec8ebe69b47d0e4ec716 Mon Sep 17 00:00:00 2001 From: s_alexander Date: Sat, 5 Dec 2009 14:19:51 +0000 Subject: only use writeln if CONSOLE is defined (*hope* that fixes the exception without pcre3.dll on windows) git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/trunk@1969 b956fd51-792f-4845-bead-9b4dfca2ff2c --- src/encoding/Auto.inc | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/encoding') diff --git a/src/encoding/Auto.inc b/src/encoding/Auto.inc index 3d415095..2f7faa0c 100644 --- a/src/encoding/Auto.inc +++ b/src/encoding/Auto.inc @@ -82,7 +82,9 @@ begin if self.Regex = Nil then begin + {$IFDEF CONSOLE} writeln('ERROR: UTF8 Regex compilation failed: ', AnsiString(Error), ' at ', ErrorOffset); + {$ENDIF} end else begin @@ -91,12 +93,18 @@ begin if Error <> Nil then begin + {$IFDEF CONSOLE} writeln('ERROR: UTF8 Regex study failed: ', AnsiString(Error)); + {$ENDIF} end; end; end else + begin + {$IFDEF CONSOLE} writeln('ERROR: pcre not loaded. 
utf-8 autodetection will not work.'); + {$ENDIF} + end; end; function TEncoderAuto.GetName(): AnsiString; -- cgit v1.2.3 From 78b2bd7d0d408a7f8d679a4e97a0260900211fb6 Mon Sep 17 00:00:00 2001 From: k-m_schindler Date: Thu, 22 Apr 2010 17:50:50 +0000 Subject: use Log.LogError instead of writeln git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/trunk@2278 b956fd51-792f-4845-bead-9b4dfca2ff2c --- src/encoding/Auto.inc | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'src/encoding') diff --git a/src/encoding/Auto.inc b/src/encoding/Auto.inc index 2f7faa0c..487e2e42 100644 --- a/src/encoding/Auto.inc +++ b/src/encoding/Auto.inc @@ -82,9 +82,7 @@ begin if self.Regex = Nil then begin - {$IFDEF CONSOLE} - writeln('ERROR: UTF8 Regex compilation failed: ', AnsiString(Error), ' at ', ErrorOffset); - {$ENDIF} + Log.LogError ('UTF8 Regex compilation failed: ' + AnsiString(Error) + ' at ' + IntToStr(ErrorOffset), 'EncoderAuto.Create'); end else begin @@ -93,17 +91,13 @@ begin if Error <> Nil then begin - {$IFDEF CONSOLE} - writeln('ERROR: UTF8 Regex study failed: ', AnsiString(Error)); - {$ENDIF} + Log.LogError ('UTF8 regex study failed: ' + AnsiString(Error), 'EncoderAuto.Create'); end; end; end else begin - {$IFDEF CONSOLE} - writeln('ERROR: pcre not loaded. utf-8 autodetection will not work.'); - {$ENDIF} + Log.LogError ('pcre not loaded. 
utf-8 autodetection will not work.', 'EncoderAuto.Create'); end; end; -- cgit v1.2.3 From c9a13f8923d1cc8798bcb07a0808855380d083c8 Mon Sep 17 00:00:00 2001 From: tobigun Date: Fri, 23 Apr 2010 12:16:36 +0000 Subject: reverted revision 2278 - Auto.inc: Log.LogError is not possible as ULog is not loaded at this point -> used ConsoleWriteLn instead git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/trunk@2279 b956fd51-792f-4845-bead-9b4dfca2ff2c --- src/encoding/Auto.inc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/encoding') diff --git a/src/encoding/Auto.inc b/src/encoding/Auto.inc index 487e2e42..8c32b5d0 100644 --- a/src/encoding/Auto.inc +++ b/src/encoding/Auto.inc @@ -69,7 +69,6 @@ var Error: PChar; ErrorOffset: Integer; begin - // NOTICE: Log.LogError() is not possible here because it isn't loaded inherited Create(); self.FallbackEncoder := FallbackEncoder; self.UTF8Encoder := UTF8Encoder; @@ -82,7 +81,8 @@ begin if self.Regex = Nil then begin - Log.LogError ('UTF8 Regex compilation failed: ' + AnsiString(Error) + ' at ' + IntToStr(ErrorOffset), 'EncoderAuto.Create'); + // NOTICE: Log.LogError() is not possible here because it isn't loaded + ConsoleWriteLn(Format('ERROR: UTF8 Regex compilation failed: %s at %d', [Error, ErrorOffset])); end else begin @@ -91,13 +91,15 @@ begin if Error <> Nil then begin - Log.LogError ('UTF8 regex study failed: ' + AnsiString(Error), 'EncoderAuto.Create'); + // NOTICE: Log.LogError() is not possible here because it isn't loaded + ConsoleWriteLn('ERROR: UTF8 Regex study failed: ' + Error); end; end; end else begin - Log.LogError ('pcre not loaded. utf-8 autodetection will not work.', 'EncoderAuto.Create'); + // NOTICE: Log.LogError() is not possible here because it isn't loaded + ConsoleWriteLn('ERROR: pcre not loaded. 
utf-8 autodetection will not work.'); end; end; -- cgit v1.2.3 From d5a50482317c90ef83bfa4787b392b34f59d0989 Mon Sep 17 00:00:00 2001 From: tobigun Date: Fri, 23 Apr 2010 21:08:06 +0000 Subject: With FPC ConsoleWriteLn is not initialized. Fallback to AlexanderS original solution but make the warning more explicit. git-svn-id: svn://svn.code.sf.net/p/ultrastardx/svn/trunk@2287 b956fd51-792f-4845-bead-9b4dfca2ff2c --- src/encoding/Auto.inc | 269 +++++++++++++++++++++++++------------------------- 1 file changed, 137 insertions(+), 132 deletions(-) (limited to 'src/encoding') diff --git a/src/encoding/Auto.inc b/src/encoding/Auto.inc index 8c32b5d0..f404c2f6 100644 --- a/src/encoding/Auto.inc +++ b/src/encoding/Auto.inc @@ -1,132 +1,137 @@ -{* UltraStar Deluxe - Karaoke Game - * - * UltraStar Deluxe is the legal property of its developers, whose names - * are too numerous to list here. Please refer to the COPYRIGHT - * file distributed with this source distribution. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; see the file COPYING. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- * - * $URL$ - * $Id$ - *} - -// Auto -// try to match the w3c regex and decode as unicode on match and as fallback if not match -// (copied from http://www.w3.org/International/questions/qa-forms-utf-8.en.php) -// -// m/\A( -// [\x09\x0A\x0D\x20-\x7E] # ASCII -// | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte -// | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs -// | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte -// | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates -// | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 -// | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 -// | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 -// )*\z/x - -type - TEncoderAuto = class(TEncoder) - public - function GetName(): AnsiString; override; - function Encode(const InStr: UCS4String; out OutStr: AnsiString): boolean; override; - function Decode(const InStr: AnsiString; out OutStr: UCS4String): boolean; override; - - constructor Create(const UTF8Encoder, FallbackEncoder: IEncoder); - - private - FallbackEncoder: IEncoder; - UTF8Encoder: IEncoder; - Regex: PPCRE; - RegexExtra: PPCREExtra; - end; - -function PCREGetMem(Size: SizeInt): Pointer; cdecl; -begin - GetMem(Result, Size); -end; - -procedure PCREFreeMem(P: Pointer); cdecl; -begin - FreeMem(P); -end; - -constructor TEncoderAuto.Create(const UTF8Encoder, FallbackEncoder: IEncoder); -var - Error: PChar; - ErrorOffset: Integer; -begin - inherited Create(); - self.FallbackEncoder := FallbackEncoder; - self.UTF8Encoder := UTF8Encoder; - - // Load and initialize PCRE Library - if LoadPCRE() then - begin - // compile regex - self.Regex := pcre_compile('\A([\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*\z', 0, @Error, @ErrorOffset, nil); - - if self.Regex = Nil then - begin - // NOTICE: Log.LogError() is not possible here because it isn't loaded - 
ConsoleWriteLn(Format('ERROR: UTF8 Regex compilation failed: %s at %d', [Error, ErrorOffset])); - end - else - begin - // if compiled successfull, try to get more informations the speed up the matching - self.RegexExtra := pcre_study(self.Regex, 0, @Error); - - if Error <> Nil then - begin - // NOTICE: Log.LogError() is not possible here because it isn't loaded - ConsoleWriteLn('ERROR: UTF8 Regex study failed: ' + Error); - end; - end; - end - else - begin - // NOTICE: Log.LogError() is not possible here because it isn't loaded - ConsoleWriteLn('ERROR: pcre not loaded. utf-8 autodetection will not work.'); - end; -end; - -function TEncoderAuto.GetName(): AnsiString; -begin - Result := 'Auto'; -end; - -function TEncoderAuto.Decode(const InStr: AnsiString; out OutStr: UCS4String): boolean; -var - RegexResults: Integer; -begin - if (self.Regex <> Nil) then - begin - RegexResults := pcre_exec(Regex, RegexExtra, PChar(InStr), Length(InStr), 0, 0, Nil, 0); - - if RegexResults >= 0 then - begin - Result := UTF8Encoder.Decode(InStr, OutStr); - Exit; - end; - end; - - Result := FallbackEncoder.Decode(InStr, OutStr); -end; - -function TEncoderAuto.Encode(const InStr: UCS4String; out OutStr: AnsiString): boolean; -begin - Result := UTF8Encoder.Encode(InStr, OutStr); -end; +{* UltraStar Deluxe - Karaoke Game + * + * UltraStar Deluxe is the legal property of its developers, whose names + * are too numerous to list here. Please refer to the COPYRIGHT + * file distributed with this source distribution. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * $URL$
+ * $Id$
+ *}
+
+// Auto
+// try to match the w3c regex and decode as unicode on match and as fallback if not match
+// (copied from http://www.w3.org/International/questions/qa-forms-utf-8.en.php)
+//
+// m/\A(
+//    [\x09\x0A\x0D\x20-\x7E]            # ASCII
+//  | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
+//  | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
+//  | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
+//  | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
+//  | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
+//  | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
+//  | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
+// )*\z/x
+
+type
+  // Encoder that picks the decoder per string: input matching the UTF-8
+  // regex above is decoded with UTF8Encoder, everything else with
+  // FallbackEncoder. Encoding always uses UTF8Encoder.
+  TEncoderAuto = class(TEncoder)
+  public
+    function GetName(): AnsiString; override;
+    function Encode(const InStr: UCS4String; out OutStr: AnsiString): boolean; override;
+    function Decode(const InStr: AnsiString; out OutStr: UCS4String): boolean; override;
+
+    constructor Create(const UTF8Encoder, FallbackEncoder: IEncoder);
+
+  private
+    FallbackEncoder: IEncoder;
+    UTF8Encoder: IEncoder;
+    // compiled UTF-8 detection regex; stays nil if PCRE is unavailable or
+    // compilation failed
+    // NOTE(review): nothing in this file releases Regex/RegexExtra - verify
+    // whether that is intentional.
+    Regex: PPCRE;
+    RegexExtra: PPCREExtra;
+  end;
+
+// cdecl allocation wrapper around GetMem.
+// NOTE(review): not referenced anywhere in this file - verify it is still needed.
+function PCREGetMem(Size: SizeInt): Pointer; cdecl;
+begin
+  GetMem(Result, Size);
+end;
+
+// cdecl deallocation wrapper around FreeMem.
+// NOTE(review): not referenced anywhere in this file - verify it is still needed.
+procedure PCREFreeMem(P: Pointer); cdecl;
+begin
+  FreeMem(P);
+end;
+
+// Prints an error message to standard output, but only in CONSOLE builds.
+// NOTICE: Log.LogError/ConsoleWriteLn/DebugWriteLn are not initialized yet
+// when this is called, so they cannot be used here.
+procedure ShowError(const msg: string);
+begin
+  {$IFDEF CONSOLE}
+  WriteLn('ERROR: ', msg);
+  {$ENDIF}
+end;
+
+// Stores both encoders and prepares the UTF-8 detection regex.
+// If PCRE cannot be loaded or the regex does not compile, an error is
+// reported via ShowError and Regex stays nil, in which case Decode always
+// uses the fallback encoder.
+constructor TEncoderAuto.Create(const UTF8Encoder, FallbackEncoder: IEncoder);
+var
+  Error: PChar;
+  ErrorOffset: Integer;
+begin
+  inherited Create();
+  self.UTF8Encoder := UTF8Encoder;
+  self.FallbackEncoder := FallbackEncoder;
+
+  // without the PCRE library there is nothing more to set up
+  if not LoadPCRE() then
+  begin
+    ShowError('pcre not loaded. utf-8 autodetection will not work.');
+    Exit;
+  end;
+
+  // compile the detection regex
+  self.Regex := pcre_compile('\A([\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*\z', 0, @Error, @ErrorOffset, nil);
+  if self.Regex = nil then
+  begin
+    ShowError(Format('UTF8 Regex compilation failed: %s at %d', [Error, ErrorOffset]));
+    Exit;
+  end;
+
+  // compiled successfully: study the pattern to gather extra information
+  // that speeds up matching
+  self.RegexExtra := pcre_study(self.Regex, 0, @Error);
+  if Error <> nil then
+    ShowError('UTF8 Regex study failed: ' + Error);
+end;
+
+// Returns the identifier of this encoding ('Auto').
+function TEncoderAuto.GetName(): AnsiString;
+begin
+  Result := 'Auto';
+end;
+
+// Decodes InStr with UTF8Encoder if the detection regex is available and
+// pcre_exec returns a non-negative result for InStr; otherwise decodes it
+// with FallbackEncoder. Returns the result of whichever decoder was used.
+function TEncoderAuto.Decode(const InStr: AnsiString; out OutStr: UCS4String): boolean;
+var
+  LooksLikeUTF8: boolean;
+begin
+  LooksLikeUTF8 := false;
+  if (self.Regex <> nil) then
+    LooksLikeUTF8 := pcre_exec(Regex, RegexExtra, PChar(InStr), Length(InStr), 0, 0, nil, 0) >= 0;
+
+  if LooksLikeUTF8 then
+    Result := UTF8Encoder.Decode(InStr, OutStr)
+  else
+    Result := FallbackEncoder.Decode(InStr, OutStr);
+end;
+
+// Encodes InStr with UTF8Encoder; the fallback encoder is never used for
+// encoding.
+function TEncoderAuto.Encode(const InStr: UCS4String; out OutStr: AnsiString): boolean;
+begin
+  Result := UTF8Encoder.Encode(InStr, OutStr);
+end;
-- cgit v1.2.3