1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
{* UltraStar Deluxe - Karaoke Game
*
* UltraStar Deluxe is the legal property of its developers, whose names
* are too numerous to list here. Please refer to the COPYRIGHT
* file distributed with this source distribution.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*
* $URL$
* $Id$
*}
unit UUnicodeUtils;
interface
{$IFDEF FPC}
{$MODE Delphi}
{$ENDIF}
{$I switches.inc}
uses
SysUtils,
Windows;
(*
* Character classes
*
* Each function reports whether ch belongs to the named class.
* IsNumericChar accepts only '0'..'9'.
* IsPunctuationChar and IsControlChar only recognize characters with a
* code point up to 255; the same holds for IsAlphaChar when MSWINDOWS is
* not defined (with MSWINDOWS it delegates to IsCharAlphaW).
*)
function IsAlphaChar(ch: WideChar): boolean;
function IsNumericChar(ch: WideChar): boolean;
function IsAlphaNumericChar(ch: WideChar): boolean;
function IsPunctuationChar(ch: WideChar): boolean;
function IsControlChar(ch: WideChar): boolean;
(*
* String conversion between UTF-8 and UCS-4
*
* Both directions go through an intermediate WideString.
*)
function UTF8ToUCS4String(const str: UTF8String): UCS4String;
function UCS4ToUTF8String(const str: UCS4String): UTF8String;
implementation
(*
* Tells whether ch is an alphabetic character.
*
* With MSWINDOWS defined the decision is delegated to IsCharAlphaW.
* Otherwise a fixed table is consulted that only knows code points up to
* 255: A-Z, a-z, 170, 181, 186, 192..214, 216..246 and 248..255.
* Every other character is reported as non-alphabetic by that fallback.
*)
function IsAlphaChar(ch: WideChar): boolean;
{$IFNDEF MSWINDOWS}
var
  Code: word;
{$ENDIF}
begin
{$IFDEF MSWINDOWS}
  Result := IsCharAlphaW(ch);
{$ELSE}
  // TODO: add chars > 255
  Code := Ord(ch);
  Result :=
    ((Code >= Ord('A')) and (Code <= Ord('Z'))) or   // A-Z
    ((Code >= Ord('a')) and (Code <= Ord('z'))) or   // a-z
    (Code = 170) or (Code = 181) or (Code = 186) or
    ((Code >= 192) and (Code <= 214)) or
    ((Code >= 216) and (Code <= 246)) or
    ((Code >= 248) and (Code <= 255));
{$ENDIF}
end;
(*
* Tells whether ch is one of the digits '0'..'9'.
*
* Numerals of other scripts are deliberately not recognized, as they
* are not meant to be handled.
*)
function IsNumericChar(ch: WideChar): boolean;
begin
  Result := (ch >= '0') and (ch <= '9');
end;
(*
* Tells whether ch is accepted by IsAlphaChar or by IsNumericChar.
*)
function IsAlphaNumericChar(ch: WideChar): boolean;
begin
  if IsAlphaChar(ch) then
    Result := true
  else
    Result := IsNumericChar(ch);
end;
(*
* Tells whether ch is a punctuation, symbol or spacing character.
*
* Only code points up to 255 are recognized:
*   - the ASCII ranges ' '..'/', ':'..'@', '['..'`' and '{'..'~'
*   - 160..191 except 170, 181 and 186
*   - 215 (multiplication sign) and 247 (division sign)
*
* 170, 181 and 186 are excluded because IsAlphaChar classifies them as
* letters; a character must not be alphabetic and punctuation at once.
*)
function IsPunctuationChar(ch: WideChar): boolean;
begin
  // TODO: add chars > 255?
  case ch of
    ' '..'/', ':'..'@', '['..'`', '{'..'~',
    #160..#169,   // skips #170 (feminine ordinal indicator, a letter)
    #171..#180,   // skips #181 (micro sign, a letter)
    #182..#185,   // skips #186 (masculine ordinal indicator, a letter)
    #187..#191,
    #215, #247:
      Result := true;
    else
      Result := false;
  end;
end;
(*
* Tells whether ch is a control character, i.e. its code point lies in
* 0..31 or in 127..159.
*)
function IsControlChar(ch: WideChar): boolean;
var
  Code: word;
begin
  Code := Ord(ch);
  Result := (Code <= 31) or ((Code >= 127) and (Code <= 159));
end;
(*
* Converts a UTF-8 encoded string to a UCS4String.
*
* The text is first decoded to a WideString with UTF8Decode and then
* widened with WideStringToUCS4String.
*)
function UTF8ToUCS4String(const str: UTF8String): UCS4String;
var
  Decoded: WideString;
begin
  Decoded := UTF8Decode(str);
  Result := WideStringToUCS4String(Decoded);
end;
(*
* Converts a UCS4String to a UTF-8 encoded string.
*
* The text is first narrowed to a WideString with UCS4StringToWideString
* and then encoded with UTF8Encode.
*)
function UCS4ToUTF8String(const str: UCS4String): UTF8String;
var
  Wide: WideString;
begin
  Wide := UCS4StringToWideString(str);
  Result := UTF8Encode(Wide);
end;
end.
|