From 3260749d369d3466c345d40a8b2189c32c8c1b60 Mon Sep 17 00:00:00 2001 From: Alexander Sulfrian Date: Mon, 7 Nov 2011 15:26:44 +0100 Subject: removed pascal code --- src/lib/JEDI-SDL/SDL/Pas/libxmlparser.pas | 2688 ----------------------------- 1 file changed, 2688 deletions(-) delete mode 100644 src/lib/JEDI-SDL/SDL/Pas/libxmlparser.pas (limited to 'src/lib/JEDI-SDL/SDL/Pas/libxmlparser.pas') diff --git a/src/lib/JEDI-SDL/SDL/Pas/libxmlparser.pas b/src/lib/JEDI-SDL/SDL/Pas/libxmlparser.pas deleted file mode 100644 index 63e7b7fb..00000000 --- a/src/lib/JEDI-SDL/SDL/Pas/libxmlparser.pas +++ /dev/null @@ -1,2688 +0,0 @@ -(** -=============================================================================================== -Name : LibXmlParser -=============================================================================================== -Project : All Projects -=============================================================================================== -Subject : Progressive XML Parser for all types of XML Files -=============================================================================================== -Author : Stefan Heymann - Eschenweg 3 - 72076 Tübingen - GERMANY - -E-Mail: stefan@destructor.de -URL: www.destructor.de -=============================================================================================== -Source, Legals ("Licence") --------------------------- -The official site to get this parser is http://www.destructor.de/ - -Usage and Distribution of this Source Code is ruled by the -"Destructor.de Source code Licence" (DSL) which comes with this file or -can be downloaded at http://www.destructor.de/ - -IN SHORT: Usage and distribution of this source code is free. - You use it completely on your own risk. - -Postcardware ------------- -If you like this code, please send a postcard of your city to my above address. -=============================================================================================== -!!! All parts of this code which are not finished or not conforming exactly to - the XmlSpec are marked with three exclamation marks - --!- Parts where the parser may be able to detect errors in the document's syntax are - marked with the dash-exlamation mark-dash sequence. -=============================================================================================== -Terminology: ------------- -- Start: Start of a buffer part -- Final: End (last character) of a buffer part -- DTD: Document Type Definition -- DTDc: Document Type Declaration -- XMLSpec: The current W3C XML Recommendation (version 1.0 as of 1998-02-10), Chapter No. -- Cur*: Fields concerning the "Current" part passed back by the "Scan" method -=============================================================================================== -Scanning the XML document -------------------------- -- Create TXmlParser Instance MyXml := TXmlParser.Create; -- Load XML Document MyXml.LoadFromFile (Filename); -- Start Scanning MyXml.StartScan; -- Scan Loop WHILE MyXml.Scan DO -- Test for Part Type CASE MyXml.CurPartType OF -- Handle Parts ... : ;;; -- Handle Parts ... : ;;; -- Handle Parts ... : ;;; - END; -- Destroy MyXml.Free; -=============================================================================================== -Loading the XML document ------------------------- -You can load the XML document from a file with the "LoadFromFile" method. -It is beyond the scope of this parser to perform HTTP or FTP accesses. If you want your -application to handle such requests (URLs), you can load the XML via HTTP or FTP or whatever -protocol and hand over the data buffer using the "LoadFromBuffer" or "SetBuffer" method. -"LoadFromBuffer" loads the internal buffer of TXmlParser with the given null-terminated -string, thereby creating a copy of that buffer. -"SetBuffer" just takes the pointer to another buffer, which means that the given -buffer pointer must be valid while the document is accessed via TXmlParser. -=============================================================================================== -Encodings: ----------- -This XML parser kind of "understands" the following encodings: -- UTF-8 -- ISO-8859-1 -- Windows-1252 - -Any flavor of multi-byte characters (and this includes UTF-16) is not supported. Sorry. - -Every string which has to be passed to the application passes the virtual method -"TranslateEncoding" which translates the string from the current encoding (stored in -"CurEncoding") into the encoding the application wishes to receive. -The "TranslateEncoding" method that is built into TXmlParser assumes that the application -wants to receive Windows ANSI (Windows-1252, about the same as ISO-8859-1) and is able -to convert UTF-8 and ISO-8859-1 encodings. -For other source and target encodings, you will have to override "TranslateEncoding". -=============================================================================================== -Buffer Handling ---------------- -- The document must be loaded completely into a piece of RAM -- All character positions are referenced by PChar pointers -- The TXmlParser instance can either "own" the buffer itself (then, FBufferSize is > 0) - or reference the buffer of another instance or object (then, FBuffersize is 0 and - FBuffer is not NIL) -- The Property DocBuffer passes back a pointer to the first byte of the document. If there - is no document stored (FBuffer is NIL), the DocBuffer returns a pointer to a NULL character. -=============================================================================================== -Whitespace Handling -------------------- -The TXmlParser property "PackSpaces" determines how Whitespace is returned in Text Content: -While PackSpaces is true, all leading and trailing whitespace characters are trimmed of, all -Whitespace is converted to Space #x20 characters and contiguous Whitespace characters are -compressed to one. -If the "Scan" method reports a ptContent part, the application can get the original text -with all whitespace characters by extracting the characters from "CurStart" to "CurFinal". -If the application detects an xml:space attribute, it can set "PackSpaces" accordingly or -use CurStart/CurFinal. -Please note that TXmlParser does _not_ normalize Line Breaks to single LineFeed characters -as the XmlSpec requires (XmlSpec 2.11). -The xml:space attribute is not handled by TXmlParser. This is on behalf of the application. -=============================================================================================== -Non-XML-Conforming ------------------- -TXmlParser does not conform 100 % exactly to the XmlSpec: -- UTF-16 is not supported (XmlSpec 2.2) - (Workaround: Convert UTF-16 to UTF-8 and hand the buffer over to TXmlParser) -- As the parser only works with single byte strings, all Unicode characters > 255 - can currently not be handled correctly. -- Line breaks are not normalized to single Linefeed #x0A characters (XmlSpec 2.11) - (Workaround: The Application can access the text contents on its own [CurStart, CurFinal], - thereby applying every normalization it wishes to) -- The attribute value normalization does not work exactly as defined in the - Second Edition of the XML 1.0 specification. -- See also the code parts marked with three consecutive exclamation marks. These are - parts which are not finished in the current code release. - -This list may be incomplete, so it may grow if I get to know any other points. -As work on the parser proceeds, this list may also shrink. -=============================================================================================== -Things Todo ------------ -- Introduce a new event/callback which is called when there is an unresolvable - entity or character reference -- Support Unicode -- Use Streams instead of reading the whole XML into memory -=============================================================================================== -Change History, Version numbers -------------------------------- -The Date is given in ISO Year-Month-Day (YYYY-MM-DD) order. -Versions are counted from 1.0.0 beginning with the version from 2000-03-16. -Unreleased versions don't get a version number. - -Date Author Version Changes ------------------------------------------------------------------------------------------------ -2000-03-16 HeySt 1.0.0 Start -2000-03-28 HeySt 1.0.1 Initial Publishing of TXmlParser on the destructor.de Web Site -2000-03-30 HeySt 1.0.2 TXmlParser.AnalyzeCData: Call "TranslateEncoding" for CurContent -2000-03-31 HeySt 1.0.3 Deleted the StrPosE function (was not needed anyway) -2000-04-04 HeySt 1.0.4 TDtdElementRec modified: Start/Final for all Elements; - Should be backwards compatible. - AnalyzeDtdc: Set CurPartType to ptDtdc -2000-04-23 HeySt 1.0.5 New class TObjectList. Eliminated reference to the Delphi 5 - "Contnrs" unit so LibXmlParser is Delphi 4 compatible. -2000-07-03 HeySt 1.0.6 TNvpNode: Added Constructor -2000-07-11 HeySt 1.0.7 Removed "Windows" from USES clause - Added three-exclamation-mark comments for Utf8ToAnsi/AnsiToUtf8 - Added three-exclamation-mark comments for CHR function calls -2000-07-23 HeySt 1.0.8 TXmlParser.Clear: CurAttr.Clear; EntityStack.Clear; - (This was not a bug; just defensive programming) -2000-07-29 HeySt 1.0.9 TNvpList: Added methods: Node(Index), Value(Index), Name(Index); -2000-10-07 HeySt Introduced Conditional Defines - Uses Contnrs unit and its TObjectList class again for - Delphi 5 and newer versions -2001-01-30 HeySt Introduced Version Numbering - Made LoadFromFile and LoadFromBuffer BOOLEAN functions - Introduced FileMode parameter for LoadFromFile - BugFix: TAttrList.Analyze: Must add CWhitespace to ExtractName call - Comments worked over -2001-02-28 HeySt 1.0.10 Completely worked over and tested the UTF-8 functions - Fixed a bug in TXmlParser.Scan which caused it to start over when it - was called after the end of scanning, resulting in an endless loop - TEntityStack is now a TObjectList instead of TList -2001-07-03 HeySt 1.0.11 Updated Compiler Version IFDEFs for Kylix -2001-07-11 HeySt 1.0.12 New TCustomXmlScanner component (taken over from LibXmlComps.pas) -2001-07-14 HeySt 1.0.13 Bugfix TCustomXmlScanner.FOnTranslateEncoding -2001-10-22 HeySt Don't clear CurName anymore when the parser finds a CDATA section. -2001-12-03 HeySt 1.0.14 TObjectList.Clear: Make call to INHERITED method (fixes a memory leak) -2001-12-05 HeySt 1.0.15 TObjectList.Clear: removed call to INHERITED method - TObjectList.Destroy: Inserted SetCapacity call. - Reduces need for frequent re-allocation of pointer buffer - Dedicated to my father, Theodor Heymann -2002-06-26 HeySt 1.0.16 TXmlParser.Scan: Fixed a bug with PIs whose name is beginning - with 'xml'. Thanks to Uwe Kamm for submitting this bug. - The CurEncoding property is now always in uppercase letters (the XML - spec wants it to be treated case independently so when it's uppercase - comparisons are faster) -2002-03-04 HeySt 1.0.17 Included an IFDEF for Delphi 7 (VER150) and Kylix - There is a new symbol HAS_CONTNRS_UNIT which is used now to - distinguish between IDEs which come with the Contnrs unit and - those that don't. -*) - -UNIT libxmlparser; - -{$I jedi-sdl.inc} - -INTERFACE - -USES - SysUtils, Classes, - (*$IFDEF HAS_CONTNRS_UNIT *) // The Contnrs Unit was introduced in Delphi 5 - Contnrs, - (*$ENDIF*) - Math; - -CONST - CVersion = '1.0.17'; // This variable will be updated for every release - // (I hope, I won't forget to do it everytime ...) - -TYPE - TPartType = // --- Document Part Types - (ptNone, // Nothing - ptXmlProlog, // XML Prolog XmlSpec 2.8 / 4.3.1 - ptComment, // Comment XmlSpec 2.5 - ptPI, // Processing Instruction XmlSpec 2.6 - ptDtdc, // Document Type Declaration XmlSpec 2.8 - ptStartTag, // Start Tag XmlSpec 3.1 - ptEmptyTag, // Empty-Element Tag XmlSpec 3.1 - ptEndTag, // End Tag XmlSpec 3.1 - ptContent, // Text Content between Tags - ptCData); // CDATA Section XmlSpec 2.7 - - TDtdElemType = // --- DTD Elements - (deElement, // !ELEMENT declaration - deAttList, // !ATTLIST declaration - deEntity, // !ENTITY declaration - deNotation, // !NOTATION declaration - dePI, // PI in DTD - deComment, // Comment in DTD - deError); // Error found in the DTD - -TYPE - TAttrList = CLASS; - TEntityStack = CLASS; - TNvpList = CLASS; - TElemDef = CLASS; - TElemList = CLASS; - TEntityDef = CLASS; - TNotationDef = CLASS; - - TDtdElementRec = RECORD // --- This Record is returned by the DTD parser callback function - Start, Final : PChar; // Start/End of the Element's Declaration - CASE ElementType : TDtdElemType OF // Type of the Element - deElement, // - deAttList : (ElemDef : TElemDef); // - deEntity : (EntityDef : TEntityDef); // - deNotation : (NotationDef : TNotationDef); // - dePI : (Target : PChar; // - Content : PChar; - AttrList : TAttrList); - deError : (Pos : PChar); // Error - // deComment : ((No additional fields here)); // - END; - - TXmlParser = CLASS // --- Internal Properties and Methods - PROTECTED - FBuffer : PChar; // NIL if there is no buffer available - FBufferSize : INTEGER; // 0 if the buffer is not owned by the Document instance - FSource : STRING; // Name of Source of document. Filename for Documents loaded with LoadFromFile - - FXmlVersion : STRING; // XML version from Document header. Default is '1.0' - FEncoding : STRING; // Encoding from Document header. Default is 'UTF-8' - FStandalone : BOOLEAN; // Standalone declaration from Document header. Default is 'yes' - FRootName : STRING; // Name of the Root Element (= DTD name) - FDtdcFinal : PChar; // Pointer to the '>' character terminating the DTD declaration - - FNormalize : BOOLEAN; // If true: Pack Whitespace and don't return empty contents - EntityStack : TEntityStack; // Entity Stack for Parameter and General Entities - FCurEncoding : STRING; // Current Encoding during parsing (always uppercase) - - PROCEDURE AnalyzeProlog; // Analyze XML Prolog or Text Declaration - PROCEDURE AnalyzeComment (Start : PChar; VAR Final : PChar); // Analyze Comments - PROCEDURE AnalyzePI (Start : PChar; VAR Final : PChar); // Analyze Processing Instructions (PI) - PROCEDURE AnalyzeDtdc; // Analyze Document Type Declaration - PROCEDURE AnalyzeDtdElements (Start : PChar; VAR Final : PChar); // Analyze DTD declarations - PROCEDURE AnalyzeTag; // Analyze Start/End/Empty-Element Tags - PROCEDURE AnalyzeCData; // Analyze CDATA Sections - PROCEDURE AnalyzeText (VAR IsDone : BOOLEAN); // Analyze Text Content between Tags - PROCEDURE AnalyzeElementDecl (Start : PChar; VAR Final : PChar); - PROCEDURE AnalyzeAttListDecl (Start : PChar; VAR Final : PChar); - PROCEDURE AnalyzeEntityDecl (Start : PChar; VAR Final : PChar); - PROCEDURE AnalyzeNotationDecl (Start : PChar; VAR Final : PChar); - - PROCEDURE PushPE (VAR Start : PChar); - PROCEDURE ReplaceCharacterEntities (VAR Str : STRING); - PROCEDURE ReplaceParameterEntities (VAR Str : STRING); - PROCEDURE ReplaceGeneralEntities (VAR Str : STRING); - - FUNCTION GetDocBuffer : PChar; // Returns FBuffer or a pointer to a NUL char if Buffer is empty - - PUBLIC // --- Document Properties - PROPERTY XmlVersion : STRING READ FXmlVersion; // XML version from the Document Prolog - PROPERTY Encoding : STRING READ FEncoding; // Document Encoding from Prolog - PROPERTY Standalone : BOOLEAN READ FStandalone; // Standalone Declaration from Prolog - PROPERTY RootName : STRING READ FRootName; // Name of the Root Element - PROPERTY Normalize : BOOLEAN READ FNormalize WRITE FNormalize; // True if Content is to be normalized - PROPERTY Source : STRING READ FSource; // Name of Document Source (Filename) - PROPERTY DocBuffer : PChar READ GetDocBuffer; // Returns document buffer - PUBLIC // --- DTD Objects - Elements : TElemList; // Elements: List of TElemDef (contains Attribute Definitions) - Entities : TNvpList; // General Entities: List of TEntityDef - ParEntities : TNvpList; // Parameter Entities: List of TEntityDef - Notations : TNvpList; // Notations: List of TNotationDef - PUBLIC - CONSTRUCTOR Create; - DESTRUCTOR Destroy; OVERRIDE; - - // --- Document Handling - FUNCTION LoadFromFile (Filename : STRING; - FileMode : INTEGER = fmOpenRead OR fmShareDenyNone) : BOOLEAN; - // Loads Document from given file - FUNCTION LoadFromBuffer (Buffer : PChar) : BOOLEAN; // Loads Document from another buffer - PROCEDURE SetBuffer (Buffer : PChar); // References another buffer - PROCEDURE Clear; // Clear Document - - PUBLIC - // --- Scanning through the document - CurPartType : TPartType; // Current Type - CurName : STRING; // Current Name - CurContent : STRING; // Current Normalized Content - CurStart : PChar; // Current First character - CurFinal : PChar; // Current Last character - CurAttr : TAttrList; // Current Attribute List - PROPERTY CurEncoding : STRING READ FCurEncoding; // Current Encoding - PROCEDURE StartScan; - FUNCTION Scan : BOOLEAN; - - // --- Events / Callbacks - FUNCTION LoadExternalEntity (SystemId, PublicId, - Notation : STRING) : TXmlParser; VIRTUAL; - FUNCTION TranslateEncoding (CONST Source : STRING) : STRING; VIRTUAL; - PROCEDURE DtdElementFound (DtdElementRec : TDtdElementRec); VIRTUAL; - END; - - TValueType = // --- Attribute Value Type - (vtNormal, // Normal specified Attribute - vtImplied, // #IMPLIED attribute value - vtFixed, // #FIXED attribute value - vtDefault); // Attribute value from default value in !ATTLIST declaration - - TAttrDefault = // --- Attribute Default Type - (adDefault, // Normal default value - adRequired, // #REQUIRED attribute - adImplied, // #IMPLIED attribute - adFixed); // #FIXED attribute - - TAttrType = // --- Type of attribute - (atUnknown, // Unknown type - atCData, // Character data only - atID, // ID - atIdRef, // ID Reference - atIdRefs, // Several ID References, separated by Whitespace - atEntity, // Name of an unparsed Entity - atEntities, // Several unparsed Entity names, separated by Whitespace - atNmToken, // Name Token - atNmTokens, // Several Name Tokens, separated by Whitespace - atNotation, // A selection of Notation names (Unparsed Entity) - atEnumeration); // Enumeration - - TElemType = // --- Element content type - (etEmpty, // Element is always empty - etAny, // Element can have any mixture of PCDATA and any elements - etChildren, // Element must contain only elements - etMixed); // Mixed PCDATA and elements - - (*$IFDEF HAS_CONTNRS_UNIT *) - TObjectList = Contnrs.TObjectList; // Re-Export this identifier - (*$ELSE *) - TObjectList = CLASS (TList) - DESTRUCTOR Destroy; OVERRIDE; - PROCEDURE Delete (Index : INTEGER); - PROCEDURE Clear; OVERRIDE; - END; - (*$ENDIF *) - - TNvpNode = CLASS // Name-Value Pair Node - Name : STRING; - Value : STRING; - CONSTRUCTOR Create (TheName : STRING = ''; TheValue : STRING = ''); - END; - - TNvpList = CLASS (TObjectList) // Name-Value Pair List - PROCEDURE Add (Node : TNvpNode); - FUNCTION Node (Name : STRING) : TNvpNode; OVERLOAD; - FUNCTION Node (Index : INTEGER) : TNvpNode; OVERLOAD; - FUNCTION Value (Name : STRING) : STRING; OVERLOAD; - FUNCTION Value (Index : INTEGER) : STRING; OVERLOAD; - FUNCTION Name (Index : INTEGER) : STRING; - END; - - TAttr = CLASS (TNvpNode) // Attribute of a Start-Tag or Empty-Element-Tag - ValueType : TValueType; - AttrType : TAttrType; - END; - - TAttrList = CLASS (TNvpList) // List of Attributes - PROCEDURE Analyze (Start : PChar; VAR Final : PChar); - END; - - TEntityStack = CLASS (TObjectList) // Stack where current position is stored before parsing entities - PROTECTED - Owner : TXmlParser; - PUBLIC - CONSTRUCTOR Create (TheOwner : TXmlParser); - PROCEDURE Push (LastPos : PChar); OVERLOAD; - PROCEDURE Push (Instance : TObject; LastPos : PChar); OVERLOAD; - FUNCTION Pop : PChar; // Returns next char or NIL if EOF is reached. Frees Instance. - END; - - TAttrDef = CLASS (TNvpNode) // Represents a '; - - // --- Name Constants for the above enumeration types - CPartType_Name : ARRAY [TPartType] OF STRING = - ('', 'XML Prolog', 'Comment', 'PI', - 'DTD Declaration', 'Start Tag', 'Empty Tag', 'End Tag', - 'Text', 'CDATA'); - CValueType_Name : ARRAY [TValueType] OF STRING = ('Normal', 'Implied', 'Fixed', 'Default'); - CAttrDefault_Name : ARRAY [TAttrDefault] OF STRING = ('Default', 'Required', 'Implied', 'Fixed'); - CElemType_Name : ARRAY [TElemType] OF STRING = ('Empty', 'Any', 'Childs only', 'Mixed'); - CAttrType_Name : ARRAY [TAttrType] OF STRING = ('Unknown', 'CDATA', - 'ID', 'IDREF', 'IDREFS', - 'ENTITY', 'ENTITIES', - 'NMTOKEN', 'NMTOKENS', - 'Notation', 'Enumeration'); - -FUNCTION ConvertWs (Source: STRING; PackWs: BOOLEAN) : STRING; // Convert WS to spaces #x20 -PROCEDURE SetStringSF (VAR S : STRING; BufferStart, BufferFinal : PChar); // SetString by Start/Final of buffer -FUNCTION StrSFPas (Start, Finish : PChar) : STRING; // Convert buffer part to Pascal string -FUNCTION TrimWs (Source : STRING) : STRING; // Trim Whitespace - -FUNCTION AnsiToUtf8 (Source : ANSISTRING) : STRING; // Convert Win-1252 to UTF-8 -FUNCTION Utf8ToAnsi (Source : STRING; UnknownChar : CHAR = '¿') : ANSISTRING; // Convert UTF-8 to Win-1252 - - -(* -=============================================================================================== -TCustomXmlScanner event based component wrapper for TXmlParser -=============================================================================================== -*) - -TYPE - TCustomXmlScanner = CLASS; - TXmlPrologEvent = PROCEDURE (Sender : TObject; XmlVersion, Encoding: STRING; Standalone : BOOLEAN) OF OBJECT; - TCommentEvent = PROCEDURE (Sender : TObject; Comment : STRING) OF OBJECT; - TPIEvent = PROCEDURE (Sender : TObject; Target, Content: STRING; Attributes : TAttrList) OF OBJECT; - TDtdEvent = PROCEDURE (Sender : TObject; RootElementName : STRING) OF OBJECT; - TStartTagEvent = PROCEDURE (Sender : TObject; TagName : STRING; Attributes : TAttrList) OF OBJECT; - TEndTagEvent = PROCEDURE (Sender : TObject; TagName : STRING) OF OBJECT; - TContentEvent = PROCEDURE (Sender : TObject; Content : STRING) OF OBJECT; - TElementEvent = PROCEDURE (Sender : TObject; ElemDef : TElemDef) OF OBJECT; - TEntityEvent = PROCEDURE (Sender : TObject; EntityDef : TEntityDef) OF OBJECT; - TNotationEvent = PROCEDURE (Sender : TObject; NotationDef : TNotationDef) OF OBJECT; - TErrorEvent = PROCEDURE (Sender : TObject; ErrorPos : PChar) OF OBJECT; - TExternalEvent = PROCEDURE (Sender : TObject; SystemId, PublicId, NotationId : STRING; - VAR Result : TXmlParser) OF OBJECT; - TEncodingEvent = FUNCTION (Sender : TObject; CurrentEncoding, Source : STRING) : STRING OF OBJECT; - - - TCustomXmlScanner = CLASS (TComponent) - PROTECTED - FXmlParser : TXmlParser; - FOnXmlProlog : TXmlPrologEvent; - FOnComment : TCommentEvent; - FOnPI : TPIEvent; - FOnDtdRead : TDtdEvent; - FOnStartTag : TStartTagEvent; - FOnEmptyTag : TStartTagEvent; - FOnEndTag : TEndTagEvent; - FOnContent : TContentEvent; - FOnCData : TContentEvent; - FOnElement : TElementEvent; - FOnAttList : TElementEvent; - FOnEntity : TEntityEvent; - FOnNotation : TNotationEvent; - FOnDtdError : TErrorEvent; - FOnLoadExternal : TExternalEvent; - FOnTranslateEncoding : TEncodingEvent; - FStopParser : BOOLEAN; - FUNCTION GetNormalize : BOOLEAN; - PROCEDURE SetNormalize (Value : BOOLEAN); - - PROCEDURE WhenXmlProlog(XmlVersion, Encoding: STRING; Standalone : BOOLEAN); VIRTUAL; - PROCEDURE WhenComment (Comment : STRING); VIRTUAL; - PROCEDURE WhenPI (Target, Content: STRING; Attributes : TAttrList); VIRTUAL; - PROCEDURE WhenDtdRead (RootElementName : STRING); VIRTUAL; - PROCEDURE WhenStartTag (TagName : STRING; Attributes : TAttrList); VIRTUAL; - PROCEDURE WhenEmptyTag (TagName : STRING; Attributes : TAttrList); VIRTUAL; - PROCEDURE WhenEndTag (TagName : STRING); VIRTUAL; - PROCEDURE WhenContent (Content : STRING); VIRTUAL; - PROCEDURE WhenCData (Content : STRING); VIRTUAL; - PROCEDURE WhenElement (ElemDef : TElemDef); VIRTUAL; - PROCEDURE WhenAttList (ElemDef : TElemDef); VIRTUAL; - PROCEDURE WhenEntity (EntityDef : TEntityDef); VIRTUAL; - PROCEDURE WhenNotation (NotationDef : TNotationDef); VIRTUAL; - PROCEDURE WhenDtdError (ErrorPos : PChar); VIRTUAL; - - PUBLIC - CONSTRUCTOR Create (AOwner: TComponent); OVERRIDE; - DESTRUCTOR Destroy; OVERRIDE; - - PROCEDURE LoadFromFile (Filename : TFilename); // Load XML Document from file - PROCEDURE LoadFromBuffer (Buffer : PChar); // Load XML Document from buffer - PROCEDURE SetBuffer (Buffer : PChar); // Refer to Buffer - FUNCTION GetFilename : TFilename; - - PROCEDURE Execute; // Perform scanning - - PROTECTED - PROPERTY XmlParser : TXmlParser READ FXmlParser; - PROPERTY StopParser : BOOLEAN READ FStopParser WRITE FStopParser; - PROPERTY Filename : TFilename READ GetFilename WRITE LoadFromFile; - PROPERTY Normalize : BOOLEAN READ GetNormalize WRITE SetNormalize; - PROPERTY OnXmlProlog : TXmlPrologEvent READ FOnXmlProlog WRITE FOnXmlProlog; - PROPERTY OnComment : TCommentEvent READ FOnComment WRITE FOnComment; - PROPERTY OnPI : TPIEvent READ FOnPI WRITE FOnPI; - PROPERTY OnDtdRead : TDtdEvent READ FOnDtdRead WRITE FOnDtdRead; - PROPERTY OnStartTag : TStartTagEvent READ FOnStartTag WRITE FOnStartTag; - PROPERTY OnEmptyTag : TStartTagEvent READ FOnEmptyTag WRITE FOnEmptyTag; - PROPERTY OnEndTag : TEndTagEvent READ FOnEndTag WRITE FOnEndTag; - PROPERTY OnContent : TContentEvent READ FOnContent WRITE FOnContent; - PROPERTY OnCData : TContentEvent READ FOnCData WRITE FOnCData; - PROPERTY OnElement : TElementEvent READ FOnElement WRITE FOnElement; - PROPERTY OnAttList : TElementEvent READ FOnAttList WRITE FOnAttList; - PROPERTY OnEntity : TEntityEvent READ FOnEntity WRITE FOnEntity; - PROPERTY OnNotation : TNotationEvent READ FOnNotation WRITE FOnNotation; - PROPERTY OnDtdError : TErrorEvent READ FOnDtdError WRITE FOnDtdError; - PROPERTY OnLoadExternal : TExternalEvent READ FOnLoadExternal WRITE FOnLoadExternal; - PROPERTY OnTranslateEncoding : TEncodingEvent READ FOnTranslateEncoding WRITE FOnTranslateEncoding; - END; - -(* -=============================================================================================== -IMPLEMENTATION -=============================================================================================== -*) - -IMPLEMENTATION - - -(* -=============================================================================================== -Unicode and UTF-8 stuff -=============================================================================================== -*) - -CONST - // --- Character Translation Table for Unicode <-> Win-1252 - WIN1252_UNICODE : ARRAY [$00..$FF] OF WORD = ( - $0000, $0001, $0002, $0003, $0004, $0005, $0006, $0007, $0008, $0009, - $000A, $000B, $000C, $000D, $000E, $000F, $0010, $0011, $0012, $0013, - $0014, $0015, $0016, $0017, $0018, $0019, $001A, $001B, $001C, $001D, - $001E, $001F, $0020, $0021, $0022, $0023, $0024, $0025, $0026, $0027, - $0028, $0029, $002A, $002B, $002C, $002D, $002E, $002F, $0030, $0031, - $0032, $0033, $0034, $0035, $0036, $0037, $0038, $0039, $003A, $003B, - $003C, $003D, $003E, $003F, $0040, $0041, $0042, $0043, $0044, $0045, - $0046, $0047, $0048, $0049, $004A, $004B, $004C, $004D, $004E, $004F, - $0050, $0051, $0052, $0053, $0054, $0055, $0056, $0057, $0058, $0059, - $005A, $005B, $005C, $005D, $005E, $005F, $0060, $0061, $0062, $0063, - $0064, $0065, $0066, $0067, $0068, $0069, $006A, $006B, $006C, $006D, - $006E, $006F, $0070, $0071, $0072, $0073, $0074, $0075, $0076, $0077, - $0078, $0079, $007A, $007B, $007C, $007D, $007E, $007F, - - $20AC, $0081, $201A, $0192, $201E, $2026, $2020, $2021, $02C6, $2030, - $0160, $2039, $0152, $008D, $017D, $008F, $0090, $2018, $2019, $201C, - $201D, $2022, $2013, $2014, $02DC, $2122, $0161, $203A, $0153, $009D, - $017E, $0178, $00A0, $00A1, $00A2, $00A3, $00A4, $00A5, $00A6, $00A7, - $00A8, $00A9, $00AA, $00AB, $00AC, $00AD, $00AE, $00AF, $00B0, $00B1, - $00B2, $00B3, $00B4, $00B5, $00B6, $00B7, $00B8, $00B9, $00BA, $00BB, - $00BC, $00BD, $00BE, $00BF, $00C0, $00C1, $00C2, $00C3, $00C4, $00C5, - $00C6, $00C7, $00C8, $00C9, $00CA, $00CB, $00CC, $00CD, $00CE, $00CF, - $00D0, $00D1, $00D2, $00D3, $00D4, $00D5, $00D6, $00D7, $00D8, $00D9, - $00DA, $00DB, $00DC, $00DD, $00DE, $00DF, $00E0, $00E1, $00E2, $00E3, - $00E4, $00E5, $00E6, $00E7, $00E8, $00E9, $00EA, $00EB, $00EC, $00ED, - $00EE, $00EF, $00F0, $00F1, $00F2, $00F3, $00F4, $00F5, $00F6, $00F7, - $00F8, $00F9, $00FA, $00FB, $00FC, $00FD, $00FE, $00FF); - -(* UTF-8 (somewhat simplified) - ----- - Character Range Byte sequence - --------------- -------------------------- (x=Bits from original character) - $0000..$007F 0xxxxxxx - $0080..$07FF 110xxxxx 10xxxxxx - $8000..$FFFF 1110xxxx 10xxxxxx 10xxxxxx - - Example - -------- - Transforming the Unicode character U+00E4 LATIN SMALL LETTER A WITH DIAERESIS ("ä"): - - ISO-8859-1, Decimal 228 - Win1252, Hex $E4 - ANSI Bin 1110 0100 - abcd efgh - - UTF-8 Binary 1100xxab 10cdefgh - Binary 11000011 10100100 - Hex $C3 $A4 - Decimal 195 164 - ANSI Ã ¤ *) - - -FUNCTION AnsiToUtf8 (Source : ANSISTRING) : STRING; - (* Converts the given Windows ANSI (Win1252) String to UTF-8. *) -VAR - I : INTEGER; // Loop counter - U : WORD; // Current Unicode value - Len : INTEGER; // Current real length of "Result" string -BEGIN - SetLength (Result, Length (Source) * 3); // Worst case - Len := 0; - FOR I := 1 TO Length (Source) DO BEGIN - U := WIN1252_UNICODE [ORD (Source [I])]; - CASE U OF - $0000..$007F : BEGIN - INC (Len); - Result [Len] := CHR (U); - END; - $0080..$07FF : BEGIN - INC (Len); - Result [Len] := CHR ($C0 OR (U SHR 6)); - INC (Len); - Result [Len] := CHR ($80 OR (U AND $3F)); - END; - $0800..$FFFF : BEGIN - INC (Len); - Result [Len] := CHR ($E0 OR (U SHR 12)); - INC (Len); - Result [Len] := CHR ($80 OR ((U SHR 6) AND $3F)); - INC (Len); - Result [Len] := CHR ($80 OR (U AND $3F)); - END; - END; - END; - SetLength (Result, Len); -END; - - -FUNCTION Utf8ToAnsi (Source : STRING; UnknownChar : CHAR = '¿') : ANSISTRING; - (* Converts the given UTF-8 String to Windows ANSI (Win-1252). - If a character can not be converted, the "UnknownChar" is inserted. *) -VAR - SourceLen : INTEGER; // Length of Source string - I, K : INTEGER; - A : BYTE; // Current ANSI character value - U : WORD; - Ch : CHAR; // Dest char - Len : INTEGER; // Current real length of "Result" string -BEGIN - SourceLen := Length (Source); - SetLength (Result, SourceLen); // Enough room to live - Len := 0; - I := 1; - WHILE I <= SourceLen DO BEGIN - A := ORD (Source [I]); - IF A < $80 THEN BEGIN // Range $0000..$007F - INC (Len); - Result [Len] := Source [I]; - INC (I); - END - ELSE BEGIN // Determine U, Inc I - IF (A AND $E0 = $C0) AND (I < SourceLen) THEN BEGIN // Range $0080..$07FF - U := (WORD (A AND $1F) SHL 6) OR (ORD (Source [I+1]) AND $3F); - INC (I, 2); - END - ELSE IF (A AND $F0 = $E0) AND (I < SourceLen-1) THEN BEGIN // Range $0800..$FFFF - U := (WORD (A AND $0F) SHL 12) OR - (WORD (ORD (Source [I+1]) AND $3F) SHL 6) OR - ( ORD (Source [I+2]) AND $3F); - INC (I, 3); - END - ELSE BEGIN // Unknown/unsupported - INC (I); - FOR K := 7 DOWNTO 0 DO - IF A AND (1 SHL K) = 0 THEN BEGIN - INC (I, (A SHR (K+1))-1); - BREAK; - END; - U := WIN1252_UNICODE [ORD (UnknownChar)]; - END; - Ch := UnknownChar; // Retrieve ANSI char - FOR A := $00 TO $FF DO - IF WIN1252_UNICODE [A] = U THEN BEGIN - Ch := CHR (A); - BREAK; - END; - INC (Len); - Result [Len] := Ch; - END; - END; - SetLength (Result, Len); -END; - - -(* -=============================================================================================== -"Special" Helper Functions - -Don't ask me why. But including these functions makes the parser *DRAMATICALLY* faster -on my K6-233 machine. You can test it yourself just by commenting them out. -They do exactly the same as the Assembler routines defined in SysUtils. -(This is where you can see how great the Delphi compiler really is. The compiled code is -faster than hand-coded assembler!) -=============================================================================================== ---> Just move this line below the StrScan function --> *) - - -FUNCTION StrPos (CONST Str, SearchStr : PChar) : PChar; - // Same functionality as SysUtils.StrPos -VAR - First : CHAR; - Len : INTEGER; -BEGIN - First := SearchStr^; - Len := StrLen (SearchStr); - Result := Str; - REPEAT - IF Result^ = First THEN - IF StrLComp (Result, SearchStr, Len) = 0 THEN BREAK; - IF Result^ = #0 THEN BEGIN - Result := NIL; - BREAK; - END; - INC (Result); - UNTIL FALSE; -END; - - -FUNCTION StrScan (CONST Start : PChar; CONST Ch : CHAR) : PChar; - // Same functionality as SysUtils.StrScan -BEGIN - Result := Start; - WHILE Result^ <> Ch DO BEGIN - IF Result^ = #0 THEN BEGIN - Result := NIL; - EXIT; - END; - INC (Result); - END; -END; - - -(* -=============================================================================================== -Helper Functions -=============================================================================================== -*) - -FUNCTION DelChars (Source : STRING; CharsToDelete : TCharset) : STRING; - // Delete all "CharsToDelete" from the string -VAR - I : INTEGER; -BEGIN - Result := Source; - FOR I := Length (Result) DOWNTO 1 DO - IF Result [I] IN CharsToDelete THEN - Delete (Result, I, 1); -END; - - -FUNCTION TrimWs (Source : STRING) : STRING; - // Trimms off Whitespace characters from both ends of the string -VAR - I : INTEGER; -BEGIN - // --- Trim Left - I := 1; - WHILE (I <= Length (Source)) AND (Source [I] IN CWhitespace) DO - INC (I); - Result := Copy (Source, I, MaxInt); - - // --- Trim Right - I := Length (Result); - WHILE (I > 1) AND (Result [I] IN CWhitespace) DO - DEC (I); - Delete (Result, I+1, Length (Result)-I); -END; - - -FUNCTION ConvertWs (Source: STRING; PackWs: BOOLEAN) : STRING; - // Converts all Whitespace characters to the Space #x20 character - // If "PackWs" is true, contiguous Whitespace characters are packed to one -VAR - I : INTEGER; -BEGIN - Result := Source; - FOR I := Length (Result) DOWNTO 1 DO - IF (Result [I] IN CWhitespace) THEN - IF PackWs AND (I > 1) AND (Result [I-1] IN CWhitespace) - THEN Delete (Result, I, 1) - ELSE Result [I] := #32; -END; - - -PROCEDURE SetStringSF (VAR S : STRING; BufferStart, BufferFinal : PChar); -BEGIN - SetString (S, BufferStart, BufferFinal-BufferStart+1); -END; - - -FUNCTION StrLPas (Start : PChar; Len : INTEGER) : STRING; -BEGIN - SetString (Result, Start, Len); -END; - - -FUNCTION StrSFPas (Start, Finish : PChar) : STRING; -BEGIN - SetString (Result, Start, Finish-Start+1); -END; - - -FUNCTION StrScanE (CONST Source : PChar; CONST CharToScanFor : CHAR) : PChar; - // If "CharToScanFor" is not found, StrScanE returns the last char of the - // buffer instead of NIL -BEGIN - Result := StrScan (Source, CharToScanFor); - IF Result = NIL THEN - Result := StrEnd (Source)-1; -END; - - -PROCEDURE ExtractName (Start : PChar; Terminators : TCharset; VAR Final : PChar); - (* Extracts the complete Name beginning at "Start". - It is assumed that the name is contained in Markup, so the '>' character is - always a Termination. - Start: IN Pointer to first char of name. Is always considered to be valid - Terminators: IN Characters which terminate the name - Final: OUT Pointer to last char of name *) -BEGIN - Final := Start+1; - Include (Terminators, #0); - Include (Terminators, '>'); - WHILE NOT (Final^ IN Terminators) DO - INC (Final); - DEC (Final); -END; - - -PROCEDURE ExtractQuote (Start : PChar; VAR Content : STRING; VAR Final : PChar); - (* Extract a string which is contained in single or double Quotes. - Start: IN Pointer to opening quote - Content: OUT The quoted string - Final: OUT Pointer to closing quote *) -BEGIN - Final := StrScan (Start+1, Start^); - IF Final = NIL THEN BEGIN - Final := StrEnd (Start+1)-1; - SetString (Content, Start+1, Final-Start); - END - ELSE - SetString (Content, Start+1, Final-1-Start); -END; - - -(* -=============================================================================================== -TEntityStackNode -This Node is pushed to the "Entity Stack" whenever the parser parses entity replacement text. -The "Instance" field holds the Instance pointer of an External Entity buffer. When it is -popped, the Instance is freed. -The "Encoding" field holds the name of the Encoding. External Parsed Entities may have -another encoding as the document entity (XmlSpec 4.3.3). So when there is an " 0 THEN BEGIN - ESN := TEntityStackNode (Items [Count-1]); - Result := ESN.LastPos; - IF ESN.Instance <> NIL THEN - ESN.Instance.Free; - IF ESN.Encoding <> '' THEN - Owner.FCurEncoding := ESN.Encoding; // Restore current Encoding - Delete (Count-1); - END - ELSE - Result := NIL; -END; - - -(* -=============================================================================================== -TExternalID ------------ -XmlSpec 4.2.2: ExternalID ::= 'SYSTEM' S SystemLiteral | - 'PUBLIC' S PubidLiteral S SystemLiteral -XmlSpec 4.7: PublicID ::= 'PUBLIC' S PubidLiteral -SystemLiteral and PubidLiteral are quoted -=============================================================================================== -*) - -TYPE - TExternalID = CLASS - PublicId : STRING; - SystemId : STRING; - Final : PChar; - CONSTRUCTOR Create (Start : PChar); - END; - -CONSTRUCTOR TExternalID.Create (Start : PChar); -BEGIN - INHERITED Create; - Final := Start; - IF StrLComp (Start, 'SYSTEM', 6) = 0 THEN BEGIN - WHILE NOT (Final^ IN (CQuoteChar + [#0, '>', '['])) DO INC (Final); - IF NOT (Final^ IN CQuoteChar) THEN EXIT; - ExtractQuote (Final, SystemID, Final); - END - ELSE IF StrLComp (Start, 'PUBLIC', 6) = 0 THEN BEGIN - WHILE NOT (Final^ IN (CQuoteChar + [#0, '>', '['])) DO INC (Final); - IF NOT (Final^ IN CQuoteChar) THEN EXIT; - ExtractQuote (Final, PublicID, Final); - INC (Final); - WHILE NOT (Final^ IN (CQuoteChar + [#0, '>', '['])) DO INC (Final); - IF NOT (Final^ IN CQuoteChar) THEN EXIT; - ExtractQuote (Final, SystemID, Final); - END; -END; - - -(* -=============================================================================================== -TXmlParser -=============================================================================================== -*) - -CONSTRUCTOR TXmlParser.Create; -BEGIN - INHERITED Create; - FBuffer := NIL; - FBufferSize := 0; - Elements := TElemList.Create; - Entities := TNvpList.Create; - ParEntities := TNvpList.Create; - Notations := TNvpList.Create; - CurAttr := TAttrList.Create; - EntityStack := TEntityStack.Create (Self); - Clear; -END; - - -DESTRUCTOR TXmlParser.Destroy; -BEGIN - Clear; - Elements.Free; - Entities.Free; - ParEntities.Free; - Notations.Free; - CurAttr.Free; - EntityStack.Free; - INHERITED Destroy; -END; - - -PROCEDURE TXmlParser.Clear; - // Free Buffer and clear all object attributes -BEGIN - IF (FBufferSize > 0) AND (FBuffer <> NIL) THEN - FreeMem (FBuffer); - FBuffer := NIL; - FBufferSize := 0; - FSource := ''; - FXmlVersion := ''; - FEncoding := ''; - FStandalone := FALSE; - FRootName := ''; - FDtdcFinal := NIL; - FNormalize := TRUE; - Elements.Clear; - Entities.Clear; - ParEntities.Clear; - Notations.Clear; - CurAttr.Clear; - EntityStack.Clear; -END; - - -FUNCTION TXmlParser.LoadFromFile (Filename : STRING; FileMode : INTEGER = fmOpenRead OR fmShareDenyNone) : BOOLEAN; - // Loads Document from given file - // Returns TRUE if successful -VAR - f : FILE; - ReadIn : INTEGER; - OldFileMode : INTEGER; -BEGIN - Result := FALSE; - Clear; - - // --- Open File - OldFileMode := SYSTEM.FileMode; - TRY - SYSTEM.FileMode := FileMode; - TRY - AssignFile (f, Filename); - Reset (f, 1); - EXCEPT - EXIT; - END; - - TRY - // --- Allocate Memory - TRY - FBufferSize := Filesize (f) + 1; - GetMem (FBuffer, FBufferSize); - EXCEPT - Clear; - EXIT; - END; - - // --- Read File - TRY - BlockRead (f, FBuffer^, FBufferSize, ReadIn); - (FBuffer+ReadIn)^ := #0; // NULL termination - EXCEPT - Clear; - EXIT; - END; - FINALLY - CloseFile (f); - END; - - FSource := Filename; - Result := TRUE; - - FINALLY - SYSTEM.FileMode := OldFileMode; - END; -END; - - -FUNCTION TXmlParser.LoadFromBuffer (Buffer : PChar) : BOOLEAN; - // Loads Document from another buffer - // Returns TRUE if successful - // The "Source" property becomes '' if successful -BEGIN - Result := FALSE; - Clear; - FBufferSize := StrLen (Buffer) + 1; - TRY - GetMem (FBuffer, FBufferSize); - EXCEPT - Clear; - EXIT; - END; - StrCopy (FBuffer, Buffer); - FSource := ''; - Result := TRUE; -END; - - -PROCEDURE TXmlParser.SetBuffer (Buffer : PChar); // References another buffer -BEGIN - Clear; - FBuffer := Buffer; - FBufferSize := 0; - FSource := ''; -END; - - -//----------------------------------------------------------------------------------------------- -// Scanning through the document -//----------------------------------------------------------------------------------------------- - -PROCEDURE TXmlParser.StartScan; -BEGIN - CurPartType := ptNone; - CurName := ''; - CurContent := ''; - CurStart := NIL; - CurFinal := NIL; - CurAttr.Clear; - EntityStack.Clear; -END; - - -FUNCTION TXmlParser.Scan : BOOLEAN; - // Scans the next Part - // Returns TRUE if a part could be found, FALSE if there is no part any more - // - // "IsDone" can be set to FALSE by AnalyzeText in order to go to the next part - // if there is no Content due to normalization -VAR - IsDone : BOOLEAN; -BEGIN - REPEAT - IsDone := TRUE; - - // --- Start of next Part - IF CurStart = NIL - THEN CurStart := DocBuffer - ELSE CurStart := CurFinal+1; - CurFinal := CurStart; - - // --- End of Document of Pop off a new part from the Entity stack? - IF CurStart^ = #0 THEN - CurStart := EntityStack.Pop; - - // --- No Document or End Of Document: Terminate Scan - IF (CurStart = NIL) OR (CurStart^ = #0) THEN BEGIN - CurStart := StrEnd (DocBuffer); - CurFinal := CurStart-1; - EntityStack.Clear; - Result := FALSE; - EXIT; - END; - - IF (StrLComp (CurStart, ''); - IF CurFinal <> NIL - THEN INC (CurFinal) - ELSE CurFinal := StrEnd (CurStart)-1; - FCurEncoding := AnsiUpperCase (CurAttr.Value ('encoding')); - IF FCurEncoding = '' THEN - FCurEncoding := 'UTF-8'; // Default XML Encoding is UTF-8 - CurPartType := ptXmlProlog; - CurName := ''; - CurContent := ''; -END; - - -PROCEDURE TXmlParser.AnalyzeComment (Start : PChar; VAR Final : PChar); - // Analyze Comments -BEGIN - Final := StrPos (Start+4, '-->'); - IF Final = NIL - THEN Final := StrEnd (Start)-1 - ELSE INC (Final, 2); - CurPartType := ptComment; -END; - - -PROCEDURE TXmlParser.AnalyzePI (Start : PChar; VAR Final : PChar); - // Analyze Processing Instructions (PI) - // This is also called for Character -VAR - F : PChar; -BEGIN - CurPartType := ptPI; - Final := StrPos (Start+2, '?>'); - IF Final = NIL - THEN Final := StrEnd (Start)-1 - ELSE INC (Final); - ExtractName (Start+2, CWhitespace + ['?', '>'], F); - SetStringSF (CurName, Start+2, F); - SetStringSF (CurContent, F+1, Final-2); - CurAttr.Analyze (F+1, F); -END; - - -PROCEDURE TXmlParser.AnalyzeDtdc; - (* Analyze Document Type Declaration - doctypedecl ::= '' - markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment - PEReference ::= '%' Name ';' - - elementdecl ::= '' - AttlistDecl ::= '' - EntityDecl ::= '' | - '' - NotationDecl ::= '' - PI ::= '' Char* )))? '?>' - Comment ::= '' *) -TYPE - TPhase = (phName, phDtd, phInternal, phFinishing); -VAR - Phase : TPhase; - F : PChar; - ExternalID : TExternalID; - ExternalDTD : TXmlParser; - DER : TDtdElementRec; -BEGIN - DER.Start := CurStart; - EntityStack.Clear; // Clear stack for Parameter Entities - CurPartType := ptDtdc; - - // --- Don't read DTDc twice - IF FDtdcFinal <> NIL THEN BEGIN - CurFinal := FDtdcFinal; - EXIT; - END; - - // --- Scan DTDc - CurFinal := CurStart + 9; // First char after '' : BREAK; - ELSE IF NOT (CurFinal^ IN CWhitespace) THEN BEGIN - CASE Phase OF - phName : IF (CurFinal^ IN CNameStart) THEN BEGIN - ExtractName (CurFinal, CWhitespace + ['[', '>'], F); - SetStringSF (FRootName, CurFinal, F); - CurFinal := F; - Phase := phDtd; - END; - phDtd : IF (StrLComp (CurFinal, 'SYSTEM', 6) = 0) OR - (StrLComp (CurFinal, 'PUBLIC', 6) = 0) THEN BEGIN - ExternalID := TExternalID.Create (CurFinal); - ExternalDTD := LoadExternalEntity (ExternalId.SystemId, ExternalID.PublicId, ''); - F := StrPos (ExternalDtd.DocBuffer, ' NIL THEN - AnalyzeDtdElements (F, F); - ExternalDTD.Free; - CurFinal := ExternalID.Final; - ExternalID.Free; - END; - ELSE BEGIN - DER.ElementType := deError; - DER.Pos := CurFinal; - DER.Final := CurFinal; - DtdElementFound (DER); - END; - END; - - END; - END; - INC (CurFinal); - UNTIL FALSE; - - CurPartType := ptDtdc; - CurName := ''; - CurContent := ''; - - // It is an error in the document if "EntityStack" is not empty now - IF EntityStack.Count > 0 THEN BEGIN - DER.ElementType := deError; - DER.Final := CurFinal; - DER.Pos := CurFinal; - DtdElementFound (DER); - END; - - EntityStack.Clear; // Clear stack for General Entities - FDtdcFinal := CurFinal; -END; - - -PROCEDURE TXmlParser.AnalyzeDtdElements (Start : PChar; VAR Final : PChar); - // Analyze the "Elements" of a DTD contained in the external or - // internal DTD subset. -VAR - DER : TDtdElementRec; -BEGIN - Final := Start; - REPEAT - CASE Final^ OF - '%' : BEGIN - PushPE (Final); - CONTINUE; - END; - #0 : IF EntityStack.Count = 0 THEN - BREAK - ELSE BEGIN - CurFinal := EntityStack.Pop; - CONTINUE; - END; - ']', - '>' : BREAK; - '<' : IF StrLComp (Final, ''); - - // --- Set Default Attribute values for nonexistent attributes - IF (CurPartType = ptStartTag) OR (CurPartType = ptEmptyTag) THEN BEGIN - ElemDef := Elements.Node (CurName); - IF ElemDef <> NIL THEN BEGIN - FOR I := 0 TO ElemDef.Count-1 DO BEGIN - AttrDef := TAttrDef (ElemDef [I]); - Attr := TAttr (CurAttr.Node (AttrDef.Name)); - IF (Attr = NIL) AND (AttrDef.Value <> '') THEN BEGIN - Attr := TAttr.Create (AttrDef.Name, AttrDef.Value); - Attr.ValueType := vtDefault; - CurAttr.Add (Attr); - END; - IF Attr <> NIL THEN BEGIN - CASE AttrDef.DefaultType OF - adDefault : ; - adRequired : ; // -!- It is an error in the document if "Attr.Value" is an empty string - adImplied : Attr.ValueType := vtImplied; - adFixed : BEGIN - Attr.ValueType := vtFixed; - Attr.Value := AttrDef.Value; - END; - END; - Attr.AttrType := AttrDef.AttrType; - END; - END; - END; - - // --- Normalize Attribute Values. XmlSpec: - // - a character reference is processed by appending the referenced character to the attribute value - // - an entity reference is processed by recursively processing the replacement text of the entity - // - a whitespace character (#x20, #xD, #xA, #x9) is processed by appending #x20 to the normalized value, - // except that only a single #x20 is appended for a "#xD#xA" sequence that is part of an external - // parsed entity or the literal entity value of an internal parsed entity - // - other characters are processed by appending them to the normalized value - // If the declared value is not CDATA, then the XML processor must further process the - // normalized attribute value by discarding any leading and trailing space (#x20) characters, - // and by replacing sequences of space (#x20) characters by a single space (#x20) character. - // All attributes for which no declaration has been read should be treated by a - // non-validating parser as if declared CDATA. - // !!! The XML 1.0 SE specification is somewhat different here - // This code does not conform exactly to this specification - FOR I := 0 TO CurAttr.Count-1 DO - WITH TAttr (CurAttr [I]) DO BEGIN - ReplaceGeneralEntities (Value); - ReplaceCharacterEntities (Value); - IF (AttrType <> atCData) AND (AttrType <> atUnknown) - THEN Value := TranslateEncoding (TrimWs (ConvertWs (Value, TRUE))) - ELSE Value := TranslateEncoding (ConvertWs (Value, FALSE)); - END; - END; -END; - - -PROCEDURE TXmlParser.AnalyzeCData; - // Analyze CDATA Sections -BEGIN - CurPartType := ptCData; - CurFinal := StrPos (CurStart, CDEnd); - IF CurFinal = NIL THEN BEGIN - CurFinal := StrEnd (CurStart)-1; - CurContent := TranslateEncoding (StrPas (CurStart+Length (CDStart))); - END - ELSE BEGIN - SetStringSF (CurContent, CurStart+Length (CDStart), CurFinal-1); - INC (CurFinal, Length (CDEnd)-1); - CurContent := TranslateEncoding (CurContent); - END; -END; - - -PROCEDURE TXmlParser.AnalyzeText (VAR IsDone : BOOLEAN); - (* Analyzes Text Content between Tags. CurFinal will point to the last content character. - Content ends at a '<' character or at the end of the document. - Entity References and Character Entity references are resolved. - If PackSpaces is TRUE, contiguous Whitespace Characters will be compressed to - one Space #x20 character, Whitespace at the beginning and end of content will - be trimmed off and content which is or becomes empty is not returned to - the application (in this case, "IsDone" is set to FALSE which causes the - Scan method to proceed directly to the next part. *) - - PROCEDURE ProcessEntity; - (* Is called if there is an ampsersand '&' character found in the document. - IN "CurFinal" points to the ampersand - OUT "CurFinal" points to the first character after the semi-colon ';' *) - VAR - P : PChar; - Name : STRING; - EntityDef : TEntityDef; - ExternalEntity : TXmlParser; - BEGIN - P := StrScan (CurFinal , ';'); - IF P <> NIL THEN BEGIN - SetStringSF (Name, CurFinal+1, P-1); - - // Is it a Character Entity? - IF (CurFinal+1)^ = '#' THEN BEGIN - IF UpCase ((CurFinal+2)^) = 'X' // !!! Can't use "CHR" for Unicode characters > 255: - THEN CurContent := CurContent + CHR (StrToIntDef ('$'+Copy (Name, 3, MaxInt), 32)) - ELSE CurContent := CurContent + CHR (StrToIntDef (Copy (Name, 2, MaxInt), 32)); - CurFinal := P+1; - EXIT; - END - - // Is it a Predefined Entity? - ELSE IF Name = 'lt' THEN BEGIN CurContent := CurContent + '<'; CurFinal := P+1; EXIT; END - ELSE IF Name = 'gt' THEN BEGIN CurContent := CurContent + '>'; CurFinal := P+1; EXIT; END - ELSE IF Name = 'amp' THEN BEGIN CurContent := CurContent + '&'; CurFinal := P+1; EXIT; END - ELSE IF Name = 'apos' THEN BEGIN CurContent := CurContent + ''''; CurFinal := P+1; EXIT; END - ELSE IF Name = 'quot' THEN BEGIN CurContent := CurContent + '"'; CurFinal := P+1; EXIT; END; - - // Replace with Entity from DTD - EntityDef := TEntityDef (Entities.Node (Name)); - IF EntityDef <> NIL THEN BEGIN - IF EntityDef.Value <> '' THEN BEGIN - EntityStack.Push (P+1); - CurFinal := PChar (EntityDef.Value); - END - ELSE BEGIN - ExternalEntity := LoadExternalEntity (EntityDef.SystemId, EntityDef.PublicId, EntityDef.NotationName); - EntityStack.Push (ExternalEntity, P+1); - CurFinal := ExternalEntity.DocBuffer; - END; - END - ELSE BEGIN - CurContent := CurContent + Name; - CurFinal := P+1; - END; - END - ELSE BEGIN - INC (CurFinal); - END; - END; - -VAR - C : INTEGER; -BEGIN - CurFinal := CurStart; - CurPartType := ptContent; - CurContent := ''; - C := 0; - REPEAT - CASE CurFinal^ OF - '&' : BEGIN - CurContent := CurContent + TranslateEncoding (StrLPas (CurFinal-C, C)); - C := 0; - ProcessEntity; - CONTINUE; - END; - #0 : BEGIN - IF EntityStack.Count = 0 THEN - BREAK - ELSE BEGIN - CurContent := CurContent + TranslateEncoding (StrLPas (CurFinal-C, C)); - C := 0; - CurFinal := EntityStack.Pop; - CONTINUE; - END; - END; - '<' : BREAK; - ELSE INC (C); - END; - INC (CurFinal); - UNTIL FALSE; - CurContent := CurContent + TranslateEncoding (StrLPas (CurFinal-C, C)); - DEC (CurFinal); - - IF FNormalize THEN BEGIN - CurContent := ConvertWs (TrimWs (CurContent), TRUE); - IsDone := CurContent <> ''; // IsDone will only get FALSE if PackSpaces is TRUE - END; -END; - - -PROCEDURE TXmlParser.AnalyzeElementDecl (Start : PChar; VAR Final : PChar); - (* Parse ' character - XmlSpec 3.2: - elementdecl ::= '' - contentspec ::= 'EMPTY' | 'ANY' | Mixed | children - Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | - '(' S? '#PCDATA' S? ')' - children ::= (choice | seq) ('?' | '*' | '+')? - choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' - cp ::= (Name | choice | seq) ('?' | '*' | '+')? - seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' - - More simply: - contentspec ::= EMPTY - ANY - '(#PCDATA)' - '(#PCDATA | A | B)*' - '(A, B, C)' - '(A | B | C)' - '(A?, B*, C+), - '(A, (B | C | D)* )' *) -VAR - Element : TElemDef; - Elem2 : TElemDef; - F : PChar; - DER : TDtdElementRec; -BEGIN - Element := TElemDef.Create; - Final := Start + 9; - DER.Start := Start; - REPEAT - IF Final^ = '>' THEN BREAK; - IF (Final^ IN CNameStart) AND (Element.Name = '') THEN BEGIN - ExtractName (Final, CWhitespace, F); - SetStringSF (Element.Name, Final, F); - Final := F; - F := StrScan (Final+1, '>'); - IF F = NIL THEN BEGIN - Element.Definition := STRING (Final); - Final := StrEnd (Final); - BREAK; - END - ELSE BEGIN - SetStringSF (Element.Definition, Final+1, F-1); - Final := F; - BREAK; - END; - END; - INC (Final); - UNTIL FALSE; - Element.Definition := DelChars (Element.Definition, CWhitespace); - ReplaceParameterEntities (Element.Definition); - IF Element.Definition = 'EMPTY' THEN Element.ElemType := etEmpty - ELSE IF Element.Definition = 'ANY' THEN Element.ElemType := etAny - ELSE IF Copy (Element.Definition, 1, 8) = '(#PCDATA' THEN Element.ElemType := etMixed - ELSE IF Copy (Element.Definition, 1, 1) = '(' THEN Element.ElemType := etChildren - ELSE Element.ElemType := etAny; - - Elem2 := Elements.Node (Element.Name); - IF Elem2 <> NIL THEN - Elements.Delete (Elements.IndexOf (Elem2)); - Elements.Add (Element); - Final := StrScanE (Final, '>'); - DER.ElementType := deElement; - DER.ElemDef := Element; - DER.Final := Final; - DtdElementFound (DER); -END; - - -PROCEDURE TXmlParser.AnalyzeAttListDecl (Start : PChar; VAR Final : PChar); - (* Parse ' character - XmlSpec 3.3: - AttlistDecl ::= '' - AttDef ::= S Name S AttType S DefaultDecl - AttType ::= StringType | TokenizedType | EnumeratedType - StringType ::= 'CDATA' - TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' - EnumeratedType ::= NotationType | Enumeration - NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' - Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' - DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) - AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" - Examples: - *) -TYPE - TPhase = (phElementName, phName, phType, phNotationContent, phDefault); -VAR - Phase : TPhase; - F : PChar; - ElementName : STRING; - ElemDef : TElemDef; - AttrDef : TAttrDef; - AttrDef2 : TAttrDef; - Strg : STRING; - DER : TDtdElementRec; -BEGIN - Final := Start + 9; // The character after ' : BREAK; - ELSE CASE Phase OF - phElementName : BEGIN - ExtractName (Final, CWhitespace + CQuoteChar + ['#'], F); - SetStringSF (ElementName, Final, F); - Final := F; - ElemDef := Elements.Node (ElementName); - IF ElemDef = NIL THEN BEGIN - ElemDef := TElemDef.Create; - ElemDef.Name := ElementName; - ElemDef.Definition := 'ANY'; - ElemDef.ElemType := etAny; - Elements.Add (ElemDef); - END; - Phase := phName; - END; - phName : BEGIN - AttrDef := TAttrDef.Create; - ExtractName (Final, CWhitespace + CQuoteChar + ['#'], F); - SetStringSF (AttrDef.Name, Final, F); - Final := F; - AttrDef2 := TAttrDef (ElemDef.Node (AttrDef.Name)); - IF AttrDef2 <> NIL THEN - ElemDef.Delete (ElemDef.IndexOf (AttrDef2)); - ElemDef.Add (AttrDef); - Phase := phType; - END; - phType : BEGIN - IF Final^ = '(' THEN BEGIN - F := StrScan (Final+1, ')'); - IF F <> NIL - THEN SetStringSF (AttrDef.TypeDef, Final+1, F-1) - ELSE AttrDef.TypeDef := STRING (Final+1); - AttrDef.TypeDef := DelChars (AttrDef.TypeDef, CWhitespace); - AttrDef.AttrType := atEnumeration; - ReplaceParameterEntities (AttrDef.TypeDef); - ReplaceCharacterEntities (AttrDef.TypeDef); - Phase := phDefault; - END - ELSE IF StrLComp (Final, 'NOTATION', 8) = 0 THEN BEGIN - INC (Final, 8); - AttrDef.AttrType := atNotation; - Phase := phNotationContent; - END - ELSE BEGIN - ExtractName (Final, CWhitespace+CQuoteChar+['#'], F); - SetStringSF (AttrDef.TypeDef, Final, F); - IF AttrDef.TypeDef = 'CDATA' THEN AttrDef.AttrType := atCData - ELSE IF AttrDef.TypeDef = 'ID' THEN AttrDef.AttrType := atId - ELSE IF AttrDef.TypeDef = 'IDREF' THEN AttrDef.AttrType := atIdRef - ELSE IF AttrDef.TypeDef = 'IDREFS' THEN AttrDef.AttrType := atIdRefs - ELSE IF AttrDef.TypeDef = 'ENTITY' THEN AttrDef.AttrType := atEntity - ELSE IF AttrDef.TypeDef = 'ENTITIES' THEN AttrDef.AttrType := atEntities - ELSE IF AttrDef.TypeDef = 'NMTOKEN' THEN AttrDef.AttrType := atNmToken - ELSE IF AttrDef.TypeDef = 'NMTOKENS' THEN AttrDef.AttrType := atNmTokens; - Phase := phDefault; - END - END; - phNotationContent : BEGIN - F := StrScan (Final, ')'); - IF F <> NIL THEN - SetStringSF (AttrDef.Notations, Final+1, F-1) - ELSE BEGIN - AttrDef.Notations := STRING (Final+1); - Final := StrEnd (Final); - END; - ReplaceParameterEntities (AttrDef.Notations); - AttrDef.Notations := DelChars (AttrDef.Notations, CWhitespace); - Phase := phDefault; - END; - phDefault : BEGIN - IF Final^ = '#' THEN BEGIN - ExtractName (Final, CWhiteSpace + CQuoteChar, F); - SetStringSF (Strg, Final, F); - Final := F; - ReplaceParameterEntities (Strg); - IF Strg = '#REQUIRED' THEN BEGIN AttrDef.DefaultType := adRequired; Phase := phName; END - ELSE IF Strg = '#IMPLIED' THEN BEGIN AttrDef.DefaultType := adImplied; Phase := phName; END - ELSE IF Strg = '#FIXED' THEN AttrDef.DefaultType := adFixed; - END - ELSE IF (Final^ IN CQuoteChar) THEN BEGIN - ExtractQuote (Final, AttrDef.Value, Final); - ReplaceParameterEntities (AttrDef.Value); - ReplaceCharacterEntities (AttrDef.Value); - Phase := phName; - END; - IF Phase = phName THEN BEGIN - AttrDef := NIL; - END; - END; - - END; - END; - INC (Final); - UNTIL FALSE; - - Final := StrScan (Final, '>'); - - DER.ElementType := deAttList; - DER.ElemDef := ElemDef; - DER.Final := Final; - DtdElementFound (DER); -END; - - -PROCEDURE TXmlParser.AnalyzeEntityDecl (Start : PChar; VAR Final : PChar); - (* Parse ' character - XmlSpec 4.2: - EntityDecl ::= '' | - '' - EntityDef ::= EntityValue | (ExternalID NDataDecl?) - PEDef ::= EntityValue | ExternalID - NDataDecl ::= S 'NDATA' S Name - EntityValue ::= '"' ([^%&"] | PEReference | EntityRef | CharRef)* '"' | - "'" ([^%&'] | PEReference | EntityRef | CharRef)* "'" - PEReference ::= '%' Name ';' - - Examples - - - - "> - - - Dies ist ein Test-Absatz

"> - *) -TYPE - TPhase = (phName, phContent, phNData, phNotationName, phFinalGT); -VAR - Phase : TPhase; - IsParamEntity : BOOLEAN; - F : PChar; - ExternalID : TExternalID; - EntityDef : TEntityDef; - EntityDef2 : TEntityDef; - DER : TDtdElementRec; -BEGIN - Final := Start + 8; // First char after ' : BREAK; - ELSE CASE Phase OF - phName : IF Final^ IN CNameStart THEN BEGIN - ExtractName (Final, CWhitespace + CQuoteChar, F); - SetStringSF (EntityDef.Name, Final, F); - Final := F; - Phase := phContent; - END; - phContent : IF Final^ IN CQuoteChar THEN BEGIN - ExtractQuote (Final, EntityDef.Value, Final); - Phase := phFinalGT; - END - ELSE IF (StrLComp (Final, 'SYSTEM', 6) = 0) OR - (StrLComp (Final, 'PUBLIC', 6) = 0) THEN BEGIN - ExternalID := TExternalID.Create (Final); - EntityDef.SystemId := ExternalID.SystemId; - EntityDef.PublicId := ExternalID.PublicId; - Final := ExternalID.Final; - Phase := phNData; - ExternalID.Free; - END; - phNData : IF StrLComp (Final, 'NDATA', 5) = 0 THEN BEGIN - INC (Final, 4); - Phase := phNotationName; - END; - phNotationName : IF Final^ IN CNameStart THEN BEGIN - ExtractName (Final, CWhitespace + ['>'], F); - SetStringSF (EntityDef.NotationName, Final, F); - Final := F; - Phase := phFinalGT; - END; - phFinalGT : ; // -!- There is an error in the document if this branch is called - END; - END; - INC (Final); - UNTIL FALSE; - IF IsParamEntity THEN BEGIN - EntityDef2 := TEntityDef (ParEntities.Node (EntityDef.Name)); - IF EntityDef2 <> NIL THEN - ParEntities.Delete (ParEntities.IndexOf (EntityDef2)); - ParEntities.Add (EntityDef); - ReplaceCharacterEntities (EntityDef.Value); - END - ELSE BEGIN - EntityDef2 := TEntityDef (Entities.Node (EntityDef.Name)); - IF EntityDef2 <> NIL THEN - Entities.Delete (Entities.IndexOf (EntityDef2)); - Entities.Add (EntityDef); - ReplaceParameterEntities (EntityDef.Value); // Create replacement texts (see XmlSpec 4.5) - ReplaceCharacterEntities (EntityDef.Value); - END; - Final := StrScanE (Final, '>'); - - DER.ElementType := deEntity; - DER.EntityDef := EntityDef; - DER.Final := Final; - DtdElementFound (DER); -END; - - -PROCEDURE TXmlParser.AnalyzeNotationDecl (Start : PChar; VAR Final : PChar); - // Parse ' character - // XmlSpec 4.7: NotationDecl ::= '' -TYPE - TPhase = (phName, phExtId, phEnd); -VAR - ExternalID : TExternalID; - Phase : TPhase; - F : PChar; - NotationDef : TNotationDef; - DER : TDtdElementRec; -BEGIN - Final := Start + 10; // Character after ', - #0 : BREAK; - ELSE CASE Phase OF - phName : BEGIN - ExtractName (Final, CWhitespace + ['>'], F); - SetStringSF (NotationDef.Name, Final, F); - Final := F; - Phase := phExtId; - END; - phExtId : BEGIN - ExternalID := TExternalID.Create (Final); - NotationDef.Value := ExternalID.SystemId; - NotationDef.PublicId := ExternalID.PublicId; - Final := ExternalId.Final; - ExternalId.Free; - Phase := phEnd; - END; - phEnd : ; // -!- There is an error in the document if this branch is called - END; - END; - INC (Final); - UNTIL FALSE; - Notations.Add (NotationDef); - Final := StrScanE (Final, '>'); - - DER.ElementType := deNotation; - DER.NotationDef := NotationDef; - DER.Final := Final; - DtdElementFound (DER); -END; - - -PROCEDURE TXmlParser.PushPE (VAR Start : PChar); - (* If there is a parameter entity reference found in the data stream, - the current position will be pushed to the entity stack. - Start: IN Pointer to the '%' character starting the PE reference - OUT Pointer to first character of PE replacement text *) -VAR - P : PChar; - EntityDef : TEntityDef; -BEGIN - P := StrScan (Start, ';'); - IF P <> NIL THEN BEGIN - EntityDef := TEntityDef (ParEntities.Node (StrSFPas (Start+1, P-1))); - IF EntityDef <> NIL THEN BEGIN - EntityStack.Push (P+1); - Start := PChar (EntityDef.Value); - END - ELSE - Start := P+1; - END; -END; - - -PROCEDURE TXmlParser.ReplaceCharacterEntities (VAR Str : STRING); - // Replaces all Character Entity References in the String -VAR - Start : INTEGER; - PAmp : PChar; - PSemi : PChar; - PosAmp : INTEGER; - Len : INTEGER; // Length of Entity Reference -BEGIN - IF Str = '' THEN EXIT; - Start := 1; - REPEAT - PAmp := StrPos (PChar (Str) + Start-1, '&#'); - IF PAmp = NIL THEN BREAK; - PSemi := StrScan (PAmp+2, ';'); - IF PSemi = NIL THEN BREAK; - PosAmp := PAmp - PChar (Str) + 1; - Len := PSemi-PAmp+1; - IF CompareText (Str [PosAmp+2], 'x') = 0 // !!! Can't use "CHR" for Unicode characters > 255 - THEN Str [PosAmp] := CHR (StrToIntDef ('$'+Copy (Str, PosAmp+3, Len-4), 0)) - ELSE Str [PosAmp] := CHR (StrToIntDef (Copy (Str, PosAmp+2, Len-3), 32)); - Delete (Str, PosAmp+1, Len-1); - Start := PosAmp + 1; - UNTIL FALSE; -END; - - -PROCEDURE TXmlParser.ReplaceParameterEntities (VAR Str : STRING); - // Recursively replaces all Parameter Entity References in the String - PROCEDURE ReplaceEntities (VAR Str : STRING); - VAR - Start : INTEGER; - PAmp : PChar; - PSemi : PChar; - PosAmp : INTEGER; - Len : INTEGER; - Entity : TEntityDef; - Repl : STRING; // Replacement - BEGIN - IF Str = '' THEN EXIT; - Start := 1; - REPEAT - PAmp := StrPos (PChar (Str)+Start-1, '%'); - IF PAmp = NIL THEN BREAK; - PSemi := StrScan (PAmp+2, ';'); - IF PSemi = NIL THEN BREAK; - PosAmp := PAmp - PChar (Str) + 1; - Len := PSemi-PAmp+1; - Entity := TEntityDef (ParEntities.Node (Copy (Str, PosAmp+1, Len-2))); - IF Entity <> NIL THEN BEGIN - Repl := Entity.Value; - ReplaceEntities (Repl); // Recursion - END - ELSE - Repl := Copy (Str, PosAmp, Len); - Delete (Str, PosAmp, Len); - Insert (Repl, Str, PosAmp); - Start := PosAmp + Length (Repl); - UNTIL FALSE; - END; -BEGIN - ReplaceEntities (Str); -END; - - -PROCEDURE TXmlParser.ReplaceGeneralEntities (VAR Str : STRING); - // Recursively replaces General Entity References in the String - PROCEDURE ReplaceEntities (VAR Str : STRING); - VAR - Start : INTEGER; - PAmp : PChar; - PSemi : PChar; - PosAmp : INTEGER; - Len : INTEGER; - EntityDef : TEntityDef; - EntName : STRING; - Repl : STRING; // Replacement - ExternalEntity : TXmlParser; - BEGIN - IF Str = '' THEN EXIT; - Start := 1; - REPEAT - PAmp := StrPos (PChar (Str)+Start-1, '&'); - IF PAmp = NIL THEN BREAK; - PSemi := StrScan (PAmp+2, ';'); - IF PSemi = NIL THEN BREAK; - PosAmp := PAmp - PChar (Str) + 1; - Len := PSemi-PAmp+1; - EntName := Copy (Str, PosAmp+1, Len-2); - IF EntName = 'lt' THEN Repl := '<' - ELSE IF EntName = 'gt' THEN Repl := '>' - ELSE IF EntName = 'amp' THEN Repl := '&' - ELSE IF EntName = 'apos' THEN Repl := '''' - ELSE IF EntName = 'quot' THEN Repl := '"' - ELSE BEGIN - EntityDef := TEntityDef (Entities.Node (EntName)); - IF EntityDef <> NIL THEN BEGIN - IF EntityDef.Value <> '' THEN // Internal Entity - Repl := EntityDef.Value - ELSE BEGIN // External Entity - ExternalEntity := LoadExternalEntity (EntityDef.SystemId, EntityDef.PublicId, EntityDef.NotationName); - Repl := StrPas (ExternalEntity.DocBuffer); // !!! What if it contains a Text Declaration? - ExternalEntity.Free; - END; - ReplaceEntities (Repl); // Recursion - END - ELSE - Repl := Copy (Str, PosAmp, Len); - END; - Delete (Str, PosAmp, Len); - Insert (Repl, Str, PosAmp); - Start := PosAmp + Length (Repl); - UNTIL FALSE; - END; -BEGIN - ReplaceEntities (Str); -END; - - -FUNCTION TXmlParser.LoadExternalEntity (SystemId, PublicId, Notation : STRING) : TXmlParser; - // This will be called whenever there is a Parsed External Entity or - // the DTD External Subset to be parsed. - // It has to create a TXmlParser instance and load the desired Entity. - // This instance of LoadExternalEntity assumes that "SystemId" is a valid - // file name (relative to the Document source) and loads this file using - // the LoadFromFile method. -VAR - Filename : STRING; -BEGIN - // --- Convert System ID to complete filename - Filename := StringReplace (SystemId, '/', '\', [rfReplaceAll]); - IF Copy (FSource, 1, 1) <> '<' THEN - IF (Copy (Filename, 1, 2) = '\\') OR (Copy (Filename, 2, 1) = ':') THEN - // Already has an absolute Path - ELSE BEGIN - Filename := ExtractFilePath (FSource) + Filename; - END; - - // --- Load the File - Result := TXmlParser.Create; - Result.LoadFromFile (Filename); -END; - - -FUNCTION TXmlParser.TranslateEncoding (CONST Source : STRING) : STRING; - // The member variable "CurEncoding" always holds the name of the current - // encoding, e.g. 'UTF-8' or 'ISO-8859-1'. - // This virtual method "TranslateEncoding" is responsible for translating - // the content passed in the "Source" parameter to the Encoding which - // is expected by the application. - // This instance of "TranlateEncoding" assumes that the Application expects - // Windows ANSI (Win1252) strings. It is able to transform UTF-8 or ISO-8859-1 - // encodings. - // If you want your application to understand or create other encodings, you - // override this function. -BEGIN - IF CurEncoding = 'UTF-8' - THEN Result := Utf8ToAnsi (Source) - ELSE Result := Source; -END; - - -PROCEDURE TXmlParser.DtdElementFound (DtdElementRec : TDtdElementRec); - // This method is called for every element which is found in the DTD - // declaration. The variant record TDtdElementRec is passed which - // holds informations about the element. - // You can override this function to handle DTD declarations. - // Note that when you parse the same Document instance a second time, - // the DTD will not get parsed again. -BEGIN -END; - - -FUNCTION TXmlParser.GetDocBuffer: PChar; - // Returns FBuffer or a pointer to a NUL char if Buffer is empty -BEGIN - IF FBuffer = NIL - THEN Result := #0 - ELSE Result := FBuffer; -END; - - -(*$IFNDEF HAS_CONTNRS_UNIT -=============================================================================================== -TObjectList -=============================================================================================== -*) - -DESTRUCTOR TObjectList.Destroy; -BEGIN - Clear; - SetCapacity(0); - INHERITED Destroy; -END; - - -PROCEDURE TObjectList.Delete (Index : INTEGER); -BEGIN - IF (Index < 0) OR (Index >= Count) THEN EXIT; - TObject (Items [Index]).Free; - INHERITED Delete (Index); -END; - - -PROCEDURE TObjectList.Clear; -BEGIN - WHILE Count > 0 DO - Delete (Count-1); -END; - -(*$ENDIF *) - -(* -=============================================================================================== -TNvpNode --------- -Node base class for the TNvpList -=============================================================================================== -*) - -CONSTRUCTOR TNvpNode.Create (TheName, TheValue : STRING); -BEGIN - INHERITED Create; - Name := TheName; - Value := TheValue; -END; - - -(* -=============================================================================================== -TNvpList --------- -A generic List of Name-Value Pairs, based on the TObjectList introduced in Delphi 5 -=============================================================================================== -*) - -PROCEDURE TNvpList.Add (Node : TNvpNode); -VAR - I : INTEGER; -BEGIN - FOR I := Count-1 DOWNTO 0 DO - IF Node.Name > TNvpNode (Items [I]).Name THEN BEGIN - Insert (I+1, Node); - EXIT; - END; - Insert (0, Node); -END; - - - -FUNCTION TNvpList.Node (Name : STRING) : TNvpNode; - // Binary search for Node -VAR - L, H : INTEGER; // Low, High Limit - T, C : INTEGER; // Test Index, Comparison result - Last : INTEGER; // Last Test Index -BEGIN - IF Count=0 THEN BEGIN - Result := NIL; - EXIT; - END; - - L := 0; - H := Count; - Last := -1; - REPEAT - T := (L+H) DIV 2; - IF T=Last THEN BREAK; - Result := TNvpNode (Items [T]); - C := CompareStr (Result.Name, Name); - IF C = 0 THEN EXIT - ELSE IF C < 0 THEN L := T - ELSE H := T; - Last := T; - UNTIL FALSE; - Result := NIL; -END; - - -FUNCTION TNvpList.Node (Index : INTEGER) : TNvpNode; -BEGIN - IF (Index < 0) OR (Index >= Count) - THEN Result := NIL - ELSE Result := TNvpNode (Items [Index]); -END; - - -FUNCTION TNvpList.Value (Name : STRING) : STRING; -VAR - Nvp : TNvpNode; -BEGIN - Nvp := TNvpNode (Node (Name)); - IF Nvp <> NIL - THEN Result := Nvp.Value - ELSE Result := ''; -END; - - -FUNCTION TNvpList.Value (Index : INTEGER) : STRING; -BEGIN - IF (Index < 0) OR (Index >= Count) - THEN Result := '' - ELSE Result := TNvpNode (Items [Index]).Value; -END; - - -FUNCTION TNvpList.Name (Index : INTEGER) : STRING; -BEGIN - IF (Index < 0) OR (Index >= Count) - THEN Result := '' - ELSE Result := TNvpNode (Items [Index]).Name; -END; - - -(* -=============================================================================================== -TAttrList -List of Attributes. The "Analyze" method extracts the Attributes from the given Buffer. -Is used for extraction of Attributes in Start-Tags, Empty-Element Tags and the "pseudo" -attributes in XML Prologs, Text Declarations and PIs. -=============================================================================================== -*) - -PROCEDURE TAttrList.Analyze (Start : PChar; VAR Final : PChar); - // Analyze the Buffer for Attribute=Name pairs. - // Terminates when there is a character which is not IN CNameStart - // (e.g. '?>' or '>' or '/>') -TYPE - TPhase = (phName, phEq, phValue); -VAR - Phase : TPhase; - F : PChar; - Name : STRING; - Value : STRING; - Attr : TAttr; -BEGIN - Clear; - Phase := phName; - Final := Start; - REPEAT - IF (Final^ = #0) OR (Final^ = '>') THEN BREAK; - IF NOT (Final^ IN CWhitespace) THEN - CASE Phase OF - phName : BEGIN - IF NOT (Final^ IN CNameStart) THEN EXIT; - ExtractName (Final, CWhitespace + ['=', '/'], F); - SetStringSF (Name, Final, F); - Final := F; - Phase := phEq; - END; - phEq : BEGIN - IF Final^ = '=' THEN - Phase := phValue - END; - phValue : BEGIN - IF Final^ IN CQuoteChar THEN BEGIN - ExtractQuote (Final, Value, F); - Attr := TAttr.Create; - Attr.Name := Name; - Attr.Value := Value; - Attr.ValueType := vtNormal; - Add (Attr); - Final := F; - Phase := phName; - END; - END; - END; - INC (Final); - UNTIL FALSE; -END; - - -(* -=============================================================================================== -TElemList -List of TElemDef nodes. -=============================================================================================== -*) - -FUNCTION TElemList.Node (Name : STRING) : TElemDef; - // Binary search for the Node with the given Name -VAR - L, H : INTEGER; // Low, High Limit - T, C : INTEGER; // Test Index, Comparison result - Last : INTEGER; // Last Test Index -BEGIN - IF Count=0 THEN BEGIN - Result := NIL; - EXIT; - END; - - L := 0; - H := Count; - Last := -1; - REPEAT - T := (L+H) DIV 2; - IF T=Last THEN BREAK; - Result := TElemDef (Items [T]); - C := CompareStr (Result.Name, Name); - IF C = 0 THEN EXIT - ELSE IF C < 0 THEN L := T - ELSE H := T; - Last := T; - UNTIL FALSE; - Result := NIL; -END; - - -PROCEDURE TElemList.Add (Node : TElemDef); -VAR - I : INTEGER; -BEGIN - FOR I := Count-1 DOWNTO 0 DO - IF Node.Name > TElemDef (Items [I]).Name THEN BEGIN - Insert (I+1, Node); - EXIT; - END; - Insert (0, Node); -END; - - -(* -=============================================================================================== -TScannerXmlParser -A TXmlParser descendant for the TCustomXmlScanner component -=============================================================================================== -*) - -TYPE - TScannerXmlParser = CLASS (TXmlParser) - Scanner : TCustomXmlScanner; - CONSTRUCTOR Create (TheScanner : TCustomXmlScanner); - FUNCTION LoadExternalEntity (SystemId, PublicId, - Notation : STRING) : TXmlParser; OVERRIDE; - FUNCTION TranslateEncoding (CONST Source : STRING) : STRING; OVERRIDE; - PROCEDURE DtdElementFound (DtdElementRec : TDtdElementRec); OVERRIDE; - END; - -CONSTRUCTOR TScannerXmlParser.Create (TheScanner : TCustomXmlScanner); -BEGIN - INHERITED Create; - Scanner := TheScanner; -END; - - -FUNCTION TScannerXmlParser.LoadExternalEntity (SystemId, PublicId, Notation : STRING) : TXmlParser; -BEGIN - IF Assigned (Scanner.FOnLoadExternal) - THEN Scanner.FOnLoadExternal (Scanner, SystemId, PublicId, Notation, Result) - ELSE Result := INHERITED LoadExternalEntity (SystemId, PublicId, Notation); -END; - - -FUNCTION TScannerXmlParser.TranslateEncoding (CONST Source : STRING) : STRING; -BEGIN - IF Assigned (Scanner.FOnTranslateEncoding) - THEN Result := Scanner.FOnTranslateEncoding (Scanner, CurEncoding, Source) - ELSE Result := INHERITED TranslateEncoding (Source); -END; - - -PROCEDURE TScannerXmlParser.DtdElementFound (DtdElementRec : TDtdElementRec); -BEGIN - WITH DtdElementRec DO - CASE ElementType OF - deElement : Scanner.WhenElement (ElemDef); - deAttList : Scanner.WhenAttList (ElemDef); - deEntity : Scanner.WhenEntity (EntityDef); - deNotation : Scanner.WhenNotation (NotationDef); - dePI : Scanner.WhenPI (STRING (Target), STRING (Content), AttrList); - deComment : Scanner.WhenComment (StrSFPas (Start, Final)); - deError : Scanner.WhenDtdError (Pos); - END; -END; - - -(* -=============================================================================================== -TCustomXmlScanner -=============================================================================================== -*) - -CONSTRUCTOR TCustomXmlScanner.Create (AOwner: TComponent); -BEGIN - INHERITED; - FXmlParser := TScannerXmlParser.Create (Self); -END; - - -DESTRUCTOR TCustomXmlScanner.Destroy; -BEGIN - FXmlParser.Free; - INHERITED; -END; - - -PROCEDURE TCustomXmlScanner.LoadFromFile (Filename : TFilename); - // Load XML Document from file -BEGIN - FXmlParser.LoadFromFile (Filename); -END; - - -PROCEDURE TCustomXmlScanner.LoadFromBuffer (Buffer : PChar); - // Load XML Document from buffer -BEGIN - FXmlParser.LoadFromBuffer (Buffer); -END; - - -PROCEDURE TCustomXmlScanner.SetBuffer (Buffer : PChar); - // Refer to Buffer -BEGIN - FXmlParser.SetBuffer (Buffer); -END; - - -FUNCTION TCustomXmlScanner.GetFilename : TFilename; -BEGIN - Result := FXmlParser.Source; -END; - - -FUNCTION TCustomXmlScanner.GetNormalize : BOOLEAN; -BEGIN - Result := FXmlParser.Normalize; -END; - - -PROCEDURE TCustomXmlScanner.SetNormalize (Value : BOOLEAN); -BEGIN - FXmlParser.Normalize := Value; -END; - - -PROCEDURE TCustomXmlScanner.WhenXmlProlog(XmlVersion, Encoding: STRING; Standalone : BOOLEAN); - // Is called when the parser has parsed the declaration of the prolog -BEGIN - IF Assigned (FOnXmlProlog) THEN FOnXmlProlog (Self, XmlVersion, Encoding, Standalone); -END; - - -PROCEDURE TCustomXmlScanner.WhenComment (Comment : STRING); - // Is called when the parser has parsed a -BEGIN - IF Assigned (FOnComment) THEN FOnComment (Self, Comment); -END; - - -PROCEDURE TCustomXmlScanner.WhenPI (Target, Content: STRING; Attributes : TAttrList); - // Is called when the parser has parsed a -BEGIN - IF Assigned (FOnPI) THEN FOnPI (Self, Target, Content, Attributes); -END; - - -PROCEDURE TCustomXmlScanner.WhenDtdRead (RootElementName : STRING); - // Is called when the parser has completely parsed the DTD -BEGIN - IF Assigned (FOnDtdRead) THEN FOnDtdRead (Self, RootElementName); -END; - - -PROCEDURE TCustomXmlScanner.WhenStartTag (TagName : STRING; Attributes : TAttrList); - // Is called when the parser has parsed a start tag like

-BEGIN - IF Assigned (FOnStartTag) THEN FOnStartTag (Self, TagName, Attributes); -END; - - -PROCEDURE TCustomXmlScanner.WhenEmptyTag (TagName : STRING; Attributes : TAttrList); - // Is called when the parser has parsed an Empty Element Tag like
-BEGIN - IF Assigned (FOnEmptyTag) THEN FOnEmptyTag (Self, TagName, Attributes); -END; - - -PROCEDURE TCustomXmlScanner.WhenEndTag (TagName : STRING); - // Is called when the parser has parsed an End Tag like

-BEGIN - IF Assigned (FOnEndTag) THEN FOnEndTag (Self, TagName); -END; - - -PROCEDURE TCustomXmlScanner.WhenContent (Content : STRING); - // Is called when the parser has parsed an element's text content -BEGIN - IF Assigned (FOnContent) THEN FOnContent (Self, Content); -END; - - -PROCEDURE TCustomXmlScanner.WhenCData (Content : STRING); - // Is called when the parser has parsed a CDATA section -BEGIN - IF Assigned (FOnCData) THEN FOnCData (Self, Content); -END; - - -PROCEDURE TCustomXmlScanner.WhenElement (ElemDef : TElemDef); - // Is called when the parser has parsed an definition - // inside the DTD -BEGIN - IF Assigned (FOnElement) THEN FOnElement (Self, ElemDef); -END; - - -PROCEDURE TCustomXmlScanner.WhenAttList (ElemDef : TElemDef); - // Is called when the parser has parsed an definition - // inside the DTD -BEGIN - IF Assigned (FOnAttList) THEN FOnAttList (Self, ElemDef); -END; - - -PROCEDURE TCustomXmlScanner.WhenEntity (EntityDef : TEntityDef); - // Is called when the parser has parsed an definition - // inside the DTD -BEGIN - IF Assigned (FOnEntity) THEN FOnEntity (Self, EntityDef); -END; - - -PROCEDURE TCustomXmlScanner.WhenNotation (NotationDef : TNotationDef); - // Is called when the parser has parsed a definition - // inside the DTD -BEGIN - IF Assigned (FOnNotation) THEN FOnNotation (Self, NotationDef); -END; - - -PROCEDURE TCustomXmlScanner.WhenDtdError (ErrorPos : PChar); - // Is called when the parser has found an Error in the DTD -BEGIN - IF Assigned (FOnDtdError) THEN FOnDtdError (Self, ErrorPos); -END; - - -PROCEDURE TCustomXmlScanner.Execute; - // Perform scanning - // Scanning is done synchronously, i.e. you can expect events to be triggered - // in the order of the XML data stream. Execute will finish when the whole XML - // document has been scanned or when the StopParser property has been set to TRUE. -BEGIN - FStopParser := FALSE; - FXmlParser.StartScan; - WHILE FXmlParser.Scan AND (NOT FStopParser) DO - CASE FXmlParser.CurPartType OF - ptNone : ; - ptXmlProlog : WhenXmlProlog (FXmlParser.XmlVersion, FXmlParser.Encoding, FXmlParser.Standalone); - ptComment : WhenComment (StrSFPas (FXmlParser.CurStart, FXmlParser.CurFinal)); - ptPI : WhenPI (FXmlParser.CurName, FXmlParser.CurContent, FXmlParser.CurAttr); - ptDtdc : WhenDtdRead (FXmlParser.RootName); - ptStartTag : WhenStartTag (FXmlParser.CurName, FXmlParser.CurAttr); - ptEmptyTag : WhenEmptyTag (FXmlParser.CurName, FXmlParser.CurAttr); - ptEndTag : WhenEndTag (FXmlParser.CurName); - ptContent : WhenContent (FXmlParser.CurContent); - ptCData : WhenCData (FXmlParser.CurContent); - END; -END; - - -END. -- cgit v1.2.3