首页  编辑  

XML解释器

Tags: /超级猛料/User.自定义类、函数单元/   Date Created:
parse an XML file?
Author: Yurii Zhukow  
(*
Here I will show one way to parse XML document
The main concept of XML is using containers for XML objects -
so we will use Tree concept while building our XML object from XML document.
XML text uses containers (<TAG ... >...</TAG>) or simple definitions (<TAG ... />)
in each TAG we can use parameters (<TAG key1="value1" key2="value2">... <SIMPLE key3="value3"> ...</TAG>)
Finally we will have an array of objects, describing XML tags. Every object of this
class will have an array of children if needed, and a hash to describe properties of it.
For example if we have a text
<UL name="xxx">
 <LI name="xxx1"/>
 <LI name="xxx2"/>
 <LI name="xxx3"/>
 <LI name="xxx4"/>
</UL>
we will have one root object (named "UL") in wich we will have 4 children
(named "LI" with different sets of properties - from "NAME"="xxx1" to "NAME"="xxx4")
This is not a trivial task - so we will make a unit to solve this...
I will try to comment some here...
if you have any comments for this unit - write to me: sunworx@mail.ru; yz@infoteh.ru
*)
unit YZXMLParser;

interface

uses
 SysUtils, ComCtrls;

type  
 THashElement = record
   Key, Value: string;
 end;

type  
 THashElementArr = array of THashElement;
 // here  we declare a THash class to use in our parser
 // The concept of THash is to retreive named values from an array
 // Hash is an array where index is a string (example V[Key]=value,
 // whehe Key and Value are of type string)
 // The main purpose of this class is to rerurn a value of a String-named key
 //(example: s:=hash['someValue'])
 // the description of a hash element we use
type  
 THash = class(TObject)
 private
   Arr: THashElementArr;
   function GetValue(Key: string): string;
   procedure SetValue(Key: string; const VValue: string);
   function GetKeys: StrArr;
   function GetValues: StrArr;
   function GetCount: Integer;
   function Getempty: Boolean;
 public
   property Value[Key: string]: string read GetValue write SetValue; default;
   property Values: StrArr read GetValues;
   property Keys: StrArr read GetKeys;
   property Count: Integer read GetCount;
   property Empty: Boolean read Getempty;
   procedure Clear;
   constructor Create;
   destructor Destroy; override;
 end;
 TYZHash = THash;

type
 // Here we declare some definitions for our parser to know what
 // identifier we would receive next in our text
 // these  values will be used in the result of WhatNext() function which will scan text for keys
 TYZXMLMarker = (xmlOpenTag, xmlCloseTagShort, xmlCloseTag, xmlCloseTagLong,
   xmlEOF, xmlIdentifier, xmlunknown);
  {
   Because we use recursive definition of our class(as TreeView, where we declare children of
   the same type in opur type  declaration) we must use forward declaration
  }
 // The definition of a TAG class
 TYZXMLTag  = class;
 TYZXMLTags = array of TYZXMLTag;
 TYZXMLTag = class(TObject)
 private
   FData: TYZHash;
   FParent: TYZXMLTag;
   FName: string;
   function GetValue(AName: string): string;
   procedure SetName(const Value: string);
   procedure SetValue(AName: string; const Value: string);
   function GetCount: Integer;
   function GetValueNames: strarr;
 public
   Children: TYZXMLTags; // these are our child nodes
   Text: string;
   property Name: string read FName write SetName; // name of a tag
   property Values[AName: string]: string read GetValue write SetValue; default; // values of properties of a tag (hash values)
   property ValueNames: strarr read GetValueNames; // array of strings returniong names of all props of this tag
   property Count: Integer read GetCount; // a count of children of a tag (if this tag is a container)
   function SkipSpaces(var AData: string; var APos: Integer; RememberBreaks: Boolean = False): Char;
   // internal. for skip spaces (also CR or LF or other non-text chars) while parsing text
   function ParseValue(var AData: string; var APos: Integer): Boolean;
   // parse value (calling when found a parameter of a tag)
   function ParseName(var AData: string; var APos: Integer): Boolean;
   // parse key of parameter in a tag
   // these two procs used to parse any text found while parsing XML
   function ParseString(var AData: string; var APos: Integer; RememberBreaks: Boolean = False): string;
   function ParseQuotedString(var AData: string; var APos: Integer; QIndef: Char = '"'): string;
   // returnes the type of next identifier in XML
   function WhatNext(var AData: string; var APos: Integer; var ANext: Integer; RememberBreaks: Boolean = False): TYZXMLMarker;
   // This is a main procedure of our class - AData is a string,
   // containing all XML data (you can use TMemo.Text, for example, as a parameter of AData)
   function ParseXML(var AData: string; var APos: Integer): Boolean;
   // This function returnes a text string, built based on data, stored in an object.
   function GenerateXML(var AData: string; ATab: string = ''): Boolean;
   // returnes char from string at specified pos (#0 if not in range)
   function CharAt(var S: string; APos: Integer): Char;
   function TagNameExists(AName: string): Boolean;
   // Adds a child to children array of a current tag
   function AddChild: TYZXMLTag;
   // Initializes current tag and deletes all existing children
   procedure Clear; virtual;
   constructor Create(AParent: TYZXMLTag); virtual;
   destructor Destroy; virtual;
 end;

type  
 TYZXMLParser = class(TYZXMLTag)
 private
   Header: TYZHash;
   procedure _BuildTreeView(ATreeView: TTreeView; ANode: TTreeNode; ATag: TYZXMLTag);
 public
   property HeaderValues: TYZHash read Header;
   procedure BuildTreeView(ATreeView: TTreeView);
   function Parse(AData: string): Boolean;
   function Generate(var AData: string): Boolean;
   constructor Create;
   destructor Destroy;
 end;

implementation

//==============================================================================
{ TYZXMLTag }
function TYZXMLTag.AddChild: TYZXMLTag;
begin
 setlength(children, Length(children) + 1);
 Result := TYZXMLTag.Create(Self);
 children[Length(children) - 1] := Result;
end;
//------------------------------------------------------------------------------
procedure TYZXMLTag.Clear;
var  
 i: Integer;
begin
 for i := 0 to Count - 1 do if children[i] <> nil then Children[i].Destroy;
 setlength(children, 0);
 FData.Clear;
 Text := '';
end;
//------------------------------------------------------------------------------
constructor TYZXMLTag.Create(AParent: TYZXMLTag);
begin
 inherited Create;
 FData   := TYZHash.Create;
 FParent := AParent;
 Clear;
end;
//------------------------------------------------------------------------------
destructor TYZXMLTag.Destroy;
begin
 Clear;
 FData.Destroy;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.GetCount: Integer;
begin
 Result := Length(children);
end;
//------------------------------------------------------------------------------
function TYZXMLTag.GetValue(AName: string): string;
begin
 Result := FData[AName];
end;
//------------------------------------------------------------------------------
function TYZXMLTag.ParseName(var AData: string; var APos: Integer): Boolean;
begin
 Result := False;
 FName  := ParseString(AData, APos);
 if fname = '' then Exit;
 Result := True;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.ParseQuotedString(var AData: string; var APos: Integer;
 QIndef: Char = '"'): string;
var  
 i: Integer;
 skipnext: Boolean;
 z: Char;
begin
 Result := '';
 if CharAt(AData, APos) <> QIndef then Exit;
 i        := apos;
 skipnext := True;
 repeat
   if not skipnext then
   begin
     if charat(AData, I) = '\' then SkipNext := True  
     else
     begin
       z := charat(AData, I);
       if (Z = QIndef) or (z = #0) then
       begin
         Result := Copy(AData, aPos + 1, I - APos - 1);
         //          result:=exch(result,'\','');
         APos := I + 1;
         Exit;
       end;
     end;
   end  
   else  
     skipnext := False;
   Inc(i);
 until False;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.ParseString(var AData: string; var APos: Integer;
 RememberBreaks: Boolean = False): string;
const  
 extsym: string = '=<>;?*/';
var  
 nxt: Char;
 x1, x2, i: Integer;
begin
 Result := '';
 nxt    := SkipSpaces(AData, APos, RememberBreaks);
 if nxt = #0 then Exit;
 if (nxt = '"') or (nxt = '''') then  
 begin  
   Result := ParseQuotedString(AData, APos);  
   Exit;  
 end;
 x1  := APos;
 i   := x1;
 nxt := CharAt(AData, i);
 while ((Ord(nxt) <= 32) or (Pos(nxt, extsym) > 0)) and (nxt <> #0) do  
 begin  
   Inc(i);  
   nxt := CharAt(AData, i);  
 end;
 APos := i;
 X1 := APos;
 while (Ord(nxt) > 32) and (Pos(nxt, extsym) <= 0) do  
 begin  
   Inc(i);  
   nxt := CharAt(AData, i);  
 end;
 x2 := i - x1;
 Result := Copy(AData, x1, x2);
 APos := i;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.ParseValue(var AData: string; var APos: Integer): Boolean;
var  
 n, v: string;
 i, x: Integer;
begin
 Result := False;
 n := parseString(AData, APos);
 if n = '' then Exit;
 if skipspaces(AData, APos) <> '=' then Exit;
 Inc(apos);
 V := parseString(AData, APos);
 fdata[n] := dequote(v);
 Result := True;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.ParseXML(var AData: string; var APos: Integer): Boolean;
var  
 N: TYZXMLMarker;
 nxt: Integer;
 isLong: Boolean;
 inTag: Boolean;
begin
 isLong := False;
 Result := False;
 Clear;
 if WhatNext(AData, APos, nxt) <> xmlOpenTag then Exit;
 APos := nxt;
 if WhatNext(AData, APos, nxt) <> xmlIdentifier then Exit;
 Result := ParseName(AData, APos);
 if not Result then Exit;
 intag  := True;
 Result := False;
 while True do
 begin
   N := WhatNext(AData, APos, nxt, (not intag and islong and (Count > 0)));
   case N of
     xmlEOF: Exit;
     xmlCloseTagLong:  
       begin  
         Result := True;  
         if islong then APos := nxt;  
         if (Text <> '') and (Count > 0) then  
         begin  
           Text := exch(Text, #13#10#13#10, #13#10);  
         end;
         Exit;  
       end;
     xmlCloseTagShort:  
       begin  
         Result := (not IsLong) and intag;  
         if Result then APos := nxt;  
         Exit;  
       end;
     xmlOpenTag:  
       begin  
         if islong then Result := AddChild.ParseXML(AData, APos)  
         else  
         begin  
           Result := False;  
           Exit;  
         end;  
         if not Result then Exit;  
       end;
     xmlCloseTag:  
       begin  
         IsLong := True;  
         APos   := nxt;  
         intag  := False;  
       end;
     xmlIdentifier:  
       begin  
         if intag then parsevalue(AData, APos)  
         else  
           Text := Text + ParseString(AData, APos, True)  
       end;
     xmlUnknown:  
       begin  
         Result := True;  
         Exit;  
       end;
   end;
 end;
end;
//------------------------------------------------------------------------------
procedure TYZXMLTag.SetName(const Value: string);
begin
 FName := Value;
end;
//------------------------------------------------------------------------------
procedure TYZXMLTag.SetValue(AName: string; const Value: string);
begin
 FData[AName] := Value;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.SkipSpaces(var AData: string; var APos: Integer;
 RememberBreaks: Boolean = False): Char;
var  
 L: Integer;
 P: Char;
begin
 L := Length(AData);
 while APos <= L do
 begin
   P := AData[APos];
   if Ord(p) > 32 then  
   begin  
     Result := p;  
     Exit;  
   end  
   else if rememberbreaks then
   begin
     if Pos(p, #13#9' ') > 0 then
       Text := Text + ' ';
   end;
   Inc(APos);
 end;
 Result := #0;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.CharAt(var S: string; APos: Integer): Char;
begin
 Result := #0;
 if (Length(s) < APos) or (apos < 1) then Exit;
 Result := s[APos];
end;
//------------------------------------------------------------------------------
function TYZXMLTag.WhatNext(var AData: string; var APos: Integer;
 var ANext: Integer; RememberBreaks: Boolean = False): TYZXMLMarker;
var  
 s: string;
 C: Char;
 P: Integer;
begin
 Result := xmlEOF;
 P := APos;
 C := SkipSpaces(AData, APos);
 P := APos;
 ANext  := P;
 if C = #0 then Exit;
 if C = '<' then if CharAt(AData, P + 1) = '/' then
   begin
     Inc(P, 2);
     s := parsestring(AData, P);
     if (uppercase(s) = uppercase(FName)) and (SkipSpaces(AData, P) = '>') then
     begin  
       ANext := P + 1;
       Result := xmlCloseTagLong;  
       Exit;  
     end  
     else
     begin
       if TagNameExists(s) then
       begin
         Result := xmlCloseTagLong;
         ANext := APos;
         Exit;
       end;
       ANext  := P + 1;
       Result := xmlCloseTagLong;
       Exit;
     end;
   end;
 if C = '<' then  
 begin  
   ANext := P + 1;
   Result := xmlOpenTag;  
   Exit;  
 end;
 if C = '>' then  
 begin  
   ANext := P + 1;
   Result := xmlCloseTag;  
   Exit;  
 end;
 if C = '/' then if CharAt(AData, P + 1) = '>' then  
   begin  
     ANext := P + 2;
     Result := xmlCloseTagShort;  
     Exit;
   end;
 ANext := P;
 parsestring(AData, ANext);
 Result := xmlIdentifier;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.GetValueNames: strarr;
begin
 Result := FData.Keys;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.GenerateXML(var AData: string; ATab: string = ''): Boolean;
var  
 valDelimiter: string;
 spc: string;
 i: Integer;
 a: strarr;
begin
 spc := ATab + #9;
 if FData.Count < 5 then valDelimiter := ' '  
 else  
   valDelimiter := #13#10 + spc;
 AData := AData + #13#10 + ATab + '<' + FName;
 a     := FData.keys;
 for i := 0 to Length(a) - 1 do
 begin
   AData := AData + valDelimiter + a[i] + ' = "' + EnQuote(values[a[i]]) + '"';
 end;
 if (Count > 0) or (Text <> '') then
 begin
   AData := AData + '>' + Text;
   for i := 0 to Count - 1 do
   begin
     Children[i].GenerateXML(AData, ATab + #9);
   end;
   AData := AData + #13#10 + ATab + '</' + FName + '>';
 end  
 else  
   AData := AData + '/>';
 Result := True;
end;
//------------------------------------------------------------------------------
function TYZXMLTag.TagNameExists(AName: string): Boolean;
begin
 Result := AnsiUpperCase(AName) = AnsiUpperCase(Self.FName);
 if Self.FParent = nil then Exit;
 if not Result then Result := fparent.TagNameExists(AName);
end;
//==============================================================================
{ TYZXMLParser }
constructor TYZXMLParser.Create;
begin
 Header := TYZHash.Create;
 inherited Create(nil);
end;
//------------------------------------------------------------------------------
destructor TYZXMLParser.Destroy;
begin
 inherited;
 Header.Destroy;
end;
//------------------------------------------------------------------------------
procedure TYZXMLParser.BuildTreeView(ATreeView: TTreeView);
var  
 i: Integer;
begin
 //  clear;
 ATreeView.Items.Clear;
 for i := 0 to Count - 1 do _BuildTreeView(ATreeView, nil, children[i]);
end;
//------------------------------------------------------------------------------
procedure TYZXMLParser._BuildTreeView(ATreeView: TTreeView; ANode: TTreeNode;
 ATag: TYZXMLTag);
var  
 i: Integer;
 N: TTreeNode;
begin
 N := ATreeView.Items.AddChildObject(ANode, ATag.Name + ' ' + FData['ID'], Pointer(ATag));
 for i := 0 to ATag.Count - 1 do
 begin
   if ATag.children[i] <> nil then _BuildTreeView(ATreeView, N, ATag.children[i])  
   else  
     ATreeView.Items.AddChild(N, 'nil');
 end;
 N.Expanded := True;
end;
//------------------------------------------------------------------------------
function TYZXMLParser.Parse(AData: string): Boolean;
var  
 x1, x2, X, i: Integer;
 s: string;
 tmp: TYZXMLTag;
 a: strarr;
 N: TYZXMLMarker;
begin
 X := 1;
 Self.SkipSpaces(AData, X);
 x2 := -1;
 Result := False;
 Clear;
 Header.Clear;
 x1 := Pos('<?', AData);
 if x1 >= X then
 begin
   x2 := Pos('?>', AData);
   if x2 < X then Exit;
   s := uppercase(Copy(AData, x1 + 2, 4));
   if Pos('XML ', s) <> 1 then Exit;
   s   := '<xml ' + Copy(AData, x1 + 6, x2 - x1 - 6) + '/>';
   tmp := TYZXMLTag.Create(nil);
   tmp.ParseXML(s, x);
   a := tmp.ValueNames;
   for i := 0 to Length(a) - 1 do
     Header[a[i]] := tmp.Values[a[i]];
   tmp.Destroy;
   x := x2 + 2;
 end;
 Result := True;
 repeat
   N := whatnext(AData, X, x1);
   case N of
     xmlOpenTag: Result := Result and AddChild.ParseXML(AData, X);
     xmlIdentifier:  
       begin  
         if Text <> '' then Text := Text + ' ';  
         Text := Text + parsestring(AData, X, True);  
       end;
     else  
       Parsestring(AData, X);
   end;
 until skipspaces(adata, x) = #0;
 //  if not result then ShowMessage('Error Parsing: '+inttostr(X));
end;

function TYZXMLParser.Generate(var AData: string): Boolean;
var  
 i: Integer;
 a: strarr;
begin
 Header['Date'] := DateTimeToStr(now);
 a := header.Keys;
 AData := '<?xml';
 for i := 0 to Length(a) - 1 do
   AData := AData + ' ' + a[i] + '="' + Header[a[i]] + '"';
 AData  := AData + '?>'#13#10 + Text;
 Result := True;
 for i := 0 to Length(children) - 1 do
 begin
   Result := Result and children[i].generatexml(AData);
 end;
end;
//==============================================================================
// procedures of THash class
//==============================================================================
{THASH CLASS}
procedure THash.Clear;
begin
 SetLength(Arr, 0);
end;
constructor THash.Create;
begin
 inherited;
 Clear;
end;
//------------------------------------------------------------------------------
destructor THash.Destroy;
begin
 Clear;
 inherited;
end;
//------------------------------------------------------------------------------
function THash.GetCount: Integer;
begin
 Result := Length(Arr);
end;
//------------------------------------------------------------------------------
function THash.Getempty: Boolean;
begin
 Result := Length(Arr) = 0;
end;
function THash.GetKeys: StrArr;
var  
 i: Integer;
begin
 SetLength(Result, Length(arr));
 for i := 0 to Length(Result) - 1 do
   Result[i] := arr[i].Key;
end;
//------------------------------------------------------------------------------
function THash.GetValue(Key: string): string;
var  
 i: Integer;
 r: Boolean;
begin
 Result := '';
 i      := 0;  
 r      := False;
 while (i < Length(Arr)) and (not r) do
 begin
   if AnsiUpperCase(arr[i].key) = AnsiUpperCase(Key) then  
   begin  
     Result := Arr[i].Value;  
     r := True;
   end;
   i := i + 1;
 end;
end;
//------------------------------------------------------------------------------
function THash.GetValues: StrArr;
var  
 i: Integer;
begin
 SetLength(Result, Length(arr));
 for i := 0 to Length(Result) - 1 do
   Result[i] := arr[i].Value;
end;
//------------------------------------------------------------------------------
procedure THash.SetValue(Key: string; const VValue: string);
var  
 i, j: Integer;
 r: Boolean;
 E: THashElementArr;
begin
 if VValue <> '' then
 begin
   i := 0;  
   r := False;
   while (i < Length(Arr)) and not r do
   begin
     if AnsiUpperCase(arr[i].key) = AnsiUpperCase(Key) then  
     begin  
       Arr[i].Value := VValue;  
       r := True;
     end;
     i := i + 1;
   end;
   if not r then  
   begin  
     SetLength(Arr, Length(arr) + 1);
     arr[Length(arr) - 1].Key   := Key;
     arr[Length(arr) - 1].Value := Vvalue;  
   end;
 end;
 SetLength(E, Length(Arr));
 for i := 0 to Length(arr) - 1 do E[i] := Arr[i];
 SetLength(arr, 0);
 for i := 0 to Length(E) - 1 do if (E[i].Key <> '') and (E[i].Value <> '') then
   begin
     j := Length(arr);
     setlength(arr, j + 1);
     arr[j] := E[i];
   end;
end;

end.