首页  编辑  

转换CSV为XML文件

Tags: /超级猛料/Stream.File.流、文件和目录/文件操作/   Date Created:

> I am trying to write an application that converts a CSV (or similar) file to

> an XML one. The application looks for a character (the comma, or anything

> else specified in an Edit box), adds a starting and ending tag to the

> line, and writes the line to the new XML file. In the end I should get an

> XML file with the various elements.

{Your task has a number of subtasks.

The first is parsing the input file into lines. You can leave that to a

Tstringlist, if the files you need to handle are not in the

multimegabyte size range. If they are you would be best served by using

the good old Pascal Textfile routines, where a simple ReadLn( filevar, S

) gets you a line.

The second is parsing a line into its elements, based on a separator

character between the elements. This is also not so difficult to do,

especially if you don't need to deal with quoted elements that may

contain the separator. Search the newsgroup archives for "SplitString"

for an example. Tstringlist.Delimitedtext may be of use here, but be

warned that it considers any character <= #32 as a separator *in

addition* to what you define as Delimiter. It can deal with quoted

elements, though.

The second subtask would end with a TStringlist instance containing the

elements to store into the XML file for one line of the input file. This

is the input for the third task: to create a first-level XML element

containing the data. To write valid XML you need not only deal with

proper nesting of XML tags, you also have to properly represent some

characters that have special meaning in XML ('<' and '&' for instance).

I can recommend Berend de Boer's xml_generator class

http://www.pobox.com/~berend/delphi for this task, it deals with all the

nastiness behind the scenes and produces syntactically correct XML

without the overhead of a DOM model implementation.

There is something else you need: a list of column names, one name for

each "column" in your XML file. These names will become the node names

for the subnodes of the produced XML. Depending on your input files you

may be able to get these names from the first line (which often is a

header line giving the column names).

Here is a sketch (untested!) of the conversion routine: }

type

 {: Progress callback for CSVToXML. If a callback is supplied it is called
   once after each processed data line.

   The type is declared "of object", so the callback must be a method of an
   object instance; a stand-alone function cannot be assigned to it.

   @Param currentline is the 0-based index of the line just processed,
     counted in lines of the input file. When the first line of the file
     is used as the header line, the first call reports index 1.

   @Param totallines is the total number of lines in the input. This may
     be only a rough estimate if the file is not completely loaded into
     memory.

   @Returns true to continue processing, false to stop it. }

 TProgressNotification =

   function(currentline, totallines: Integer): Boolean of object;

{-- CSVToXML ----------------------------------------------------------}
{: Convert a delimiter-separated text file to an XML file.

The whole input file is loaded into memory, every data line is split into
fields, and each field is written as an element named after its column.
Fields are separated by aSeparator ONLY; blanks and tabs inside a field are
preserved. A field that starts with a double quote is read up to the
matching closing quote, may contain the separator, and represents a literal
double quote as two consecutive quotes. Lines that are empty or contain only
whitespace are skipped.

@Param csvfilename is the file to convert.
@Param xmlfilename is the XML file to create (an existing file is
  overwritten).
@Param aSeparator is the character separating the fields of a line.
@Param aRootNodeName is the name to use for the root node of the XML file.
@Param columnnames is an optional list of column names to use as subnode
  names. If this parameter is nil the first line of the data file must
  contain a header line with the names to use. The names are used verbatim
  as element names (header names read from the file are trimmed first), so
  they have to be valid XML names.
@Param onProgress is an optional callback to call after each processed
  line. If it returns false the conversion stops early; the lines converted
  so far are still written to xmlfilename.
@Param aRowNodeName is the optional name of an element that wraps the
  fields of one input line, e.g. 'row' produces
  <root><row><a>1</a><b>2</b></row>...</root>. If it is empty (the default)
  the field elements of all lines are written directly below the root node.
@Precondition  csvfilename exists
@Raises Exception if csvfilename does not exist.
}{ Created 17.3.2003 by P. Below
-----------------------------------------------------------------------}
procedure CSVToXML(const csvfilename, xmlfilename: string;
  const aSeparator: Char;
  const aRootNodeName: string;
  const columnnames: TStrings = nil;
  const onProgress: TProgressNotification = nil;
  const aRowNodeName: string = '');

  { Forward a progress notification to the optional callback.
    Returns true (continue) when no callback was supplied. }
  function DoProgress(currentline, totallines: Integer): Boolean;
  begin
    if Assigned(onProgress) then
      Result := onProgress(currentline, totallines)
    else
      Result := true;
  end;

  { Split one input line into its fields and store them in "fields".
    TStrings.DelimitedText is deliberately not used here: it treats every
    character <= #32 as an additional separator, which tears values such
    as "New York" apart and shifts all following columns. }
  procedure SplitLine(const line: string; fields: TStrings);
  const
    cQuote = '"';
  var
    i, len: Integer;
    field: string;
    inQuotes, atFieldStart: Boolean;
  begin
    fields.Clear;
    // An empty or whitespace-only line carries no fields at all.
    if Trim(line) = '' then
      Exit;
    len := Length(line);
    field := '';
    inQuotes := False;
    atFieldStart := True;
    i := 1;
    while i <= len do begin
      if inQuotes then begin
        if line[i] <> cQuote then
          field := field + line[i]
        else if (i < len) and (line[i + 1] = cQuote) then begin
          // A doubled quote inside a quoted field is one literal quote.
          field := field + cQuote;
          Inc(i);
        end
        else
          inQuotes := False;
      end
      // The separator is tested before the quote so that a caller who
      // passes the quote character as separator still gets a plain split.
      else if line[i] = aSeparator then begin
        fields.Add(field);
        field := '';
        atFieldStart := True;
      end
      // A quote only opens a quoted field at the very start of the field;
      // anywhere else it is ordinary data (e.g. 3.5" disk).
      else if atFieldStart and (line[i] = cQuote) then begin
        inQuotes := True;
        atFieldStart := False;
      end
      else begin
        field := field + line[i];
        atFieldStart := False;
      end;
      Inc(i);
    end; { While }
    // The last field has no trailing separator; this also yields the empty
    // field that follows a separator at the end of the line.
    fields.Add(field);
  end;

  { Write the fields of one input line as elements named after the
    corresponding header entries. Fields without a matching header entry
    are dropped; header entries without a field produce no element. }
  procedure WriteDataline(const line: string; header: TStringlist; xml: TXMLGenerator);
  var
    elements: TStringlist;
    i, max: Integer;
  begin
    elements := TStringlist.Create;
    try
      SplitLine(line, elements);
      // Skip blank lines so that no empty row element is produced.
      if elements.Count = 0 then
        Exit;
      max := elements.Count;
      if max > header.Count then
        max := header.Count;
      if aRowNodeName <> '' then
        xml.StartTag(aRowNodeName);
      for i := 0 to max - 1 do begin
        xml.StartTag(header[i]);
        xml.AddData(elements[i]);
        xml.StopTag;
      end; { For }
      if aRowNodeName <> '' then
        xml.StopTag;
    finally
      elements.Free;
    end;
  end;

  { Determine the column names and write all data lines. The names come
    from columnnames if given, otherwise from the first line of data,
    which is then not treated as a data line. }
  procedure WriteData(data: TStringlist; xml: TXMLGenerator);
  var
    header: TStringlist;
    firstline: Integer;
    i: Integer;
  begin
    header := TStringlist.Create;
    try
      firstline := 0;
      if Assigned(columnnames) then
        header.Assign(columnnames)
      else begin
        SplitLine(data[0], header);
        // Blanks around a name can never be part of a valid element name.
        for i := 0 to header.Count - 1 do
          header[i] := Trim(header[i]);
        firstline := 1;
      end; { Else }
      for i := firstline to data.Count - 1 do begin
        WriteDataline(data[i], header, xml);
        if not DoProgress(i, data.Count) then
          Break;
      end; { For }
    finally
      header.Free;
    end;
  end;

  { Write the characters of S to the file "filename", creating or
    overwriting it. An empty string produces an empty file. }
  procedure SaveStringToFile(const S, filename: string);
  var
    fs: TFileStream;
  begin
    fs := TFileStream.Create(filename, fmCreate);
    try
      if Length(S) > 0 then
        fs.WriteBuffer(S[1], Length(S));
    finally
      fs.Free;
    end;
  end; { SaveStringToFile }

var
  xml: TXMLGenerator; // from the xml_generator unit by Berend de Boer
  datafile: TStringlist;
begin { CSVToXML }
  if not FileExists(csvfilename) then
    raise Exception.CreateFmt('Input file %s not found', [csvfilename]);
  datafile := TStringlist.Create;
  try
    datafile.LoadFromFile(csvfilename);
    xml := TXMLGenerator.CreateWithEncoding(16 * 1024, encISO_8859_1);
    try
      xml.StartTag(aRootNodeName);
      // An empty input file still yields a document with an empty root node.
      if datafile.Count > 0 then
        WriteData(datafile, xml);
      xml.StopTag;
      SaveStringToFile(xml.AsLatin1, xmlfilename);
    finally
      xml.Free;
    end;
  finally
    datafile.Free;
  end;
end; { CSVToXML }