首页  编辑  

RTF中如何提取图片数据

Tags: /超级猛料/Format.格式,单位/   Date Created:

如何提取RTF中的图片

来自:lanyaoshi, 时间:2006-6-1 21:37:00, ID:3459681

已知道RTF格式中的图片文件是以metafile格式存在的,以{\pict\wmetafile8\picw423\pich688\picwgoal240\pichgoal390开始,然后是metafile的HEX字符串,但不知道如何将这一大段HEX字符串保存为BMP文件或metafile格式文件呢?

例如下面是一段包含图片的RTF文件的内容:

{\rtf1\ansi\ansicpg936\deff0{\fonttbl{\f0\fnil\fcharset134 \'cb\'ce\'cc\'e5;}}

\viewkind4\uc1\pard\lang2052\f0\fs18{\pict\wmetafile8\picw423\pich688\picwgoal240\pichgoal390

010009000003160300000000f102000000000400000003010800050000000b0200000000050000

000c021b001100030000001e000400000007010400f1020000410b2000cc001a00100000000000

1a0010000000000028000000100000001a00000001000800000000000000000000000000000000

00000000000000000000000000ffffff00fbfcfd00ced2ec00a4aee80095a3f100c7cbe300eff0

f900e4e5eb006472d1005c68c7009aa0cf00949acd004c59c300273fde00465bd800bdc1df00e1

e2e700848bc200fdfdfe007f86c300122be000051bd700a5a9cb00f4f4f5007a8ae2003b46ac00

0f1fac00dbdce400354be0006b72ab00b9bedb000c25dd000112b900868aaf008898e100182cca

00777ca800ebedf3004f569700f8f9fb005d71de000c21c800636aa5005065db00091dbf002c35

9000fbfbfc00424a94007282de00424a8d0008128d000210a800a5acda00c8cad8004954bb001a

27a400dee1ea00303ba900daddee003f4fc300f7f7f900202da900d1d3e400d6d9e90000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000000000000000000000000000000000000000000

000000000000000000000000000000000000000101010101010101010101010101010101010101

010101010101010101010101010101010101010101010101010101010101010101010101010101

010101010101010101010101010101010101010101010101010101010101010101010101010101

010101013d3f10402801010101010101010126222e3e0a090d1713010101010101081a343a3b01

133b3c2b3d010101012f3721381101010101391b25130101013516212201010101011329333601

0113091633080101010101011934250101282c2a3001010101010101312d32130128292a2b0101

01010101022c2d2e2f0113232425010101010101260e2127010101101d1e0101010101011f2021

2201010118191a1301010101130d161b1c01010101110f12130101011415161701010101010108

090a0b0c0d0e0f1001010101010101010203040504060701010101010101010101010101010101

010101010101010101010101010101010101010101010101010101010101010101010101010101

010101010101010101010101010101010101010101010101010101010101010101010101010101

010101010101040000002701ffff030000000000

}

\par }

解答:

Copyright (C) Kingron, 2006

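实际上很简单:RTF中所有的二进制数据都是以十六进制(HEX)文本的形式保存的,所以只要把HEX文本还原成二进制即可。也就是把HEX字符串中每两个字符作为一组,转换成对应的一个字节,依次保存下来,得到的就是原始数据。需要注意的是,对于\wmetafile图片,这样得到的数据前面并没有文件头,所以下面的RTFPictToWMF在写文件时会先写入一个TMetafileHeader文件头,再写入转换后的数据。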

例如,把上面的十六进制数据转换成图片文件(以及把图片文件转换回RTF字符串)的代码如下:

const

 { Magic number of the file header defined below: RTFPictToWMF stores it in
   TMetafileHeader.Key and WMFToRTFPict rejects files whose Key differs. }
 WMFKey = Integer($9AC6CDD7);

 { Equals the low 16 bits of WMFKey; not referenced by the routines in this
   listing. }
 WMFWord = $CDD7;

 { Number of 0.01 mm units in one inch (25.40 mm); used to convert picture
   sizes between 0.01 mm and header units. }
 HundredthMMPerInch = 2540;

type

 { Header written in front of the metafile data by RTFPictToWMF and read back
   by WMFToRTFPict. Declared packed, so the fields are stored in this order
   without padding; ComputeAldusChecksum relies on CheckSum being the last
   field. }
 TMetafileHeader = packed record

   Key: Longint;        { must equal WMFKey }

   Handle: SmallInt;    { left at zero by RTFPictToWMF }

   Box: TSmallRect;     { picture rectangle, expressed in units of Inch }

   Inch: Word;          { header units per inch; RTFPictToWMF writes 96 }

   Reserved: Longint;   { left at zero by RTFPictToWMF }

   CheckSum: Word;      { XOR of all preceding 16-bit words of the header }

 end;

function ComputeAldusChecksum(var WMF: TMetafileHeader): Word;
{
  Computes the checksum of a metafile header: the XOR of every 16-bit word
  that precedes the CheckSum field.

  WMF     header to checksum. Its CheckSum field is neither read nor changed.
  Result  XOR of the words located in front of CheckSum.
}
type
  { Word-wise view of the header, used instead of integer pointer arithmetic. }
  THeaderWords = array[0..(SizeOf(TMetafileHeader) div SizeOf(Word)) - 1] of Word;
  PHeaderWords = ^THeaderWords;
var
  Words: PHeaderWords;
  WordCount, I: Integer;
begin
  Result := 0;
  Words := PHeaderWords(@WMF);
  { The byte offset of CheckSum is obtained by subtracting two byte pointers.
    Pointers are never cast to a 32-bit integer, so the loop bound stays
    correct on 64-bit targets and for addresses above 2 GB. }
  WordCount := (PAnsiChar(@WMF.CheckSum) - PAnsiChar(@WMF)) div SizeOf(Word);
  for I := 0 to WordCount - 1 do
    Result := Result xor Words^[I];
end;

procedure RTFPictToWMF(const WMFFilename, AnsiStorString: string; Width, Height: Integer);
{
  Converts the hex-encoded metafile data of an RTF \pict\wmetafile group into
  a WMF file: a TMetafileHeader followed by the decoded bytes.

  WMFFilename     path of the WMF file to create.
  AnsiStorString  the hex digits of the picture data only, without the RTF
                  control words or braces. CR, LF, tab and space characters
                  may appear anywhere and are ignored.
  Width, Height   the N of \picwN and \pichN. They are treated as hundredths
                  of a millimetre when the header rectangle is filled in.

  Raises EConvertError when AnsiStorString contains a character that is
  neither a hex digit nor whitespace, or holds an odd number of hex digits.
  The check runs before the output file is created, so bad input no longer
  produces a silently truncated file.
}
var
  HexDigits: string;
  Data: PAnsiChar;   { byte-sized buffer for the decoded picture data }
  DataLen: Integer;
  Header: TMetafileHeader;
  Output: TFileStream;

  { Returns S reduced to its hex digits; whitespace is dropped, anything
    else is rejected. }
  function ExtractHexDigits(const S: string): string;
  var
    I, Count: Integer;
  begin
    SetLength(Result, Length(S));
    Count := 0;
    for I := 1 to Length(S) do
      case S[I] of
        '0'..'9', 'A'..'F', 'a'..'f':
          begin
            Inc(Count);
            Result[Count] := S[I];
          end;
        #9, #10, #13, ' ':
          begin
            { line breaks and blanks between digits carry no data }
          end;
      else
        raise EConvertError.CreateFmt(
          'Invalid character at position %d in hex picture data', [I]);
      end;
    SetLength(Result, Count);
    if Odd(Count) then
      raise EConvertError.Create('Hex picture data has an odd number of digits');
  end;

begin
  HexDigits := ExtractHexDigits(AnsiStorString);
  DataLen := Length(HexDigits) div 2;

  { Build the file header. }
  FillChar(Header, SizeOf(Header), 0);
  Header.Key := WMFKey;
  Header.Inch := 96;          { WMF defaults to 96 units per inch }
  Header.Box.Right := MulDiv(Width, Header.Inch, HundredthMMPerInch);
  Header.Box.Bottom := MulDiv(Height, Header.Inch, HundredthMMPerInch);
  Header.CheckSum := ComputeAldusChecksum(Header);

  GetMem(Data, DataLen);
  try
    { HexDigits is known to be valid, so every byte of Data gets filled. }
    HexToBin(PChar(HexDigits), Data, DataLen);

    { The stream is created inside the buffer's try block so that the buffer
      is released even when the file cannot be created. }
    Output := TFileStream.Create(WMFFilename, fmCreate);
    try
      { WriteBuffer raises on a short write instead of returning a count. }
      Output.WriteBuffer(Header, SizeOf(Header));
      if DataLen > 0 then
        Output.WriteBuffer(Data^, DataLen);
    finally
      Output.Free;
    end;
  finally
    FreeMem(Data);
  end;
end;

function WMFToRTFPict(const WMFFilename: string): string;
{
  Reads a WMF file that starts with a TMetafileHeader and returns it as an
  RTF \pict\wmetafile8 group: the header is dropped and the remaining bytes
  are hex-encoded.

  WMFFilename  path of the WMF file to read.
  Result       the RTF picture group, including its surrounding braces.

  Raises Exception when the file is shorter than the header or when the
  header key is not WMFKey.

  The size is emitted twice, as in the sample RTF of this article:
  \picw and \pich in hundredths of a millimetre, and \picwgoal and \pichgoal
  in twips. Earlier versions wrote the 0.01 mm values under the goal control
  words, which made readers display the picture too large.
}
const
  TwipsPerInch = 1440;
  DefaultUnitsPerInch = 96;   { same default RTFPictToWMF writes }
var
  Source: TMemoryStream;
  Header: TMetafileHeader;
  Payload: PAnsiChar;
  PayloadLen: Integer;
  Hex: string;
  UnitsPerInch, BoxWidth, BoxHeight: Integer;
begin
  Source := TMemoryStream.Create;
  try
    Source.LoadFromFile(WMFFilename);
    if Source.Read(Header, SizeOf(Header)) <> SizeOf(Header) then
      raise Exception.Create('Error read file');
    if Header.Key <> WMFKey then
      raise Exception.Create('Invalid WMF file');

    { Everything after the header is the metafile data. The address is
      computed with byte-pointer arithmetic so that it is not truncated to
      32 bits. }
    PayloadLen := Source.Size - SizeOf(Header);
    Payload := PAnsiChar(Source.Memory) + SizeOf(Header);
    SetLength(Hex, PayloadLen * 2);
    BinToHex(Payload, PChar(Hex), PayloadLen);

    { A zero divisor would make MulDiv return -1 for both sizes. }
    UnitsPerInch := Header.Inch;
    if UnitsPerInch = 0 then
      UnitsPerInch := DefaultUnitsPerInch;

    BoxWidth := Header.Box.Right - Header.Box.Left;
    BoxHeight := Header.Box.Bottom - Header.Box.Top;

    Result := Format('{\pict\wmetafile8\picw%d\pich%d\picwgoal%d\pichgoal%d %s }',
                     [MulDiv(BoxWidth, HundredthMMPerInch, UnitsPerInch),
                      MulDiv(BoxHeight, HundredthMMPerInch, UnitsPerInch),
                      MulDiv(BoxWidth, TwipsPerInch, UnitsPerInch),
                      MulDiv(BoxHeight, TwipsPerInch, UnitsPerInch),
                      Hex]);
  finally
    Source.Free;
  end;
end;

procedure RTFPictToJPG(const JPGFilename, RTFAnsiStorString: string);
{
  Decodes the hex-encoded picture data of an RTF \pict\jpegblip group and
  writes the resulting bytes to a file.

  JPGFilename        path of the file to create.
  RTFAnsiStorString  the hex digits of the picture data only, without the RTF
                     control words or braces. CR, LF, tab and space
                     characters may appear anywhere and are ignored.

  Raises EConvertError when RTFAnsiStorString contains a character that is
  neither a hex digit nor whitespace, or holds an odd number of hex digits.
  The check runs before the output file is created.
}
var
  HexDigits: string;
  Data: PAnsiChar;   { byte-sized buffer for the decoded picture data }
  DataLen: Integer;
  Output: TFileStream;

  { Returns S reduced to its hex digits; whitespace is dropped, anything
    else is rejected. }
  function ExtractHexDigits(const S: string): string;
  var
    I, Count: Integer;
  begin
    SetLength(Result, Length(S));
    Count := 0;
    for I := 1 to Length(S) do
      case S[I] of
        '0'..'9', 'A'..'F', 'a'..'f':
          begin
            Inc(Count);
            Result[Count] := S[I];
          end;
        #9, #10, #13, ' ':
          begin
            { line breaks and blanks between digits carry no data }
          end;
      else
        raise EConvertError.CreateFmt(
          'Invalid character at position %d in hex picture data', [I]);
      end;
    SetLength(Result, Count);
    if Odd(Count) then
      raise EConvertError.Create('Hex picture data has an odd number of digits');
  end;

begin
  HexDigits := ExtractHexDigits(RTFAnsiStorString);
  DataLen := Length(HexDigits) div 2;

  GetMem(Data, DataLen);
  try
    { HexDigits is known to be valid, so every byte of Data gets filled. }
    HexToBin(PChar(HexDigits), Data, DataLen);

    { Both resources are now covered by try/finally: the buffer is released
      when the file cannot be created, and the stream is released when the
      write fails. }
    Output := TFileStream.Create(JPGFilename, fmCreate);
    try
      if DataLen > 0 then
        Output.WriteBuffer(Data^, DataLen);
    finally
      Output.Free;
    end;
  finally
    FreeMem(Data);
  end;
end;

function GetJPEGSize(FileName: string; var Width, Height: WORD): Boolean;
{
  Reads the pixel dimensions of a JPEG file from its frame header.

  FileName       path of the JPEG file.
  Width, Height  receive the picture size in pixels. They are assigned only
                 when the function returns True.
  Result         True when the file starts with FF D8, ends with FF D9 and a
                 frame header was found; False otherwise.

  Unlike the earlier version, every frame header type is recognised, not only
  baseline FF C0, and a file without a readable frame header yields False
  instead of True with undefined sizes.
}
const
  { The two marker bytes as they appear when read into a Word on a
    little-endian machine. }
  JPEG_FLAG_BEGIN = $D8FF;
  JPEG_FLAG_END = $D9FF;
var
  FS: TFileStream;
  FirstWord, LastWord: Word;
  FrameWidth, FrameHeight: Word;

  { Reads one byte; False at end of file. }
  function ReadByte(var B: Byte): Boolean;
  begin
    Result := FS.Read(B, SizeOf(B)) = SizeOf(B);
  end;

  { Reads a 16-bit value stored high byte first; False at end of file. }
  function ReadBigEndianWord(var W: Word): Boolean;
  var
    HiByte, LoByte: Byte;
  begin
    Result := False;
    if not ReadByte(HiByte) then Exit;
    if not ReadByte(LoByte) then Exit;
    W := (Word(HiByte) shl 8) or LoByte;
    Result := True;
  end;

  { True for the start-of-frame markers C0..CF, excluding C4, C8 and CC,
    which use the same range for other segment types. }
  function IsFrameHeader(Marker: Byte): Boolean;
  begin
    Result := (Marker >= $C0) and (Marker <= $CF) and
              (Marker <> $C4) and (Marker <> $C8) and (Marker <> $CC);
  end;

  { Walks the segments that follow the FF D8 marker until a frame header is
    found and returns the size stored in it. }
  function FindFrameSize(var W, H: Word): Boolean;
  var
    Prefix, Marker, Precision: Byte;
    SegLen: Word;
  begin
    Result := False;
    while ReadByte(Prefix) do
    begin
      if Prefix <> $FF then Exit;            { not positioned on a marker }
      repeat                                 { skip FF fill bytes }
        if not ReadByte(Marker) then Exit;
      until Marker <> $FF;

      { End of image or start of scan: no frame header came before the
        compressed data. }
      if (Marker = $D9) or (Marker = $DA) then Exit;
      { Markers that stand alone and carry no length field. }
      if (Marker = $01) or ((Marker >= $D0) and (Marker <= $D7)) then Continue;

      if not ReadBigEndianWord(SegLen) then Exit;
      if SegLen < 2 then Exit;               { length includes its own 2 bytes }

      if IsFrameHeader(Marker) then
      begin
        { Frame header layout: precision, height, width. }
        if not ReadByte(Precision) then Exit;
        if not ReadBigEndianWord(H) then Exit;
        if not ReadBigEndianWord(W) then Exit;
        Result := True;
        Exit;
      end;

      FS.Position := FS.Position + SegLen - 2;
    end;
  end;

begin
  Result := False;
  { Sharing is stated explicitly so that other readers are not locked out. }
  FS := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    { A JPEG file must start with FF D8 and end with FF D9. }
    if FS.Size < 4 then Exit;
    if FS.Read(FirstWord, SizeOf(FirstWord)) <> SizeOf(FirstWord) then Exit;
    FS.Position := FS.Size - 2;
    if FS.Read(LastWord, SizeOf(LastWord)) <> SizeOf(LastWord) then Exit;
    if (FirstWord <> JPEG_FLAG_BEGIN) or (LastWord <> JPEG_FLAG_END) then Exit;

    FS.Position := 2;
    if FindFrameSize(FrameWidth, FrameHeight) then
    begin
      Width := FrameWidth;
      Height := FrameHeight;
      Result := True;
    end;
  finally
    FS.Free;
  end;
end;

function JPGToRTFPict(const JPGFilename: string): string;
{
  Converts a JPEG file into an RTF picture group with hex-encoded data.

  JPGFilename  path of the JPEG file to read.
  Result       the RTF text of the picture group.

  Raises Exception when GetJPEGSize does not accept the file. Earlier versions
  ignored that result and emitted undefined sizes.

  The pixel size is converted to hundredths of a millimetre at 96 pixels per
  inch and written as \picw and \pich. The scaled values are held in Integer
  variables because they no longer fit into a Word once a side reaches about
  2477 pixels.

  The returned text opens three groups, so it is closed with three braces;
  earlier versions closed only two and returned unbalanced RTF.
}
var
  Image: TMemoryStream;
  PixelWidth, PixelHeight: Word;
  PictWidth, PictHeight: Integer;
  Hex: string;
begin
  if not GetJPEGSize(JPGFilename, PixelWidth, PixelHeight) then
    raise Exception.Create('Invalid JPEG file');

  PictWidth := MulDiv(PixelWidth, HundredthMMPerInch, 96);
  PictHeight := MulDiv(PixelHeight, HundredthMMPerInch, 96);

  Image := TMemoryStream.Create;
  try
    Image.LoadFromFile(JPGFilename);
    SetLength(Hex, Image.Size * 2);
    BinToHex(PAnsiChar(Image.Memory), PChar(Hex), Image.Size);
  finally
    Image.Free;
  end;

  Result := '{{\*\shppict{\pict\picscalex100\picscaley100\'
            + Format('picw%d\pich%d\jpegblip ', [PictWidth, PictHeight])
            + Hex + '}}}';
end;

procedure TForm1.Button1Click(Sender: TObject);
{ Demo handler: converts C:\o.wmf to its RTF picture text and shows it in Memo1. }
var
  PictText: string;
begin
  { Opposite direction, kept disabled as in the original demo: }
  //RTFPictToWMF('C:\o.wmf', Memo1.Text, 423, 688);
  PictText := WMFToRTFPict('C:\o.wmf');
  Memo1.Text := PictText;
end;

参考资料:

 《RTF文件格式》:

     http://msdn.microsoft.com/library/default.asp?url=/library/en-us/dnrtfspec/html/rtfspec.asp

     http://www.biblioscape.com/rtf15_spec.htm#Heading49

 《WMF文件格式》: http://www.fltvu.com/jiaocheng/chenxu1/FORMAT/windows/wmf.htm

 Delphi源代码

 

其中Memo1的Text就是那些十六进制的字符,注意不要包括RTF的那些格式数据(例如\pict、\wmetafile8等控制字以及花括号)。上面转换得到的图片见下面的 img_16605.bmp,和RTF中是一样的!

img_16605.bmp (1.7KB)