首页  编辑  

字符串压缩算法

Tags: /超级猛料/String.字符串处理/   Date Created:
标题: 字符串压缩算法
Contributor: SWAG SUPPORT TEAM
{ 
  You won't get that sort of compression from my routines, but here
  they are anyway. When testing, you'll get best compression if you
  use English and longish Strings.
}
Unit Compress;

Interface

Const
    CompressedStringArraySize = 500; { err on the side of generosity }

Type
    tCompressedStringArray = Array [1 .. CompressedStringArraySize] of Byte;
Function GetCompressedString(Arr: tCompressedStringArray): String;
Procedure CompressString(st: String; Var Arr: tCompressedStringArray; Var len: Integer);

{ converts st into a tCompressedStringArray of length len }
Implementation

Const
    FreqChar: Array [4 .. 14] of Char = 'etaonirshdl';

    { can't be in [0..3] because two empty bits signify a space }
Function GetCompressedString(Arr: tCompressedStringArray): String;
Var
    Shift: Byte;
    I: Integer;
    ch: Char;
    st: String;
    b: Byte;
    Function GetHalfNibble: Byte;
    begin
        GetHalfNibble := (Arr[I] shr Shift) and 3;
        if Shift = 0 then
        begin
            Shift := 6;
            inc(I);
        end
        else
            dec(Shift, 2);
    end;

begin
    st := '';
    I := 1;
    Shift := 6;
    Repeat
        b := GetHalfNibble;
        if b = 0 then
            ch := ' '
        else
        begin
            b := (b shl 2) or GetHalfNibble;
            if b = $F then
            begin
                b := GetHalfNibble shl 6;
                b := b or GetHalfNibble shl 4;
                b := b or GetHalfNibble shl 2;
                b := b or GetHalfNibble;
                ch := Char(b);
            end
            else
                ch := FreqChar[b];
        end;
        if ch <> #0 then
            st := st + ch;
    Until ch = #0;
    GetCompressedString := st;
end;

Procedure CompressString(st: String; Var Arr: tCompressedStringArray; Var len: Integer);
{ converts st into a tCompressedStringArray of length len }
Var
    I: Integer;
    Shift: Byte;
    Procedure OutHalfNibble(b: Byte);
    begin
        Arr[len] := Arr[len] or (b shl Shift);
        if Shift = 0 then
        begin
            Shift := 6;
            inc(len);
        end
        else
            dec(Shift, 2);
    end;
    Procedure OutChar(ch: Char);
    Var
        I: Byte;
        bych: Byte Absolute ch;
    begin
        if ch = ' ' then
            OutHalfNibble(0)
        else
        begin
            I := 4;
            While (I < 15) and (FreqChar[I] <> ch) do
                inc(I);
            OutHalfNibble(I shr 2);
            OutHalfNibble(I and 3);
            if I = $F then
            begin
                OutHalfNibble(bych shr 6);
                OutHalfNibble((bych shr 4) and 3);
                OutHalfNibble((bych shr 2) and 3);
                OutHalfNibble(bych and 3);
            end;
        end;
    end;

begin
    len := 1;
    Shift := 6;
    fillChar(Arr, sizeof(Arr), 0);
    For I := 1 to length(st) do
        OutChar(st[I]);
    OutChar(#0); { end of compressed String signaled by #0 }
    if Shift = 6 then
        dec(len);
end;

end.
测试压缩字符串 
Contributor: SWAG SUPPORT TEAM

Program TestComp; { tests Compression }

{ kludgy test of Compress Unit }
Uses Crt, Dos, Compress;

Const
    NumofStrings = 5;

Var
    ch: Char;
    LongestStringLength, I, j, len: Integer;
    Textfname, Compfname: String;
    TextFile: Text;
    ByteFile: File;
    CompArr: tCompressedStringArray;
    st: Array [1 .. NumofStrings] of String;
    Rec: SearchRec;
    BigArr: Array [1 .. 5000] of Byte;
    Arr: Array [1 .. NumofStrings] of tCompressedStringArray;

begin
    Writeln('note: No I/O checking in this test.');
    Write('Test ompress or nCompress? ');
    Repeat
        ch := upCase(ReadKey);
    Until ch in ['C', 'U', #27];
    if ch = #27 then
        halt;
    Writeln(ch);
    if ch = 'C' then
    begin
        Writeln('Enter ', NumofStrings, ' Strings:');
        LongestStringLength := 0;
        For I := 1 to NumofStrings do
        begin
            Write(I, ': ');
            readln(st[I]);
            if length(st[I]) > LongestStringLength then
                LongestStringLength := length(st[I]);
        end;
        Writeln;
        Writeln('Enter name of File to store unCompressed Strings in.');
        Writeln('ANY EXISTinG File With THIS NAME WILL BE OVERWRITTEN.');
        readln(Textfname);
        assign(TextFile, Textfname);
        reWrite(TextFile);
        For I := 1 to NumofStrings do
            Writeln(TextFile, st[I]);
        close(TextFile);
        Writeln;
        Writeln('Enter name of File to store Compressed Strings in.');
        Writeln('ANY EXISTinG File With THIS NAME WILL BE OVERWRITTEN.');
        readln(Compfname);
        assign(ByteFile, Compfname);
        reWrite(ByteFile, 1);
        For I := 1 to NumofStrings do
        begin
            CompressString(st[I], CompArr, len);
            blockWrite(ByteFile, CompArr, len);
        end;
        close(ByteFile);
        FindFirst(Textfname, AnyFile, Rec);
        Writeln;
        Writeln;
        Writeln('Size of Text File storing Strings: ', Rec.Size);
        Writeln;
        Writeln('Using Typed Files, a File of Type String[', LongestStringLength, '] would be necessary.');
        Writeln('That would be ', (LongestStringLength + 1) * NumofStrings, ' long, including length Bytes.');
        Writeln;
        FindFirst(Compfname, AnyFile, Rec);
        Writeln('Size of the Compressed File: ', Rec.Size);
        Writeln;
        Writeln('Now erase the Text File, and run this Program again, choosing');
        Writeln('nCompress to show that the Compression retains all info.');
    end
    else
    begin { ch = 'U' }
        Write('Name of Compressed File: ');
        readln(Compfname);
        assign(ByteFile, Compfname);
        reset(ByteFile, 1);
        blockread(ByteFile, BigArr, Filesize(ByteFile));
        close(ByteFile);
        For j := 1 to NumofStrings do
        begin
            I := 1;
            While BigArr[I] <> 0 do
                inc(I);
            move(BigArr[1], Arr[j], I);
            move(BigArr[I + 1], BigArr[1], sizeof(BigArr));
        end;
        For I := 1 to NumofStrings do
            st[I] := GetCompressedString(Arr[I]);
        For I := 1 to NumofStrings do
            Writeln(st[I]);
    end;

end.