首页  编辑  

把一个字符串拆分成单词

Tags: /超级猛料/String.字符串处理/   Date Created:
Author:Peter Below
procedure SplitTextIntoWords(const S: string; words: TstringList);
var
 startpos, endpos: Integer;
begin
 Assert(Assigned(words));
 words.Clear;
 startpos := 1;
 while startpos <= Length(S) do
 begin
   // skip non-letters
   while (startpos <= Length(S)) and not IsCharAlpha(S[startpos]) do
     Inc(startpos);
   if startpos <= Length(S) then
   begin
     // find next non-letter
     endpos := startpos + 1;
     while (endpos <= Length(S)) and IsCharAlpha(S[endpos]) do
       Inc(endpos);
     words.Add(Copy(S, startpos, endpos - startpos));
     startpos := endpos + 1;
   end; { If }
 end; { While }
end; { SplitTextIntoWords }

function StringMatchesMask(S, mask: string;
 case_sensitive: Boolean): Boolean;
var
 sIndex, maskIndex: Integer;
begin
 if not case_sensitive then
 begin
   S    := AnsiUpperCase(S);
   mask := AnsiUpperCase(mask);
 end; { If }
 Result    := True; // blatant optimism
 sIndex    := 1;
 maskIndex := 1;
 while (sIndex <= Length(S)) and (maskIndex <= Length(mask)) do
 begin
   case mask[maskIndex] of
     '?':
       begin
         // matches any character
         Inc(sIndex);
         Inc(maskIndex);
       end; { case '?' }
     '*':
       begin
         // matches 0 or more characters, so need to check for
         // next character in mask
         Inc(maskIndex);
         if maskIndex > Length(mask) then
           // * at end matches rest of string
           Exit
         else if mask[maskindex] in ['*', '?'] then
           raise Exception.Create('Invalid mask');
         // look for mask character in S
         while (sIndex <= Length(S)) and
           (S[sIndex] <> mask[maskIndex]) do
           Inc(sIndex);
         if sIndex > Length(S) then
         begin
           // character not found, no match
           Result := False;
           Exit;
         end;
         { If }
       end; { Case '*' }
     else if S[sIndex] = mask[maskIndex] then
       begin
         Inc(sIndex);
         Inc(maskIndex);
       end { If }
       else
         begin
           // no match
           Result := False;
           Exit;
         end;
   end; { Case }
 end; { While }
 // if we have reached the end of both S and mask we have a complete
 // match, otherwise we only have a partial match
 if (sIndex <= Length(S)) or (maskIndex <= Length(mask)) then
   Result := False;
end; { stringMatchesMask }

procedure FindMatchingWords(const S, mask: string;
 case_sensitive: Boolean; matches: Tstrings);
var
 words: TstringList;
 i: Integer;
begin
 Assert(Assigned(matches));
 words := TstringList.Create;
 try
   SplitTextIntoWords(S, words);
   matches.Clear;
   for i := 0 to words.Count - 1 do
   begin
     if stringMatchesMask(words[i], mask, case_sensitive) then
       matches.Add(words[i]);
   end; { For }
 finally
   words.Free;
 end;
end;

{
The Form has one TMemo for the text to check, one TEdit for the mask,
one TCheckbox (check = case sensitive), one TListbox for the results,
one Tbutton
}
procedure TForm1.Button1Click(Sender: TObject);
begin
 FindMatchingWords(memo1.Text, edit1.Text, checkbox1.Checked, listbox1.Items);
end;