Ok, I've updated the TBlockSearch class.
One problem I mentioned was with large search terms. This was caused by the way I was filling the FIFO buffer, which was done that way so that CompareMem would work.
Now, with some small modifications, large search terms are not a problem, and it's also twice as fast as my previous version; e.g. a 20 GB search takes about 5 minutes.
Basically, instead of shifting the FIFO buffer using Move instructions, I've created a simple ring buffer. CompareMem of course won't understand the ring buffer, so I also do the comparison manually inside the CheckPos sub-procedure.
There was also a slight bug: Integer, or even LargeInt, is not big enough for 20 GB files, so I've updated the code to use Int64.
I'm sure there are ways to make this faster; e.g. even Lazarus has Search in Files, which I assume does even cleverer things for its grep.
unit block_search;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, fgl;
type
{ TBlockSearch }
// List of 64-bit stream offsets, one entry per match found by a search.
// Int64 is used because 32-bit offsets are too small for multi-gigabyte streams.
TBlockSearchResults = specialize TFPGList<int64>;
// Searches a TStream for a byte sequence, reading the stream one block at a
// time into an internal buffer and collecting match offsets in Results.
TBlockSearch = class
private
// Stream being searched; assigned in Create and not freed by Destroy.
src:TStream;
// Match offsets of the most recent search; created in Create, freed in Destroy.
fresults:TBlockSearchResults;
// Read buffer; its length is the blocksize passed to Create.
block:array of byte;
public
// Clears Results, rewinds the stream to position 0 and records matches of
// the byte sequence a in Results. Raises Exception when a is longer than
// the block buffer.
procedure SearchFor(a:array of byte);
// _Src is the stream to search; blocksize is the read-buffer size in bytes
// (default 1024*1024).
constructor Create(_Src:TStream; blocksize:integer = 1024*1024);
destructor Destroy; override;
// Offsets collected by the last SearchFor call. The list is owned by this
// object (it is freed in Destroy).
property Results:TBlockSearchResults read fResults;
end;
implementation
{ TBlockSearch }
{ Scans the whole stream, starting at position 0, for every occurrence of the
  byte sequence a and stores the zero-based stream offset of each occurrence
  (overlapping ones included) in Results, in ascending order.

  The stream is read one block at a time; the last Length(a) bytes seen are
  kept in a ring buffer so that matches spanning two blocks are found.

  Results is cleared first. An empty search term yields no results.
  Raises Exception when the search term is longer than the block buffer. }
procedure TBlockSearch.SearchFor(a: array of byte);
var
  readsize: integer;
  fPos: Int64;              // number of stream bytes consumed so far
  fifoBuff: array of byte;  // ring buffer holding the last searchLen bytes
  fifoEn: integer;          // ring index of the most recently stored byte
  searchLen, lpByte: integer;

  // True when the ring buffer, read from its oldest byte to its newest,
  // equals the search term. Only meaningful once the ring is full.
  function WindowMatches: boolean;
  var
    l, p: integer;
  begin
    // In a full ring the oldest byte sits in the slot after the newest one.
    p := fifoEn + 1;
    if p >= searchLen then p := 0;
    for l := 0 to pred(searchLen) do
    begin
      if a[l] <> fifoBuff[p] then exit(false);
      // wrap with a compare instead of "mod": cheaper in this hot loop
      inc(p);
      if p >= searchLen then p := 0;
    end;
    result := true;
  end;

begin
  fresults.Clear;
  searchLen := Length(a);
  if searchLen > Length(block) then
    raise Exception.Create('Search term larger than blocksize');
  // Nothing to look for; also avoids indexing a zero-length ring buffer.
  if searchLen = 0 then exit;

  src.Position := 0;
  SetLength(fifoBuff, searchLen);
  fPos := 0;
  // Start on the last slot so that the first stored byte lands in slot 0.
  fifoEn := searchLen - 1;

  readsize := src.Read(block[0], Length(block));
  while readsize > 0 do
  begin
    for lpByte := 0 to pred(readsize) do
    begin
      inc(fifoEn);
      if fifoEn >= searchLen then fifoEn := 0;
      fifoBuff[fifoEn] := block[lpByte];
      inc(fPos);
      // Compare only once searchLen bytes have been consumed; before that the
      // ring is partly uninitialised and any "match" would be bogus.
      if fPos >= searchLen then
        if WindowMatches then
          fresults.Add(fPos - searchLen);
    end;
    readsize := src.Read(block[0], Length(block));
  end;
end;
{ Creates a searcher over _Src.
  _Src      - stream to search; only the reference is stored, the destructor
              does not free it.
  blocksize - size in bytes of the read buffer used while searching. }
constructor TBlockSearch.Create(_Src: TStream; blocksize: integer);
begin
  inherited Create;
  // result list lives as long as this object; released in Destroy
  fresults := TBlockSearchResults.Create;
  src := _Src;
  SetLength(block, blocksize);
end;
{ Releases the result list. The searched stream is left untouched. }
destructor TBlockSearch.Destroy;
begin
  // FreeAndNil tolerates a nil field, e.g. after a failed constructor
  FreeAndNil(fresults);
  inherited Destroy;
end;
end.