QuietUnrar/libunrar/unpack50mt.cpp

656 lines
18 KiB
C++

#define UNP_READ_SIZE_MT 0x400000
#define UNP_BLOCKS_PER_THREAD 2
struct UnpackThreadDataList
{
UnpackThreadData *D;
uint BlockCount;
};
THREAD_PROC(UnpackDecodeThread)
{
UnpackThreadDataList *DL=(UnpackThreadDataList *)Data;
for (uint I=0;I<DL->BlockCount;I++)
DL->D->UnpackPtr->UnpackDecode(DL->D[I]);
}
void Unpack::InitMT()
{
if (ReadBufMT==NULL)
{
// Even getbits32 can read up to 3 additional bytes after current
// and our block header and table reading code can look much further.
// Let's allocate the additional space here, so we do not need to check
// bounds for every bit field access.
const size_t Overflow=1024;
ReadBufMT=new byte[UNP_READ_SIZE_MT+Overflow];
memset(ReadBufMT,0,UNP_READ_SIZE_MT+Overflow);
}
if (UnpThreadData==NULL)
{
uint MaxItems=MaxUserThreads*UNP_BLOCKS_PER_THREAD;
UnpThreadData=new UnpackThreadData[MaxItems];
memset(UnpThreadData,0,sizeof(UnpackThreadData)*MaxItems);
for (uint I=0;I<MaxItems;I++)
{
UnpackThreadData *CurData=UnpThreadData+I;
if (CurData->Decoded==NULL)
{
// Typical number of items in RAR blocks does not exceed 0x4000.
CurData->DecodedAllocated=0x4100;
// It will be freed in the object destructor, not in this file.
CurData->Decoded=(UnpackDecodedItem *)malloc(CurData->DecodedAllocated*sizeof(UnpackDecodedItem));
if (CurData->Decoded==NULL)
ErrHandler.MemoryError();
}
}
}
}
void Unpack::Unpack5MT(bool Solid)
{
InitMT();
UnpInitData(Solid);
for (uint I=0;I<MaxUserThreads*UNP_BLOCKS_PER_THREAD;I++)
{
UnpackThreadData *CurData=UnpThreadData+I;
CurData->LargeBlock=false;
CurData->Incomplete=false;
}
UnpThreadData[0].BlockHeader=BlockHeader;
UnpThreadData[0].BlockTables=BlockTables;
uint LastBlockNum=0;
int DataSize=0;
int BlockStart=0;
// 'true' if we found a block too large for multithreaded extraction,
// so we switched to single threaded mode until the end of file.
// Large blocks could cause too high memory use in multithreaded mode.
bool LargeBlock=false;
bool Done=false;
while (!Done)
{
// Data amount, which is guaranteed to fit block header and tables,
// so we can safely read them without additional checks.
const int TooSmallToProcess=1024;
int ReadSize=UnpIO->UnpRead(ReadBufMT+DataSize,(UNP_READ_SIZE_MT-DataSize)&~0xf);
if (ReadSize<0)
break;
DataSize+=ReadSize;
if (DataSize==0)
break;
// First read chunk can be small if we are near the end of volume
// and we want it to fit block header and tables.
if (ReadSize>0 && DataSize<TooSmallToProcess)
continue;
while (BlockStart<DataSize && !Done)
{
uint BlockNumber=0,BlockNumberMT=0;
while (BlockNumber<MaxUserThreads*UNP_BLOCKS_PER_THREAD)
{
UnpackThreadData *CurData=UnpThreadData+BlockNumber;
LastBlockNum=BlockNumber;
CurData->UnpackPtr=this;
// 'Incomplete' thread is present. This is a thread processing block
// in the end of buffer, split between two read operations.
if (CurData->Incomplete)
CurData->DataSize=DataSize;
else
{
CurData->Inp.SetExternalBuffer(ReadBufMT+BlockStart);
CurData->Inp.InitBitInput();
CurData->DataSize=DataSize-BlockStart;
if (CurData->DataSize==0)
break;
CurData->DamagedData=false;
CurData->HeaderRead=false;
CurData->TableRead=false;
}
// We should not use 'last block in file' block flag here unless
// we'll check the block size, because even if block is last in file,
// it can exceed the current buffer and require more reading.
CurData->NoDataLeft=(ReadSize==0);
CurData->Incomplete=false;
CurData->ThreadNumber=BlockNumber;
if (!CurData->HeaderRead)
{
CurData->HeaderRead=true;
if (!ReadBlockHeader(CurData->Inp,CurData->BlockHeader) ||
!CurData->BlockHeader.TablePresent && !TablesRead5)
{
Done=true;
break;
}
TablesRead5=true;
}
// To prevent too high memory use we switch to single threaded mode
// if block exceeds this size. Typically RAR blocks do not exceed
// 64 KB, so this protection should not affect most of valid archives.
const int LargeBlockSize=0x20000;
if (LargeBlock || CurData->BlockHeader.BlockSize>LargeBlockSize)
LargeBlock=CurData->LargeBlock=true;
else
BlockNumberMT++; // Number of normal blocks processed in MT mode.
BlockStart+=CurData->BlockHeader.HeaderSize+CurData->BlockHeader.BlockSize;
BlockNumber++;
int DataLeft=DataSize-BlockStart;
if (DataLeft>=0 && CurData->BlockHeader.LastBlockInFile)
break;
// For second and following threads we move smaller blocks to buffer
// start to ensure that we have enough data to fit block header
// and tables.
if (DataLeft<TooSmallToProcess)
break;
}
//#undef USE_THREADS
UnpackThreadDataList UTDArray[MaxPoolThreads];
uint UTDArrayPos=0;
uint MaxBlockPerThread=BlockNumberMT/MaxUserThreads;
if (BlockNumberMT%MaxUserThreads!=0)
MaxBlockPerThread++;
// Decode all normal blocks until the first 'large' if any.
for (uint CurBlock=0;CurBlock<BlockNumberMT;CurBlock+=MaxBlockPerThread)
{
UnpackThreadDataList *UTD=UTDArray+UTDArrayPos++;
UTD->D=UnpThreadData+CurBlock;
UTD->BlockCount=Min(MaxBlockPerThread,BlockNumberMT-CurBlock);
#ifdef USE_THREADS
if (BlockNumber==1)
UnpackDecode(*UTD->D);
else
UnpThreadPool->AddTask(UnpackDecodeThread,(void*)UTD);
#else
for (uint I=0;I<UTD->BlockCount;I++)
UnpackDecode(UTD->D[I]);
#endif
}
if (BlockNumber==0)
break;
#ifdef USE_THREADS
UnpThreadPool->WaitDone();
#endif
bool IncompleteThread=false;
for (uint Block=0;Block<BlockNumber;Block++)
{
UnpackThreadData *CurData=UnpThreadData+Block;
if (!CurData->LargeBlock && !ProcessDecoded(*CurData) ||
CurData->LargeBlock && !UnpackLargeBlock(*CurData) ||
CurData->DamagedData)
{
Done=true;
break;
}
if (CurData->Incomplete)
{
int BufPos=int(CurData->Inp.InBuf+CurData->Inp.InAddr-ReadBufMT);
if (DataSize<=BufPos) // Thread exceeded input buffer boundary.
{
Done=true;
break;
}
IncompleteThread=true;
memmove(ReadBufMT,ReadBufMT+BufPos,DataSize-BufPos);
CurData->BlockHeader.BlockSize-=CurData->Inp.InAddr-CurData->BlockHeader.BlockStart;
CurData->BlockHeader.HeaderSize=0;
CurData->BlockHeader.BlockStart=0;
CurData->Inp.InBuf=ReadBufMT;
CurData->Inp.InAddr=0;
if (Block!=0)
{
// Move the incomplete thread entry to the first position,
// so we'll start processing from it. Preserve the original
// buffer for decoded data.
UnpackDecodedItem *Decoded=UnpThreadData[0].Decoded;
uint DecodedAllocated=UnpThreadData[0].DecodedAllocated;
UnpThreadData[0]=*CurData;
UnpThreadData[0].Decoded=Decoded;
UnpThreadData[0].DecodedAllocated=DecodedAllocated;
CurData->Incomplete=false;
}
BlockStart=0;
DataSize-=BufPos;
break;
}
else
if (CurData->BlockHeader.LastBlockInFile)
{
Done=true;
break;
}
}
if (IncompleteThread || Done)
break; // Current buffer is done, read more data or quit.
else
{
int DataLeft=DataSize-BlockStart;
if (DataLeft<TooSmallToProcess)
{
if (DataLeft<0) // Invalid data, must not happen in valid archive.
{
Done=true;
break;
}
// If we do not have incomplete thread and have some data
// in the end of buffer, too small for single thread,
// let's move it to beginning of next buffer.
if (DataLeft>0)
memmove(ReadBufMT,ReadBufMT+BlockStart,DataLeft);
DataSize=DataLeft;
BlockStart=0;
break; // Current buffer is done, try to read more data.
}
}
}
}
UnpPtr&=MaxWinMask; // ProcessDecoded and maybe others can leave UnpPtr > MaxWinMask here.
UnpWriteBuf();
BlockHeader=UnpThreadData[LastBlockNum].BlockHeader;
BlockTables=UnpThreadData[LastBlockNum].BlockTables;
}
// Decode Huffman block and save decoded data to memory.
void Unpack::UnpackDecode(UnpackThreadData &D)
{
if (!D.TableRead)
{
D.TableRead=true;
if (!ReadTables(D.Inp,D.BlockHeader,D.BlockTables))
{
D.DamagedData=true;
return;
}
}
if (D.Inp.InAddr>D.BlockHeader.HeaderSize+D.BlockHeader.BlockSize)
{
D.DamagedData=true;
return;
}
D.DecodedSize=0;
int BlockBorder=D.BlockHeader.BlockStart+D.BlockHeader.BlockSize-1;
// Reserve enough space even for filter entry.
int DataBorder=D.DataSize-16;
int ReadBorder=Min(BlockBorder,DataBorder);
while (true)
{
if (D.Inp.InAddr>=ReadBorder)
{
if (D.Inp.InAddr>BlockBorder || D.Inp.InAddr==BlockBorder &&
D.Inp.InBit>=D.BlockHeader.BlockBitSize)
break;
// If we do not have any more data in file to read, we must process
// what we have until last byte. Otherwise we can return and append
// more data to unprocessed few bytes.
if ((D.Inp.InAddr>=DataBorder) && !D.NoDataLeft || D.Inp.InAddr>=D.DataSize)
{
D.Incomplete=true;
break;
}
}
if (D.DecodedSize>D.DecodedAllocated-8) // Filter can use several slots.
{
D.DecodedAllocated=D.DecodedAllocated*2;
void *Decoded=realloc(D.Decoded,D.DecodedAllocated*sizeof(UnpackDecodedItem));
if (Decoded==NULL)
ErrHandler.MemoryError(); // D.Decoded will be freed in the destructor.
D.Decoded=(UnpackDecodedItem *)Decoded;
}
UnpackDecodedItem *CurItem=D.Decoded+D.DecodedSize++;
uint MainSlot=DecodeNumber(D.Inp,&D.BlockTables.LD);
if (MainSlot<256)
{
if (D.DecodedSize>1)
{
UnpackDecodedItem *PrevItem=CurItem-1;
if (PrevItem->Type==UNPDT_LITERAL && PrevItem->Length<3)
{
PrevItem->Length++;
PrevItem->Literal[PrevItem->Length]=(byte)MainSlot;
D.DecodedSize--;
continue;
}
}
CurItem->Type=UNPDT_LITERAL;
CurItem->Literal[0]=(byte)MainSlot;
CurItem->Length=0;
continue;
}
if (MainSlot>=262)
{
uint Length=SlotToLength(D.Inp,MainSlot-262);
uint DBits,Distance=1,DistSlot=DecodeNumber(D.Inp,&D.BlockTables.DD);
if (DistSlot<4)
{
DBits=0;
Distance+=DistSlot;
}
else
{
DBits=DistSlot/2 - 1;
Distance+=(2 | (DistSlot & 1)) << DBits;
}
if (DBits>0)
{
if (DBits>=4)
{
if (DBits>4)
{
Distance+=((D.Inp.getbits32()>>(36-DBits))<<4);
D.Inp.addbits(DBits-4);
}
uint LowDist=DecodeNumber(D.Inp,&D.BlockTables.LDD);
Distance+=LowDist;
}
else
{
Distance+=D.Inp.getbits32()>>(32-DBits);
D.Inp.addbits(DBits);
}
}
if (Distance>0x100)
{
Length++;
if (Distance>0x2000)
{
Length++;
if (Distance>0x40000)
Length++;
}
}
CurItem->Type=UNPDT_MATCH;
CurItem->Length=(ushort)Length;
CurItem->Distance=Distance;
continue;
}
if (MainSlot==256)
{
UnpackFilter Filter;
ReadFilter(D.Inp,Filter);
CurItem->Type=UNPDT_FILTER;
CurItem->Length=Filter.Type;
CurItem->Distance=Filter.BlockStart;
CurItem=D.Decoded+D.DecodedSize++;
CurItem->Type=UNPDT_FILTER;
CurItem->Length=Filter.Channels;
CurItem->Distance=Filter.BlockLength;
continue;
}
if (MainSlot==257)
{
CurItem->Type=UNPDT_FULLREP;
continue;
}
if (MainSlot<262)
{
CurItem->Type=UNPDT_REP;
CurItem->Distance=MainSlot-258;
uint LengthSlot=DecodeNumber(D.Inp,&D.BlockTables.RD);
uint Length=SlotToLength(D.Inp,LengthSlot);
CurItem->Length=(ushort)Length;
continue;
}
}
}
// Process decoded Huffman block data.
bool Unpack::ProcessDecoded(UnpackThreadData &D)
{
UnpackDecodedItem *Item=D.Decoded,*Border=D.Decoded+D.DecodedSize;
while (Item<Border)
{
UnpPtr&=MaxWinMask;
if (((WriteBorder-UnpPtr) & MaxWinMask)<MAX_INC_LZ_MATCH && WriteBorder!=UnpPtr)
{
UnpWriteBuf();
if (WrittenFileSize>DestUnpSize)
return false;
}
if (Item->Type==UNPDT_LITERAL)
{
#if defined(LITTLE_ENDIAN) && defined(ALLOW_MISALIGNED)
if (Item->Length==3 && UnpPtr<MaxWinSize-4)
{
*(uint32 *)(Window+UnpPtr)=*(uint32 *)Item->Literal;
UnpPtr+=4;
}
else
#endif
for (uint I=0;I<=Item->Length;I++)
Window[UnpPtr++ & MaxWinMask]=Item->Literal[I];
}
else
if (Item->Type==UNPDT_MATCH)
{
InsertOldDist(Item->Distance);
LastLength=Item->Length;
CopyString(Item->Length,Item->Distance);
}
else
if (Item->Type==UNPDT_REP)
{
uint Distance=OldDist[Item->Distance];
for (uint I=Item->Distance;I>0;I--)
OldDist[I]=OldDist[I-1];
OldDist[0]=Distance;
LastLength=Item->Length;
CopyString(Item->Length,Distance);
}
else
if (Item->Type==UNPDT_FULLREP)
{
if (LastLength!=0)
CopyString(LastLength,OldDist[0]);
}
else
if (Item->Type==UNPDT_FILTER)
{
UnpackFilter Filter;
Filter.Type=(byte)Item->Length;
Filter.BlockStart=Item->Distance;
Item++;
Filter.Channels=(byte)Item->Length;
Filter.BlockLength=Item->Distance;
AddFilter(Filter);
}
Item++;
}
return true;
}
// For large blocks we decode and process in same function in single threaded
// mode, so we do not need to store intermediate data in memory.
bool Unpack::UnpackLargeBlock(UnpackThreadData &D)
{
if (!D.TableRead)
{
D.TableRead=true;
if (!ReadTables(D.Inp,D.BlockHeader,D.BlockTables))
{
D.DamagedData=true;
return false;
}
}
if (D.Inp.InAddr>D.BlockHeader.HeaderSize+D.BlockHeader.BlockSize)
{
D.DamagedData=true;
return false;
}
int BlockBorder=D.BlockHeader.BlockStart+D.BlockHeader.BlockSize-1;
// Reserve enough space even for filter entry.
int DataBorder=D.DataSize-16;
int ReadBorder=Min(BlockBorder,DataBorder);
while (true)
{
UnpPtr&=MaxWinMask;
if (D.Inp.InAddr>=ReadBorder)
{
if (D.Inp.InAddr>BlockBorder || D.Inp.InAddr==BlockBorder &&
D.Inp.InBit>=D.BlockHeader.BlockBitSize)
break;
// If we do not have any more data in file to read, we must process
// what we have until last byte. Otherwise we can return and append
// more data to unprocessed few bytes.
if ((D.Inp.InAddr>=DataBorder) && !D.NoDataLeft || D.Inp.InAddr>=D.DataSize)
{
D.Incomplete=true;
break;
}
}
if (((WriteBorder-UnpPtr) & MaxWinMask)<MAX_INC_LZ_MATCH && WriteBorder!=UnpPtr)
{
UnpWriteBuf();
if (WrittenFileSize>DestUnpSize)
return false;
}
uint MainSlot=DecodeNumber(D.Inp,&D.BlockTables.LD);
if (MainSlot<256)
{
Window[UnpPtr++]=(byte)MainSlot;
continue;
}
if (MainSlot>=262)
{
uint Length=SlotToLength(D.Inp,MainSlot-262);
uint DBits,Distance=1,DistSlot=DecodeNumber(D.Inp,&D.BlockTables.DD);
if (DistSlot<4)
{
DBits=0;
Distance+=DistSlot;
}
else
{
DBits=DistSlot/2 - 1;
Distance+=(2 | (DistSlot & 1)) << DBits;
}
if (DBits>0)
{
if (DBits>=4)
{
if (DBits>4)
{
Distance+=((D.Inp.getbits32()>>(36-DBits))<<4);
D.Inp.addbits(DBits-4);
}
uint LowDist=DecodeNumber(D.Inp,&D.BlockTables.LDD);
Distance+=LowDist;
}
else
{
Distance+=D.Inp.getbits32()>>(32-DBits);
D.Inp.addbits(DBits);
}
}
if (Distance>0x100)
{
Length++;
if (Distance>0x2000)
{
Length++;
if (Distance>0x40000)
Length++;
}
}
InsertOldDist(Distance);
LastLength=Length;
CopyString(Length,Distance);
continue;
}
if (MainSlot==256)
{
UnpackFilter Filter;
if (!ReadFilter(D.Inp,Filter) || !AddFilter(Filter))
break;
continue;
}
if (MainSlot==257)
{
if (LastLength!=0)
CopyString(LastLength,OldDist[0]);
continue;
}
if (MainSlot<262)
{
uint DistNum=MainSlot-258;
uint Distance=OldDist[DistNum];
for (uint I=DistNum;I>0;I--)
OldDist[I]=OldDist[I-1];
OldDist[0]=Distance;
uint LengthSlot=DecodeNumber(D.Inp,&D.BlockTables.RD);
uint Length=SlotToLength(D.Inp,LengthSlot);
LastLength=Length;
CopyString(Length,Distance);
continue;
}
}
return true;
}