Files
easy7zip/7zip/Compress/BZip2/BZip2Encoder.cpp
Igor Pavlov 3c510ba80b 4.20
2016-05-28 00:15:41 +01:00

634 lines
16 KiB
C++
Executable File

// BZip2Encoder.cpp
#include "StdAfx.h"
#include "BZip2Encoder.h"
#include "../../../Common/Alloc.h"
#include "../BWT/Mtf8.h"
#include "BZip2CRC.h"
namespace NCompress {
namespace NBZip2 {
// Size (1 << 17 = 128 KiB) passed to Create() for both the input and the
// output stream buffers in CodeReal.
static const UInt32 kBufferSize = (1 << 17);
// Number of table-refinement passes EncodeBlock runs for each table count it tries.
static const int kNumHuffPasses = 4;
// Sets up an encoder with no block buffer allocated yet, Huffman encoders
// still to be created, a single pass, table-count optimisation off and the
// largest block size multiplier.
CEncoder::CEncoder()
  : m_Block(0)
  , m_NeedHuffmanCreate(true)
  , m_NumPasses(1)
  , m_OptimizeNumTables(false)
  , m_BlockSizeMult(kBlockSizeMultMax)
{
}
// Releases the working buffer obtained with BigAlloc in CodeReal
// (m_Block is 0 if CodeReal never allocated it).
CEncoder::~CEncoder()
{
  ::BigFree(m_Block);
}
// Fills `buffer` with the next run-length-coded block read from m_InStream.
// A run is stored as up to kRleModeRepSize literal copies of the byte; once
// kRleModeRepSize copies have been written, one extra byte holding the number
// of further repetitions (0..255) follows.
// Returns the number of bytes written to `buffer`; 0 means the input is exhausted.
UInt32 CEncoder::ReadRleBlock(Byte *buffer)
{
  Byte last;
  if (!m_InStream.ReadByte(last))
    return 0;

  // "- 1" to support RLE: one byte is held back for a trailing count byte.
  const UInt32 limit = m_BlockSizeMult * kBlockSizeStep - 1;
  UInt32 written = 0;
  int runLen = 1;
  buffer[written++] = last;

  while (written < limit)
  {
    Byte cur;
    if (!m_InStream.ReadByte(cur))
      break;

    if (cur == last)
    {
      runLen++;
      if (runLen <= kRleModeRepSize)
        buffer[written++] = cur;
      else if (runLen == kRleModeRepSize + 255)
      {
        // The count byte is saturated: emit it and start a new run.
        buffer[written++] = (Byte)(runLen - kRleModeRepSize);
        runLen = 0;
      }
    }
    else
    {
      // The run ended: close it with its count byte if it was long enough.
      if (runLen >= kRleModeRepSize)
        buffer[written++] = (Byte)(runLen - kRleModeRepSize);
      buffer[written++] = cur;
      runLen = 1;
      last = cur;
    }
  }

  // it's to support original BZip2 decoder
  if (runLen >= kRleModeRepSize)
    buffer[written++] = (Byte)(runLen - kRleModeRepSize);
  return written;
}
// Writes the low `numBits` bits of `value` to the current (temporary) block stream.
void CEncoder::WriteBits2(UInt32 value, UInt32 numBits)
{
  m_OutStreamCurrent->WriteBits(value, numBits);
}
// Writes one byte (8 bits) to the current block stream.
void CEncoder::WriteByte2(Byte b)
{
  WriteBits2(b, 8);
}
// Writes a single bit (1 for true, 0 for false) to the current block stream.
void CEncoder::WriteBit2(bool v)
{
  if (v)
    WriteBits2(1, 1);
  else
    WriteBits2(0, 1);
}
// Writes a 32-bit CRC to the current block stream, most significant byte first.
void CEncoder::WriteCRC2(UInt32 v)
{
  for (int shift = 24; shift >= 0; shift -= 8)
    WriteByte2((Byte)(v >> shift));
}
// Writes the low `numBits` bits of `value` to the final output stream.
void CEncoder::WriteBits(UInt32 value, UInt32 numBits)
{
  m_OutStream.WriteBits(value, numBits);
}
// Writes one byte (8 bits) to the final output stream.
void CEncoder::WriteByte(Byte b)
{
  WriteBits(b, 8);
}
// Writes a single bit (1 for true, 0 for false) to the final output stream.
void CEncoder::WriteBit(bool v)
{
  if (v)
    WriteBits(1, 1);
  else
    WriteBits(0, 1);
}
// Writes a 32-bit CRC to the final output stream, most significant byte first.
void CEncoder::WriteCRC(UInt32 v)
{
  for (int shift = 24; shift >= 0; shift -= 8)
    WriteByte((Byte)(v >> shift));
}
// Encodes the body of one block into m_OutStreamCurrent:
//   1. the "randomised" flag and the origPtr value returned by the block sorter,
//   2. the bitmap of byte values that occur in the block,
//   3. the move-to-front / zero-run symbol stream, coded with numTables
//      Huffman tables, one table selected per group of kGroupSize symbols.
// When m_OptimizeNumTables is set, every table count from kNumTablesMin to
// kNumTablesMax is tried in turn (the output position is rewound before each
// attempt) and the block ends up encoded with the count that gave the fewest
// bits. Otherwise the table count is derived from the number of symbols.
// blockSize > 0
void CEncoder::EncodeBlock(Byte *block, UInt32 blockSize)
{
WriteBit2(false); // Randomised = false
{
// Sort the block; the returned value is stored in kNumOrigBits bits.
UInt32 origPtr = m_BlockSorter.Sort(block, blockSize);
WriteBits2(origPtr, kNumOrigBits);
}
CMtf8Encoder mtf;
int numInUse = 0;
{
// Find which byte values occur. mtf.Buffer receives the used values in
// ascending order; inUse16 marks which groups of 16 values have any member.
bool inUse[256];
bool inUse16[16];
UInt32 i;
for (i = 0; i < 256; i++)
inUse[i] = false;
for (i = 0; i < 16; i++)
inUse16[i] = false;
for (i = 0; i < blockSize; i++)
inUse[block[i]] = true;
for (i = 0; i < 256; i++)
if (inUse[i])
{
inUse16[i >> 4] = true;
mtf.Buffer[numInUse++] = (Byte)i;
}
// Emit the 16 group flags, then one flag per value for each used group.
for (i = 0; i < 16; i++)
WriteBit2(inUse16[i]);
for (i = 0; i < 256; i++)
if (inUse16[i >> 4])
WriteBit2(inUse[i]);
}
// Alphabet: symbols 0 and 1 are the zero-run digits, MTF position p > 0 is
// symbol p + 1, and alphaSize - 1 is the end-of-block symbol.
int alphaSize = numInUse + 2;
// Symbols are stored in mtfs one per byte; a symbol >= 0xFF is stored as
// 0xFF followed by (symbol - 0xFF).
Byte *mtfs = m_MtfArray;
UInt32 mtfArraySize = 0;
UInt32 symbolCounts[kMaxAlphaSize];
{
for (int i = 0; i < kMaxAlphaSize; i++)
symbolCounts[i] = 0;
}
{
// rleSize counts consecutive MTF positions equal to 0 that are still pending.
UInt32 rleSize = 0;
UInt32 i = 0;
do
{
// Take the byte preceding each sorted position (wrapping at index 0).
UInt32 index = m_BlockSorter.Indices[i];
if (index == 0)
index = blockSize - 1;
else
index--;
int pos = mtf.FindAndMove(block[index]);
if (pos == 0)
rleSize++;
else
{
// Flush the pending zero run as a sequence of symbols 0/1,
// least significant digit first.
while (rleSize != 0)
{
rleSize--;
mtfs[mtfArraySize++] = (rleSize & 1);
symbolCounts[rleSize & 1]++;
rleSize >>= 1;
}
// Store symbol pos + 1, using the 0xFF escape when it does not fit below 0xFF.
if (pos >= 0xFE)
{
mtfs[mtfArraySize++] = 0xFF;
mtfs[mtfArraySize++] = pos - 0xFE;
}
else
mtfs[mtfArraySize++] = pos + 1;
symbolCounts[pos + 1]++;
}
}
while (++i < blockSize);
// Flush a zero run that reaches the end of the block.
while (rleSize != 0)
{
rleSize--;
mtfs[mtfArraySize++] = (rleSize & 1);
symbolCounts[rleSize & 1]++;
rleSize >>= 1;
}
// Append the end-of-block symbol (alphaSize - 1), escaped if needed.
if (alphaSize < 256)
mtfs[mtfArraySize++] = (Byte)(alphaSize - 1);
else
{
mtfs[mtfArraySize++] = 0xFF;
mtfs[mtfArraySize++] = (Byte)(alphaSize - 256);
}
symbolCounts[alphaSize - 1]++;
}
// Total number of symbols to be coded (sum of all counts).
UInt32 numSymbols = 0;
{
for (int i = 0; i < kMaxAlphaSize; i++)
numSymbols += symbolCounts[i];
}
int bestNumTables = kNumTablesMin;
UInt32 bestPrice = 0xFFFFFFFF;
// Remember the output state so each attempt can restart from the same bit.
UInt32 startPos = m_OutStreamCurrent->GetPos();
UInt32 startCurByte = m_OutStreamCurrent->GetCurByte();
// In optimizing mode nt runs over every table count; the extra iteration
// nt == kNumTablesMax + 1 re-encodes with the best count found. In normal
// mode the loop body runs once (see the break below).
for (int nt = kNumTablesMin; nt <= kNumTablesMax + 1; nt++)
{
int numTables;
if(m_OptimizeNumTables)
{
// Rewind the output to where this block's table data starts.
m_OutStreamCurrent->SetPos(startPos);
m_OutStreamCurrent->SetCurState((startPos & 7), startCurByte);
if (nt <= kNumTablesMax)
numTables = nt;
else
numTables = bestNumTables;
}
else
{
// Fixed choice of table count based on the number of symbols.
if (numSymbols < 200) numTables = 2;
else if (numSymbols < 600) numTables = 3;
else if (numSymbols < 1200) numTables = 4;
else if (numSymbols < 2400) numTables = 5;
else numTables = 6;
}
WriteBits2(numTables, kNumTablesBits);
// One selector per group of kGroupSize symbols (rounded up).
UInt32 numSelectors = (numSymbols + kGroupSize - 1) / kGroupSize;
WriteBits2(numSelectors, kNumSelectorsBits);
{
// Initial tables: walk the alphabet once, giving each table (from index
// numTables - 1 down to 0) a contiguous symbol range [gs, ge) holding
// roughly remFreq / t of the remaining frequency. Inside its range a table
// gets Len 0, outside Len 1.
UInt32 remFreq = numSymbols;
int gs = 0;
int t = numTables;
do
{
UInt32 tFreq = remFreq / t;
int ge = gs;
UInt32 aFreq = 0;
while (aFreq < tFreq) // && ge < alphaSize)
aFreq += symbolCounts[ge++];
// For some tables (not the first or last assigned, odd distance from the
// first) give back the last symbol taken, if the range has more than one.
if (ge - 1 > gs && t != numTables && t != 1 && (((numTables - t) & 1) == 1))
aFreq -= symbolCounts[--ge];
NCompression::NHuffman::CEncoder &huffEncoder = m_HuffEncoders[t - 1];
int i = 0;
do
huffEncoder.m_Items[i].Len = (i >= gs && i < ge) ? 0 : 1;
while (++i < alphaSize);
gs = ge;
remFreq -= aFreq;
}
while(--t != 0);
}
// Refinement: in each pass assign every group to its cheapest table (using
// the current code lengths), accumulate the group's symbols into that table,
// then rebuild all tables.
for (int pass = 0; pass < kNumHuffPasses; pass++)
{
{
int t = 0;
do
m_HuffEncoders[t].StartNewBlock();
while(++t < numTables);
}
{
UInt32 mtfPos = 0;
UInt32 g = 0;
do
{
// Decode the next group of up to kGroupSize symbols from mtfs.
UInt32 symbols[kGroupSize];
int i = 0;
do
{
UInt32 symbol = mtfs[mtfPos++];
if (symbol >= 0xFF)
symbol += mtfs[mtfPos++];
symbols[i] = symbol;
}
while (++i < kGroupSize && mtfPos < mtfArraySize);
// NOTE(review): this bestPrice shadows the function-level bestPrice; it
// only tracks the cheapest table for the current group.
UInt32 bestPrice = 0xFFFFFFFF;
int t = 0;
do
{
NCompression::NHuffman::CItem *items = m_HuffEncoders[t].m_Items;
UInt32 price = 0;
int j = 0;
do
price += items[symbols[j]].Len;
while (++j < i);
// Strict '<' keeps the lowest-numbered table on ties.
if (price < bestPrice)
{
m_Selectors[g] = (Byte)t;
bestPrice = price;
}
}
while(++t < numTables);
NCompression::NHuffman::CEncoder &huffEncoder = m_HuffEncoders[m_Selectors[g++]];
int j = 0;
do
huffEncoder.AddSymbol(symbols[j]);
while (++j < i);
}
while (mtfPos < mtfArraySize);
}
int t = 0;
do
{
NCompression::NHuffman::CEncoder &huffEncoder = m_HuffEncoders[t];
int i = 0;
// Raise zero frequencies to 1 before building the tree
// (presumably so that every symbol of the alphabet receives a code).
do
if (huffEncoder.m_Items[i].Freq == 0)
huffEncoder.m_Items[i].Freq = 1;
while(++i < alphaSize);
Byte levels[kMaxAlphaSize];
huffEncoder.BuildTree(levels);
}
while(++t < numTables);
}
{
// Write the selectors: each is replaced by its position in a move-to-front
// list of table indices and written as that many 1 bits followed by a 0 bit.
Byte mtfSel[kNumTablesMax];
{
int t = 0;
do
mtfSel[t] = (Byte)t;
while(++t < numTables);
}
UInt32 i = 0;
do
{
Byte sel = m_Selectors[i];
int pos;
for (pos = 0; mtfSel[pos] != sel; pos++)
WriteBit2(true);
WriteBit2(false);
for (; pos > 0; pos--)
mtfSel[pos] = mtfSel[pos - 1];
mtfSel[0] = sel;
}
while(++i < numSelectors);
}
{
// Write the code lengths of each table: the first length in kNumLevelsBits
// bits, then for every symbol the bit pairs 1,0 (increment) or 1,1
// (decrement) until the running length matches, closed by a 0 bit.
int t = 0;
do
{
NCompression::NHuffman::CItem *items = m_HuffEncoders[t].m_Items;
UInt32 len = items[0].Len;
WriteBits2(len, kNumLevelsBits);
int i = 0;
do
{
UInt32 level = items[i].Len;
while (len != level)
{
WriteBit2(true);
if (len < level)
{
WriteBit2(false);
len++;
}
else
{
WriteBit2(true);
len--;
}
}
WriteBit2(false);
}
while (++i < alphaSize);
}
while(++t < numTables);
}
{
// Emit the symbol stream, switching to the selected table at the start of
// every group of kGroupSize symbols.
UInt32 groupSize = 0;
UInt32 groupIndex = 0;
NCompression::NHuffman::CEncoder *huffEncoder = 0;
UInt32 mtfPos = 0;
do
{
UInt32 symbol = mtfs[mtfPos++];
if (symbol >= 0xFF)
symbol += mtfs[mtfPos++];
if (groupSize == 0)
{
groupSize = kGroupSize;
huffEncoder = &m_HuffEncoders[m_Selectors[groupIndex++]];
}
// NOTE(review): the trailing backslash on the next line is a stray line
// continuation; it only splices the following statement onto the same line.
groupSize--; \
huffEncoder->CodeOneValue(m_OutStreamCurrent, symbol);
}
while (mtfPos < mtfArraySize);
}
// Without optimization the first attempt is final.
if (!m_OptimizeNumTables)
break;
// Cost of this attempt in bits.
UInt32 price = m_OutStreamCurrent->GetPos() - startPos;
if (price <= bestPrice)
{
// If the last candidate is (one of) the best, its output is already in
// place, so the extra re-encoding iteration is skipped.
if (nt == kNumTablesMax)
break;
bestPrice = price;
bestNumTables = nt;
}
}
}
// Writes a complete block to the current block stream: the six signature
// bytes, the block CRC and then the encoded block body (EncodeBlock).
// The CRC is computed over the bytes the run-length-coded `block` stands for:
// after kRleModeRepSize equal bytes, the next byte is a repeat count.
// Returns the block CRC.
// blockSize > 0
UInt32 CEncoder::EncodeBlockWithHeaders(Byte *block, UInt32 blockSize)
{
  WriteByte2(kBlockSig0);
  WriteByte2(kBlockSig1);
  WriteByte2(kBlockSig2);
  WriteByte2(kBlockSig3);
  WriteByte2(kBlockSig4);
  WriteByte2(kBlockSig5);

  CBZip2CRC crc;
  int runLen = 0;
  Byte last = block[0];
  UInt32 pos = 0;
  do
  {
    Byte cur = block[pos];
    if (runLen != kRleModeRepSize)
    {
      // Literal byte: extend the current run or start a new one.
      if (cur == last)
        runLen++;
      else
      {
        runLen = 1;
        last = cur;
      }
      crc.UpdateByte(cur);
    }
    else
    {
      // Count byte: feed `cur` more copies of the run byte to the CRC.
      for (; cur > 0; cur--)
        crc.UpdateByte(last);
      runLen = 0;
    }
  }
  while (++pos < blockSize);

  const UInt32 digest = crc.GetDigest();
  WriteCRC2(digest);
  EncodeBlock(block, blockSize);
  return digest;
}
// Encodes `block` (blockSize run-length-coded bytes) into m_OutStreamCurrent
// and folds the resulting block CRC(s) into `combinedCRC`.
//
// With numPasses > 1 and a block of at least 1 KiB, two encodings are compared:
//   - the block split in two, each part encoded recursively with numPasses - 1;
//   - the block encoded as a single unit.
// The split version is written first and padded to a byte boundary; the
// single-block version is written after it, using the bit offset the output
// had on entry. Whichever is shorter in bits is kept at the original position,
// and `combinedCRC` is updated to match the version kept.
//
// Parameters:
//   combinedCRC - running combined CRC; updated for the block(s) finally kept.
//   block       - start of the run-length-coded data.
//   blockSize   - number of bytes in `block` (> 0).
//   numPasses   - remaining split depth; 1 disables splitting.
void CEncoder::EncodeBlock2(CBZip2CombinedCRC &combinedCRC,
    Byte *block, UInt32 blockSize, UInt32 numPasses)
{
  bool needCompare = false;
  // CRC as it would be if the split encoding is the one kept.
  CBZip2CombinedCRC specCombinedCRC = combinedCRC;

  UInt32 startBytePos = m_OutStreamCurrent->GetBytePos();
  UInt32 startPos = m_OutStreamCurrent->GetPos();
  UInt32 startCurByte = m_OutStreamCurrent->GetCurByte();
  // Only meaningful when needCompare is true; initialized to keep them defined.
  UInt32 endCurByte = 0;
  UInt32 endPos = 0;

  if (numPasses > 1 && blockSize >= (1 << 10))
  {
    // Start at the middle and move forward while the candidate split point has
    // equal neighbouring bytes (presumably so that a run and its count byte
    // are never separated - confirm against ReadRleBlock).
    // The bound is tested first so block[blockSize0] is never read when
    // blockSize0 == blockSize.
    UInt32 blockSize0 = blockSize / 2;
    while (blockSize0 < blockSize &&
        (block[blockSize0] == block[blockSize0 - 1] ||
         block[blockSize0 - 1] == block[blockSize0 - 2]))
      blockSize0++;

    if (blockSize0 < blockSize)
    {
      EncodeBlock2(specCombinedCRC, block, blockSize0, numPasses - 1);
      EncodeBlock2(specCombinedCRC, block + blockSize0, blockSize - blockSize0,
          numPasses - 1);
      endPos = m_OutStreamCurrent->GetPos();
      endCurByte = m_OutStreamCurrent->GetCurByte();
      // Pad the split encoding to a byte boundary, then give the stream the
      // same bit offset and partial byte it had on entry, so the single-block
      // encoding that follows can later be moved with a plain byte copy.
      if ((endPos & 7) > 0)
        WriteBits2(0, 8 - (endPos & 7));
      m_OutStreamCurrent->SetCurState((startPos & 7), startCurByte);
      needCompare = true;
    }
  }

  // Encode the block as a single unit.
  UInt32 startBytePos2 = m_OutStreamCurrent->GetBytePos();
  UInt32 startPos2 = m_OutStreamCurrent->GetPos();
  UInt32 crcVal = EncodeBlockWithHeaders(block, blockSize);
  UInt32 endPos2 = m_OutStreamCurrent->GetPos();
  combinedCRC.Update(crcVal);

  if (needCompare)
  {
    UInt32 size2 = endPos2 - startPos2;
    if (size2 < endPos - startPos)
    {
      // The single block is smaller: move it down over the split encoding.
      UInt32 numBytes = m_OutStreamCurrent->GetBytePos() - startBytePos2;
      Byte *buffer = m_OutStreamCurrent->GetStream();
      for (UInt32 i = 0; i < numBytes; i++)
        buffer[startBytePos + i] = buffer[startBytePos2 + i];
      m_OutStreamCurrent->SetPos(startPos + endPos2 - startPos2);
    }
    else
    {
      // The split encoding is kept: restore the stream state saved right
      // after it and adopt the CRC accumulated by the two recursive calls.
      m_OutStreamCurrent->SetPos(endPos);
      m_OutStreamCurrent->SetCurState((endPos & 7), endCurByte);
      combinedCRC = specCombinedCRC;
    }
  }
}
// Encodes the block held in m_Block (blockSize bytes) into a temporary bit
// stream backed by m_TempArray, then copies the result to the final output:
// the whole bytes first, followed by the remaining (size & 7) bits.
void CEncoder::EncodeBlock3(CBZip2CombinedCRC &combinedCRC, UInt32 blockSize)
{
  CMsbfEncoderTemp tempStream;
  tempStream.SetStream(m_TempArray);
  tempStream.Init();
  m_OutStreamCurrent = &tempStream;

  EncodeBlock2(combinedCRC, m_Block, blockSize, m_NumPasses);

  const UInt32 totalBits = tempStream.GetPos();
  const UInt32 wholeBytes = totalBits / 8;
  UInt32 k = 0;
  while (k < wholeBytes)
  {
    m_OutStream.WriteBits(m_TempArray[k], 8);
    k++;
  }
  // Trailing bits that did not fill a whole byte.
  WriteBits(tempStream.GetCurByte(), (totalBits & 7));
}
// Compresses everything readable from `inStream` to `outStream`.
// Allocates the sorter, the working buffer, the stream buffers and the Huffman
// encoders as needed, writes the archive signature (whose last byte carries
// the block size multiplier), encodes blocks until ReadRleBlock returns 0, and
// finishes with the end signature and the combined CRC.
// `inSize` and `outSize` are not used here.
// Returns S_OK on success, E_OUTOFMEMORY if any allocation fails, or the
// error propagated by RINOK from progress->SetRatioInfo.
HRESULT CEncoder::CodeReal(ISequentialInStream *inStream,
    ISequentialOutStream *outStream, const UInt64 *inSize, const UInt64 *outSize,
    ICompressProgressInfo *progress)
{
  if (!m_BlockSorter.Create(kBlockSizeMax))
    return E_OUTOFMEMORY;

  if (m_Block == 0)
  {
    // One allocation is carved into the block, MTF and temporary-output areas.
    Byte *workspace = (Byte *)BigAlloc(kBlockSizeMax * 5 + kBlockSizeMax / 10 + (20 << 10));
    if (workspace == 0)
      return E_OUTOFMEMORY;
    m_Block = workspace;
    m_MtfArray = m_Block + kBlockSizeMax;
    m_TempArray = m_MtfArray + kBlockSizeMax * 2 + 2;
  }

  if (!m_InStream.Create(kBufferSize) || !m_OutStream.Create(kBufferSize))
    return E_OUTOFMEMORY;

  if (m_NeedHuffmanCreate)
  {
    int t = 0;
    while (t < kNumTablesMax)
    {
      if (!m_HuffEncoders[t].Create(kMaxAlphaSize, 0, 0, kMaxHuffmanLen))
        return E_OUTOFMEMORY;
      t++;
    }
    m_NeedHuffmanCreate = false;
  }

  m_InStream.SetStream(inStream);
  m_InStream.Init();
  m_OutStream.SetStream(outStream);
  m_OutStream.Init();

  // NOTE(review): presumably flushes the output when this scope is left -
  // confirm against the CFlusher declaration.
  CFlusher flusher(this);

  CBZip2CombinedCRC combinedCRC;

  // Archive signature; the last byte encodes the block size multiplier.
  WriteByte(kArSig0);
  WriteByte(kArSig1);
  WriteByte(kArSig2);
  WriteByte((Byte)(kArSig3 + m_BlockSizeMult));

  UInt32 blockSize;
  while ((blockSize = ReadRleBlock(m_Block)) != 0)
  {
    EncodeBlock3(combinedCRC, blockSize);
    if (!progress)
      continue;
    // Report bytes consumed from the input and bytes produced so far.
    UInt64 packSize = m_InStream.GetProcessedSize();
    UInt64 unpackSize = m_OutStream.GetProcessedSize();
    RINOK(progress->SetRatioInfo(&packSize, &unpackSize));
  }

  // End-of-stream signature followed by the CRC combined over all blocks.
  WriteByte(kFinSig0);
  WriteByte(kFinSig1);
  WriteByte(kFinSig2);
  WriteByte(kFinSig3);
  WriteByte(kFinSig4);
  WriteByte(kFinSig5);
  WriteCRC(combinedCRC.GetDigest());
  return S_OK;
}
// Public entry point: forwards to CodeReal and turns any C++ exception thrown
// from it into the S_FALSE result code.
STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream,
    ISequentialOutStream *outStream, const UInt64 *inSize, const UInt64 *outSize,
    ICompressProgressInfo *progress)
{
  HRESULT res;
  try
  {
    res = CodeReal(inStream, outStream, inSize, outSize, progress);
  }
  catch(...)
  {
    res = S_FALSE;
  }
  return res;
}
// Applies encoder settings, processing the properties in order.
//   kNumPasses      - VT_UI4 in 1..10; also enables table-count optimisation
//                     when the value is greater than 1.
//   kDictionarySize - VT_UI4 byte size; converted to a block size multiplier
//                     and clamped to [kBlockSizeMultMin, kBlockSizeMultMax].
// Returns E_INVALIDARG for an unknown property ID, a wrong variant type or an
// out-of-range pass count (properties handled before the failure stay applied);
// S_OK otherwise.
HRESULT CEncoder::SetCoderProperties(const PROPID *propIDs,
    const PROPVARIANT *properties, UInt32 numProperties)
{
  for (UInt32 idx = 0; idx < numProperties; idx++)
  {
    const PROPVARIANT &prop = properties[idx];
    switch (propIDs[idx])
    {
      case NCoderPropID::kNumPasses:
      {
        if (prop.vt != VT_UI4)
          return E_INVALIDARG;
        const UInt32 passes = prop.ulVal;
        if (passes == 0 || passes > 10)
          return E_INVALIDARG;
        m_NumPasses = passes;
        m_OptimizeNumTables = (m_NumPasses > 1);
        break;
      }
      case NCoderPropID::kDictionarySize:
      {
        if (prop.vt != VT_UI4)
          return E_INVALIDARG;
        UInt32 mult = prop.ulVal / kBlockSizeStep;
        if (mult < kBlockSizeMultMin)
          mult = kBlockSizeMultMin;
        else if (mult > kBlockSizeMultMax)
          mult = kBlockSizeMultMax;
        m_BlockSizeMult = mult;
        break;
      }
      default:
        return E_INVALIDARG;
    }
  }
  return S_OK;
}
}}