Add options to brotli and implement clean brotli .br support

- allow to specify brotli window size
  - parameter -m0=brotli:long=n, BROTLI_MAX_WINDOW_BITS (24) used by default in brotli-mt, smaller == faster
  - note that :long can be set up to BROTLI_LARGE_MAX_WINDOW_BITS (30), whereas :wlog can be set up to BROTLI_MAX_WINDOW_BITS (24) only...
  - todo: check whether set of BROTLI_PARAM_LARGE_WINDOW to BROTLI_TRUE is needed if (lgwin > BROTLI_MAX_WINDOW_BITS)

- implementation of single-threaded brotli compression / decompression for .br data

Signed-off-by: Sergey G. Brester <info@sebres.de>
Reviewed-by: Tino Reichardt <milky-7zip@mcmilk.de>
This commit is contained in:
sebres
2021-08-30 17:16:08 +02:00
committed by Tino Reichardt
parent 9bb11a56b6
commit eeae03eaa1
8 changed files with 660 additions and 40 deletions

View File

@@ -0,0 +1,351 @@
// BrotliHandler.cpp
#include "StdAfx.h"
#include "../../../C/CpuArch.h"
#include "../../Common/ComTry.h"
#include "../../Common/Defs.h"
#include "../Common/ProgressUtils.h"
#include "../Common/RegisterArc.h"
#include "../Common/StreamUtils.h"
#include "../Compress/BrotliDecoder.h"
#include "../Compress/BrotliEncoder.h"
#include "../Compress/CopyCoder.h"
#include "Common/DummyOutStream.h"
#include "Common/HandlerOut.h"
using namespace NWindows;
namespace NArchive {
namespace NBROTLI {
class CHandler:
public IInArchive,
public IArchiveOpenSeq,
public IOutArchive,
public ISetProperties,
public CMyUnknownImp
{
CMyComPtr<IInStream> _stream;
CMyComPtr<ISequentialInStream> _seqStream;
bool _isArc;
bool _dataAfterEnd;
bool _needMoreInput;
bool _packSize_Defined;
bool _unpackSize_Defined;
UInt64 _packSize;
UInt64 _unpackSize;
UInt64 _numStreams;
UInt64 _numBlocks;
CSingleMethodProps _props;
public:
MY_UNKNOWN_IMP4(
IInArchive,
IArchiveOpenSeq,
IOutArchive,
ISetProperties)
INTERFACE_IInArchive(;)
INTERFACE_IOutArchive(;)
STDMETHOD(OpenSeq)(ISequentialInStream *stream);
STDMETHOD(SetProperties)(const wchar_t * const *names, const PROPVARIANT *values, UInt32 numProps);
CHandler() { }
};
static const Byte kProps[] =
{
kpidSize,
kpidPackSize
};
static const Byte kArcProps[] =
{
kpidNumStreams,
kpidNumBlocks
};
IMP_IInArchive_Props
IMP_IInArchive_ArcProps
STDMETHODIMP CHandler::GetArchiveProperty(PROPID /*propID*/, PROPVARIANT * /*value*/)
{
return S_OK;
}
STDMETHODIMP CHandler::GetNumberOfItems(UInt32 *numItems)
{
*numItems = 1;
return S_OK;
}
STDMETHODIMP CHandler::GetProperty(UInt32 /* index */, PROPID propID, PROPVARIANT *value)
{
NCOM::CPropVariant prop;
switch (propID)
{
case kpidPackSize: if (_packSize_Defined) prop = _packSize; break;
case kpidSize: if (_unpackSize_Defined) prop = _unpackSize; break;
}
prop.Detach(value);
return S_OK;
}
// brotli format has no signature
// brotli stream can't be veriefied unless errors by unpack
#if 0
API_FUNC_static_IsArc IsArc_Brotli(const Byte *p, size_t size)
{
return k_IsArc_Res_YES;
}
}
#endif
STDMETHODIMP CHandler::Open(IInStream *stream, const UInt64 *, IArchiveOpenCallback *)
{
COM_TRY_BEGIN
Close();
{
_isArc = true;
_stream = stream;
_seqStream = stream;
}
return S_OK;
COM_TRY_END
}
STDMETHODIMP CHandler::OpenSeq(ISequentialInStream *stream)
{
Close();
_isArc = true;
_seqStream = stream;
return S_OK;
}
STDMETHODIMP CHandler::Close()
{
_isArc = false;
_dataAfterEnd = false;
_needMoreInput = false;
_packSize_Defined = false;
_unpackSize_Defined = false;
_packSize = 0;
_seqStream.Release();
_stream.Release();
return S_OK;
}
STDMETHODIMP CHandler::Extract(const UInt32 *indices, UInt32 numItems,
Int32 testMode, IArchiveExtractCallback *extractCallback)
{
COM_TRY_BEGIN
if (numItems == 0)
return S_OK;
if (numItems != (UInt32)(Int32)-1 && (numItems != 1 || indices[0] != 0))
return E_INVALIDARG;
if (_packSize_Defined)
extractCallback->SetTotal(_packSize);
CMyComPtr<ISequentialOutStream> realOutStream;
Int32 askMode = testMode ?
NExtract::NAskMode::kTest :
NExtract::NAskMode::kExtract;
RINOK(extractCallback->GetStream(0, &realOutStream, askMode));
if (!testMode && !realOutStream)
return S_OK;
extractCallback->PrepareOperation(askMode);
Int32 opRes;
{
NCompress::NBROTLI::CDecoder *decoderSpec = new NCompress::NBROTLI::CDecoder;
decoderSpec->SetNumberOfThreads(0); /* .br - single threaded processing (without header/mt-frames) */
CMyComPtr<ICompressCoder> decoder = decoderSpec;
decoderSpec->SetInStream(_seqStream);
CDummyOutStream *outStreamSpec = new CDummyOutStream;
CMyComPtr<ISequentialOutStream> outStream(outStreamSpec);
outStreamSpec->SetStream(realOutStream);
outStreamSpec->Init();
realOutStream.Release();
CLocalProgress *lps = new CLocalProgress;
CMyComPtr<ICompressProgressInfo> progress = lps;
lps->Init(extractCallback, true);
UInt64 packSize = 0;
UInt64 unpackedSize = 0;
HRESULT result = S_OK;
for (;;)
{
lps->InSize = packSize;
lps->OutSize = unpackedSize;
RINOK(lps->SetCur());
result = decoderSpec->CodeResume(outStream, &unpackedSize, progress);
UInt64 streamSize = decoderSpec->GetInputProcessedSize();
if (result != S_FALSE && result != S_OK)
return result;
if (unpackedSize == 0)
break;
if (streamSize == packSize)
{
// no new bytes in input stream, So it's good end of archive.
result = S_OK;
break;
}
if (packSize > streamSize)
return E_FAIL;
if (result != S_OK)
break;
}
decoderSpec->ReleaseInStream();
outStream.Release();
if (!_isArc)
opRes = NExtract::NOperationResult::kIsNotArc;
else if (_needMoreInput)
opRes = NExtract::NOperationResult::kUnexpectedEnd;
else if (_dataAfterEnd)
opRes = NExtract::NOperationResult::kDataAfterEnd;
else if (result == S_FALSE)
opRes = NExtract::NOperationResult::kDataError;
else if (result == S_OK)
opRes = NExtract::NOperationResult::kOK;
else
return result;
}
return extractCallback->SetOperationResult(opRes);
COM_TRY_END
}
static HRESULT UpdateArchive(
UInt64 unpackSize,
ISequentialOutStream *outStream,
const CProps &props,
IArchiveUpdateCallback *updateCallback)
{
RINOK(updateCallback->SetTotal(unpackSize));
CMyComPtr<ISequentialInStream> fileInStream;
RINOK(updateCallback->GetStream(0, &fileInStream));
CLocalProgress *localProgressSpec = new CLocalProgress;
CMyComPtr<ICompressProgressInfo> localProgress = localProgressSpec;
localProgressSpec->Init(updateCallback, true);
NCompress::NBROTLI::CEncoder *encoderSpec = new NCompress::NBROTLI::CEncoder;
encoderSpec->unpackSize = unpackSize;
encoderSpec->SetNumberOfThreads(0); /* .br - single threaded processing (without header/mt-frames) */
CMyComPtr<ICompressCoder> encoder = encoderSpec;
RINOK(props.SetCoderProps(encoderSpec, NULL));
RINOK(encoder->Code(fileInStream, outStream, NULL, NULL, localProgress));
return updateCallback->SetOperationResult(NArchive::NUpdate::NOperationResult::kOK);
}
STDMETHODIMP CHandler::GetFileTimeType(UInt32 *type)
{
*type = NFileTimeType::kUnix;
return S_OK;
}
STDMETHODIMP CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numItems,
IArchiveUpdateCallback *updateCallback)
{
COM_TRY_BEGIN
if (numItems != 1)
return E_INVALIDARG;
Int32 newData, newProps;
UInt32 indexInArchive;
if (!updateCallback)
return E_FAIL;
RINOK(updateCallback->GetUpdateItemInfo(0, &newData, &newProps, &indexInArchive));
if ((newProps))
{
{
NCOM::CPropVariant prop;
RINOK(updateCallback->GetProperty(0, kpidIsDir, &prop));
if (prop.vt != VT_EMPTY)
if (prop.vt != VT_BOOL || prop.boolVal != VARIANT_FALSE)
return E_INVALIDARG;
}
}
if ((newData))
{
UInt64 size;
{
NCOM::CPropVariant prop;
RINOK(updateCallback->GetProperty(0, kpidSize, &prop));
if (prop.vt != VT_UI8)
return E_INVALIDARG;
size = prop.uhVal.QuadPart;
}
return UpdateArchive(size, outStream, _props, updateCallback);
}
if (indexInArchive != 0)
return E_INVALIDARG;
CLocalProgress *lps = new CLocalProgress;
CMyComPtr<ICompressProgressInfo> progress = lps;
lps->Init(updateCallback, true);
CMyComPtr<IArchiveUpdateCallbackFile> opCallback;
updateCallback->QueryInterface(IID_IArchiveUpdateCallbackFile, (void **)&opCallback);
if (opCallback)
{
RINOK(opCallback->ReportOperation(
NEventIndexType::kInArcIndex, 0,
NUpdateNotifyOp::kReplicate))
}
if (_stream)
RINOK(_stream->Seek(0, STREAM_SEEK_SET, NULL));
return NCompress::CopyStream(_stream, outStream, progress);
COM_TRY_END
}
STDMETHODIMP CHandler::SetProperties(const wchar_t * const *names, const PROPVARIANT *values, UInt32 numProps)
{
return _props.SetProperties(names, values, numProps);
}
IMP_CreateArcIn
IMP_CreateArcOut
REGISTER_ARC_R(
"brotli", "br brotli tbr", 0, 0x1F,
0, NULL,
0,
NArcInfoFlags::kKeepName | NArcInfoFlags::kPureStartOpen | NArcInfoFlags::kByExtOnlyOpen,
0,
CreateArc, CreateArcOut,
/* IsArc_Brotli */)
}}

View File

@@ -70,6 +70,7 @@ AR_OBJS = \
$O\ArHandler.obj \
$O\ArjHandler.obj \
$O\Base64Handler.obj \
$O\BrotliHandler.obj \
$O\Bz2Handler.obj \
$O\ComHandler.obj \
$O\CpioHandler.obj \

View File

@@ -98,7 +98,7 @@ STDMETHODIMP CDecoder::SetDecoderProperties2(const Byte * prop, UInt32 size)
STDMETHODIMP CDecoder::SetNumberOfThreads(UInt32 numThreads)
{
const UInt32 kNumThreadsMax = BROTLIMT_THREAD_MAX;
if (numThreads < 1) numThreads = 1;
if (numThreads < 0) numThreads = 0;
if (numThreads > kNumThreadsMax) numThreads = kNumThreadsMax;
_numThreads = numThreads;
return S_OK;

View File

@@ -12,8 +12,10 @@ CEncoder::CEncoder():
_processedIn(0),
_processedOut(0),
_inputSize(0),
_ctx(NULL),
_numThreads(NWindows::NSystem::GetNumberOfProcessors())
_numThreads(NWindows::NSystem::GetNumberOfProcessors()),
_Long(-1),
_WindowLog(-1),
_ctx(NULL)
{
_props.clear();
}
@@ -52,6 +54,37 @@ STDMETHODIMP CEncoder::SetCoderProperties(const PROPID * propIDs, const PROPVARI
SetNumberOfThreads(v);
break;
}
case NCoderPropID::kLong:
{
/* like --long in zstd cli program */
_Long = 1;
if (v == 0) {
if (prop.vt == VT_EMPTY) {
// m0=brotli:long -> long=default
_WindowLog = BROTLI_MAX_WINDOW_BITS; //BROTLI_DEFAULT_WINDOW;
} else {
// m0=brotli:long=0 -> long=auto
_WindowLog = 0;
}
} else if (v < BROTLI_MIN_WINDOW_BITS) {
// m0=brotli:long=9 -> long=10
_WindowLog = BROTLI_MIN_WINDOW_BITS;
} else if (v > BROTLI_LARGE_MAX_WINDOW_BITS) {
// m0=brotli:long=33 -> long=max
_WindowLog = BROTLI_LARGE_MAX_WINDOW_BITS;
} else {
// m0=brotli:long=15 -> long=value
_WindowLog = v;
}
break;
}
case NCoderPropID::kWindowLog:
{
if (v < BROTLI_MIN_WINDOW_BITS) v = BROTLI_MIN_WINDOW_BITS;
if (v > BROTLI_MAX_WINDOW_BITS) v = BROTLI_MAX_WINDOW_BITS;
_WindowLog = v;
break;
}
default:
{
break;
@@ -102,11 +135,12 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream,
/* 2) create compression context, if needed */
if (!_ctx)
_ctx = BROTLIMT_createCCtx(_numThreads, _props._level, _inputSize);
_ctx = BROTLIMT_createCCtx(_numThreads, unpackSize, _props._level, _inputSize,
_WindowLog >= 0 ? _WindowLog : BROTLI_MAX_WINDOW_BITS);
if (!_ctx)
return S_FALSE;
/* 3) compress */
/* 4) compress */
result = BROTLIMT_compressCCtx(_ctx, &rdwr);
if (BROTLIMT_isError(result)) {
if (result == (size_t)-BROTLIMT_error_canceled)
@@ -120,7 +154,7 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream,
STDMETHODIMP CEncoder::SetNumberOfThreads(UInt32 numThreads)
{
const UInt32 kNumThreadsMax = BROTLIMT_THREAD_MAX;
if (numThreads < 1) numThreads = 1;
if (numThreads < 0) numThreads = 0;
if (numThreads > kNumThreadsMax) numThreads = kNumThreadsMax;
_numThreads = numThreads;
return S_OK;

View File

@@ -45,9 +45,15 @@ class CEncoder:
UInt32 _inputSize;
UInt32 _numThreads;
Int32 _Long;
Int32 _WindowLog;
BROTLIMT_CCtx *_ctx;
public:
UInt64 unpackSize;
MY_QUERYINTERFACE_BEGIN2(ICompressCoder)
MY_QUERYINTERFACE_ENTRY(ICompressSetCoderMt)
MY_QUERYINTERFACE_ENTRY(ICompressSetCoderProperties)