Merge pull request #354 from sebres/zstd-like-zstdcli--sizehint-checksum

Make compression for the zstd archive type behave more like the zstd CLI
This commit is contained in:
Tino Reichardt
2023-09-17 08:08:24 +02:00
committed by GitHub
5 changed files with 37 additions and 4 deletions

View File

@@ -383,7 +383,7 @@ static size_t st_compress(void *arg)
/* 0, or not specified by user; could be chosen by compressor. */ /* 0, or not specified by user; could be chosen by compressor. */
uint32_t lgwin = 24 /* DEFAULT_LGWIN */; uint32_t lgwin = 24 /* DEFAULT_LGWIN */;
/* Use file size to limit lgwin. */ /* Use file size to limit lgwin. */
if (ctx->unpackSize >= 0) { if (ctx->unpackSize >= 0 && ctx->unpackSize != (uint64_t)(int64_t)-1) {
lgwin = BROTLI_MIN_WINDOW_BITS; lgwin = BROTLI_MIN_WINDOW_BITS;
while (BROTLI_MAX_BACKWARD_LIMIT(lgwin) < while (BROTLI_MAX_BACKWARD_LIMIT(lgwin) <
(uint64_t)ctx->unpackSize) { (uint64_t)ctx->unpackSize) {
@@ -393,7 +393,7 @@ static size_t st_compress(void *arg)
} }
BrotliEncoderSetParameter(state, BROTLI_PARAM_LGWIN, lgwin); BrotliEncoderSetParameter(state, BROTLI_PARAM_LGWIN, lgwin);
} }
if (ctx->unpackSize > 0) { if (ctx->unpackSize > 0 && ctx->unpackSize != (uint64_t)(int64_t)-1) {
uint32_t size_hint = ctx->unpackSize < (1 << 30) ? uint32_t size_hint = ctx->unpackSize < (1 << 30) ?
(uint32_t)ctx->unpackSize : (1u << 30); (uint32_t)ctx->unpackSize : (1u << 30);
BrotliEncoderSetParameter(state, BROTLI_PARAM_SIZE_HINT, size_hint); BrotliEncoderSetParameter(state, BROTLI_PARAM_SIZE_HINT, size_hint);

View File

@@ -286,6 +286,10 @@ static HRESULT UpdateArchive(
CMyComPtr<ICompressProgressInfo> localProgress = localProgressSpec; CMyComPtr<ICompressProgressInfo> localProgress = localProgressSpec;
localProgressSpec->Init(updateCallback, true); localProgressSpec->Init(updateCallback, true);
NCompress::NZSTD::CEncoder *encoderSpec = new NCompress::NZSTD::CEncoder; NCompress::NZSTD::CEncoder *encoderSpec = new NCompress::NZSTD::CEncoder;
// For the zstd archive type, store the dictID and checksum (as the zstd command-line client does)
encoderSpec->dictIDFlag = 1;
encoderSpec->checksumFlag = 1;
encoderSpec->unpackSize = unpackSize;
CMyComPtr<ICompressCoder> encoder = encoderSpec; CMyComPtr<ICompressCoder> encoder = encoderSpec;
RINOK(props.SetCoderProps(encoderSpec, NULL)); RINOK(props.SetCoderProps(encoderSpec, NULL));
RINOK(encoder->Code(fileInStream, outStream, NULL, NULL, localProgress)); RINOK(encoder->Code(fileInStream, outStream, NULL, NULL, localProgress));

View File

@@ -15,7 +15,8 @@ CEncoder::CEncoder():
_numThreads(NWindows::NSystem::GetNumberOfProcessors()), _numThreads(NWindows::NSystem::GetNumberOfProcessors()),
_Long(-1), _Long(-1),
_WindowLog(-1), _WindowLog(-1),
_ctx(NULL) _ctx(NULL),
unpackSize(0)
{ {
_props.clear(); _props.clear();
} }

View File

@@ -30,7 +30,10 @@ CEncoder::CEncoder():
_LdmHashLog(-1), _LdmHashLog(-1),
_LdmMinMatch(-1), _LdmMinMatch(-1),
_LdmBucketSizeLog(-1), _LdmBucketSizeLog(-1),
_LdmHashRateLog(-1) _LdmHashRateLog(-1),
dictIDFlag(-1),
checksumFlag(-1),
unpackSize(0)
{ {
_props.clear(); _props.clear();
} }
@@ -251,6 +254,20 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream,
err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_contentSizeFlag, 1); err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_contentSizeFlag, 1);
if (ZSTD_isError(err)) return E_INVALIDARG; if (ZSTD_isError(err)) return E_INVALIDARG;
if (dictIDFlag != -1) {
err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_dictIDFlag, dictIDFlag);
if (ZSTD_isError(err)) return E_INVALIDARG;
}
if (checksumFlag != -1) {
err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_checksumFlag, checksumFlag);
if (ZSTD_isError(err)) return E_INVALIDARG;
}
if (unpackSize && unpackSize != (UInt64)(Int64)-1) { // size is known
err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_srcSizeHint, (int)(unpackSize <= INT_MAX ? unpackSize : INT_MAX));
if (ZSTD_isError(err)) return E_INVALIDARG;
}
/* enable ldm for large windowlog values */ /* enable ldm for large windowlog values */
if (_WindowLog > 27 && _Long == 0) if (_WindowLog > 27 && _Long == 0)
_Long = 1; _Long = 1;
@@ -320,6 +337,12 @@ STDMETHODIMP CEncoder::Code(ISequentialInStream *inStream,
err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_ldmHashRateLog, _LdmHashRateLog); err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_ldmHashRateLog, _LdmHashRateLog);
if (ZSTD_isError(err)) return E_INVALIDARG; if (ZSTD_isError(err)) return E_INVALIDARG;
} }
//err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_literalCompressionMode, (int)ZSTD_ps_auto);
//if (ZSTD_isError(err)) return E_INVALIDARG;
//err = ZSTD_CCtx_setParameter(_ctx, ZSTD_c_enableDedicatedDictSearch, 1);
//if (ZSTD_isError(err)) return E_INVALIDARG;
} }
for (;;) { for (;;) {

View File

@@ -67,6 +67,11 @@ class CEncoder:
Int32 _LdmHashRateLog; Int32 _LdmHashRateLog;
public: public:
int dictIDFlag;
int checksumFlag;
UInt64 unpackSize;
MY_QUERYINTERFACE_BEGIN2(ICompressCoder) MY_QUERYINTERFACE_BEGIN2(ICompressCoder)
MY_QUERYINTERFACE_ENTRY(ICompressSetCoderMt) MY_QUERYINTERFACE_ENTRY(ICompressSetCoderMt)
MY_QUERYINTERFACE_ENTRY(ICompressSetCoderProperties) MY_QUERYINTERFACE_ENTRY(ICompressSetCoderProperties)