/* lzma2_enc.c -- LZMA2 Encoder
Based on LzmaEnc.c and Lzma2Enc.c : Igor Pavlov
Modified for FL2 by Conor McCarthy
Public domain
*/
#include <stdlib.h>
#include <math.h>
#include "fl2_errors.h"
#include "fl2_internal.h"
#include "lzma2_enc.h"
#include "fl2_compress_internal.h"
#include "mem.h"
#include "count.h"
#include "radix_mf.h"
#include "range_enc.h"
#ifdef FL2_XZ_BUILD
# include "tuklib_integer.h"
# define MEM_readLE32(a) unaligned_read32le(a)
# ifdef TUKLIB_FAST_UNALIGNED_ACCESS
# define MEM_read16(a) (*(const U16*)(a))
# endif
#endif
#define kNumReps 4U
#define kNumStates 12U
#define kNumLiterals 0x100U
#define kNumLitTables 3U
#define kNumLenToPosStates 4U
#define kNumPosSlotBits 6U
#define kDicLogSizeMin 18U
#define kDicLogSizeMax 31U
#define kDistTableSizeMax (kDicLogSizeMax * 2U)
#define kNumAlignBits 4U
#define kAlignTableSize (1U << kNumAlignBits)
#define kAlignMask (kAlignTableSize - 1U)
#define kMatchRepriceFrequency 64U
#define kRepLenRepriceFrequency 64U
#define kStartPosModelIndex 4U
#define kEndPosModelIndex 14U
#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex)
#define kNumFullDistancesBits (kEndPosModelIndex >> 1U)
#define kNumFullDistances (1U << kNumFullDistancesBits)
#define kNumPositionBitsMax 4U
#define kNumPositionStatesMax (1U << kNumPositionBitsMax)
#define kNumLiteralContextBitsMax 4U
#define kNumLiteralPosBitsMax 4U
#define kLcLpMax 4U
#define kLenNumLowBits 3U
#define kLenNumLowSymbols (1U << kLenNumLowBits)
#define kLenNumHighBits 8U
#define kLenNumHighSymbols (1U << kLenNumHighBits)
#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols)
#define kMatchLenMin 2U
#define kMatchLenMax (kMatchLenMin + kLenNumSymbolsTotal - 1U)
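/* Match lengths run from kMatchLenMin to kMatchLenMax, i.e. 2..273:
 * 8 low and 8 mid symbols (stored together in low[]) plus 256 high symbols. */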
#define kMatchesMax 65U /* Doesn't need to be larger than FL2_HYBRIDCYCLES_MAX + 1 */
#define kOptimizerEndSize 32U
#define kOptimizerBufferSize (kMatchLenMax * 2U + kOptimizerEndSize)
#define kOptimizerSkipSize 16U
#define kInfinityPrice (1UL << 30U)
#define kNullDist (U32)-1
#define kMaxMatchEncodeSize 20
#define kMaxChunkCompressedSize (1UL << 16U)
/* Need to leave sufficient space for expanded output from a full opt buffer with bad starting probs */
#define kChunkSize (kMaxChunkCompressedSize - 2048U)
#define kSqrtChunkSize 252U
/* Hard to define where the match table read pos definitely catches up with the output size, but
* 64 bytes of input expanding beyond 256 bytes right after an encoder reset is most likely impossible.
* The encoder will error out if this happens. */
#define kTempMinOutput 256U
#define kTempBufferSize (kTempMinOutput + kOptimizerBufferSize + kOptimizerBufferSize / 4U)
#define kMaxChunkUncompressedSize (1UL << 21U)
#define kChunkHeaderSize 5U
#define kChunkResetShift 5U
#define kChunkUncompressedDictReset 1U
#define kChunkUncompressed 2U
#define kChunkCompressedFlag 0x80U
#define kChunkNothingReset 0U
#define kChunkStateReset (1U << kChunkResetShift)
#define kChunkStatePropertiesReset (2U << kChunkResetShift)
#define kChunkAllReset (3U << kChunkResetShift)
#define kMaxHashDictBits 14U
#define kHash3Bits 14U
#define kNullLink -1
#define kMinTestChunkSize 0x4000U
#define kRandomFilterMarginBits 8U
#define kState_LitAfterMatch 4
#define kState_LitAfterRep 5
#define kState_MatchAfterLit 7
#define kState_RepAfterLit 8
static const BYTE kLiteralNextStates[kNumStates] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
#define LIT_NEXT_STATE(s) kLiteralNextStates[s]
static const BYTE kMatchNextStates[kNumStates] = { 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10 };
#define MATCH_NEXT_STATE(s) kMatchNextStates[s]
static const BYTE kRepNextStates[kNumStates] = { 8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11 };
#define REP_NEXT_STATE(s) kRepNextStates[s]
static const BYTE kShortRepNextStates[kNumStates] = { 9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11 };
#define SHORT_REP_NEXT_STATE(s) kShortRepNextStates[s]
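/* The 12-state LZMA state machine: states 0-6 mean the previous symbol was a
 * literal (see IS_LIT_STATE), 7-11 that it was a match, rep or short rep.
 * E.g. a match from a literal state leads to state 7 (kState_MatchAfterLit),
 * and a literal following it leads to state 4 (kState_LitAfterMatch). */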
#include "fastpos_table.h"
#include "radix_get.h"
/* Probabilities and prices for encoding match lengths.
* Two objects of this type are needed, one for normal matches
* and another for rep matches.
*/
typedef struct
{
size_t table_size;
unsigned prices[kNumPositionStatesMax][kLenNumSymbolsTotal];
LZMA2_prob choice; /* low[0] is choice_2. Must be consecutive for speed */
LZMA2_prob low[kNumPositionStatesMax << (kLenNumLowBits + 1)];
LZMA2_prob high[kLenNumHighSymbols];
} LZMA2_lenStates;
/* All probabilities for the encoder. This is kept separate from the encoder object
 * so the state can be saved and restored in case a chunk is not compressible.
 */
typedef struct
{
/* Fields are ordered for speed */
LZMA2_lenStates rep_len_states;
LZMA2_prob is_rep0_long[kNumStates][kNumPositionStatesMax];
size_t state;
U32 reps[kNumReps];
LZMA2_prob is_match[kNumStates][kNumPositionStatesMax];
LZMA2_prob is_rep[kNumStates];
LZMA2_prob is_rep_G0[kNumStates];
LZMA2_prob is_rep_G1[kNumStates];
LZMA2_prob is_rep_G2[kNumStates];
LZMA2_lenStates len_states;
LZMA2_prob dist_slot_encoders[kNumLenToPosStates][1 << kNumPosSlotBits];
LZMA2_prob dist_align_encoders[1 << kNumAlignBits];
LZMA2_prob dist_encoders[kNumFullDistances - kEndPosModelIndex];
LZMA2_prob literal_probs[(kNumLiterals * kNumLitTables) << kLcLpMax];
} LZMA2_encStates;
/*
* Linked list item for optimal parsing
*/
typedef struct
{
size_t state;
U32 price;
unsigned extra; /* 0 : normal
* 1 : LIT : MATCH
* > 1 : MATCH (extra-1) : LIT : REP0 (len) */
unsigned len;
U32 dist;
U32 reps[kNumReps];
} LZMA2_node;
#define MARK_LITERAL(node) (node).dist = kNullDist; (node).extra = 0;
#define MARK_SHORT_REP(node) (node).dist = 0; (node).extra = 0;
/*
* Table and chain for 3-byte hash. Extra elements in hash_chain_3 are malloced.
*/
typedef struct {
S32 table_3[1 << kHash3Bits];
S32 hash_chain_3[1];
} LZMA2_hc3;
/*
* LZMA2 encoder.
*/
struct LZMA2_ECtx_s
{
unsigned lc;
unsigned lp;
unsigned pb;
unsigned fast_length;
size_t len_end_max;
size_t lit_pos_mask;
size_t pos_mask;
unsigned match_cycles;
FL2_strategy strategy;
RC_encoder rc;
/* Finish writing the chunk at this size */
size_t chunk_size;
/* Don't encode a symbol beyond this limit (used by fast mode) */
size_t chunk_limit;
LZMA2_encStates states;
unsigned match_price_count;
unsigned rep_len_price_count;
size_t dist_price_table_size;
unsigned align_prices[kAlignTableSize];
unsigned dist_slot_prices[kNumLenToPosStates][kDistTableSizeMax];
unsigned distance_prices[kNumLenToPosStates][kNumFullDistances];
RMF_match base_match; /* Allows access to matches[-1] in LZMA_optimalParse */
RMF_match matches[kMatchesMax];
size_t match_count;
LZMA2_node opt_buf[kOptimizerBufferSize];
LZMA2_hc3* hash_buf;
ptrdiff_t chain_mask_2;
ptrdiff_t chain_mask_3;
ptrdiff_t hash_dict_3;
ptrdiff_t hash_prev_index;
ptrdiff_t hash_alloc_3;
/* Temp output buffer before space frees up in the match table */
BYTE out_buf[kTempBufferSize];
};
LZMA2_ECtx* LZMA2_createECtx(void)
{
LZMA2_ECtx *const enc = malloc(sizeof(LZMA2_ECtx));
DEBUGLOG(3, "LZMA2_createECtx");
if (enc == NULL)
return NULL;
enc->lc = 3;
enc->lp = 0;
enc->pb = 2;
enc->fast_length = 48;
enc->len_end_max = kOptimizerBufferSize - 1;
enc->lit_pos_mask = (1 << enc->lp) - 1;
enc->pos_mask = (1 << enc->pb) - 1;
enc->match_cycles = 1;
enc->strategy = FL2_ultra;
enc->match_price_count = 0;
enc->rep_len_price_count = 0;
enc->dist_price_table_size = kDistTableSizeMax;
enc->hash_buf = NULL;
enc->hash_dict_3 = 0;
enc->chain_mask_3 = 0;
enc->hash_alloc_3 = 0;
return enc;
}
void LZMA2_freeECtx(LZMA2_ECtx *const enc)
{
if (enc == NULL)
return;
free(enc->hash_buf);
free(enc);
}
#define LITERAL_PROBS(enc, pos, prev_symbol) (enc->states.literal_probs + ((((pos) & enc->lit_pos_mask) << enc->lc) + ((prev_symbol) >> (8 - enc->lc))) * kNumLiterals * kNumLitTables)
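/* LITERAL_PROBS selects one of (1 << (lc + lp)) tables of 0x300 probabilities,
 * using the low lp bits of the position and the high lc bits of the previous
 * byte. With the default lc=3, lp=0 the context is simply prev_symbol >> 5. */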
#define LEN_TO_DIST_STATE(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
#define IS_LIT_STATE(state) ((state) < 7)
HINT_INLINE
unsigned LZMA_getRepLen1Price(LZMA2_ECtx* const enc, size_t const state, size_t const pos_state)
{
unsigned const rep_G0_prob = enc->states.is_rep_G0[state];
unsigned const rep0_long_prob = enc->states.is_rep0_long[state][pos_state];
return GET_PRICE_0(rep_G0_prob) + GET_PRICE_0(rep0_long_prob);
}
static unsigned LZMA_getRepPrice(LZMA2_ECtx* const enc, size_t const rep_index, size_t const state, size_t const pos_state)
{
unsigned price;
unsigned const rep_G0_prob = enc->states.is_rep_G0[state];
if (rep_index == 0) {
unsigned const rep0_long_prob = enc->states.is_rep0_long[state][pos_state];
price = GET_PRICE_0(rep_G0_prob);
price += GET_PRICE_1(rep0_long_prob);
}
else {
unsigned const rep_G1_prob = enc->states.is_rep_G1[state];
price = GET_PRICE_1(rep_G0_prob);
if (rep_index == 1) {
price += GET_PRICE_0(rep_G1_prob);
}
else {
unsigned const rep_G2_prob = enc->states.is_rep_G2[state];
price += GET_PRICE_1(rep_G1_prob);
price += GET_PRICE(rep_G2_prob, rep_index - 2);
}
}
return price;
}
static unsigned LZMA_getRepMatch0Price(LZMA2_ECtx *const enc, size_t const len, size_t const state, size_t const pos_state)
{
unsigned const rep_G0_prob = enc->states.is_rep_G0[state];
unsigned const rep0_long_prob = enc->states.is_rep0_long[state][pos_state];
return enc->states.rep_len_states.prices[pos_state][len - kMatchLenMin]
+ GET_PRICE_0(rep_G0_prob)
+ GET_PRICE_1(rep0_long_prob);
}
static unsigned LZMA_getLiteralPriceMatched(const LZMA2_prob *const prob_table, U32 symbol, unsigned match_byte)
{
unsigned price = 0;
unsigned offs = 0x100;
symbol |= 0x100;
do {
match_byte <<= 1;
price += GET_PRICE(prob_table[offs + (match_byte & offs) + (symbol >> 8)], (symbol >> 7) & 1);
symbol <<= 1;
offs &= ~(match_byte ^ symbol);
} while (symbol < 0x10000);
return price;
}
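/* A literal coded after a match uses the byte at the rep0 distance as extra
 * context: each bit is priced from the table section selected by the matching
 * bit of that byte until the first mismatch, after which offs becomes 0 and
 * the plain literal probabilities apply. */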
HINT_INLINE
void LZMA_encodeLiteral(LZMA2_ECtx *const enc, size_t const pos, U32 symbol, unsigned const prev_symbol)
{
RC_encodeBit0(&enc->rc, &enc->states.is_match[enc->states.state][pos & enc->pos_mask]);
enc->states.state = LIT_NEXT_STATE(enc->states.state);
LZMA2_prob* const prob_table = LITERAL_PROBS(enc, pos, prev_symbol);
symbol |= 0x100;
do {
RC_encodeBit(&enc->rc, prob_table + (symbol >> 8), symbol & (1 << 7));
symbol <<= 1;
} while (symbol < 0x10000);
}
HINT_INLINE
void LZMA_encodeLiteralMatched(LZMA2_ECtx *const enc, const BYTE* const data_block, size_t const pos, U32 symbol)
{
RC_encodeBit0(&enc->rc, &enc->states.is_match[enc->states.state][pos & enc->pos_mask]);
enc->states.state = LIT_NEXT_STATE(enc->states.state);
unsigned match_symbol = data_block[pos - enc->states.reps[0] - 1];
LZMA2_prob* const prob_table = LITERAL_PROBS(enc, pos, data_block[pos - 1]);
unsigned offset = 0x100;
symbol |= 0x100;
do {
match_symbol <<= 1;
size_t prob_index = offset + (match_symbol & offset) + (symbol >> 8);
RC_encodeBit(&enc->rc, prob_table + prob_index, symbol & (1 << 7));
symbol <<= 1;
offset &= ~(match_symbol ^ symbol);
} while (symbol < 0x10000);
}
HINT_INLINE
void LZMA_encodeLiteralBuf(LZMA2_ECtx *const enc, const BYTE* const data_block, size_t const pos)
{
U32 const symbol = data_block[pos];
if (IS_LIT_STATE(enc->states.state)) {
unsigned const prev_symbol = data_block[pos - 1];
LZMA_encodeLiteral(enc, pos, symbol, prev_symbol);
}
else {
LZMA_encodeLiteralMatched(enc, data_block, pos, symbol);
}
}
static void LZMA_lengthStates_SetPrices(const LZMA2_prob *probs, U32 start_price, unsigned *prices)
{
for (size_t i = 0; i < 8; i += 2) {
U32 prob = probs[4 + (i >> 1)];
U32 price = start_price + GET_PRICE(probs[1], (i >> 2))
+ GET_PRICE(probs[2 + (i >> 2)], (i >> 1) & 1);
prices[i] = price + GET_PRICE_0(prob);
prices[i + 1] = price + GET_PRICE_1(prob);
}
}
FORCE_NOINLINE
static void LZMA_lengthStates_updatePrices(LZMA2_ECtx *const enc, LZMA2_lenStates* const ls)
{
U32 b;
{
unsigned const prob = ls->choice;
U32 a, c;
b = GET_PRICE_1(prob);
a = GET_PRICE_0(prob);
c = b + GET_PRICE_0(ls->low[0]);
for (size_t pos_state = 0; pos_state <= enc->pos_mask; pos_state++) {
unsigned *const prices = ls->prices[pos_state];
const LZMA2_prob *const probs = ls->low + (pos_state << (1 + kLenNumLowBits));
LZMA_lengthStates_SetPrices(probs, a, prices);
LZMA_lengthStates_SetPrices(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols);
}
}
size_t i = ls->table_size;
if (i > kLenNumLowSymbols * 2) {
const LZMA2_prob *const probs = ls->high;
unsigned *const prices = ls->prices[0] + kLenNumLowSymbols * 2;
i = (i - (kLenNumLowSymbols * 2 - 1)) >> 1;
b += GET_PRICE_1(ls->low[0]);
do {
--i;
size_t sym = i + (1 << (kLenNumHighBits - 1));
U32 price = b;
do {
size_t bit = sym & 1;
sym >>= 1;
price += GET_PRICE(probs[sym], bit);
} while (sym >= 2);
unsigned const prob = probs[i + (1 << (kLenNumHighBits - 1))];
prices[i * 2] = price + GET_PRICE_0(prob);
prices[i * 2 + 1] = price + GET_PRICE_1(prob);
} while (i);
size_t const size = (ls->table_size - kLenNumLowSymbols * 2) * sizeof(ls->prices[0][0]);
for (size_t pos_state = 1; pos_state <= enc->pos_mask; pos_state++)
memcpy(ls->prices[pos_state] + kLenNumLowSymbols * 2, ls->prices[0] + kLenNumLowSymbols * 2, size);
}
}
/* Rare enough that not inlining is faster overall */
FORCE_NOINLINE
static void LZMA_encodeLength_MidHigh(LZMA2_ECtx *const enc, LZMA2_lenStates* const len_prob_table, unsigned const len, size_t const pos_state)
{
RC_encodeBit1(&enc->rc, &len_prob_table->choice);
if (len < kLenNumLowSymbols * 2) {
RC_encodeBit0(&enc->rc, &len_prob_table->low[0]);
RC_encodeBitTree(&enc->rc, len_prob_table->low + kLenNumLowSymbols + (pos_state << (1 + kLenNumLowBits)), kLenNumLowBits, len - kLenNumLowSymbols);
}
else {
RC_encodeBit1(&enc->rc, &len_prob_table->low[0]);
RC_encodeBitTree(&enc->rc, len_prob_table->high, kLenNumHighBits, len - kLenNumLowSymbols * 2);
}
}
HINT_INLINE
void LZMA_encodeLength(LZMA2_ECtx *const enc, LZMA2_lenStates* const len_prob_table, unsigned len, size_t const pos_state)
{
len -= kMatchLenMin;
if (len < kLenNumLowSymbols) {
RC_encodeBit0(&enc->rc, &len_prob_table->choice);
RC_encodeBitTree(&enc->rc, len_prob_table->low + (pos_state << (1 + kLenNumLowBits)), kLenNumLowBits, len);
}
else {
LZMA_encodeLength_MidHigh(enc, len_prob_table, len, pos_state);
}
}
FORCE_NOINLINE
static void LZMA_encodeRepMatchShort(LZMA2_ECtx *const enc, size_t const pos_state)
{
DEBUGLOG(7, "LZMA_encodeRepMatchShort");
RC_encodeBit1(&enc->rc, &enc->states.is_match[enc->states.state][pos_state]);
RC_encodeBit1(&enc->rc, &enc->states.is_rep[enc->states.state]);
RC_encodeBit0(&enc->rc, &enc->states.is_rep_G0[enc->states.state]);
RC_encodeBit0(&enc->rc, &enc->states.is_rep0_long[enc->states.state][pos_state]);
enc->states.state = SHORT_REP_NEXT_STATE(enc->states.state);
}
FORCE_NOINLINE
static void LZMA_encodeRepMatchLong(LZMA2_ECtx *const enc, unsigned const len, unsigned const rep, size_t const pos_state)
{
DEBUGLOG(7, "LZMA_encodeRepMatchLong : length %u, rep %u", len, rep);
RC_encodeBit1(&enc->rc, &enc->states.is_match[enc->states.state][pos_state]);
RC_encodeBit1(&enc->rc, &enc->states.is_rep[enc->states.state]);
if (rep == 0) {
RC_encodeBit0(&enc->rc, &enc->states.is_rep_G0[enc->states.state]);
RC_encodeBit1(&enc->rc, &enc->states.is_rep0_long[enc->states.state][pos_state]);
}
else {
U32 const distance = enc->states.reps[rep];
RC_encodeBit1(&enc->rc, &enc->states.is_rep_G0[enc->states.state]);
if (rep == 1) {
RC_encodeBit0(&enc->rc, &enc->states.is_rep_G1[enc->states.state]);
}
else {
RC_encodeBit1(&enc->rc, &enc->states.is_rep_G1[enc->states.state]);
RC_encodeBit(&enc->rc, &enc->states.is_rep_G2[enc->states.state], rep - 2);
if (rep == 3)
enc->states.reps[3] = enc->states.reps[2];
enc->states.reps[2] = enc->states.reps[1];
}
enc->states.reps[1] = enc->states.reps[0];
enc->states.reps[0] = distance;
}
LZMA_encodeLength(enc, &enc->states.rep_len_states, len, pos_state);
enc->states.state = REP_NEXT_STATE(enc->states.state);
++enc->rep_len_price_count;
}
/*
* Distance slot functions based on fastpos.h from XZ
*/
HINT_INLINE
unsigned LZMA_fastDistShift(unsigned const n)
{
return n * (kFastDistBits - 1);
}
HINT_INLINE
unsigned LZMA_fastDistResult(U32 const dist, unsigned const n)
{
return distance_table[dist >> LZMA_fastDistShift(n)]
+ 2 * LZMA_fastDistShift(n);
}
static size_t LZMA_getDistSlot(U32 const distance)
{
U32 limit = 1UL << kFastDistBits;
/* If it is small enough, we can pick the result directly from */
/* the precalculated table. */
if (distance < limit) {
return distance_table[distance];
}
limit <<= LZMA_fastDistShift(1);
if (distance < limit) {
return LZMA_fastDistResult(distance, 1);
}
return LZMA_fastDistResult(distance, 2);
}
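/* Worked example: distance 1000 lies between 2^9 and 2^10, so its slot is
 * 2*9 + ((1000 >> 8) & 1) = 19. Encoding then derives
 * base = (2 | (19 & 1)) << 8 = 768 and writes footer_bits = 8 more bits
 * for the remainder 1000 - 768 = 232. */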
/* * */
HINT_INLINE
void LZMA_encodeNormalMatch(LZMA2_ECtx *const enc, unsigned const len, U32 const dist, size_t const pos_state)
{
DEBUGLOG(7, "LZMA_encodeNormalMatch : length %u, dist %u", len, dist);
RC_encodeBit1(&enc->rc, &enc->states.is_match[enc->states.state][pos_state]);
RC_encodeBit0(&enc->rc, &enc->states.is_rep[enc->states.state]);
enc->states.state = MATCH_NEXT_STATE(enc->states.state);
LZMA_encodeLength(enc, &enc->states.len_states, len, pos_state);
size_t const dist_slot = LZMA_getDistSlot(dist);
RC_encodeBitTree(&enc->rc, enc->states.dist_slot_encoders[LEN_TO_DIST_STATE(len)], kNumPosSlotBits, (unsigned)dist_slot);
if (dist_slot >= kStartPosModelIndex) {
unsigned const footer_bits = ((unsigned)(dist_slot >> 1) - 1);
size_t const base = ((2 | (dist_slot & 1)) << footer_bits);
unsigned const dist_reduced = (unsigned)(dist - base);
if (dist_slot < kEndPosModelIndex) {
RC_encodeBitTreeReverse(&enc->rc, enc->states.dist_encoders + base - dist_slot - 1, footer_bits, dist_reduced);
}
else {
RC_encodeDirect(&enc->rc, dist_reduced >> kNumAlignBits, footer_bits - kNumAlignBits);
RC_encodeBitTreeReverse(&enc->rc, enc->states.dist_align_encoders, kNumAlignBits, dist_reduced & kAlignMask);
}
}
enc->states.reps[3] = enc->states.reps[2];
enc->states.reps[2] = enc->states.reps[1];
enc->states.reps[1] = enc->states.reps[0];
enc->states.reps[0] = dist;
++enc->match_price_count;
}
FORCE_INLINE_TEMPLATE
size_t LZMA_encodeChunkFast(LZMA2_ECtx *const enc,
FL2_dataBlock const block,
FL2_matchTable* const tbl,
int const struct_tbl,
size_t pos,
size_t const uncompressed_end)
{
size_t const pos_mask = enc->pos_mask;
size_t prev = pos;
unsigned const search_depth = tbl->params.depth;
while (pos < uncompressed_end && enc->rc.out_index < enc->chunk_size) {
size_t max_len;
const BYTE* data;
/* Table of distance restrictions for short matches */
static const U32 max_dist_table[] = { 0, 0, 0, 1 << 6, 1 << 14 };
/* Get a match from the table, extended to its full length */
RMF_match best_match = RMF_getMatch(block, tbl, search_depth, struct_tbl, pos);
if (best_match.length < kMatchLenMin) {
++pos;
continue;
}
/* Use if near enough */
if (best_match.length >= 5 || best_match.dist < max_dist_table[best_match.length])
best_match.dist += kNumReps;
else
best_match.length = 0;
max_len = MIN(kMatchLenMax, block.end - pos);
data = block.data + pos;
RMF_match best_rep = { 0, 0 };
RMF_match rep_match;
/* Search all of the rep distances */
for (rep_match.dist = 0; rep_match.dist < kNumReps; ++rep_match.dist) {
const BYTE *data_2 = data - enc->states.reps[rep_match.dist] - 1;
if (MEM_read16(data) != MEM_read16(data_2))
continue;
rep_match.length = (U32)(ZSTD_count(data + 2, data_2 + 2, data + max_len) + 2);
if (rep_match.length >= max_len) {
best_match = rep_match;
goto _encode;
}
if (rep_match.length > best_rep.length)
best_rep = rep_match;
}
/* Encode if it is kMatchLenMax or completes the block */
if (best_match.length >= max_len)
goto _encode;
if (best_rep.length >= 2) {
if (best_rep.length > best_match.length) {
best_match = best_rep;
}
else {
/* Modified ZSTD scheme for estimating cost */
int const gain2 = (int)(best_rep.length * 3 - best_rep.dist);
int const gain1 = (int)(best_match.length * 3 - ZSTD_highbit32(best_match.dist + 1) + 1);
if (gain2 > gain1) {
DEBUGLOG(7, "Replace match (%u, %u) with rep (%u, %u)", best_match.length, best_match.dist, best_rep.length, best_rep.dist);
best_match = best_rep;
}
}
}
if (best_match.length < kMatchLenMin) {
++pos;
continue;
}
for (size_t next = pos + 1; best_match.length < kMatchLenMax && next < uncompressed_end; ++next) {
/* lazy matching scheme from ZSTD */
RMF_match next_match = RMF_getNextMatch(block, tbl, search_depth, struct_tbl, next);
if (next_match.length >= kMatchLenMin) {
best_rep.length = 0;
data = block.data + next;
max_len = MIN(kMatchLenMax, block.end - next);
for (rep_match.dist = 0; rep_match.dist < kNumReps; ++rep_match.dist) {
const BYTE *data_2 = data - enc->states.reps[rep_match.dist] - 1;
if (MEM_read16(data) != MEM_read16(data_2))
continue;
rep_match.length = (U32)(ZSTD_count(data + 2, data_2 + 2, data + max_len) + 2);
if (rep_match.length > best_rep.length)
best_rep = rep_match;
}
if (best_rep.length >= 3) {
int const gain2 = (int)(best_rep.length * 3 - best_rep.dist);
int const gain1 = (int)(best_match.length * 3 - ZSTD_highbit32((U32)best_match.dist + 1) + 1);
if (gain2 > gain1) {
DEBUGLOG(7, "Replace match (%u, %u) with rep (%u, %u)", best_match.length, best_match.dist, best_rep.length, best_rep.dist);
best_match = best_rep;
pos = next;
}
}
if (next_match.length >= 3 && next_match.dist != best_match.dist) {
int const gain2 = (int)(next_match.length * 4 - ZSTD_highbit32((U32)next_match.dist + 1)); /* raw approx */
int const gain1 = (int)(best_match.length * 4 - ZSTD_highbit32((U32)best_match.dist + 1) + 4);
if (gain2 > gain1) {
DEBUGLOG(7, "Replace match (%u, %u) with match (%u, %u)", best_match.length, best_match.dist, next_match.length, next_match.dist + kNumReps);
best_match = next_match;
best_match.dist += kNumReps;
pos = next;
continue;
}
}
}
++next;
/* Recheck next < uncompressed_end. uncompressed_end could equal block.end, so reducing the max chunk size would not remove the need for this check. */
if (next >= uncompressed_end)
break;
next_match = RMF_getNextMatch(block, tbl, search_depth, struct_tbl, next);
if (next_match.length < 4)
break;
data = block.data + next;
max_len = MIN(kMatchLenMax, block.end - next);
best_rep.length = 0;
for (rep_match.dist = 0; rep_match.dist < kNumReps; ++rep_match.dist) {
const BYTE *data_2 = data - enc->states.reps[rep_match.dist] - 1;
if (MEM_read16(data) != MEM_read16(data_2))
continue;
rep_match.length = (U32)(ZSTD_count(data + 2, data_2 + 2, data + max_len) + 2);
if (rep_match.length > best_rep.length)
best_rep = rep_match;
}
if (best_rep.length >= 4) {
int const gain2 = (int)(best_rep.length * 4 - (best_rep.dist >> 1));
int const gain1 = (int)(best_match.length * 4 - ZSTD_highbit32((U32)best_match.dist + 1) + 1);
if (gain2 > gain1) {
DEBUGLOG(7, "Replace match (%u, %u) with rep (%u, %u)", best_match.length, best_match.dist, best_rep.length, best_rep.dist);
best_match = best_rep;
pos = next;
}
}
if (next_match.dist != best_match.dist) {
int const gain2 = (int)(next_match.length * 4 - ZSTD_highbit32((U32)next_match.dist + 1));
int const gain1 = (int)(best_match.length * 4 - ZSTD_highbit32((U32)best_match.dist + 1) + 7);
if (gain2 > gain1) {
DEBUGLOG(7, "Replace match (%u, %u) with match (%u, %u)", best_match.length, best_match.dist, next_match.length, next_match.dist + kNumReps);
best_match = next_match;
best_match.dist += kNumReps;
pos = next;
continue;
}
}
break;
}
_encode:
assert(pos + best_match.length <= block.end);
while (prev < pos) {
if (enc->rc.out_index >= enc->chunk_limit)
return prev;
if (block.data[prev] != block.data[prev - enc->states.reps[0] - 1]) {
LZMA_encodeLiteralBuf(enc, block.data, prev);
++prev;
}
else {
LZMA_encodeRepMatchShort(enc, prev & pos_mask);
++prev;
}
}
if(best_match.length >= kMatchLenMin) {
if (best_match.dist >= kNumReps) {
LZMA_encodeNormalMatch(enc, best_match.length, best_match.dist - kNumReps, pos & pos_mask);
pos += best_match.length;
prev = pos;
}
else {
LZMA_encodeRepMatchLong(enc, best_match.length, best_match.dist, pos & pos_mask);
pos += best_match.length;
prev = pos;
}
}
}
while (prev < pos && enc->rc.out_index < enc->chunk_limit) {
if (block.data[prev] != block.data[prev - enc->states.reps[0] - 1])
LZMA_encodeLiteralBuf(enc, block.data, prev);
else
LZMA_encodeRepMatchShort(enc, prev & pos_mask);
++prev;
}
return prev;
}
/*
* Reverse the direction of the linked list generated by the optimal parser
*/
FORCE_NOINLINE
static void LZMA_reverseOptimalChain(LZMA2_node* const opt_buf, size_t cur)
{
unsigned len = (unsigned)opt_buf[cur].len;
U32 dist = opt_buf[cur].dist;
for(;;) {
unsigned const extra = (unsigned)opt_buf[cur].extra;
cur -= len;
if (extra) {
opt_buf[cur].len = (U32)len;
len = extra;
if (extra == 1) {
opt_buf[cur].dist = dist;
dist = kNullDist;
--cur;
}
else {
opt_buf[cur].dist = 0;
--cur;
--len;
opt_buf[cur].dist = kNullDist;
opt_buf[cur].len = 1;
cur -= len;
}
}
unsigned const next_len = opt_buf[cur].len;
U32 const next_dist = opt_buf[cur].dist;
opt_buf[cur].dist = dist;
opt_buf[cur].len = (U32)len;
if (cur == 0)
break;
len = next_len;
dist = next_dist;
}
}
static unsigned LZMA_getLiteralPrice(LZMA2_ECtx *const enc, size_t const pos, size_t const state, unsigned const prev_symbol, U32 symbol, unsigned const match_byte)
{
const LZMA2_prob* const prob_table = LITERAL_PROBS(enc, pos, prev_symbol);
if (IS_LIT_STATE(state)) {
unsigned price = 0;
symbol |= 0x100;
do {
price += GET_PRICE(prob_table[symbol >> 8], (symbol >> 7) & 1);
symbol <<= 1;
} while (symbol < 0x10000);
return price;
}
return LZMA_getLiteralPriceMatched(prob_table, symbol, match_byte);
}
/*
* Reset the hash object for encoding a new slice of a block
*/
static void LZMA_hashReset(LZMA2_ECtx *const enc, unsigned const dictionary_bits_3)
{
enc->hash_dict_3 = (ptrdiff_t)1 << dictionary_bits_3;
enc->chain_mask_3 = enc->hash_dict_3 - 1;
memset(enc->hash_buf->table_3, 0xFF, sizeof(enc->hash_buf->table_3));
}
/*
* Create hash table and chain with dict size dictionary_bits_3. Frees any existing object.
*/
static int LZMA_hashCreate(LZMA2_ECtx *const enc, unsigned const dictionary_bits_3)
{
DEBUGLOG(3, "Create hash chain : dict bits %u", dictionary_bits_3);
if (enc->hash_buf)
free(enc->hash_buf);
enc->hash_alloc_3 = (ptrdiff_t)1 << dictionary_bits_3;
enc->hash_buf = malloc(sizeof(LZMA2_hc3) + (enc->hash_alloc_3 - 1) * sizeof(S32));
if (enc->hash_buf == NULL)
return 1;
LZMA_hashReset(enc, dictionary_bits_3);
return 0;
}
/* Create a hash chain for hybrid mode if options require one.
* Used for allocating before compression begins. Any existing table will be reused if
* it is at least as large as required.
*/
int LZMA2_hashAlloc(LZMA2_ECtx *const enc, const FL2_lzma2Parameters* const options)
{
if (enc->strategy == FL2_ultra && enc->hash_alloc_3 < ((ptrdiff_t)1 << options->second_dict_bits))
return LZMA_hashCreate(enc, options->second_dict_bits);
return 0;
}
#define GET_HASH_3(data) ((((MEM_readLE32(data)) << 8) * 506832829U) >> (32 - kHash3Bits))
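/* GET_HASH_3 is a multiplicative hash of the 3 bytes at data: the << 8 shifts
 * the fourth byte of the little-endian read out of the 32-bit value, and the
 * top kHash3Bits bits of the product form the table index. */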
/* Find matches nearer than the match from the RMF. If none is at least as long as
* the RMF match (most likely), insert that match at the end of the list.
*/
HINT_INLINE
size_t LZMA_hashGetMatches(LZMA2_ECtx *const enc, FL2_dataBlock const block,
ptrdiff_t const pos,
size_t const length_limit,
RMF_match const match)
{
ptrdiff_t const hash_dict_3 = enc->hash_dict_3;
const BYTE* data = block.data;
LZMA2_hc3* const tbl = enc->hash_buf;
ptrdiff_t const chain_mask_3 = enc->chain_mask_3;
enc->match_count = 0;
enc->hash_prev_index = MAX(enc->hash_prev_index, pos - hash_dict_3);
/* Update hash tables and chains for any positions that were skipped */
while (++enc->hash_prev_index < pos) {
size_t hash = GET_HASH_3(data + enc->hash_prev_index);
tbl->hash_chain_3[enc->hash_prev_index & chain_mask_3] = tbl->table_3[hash];
tbl->table_3[hash] = (S32)enc->hash_prev_index;
}
data += pos;
size_t const hash = GET_HASH_3(data);
ptrdiff_t const first_3 = tbl->table_3[hash];
tbl->table_3[hash] = (S32)pos;
size_t max_len = 2;
if (first_3 >= 0) {
int cycles = enc->match_cycles;
ptrdiff_t const end_index = pos - (((ptrdiff_t)match.dist < hash_dict_3) ? match.dist : hash_dict_3);
ptrdiff_t match_3 = first_3;
if (match_3 >= end_index) {
do {
--cycles;
const BYTE* data_2 = block.data + match_3;
size_t len_test = ZSTD_count(data + 1, data_2 + 1, data + length_limit) + 1;
if (len_test > max_len) {
enc->matches[enc->match_count].length = (U32)len_test;
enc->matches[enc->match_count].dist = (U32)(pos - match_3 - 1);
++enc->match_count;
max_len = len_test;
if (len_test >= length_limit)
break;
}
if (cycles <= 0)
break;
match_3 = tbl->hash_chain_3[match_3 & chain_mask_3];
} while (match_3 >= end_index);
}
}
tbl->hash_chain_3[pos & chain_mask_3] = (S32)first_3;
if ((unsigned)max_len < match.length) {
/* Insert the match from the RMF */
enc->matches[enc->match_count] = match;
++enc->match_count;
return match.length;
}
return max_len;
}
/* The speed of this function is critical. The sections have many variables
* in common, so breaking it up into shorter functions is not feasible.
 * For each position cur, starting at 1, the function checks some or all of the
 * possible encoding choices: a literal, a 1-byte rep 0 match, all rep match
 * lengths, and all match lengths at the available distances. It also checks
 * the combined sequences literal+rep0, rep+lit+rep0 and match+lit+rep0.
 * If is_hybrid != 0, the function runs in hybrid mode, using the
 * hash chain to find shorter matches at near distances. */
FORCE_INLINE_TEMPLATE
size_t LZMA_optimalParse(LZMA2_ECtx* const enc, FL2_dataBlock const block,
RMF_match match,
size_t const pos,
size_t const cur,
size_t len_end,
int const is_hybrid,
U32* const reps)
{
LZMA2_node* const cur_opt = &enc->opt_buf[cur];
size_t const pos_mask = enc->pos_mask;
size_t const pos_state = (pos & pos_mask);
const BYTE* const data = block.data + pos;
size_t const fast_length = enc->fast_length;
size_t prev_index = cur - cur_opt->len;
size_t state;
size_t bytes_avail;
U32 match_price;
U32 rep_match_price;
/* Update the states according to how this location was reached */
if (cur_opt->len == 1) {
/* Literal or 1-byte rep */
const BYTE *next_state = (cur_opt->dist == 0) ? kShortRepNextStates : kLiteralNextStates;
state = next_state[enc->opt_buf[prev_index].state];
}
else {
/* Match or rep match */
size_t const dist = cur_opt->dist;
if (cur_opt->extra) {
prev_index -= cur_opt->extra;
state = kState_RepAfterLit - ((dist >= kNumReps) & (cur_opt->extra == 1));
}
else {
state = enc->opt_buf[prev_index].state;
state = MATCH_NEXT_STATE(state) + (dist < kNumReps);
}
const LZMA2_node *const prev_opt = &enc->opt_buf[prev_index];
if (dist < kNumReps) {
/* Move the chosen rep to the front.
 * The table is hideous but faster than branching :D
 * Each 8-bit group holds the order of the three remaining rep
 * indices as 2-bit fields; dist << 3 selects the group. */
reps[0] = prev_opt->reps[dist];
size_t table = 1 | (2 << 2) | (3 << 4)
| (0 << 8) | (2 << 10) | (3 << 12)
| (0L << 16) | (1L << 18) | (3L << 20)
| (0L << 24) | (1L << 26) | (2L << 28);
table >>= (dist << 3);
reps[1] = prev_opt->reps[table & 3];
table >>= 2;
reps[2] = prev_opt->reps[table & 3];
table >>= 2;
reps[3] = prev_opt->reps[table & 3];
}
else {
reps[0] = (U32)(dist - kNumReps);
reps[1] = prev_opt->reps[0];
reps[2] = prev_opt->reps[1];
reps[3] = prev_opt->reps[2];
}
}
cur_opt->state = state;
memcpy(cur_opt->reps, reps, sizeof(cur_opt->reps));
LZMA2_prob const is_rep_prob = enc->states.is_rep[state];
{ LZMA2_node *const next_opt = &enc->opt_buf[cur + 1];
U32 const cur_price = cur_opt->price;
U32 const next_price = next_opt->price;
LZMA2_prob const is_match_prob = enc->states.is_match[state][pos_state];
unsigned const cur_byte = *data;
unsigned const match_byte = *(data - reps[0] - 1);
U32 cur_and_lit_price = cur_price + GET_PRICE_0(is_match_prob);
/* This is a compromise to try to filter out cases where literal + rep0 is unlikely to be cheaper */
BYTE try_lit = cur_and_lit_price + kMinLitPrice / 2U <= next_price;
if (try_lit) {
/* cur_and_lit_price is used later for the literal + rep0 test */
cur_and_lit_price += LZMA_getLiteralPrice(enc, pos, state, data[-1], cur_byte, match_byte);
/* Try literal */
if (cur_and_lit_price < next_price) {
next_opt->price = cur_and_lit_price;
next_opt->len = 1;
MARK_LITERAL(*next_opt);
if (is_hybrid) /* Evaluates as a constant expression due to inlining */
try_lit = 0;
}
}
match_price = cur_price + GET_PRICE_1(is_match_prob);
rep_match_price = match_price + GET_PRICE_1(is_rep_prob);
if (match_byte == cur_byte) {
/* Try 1-byte rep0 */
U32 short_rep_price = rep_match_price + LZMA_getRepLen1Price(enc, state, pos_state);
if (short_rep_price <= next_opt->price) {
next_opt->price = short_rep_price;
next_opt->len = 1;
MARK_SHORT_REP(*next_opt);
}
}
bytes_avail = MIN(block.end - pos, kOptimizerBufferSize - 1 - cur);
if (bytes_avail < 2)
return len_end;
/* If match_byte == cur_byte a rep0 begins at the current position */
if (is_hybrid && try_lit && match_byte != cur_byte) {
/* Try literal + rep0 */
const BYTE *const data_2 = data - reps[0];
size_t limit = MIN(bytes_avail - 1, fast_length);
size_t len_test_2 = ZSTD_count(data + 1, data_2, data + 1 + limit);
if (len_test_2 >= 2) {
size_t const state_2 = LIT_NEXT_STATE(state);
size_t const pos_state_next = (pos + 1) & pos_mask;
U32 const next_rep_match_price = cur_and_lit_price +
GET_PRICE_1(enc->states.is_match[state_2][pos_state_next]) +
GET_PRICE_1(enc->states.is_rep[state_2]);
U32 const cur_and_len_price = next_rep_match_price + LZMA_getRepMatch0Price(enc, len_test_2, state_2, pos_state_next);
size_t const offset = cur + 1 + len_test_2;
if (cur_and_len_price < enc->opt_buf[offset].price) {
len_end = MAX(len_end, offset);
enc->opt_buf[offset].price = cur_and_len_price;
enc->opt_buf[offset].len = (unsigned)len_test_2;
enc->opt_buf[offset].dist = 0;
enc->opt_buf[offset].extra = 1;
}
}
}
}
size_t const max_length = MIN(bytes_avail, fast_length);
size_t start_len = 2;
if (match.length > 0) {
size_t len_test;
size_t len;
U32 cur_rep_price;
for (size_t rep_index = 0; rep_index < kNumReps; ++rep_index) {
const BYTE *const data_2 = data - reps[rep_index] - 1;
if (MEM_read16(data) != MEM_read16(data_2))
continue;
/* Test is limited to fast_length, but it is rare for the RMF to miss the longest match,
* therefore this function is rarely called when a rep len > fast_length exists */
len_test = ZSTD_count(data + 2, data_2 + 2, data + max_length) + 2;
len_end = MAX(len_end, cur + len_test);
cur_rep_price = rep_match_price + LZMA_getRepPrice(enc, rep_index, state, pos_state);
len = 2;
/* Try rep match */
do {
U32 const cur_and_len_price = cur_rep_price + enc->states.rep_len_states.prices[pos_state][len - kMatchLenMin];
LZMA2_node *const opt = &enc->opt_buf[cur + len];
if (cur_and_len_price < opt->price) {
opt->price = cur_and_len_price;
opt->len = (unsigned)len;
opt->dist = (U32)rep_index;
opt->extra = 0;
}
} while (++len <= len_test);
if (rep_index == 0) {
/* Save time by excluding normal matches not longer than the rep */
start_len = len_test + 1;
}
/* rep + literal + rep0 is not common so this test is skipped for faster, non-hybrid encoding */
if (is_hybrid && len_test + 3 <= bytes_avail && MEM_read16(data + len_test + 1) == MEM_read16(data_2 + len_test + 1)) {
/* Try rep + literal + rep0.
* The second rep may be > fast_length, but it is not worth the extra time to handle this case
* and the price table is not filled for it */
size_t const len_test_2 = ZSTD_count(data + len_test + 3,
data_2 + len_test + 3,
data + MIN(len_test + 1 + fast_length, bytes_avail)) + 2;
size_t state_2 = REP_NEXT_STATE(state);
size_t pos_state_next = (pos + len_test) & pos_mask;
U32 rep_lit_rep_total_price =
cur_rep_price + enc->states.rep_len_states.prices[pos_state][len_test - kMatchLenMin]
+ GET_PRICE_0(enc->states.is_match[state_2][pos_state_next])
+ LZMA_getLiteralPriceMatched(LITERAL_PROBS(enc, pos + len_test, data[len_test - 1]),
data[len_test], data_2[len_test]);
state_2 = kState_LitAfterRep;
pos_state_next = (pos + len_test + 1) & pos_mask;
rep_lit_rep_total_price +=
GET_PRICE_1(enc->states.is_match[state_2][pos_state_next]) +
GET_PRICE_1(enc->states.is_rep[state_2]);
size_t const offset = cur + len_test + 1 + len_test_2;
rep_lit_rep_total_price += LZMA_getRepMatch0Price(enc, len_test_2, state_2, pos_state_next);
if (rep_lit_rep_total_price < enc->opt_buf[offset].price) {
len_end = MAX(len_end, offset);
enc->opt_buf[offset].price = rep_lit_rep_total_price;
enc->opt_buf[offset].len = (unsigned)len_test_2;
enc->opt_buf[offset].dist = (U32)rep_index;
enc->opt_buf[offset].extra = (unsigned)(len_test + 1);
}
}
}
}
if (match.length >= start_len && max_length >= start_len) {
/* Try normal match */
U32 const normal_match_price = match_price + GET_PRICE_0(is_rep_prob);
if (!is_hybrid) {
/* Normal mode - single match */
size_t const length = MIN(match.length, max_length);
size_t const cur_dist = match.dist;
size_t const dist_slot = LZMA_getDistSlot(match.dist);
size_t len_test = length;
len_end = MAX(len_end, cur + length);
for (; len_test >= start_len; --len_test) {
U32 cur_and_len_price = normal_match_price + enc->states.len_states.prices[pos_state][len_test - kMatchLenMin];
size_t const len_to_dist_state = LEN_TO_DIST_STATE(len_test);
if (cur_dist < kNumFullDistances)
cur_and_len_price += enc->distance_prices[len_to_dist_state][cur_dist];
else
cur_and_len_price += enc->dist_slot_prices[len_to_dist_state][dist_slot] + enc->align_prices[cur_dist & kAlignMask];
LZMA2_node *const opt = &enc->opt_buf[cur + len_test];
if (cur_and_len_price < opt->price) {
opt->price = cur_and_len_price;
opt->len = (unsigned)len_test;
opt->dist = (U32)(cur_dist + kNumReps);
opt->extra = 0;
}
else break;
}
}
else {
/* Hybrid mode */
size_t main_len;
match.length = MIN(match.length, (U32)max_length);
/* Need to test max_length < 4 because the hash fn reads a U32 */
if (match.length < 3 || max_length < 4) {
enc->matches[0] = match;
enc->match_count = 1;
main_len = match.length;
}
else {
main_len = LZMA_hashGetMatches(enc, block, pos, max_length, match);
}
ptrdiff_t match_index = enc->match_count - 1;
len_end = MAX(len_end, cur + main_len);
/* Start with a match longer than the best rep if one exists */
ptrdiff_t start_match = 0;
while (start_len > enc->matches[start_match].length)
++start_match;
enc->matches[start_match - 1].length = (U32)start_len - 1; /* Avoids an if..else branch in the loop. [-1] is ok */
for (; match_index >= start_match; --match_index) {
size_t len_test = enc->matches[match_index].length;
size_t const cur_dist = enc->matches[match_index].dist;
const BYTE *const data_2 = data - cur_dist - 1;
size_t const rep_0_pos = len_test + 1;
size_t dist_slot = LZMA_getDistSlot((U32)cur_dist);
U32 cur_and_len_price;
/* Test from the full length down to 1 more than the next shorter match */
size_t base_len = enc->matches[match_index - 1].length + 1;
for (; len_test >= base_len; --len_test) {
cur_and_len_price = normal_match_price + enc->states.len_states.prices[pos_state][len_test - kMatchLenMin];
size_t const len_to_dist_state = LEN_TO_DIST_STATE(len_test);
if (cur_dist < kNumFullDistances)
cur_and_len_price += enc->distance_prices[len_to_dist_state][cur_dist];
else
cur_and_len_price += enc->dist_slot_prices[len_to_dist_state][dist_slot] + enc->align_prices[cur_dist & kAlignMask];
BYTE const sub_len = len_test < enc->matches[match_index].length;
LZMA2_node *const opt = &enc->opt_buf[cur + len_test];
if (cur_and_len_price < opt->price) {
opt->price = cur_and_len_price;
opt->len = (unsigned)len_test;
opt->dist = (U32)(cur_dist + kNumReps);
opt->extra = 0;
}
else if(sub_len)
break; /* End the tests if prices for shorter lengths are not lower than those already recorded */
if (!sub_len && rep_0_pos + 2 <= bytes_avail && MEM_read16(data + rep_0_pos) == MEM_read16(data_2 + rep_0_pos)) {
/* Try match + literal + rep0 */
size_t const limit = MIN(rep_0_pos + fast_length, bytes_avail);
size_t const len_test_2 = ZSTD_count(data + rep_0_pos + 2, data_2 + rep_0_pos + 2, data + limit) + 2;
size_t state_2 = MATCH_NEXT_STATE(state);
size_t pos_state_next = (pos + len_test) & pos_mask;
U32 match_lit_rep_total_price = cur_and_len_price +
GET_PRICE_0(enc->states.is_match[state_2][pos_state_next]) +
LZMA_getLiteralPriceMatched(LITERAL_PROBS(enc, pos + len_test, data[len_test - 1]),
data[len_test], data_2[len_test]);
state_2 = kState_LitAfterMatch;
pos_state_next = (pos_state_next + 1) & pos_mask;
match_lit_rep_total_price +=
GET_PRICE_1(enc->states.is_match[state_2][pos_state_next]) +
GET_PRICE_1(enc->states.is_rep[state_2]);
size_t const offset = cur + rep_0_pos + len_test_2;
match_lit_rep_total_price += LZMA_getRepMatch0Price(enc, len_test_2, state_2, pos_state_next);
if (match_lit_rep_total_price < enc->opt_buf[offset].price) {
len_end = MAX(len_end, offset);
enc->opt_buf[offset].price = match_lit_rep_total_price;
enc->opt_buf[offset].len = (unsigned)len_test_2;
enc->opt_buf[offset].extra = (unsigned)rep_0_pos;
enc->opt_buf[offset].dist = (U32)(cur_dist + kNumReps);
}
}
}
}
}
}
return len_end;
}
FORCE_NOINLINE
static void LZMA_initMatchesPos0(LZMA2_ECtx *const enc,
RMF_match const match,
size_t const pos_state,
size_t len,
unsigned const normal_match_price)
{
if ((unsigned)len <= match.length) {
size_t const distance = match.dist;
size_t const slot = LZMA_getDistSlot(match.dist);
/* Test every available length of the match */
do {
unsigned cur_and_len_price = normal_match_price + enc->states.len_states.prices[pos_state][len - kMatchLenMin];
size_t const len_to_dist_state = LEN_TO_DIST_STATE(len);
if (distance < kNumFullDistances)
cur_and_len_price += enc->distance_prices[len_to_dist_state][distance];
else
cur_and_len_price += enc->align_prices[distance & kAlignMask] + enc->dist_slot_prices[len_to_dist_state][slot];
if (cur_and_len_price < enc->opt_buf[len].price) {
enc->opt_buf[len].price = cur_and_len_price;
enc->opt_buf[len].len = (unsigned)len;
enc->opt_buf[len].dist = (U32)(distance + kNumReps);
enc->opt_buf[len].extra = 0;
}
++len;
} while ((U32)len <= match.length);
}
}
FORCE_NOINLINE
static size_t LZMA_initMatchesPos0Best(LZMA2_ECtx *const enc, FL2_dataBlock const block,
RMF_match const match,
size_t const pos,
size_t start_len,
unsigned const normal_match_price)
{
if (start_len <= match.length) {
size_t main_len;
if (match.length < 3 || block.end - pos < 4) {
enc->matches[0] = match;
enc->match_count = 1;
main_len = match.length;
}
else {
main_len = LZMA_hashGetMatches(enc, block, pos, MIN(block.end - pos, enc->fast_length), match);
}
ptrdiff_t start_match = 0;
while (start_len > enc->matches[start_match].length)
++start_match;
enc->matches[start_match - 1].length = (U32)start_len - 1; /* Avoids an if..else branch in the loop. [-1] is ok */
size_t pos_state = pos & enc->pos_mask;
for (ptrdiff_t match_index = enc->match_count - 1; match_index >= start_match; --match_index) {
size_t len_test = enc->matches[match_index].length;
size_t const distance = enc->matches[match_index].dist;
size_t const slot = LZMA_getDistSlot((U32)distance);
size_t const base_len = enc->matches[match_index - 1].length + 1;
/* Test every available match length at the shortest distance. The buffer is sorted */
/* in order of increasing length, and therefore increasing distance too. */
for (; len_test >= base_len; --len_test) {
unsigned cur_and_len_price = normal_match_price
+ enc->states.len_states.prices[pos_state][len_test - kMatchLenMin];
size_t const len_to_dist_state = LEN_TO_DIST_STATE(len_test);
if (distance < kNumFullDistances)
cur_and_len_price += enc->distance_prices[len_to_dist_state][distance];
else
cur_and_len_price += enc->align_prices[distance & kAlignMask] + enc->dist_slot_prices[len_to_dist_state][slot];
if (cur_and_len_price < enc->opt_buf[len_test].price) {
enc->opt_buf[len_test].price = cur_and_len_price;
enc->opt_buf[len_test].len = (unsigned)len_test;
enc->opt_buf[len_test].dist = (U32)(distance + kNumReps);
enc->opt_buf[len_test].extra = 0;
}
else break;
}
}
return main_len;
}
return 0;
}
/* Test all available options at position 0 of the optimizer buffer.
* The prices at this point are all initialized to kInfinityPrice.
* This function must not be called at a position where no match is
* available. */
FORCE_INLINE_TEMPLATE
size_t LZMA_initOptimizerPos0(LZMA2_ECtx *const enc, FL2_dataBlock const block,
RMF_match const match,
size_t const pos,
int const is_hybrid,
U32* const reps)
{
size_t const max_length = MIN(block.end - pos, kMatchLenMax);
const BYTE *const data = block.data + pos;
const BYTE *data_2;
size_t rep_max_index = 0;
size_t rep_lens[kNumReps];
/* Find any rep matches */
for (size_t i = 0; i < kNumReps; ++i) {
reps[i] = enc->states.reps[i];
data_2 = data - reps[i] - 1;
if (MEM_read16(data) != MEM_read16(data_2)) {
rep_lens[i] = 0;
continue;
}
rep_lens[i] = ZSTD_count(data + 2, data_2 + 2, data + max_length) + 2;
if (rep_lens[i] > rep_lens[rep_max_index])
rep_max_index = i;
}
if (rep_lens[rep_max_index] >= enc->fast_length) {
enc->opt_buf[0].len = (unsigned)(rep_lens[rep_max_index]);
enc->opt_buf[0].dist = (U32)rep_max_index;
return 0;
}
if (match.length >= enc->fast_length) {
enc->opt_buf[0].len = match.length;
enc->opt_buf[0].dist = match.dist + kNumReps;
return 0;
}
unsigned const cur_byte = *data;
unsigned const match_byte = *(data - reps[0] - 1);
size_t const state = enc->states.state;
size_t const pos_state = pos & enc->pos_mask;
LZMA2_prob const is_match_prob = enc->states.is_match[state][pos_state];
LZMA2_prob const is_rep_prob = enc->states.is_rep[state];
enc->opt_buf[0].state = state;
/* Set the price for literal */
enc->opt_buf[1].price = GET_PRICE_0(is_match_prob) +
LZMA_getLiteralPrice(enc, pos, state, data[-1], cur_byte, match_byte);
MARK_LITERAL(enc->opt_buf[1]);
unsigned const match_price = GET_PRICE_1(is_match_prob);
unsigned const rep_match_price = match_price + GET_PRICE_1(is_rep_prob);
if (match_byte == cur_byte) {
/* Try 1-byte rep0 */
unsigned const short_rep_price = rep_match_price + LZMA_getRepLen1Price(enc, state, pos_state);
if (short_rep_price < enc->opt_buf[1].price) {
enc->opt_buf[1].price = short_rep_price;
MARK_SHORT_REP(enc->opt_buf[1]);
}
}
memcpy(enc->opt_buf[0].reps, reps, sizeof(enc->opt_buf[0].reps));
enc->opt_buf[1].len = 1;
/* Test the rep match prices */
for (size_t i = 0; i < kNumReps; ++i) {
size_t rep_len = rep_lens[i];
if (rep_len < 2)
continue;
unsigned const price = rep_match_price + LZMA_getRepPrice(enc, i, state, pos_state);
/* Test every available length of the rep */
do {
unsigned const cur_and_len_price = price + enc->states.rep_len_states.prices[pos_state][rep_len - kMatchLenMin];
if (cur_and_len_price < enc->opt_buf[rep_len].price) {
enc->opt_buf[rep_len].price = cur_and_len_price;
enc->opt_buf[rep_len].len = (unsigned)rep_len;
enc->opt_buf[rep_len].dist = (U32)i;
enc->opt_buf[rep_len].extra = 0;
}
} while (--rep_len >= kMatchLenMin);
}
unsigned const normal_match_price = match_price + GET_PRICE_0(is_rep_prob);
size_t const len = (rep_lens[0] >= 2) ? rep_lens[0] + 1 : 2;
/* Test the match prices */
if (!is_hybrid) {
/* Normal mode */
LZMA_initMatchesPos0(enc, match, pos_state, len, normal_match_price);
return MAX(match.length, rep_lens[rep_max_index]);
}
else {
/* Hybrid mode */
size_t main_len = LZMA_initMatchesPos0Best(enc, block, match, pos, len, normal_match_price);
return MAX(main_len, rep_lens[rep_max_index]);
}
}
FORCE_INLINE_TEMPLATE
size_t LZMA_encodeOptimumSequence(LZMA2_ECtx *const enc, FL2_dataBlock const block,
FL2_matchTable* const tbl,
int const struct_tbl,
int const is_hybrid,
size_t start_index,
size_t const uncompressed_end,
RMF_match match)
{
size_t len_end = enc->len_end_max;
unsigned const search_depth = tbl->params.depth;
do {
size_t const pos_mask = enc->pos_mask;
/* Reset all prices that were set last time */
for (; (len_end & 3) != 0; --len_end)
enc->opt_buf[len_end].price = kInfinityPrice;
for (; len_end >= 4; len_end -= 4) {
enc->opt_buf[len_end].price = kInfinityPrice;
enc->opt_buf[len_end - 1].price = kInfinityPrice;
enc->opt_buf[len_end - 2].price = kInfinityPrice;
enc->opt_buf[len_end - 3].price = kInfinityPrice;
}
/* Set everything up at position 0 */
size_t pos = start_index;
U32 reps[kNumReps];
len_end = LZMA_initOptimizerPos0(enc, block, match, pos, is_hybrid, reps);
match.length = 0;
size_t cur = 1;
/* len_end == 0 if a match of at least fast_length was found */
if (len_end > 0) {
++pos;
for (; cur < len_end; ++cur, ++pos) {
/* Terminate if the farthest calculated price is too near the buffer end */
if (len_end >= kOptimizerBufferSize - kOptimizerEndSize) {
U32 price = enc->opt_buf[cur].price;
/* This is a compromise to favor more distant end points
* even if the price is a bit higher */
U32 const delta = price / (U32)cur / 2U;
for (size_t j = cur + 1; j <= len_end; j++) {
U32 const price2 = enc->opt_buf[j].price;
if (price >= price2) {
price = price2;
cur = j;
}
price += delta;
}
break;
}
/* Skip ahead if a lower or equal price is available at greater distance */
size_t const end = MIN(cur + kOptimizerSkipSize, len_end);
U32 price = enc->opt_buf[cur].price;
for (size_t j = cur + 1; j <= end; j++) {
U32 const price2 = enc->opt_buf[j].price;
if (price >= price2) {
price = price2;
pos += j - cur;
cur = j;
if (cur == len_end)
goto reverse;
}
}
match = RMF_getMatch(block, tbl, search_depth, struct_tbl, pos);
if (match.length >= enc->fast_length)
break;
len_end = LZMA_optimalParse(enc, block, match, pos, cur, len_end, is_hybrid, reps);
}
reverse:
DEBUGLOG(6, "End optimal parse at %u", (U32)cur);
LZMA_reverseOptimalChain(enc->opt_buf, cur);
}
/* Encode the selections in the buffer */
size_t i = 0;
do {
unsigned const len = enc->opt_buf[i].len;
if (len == 1 && enc->opt_buf[i].dist == kNullDist) {
LZMA_encodeLiteralBuf(enc, block.data, start_index + i);
++i;
}
else {
size_t const pos_state = (start_index + i) & pos_mask;
U32 const dist = enc->opt_buf[i].dist;
/* Updating i separately for each case may allow a branch to be eliminated */
if (dist >= kNumReps) {
LZMA_encodeNormalMatch(enc, len, dist - kNumReps, pos_state);
i += len;
}
else if(len == 1) {
LZMA_encodeRepMatchShort(enc, pos_state);
++i;
}
else {
LZMA_encodeRepMatchLong(enc, len, dist, pos_state);
i += len;
}
}
} while (i < cur);
start_index += i;
/* Do another round if there is a long match pending,
* because the reps must be checked and the match encoded. */
} while (match.length >= enc->fast_length && start_index < uncompressed_end && enc->rc.out_index < enc->chunk_size);
enc->len_end_max = len_end;
return start_index;
}
static void FORCE_NOINLINE LZMA_fillAlignPrices(LZMA2_ECtx *const enc)
{
unsigned i;
const LZMA2_prob *const probs = enc->states.dist_align_encoders;
for (i = 0; i < kAlignTableSize / 2; i++) {
U32 price = 0;
unsigned sym = i;
unsigned m = 1;
unsigned bit;
bit = sym & 1; sym >>= 1; price += GET_PRICE(probs[m], bit); m = (m << 1) + bit;
bit = sym & 1; sym >>= 1; price += GET_PRICE(probs[m], bit); m = (m << 1) + bit;
bit = sym & 1; sym >>= 1; price += GET_PRICE(probs[m], bit); m = (m << 1) + bit;
U32 const prob = probs[m];
enc->align_prices[i] = price + GET_PRICE_0(prob);
enc->align_prices[i + 8] = price + GET_PRICE_1(prob);
}
}
static void FORCE_NOINLINE LZMA_fillDistancesPrices(LZMA2_ECtx *const enc)
{
U32 * const temp_prices = enc->distance_prices[kNumLenToPosStates - 1];
enc->match_price_count = 0;
for (size_t i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++) {
unsigned const dist_slot = distance_table[i];
unsigned footer_bits = (dist_slot >> 1) - 1;
size_t base = ((2 | (dist_slot & 1)) << footer_bits);
const LZMA2_prob *probs = enc->states.dist_encoders + base * 2U;
base += i;
probs = probs - distance_table[base] - 1;
U32 price = 0;
unsigned m = 1;
unsigned sym = (unsigned)i;
unsigned const offset = (unsigned)1 << footer_bits;
for (; footer_bits != 0; --footer_bits) {
unsigned bit = sym & 1;
sym >>= 1;
price += GET_PRICE(probs[m], bit);
m = (m << 1) + bit;
}
unsigned const prob = probs[m];
temp_prices[base] = price + GET_PRICE_0(prob);
temp_prices[base + offset] = price + GET_PRICE_1(prob);
}
for (unsigned lps = 0; lps < kNumLenToPosStates; lps++) {
size_t slot;
size_t const dist_table_size2 = (enc->dist_price_table_size + 1) >> 1;
U32 *const dist_slot_prices = enc->dist_slot_prices[lps];
const LZMA2_prob *const probs = enc->states.dist_slot_encoders[lps];
for (slot = 0; slot < dist_table_size2; slot++) {
/* dist_slot_prices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices); */
U32 price;
unsigned bit;
unsigned sym = (unsigned)slot + (1 << (kNumPosSlotBits - 1));
bit = sym & 1; sym >>= 1; price = GET_PRICE(probs[sym], bit);
bit = sym & 1; sym >>= 1; price += GET_PRICE(probs[sym], bit);
bit = sym & 1; sym >>= 1; price += GET_PRICE(probs[sym], bit);
bit = sym & 1; sym >>= 1; price += GET_PRICE(probs[sym], bit);
bit = sym & 1; sym >>= 1; price += GET_PRICE(probs[sym], bit);
unsigned const prob = probs[slot + (1 << (kNumPosSlotBits - 1))];
dist_slot_prices[slot * 2] = price + GET_PRICE_0(prob);
dist_slot_prices[slot * 2 + 1] = price + GET_PRICE_1(prob);
}
{
U32 delta = ((U32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
for (slot = kEndPosModelIndex / 2; slot < dist_table_size2; slot++) {
dist_slot_prices[slot * 2] += delta;
dist_slot_prices[slot * 2 + 1] += delta;
delta += ((U32)1 << kNumBitPriceShiftBits);
}
}
{
U32 *const dp = enc->distance_prices[lps];
dp[0] = dist_slot_prices[0];
dp[1] = dist_slot_prices[1];
dp[2] = dist_slot_prices[2];
dp[3] = dist_slot_prices[3];
for (size_t i = 4; i < kNumFullDistances; i += 2) {
U32 slot_price = dist_slot_prices[distance_table[i]];
dp[i] = slot_price + temp_prices[i];
dp[i + 1] = slot_price + temp_prices[i + 1];
}
}
}
}
FORCE_INLINE_TEMPLATE
size_t LZMA_encodeChunkBest(LZMA2_ECtx *const enc,
FL2_dataBlock const block,
FL2_matchTable* const tbl,
int const struct_tbl,
size_t pos,
size_t const uncompressed_end)
{
unsigned const search_depth = tbl->params.depth;
LZMA_fillDistancesPrices(enc);
LZMA_fillAlignPrices(enc);
LZMA_lengthStates_updatePrices(enc, &enc->states.len_states);
LZMA_lengthStates_updatePrices(enc, &enc->states.rep_len_states);
while (pos < uncompressed_end && enc->rc.out_index < enc->chunk_size)
{
RMF_match const match = RMF_getMatch(block, tbl, search_depth, struct_tbl, pos);
if (match.length > 1) {
/* Template-like inline function */
if (enc->strategy == FL2_ultra) {
pos = LZMA_encodeOptimumSequence(enc, block, tbl, struct_tbl, 1, pos, uncompressed_end, match);
}
else {
pos = LZMA_encodeOptimumSequence(enc, block, tbl, struct_tbl, 0, pos, uncompressed_end, match);
}
if (enc->match_price_count >= kMatchRepriceFrequency) {
LZMA_fillAlignPrices(enc);
LZMA_fillDistancesPrices(enc);
LZMA_lengthStates_updatePrices(enc, &enc->states.len_states);
}
if (enc->rep_len_price_count >= kRepLenRepriceFrequency) {
enc->rep_len_price_count = 0;
LZMA_lengthStates_updatePrices(enc, &enc->states.rep_len_states);
}
}
else {
if (block.data[pos] != block.data[pos - enc->states.reps[0] - 1]) {
LZMA_encodeLiteralBuf(enc, block.data, pos);
++pos;
}
else {
LZMA_encodeRepMatchShort(enc, pos & enc->pos_mask);
++pos;
}
}
}
return pos;
}
static void LZMA_lengthStates_Reset(LZMA2_lenStates* const ls, unsigned const fast_length)
{
ls->choice = kProbInitValue;
for (size_t i = 0; i < (kNumPositionStatesMax << (kLenNumLowBits + 1)); ++i)
ls->low[i] = kProbInitValue;
for (size_t i = 0; i < kLenNumHighSymbols; ++i)
ls->high[i] = kProbInitValue;
ls->table_size = fast_length + 1 - kMatchLenMin;
}
static void LZMA_encoderStates_Reset(LZMA2_encStates* const es, unsigned const lc, unsigned const lp, unsigned fast_length)
{
es->state = 0;
for (size_t i = 0; i < kNumReps; ++i)
es->reps[i] = 0;
for (size_t i = 0; i < kNumStates; ++i) {
for (size_t j = 0; j < kNumPositionStatesMax; ++j) {
es->is_match[i][j] = kProbInitValue;
es->is_rep0_long[i][j] = kProbInitValue;
}
es->is_rep[i] = kProbInitValue;
es->is_rep_G0[i] = kProbInitValue;
es->is_rep_G1[i] = kProbInitValue;
es->is_rep_G2[i] = kProbInitValue;
}
size_t const num = (size_t)(kNumLiterals * kNumLitTables) << (lp + lc);
for (size_t i = 0; i < num; ++i)
es->literal_probs[i] = kProbInitValue;
for (size_t i = 0; i < kNumLenToPosStates; ++i) {
LZMA2_prob *probs = es->dist_slot_encoders[i];
for (size_t j = 0; j < (1 << kNumPosSlotBits); ++j)
probs[j] = kProbInitValue;
}
for (size_t i = 0; i < kNumFullDistances - kEndPosModelIndex; ++i)
es->dist_encoders[i] = kProbInitValue;
LZMA_lengthStates_Reset(&es->len_states, fast_length);
LZMA_lengthStates_Reset(&es->rep_len_states, fast_length);
for (size_t i = 0; i < (1 << kNumAlignBits); ++i)
es->dist_align_encoders[i] = kProbInitValue;
}
BYTE LZMA2_getDictSizeProp(size_t const dictionary_size)
{
BYTE dict_size_prop = 0;
for (BYTE bit = 11; bit < 32; ++bit) {
if (((size_t)2 << bit) >= dictionary_size) {
dict_size_prop = (bit - 11) << 1;
break;
}
if (((size_t)3 << bit) >= dictionary_size) {
dict_size_prop = ((bit - 11) << 1) | 1;
break;
}
}
return dict_size_prop;
}
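/* Example: an 8 MiB dictionary (2 << 22) gives (22 - 11) << 1 = 22, and a
 * 12 MiB dictionary (3 << 22) gives 23; the property byte alternates between
 * 2*2^n and 3*2^n sizes. */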
size_t LZMA2_compressBound(size_t src_size)
{
/* Minimum average uncompressed size. An average size of half kChunkSize should be assumed
* to account for thread_count incomplete end chunks per block. LZMA expansion is < 2% so 1/16
* is a safe overestimate. */
static const unsigned chunk_min_avg = (kChunkSize - (kChunkSize / 16U)) / 2U;
/* Maximum size of data stored in a sequence of uncompressed chunks */
return src_size + ((src_size + chunk_min_avg - 1) / chunk_min_avg) * 3 + 6;
}
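/* Workspace estimate per thread: the encoder context plus, for FL2_ultra, the
 * hybrid-mode hash chain table of (1 << chain_log) U32 entries (one entry is
 * presumably already counted inside sizeof(LZMA2_hc3)). */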
size_t LZMA2_encMemoryUsage(unsigned const chain_log, FL2_strategy const strategy, unsigned const thread_count)
{
size_t size = sizeof(LZMA2_ECtx);
if(strategy == FL2_ultra)
size += sizeof(LZMA2_hc3) + (sizeof(U32) << chain_log) - sizeof(U32);
return size * thread_count;
}
static void LZMA2_reset(LZMA2_ECtx *const enc, size_t const max_distance)
{
DEBUGLOG(5, "LZMA encoder reset : max_distance %u", (unsigned)max_distance);
RC_reset(&enc->rc);
LZMA_encoderStates_Reset(&enc->states, enc->lc, enc->lp, enc->fast_length);
enc->pos_mask = (1 << enc->pb) - 1;
enc->lit_pos_mask = (1 << enc->lp) - 1;
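    /* The distance price table needs two slots per bit of the largest possible
     * distance, since each distance slot above the smallest covers half a power
     * of two. */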
U32 i = 0;
for (; max_distance > (size_t)1 << i; ++i) {
}
enc->dist_price_table_size = i * 2;
enc->rep_len_price_count = 0;
enc->match_price_count = 0;
}
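/* Pack lc, lp and pb into the standard LZMA properties byte. The defaults
 * lc = 3, lp = 0, pb = 2 give (2 * 5 + 0) * 9 + 3 = 93 = 0x5D, the familiar
 * value from .lzma stream headers. */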
static BYTE LZMA_getLcLpPbCode(LZMA2_ECtx *const enc)
{
return (BYTE)((enc->pb * 5 + enc->lp) * 9 + enc->lc);
}
/* Integer square root from https://stackoverflow.com/a/1101217 */
static U32 LZMA2_isqrt(U32 op)
{
U32 res = 0;
    /* ZSTD_highbit32(0) is undefined, so guard the zero case (reachable when
     * char_total is zero) before locating the leading bit */
    if (op == 0)
        return 0;
    /* "one" starts at the highest power of four less than or equal to the argument */
    U32 one = (U32)1 << (ZSTD_highbit32(op) & ~1);
while (one != 0) {
if (op >= res + one) {
op -= res + one;
res = res + 2U * one;
}
res >>= 1;
one >>= 2;
}
return res;
}
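/* Heuristic test for incompressible data. First walk the match table, charging a
 * cost for each unmatched byte, each new match, and each short match that is too
 * distant to be worth encoding; the chunk is compressible if the total stays far
 * enough below the number of bytes scanned. Failing that, fall back to comparing
 * the standard deviation of the byte-value counts against a per-strategy
 * threshold. */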
static BYTE LZMA2_isChunkIncompressible(const FL2_matchTable* const tbl,
FL2_dataBlock const block, size_t const start,
unsigned const strategy)
{
if (block.end - start >= kMinTestChunkSize) {
static const size_t max_dist_table[][5] = {
{ 0, 0, 0, 1U << 6, 1U << 14 }, /* fast */
{ 0, 0, 1U << 6, 1U << 14, 1U << 22 }, /* opt */
{ 0, 0, 1U << 6, 1U << 14, 1U << 22 } }; /* ultra */
static const size_t margin_divisor[3] = { 60U, 45U, 120U };
static const U32 dev_table[3] = { 24, 24, 20};
size_t const end = MIN(start + kChunkSize, block.end);
size_t const chunk_size = end - start;
size_t count = 0;
size_t const margin = chunk_size / margin_divisor[strategy];
size_t const terminator = start + margin;
if (tbl->is_struct) {
size_t prev_dist = 0;
for (size_t pos = start; pos < end; ) {
U32 const link = GetMatchLink(tbl->table, pos);
if (link == RADIX_NULL_LINK) {
++pos;
++count;
prev_dist = 0;
}
else {
size_t const length = GetMatchLength(tbl->table, pos);
                    size_t const dist = pos - link;
if (length > 4) {
/* Increase the cost if it's not the same match */
count += dist != prev_dist;
}
else {
/* Increment the cost for a short match. The cost is the entire length if it's too far */
count += (dist < max_dist_table[strategy][length]) ? 1 : length;
}
pos += length;
prev_dist = dist;
}
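                /* Compressible: the cost has fallen at least `margin` below the
                 * number of bytes scanned so far */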
if (count + terminator <= pos)
return 0;
}
}
else {
size_t prev_dist = 0;
for (size_t pos = start; pos < end; ) {
U32 const link = tbl->table[pos];
if (link == RADIX_NULL_LINK) {
++pos;
++count;
prev_dist = 0;
}
else {
size_t const length = link >> RADIX_LINK_BITS;
size_t const dist = pos - (link & RADIX_LINK_MASK);
if (length > 4)
count += dist != prev_dist;
else
count += (dist < max_dist_table[strategy][length]) ? 1 : length;
pos += length;
prev_dist = dist;
}
if (count + terminator <= pos)
return 0;
}
}
U32 char_count[256];
U32 char_total = 0;
        /* Expected count of each byte value in uniformly random data, scaled by 4 to match char_count */
U32 const avg = (U32)(chunk_size / 64U);
memset(char_count, 0, sizeof(char_count));
for (size_t pos = start; pos < end; ++pos)
char_count[block.data[pos]] += 4;
        /* Sum the squared deviations */
        for (size_t i = 0; i < 256; ++i) {
            S32 const delta = (S32)(char_count[i] - avg);
            /* Multiply as unsigned: the square can exceed the signed range in
             * degenerate chunks, and signed overflow would be undefined behavior */
            char_total += (U32)delta * (U32)delta;
        }
U32 sqrt_chunk = (chunk_size == kChunkSize) ? kSqrtChunkSize : LZMA2_isqrt((U32)chunk_size);
        /* Result based on the std dev of the character counts */
return LZMA2_isqrt(char_total) / sqrt_chunk <= dev_table[strategy];
}
return 0;
}
static size_t LZMA2_encodeChunk(LZMA2_ECtx *const enc,
FL2_matchTable* const tbl,
FL2_dataBlock const block,
size_t const pos, size_t const uncompressed_end)
{
/* Template-like inline functions */
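    /* Branching on compile-time constants lets the compiler specialize the
     * inlined encoder body for each match table layout and strategy. */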
if (enc->strategy == FL2_fast) {
if (tbl->is_struct) {
return LZMA_encodeChunkFast(enc, block, tbl, 1,
pos, uncompressed_end);
}
else {
return LZMA_encodeChunkFast(enc, block, tbl, 0,
pos, uncompressed_end);
}
}
else {
if (tbl->is_struct) {
return LZMA_encodeChunkBest(enc, block, tbl, 1,
pos, uncompressed_end);
}
else {
return LZMA_encodeChunkBest(enc, block, tbl, 0,
pos, uncompressed_end);
}
}
}
size_t LZMA2_encode(LZMA2_ECtx *const enc,
FL2_matchTable* const tbl,
FL2_dataBlock const block,
const FL2_lzma2Parameters* const options,
int stream_prop,
FL2_atomic *const progress_in,
FL2_atomic *const progress_out,
int *const canceled)
{
size_t const start = block.start;
/* Output starts in the temp buffer */
BYTE* out_dest = enc->out_buf;
enc->chunk_size = kTempMinOutput;
enc->chunk_limit = kTempBufferSize - kMaxMatchEncodeSize * 2;
/* Each encoder writes a properties byte because the upstream encoder(s) could */
/* write only uncompressed chunks with no properties. */
BYTE encode_properties = 1;
BYTE incompressible = 0;
if (block.end <= block.start)
return 0;
enc->lc = options->lc;
enc->lp = MIN(options->lp, kNumLiteralPosBitsMax);
if (enc->lc + enc->lp > kLcLpMax)
enc->lc = kLcLpMax - enc->lp;
enc->pb = MIN(options->pb, kNumPositionBitsMax);
enc->strategy = options->strategy;
enc->fast_length = MIN(options->fast_length, kMatchLenMax);
enc->match_cycles = MIN(options->match_cycles, kMatchesMax - 1);
LZMA2_reset(enc, block.end);
if (enc->strategy == FL2_ultra) {
/* Create a hash chain to put the encoder into hybrid mode */
if (enc->hash_alloc_3 < ((ptrdiff_t)1 << options->second_dict_bits)) {
if(LZMA_hashCreate(enc, options->second_dict_bits) != 0)
return FL2_ERROR(memory_allocation);
}
else {
LZMA_hashReset(enc, options->second_dict_bits);
}
enc->hash_prev_index = (start >= (size_t)enc->hash_dict_3) ? (ptrdiff_t)(start - enc->hash_dict_3) : (ptrdiff_t)-1;
}
enc->len_end_max = kOptimizerBufferSize - 1;
    /* Limit match lengths near the end of this slice so they do not extend beyond block.end */
RMF_limitLengths(tbl, block.end);
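    /* Each iteration below emits one LZMA2 chunk: a header, then either
     * range-coded data or a stored copy of the input if encoding expanded it */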
for (size_t pos = start; pos < block.end;) {
size_t header_size = (stream_prop >= 0) + (encode_properties ? kChunkHeaderSize + 1 : kChunkHeaderSize);
LZMA2_encStates saved_states;
size_t next_index;
RC_reset(&enc->rc);
RC_setOutputBuffer(&enc->rc, out_dest + header_size);
if (!incompressible) {
size_t cur = pos;
size_t const end = (enc->strategy == FL2_fast) ? MIN(block.end, pos + kMaxChunkUncompressedSize - kMatchLenMax + 1)
: MIN(block.end, pos + kMaxChunkUncompressedSize - kOptimizerBufferSize + 2); /* last byte of opt_buf unused */
/* Copy states in case chunk is incompressible */
saved_states = enc->states;
if (pos == 0) {
/* First byte of the dictionary */
LZMA_encodeLiteral(enc, 0, block.data[0], 0);
++cur;
}
if (pos == start) {
/* After kTempMinOutput bytes we can write data to the match table because the */
/* compressed data will never catch up with the table position being read. */
cur = LZMA2_encodeChunk(enc, tbl, block, cur, end);
if (header_size + enc->rc.out_index > kTempBufferSize)
return FL2_ERROR(internal);
/* Switch to the match table as output buffer */
out_dest = RMF_getTableAsOutputBuffer(tbl, start);
memcpy(out_dest, enc->out_buf, header_size + enc->rc.out_index);
enc->rc.out_buffer = out_dest + header_size;
/* Now encode up to the full chunk size */
enc->chunk_size = kChunkSize;
enc->chunk_limit = kMaxChunkCompressedSize - kMaxMatchEncodeSize * 2;
}
next_index = LZMA2_encodeChunk(enc, tbl, block, cur, end);
RC_flush(&enc->rc);
}
else {
next_index = MIN(pos + kChunkSize, block.end);
}
size_t compressed_size = enc->rc.out_index;
size_t uncompressed_size = next_index - pos;
if (compressed_size > kMaxChunkCompressedSize || uncompressed_size > kMaxChunkUncompressedSize)
return FL2_ERROR(internal);
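        /* LZMA2 chunk header layout: byte 0 is the control byte (for compressed
         * chunks, kChunkCompressedFlag | reset flags | bits 16+ of the unpacked
         * size - 1; for stored chunks, an uncompressed-chunk marker), bytes 1-2
         * hold the low 16 bits of unpacked size - 1, and compressed chunks add
         * bytes 3-4 for compressed size - 1 plus an optional properties byte. */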
BYTE* header = out_dest;
if (stream_prop >= 0) {
*header++ = (BYTE)stream_prop;
stream_prop = -1;
}
header[1] = (BYTE)((uncompressed_size - 1) >> 8);
header[2] = (BYTE)(uncompressed_size - 1);
/* Output an uncompressed chunk if necessary */
if (incompressible || uncompressed_size + 3 <= compressed_size + header_size) {
DEBUGLOG(6, "Storing chunk : was %u => %u", (unsigned)uncompressed_size, (unsigned)compressed_size);
header[0] = (pos == 0) ? kChunkUncompressedDictReset : kChunkUncompressed;
/* Copy uncompressed data into the output */
memcpy(header + 3, block.data + pos, uncompressed_size);
compressed_size = uncompressed_size;
header_size = 3 + (header - out_dest);
/* Restore states if compression was attempted */
if (!incompressible)
enc->states = saved_states;
}
else {
DEBUGLOG(6, "Compressed chunk : %u => %u", (unsigned)uncompressed_size, (unsigned)compressed_size);
if (pos == 0)
header[0] = kChunkCompressedFlag | kChunkAllReset;
else if (encode_properties)
header[0] = kChunkCompressedFlag | kChunkStatePropertiesReset;
else
header[0] = kChunkCompressedFlag | kChunkNothingReset;
header[0] |= (BYTE)((uncompressed_size - 1) >> 16);
header[3] = (BYTE)((compressed_size - 1) >> 8);
header[4] = (BYTE)(compressed_size - 1);
if (encode_properties) {
header[5] = LZMA_getLcLpPbCode(enc);
encode_properties = 0;
}
}
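        /* If the saving was below ~1/256 of the output (kRandomFilterMarginBits)
         * plus the header overhead, check whether the following data looks
         * incompressible and, if so, store it without encoding */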
if (incompressible || uncompressed_size + 3 <= compressed_size + (compressed_size >> kRandomFilterMarginBits) + header_size) {
/* Test the next chunk for compressibility */
incompressible = LZMA2_isChunkIncompressible(tbl, block, next_index, enc->strategy);
}
out_dest += compressed_size + header_size;
/* Update progress concurrently with other encoder threads */
FL2_atomic_add(*progress_in, (long)(next_index - pos));
FL2_atomic_add(*progress_out, (long)(compressed_size + header_size));
pos = next_index;
if (*canceled)
return FL2_ERROR(canceled);
}
return out_dest - RMF_getTableAsOutputBuffer(tbl, start);
}