mirror of
https://github.com/Xevion/easy7zip.git
synced 2025-12-10 18:07:07 -06:00
Major changes, including Brotli and Lizard
- update of zstd-mt library
- add brotli v0.6.0
- add lizard v2.0
- xxhash is from zstd for lz4, lz5 and lizard now
- update also the documentation, where needed
This commit is contained in:
@@ -1,130 +0,0 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function to find backward reference copies. */
|
||||
|
||||
#include "./backward_references.h"
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/dictionary.h"
|
||||
#include <brotli/types.h>
|
||||
#include "./command.h"
|
||||
#include "./dictionary_hash.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
#include "./quality.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Maps a backward |distance| to a distance code.

   When |distance| does not exceed |max_distance| the four most recent
   distances in |dist_cache| are consulted:
     - an exact hit on dist_cache[0] / dist_cache[1] yields code 0 / 1;
     - a distance within +-3 of dist_cache[0] yields one of the codes 4..9,
       and within +-3 of dist_cache[1] one of the codes 10..15; both are
       looked up in a nibble-packed table indexed by (distance + 3 - cached);
     - an exact hit on dist_cache[2] / dist_cache[3] yields code 2 / 3.
   Everything else is encoded as
   distance + BROTLI_NUM_DISTANCE_SHORT_CODES - 1. */
static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
                                                size_t max_distance,
                                                const int* dist_cache) {
  const size_t long_code = distance + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
  size_t last;
  size_t second_last;
  size_t delta_last;
  size_t delta_second_last;

  if (distance > max_distance) {
    return long_code;
  }

  last = (size_t)dist_cache[0];
  second_last = (size_t)dist_cache[1];
  if (distance == last) {
    return 0;
  }
  if (distance == second_last) {
    return 1;
  }

  /* Unsigned wrap-around makes "far away" deltas huge, so a single "< 7"
     comparison covers the whole -3..+3 neighbourhood. */
  delta_last = distance + 3 - last;
  delta_second_last = distance + 3 - second_last;
  if (delta_last < 7) {
    return (0x9750468 >> (4 * delta_last)) & 0xF;
  }
  if (delta_second_last < 7) {
    return (0xFDB1ACE >> (4 * delta_second_last)) & 0xF;
  }
  if (distance == (size_t)dist_cache[2]) {
    return 2;
  }
  if (distance == (size_t)dist_cache[3]) {
    return 3;
  }
  return long_code;
}
|
||||
|
||||
#define EXPAND_CAT(a, b) CAT(a, b)
|
||||
#define CAT(a, b) a ## b
|
||||
#define FN(X) EXPAND_CAT(X, HASHER())
|
||||
|
||||
#define HASHER() H2
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./backward_references_inc.h"
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H3
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./backward_references_inc.h"
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H4
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./backward_references_inc.h"
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H5
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./backward_references_inc.h"
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H6
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./backward_references_inc.h"
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H40
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./backward_references_inc.h"
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H41
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./backward_references_inc.h"
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H42
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./backward_references_inc.h"
|
||||
#undef HASHER
|
||||
|
||||
#define HASHER() H54
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./backward_references_inc.h"
|
||||
#undef HASHER
|
||||
|
||||
#undef FN
|
||||
#undef CAT
|
||||
#undef EXPAND_CAT
|
||||
|
||||
void BrotliCreateBackwardReferences(const BrotliDictionary* dictionary,
|
||||
size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params,
|
||||
HasherHandle hasher,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_commands,
|
||||
size_t* num_literals) {
|
||||
switch (params->hasher.type) {
|
||||
#define CASE_(N) \
|
||||
case N: \
|
||||
CreateBackwardReferencesH ## N(dictionary, \
|
||||
kStaticDictionaryHash, num_bytes, position, ringbuffer, \
|
||||
ringbuffer_mask, params, hasher, dist_cache, \
|
||||
last_insert_len, commands, num_commands, num_literals); \
|
||||
break;
|
||||
FOR_GENERIC_HASHERS(CASE_)
|
||||
#undef CASE_
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/dictionary.h"
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./command.h"
|
||||
#include "./hash.h"
|
||||
#include "./port.h"
|
||||
|
||||
@@ -1,790 +0,0 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function to find backward reference copies. */
|
||||
|
||||
#include "./backward_references_hq.h"
|
||||
|
||||
#include <string.h> /* memcpy, memset */
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include <brotli/types.h>
|
||||
#include "./command.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./literal_cost.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
#include "./prefix.h"
|
||||
#include "./quality.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Cost given to nodes that have not been reached yet; ~= 2 ^ 127. */
static const float kInfinity = 1.7e38f;

/* The two tables below are read together, one entry per short distance code
   j: the candidate distance for code j is
   distance_cache[kDistanceCacheIndex[j]] + kDistanceCacheOffset[j]. */

/* Which distance-cache slot short code j is based on. */
static const uint32_t kDistanceCacheIndex[] = {
  0, 1, 2, 3,        /* codes 0..3: the four cached distances themselves */
  0, 0, 0, 0, 0, 0,  /* codes 4..9: neighbours of slot 0 */
  1, 1, 1, 1, 1, 1,  /* codes 10..15: neighbours of slot 1 */
};

/* Signed adjustment applied to the cached distance for short code j. */
static const int kDistanceCacheOffset[] = {
  0, 0, 0, 0,
  -1, 1, -2, 2, -3, 3,
  -1, 1, -2, 2, -3, 3
};
|
||||
|
||||
/* Resets the first |length| entries of |array| to the "not reached yet"
   state: length 1, distance 0, insert_length 0 and cost kInfinity. */
void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
  ZopfliNode unreached;
  size_t remaining = length;

  unreached.length = 1;
  unreached.distance = 0;
  unreached.insert_length = 0;
  unreached.u.cost = kInfinity;

  /* Fill order is irrelevant: every slot receives the same value. */
  while (remaining > 0) {
    --remaining;
    array[remaining] = unreached;
  }
}
|
||||
|
||||
/* Returns the copy length held in the low 24 bits of |self->length|. */
static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
  const uint32_t copy_length_mask = 0xffffff;
  return self->length & copy_length_mask;
}
|
||||
|
||||
/* Returns the length code of the node's copy. The bits of |self->length|
   above bit 23 hold (copy length + 9 - length code), as written by
   UpdateZopfliNode, so the code is recovered by reversing that. */
static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
  const uint32_t copy_length = ZopfliNodeCopyLength(self);
  const uint32_t packed_delta = self->length >> 24;
  return copy_length + 9u - packed_delta;
}
|
||||
|
||||
/* Returns the copy distance held in the low 25 bits of |self->distance|. */
static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
  const uint32_t copy_distance_mask = 0x1ffffff;
  return self->distance & copy_distance_mask;
}
|
||||
|
||||
/* Returns the distance code of the node's copy. The bits of
   |self->distance| above bit 24 hold (short code + 1), or 0 when no short
   code was chosen; in the latter case the code is derived from the distance
   itself. */
static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
  const uint32_t short_code_plus_1 = self->distance >> 25;
  if (short_code_plus_1 != 0) {
    return short_code_plus_1 - 1;
  }
  return ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
}
|
||||
|
||||
/* Returns the number of input bytes covered by the node's command: its
   inserted literals plus its copy. */
static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
  const uint32_t copy_length = ZopfliNodeCopyLength(self);
  return copy_length + self->insert_length;
}
|
||||
|
||||
/* Histogram based cost model for zopflification. */
|
||||
typedef struct ZopfliCostModel {
|
||||
/* The insert and copy length symbols. */
|
||||
float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
|
||||
float cost_dist_[BROTLI_NUM_DISTANCE_SYMBOLS];
|
||||
/* Cumulative costs of literals per position in the stream. */
|
||||
float* literal_costs_;
|
||||
float min_cost_cmd_;
|
||||
size_t num_bytes_;
|
||||
} ZopfliCostModel;
|
||||
|
||||
static void InitZopfliCostModel(
|
||||
MemoryManager* m, ZopfliCostModel* self, size_t num_bytes) {
|
||||
self->num_bytes_ = num_bytes;
|
||||
self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
}
|
||||
|
||||
/* Releases the cumulative literal cost array owned by |self| through |m|.
   The fixed-size cost tables live inside the struct and need no cleanup. */
static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
  BROTLI_FREE(m, self->literal_costs_);
}
|
||||
|
||||
/* Converts a symbol histogram into per-symbol bit costs.

   histogram:      occurrence count per symbol.
   histogram_size: number of symbols in |histogram| and |cost|.
   cost:           output; cost[i] is log2(total) - log2(histogram[i]),
                   clamped to at least 1 bit. Symbols that never occurred
                   are charged log2(total) + 2 bits. */
static void SetCost(const uint32_t* histogram, size_t histogram_size,
                    float* cost) {
  size_t total = 0;
  float log2_total;
  size_t sym;

  for (sym = 0; sym < histogram_size; sym++) {
    total += histogram[sym];
  }
  log2_total = (float)FastLog2(total);

  for (sym = 0; sym < histogram_size; sym++) {
    float bits;
    if (histogram[sym] == 0) {
      /* Unseen symbol: charge more than any seen symbol can cost. */
      bits = log2_total + 2;
    } else {
      /* Shannon bits for this symbol. */
      bits = log2_total - (float)FastLog2(histogram[sym]);
      /* A symbol cannot be coded with less than 1 bit. */
      if (bits < 1) bits = 1;
    }
    cost[sym] = bits;
  }
}
|
||||
|
||||
static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const Command* commands,
|
||||
size_t num_commands,
|
||||
size_t last_insert_len) {
|
||||
uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
|
||||
uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
|
||||
uint32_t histogram_dist[BROTLI_NUM_DISTANCE_SYMBOLS];
|
||||
float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
|
||||
size_t pos = position - last_insert_len;
|
||||
float min_cost_cmd = kInfinity;
|
||||
size_t i;
|
||||
float* cost_cmd = self->cost_cmd_;
|
||||
|
||||
memset(histogram_literal, 0, sizeof(histogram_literal));
|
||||
memset(histogram_cmd, 0, sizeof(histogram_cmd));
|
||||
memset(histogram_dist, 0, sizeof(histogram_dist));
|
||||
|
||||
for (i = 0; i < num_commands; i++) {
|
||||
size_t inslength = commands[i].insert_len_;
|
||||
size_t copylength = CommandCopyLen(&commands[i]);
|
||||
size_t distcode = commands[i].dist_prefix_;
|
||||
size_t cmdcode = commands[i].cmd_prefix_;
|
||||
size_t j;
|
||||
|
||||
histogram_cmd[cmdcode]++;
|
||||
if (cmdcode >= 128) histogram_dist[distcode]++;
|
||||
|
||||
for (j = 0; j < inslength; j++) {
|
||||
histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
|
||||
}
|
||||
|
||||
pos += inslength + copylength;
|
||||
}
|
||||
|
||||
SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, cost_literal);
|
||||
SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, cost_cmd);
|
||||
SetCost(histogram_dist, BROTLI_NUM_DISTANCE_SYMBOLS, self->cost_dist_);
|
||||
|
||||
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
|
||||
min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]);
|
||||
}
|
||||
self->min_cost_cmd_ = min_cost_cmd;
|
||||
|
||||
{
|
||||
float* literal_costs = self->literal_costs_;
|
||||
size_t num_bytes = self->num_bytes_;
|
||||
literal_costs[0] = 0.0;
|
||||
for (i = 0; i < num_bytes; ++i) {
|
||||
literal_costs[i + 1] = literal_costs[i] +
|
||||
cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Fills the cost model without any prior command statistics.

   Per-byte literal costs come from BrotliEstimateBitCostsForLiterals and are
   accumulated in place into a running sum. Command and distance symbol costs
   are fixed estimates that grow with the symbol index: log2(11 + i) and
   log2(20 + i) respectively. min_cost_cmd_ is log2(11), the command cost at
   index 0. */
static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
                                               size_t position,
                                               const uint8_t* ringbuffer,
                                               size_t ringbuffer_mask) {
  float* const prefix = self->literal_costs_;
  float* const dist_cost = self->cost_dist_;
  float* const cmd_cost = self->cost_cmd_;
  const size_t block_len = self->num_bytes_;
  size_t i;

  /* Per-byte estimates land in prefix[1..block_len]; prefix[0] is the
     empty-prefix cost. */
  BrotliEstimateBitCostsForLiterals(position, block_len, ringbuffer_mask,
                                    ringbuffer, &prefix[1]);
  prefix[0] = 0.0;
  for (i = 0; i < block_len; ++i) {
    prefix[i + 1] += prefix[i];
  }

  for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
    cmd_cost[i] = (float)FastLog2(11 + (uint32_t)i);
  }
  for (i = 0; i < BROTLI_NUM_DISTANCE_SYMBOLS; ++i) {
    dist_cost[i] = (float)FastLog2(20 + (uint32_t)i);
  }
  self->min_cost_cmd_ = (float)FastLog2(11);
}
|
||||
|
||||
static BROTLI_INLINE float ZopfliCostModelGetCommandCost(
|
||||
const ZopfliCostModel* self, uint16_t cmdcode) {
|
||||
return self->cost_cmd_[cmdcode];
|
||||
}
|
||||
|
||||
static BROTLI_INLINE float ZopfliCostModelGetDistanceCost(
|
||||
const ZopfliCostModel* self, size_t distcode) {
|
||||
return self->cost_dist_[distcode];
|
||||
}
|
||||
|
||||
static BROTLI_INLINE float ZopfliCostModelGetLiteralCosts(
|
||||
const ZopfliCostModel* self, size_t from, size_t to) {
|
||||
return self->literal_costs_[to] - self->literal_costs_[from];
|
||||
}
|
||||
|
||||
static BROTLI_INLINE float ZopfliCostModelGetMinCostCmd(
|
||||
const ZopfliCostModel* self) {
|
||||
return self->min_cost_cmd_;
|
||||
}
|
||||
|
||||
/* REQUIRES: len >= 2, start_pos <= pos */
|
||||
/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */
|
||||
/* Maintains the "ZopfliNode array invariant". */
|
||||
static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
|
||||
size_t start_pos, size_t len, size_t len_code, size_t dist,
|
||||
size_t short_code, float cost) {
|
||||
ZopfliNode* next = &nodes[pos + len];
|
||||
next->length = (uint32_t)(len | ((len + 9u - len_code) << 24));
|
||||
next->distance = (uint32_t)(dist | (short_code << 25));
|
||||
next->insert_length = (uint32_t)(pos - start_pos);
|
||||
next->u.cost = cost;
|
||||
}
|
||||
|
||||
/* Snapshot of a candidate command start position. */
typedef struct PosData {
  /* Position of the candidate, relative to the block start. */
  size_t pos;
  /* The four last distances in effect at |pos|. */
  int distance_cache[4];
  /* |cost| minus the cost of coding everything up to |pos| as literals. */
  float costdiff;
  /* Cost of the cheapest known path reaching |pos|. */
  float cost;
} PosData;
|
||||
|
||||
/* Maintains the smallest 8 cost difference together with their positions */
|
||||
typedef struct StartPosQueue {
|
||||
PosData q_[8];
|
||||
size_t idx_;
|
||||
} StartPosQueue;
|
||||
|
||||
/* Empties the queue by resetting its push counter; the slots themselves are
   not touched. */
static BROTLI_INLINE void InitStartPosQueue(StartPosQueue* self) {
  self->idx_ = 0;
}
|
||||
|
||||
static size_t StartPosQueueSize(const StartPosQueue* self) {
|
||||
return BROTLI_MIN(size_t, self->idx_, 8);
|
||||
}
|
||||
|
||||
/* Inserts |posdata| into the queue, keeping the entries ordered by
   increasing costdiff. The new entry is written to the ring slot derived
   from the current push count and is then moved towards its place by one
   pass of adjacent compare-and-swap steps. */
static void StartPosQueuePush(StartPosQueue* self, const PosData* posdata) {
  PosData* ring = self->q_;
  size_t cursor = ~self->idx_ & 7;
  size_t count;
  size_t step;

  self->idx_++;
  count = StartPosQueueSize(self);
  ring[cursor] = *posdata;

  /* Restore the sorted order. In a list of |count| items at most
     |count - 1| adjacent element comparisons / swaps are required. */
  for (step = 1; step < count; ++step, ++cursor) {
    const size_t here = cursor & 7;
    const size_t there = (cursor + 1) & 7;
    if (ring[here].costdiff > ring[there].costdiff) {
      BROTLI_SWAP(PosData, ring, here, there);
    }
  }
}
|
||||
|
||||
static const PosData* StartPosQueueAt(const StartPosQueue* self, size_t k) {
|
||||
return &self->q_[(k - self->idx_) & 7];
|
||||
}
|
||||
|
||||
/* Returns the minimum possible copy length that can improve the cost of any */
|
||||
/* future position. */
|
||||
static size_t ComputeMinimumCopyLength(const float start_cost,
|
||||
const ZopfliNode* nodes,
|
||||
const size_t num_bytes,
|
||||
const size_t pos) {
|
||||
/* Compute the minimum possible cost of reaching any future position. */
|
||||
float min_cost = start_cost;
|
||||
size_t len = 2;
|
||||
size_t next_len_bucket = 4;
|
||||
size_t next_len_offset = 10;
|
||||
while (pos + len <= num_bytes && nodes[pos + len].u.cost <= min_cost) {
|
||||
/* We already reached (pos + len) with no more cost than the minimum
|
||||
possible cost of reaching anything from this pos, so there is no point in
|
||||
looking for lengths <= len. */
|
||||
++len;
|
||||
if (len == next_len_offset) {
|
||||
/* We reached the next copy length code bucket, so we add one more
|
||||
extra bit to the minimum cost. */
|
||||
min_cost += 1.0f;
|
||||
next_len_offset += next_len_bucket;
|
||||
next_len_bucket *= 2;
|
||||
}
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
/* REQUIRES: nodes[pos].cost < kInfinity
|
||||
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
|
||||
static uint32_t ComputeDistanceShortcut(const size_t block_start,
|
||||
const size_t pos,
|
||||
const size_t max_backward,
|
||||
const ZopfliNode* nodes) {
|
||||
const size_t clen = ZopfliNodeCopyLength(&nodes[pos]);
|
||||
const size_t ilen = nodes[pos].insert_length;
|
||||
const size_t dist = ZopfliNodeCopyDistance(&nodes[pos]);
|
||||
/* Since |block_start + pos| is the end position of the command, the copy part
|
||||
starts from |block_start + pos - clen|. Distances that are greater than
|
||||
this or greater than |max_backward| are static dictionary references, and
|
||||
do not update the last distances. Also distance code 0 (last distance)
|
||||
does not update the last distances. */
|
||||
if (pos == 0) {
|
||||
return 0;
|
||||
} else if (dist + clen <= block_start + pos &&
|
||||
dist <= max_backward &&
|
||||
ZopfliNodeDistanceCode(&nodes[pos]) > 0) {
|
||||
return (uint32_t)pos;
|
||||
} else {
|
||||
return nodes[pos - clen - ilen].u.shortcut;
|
||||
}
|
||||
}
|
||||
|
||||
/* Fills in dist_cache[0..3] with the last four distances (as defined by
|
||||
Section 4. of the Spec) that would be used at (block_start + pos) if we
|
||||
used the shortest path of commands from block_start, computed from
|
||||
nodes[0..pos]. The last four distances at block_start are in
|
||||
starting_dist_cache[0..3].
|
||||
REQUIRES: nodes[pos].cost < kInfinity
|
||||
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
|
||||
static void ComputeDistanceCache(const size_t pos,
|
||||
const int* starting_dist_cache,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache) {
|
||||
int idx = 0;
|
||||
size_t p = nodes[pos].u.shortcut;
|
||||
while (idx < 4 && p > 0) {
|
||||
const size_t ilen = nodes[p].insert_length;
|
||||
const size_t clen = ZopfliNodeCopyLength(&nodes[p]);
|
||||
const size_t dist = ZopfliNodeCopyDistance(&nodes[p]);
|
||||
dist_cache[idx++] = (int)dist;
|
||||
/* Because of prerequisite, p >= clen + ilen >= 2. */
|
||||
p = nodes[p - clen - ilen].u.shortcut;
|
||||
}
|
||||
for (; idx < 4; ++idx) {
|
||||
dist_cache[idx] = *starting_dist_cache++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Maintains "ZopfliNode array invariant" and pushes node to the queue, if it
|
||||
is eligible. */
|
||||
static void EvaluateNode(
|
||||
const size_t block_start, const size_t pos, const size_t max_backward_limit,
|
||||
const int* starting_dist_cache, const ZopfliCostModel* model,
|
||||
StartPosQueue* queue, ZopfliNode* nodes) {
|
||||
/* Save cost, because ComputeDistanceCache invalidates it. */
|
||||
float node_cost = nodes[pos].u.cost;
|
||||
nodes[pos].u.shortcut = ComputeDistanceShortcut(
|
||||
block_start, pos, max_backward_limit, nodes);
|
||||
if (node_cost <= ZopfliCostModelGetLiteralCosts(model, 0, pos)) {
|
||||
PosData posdata;
|
||||
posdata.pos = pos;
|
||||
posdata.cost = node_cost;
|
||||
posdata.costdiff = node_cost -
|
||||
ZopfliCostModelGetLiteralCosts(model, 0, pos);
|
||||
ComputeDistanceCache(
|
||||
pos, starting_dist_cache, nodes, posdata.distance_cache);
|
||||
StartPosQueuePush(queue, &posdata);
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns longest copy length. */
|
||||
static size_t UpdateNodes(
|
||||
const size_t num_bytes, const size_t block_start, const size_t pos,
|
||||
const uint8_t* ringbuffer, const size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, const size_t max_backward_limit,
|
||||
const int* starting_dist_cache, const size_t num_matches,
|
||||
const BackwardMatch* matches, const ZopfliCostModel* model,
|
||||
StartPosQueue* queue, ZopfliNode* nodes) {
|
||||
const size_t cur_ix = block_start + pos;
|
||||
const size_t cur_ix_masked = cur_ix & ringbuffer_mask;
|
||||
const size_t max_distance = BROTLI_MIN(size_t, cur_ix, max_backward_limit);
|
||||
const size_t max_len = num_bytes - pos;
|
||||
const size_t max_zopfli_len = MaxZopfliLen(params);
|
||||
const size_t max_iters = MaxZopfliCandidates(params);
|
||||
size_t min_len;
|
||||
size_t result = 0;
|
||||
size_t k;
|
||||
|
||||
EvaluateNode(block_start, pos, max_backward_limit, starting_dist_cache, model,
|
||||
queue, nodes);
|
||||
|
||||
{
|
||||
const PosData* posdata = StartPosQueueAt(queue, 0);
|
||||
float min_cost = (posdata->cost + ZopfliCostModelGetMinCostCmd(model) +
|
||||
ZopfliCostModelGetLiteralCosts(model, posdata->pos, pos));
|
||||
min_len = ComputeMinimumCopyLength(min_cost, nodes, num_bytes, pos);
|
||||
}
|
||||
|
||||
/* Go over the command starting positions in order of increasing cost
|
||||
difference. */
|
||||
for (k = 0; k < max_iters && k < StartPosQueueSize(queue); ++k) {
|
||||
const PosData* posdata = StartPosQueueAt(queue, k);
|
||||
const size_t start = posdata->pos;
|
||||
const uint16_t inscode = GetInsertLengthCode(pos - start);
|
||||
const float start_costdiff = posdata->costdiff;
|
||||
const float base_cost = start_costdiff + (float)GetInsertExtra(inscode) +
|
||||
ZopfliCostModelGetLiteralCosts(model, 0, pos);
|
||||
|
||||
/* Look for last distance matches using the distance cache from this
|
||||
starting position. */
|
||||
size_t best_len = min_len - 1;
|
||||
size_t j = 0;
|
||||
for (; j < BROTLI_NUM_DISTANCE_SHORT_CODES && best_len < max_len; ++j) {
|
||||
const size_t idx = kDistanceCacheIndex[j];
|
||||
const size_t backward =
|
||||
(size_t)(posdata->distance_cache[idx] + kDistanceCacheOffset[j]);
|
||||
size_t prev_ix = cur_ix - backward;
|
||||
if (prev_ix >= cur_ix) {
|
||||
continue;
|
||||
}
|
||||
if (BROTLI_PREDICT_FALSE(backward > max_distance)) {
|
||||
continue;
|
||||
}
|
||||
prev_ix &= ringbuffer_mask;
|
||||
|
||||
if (cur_ix_masked + best_len > ringbuffer_mask ||
|
||||
prev_ix + best_len > ringbuffer_mask ||
|
||||
ringbuffer[cur_ix_masked + best_len] !=
|
||||
ringbuffer[prev_ix + best_len]) {
|
||||
continue;
|
||||
}
|
||||
{
|
||||
const size_t len =
|
||||
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
|
||||
&ringbuffer[cur_ix_masked],
|
||||
max_len);
|
||||
const float dist_cost = base_cost +
|
||||
ZopfliCostModelGetDistanceCost(model, j);
|
||||
size_t l;
|
||||
for (l = best_len + 1; l <= len; ++l) {
|
||||
const uint16_t copycode = GetCopyLengthCode(l);
|
||||
const uint16_t cmdcode =
|
||||
CombineLengthCodes(inscode, copycode, j == 0);
|
||||
const float cost = (cmdcode < 128 ? base_cost : dist_cost) +
|
||||
(float)GetCopyExtra(copycode) +
|
||||
ZopfliCostModelGetCommandCost(model, cmdcode);
|
||||
if (cost < nodes[pos + l].u.cost) {
|
||||
UpdateZopfliNode(nodes, pos, start, l, l, backward, j + 1, cost);
|
||||
result = BROTLI_MAX(size_t, result, l);
|
||||
}
|
||||
best_len = l;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* At higher iterations look only for new last distance matches, since
|
||||
looking only for new command start positions with the same distances
|
||||
does not help much. */
|
||||
if (k >= 2) continue;
|
||||
|
||||
{
|
||||
/* Loop through all possible copy lengths at this position. */
|
||||
size_t len = min_len;
|
||||
for (j = 0; j < num_matches; ++j) {
|
||||
BackwardMatch match = matches[j];
|
||||
size_t dist = match.distance;
|
||||
BROTLI_BOOL is_dictionary_match = TO_BROTLI_BOOL(dist > max_distance);
|
||||
/* We already tried all possible last distance matches, so we can use
|
||||
normal distance code here. */
|
||||
size_t dist_code = dist + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
|
||||
uint16_t dist_symbol;
|
||||
uint32_t distextra;
|
||||
uint32_t distnumextra;
|
||||
float dist_cost;
|
||||
size_t max_match_len;
|
||||
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
|
||||
distnumextra = distextra >> 24;
|
||||
dist_cost = base_cost + (float)distnumextra +
|
||||
ZopfliCostModelGetDistanceCost(model, dist_symbol);
|
||||
|
||||
/* Try all copy lengths up until the maximum copy length corresponding
|
||||
to this distance. If the distance refers to the static dictionary, or
|
||||
the maximum length is long enough, try only one maximum length. */
|
||||
max_match_len = BackwardMatchLength(&match);
|
||||
if (len < max_match_len &&
|
||||
(is_dictionary_match || max_match_len > max_zopfli_len)) {
|
||||
len = max_match_len;
|
||||
}
|
||||
for (; len <= max_match_len; ++len) {
|
||||
const size_t len_code =
|
||||
is_dictionary_match ? BackwardMatchLengthCode(&match) : len;
|
||||
const uint16_t copycode = GetCopyLengthCode(len_code);
|
||||
const uint16_t cmdcode = CombineLengthCodes(inscode, copycode, 0);
|
||||
const float cost = dist_cost + (float)GetCopyExtra(copycode) +
|
||||
ZopfliCostModelGetCommandCost(model, cmdcode);
|
||||
if (cost < nodes[pos + len].u.cost) {
|
||||
UpdateZopfliNode(nodes, pos, start, len, len_code, dist, 0, cost);
|
||||
result = BROTLI_MAX(size_t, result, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static size_t ComputeShortestPathFromNodes(size_t num_bytes,
|
||||
ZopfliNode* nodes) {
|
||||
size_t index = num_bytes;
|
||||
size_t num_commands = 0;
|
||||
while (nodes[index].insert_length == 0 && nodes[index].length == 1) --index;
|
||||
nodes[index].u.next = BROTLI_UINT32_MAX;
|
||||
while (index != 0) {
|
||||
size_t len = ZopfliNodeCommandLength(&nodes[index]);
|
||||
index -= len;
|
||||
nodes[index].u.next = (uint32_t)len;
|
||||
num_commands++;
|
||||
}
|
||||
return num_commands;
|
||||
}
|
||||
|
||||
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
|
||||
void BrotliZopfliCreateCommands(const size_t num_bytes,
|
||||
const size_t block_start,
|
||||
const size_t max_backward_limit,
|
||||
const ZopfliNode* nodes,
|
||||
int* dist_cache,
|
||||
size_t* last_insert_len,
|
||||
Command* commands,
|
||||
size_t* num_literals) {
|
||||
size_t pos = 0;
|
||||
uint32_t offset = nodes[0].u.next;
|
||||
size_t i;
|
||||
for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
|
||||
const ZopfliNode* next = &nodes[pos + offset];
|
||||
size_t copy_length = ZopfliNodeCopyLength(next);
|
||||
size_t insert_length = next->insert_length;
|
||||
pos += insert_length;
|
||||
offset = next->u.next;
|
||||
if (i == 0) {
|
||||
insert_length += *last_insert_len;
|
||||
*last_insert_len = 0;
|
||||
}
|
||||
{
|
||||
size_t distance = ZopfliNodeCopyDistance(next);
|
||||
size_t len_code = ZopfliNodeLengthCode(next);
|
||||
size_t max_distance =
|
||||
BROTLI_MIN(size_t, block_start + pos, max_backward_limit);
|
||||
BROTLI_BOOL is_dictionary = TO_BROTLI_BOOL(distance > max_distance);
|
||||
size_t dist_code = ZopfliNodeDistanceCode(next);
|
||||
|
||||
InitCommand(
|
||||
&commands[i], insert_length, copy_length, len_code, dist_code);
|
||||
|
||||
if (!is_dictionary && dist_code > 0) {
|
||||
dist_cache[3] = dist_cache[2];
|
||||
dist_cache[2] = dist_cache[1];
|
||||
dist_cache[1] = dist_cache[0];
|
||||
dist_cache[0] = (int)distance;
|
||||
}
|
||||
}
|
||||
|
||||
*num_literals += insert_length;
|
||||
pos += copy_length;
|
||||
}
|
||||
*last_insert_len += num_bytes - pos;
|
||||
}
|
||||
|
||||
static size_t ZopfliIterate(size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params,
|
||||
const size_t max_backward_limit,
|
||||
const int* dist_cache,
|
||||
const ZopfliCostModel* model,
|
||||
const uint32_t* num_matches,
|
||||
const BackwardMatch* matches,
|
||||
ZopfliNode* nodes) {
|
||||
const size_t max_zopfli_len = MaxZopfliLen(params);
|
||||
StartPosQueue queue;
|
||||
size_t cur_match_pos = 0;
|
||||
size_t i;
|
||||
nodes[0].length = 0;
|
||||
nodes[0].u.cost = 0;
|
||||
InitStartPosQueue(&queue);
|
||||
for (i = 0; i + 3 < num_bytes; i++) {
|
||||
size_t skip = UpdateNodes(num_bytes, position, i, ringbuffer,
|
||||
ringbuffer_mask, params, max_backward_limit, dist_cache,
|
||||
num_matches[i], &matches[cur_match_pos], model, &queue, nodes);
|
||||
if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
|
||||
cur_match_pos += num_matches[i];
|
||||
if (num_matches[i] == 1 &&
|
||||
BackwardMatchLength(&matches[cur_match_pos - 1]) > max_zopfli_len) {
|
||||
skip = BROTLI_MAX(size_t,
|
||||
BackwardMatchLength(&matches[cur_match_pos - 1]), skip);
|
||||
}
|
||||
if (skip > 1) {
|
||||
skip--;
|
||||
while (skip) {
|
||||
i++;
|
||||
if (i + 3 >= num_bytes) break;
|
||||
EvaluateNode(
|
||||
position, i, max_backward_limit, dist_cache, model, &queue, nodes);
|
||||
cur_match_pos += num_matches[i];
|
||||
skip--;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ComputeShortestPathFromNodes(num_bytes, nodes);
|
||||
}
|
||||
|
||||
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
|
||||
size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
|
||||
const BrotliDictionary* dictionary,
|
||||
size_t num_bytes,
|
||||
size_t position,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params,
|
||||
const size_t max_backward_limit,
|
||||
const int* dist_cache,
|
||||
HasherHandle hasher,
|
||||
ZopfliNode* nodes) {
|
||||
const size_t max_zopfli_len = MaxZopfliLen(params);
|
||||
ZopfliCostModel model;
|
||||
StartPosQueue queue;
|
||||
BackwardMatch matches[MAX_NUM_MATCHES_H10];
|
||||
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
|
||||
position + num_bytes - StoreLookaheadH10() + 1 : position;
|
||||
size_t i;
|
||||
nodes[0].length = 0;
|
||||
nodes[0].u.cost = 0;
|
||||
InitZopfliCostModel(m, &model, num_bytes);
|
||||
if (BROTLI_IS_OOM(m)) return 0;
|
||||
ZopfliCostModelSetFromLiteralCosts(
|
||||
&model, position, ringbuffer, ringbuffer_mask);
|
||||
InitStartPosQueue(&queue);
|
||||
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
|
||||
const size_t pos = position + i;
|
||||
const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
|
||||
size_t num_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer,
|
||||
ringbuffer_mask, pos, num_bytes - i, max_distance, params, matches);
|
||||
size_t skip;
|
||||
if (num_matches > 0 &&
|
||||
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
|
||||
matches[0] = matches[num_matches - 1];
|
||||
num_matches = 1;
|
||||
}
|
||||
skip = UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
|
||||
params, max_backward_limit, dist_cache, num_matches, matches, &model,
|
||||
&queue, nodes);
|
||||
if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
|
||||
if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
|
||||
skip = BROTLI_MAX(size_t, BackwardMatchLength(&matches[0]), skip);
|
||||
}
|
||||
if (skip > 1) {
|
||||
/* Add the tail of the copy to the hasher. */
|
||||
StoreRangeH10(hasher, ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
|
||||
size_t, pos + skip, store_end));
|
||||
skip--;
|
||||
while (skip) {
|
||||
i++;
|
||||
if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
|
||||
EvaluateNode(
|
||||
position, i, max_backward_limit, dist_cache, &model, &queue, nodes);
|
||||
skip--;
|
||||
}
|
||||
}
|
||||
}
|
||||
CleanupZopfliCostModel(m, &model);
|
||||
return ComputeShortestPathFromNodes(num_bytes, nodes);
|
||||
}
|
||||
|
||||
void BrotliCreateZopfliBackwardReferences(
|
||||
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
|
||||
size_t* last_insert_len, Command* commands, size_t* num_commands,
|
||||
size_t* num_literals) {
|
||||
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
||||
ZopfliNode* nodes;
|
||||
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BrotliInitZopfliNodes(nodes, num_bytes + 1);
|
||||
*num_commands += BrotliZopfliComputeShortestPath(m, dictionary, num_bytes,
|
||||
position, ringbuffer, ringbuffer_mask, params, max_backward_limit,
|
||||
dist_cache, hasher, nodes);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BrotliZopfliCreateCommands(num_bytes, position, max_backward_limit, nodes,
|
||||
dist_cache, last_insert_len, commands, num_literals);
|
||||
BROTLI_FREE(m, nodes);
|
||||
}
|
||||
|
||||
void BrotliCreateHqZopfliBackwardReferences(
|
||||
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
|
||||
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
|
||||
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
|
||||
size_t* last_insert_len, Command* commands, size_t* num_commands,
|
||||
size_t* num_literals) {
|
||||
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
|
||||
uint32_t* num_matches = BROTLI_ALLOC(m, uint32_t, num_bytes);
|
||||
size_t matches_size = 4 * num_bytes;
|
||||
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
|
||||
position + num_bytes - StoreLookaheadH10() + 1 : position;
|
||||
size_t cur_match_pos = 0;
|
||||
size_t i;
|
||||
size_t orig_num_literals;
|
||||
size_t orig_last_insert_len;
|
||||
int orig_dist_cache[4];
|
||||
size_t orig_num_commands;
|
||||
ZopfliCostModel model;
|
||||
ZopfliNode* nodes;
|
||||
BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
|
||||
const size_t pos = position + i;
|
||||
size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
|
||||
size_t max_length = num_bytes - i;
|
||||
size_t num_found_matches;
|
||||
size_t cur_match_end;
|
||||
size_t j;
|
||||
/* Ensure that we have enough free slots. */
|
||||
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
|
||||
cur_match_pos + MAX_NUM_MATCHES_H10);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
num_found_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer,
|
||||
ringbuffer_mask, pos, max_length, max_distance, params,
|
||||
&matches[cur_match_pos]);
|
||||
cur_match_end = cur_match_pos + num_found_matches;
|
||||
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
|
||||
assert(BackwardMatchLength(&matches[j]) <
|
||||
BackwardMatchLength(&matches[j + 1]));
|
||||
assert(matches[j].distance > max_distance ||
|
||||
matches[j].distance <= matches[j + 1].distance);
|
||||
}
|
||||
num_matches[i] = (uint32_t)num_found_matches;
|
||||
if (num_found_matches > 0) {
|
||||
const size_t match_len = BackwardMatchLength(&matches[cur_match_end - 1]);
|
||||
if (match_len > MAX_ZOPFLI_LEN_QUALITY_11) {
|
||||
const size_t skip = match_len - 1;
|
||||
matches[cur_match_pos++] = matches[cur_match_end - 1];
|
||||
num_matches[i] = 1;
|
||||
/* Add the tail of the copy to the hasher. */
|
||||
StoreRangeH10(hasher, ringbuffer, ringbuffer_mask, pos + 1,
|
||||
BROTLI_MIN(size_t, pos + match_len, store_end));
|
||||
memset(&num_matches[i + 1], 0, skip * sizeof(num_matches[0]));
|
||||
i += skip;
|
||||
} else {
|
||||
cur_match_pos = cur_match_end;
|
||||
}
|
||||
}
|
||||
}
|
||||
orig_num_literals = *num_literals;
|
||||
orig_last_insert_len = *last_insert_len;
|
||||
memcpy(orig_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
orig_num_commands = *num_commands;
|
||||
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
InitZopfliCostModel(m, &model, num_bytes);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < 2; i++) {
|
||||
BrotliInitZopfliNodes(nodes, num_bytes + 1);
|
||||
if (i == 0) {
|
||||
ZopfliCostModelSetFromLiteralCosts(
|
||||
&model, position, ringbuffer, ringbuffer_mask);
|
||||
} else {
|
||||
ZopfliCostModelSetFromCommands(&model, position, ringbuffer,
|
||||
ringbuffer_mask, commands, *num_commands - orig_num_commands,
|
||||
orig_last_insert_len);
|
||||
}
|
||||
*num_commands = orig_num_commands;
|
||||
*num_literals = orig_num_literals;
|
||||
*last_insert_len = orig_last_insert_len;
|
||||
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
|
||||
*num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
|
||||
ringbuffer_mask, params, max_backward_limit, dist_cache,
|
||||
&model, num_matches, matches, nodes);
|
||||
BrotliZopfliCreateCommands(num_bytes, position, max_backward_limit,
|
||||
nodes, dist_cache, last_insert_len, commands, num_literals);
|
||||
}
|
||||
CleanupZopfliCostModel(m, &model);
|
||||
BROTLI_FREE(m, nodes);
|
||||
BROTLI_FREE(m, matches);
|
||||
BROTLI_FREE(m, num_matches);
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/dictionary.h"
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./command.h"
|
||||
#include "./hash.h"
|
||||
#include "./memory.h"
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Functions to estimate the bit cost of Huffman trees. */
|
||||
|
||||
#include "./bit_cost.h"
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include <brotli/types.h>
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define FN(X) X ## Literal
|
||||
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Command
|
||||
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Distance
|
||||
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -9,7 +9,7 @@
|
||||
#ifndef BROTLI_ENC_BIT_COST_H_
|
||||
#define BROTLI_ENC_BIT_COST_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./port.h"
|
||||
|
||||
@@ -1,197 +0,0 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Block split point selection utilities. */
|
||||
|
||||
#include "./block_splitter.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h> /* memcpy, memset */
|
||||
|
||||
#include "./bit_cost.h"
|
||||
#include "./cluster.h"
|
||||
#include "./command.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
#include "./quality.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Tuning constants for the block splitter, grouped by the stream they are
   passed with in BrotliSplitBlock. */

/* Literal stream. */
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kMaxLiteralHistograms = 100;
static const size_t kLiteralStrideLength = 70;
static const double kLiteralBlockSwitchCost = 28.1;

/* Insert-and-copy command stream (the histogram cap and stride are also
   passed for the distance stream). */
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kMaxCommandHistograms = 50;
static const size_t kCommandStrideLength = 40;
static const double kCommandBlockSwitchCost = 13.5;

/* Distance stream. */
static const size_t kSymbolsPerDistanceHistogram = 544;
static const double kDistanceBlockSwitchCost = 14.6;

/* Not referenced in this file directly; presumably consumed by the
   block_splitter_inc.h template included below -- verify there. */
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
|
||||
|
||||
static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
|
||||
/* Count how many we have. */
|
||||
size_t total_length = 0;
|
||||
size_t i;
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
total_length += cmds[i].insert_len_;
|
||||
}
|
||||
return total_length;
|
||||
}
|
||||
|
||||
static void CopyLiteralsToByteArray(const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t offset,
|
||||
const size_t mask,
|
||||
uint8_t* literals) {
|
||||
size_t pos = 0;
|
||||
size_t from_pos = offset & mask;
|
||||
size_t i;
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
size_t insert_len = cmds[i].insert_len_;
|
||||
if (from_pos + insert_len > mask) {
|
||||
size_t head_size = mask + 1 - from_pos;
|
||||
memcpy(literals + pos, data + from_pos, head_size);
|
||||
from_pos = 0;
|
||||
pos += head_size;
|
||||
insert_len -= head_size;
|
||||
}
|
||||
if (insert_len > 0) {
|
||||
memcpy(literals + pos, data + from_pos, insert_len);
|
||||
pos += insert_len;
|
||||
}
|
||||
from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
|
||||
}
|
||||
}
|
||||
|
||||
/* Multiplicative pseudo-random step: multiplies *seed by 16807 (with
   unsigned wrap-around), replaces a zero result by 1 so the sequence never
   gets stuck, stores the new state in *seed and returns it. */
static BROTLI_INLINE unsigned int MyRand(unsigned int* seed) {
  const unsigned int next = *seed * 16807U;
  *seed = (next == 0) ? 1 : next;
  return *seed;
}
|
||||
|
||||
/* Returns FastLog2(count), or the fixed value -2.0 for a zero count. */
static BROTLI_INLINE double BitCost(size_t count) {
  if (count == 0) {
    return -2.0;
  }
  return FastLog2(count);
}
|
||||
|
||||
#define HISTOGRAMS_PER_BATCH 64
|
||||
#define CLUSTERS_PER_BATCH 16
|
||||
|
||||
#define FN(X) X ## Literal
|
||||
#define DataType uint8_t
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./block_splitter_inc.h"
|
||||
#undef DataType
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Command
|
||||
#define DataType uint16_t
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./block_splitter_inc.h"
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Distance
|
||||
/* NOLINTNEXTLINE(build/include) */
|
||||
#include "./block_splitter_inc.h"
|
||||
#undef DataType
|
||||
#undef FN
|
||||
|
||||
/* Resets |self| to the empty state: no block types, no blocks, null
   storage pointers and zero allocated capacity. */
void BrotliInitBlockSplit(BlockSplit* self) {
  /* Counters. */
  self->num_types = 0;
  self->num_blocks = 0;
  /* Block type storage. */
  self->types = 0;
  self->types_alloc_size = 0;
  /* Block length storage. */
  self->lengths = 0;
  self->lengths_alloc_size = 0;
}
|
||||
|
||||
/* Releases the |types| and |lengths| arrays owned by |self| through the
   memory manager |m|. The remaining fields are left untouched. */
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
  BROTLI_FREE(m, self->types);
  BROTLI_FREE(m, self->lengths);
}
|
||||
|
||||
void BrotliSplitBlock(MemoryManager* m,
|
||||
const Command* cmds,
|
||||
const size_t num_commands,
|
||||
const uint8_t* data,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
const BrotliEncoderParams* params,
|
||||
BlockSplit* literal_split,
|
||||
BlockSplit* insert_and_copy_split,
|
||||
BlockSplit* dist_split) {
|
||||
{
|
||||
size_t literals_count = CountLiterals(cmds, num_commands);
|
||||
uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
/* Create a continuous array of literals. */
|
||||
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
|
||||
/* Create the block split on the array of literals.
|
||||
Literal histograms have alphabet size 256. */
|
||||
SplitByteVectorLiteral(
|
||||
m, literals, literals_count,
|
||||
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
|
||||
kLiteralStrideLength, kLiteralBlockSwitchCost, params,
|
||||
literal_split);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BROTLI_FREE(m, literals);
|
||||
}
|
||||
|
||||
{
|
||||
/* Compute prefix codes for commands. */
|
||||
uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
|
||||
size_t i;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
|
||||
}
|
||||
/* Create the block split on the array of command prefixes. */
|
||||
SplitByteVectorCommand(
|
||||
m, insert_and_copy_codes, num_commands,
|
||||
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
|
||||
kCommandStrideLength, kCommandBlockSwitchCost, params,
|
||||
insert_and_copy_split);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
/* TODO: reuse for distances? */
|
||||
BROTLI_FREE(m, insert_and_copy_codes);
|
||||
}
|
||||
|
||||
{
|
||||
/* Create a continuous array of distance prefixes. */
|
||||
uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
|
||||
size_t j = 0;
|
||||
size_t i;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
const Command* cmd = &cmds[i];
|
||||
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
|
||||
distance_prefixes[j++] = cmd->dist_prefix_;
|
||||
}
|
||||
}
|
||||
/* Create the block split on the array of distance prefixes. */
|
||||
SplitByteVectorDistance(
|
||||
m, distance_prefixes, j,
|
||||
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
|
||||
kCommandStrideLength, kDistanceBlockSwitchCost, params,
|
||||
dist_split);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BROTLI_FREE(m, distance_prefixes);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -9,7 +9,7 @@
|
||||
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
|
||||
#define BROTLI_ENC_BLOCK_SPLITTER_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./command.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -16,7 +16,7 @@
|
||||
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
||||
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./command.h"
|
||||
#include "./context.h"
|
||||
#include "./entropy_encode.h"
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Functions for clustering similar histograms together. */
|
||||
|
||||
#include "./cluster.h"
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "./bit_cost.h" /* BrotliPopulationCost */
|
||||
#include "./fast_log.h"
|
||||
#include "./histogram.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Ordering predicate for histogram pairs: |p1| is "less" than |p2| when it
   has the larger cost_diff; on equal cost_diff, when its index span
   (idx2 - idx1) is the larger one. */
static BROTLI_INLINE BROTLI_BOOL HistogramPairIsLess(
    const HistogramPair* p1, const HistogramPair* p2) {
  if (p1->cost_diff == p2->cost_diff) {
    /* Tie on cost: fall back to comparing the index spans. */
    return TO_BROTLI_BOOL((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1));
  }
  return TO_BROTLI_BOOL(p1->cost_diff > p2->cost_diff);
}
|
||||
|
||||
/* Returns entropy reduction of the context map when we combine two clusters. */
|
||||
static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
|
||||
size_t size_c = size_a + size_b;
|
||||
return (double)size_a * FastLog2(size_a) +
|
||||
(double)size_b * FastLog2(size_b) -
|
||||
(double)size_c * FastLog2(size_c);
|
||||
}
|
||||
|
||||
#define CODE(X) X
|
||||
|
||||
#define FN(X) X ## Literal
|
||||
#include "./cluster_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Command
|
||||
#include "./cluster_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Distance
|
||||
#include "./cluster_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#undef CODE
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -9,7 +9,7 @@
|
||||
#ifndef BROTLI_ENC_CLUSTER_H_
|
||||
#define BROTLI_ENC_CLUSTER_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./histogram.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
@@ -10,8 +10,8 @@
|
||||
#define BROTLI_ENC_COMMAND_H_
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include <brotli/port.h>
|
||||
#include <brotli/types.h>
|
||||
#include "../port.h"
|
||||
#include "../types.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./prefix.h"
|
||||
|
||||
|
||||
@@ -1,791 +0,0 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function for fast encoding of an input fragment, independently from the input
|
||||
history. This function uses one-pass processing: when we find a backward
|
||||
match, we immediately emit the corresponding command and literal codes to
|
||||
the bit stream.
|
||||
|
||||
Adapted from the CompressFragment() function in
|
||||
https://github.com/google/snappy/blob/master/snappy.cc */
|
||||
|
||||
#include "./compress_fragment.h"
|
||||
|
||||
#include <string.h> /* memcmp, memcpy, memset */
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include <brotli/types.h>
|
||||
#include "./brotli_bit_stream.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
|
||||
|
||||
/* kHashMul32 multiplier has these properties:
|
||||
* The multiplier must be odd. Otherwise we may lose the highest bit.
|
||||
* No long streaks of ones or zeros.
|
||||
* No effort is made to ensure that it is prime; being odd is enough
|
||||
for this use.
|
||||
* The number has been tuned heuristically against compression benchmarks. */
|
||||
static const uint32_t kHashMul32 = 0x1e35a7bd;
|
||||
|
||||
/* Hashes the bytes at |p|: loads 64 bits, shifts them left by 24 (dropping
   the top three bytes of the loaded word), multiplies by kHashMul32 and
   returns the product shifted right by |shift|, truncated to 32 bits. */
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
  const uint64_t window = BROTLI_UNALIGNED_LOAD64(p) << 24;
  const uint64_t mixed = window * kHashMul32;
  return (uint32_t)(mixed >> shift);
}
|
||||
|
||||
/* Same hash as Hash(), computed from an already loaded 64-bit word |v|
   after discarding its lowest |offset| bytes. |offset| must be in [0, 3]. */
static BROTLI_INLINE uint32_t HashBytesAtOffset(
    uint64_t v, int offset, size_t shift) {
  uint64_t mixed;
  assert(offset >= 0);
  assert(offset <= 3);
  mixed = ((v >> (8 * offset)) << 24) * kHashMul32;
  return (uint32_t)(mixed >> shift);
}
|
||||
|
||||
/* Returns BROTLI_TRUE when the five bytes at |p1| equal the five bytes at
   |p2|: the first four are compared as one unaligned 32-bit load, the fifth
   individually. */
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
  if (BROTLI_UNALIGNED_LOAD32(p1) != BROTLI_UNALIGNED_LOAD32(p2)) {
    return BROTLI_FALSE;
  }
  return TO_BROTLI_BOOL(p1[4] == p2[4]);
}
|
||||
|
||||
/* Builds a literal prefix code into "depths" and "bits" based on the statistics
|
||||
of the "input" string and stores it into the bit stream.
|
||||
Note that the prefix code here is built from the pre-LZ77 input, therefore
|
||||
we can only approximate the statistics of the actual literal stream.
|
||||
Moreover, for long inputs we build a histogram from a sample of the input
|
||||
and thus have to assign a non-zero depth for each literal.
|
||||
Returns estimated compression ratio millibytes/char for encoding given input
|
||||
with generated code. */
|
||||
static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
|
||||
const uint8_t* input,
|
||||
const size_t input_size,
|
||||
uint8_t depths[256],
|
||||
uint16_t bits[256],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
uint32_t histogram[256] = { 0 };
|
||||
size_t histogram_total;
|
||||
size_t i;
|
||||
if (input_size < (1 << 15)) {
|
||||
for (i = 0; i < input_size; ++i) {
|
||||
++histogram[input[i]];
|
||||
}
|
||||
histogram_total = input_size;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
/* We weigh the first 11 samples with weight 3 to account for the
|
||||
balancing effect of the LZ77 phase on the histogram. */
|
||||
const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
|
||||
histogram[i] += adjust;
|
||||
histogram_total += adjust;
|
||||
}
|
||||
} else {
|
||||
static const size_t kSampleRate = 29;
|
||||
for (i = 0; i < input_size; i += kSampleRate) {
|
||||
++histogram[input[i]];
|
||||
}
|
||||
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
/* We add 1 to each population count to avoid 0 bit depths (since this is
|
||||
only a sample and we don't know if the symbol appears or not), and we
|
||||
weigh the first 11 samples with weight 3 to account for the balancing
|
||||
effect of the LZ77 phase on the histogram (more frequent symbols are
|
||||
more likely to be in backward references instead as literals). */
|
||||
const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
|
||||
histogram[i] += adjust;
|
||||
histogram_total += adjust;
|
||||
}
|
||||
}
|
||||
BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
|
||||
/* max_bits = */ 8,
|
||||
depths, bits, storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) return 0;
|
||||
{
|
||||
size_t literal_ratio = 0;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
if (histogram[i]) literal_ratio += histogram[i] * depths[i];
|
||||
}
|
||||
/* Estimated encoding ratio, millibytes per symbol. */
|
||||
return (literal_ratio * 125) / histogram_total;
|
||||
}
|
||||
}
|
||||
|
||||
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
|
||||
"bits" based on "histogram" and stores it into the bit stream. */
|
||||
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
|
||||
uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
|
||||
HuffmanTree tree[129];
|
||||
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
|
||||
uint16_t cmd_bits[64];
|
||||
|
||||
BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
|
||||
BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
|
||||
/* We have to jump through a few hoops here in order to compute
|
||||
the command bits because the symbols are in a different order than in
|
||||
the full alphabet. This looks complicated, but having the symbols
|
||||
in this order in the command bits saves a few branches in the Emit*
|
||||
functions. */
|
||||
memcpy(cmd_depth, depth, 24);
|
||||
memcpy(cmd_depth + 24, depth + 40, 8);
|
||||
memcpy(cmd_depth + 32, depth + 24, 8);
|
||||
memcpy(cmd_depth + 40, depth + 48, 8);
|
||||
memcpy(cmd_depth + 48, depth + 32, 8);
|
||||
memcpy(cmd_depth + 56, depth + 56, 8);
|
||||
BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
|
||||
memcpy(bits, cmd_bits, 48);
|
||||
memcpy(bits + 24, cmd_bits + 32, 16);
|
||||
memcpy(bits + 32, cmd_bits + 48, 16);
|
||||
memcpy(bits + 40, cmd_bits + 24, 16);
|
||||
memcpy(bits + 48, cmd_bits + 40, 16);
|
||||
memcpy(bits + 56, cmd_bits + 56, 16);
|
||||
BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
|
||||
{
|
||||
/* Create the bit length array for the full command alphabet. */
|
||||
size_t i;
|
||||
memset(cmd_depth, 0, 64); /* only 64 first values were used */
|
||||
memcpy(cmd_depth, depth, 8);
|
||||
memcpy(cmd_depth + 64, depth + 8, 8);
|
||||
memcpy(cmd_depth + 128, depth + 16, 8);
|
||||
memcpy(cmd_depth + 192, depth + 24, 8);
|
||||
memcpy(cmd_depth + 384, depth + 32, 8);
|
||||
for (i = 0; i < 8; ++i) {
|
||||
cmd_depth[128 + 8 * i] = depth[40 + i];
|
||||
cmd_depth[256 + 8 * i] = depth[48 + i];
|
||||
cmd_depth[448 + 8 * i] = depth[56 + i];
|
||||
}
|
||||
BrotliStoreHuffmanTree(
|
||||
cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
|
||||
}
|
||||
BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
|
||||
}
|
||||
|
||||
/* REQUIRES: insertlen < 6210 */
|
||||
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
|
||||
const uint8_t depth[128],
|
||||
const uint16_t bits[128],
|
||||
uint32_t histo[128],
|
||||
size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
if (insertlen < 6) {
|
||||
const size_t code = insertlen + 40;
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
++histo[code];
|
||||
} else if (insertlen < 130) {
|
||||
const size_t tail = insertlen - 2;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
|
||||
const size_t prefix = tail >> nbits;
|
||||
const size_t inscode = (nbits << 1) + prefix + 42;
|
||||
BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
|
||||
++histo[inscode];
|
||||
} else if (insertlen < 2114) {
|
||||
const size_t tail = insertlen - 66;
|
||||
const uint32_t nbits = Log2FloorNonZero(tail);
|
||||
const size_t code = nbits + 50;
|
||||
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
|
||||
BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
|
||||
++histo[code];
|
||||
} else {
|
||||
BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
|
||||
BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
|
||||
++histo[21];
|
||||
}
|
||||
}
|
||||
|
||||
/* Writes the prefix code and extra bits for a long insert length
   (the caller is expected to pass insertlen >= 6210, since 6210 is
   subtracted unconditionally) and counts the emitted symbol in |histo|.

     insertlen < 22594 : symbol 62, 14 extra bits
     otherwise         : symbol 63, 24 extra bits

   |histo| is indexed by the same symbol number as |depth| and |bits|. */
static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
                                            const uint8_t depth[128],
                                            const uint16_t bits[128],
                                            uint32_t histo[128],
                                            size_t* storage_ix,
                                            uint8_t* storage) {
  /* The symbols were previously counted as histo[22] / histo[23], which are
     copy-length symbols; count the symbol that is actually written. */
  if (insertlen < 22594) {
    const size_t code = 62;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
    ++histo[code];
  } else {
    const size_t code = 63;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
    ++histo[code];
  }
}
|
||||
|
||||
/* Writes the prefix code and extra bits for a copy length and counts the
   emitted symbol in |histo|.

     copylen <   10 : symbol copylen + 14, no extra bits
     copylen <  134 : symbols 24..33, 1..5 extra bits
     copylen < 2118 : symbols 34..38, 6..10 extra bits
     otherwise      : symbol 39, 24 extra bits

   |histo| is indexed by the same symbol number as |depth| and |bits|. */
static BROTLI_INLINE void EmitCopyLen(size_t copylen,
                                      const uint8_t depth[128],
                                      const uint16_t bits[128],
                                      uint32_t histo[128],
                                      size_t* storage_ix,
                                      uint8_t* storage) {
  if (copylen < 10) {
    const size_t code = copylen + 14;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    ++histo[code];
  } else if (copylen < 134) {
    const size_t tail = copylen - 6;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 20;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 2118) {
    const size_t tail = copylen - 70;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    ++histo[code];
  } else {
    /* Fixed symbol 39. It used to be counted as histo[47], an insert-length
       symbol, which skewed the statistics for the next prefix code. */
    const size_t code = 39;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
    ++histo[code];
  }
}
|
||||
|
||||
/* Writes the prefix code and extra bits for a copy length of a copy that
   reuses the last distance, and counts the emitted symbols in |histo|.

     copylen <   12 : symbol copylen - 4, no extra bits
     copylen <   72 : symbols 8..15, 1..4 extra bits
     copylen <  136 : symbols 32..33, 5 extra bits, followed by symbol 64
     copylen < 2120 : symbols 34..38, 6..10 extra bits, followed by symbol 64
     otherwise      : symbol 39, 24 extra bits, followed by symbol 64

   In the last three cases symbol 64 (the first entry of the distance half of
   the tables) is written and counted as well. |histo| is indexed by the same
   symbol number as |depth| and |bits|. */
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
                                                  const uint8_t depth[128],
                                                  const uint16_t bits[128],
                                                  uint32_t histo[128],
                                                  size_t* storage_ix,
                                                  uint8_t* storage) {
  if (copylen < 12) {
    const size_t code = copylen - 4;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    ++histo[code];
  } else if (copylen < 72) {
    const size_t tail = copylen - 8;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    const size_t code = (nbits << 1) + prefix + 4;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
    ++histo[code];
  } else if (copylen < 136) {
    const size_t tail = copylen - 8;
    const size_t code = (tail >> 5) + 30;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(5, tail & 31, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else if (copylen < 2120) {
    const size_t tail = copylen - 72;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const size_t code = nbits + 28;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  } else {
    /* Fixed symbol 39. It used to be counted as histo[47], an insert-length
       symbol, which skewed the statistics for the next prefix code. */
    const size_t code = 39;
    BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
    BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
    BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
    ++histo[code];
    ++histo[64];
  }
}
|
||||
|
||||
/* Writes the prefix code and extra bits for |distance| and counts the
   emitted symbol in |histo|. The value distance + 3 is split into a symbol
   (80 + 2 * (nbits - 1) + prefix, i.e. inside the distance half of the
   tables) and |nbits| extra bits holding the remainder. */
static BROTLI_INLINE void EmitDistance(size_t distance,
                                       const uint8_t depth[128],
                                       const uint16_t bits[128],
                                       uint32_t histo[128],
                                       size_t* storage_ix, uint8_t* storage) {
  const size_t biased = distance + 3;
  const uint32_t nbits = Log2FloorNonZero(biased) - 1u;
  /* Second-highest bit of |biased| selects between two symbols per nbits. */
  const size_t prefix = (biased >> nbits) & 1;
  const size_t base = (2 + prefix) << nbits;
  const size_t symbol = 2 * (nbits - 1) + prefix + 80;
  const size_t extra = biased - base;
  BrotliWriteBits(depth[symbol], bits[symbol], storage_ix, storage);
  BrotliWriteBits(nbits, extra, storage_ix, storage);
  ++histo[symbol];
}
|
||||
|
||||
/* Writes the prefix code of each of the |len| bytes of |input| to the bit
   stream, in order, using the literal code given by |depth| and |bits|. */
static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
                                       const uint8_t depth[256],
                                       const uint16_t bits[256],
                                       size_t* storage_ix, uint8_t* storage) {
  size_t at = 0;
  while (at < len) {
    const uint8_t symbol = input[at];
    BrotliWriteBits(depth[symbol], bits[symbol], storage_ix, storage);
    ++at;
  }
}
|
||||
|
||||
/* REQUIRES: len <= 1 << 24. */
|
||||
static void BrotliStoreMetaBlockHeader(
|
||||
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
size_t nibbles = 6;
|
||||
/* ISLAST */
|
||||
BrotliWriteBits(1, 0, storage_ix, storage);
|
||||
if (len <= (1U << 16)) {
|
||||
nibbles = 4;
|
||||
} else if (len <= (1U << 20)) {
|
||||
nibbles = 5;
|
||||
}
|
||||
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
|
||||
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
|
||||
/* ISUNCOMPRESSED */
|
||||
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
|
||||
}
|
||||
|
||||
/* Overwrites |n_bits| bits of |array|, starting at bit position |pos|, with
   the low |n_bits| bits of |bits| (least significant bit first). Bits of
   the touched bytes outside that range keep their value. */
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
    uint8_t *array) {
  size_t remaining;
  size_t chunk;
  /* One iteration per touched byte. */
  for (remaining = n_bits; remaining > 0; remaining -= chunk) {
    const size_t index = pos >> 3;
    const size_t low_kept = pos & 7;   /* bits below the written range */
    const size_t room = 8 - low_kept;  /* bits left in this byte */
    uint32_t keep_mask;
    uint32_t kept;
    uint32_t fresh;
    chunk = (remaining < room) ? remaining : room;
    /* Ones for every bit of the byte that must survive. */
    keep_mask = (~((1u << (low_kept + chunk)) - 1u)) |
                ((1u << low_kept) - 1u);
    kept = array[index] & keep_mask;
    fresh = bits & ((1u << chunk) - 1u);
    array[index] = (uint8_t)((fresh << low_kept) | kept);
    bits >>= chunk;
    pos += chunk;
  }
}
|
||||
|
||||
/* Moves the write position back to bit |new_storage_ix|: clears, in the byte
   containing that bit, every bit at or above it and then stores the new
   position in *storage_ix. */
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  uint8_t* const partial_byte = &storage[new_storage_ix >> 3];
  const size_t kept_bits = new_storage_ix & 7;
  const size_t keep_mask = (1u << kept_bits) - 1;
  *partial_byte = (uint8_t)(*partial_byte & (uint8_t)keep_mask);
  *storage_ix = new_storage_ix;
}
|
||||
|
||||
/* Decides whether the next |len| bytes of |data| should be merged into the
   current block, i.e. coded with the existing literal code |depths|.

   Every 43rd byte is sampled into a histogram. Starting from
   (log2(total) + 0.5) * total + 200, the term
   count * (depth + log2(count)) is subtracted for every symbol; the block is
   merged when the result is not negative. */
static BROTLI_BOOL ShouldMergeBlock(
    const uint8_t* data, size_t len, const uint8_t* depths) {
  static const size_t kSampleRate = 43;
  size_t counts[256] = { 0 };
  size_t sampled;
  size_t sym;
  size_t at = 0;
  double balance;

  while (at < len) {
    ++counts[data[at]];
    at += kSampleRate;
  }

  sampled = (len + kSampleRate - 1) / kSampleRate;
  balance = (FastLog2(sampled) + 0.5) * (double)sampled + 200;
  for (sym = 0; sym < 256; ++sym) {
    balance -= (double)counts[sym] * (depths[sym] + FastLog2(counts[sym]));
  }
  return TO_BROTLI_BOOL(balance >= 0.0);
}
|
||||
|
||||
/* Acceptable loss for uncompressible speedup is 2% */
|
||||
#define MIN_RATIO 980
|
||||
|
||||
static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
|
||||
const uint8_t* metablock_start, const uint8_t* next_emit,
|
||||
const size_t insertlen, const size_t literal_ratio) {
|
||||
const size_t compressed = (size_t)(next_emit - metablock_start);
|
||||
if (compressed * 50 > insertlen) {
|
||||
return BROTLI_FALSE;
|
||||
} else {
|
||||
return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
|
||||
}
|
||||
}
|
||||
|
||||
/* Replaces whatever was written since bit position |storage_ix_start| by an
   uncompressed meta-block holding the bytes [begin, end).

   The bit stream is rewound to |storage_ix_start|, a meta-block header with
   ISUNCOMPRESSED set is written, the position is rounded up to the next byte
   boundary, the raw bytes are copied, and the byte following them is zeroed
   so that later bit writes start from a clean byte. */
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
                                      const size_t storage_ix_start,
                                      size_t* storage_ix, uint8_t* storage) {
  const size_t len = (size_t)(end - begin);
  RewindBitPosition(storage_ix_start, storage_ix, storage);
  BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
  /* Round up to a byte boundary. The mask must be size_t-wide: `~7u` is a
     32-bit value that would be zero-extended and clear the upper half of a
     64-bit bit position. */
  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;
  memcpy(&storage[*storage_ix >> 3], begin, len);
  *storage_ix += len << 3;
  storage[*storage_ix >> 3] = 0;
}
|
||||
|
||||
/* Initial contents of the 128-entry command/distance histogram. It is copied
   into the working histogram before the commands of each block are emitted.
   The table is only ever read, so it is declared const. */
static const uint32_t kCmdHistoSeed[128] = {
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 0, 0, 0, 0,
};
|
||||
|
||||
static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
|
||||
MemoryManager* m, const uint8_t* input, size_t input_size,
|
||||
BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
|
||||
uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
uint32_t cmd_histo[128];
|
||||
const uint8_t* ip_end;
|
||||
|
||||
/* "next_emit" is a pointer to the first byte that is not covered by a
|
||||
previous copy. Bytes between "next_emit" and the start of the next copy or
|
||||
the end of the input will be emitted as literal bytes. */
|
||||
const uint8_t* next_emit = input;
|
||||
/* Save the start of the first block for position and distance computations.
|
||||
*/
|
||||
const uint8_t* base_ip = input;
|
||||
|
||||
static const size_t kFirstBlockSize = 3 << 15;
|
||||
static const size_t kMergeBlockSize = 1 << 16;
|
||||
|
||||
const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
|
||||
const size_t kMinMatchLen = 5;
|
||||
|
||||
const uint8_t* metablock_start = input;
|
||||
size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
|
||||
size_t total_block_size = block_size;
|
||||
/* Save the bit position of the MLEN field of the meta-block header, so that
|
||||
we can update it later if we decide to extend this meta-block. */
|
||||
size_t mlen_storage_ix = *storage_ix + 3;
|
||||
|
||||
uint8_t lit_depth[256];
|
||||
uint16_t lit_bits[256];
|
||||
|
||||
size_t literal_ratio;
|
||||
|
||||
const uint8_t* ip;
|
||||
int last_distance;
|
||||
|
||||
const size_t shift = 64u - table_bits;
|
||||
|
||||
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
/* No block splits, no contexts. */
|
||||
BrotliWriteBits(13, 0, storage_ix, storage);
|
||||
|
||||
literal_ratio = BuildAndStoreLiteralPrefixCode(
|
||||
m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
{
|
||||
/* Store the pre-compressed command and distance prefix codes. */
|
||||
size_t i;
|
||||
for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
|
||||
BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
|
||||
}
|
||||
}
|
||||
BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
|
||||
storage_ix, storage);
|
||||
|
||||
emit_commands:
|
||||
/* Initialize the command and distance histograms. We will gather
|
||||
statistics of command and distance codes during the processing
|
||||
of this block and use it to update the command and distance
|
||||
prefix codes for the next block. */
|
||||
memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
|
||||
|
||||
/* "ip" is the input pointer. */
|
||||
ip = input;
|
||||
last_distance = -1;
|
||||
ip_end = input + block_size;
|
||||
|
||||
if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
|
||||
/* For the last block, we need to keep a 16 bytes margin so that we can be
|
||||
sure that all distances are at most window size - 16.
|
||||
For all other blocks, we only need to keep a margin of 5 bytes so that
|
||||
we don't go over the block size with a copy. */
|
||||
const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
|
||||
input_size - kInputMarginBytes);
|
||||
const uint8_t* ip_limit = input + len_limit;
|
||||
|
||||
uint32_t next_hash;
|
||||
for (next_hash = Hash(++ip, shift); ; ) {
|
||||
/* Step 1: Scan forward in the input looking for a 5-byte-long match.
|
||||
If we get close to exhausting the input then goto emit_remainder.
|
||||
|
||||
Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
found, start looking only at every other byte. If 32 more bytes are
|
||||
scanned, look at every third byte, etc.. When a match is found,
|
||||
immediately go back to looking at every byte. This is a small loss
|
||||
(~5% performance, ~0.1% density) for compressible data due to more
|
||||
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
|
||||
win since the compressor quickly "realizes" the data is incompressible
|
||||
and doesn't bother looking for matches everywhere.
|
||||
|
||||
The "skip" variable keeps track of how many bytes there are since the
|
||||
last match; dividing it by 32 (i.e. right-shifting by five) gives the
|
||||
number of bytes to move ahead for each iteration. */
|
||||
uint32_t skip = 32;
|
||||
|
||||
const uint8_t* next_ip = ip;
|
||||
const uint8_t* candidate;
|
||||
assert(next_emit < ip);
|
||||
trawl:
|
||||
do {
|
||||
uint32_t hash = next_hash;
|
||||
uint32_t bytes_between_hash_lookups = skip++ >> 5;
|
||||
assert(hash == Hash(next_ip, shift));
|
||||
ip = next_ip;
|
||||
next_ip = ip + bytes_between_hash_lookups;
|
||||
if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
next_hash = Hash(next_ip, shift);
|
||||
candidate = ip - last_distance;
|
||||
if (IsMatch(ip, candidate)) {
|
||||
if (BROTLI_PREDICT_TRUE(candidate < ip)) {
|
||||
table[hash] = (int)(ip - base_ip);
|
||||
break;
|
||||
}
|
||||
}
|
||||
candidate = base_ip + table[hash];
|
||||
assert(candidate >= base_ip);
|
||||
assert(candidate < ip);
|
||||
|
||||
table[hash] = (int)(ip - base_ip);
|
||||
} while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
|
||||
|
||||
/* Check copy distance. If candidate is not feasible, continue search.
|
||||
Checking is done outside of hot loop to reduce overhead. */
|
||||
if (ip - candidate > MAX_DISTANCE) goto trawl;
|
||||
|
||||
/* Step 2: Emit the found match together with the literal bytes from
|
||||
"next_emit" to the bit stream, and then see if we can find a next match
|
||||
immediately afterwards. Repeat until we find no match for the input
|
||||
without emitting some literal bytes. */
|
||||
|
||||
{
|
||||
/* We have a 5-byte match at ip, and we need to emit bytes in
|
||||
[next_emit, ip). */
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 5 + FindMatchLengthWithLimit(
|
||||
candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
|
||||
int distance = (int)(base - candidate); /* > 0 */
|
||||
size_t insert = (size_t)(base - next_emit);
|
||||
ip += matched;
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
if (BROTLI_PREDICT_TRUE(insert < 6210)) {
|
||||
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
|
||||
literal_ratio)) {
|
||||
EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
|
||||
storage_ix, storage);
|
||||
input_size -= (size_t)(base - input);
|
||||
input = base;
|
||||
next_emit = input;
|
||||
goto next_block;
|
||||
} else {
|
||||
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
}
|
||||
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
|
||||
storage_ix, storage);
|
||||
if (distance == last_distance) {
|
||||
BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
|
||||
++cmd_histo[64];
|
||||
} else {
|
||||
EmitDistance((size_t)distance, cmd_depth, cmd_bits,
|
||||
cmd_histo, storage_ix, storage);
|
||||
last_distance = distance;
|
||||
}
|
||||
EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
|
||||
next_emit = ip;
|
||||
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
/* We could immediately start working at ip now, but to improve
|
||||
compression we first update "table" with the hashes of some positions
|
||||
within the last copy. */
|
||||
{
|
||||
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 3);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 1);
|
||||
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = (int)(ip - base_ip);
|
||||
}
|
||||
}
|
||||
|
||||
while (IsMatch(ip, candidate)) {
|
||||
/* We have a 5-byte match at ip, and no need to emit any literal bytes
|
||||
prior to ip. */
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 5 + FindMatchLengthWithLimit(
|
||||
candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
|
||||
if (ip - candidate > MAX_DISTANCE) break;
|
||||
ip += matched;
|
||||
last_distance = (int)(base - candidate); /* > 0 */
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
|
||||
cmd_histo, storage_ix, storage);
|
||||
|
||||
next_emit = ip;
|
||||
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
/* We could immediately start working at ip now, but to improve
|
||||
compression we first update "table" with the hashes of some positions
|
||||
within the last copy. */
|
||||
{
|
||||
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 3);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 1);
|
||||
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = (int)(ip - base_ip);
|
||||
}
|
||||
}
|
||||
|
||||
next_hash = Hash(++ip, shift);
|
||||
}
|
||||
}
|
||||
|
||||
emit_remainder:
|
||||
assert(next_emit <= ip_end);
|
||||
input += block_size;
|
||||
input_size -= block_size;
|
||||
block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
|
||||
|
||||
/* Decide if we want to continue this meta-block instead of emitting the
|
||||
last insert-only command. */
|
||||
if (input_size > 0 &&
|
||||
total_block_size + block_size <= (1 << 20) &&
|
||||
ShouldMergeBlock(input, block_size, lit_depth)) {
|
||||
assert(total_block_size > (1 << 16));
|
||||
/* Update the size of the current meta-block and continue emitting commands.
|
||||
We can do this because the current size and the new size both have 5
|
||||
nibbles. */
|
||||
total_block_size += block_size;
|
||||
UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
|
||||
goto emit_commands;
|
||||
}
|
||||
|
||||
/* Emit the remaining bytes as literals. */
|
||||
if (next_emit < ip_end) {
|
||||
const size_t insert = (size_t)(ip_end - next_emit);
|
||||
if (BROTLI_PREDICT_TRUE(insert < 6210)) {
|
||||
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
|
||||
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
|
||||
literal_ratio)) {
|
||||
EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
|
||||
storage_ix, storage);
|
||||
} else {
|
||||
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
|
||||
storage_ix, storage);
|
||||
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
|
||||
storage_ix, storage);
|
||||
}
|
||||
}
|
||||
next_emit = ip_end;
|
||||
|
||||
next_block:
|
||||
/* If we have more data, write a new meta-block header and prefix codes and
|
||||
then continue emitting commands. */
|
||||
if (input_size > 0) {
|
||||
metablock_start = input;
|
||||
block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
|
||||
total_block_size = block_size;
|
||||
/* Save the bit position of the MLEN field of the meta-block header, so that
|
||||
we can update it later if we decide to extend this meta-block. */
|
||||
mlen_storage_ix = *storage_ix + 3;
|
||||
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
/* No block splits, no contexts. */
|
||||
BrotliWriteBits(13, 0, storage_ix, storage);
|
||||
literal_ratio = BuildAndStoreLiteralPrefixCode(
|
||||
m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
|
||||
storage_ix, storage);
|
||||
goto emit_commands;
|
||||
}
|
||||
|
||||
if (!is_last) {
|
||||
/* If this is not the last block, update the command and distance prefix
|
||||
codes for the next block and store the compressed forms. */
|
||||
cmd_code[0] = 0;
|
||||
*cmd_code_numbits = 0;
|
||||
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
|
||||
cmd_code_numbits, cmd_code);
|
||||
}
|
||||
}
|
||||
|
||||
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)
|
||||
|
||||
#define BAKE_METHOD_PARAM_(B) \
|
||||
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B( \
|
||||
MemoryManager* m, const uint8_t* input, size_t input_size, \
|
||||
BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128], \
|
||||
uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code, \
|
||||
size_t* storage_ix, uint8_t* storage) { \
|
||||
BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B, \
|
||||
cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
|
||||
}
|
||||
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
|
||||
#undef BAKE_METHOD_PARAM_
|
||||
|
||||
void BrotliCompressFragmentFast(
|
||||
MemoryManager* m, const uint8_t* input, size_t input_size,
|
||||
BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
|
||||
uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
const size_t initial_storage_ix = *storage_ix;
|
||||
const size_t table_bits = Log2FloorNonZero(table_size);
|
||||
|
||||
if (input_size == 0) {
|
||||
assert(is_last);
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (table_bits) {
|
||||
#define CASE_(B) \
|
||||
case B: \
|
||||
BrotliCompressFragmentFastImpl ## B( \
|
||||
m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
|
||||
cmd_code_numbits, cmd_code, storage_ix, storage); \
|
||||
break;
|
||||
FOR_TABLE_BITS_(CASE_)
|
||||
#undef CASE_
|
||||
default: assert(0); break;
|
||||
}
|
||||
|
||||
/* If output is larger than single uncompressed block, rewrite it. */
|
||||
if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
|
||||
EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
|
||||
storage_ix, storage);
|
||||
}
|
||||
|
||||
if (is_last) {
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
}
|
||||
}
|
||||
|
||||
#undef FOR_TABLE_BITS_
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -12,7 +12,7 @@
|
||||
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
|
||||
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
|
||||
@@ -1,612 +0,0 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Function for fast encoding of an input fragment, independently from the input
|
||||
history. This function uses two-pass processing: in the first pass we save
|
||||
the found backward matches and literal bytes into a buffer, and in the
|
||||
second pass we emit them into the bit stream using prefix codes built based
|
||||
on the actual command and literal byte histograms. */
|
||||
|
||||
#include "./compress_fragment_two_pass.h"
|
||||
|
||||
#include <string.h> /* memcmp, memcpy, memset */
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include <brotli/types.h>
|
||||
#include "./bit_cost.h"
|
||||
#include "./brotli_bit_stream.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Largest copy distance accepted by the match search, as a signed value so it
   can be compared with pointer differences. The expansion is parenthesized so
   the cast cannot bind to neighbouring tokens at the use site. */
#define MAX_DISTANCE ((long)BROTLI_MAX_BACKWARD_LIMIT(18))
|
||||
|
||||
/* Multiplier used by the hash functions below. Its properties:
   * It is odd; with an even multiplier the highest bit could be lost.
   * It has no long streaks of ones or zeros.
   * It is not required to be prime; being odd is enough for this use.
   * It was tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1E35A7BD;
|
||||
|
||||
/* Hashes the bytes at "p": loads 64 bits, shifts them left by 16, multiplies
   by kHashMul32 and keeps the bits above "shift".
   NOTE(review): the left shift presumably limits the hash to six of the eight
   loaded bytes on little-endian loads; confirm against
   BROTLI_UNALIGNED_LOAD64. */
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
  const uint64_t mixed = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
  const uint64_t top_bits = mixed >> shift;
  return (uint32_t)top_bits;
}
|
||||
|
||||
/* Same hash as Hash(), computed from an already loaded 64-bit value "v" after
   dropping its lowest "offset" bytes. "offset" must be 0, 1 or 2. */
static BROTLI_INLINE uint32_t HashBytesAtOffset(
    uint64_t v, int offset, size_t shift) {
  uint64_t mixed;
  assert(offset >= 0);
  assert(offset <= 2);
  mixed = ((v >> (8 * offset)) << 16) * kHashMul32;
  return (uint32_t)(mixed >> shift);
}
|
||||
|
||||
/* Returns BROTLI_TRUE when the six bytes at "p1" equal the six bytes at "p2".
   The first four are compared with one 32-bit load each, the remaining two
   bytewise. */
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
  if (BROTLI_UNALIGNED_LOAD32(p1) != BROTLI_UNALIGNED_LOAD32(p2)) {
    return BROTLI_FALSE;
  }
  if (p1[4] != p2[4]) {
    return BROTLI_FALSE;
  }
  return TO_BROTLI_BOOL(p1[5] == p2[5]);
}
|
||||
|
||||
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
|
||||
"bits" based on "histogram" and stores it into the bit stream. */
|
||||
static void BuildAndStoreCommandPrefixCode(
|
||||
const uint32_t histogram[128],
|
||||
uint8_t depth[128], uint16_t bits[128],
|
||||
size_t* storage_ix, uint8_t* storage) {
|
||||
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
|
||||
HuffmanTree tree[129];
|
||||
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
|
||||
uint16_t cmd_bits[64];
|
||||
BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
|
||||
BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
|
||||
/* We have to jump through a few hoops here in order to compute
|
||||
the command bits because the symbols are in a different order than in
|
||||
the full alphabet. This looks complicated, but having the symbols
|
||||
in this order in the command bits saves a few branches in the Emit*
|
||||
functions. */
|
||||
memcpy(cmd_depth, depth + 24, 24);
|
||||
memcpy(cmd_depth + 24, depth, 8);
|
||||
memcpy(cmd_depth + 32, depth + 48, 8);
|
||||
memcpy(cmd_depth + 40, depth + 8, 8);
|
||||
memcpy(cmd_depth + 48, depth + 56, 8);
|
||||
memcpy(cmd_depth + 56, depth + 16, 8);
|
||||
BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
|
||||
memcpy(bits, cmd_bits + 24, 16);
|
||||
memcpy(bits + 8, cmd_bits + 40, 16);
|
||||
memcpy(bits + 16, cmd_bits + 56, 16);
|
||||
memcpy(bits + 24, cmd_bits, 48);
|
||||
memcpy(bits + 48, cmd_bits + 32, 16);
|
||||
memcpy(bits + 56, cmd_bits + 48, 16);
|
||||
BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
|
||||
{
|
||||
/* Create the bit length array for the full command alphabet. */
|
||||
size_t i;
|
||||
memset(cmd_depth, 0, 64); /* only 64 first values were used */
|
||||
memcpy(cmd_depth, depth + 24, 8);
|
||||
memcpy(cmd_depth + 64, depth + 32, 8);
|
||||
memcpy(cmd_depth + 128, depth + 40, 8);
|
||||
memcpy(cmd_depth + 192, depth + 48, 8);
|
||||
memcpy(cmd_depth + 384, depth + 56, 8);
|
||||
for (i = 0; i < 8; ++i) {
|
||||
cmd_depth[128 + 8 * i] = depth[i];
|
||||
cmd_depth[256 + 8 * i] = depth[8 + i];
|
||||
cmd_depth[448 + 8 * i] = depth[16 + i];
|
||||
}
|
||||
BrotliStoreHuffmanTree(
|
||||
cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
|
||||
}
|
||||
BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
|
||||
}
|
||||
|
||||
/* Appends one 32-bit word describing an insert length to "*commands" and
   advances the pointer. The low 8 bits hold the insert-length code, the bits
   above them hold the extra-bits value for that code. */
static BROTLI_INLINE void EmitInsertLen(
    uint32_t insertlen, uint32_t** commands) {
  uint32_t word;
  if (insertlen < 6) {
    word = insertlen;
  } else if (insertlen < 130) {
    const uint32_t tail = insertlen - 2;
    const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
    const uint32_t prefix = tail >> nbits;
    const uint32_t inscode = (nbits << 1) + prefix + 2;
    const uint32_t extra = tail - (prefix << nbits);
    word = inscode | (extra << 8);
  } else if (insertlen < 2114) {
    const uint32_t tail = insertlen - 66;
    const uint32_t nbits = Log2FloorNonZero(tail);
    const uint32_t code = nbits + 10;
    const uint32_t extra = tail - (1u << nbits);
    word = code | (extra << 8);
  } else if (insertlen < 6210) {
    word = 21 | ((insertlen - 2114) << 8);
  } else if (insertlen < 22594) {
    word = 22 | ((insertlen - 6210) << 8);
  } else {
    word = 23 | ((insertlen - 22594) << 8);
  }
  **commands = word;
  ++(*commands);
}
|
||||
|
||||
/* Appends one 32-bit word describing a copy length to "*commands" and
   advances the pointer. The low 8 bits hold the copy-length code, the bits
   above them hold the extra-bits value for that code. */
static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
  size_t code;
  size_t extra;
  if (copylen < 10) {
    **commands = (uint32_t)(copylen + 38);
    ++(*commands);
    return;
  }
  if (copylen < 134) {
    const size_t tail = copylen - 6;
    const size_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    code = (nbits << 1) + prefix + 44;
    extra = tail - (prefix << nbits);
  } else if (copylen < 2118) {
    const size_t tail = copylen - 70;
    const size_t nbits = Log2FloorNonZero(tail);
    code = nbits + 52;
    extra = tail - ((size_t)1 << nbits);
  } else {
    code = 63;
    extra = copylen - 2118;
  }
  **commands = (uint32_t)(code | (extra << 8));
  ++(*commands);
}
|
||||
|
||||
/* Appends the command word(s) for a copy of "copylen" bytes to "*commands"
   and advances the pointer.
   Lengths below 72 produce a single word. Longer lengths produce a
   copy-length word followed by a second word with the value 64. */
static BROTLI_INLINE void EmitCopyLenLastDistance(
    size_t copylen, uint32_t** commands) {
  size_t code;
  size_t extra;

  if (copylen < 12) {
    **commands = (uint32_t)(copylen + 20);
    ++(*commands);
    return;
  }
  if (copylen < 72) {
    const size_t tail = copylen - 8;
    const size_t nbits = Log2FloorNonZero(tail) - 1;
    const size_t prefix = tail >> nbits;
    code = (nbits << 1) + prefix + 28;
    extra = tail - (prefix << nbits);
    **commands = (uint32_t)(code | (extra << 8));
    ++(*commands);
    return;
  }

  /* The remaining ranges all emit a length word plus the trailing 64. */
  if (copylen < 136) {
    const size_t tail = copylen - 8;
    code = (tail >> 5) + 54;
    extra = tail & 31;
  } else if (copylen < 2120) {
    const size_t tail = copylen - 72;
    const size_t nbits = Log2FloorNonZero(tail);
    code = nbits + 52;
    extra = tail - ((size_t)1 << nbits);
  } else {
    code = 63;
    extra = copylen - 2120;
  }
  **commands = (uint32_t)(code | (extra << 8));
  ++(*commands);
  **commands = 64;
  ++(*commands);
}
|
||||
|
||||
/* Appends one 32-bit word describing "distance" to "*commands" and advances
   the pointer. The low 8 bits hold the distance code (offset by 80), the bits
   above them hold the extra-bits value. */
static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
  const uint32_t shifted = distance + 3;
  const uint32_t nbits = Log2FloorNonZero(shifted) - 1;
  const uint32_t prefix = (shifted >> nbits) & 1;
  const uint32_t bucket_start = (2 + prefix) << nbits;
  const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
  const uint32_t extra = shifted - bucket_start;
  **commands = distcode | (extra << 8);
  ++(*commands);
}
|
||||
|
||||
/* REQUIRES: len <= 1 << 24. */
|
||||
static void BrotliStoreMetaBlockHeader(
|
||||
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
|
||||
uint8_t* storage) {
|
||||
size_t nibbles = 6;
|
||||
/* ISLAST */
|
||||
BrotliWriteBits(1, 0, storage_ix, storage);
|
||||
if (len <= (1U << 16)) {
|
||||
nibbles = 4;
|
||||
} else if (len <= (1U << 20)) {
|
||||
nibbles = 5;
|
||||
}
|
||||
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
|
||||
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
|
||||
/* ISUNCOMPRESSED */
|
||||
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void CreateCommands(const uint8_t* input,
|
||||
size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
|
||||
size_t table_bits, uint8_t** literals, uint32_t** commands) {
|
||||
/* "ip" is the input pointer. */
|
||||
const uint8_t* ip = input;
|
||||
const size_t shift = 64u - table_bits;
|
||||
const uint8_t* ip_end = input + block_size;
|
||||
/* "next_emit" is a pointer to the first byte that is not covered by a
|
||||
previous copy. Bytes between "next_emit" and the start of the next copy or
|
||||
the end of the input will be emitted as literal bytes. */
|
||||
const uint8_t* next_emit = input;
|
||||
|
||||
int last_distance = -1;
|
||||
const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
|
||||
const size_t kMinMatchLen = 6;
|
||||
|
||||
if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
|
||||
/* For the last block, we need to keep a 16 bytes margin so that we can be
|
||||
sure that all distances are at most window size - 16.
|
||||
For all other blocks, we only need to keep a margin of 5 bytes so that
|
||||
we don't go over the block size with a copy. */
|
||||
const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
|
||||
input_size - kInputMarginBytes);
|
||||
const uint8_t* ip_limit = input + len_limit;
|
||||
|
||||
uint32_t next_hash;
|
||||
for (next_hash = Hash(++ip, shift); ; ) {
|
||||
/* Step 1: Scan forward in the input looking for a 6-byte-long match.
|
||||
If we get close to exhausting the input then goto emit_remainder.
|
||||
|
||||
Heuristic match skipping: If 32 bytes are scanned with no matches
|
||||
found, start looking only at every other byte. If 32 more bytes are
|
||||
scanned, look at every third byte, etc.. When a match is found,
|
||||
immediately go back to looking at every byte. This is a small loss
|
||||
(~5% performance, ~0.1% density) for compressible data due to more
|
||||
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
|
||||
win since the compressor quickly "realizes" the data is incompressible
|
||||
and doesn't bother looking for matches everywhere.
|
||||
|
||||
The "skip" variable keeps track of how many bytes there are since the
|
||||
last match; dividing it by 32 (ie. right-shifting by five) gives the
|
||||
number of bytes to move ahead for each iteration. */
|
||||
uint32_t skip = 32;
|
||||
|
||||
const uint8_t* next_ip = ip;
|
||||
const uint8_t* candidate;
|
||||
|
||||
assert(next_emit < ip);
|
||||
trawl:
|
||||
do {
|
||||
uint32_t hash = next_hash;
|
||||
uint32_t bytes_between_hash_lookups = skip++ >> 5;
|
||||
ip = next_ip;
|
||||
assert(hash == Hash(ip, shift));
|
||||
next_ip = ip + bytes_between_hash_lookups;
|
||||
if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
next_hash = Hash(next_ip, shift);
|
||||
candidate = ip - last_distance;
|
||||
if (IsMatch(ip, candidate)) {
|
||||
if (BROTLI_PREDICT_TRUE(candidate < ip)) {
|
||||
table[hash] = (int)(ip - base_ip);
|
||||
break;
|
||||
}
|
||||
}
|
||||
candidate = base_ip + table[hash];
|
||||
assert(candidate >= base_ip);
|
||||
assert(candidate < ip);
|
||||
|
||||
table[hash] = (int)(ip - base_ip);
|
||||
} while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
|
||||
|
||||
/* Check copy distance. If candidate is not feasible, continue search.
|
||||
Checking is done outside of hot loop to reduce overhead. */
|
||||
if (ip - candidate > MAX_DISTANCE) goto trawl;
|
||||
|
||||
/* Step 2: Emit the found match together with the literal bytes from
|
||||
"next_emit", and then see if we can find a next match immediately
|
||||
afterwards. Repeat until we find no match for the input
|
||||
without emitting some literal bytes. */
|
||||
|
||||
{
|
||||
/* We have a 6-byte match at ip, and we need to emit bytes in
|
||||
[next_emit, ip). */
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 6 + FindMatchLengthWithLimit(
|
||||
candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
|
||||
int distance = (int)(base - candidate); /* > 0 */
|
||||
int insert = (int)(base - next_emit);
|
||||
ip += matched;
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
EmitInsertLen((uint32_t)insert, commands);
|
||||
memcpy(*literals, next_emit, (size_t)insert);
|
||||
*literals += insert;
|
||||
if (distance == last_distance) {
|
||||
**commands = 64;
|
||||
++(*commands);
|
||||
} else {
|
||||
EmitDistance((uint32_t)distance, commands);
|
||||
last_distance = distance;
|
||||
}
|
||||
EmitCopyLenLastDistance(matched, commands);
|
||||
|
||||
next_emit = ip;
|
||||
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
{
|
||||
/* We could immediately start working at ip now, but to improve
|
||||
compression we first update "table" with the hashes of some
|
||||
positions within the last copy. */
|
||||
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
uint32_t cur_hash;
|
||||
table[prev_hash] = (int)(ip - base_ip - 5);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 4);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 3);
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
|
||||
cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 1);
|
||||
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = (int)(ip - base_ip);
|
||||
}
|
||||
}
|
||||
|
||||
while (ip - candidate <= MAX_DISTANCE && IsMatch(ip, candidate)) {
|
||||
/* We have a 6-byte match at ip, and no need to emit any
|
||||
literal bytes prior to ip. */
|
||||
const uint8_t* base = ip;
|
||||
size_t matched = 6 + FindMatchLengthWithLimit(
|
||||
candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
|
||||
ip += matched;
|
||||
last_distance = (int)(base - candidate); /* > 0 */
|
||||
assert(0 == memcmp(base, candidate, matched));
|
||||
EmitCopyLen(matched, commands);
|
||||
EmitDistance((uint32_t)last_distance, commands);
|
||||
|
||||
next_emit = ip;
|
||||
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
|
||||
goto emit_remainder;
|
||||
}
|
||||
{
|
||||
/* We could immediately start working at ip now, but to improve
|
||||
compression we first update "table" with the hashes of some
|
||||
positions within the last copy. */
|
||||
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
|
||||
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
uint32_t cur_hash;
|
||||
table[prev_hash] = (int)(ip - base_ip - 5);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 4);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 3);
|
||||
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
|
||||
cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 2);
|
||||
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
|
||||
table[prev_hash] = (int)(ip - base_ip - 1);
|
||||
|
||||
candidate = base_ip + table[cur_hash];
|
||||
table[cur_hash] = (int)(ip - base_ip);
|
||||
}
|
||||
}
|
||||
|
||||
next_hash = Hash(++ip, shift);
|
||||
}
|
||||
}
|
||||
|
||||
emit_remainder:
|
||||
assert(next_emit <= ip_end);
|
||||
/* Emit the remaining bytes as literals. */
|
||||
if (next_emit < ip_end) {
|
||||
const uint32_t insert = (uint32_t)(ip_end - next_emit);
|
||||
EmitInsertLen(insert, commands);
|
||||
memcpy(*literals, next_emit, insert);
|
||||
*literals += insert;
|
||||
}
|
||||
}
|
||||
|
||||
/* Writes the entropy-coded payload of one meta-block.

   A literal prefix code is built from |literals| and stored, then a command
   prefix code is built from |commands| and stored, and finally every command
   is written (prefix code, then its extra bits). Command codes below 24 are
   insert codes: the literals they cover are written right after them.

   Each element of |commands| holds the command code in its low 8 bits and
   the extra-bits value in the bits above. If building the literal code
   reports out-of-memory, the function returns before writing any command. */
static void StoreCommands(MemoryManager* m,
    const uint8_t* literals, const size_t num_literals,
    const uint32_t* commands, const size_t num_commands,
    size_t* storage_ix, uint8_t* storage) {
  /* Number of extra bits written after each of the 128 command codes. */
  static const uint32_t kNumExtraBits[128] = {
    0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
    9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
    17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
  };
  /* Base insert length of each insert code; the extra bits are added to it. */
  static const uint32_t kInsertOffset[24] = {
    0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
    1090, 2114, 6210, 22594,
  };

  uint8_t lit_depths[256];
  uint16_t lit_bits[256];
  uint32_t lit_histo[256] = { 0 };
  uint8_t cmd_depths[128] = { 0 };
  uint16_t cmd_bits[128] = { 0 };
  uint32_t cmd_histo[128] = { 0 };
  const uint32_t* const cmd_end = commands + num_commands;
  const uint32_t* cmd_it;
  const uint8_t* next_literal = literals;
  size_t idx;

  /* Pass 1: literal statistics and literal prefix code. */
  for (idx = 0; idx < num_literals; ++idx) {
    lit_histo[literals[idx]] += 1;
  }
  BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
                                     /* max_bits = */ 8,
                                     lit_depths, lit_bits,
                                     storage_ix, storage);
  if (BROTLI_IS_OOM(m)) return;

  /* Pass 2: command statistics and command prefix code. */
  for (cmd_it = commands; cmd_it != cmd_end; ++cmd_it) {
    const uint32_t code = *cmd_it & 0xFF;
    assert(code < 128);
    cmd_histo[code] += 1;
  }
  /* These four codes are always given a nonzero count.
     NOTE(review): presumably required by BuildAndStoreCommandPrefixCode;
     confirm against its definition. */
  cmd_histo[1] += 1;
  cmd_histo[2] += 1;
  cmd_histo[64] += 1;
  cmd_histo[84] += 1;
  BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
                                 storage_ix, storage);

  /* Pass 3: emit the commands, interleaved with the literals they insert. */
  for (cmd_it = commands; cmd_it != cmd_end; ++cmd_it) {
    const uint32_t code = *cmd_it & 0xFF;
    const uint32_t extra = *cmd_it >> 8;
    assert(code < 128);
    BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
    BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
    if (code < 24) {
      uint32_t left;
      for (left = kInsertOffset[code] + extra; left != 0; --left) {
        const uint8_t lit = *next_literal++;
        BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
      }
    }
  }
}
|
||||
|
||||
/* Acceptable loss for uncompressible speedup is 2% */
|
||||
#define MIN_RATIO 0.98
|
||||
#define SAMPLE_RATE 43
|
||||
|
||||
static BROTLI_BOOL ShouldCompress(
|
||||
const uint8_t* input, size_t input_size, size_t num_literals) {
|
||||
double corpus_size = (double)input_size;
|
||||
if (num_literals < MIN_RATIO * corpus_size) {
|
||||
return BROTLI_TRUE;
|
||||
} else {
|
||||
uint32_t literal_histo[256] = { 0 };
|
||||
const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
|
||||
size_t i;
|
||||
for (i = 0; i < input_size; i += SAMPLE_RATE) {
|
||||
++literal_histo[input[i]];
|
||||
}
|
||||
return TO_BROTLI_BOOL(BitsEntropy(literal_histo, 256) < max_total_bit_cost);
|
||||
}
|
||||
}
|
||||
|
||||
/* Moves the write position back to bit |new_storage_ix|.

   In the byte containing that position, only the bits below the position
   are kept; the rest are cleared. Bytes after it are left untouched. */
static void RewindBitPosition(const size_t new_storage_ix,
                              size_t* storage_ix, uint8_t* storage) {
  const size_t byte_index = new_storage_ix >> 3;
  const size_t kept_bits = new_storage_ix & 7;
  const size_t keep_mask = (1u << kept_bits) - 1;
  storage[byte_index] &= (uint8_t)keep_mask;
  *storage_ix = new_storage_ix;
}
|
||||
|
||||
/* Stores |input_size| bytes of |input| as an uncompressed meta-block.

   Writes the meta-block header, advances the bit position to the next byte
   boundary, copies the raw bytes, and zeroes the byte at the new write
   position. */
static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
                                      size_t* storage_ix, uint8_t* storage) {
  BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
  /* Round up to a byte boundary. The mask must be built at size_t width:
     ~7u is a 32-bit value and would zero the upper half of a 64-bit
     position. */
  *storage_ix = (*storage_ix + 7u) & ~(size_t)7;
  memcpy(&storage[*storage_ix >> 3], input, input_size);
  *storage_ix += input_size << 3;
  storage[*storage_ix >> 3] = 0;
}
|
||||
|
||||
static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
|
||||
MemoryManager* m, const uint8_t* input, size_t input_size,
|
||||
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
|
||||
int* table, size_t table_bits, size_t* storage_ix, uint8_t* storage) {
|
||||
/* Save the start of the first block for position and distance computations.
|
||||
*/
|
||||
const uint8_t* base_ip = input;
|
||||
BROTLI_UNUSED(is_last);
|
||||
|
||||
while (input_size > 0) {
|
||||
size_t block_size =
|
||||
BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
|
||||
uint32_t* commands = command_buf;
|
||||
uint8_t* literals = literal_buf;
|
||||
size_t num_literals;
|
||||
CreateCommands(input, block_size, input_size, base_ip, table, table_bits,
|
||||
&literals, &commands);
|
||||
num_literals = (size_t)(literals - literal_buf);
|
||||
if (ShouldCompress(input, block_size, num_literals)) {
|
||||
const size_t num_commands = (size_t)(commands - command_buf);
|
||||
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
|
||||
/* No block splits, no contexts. */
|
||||
BrotliWriteBits(13, 0, storage_ix, storage);
|
||||
StoreCommands(m, literal_buf, num_literals, command_buf, num_commands,
|
||||
storage_ix, storage);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
} else {
|
||||
/* Since we did not find many backward references and the entropy of
|
||||
the data is close to 8 bits, we can simply emit an uncompressed block.
|
||||
This makes compression speed of uncompressible data about 3x faster. */
|
||||
EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
|
||||
}
|
||||
input += block_size;
|
||||
input_size -= block_size;
|
||||
}
|
||||
}
|
||||
|
||||
/* X-macro: applies X to every supported hash-table bit width, 8 through 17. */
#define FOR_TABLE_BITS_(X) \
|
||||
X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)
|
||||
|
||||
/* Defines BrotliCompressFragmentTwoPassImpl<B>: a non-inlined wrapper that
   calls BrotliCompressFragmentTwoPassImpl with table_bits fixed to the
   constant B. One wrapper is generated per width listed in FOR_TABLE_BITS_. */
#define BAKE_METHOD_PARAM_(B) \
|
||||
static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B( \
|
||||
MemoryManager* m, const uint8_t* input, size_t input_size, \
|
||||
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf, \
|
||||
int* table, size_t* storage_ix, uint8_t* storage) { \
|
||||
BrotliCompressFragmentTwoPassImpl(m, input, input_size, is_last, command_buf,\
|
||||
literal_buf, table, B, storage_ix, storage); \
|
||||
}
|
||||
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
|
||||
#undef BAKE_METHOD_PARAM_
|
||||
|
||||
void BrotliCompressFragmentTwoPass(
|
||||
MemoryManager* m, const uint8_t* input, size_t input_size,
|
||||
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
|
||||
int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
|
||||
const size_t initial_storage_ix = *storage_ix;
|
||||
const size_t table_bits = Log2FloorNonZero(table_size);
|
||||
switch (table_bits) {
|
||||
#define CASE_(B) \
|
||||
case B: \
|
||||
BrotliCompressFragmentTwoPassImpl ## B( \
|
||||
m, input, input_size, is_last, command_buf, \
|
||||
literal_buf, table, storage_ix, storage); \
|
||||
break;
|
||||
FOR_TABLE_BITS_(CASE_)
|
||||
#undef CASE_
|
||||
default: assert(0); break;
|
||||
}
|
||||
|
||||
/* If output is larger than single uncompressed block, rewrite it. */
|
||||
if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
|
||||
RewindBitPosition(initial_storage_ix, storage_ix, storage);
|
||||
EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
|
||||
}
|
||||
|
||||
if (is_last) {
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
|
||||
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
|
||||
*storage_ix = (*storage_ix + 7u) & ~7u;
|
||||
}
|
||||
}
|
||||
|
||||
#undef FOR_TABLE_BITS_
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -13,7 +13,7 @@
|
||||
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
|
||||
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
|
||||
|
||||
@@ -9,8 +9,8 @@
|
||||
#ifndef BROTLI_ENC_CONTEXT_H_
|
||||
#define BROTLI_ENC_CONTEXT_H_
|
||||
|
||||
#include <brotli/port.h>
|
||||
#include <brotli/types.h>
|
||||
#include "../port.h"
|
||||
#include "../types.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9,7 +9,7 @@
|
||||
#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
|
||||
#define BROTLI_ENC_DICTIONARY_HASH_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,501 +0,0 @@
|
||||
/* Copyright 2010 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Entropy encoding (Huffman) utilities. */
|
||||
|
||||
#include "./entropy_encode.h"
|
||||
|
||||
#include <string.h> /* memset */
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include <brotli/types.h>
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Walks the tree rooted at pool[p0] without recursion and stores each leaf's
   level in |depth|, indexed by the leaf's index_right_or_value_.

   A node with index_left_ >= 0 is internal; its index_right_or_value_ is
   then the right child. Returns BROTLI_FALSE as soon as a node deeper than
   |max_depth| is reached (entries written so far stay in |depth|), and
   BROTLI_TRUE once the whole tree has been visited. */
BROTLI_BOOL BrotliSetDepth(
    int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
  /* pending[level] is the right subtree still to visit at that level,
     or -1 when there is none. */
  int pending[16];
  int level = 0;
  int node = p0;
  assert(max_depth <= 15);
  pending[0] = -1;
  for (;;) {
    /* Go down the left spine, remembering the right sibling at each level. */
    while (pool[node].index_left_ >= 0) {
      ++level;
      if (level > max_depth) return BROTLI_FALSE;
      pending[level] = pool[node].index_right_or_value_;
      node = pool[node].index_left_;
    }
    depth[pool[node].index_right_or_value_] = (uint8_t)level;
    /* Climb back to the nearest level that still has a right subtree. */
    while (level >= 0 && pending[level] == -1) --level;
    if (level < 0) return BROTLI_TRUE;
    node = pending[level];
    pending[level] = -1;
  }
}
|
||||
|
||||
/* Sort the root nodes, least popular first. */
|
||||
static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
|
||||
const HuffmanTree* v0, const HuffmanTree* v1) {
|
||||
if (v0->total_count_ != v1->total_count_) {
|
||||
return TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);
|
||||
}
|
||||
return TO_BROTLI_BOOL(v0->index_right_or_value_ > v1->index_right_or_value_);
|
||||
}
|
||||
|
||||
/* This function will create a Huffman tree.
|
||||
|
||||
The catch here is that the tree cannot be arbitrarily deep.
|
||||
Brotli specifies a maximum depth of 15 bits for "code trees"
|
||||
and 7 bits for "code length code trees."
|
||||
|
||||
count_limit is the value that is to be faked as the minimum value
|
||||
and this minimum value is raised until the tree matches the
|
||||
maximum length requirement.
|
||||
|
||||
This algorithm is not of excellent performance for very long data blocks,
|
||||
especially when population counts are longer than 2**tree_limit, but
|
||||
we are not planning to use this with extremely long blocks.
|
||||
|
||||
See http://en.wikipedia.org/wiki/Huffman_coding */
|
||||
void BrotliCreateHuffmanTree(const uint32_t *data,
|
||||
const size_t length,
|
||||
const int tree_limit,
|
||||
HuffmanTree* tree,
|
||||
uint8_t *depth) {
|
||||
uint32_t count_limit;
|
||||
HuffmanTree sentinel;
|
||||
InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
|
||||
/* For block sizes below 64 kB, we never need to do a second iteration
|
||||
of this loop. Probably all of our block sizes will be smaller than
|
||||
that, so this loop is mostly of academic interest. If we actually
|
||||
would need this, we would be better off with the Katajainen algorithm. */
|
||||
for (count_limit = 1; ; count_limit *= 2) {
|
||||
size_t n = 0;
|
||||
size_t i;
|
||||
size_t j;
|
||||
size_t k;
|
||||
for (i = length; i != 0;) {
|
||||
--i;
|
||||
if (data[i]) {
|
||||
const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
|
||||
InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
|
||||
}
|
||||
}
|
||||
|
||||
if (n == 1) {
|
||||
depth[tree[0].index_right_or_value_] = 1; /* Only one element. */
|
||||
break;
|
||||
}
|
||||
|
||||
SortHuffmanTreeItems(tree, n, SortHuffmanTree);
|
||||
|
||||
/* The nodes are:
|
||||
[0, n): the sorted leaf nodes that we start with.
|
||||
[n]: we add a sentinel here.
|
||||
[n + 1, 2n): new parent nodes are added here, starting from
|
||||
(n+1). These are naturally in ascending order.
|
||||
[2n]: we add a sentinel at the end as well.
|
||||
There will be (2n+1) elements at the end. */
|
||||
tree[n] = sentinel;
|
||||
tree[n + 1] = sentinel;
|
||||
|
||||
i = 0; /* Points to the next leaf node. */
|
||||
j = n + 1; /* Points to the next non-leaf node. */
|
||||
for (k = n - 1; k != 0; --k) {
|
||||
size_t left, right;
|
||||
if (tree[i].total_count_ <= tree[j].total_count_) {
|
||||
left = i;
|
||||
++i;
|
||||
} else {
|
||||
left = j;
|
||||
++j;
|
||||
}
|
||||
if (tree[i].total_count_ <= tree[j].total_count_) {
|
||||
right = i;
|
||||
++i;
|
||||
} else {
|
||||
right = j;
|
||||
++j;
|
||||
}
|
||||
|
||||
{
|
||||
/* The sentinel node becomes the parent node. */
|
||||
size_t j_end = 2 * n - k;
|
||||
tree[j_end].total_count_ =
|
||||
tree[left].total_count_ + tree[right].total_count_;
|
||||
tree[j_end].index_left_ = (int16_t)left;
|
||||
tree[j_end].index_right_or_value_ = (int16_t)right;
|
||||
|
||||
/* Add back the last sentinel node. */
|
||||
tree[j_end + 1] = sentinel;
|
||||
}
|
||||
}
|
||||
if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
|
||||
/* We need to pack the Huffman tree in tree_limit bits. If this was not
|
||||
successful, add fake entities to the lowest values and retry. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Reverses v[start, end) in place. |end| is exclusive; an empty range
   (end <= start) is a no-op. */
static void Reverse(uint8_t* v, size_t start, size_t end) {
  /* Without this guard, end == 0 would wrap to SIZE_MAX on the decrement
     below and the loop would swap far outside the array. */
  if (end <= start) return;
  --end;
  while (start < end) {
    uint8_t tmp = v[start];
    v[start] = v[end];
    v[end] = tmp;
    ++start;
    --end;
  }
}
|
||||
|
||||
static void BrotliWriteHuffmanTreeRepetitions(
|
||||
const uint8_t previous_value,
|
||||
const uint8_t value,
|
||||
size_t repetitions,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
assert(repetitions > 0);
|
||||
if (previous_value != value) {
|
||||
tree[*tree_size] = value;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions == 7) {
|
||||
tree[*tree_size] = value;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions < 3) {
|
||||
size_t i;
|
||||
for (i = 0; i < repetitions; ++i) {
|
||||
tree[*tree_size] = value;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
}
|
||||
} else {
|
||||
size_t start = *tree_size;
|
||||
repetitions -= 3;
|
||||
while (BROTLI_TRUE) {
|
||||
tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
|
||||
extra_bits_data[*tree_size] = repetitions & 0x3;
|
||||
++(*tree_size);
|
||||
repetitions >>= 2;
|
||||
if (repetitions == 0) {
|
||||
break;
|
||||
}
|
||||
--repetitions;
|
||||
}
|
||||
Reverse(tree, start, *tree_size);
|
||||
Reverse(extra_bits_data, start, *tree_size);
|
||||
}
|
||||
}
|
||||
|
||||
static void BrotliWriteHuffmanTreeRepetitionsZeros(
|
||||
size_t repetitions,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
if (repetitions == 11) {
|
||||
tree[*tree_size] = 0;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
--repetitions;
|
||||
}
|
||||
if (repetitions < 3) {
|
||||
size_t i;
|
||||
for (i = 0; i < repetitions; ++i) {
|
||||
tree[*tree_size] = 0;
|
||||
extra_bits_data[*tree_size] = 0;
|
||||
++(*tree_size);
|
||||
}
|
||||
} else {
|
||||
size_t start = *tree_size;
|
||||
repetitions -= 3;
|
||||
while (BROTLI_TRUE) {
|
||||
tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
|
||||
extra_bits_data[*tree_size] = repetitions & 0x7;
|
||||
++(*tree_size);
|
||||
repetitions >>= 3;
|
||||
if (repetitions == 0) {
|
||||
break;
|
||||
}
|
||||
--repetitions;
|
||||
}
|
||||
Reverse(tree, start, *tree_size);
|
||||
Reverse(extra_bits_data, start, *tree_size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Adjusts |counts| in place so that the code lengths derived from it later
   contain longer runs of equal values.

   |good_for_rle| is scratch space of at least |length| bytes. Nothing is
   changed when fewer than 16 counts are non-zero; the smoothing passes
   (steps 2 and 3) additionally require at least 28 non-zero counts. */
void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
                                       uint8_t* good_for_rle) {
  const size_t streak_limit = 1240;
  size_t used_symbols = 0;
  size_t stride;
  size_t limit;
  size_t sum;
  size_t i;

  /* 1) Bail out on small histograms and drop trailing zeros. */
  for (i = 0; i < length; i++) {
    if (counts[i]) {
      ++used_symbols;
    }
  }
  if (used_symbols < 16) {
    return;
  }
  while (length != 0 && counts[length - 1] == 0) {
    --length;
  }
  if (length == 0) {
    return;  /* All zeros. */
  }
  /* Now counts[0..length - 1] does not have trailing zeros. */
  {
    size_t nonzeros = 0;
    uint32_t smallest_nonzero = 1 << 30;
    for (i = 0; i < length; ++i) {
      const uint32_t c = counts[i];
      if (c == 0) continue;
      ++nonzeros;
      if (c < smallest_nonzero) {
        smallest_nonzero = c;
      }
    }
    if (nonzeros < 5) {
      /* Small histogram will model it well. */
      return;
    }
    if (smallest_nonzero < 4 && length - nonzeros < 6) {
      /* Few zeros overall: fill isolated single zeros with a count of 1. */
      for (i = 1; i < length - 1; ++i) {
        if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
          counts[i] = 1;
        }
      }
    }
    if (nonzeros < 28) {
      return;
    }
  }

  /* 2) Mark the runs that are already long enough and must not be touched:
        5 or more equal zeros, or 7 or more equal non-zero counts. */
  memset(good_for_rle, 0, length);
  {
    size_t run_start = 0;
    while (run_start < length) {
      const uint32_t symbol = counts[run_start];
      size_t run_end = run_start + 1;
      size_t run_len;
      while (run_end < length && counts[run_end] == symbol) {
        ++run_end;
      }
      run_len = run_end - run_start;
      if ((symbol == 0 && run_len >= 5) || (symbol != 0 && run_len >= 7)) {
        memset(good_for_rle + run_start, 1, run_len);
      }
      run_start = run_end;
    }
  }

  /* 3) Replace strides of similar counts by their rounded average.
        |limit| is the stride's reference value scaled by 256. */
  stride = 0;
  limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
  sum = 0;
  for (i = 0; i <= length; ++i) {
    /* The stride ends at the end of input, next to a protected run, or when
       counts[i] is more than streak_limit away from limit (the unsigned
       wrap-around makes this a two-sided range test). */
    if (i == length || good_for_rle[i] ||
        (i != 0 && good_for_rle[i - 1]) ||
        (256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
      if (stride >= 4 || (stride >= 3 && sum == 0)) {
        size_t k;
        /* Collapse the finished stride to one value. */
        size_t average = (sum + stride / 2) / stride;
        if (average == 0) {
          average = 1;
        }
        if (sum == 0) {
          /* Don't make an all zeros stride to be upgraded to ones. */
          average = 0;
        }
        for (k = 0; k < stride; ++k) {
          /* counts[i] already belongs to the next stride, hence the - 1. */
          counts[i - k - 1] = (uint32_t)average;
        }
      }
      stride = 0;
      sum = 0;
      if (i < length - 2) {
        /* All interesting strides have a count of at least 4, */
        /* at least when non-zeros. */
        limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
      } else if (i < length) {
        limit = 256 * counts[i];
      } else {
        limit = 0;
      }
    }
    ++stride;
    if (i != length) {
      sum += counts[i];
      if (stride >= 4) {
        limit = (256 * sum + stride / 2) / stride;
      }
      if (stride == 4) {
        limit += 120;
      }
    }
  }
}
|
||||
|
||||
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
|
||||
BROTLI_BOOL *use_rle_for_non_zero,
|
||||
BROTLI_BOOL *use_rle_for_zero) {
|
||||
size_t total_reps_zero = 0;
|
||||
size_t total_reps_non_zero = 0;
|
||||
size_t count_reps_zero = 1;
|
||||
size_t count_reps_non_zero = 1;
|
||||
size_t i;
|
||||
for (i = 0; i < length;) {
|
||||
const uint8_t value = depth[i];
|
||||
size_t reps = 1;
|
||||
size_t k;
|
||||
for (k = i + 1; k < length && depth[k] == value; ++k) {
|
||||
++reps;
|
||||
}
|
||||
if (reps >= 3 && value == 0) {
|
||||
total_reps_zero += reps;
|
||||
++count_reps_zero;
|
||||
}
|
||||
if (reps >= 4 && value != 0) {
|
||||
total_reps_non_zero += reps;
|
||||
++count_reps_non_zero;
|
||||
}
|
||||
i += reps;
|
||||
}
|
||||
*use_rle_for_non_zero =
|
||||
TO_BROTLI_BOOL(total_reps_non_zero > count_reps_non_zero * 2);
|
||||
*use_rle_for_zero = TO_BROTLI_BOOL(total_reps_zero > count_reps_zero * 2);
|
||||
}
|
||||
|
||||
void BrotliWriteHuffmanTree(const uint8_t* depth,
|
||||
size_t length,
|
||||
size_t* tree_size,
|
||||
uint8_t* tree,
|
||||
uint8_t* extra_bits_data) {
|
||||
uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
|
||||
size_t i;
|
||||
BROTLI_BOOL use_rle_for_non_zero = BROTLI_FALSE;
|
||||
BROTLI_BOOL use_rle_for_zero = BROTLI_FALSE;
|
||||
|
||||
/* Throw away trailing zeros. */
|
||||
size_t new_length = length;
|
||||
for (i = 0; i < length; ++i) {
|
||||
if (depth[length - i - 1] == 0) {
|
||||
--new_length;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* First gather statistics on if it is a good idea to do RLE. */
|
||||
if (length > 50) {
|
||||
/* Find RLE coding for longer codes.
|
||||
Shorter codes seem not to benefit from RLE. */
|
||||
DecideOverRleUse(depth, new_length,
|
||||
&use_rle_for_non_zero, &use_rle_for_zero);
|
||||
}
|
||||
|
||||
/* Actual RLE coding. */
|
||||
for (i = 0; i < new_length;) {
|
||||
const uint8_t value = depth[i];
|
||||
size_t reps = 1;
|
||||
if ((value != 0 && use_rle_for_non_zero) ||
|
||||
(value == 0 && use_rle_for_zero)) {
|
||||
size_t k;
|
||||
for (k = i + 1; k < new_length && depth[k] == value; ++k) {
|
||||
++reps;
|
||||
}
|
||||
}
|
||||
if (value == 0) {
|
||||
BrotliWriteHuffmanTreeRepetitionsZeros(
|
||||
reps, tree_size, tree, extra_bits_data);
|
||||
} else {
|
||||
BrotliWriteHuffmanTreeRepetitions(previous_value,
|
||||
value, reps, tree_size,
|
||||
tree, extra_bits_data);
|
||||
previous_value = value;
|
||||
}
|
||||
i += reps;
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns the lowest |num_bits| bits of |bits| in reversed order.

   The value is reversed one nibble at a time through a lookup table. That
   reverses a whole number of nibbles, so the surplus low bits are shifted
   out at the end. */
static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
  static const size_t kNibbleReversed[16] = {  /* Pre-reversed 4-bit values. */
    0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
    0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
  };
  size_t reversed = kNibbleReversed[bits & 0xf];
  size_t consumed = 4;
  while (consumed < num_bits) {
    bits = (uint16_t)(bits >> 4);
    reversed = (reversed << 4) | kNibbleReversed[bits & 0xf];
    consumed += 4;
  }
  /* (0 - num_bits) & 3 is the padding up to the next multiple of four. */
  return (uint16_t)(reversed >> ((0 - num_bits) & 0x3));
}
|
||||
|
||||
/* 0..15 are values for bits */
|
||||
#define MAX_HUFFMAN_BITS 16
|
||||
|
||||
void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
|
||||
size_t len,
|
||||
uint16_t *bits) {
|
||||
/* In Brotli, all bit depths are [1..15]
|
||||
0 bit depth means that the symbol does not exist. */
|
||||
uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
|
||||
uint16_t next_code[MAX_HUFFMAN_BITS];
|
||||
size_t i;
|
||||
int code = 0;
|
||||
for (i = 0; i < len; ++i) {
|
||||
++bl_count[depth[i]];
|
||||
}
|
||||
bl_count[0] = 0;
|
||||
next_code[0] = 0;
|
||||
for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
|
||||
code = (code + bl_count[i - 1]) << 1;
|
||||
next_code[i] = (uint16_t)code;
|
||||
}
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (depth[i]) {
|
||||
bits[i] = BrotliReverseBits(depth[i], next_code[depth[i]]++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -9,7 +9,7 @@
|
||||
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
#define BROTLI_ENC_ENTROPY_ENCODE_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
||||
@@ -10,8 +10,8 @@
|
||||
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include <brotli/port.h>
|
||||
#include <brotli/types.h>
|
||||
#include "../port.h"
|
||||
#include "../types.h"
|
||||
#include "./write_bits.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include <brotli/port.h>
|
||||
#include "../types.h"
|
||||
#include "../port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
||||
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include "../common/dictionary.h"
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./fast_log.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./memory.h"
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Build per-context histograms of literals, commands and distance codes. */
|
||||
|
||||
#include "./histogram.h"
|
||||
|
||||
#include "./block_splitter.h"
|
||||
#include "./command.h"
|
||||
#include "./context.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Cursor that walks a BlockSplit one symbol at a time (advanced by
   BlockSplitIteratorNext) and tracks the block the current symbol is in. */
typedef struct BlockSplitIterator {
|
||||
const BlockSplit* split_; /* Not owned. */
|
||||
size_t idx_; /* Index of the current block in split_->types / lengths. */
|
||||
size_t type_; /* Type of the current block; 0 for the first block. */
|
||||
size_t length_; /* Symbols still to be consumed from the current block. */
|
||||
} BlockSplitIterator;
|
||||
|
||||
static void InitBlockSplitIterator(BlockSplitIterator* self,
|
||||
const BlockSplit* split) {
|
||||
self->split_ = split;
|
||||
self->idx_ = 0;
|
||||
self->type_ = 0;
|
||||
self->length_ = split->lengths ? split->lengths[0] : 0;
|
||||
}
|
||||
|
||||
/* Consumes one symbol. When the current block is exhausted, moves to the
   next block first and loads its type and length. */
static void BlockSplitIteratorNext(BlockSplitIterator* self) {
  if (self->length_ == 0) {
    const BlockSplit* split = self->split_;
    const size_t next = self->idx_ + 1;
    self->idx_ = next;
    self->type_ = split->types[next];
    self->length_ = split->lengths[next];
  }
  self->length_ -= 1;
}
|
||||
|
||||
void BrotliBuildHistogramsWithContext(
|
||||
const Command* cmds, const size_t num_commands,
|
||||
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
|
||||
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
|
||||
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
|
||||
const ContextType* context_modes, HistogramLiteral* literal_histograms,
|
||||
HistogramCommand* insert_and_copy_histograms,
|
||||
HistogramDistance* copy_dist_histograms) {
|
||||
size_t pos = start_pos;
|
||||
BlockSplitIterator literal_it;
|
||||
BlockSplitIterator insert_and_copy_it;
|
||||
BlockSplitIterator dist_it;
|
||||
size_t i;
|
||||
|
||||
InitBlockSplitIterator(&literal_it, literal_split);
|
||||
InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
|
||||
InitBlockSplitIterator(&dist_it, dist_split);
|
||||
for (i = 0; i < num_commands; ++i) {
|
||||
const Command* cmd = &cmds[i];
|
||||
size_t j;
|
||||
BlockSplitIteratorNext(&insert_and_copy_it);
|
||||
HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
|
||||
cmd->cmd_prefix_);
|
||||
for (j = cmd->insert_len_; j != 0; --j) {
|
||||
size_t context;
|
||||
BlockSplitIteratorNext(&literal_it);
|
||||
context = context_modes ?
|
||||
((literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
|
||||
Context(prev_byte, prev_byte2, context_modes[literal_it.type_])) :
|
||||
literal_it.type_;
|
||||
HistogramAddLiteral(&literal_histograms[context],
|
||||
ringbuffer[pos & mask]);
|
||||
prev_byte2 = prev_byte;
|
||||
prev_byte = ringbuffer[pos & mask];
|
||||
++pos;
|
||||
}
|
||||
pos += CommandCopyLen(cmd);
|
||||
if (CommandCopyLen(cmd)) {
|
||||
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
||||
prev_byte = ringbuffer[(pos - 1) & mask];
|
||||
if (cmd->cmd_prefix_ >= 128) {
|
||||
size_t context;
|
||||
BlockSplitIteratorNext(&dist_it);
|
||||
context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
|
||||
CommandDistanceContext(cmd);
|
||||
HistogramAddDistance(©_dist_histograms[context],
|
||||
cmd->dist_prefix_);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -12,7 +12,7 @@
|
||||
#include <string.h> /* memset */
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./block_splitter.h"
|
||||
#include "./command.h"
|
||||
#include "./context.h"
|
||||
|
||||
@@ -1,175 +0,0 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Literal cost model to allow backward reference replacement to be efficient.
|
||||
*/
|
||||
|
||||
#include "./literal_cost.h"
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "./fast_log.h"
|
||||
#include "./port.h"
|
||||
#include "./utf8_util.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Classifies the byte that follows |c| by its position inside a UTF-8
   sequence, given that |last| was the byte before |c|.
   Returns 0, 1 or 2; the result is never larger than |clamp|. */
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
  size_t position;
  if (c < 128) {
    /* Single-byte character: the next byte is a 'Byte 1' again. */
    return 0;
  }
  if (c >= 192) {
    /* Lead byte: the next byte is the 'Byte 2' of the encoding. */
    position = 1;
  } else if (last < 0xe0) {
    /* |c| is in 128..191 and |last| is below 0xe0: a two or three byte
       coding has just been completed. */
    return 0;
  } else {
    /* The next byte is the 'Byte 3' of the encoding. */
    position = 2;
  }
  return position < clamp ? position : clamp;
}
|
||||
|
||||
/* Scans |len| bytes of |data| (each read at index (pos + k) & mask), counts
   how many fall on each UTF-8 position as reported by UTF8Position(), and
   chooses the modeling depth used by the UTF-8 cost estimator.
   Returns 0 when fewer than 25 bytes land on position 1 or 2, otherwise 1. */
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
                                        const uint8_t *data) {
  size_t position_counts[3] = { 0 };
  size_t previous = 0;
  size_t offset = 0;
  while (offset < len) {
    const size_t current = data[(pos + offset) & mask];
    position_counts[UTF8Position(previous, current, 2)] += 1;
    previous = current;
    ++offset;
  }
  /* Depth 2 is never selected ("should be 2, but 1 compresses better"), so
     the count of third-byte positions only matters through the sum below. */
  if (position_counts[1] + position_counts[2] < 25) {
    return 0;
  }
  return 1;
}
|
||||
|
||||
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t *data, float *cost) {
|
||||
/* max_utf8 is 0 (normal ASCII single byte modeling),
|
||||
1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
|
||||
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
|
||||
size_t histogram[3][256] = { { 0 } };
|
||||
size_t window_half = 495;
|
||||
size_t in_window = BROTLI_MIN(size_t, window_half, len);
|
||||
size_t in_window_utf8[3] = { 0 };
|
||||
|
||||
size_t i;
|
||||
{ /* Bootstrap histograms. */
|
||||
size_t last_c = 0;
|
||||
size_t utf8_pos = 0;
|
||||
for (i = 0; i < in_window; ++i) {
|
||||
size_t c = data[(pos + i) & mask];
|
||||
++histogram[utf8_pos][c];
|
||||
++in_window_utf8[utf8_pos];
|
||||
utf8_pos = UTF8Position(last_c, c, max_utf8);
|
||||
last_c = c;
|
||||
}
|
||||
}
|
||||
|
||||
/* Compute bit costs with sliding window. */
|
||||
for (i = 0; i < len; ++i) {
|
||||
if (i >= window_half) {
|
||||
/* Remove a byte in the past. */
|
||||
size_t c =
|
||||
i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
|
||||
size_t last_c =
|
||||
i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
|
||||
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
||||
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
|
||||
--in_window_utf8[utf8_pos2];
|
||||
}
|
||||
if (i + window_half < len) {
|
||||
/* Add a byte in the future. */
|
||||
size_t c = data[(pos + i + window_half - 1) & mask];
|
||||
size_t last_c = data[(pos + i + window_half - 2) & mask];
|
||||
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
|
||||
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
|
||||
++in_window_utf8[utf8_pos2];
|
||||
}
|
||||
{
|
||||
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
|
||||
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
|
||||
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
|
||||
size_t masked_pos = (pos + i) & mask;
|
||||
size_t histo = histogram[utf8_pos][data[masked_pos]];
|
||||
double lit_cost;
|
||||
if (histo == 0) {
|
||||
histo = 1;
|
||||
}
|
||||
lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
|
||||
lit_cost += 0.02905;
|
||||
if (lit_cost < 1.0) {
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
/* Make the first bytes more expensive -- seems to help, not sure why.
|
||||
Perhaps because the entropy source is changing its properties
|
||||
rapidly in the beginning of the file, perhaps because the beginning
|
||||
of the data is a statistical "anomaly". */
|
||||
if (i < 2000) {
|
||||
lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
|
||||
}
|
||||
cost[i] = (float)lit_cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
|
||||
const uint8_t *data, float *cost) {
|
||||
if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
|
||||
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
|
||||
return;
|
||||
} else {
|
||||
size_t histogram[256] = { 0 };
|
||||
size_t window_half = 2000;
|
||||
size_t in_window = BROTLI_MIN(size_t, window_half, len);
|
||||
|
||||
/* Bootstrap histogram. */
|
||||
size_t i;
|
||||
for (i = 0; i < in_window; ++i) {
|
||||
++histogram[data[(pos + i) & mask]];
|
||||
}
|
||||
|
||||
/* Compute bit costs with sliding window. */
|
||||
for (i = 0; i < len; ++i) {
|
||||
size_t histo;
|
||||
if (i >= window_half) {
|
||||
/* Remove a byte in the past. */
|
||||
--histogram[data[(pos + i - window_half) & mask]];
|
||||
--in_window;
|
||||
}
|
||||
if (i + window_half < len) {
|
||||
/* Add a byte in the future. */
|
||||
++histogram[data[(pos + i + window_half) & mask]];
|
||||
++in_window;
|
||||
}
|
||||
histo = histogram[data[(pos + i) & mask]];
|
||||
if (histo == 0) {
|
||||
histo = 1;
|
||||
}
|
||||
{
|
||||
double lit_cost = FastLog2(in_window) - FastLog2(histo);
|
||||
lit_cost += 0.029;
|
||||
if (lit_cost < 1.0) {
|
||||
lit_cost *= 0.5;
|
||||
lit_cost += 0.5;
|
||||
}
|
||||
cost[i] = (float)lit_cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -10,7 +10,7 @@
|
||||
#ifndef BROTLI_ENC_LITERAL_COST_H_
|
||||
#define BROTLI_ENC_LITERAL_COST_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
||||
@@ -1,181 +0,0 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Algorithms for distributing the literals and commands of a metablock between
|
||||
block types and contexts. */
|
||||
|
||||
#include "./memory.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h> /* exit, free, malloc */
|
||||
#include <string.h> /* memcpy */
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define MAX_PERM_ALLOCATED 128
|
||||
#define MAX_NEW_ALLOCATED 64
|
||||
#define MAX_NEW_FREED 64
|
||||
|
||||
#define PERM_ALLOCATED_OFFSET 0
|
||||
#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
|
||||
#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
|
||||
|
||||
/* Allocator installed when the caller supplies none: forwards to malloc().
   |opaque| is accepted only to match the callback signature. */
static void* DefaultAllocFunc(void* opaque, size_t size) {
  (void)opaque;
  return malloc(size);
}
|
||||
|
||||
/* Deallocator installed when the caller supplies none: forwards to free().
   |opaque| is accepted only to match the callback signature. */
static void DefaultFreeFunc(void* opaque, void* address) {
  (void)opaque;
  free(address);
}
|
||||
|
||||
void BrotliInitMemoryManager(
|
||||
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
|
||||
void* opaque) {
|
||||
if (!alloc_func) {
|
||||
m->alloc_func = DefaultAllocFunc;
|
||||
m->free_func = DefaultFreeFunc;
|
||||
m->opaque = 0;
|
||||
} else {
|
||||
m->alloc_func = alloc_func;
|
||||
m->free_func = free_func;
|
||||
m->opaque = opaque;
|
||||
}
|
||||
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
m->is_oom = BROTLI_FALSE;
|
||||
m->perm_allocated = 0;
|
||||
m->new_allocated = 0;
|
||||
m->new_freed = 0;
|
||||
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
}
|
||||
|
||||
#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
|
||||
|
||||
/* Exit-on-OOM variant: asks the installed allocator for |n| bytes and
   terminates the process with EXIT_FAILURE when it returns null. */
void* BrotliAllocate(MemoryManager* m, size_t n) {
  void* block = m->alloc_func(m->opaque, n);
  if (block == NULL) {
    exit(EXIT_FAILURE);
  }
  return block;
}
|
||||
|
||||
/* Exit-on-OOM variant: hands |p| straight to the installed deallocator;
   nothing is tracked in this configuration. */
void BrotliFree(MemoryManager* m, void* p) {
  void* const opaque = m->opaque;
  m->free_func(opaque, p);
}
|
||||
|
||||
/* Exit-on-OOM variant: this build performs no pointer tracking, so there is
   nothing to release here. */
void BrotliWipeOutMemoryManager(MemoryManager* m) {
  /* Intentionally empty apart from silencing the unused parameter. */
  BROTLI_UNUSED(m);
}
|
||||
|
||||
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
|
||||
/* Sorts items[0..n) into ascending pointer order, in place, using Shell sort
   with the gap sequence 23, 10, 4, 1. */
static void SortPointers(void** items, const size_t n) {
  static const size_t kGaps[] = {23, 10, 4, 1};
  const size_t num_gaps = sizeof(kGaps) / sizeof(kGaps[0]);
  size_t pass;
  for (pass = 0; pass < num_gaps; ++pass) {
    const size_t gap = kGaps[pass];
    size_t next;
    /* Gapped insertion sort: slide larger elements up by |gap| until the
       slot for items[next] is found. */
    for (next = gap; next < n; ++next) {
      void* const moving = items[next];
      size_t hole = next;
      while (hole >= gap && moving < items[hole - gap]) {
        items[hole] = items[hole - gap];
        hole -= gap;
      }
      items[hole] = moving;
    }
  }
}
|
||||
|
||||
/* Removes every pointer that occurs in both a[0..a_len) and b[0..b_len).
   The merge-style walk expects both arrays to be in ascending order. The
   surviving entries are compacted to the front of their own array, keeping
   their order. Returns the number of pairs removed, i.e. how much shorter
   each array became. */
static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
  size_t a_in = 0;
  size_t b_in = 0;
  size_t a_out = 0;
  size_t b_out = 0;
  while (a_in < a_len && b_in < b_len) {
    void* const lhs = a[a_in];
    void* const rhs = b[b_in];
    if (lhs == rhs) {
      /* Present on both sides: drop the pair. */
      ++a_in;
      ++b_in;
    } else if (lhs < rhs) {
      a[a_out++] = lhs;
      ++a_in;
    } else {
      b[b_out++] = rhs;
      ++b_in;
    }
  }
  /* One side is exhausted; keep whatever is left on the other. */
  for (; a_in < a_len; ++a_in) a[a_out++] = a[a_in];
  for (; b_in < b_len; ++b_in) b[b_out++] = b[b_in];
  /* Every entry of |a| was either kept or paired off. */
  return a_len - a_out;
}
|
||||
|
||||
/* Reconciles the three pointer lists kept in m->pointers:
   1. sorts the recently allocated and recently freed lists and cancels
      pointers that appear in both;
   2. cancels the remaining freed pointers against the permanent list;
   3. appends the remaining new allocations to the permanent list and
      re-sorts it.
   On return the new-allocated list is empty, and the new-freed list is
   asserted to be empty. */
static void CollectGarbagePointers(MemoryManager* m) {
  void** const perm = m->pointers + PERM_ALLOCATED_OFFSET;
  void** const fresh = m->pointers + NEW_ALLOCATED_OFFSET;
  void** const freed = m->pointers + NEW_FREED_OFFSET;
  size_t cancelled;

  SortPointers(fresh, m->new_allocated);
  SortPointers(freed, m->new_freed);
  cancelled = Annihilate(fresh, m->new_allocated, freed, m->new_freed);
  m->new_allocated -= cancelled;
  m->new_freed -= cancelled;

  if (m->new_freed != 0) {
    /* Whatever is still marked freed must have been allocated earlier. */
    cancelled = Annihilate(perm, m->perm_allocated, freed, m->new_freed);
    m->perm_allocated -= cancelled;
    m->new_freed -= cancelled;
    assert(m->new_freed == 0);
  }

  if (m->new_allocated != 0) {
    assert(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
    memcpy(perm + m->perm_allocated, fresh,
           sizeof(void*) * m->new_allocated);
    m->perm_allocated += m->new_allocated;
    m->new_allocated = 0;
    SortPointers(perm, m->perm_allocated);
  }
}
|
||||
|
||||
/* Tracking variant: asks the installed allocator for |n| bytes. On failure
   sets m->is_oom and returns NULL. On success the pointer is recorded in the
   new-allocated list (compacting the lists first when that list is full) and
   returned. */
void* BrotliAllocate(MemoryManager* m, size_t n) {
  void* block = m->alloc_func(m->opaque, n);
  if (block == NULL) {
    m->is_oom = BROTLI_TRUE;
    return NULL;
  }
  if (m->new_allocated == MAX_NEW_ALLOCATED) {
    CollectGarbagePointers(m);
  }
  m->pointers[NEW_ALLOCATED_OFFSET + m->new_allocated] = block;
  m->new_allocated++;
  return block;
}
|
||||
|
||||
/* Tracking variant: releases |p| through the installed deallocator and
   records it in the new-freed list (compacting the lists first when that
   list is full). A null |p| is ignored. */
void BrotliFree(MemoryManager* m, void* p) {
  if (p == NULL) {
    return;
  }
  m->free_func(m->opaque, p);
  if (m->new_freed == MAX_NEW_FREED) {
    CollectGarbagePointers(m);
  }
  m->pointers[NEW_FREED_OFFSET + m->new_freed] = p;
  m->new_freed++;
}
|
||||
|
||||
/* Tracking variant: releases every pointer that was allocated through |m|
   and not freed since, then empties the permanent list. */
void BrotliWipeOutMemoryManager(MemoryManager* m) {
  void** cursor;
  void** end;
  CollectGarbagePointers(m);
  /* Now all unfreed pointers are in perm-allocated list. */
  cursor = m->pointers + PERM_ALLOCATED_OFFSET;
  end = cursor + m->perm_allocated;
  while (cursor != end) {
    m->free_func(m->opaque, *cursor);
    ++cursor;
  }
  m->perm_allocated = 0;
}
|
||||
|
||||
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -9,7 +9,7 @@
|
||||
#ifndef BROTLI_ENC_MEMORY_H_
|
||||
#define BROTLI_ENC_MEMORY_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
||||
@@ -1,528 +0,0 @@
|
||||
/* Copyright 2015 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Algorithms for distributing the literals and commands of a metablock between
|
||||
block types and contexts. */
|
||||
|
||||
#include "./metablock.h"
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include <brotli/types.h>
|
||||
#include "./bit_cost.h"
|
||||
#include "./block_splitter.h"
|
||||
#include "./cluster.h"
|
||||
#include "./context.h"
|
||||
#include "./entropy_encode.h"
|
||||
#include "./histogram.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
#include "./quality.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void BrotliBuildMetaBlock(MemoryManager* m,
|
||||
const uint8_t* ringbuffer,
|
||||
const size_t pos,
|
||||
const size_t mask,
|
||||
const BrotliEncoderParams* params,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
const Command* cmds,
|
||||
size_t num_commands,
|
||||
ContextType literal_context_mode,
|
||||
MetaBlockSplit* mb) {
|
||||
/* Histogram ids need to fit in one byte. */
|
||||
static const size_t kMaxNumberOfHistograms = 256;
|
||||
HistogramDistance* distance_histograms;
|
||||
HistogramLiteral* literal_histograms;
|
||||
ContextType* literal_context_modes = NULL;
|
||||
size_t literal_histograms_size;
|
||||
size_t distance_histograms_size;
|
||||
size_t i;
|
||||
size_t literal_context_multiplier = 1;
|
||||
|
||||
BrotliSplitBlock(m, cmds, num_commands,
|
||||
ringbuffer, pos, mask, params,
|
||||
&mb->literal_split,
|
||||
&mb->command_split,
|
||||
&mb->distance_split);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
if (!params->disable_literal_context_modeling) {
|
||||
literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS;
|
||||
literal_context_modes =
|
||||
BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < mb->literal_split.num_types; ++i) {
|
||||
literal_context_modes[i] = literal_context_mode;
|
||||
}
|
||||
}
|
||||
|
||||
literal_histograms_size =
|
||||
mb->literal_split.num_types * literal_context_multiplier;
|
||||
literal_histograms =
|
||||
BROTLI_ALLOC(m, HistogramLiteral, literal_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
ClearHistogramsLiteral(literal_histograms, literal_histograms_size);
|
||||
|
||||
distance_histograms_size =
|
||||
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
|
||||
distance_histograms =
|
||||
BROTLI_ALLOC(m, HistogramDistance, distance_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
ClearHistogramsDistance(distance_histograms, distance_histograms_size);
|
||||
|
||||
assert(mb->command_histograms == 0);
|
||||
mb->command_histograms_size = mb->command_split.num_types;
|
||||
mb->command_histograms =
|
||||
BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
|
||||
|
||||
BrotliBuildHistogramsWithContext(cmds, num_commands,
|
||||
&mb->literal_split, &mb->command_split, &mb->distance_split,
|
||||
ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
|
||||
literal_histograms, mb->command_histograms, distance_histograms);
|
||||
BROTLI_FREE(m, literal_context_modes);
|
||||
|
||||
assert(mb->literal_context_map == 0);
|
||||
mb->literal_context_map_size =
|
||||
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
|
||||
mb->literal_context_map =
|
||||
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
assert(mb->literal_histograms == 0);
|
||||
mb->literal_histograms_size = mb->literal_context_map_size;
|
||||
mb->literal_histograms =
|
||||
BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
BrotliClusterHistogramsLiteral(m, literal_histograms, literal_histograms_size,
|
||||
kMaxNumberOfHistograms, mb->literal_histograms,
|
||||
&mb->literal_histograms_size, mb->literal_context_map);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BROTLI_FREE(m, literal_histograms);
|
||||
|
||||
if (params->disable_literal_context_modeling) {
|
||||
/* Distribute assignment to all contexts. */
|
||||
for (i = mb->literal_split.num_types; i != 0;) {
|
||||
size_t j = 0;
|
||||
i--;
|
||||
for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) {
|
||||
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
|
||||
mb->literal_context_map[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(mb->distance_context_map == 0);
|
||||
mb->distance_context_map_size =
|
||||
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
|
||||
mb->distance_context_map =
|
||||
BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
assert(mb->distance_histograms == 0);
|
||||
mb->distance_histograms_size = mb->distance_context_map_size;
|
||||
mb->distance_histograms =
|
||||
BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
BrotliClusterHistogramsDistance(m, distance_histograms,
|
||||
mb->distance_context_map_size,
|
||||
kMaxNumberOfHistograms,
|
||||
mb->distance_histograms,
|
||||
&mb->distance_histograms_size,
|
||||
mb->distance_context_map);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
BROTLI_FREE(m, distance_histograms);
|
||||
}
|
||||
|
||||
#define FN(X) X ## Literal
|
||||
#include "./metablock_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Command
|
||||
#include "./metablock_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define FN(X) X ## Distance
|
||||
#include "./metablock_inc.h" /* NOLINT(build/include) */
|
||||
#undef FN
|
||||
|
||||
#define BROTLI_MAX_STATIC_CONTEXTS 13
|
||||
|
||||
/* Greedy block splitter for one block category (literal, command or distance).
|
||||
Gathers histograms for all context buckets. */
|
||||
typedef struct ContextBlockSplitter {
|
||||
/* Alphabet size of particular block category. */
|
||||
size_t alphabet_size_;
|
||||
size_t num_contexts_;
|
||||
size_t max_block_types_;
|
||||
/* We collect at least this many symbols for each block. */
|
||||
size_t min_block_size_;
|
||||
/* We merge histograms A and B if
|
||||
entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
|
||||
where A is the current histogram and B is the histogram of the last or the
|
||||
second last block type. */
|
||||
double split_threshold_;
|
||||
|
||||
size_t num_blocks_;
|
||||
BlockSplit* split_; /* not owned */
|
||||
HistogramLiteral* histograms_; /* not owned */
|
||||
size_t* histograms_size_; /* not owned */
|
||||
|
||||
/* The number of symbols that we want to collect before deciding on whether
|
||||
or not to merge the block with a previous one or emit a new block. */
|
||||
size_t target_block_size_;
|
||||
/* The number of symbols in the current histogram. */
|
||||
size_t block_size_;
|
||||
/* Offset of the current histogram. */
|
||||
size_t curr_histogram_ix_;
|
||||
/* Offset of the histograms of the previous two block types. */
|
||||
size_t last_histogram_ix_[2];
|
||||
/* Entropy of the previous two block types. */
|
||||
double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS];
|
||||
/* The number of times we merged the current block with the last one. */
|
||||
size_t merge_last_count_;
|
||||
} ContextBlockSplitter;
|
||||
|
||||
static void InitContextBlockSplitter(
|
||||
MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
|
||||
size_t num_contexts, size_t min_block_size, double split_threshold,
|
||||
size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
|
||||
size_t* histograms_size) {
|
||||
size_t max_num_blocks = num_symbols / min_block_size + 1;
|
||||
size_t max_num_types;
|
||||
assert(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS);
|
||||
|
||||
self->alphabet_size_ = alphabet_size;
|
||||
self->num_contexts_ = num_contexts;
|
||||
self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
|
||||
self->min_block_size_ = min_block_size;
|
||||
self->split_threshold_ = split_threshold;
|
||||
self->num_blocks_ = 0;
|
||||
self->split_ = split;
|
||||
self->histograms_size_ = histograms_size;
|
||||
self->target_block_size_ = min_block_size;
|
||||
self->block_size_ = 0;
|
||||
self->curr_histogram_ix_ = 0;
|
||||
self->merge_last_count_ = 0;
|
||||
|
||||
/* We have to allocate one more histogram than the maximum number of block
|
||||
types for the current histogram when the meta-block is too big. */
|
||||
max_num_types =
|
||||
BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
|
||||
BROTLI_ENSURE_CAPACITY(m, uint8_t,
|
||||
split->types, split->types_alloc_size, max_num_blocks);
|
||||
BROTLI_ENSURE_CAPACITY(m, uint32_t,
|
||||
split->lengths, split->lengths_alloc_size, max_num_blocks);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
split->num_blocks = max_num_blocks;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
assert(*histograms == 0);
|
||||
*histograms_size = max_num_types * num_contexts;
|
||||
*histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
|
||||
self->histograms_ = *histograms;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
/* Clear only current histogram. */
|
||||
ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
|
||||
self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
|
||||
}
|
||||
|
||||
/* Does either of three things:
|
||||
(1) emits the current block with a new block type;
|
||||
(2) emits the current block with the type of the second last block;
|
||||
(3) merges the current block with the last block. */
|
||||
static void ContextBlockSplitterFinishBlock(
|
||||
ContextBlockSplitter* self, MemoryManager* m, BROTLI_BOOL is_final) {
|
||||
BlockSplit* split = self->split_;
|
||||
const size_t num_contexts = self->num_contexts_;
|
||||
double* last_entropy = self->last_entropy_;
|
||||
HistogramLiteral* histograms = self->histograms_;
|
||||
|
||||
if (self->block_size_ < self->min_block_size_) {
|
||||
self->block_size_ = self->min_block_size_;
|
||||
}
|
||||
if (self->num_blocks_ == 0) {
|
||||
size_t i;
|
||||
/* Create first block. */
|
||||
split->lengths[0] = (uint32_t)self->block_size_;
|
||||
split->types[0] = 0;
|
||||
|
||||
for (i = 0; i < num_contexts; ++i) {
|
||||
last_entropy[i] =
|
||||
BitsEntropy(histograms[i].data_, self->alphabet_size_);
|
||||
last_entropy[num_contexts + i] = last_entropy[i];
|
||||
}
|
||||
++self->num_blocks_;
|
||||
++split->num_types;
|
||||
self->curr_histogram_ix_ += num_contexts;
|
||||
if (self->curr_histogram_ix_ < *self->histograms_size_) {
|
||||
ClearHistogramsLiteral(
|
||||
&self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
|
||||
}
|
||||
self->block_size_ = 0;
|
||||
} else if (self->block_size_ > 0) {
|
||||
/* Try merging the set of histograms for the current block type with the
|
||||
respective set of histograms for the last and second last block types.
|
||||
Decide over the split based on the total reduction of entropy across
|
||||
all contexts. */
|
||||
double entropy[BROTLI_MAX_STATIC_CONTEXTS];
|
||||
HistogramLiteral* combined_histo =
|
||||
BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
|
||||
double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS];
|
||||
double diff[2] = { 0.0 };
|
||||
size_t i;
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
for (i = 0; i < num_contexts; ++i) {
|
||||
size_t curr_histo_ix = self->curr_histogram_ix_ + i;
|
||||
size_t j;
|
||||
entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
|
||||
self->alphabet_size_);
|
||||
for (j = 0; j < 2; ++j) {
|
||||
size_t jx = j * num_contexts + i;
|
||||
size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
|
||||
combined_histo[jx] = histograms[curr_histo_ix];
|
||||
HistogramAddHistogramLiteral(&combined_histo[jx],
|
||||
&histograms[last_histogram_ix]);
|
||||
combined_entropy[jx] = BitsEntropy(
|
||||
&combined_histo[jx].data_[0], self->alphabet_size_);
|
||||
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
|
||||
}
|
||||
}
|
||||
|
||||
if (split->num_types < self->max_block_types_ &&
|
||||
diff[0] > self->split_threshold_ &&
|
||||
diff[1] > self->split_threshold_) {
|
||||
/* Create new block. */
|
||||
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
|
||||
split->types[self->num_blocks_] = (uint8_t)split->num_types;
|
||||
self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
|
||||
self->last_histogram_ix_[0] = split->num_types * num_contexts;
|
||||
for (i = 0; i < num_contexts; ++i) {
|
||||
last_entropy[num_contexts + i] = last_entropy[i];
|
||||
last_entropy[i] = entropy[i];
|
||||
}
|
||||
++self->num_blocks_;
|
||||
++split->num_types;
|
||||
self->curr_histogram_ix_ += num_contexts;
|
||||
if (self->curr_histogram_ix_ < *self->histograms_size_) {
|
||||
ClearHistogramsLiteral(
|
||||
&self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
|
||||
}
|
||||
self->block_size_ = 0;
|
||||
self->merge_last_count_ = 0;
|
||||
self->target_block_size_ = self->min_block_size_;
|
||||
} else if (diff[1] < diff[0] - 20.0) {
|
||||
/* Combine this block with second last block. */
|
||||
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
|
||||
split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
|
||||
BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
|
||||
for (i = 0; i < num_contexts; ++i) {
|
||||
histograms[self->last_histogram_ix_[0] + i] =
|
||||
combined_histo[num_contexts + i];
|
||||
last_entropy[num_contexts + i] = last_entropy[i];
|
||||
last_entropy[i] = combined_entropy[num_contexts + i];
|
||||
HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
|
||||
}
|
||||
++self->num_blocks_;
|
||||
self->block_size_ = 0;
|
||||
self->merge_last_count_ = 0;
|
||||
self->target_block_size_ = self->min_block_size_;
|
||||
} else {
|
||||
/* Combine this block with last block. */
|
||||
split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
|
||||
for (i = 0; i < num_contexts; ++i) {
|
||||
histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
|
||||
last_entropy[i] = combined_entropy[i];
|
||||
if (split->num_types == 1) {
|
||||
last_entropy[num_contexts + i] = last_entropy[i];
|
||||
}
|
||||
HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
|
||||
}
|
||||
self->block_size_ = 0;
|
||||
if (++self->merge_last_count_ > 1) {
|
||||
self->target_block_size_ += self->min_block_size_;
|
||||
}
|
||||
}
|
||||
BROTLI_FREE(m, combined_histo);
|
||||
}
|
||||
if (is_final) {
|
||||
*self->histograms_size_ = split->num_types * num_contexts;
|
||||
split->num_blocks = self->num_blocks_;
|
||||
}
|
||||
}
|
||||
|
||||
/* Adds the next symbol to the current block type and context. When the
|
||||
current block reaches the target size, decides on merging the block. */
|
||||
static void ContextBlockSplitterAddSymbol(
|
||||
ContextBlockSplitter* self, MemoryManager* m,
|
||||
size_t symbol, size_t context) {
|
||||
HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context],
|
||||
symbol);
|
||||
++self->block_size_;
|
||||
if (self->block_size_ == self->target_block_size_) {
|
||||
ContextBlockSplitterFinishBlock(self, m, /* is_final = */ BROTLI_FALSE);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
}
|
||||
}
|
||||
|
||||
static void MapStaticContexts(MemoryManager* m,
|
||||
size_t num_contexts,
|
||||
const uint32_t* static_context_map,
|
||||
MetaBlockSplit* mb) {
|
||||
size_t i;
|
||||
assert(mb->literal_context_map == 0);
|
||||
mb->literal_context_map_size =
|
||||
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
|
||||
mb->literal_context_map =
|
||||
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
for (i = 0; i < mb->literal_split.num_types; ++i) {
|
||||
uint32_t offset = (uint32_t)(i * num_contexts);
|
||||
size_t j;
|
||||
for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) {
|
||||
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
|
||||
offset + static_context_map[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
|
||||
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
|
||||
uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
|
||||
const size_t num_contexts, const uint32_t* static_context_map,
|
||||
const Command *commands, size_t n_commands, MetaBlockSplit* mb) {
|
||||
union {
|
||||
BlockSplitterLiteral plain;
|
||||
ContextBlockSplitter ctx;
|
||||
} lit_blocks;
|
||||
BlockSplitterCommand cmd_blocks;
|
||||
BlockSplitterDistance dist_blocks;
|
||||
size_t num_literals = 0;
|
||||
size_t i;
|
||||
for (i = 0; i < n_commands; ++i) {
|
||||
num_literals += commands[i].insert_len_;
|
||||
}
|
||||
|
||||
if (num_contexts == 1) {
|
||||
InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0,
|
||||
num_literals, &mb->literal_split, &mb->literal_histograms,
|
||||
&mb->literal_histograms_size);
|
||||
} else {
|
||||
InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0,
|
||||
num_literals, &mb->literal_split, &mb->literal_histograms,
|
||||
&mb->literal_histograms_size);
|
||||
}
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
|
||||
500.0, n_commands, &mb->command_split, &mb->command_histograms,
|
||||
&mb->command_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
|
||||
&mb->distance_split, &mb->distance_histograms,
|
||||
&mb->distance_histograms_size);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
|
||||
for (i = 0; i < n_commands; ++i) {
|
||||
const Command cmd = commands[i];
|
||||
size_t j;
|
||||
BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
|
||||
for (j = cmd.insert_len_; j != 0; --j) {
|
||||
uint8_t literal = ringbuffer[pos & mask];
|
||||
if (num_contexts == 1) {
|
||||
BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
|
||||
} else {
|
||||
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
|
||||
ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal,
|
||||
static_context_map[context]);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
}
|
||||
prev_byte2 = prev_byte;
|
||||
prev_byte = literal;
|
||||
++pos;
|
||||
}
|
||||
pos += CommandCopyLen(&cmd);
|
||||
if (CommandCopyLen(&cmd)) {
|
||||
prev_byte2 = ringbuffer[(pos - 2) & mask];
|
||||
prev_byte = ringbuffer[(pos - 1) & mask];
|
||||
if (cmd.cmd_prefix_ >= 128) {
|
||||
BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (num_contexts == 1) {
|
||||
BlockSplitterFinishBlockLiteral(
|
||||
&lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
|
||||
} else {
|
||||
ContextBlockSplitterFinishBlock(
|
||||
&lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE);
|
||||
if (BROTLI_IS_OOM(m)) return;
|
||||
}
|
||||
BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE);
|
||||
BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE);
|
||||
|
||||
if (num_contexts > 1) {
|
||||
MapStaticContexts(m, num_contexts, static_context_map, mb);
|
||||
}
|
||||
}
|
||||
|
||||
void BrotliBuildMetaBlockGreedy(MemoryManager* m,
|
||||
const uint8_t* ringbuffer,
|
||||
size_t pos,
|
||||
size_t mask,
|
||||
uint8_t prev_byte,
|
||||
uint8_t prev_byte2,
|
||||
ContextType literal_context_mode,
|
||||
size_t num_contexts,
|
||||
const uint32_t* static_context_map,
|
||||
const Command* commands,
|
||||
size_t n_commands,
|
||||
MetaBlockSplit* mb) {
|
||||
if (num_contexts == 1) {
|
||||
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
|
||||
prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb);
|
||||
} else {
|
||||
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
|
||||
prev_byte2, literal_context_mode, num_contexts, static_context_map,
|
||||
commands, n_commands, mb);
|
||||
}
|
||||
}
|
||||
|
||||
void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
|
||||
size_t distance_postfix_bits,
|
||||
MetaBlockSplit* mb) {
|
||||
uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];
|
||||
size_t num_distance_codes;
|
||||
size_t i;
|
||||
for (i = 0; i < mb->literal_histograms_size; ++i) {
|
||||
BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_,
|
||||
good_for_rle);
|
||||
}
|
||||
for (i = 0; i < mb->command_histograms_size; ++i) {
|
||||
BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS,
|
||||
mb->command_histograms[i].data_,
|
||||
good_for_rle);
|
||||
}
|
||||
num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
|
||||
num_direct_distance_codes +
|
||||
((2 * BROTLI_MAX_DISTANCE_BITS) << distance_postfix_bits);
|
||||
for (i = 0; i < mb->distance_histograms_size; ++i) {
|
||||
BrotliOptimizeHuffmanCountsForRle(num_distance_codes,
|
||||
mb->distance_histograms[i].data_,
|
||||
good_for_rle);
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -10,7 +10,7 @@
|
||||
#ifndef BROTLI_ENC_METABLOCK_H_
|
||||
#define BROTLI_ENC_METABLOCK_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./block_splitter.h"
|
||||
#include "./command.h"
|
||||
#include "./context.h"
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
#include <assert.h>
|
||||
#include <string.h> /* memcpy */
|
||||
|
||||
#include <brotli/port.h>
|
||||
#include <brotli/types.h>
|
||||
#include "../port.h"
|
||||
#include "../types.h"
|
||||
|
||||
#if defined OS_LINUX || defined OS_CYGWIN
|
||||
#include <endian.h>
|
||||
|
||||
@@ -11,8 +11,8 @@
|
||||
#define BROTLI_ENC_PREFIX_H_
|
||||
|
||||
#include "../common/constants.h"
|
||||
#include <brotli/port.h>
|
||||
#include <brotli/types.h>
|
||||
#include "../port.h"
|
||||
#include "../types.h"
|
||||
#include "./fast_log.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#ifndef BROTLI_ENC_QUALITY_H_
|
||||
#define BROTLI_ENC_QUALITY_H_
|
||||
|
||||
#include <brotli/encode.h>
|
||||
#include "../encode.h"
|
||||
|
||||
#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
|
||||
#define FAST_TWO_PASS_COMPRESSION_QUALITY 1
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
|
||||
#include <string.h> /* memcpy */
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./memory.h"
|
||||
#include "./port.h"
|
||||
#include "./quality.h"
|
||||
|
||||
@@ -1,482 +0,0 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
#include "./static_dict.h"
|
||||
|
||||
#include "../common/dictionary.h"
|
||||
#include "./find_match_length.h"
|
||||
#include "./port.h"
|
||||
#include "./static_dict_lut.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Value of DictWord.transform that the lookup code in this file treats as
   the "uppercase first" variant; any other non-zero value is handled as
   "uppercase all". */
static const uint8_t kUppercaseFirst = 10;
|
||||
/* Indexed by N, the number of trailing bytes omitted from a dictionary word
   (looked up as kOmitLastNTransforms[l - len] in
   BrotliFindAllStaticDictionaryMatches). The value is the multiplier applied
   to n before it is added to the word id for the kOmitLastN transform;
   entry 0 is never read by the omit-last loop. */
static const uint8_t kOmitLastNTransforms[10] = {
|
||||
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
|
||||
};
|
||||
|
||||
/* Maps the 32 bits loaded from |data| to a static-dictionary bucket index
   that is kDictNumBits wide. */
static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
  const uint32_t mixed = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
  /* Multiplication mixes the input most thoroughly into the upper bits of
     the product, so the index is taken from the top kDictNumBits. */
  const uint32_t unused_low_bits = 32 - kDictNumBits;
  return mixed >> unused_low_bits;
}
|
||||
|
||||
static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
|
||||
uint32_t* matches) {
|
||||
uint32_t match = (uint32_t)((distance << 5) + len_code);
|
||||
matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
|
||||
}
|
||||
|
||||
/* Returns how many leading bytes of |data| agree with dictionary word |id|
   of length |len|, as reported by FindMatchLengthWithLimit(). The comparison
   is limited to the smaller of |len| and |maxlen|. */
static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
                                            const uint8_t* data,
                                            size_t id,
                                            size_t len,
                                            size_t maxlen) {
  const size_t limit = BROTLI_MIN(size_t, len, maxlen);
  /* Words of one length are stored back to back, starting at
     offsets_by_length[len]. */
  const size_t word_start = dictionary->offsets_by_length[len] + len * id;
  const uint8_t* const word = &dictionary->data[word_start];
  return FindMatchLengthWithLimit(word, data, limit);
}
|
||||
|
||||
/* Tells whether |data| starts with the dictionary word described by |w|,
   after applying the case transform encoded in w.transform:
     0     - the word as stored,
     10    - first byte upper-cased (must be 'a'..'z' in the dictionary),
     other - every 'a'..'z' byte upper-cased.
   Words longer than |max_length| never match. */
static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
    DictWord w, const uint8_t* data, size_t max_length) {
  const uint8_t* word;
  size_t k;

  if (w.len > max_length) return BROTLI_FALSE;

  word = &dictionary->data[dictionary->offsets_by_length[w.len] +
                           (size_t)w.len * (size_t)w.idx];

  if (w.transform == 0) {
    /* Plain dictionary word: all w.len bytes must agree. */
    return TO_BROTLI_BOOL(
        FindMatchLengthWithLimit(word, data, w.len) == w.len);
  }

  if (w.transform == 10) {
    /* Uppercase-first: the stored first byte must be a lowercase ASCII
       letter whose upper-case form (bit 5 flipped) is data[0]; the rest is
       compared verbatim. The lookup table only holds ASCII uppercase
       variants. */
    return TO_BROTLI_BOOL(word[0] >= 'a' && word[0] <= 'z' &&
                          (word[0] ^ 32) == data[0] &&
                          FindMatchLengthWithLimit(&word[1], &data[1],
                                                   w.len - 1u) == w.len - 1u);
  }

  /* Uppercase-all: lowercase ASCII letters are compared in upper-case form,
     every other byte verbatim. */
  for (k = 0; k < w.len; ++k) {
    const uint8_t expected = (word[k] >= 'a' && word[k] <= 'z')
        ? (uint8_t)(word[k] ^ 32)
        : word[k];
    if (expected != data[k]) return BROTLI_FALSE;
  }
  return BROTLI_TRUE;
}
|
||||
|
||||
/* Finds static-dictionary words, and transformed variants of them, that match
   the bytes starting at |data|, and records each hit through AddMatch().

   Every hit is recorded as AddMatch(id + t * n, len, l, matches), where
     id  is the word's idx taken from kStaticDictionaryWords,
     l   is the untransformed word length (low 5 bits of the table's len),
     n   is 1 << dictionary->size_bits_by_length[l],
     t   is a per-transform multiplier (0 for the identity transform),
     len is the total matched length including any prefix and suffix.

   Four lookups are made, each hashing the position right after the prefix:
     1. no prefix;
     2. prefix " " or "."                  (only if max_length >= 5);
     3. prefix "e ", "s ", ", " or C2 A0   (only if max_length >= 6);
     4. prefix " the " or ".com/"          (only if max_length >= 9).

   |min_length| is only used as the lower bound of the kOmitLastN loop.
   Returns the has_found_match flag, which is set alongside the base match of
   each successful lookup.

   NOTE(review): lookup 1 calls Hash(data) unconditionally and Hash() loads
   32 bits, so callers presumably guarantee at least 4 readable bytes at
   |data| -- confirm. */
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
|
||||
const BrotliDictionary* dictionary, const uint8_t* data, size_t min_length,
|
||||
size_t max_length, uint32_t* matches) {
|
||||
BROTLI_BOOL has_found_match = BROTLI_FALSE;
|
||||
{
|
||||
/* Lookup 1: words that start at data[0]. A bucket value of 0 means the
   bucket is empty. */
size_t offset = kStaticDictionaryBuckets[Hash(data)];
|
||||
BROTLI_BOOL end = !offset;
|
||||
while (!end) {
|
||||
DictWord w = kStaticDictionaryWords[offset++];
|
||||
/* Low 5 bits of w.len hold the word length; bit 0x80 flags the last entry
   of the bucket. */
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0) {
|
||||
const size_t matchlen =
|
||||
DictMatchLength(dictionary, data, id, l, max_length);
|
||||
const uint8_t* s;
|
||||
size_t minlen;
|
||||
size_t maxlen;
|
||||
size_t len;
|
||||
/* Transform "" + kIdentity + "" */
|
||||
if (matchlen == l) {
|
||||
AddMatch(id, l, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
}
|
||||
/* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
|
||||
if (matchlen >= l - 1) {
|
||||
AddMatch(id + 12 * n, l - 1, l, matches);
|
||||
if (l + 2 < max_length &&
|
||||
data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
|
||||
data[l + 2] == ' ') {
|
||||
AddMatch(id + 49 * n, l + 3, l, matches);
|
||||
}
|
||||
has_found_match = BROTLI_TRUE;
|
||||
}
|
||||
/* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
|
||||
minlen = min_length;
|
||||
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
|
||||
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
|
||||
for (len = minlen; len <= maxlen; ++len) {
|
||||
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
}
|
||||
/* The suffix checks below need the whole word to match and read up to
   s[6] == data[l + 6], hence the l + 6 < max_length requirement. */
if (matchlen < l || l + 6 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
s = &data[l];
|
||||
/* Transforms "" + kIdentity + <suffix> */
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + n, l + 1, l, matches);
|
||||
if (s[1] == 'a') {
|
||||
if (s[2] == ' ') {
|
||||
AddMatch(id + 28 * n, l + 3, l, matches);
|
||||
} else if (s[2] == 's') {
|
||||
if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 't') {
|
||||
if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 'n') {
|
||||
if (s[3] == 'd' && s[4] == ' ') {
|
||||
AddMatch(id + 10 * n, l + 5, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[1] == 'b') {
|
||||
if (s[2] == 'y' && s[3] == ' ') {
|
||||
AddMatch(id + 38 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'i') {
|
||||
if (s[2] == 'n') {
|
||||
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 's') {
|
||||
if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'f') {
|
||||
if (s[2] == 'o') {
|
||||
if (s[3] == 'r' && s[4] == ' ') {
|
||||
AddMatch(id + 25 * n, l + 5, l, matches);
|
||||
}
|
||||
} else if (s[2] == 'r') {
|
||||
if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
|
||||
AddMatch(id + 37 * n, l + 6, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[1] == 'o') {
|
||||
if (s[2] == 'f') {
|
||||
if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
|
||||
} else if (s[2] == 'n') {
|
||||
if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'n') {
|
||||
if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
|
||||
AddMatch(id + 80 * n, l + 5, l, matches);
|
||||
}
|
||||
} else if (s[1] == 't') {
|
||||
if (s[2] == 'h') {
|
||||
if (s[3] == 'e') {
|
||||
if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
|
||||
} else if (s[3] == 'a') {
|
||||
if (s[4] == 't' && s[5] == ' ') {
|
||||
AddMatch(id + 29 * n, l + 6, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[2] == 'o') {
|
||||
if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'w') {
|
||||
if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
|
||||
AddMatch(id + 35 * n, l + 6, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[0] == '"') {
|
||||
AddMatch(id + 19 * n, l + 1, l, matches);
|
||||
if (s[1] == '>') {
|
||||
AddMatch(id + 21 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + 20 * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 31 * n, l + 2, l, matches);
|
||||
if (s[2] == 'T' && s[3] == 'h') {
|
||||
if (s[4] == 'e') {
|
||||
if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
|
||||
} else if (s[4] == 'i') {
|
||||
if (s[5] == 's' && s[6] == ' ') {
|
||||
AddMatch(id + 75 * n, l + 7, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (s[0] == ',') {
|
||||
AddMatch(id + 76 * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 14 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '\n') {
|
||||
AddMatch(id + 22 * n, l + 1, l, matches);
|
||||
if (s[1] == '\t') {
|
||||
AddMatch(id + 50 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == ']') {
|
||||
AddMatch(id + 24 * n, l + 1, l, matches);
|
||||
} else if (s[0] == '\'') {
|
||||
AddMatch(id + 36 * n, l + 1, l, matches);
|
||||
} else if (s[0] == ':') {
|
||||
AddMatch(id + 51 * n, l + 1, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
AddMatch(id + 57 * n, l + 1, l, matches);
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + 70 * n, l + 2, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + 86 * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'a') {
|
||||
if (s[1] == 'l' && s[2] == ' ') {
|
||||
AddMatch(id + 84 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'e') {
|
||||
if (s[1] == 'd') {
|
||||
if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
|
||||
} else if (s[1] == 'r') {
|
||||
if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
|
||||
} else if (s[1] == 's') {
|
||||
if (s[2] == 't' && s[3] == ' ') {
|
||||
AddMatch(id + 95 * n, l + 4, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[0] == 'f') {
|
||||
if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
|
||||
AddMatch(id + 90 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'i') {
|
||||
if (s[1] == 'v') {
|
||||
if (s[2] == 'e' && s[3] == ' ') {
|
||||
AddMatch(id + 92 * n, l + 4, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'z') {
|
||||
if (s[2] == 'e' && s[3] == ' ') {
|
||||
AddMatch(id + 100 * n, l + 4, l, matches);
|
||||
}
|
||||
}
|
||||
} else if (s[0] == 'l') {
|
||||
if (s[1] == 'e') {
|
||||
if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
|
||||
AddMatch(id + 93 * n, l + 5, l, matches);
|
||||
}
|
||||
} else if (s[1] == 'y') {
|
||||
if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == 'o') {
|
||||
if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
|
||||
AddMatch(id + 106 * n, l + 4, l, matches);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Set is_all_caps=0 for kUppercaseFirst and
|
||||
is_all_caps=1 otherwise (kUppercaseAll) transform. */
|
||||
const BROTLI_BOOL is_all_caps =
|
||||
TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(dictionary, w, data, max_length)) {
|
||||
continue;
|
||||
}
|
||||
/* Transform "" + kUppercase{First,All} + "" */
|
||||
AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
/* The suffix checks below read up to s[1] == data[l + 1]. */
if (l + 1 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms "" + kUppercase{First,All} + <suffix> */
|
||||
s = &data[l];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '"') {
|
||||
AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
|
||||
if (s[1] == '>') {
|
||||
AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == ',') {
|
||||
AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
|
||||
}
|
||||
} else if (s[0] == '\'') {
|
||||
AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* Transforms with prefixes " " and "." */
|
||||
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
|
||||
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
|
||||
size_t offset = kStaticDictionaryBuckets[Hash(&data[1])];
|
||||
BROTLI_BOOL end = !offset;
|
||||
while (!end) {
|
||||
DictWord w = kStaticDictionaryWords[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0) {
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
|
||||
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
if (l + 2 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
|
||||
*/
|
||||
s = &data[l + 1];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
|
||||
} else if (s[0] == '(') {
|
||||
AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
|
||||
} else if (is_space) {
|
||||
if (s[0] == ',') {
|
||||
AddMatch(id + 103 * n, l + 2, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 33 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + 71 * n, l + 2, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + 52 * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + 81 * n, l + 3, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + 98 * n, l + 3, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (is_space) {
|
||||
/* Set is_all_caps=0 for kUppercaseFirst and
|
||||
is_all_caps=1 otherwise (kUppercaseAll) transform. */
|
||||
const BROTLI_BOOL is_all_caps =
|
||||
TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
|
||||
const uint8_t* s;
|
||||
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kUppercase{First,All} + "" */
|
||||
AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
if (l + 2 >= max_length) {
|
||||
continue;
|
||||
}
|
||||
/* Transforms " " + kUppercase{First,All} + <suffix> */
|
||||
s = &data[l + 1];
|
||||
if (s[0] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
|
||||
} else if (s[0] == ',') {
|
||||
if (!is_all_caps) {
|
||||
AddMatch(id + 109 * n, l + 2, l, matches);
|
||||
}
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '.') {
|
||||
AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
|
||||
if (s[1] == ' ') {
|
||||
AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
|
||||
}
|
||||
} else if (s[0] == '=') {
|
||||
if (s[1] == '"') {
|
||||
AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
|
||||
} else if (s[1] == '\'') {
|
||||
AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (max_length >= 6) {
|
||||
/* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */
|
||||
if ((data[1] == ' ' &&
|
||||
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
|
||||
(data[0] == 0xc2 && data[1] == 0xa0)) {
|
||||
size_t offset = kStaticDictionaryBuckets[Hash(&data[2])];
|
||||
BROTLI_BOOL end = !offset;
|
||||
while (!end) {
|
||||
DictWord w = kStaticDictionaryWords[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0 &&
|
||||
IsMatch(dictionary, w, &data[2], max_length - 2)) {
|
||||
if (data[0] == 0xc2) {
|
||||
AddMatch(id + 102 * n, l + 2, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
} else if (l + 2 < max_length && data[l + 2] == ' ') {
|
||||
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
|
||||
AddMatch(id + t * n, l + 3, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (max_length >= 9) {
|
||||
/* Transforms with prefixes " the " and ".com/" */
|
||||
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
|
||||
data[3] == 'e' && data[4] == ' ') ||
|
||||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
|
||||
data[3] == 'm' && data[4] == '/')) {
|
||||
size_t offset = kStaticDictionaryBuckets[Hash(&data[5])];
|
||||
BROTLI_BOOL end = !offset;
|
||||
while (!end) {
|
||||
DictWord w = kStaticDictionaryWords[offset++];
|
||||
const size_t l = w.len & 0x1F;
|
||||
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
|
||||
const size_t id = w.idx;
|
||||
end = !!(w.len & 0x80);
|
||||
w.len = (uint8_t)l;
|
||||
if (w.transform == 0 &&
|
||||
IsMatch(dictionary, w, &data[5], max_length - 5)) {
|
||||
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
|
||||
has_found_match = BROTLI_TRUE;
|
||||
if (l + 5 < max_length) {
|
||||
const uint8_t* s = &data[l + 5];
|
||||
if (data[0] == ' ') {
|
||||
if (l + 8 < max_length &&
|
||||
s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
|
||||
AddMatch(id + 62 * n, l + 9, l, matches);
|
||||
if (l + 12 < max_length &&
|
||||
s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
|
||||
AddMatch(id + 73 * n, l + 13, l, matches);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return has_found_match;
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -10,7 +10,7 @@
|
||||
#define BROTLI_ENC_STATIC_DICT_H_
|
||||
|
||||
#include "../common/dictionary.h"
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#ifndef BROTLI_ENC_STATIC_DICT_LUT_H_
|
||||
#define BROTLI_ENC_STATIC_DICT_LUT_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
/* Copyright 2013 Google Inc. All Rights Reserved.
|
||||
|
||||
Distributed under MIT license.
|
||||
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
|
||||
*/
|
||||
|
||||
/* Heuristics for deciding about the UTF8-ness of strings. */
|
||||
|
||||
#include "./utf8_util.h"
|
||||
|
||||
#include <brotli/types.h>
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Tries to decode one UTF-8 encoded code point from the start of |input|,
   where |size| is the number of bytes available.

   On success stores the code point in |*symbol| and returns the number of
   bytes it occupies (1..4). A NUL byte, an over-long encoding, a truncated
   sequence or a value above 0x10ffff is not accepted; in that case
   |*symbol| is set to 0x110000 | input[0] (a value outside the Unicode code
   space) and 1 is returned so the caller can step over the byte. */
static size_t BrotliParseAsUTF8(
    int* symbol, const uint8_t* input, size_t size) {
  const int lead = input[0];
  int code_point;

  /* 1 byte: 0xxxxxxx, excluding NUL. */
  if ((lead & 0x80) == 0 && lead > 0) {
    *symbol = lead;
    return 1;
  }

  /* 2 bytes: 110xxxxx 10xxxxxx. */
  if (size > 1u && (lead & 0xe0) == 0xc0 && (input[1] & 0xc0) == 0x80) {
    code_point = ((lead & 0x1f) << 6) | (input[1] & 0x3f);
    if (code_point > 0x7f) {
      *symbol = code_point;
      return 2;
    }
  }

  /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
  if (size > 2u && (lead & 0xf0) == 0xe0 && (input[1] & 0xc0) == 0x80 &&
      (input[2] & 0xc0) == 0x80) {
    code_point = ((lead & 0x0f) << 12) |
                 ((input[1] & 0x3f) << 6) |
                 (input[2] & 0x3f);
    if (code_point > 0x7ff) {
      *symbol = code_point;
      return 3;
    }
  }

  /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. */
  if (size > 3u && (lead & 0xf8) == 0xf0 && (input[1] & 0xc0) == 0x80 &&
      (input[2] & 0xc0) == 0x80 && (input[3] & 0xc0) == 0x80) {
    code_point = ((lead & 0x07) << 18) |
                 ((input[1] & 0x3f) << 12) |
                 ((input[2] & 0x3f) << 6) |
                 (input[3] & 0x3f);
    if (code_point > 0xffff && code_point <= 0x10ffff) {
      *symbol = code_point;
      return 4;
    }
  }

  /* Not UTF-8: report a marker symbol above the Unicode code space. */
  *symbol = 0x110000 | lead;
  return 1;
}
|
||||
|
||||
/* Returns 1 if at least min_fraction of the data is UTF8-encoded.*/
|
||||
BROTLI_BOOL BrotliIsMostlyUTF8(
|
||||
const uint8_t* data, const size_t pos, const size_t mask,
|
||||
const size_t length, const double min_fraction) {
|
||||
size_t size_utf8 = 0;
|
||||
size_t i = 0;
|
||||
while (i < length) {
|
||||
int symbol;
|
||||
size_t bytes_read =
|
||||
BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
|
||||
i += bytes_read;
|
||||
if (symbol < 0x110000) size_utf8 += bytes_read;
|
||||
}
|
||||
return TO_BROTLI_BOOL(size_utf8 > min_fraction * (double)length);
|
||||
}
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
@@ -9,7 +9,7 @@
|
||||
#ifndef BROTLI_ENC_UTF8_UTIL_H_
|
||||
#define BROTLI_ENC_UTF8_UTIL_H_
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include <assert.h>
|
||||
#include <stdio.h> /* printf */
|
||||
|
||||
#include <brotli/types.h>
|
||||
#include "../types.h"
|
||||
#include "./port.h"
|
||||
|
||||
#if defined(__cplusplus) || defined(c_plusplus)
|
||||
|
||||
Reference in New Issue
Block a user