Major changes, including Brotli and Lizard

- update the zstd-mt library
- add brotli v0.6.0
- add lizard v2.0
- xxhash is now taken from zstd for lz4, lz5 and lizard
- also update the documentation where needed
Author: Tino Reichardt
Date:   2017-05-25 18:40:15 +02:00
Parent: 40e87f615c
Commit: 5ff0657d9f

173 changed files with 3936 additions and 6591 deletions


@@ -1,130 +0,0 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function to find backward reference copies. */
#include "./backward_references.h"
#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "./command.h"
#include "./dictionary_hash.h"
#include "./memory.h"
#include "./port.h"
#include "./quality.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
size_t max_distance,
const int* dist_cache) {
if (distance <= max_distance) {
size_t distance_plus_3 = distance + 3;
size_t offset0 = distance_plus_3 - (size_t)dist_cache[0];
size_t offset1 = distance_plus_3 - (size_t)dist_cache[1];
if (distance == (size_t)dist_cache[0]) {
return 0;
} else if (distance == (size_t)dist_cache[1]) {
return 1;
} else if (offset0 < 7) {
return (0x9750468 >> (4 * offset0)) & 0xF;
} else if (offset1 < 7) {
return (0xFDB1ACE >> (4 * offset1)) & 0xF;
} else if (distance == (size_t)dist_cache[2]) {
return 2;
} else if (distance == (size_t)dist_cache[3]) {
return 3;
}
}
return distance + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
}
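/* Note: 0x9750468 and 0xFDB1ACE above are nibble-packed lookup tables:
   nibble k of 0x9750468 is the short distance code for
   distance == dist_cache[0] + (k - 3) (e.g. nibble 4 is 5, the code for
   "last distance + 1" in Section 4 of the Brotli format spec), and
   0xFDB1ACE encodes the same deltas relative to dist_cache[1]. */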
#define EXPAND_CAT(a, b) CAT(a, b)
#define CAT(a, b) a ## b
#define FN(X) EXPAND_CAT(X, HASHER())
#define HASHER() H2
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H3
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H4
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H5
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H6
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H40
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H41
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H42
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#define HASHER() H54
/* NOLINTNEXTLINE(build/include) */
#include "./backward_references_inc.h"
#undef HASHER
#undef FN
#undef CAT
#undef EXPAND_CAT
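/* The repeated includes above are C's substitute for templates: each
   HASHER() definition makes backward_references_inc.h expand
   FN(CreateBackwardReferences) into a distinct function
   (CreateBackwardReferencesH2 .. CreateBackwardReferencesH54), which the
   dispatch switch below selects via params->hasher.type. */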
void BrotliCreateBackwardReferences(const BrotliDictionary* dictionary,
size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const BrotliEncoderParams* params,
HasherHandle hasher,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_commands,
size_t* num_literals) {
switch (params->hasher.type) {
#define CASE_(N) \
case N: \
CreateBackwardReferencesH ## N(dictionary, \
kStaticDictionaryHash, num_bytes, position, ringbuffer, \
ringbuffer_mask, params, hasher, dist_cache, \
last_insert_len, commands, num_commands, num_literals); \
break;
FOR_GENERIC_HASHERS(CASE_)
#undef CASE_
default:
break;
}
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif


@@ -11,7 +11,7 @@
#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "../types.h"
#include "./command.h"
#include "./hash.h"
#include "./port.h"


@@ -1,790 +0,0 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function to find backward reference copies. */
#include "./backward_references_hq.h"
#include <string.h> /* memcpy, memset */
#include "../common/constants.h"
#include <brotli/types.h>
#include "./command.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./literal_cost.h"
#include "./memory.h"
#include "./port.h"
#include "./prefix.h"
#include "./quality.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const float kInfinity = 1.7e38f; /* ~= 2 ^ 127 */
static const uint32_t kDistanceCacheIndex[] = {
0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
};
static const int kDistanceCacheOffset[] = {
0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
};
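/* Together these tables expand short distance code j into
   dist_cache[kDistanceCacheIndex[j]] + kDistanceCacheOffset[j]: codes
   0..3 reuse one of the last four distances verbatim, while codes 4..15
   apply a delta of +/-1..3 to the last or second-to-last distance, as
   defined in Section 4 of the Brotli format spec. */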
void BrotliInitZopfliNodes(ZopfliNode* array, size_t length) {
ZopfliNode stub;
size_t i;
stub.length = 1;
stub.distance = 0;
stub.insert_length = 0;
stub.u.cost = kInfinity;
for (i = 0; i < length; ++i) array[i] = stub;
}
static BROTLI_INLINE uint32_t ZopfliNodeCopyLength(const ZopfliNode* self) {
return self->length & 0xffffff;
}
static BROTLI_INLINE uint32_t ZopfliNodeLengthCode(const ZopfliNode* self) {
const uint32_t modifier = self->length >> 24;
return ZopfliNodeCopyLength(self) + 9u - modifier;
}
static BROTLI_INLINE uint32_t ZopfliNodeCopyDistance(const ZopfliNode* self) {
return self->distance & 0x1ffffff;
}
static BROTLI_INLINE uint32_t ZopfliNodeDistanceCode(const ZopfliNode* self) {
const uint32_t short_code = self->distance >> 25;
return short_code == 0 ?
ZopfliNodeCopyDistance(self) + BROTLI_NUM_DISTANCE_SHORT_CODES - 1 :
short_code - 1;
}
static BROTLI_INLINE uint32_t ZopfliNodeCommandLength(const ZopfliNode* self) {
return ZopfliNodeCopyLength(self) + self->insert_length;
}
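/* The accessors above unpack the ZopfliNode bit fields: |length| keeps
   the copy length in its low 24 bits and (copy length + 9 - length code)
   in the top 8 bits; |distance| keeps the copy distance in its low
   25 bits and (short code + 1) in the top 7 bits, where 0 means "no
   short code". UpdateZopfliNode below writes the same layout. */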
/* Histogram based cost model for zopflification. */
typedef struct ZopfliCostModel {
/* The insert and copy length symbols. */
float cost_cmd_[BROTLI_NUM_COMMAND_SYMBOLS];
float cost_dist_[BROTLI_NUM_DISTANCE_SYMBOLS];
/* Cumulative costs of literals per position in the stream. */
float* literal_costs_;
float min_cost_cmd_;
size_t num_bytes_;
} ZopfliCostModel;
static void InitZopfliCostModel(
MemoryManager* m, ZopfliCostModel* self, size_t num_bytes) {
self->num_bytes_ = num_bytes;
self->literal_costs_ = BROTLI_ALLOC(m, float, num_bytes + 2);
if (BROTLI_IS_OOM(m)) return;
}
static void CleanupZopfliCostModel(MemoryManager* m, ZopfliCostModel* self) {
BROTLI_FREE(m, self->literal_costs_);
}
static void SetCost(const uint32_t* histogram, size_t histogram_size,
float* cost) {
size_t sum = 0;
float log2sum;
size_t i;
for (i = 0; i < histogram_size; i++) {
sum += histogram[i];
}
log2sum = (float)FastLog2(sum);
for (i = 0; i < histogram_size; i++) {
if (histogram[i] == 0) {
cost[i] = log2sum + 2;
continue;
}
/* Shannon bits for this symbol. */
cost[i] = log2sum - (float)FastLog2(histogram[i]);
/* Cannot be coded with less than 1 bit */
if (cost[i] < 1) cost[i] = 1;
}
}
static void ZopfliCostModelSetFromCommands(ZopfliCostModel* self,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const Command* commands,
size_t num_commands,
size_t last_insert_len) {
uint32_t histogram_literal[BROTLI_NUM_LITERAL_SYMBOLS];
uint32_t histogram_cmd[BROTLI_NUM_COMMAND_SYMBOLS];
uint32_t histogram_dist[BROTLI_NUM_DISTANCE_SYMBOLS];
float cost_literal[BROTLI_NUM_LITERAL_SYMBOLS];
size_t pos = position - last_insert_len;
float min_cost_cmd = kInfinity;
size_t i;
float* cost_cmd = self->cost_cmd_;
memset(histogram_literal, 0, sizeof(histogram_literal));
memset(histogram_cmd, 0, sizeof(histogram_cmd));
memset(histogram_dist, 0, sizeof(histogram_dist));
for (i = 0; i < num_commands; i++) {
size_t inslength = commands[i].insert_len_;
size_t copylength = CommandCopyLen(&commands[i]);
size_t distcode = commands[i].dist_prefix_;
size_t cmdcode = commands[i].cmd_prefix_;
size_t j;
histogram_cmd[cmdcode]++;
if (cmdcode >= 128) histogram_dist[distcode]++;
for (j = 0; j < inslength; j++) {
histogram_literal[ringbuffer[(pos + j) & ringbuffer_mask]]++;
}
pos += inslength + copylength;
}
SetCost(histogram_literal, BROTLI_NUM_LITERAL_SYMBOLS, cost_literal);
SetCost(histogram_cmd, BROTLI_NUM_COMMAND_SYMBOLS, cost_cmd);
SetCost(histogram_dist, BROTLI_NUM_DISTANCE_SYMBOLS, self->cost_dist_);
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
min_cost_cmd = BROTLI_MIN(float, min_cost_cmd, cost_cmd[i]);
}
self->min_cost_cmd_ = min_cost_cmd;
{
float* literal_costs = self->literal_costs_;
size_t num_bytes = self->num_bytes_;
literal_costs[0] = 0.0;
for (i = 0; i < num_bytes; ++i) {
literal_costs[i + 1] = literal_costs[i] +
cost_literal[ringbuffer[(position + i) & ringbuffer_mask]];
}
}
}
static void ZopfliCostModelSetFromLiteralCosts(ZopfliCostModel* self,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask) {
float* literal_costs = self->literal_costs_;
float* cost_dist = self->cost_dist_;
float* cost_cmd = self->cost_cmd_;
size_t num_bytes = self->num_bytes_;
size_t i;
BrotliEstimateBitCostsForLiterals(position, num_bytes, ringbuffer_mask,
ringbuffer, &literal_costs[1]);
literal_costs[0] = 0.0;
for (i = 0; i < num_bytes; ++i) {
literal_costs[i + 1] += literal_costs[i];
}
for (i = 0; i < BROTLI_NUM_COMMAND_SYMBOLS; ++i) {
cost_cmd[i] = (float)FastLog2(11 + (uint32_t)i);
}
for (i = 0; i < BROTLI_NUM_DISTANCE_SYMBOLS; ++i) {
cost_dist[i] = (float)FastLog2(20 + (uint32_t)i);
}
self->min_cost_cmd_ = (float)FastLog2(11);
}
static BROTLI_INLINE float ZopfliCostModelGetCommandCost(
const ZopfliCostModel* self, uint16_t cmdcode) {
return self->cost_cmd_[cmdcode];
}
static BROTLI_INLINE float ZopfliCostModelGetDistanceCost(
const ZopfliCostModel* self, size_t distcode) {
return self->cost_dist_[distcode];
}
static BROTLI_INLINE float ZopfliCostModelGetLiteralCosts(
const ZopfliCostModel* self, size_t from, size_t to) {
return self->literal_costs_[to] - self->literal_costs_[from];
}
static BROTLI_INLINE float ZopfliCostModelGetMinCostCmd(
const ZopfliCostModel* self) {
return self->min_cost_cmd_;
}
/* REQUIRES: len >= 2, start_pos <= pos */
/* REQUIRES: cost < kInfinity, nodes[start_pos].cost < kInfinity */
/* Maintains the "ZopfliNode array invariant". */
static BROTLI_INLINE void UpdateZopfliNode(ZopfliNode* nodes, size_t pos,
size_t start_pos, size_t len, size_t len_code, size_t dist,
size_t short_code, float cost) {
ZopfliNode* next = &nodes[pos + len];
next->length = (uint32_t)(len | ((len + 9u - len_code) << 24));
next->distance = (uint32_t)(dist | (short_code << 25));
next->insert_length = (uint32_t)(pos - start_pos);
next->u.cost = cost;
}
typedef struct PosData {
size_t pos;
int distance_cache[4];
float costdiff;
float cost;
} PosData;
/* Maintains the smallest 8 cost difference together with their positions */
typedef struct StartPosQueue {
PosData q_[8];
size_t idx_;
} StartPosQueue;
static BROTLI_INLINE void InitStartPosQueue(StartPosQueue* self) {
self->idx_ = 0;
}
static size_t StartPosQueueSize(const StartPosQueue* self) {
return BROTLI_MIN(size_t, self->idx_, 8);
}
static void StartPosQueuePush(StartPosQueue* self, const PosData* posdata) {
size_t offset = ~(self->idx_++) & 7;
size_t len = StartPosQueueSize(self);
size_t i;
PosData* q = self->q_;
q[offset] = *posdata;
/* Restore the sorted order. In the list of |len| items at most |len - 1|
adjacent element comparisons / swaps are required. */
for (i = 1; i < len; ++i) {
if (q[offset & 7].costdiff > q[(offset + 1) & 7].costdiff) {
BROTLI_SWAP(PosData, q, offset & 7, (offset + 1) & 7);
}
++offset;
}
}
static const PosData* StartPosQueueAt(const StartPosQueue* self, size_t k) {
return &self->q_[(k - self->idx_) & 7];
}
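/* Note: the queue is a ring buffer over the last 8 pushed candidates.
   StartPosQueuePush fills slots at decreasing ring positions
   (~(self->idx_++) & 7) and its bubble pass keeps the window sorted by
   costdiff, so StartPosQueueAt(queue, 0) yields the candidate with the
   smallest cost difference. */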
/* Returns the minimum possible copy length that can improve the cost of any */
/* future position. */
static size_t ComputeMinimumCopyLength(const float start_cost,
const ZopfliNode* nodes,
const size_t num_bytes,
const size_t pos) {
/* Compute the minimum possible cost of reaching any future position. */
float min_cost = start_cost;
size_t len = 2;
size_t next_len_bucket = 4;
size_t next_len_offset = 10;
while (pos + len <= num_bytes && nodes[pos + len].u.cost <= min_cost) {
/* We already reached (pos + len) with no more cost than the minimum
possible cost of reaching anything from this pos, so there is no point in
looking for lengths <= len. */
++len;
if (len == next_len_offset) {
/* We reached the next copy length code bucket, so we add one more
extra bit to the minimum cost. */
min_cost += 1.0f;
next_len_offset += next_len_bucket;
next_len_bucket *= 2;
}
}
return len;
}
/* REQUIRES: nodes[pos].cost < kInfinity
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
static uint32_t ComputeDistanceShortcut(const size_t block_start,
const size_t pos,
const size_t max_backward,
const ZopfliNode* nodes) {
const size_t clen = ZopfliNodeCopyLength(&nodes[pos]);
const size_t ilen = nodes[pos].insert_length;
const size_t dist = ZopfliNodeCopyDistance(&nodes[pos]);
/* Since |block_start + pos| is the end position of the command, the copy part
starts from |block_start + pos - clen|. Distances that are greater than
this or greater than |max_backward| are static dictionary references, and
do not update the last distances. Also distance code 0 (last distance)
does not update the last distances. */
if (pos == 0) {
return 0;
} else if (dist + clen <= block_start + pos &&
dist <= max_backward &&
ZopfliNodeDistanceCode(&nodes[pos]) > 0) {
return (uint32_t)pos;
} else {
return nodes[pos - clen - ilen].u.shortcut;
}
}
/* Fills in dist_cache[0..3] with the last four distances (as defined by
Section 4. of the Spec) that would be used at (block_start + pos) if we
used the shortest path of commands from block_start, computed from
nodes[0..pos]. The last four distances at block_start are in
starting_dist_cache[0..3].
REQUIRES: nodes[pos].cost < kInfinity
REQUIRES: nodes[0..pos] satisfies that "ZopfliNode array invariant". */
static void ComputeDistanceCache(const size_t pos,
const int* starting_dist_cache,
const ZopfliNode* nodes,
int* dist_cache) {
int idx = 0;
size_t p = nodes[pos].u.shortcut;
while (idx < 4 && p > 0) {
const size_t ilen = nodes[p].insert_length;
const size_t clen = ZopfliNodeCopyLength(&nodes[p]);
const size_t dist = ZopfliNodeCopyDistance(&nodes[p]);
dist_cache[idx++] = (int)dist;
/* Because of prerequisite, p >= clen + ilen >= 2. */
p = nodes[p - clen - ilen].u.shortcut;
}
for (; idx < 4; ++idx) {
dist_cache[idx] = *starting_dist_cache++;
}
}
/* Maintains "ZopfliNode array invariant" and pushes node to the queue, if it
is eligible. */
static void EvaluateNode(
const size_t block_start, const size_t pos, const size_t max_backward_limit,
const int* starting_dist_cache, const ZopfliCostModel* model,
StartPosQueue* queue, ZopfliNode* nodes) {
/* Save cost, because ComputeDistanceCache invalidates it. */
float node_cost = nodes[pos].u.cost;
nodes[pos].u.shortcut = ComputeDistanceShortcut(
block_start, pos, max_backward_limit, nodes);
if (node_cost <= ZopfliCostModelGetLiteralCosts(model, 0, pos)) {
PosData posdata;
posdata.pos = pos;
posdata.cost = node_cost;
posdata.costdiff = node_cost -
ZopfliCostModelGetLiteralCosts(model, 0, pos);
ComputeDistanceCache(
pos, starting_dist_cache, nodes, posdata.distance_cache);
StartPosQueuePush(queue, &posdata);
}
}
/* Returns longest copy length. */
static size_t UpdateNodes(
const size_t num_bytes, const size_t block_start, const size_t pos,
const uint8_t* ringbuffer, const size_t ringbuffer_mask,
const BrotliEncoderParams* params, const size_t max_backward_limit,
const int* starting_dist_cache, const size_t num_matches,
const BackwardMatch* matches, const ZopfliCostModel* model,
StartPosQueue* queue, ZopfliNode* nodes) {
const size_t cur_ix = block_start + pos;
const size_t cur_ix_masked = cur_ix & ringbuffer_mask;
const size_t max_distance = BROTLI_MIN(size_t, cur_ix, max_backward_limit);
const size_t max_len = num_bytes - pos;
const size_t max_zopfli_len = MaxZopfliLen(params);
const size_t max_iters = MaxZopfliCandidates(params);
size_t min_len;
size_t result = 0;
size_t k;
EvaluateNode(block_start, pos, max_backward_limit, starting_dist_cache, model,
queue, nodes);
{
const PosData* posdata = StartPosQueueAt(queue, 0);
float min_cost = (posdata->cost + ZopfliCostModelGetMinCostCmd(model) +
ZopfliCostModelGetLiteralCosts(model, posdata->pos, pos));
min_len = ComputeMinimumCopyLength(min_cost, nodes, num_bytes, pos);
}
/* Go over the command starting positions in order of increasing cost
difference. */
for (k = 0; k < max_iters && k < StartPosQueueSize(queue); ++k) {
const PosData* posdata = StartPosQueueAt(queue, k);
const size_t start = posdata->pos;
const uint16_t inscode = GetInsertLengthCode(pos - start);
const float start_costdiff = posdata->costdiff;
const float base_cost = start_costdiff + (float)GetInsertExtra(inscode) +
ZopfliCostModelGetLiteralCosts(model, 0, pos);
/* Look for last distance matches using the distance cache from this
starting position. */
size_t best_len = min_len - 1;
size_t j = 0;
for (; j < BROTLI_NUM_DISTANCE_SHORT_CODES && best_len < max_len; ++j) {
const size_t idx = kDistanceCacheIndex[j];
const size_t backward =
(size_t)(posdata->distance_cache[idx] + kDistanceCacheOffset[j]);
size_t prev_ix = cur_ix - backward;
if (prev_ix >= cur_ix) {
continue;
}
if (BROTLI_PREDICT_FALSE(backward > max_distance)) {
continue;
}
prev_ix &= ringbuffer_mask;
if (cur_ix_masked + best_len > ringbuffer_mask ||
prev_ix + best_len > ringbuffer_mask ||
ringbuffer[cur_ix_masked + best_len] !=
ringbuffer[prev_ix + best_len]) {
continue;
}
{
const size_t len =
FindMatchLengthWithLimit(&ringbuffer[prev_ix],
&ringbuffer[cur_ix_masked],
max_len);
const float dist_cost = base_cost +
ZopfliCostModelGetDistanceCost(model, j);
size_t l;
for (l = best_len + 1; l <= len; ++l) {
const uint16_t copycode = GetCopyLengthCode(l);
const uint16_t cmdcode =
CombineLengthCodes(inscode, copycode, j == 0);
const float cost = (cmdcode < 128 ? base_cost : dist_cost) +
(float)GetCopyExtra(copycode) +
ZopfliCostModelGetCommandCost(model, cmdcode);
if (cost < nodes[pos + l].u.cost) {
UpdateZopfliNode(nodes, pos, start, l, l, backward, j + 1, cost);
result = BROTLI_MAX(size_t, result, l);
}
best_len = l;
}
}
}
/* At higher iterations look only for new last distance matches, since
looking only for new command start positions with the same distances
does not help much. */
if (k >= 2) continue;
{
/* Loop through all possible copy lengths at this position. */
size_t len = min_len;
for (j = 0; j < num_matches; ++j) {
BackwardMatch match = matches[j];
size_t dist = match.distance;
BROTLI_BOOL is_dictionary_match = TO_BROTLI_BOOL(dist > max_distance);
/* We already tried all possible last distance matches, so we can use
normal distance code here. */
size_t dist_code = dist + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
uint16_t dist_symbol;
uint32_t distextra;
uint32_t distnumextra;
float dist_cost;
size_t max_match_len;
PrefixEncodeCopyDistance(dist_code, 0, 0, &dist_symbol, &distextra);
distnumextra = distextra >> 24;
dist_cost = base_cost + (float)distnumextra +
ZopfliCostModelGetDistanceCost(model, dist_symbol);
/* Try all copy lengths up until the maximum copy length corresponding
to this distance. If the distance refers to the static dictionary, or
the maximum length is long enough, try only one maximum length. */
max_match_len = BackwardMatchLength(&match);
if (len < max_match_len &&
(is_dictionary_match || max_match_len > max_zopfli_len)) {
len = max_match_len;
}
for (; len <= max_match_len; ++len) {
const size_t len_code =
is_dictionary_match ? BackwardMatchLengthCode(&match) : len;
const uint16_t copycode = GetCopyLengthCode(len_code);
const uint16_t cmdcode = CombineLengthCodes(inscode, copycode, 0);
const float cost = dist_cost + (float)GetCopyExtra(copycode) +
ZopfliCostModelGetCommandCost(model, cmdcode);
if (cost < nodes[pos + len].u.cost) {
UpdateZopfliNode(nodes, pos, start, len, len_code, dist, 0, cost);
result = BROTLI_MAX(size_t, result, len);
}
}
}
}
}
return result;
}
static size_t ComputeShortestPathFromNodes(size_t num_bytes,
ZopfliNode* nodes) {
size_t index = num_bytes;
size_t num_commands = 0;
while (nodes[index].insert_length == 0 && nodes[index].length == 1) --index;
nodes[index].u.next = BROTLI_UINT32_MAX;
while (index != 0) {
size_t len = ZopfliNodeCommandLength(&nodes[index]);
index -= len;
nodes[index].u.next = (uint32_t)len;
num_commands++;
}
return num_commands;
}
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
void BrotliZopfliCreateCommands(const size_t num_bytes,
const size_t block_start,
const size_t max_backward_limit,
const ZopfliNode* nodes,
int* dist_cache,
size_t* last_insert_len,
Command* commands,
size_t* num_literals) {
size_t pos = 0;
uint32_t offset = nodes[0].u.next;
size_t i;
for (i = 0; offset != BROTLI_UINT32_MAX; i++) {
const ZopfliNode* next = &nodes[pos + offset];
size_t copy_length = ZopfliNodeCopyLength(next);
size_t insert_length = next->insert_length;
pos += insert_length;
offset = next->u.next;
if (i == 0) {
insert_length += *last_insert_len;
*last_insert_len = 0;
}
{
size_t distance = ZopfliNodeCopyDistance(next);
size_t len_code = ZopfliNodeLengthCode(next);
size_t max_distance =
BROTLI_MIN(size_t, block_start + pos, max_backward_limit);
BROTLI_BOOL is_dictionary = TO_BROTLI_BOOL(distance > max_distance);
size_t dist_code = ZopfliNodeDistanceCode(next);
InitCommand(
&commands[i], insert_length, copy_length, len_code, dist_code);
if (!is_dictionary && dist_code > 0) {
dist_cache[3] = dist_cache[2];
dist_cache[2] = dist_cache[1];
dist_cache[1] = dist_cache[0];
dist_cache[0] = (int)distance;
}
}
*num_literals += insert_length;
pos += copy_length;
}
*last_insert_len += num_bytes - pos;
}
static size_t ZopfliIterate(size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const BrotliEncoderParams* params,
const size_t max_backward_limit,
const int* dist_cache,
const ZopfliCostModel* model,
const uint32_t* num_matches,
const BackwardMatch* matches,
ZopfliNode* nodes) {
const size_t max_zopfli_len = MaxZopfliLen(params);
StartPosQueue queue;
size_t cur_match_pos = 0;
size_t i;
nodes[0].length = 0;
nodes[0].u.cost = 0;
InitStartPosQueue(&queue);
for (i = 0; i + 3 < num_bytes; i++) {
size_t skip = UpdateNodes(num_bytes, position, i, ringbuffer,
ringbuffer_mask, params, max_backward_limit, dist_cache,
num_matches[i], &matches[cur_match_pos], model, &queue, nodes);
if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
cur_match_pos += num_matches[i];
if (num_matches[i] == 1 &&
BackwardMatchLength(&matches[cur_match_pos - 1]) > max_zopfli_len) {
skip = BROTLI_MAX(size_t,
BackwardMatchLength(&matches[cur_match_pos - 1]), skip);
}
if (skip > 1) {
skip--;
while (skip) {
i++;
if (i + 3 >= num_bytes) break;
EvaluateNode(
position, i, max_backward_limit, dist_cache, model, &queue, nodes);
cur_match_pos += num_matches[i];
skip--;
}
}
}
return ComputeShortestPathFromNodes(num_bytes, nodes);
}
/* REQUIRES: nodes != NULL and len(nodes) >= num_bytes + 1 */
size_t BrotliZopfliComputeShortestPath(MemoryManager* m,
const BrotliDictionary* dictionary,
size_t num_bytes,
size_t position,
const uint8_t* ringbuffer,
size_t ringbuffer_mask,
const BrotliEncoderParams* params,
const size_t max_backward_limit,
const int* dist_cache,
HasherHandle hasher,
ZopfliNode* nodes) {
const size_t max_zopfli_len = MaxZopfliLen(params);
ZopfliCostModel model;
StartPosQueue queue;
BackwardMatch matches[MAX_NUM_MATCHES_H10];
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
position + num_bytes - StoreLookaheadH10() + 1 : position;
size_t i;
nodes[0].length = 0;
nodes[0].u.cost = 0;
InitZopfliCostModel(m, &model, num_bytes);
if (BROTLI_IS_OOM(m)) return 0;
ZopfliCostModelSetFromLiteralCosts(
&model, position, ringbuffer, ringbuffer_mask);
InitStartPosQueue(&queue);
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; i++) {
const size_t pos = position + i;
const size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
size_t num_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer,
ringbuffer_mask, pos, num_bytes - i, max_distance, params, matches);
size_t skip;
if (num_matches > 0 &&
BackwardMatchLength(&matches[num_matches - 1]) > max_zopfli_len) {
matches[0] = matches[num_matches - 1];
num_matches = 1;
}
skip = UpdateNodes(num_bytes, position, i, ringbuffer, ringbuffer_mask,
params, max_backward_limit, dist_cache, num_matches, matches, &model,
&queue, nodes);
if (skip < BROTLI_LONG_COPY_QUICK_STEP) skip = 0;
if (num_matches == 1 && BackwardMatchLength(&matches[0]) > max_zopfli_len) {
skip = BROTLI_MAX(size_t, BackwardMatchLength(&matches[0]), skip);
}
if (skip > 1) {
/* Add the tail of the copy to the hasher. */
StoreRangeH10(hasher, ringbuffer, ringbuffer_mask, pos + 1, BROTLI_MIN(
size_t, pos + skip, store_end));
skip--;
while (skip) {
i++;
if (i + HashTypeLengthH10() - 1 >= num_bytes) break;
EvaluateNode(
position, i, max_backward_limit, dist_cache, &model, &queue, nodes);
skip--;
}
}
}
CleanupZopfliCostModel(m, &model);
return ComputeShortestPathFromNodes(num_bytes, nodes);
}
void BrotliCreateZopfliBackwardReferences(
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
size_t* last_insert_len, Command* commands, size_t* num_commands,
size_t* num_literals) {
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
ZopfliNode* nodes;
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
if (BROTLI_IS_OOM(m)) return;
BrotliInitZopfliNodes(nodes, num_bytes + 1);
*num_commands += BrotliZopfliComputeShortestPath(m, dictionary, num_bytes,
position, ringbuffer, ringbuffer_mask, params, max_backward_limit,
dist_cache, hasher, nodes);
if (BROTLI_IS_OOM(m)) return;
BrotliZopfliCreateCommands(num_bytes, position, max_backward_limit, nodes,
dist_cache, last_insert_len, commands, num_literals);
BROTLI_FREE(m, nodes);
}
void BrotliCreateHqZopfliBackwardReferences(
MemoryManager* m, const BrotliDictionary* dictionary, size_t num_bytes,
size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
const BrotliEncoderParams* params, HasherHandle hasher, int* dist_cache,
size_t* last_insert_len, Command* commands, size_t* num_commands,
size_t* num_literals) {
const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(params->lgwin);
uint32_t* num_matches = BROTLI_ALLOC(m, uint32_t, num_bytes);
size_t matches_size = 4 * num_bytes;
const size_t store_end = num_bytes >= StoreLookaheadH10() ?
position + num_bytes - StoreLookaheadH10() + 1 : position;
size_t cur_match_pos = 0;
size_t i;
size_t orig_num_literals;
size_t orig_last_insert_len;
int orig_dist_cache[4];
size_t orig_num_commands;
ZopfliCostModel model;
ZopfliNode* nodes;
BackwardMatch* matches = BROTLI_ALLOC(m, BackwardMatch, matches_size);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i + HashTypeLengthH10() - 1 < num_bytes; ++i) {
const size_t pos = position + i;
size_t max_distance = BROTLI_MIN(size_t, pos, max_backward_limit);
size_t max_length = num_bytes - i;
size_t num_found_matches;
size_t cur_match_end;
size_t j;
/* Ensure that we have enough free slots. */
BROTLI_ENSURE_CAPACITY(m, BackwardMatch, matches, matches_size,
cur_match_pos + MAX_NUM_MATCHES_H10);
if (BROTLI_IS_OOM(m)) return;
num_found_matches = FindAllMatchesH10(hasher, dictionary, ringbuffer,
ringbuffer_mask, pos, max_length, max_distance, params,
&matches[cur_match_pos]);
cur_match_end = cur_match_pos + num_found_matches;
for (j = cur_match_pos; j + 1 < cur_match_end; ++j) {
assert(BackwardMatchLength(&matches[j]) <
BackwardMatchLength(&matches[j + 1]));
assert(matches[j].distance > max_distance ||
matches[j].distance <= matches[j + 1].distance);
}
num_matches[i] = (uint32_t)num_found_matches;
if (num_found_matches > 0) {
const size_t match_len = BackwardMatchLength(&matches[cur_match_end - 1]);
if (match_len > MAX_ZOPFLI_LEN_QUALITY_11) {
const size_t skip = match_len - 1;
matches[cur_match_pos++] = matches[cur_match_end - 1];
num_matches[i] = 1;
/* Add the tail of the copy to the hasher. */
StoreRangeH10(hasher, ringbuffer, ringbuffer_mask, pos + 1,
BROTLI_MIN(size_t, pos + match_len, store_end));
memset(&num_matches[i + 1], 0, skip * sizeof(num_matches[0]));
i += skip;
} else {
cur_match_pos = cur_match_end;
}
}
}
orig_num_literals = *num_literals;
orig_last_insert_len = *last_insert_len;
memcpy(orig_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
orig_num_commands = *num_commands;
nodes = BROTLI_ALLOC(m, ZopfliNode, num_bytes + 1);
if (BROTLI_IS_OOM(m)) return;
InitZopfliCostModel(m, &model, num_bytes);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < 2; i++) {
BrotliInitZopfliNodes(nodes, num_bytes + 1);
if (i == 0) {
ZopfliCostModelSetFromLiteralCosts(
&model, position, ringbuffer, ringbuffer_mask);
} else {
ZopfliCostModelSetFromCommands(&model, position, ringbuffer,
ringbuffer_mask, commands, *num_commands - orig_num_commands,
orig_last_insert_len);
}
*num_commands = orig_num_commands;
*num_literals = orig_num_literals;
*last_insert_len = orig_last_insert_len;
memcpy(dist_cache, orig_dist_cache, 4 * sizeof(dist_cache[0]));
*num_commands += ZopfliIterate(num_bytes, position, ringbuffer,
ringbuffer_mask, params, max_backward_limit, dist_cache,
&model, num_matches, matches, nodes);
BrotliZopfliCreateCommands(num_bytes, position, max_backward_limit,
nodes, dist_cache, last_insert_len, commands, num_literals);
}
CleanupZopfliCostModel(m, &model);
BROTLI_FREE(m, nodes);
BROTLI_FREE(m, matches);
BROTLI_FREE(m, num_matches);
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif


@@ -11,7 +11,7 @@
#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "../types.h"
#include "./command.h"
#include "./hash.h"
#include "./memory.h"


@@ -1,35 +0,0 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions to estimate the bit cost of Huffman trees. */
#include "./bit_cost.h"
#include "../common/constants.h"
#include <brotli/types.h>
#include "./fast_log.h"
#include "./histogram.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define FN(X) X ## Literal
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Command
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Distance
#include "./bit_cost_inc.h" /* NOLINT(build/include) */
#undef FN
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif


@@ -9,7 +9,7 @@
#ifndef BROTLI_ENC_BIT_COST_H_
#define BROTLI_ENC_BIT_COST_H_
#include <brotli/types.h>
#include "../types.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./port.h"


@@ -1,197 +0,0 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Block split point selection utilities. */
#include "./block_splitter.h"
#include <assert.h>
#include <string.h> /* memcpy, memset */
#include "./bit_cost.h"
#include "./cluster.h"
#include "./command.h"
#include "./fast_log.h"
#include "./histogram.h"
#include "./memory.h"
#include "./port.h"
#include "./quality.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
static const double kLiteralBlockSwitchCost = 28.1;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
static const size_t kLiteralStrideLength = 70;
static const size_t kCommandStrideLength = 40;
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kSymbolsPerDistanceHistogram = 544;
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
/* Count how many we have. */
size_t total_length = 0;
size_t i;
for (i = 0; i < num_commands; ++i) {
total_length += cmds[i].insert_len_;
}
return total_length;
}
static void CopyLiteralsToByteArray(const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t offset,
const size_t mask,
uint8_t* literals) {
size_t pos = 0;
size_t from_pos = offset & mask;
size_t i;
for (i = 0; i < num_commands; ++i) {
size_t insert_len = cmds[i].insert_len_;
if (from_pos + insert_len > mask) {
size_t head_size = mask + 1 - from_pos;
memcpy(literals + pos, data + from_pos, head_size);
from_pos = 0;
pos += head_size;
insert_len -= head_size;
}
if (insert_len > 0) {
memcpy(literals + pos, data + from_pos, insert_len);
pos += insert_len;
}
from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
}
}
static BROTLI_INLINE unsigned int MyRand(unsigned int* seed) {
*seed *= 16807U;
if (*seed == 0) {
*seed = 1;
}
return *seed;
}
static BROTLI_INLINE double BitCost(size_t count) {
return count == 0 ? -2.0 : FastLog2(count);
}
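/* Note: 16807 is the multiplier of the Park-Miller "minimal standard"
   generator, although MyRand iterates it modulo 2^32 rather than modulo
   the Mersenne prime 2^31 - 1; that is sufficient for the randomized
   sampling done during block-split refinement. BitCost scores an empty
   histogram bin with a fixed -2.0 instead of log2(0). */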
#define HISTOGRAMS_PER_BATCH 64
#define CLUSTERS_PER_BATCH 16
#define FN(X) X ## Literal
#define DataType uint8_t
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef DataType
#undef FN
#define FN(X) X ## Command
#define DataType uint16_t
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef FN
#define FN(X) X ## Distance
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef DataType
#undef FN
void BrotliInitBlockSplit(BlockSplit* self) {
self->num_types = 0;
self->num_blocks = 0;
self->types = 0;
self->lengths = 0;
self->types_alloc_size = 0;
self->lengths_alloc_size = 0;
}
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
BROTLI_FREE(m, self->types);
BROTLI_FREE(m, self->lengths);
}
void BrotliSplitBlock(MemoryManager* m,
const Command* cmds,
const size_t num_commands,
const uint8_t* data,
const size_t pos,
const size_t mask,
const BrotliEncoderParams* params,
BlockSplit* literal_split,
BlockSplit* insert_and_copy_split,
BlockSplit* dist_split) {
{
size_t literals_count = CountLiterals(cmds, num_commands);
uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
if (BROTLI_IS_OOM(m)) return;
/* Create a continuous array of literals. */
CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
/* Create the block split on the array of literals.
Literal histograms have alphabet size 256. */
SplitByteVectorLiteral(
m, literals, literals_count,
kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
kLiteralStrideLength, kLiteralBlockSwitchCost, params,
literal_split);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, literals);
}
{
/* Compute prefix codes for commands. */
uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
size_t i;
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_commands; ++i) {
insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
}
/* Create the block split on the array of command prefixes. */
SplitByteVectorCommand(
m, insert_and_copy_codes, num_commands,
kSymbolsPerCommandHistogram, kMaxCommandHistograms,
kCommandStrideLength, kCommandBlockSwitchCost, params,
insert_and_copy_split);
if (BROTLI_IS_OOM(m)) return;
/* TODO: reuse for distances? */
BROTLI_FREE(m, insert_and_copy_codes);
}
{
/* Create a continuous array of distance prefixes. */
uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
size_t j = 0;
size_t i;
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_commands; ++i) {
const Command* cmd = &cmds[i];
if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
distance_prefixes[j++] = cmd->dist_prefix_;
}
}
/* Create the block split on the array of distance prefixes. */
SplitByteVectorDistance(
m, distance_prefixes, j,
kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
kCommandStrideLength, kDistanceBlockSwitchCost, params,
dist_split);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, distance_prefixes);
}
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif


@@ -9,7 +9,7 @@
#ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
#define BROTLI_ENC_BLOCK_SPLITTER_H_
#include <brotli/types.h>
#include "../types.h"
#include "./command.h"
#include "./memory.h"
#include "./port.h"


File diff suppressed because it is too large.


@@ -16,7 +16,7 @@
#ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
#define BROTLI_ENC_BROTLI_BIT_STREAM_H_
#include <brotli/types.h>
#include "../types.h"
#include "./command.h"
#include "./context.h"
#include "./entropy_encode.h"


@@ -1,56 +0,0 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Functions for clustering similar histograms together. */
#include "./cluster.h"
#include <brotli/types.h>
#include "./bit_cost.h" /* BrotliPopulationCost */
#include "./fast_log.h"
#include "./histogram.h"
#include "./memory.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static BROTLI_INLINE BROTLI_BOOL HistogramPairIsLess(
const HistogramPair* p1, const HistogramPair* p2) {
if (p1->cost_diff != p2->cost_diff) {
return TO_BROTLI_BOOL(p1->cost_diff > p2->cost_diff);
}
return TO_BROTLI_BOOL((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1));
}
/* Returns entropy reduction of the context map when we combine two clusters. */
static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
size_t size_c = size_a + size_b;
return (double)size_a * FastLog2(size_a) +
(double)size_b * FastLog2(size_b) -
(double)size_c * FastLog2(size_c);
}
#define CODE(X) X
#define FN(X) X ## Literal
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Command
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Distance
#include "./cluster_inc.h" /* NOLINT(build/include) */
#undef FN
#undef CODE
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif


@@ -9,7 +9,7 @@
#ifndef BROTLI_ENC_CLUSTER_H_
#define BROTLI_ENC_CLUSTER_H_
#include <brotli/types.h>
#include "../types.h"
#include "./histogram.h"
#include "./memory.h"
#include "./port.h"


@@ -10,8 +10,8 @@
#define BROTLI_ENC_COMMAND_H_
#include "../common/constants.h"
#include <brotli/port.h>
#include <brotli/types.h>
#include "../port.h"
#include "../types.h"
#include "./fast_log.h"
#include "./prefix.h"


@@ -1,791 +0,0 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function for fast encoding of an input fragment, independently from the input
history. This function uses one-pass processing: when we find a backward
match, we immediately emit the corresponding command and literal codes to
the bit stream.
Adapted from the CompressFragment() function in
https://github.com/google/snappy/blob/master/snappy.cc */
#include "./compress_fragment.h"
#include <string.h> /* memcmp, memcpy, memset */
#include "../common/constants.h"
#include <brotli/types.h>
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
#include "./port.h"
#include "./write_bits.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
* No long streaks of ones or zeros.
* There is no effort to ensure that it is a prime, the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 24) * kHashMul32;
return (uint32_t)(h >> shift);
}
static BROTLI_INLINE uint32_t HashBytesAtOffset(
uint64_t v, int offset, size_t shift) {
assert(offset >= 0);
assert(offset <= 3);
{
const uint64_t h = ((v >> (8 * offset)) << 24) * kHashMul32;
return (uint32_t)(h >> shift);
}
}
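/* Both hash helpers depend on exactly 5 input bytes: "<< 24" pushes the
   top 3 bytes of the 64-bit load out of the product, so on little-endian
   targets the hash covers p[0..4], matching kMinMatchLen == 5 and the
   4 + 1 byte comparison in IsMatch below. HashBytesAtOffset(v, k, shift)
   equals Hash(p + k, shift) for v = BROTLI_UNALIGNED_LOAD64(p), k <= 3. */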
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
return TO_BROTLI_BOOL(
BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
p1[4] == p2[4]);
}
/* Builds a literal prefix code into "depths" and "bits" based on the statistics
of the "input" string and stores it into the bit stream.
Note that the prefix code here is built from the pre-LZ77 input, therefore
we can only approximate the statistics of the actual literal stream.
Moreover, for long inputs we build a histogram from a sample of the input
and thus have to assign a non-zero depth for each literal.
Returns estimated compression ratio millibytes/char for encoding given input
with generated code. */
static size_t BuildAndStoreLiteralPrefixCode(MemoryManager* m,
const uint8_t* input,
const size_t input_size,
uint8_t depths[256],
uint16_t bits[256],
size_t* storage_ix,
uint8_t* storage) {
uint32_t histogram[256] = { 0 };
size_t histogram_total;
size_t i;
if (input_size < (1 << 15)) {
for (i = 0; i < input_size; ++i) {
++histogram[input[i]];
}
histogram_total = input_size;
for (i = 0; i < 256; ++i) {
/* We weigh the first 11 samples with weight 3 to account for the
balancing effect of the LZ77 phase on the histogram. */
const uint32_t adjust = 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
} else {
static const size_t kSampleRate = 29;
for (i = 0; i < input_size; i += kSampleRate) {
++histogram[input[i]];
}
histogram_total = (input_size + kSampleRate - 1) / kSampleRate;
for (i = 0; i < 256; ++i) {
/* We add 1 to each population count to avoid 0 bit depths (since this is
only a sample and we don't know if the symbol appears or not), and we
weigh the first 11 samples with weight 3 to account for the balancing
effect of the LZ77 phase on the histogram (more frequent symbols are
more likely to be in backward references instead of as literals). */
const uint32_t adjust = 1 + 2 * BROTLI_MIN(uint32_t, histogram[i], 11u);
histogram[i] += adjust;
histogram_total += adjust;
}
}
BrotliBuildAndStoreHuffmanTreeFast(m, histogram, histogram_total,
/* max_bits = */ 8,
depths, bits, storage_ix, storage);
if (BROTLI_IS_OOM(m)) return 0;
{
size_t literal_ratio = 0;
for (i = 0; i < 256; ++i) {
if (histogram[i]) literal_ratio += histogram[i] * depths[i];
}
/* Estimated encoding ratio, millibytes per symbol. */
return (literal_ratio * 125) / histogram_total;
}
}
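/* Note: the factor 125 converts bits per symbol into millibytes per
   symbol (1000 / 8), which is the "millibytes/char" ratio promised in
   the function comment above. */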
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(const uint32_t histogram[128],
uint8_t depth[128], uint16_t bits[128], size_t* storage_ix,
uint8_t* storage) {
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
HuffmanTree tree[129];
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
uint16_t cmd_bits[64];
BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
/* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
memcpy(cmd_depth, depth, 24);
memcpy(cmd_depth + 24, depth + 40, 8);
memcpy(cmd_depth + 32, depth + 24, 8);
memcpy(cmd_depth + 40, depth + 48, 8);
memcpy(cmd_depth + 48, depth + 32, 8);
memcpy(cmd_depth + 56, depth + 56, 8);
BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
memcpy(bits, cmd_bits, 48);
memcpy(bits + 24, cmd_bits + 32, 16);
memcpy(bits + 32, cmd_bits + 48, 16);
memcpy(bits + 40, cmd_bits + 24, 16);
memcpy(bits + 48, cmd_bits + 40, 16);
memcpy(bits + 56, cmd_bits + 56, 16);
BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
/* Create the bit length array for the full command alphabet. */
size_t i;
memset(cmd_depth, 0, 64); /* only the first 64 values were used */
memcpy(cmd_depth, depth, 8);
memcpy(cmd_depth + 64, depth + 8, 8);
memcpy(cmd_depth + 128, depth + 16, 8);
memcpy(cmd_depth + 192, depth + 24, 8);
memcpy(cmd_depth + 384, depth + 32, 8);
for (i = 0; i < 8; ++i) {
cmd_depth[128 + 8 * i] = depth[40 + i];
cmd_depth[256 + 8 * i] = depth[48 + i];
cmd_depth[448 + 8 * i] = depth[56 + i];
}
BrotliStoreHuffmanTree(
cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
}
BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
/* REQUIRES: insertlen < 6210 */
static BROTLI_INLINE void EmitInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (insertlen < 6) {
const size_t code = insertlen + 40;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
++histo[code];
} else if (insertlen < 130) {
const size_t tail = insertlen - 2;
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
const size_t prefix = tail >> nbits;
const size_t inscode = (nbits << 1) + prefix + 42;
BrotliWriteBits(depth[inscode], bits[inscode], storage_ix, storage);
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[inscode];
} else if (insertlen < 2114) {
const size_t tail = insertlen - 66;
const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 50;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
++histo[code];
} else {
BrotliWriteBits(depth[61], bits[61], storage_ix, storage);
BrotliWriteBits(12, insertlen - 2114, storage_ix, storage);
++histo[21];
}
}
static BROTLI_INLINE void EmitLongInsertLen(size_t insertlen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (insertlen < 22594) {
BrotliWriteBits(depth[62], bits[62], storage_ix, storage);
BrotliWriteBits(14, insertlen - 6210, storage_ix, storage);
++histo[22];
} else {
BrotliWriteBits(depth[63], bits[63], storage_ix, storage);
BrotliWriteBits(24, insertlen - 22594, storage_ix, storage);
++histo[23];
}
}
static BROTLI_INLINE void EmitCopyLen(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (copylen < 10) {
BrotliWriteBits(
depth[copylen + 14], bits[copylen + 14], storage_ix, storage);
++histo[copylen + 14];
} else if (copylen < 134) {
const size_t tail = copylen - 6;
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 20;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 2118) {
const size_t tail = copylen - 70;
const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 28;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
++histo[code];
} else {
BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
BrotliWriteBits(24, copylen - 2118, storage_ix, storage);
++histo[47];
}
}
static BROTLI_INLINE void EmitCopyLenLastDistance(size_t copylen,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix,
uint8_t* storage) {
if (copylen < 12) {
BrotliWriteBits(depth[copylen - 4], bits[copylen - 4], storage_ix, storage);
++histo[copylen - 4];
} else if (copylen < 72) {
const size_t tail = copylen - 8;
const uint32_t nbits = Log2FloorNonZero(tail) - 1;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 4;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - (prefix << nbits), storage_ix, storage);
++histo[code];
} else if (copylen < 136) {
const size_t tail = copylen - 8;
const size_t code = (tail >> 5) + 30;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(5, tail & 31, storage_ix, storage);
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else if (copylen < 2120) {
const size_t tail = copylen - 72;
const uint32_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 28;
BrotliWriteBits(depth[code], bits[code], storage_ix, storage);
BrotliWriteBits(nbits, tail - ((size_t)1 << nbits), storage_ix, storage);
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[code];
++histo[64];
} else {
BrotliWriteBits(depth[39], bits[39], storage_ix, storage);
BrotliWriteBits(24, copylen - 2120, storage_ix, storage);
BrotliWriteBits(depth[64], bits[64], storage_ix, storage);
++histo[47];
++histo[64];
}
}
static BROTLI_INLINE void EmitDistance(size_t distance,
const uint8_t depth[128],
const uint16_t bits[128],
uint32_t histo[128],
size_t* storage_ix, uint8_t* storage) {
const size_t d = distance + 3;
const uint32_t nbits = Log2FloorNonZero(d) - 1u;
const size_t prefix = (d >> nbits) & 1;
const size_t offset = (2 + prefix) << nbits;
const size_t distcode = 2 * (nbits - 1) + prefix + 80;
BrotliWriteBits(depth[distcode], bits[distcode], storage_ix, storage);
BrotliWriteBits(nbits, d - offset, storage_ix, storage);
++histo[distcode];
}
static BROTLI_INLINE void EmitLiterals(const uint8_t* input, const size_t len,
const uint8_t depth[256],
const uint16_t bits[256],
size_t* storage_ix, uint8_t* storage) {
size_t j;
for (j = 0; j < len; j++) {
const uint8_t lit = input[j];
BrotliWriteBits(depth[lit], bits[lit], storage_ix, storage);
}
}
/* REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
uint8_t* storage) {
size_t nibbles = 6;
/* ISLAST */
BrotliWriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
nibbles = 4;
} else if (len <= (1U << 20)) {
nibbles = 5;
}
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
/* ISUNCOMPRESSED */
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
static void UpdateBits(size_t n_bits, uint32_t bits, size_t pos,
uint8_t *array) {
while (n_bits > 0) {
size_t byte_pos = pos >> 3;
size_t n_unchanged_bits = pos & 7;
size_t n_changed_bits = BROTLI_MIN(size_t, n_bits, 8 - n_unchanged_bits);
size_t total_bits = n_unchanged_bits + n_changed_bits;
uint32_t mask =
(~((1u << total_bits) - 1u)) | ((1u << n_unchanged_bits) - 1u);
uint32_t unchanged_bits = array[byte_pos] & mask;
uint32_t changed_bits = bits & ((1u << n_changed_bits) - 1u);
array[byte_pos] =
(uint8_t)((changed_bits << n_unchanged_bits) | unchanged_bits);
n_bits -= n_changed_bits;
bits >>= n_changed_bits;
pos += n_changed_bits;
}
}
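/* UpdateBits rewrites already-emitted bits in place; the fragment
   compressor uses it to patch the MLEN nibbles of a meta-block header
   (saved via mlen_storage_ix below) when it decides to extend that
   meta-block. */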
static void RewindBitPosition(const size_t new_storage_ix,
size_t* storage_ix, uint8_t* storage) {
const size_t bitpos = new_storage_ix & 7;
const size_t mask = (1u << bitpos) - 1;
storage[new_storage_ix >> 3] &= (uint8_t)mask;
*storage_ix = new_storage_ix;
}
static BROTLI_BOOL ShouldMergeBlock(
const uint8_t* data, size_t len, const uint8_t* depths) {
size_t histo[256] = { 0 };
static const size_t kSampleRate = 43;
size_t i;
for (i = 0; i < len; i += kSampleRate) {
++histo[data[i]];
}
{
const size_t total = (len + kSampleRate - 1) / kSampleRate;
double r = (FastLog2(total) + 0.5) * (double)total + 200;
for (i = 0; i < 256; ++i) {
r -= (double)histo[i] * (depths[i] + FastLog2(histo[i]));
}
return TO_BROTLI_BOOL(r >= 0.0);
}
}
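/* The test above estimates, from a 1-in-43 sample, whether the current
   literal code (|depths|) would encode the new data within roughly half
   a bit per sampled symbol of the sample's own entropy, plus 200 bits of
   slack; if so, merging into the current meta-block is preferred over
   starting a new one. */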
/* Acceptable loss for uncompressible speedup is 2% */
#define MIN_RATIO 980
static BROTLI_INLINE BROTLI_BOOL ShouldUseUncompressedMode(
const uint8_t* metablock_start, const uint8_t* next_emit,
const size_t insertlen, const size_t literal_ratio) {
const size_t compressed = (size_t)(next_emit - metablock_start);
if (compressed * 50 > insertlen) {
return BROTLI_FALSE;
} else {
return TO_BROTLI_BOOL(literal_ratio > MIN_RATIO);
}
}
static void EmitUncompressedMetaBlock(const uint8_t* begin, const uint8_t* end,
const size_t storage_ix_start,
size_t* storage_ix, uint8_t* storage) {
const size_t len = (size_t)(end - begin);
RewindBitPosition(storage_ix_start, storage_ix, storage);
BrotliStoreMetaBlockHeader(len, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], begin, len);
*storage_ix += len << 3;
storage[*storage_ix >> 3] = 0;
}
static uint32_t kCmdHistoSeed[128] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0,
};
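/* The seed gives (almost) every command and distance symbol a starting
   count of 1, so each code keeps a nonzero frequency when the gathered
   histogram is used to rebuild the prefix codes for the next block; the
   slots seeded with 0 appear to be symbols this fast path never emits. */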
static BROTLI_INLINE void BrotliCompressFragmentFastImpl(
MemoryManager* m, const uint8_t* input, size_t input_size,
BROTLI_BOOL is_last, int* table, size_t table_bits, uint8_t cmd_depth[128],
uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
size_t* storage_ix, uint8_t* storage) {
uint32_t cmd_histo[128];
const uint8_t* ip_end;
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
the end of the input will be emitted as literal bytes. */
const uint8_t* next_emit = input;
/* Save the start of the first block for position and distance computations.
*/
const uint8_t* base_ip = input;
static const size_t kFirstBlockSize = 3 << 15;
static const size_t kMergeBlockSize = 1 << 16;
const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
const size_t kMinMatchLen = 5;
const uint8_t* metablock_start = input;
size_t block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
size_t total_block_size = block_size;
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
size_t mlen_storage_ix = *storage_ix + 3;
uint8_t lit_depth[256];
uint16_t lit_bits[256];
size_t literal_ratio;
const uint8_t* ip;
int last_distance;
const size_t shift = 64u - table_bits;
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
BrotliWriteBits(13, 0, storage_ix, storage);
literal_ratio = BuildAndStoreLiteralPrefixCode(
m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
{
/* Store the pre-compressed command and distance prefix codes. */
size_t i;
for (i = 0; i + 7 < *cmd_code_numbits; i += 8) {
BrotliWriteBits(8, cmd_code[i >> 3], storage_ix, storage);
}
}
BrotliWriteBits(*cmd_code_numbits & 7, cmd_code[*cmd_code_numbits >> 3],
storage_ix, storage);
emit_commands:
/* Initialize the command and distance histograms. We will gather
statistics of command and distance codes during the processing
of this block and use it to update the command and distance
prefix codes for the next block. */
memcpy(cmd_histo, kCmdHistoSeed, sizeof(kCmdHistoSeed));
/* "ip" is the input pointer. */
ip = input;
last_distance = -1;
ip_end = input + block_size;
if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
/* For the last block, we need to keep a 16 bytes margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
uint32_t next_hash;
for (next_hash = Hash(++ip, shift); ; ) {
/* Step 1: Scan forward in the input looking for a 5-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
Heuristic match skipping: If 32 bytes are scanned with no matches
found, start looking only at every other byte. If 32 more bytes are
scanned, look at every third byte, etc.. When a match is found,
immediately go back to looking at every byte. This is a small loss
(~5% performance, ~0.1% density) for compressible data due to more
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
win since the compressor quickly "realizes" the data is incompressible
and doesn't bother looking for matches everywhere.
The "skip" variable keeps track of how many bytes there are since the
last match; dividing it by 32 (i.e. right-shifting by five) gives the
number of bytes to move ahead for each iteration. */
uint32_t skip = 32;
const uint8_t* next_ip = ip;
const uint8_t* candidate;
assert(next_emit < ip);
trawl:
do {
uint32_t hash = next_hash;
uint32_t bytes_between_hash_lookups = skip++ >> 5;
assert(hash == Hash(next_ip, shift));
ip = next_ip;
next_ip = ip + bytes_between_hash_lookups;
if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
}
next_hash = Hash(next_ip, shift);
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (BROTLI_PREDICT_TRUE(candidate < ip)) {
table[hash] = (int)(ip - base_ip);
break;
}
}
candidate = base_ip + table[hash];
assert(candidate >= base_ip);
assert(candidate < ip);
table[hash] = (int)(ip - base_ip);
} while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
/* Check copy distance. If candidate is not feasible, continue search.
Checking is done outside of hot loop to reduce overhead. */
if (ip - candidate > MAX_DISTANCE) goto trawl;
/* Step 2: Emit the found match together with the literal bytes from
"next_emit" to the bit stream, and then see if we can find a next match
immediately afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
{
/* We have a 5-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
int distance = (int)(base - candidate); /* > 0 */
size_t insert = (size_t)(base - next_emit);
ip += matched;
assert(0 == memcmp(base, candidate, matched));
if (BROTLI_PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
literal_ratio)) {
EmitUncompressedMetaBlock(metablock_start, base, mlen_storage_ix - 3,
storage_ix, storage);
input_size -= (size_t)(base - input);
input = base;
next_emit = input;
goto next_block;
} else {
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
}
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
storage_ix, storage);
if (distance == last_distance) {
BrotliWriteBits(cmd_depth[64], cmd_bits[64], storage_ix, storage);
++cmd_histo[64];
} else {
EmitDistance((size_t)distance, cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
last_distance = distance;
}
EmitCopyLenLastDistance(matched, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
next_emit = ip;
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
{
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
table[prev_hash] = (int)(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = (int)(ip - base_ip - 1);
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
while (IsMatch(ip, candidate)) {
/* We have a 5-byte match at ip, and no need to emit any literal bytes
prior to ip. */
const uint8_t* base = ip;
size_t matched = 5 + FindMatchLengthWithLimit(
candidate + 5, ip + 5, (size_t)(ip_end - ip) - 5);
if (ip - candidate > MAX_DISTANCE) break;
ip += matched;
last_distance = (int)(base - candidate); /* > 0 */
assert(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitDistance((size_t)last_distance, cmd_depth, cmd_bits,
cmd_histo, storage_ix, storage);
next_emit = ip;
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some positions
within the last copy. */
{
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 3);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
uint32_t cur_hash = HashBytesAtOffset(input_bytes, 3, shift);
table[prev_hash] = (int)(ip - base_ip - 3);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = (int)(ip - base_ip - 1);
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
next_hash = Hash(++ip, shift);
}
}
emit_remainder:
assert(next_emit <= ip_end);
input += block_size;
input_size -= block_size;
block_size = BROTLI_MIN(size_t, input_size, kMergeBlockSize);
/* Decide if we want to continue this meta-block instead of emitting the
last insert-only command. */
if (input_size > 0 &&
total_block_size + block_size <= (1 << 20) &&
ShouldMergeBlock(input, block_size, lit_depth)) {
assert(total_block_size > (1 << 16));
/* Update the size of the current meta-block and continue emitting commands.
We can do this because the current size and the new size both have 5
nibbles. */
total_block_size += block_size;
UpdateBits(20, (uint32_t)(total_block_size - 1), mlen_storage_ix, storage);
goto emit_commands;
}
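/* Note (hedged): a 5-nibble MLEN field encodes lengths up to 1 << 20,
and this merge path is only taken while total_block_size stays within
(1 << 16, 1 << 20], so the 20-bit value patched by UpdateBits() always
has the same width as the field originally written. */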
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
const size_t insert = (size_t)(ip_end - next_emit);
if (BROTLI_PREDICT_TRUE(insert < 6210)) {
EmitInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitLiterals(next_emit, insert, lit_depth, lit_bits, storage_ix, storage);
} else if (ShouldUseUncompressedMode(metablock_start, next_emit, insert,
literal_ratio)) {
EmitUncompressedMetaBlock(metablock_start, ip_end, mlen_storage_ix - 3,
storage_ix, storage);
} else {
EmitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo,
storage_ix, storage);
EmitLiterals(next_emit, insert, lit_depth, lit_bits,
storage_ix, storage);
}
}
next_emit = ip_end;
next_block:
/* If we have more data, write a new meta-block header and prefix codes and
then continue emitting commands. */
if (input_size > 0) {
metablock_start = input;
block_size = BROTLI_MIN(size_t, input_size, kFirstBlockSize);
total_block_size = block_size;
/* Save the bit position of the MLEN field of the meta-block header, so that
we can update it later if we decide to extend this meta-block. */
mlen_storage_ix = *storage_ix + 3;
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
BrotliWriteBits(13, 0, storage_ix, storage);
literal_ratio = BuildAndStoreLiteralPrefixCode(
m, input, block_size, lit_depth, lit_bits, storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
storage_ix, storage);
goto emit_commands;
}
if (!is_last) {
/* If this is not the last block, update the command and distance prefix
codes for the next block and store the compressed forms. */
cmd_code[0] = 0;
*cmd_code_numbits = 0;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depth, cmd_bits,
cmd_code_numbits, cmd_code);
}
}
#define FOR_TABLE_BITS_(X) X(9) X(11) X(13) X(15)
#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentFastImpl ## B( \
MemoryManager* m, const uint8_t* input, size_t input_size, \
BROTLI_BOOL is_last, int* table, uint8_t cmd_depth[128], \
uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code, \
size_t* storage_ix, uint8_t* storage) { \
BrotliCompressFragmentFastImpl(m, input, input_size, is_last, table, B, \
cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
void BrotliCompressFragmentFast(
MemoryManager* m, const uint8_t* input, size_t input_size,
BROTLI_BOOL is_last, int* table, size_t table_size, uint8_t cmd_depth[128],
uint16_t cmd_bits[128], size_t* cmd_code_numbits, uint8_t* cmd_code,
size_t* storage_ix, uint8_t* storage) {
const size_t initial_storage_ix = *storage_ix;
const size_t table_bits = Log2FloorNonZero(table_size);
if (input_size == 0) {
assert(is_last);
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
return;
}
switch (table_bits) {
#define CASE_(B) \
case B: \
BrotliCompressFragmentFastImpl ## B( \
m, input, input_size, is_last, table, cmd_depth, cmd_bits, \
cmd_code_numbits, cmd_code, storage_ix, storage); \
break;
FOR_TABLE_BITS_(CASE_)
#undef CASE_
default: assert(0); break;
}
/* If output is larger than a single uncompressed block, rewrite it. */
if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
EmitUncompressedMetaBlock(input, input + input_size, initial_storage_ix,
storage_ix, storage);
}
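/* Note (hedged): 31 bits appears to be the worst-case header cost of an
uncompressed meta-block for sizes up to 1 << 20: ISLAST(1) + MNIBBLES(2) +
MLEN(20) + ISUNCOMPRESSED(1), plus up to 7 bits of byte alignment, so
output beyond 31 + 8 * input_size bits is a net loss. */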
if (is_last) {
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
#undef FOR_TABLE_BITS_
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -12,7 +12,7 @@
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_H_
#include <brotli/types.h>
#include "../types.h"
#include "./memory.h"
#include "./port.h"

View File

@@ -1,612 +0,0 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Function for fast encoding of an input fragment, independently of the input
history. This function uses two-pass processing: in the first pass we save
the found backward matches and literal bytes into a buffer, and in the
second pass we emit them into the bit stream using prefix codes built based
on the actual command and literal byte histograms. */
#include "./compress_fragment_two_pass.h"
#include <string.h> /* memcmp, memcpy, memset */
#include "../common/constants.h"
#include <brotli/types.h>
#include "./bit_cost.h"
#include "./brotli_bit_stream.h"
#include "./entropy_encode.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"
#include "./port.h"
#include "./write_bits.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define MAX_DISTANCE (long)BROTLI_MAX_BACKWARD_LIMIT(18)
/* kHashMul32 multiplier has these properties:
* The multiplier must be odd. Otherwise we may lose the highest bit.
* No long streaks of ones or zeros.
* There is no effort to ensure that it is a prime; the oddity is enough
for this use.
* The number has been tuned heuristically against compression benchmarks. */
static const uint32_t kHashMul32 = 0x1e35a7bd;
static BROTLI_INLINE uint32_t Hash(const uint8_t* p, size_t shift) {
const uint64_t h = (BROTLI_UNALIGNED_LOAD64(p) << 16) * kHashMul32;
return (uint32_t)(h >> shift);
}
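/* Note (hedged): on little-endian targets the "<< 16" shifts p[6] and
p[7] out of the 64-bit load, so the hash depends only on the six bytes
that IsMatch() below actually compares. With table_bits = 14 the shift
is 50 and the hash is the top 14 bits of the product. */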
static BROTLI_INLINE uint32_t HashBytesAtOffset(
uint64_t v, int offset, size_t shift) {
assert(offset >= 0);
assert(offset <= 2);
{
const uint64_t h = ((v >> (8 * offset)) << 16) * kHashMul32;
return (uint32_t)(h >> shift);
}
}
static BROTLI_INLINE BROTLI_BOOL IsMatch(const uint8_t* p1, const uint8_t* p2) {
return TO_BROTLI_BOOL(
BROTLI_UNALIGNED_LOAD32(p1) == BROTLI_UNALIGNED_LOAD32(p2) &&
p1[4] == p2[4] &&
p1[5] == p2[5]);
}
/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
"bits" based on "histogram" and stores it into the bit stream. */
static void BuildAndStoreCommandPrefixCode(
const uint32_t histogram[128],
uint8_t depth[128], uint16_t bits[128],
size_t* storage_ix, uint8_t* storage) {
/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
HuffmanTree tree[129];
uint8_t cmd_depth[BROTLI_NUM_COMMAND_SYMBOLS] = { 0 };
uint16_t cmd_bits[64];
BrotliCreateHuffmanTree(histogram, 64, 15, tree, depth);
BrotliCreateHuffmanTree(&histogram[64], 64, 14, tree, &depth[64]);
/* We have to jump through a few hoops here in order to compute
the command bits because the symbols are in a different order than in
the full alphabet. This looks complicated, but having the symbols
in this order in the command bits saves a few branches in the Emit*
functions. */
memcpy(cmd_depth, depth + 24, 24);
memcpy(cmd_depth + 24, depth, 8);
memcpy(cmd_depth + 32, depth + 48, 8);
memcpy(cmd_depth + 40, depth + 8, 8);
memcpy(cmd_depth + 48, depth + 56, 8);
memcpy(cmd_depth + 56, depth + 16, 8);
BrotliConvertBitDepthsToSymbols(cmd_depth, 64, cmd_bits);
memcpy(bits, cmd_bits + 24, 16);
memcpy(bits + 8, cmd_bits + 40, 16);
memcpy(bits + 16, cmd_bits + 56, 16);
memcpy(bits + 24, cmd_bits, 48);
memcpy(bits + 48, cmd_bits + 32, 16);
memcpy(bits + 56, cmd_bits + 48, 16);
BrotliConvertBitDepthsToSymbols(&depth[64], 64, &bits[64]);
{
/* Create the bit length array for the full command alphabet. */
size_t i;
memset(cmd_depth, 0, 64); /* only the first 64 values were used */
memcpy(cmd_depth, depth + 24, 8);
memcpy(cmd_depth + 64, depth + 32, 8);
memcpy(cmd_depth + 128, depth + 40, 8);
memcpy(cmd_depth + 192, depth + 48, 8);
memcpy(cmd_depth + 384, depth + 56, 8);
for (i = 0; i < 8; ++i) {
cmd_depth[128 + 8 * i] = depth[i];
cmd_depth[256 + 8 * i] = depth[8 + i];
cmd_depth[448 + 8 * i] = depth[16 + i];
}
BrotliStoreHuffmanTree(
cmd_depth, BROTLI_NUM_COMMAND_SYMBOLS, tree, storage_ix, storage);
}
BrotliStoreHuffmanTree(&depth[64], 64, tree, storage_ix, storage);
}
static BROTLI_INLINE void EmitInsertLen(
uint32_t insertlen, uint32_t** commands) {
if (insertlen < 6) {
**commands = insertlen;
} else if (insertlen < 130) {
const uint32_t tail = insertlen - 2;
const uint32_t nbits = Log2FloorNonZero(tail) - 1u;
const uint32_t prefix = tail >> nbits;
const uint32_t inscode = (nbits << 1) + prefix + 2;
const uint32_t extra = tail - (prefix << nbits);
**commands = inscode | (extra << 8);
} else if (insertlen < 2114) {
const uint32_t tail = insertlen - 66;
const uint32_t nbits = Log2FloorNonZero(tail);
const uint32_t code = nbits + 10;
const uint32_t extra = tail - (1u << nbits);
**commands = code | (extra << 8);
} else if (insertlen < 6210) {
const uint32_t extra = insertlen - 2114;
**commands = 21 | (extra << 8);
} else if (insertlen < 22594) {
const uint32_t extra = insertlen - 6210;
**commands = 22 | (extra << 8);
} else {
const uint32_t extra = insertlen - 22594;
**commands = 23 | (extra << 8);
}
++(*commands);
}
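/* Worked example (illustrative, not from the original source):
insertlen = 7 takes the second branch: tail = 5, nbits = 1, prefix = 2,
inscode = 6, extra = 1, so the packed word is 6 | (1 << 8) = 0x106. */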
static BROTLI_INLINE void EmitCopyLen(size_t copylen, uint32_t** commands) {
if (copylen < 10) {
**commands = (uint32_t)(copylen + 38);
} else if (copylen < 134) {
const size_t tail = copylen - 6;
const size_t nbits = Log2FloorNonZero(tail) - 1;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 44;
const size_t extra = tail - (prefix << nbits);
**commands = (uint32_t)(code | (extra << 8));
} else if (copylen < 2118) {
const size_t tail = copylen - 70;
const size_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 52;
const size_t extra = tail - ((size_t)1 << nbits);
**commands = (uint32_t)(code | (extra << 8));
} else {
const size_t extra = copylen - 2118;
**commands = (uint32_t)(63 | (extra << 8));
}
++(*commands);
}
static BROTLI_INLINE void EmitCopyLenLastDistance(
size_t copylen, uint32_t** commands) {
if (copylen < 12) {
**commands = (uint32_t)(copylen + 20);
++(*commands);
} else if (copylen < 72) {
const size_t tail = copylen - 8;
const size_t nbits = Log2FloorNonZero(tail) - 1;
const size_t prefix = tail >> nbits;
const size_t code = (nbits << 1) + prefix + 28;
const size_t extra = tail - (prefix << nbits);
**commands = (uint32_t)(code | (extra << 8));
++(*commands);
} else if (copylen < 136) {
const size_t tail = copylen - 8;
const size_t code = (tail >> 5) + 54;
const size_t extra = tail & 31;
**commands = (uint32_t)(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else if (copylen < 2120) {
const size_t tail = copylen - 72;
const size_t nbits = Log2FloorNonZero(tail);
const size_t code = nbits + 52;
const size_t extra = tail - ((size_t)1 << nbits);
**commands = (uint32_t)(code | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
} else {
const size_t extra = copylen - 2120;
**commands = (uint32_t)(63 | (extra << 8));
++(*commands);
**commands = 64;
++(*commands);
}
}
static BROTLI_INLINE void EmitDistance(uint32_t distance, uint32_t** commands) {
uint32_t d = distance + 3;
uint32_t nbits = Log2FloorNonZero(d) - 1;
const uint32_t prefix = (d >> nbits) & 1;
const uint32_t offset = (2 + prefix) << nbits;
const uint32_t distcode = 2 * (nbits - 1) + prefix + 80;
uint32_t extra = d - offset;
**commands = distcode | (extra << 8);
++(*commands);
}
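/* Worked example (illustrative, not from the original source):
distance = 100 gives d = 103, nbits = 5, prefix = 1, offset = 96,
distcode = 89 and extra = 7, i.e. the packed word 89 | (7 << 8). */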
/* REQUIRES: len <= 1 << 24. */
static void BrotliStoreMetaBlockHeader(
size_t len, BROTLI_BOOL is_uncompressed, size_t* storage_ix,
uint8_t* storage) {
size_t nibbles = 6;
/* ISLAST */
BrotliWriteBits(1, 0, storage_ix, storage);
if (len <= (1U << 16)) {
nibbles = 4;
} else if (len <= (1U << 20)) {
nibbles = 5;
}
BrotliWriteBits(2, nibbles - 4, storage_ix, storage);
BrotliWriteBits(nibbles * 4, len - 1, storage_ix, storage);
/* ISUNCOMPRESSED */
BrotliWriteBits(1, (uint64_t)is_uncompressed, storage_ix, storage);
}
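/* Worked example (illustrative, not from the original source):
len = 100000 selects nibbles = 5, so the header is ISLAST(1 bit) = 0,
MNIBBLES(2 bits) = 1, MLEN - 1 (20 bits) = 99999 and
ISUNCOMPRESSED(1 bit): 24 bits in total. */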
static BROTLI_INLINE void CreateCommands(const uint8_t* input,
size_t block_size, size_t input_size, const uint8_t* base_ip, int* table,
size_t table_bits, uint8_t** literals, uint32_t** commands) {
/* "ip" is the input pointer. */
const uint8_t* ip = input;
const size_t shift = 64u - table_bits;
const uint8_t* ip_end = input + block_size;
/* "next_emit" is a pointer to the first byte that is not covered by a
previous copy. Bytes between "next_emit" and the start of the next copy or
the end of the input will be emitted as literal bytes. */
const uint8_t* next_emit = input;
int last_distance = -1;
const size_t kInputMarginBytes = BROTLI_WINDOW_GAP;
const size_t kMinMatchLen = 6;
if (BROTLI_PREDICT_TRUE(block_size >= kInputMarginBytes)) {
/* For the last block, we need to keep a 16-byte margin so that we can be
sure that all distances are at most window size - 16.
For all other blocks, we only need to keep a margin of 5 bytes so that
we don't go over the block size with a copy. */
const size_t len_limit = BROTLI_MIN(size_t, block_size - kMinMatchLen,
input_size - kInputMarginBytes);
const uint8_t* ip_limit = input + len_limit;
uint32_t next_hash;
for (next_hash = Hash(++ip, shift); ; ) {
/* Step 1: Scan forward in the input looking for a 6-byte-long match.
If we get close to exhausting the input then goto emit_remainder.
Heuristic match skipping: If 32 bytes are scanned with no matches
found, start looking only at every other byte. If 32 more bytes are
scanned, look at every third byte, etc. When a match is found,
immediately go back to looking at every byte. This is a small loss
(~5% performance, ~0.1% density) for compressible data due to more
bookkeeping, but for non-compressible data (such as JPEG) it's a huge
win since the compressor quickly "realizes" the data is incompressible
and doesn't bother looking for matches everywhere.
The "skip" variable keeps track of how many bytes there are since the
last match; dividing it by 32 (i.e. right-shifting by five) gives the
number of bytes to move ahead for each iteration. */
uint32_t skip = 32;
const uint8_t* next_ip = ip;
const uint8_t* candidate;
assert(next_emit < ip);
trawl:
do {
uint32_t hash = next_hash;
uint32_t bytes_between_hash_lookups = skip++ >> 5;
ip = next_ip;
assert(hash == Hash(ip, shift));
next_ip = ip + bytes_between_hash_lookups;
if (BROTLI_PREDICT_FALSE(next_ip > ip_limit)) {
goto emit_remainder;
}
next_hash = Hash(next_ip, shift);
candidate = ip - last_distance;
if (IsMatch(ip, candidate)) {
if (BROTLI_PREDICT_TRUE(candidate < ip)) {
table[hash] = (int)(ip - base_ip);
break;
}
}
candidate = base_ip + table[hash];
assert(candidate >= base_ip);
assert(candidate < ip);
table[hash] = (int)(ip - base_ip);
} while (BROTLI_PREDICT_TRUE(!IsMatch(ip, candidate)));
/* Check copy distance. If candidate is not feasible, continue search.
Checking is done outside of hot loop to reduce overhead. */
if (ip - candidate > MAX_DISTANCE) goto trawl;
/* Step 2: Emit the found match together with the literal bytes from
"next_emit", and then see if we can find a next match immediately
afterwards. Repeat until we find no match for the input
without emitting some literal bytes. */
{
/* We have a 6-byte match at ip, and we need to emit bytes in
[next_emit, ip). */
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
int distance = (int)(base - candidate); /* > 0 */
int insert = (int)(base - next_emit);
ip += matched;
assert(0 == memcmp(base, candidate, matched));
EmitInsertLen((uint32_t)insert, commands);
memcpy(*literals, next_emit, (size_t)insert);
*literals += insert;
if (distance == last_distance) {
**commands = 64;
++(*commands);
} else {
EmitDistance((uint32_t)distance, commands);
last_distance = distance;
}
EmitCopyLenLastDistance(matched, commands);
next_emit = ip;
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
{
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
uint32_t cur_hash;
table[prev_hash] = (int)(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = (int)(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 1);
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
while (ip - candidate <= MAX_DISTANCE && IsMatch(ip, candidate)) {
/* We have a 6-byte match at ip, and no need to emit any
literal bytes prior to ip. */
const uint8_t* base = ip;
size_t matched = 6 + FindMatchLengthWithLimit(
candidate + 6, ip + 6, (size_t)(ip_end - ip) - 6);
ip += matched;
last_distance = (int)(base - candidate); /* > 0 */
assert(0 == memcmp(base, candidate, matched));
EmitCopyLen(matched, commands);
EmitDistance((uint32_t)last_distance, commands);
next_emit = ip;
if (BROTLI_PREDICT_FALSE(ip >= ip_limit)) {
goto emit_remainder;
}
{
/* We could immediately start working at ip now, but to improve
compression we first update "table" with the hashes of some
positions within the last copy. */
uint64_t input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 5);
uint32_t prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
uint32_t cur_hash;
table[prev_hash] = (int)(ip - base_ip - 5);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 4);
prev_hash = HashBytesAtOffset(input_bytes, 2, shift);
table[prev_hash] = (int)(ip - base_ip - 3);
input_bytes = BROTLI_UNALIGNED_LOAD64(ip - 2);
cur_hash = HashBytesAtOffset(input_bytes, 2, shift);
prev_hash = HashBytesAtOffset(input_bytes, 0, shift);
table[prev_hash] = (int)(ip - base_ip - 2);
prev_hash = HashBytesAtOffset(input_bytes, 1, shift);
table[prev_hash] = (int)(ip - base_ip - 1);
candidate = base_ip + table[cur_hash];
table[cur_hash] = (int)(ip - base_ip);
}
}
next_hash = Hash(++ip, shift);
}
}
emit_remainder:
assert(next_emit <= ip_end);
/* Emit the remaining bytes as literals. */
if (next_emit < ip_end) {
const uint32_t insert = (uint32_t)(ip_end - next_emit);
EmitInsertLen(insert, commands);
memcpy(*literals, next_emit, insert);
*literals += insert;
}
}
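/* Note (hedged): each 32-bit word appended to "commands" packs the
prefix-code symbol in its low 8 bits and the extra bits in the upper 24,
which is how StoreCommands() below unpacks it (code = cmd & 0xFF,
extra = cmd >> 8). */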
static void StoreCommands(MemoryManager* m,
const uint8_t* literals, const size_t num_literals,
const uint32_t* commands, const size_t num_commands,
size_t* storage_ix, uint8_t* storage) {
static const uint32_t kNumExtraBits[128] = {
0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24,
};
static const uint32_t kInsertOffset[24] = {
0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578,
1090, 2114, 6210, 22594,
};
uint8_t lit_depths[256];
uint16_t lit_bits[256];
uint32_t lit_histo[256] = { 0 };
uint8_t cmd_depths[128] = { 0 };
uint16_t cmd_bits[128] = { 0 };
uint32_t cmd_histo[128] = { 0 };
size_t i;
for (i = 0; i < num_literals; ++i) {
++lit_histo[literals[i]];
}
BrotliBuildAndStoreHuffmanTreeFast(m, lit_histo, num_literals,
/* max_bits = */ 8,
lit_depths, lit_bits,
storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_commands; ++i) {
const uint32_t code = commands[i] & 0xFF;
assert(code < 128);
++cmd_histo[code];
}
cmd_histo[1] += 1;
cmd_histo[2] += 1;
cmd_histo[64] += 1;
cmd_histo[84] += 1;
BuildAndStoreCommandPrefixCode(cmd_histo, cmd_depths, cmd_bits,
storage_ix, storage);
for (i = 0; i < num_commands; ++i) {
const uint32_t cmd = commands[i];
const uint32_t code = cmd & 0xFF;
const uint32_t extra = cmd >> 8;
assert(code < 128);
BrotliWriteBits(cmd_depths[code], cmd_bits[code], storage_ix, storage);
BrotliWriteBits(kNumExtraBits[code], extra, storage_ix, storage);
if (code < 24) {
const uint32_t insert = kInsertOffset[code] + extra;
uint32_t j;
for (j = 0; j < insert; ++j) {
const uint8_t lit = *literals;
BrotliWriteBits(lit_depths[lit], lit_bits[lit], storage_ix, storage);
++literals;
}
}
}
}
/* Acceptable density loss for the speedup on incompressible data is 2%. */
#define MIN_RATIO 0.98
#define SAMPLE_RATE 43
static BROTLI_BOOL ShouldCompress(
const uint8_t* input, size_t input_size, size_t num_literals) {
double corpus_size = (double)input_size;
if (num_literals < MIN_RATIO * corpus_size) {
return BROTLI_TRUE;
} else {
uint32_t literal_histo[256] = { 0 };
const double max_total_bit_cost = corpus_size * 8 * MIN_RATIO / SAMPLE_RATE;
size_t i;
for (i = 0; i < input_size; i += SAMPLE_RATE) {
++literal_histo[input[i]];
}
return TO_BROTLI_BOOL(BitsEntropy(literal_histo, 256) < max_total_bit_cost);
}
}
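/* Worked example (illustrative, not from the original source): for a
64 KiB block the fast path accepts as soon as num_literals < ~64225
(98% of the input); otherwise every 43rd byte (~1525 samples) is
histogrammed and the block is compressed only if the sampled entropy
stays below ~11949 bits. */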
static void RewindBitPosition(const size_t new_storage_ix,
size_t* storage_ix, uint8_t* storage) {
const size_t bitpos = new_storage_ix & 7;
const size_t mask = (1u << bitpos) - 1;
storage[new_storage_ix >> 3] &= (uint8_t)mask;
*storage_ix = new_storage_ix;
}
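/* Worked example (illustrative, not from the original source):
new_storage_ix = 19 gives bitpos = 3 and mask = 0x07, so byte 2 of the
storage keeps only its low three bits (bit positions 16..18) and any
partially written bits after them are discarded. */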
static void EmitUncompressedMetaBlock(const uint8_t* input, size_t input_size,
size_t* storage_ix, uint8_t* storage) {
BrotliStoreMetaBlockHeader(input_size, 1, storage_ix, storage);
*storage_ix = (*storage_ix + 7u) & ~7u;
memcpy(&storage[*storage_ix >> 3], input, input_size);
*storage_ix += input_size << 3;
storage[*storage_ix >> 3] = 0;
}
static BROTLI_INLINE void BrotliCompressFragmentTwoPassImpl(
MemoryManager* m, const uint8_t* input, size_t input_size,
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_bits, size_t* storage_ix, uint8_t* storage) {
/* Save the start of the first block for position and distance computations.
*/
const uint8_t* base_ip = input;
BROTLI_UNUSED(is_last);
while (input_size > 0) {
size_t block_size =
BROTLI_MIN(size_t, input_size, kCompressFragmentTwoPassBlockSize);
uint32_t* commands = command_buf;
uint8_t* literals = literal_buf;
size_t num_literals;
CreateCommands(input, block_size, input_size, base_ip, table, table_bits,
&literals, &commands);
num_literals = (size_t)(literals - literal_buf);
if (ShouldCompress(input, block_size, num_literals)) {
const size_t num_commands = (size_t)(commands - command_buf);
BrotliStoreMetaBlockHeader(block_size, 0, storage_ix, storage);
/* No block splits, no contexts. */
BrotliWriteBits(13, 0, storage_ix, storage);
StoreCommands(m, literal_buf, num_literals, command_buf, num_commands,
storage_ix, storage);
if (BROTLI_IS_OOM(m)) return;
} else {
/* Since we did not find many backward references and the entropy of
the data is close to 8 bits, we can simply emit an uncompressed block.
This makes compression of incompressible data about 3x faster. */
EmitUncompressedMetaBlock(input, block_size, storage_ix, storage);
}
input += block_size;
input_size -= block_size;
}
}
#define FOR_TABLE_BITS_(X) \
X(8) X(9) X(10) X(11) X(12) X(13) X(14) X(15) X(16) X(17)
#define BAKE_METHOD_PARAM_(B) \
static BROTLI_NOINLINE void BrotliCompressFragmentTwoPassImpl ## B( \
MemoryManager* m, const uint8_t* input, size_t input_size, \
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf, \
int* table, size_t* storage_ix, uint8_t* storage) { \
BrotliCompressFragmentTwoPassImpl(m, input, input_size, is_last, command_buf,\
literal_buf, table, B, storage_ix, storage); \
}
FOR_TABLE_BITS_(BAKE_METHOD_PARAM_)
#undef BAKE_METHOD_PARAM_
void BrotliCompressFragmentTwoPass(
MemoryManager* m, const uint8_t* input, size_t input_size,
BROTLI_BOOL is_last, uint32_t* command_buf, uint8_t* literal_buf,
int* table, size_t table_size, size_t* storage_ix, uint8_t* storage) {
const size_t initial_storage_ix = *storage_ix;
const size_t table_bits = Log2FloorNonZero(table_size);
switch (table_bits) {
#define CASE_(B) \
case B: \
BrotliCompressFragmentTwoPassImpl ## B( \
m, input, input_size, is_last, command_buf, \
literal_buf, table, storage_ix, storage); \
break;
FOR_TABLE_BITS_(CASE_)
#undef CASE_
default: assert(0); break;
}
/* If output is larger than a single uncompressed block, rewrite it. */
if (*storage_ix - initial_storage_ix > 31 + (input_size << 3)) {
RewindBitPosition(initial_storage_ix, storage_ix, storage);
EmitUncompressedMetaBlock(input, input_size, storage_ix, storage);
}
if (is_last) {
BrotliWriteBits(1, 1, storage_ix, storage); /* islast */
BrotliWriteBits(1, 1, storage_ix, storage); /* isempty */
*storage_ix = (*storage_ix + 7u) & ~7u;
}
}
#undef FOR_TABLE_BITS_
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -13,7 +13,7 @@
#ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
#include <brotli/types.h>
#include "../types.h"
#include "./memory.h"
#include "./port.h"

View File

@@ -9,8 +9,8 @@
#ifndef BROTLI_ENC_CONTEXT_H_
#define BROTLI_ENC_CONTEXT_H_
#include <brotli/port.h>
#include <brotli/types.h>
#include "../port.h"
#include "../types.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {

View File

File diff suppressed because it is too large

View File

@@ -9,7 +9,7 @@
#ifndef BROTLI_ENC_DICTIONARY_HASH_H_
#define BROTLI_ENC_DICTIONARY_HASH_H_
#include <brotli/types.h>
#include "../types.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {

View File

File diff suppressed because it is too large

View File

@@ -1,501 +0,0 @@
/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Entropy encoding (Huffman) utilities. */
#include "./entropy_encode.h"
#include <string.h> /* memset */
#include "../common/constants.h"
#include <brotli/types.h>
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
BROTLI_BOOL BrotliSetDepth(
int p0, HuffmanTree* pool, uint8_t* depth, int max_depth) {
int stack[16];
int level = 0;
int p = p0;
assert(max_depth <= 15);
stack[0] = -1;
while (BROTLI_TRUE) {
if (pool[p].index_left_ >= 0) {
level++;
if (level > max_depth) return BROTLI_FALSE;
stack[level] = pool[p].index_right_or_value_;
p = pool[p].index_left_;
continue;
} else {
depth[pool[p].index_right_or_value_] = (uint8_t)level;
}
while (level >= 0 && stack[level] == -1) level--;
if (level < 0) return BROTLI_TRUE;
p = stack[level];
stack[level] = -1;
}
}
/* Sort the root nodes, least popular first. */
static BROTLI_INLINE BROTLI_BOOL SortHuffmanTree(
const HuffmanTree* v0, const HuffmanTree* v1) {
if (v0->total_count_ != v1->total_count_) {
return TO_BROTLI_BOOL(v0->total_count_ < v1->total_count_);
}
return TO_BROTLI_BOOL(v0->index_right_or_value_ > v1->index_right_or_value_);
}
/* This function will create a Huffman tree.
The catch here is that the tree cannot be arbitrarily deep.
Brotli specifies a maximum depth of 15 bits for "code trees"
and 7 bits for "code length code trees."
count_limit is the value used as a fake minimum for the population
counts; it is raised until the tree meets the maximum-depth requirement.
This algorithm does not perform well on very long data blocks,
especially when population counts are larger than 2**tree_limit, but
we are not planning to use this with extremely long blocks.
See http://en.wikipedia.org/wiki/Huffman_coding */
void BrotliCreateHuffmanTree(const uint32_t *data,
const size_t length,
const int tree_limit,
HuffmanTree* tree,
uint8_t *depth) {
uint32_t count_limit;
HuffmanTree sentinel;
InitHuffmanTree(&sentinel, BROTLI_UINT32_MAX, -1, -1);
/* For block sizes below 64 kB, we never need to do a second iteration
of this loop. Probably all of our block sizes will be smaller than
that, so this loop is mostly of academic interest. If we actually
would need this, we would be better off with the Katajainen algorithm. */
for (count_limit = 1; ; count_limit *= 2) {
size_t n = 0;
size_t i;
size_t j;
size_t k;
for (i = length; i != 0;) {
--i;
if (data[i]) {
const uint32_t count = BROTLI_MAX(uint32_t, data[i], count_limit);
InitHuffmanTree(&tree[n++], count, -1, (int16_t)i);
}
}
if (n == 1) {
depth[tree[0].index_right_or_value_] = 1; /* Only one element. */
break;
}
SortHuffmanTreeItems(tree, n, SortHuffmanTree);
/* The nodes are:
[0, n): the sorted leaf nodes that we start with.
[n]: we add a sentinel here.
[n + 1, 2n): new parent nodes are added here, starting from
(n+1). These are naturally in ascending order.
[2n]: we add a sentinel at the end as well.
There will be (2n+1) elements at the end. */
tree[n] = sentinel;
tree[n + 1] = sentinel;
i = 0; /* Points to the next leaf node. */
j = n + 1; /* Points to the next non-leaf node. */
for (k = n - 1; k != 0; --k) {
size_t left, right;
if (tree[i].total_count_ <= tree[j].total_count_) {
left = i;
++i;
} else {
left = j;
++j;
}
if (tree[i].total_count_ <= tree[j].total_count_) {
right = i;
++i;
} else {
right = j;
++j;
}
{
/* The sentinel node becomes the parent node. */
size_t j_end = 2 * n - k;
tree[j_end].total_count_ =
tree[left].total_count_ + tree[right].total_count_;
tree[j_end].index_left_ = (int16_t)left;
tree[j_end].index_right_or_value_ = (int16_t)right;
/* Add back the last sentinel node. */
tree[j_end + 1] = sentinel;
}
}
if (BrotliSetDepth((int)(2 * n - 1), &tree[0], depth, tree_limit)) {
/* We need to pack the Huffman tree in tree_limit bits. If this was not
successful, add fake entities to the lowest values and retry. */
break;
}
}
}
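/* Note (hedged): with a very skewed histogram such as
{ 1000, 1, 1, ..., 1 } the first pass can produce depths beyond
tree_limit; doubling count_limit clamps the small counts upward, which
flattens the tree until BrotliSetDepth() succeeds. */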
static void Reverse(uint8_t* v, size_t start, size_t end) {
--end;
while (start < end) {
uint8_t tmp = v[start];
v[start] = v[end];
v[end] = tmp;
++start;
--end;
}
}
static void BrotliWriteHuffmanTreeRepetitions(
const uint8_t previous_value,
const uint8_t value,
size_t repetitions,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
assert(repetitions > 0);
if (previous_value != value) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions == 7) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions < 3) {
size_t i;
for (i = 0; i < repetitions; ++i) {
tree[*tree_size] = value;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
size_t start = *tree_size;
repetitions -= 3;
while (BROTLI_TRUE) {
tree[*tree_size] = BROTLI_REPEAT_PREVIOUS_CODE_LENGTH;
extra_bits_data[*tree_size] = repetitions & 0x3;
++(*tree_size);
repetitions >>= 2;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, *tree_size);
}
}
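/* Worked example (illustrative, not from the original source): with
previous_value == value and repetitions = 10, the "-= 3" leaves 7; the
loop emits code 16 with extra bits 3, then code 16 with extra bits 0,
and the Reverse() calls flip the pair so the stream carries (16,0)
followed by (16,3). */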
static void BrotliWriteHuffmanTreeRepetitionsZeros(
size_t repetitions,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
if (repetitions == 11) {
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
--repetitions;
}
if (repetitions < 3) {
size_t i;
for (i = 0; i < repetitions; ++i) {
tree[*tree_size] = 0;
extra_bits_data[*tree_size] = 0;
++(*tree_size);
}
} else {
size_t start = *tree_size;
repetitions -= 3;
while (BROTLI_TRUE) {
tree[*tree_size] = BROTLI_REPEAT_ZERO_CODE_LENGTH;
extra_bits_data[*tree_size] = repetitions & 0x7;
++(*tree_size);
repetitions >>= 3;
if (repetitions == 0) {
break;
}
--repetitions;
}
Reverse(tree, start, *tree_size);
Reverse(extra_bits_data, start, *tree_size);
}
}
void BrotliOptimizeHuffmanCountsForRle(size_t length, uint32_t* counts,
uint8_t* good_for_rle) {
size_t nonzero_count = 0;
size_t stride;
size_t limit;
size_t sum;
const size_t streak_limit = 1240;
/* 1) Let's make the Huffman code more compatible with RLE encoding. */
size_t i;
for (i = 0; i < length; i++) {
if (counts[i]) {
++nonzero_count;
}
}
if (nonzero_count < 16) {
return;
}
while (length != 0 && counts[length - 1] == 0) {
--length;
}
if (length == 0) {
return; /* All zeros. */
}
/* Now counts[0..length - 1] does not have trailing zeros. */
{
size_t nonzeros = 0;
uint32_t smallest_nonzero = 1 << 30;
for (i = 0; i < length; ++i) {
if (counts[i] != 0) {
++nonzeros;
if (smallest_nonzero > counts[i]) {
smallest_nonzero = counts[i];
}
}
}
if (nonzeros < 5) {
/* Small histogram will model it well. */
return;
}
if (smallest_nonzero < 4) {
size_t zeros = length - nonzeros;
if (zeros < 6) {
for (i = 1; i < length - 1; ++i) {
if (counts[i - 1] != 0 && counts[i] == 0 && counts[i + 1] != 0) {
counts[i] = 1;
}
}
}
}
if (nonzeros < 28) {
return;
}
}
/* 2) Let's mark all population counts that already can be encoded
with an RLE code. */
memset(good_for_rle, 0, length);
{
/* Let's not spoil any of the existing good RLE codes.
Mark any run of 0's of length at least 5 as good_for_rle.
Mark any run of non-0's of length at least 7 as good_for_rle. */
uint32_t symbol = counts[0];
size_t step = 0;
for (i = 0; i <= length; ++i) {
if (i == length || counts[i] != symbol) {
if ((symbol == 0 && step >= 5) ||
(symbol != 0 && step >= 7)) {
size_t k;
for (k = 0; k < step; ++k) {
good_for_rle[i - k - 1] = 1;
}
}
step = 1;
if (i != length) {
symbol = counts[i];
}
} else {
++step;
}
}
}
/* 3) Let's replace those population counts that lead to more RLE codes.
Math here is in 24.8 fixed point representation. */
stride = 0;
limit = 256 * (counts[0] + counts[1] + counts[2]) / 3 + 420;
sum = 0;
for (i = 0; i <= length; ++i) {
if (i == length || good_for_rle[i] ||
(i != 0 && good_for_rle[i - 1]) ||
(256 * counts[i] - limit + streak_limit) >= 2 * streak_limit) {
if (stride >= 4 || (stride >= 3 && sum == 0)) {
size_t k;
/* The stride must end; collapse what we have, if we have enough (4). */
size_t count = (sum + stride / 2) / stride;
if (count == 0) {
count = 1;
}
if (sum == 0) {
/* Don't make an all zeros stride to be upgraded to ones. */
count = 0;
}
for (k = 0; k < stride; ++k) {
/* We don't want to change value at counts[i],
that is already belonging to the next stride. Thus - 1. */
counts[i - k - 1] = (uint32_t)count;
}
}
stride = 0;
sum = 0;
if (i < length - 2) {
/* All interesting strides have a count of at least 4, */
/* at least when they are non-zero. */
limit = 256 * (counts[i] + counts[i + 1] + counts[i + 2]) / 3 + 420;
} else if (i < length) {
limit = 256 * counts[i];
} else {
limit = 0;
}
}
++stride;
if (i != length) {
sum += counts[i];
if (stride >= 4) {
limit = (256 * sum + stride / 2) / stride;
}
if (stride == 4) {
limit += 120;
}
}
}
}
static void DecideOverRleUse(const uint8_t* depth, const size_t length,
BROTLI_BOOL *use_rle_for_non_zero,
BROTLI_BOOL *use_rle_for_zero) {
size_t total_reps_zero = 0;
size_t total_reps_non_zero = 0;
size_t count_reps_zero = 1;
size_t count_reps_non_zero = 1;
size_t i;
for (i = 0; i < length;) {
const uint8_t value = depth[i];
size_t reps = 1;
size_t k;
for (k = i + 1; k < length && depth[k] == value; ++k) {
++reps;
}
if (reps >= 3 && value == 0) {
total_reps_zero += reps;
++count_reps_zero;
}
if (reps >= 4 && value != 0) {
total_reps_non_zero += reps;
++count_reps_non_zero;
}
i += reps;
}
*use_rle_for_non_zero =
TO_BROTLI_BOOL(total_reps_non_zero > count_reps_non_zero * 2);
*use_rle_for_zero = TO_BROTLI_BOOL(total_reps_zero > count_reps_zero * 2);
}
void BrotliWriteHuffmanTree(const uint8_t* depth,
size_t length,
size_t* tree_size,
uint8_t* tree,
uint8_t* extra_bits_data) {
uint8_t previous_value = BROTLI_INITIAL_REPEATED_CODE_LENGTH;
size_t i;
BROTLI_BOOL use_rle_for_non_zero = BROTLI_FALSE;
BROTLI_BOOL use_rle_for_zero = BROTLI_FALSE;
/* Throw away trailing zeros. */
size_t new_length = length;
for (i = 0; i < length; ++i) {
if (depth[length - i - 1] == 0) {
--new_length;
} else {
break;
}
}
/* First gather statistics on whether it is a good idea to do RLE. */
if (length > 50) {
/* Find RLE coding for longer codes.
Shorter codes seem not to benefit from RLE. */
DecideOverRleUse(depth, new_length,
&use_rle_for_non_zero, &use_rle_for_zero);
}
/* Actual RLE coding. */
for (i = 0; i < new_length;) {
const uint8_t value = depth[i];
size_t reps = 1;
if ((value != 0 && use_rle_for_non_zero) ||
(value == 0 && use_rle_for_zero)) {
size_t k;
for (k = i + 1; k < new_length && depth[k] == value; ++k) {
++reps;
}
}
if (value == 0) {
BrotliWriteHuffmanTreeRepetitionsZeros(
reps, tree_size, tree, extra_bits_data);
} else {
BrotliWriteHuffmanTreeRepetitions(previous_value,
value, reps, tree_size,
tree, extra_bits_data);
previous_value = value;
}
i += reps;
}
}
static uint16_t BrotliReverseBits(size_t num_bits, uint16_t bits) {
static const size_t kLut[16] = { /* Pre-reversed 4-bit values. */
0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf
};
size_t retval = kLut[bits & 0xf];
size_t i;
for (i = 4; i < num_bits; i += 4) {
retval <<= 4;
bits = (uint16_t)(bits >> 4);
retval |= kLut[bits & 0xf];
}
retval >>= ((0 - num_bits) & 0x3);
return (uint16_t)retval;
}
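/* Worked example (illustrative, not from the original source): for
num_bits = 5 and bits = 0x13 (binary 10011) the LUT passes build 0xC8,
and ">> ((0 - 5) & 0x3)" shifts right by 3, returning 0x19
(binary 11001), the 5-bit reversal. */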
/* 0..15 are values for bits */
#define MAX_HUFFMAN_BITS 16
void BrotliConvertBitDepthsToSymbols(const uint8_t *depth,
size_t len,
uint16_t *bits) {
/* In Brotli, all bit depths are [1..15]
0 bit depth means that the symbol does not exist. */
uint16_t bl_count[MAX_HUFFMAN_BITS] = { 0 };
uint16_t next_code[MAX_HUFFMAN_BITS];
size_t i;
int code = 0;
for (i = 0; i < len; ++i) {
++bl_count[depth[i]];
}
bl_count[0] = 0;
next_code[0] = 0;
for (i = 1; i < MAX_HUFFMAN_BITS; ++i) {
code = (code + bl_count[i - 1]) << 1;
next_code[i] = (uint16_t)code;
}
for (i = 0; i < len; ++i) {
if (depth[i]) {
bits[i] = BrotliReverseBits(depth[i], next_code[depth[i]]++);
}
}
}
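/* Worked example (illustrative, not from the original source): depths
{1, 2, 3, 3} give bl_count[1..3] = {1, 1, 2} and next_code[1..3] =
{0, 2, 6}; after bit reversal the assigned codes are
bits = {0, 1, 3, 7}. */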
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -9,7 +9,7 @@
#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
#define BROTLI_ENC_ENTROPY_ENCODE_H_
#include <brotli/types.h>
#include "../types.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)

View File

@@ -10,8 +10,8 @@
#define BROTLI_ENC_ENTROPY_ENCODE_STATIC_H_
#include "../common/constants.h"
#include <brotli/port.h>
#include <brotli/types.h>
#include "../port.h"
#include "../types.h"
#include "./write_bits.h"
#if defined(__cplusplus) || defined(c_plusplus)

View File

@@ -11,8 +11,8 @@
#include <math.h>
#include <brotli/types.h>
#include <brotli/port.h>
#include "../types.h"
#include "../port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {

View File

@@ -9,7 +9,7 @@
#ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
#define BROTLI_ENC_FIND_MATCH_LENGTH_H_
#include <brotli/types.h>
#include "../types.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)

View File

@@ -14,7 +14,7 @@
#include "../common/constants.h"
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "../types.h"
#include "./fast_log.h"
#include "./find_match_length.h"
#include "./memory.h"

View File

@@ -1,97 +0,0 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Build per-context histograms of literals, commands and distance codes. */
#include "./histogram.h"
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
typedef struct BlockSplitIterator {
const BlockSplit* split_; /* Not owned. */
size_t idx_;
size_t type_;
size_t length_;
} BlockSplitIterator;
static void InitBlockSplitIterator(BlockSplitIterator* self,
const BlockSplit* split) {
self->split_ = split;
self->idx_ = 0;
self->type_ = 0;
self->length_ = split->lengths ? split->lengths[0] : 0;
}
static void BlockSplitIteratorNext(BlockSplitIterator* self) {
if (self->length_ == 0) {
++self->idx_;
self->type_ = self->split_->types[self->idx_];
self->length_ = self->split_->lengths[self->idx_];
}
--self->length_;
}
void BrotliBuildHistogramsWithContext(
const Command* cmds, const size_t num_commands,
const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
const ContextType* context_modes, HistogramLiteral* literal_histograms,
HistogramCommand* insert_and_copy_histograms,
HistogramDistance* copy_dist_histograms) {
size_t pos = start_pos;
BlockSplitIterator literal_it;
BlockSplitIterator insert_and_copy_it;
BlockSplitIterator dist_it;
size_t i;
InitBlockSplitIterator(&literal_it, literal_split);
InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
InitBlockSplitIterator(&dist_it, dist_split);
for (i = 0; i < num_commands; ++i) {
const Command* cmd = &cmds[i];
size_t j;
BlockSplitIteratorNext(&insert_and_copy_it);
HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
cmd->cmd_prefix_);
for (j = cmd->insert_len_; j != 0; --j) {
size_t context;
BlockSplitIteratorNext(&literal_it);
context = context_modes ?
((literal_it.type_ << BROTLI_LITERAL_CONTEXT_BITS) +
Context(prev_byte, prev_byte2, context_modes[literal_it.type_])) :
literal_it.type_;
HistogramAddLiteral(&literal_histograms[context],
ringbuffer[pos & mask]);
prev_byte2 = prev_byte;
prev_byte = ringbuffer[pos & mask];
++pos;
}
pos += CommandCopyLen(cmd);
if (CommandCopyLen(cmd)) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd->cmd_prefix_ >= 128) {
size_t context;
BlockSplitIteratorNext(&dist_it);
context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
CommandDistanceContext(cmd);
HistogramAddDistance(&copy_dist_histograms[context],
cmd->dist_prefix_);
}
}
}
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -12,7 +12,7 @@
#include <string.h> /* memset */
#include "../common/constants.h"
#include <brotli/types.h>
#include "../types.h"
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"

View File

@@ -1,175 +0,0 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Literal cost model to allow backward reference replacement to be efficient.
*/
#include "./literal_cost.h"
#include <brotli/types.h>
#include "./fast_log.h"
#include "./port.h"
#include "./utf8_util.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
if (c < 128) {
return 0; /* Next one is the 'Byte 1' again. */
} else if (c >= 192) { /* Next one is the 'Byte 2' of utf-8 encoding. */
return BROTLI_MIN(size_t, 1, clamp);
} else {
/* Use the last byte to decide whether this ends the sequence. */
if (last < 0xe0) {
return 0; /* Completed two or three byte coding. */
} else { /* Next one is the 'Byte 3' of utf-8 encoding. */
return BROTLI_MIN(size_t, 2, clamp);
}
}
}
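/* Worked example (illustrative, not from the original source): for the
UTF-8 sequence 0xC3 0xA9 ("e" with acute accent), the lead byte 0xC3
returns 1 ("Byte 2" expected next) and the continuation byte 0xA9
returns 0, because last = 0xC3 < 0xE0 means the sequence is complete. */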
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
const uint8_t *data) {
size_t counts[3] = { 0 };
size_t max_utf8 = 1; /* should be 2, but 1 compresses better. */
size_t last_c = 0;
size_t i;
for (i = 0; i < len; ++i) {
size_t c = data[(pos + i) & mask];
++counts[UTF8Position(last_c, c, 2)];
last_c = c;
}
if (counts[2] < 500) {
max_utf8 = 1;
}
if (counts[1] + counts[2] < 25) {
max_utf8 = 0;
}
return max_utf8;
}
static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost) {
/* max_utf8 is 0 (normal ASCII single byte modeling),
1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
size_t histogram[3][256] = { { 0 } };
size_t window_half = 495;
size_t in_window = BROTLI_MIN(size_t, window_half, len);
size_t in_window_utf8[3] = { 0 };
size_t i;
{ /* Bootstrap histograms. */
size_t last_c = 0;
size_t utf8_pos = 0;
for (i = 0; i < in_window; ++i) {
size_t c = data[(pos + i) & mask];
++histogram[utf8_pos][c];
++in_window_utf8[utf8_pos];
utf8_pos = UTF8Position(last_c, c, max_utf8);
last_c = c;
}
}
/* Compute bit costs with sliding window. */
for (i = 0; i < len; ++i) {
if (i >= window_half) {
/* Remove a byte in the past. */
size_t c =
i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
size_t last_c =
i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
--histogram[utf8_pos2][data[(pos + i - window_half) & mask]];
--in_window_utf8[utf8_pos2];
}
if (i + window_half < len) {
/* Add a byte in the future. */
size_t c = data[(pos + i + window_half - 1) & mask];
size_t last_c = data[(pos + i + window_half - 2) & mask];
size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
++histogram[utf8_pos2][data[(pos + i + window_half) & mask]];
++in_window_utf8[utf8_pos2];
}
{
size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
size_t masked_pos = (pos + i) & mask;
size_t histo = histogram[utf8_pos][data[masked_pos]];
double lit_cost;
if (histo == 0) {
histo = 1;
}
lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
lit_cost += 0.02905;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
/* Make the first bytes more expensive -- seems to help, not sure why.
Perhaps because the entropy source is changing its properties
rapidly in the beginning of the file, perhaps because the beginning
of the data is a statistical "anomaly". */
if (i < 2000) {
lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
}
cost[i] = (float)lit_cost;
}
}
}
void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
const uint8_t *data, float *cost) {
if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, cost);
return;
} else {
size_t histogram[256] = { 0 };
size_t window_half = 2000;
size_t in_window = BROTLI_MIN(size_t, window_half, len);
/* Bootstrap histogram. */
size_t i;
for (i = 0; i < in_window; ++i) {
++histogram[data[(pos + i) & mask]];
}
/* Compute bit costs with sliding window. */
for (i = 0; i < len; ++i) {
size_t histo;
if (i >= window_half) {
/* Remove a byte in the past. */
--histogram[data[(pos + i - window_half) & mask]];
--in_window;
}
if (i + window_half < len) {
/* Add a byte in the future. */
++histogram[data[(pos + i + window_half) & mask]];
++in_window;
}
histo = histogram[data[(pos + i) & mask]];
if (histo == 0) {
histo = 1;
}
{
double lit_cost = FastLog2(in_window) - FastLog2(histo);
lit_cost += 0.029;
if (lit_cost < 1.0) {
lit_cost *= 0.5;
lit_cost += 0.5;
}
cost[i] = (float)lit_cost;
}
}
}
}
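/* Note (hedged): in both paths the core cost is
FastLog2(in_window) - FastLog2(histo) = -log2(histo / in_window), the
Shannon cost of the byte under the local window model; a byte seen 500
times in a 4000-byte window costs log2(8) = 3 bits before the small
constant bias is added. */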
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -10,7 +10,7 @@
#ifndef BROTLI_ENC_LITERAL_COST_H_
#define BROTLI_ENC_LITERAL_COST_H_
#include <brotli/types.h>
#include "../types.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)

View File

@@ -1,181 +0,0 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Memory management routines: the manager tracks allocations so that they
can all be released at once, e.g. after an out-of-memory condition. */
#include "./memory.h"
#include <assert.h>
#include <stdlib.h> /* exit, free, malloc */
#include <string.h> /* memcpy */
#include <brotli/types.h>
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define MAX_PERM_ALLOCATED 128
#define MAX_NEW_ALLOCATED 64
#define MAX_NEW_FREED 64
#define PERM_ALLOCATED_OFFSET 0
#define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
#define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
static void* DefaultAllocFunc(void* opaque, size_t size) {
BROTLI_UNUSED(opaque);
return malloc(size);
}
static void DefaultFreeFunc(void* opaque, void* address) {
BROTLI_UNUSED(opaque);
free(address);
}
void BrotliInitMemoryManager(
MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
void* opaque) {
if (!alloc_func) {
m->alloc_func = DefaultAllocFunc;
m->free_func = DefaultFreeFunc;
m->opaque = 0;
} else {
m->alloc_func = alloc_func;
m->free_func = free_func;
m->opaque = opaque;
}
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
m->is_oom = BROTLI_FALSE;
m->perm_allocated = 0;
m->new_allocated = 0;
m->new_freed = 0;
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
}
#if defined(BROTLI_ENCODER_EXIT_ON_OOM)
void* BrotliAllocate(MemoryManager* m, size_t n) {
void* result = m->alloc_func(m->opaque, n);
if (!result) exit(EXIT_FAILURE);
return result;
}
void BrotliFree(MemoryManager* m, void* p) {
m->free_func(m->opaque, p);
}
void BrotliWipeOutMemoryManager(MemoryManager* m) {
BROTLI_UNUSED(m);
}
#else /* BROTLI_ENCODER_EXIT_ON_OOM */
static void SortPointers(void** items, const size_t n) {
/* Shell sort. */
static const size_t gaps[] = {23, 10, 4, 1};
int g = 0;
for (; g < 4; ++g) {
size_t gap = gaps[g];
size_t i;
for (i = gap; i < n; ++i) {
size_t j = i;
void* tmp = items[i];
for (; j >= gap && tmp < items[j - gap]; j -= gap) {
items[j] = items[j - gap];
}
items[j] = tmp;
}
}
}
static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
size_t a_read_index = 0;
size_t b_read_index = 0;
size_t a_write_index = 0;
size_t b_write_index = 0;
size_t annihilated = 0;
while (a_read_index < a_len && b_read_index < b_len) {
if (a[a_read_index] == b[b_read_index]) {
a_read_index++;
b_read_index++;
annihilated++;
} else if (a[a_read_index] < b[b_read_index]) {
a[a_write_index++] = a[a_read_index++];
} else {
b[b_write_index++] = b[b_read_index++];
}
}
while (a_read_index < a_len) a[a_write_index++] = a[a_read_index++];
while (b_read_index < b_len) b[b_write_index++] = b[b_read_index++];
return annihilated;
}
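/* Worked example (illustrative, not from the original source): with
sorted lists a = {p1, p3, p5} and b = {p3}, the merge walk keeps p1,
cancels the matching p3 from both lists, drains p5, and returns
annihilated = 1, leaving a = {p1, p5} and b empty. */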
static void CollectGarbagePointers(MemoryManager* m) {
size_t annihilated;
SortPointers(m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated);
SortPointers(m->pointers + NEW_FREED_OFFSET, m->new_freed);
annihilated = Annihilate(
m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated,
m->pointers + NEW_FREED_OFFSET, m->new_freed);
m->new_allocated -= annihilated;
m->new_freed -= annihilated;
if (m->new_freed != 0) {
annihilated = Annihilate(
m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated,
m->pointers + NEW_FREED_OFFSET, m->new_freed);
m->perm_allocated -= annihilated;
m->new_freed -= annihilated;
assert(m->new_freed == 0);
}
if (m->new_allocated != 0) {
assert(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
memcpy(m->pointers + PERM_ALLOCATED_OFFSET + m->perm_allocated,
m->pointers + NEW_ALLOCATED_OFFSET,
sizeof(void*) * m->new_allocated);
m->perm_allocated += m->new_allocated;
m->new_allocated = 0;
SortPointers(m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated);
}
}
void* BrotliAllocate(MemoryManager* m, size_t n) {
void* result = m->alloc_func(m->opaque, n);
if (!result) {
m->is_oom = BROTLI_TRUE;
return NULL;
}
if (m->new_allocated == MAX_NEW_ALLOCATED) CollectGarbagePointers(m);
m->pointers[NEW_ALLOCATED_OFFSET + (m->new_allocated++)] = result;
return result;
}
void BrotliFree(MemoryManager* m, void* p) {
if (!p) return;
m->free_func(m->opaque, p);
if (m->new_freed == MAX_NEW_FREED) CollectGarbagePointers(m);
m->pointers[NEW_FREED_OFFSET + (m->new_freed++)] = p;
}
void BrotliWipeOutMemoryManager(MemoryManager* m) {
size_t i;
CollectGarbagePointers(m);
/* Now all unfreed pointers are in perm-allocated list. */
for (i = 0; i < m->perm_allocated; ++i) {
m->free_func(m->opaque, m->pointers[PERM_ALLOCATED_OFFSET + i]);
}
m->perm_allocated = 0;
}
#endif /* BROTLI_ENCODER_EXIT_ON_OOM */
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -9,7 +9,7 @@
#ifndef BROTLI_ENC_MEMORY_H_
#define BROTLI_ENC_MEMORY_H_
#include <brotli/types.h>
#include "../types.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)

View File

@@ -1,528 +0,0 @@
/* Copyright 2015 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Algorithms for distributing the literals and commands of a metablock between
block types and contexts. */
#include "./metablock.h"
#include "../common/constants.h"
#include <brotli/types.h>
#include "./bit_cost.h"
#include "./block_splitter.h"
#include "./cluster.h"
#include "./context.h"
#include "./entropy_encode.h"
#include "./histogram.h"
#include "./memory.h"
#include "./port.h"
#include "./quality.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
void BrotliBuildMetaBlock(MemoryManager* m,
const uint8_t* ringbuffer,
const size_t pos,
const size_t mask,
const BrotliEncoderParams* params,
uint8_t prev_byte,
uint8_t prev_byte2,
const Command* cmds,
size_t num_commands,
ContextType literal_context_mode,
MetaBlockSplit* mb) {
/* Histogram ids need to fit in one byte. */
static const size_t kMaxNumberOfHistograms = 256;
HistogramDistance* distance_histograms;
HistogramLiteral* literal_histograms;
ContextType* literal_context_modes = NULL;
size_t literal_histograms_size;
size_t distance_histograms_size;
size_t i;
size_t literal_context_multiplier = 1;
BrotliSplitBlock(m, cmds, num_commands,
ringbuffer, pos, mask, params,
&mb->literal_split,
&mb->command_split,
&mb->distance_split);
if (BROTLI_IS_OOM(m)) return;
if (!params->disable_literal_context_modeling) {
literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS;
literal_context_modes =
BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < mb->literal_split.num_types; ++i) {
literal_context_modes[i] = literal_context_mode;
}
}
literal_histograms_size =
mb->literal_split.num_types * literal_context_multiplier;
literal_histograms =
BROTLI_ALLOC(m, HistogramLiteral, literal_histograms_size);
if (BROTLI_IS_OOM(m)) return;
ClearHistogramsLiteral(literal_histograms, literal_histograms_size);
distance_histograms_size =
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
distance_histograms =
BROTLI_ALLOC(m, HistogramDistance, distance_histograms_size);
if (BROTLI_IS_OOM(m)) return;
ClearHistogramsDistance(distance_histograms, distance_histograms_size);
assert(mb->command_histograms == 0);
mb->command_histograms_size = mb->command_split.num_types;
mb->command_histograms =
BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
if (BROTLI_IS_OOM(m)) return;
ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
BrotliBuildHistogramsWithContext(cmds, num_commands,
&mb->literal_split, &mb->command_split, &mb->distance_split,
ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
literal_histograms, mb->command_histograms, distance_histograms);
BROTLI_FREE(m, literal_context_modes);
assert(mb->literal_context_map == 0);
mb->literal_context_map_size =
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
mb->literal_context_map =
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
if (BROTLI_IS_OOM(m)) return;
assert(mb->literal_histograms == 0);
mb->literal_histograms_size = mb->literal_context_map_size;
mb->literal_histograms =
BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
if (BROTLI_IS_OOM(m)) return;
BrotliClusterHistogramsLiteral(m, literal_histograms, literal_histograms_size,
kMaxNumberOfHistograms, mb->literal_histograms,
&mb->literal_histograms_size, mb->literal_context_map);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, literal_histograms);
if (params->disable_literal_context_modeling) {
/* Distribute assignment to all contexts. */
for (i = mb->literal_split.num_types; i != 0;) {
size_t j = 0;
i--;
for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) {
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
mb->literal_context_map[i];
}
}
}
assert(mb->distance_context_map == 0);
mb->distance_context_map_size =
mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
mb->distance_context_map =
BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
if (BROTLI_IS_OOM(m)) return;
assert(mb->distance_histograms == 0);
mb->distance_histograms_size = mb->distance_context_map_size;
mb->distance_histograms =
BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
if (BROTLI_IS_OOM(m)) return;
BrotliClusterHistogramsDistance(m, distance_histograms,
mb->distance_context_map_size,
kMaxNumberOfHistograms,
mb->distance_histograms,
&mb->distance_histograms_size,
mb->distance_context_map);
if (BROTLI_IS_OOM(m)) return;
BROTLI_FREE(m, distance_histograms);
}
#define FN(X) X ## Literal
#include "./metablock_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Command
#include "./metablock_inc.h" /* NOLINT(build/include) */
#undef FN
#define FN(X) X ## Distance
#include "./metablock_inc.h" /* NOLINT(build/include) */
#undef FN
#define BROTLI_MAX_STATIC_CONTEXTS 13
/* Greedy block splitter for one block category (literal, command or distance).
Gathers histograms for all context buckets. */
typedef struct ContextBlockSplitter {
/* Alphabet size of particular block category. */
size_t alphabet_size_;
size_t num_contexts_;
size_t max_block_types_;
/* We collect at least this many symbols for each block. */
size_t min_block_size_;
/* We merge histograms A and B if
entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
where A is the current histogram and B is the histogram of the last or the
second last block type. */
double split_threshold_;
size_t num_blocks_;
BlockSplit* split_; /* not owned */
HistogramLiteral* histograms_; /* not owned */
size_t* histograms_size_; /* not owned */
/* The number of symbols that we want to collect before deciding on whether
or not to merge the block with a previous one or emit a new block. */
size_t target_block_size_;
/* The number of symbols in the current histogram. */
size_t block_size_;
/* Offset of the current histogram. */
size_t curr_histogram_ix_;
/* Offset of the histograms of the previous two block types. */
size_t last_histogram_ix_[2];
/* Entropy of the previous two block types. */
double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS];
/* The number of times we merged the current block with the last one. */
size_t merge_last_count_;
} ContextBlockSplitter;
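/* Worked example of the merge rule above (numbers are hypothetical):
   with entropy(A) == 2.0 bits, entropy(B) == 2.0 bits and
   split_threshold_ == 0.75, a combined entropy(A+B) of 4.5 bits
   satisfies 4.5 < 2.0 + 2.0 + 0.75, so the blocks are merged; at 5.0
   bits they would stay split. */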
static void InitContextBlockSplitter(
MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
size_t num_contexts, size_t min_block_size, double split_threshold,
size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
size_t* histograms_size) {
size_t max_num_blocks = num_symbols / min_block_size + 1;
size_t max_num_types;
assert(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS);
self->alphabet_size_ = alphabet_size;
self->num_contexts_ = num_contexts;
self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
self->min_block_size_ = min_block_size;
self->split_threshold_ = split_threshold;
self->num_blocks_ = 0;
self->split_ = split;
self->histograms_size_ = histograms_size;
self->target_block_size_ = min_block_size;
self->block_size_ = 0;
self->curr_histogram_ix_ = 0;
self->merge_last_count_ = 0;
  /* We have to allocate one more histogram than the maximum number of block
     types, so that the current histogram still has a slot when the
     meta-block is too big. */
max_num_types =
BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
BROTLI_ENSURE_CAPACITY(m, uint8_t,
split->types, split->types_alloc_size, max_num_blocks);
BROTLI_ENSURE_CAPACITY(m, uint32_t,
split->lengths, split->lengths_alloc_size, max_num_blocks);
if (BROTLI_IS_OOM(m)) return;
  split->num_blocks = max_num_blocks;
assert(*histograms == 0);
*histograms_size = max_num_types * num_contexts;
*histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
self->histograms_ = *histograms;
if (BROTLI_IS_OOM(m)) return;
/* Clear only current histogram. */
ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
}
/* Does one of three things:
(1) emits the current block with a new block type;
(2) emits the current block with the type of the second last block;
(3) merges the current block with the last block. */
static void ContextBlockSplitterFinishBlock(
ContextBlockSplitter* self, MemoryManager* m, BROTLI_BOOL is_final) {
BlockSplit* split = self->split_;
const size_t num_contexts = self->num_contexts_;
double* last_entropy = self->last_entropy_;
HistogramLiteral* histograms = self->histograms_;
if (self->block_size_ < self->min_block_size_) {
self->block_size_ = self->min_block_size_;
}
if (self->num_blocks_ == 0) {
size_t i;
/* Create first block. */
split->lengths[0] = (uint32_t)self->block_size_;
split->types[0] = 0;
for (i = 0; i < num_contexts; ++i) {
last_entropy[i] =
BitsEntropy(histograms[i].data_, self->alphabet_size_);
last_entropy[num_contexts + i] = last_entropy[i];
}
++self->num_blocks_;
++split->num_types;
self->curr_histogram_ix_ += num_contexts;
if (self->curr_histogram_ix_ < *self->histograms_size_) {
ClearHistogramsLiteral(
&self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
}
self->block_size_ = 0;
} else if (self->block_size_ > 0) {
/* Try merging the set of histograms for the current block type with the
respective set of histograms for the last and second last block types.
       Decide on the split based on the total reduction of entropy across
all contexts. */
double entropy[BROTLI_MAX_STATIC_CONTEXTS];
HistogramLiteral* combined_histo =
BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS];
double diff[2] = { 0.0 };
size_t i;
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < num_contexts; ++i) {
size_t curr_histo_ix = self->curr_histogram_ix_ + i;
size_t j;
entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
self->alphabet_size_);
for (j = 0; j < 2; ++j) {
size_t jx = j * num_contexts + i;
size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
combined_histo[jx] = histograms[curr_histo_ix];
HistogramAddHistogramLiteral(&combined_histo[jx],
&histograms[last_histogram_ix]);
combined_entropy[jx] = BitsEntropy(
&combined_histo[jx].data_[0], self->alphabet_size_);
diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
}
}
if (split->num_types < self->max_block_types_ &&
diff[0] > self->split_threshold_ &&
diff[1] > self->split_threshold_) {
/* Create new block. */
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
split->types[self->num_blocks_] = (uint8_t)split->num_types;
self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
self->last_histogram_ix_[0] = split->num_types * num_contexts;
for (i = 0; i < num_contexts; ++i) {
last_entropy[num_contexts + i] = last_entropy[i];
last_entropy[i] = entropy[i];
}
++self->num_blocks_;
++split->num_types;
self->curr_histogram_ix_ += num_contexts;
if (self->curr_histogram_ix_ < *self->histograms_size_) {
ClearHistogramsLiteral(
&self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
}
self->block_size_ = 0;
self->merge_last_count_ = 0;
self->target_block_size_ = self->min_block_size_;
} else if (diff[1] < diff[0] - 20.0) {
/* Combine this block with second last block. */
split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
for (i = 0; i < num_contexts; ++i) {
histograms[self->last_histogram_ix_[0] + i] =
combined_histo[num_contexts + i];
last_entropy[num_contexts + i] = last_entropy[i];
last_entropy[i] = combined_entropy[num_contexts + i];
HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
}
++self->num_blocks_;
self->block_size_ = 0;
self->merge_last_count_ = 0;
self->target_block_size_ = self->min_block_size_;
} else {
/* Combine this block with last block. */
split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
for (i = 0; i < num_contexts; ++i) {
histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
last_entropy[i] = combined_entropy[i];
if (split->num_types == 1) {
last_entropy[num_contexts + i] = last_entropy[i];
}
HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
}
self->block_size_ = 0;
if (++self->merge_last_count_ > 1) {
self->target_block_size_ += self->min_block_size_;
}
}
BROTLI_FREE(m, combined_histo);
}
if (is_final) {
*self->histograms_size_ = split->num_types * num_contexts;
split->num_blocks = self->num_blocks_;
}
}
/* Adds the next symbol to the current block type and context. When the
current block reaches the target size, decides on merging the block. */
static void ContextBlockSplitterAddSymbol(
ContextBlockSplitter* self, MemoryManager* m,
size_t symbol, size_t context) {
HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context],
symbol);
++self->block_size_;
if (self->block_size_ == self->target_block_size_) {
ContextBlockSplitterFinishBlock(self, m, /* is_final = */ BROTLI_FALSE);
if (BROTLI_IS_OOM(m)) return;
}
}
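/* E.g. with min_block_size_ == 512 the splitter re-evaluates after
   every 512 symbols at first; from the second consecutive merge with
   the last block onward, target_block_size_ grows by another
   min_block_size_, so stable data is re-tested less often. */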
static void MapStaticContexts(MemoryManager* m,
size_t num_contexts,
const uint32_t* static_context_map,
MetaBlockSplit* mb) {
size_t i;
assert(mb->literal_context_map == 0);
mb->literal_context_map_size =
mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
mb->literal_context_map =
BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < mb->literal_split.num_types; ++i) {
uint32_t offset = (uint32_t)(i * num_contexts);
size_t j;
for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) {
mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
offset + static_context_map[j];
}
}
}
static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
const size_t num_contexts, const uint32_t* static_context_map,
const Command *commands, size_t n_commands, MetaBlockSplit* mb) {
union {
BlockSplitterLiteral plain;
ContextBlockSplitter ctx;
} lit_blocks;
BlockSplitterCommand cmd_blocks;
BlockSplitterDistance dist_blocks;
size_t num_literals = 0;
size_t i;
for (i = 0; i < n_commands; ++i) {
num_literals += commands[i].insert_len_;
}
if (num_contexts == 1) {
InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0,
num_literals, &mb->literal_split, &mb->literal_histograms,
&mb->literal_histograms_size);
} else {
InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0,
num_literals, &mb->literal_split, &mb->literal_histograms,
&mb->literal_histograms_size);
}
if (BROTLI_IS_OOM(m)) return;
InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
500.0, n_commands, &mb->command_split, &mb->command_histograms,
&mb->command_histograms_size);
if (BROTLI_IS_OOM(m)) return;
InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
&mb->distance_split, &mb->distance_histograms,
&mb->distance_histograms_size);
if (BROTLI_IS_OOM(m)) return;
for (i = 0; i < n_commands; ++i) {
const Command cmd = commands[i];
size_t j;
BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
for (j = cmd.insert_len_; j != 0; --j) {
uint8_t literal = ringbuffer[pos & mask];
if (num_contexts == 1) {
BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
} else {
size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal,
static_context_map[context]);
if (BROTLI_IS_OOM(m)) return;
}
prev_byte2 = prev_byte;
prev_byte = literal;
++pos;
}
pos += CommandCopyLen(&cmd);
if (CommandCopyLen(&cmd)) {
prev_byte2 = ringbuffer[(pos - 2) & mask];
prev_byte = ringbuffer[(pos - 1) & mask];
if (cmd.cmd_prefix_ >= 128) {
BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
}
}
}
if (num_contexts == 1) {
BlockSplitterFinishBlockLiteral(
&lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
} else {
ContextBlockSplitterFinishBlock(
&lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE);
if (BROTLI_IS_OOM(m)) return;
}
BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE);
BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE);
if (num_contexts > 1) {
MapStaticContexts(m, num_contexts, static_context_map, mb);
}
}
void BrotliBuildMetaBlockGreedy(MemoryManager* m,
const uint8_t* ringbuffer,
size_t pos,
size_t mask,
uint8_t prev_byte,
uint8_t prev_byte2,
ContextType literal_context_mode,
size_t num_contexts,
const uint32_t* static_context_map,
const Command* commands,
size_t n_commands,
MetaBlockSplit* mb) {
if (num_contexts == 1) {
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb);
} else {
BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
prev_byte2, literal_context_mode, num_contexts, static_context_map,
commands, n_commands, mb);
}
}
void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
size_t distance_postfix_bits,
MetaBlockSplit* mb) {
uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];
size_t num_distance_codes;
size_t i;
for (i = 0; i < mb->literal_histograms_size; ++i) {
BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_,
good_for_rle);
}
for (i = 0; i < mb->command_histograms_size; ++i) {
BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS,
mb->command_histograms[i].data_,
good_for_rle);
}
num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
num_direct_distance_codes +
((2 * BROTLI_MAX_DISTANCE_BITS) << distance_postfix_bits);
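  /* E.g. with num_direct_distance_codes == 0 and distance_postfix_bits
     == 0 this is 16 short codes + 2 * 24 long codes == 64, matching the
     distance alphabet size used by the greedy block splitter above. */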
for (i = 0; i < mb->distance_histograms_size; ++i) {
BrotliOptimizeHuffmanCountsForRle(num_distance_codes,
mb->distance_histograms[i].data_,
good_for_rle);
}
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -10,7 +10,7 @@
#ifndef BROTLI_ENC_METABLOCK_H_
#define BROTLI_ENC_METABLOCK_H_
#include <brotli/types.h>
#include "../types.h"
#include "./block_splitter.h"
#include "./command.h"
#include "./context.h"

View File

@@ -12,8 +12,8 @@
#include <assert.h>
#include <string.h> /* memcpy */
#include <brotli/port.h>
#include <brotli/types.h>
#include "../port.h"
#include "../types.h"
#if defined OS_LINUX || defined OS_CYGWIN
#include <endian.h>

View File

@@ -11,8 +11,8 @@
#define BROTLI_ENC_PREFIX_H_
#include "../common/constants.h"
#include <brotli/port.h>
#include <brotli/types.h>
#include "../port.h"
#include "../types.h"
#include "./fast_log.h"
#if defined(__cplusplus) || defined(c_plusplus)

View File

@@ -10,7 +10,7 @@
#ifndef BROTLI_ENC_QUALITY_H_
#define BROTLI_ENC_QUALITY_H_
#include <brotli/encode.h>
#include "../encode.h"
#define FAST_ONE_PASS_COMPRESSION_QUALITY 0
#define FAST_TWO_PASS_COMPRESSION_QUALITY 1

View File

@@ -11,7 +11,7 @@
#include <string.h> /* memcpy */
#include <brotli/types.h>
#include "../types.h"
#include "./memory.h"
#include "./port.h"
#include "./quality.h"

View File

@@ -1,482 +0,0 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
#include "./static_dict.h"
#include "../common/dictionary.h"
#include "./find_match_length.h"
#include "./port.h"
#include "./static_dict_lut.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static const uint8_t kUppercaseFirst = 10;
static const uint8_t kOmitLastNTransforms[10] = {
0, 12, 27, 23, 42, 63, 56, 48, 59, 64,
};
static BROTLI_INLINE uint32_t Hash(const uint8_t *data) {
uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kDictHashMul32;
/* The higher bits contain more mixture from the multiplication,
so we take our results from there. */
return h >> (32 - kDictNumBits);
}
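/* Illustrative sketch (not part of the original source, never
   compiled): kDictHashMul32 and kDictNumBits are defined with the
   lookup tables in static_dict_lut.h; only the top kDictNumBits bits
   of the 32-bit product are used as the bucket index. */
#if 0
static void HashDemo(void) {
  static const uint8_t word[] = "time";
  uint32_t bucket = Hash(word); /* index into kStaticDictionaryBuckets */
  (void)bucket;
}
#endif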
static BROTLI_INLINE void AddMatch(size_t distance, size_t len, size_t len_code,
uint32_t* matches) {
uint32_t match = (uint32_t)((distance << 5) + len_code);
matches[len] = BROTLI_MIN(uint32_t, matches[len], match);
}
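/* The packed value keeps the word id (distance) in the high bits and
   the length code in the low 5 bits: e.g. distance 7 with len_code 4
   packs to (7 << 5) + 4 == 228. BROTLI_MIN keeps the smallest packed
   candidate per output length. */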
static BROTLI_INLINE size_t DictMatchLength(const BrotliDictionary* dictionary,
const uint8_t* data,
size_t id,
size_t len,
size_t maxlen) {
const size_t offset = dictionary->offsets_by_length[len] + len * id;
return FindMatchLengthWithLimit(&dictionary->data[offset], data,
BROTLI_MIN(size_t, len, maxlen));
}
static BROTLI_INLINE BROTLI_BOOL IsMatch(const BrotliDictionary* dictionary,
DictWord w, const uint8_t* data, size_t max_length) {
if (w.len > max_length) {
return BROTLI_FALSE;
} else {
const size_t offset = dictionary->offsets_by_length[w.len] +
(size_t)w.len * (size_t)w.idx;
const uint8_t* dict = &dictionary->data[offset];
if (w.transform == 0) {
/* Match against base dictionary word. */
return
TO_BROTLI_BOOL(FindMatchLengthWithLimit(dict, data, w.len) == w.len);
} else if (w.transform == 10) {
/* Match against uppercase first transform.
Note that there are only ASCII uppercase words in the lookup table. */
return TO_BROTLI_BOOL(dict[0] >= 'a' && dict[0] <= 'z' &&
(dict[0] ^ 32) == data[0] &&
FindMatchLengthWithLimit(&dict[1], &data[1], w.len - 1u) ==
w.len - 1u);
} else {
/* Match against uppercase all transform.
Note that there are only ASCII uppercase words in the lookup table. */
size_t i;
for (i = 0; i < w.len; ++i) {
if (dict[i] >= 'a' && dict[i] <= 'z') {
if ((dict[i] ^ 32) != data[i]) return BROTLI_FALSE;
} else {
if (dict[i] != data[i]) return BROTLI_FALSE;
}
}
return BROTLI_TRUE;
}
}
}
BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
const BrotliDictionary* dictionary, const uint8_t* data, size_t min_length,
size_t max_length, uint32_t* matches) {
BROTLI_BOOL has_found_match = BROTLI_FALSE;
{
size_t offset = kStaticDictionaryBuckets[Hash(data)];
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
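      /* The low 5 bits of w.len carry the word length, bit 0x80 marks
         the last entry of this hash bucket, and n is the number of
         dictionary words of length l. */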
if (w.transform == 0) {
const size_t matchlen =
DictMatchLength(dictionary, data, id, l, max_length);
const uint8_t* s;
size_t minlen;
size_t maxlen;
size_t len;
/* Transform "" + kIdentity + "" */
if (matchlen == l) {
AddMatch(id, l, l, matches);
has_found_match = BROTLI_TRUE;
}
/* Transforms "" + kOmitLast1 + "" and "" + kOmitLast1 + "ing " */
if (matchlen >= l - 1) {
AddMatch(id + 12 * n, l - 1, l, matches);
if (l + 2 < max_length &&
data[l - 1] == 'i' && data[l] == 'n' && data[l + 1] == 'g' &&
data[l + 2] == ' ') {
AddMatch(id + 49 * n, l + 3, l, matches);
}
has_found_match = BROTLI_TRUE;
}
/* Transform "" + kOmitLastN + "" (N = 2 .. 9) */
minlen = min_length;
if (l > 9) minlen = BROTLI_MAX(size_t, minlen, l - 9);
maxlen = BROTLI_MIN(size_t, matchlen, l - 2);
for (len = minlen; len <= maxlen; ++len) {
AddMatch(id + kOmitLastNTransforms[l - len] * n, len, l, matches);
has_found_match = BROTLI_TRUE;
}
if (matchlen < l || l + 6 >= max_length) {
continue;
}
s = &data[l];
/* Transforms "" + kIdentity + <suffix> */
if (s[0] == ' ') {
AddMatch(id + n, l + 1, l, matches);
if (s[1] == 'a') {
if (s[2] == ' ') {
AddMatch(id + 28 * n, l + 3, l, matches);
} else if (s[2] == 's') {
if (s[3] == ' ') AddMatch(id + 46 * n, l + 4, l, matches);
} else if (s[2] == 't') {
if (s[3] == ' ') AddMatch(id + 60 * n, l + 4, l, matches);
} else if (s[2] == 'n') {
if (s[3] == 'd' && s[4] == ' ') {
AddMatch(id + 10 * n, l + 5, l, matches);
}
}
} else if (s[1] == 'b') {
if (s[2] == 'y' && s[3] == ' ') {
AddMatch(id + 38 * n, l + 4, l, matches);
}
} else if (s[1] == 'i') {
if (s[2] == 'n') {
if (s[3] == ' ') AddMatch(id + 16 * n, l + 4, l, matches);
} else if (s[2] == 's') {
if (s[3] == ' ') AddMatch(id + 47 * n, l + 4, l, matches);
}
} else if (s[1] == 'f') {
if (s[2] == 'o') {
if (s[3] == 'r' && s[4] == ' ') {
AddMatch(id + 25 * n, l + 5, l, matches);
}
} else if (s[2] == 'r') {
if (s[3] == 'o' && s[4] == 'm' && s[5] == ' ') {
AddMatch(id + 37 * n, l + 6, l, matches);
}
}
} else if (s[1] == 'o') {
if (s[2] == 'f') {
if (s[3] == ' ') AddMatch(id + 8 * n, l + 4, l, matches);
} else if (s[2] == 'n') {
if (s[3] == ' ') AddMatch(id + 45 * n, l + 4, l, matches);
}
} else if (s[1] == 'n') {
if (s[2] == 'o' && s[3] == 't' && s[4] == ' ') {
AddMatch(id + 80 * n, l + 5, l, matches);
}
} else if (s[1] == 't') {
if (s[2] == 'h') {
if (s[3] == 'e') {
if (s[4] == ' ') AddMatch(id + 5 * n, l + 5, l, matches);
} else if (s[3] == 'a') {
if (s[4] == 't' && s[5] == ' ') {
AddMatch(id + 29 * n, l + 6, l, matches);
}
}
} else if (s[2] == 'o') {
if (s[3] == ' ') AddMatch(id + 17 * n, l + 4, l, matches);
}
} else if (s[1] == 'w') {
if (s[2] == 'i' && s[3] == 't' && s[4] == 'h' && s[5] == ' ') {
AddMatch(id + 35 * n, l + 6, l, matches);
}
}
} else if (s[0] == '"') {
AddMatch(id + 19 * n, l + 1, l, matches);
if (s[1] == '>') {
AddMatch(id + 21 * n, l + 2, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + 20 * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + 31 * n, l + 2, l, matches);
if (s[2] == 'T' && s[3] == 'h') {
if (s[4] == 'e') {
if (s[5] == ' ') AddMatch(id + 43 * n, l + 6, l, matches);
} else if (s[4] == 'i') {
if (s[5] == 's' && s[6] == ' ') {
AddMatch(id + 75 * n, l + 7, l, matches);
}
}
}
}
} else if (s[0] == ',') {
AddMatch(id + 76 * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + 14 * n, l + 2, l, matches);
}
} else if (s[0] == '\n') {
AddMatch(id + 22 * n, l + 1, l, matches);
if (s[1] == '\t') {
AddMatch(id + 50 * n, l + 2, l, matches);
}
} else if (s[0] == ']') {
AddMatch(id + 24 * n, l + 1, l, matches);
} else if (s[0] == '\'') {
AddMatch(id + 36 * n, l + 1, l, matches);
} else if (s[0] == ':') {
AddMatch(id + 51 * n, l + 1, l, matches);
} else if (s[0] == '(') {
AddMatch(id + 57 * n, l + 1, l, matches);
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + 70 * n, l + 2, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + 86 * n, l + 2, l, matches);
}
} else if (s[0] == 'a') {
if (s[1] == 'l' && s[2] == ' ') {
AddMatch(id + 84 * n, l + 3, l, matches);
}
} else if (s[0] == 'e') {
if (s[1] == 'd') {
if (s[2] == ' ') AddMatch(id + 53 * n, l + 3, l, matches);
} else if (s[1] == 'r') {
if (s[2] == ' ') AddMatch(id + 82 * n, l + 3, l, matches);
} else if (s[1] == 's') {
if (s[2] == 't' && s[3] == ' ') {
AddMatch(id + 95 * n, l + 4, l, matches);
}
}
} else if (s[0] == 'f') {
if (s[1] == 'u' && s[2] == 'l' && s[3] == ' ') {
AddMatch(id + 90 * n, l + 4, l, matches);
}
} else if (s[0] == 'i') {
if (s[1] == 'v') {
if (s[2] == 'e' && s[3] == ' ') {
AddMatch(id + 92 * n, l + 4, l, matches);
}
} else if (s[1] == 'z') {
if (s[2] == 'e' && s[3] == ' ') {
AddMatch(id + 100 * n, l + 4, l, matches);
}
}
} else if (s[0] == 'l') {
if (s[1] == 'e') {
if (s[2] == 's' && s[3] == 's' && s[4] == ' ') {
AddMatch(id + 93 * n, l + 5, l, matches);
}
} else if (s[1] == 'y') {
if (s[2] == ' ') AddMatch(id + 61 * n, l + 3, l, matches);
}
} else if (s[0] == 'o') {
if (s[1] == 'u' && s[2] == 's' && s[3] == ' ') {
AddMatch(id + 106 * n, l + 4, l, matches);
}
}
} else {
        /* Set is_all_caps=0 for the kUppercaseFirst transform and
           is_all_caps=1 otherwise (kUppercaseAll). */
const BROTLI_BOOL is_all_caps =
TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
const uint8_t* s;
if (!IsMatch(dictionary, w, data, max_length)) {
continue;
}
/* Transform "" + kUppercase{First,All} + "" */
AddMatch(id + (is_all_caps ? 44 : 9) * n, l, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 1 >= max_length) {
continue;
}
/* Transforms "" + kUppercase{First,All} + <suffix> */
s = &data[l];
if (s[0] == ' ') {
AddMatch(id + (is_all_caps ? 68 : 4) * n, l + 1, l, matches);
} else if (s[0] == '"') {
AddMatch(id + (is_all_caps ? 87 : 66) * n, l + 1, l, matches);
if (s[1] == '>') {
AddMatch(id + (is_all_caps ? 97 : 69) * n, l + 2, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + (is_all_caps ? 101 : 79) * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + (is_all_caps ? 114 : 88) * n, l + 2, l, matches);
}
} else if (s[0] == ',') {
AddMatch(id + (is_all_caps ? 112 : 99) * n, l + 1, l, matches);
if (s[1] == ' ') {
AddMatch(id + (is_all_caps ? 107 : 58) * n, l + 2, l, matches);
}
} else if (s[0] == '\'') {
AddMatch(id + (is_all_caps ? 94 : 74) * n, l + 1, l, matches);
} else if (s[0] == '(') {
AddMatch(id + (is_all_caps ? 113 : 78) * n, l + 1, l, matches);
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + (is_all_caps ? 105 : 104) * n, l + 2, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + (is_all_caps ? 116 : 108) * n, l + 2, l, matches);
}
}
}
}
}
/* Transforms with prefixes " " and "." */
if (max_length >= 5 && (data[0] == ' ' || data[0] == '.')) {
BROTLI_BOOL is_space = TO_BROTLI_BOOL(data[0] == ' ');
size_t offset = kStaticDictionaryBuckets[Hash(&data[1])];
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0) {
const uint8_t* s;
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kIdentity + "" and "." + kIdentity + "" */
AddMatch(id + (is_space ? 6 : 32) * n, l + 1, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 2 >= max_length) {
continue;
}
/* Transforms " " + kIdentity + <suffix> and "." + kIdentity + <suffix>
*/
s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (is_space ? 2 : 77) * n, l + 2, l, matches);
} else if (s[0] == '(') {
AddMatch(id + (is_space ? 89 : 67) * n, l + 2, l, matches);
} else if (is_space) {
if (s[0] == ',') {
AddMatch(id + 103 * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + 33 * n, l + 3, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + 71 * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + 52 * n, l + 3, l, matches);
}
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + 81 * n, l + 3, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + 98 * n, l + 3, l, matches);
}
}
}
} else if (is_space) {
        /* Set is_all_caps=0 for the kUppercaseFirst transform and
           is_all_caps=1 otherwise (kUppercaseAll). */
const BROTLI_BOOL is_all_caps =
TO_BROTLI_BOOL(w.transform != kUppercaseFirst);
const uint8_t* s;
if (!IsMatch(dictionary, w, &data[1], max_length - 1)) {
continue;
}
/* Transforms " " + kUppercase{First,All} + "" */
AddMatch(id + (is_all_caps ? 85 : 30) * n, l + 1, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 2 >= max_length) {
continue;
}
/* Transforms " " + kUppercase{First,All} + <suffix> */
s = &data[l + 1];
if (s[0] == ' ') {
AddMatch(id + (is_all_caps ? 83 : 15) * n, l + 2, l, matches);
} else if (s[0] == ',') {
if (!is_all_caps) {
AddMatch(id + 109 * n, l + 2, l, matches);
}
if (s[1] == ' ') {
AddMatch(id + (is_all_caps ? 111 : 65) * n, l + 3, l, matches);
}
} else if (s[0] == '.') {
AddMatch(id + (is_all_caps ? 115 : 96) * n, l + 2, l, matches);
if (s[1] == ' ') {
AddMatch(id + (is_all_caps ? 117 : 91) * n, l + 3, l, matches);
}
} else if (s[0] == '=') {
if (s[1] == '"') {
AddMatch(id + (is_all_caps ? 110 : 118) * n, l + 3, l, matches);
} else if (s[1] == '\'') {
AddMatch(id + (is_all_caps ? 119 : 120) * n, l + 3, l, matches);
}
}
}
}
}
if (max_length >= 6) {
/* Transforms with prefixes "e ", "s ", ", " and "\xc2\xa0" */
if ((data[1] == ' ' &&
(data[0] == 'e' || data[0] == 's' || data[0] == ',')) ||
(data[0] == 0xc2 && data[1] == 0xa0)) {
size_t offset = kStaticDictionaryBuckets[Hash(&data[2])];
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0 &&
IsMatch(dictionary, w, &data[2], max_length - 2)) {
if (data[0] == 0xc2) {
AddMatch(id + 102 * n, l + 2, l, matches);
has_found_match = BROTLI_TRUE;
} else if (l + 2 < max_length && data[l + 2] == ' ') {
size_t t = data[0] == 'e' ? 18 : (data[0] == 's' ? 7 : 13);
AddMatch(id + t * n, l + 3, l, matches);
has_found_match = BROTLI_TRUE;
}
}
}
}
}
if (max_length >= 9) {
/* Transforms with prefixes " the " and ".com/" */
if ((data[0] == ' ' && data[1] == 't' && data[2] == 'h' &&
data[3] == 'e' && data[4] == ' ') ||
(data[0] == '.' && data[1] == 'c' && data[2] == 'o' &&
data[3] == 'm' && data[4] == '/')) {
size_t offset = kStaticDictionaryBuckets[Hash(&data[5])];
BROTLI_BOOL end = !offset;
while (!end) {
DictWord w = kStaticDictionaryWords[offset++];
const size_t l = w.len & 0x1F;
const size_t n = (size_t)1 << dictionary->size_bits_by_length[l];
const size_t id = w.idx;
end = !!(w.len & 0x80);
w.len = (uint8_t)l;
if (w.transform == 0 &&
IsMatch(dictionary, w, &data[5], max_length - 5)) {
AddMatch(id + (data[0] == ' ' ? 41 : 72) * n, l + 5, l, matches);
has_found_match = BROTLI_TRUE;
if (l + 5 < max_length) {
const uint8_t* s = &data[l + 5];
if (data[0] == ' ') {
if (l + 8 < max_length &&
s[0] == ' ' && s[1] == 'o' && s[2] == 'f' && s[3] == ' ') {
AddMatch(id + 62 * n, l + 9, l, matches);
if (l + 12 < max_length &&
s[4] == 't' && s[5] == 'h' && s[6] == 'e' && s[7] == ' ') {
AddMatch(id + 73 * n, l + 13, l, matches);
}
}
}
}
}
}
}
}
return has_found_match;
}
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -10,7 +10,7 @@
#define BROTLI_ENC_STATIC_DICT_H_
#include "../common/dictionary.h"
#include <brotli/types.h>
#include "../types.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)

View File

@@ -9,7 +9,7 @@
#ifndef BROTLI_ENC_STATIC_DICT_LUT_H_
#define BROTLI_ENC_STATIC_DICT_LUT_H_
#include <brotli/types.h>
#include "../types.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {

View File

@@ -1,85 +0,0 @@
/* Copyright 2013 Google Inc. All Rights Reserved.
Distributed under MIT license.
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/
/* Heuristics for deciding whether a string is mostly UTF8. */
#include "./utf8_util.h"
#include <brotli/types.h>
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
static size_t BrotliParseAsUTF8(
int* symbol, const uint8_t* input, size_t size) {
/* ASCII */
if ((input[0] & 0x80) == 0) {
*symbol = input[0];
if (*symbol > 0) {
return 1;
}
}
/* 2-byte UTF8 */
if (size > 1u &&
(input[0] & 0xe0) == 0xc0 &&
(input[1] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x1f) << 6) |
(input[1] & 0x3f));
if (*symbol > 0x7f) {
return 2;
}
}
/* 3-byte UTF8 */
if (size > 2u &&
(input[0] & 0xf0) == 0xe0 &&
(input[1] & 0xc0) == 0x80 &&
(input[2] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x0f) << 12) |
((input[1] & 0x3f) << 6) |
(input[2] & 0x3f));
if (*symbol > 0x7ff) {
return 3;
}
}
/* 4-byte UTF8 */
if (size > 3u &&
(input[0] & 0xf8) == 0xf0 &&
(input[1] & 0xc0) == 0x80 &&
(input[2] & 0xc0) == 0x80 &&
(input[3] & 0xc0) == 0x80) {
*symbol = (((input[0] & 0x07) << 18) |
((input[1] & 0x3f) << 12) |
((input[2] & 0x3f) << 6) |
(input[3] & 0x3f));
if (*symbol > 0xffff && *symbol <= 0x10ffff) {
return 4;
}
}
/* Not UTF8, emit a special symbol above the UTF8-code space */
*symbol = 0x110000 | input[0];
return 1;
}
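/* Illustrative sketch (not part of the original source, never
   compiled): decoding one code point and the non-UTF8 fallback. */
#if 0
static void ParseAsUTF8Demo(void) {
  int symbol;
  static const uint8_t euro[3] = { 0xe2, 0x82, 0xac }; /* U+20AC */
  size_t n = BrotliParseAsUTF8(&symbol, euro, 3);
  /* n == 3 and symbol == 0x20ac. A lone 0xe2 byte would instead give
     n == 1 and symbol == 0x110000 | 0xe2, the out-of-range marker. */
  (void)n;
}
#endif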
/* Returns 1 if at least min_fraction of the data is UTF8-encoded. */
BROTLI_BOOL BrotliIsMostlyUTF8(
const uint8_t* data, const size_t pos, const size_t mask,
const size_t length, const double min_fraction) {
size_t size_utf8 = 0;
size_t i = 0;
while (i < length) {
int symbol;
size_t bytes_read =
BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i);
i += bytes_read;
if (symbol < 0x110000) size_utf8 += bytes_read;
}
return TO_BROTLI_BOOL(size_utf8 > min_fraction * (double)length);
}
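/* Illustrative sketch (not part of the original source, never
   compiled): a mask of ~0 disables ring-buffer wrapping for a plain
   buffer; the 0.75 threshold is just an example value. */
#if 0
static void IsMostlyUTF8Demo(void) {
  static const uint8_t text[] = "caf\xc3\xa9"; /* "café", 5 bytes */
  BROTLI_BOOL ok = BrotliIsMostlyUTF8(text, 0, ~(size_t)0, 5, 0.75);
  /* All 5 bytes parse as UTF8, so 5 > 0.75 * 5 and ok == BROTLI_TRUE. */
  (void)ok;
}
#endif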
#if defined(__cplusplus) || defined(c_plusplus)
} /* extern "C" */
#endif

View File

@@ -9,7 +9,7 @@
#ifndef BROTLI_ENC_UTF8_UTIL_H_
#define BROTLI_ENC_UTF8_UTIL_H_
#include <brotli/types.h>
#include "../types.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)

View File

@@ -12,7 +12,7 @@
#include <assert.h>
#include <stdio.h> /* printf */
#include <brotli/types.h>
#include "../types.h"
#include "./port.h"
#if defined(__cplusplus) || defined(c_plusplus)