mirror of
https://github.com/Xevion/easy7zip.git
synced 2025-12-07 20:06:59 -06:00
326 lines
10 KiB
C++
Executable File
326 lines
10 KiB
C++
Executable File
// Compression/HuffmanEncoder.cpp
|
|
|
|
#include "StdAfx.h"
|
|
|
|
#include "HuffmanEncoder.h"
|
|
#include "Common/Defs.h"
|
|
#include "Common/Alloc.h"
|
|
|
|
namespace NCompression {
|
|
namespace NHuffman {
|
|
|
|
static const char *kIncorrectBitLenCountsMessage = "Incorrect bit len counts";
|
|
|
|
CEncoder::CEncoder():
|
|
m_Items(0),
|
|
m_Heap(0),
|
|
m_Depth(0)
|
|
{}
|
|
|
|
void CEncoder::Free()
|
|
{
|
|
MyFree(m_Items);
|
|
MyFree(m_Heap);
|
|
MyFree(m_Depth);
|
|
m_Items = 0;
|
|
m_Heap = 0;
|
|
m_Depth = 0;
|
|
}
|
|
|
|
bool CEncoder::Create(UInt32 numSymbols,
|
|
const Byte *extraBits, UInt32 extraBase, UInt32 maxLength)
|
|
{
|
|
m_NumSymbols = numSymbols;
|
|
m_ExtraBits = extraBits;
|
|
m_ExtraBase = extraBase;
|
|
m_MaxLength = maxLength;
|
|
m_HeapSize = numSymbols * 2 + 1;
|
|
Free();
|
|
m_Items = (CItem *)MyAlloc(m_HeapSize * sizeof(CItem));
|
|
m_Heap = (UInt32 *)MyAlloc(m_HeapSize * sizeof(UInt32));
|
|
m_Depth = (Byte *)MyAlloc(m_HeapSize * sizeof(Byte));
|
|
if (m_Items == 0 || m_Heap == 0 || m_Depth == 0)
|
|
{
|
|
Free();
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
CEncoder::~CEncoder()
|
|
{
|
|
Free();
|
|
}
|
|
|
|
void CEncoder::StartNewBlock()
|
|
{
|
|
for (UInt32 i = 0; i < m_NumSymbols; i++)
|
|
m_Items[i].Freq = 0;
|
|
}
|
|
|
|
static const int kSmallest = 1;
|
|
|
|
// ===========================================================================
|
|
// Remove the smallest element from the heap and recreate the heap with
|
|
// one less element. Updates heap and m_HeapLength.
|
|
|
|
UInt32 CEncoder::RemoveSmallest()
|
|
{
|
|
UInt32 top = m_Heap[kSmallest];
|
|
m_Heap[kSmallest] = m_Heap[m_HeapLength--];
|
|
DownHeap(kSmallest);
|
|
return top;
|
|
}
|
|
|
|
// ===========================================================================
|
|
// Compares to subtrees, using the tree m_Depth as tie breaker when
|
|
// the subtrees have equal frequency. This minimizes the worst case length.
|
|
|
|
bool CEncoder::Smaller(int n, int m)
|
|
{
|
|
return (m_Items[n].Freq < m_Items[m].Freq ||
|
|
(m_Items[n].Freq == m_Items[m].Freq && m_Depth[n] <= m_Depth[m]));
|
|
}
|
|
|
|
// ===========================================================================
|
|
// Restore the m_Heap property by moving down the tree starting at node k,
|
|
// exchanging a node with the smallest of its two sons if necessary, stopping
|
|
// when the m_Heap property is re-established (each father CompareFreqs than its
|
|
// two sons).
|
|
|
|
void CEncoder::DownHeap(UInt32 k)
|
|
{
|
|
UInt32 symbol = m_Heap[k];
|
|
for (UInt32 j = k << 1; j <= m_HeapLength;) // j: left son of k
|
|
{
|
|
// Set j to the smallest of the two sons:
|
|
if (j < m_HeapLength && Smaller(m_Heap[j+1], m_Heap[j]))
|
|
j++;
|
|
UInt32 htemp = m_Heap[j]; // htemp required because of bug in SASC compiler
|
|
if (Smaller(symbol, htemp)) // Exit if v is smaller than both sons
|
|
break;
|
|
m_Heap[k] = htemp; // Exchange v with the smallest son
|
|
k = j;
|
|
j <<= 1; // And continue down the tree, setting j to the left son of k
|
|
}
|
|
m_Heap[k] = symbol;
|
|
}
|
|
|
|
// ===========================================================================
|
|
// Compute the optimal bit lengths for a tree and update the total bit length
|
|
// for the current block.
|
|
// IN assertion: the fields freq and dad are set, heap[heapMax] and
|
|
// above are the tree nodes sorted by increasing frequency.
|
|
// OUT assertions: the field len is set to the optimal bit length, the
|
|
// array m_BitLenCounters contains the frequencies for each bit length.
|
|
// The length m_BlockBitLength is updated; static_len is also updated if stree is
|
|
// not null.
|
|
|
|
void CEncoder::GenerateBitLen(UInt32 maxCode, UInt32 heapMax)
|
|
{
|
|
int overflow = 0; // number of elements with bit length too large
|
|
|
|
for (UInt32 i = 0; i <= kNumBitsInLongestCode; i++)
|
|
m_BitLenCounters[i] = 0;
|
|
|
|
/* In a first pass, compute the optimal bit lengths (which may
|
|
* overflow in the case of the bit length tree).
|
|
*/
|
|
m_Items[m_Heap[heapMax]].Len = 0; /* root of the heap */
|
|
UInt32 h; /* heap index */
|
|
for (h = heapMax+1; h < m_HeapSize; h++)
|
|
{
|
|
UInt32 symbol = m_Heap[h];
|
|
UInt32 len = m_Items[m_Items[symbol].Dad].Len + 1;
|
|
if (len > m_MaxLength)
|
|
{
|
|
len = m_MaxLength;
|
|
overflow++;
|
|
}
|
|
m_Items[symbol].Len = len; // We overwrite m_Items[symbol].Dad which is no longer needed
|
|
if (symbol > maxCode)
|
|
continue; // not a leaf node
|
|
m_BitLenCounters[len]++;
|
|
UInt32 extraBits;
|
|
if (m_ExtraBits != 0 && symbol >= m_ExtraBase)
|
|
extraBits = m_ExtraBits[symbol - m_ExtraBase];
|
|
else
|
|
extraBits = 0;
|
|
m_BlockBitLength += (m_Items[symbol].Freq * (len + extraBits));
|
|
}
|
|
if (overflow == 0)
|
|
return;
|
|
|
|
// This happens for example on obj2 and pic of the Calgary corpus
|
|
// Find the first bit length which could increase:
|
|
do
|
|
{
|
|
UInt32 bits = m_MaxLength-1;
|
|
while (m_BitLenCounters[bits] == 0)
|
|
bits--;
|
|
m_BitLenCounters[bits]--; // move one leaf down the m_Items
|
|
m_BitLenCounters[bits + 1] += 2; // move one overflow item as its brother
|
|
m_BitLenCounters[m_MaxLength]--;
|
|
// The brother of the overflow item also moves one step up,
|
|
// but this does not affect m_BitLenCounters[m_MaxLength]
|
|
overflow -= 2;
|
|
}
|
|
while (overflow > 0);
|
|
|
|
// Now recompute all bit lengths, scanning in increasing frequency.
|
|
// h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
|
|
// lengths instead of fixing only the wrong ones. This idea is taken
|
|
// from 'ar' written by Haruhiko Okumura.)
|
|
for (UInt32 bits = m_MaxLength; bits != 0; bits--)
|
|
{
|
|
UInt32 numNodes = m_BitLenCounters[bits];
|
|
while (numNodes != 0)
|
|
{
|
|
UInt32 m = m_Heap[--h];
|
|
if (m > maxCode)
|
|
continue;
|
|
if (m_Items[m].Len != (unsigned) bits)
|
|
{
|
|
m_BlockBitLength += ((long)bits - (long)m_Items[m].Len) * (long)m_Items[m].Freq;
|
|
m_Items[m].Len = bits;
|
|
}
|
|
numNodes--;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// ===========================================================================
|
|
// Generate the codes for a given tree and bit counts (which need not be
|
|
// optimal).
|
|
// IN assertion: the array m_BitLenCounters contains the bit length statistics for
|
|
// the given tree and the field len is set for all tree elements.
|
|
// OUT assertion: the field code is set for all tree elements of non
|
|
// zero code length.
|
|
|
|
// UInt32 maxCode = largest code with non zero frequency
|
|
|
|
|
|
void CEncoder::GenerateCodes(UInt32 maxCode)
|
|
{
|
|
UInt32 nextCodes[kNumBitsInLongestCode + 1]; // next code value for each bit length
|
|
UInt32 code = 0; // running code value
|
|
// The distribution counts are first used to generate the code values
|
|
// without bit reversal.
|
|
for (UInt32 bits = 1; bits <= kNumBitsInLongestCode; bits++)
|
|
nextCodes[bits] = code = (code + m_BitLenCounters[bits - 1]) << 1;
|
|
// Check that the bit counts in m_BitLenCounters are consistent. The last code
|
|
// must be all ones.
|
|
if (code + m_BitLenCounters[kNumBitsInLongestCode] - 1 != (1 << kNumBitsInLongestCode) - 1)
|
|
throw kIncorrectBitLenCountsMessage;
|
|
for (UInt32 n = 0; n <= maxCode; n++)
|
|
{
|
|
int len = m_Items[n].Len;
|
|
if (len == 0)
|
|
continue;
|
|
m_Items[n].Code = nextCodes[len]++;
|
|
}
|
|
}
|
|
|
|
|
|
// ===========================================================================
|
|
// Construct one Huffman tree and assigns the code bit strings and lengths.
|
|
// Update the total bit length for the current block.
|
|
// IN assertion: the field freq is set for all tree elements.
|
|
// OUT assertions: the fields len and code are set to the optimal bit length
|
|
// and corresponding code. The length m_BlockBitLength is updated; static_len is
|
|
// also updated if stree is not null. The field max_code is set.
|
|
|
|
void CEncoder::BuildTree(Byte *levels)
|
|
{
|
|
m_BlockBitLength = 0;
|
|
int maxCode = -1; // WAS = -1; largest code with non zero frequency */
|
|
|
|
// Construct the initial m_Heap, with least frequent element in
|
|
// m_Heap[kSmallest]. The sons of m_Heap[n] are m_Heap[2*n] and m_Heap[2*n+1].
|
|
// m_Heap[0] is not used.
|
|
//
|
|
|
|
m_HeapLength = 0;
|
|
UInt32 n; // iterate over m_Heap elements
|
|
for (n = 0; n < m_NumSymbols; n++)
|
|
{
|
|
if (m_Items[n].Freq != 0)
|
|
{
|
|
m_Heap[++m_HeapLength] = maxCode = n;
|
|
m_Depth[n] = 0;
|
|
}
|
|
else
|
|
m_Items[n].Len = 0;
|
|
}
|
|
|
|
// The pkzip format requires that at least one distance code exists,
|
|
// and that at least one bit should be sent even if there is only one
|
|
// possible code. So to avoid special checks later on we force at least
|
|
// two codes of non zero frequency.
|
|
while (m_HeapLength < 2)
|
|
{
|
|
int aNewNode = m_Heap[++m_HeapLength] = (maxCode < 2 ? ++maxCode : 0);
|
|
m_Items[aNewNode].Freq = 1;
|
|
m_Depth[aNewNode] = 0;
|
|
m_BlockBitLength--;
|
|
// if (stree) static_len -= stree[aNewNode].Len;
|
|
// aNewNode is 0 or 1 so it does not have m_ExtraBits bits
|
|
}
|
|
|
|
// The elements m_Heap[m_HeapLength/2+1 .. m_HeapLength] are leaves of the m_Items,
|
|
// establish sub-heaps of increasing lengths:
|
|
for (n = m_HeapLength / 2; n >= 1; n--)
|
|
DownHeap(n);
|
|
|
|
// Construct the Huffman tree by repeatedly combining the least two
|
|
// frequent nodes.
|
|
int node = m_NumSymbols; // next internal node of the tree
|
|
UInt32 heapMax = m_NumSymbols * 2+ 1;
|
|
do
|
|
{
|
|
n = RemoveSmallest(); /* n = node of least frequency */
|
|
UInt32 m = m_Heap[kSmallest]; /* m = node of next least frequency */
|
|
|
|
m_Heap[--heapMax] = n; /* keep the nodes sorted by frequency */
|
|
m_Heap[--heapMax] = m;
|
|
|
|
// Create a new node father of n and m
|
|
m_Items[node].Freq = m_Items[n].Freq + m_Items[m].Freq;
|
|
m_Depth[node] = (Byte) (MyMax(m_Depth[n], m_Depth[m]) + 1);
|
|
m_Items[n].Dad = m_Items[m].Dad = node;
|
|
// and insert the new node in the m_Heap
|
|
m_Heap[kSmallest] = node++;
|
|
DownHeap(kSmallest);
|
|
|
|
}
|
|
while (m_HeapLength >= 2);
|
|
|
|
m_Heap[--heapMax] = m_Heap[kSmallest];
|
|
|
|
// At this point, the fields freq and dad are set. We can now
|
|
// generate the bit lengths.
|
|
GenerateBitLen(maxCode, heapMax);
|
|
|
|
// The field len is now set, we can generate the bit codes
|
|
GenerateCodes (maxCode);
|
|
|
|
for (n = 0; n < m_NumSymbols; n++)
|
|
levels[n] = Byte(m_Items[n].Len);
|
|
}
|
|
|
|
void CEncoder::ReverseBits()
|
|
{
|
|
for (UInt32 symbol = 0; symbol < m_NumSymbols; symbol++)
|
|
{
|
|
CItem &item = m_Items[symbol];
|
|
UInt32 value = item.Code;
|
|
UInt32 reverseValue = 0;
|
|
for(UInt32 i = item.Len; i != 0; i--, value >>= 1)
|
|
reverseValue = (reverseValue << 1) | (value & 1);
|
|
item.Code = reverseValue;
|
|
}
|
|
}
|
|
|
|
}}
|