This commit is contained in:
Igor Pavlov
2005-05-30 00:00:00 +00:00
committed by Kornel Lesiński
parent 8c1b5c7b7e
commit 3c510ba80b
926 changed files with 40559 additions and 23519 deletions

289
7zip/Compress/BWT/BlockSort.cpp Executable file
View File

@@ -0,0 +1,289 @@
// BlockSort.cpp
#include "StdAfx.h"
#include <stddef.h>
#include "BlockSort.h"
#include "Common/Alloc.h"
namespace NCompress {
// Number of leading bytes of every rotation that the initial radix pass of
// Sort() buckets on, and the resulting number of buckets (counters).
static const int kNumHashBytes = 2;
static const UInt32 kNumHashValues = (UInt32)1 << (8 * kNumHashBytes);

// Flags are stored one bit per block position, packed into UInt32 words.
static const int kNumFlagsBits = 5;                               // log2(32): 32 flags per UInt32 word
static const UInt32 kNumFlagsInWord = (UInt32)1 << kNumFlagsBits; // flags held by one word
static const UInt32 kFlagsMask = kNumFlagsInWord - 1;             // extracts the bit index inside a word
static const UInt32 kAllFlags = 0xFFFFFFFF;                       // a word with every flag set
// Create - allocates the single work buffer that Sort() later carves into
// Indices, the 2-byte radix counters, Groups and Flags.
//   blockSizeMax: largest block size that will be passed to Sort().
// An existing buffer is reused when it was created for the same blockSizeMax.
// returns: true - the buffer is ready, false - the request is too large to
// express in size_t or the allocation failed (Indices is 0 in both cases).
bool CBlockSorter::Create(UInt32 blockSizeMax)
{
  if (Indices != 0 && blockSizeMax == BlockSizeMax)
    return true;
  Free();
  BlockSizeMax = blockSizeMax;

  // One flag bit per position, rounded up to whole UInt32 words. Written as
  // shift + remainder so that the rounding itself cannot wrap around.
  const size_t numFlagWords = (size_t)(blockSizeMax >> kNumFlagsBits) +
      ((blockSizeMax & kFlagsMask) != 0 ? 1 : 0);
  const size_t numFixedItems = numFlagWords + kNumHashValues;

  // The total is 2 * blockSizeMax + numFixedItems elements. Doing this in
  // UInt32 (as "blockSizeMax * 2") can wrap and silently under-allocate, so the
  // count is computed in size_t and rejected if it does not fit.
  const size_t numItemsMax = (size_t)-1 / sizeof(UInt32);
  if (blockSizeMax > (numItemsMax - numFixedItems) / 2)
    return false;
  const size_t numItems = (size_t)blockSizeMax * 2 + numFixedItems;

  Indices = (UInt32 *)::BigAlloc(numItems * sizeof(UInt32));
  return (Indices != 0);
}
void CBlockSorter::Free()
{
::BigFree(Indices);
Indices = 0;
}
// GetSortKey - returns the key used to order the rotation starting at pos:
// the group number of the position numSortedBytes further on, wrapping around
// the end of the block. Requires pos + numSortedBytes < 2 * blockSize.
static inline UInt32 GetSortKey(const UInt32 *groups, UInt32 pos,
    UInt32 numSortedBytes, UInt32 blockSize)
{
  UInt32 keyPos = pos + numSortedBytes;
  if (keyPos >= blockSize)
    keyPos -= blockSize;
  return groups[keyPos];
}

// SortGroup - is recursive Radix-Range-Sort function with Bubble-Sort optimization
// It uses both mask & maskSize (Range-Sort), since it can change values (Groups)
// during sorting
//
// Sorts Indices[groupOffset .. groupOffset + groupSize) by the key
// Groups[(index + NumSortedBytes) mod BlockSize].
//   mask     - pivot value: keys < mask go to the left part, keys >= mask to the right
//   maskSize - half-width of the key range that is still being split
// For every boundary found between different keys the Flags bit of the last
// element of the left part is cleared, and Groups[] of the separated elements
// is set to the offset of the first element of their new group.
//
// returns: 1 - at least one sub-group with more than one element and equal keys
//              remains (the caller has to run another pass),
//          0 - every element of the range is in a group of its own
UInt32 CBlockSorter::SortGroup(UInt32 groupOffset, UInt32 groupSize, UInt32 mask, UInt32 maskSize)
{
  if (groupSize <= 1)
    return 0;
  UInt32 *ind2 = Indices + groupOffset;

  if (groupSize == 2)
  {
    // Two elements: a single comparison decides everything
    const UInt32 key0 = GetSortKey(Groups, ind2[0], NumSortedBytes, BlockSize);
    const UInt32 key1 = GetSortKey(Groups, ind2[1], NumSortedBytes, BlockSize);
    if (key0 == key1)
      return 1;
    if (key0 > key1)
    {
      UInt32 temp = ind2[0];
      ind2[0] = ind2[1];
      ind2[1] = temp;
    }
    // (UInt32)1: shifting a signed 1 by 31 would overflow int
    Flags[groupOffset >> kNumFlagsBits] &= ~((UInt32)1 << (groupOffset & kFlagsMask));
    Groups[ind2[1]] = groupOffset + 1;
    return 0;
  }

  // Check that all strings are in one group (cannot sort)
  {
    const UInt32 firstKey = GetSortKey(Groups, ind2[0], NumSortedBytes, BlockSize);
    UInt32 j;
    for (j = 1; j < groupSize; j++)
      if (GetSortKey(Groups, ind2[j], NumSortedBytes, BlockSize) != firstKey)
        break;
    if (j == groupSize)
      return 1;
  }

  if (groupSize <= 15)
  {
    // Bubble-Sort
    // "group" always holds the key of the element currently at ind2[j - 1];
    // each pass only needs to go up to the position of the last swap.
    UInt32 lastChange = groupSize;
    do
    {
      UInt32 group = GetSortKey(Groups, ind2[0], NumSortedBytes, BlockSize);
      UInt32 sortSize = lastChange;
      lastChange = 0;
      for (UInt32 j = 1; j < sortSize; j++)
      {
        const UInt32 key = GetSortKey(Groups, ind2[j], NumSortedBytes, BlockSize);
        if (key < group)
        {
          UInt32 temp = ind2[j];
          ind2[j] = ind2[j - 1];
          ind2[j - 1] = temp;
          lastChange = j;
        }
        else
          group = key;
      }
    }
    while(lastChange > 1);

    // Write Flags
    // Clear the flag of the last element before every change of key.
    // Groups[] is not modified yet, so the keys are still the ones sorted by.
    UInt32 group = GetSortKey(Groups, ind2[0], NumSortedBytes, BlockSize);
    UInt32 j;
    for (j = 1; j < groupSize; j++)
    {
      const UInt32 key = GetSortKey(Groups, ind2[j], NumSortedBytes, BlockSize);
      if (key != group)
      {
        group = key;
        UInt32 t = groupOffset + j - 1;
        Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
      }
    }

    // Write new Groups values and Check that there are groups
    UInt32 thereAreGroups = 0;
    for (j = 0; j < groupSize; j++)
    {
      // All elements up to the next cleared flag share the offset of the first one
      const UInt32 newGroup = groupOffset + j;
      while (true)
      {
        Groups[ind2[j]] = newGroup;
        if ((Flags[(groupOffset + j) >> kNumFlagsBits] & ((UInt32)1 << ((groupOffset + j) & kFlagsMask))) == 0)
          break;
        j++;
        thereAreGroups = 1;
      }
    }
    return thereAreGroups;
  }

  // Radix-Range Sort
  // Partition around mask; while all keys fall on one side, halve the range
  // and move the pivot into that side. i ends up as the size of the left part.
  UInt32 i;
  do
  {
    if (maskSize == 0)
      return 1;
    UInt32 j = groupSize;
    i = 0;
    do
    {
      if (GetSortKey(Groups, ind2[i], NumSortedBytes, BlockSize) >= mask)
      {
        // ind2[i] belongs to the right part: look from the end for an
        // element of the left part to exchange it with
        for (j--; j > i; j--)
        {
          if (GetSortKey(Groups, ind2[j], NumSortedBytes, BlockSize) < mask)
          {
            UInt32 temp = ind2[i];
            ind2[i] = ind2[j];
            ind2[j] = temp;
            break;
          }
        }
        if (i >= j)
          break;
      }
    }
    while(++i < j);
    maskSize >>= 1;
    if (i == 0)
      mask += maskSize;
    else if (i == groupSize)
      mask -= maskSize;
    else
      break;
  }
  while(true);

  // Split the group at i: mark the boundary and renumber the right part
  UInt32 t = (groupOffset + i - 1);
  Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
  for (UInt32 j = i; j < groupSize; j++)
    Groups[ind2[j]] = groupOffset + i;

  UInt32 res = SortGroup(groupOffset, i, mask - maskSize, maskSize);
  return res | SortGroup(groupOffset + i, groupSize - i, mask + maskSize, maskSize);
}
// Sort - orders the positions 0 .. blockSize - 1 of data by the cyclic string
// (rotation) that starts at each position and stores the result in Indices.
//   data      - block to sort, blockSize bytes
//   blockSize - must not exceed the blockSizeMax given to Create(); the work
//               buffer is laid out here as Indices | counters | Groups | Flags
// The first pass buckets positions by their first 2 bytes; each following pass
// doubles NumSortedBytes and refines the still unsorted groups with SortGroup().
// returns: Groups[0], the final group number of position 0 (0 for an empty block)
UInt32 CBlockSorter::Sort(const Byte *data, UInt32 blockSize)
{
  BlockSize = blockSize;
  // Nothing to sort. Without this guard "blockSize - 1" below wraps around
  // and the loops run far beyond data and the work buffer.
  if (blockSize == 0)
    return 0;

  UInt32 *counters = Indices + blockSize;
  Groups = counters + kNumHashValues;
  Flags = Groups + blockSize;
  UInt32 i;

  // Radix-Sort for 2 bytes
  for (i = 0; i < kNumHashValues; i++)
    counters[i] = 0;
  for (i = 0; i < blockSize - 1; i++)
    counters[((UInt32)data[i] << 8) | data[i + 1]]++;
  // i == blockSize - 1 here: the last position pairs with data[0] (cyclic)
  counters[((UInt32)data[i] << 8) | data[0]]++;
  {
    {
      // Start with every flag set: "same group as the next element"
      UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
      for (i = 0; i < numWords; i++)
        Flags[i] = kAllFlags;
    }

    // Turn the counts into start offsets and clear the flag of the last
    // element of every non-empty bucket.
    UInt32 sum = 0;
    for (i = 0; i < kNumHashValues; i++)
    {
      UInt32 groupSize = counters[i];
      if (groupSize > 0)
      {
        UInt32 t = sum + groupSize - 1;
        // (UInt32)1: shifting a signed 1 by 31 would overflow int
        Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
        sum += groupSize;
      }
      counters[i] = sum - groupSize;
    }

    // Group of a position = start offset of its bucket
    for (i = 0; i < blockSize - 1; i++)
      Groups[i] = counters[((UInt32)data[i] << 8) | data[i + 1]];
    Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];

    // Distribute the positions into their buckets (advances counters)
    for (i = 0; i < blockSize - 1; i++)
      Indices[counters[((UInt32)data[i] << 8) | data[i + 1]]++] = i;
    Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
  }

  // mask = half of the smallest power of two (>= 2) that is >= blockSize;
  // it is the first pivot and half-range for SortGroup().
  // NOTE: with a 32-bit UInt32 this loop does not terminate for
  // blockSize > 0x80000000, because mask wraps to 0.
  UInt32 mask;
  for (mask = 2; mask < blockSize; mask <<= 1);
  mask >>= 1;

  for (NumSortedBytes = kNumHashBytes; true; NumSortedBytes <<= 1)
  {
    UInt32 newLimit = 0; // non-zero when some group still needs another pass
    for (i = 0; i < blockSize;)
    {
      if ((Flags[i >> kNumFlagsBits] & ((UInt32)1 << (i & kFlagsMask))) == 0)
      {
        // Cleared flag at the start: a finished group of one element
        i++;
        continue;
      }
      // The group extends up to and including the first cleared flag
      UInt32 groupSize;
      for(groupSize = 1;
          (Flags[(i + groupSize) >> kNumFlagsBits] & ((UInt32)1 << ((i + groupSize) & kFlagsMask))) != 0;
          groupSize++);
      groupSize++;

      if (NumSortedBytes >= blockSize)
        // The whole rotation has been compared already: the remaining elements
        // are equal, so make every one of them a group of its own.
        for (UInt32 j = 0; j < groupSize; j++)
        {
          UInt32 t = (i + j);
          Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
          Groups[Indices[t]] = t;
        }
      else
        if (SortGroup(i, groupSize, mask, mask) != 0)
          newLimit = i + groupSize;
      i += groupSize;
    }
    if (newLimit == 0)
      break;
  }
  return Groups[0];
}
}

29
7zip/Compress/BWT/BlockSort.h Executable file
View File

@@ -0,0 +1,29 @@
// BlockSort.h
#ifndef __BLOCKSORT_H
#define __BLOCKSORT_H
#include "Common/Types.h"
namespace NCompress {
class CBlockSorter
{
UInt32 *Groups;
UInt32 *Flags;
UInt32 BlockSize;
UInt32 NumSortedBytes;
UInt32 BlockSizeMax;
UInt32 SortGroup(UInt32 groupOffset, UInt32 groupSize, UInt32 mask, UInt32 maskSize);
public:
UInt32 *Indices;
CBlockSorter(): Indices(0) {}
~CBlockSorter() { Free(); }
bool Create(UInt32 blockSizeMax);
void Free();
UInt32 Sort(const Byte *data, UInt32 blockSize);
};
}
#endif

140
7zip/Compress/BWT/Mtf8.h Executable file
View File

@@ -0,0 +1,140 @@
// Mtf8.h
#ifndef __MTF8_H
#define __MTF8_H
#include "Common/Types.h"
namespace NCompress {
class CMtf8Encoder
{
public:
Byte Buffer[256];
int FindAndMove(Byte v)
{
int pos;
for (pos = 0; Buffer[pos] != v; pos++);
int resPos = pos;
for (; pos >= 8; pos -= 8)
{
Buffer[pos] = Buffer[pos - 1];
Buffer[pos - 1] = Buffer[pos - 2];
Buffer[pos - 2] = Buffer[pos - 3];
Buffer[pos - 3] = Buffer[pos - 4];
Buffer[pos - 4] = Buffer[pos - 5];
Buffer[pos - 5] = Buffer[pos - 6];
Buffer[pos - 6] = Buffer[pos - 7];
Buffer[pos - 7] = Buffer[pos - 8];
}
for (; pos > 0; pos--)
Buffer[pos] = Buffer[pos - 1];
Buffer[0] = v;
return resPos;
}
};
// Move-to-front decoder state: Buffer holds the symbols in their current order.
class CMtf8Decoder
{
public:
  Byte Buffer[256];

  // Nothing to set up in this implementation; size is not used.
  void Init(int size) {}

  // Symbol currently at the front of the list.
  Byte GetHead() const { return Buffer[0]; }

  // Returns the symbol stored at index pos and moves it to Buffer[0],
  // shifting the symbols that were in front of it one slot back.
  Byte GetAndMove(int pos)
  {
    Byte *cur = Buffer + pos;
    const Byte symbol = *cur;

    // Shift Buffer[0 .. pos - 1] up by one, 8 bytes per step where possible
    int left = pos;
    while (left >= 8)
    {
      cur[0] = cur[-1];
      cur[-1] = cur[-2];
      cur[-2] = cur[-3];
      cur[-3] = cur[-4];
      cur[-4] = cur[-5];
      cur[-5] = cur[-6];
      cur[-6] = cur[-7];
      cur[-7] = cur[-8];
      cur -= 8;
      left -= 8;
    }
    while (left > 0)
    {
      cur[0] = cur[-1];
      cur--;
      left--;
    }
    Buffer[0] = symbol;
    return symbol;
  }
};
/*
const int kSmallSize = 64;
class CMtf8Decoder
{
Byte SmallBuffer[kSmallSize];
int SmallSize;
Byte Counts[16];
int Size;
public:
Byte Buffer[256];
Byte GetHead() const
{
if (SmallSize > 0)
return SmallBuffer[kSmallSize - SmallSize];
return Buffer[0];
}
void Init(int size)
{
Size = size;
SmallSize = 0;
for (int i = 0; i < 16; i++)
{
Counts[i] = ((size >= 16) ? 16 : size);
size -= Counts[i];
}
}
Byte GetAndMove(int pos)
{
if (pos < SmallSize)
{
Byte *p = SmallBuffer + kSmallSize - SmallSize;
Byte res = p[pos];
for (; pos > 0; pos--)
p[pos] = p[pos - 1];
SmallBuffer[kSmallSize - SmallSize] = res;
return res;
}
if (SmallSize == kSmallSize)
{
int i = Size - 1;
int g = 16;
do
{
g--;
int offset = (g << 4);
for (int t = Counts[g] - 1; t >= 0; t--, i--)
Buffer[i] = Buffer[offset + t];
}
while(g != 0);
for (i = kSmallSize - 1; i >= 0; i--)
Buffer[i] = SmallBuffer[i];
Init(Size);
}
pos -= SmallSize;
int g;
for (g = 0; pos >= Counts[g]; g++)
pos -= Counts[g];
int offset = (g << 4);
Byte res = Buffer[offset + pos];
for (pos; pos < 16 - 1; pos++)
Buffer[offset + pos] = Buffer[offset + pos + 1];
SmallSize++;
SmallBuffer[kSmallSize - SmallSize] = res;
Counts[g]--;
return res;
}
};
*/
}
#endif

6
7zip/Compress/BWT/StdAfx.h Executable file
View File

@@ -0,0 +1,6 @@
// StdAfx.h
#ifndef __STDAFX_H
#define __STDAFX_H
#endif