Crypto++: zdeflate.cpp Source File

00001 // zdeflate.cpp - written and placed in the public domain by Wei Dai
00002 
00003 // Many of the algorithms and tables used here came from the deflate implementation
00004 // by Jean-loup Gailly, which was included in Crypto++ 4.0 and earlier. I completely
00005 // rewrote it in order to fix a bug that I could not figure out. This code
00006 // is less clever, but hopefully more understandable and maintainable.
00007 
00008 #include "pch.h"
00009 #include "zdeflate.h"
00010 #include <functional>
00011 
00012 NAMESPACE_BEGIN(CryptoPP)
00013 
00014 using namespace std;
00015 
00016 LowFirstBitWriter::LowFirstBitWriter(BufferedTransformation *attachment)
00017         : Filter(attachment), m_counting(false), m_buffer(0), m_bitsBuffered(0), m_bytesBuffered(0)
00018 {
00019 }
00020 
00021 void LowFirstBitWriter::StartCounting()
00022 {
00023         assert(!m_counting);
00024         m_counting = true;
00025         m_bitCount = 0;
00026 }
00027 
00028 unsigned long LowFirstBitWriter::FinishCounting()
00029 {
00030         assert(m_counting);
00031         m_counting = false;
00032         return m_bitCount;
00033 }
00034 
00035 void LowFirstBitWriter::PutBits(unsigned long value, unsigned int length)
00036 {
00037         if (m_counting)
00038                 m_bitCount += length;
00039         else
00040         {
00041                 m_buffer |= value << m_bitsBuffered;
00042                 m_bitsBuffered += length;
00043                 assert(m_bitsBuffered <= sizeof(unsigned long)*8);
00044                 while (m_bitsBuffered >= 8)
00045                 {
00046                         m_outputBuffer[m_bytesBuffered++] = (byte)m_buffer;
00047                         if (m_bytesBuffered == m_outputBuffer.size())
00048                         {
00049                                 AttachedTransformation()->PutModifiable(m_outputBuffer, m_bytesBuffered);
00050                                 m_bytesBuffered = 0;
00051                         }
00052                         m_buffer >>= 8;
00053                         m_bitsBuffered -= 8;
00054                 }
00055         }
00056 }
00057 
00058 void LowFirstBitWriter::FlushBitBuffer()
00059 {
00060         if (m_counting)
00061                 m_bitCount += 8*(m_bitsBuffered > 0);
00062         else
00063         {
00064                 if (m_bytesBuffered > 0)
00065                 {
00066                         AttachedTransformation()->PutModifiable(m_outputBuffer, m_bytesBuffered);
00067                         m_bytesBuffered = 0;
00068                 }
00069                 if (m_bitsBuffered > 0)
00070                 {
00071                         AttachedTransformation()->Put((byte)m_buffer);
00072                         m_buffer = 0;
00073                         m_bitsBuffered = 0;
00074                 }
00075         }
00076 }
00077 
00078 void LowFirstBitWriter::ClearBitBuffer()
00079 {
00080         m_buffer = 0;
00081         m_bytesBuffered = 0;
00082         m_bitsBuffered = 0;
00083 }
00084 
00085 HuffmanEncoder::HuffmanEncoder(const unsigned int *codeBits, unsigned int nCodes)
00086 {
00087         Initialize(codeBits, nCodes);
00088 }
00089 
00090 struct HuffmanNode
00091 {
00092         size_t symbol;
00093         union {size_t parent; unsigned depth, freq;};
00094 };
00095 
00096 struct FreqLessThan
00097 {
00098         inline bool operator()(unsigned int lhs, const HuffmanNode &rhs) {return lhs < rhs.freq;}
00099         inline bool operator()(const HuffmanNode &lhs, const HuffmanNode &rhs) const {return lhs.freq < rhs.freq;}
00100         // needed for MSVC .NET 2005
00101         inline bool operator()(const HuffmanNode &lhs, unsigned int rhs) {return lhs.freq < rhs;}
00102 };
00103 
00104 void HuffmanEncoder::GenerateCodeLengths(unsigned int *codeBits, unsigned int maxCodeBits, const unsigned int *codeCounts, size_t nCodes)
00105 {
00106         assert(nCodes > 0);
00107         assert(nCodes <= ((size_t)1 << maxCodeBits));
00108 
00109         size_t i;
00110         SecBlockWithHint<HuffmanNode, 2*286> tree(nCodes);
00111         for (i=0; i<nCodes; i++)
00112         {
00113                 tree[i].symbol = i;
00114                 tree[i].freq = codeCounts[i];
00115         }
00116         sort(tree.begin(), tree.end(), FreqLessThan());
00117         size_t treeBegin = upper_bound(tree.begin(), tree.end(), 0, FreqLessThan()) - tree.begin();
00118         if (treeBegin == nCodes)
00119         {       // special case for no codes
00120                 fill(codeBits, codeBits+nCodes, 0);
00121                 return;
00122         }
00123         tree.resize(nCodes + nCodes - treeBegin - 1);
00124 
00125         size_t leastLeaf = treeBegin, leastInterior = nCodes;
00126         for (i=nCodes; i<tree.size(); i++)
00127         {
00128                 size_t least;
00129                 least = (leastLeaf == nCodes || (leastInterior < i && tree[leastInterior].freq < tree[leastLeaf].freq)) ? leastInterior++ : leastLeaf++;
00130                 tree[i].freq = tree[least].freq;
00131                 tree[least].parent = i;
00132                 least = (leastLeaf == nCodes || (leastInterior < i && tree[leastInterior].freq < tree[leastLeaf].freq)) ? leastInterior++ : leastLeaf++;
00133                 tree[i].freq += tree[least].freq;
00134                 tree[least].parent = i;
00135         }
00136 
00137         tree[tree.size()-1].depth = 0;
00138         if (tree.size() >= 2)
00139                 for (i=tree.size()-2; i>=nCodes; i--)
00140                         tree[i].depth = tree[tree[i].parent].depth + 1;
00141         unsigned int sum = 0;
00142         SecBlockWithHint<unsigned int, 15+1> blCount(maxCodeBits+1);
00143         fill(blCount.begin(), blCount.end(), 0);
00144         for (i=treeBegin; i<nCodes; i++)
00145         {
00146                 size_t depth = STDMIN(maxCodeBits, tree[tree[i].parent].depth + 1);
00147                 blCount[depth]++;
00148                 sum += 1 << (maxCodeBits - depth);
00149         }
00150 
00151         unsigned int overflow = sum > (unsigned int)(1 << maxCodeBits) ? sum - (1 << maxCodeBits) : 0;
00152 
00153         while (overflow--)
00154         {
00155                 unsigned int bits = maxCodeBits-1;
00156                 while (blCount[bits] == 0)
00157                         bits--;
00158                 blCount[bits]--;
00159                 blCount[bits+1] += 2;
00160                 assert(blCount[maxCodeBits] > 0);
00161                 blCount[maxCodeBits]--;
00162         }
00163 
00164         for (i=0; i<treeBegin; i++)
00165                 codeBits[tree[i].symbol] = 0;
00166         unsigned int bits = maxCodeBits;
00167         for (i=treeBegin; i<nCodes; i++)
00168         {
00169                 while (blCount[bits] == 0)
00170                         bits--;
00171                 codeBits[tree[i].symbol] = bits;
00172                 blCount[bits]--;
00173         }
00174         assert(blCount[bits] == 0);
00175 }
00176 
00177 void HuffmanEncoder::Initialize(const unsigned int *codeBits, unsigned int nCodes)
00178 {
00179         assert(nCodes > 0);
00180         unsigned int maxCodeBits = *max_element(codeBits, codeBits+nCodes);
00181         if (maxCodeBits == 0)
00182                 return;         // assume this object won't be used
00183 
00184         SecBlockWithHint<unsigned int, 15+1> blCount(maxCodeBits+1);
00185         fill(blCount.begin(), blCount.end(), 0);
00186         unsigned int i;
00187         for (i=0; i<nCodes; i++)
00188                 blCount[codeBits[i]]++;
00189 
00190         code_t code = 0;
00191         SecBlockWithHint<code_t, 15+1> nextCode(maxCodeBits+1);
00192         nextCode[1] = 0;
00193         for (i=2; i<=maxCodeBits; i++)
00194         {
00195                 code = (code + blCount[i-1]) << 1;
00196                 nextCode[i] = code;
00197         }
00198         assert(maxCodeBits == 1 || code == (1 << maxCodeBits) - blCount[maxCodeBits]);
00199 
00200         m_valueToCode.resize(nCodes);
00201         for (i=0; i<nCodes; i++)
00202         {
00203                 unsigned int len = m_valueToCode[i].len = codeBits[i];
00204                 if (len != 0)
00205                         m_valueToCode[i].code = BitReverse(nextCode[len]++) >> (8*sizeof(code_t)-len);
00206         }
00207 }
00208 
00209 inline void HuffmanEncoder::Encode(LowFirstBitWriter &writer, value_t value) const
00210 {
00211         assert(m_valueToCode[value].len > 0);
00212         writer.PutBits(m_valueToCode[value].code, m_valueToCode[value].len);
00213 }
00214 
00215 Deflator::Deflator(BufferedTransformation *attachment, int deflateLevel, int log2WindowSize, bool detectUncompressible)
00216         : LowFirstBitWriter(attachment)
00217 {
00218         InitializeStaticEncoders();
00219         IsolatedInitialize(MakeParameters("DeflateLevel", deflateLevel)("Log2WindowSize", log2WindowSize)("DetectUncompressible", detectUncompressible));
00220 }
00221 
00222 Deflator::Deflator(const NameValuePairs &parameters, BufferedTransformation *attachment)
00223         : LowFirstBitWriter(attachment)
00224 {
00225         InitializeStaticEncoders();
00226         IsolatedInitialize(parameters);
00227 }
00228 
00229 void Deflator::InitializeStaticEncoders()
00230 {
00231         unsigned int codeLengths[288];
00232         fill(codeLengths + 0, codeLengths + 144, 8);
00233         fill(codeLengths + 144, codeLengths + 256, 9);
00234         fill(codeLengths + 256, codeLengths + 280, 7);
00235         fill(codeLengths + 280, codeLengths + 288, 8);
00236         m_staticLiteralEncoder.Initialize(codeLengths, 288);
00237         fill(codeLengths + 0, codeLengths + 32, 5);
00238         m_staticDistanceEncoder.Initialize(codeLengths, 32);
00239 }
00240 
00241 void Deflator::IsolatedInitialize(const NameValuePairs &parameters)
00242 {
00243         int log2WindowSize = parameters.GetIntValueWithDefault("Log2WindowSize", DEFAULT_LOG2_WINDOW_SIZE);
00244         if (!(MIN_LOG2_WINDOW_SIZE <= log2WindowSize && log2WindowSize <= MAX_LOG2_WINDOW_SIZE))
00245                 throw InvalidArgument("Deflator: " + IntToString(log2WindowSize) + " is an invalid window size");
00246 
00247         m_log2WindowSize = log2WindowSize;
00248         DSIZE = 1 << m_log2WindowSize;
00249         DMASK = DSIZE - 1;
00250         HSIZE = 1 << m_log2WindowSize;
00251         HMASK = HSIZE - 1;
00252         m_byteBuffer.New(2*DSIZE);
00253         m_head.New(HSIZE);
00254         m_prev.New(DSIZE);
00255         m_matchBuffer.New(DSIZE/2);
00256         Reset(true);
00257 
00258         SetDeflateLevel(parameters.GetIntValueWithDefault("DeflateLevel", DEFAULT_DEFLATE_LEVEL));
00259         bool detectUncompressible = parameters.GetValueWithDefault("DetectUncompressible", true);
00260         m_compressibleDeflateLevel = detectUncompressible ? m_deflateLevel : 0;
00261 }
00262 
00263 void Deflator::Reset(bool forceReset)
00264 {
00265         if (forceReset)
00266                 ClearBitBuffer();
00267         else
00268                 assert(m_bitsBuffered == 0);
00269 
00270         m_headerWritten = false;
00271         m_matchAvailable = false;
00272         m_dictionaryEnd = 0;
00273         m_stringStart = 0;
00274         m_lookahead = 0;
00275         m_minLookahead = MAX_MATCH;
00276         m_matchBufferEnd = 0;
00277         m_blockStart = 0;
00278         m_blockLength = 0;
00279 
00280         m_detectCount = 1;
00281         m_detectSkip = 0;
00282 
00283         // m_prev will be initialized automaticly in InsertString
00284         fill(m_head.begin(), m_head.end(), 0);
00285 
00286         fill(m_literalCounts.begin(), m_literalCounts.end(), 0);
00287         fill(m_distanceCounts.begin(), m_distanceCounts.end(), 0);
00288 }
00289 
00290 void Deflator::SetDeflateLevel(int deflateLevel)
00291 {
00292         if (!(MIN_DEFLATE_LEVEL <= deflateLevel && deflateLevel <= MAX_DEFLATE_LEVEL))
00293                 throw InvalidArgument("Deflator: " + IntToString(deflateLevel) + " is an invalid deflate level");
00294 
00295         if (deflateLevel == m_deflateLevel)
00296                 return;
00297 
00298         EndBlock(false);
00299 
00300         static const unsigned int configurationTable[10][4] = {
00301                 /*      good lazy nice chain */
00302                 /* 0 */ {0,    0,  0,    0},  /* store only */
00303                 /* 1 */ {4,    3,  8,    4},  /* maximum speed, no lazy matches */
00304                 /* 2 */ {4,    3, 16,    8},
00305                 /* 3 */ {4,    3, 32,   32},
00306                 /* 4 */ {4,    4, 16,   16},  /* lazy matches */
00307                 /* 5 */ {8,   16, 32,   32},
00308                 /* 6 */ {8,   16, 128, 128},
00309                 /* 7 */ {8,   32, 128, 256},
00310                 /* 8 */ {32, 128, 258, 1024},
00311                 /* 9 */ {32, 258, 258, 4096}}; /* maximum compression */
00312 
00313         GOOD_MATCH = configurationTable[deflateLevel][0];
00314         MAX_LAZYLENGTH = configurationTable[deflateLevel][1];
00315         MAX_CHAIN_LENGTH = configurationTable[deflateLevel][3];
00316 
00317         m_deflateLevel = deflateLevel;
00318 }
00319 
00320 unsigned int Deflator::FillWindow(const byte *str, size_t length)
00321 {
00322         unsigned int maxBlockSize = (unsigned int)STDMIN(2UL*DSIZE, 0xffffUL);
00323 
00324         if (m_stringStart >= maxBlockSize - MAX_MATCH)
00325         {
00326                 if (m_blockStart < DSIZE)
00327                         EndBlock(false);
00328 
00329                 memcpy(m_byteBuffer, m_byteBuffer + DSIZE, DSIZE);
00330 
00331                 m_dictionaryEnd = m_dictionaryEnd < DSIZE ? 0 : m_dictionaryEnd-DSIZE;
00332                 assert(m_stringStart >= DSIZE);
00333                 m_stringStart -= DSIZE;
00334                 assert(!m_matchAvailable || m_previousMatch >= DSIZE);
00335                 m_previousMatch -= DSIZE;
00336                 assert(m_blockStart >= DSIZE);
00337                 m_blockStart -= DSIZE;
00338 
00339                 unsigned int i;
00340 
00341                 for (i=0; i<HSIZE; i++)
00342                         m_head[i] = SaturatingSubtract(m_head[i], DSIZE);
00343 
00344                 for (i=0; i<DSIZE; i++)
00345                         m_prev[i] = SaturatingSubtract(m_prev[i], DSIZE);
00346         }
00347 
00348         assert(maxBlockSize > m_stringStart+m_lookahead);
00349         unsigned int accepted = UnsignedMin(maxBlockSize-(m_stringStart+m_lookahead), length);
00350         assert(accepted > 0);
00351         memcpy(m_byteBuffer + m_stringStart + m_lookahead, str, accepted);
00352         m_lookahead += accepted;
00353         return accepted;
00354 }
00355 
00356 inline unsigned int Deflator::ComputeHash(const byte *str) const
00357 {
00358         assert(str+3 <= m_byteBuffer + m_stringStart + m_lookahead);
00359         return ((str[0] << 10) ^ (str[1] << 5) ^ str[2]) & HMASK;
00360 }
00361 
00362 unsigned int Deflator::LongestMatch(unsigned int &bestMatch) const
00363 {
00364         assert(m_previousLength < MAX_MATCH);
00365 
00366         bestMatch = 0;
00367         unsigned int bestLength = STDMAX(m_previousLength, (unsigned int)MIN_MATCH-1);
00368         if (m_lookahead <= bestLength)
00369                 return 0;
00370 
00371         const byte *scan = m_byteBuffer + m_stringStart, *scanEnd = scan + STDMIN((unsigned int)MAX_MATCH, m_lookahead);
00372         unsigned int limit = m_stringStart > (DSIZE-MAX_MATCH) ? m_stringStart - (DSIZE-MAX_MATCH) : 0;
00373         unsigned int current = m_head[ComputeHash(scan)];
00374 
00375         unsigned int chainLength = MAX_CHAIN_LENGTH;
00376         if (m_previousLength >= GOOD_MATCH)
00377                 chainLength >>= 2;
00378 
00379         while (current > limit && --chainLength > 0)
00380         {
00381                 const byte *match = m_byteBuffer + current;
00382                 assert(scan + bestLength < m_byteBuffer + m_stringStart + m_lookahead);
00383                 if (scan[bestLength-1] == match[bestLength-1] && scan[bestLength] == match[bestLength] && scan[0] == match[0] && scan[1] == match[1])
00384                 {
00385                         assert(scan[2] == match[2]);
00386                         unsigned int len = (unsigned int)(
00387 #if defined(_STDEXT_BEGIN) && !(defined(_MSC_VER) && _MSC_VER < 1400)
00388                                 stdext::unchecked_mismatch
00389 #else
00390                                 std::mismatch
00391 #endif
00392                                 (scan+3, scanEnd, match+3).first - scan);
00393                         assert(len != bestLength);
00394                         if (len > bestLength)
00395                         {
00396                                 bestLength = len;
00397                                 bestMatch = current;
00398                                 if (len == (scanEnd - scan))
00399                                         break;
00400                         }
00401                 }
00402                 current = m_prev[current & DMASK];
00403         }
00404         return (bestMatch > 0) ? bestLength : 0;
00405 }
00406 
00407 inline void Deflator::InsertString(unsigned int start)
00408 {
00409         unsigned int hash = ComputeHash(m_byteBuffer + start);
00410         m_prev[start & DMASK] = m_head[hash];
00411         m_head[hash] = start;
00412 }
00413 
00414 void Deflator::ProcessBuffer()
00415 {
00416         if (!m_headerWritten)
00417         {
00418                 WritePrestreamHeader();
00419                 m_headerWritten = true;
00420         }
00421 
00422         if (m_deflateLevel == 0)
00423         {
00424                 m_stringStart += m_lookahead;
00425                 m_lookahead = 0;
00426                 m_blockLength = m_stringStart - m_blockStart;
00427                 m_matchAvailable = false;
00428                 return;
00429         }
00430 
00431         while (m_lookahead > m_minLookahead)
00432         {
00433                 while (m_dictionaryEnd < m_stringStart && m_dictionaryEnd+3 <= m_stringStart+m_lookahead)
00434                         InsertString(m_dictionaryEnd++);
00435 
00436                 if (m_matchAvailable)
00437                 {
00438                         unsigned int matchPosition, matchLength;
00439                         bool usePreviousMatch;
00440                         if (m_previousLength >= MAX_LAZYLENGTH)
00441                                 usePreviousMatch = true;
00442                         else
00443                         {
00444                                 matchLength = LongestMatch(matchPosition);
00445                                 usePreviousMatch = (matchLength == 0);
00446                         }
00447                         if (usePreviousMatch)
00448                         {
00449                                 MatchFound(m_stringStart-1-m_previousMatch, m_previousLength);
00450                                 m_stringStart += m_previousLength-1;
00451                                 m_lookahead -= m_previousLength-1;
00452                                 m_matchAvailable = false;
00453                         }
00454                         else
00455                         {
00456                                 m_previousLength = matchLength;
00457                                 m_previousMatch = matchPosition;
00458                                 LiteralByte(m_byteBuffer[m_stringStart-1]);
00459                                 m_stringStart++;
00460                                 m_lookahead--;
00461                         }
00462                 }
00463                 else
00464                 {
00465                         m_previousLength = 0;
00466                         m_previousLength = LongestMatch(m_previousMatch);
00467                         if (m_previousLength)
00468                                 m_matchAvailable = true;
00469                         else
00470                                 LiteralByte(m_byteBuffer[m_stringStart]);
00471                         m_stringStart++;
00472                         m_lookahead--;
00473                 }
00474 
00475                 assert(m_stringStart - (m_blockStart+m_blockLength) == (unsigned int)m_matchAvailable);
00476         }
00477 
00478         if (m_minLookahead == 0 && m_matchAvailable)
00479         {
00480                 LiteralByte(m_byteBuffer[m_stringStart-1]);
00481                 m_matchAvailable = false;
00482         }
00483 }
00484 
00485 size_t Deflator::Put2(const byte *str, size_t length, int messageEnd, bool blocking)
00486 {
00487         if (!blocking)
00488                 throw BlockingInputOnly("Deflator");
00489 
00490         size_t accepted = 0;
00491         while (accepted < length)
00492         {
00493                 unsigned int newAccepted = FillWindow(str+accepted, length-accepted);
00494                 ProcessBuffer();
00495                 // call ProcessUncompressedData() after WritePrestreamHeader()
00496                 ProcessUncompressedData(str+accepted, newAccepted);
00497                 accepted += newAccepted;
00498         }
00499         assert(accepted == length);
00500 
00501         if (messageEnd)
00502         {
00503                 m_minLookahead = 0;
00504                 ProcessBuffer();
00505                 EndBlock(true);
00506                 FlushBitBuffer();
00507                 WritePoststreamTail();
00508                 Reset();
00509         }
00510 
00511         Output(0, NULL, 0, messageEnd, blocking);
00512         return 0;
00513 }
00514 
00515 bool Deflator::IsolatedFlush(bool hardFlush, bool blocking)
00516 {
00517         if (!blocking)
00518                 throw BlockingInputOnly("Deflator");
00519 
00520         m_minLookahead = 0;
00521         ProcessBuffer();
00522         m_minLookahead = MAX_MATCH;
00523         EndBlock(false);
00524         if (hardFlush)
00525                 EncodeBlock(false, STORED);
00526         return false;
00527 }
00528 
00529 void Deflator::LiteralByte(byte b)
00530 {
00531         if (m_matchBufferEnd == m_matchBuffer.size())
00532                 EndBlock(false);
00533 
00534         m_matchBuffer[m_matchBufferEnd++].literalCode = b;
00535         m_literalCounts[b]++;
00536         m_blockLength++;
00537 }
00538 
00539 void Deflator::MatchFound(unsigned int distance, unsigned int length)
00540 {
00541         if (m_matchBufferEnd == m_matchBuffer.size())
00542                 EndBlock(false);
00543 
00544         static const unsigned int lengthCodes[] = {
00545                 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268,
00546                 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272,
00547                 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274,
00548                 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276,
00549                 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
00550                 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
00551                 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279,
00552                 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280,
00553                 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
00554                 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
00555                 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
00556                 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
00557                 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
00558                 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
00559                 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
00560                 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285};
00561         static const unsigned int lengthBases[] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258};
00562         static const unsigned int distanceBases[30] = 
00563                 {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
00564 
00565         EncodedMatch &m = m_matchBuffer[m_matchBufferEnd++];
00566         assert(length >= 3);
00567         unsigned int lengthCode = lengthCodes[length-3];
00568         m.literalCode = lengthCode;
00569         m.literalExtra = length - lengthBases[lengthCode-257];
00570         unsigned int distanceCode = (unsigned int)(upper_bound(distanceBases, distanceBases+30, distance) - distanceBases - 1);
00571         m.distanceCode = distanceCode;
00572         m.distanceExtra = distance - distanceBases[distanceCode];
00573 
00574         m_literalCounts[lengthCode]++;
00575         m_distanceCounts[distanceCode]++;
00576         m_blockLength += length;
00577 }
00578 
00579 inline unsigned int CodeLengthEncode(const unsigned int *begin, 
00580                                                                          const unsigned int *end, 
00581                                                                          const unsigned int *& p, 
00582                                                                          unsigned int &extraBits, 
00583                                                                          unsigned int &extraBitsLength)
00584 {
00585         unsigned int v = *p;
00586         if ((end-p) >= 3)
00587         {
00588                 const unsigned int *oldp = p;
00589                 if (v==0 && p[1]==0 && p[2]==0)
00590                 {
00591                         for (p=p+3; p!=end && *p==0 && p!=oldp+138; p++) {}
00592                         unsigned int repeat = (unsigned int)(p - oldp);
00593                         if (repeat <= 10)
00594                         {
00595                                 extraBits = repeat-3;
00596                                 extraBitsLength = 3;
00597                                 return 17;
00598                         }
00599                         else
00600                         {
00601                                 extraBits = repeat-11;
00602                                 extraBitsLength = 7;
00603                                 return 18;
00604                         }
00605                 }
00606                 else if (p!=begin && v==p[-1] && v==p[1] && v==p[2])
00607                 {
00608                         for (p=p+3; p!=end && *p==v && p!=oldp+6; p++) {}
00609                         unsigned int repeat = (unsigned int)(p - oldp);
00610                         extraBits = repeat-3;
00611                         extraBitsLength = 2;
00612                         return 16;
00613                 }
00614         }
00615         p++;
00616         extraBits = 0;
00617         extraBitsLength = 0;
00618         return v;
00619 }
00620 
00621 void Deflator::EncodeBlock(bool eof, unsigned int blockType)
00622 {
00623         PutBits(eof, 1);
00624         PutBits(blockType, 2);
00625 
00626         if (blockType == STORED)
00627         {
00628                 assert(m_blockStart + m_blockLength <= m_byteBuffer.size());
00629                 assert(m_blockLength <= 0xffff);
00630                 FlushBitBuffer();
00631                 AttachedTransformation()->PutWord16(m_blockLength, LITTLE_ENDIAN_ORDER);
00632                 AttachedTransformation()->PutWord16(~m_blockLength, LITTLE_ENDIAN_ORDER);
00633                 AttachedTransformation()->Put(m_byteBuffer + m_blockStart, m_blockLength);
00634         }
00635         else
00636         {
00637                 if (blockType == DYNAMIC)
00638                 {
00639 #if defined(_MSC_VER) && !defined(__MWERKS__) && (_MSC_VER <= 1300)
00640                         // VC60 and VC7 workaround: built-in reverse_iterator has two template parameters, Dinkumware only has one
00641                         typedef reverse_bidirectional_iterator<unsigned int *, unsigned int> RevIt;
00642 #elif defined(_RWSTD_NO_CLASS_PARTIAL_SPEC)
00643         typedef reverse_iterator<unsigned int *, random_access_iterator_tag, unsigned int> RevIt;
00644 #else
00645                         typedef reverse_iterator<unsigned int *> RevIt;
00646 #endif
00647 
00648                         FixedSizeSecBlock<unsigned int, 286> literalCodeLengths;
00649                         FixedSizeSecBlock<unsigned int, 30> distanceCodeLengths;
00650 
00651                         m_literalCounts[256] = 1;
00652                         HuffmanEncoder::GenerateCodeLengths(literalCodeLengths, 15, m_literalCounts, 286);
00653                         m_dynamicLiteralEncoder.Initialize(literalCodeLengths, 286);
00654                         unsigned int hlit = (unsigned int)(find_if(RevIt(literalCodeLengths.end()), RevIt(literalCodeLengths.begin()+257), bind2nd(not_equal_to<unsigned int>(), 0)).base() - (literalCodeLengths.begin()+257));
00655 
00656                         HuffmanEncoder::GenerateCodeLengths(distanceCodeLengths, 15, m_distanceCounts, 30);
00657                         m_dynamicDistanceEncoder.Initialize(distanceCodeLengths, 30);
00658                         unsigned int hdist = (unsigned int)(find_if(RevIt(distanceCodeLengths.end()), RevIt(distanceCodeLengths.begin()+1), bind2nd(not_equal_to<unsigned int>(), 0)).base() - (distanceCodeLengths.begin()+1));
00659 
00660                         SecBlockWithHint<unsigned int, 286+30> combinedLengths(hlit+257+hdist+1);
00661                         memcpy(combinedLengths, literalCodeLengths, (hlit+257)*sizeof(unsigned int));
00662                         memcpy(combinedLengths+hlit+257, distanceCodeLengths, (hdist+1)*sizeof(unsigned int));
00663 
00664                         FixedSizeSecBlock<unsigned int, 19> codeLengthCodeCounts, codeLengthCodeLengths;
00665                         fill(codeLengthCodeCounts.begin(), codeLengthCodeCounts.end(), 0);
00666                         const unsigned int *p = combinedLengths.begin(), *begin = combinedLengths.begin(), *end = combinedLengths.end();
00667                         while (p != end)
00668                         {
00669                                 unsigned int code, extraBits, extraBitsLength;
00670                                 code = CodeLengthEncode(begin, end, p, extraBits, extraBitsLength);
00671                                 codeLengthCodeCounts[code]++;
00672                         }
00673                         HuffmanEncoder::GenerateCodeLengths(codeLengthCodeLengths, 7, codeLengthCodeCounts, 19);
00674                         HuffmanEncoder codeLengthEncoder(codeLengthCodeLengths, 19);
00675                         static const unsigned int border[] = {    // Order of the bit length code lengths
00676                                 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
00677                         unsigned int hclen = 19;
00678                         while (hclen > 4 && codeLengthCodeLengths[border[hclen-1]] == 0)
00679                                 hclen--;
00680                         hclen -= 4;
00681 
00682                         PutBits(hlit, 5);
00683                         PutBits(hdist, 5);
00684                         PutBits(hclen, 4);
00685 
00686                         for (unsigned int i=0; i<hclen+4; i++)
00687                                 PutBits(codeLengthCodeLengths[border[i]], 3);
00688 
00689                         p = combinedLengths.begin();
00690                         while (p != end)
00691                         {
00692                                 unsigned int code, extraBits, extraBitsLength;
00693                                 code = CodeLengthEncode(begin, end, p, extraBits, extraBitsLength);
00694                                 codeLengthEncoder.Encode(*this, code);
00695                                 PutBits(extraBits, extraBitsLength);
00696                         }
00697                 }
00698 
00699                 static const unsigned int lengthExtraBits[] = {
00700                         0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
00701                         3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
00702                 static const unsigned int distanceExtraBits[] = {
00703                         0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
00704                         7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
00705                         12, 12, 13, 13};
00706 
00707                 const HuffmanEncoder &literalEncoder = (blockType == STATIC) ? m_staticLiteralEncoder : m_dynamicLiteralEncoder;
00708                 const HuffmanEncoder &distanceEncoder = (blockType == STATIC) ? m_staticDistanceEncoder : m_dynamicDistanceEncoder;
00709 
00710                 for (unsigned int i=0; i<m_matchBufferEnd; i++)
00711                 {
00712                         unsigned int literalCode = m_matchBuffer[i].literalCode;
00713                         literalEncoder.Encode(*this, literalCode);
00714                         if (literalCode >= 257)
00715                         {
00716                                 assert(literalCode <= 285);
00717                                 PutBits(m_matchBuffer[i].literalExtra, lengthExtraBits[literalCode-257]);
00718                                 unsigned int distanceCode = m_matchBuffer[i].distanceCode;
00719                                 distanceEncoder.Encode(*this, distanceCode);
00720                                 PutBits(m_matchBuffer[i].distanceExtra, distanceExtraBits[distanceCode]);
00721                         }
00722                 }
00723                 literalEncoder.Encode(*this, 256);      // end of block
00724         }
00725 }
00726 
00727 void Deflator::EndBlock(bool eof)
00728 {
00729         if (m_blockLength == 0 && !eof)
00730                 return;
00731 
00732         if (m_deflateLevel == 0)
00733         {
00734                 EncodeBlock(eof, STORED);
00735 
00736                 if (m_compressibleDeflateLevel > 0 && ++m_detectCount == m_detectSkip)
00737                 {
00738                         m_deflateLevel = m_compressibleDeflateLevel;
00739                         m_detectCount = 1;
00740                 }
00741         }
00742         else
00743         {
00744                 unsigned long storedLen = 8*((unsigned long)m_blockLength+4) + RoundUpToMultipleOf(m_bitsBuffered+3, 8U)-m_bitsBuffered;
00745 
00746                 StartCounting();
00747                 EncodeBlock(eof, STATIC);
00748                 unsigned long staticLen = FinishCounting();
00749 
00750                 unsigned long dynamicLen;
00751                 if (m_blockLength < 128 && m_deflateLevel < 8)
00752                         dynamicLen = ULONG_MAX;
00753                 else
00754                 {
00755                         StartCounting();
00756                         EncodeBlock(eof, DYNAMIC);
00757                         dynamicLen = FinishCounting();
00758                 }
00759 
00760                 if (storedLen <= staticLen && storedLen <= dynamicLen)
00761                 {
00762                         EncodeBlock(eof, STORED);
00763 
00764                         if (m_compressibleDeflateLevel > 0)
00765                         {
00766                                 if (m_detectSkip)
00767                                         m_deflateLevel = 0;
00768                                 m_detectSkip = m_detectSkip ? STDMIN(2*m_detectSkip, 128U) : 1;
00769                         }
00770                 }
00771                 else
00772                 {
00773                         if (staticLen <= dynamicLen)
00774                                 EncodeBlock(eof, STATIC);
00775                         else
00776                                 EncodeBlock(eof, DYNAMIC);
00777 
00778                         if (m_compressibleDeflateLevel > 0)
00779                                 m_detectSkip = 0;
00780                 }
00781         }
00782 
00783         m_matchBufferEnd = 0;
00784         m_blockStart += m_blockLength;
00785         m_blockLength = 0;
00786         fill(m_literalCounts.begin(), m_literalCounts.end(), 0);
00787         fill(m_distanceCounts.begin(), m_distanceCounts.end(), 0);
00788 }
00789 
00790 NAMESPACE_END