zdeflate.cpp

00001 // zdeflate.cpp - written and placed in the public domain by Wei Dai
00002 
00003 // Many of the algorithms and tables used here came from the deflate implementation
00004 // by Jean-loup Gailly, which was included in Crypto++ 4.0 and earlier. I completely
00005 // rewrote it in order to fix a bug that I could not figure out. This code
00006 // is less clever, but hopefully more understandable and maintainable.
00007 
00008 #include "pch.h"
00009 #include "zdeflate.h"
00010 #include <functional>
00011 
00012 NAMESPACE_BEGIN(CryptoPP)
00013 
00014 using namespace std;
00015 
00016 LowFirstBitWriter::LowFirstBitWriter(BufferedTransformation *attachment)
00017         : Filter(attachment), m_counting(false), m_buffer(0), m_bitsBuffered(0), m_bytesBuffered(0)
00018 {
00019 }
00020 
00021 void LowFirstBitWriter::StartCounting()
00022 {
00023         assert(!m_counting);
00024         m_counting = true;
00025         m_bitCount = 0;
00026 }
00027 
00028 unsigned long LowFirstBitWriter::FinishCounting()
00029 {
00030         assert(m_counting);
00031         m_counting = false;
00032         return m_bitCount;
00033 }
00034 
00035 void LowFirstBitWriter::PutBits(unsigned long value, unsigned int length)
00036 {
00037         if (m_counting)
00038                 m_bitCount += length;
00039         else
00040         {
00041                 m_buffer |= value << m_bitsBuffered;
00042                 m_bitsBuffered += length;
00043                 assert(m_bitsBuffered <= sizeof(unsigned long)*8);
00044                 while (m_bitsBuffered >= 8)
00045                 {
00046                         m_outputBuffer[m_bytesBuffered++] = (byte)m_buffer;
00047                         if (m_bytesBuffered == m_outputBuffer.size())
00048                         {
00049                                 AttachedTransformation()->PutModifiable(m_outputBuffer, m_bytesBuffered);
00050                                 m_bytesBuffered = 0;
00051                         }
00052                         m_buffer >>= 8;
00053                         m_bitsBuffered -= 8;
00054                 }
00055         }
00056 }
00057 
00058 void LowFirstBitWriter::FlushBitBuffer()
00059 {
00060         if (m_counting)
00061                 m_bitCount += 8*(m_bitsBuffered > 0);
00062         else
00063         {
00064                 if (m_bytesBuffered > 0)
00065                 {
00066                         AttachedTransformation()->PutModifiable(m_outputBuffer, m_bytesBuffered);
00067                         m_bytesBuffered = 0;
00068                 }
00069                 if (m_bitsBuffered > 0)
00070                 {
00071                         AttachedTransformation()->Put((byte)m_buffer);
00072                         m_buffer = 0;
00073                         m_bitsBuffered = 0;
00074                 }
00075         }
00076 }
00077 
00078 void LowFirstBitWriter::ClearBitBuffer()
00079 {
00080         m_buffer = 0;
00081         m_bytesBuffered = 0;
00082         m_bitsBuffered = 0;
00083 }
00084 
00085 HuffmanEncoder::HuffmanEncoder(const unsigned int *codeBits, unsigned int nCodes)
00086 {
00087         Initialize(codeBits, nCodes);
00088 }
00089 
00090 struct HuffmanNode
00091 {
00092         size_t symbol;
00093         union {size_t parent; unsigned depth, freq;};
00094 };
00095 
00096 struct FreqLessThan
00097 {
00098         inline bool operator()(unsigned int lhs, const HuffmanNode &rhs) {return lhs < rhs.freq;}
00099         inline bool operator()(const HuffmanNode &lhs, const HuffmanNode &rhs) const {return lhs.freq < rhs.freq;}
00100         // needed for MSVC .NET 2005
00101         inline bool operator()(const HuffmanNode &lhs, unsigned int rhs) {return lhs.freq < rhs;}
00102 };
00103 
00104 void HuffmanEncoder::GenerateCodeLengths(unsigned int *codeBits, unsigned int maxCodeBits, const unsigned int *codeCounts, size_t nCodes)
00105 {
00106         assert(nCodes > 0);
00107         assert(nCodes <= ((size_t)1 << maxCodeBits));
00108 
00109         size_t i;
00110         SecBlockWithHint<HuffmanNode, 2*286> tree(nCodes);
00111         for (i=0; i<nCodes; i++)
00112         {
00113                 tree[i].symbol = i;
00114                 tree[i].freq = codeCounts[i];
00115         }
00116         sort(tree.begin(), tree.end(), FreqLessThan());
00117         size_t treeBegin = upper_bound(tree.begin(), tree.end(), 0, FreqLessThan()) - tree.begin();
00118         if (treeBegin == nCodes)
00119         {       // special case for no codes
00120                 fill(codeBits, codeBits+nCodes, 0);
00121                 return;
00122         }
00123         tree.resize(nCodes + nCodes - treeBegin - 1);
00124 
00125         size_t leastLeaf = treeBegin, leastInterior = nCodes;
00126         for (i=nCodes; i<tree.size(); i++)
00127         {
00128                 size_t least;
00129                 least = (leastLeaf == nCodes || (leastInterior < i && tree[leastInterior].freq < tree[leastLeaf].freq)) ? leastInterior++ : leastLeaf++;
00130                 tree[i].freq = tree[least].freq;
00131                 tree[least].parent = i;
00132                 least = (leastLeaf == nCodes || (leastInterior < i && tree[leastInterior].freq < tree[leastLeaf].freq)) ? leastInterior++ : leastLeaf++;
00133                 tree[i].freq += tree[least].freq;
00134                 tree[least].parent = i;
00135         }
00136 
00137         tree[tree.size()-1].depth = 0;
00138         if (tree.size() >= 2)
00139                 for (i=tree.size()-2; i>=nCodes; i--)
00140                         tree[i].depth = tree[tree[i].parent].depth + 1;
00141         unsigned int sum = 0;
00142         SecBlockWithHint<unsigned int, 15+1> blCount(maxCodeBits+1);
00143         fill(blCount.begin(), blCount.end(), 0);
00144         for (i=treeBegin; i<nCodes; i++)
00145         {
00146                 size_t depth = STDMIN(maxCodeBits, tree[tree[i].parent].depth + 1);
00147                 blCount[depth]++;
00148                 sum += 1 << (maxCodeBits - depth);
00149         }
00150 
00151         unsigned int overflow = sum > (unsigned int)(1 << maxCodeBits) ? sum - (1 << maxCodeBits) : 0;
00152 
00153         while (overflow--)
00154         {
00155                 unsigned int bits = maxCodeBits-1;
00156                 while (blCount[bits] == 0)
00157                         bits--;
00158                 blCount[bits]--;
00159                 blCount[bits+1] += 2;
00160                 assert(blCount[maxCodeBits] > 0);
00161                 blCount[maxCodeBits]--;
00162         }
00163 
00164         for (i=0; i<treeBegin; i++)
00165                 codeBits[tree[i].symbol] = 0;
00166         unsigned int bits = maxCodeBits;
00167         for (i=treeBegin; i<nCodes; i++)
00168         {
00169                 while (blCount[bits] == 0)
00170                         bits--;
00171                 codeBits[tree[i].symbol] = bits;
00172                 blCount[bits]--;
00173         }
00174         assert(blCount[bits] == 0);
00175 }
00176 
00177 void HuffmanEncoder::Initialize(const unsigned int *codeBits, unsigned int nCodes)
00178 {
00179         assert(nCodes > 0);
00180         unsigned int maxCodeBits = *max_element(codeBits, codeBits+nCodes);
00181         if (maxCodeBits == 0)
00182                 return;         // assume this object won't be used
00183 
00184         SecBlockWithHint<unsigned int, 15+1> blCount(maxCodeBits+1);
00185         fill(blCount.begin(), blCount.end(), 0);
00186         unsigned int i;
00187         for (i=0; i<nCodes; i++)
00188                 blCount[codeBits[i]]++;
00189 
00190         code_t code = 0;
00191         SecBlockWithHint<code_t, 15+1> nextCode(maxCodeBits+1);
00192         nextCode[1] = 0;
00193         for (i=2; i<=maxCodeBits; i++)
00194         {
00195                 code = (code + blCount[i-1]) << 1;
00196                 nextCode[i] = code;
00197         }
00198         assert(maxCodeBits == 1 || code == (1 << maxCodeBits) - blCount[maxCodeBits]);
00199 
00200         m_valueToCode.resize(nCodes);
00201         for (i=0; i<nCodes; i++)
00202         {
00203                 unsigned int len = m_valueToCode[i].len = codeBits[i];
00204                 if (len != 0)
00205                         m_valueToCode[i].code = BitReverse(nextCode[len]++) >> (8*sizeof(code_t)-len);
00206         }
00207 }
00208 
00209 inline void HuffmanEncoder::Encode(LowFirstBitWriter &writer, value_t value) const
00210 {
00211         assert(m_valueToCode[value].len > 0);
00212         writer.PutBits(m_valueToCode[value].code, m_valueToCode[value].len);
00213 }
00214 
00215 Deflator::Deflator(BufferedTransformation *attachment, int deflateLevel, int log2WindowSize, bool detectUncompressible)
00216         : LowFirstBitWriter(attachment)
00217 {
00218         InitializeStaticEncoders();
00219         IsolatedInitialize(MakeParameters("DeflateLevel", deflateLevel)("Log2WindowSize", log2WindowSize)("DetectUncompressible", detectUncompressible));
00220 }
00221 
00222 Deflator::Deflator(const NameValuePairs &parameters, BufferedTransformation *attachment)
00223         : LowFirstBitWriter(attachment)
00224         , m_deflateLevel(-1)
00225 {
00226         InitializeStaticEncoders();
00227         IsolatedInitialize(parameters);
00228 }
00229 
00230 void Deflator::InitializeStaticEncoders()
00231 {
00232         unsigned int codeLengths[288];
00233         fill(codeLengths + 0, codeLengths + 144, 8);
00234         fill(codeLengths + 144, codeLengths + 256, 9);
00235         fill(codeLengths + 256, codeLengths + 280, 7);
00236         fill(codeLengths + 280, codeLengths + 288, 8);
00237         m_staticLiteralEncoder.Initialize(codeLengths, 288);
00238         fill(codeLengths + 0, codeLengths + 32, 5);
00239         m_staticDistanceEncoder.Initialize(codeLengths, 32);
00240 }
00241 
00242 void Deflator::IsolatedInitialize(const NameValuePairs &parameters)
00243 {
00244         int log2WindowSize = parameters.GetIntValueWithDefault("Log2WindowSize", DEFAULT_LOG2_WINDOW_SIZE);
00245         if (!(MIN_LOG2_WINDOW_SIZE <= log2WindowSize && log2WindowSize <= MAX_LOG2_WINDOW_SIZE))
00246                 throw InvalidArgument("Deflator: " + IntToString(log2WindowSize) + " is an invalid window size");
00247 
00248         m_log2WindowSize = log2WindowSize;
00249         DSIZE = 1 << m_log2WindowSize;
00250         DMASK = DSIZE - 1;
00251         HSIZE = 1 << m_log2WindowSize;
00252         HMASK = HSIZE - 1;
00253         m_byteBuffer.New(2*DSIZE);
00254         m_head.New(HSIZE);
00255         m_prev.New(DSIZE);
00256         m_matchBuffer.New(DSIZE/2);
00257         Reset(true);
00258 
00259         SetDeflateLevel(parameters.GetIntValueWithDefault("DeflateLevel", DEFAULT_DEFLATE_LEVEL));
00260         bool detectUncompressible = parameters.GetValueWithDefault("DetectUncompressible", true);
00261         m_compressibleDeflateLevel = detectUncompressible ? m_deflateLevel : 0;
00262 }
00263 
00264 void Deflator::Reset(bool forceReset)
00265 {
00266         if (forceReset)
00267                 ClearBitBuffer();
00268         else
00269                 assert(m_bitsBuffered == 0);
00270 
00271         m_headerWritten = false;
00272         m_matchAvailable = false;
00273         m_dictionaryEnd = 0;
00274         m_stringStart = 0;
00275         m_lookahead = 0;
00276         m_minLookahead = MAX_MATCH;
00277         m_matchBufferEnd = 0;
00278         m_blockStart = 0;
00279         m_blockLength = 0;
00280 
00281         m_detectCount = 1;
00282         m_detectSkip = 0;
00283 
00284         // m_prev will be initialized automaticly in InsertString
00285         fill(m_head.begin(), m_head.end(), 0);
00286 
00287         fill(m_literalCounts.begin(), m_literalCounts.end(), 0);
00288         fill(m_distanceCounts.begin(), m_distanceCounts.end(), 0);
00289 }
00290 
00291 void Deflator::SetDeflateLevel(int deflateLevel)
00292 {
00293         if (!(MIN_DEFLATE_LEVEL <= deflateLevel && deflateLevel <= MAX_DEFLATE_LEVEL))
00294                 throw InvalidArgument("Deflator: " + IntToString(deflateLevel) + " is an invalid deflate level");
00295 
00296         if (deflateLevel == m_deflateLevel)
00297                 return;
00298 
00299         EndBlock(false);
00300 
00301         static const unsigned int configurationTable[10][4] = {
00302                 /*      good lazy nice chain */
00303                 /* 0 */ {0,    0,  0,    0},  /* store only */
00304                 /* 1 */ {4,    3,  8,    4},  /* maximum speed, no lazy matches */
00305                 /* 2 */ {4,    3, 16,    8},
00306                 /* 3 */ {4,    3, 32,   32},
00307                 /* 4 */ {4,    4, 16,   16},  /* lazy matches */
00308                 /* 5 */ {8,   16, 32,   32},
00309                 /* 6 */ {8,   16, 128, 128},
00310                 /* 7 */ {8,   32, 128, 256},
00311                 /* 8 */ {32, 128, 258, 1024},
00312                 /* 9 */ {32, 258, 258, 4096}}; /* maximum compression */
00313 
00314         GOOD_MATCH = configurationTable[deflateLevel][0];
00315         MAX_LAZYLENGTH = configurationTable[deflateLevel][1];
00316         MAX_CHAIN_LENGTH = configurationTable[deflateLevel][3];
00317 
00318         m_deflateLevel = deflateLevel;
00319 }
00320 
00321 unsigned int Deflator::FillWindow(const byte *str, size_t length)
00322 {
00323         unsigned int maxBlockSize = (unsigned int)STDMIN(2UL*DSIZE, 0xffffUL);
00324 
00325         if (m_stringStart >= maxBlockSize - MAX_MATCH)
00326         {
00327                 if (m_blockStart < DSIZE)
00328                         EndBlock(false);
00329 
00330                 memcpy(m_byteBuffer, m_byteBuffer + DSIZE, DSIZE);
00331 
00332                 m_dictionaryEnd = m_dictionaryEnd < DSIZE ? 0 : m_dictionaryEnd-DSIZE;
00333                 assert(m_stringStart >= DSIZE);
00334                 m_stringStart -= DSIZE;
00335                 assert(!m_matchAvailable || m_previousMatch >= DSIZE);
00336                 m_previousMatch -= DSIZE;
00337                 assert(m_blockStart >= DSIZE);
00338                 m_blockStart -= DSIZE;
00339 
00340                 unsigned int i;
00341 
00342                 for (i=0; i<HSIZE; i++)
00343                         m_head[i] = SaturatingSubtract(m_head[i], DSIZE);
00344 
00345                 for (i=0; i<DSIZE; i++)
00346                         m_prev[i] = SaturatingSubtract(m_prev[i], DSIZE);
00347         }
00348 
00349         assert(maxBlockSize > m_stringStart+m_lookahead);
00350         unsigned int accepted = UnsignedMin(maxBlockSize-(m_stringStart+m_lookahead), length);
00351         assert(accepted > 0);
00352         memcpy(m_byteBuffer + m_stringStart + m_lookahead, str, accepted);
00353         m_lookahead += accepted;
00354         return accepted;
00355 }
00356 
00357 inline unsigned int Deflator::ComputeHash(const byte *str) const
00358 {
00359         assert(str+3 <= m_byteBuffer + m_stringStart + m_lookahead);
00360         return ((str[0] << 10) ^ (str[1] << 5) ^ str[2]) & HMASK;
00361 }
00362 
00363 unsigned int Deflator::LongestMatch(unsigned int &bestMatch) const
00364 {
00365         assert(m_previousLength < MAX_MATCH);
00366 
00367         bestMatch = 0;
00368         unsigned int bestLength = STDMAX(m_previousLength, (unsigned int)MIN_MATCH-1);
00369         if (m_lookahead <= bestLength)
00370                 return 0;
00371 
00372         const byte *scan = m_byteBuffer + m_stringStart, *scanEnd = scan + STDMIN((unsigned int)MAX_MATCH, m_lookahead);
00373         unsigned int limit = m_stringStart > (DSIZE-MAX_MATCH) ? m_stringStart - (DSIZE-MAX_MATCH) : 0;
00374         unsigned int current = m_head[ComputeHash(scan)];
00375 
00376         unsigned int chainLength = MAX_CHAIN_LENGTH;
00377         if (m_previousLength >= GOOD_MATCH)
00378                 chainLength >>= 2;
00379 
00380         while (current > limit && --chainLength > 0)
00381         {
00382                 const byte *match = m_byteBuffer + current;
00383                 assert(scan + bestLength < m_byteBuffer + m_stringStart + m_lookahead);
00384                 if (scan[bestLength-1] == match[bestLength-1] && scan[bestLength] == match[bestLength] && scan[0] == match[0] && scan[1] == match[1])
00385                 {
00386                         assert(scan[2] == match[2]);
00387                         unsigned int len = (unsigned int)(
00388 #if defined(_STDEXT_BEGIN) && !(defined(_MSC_VER) && _MSC_VER < 1400) && !defined(_STLPORT_VERSION)
00389                                 stdext::unchecked_mismatch
00390 #else
00391                                 std::mismatch
00392 #endif
00393                                 (scan+3, scanEnd, match+3).first - scan);
00394                         assert(len != bestLength);
00395                         if (len > bestLength)
00396                         {
00397                                 bestLength = len;
00398                                 bestMatch = current;
00399                                 if (len == (scanEnd - scan))
00400                                         break;
00401                         }
00402                 }
00403                 current = m_prev[current & DMASK];
00404         }
00405         return (bestMatch > 0) ? bestLength : 0;
00406 }
00407 
00408 inline void Deflator::InsertString(unsigned int start)
00409 {
00410         unsigned int hash = ComputeHash(m_byteBuffer + start);
00411         m_prev[start & DMASK] = m_head[hash];
00412         m_head[hash] = start;
00413 }
00414 
00415 void Deflator::ProcessBuffer()
00416 {
00417         if (!m_headerWritten)
00418         {
00419                 WritePrestreamHeader();
00420                 m_headerWritten = true;
00421         }
00422 
00423         if (m_deflateLevel == 0)
00424         {
00425                 m_stringStart += m_lookahead;
00426                 m_lookahead = 0;
00427                 m_blockLength = m_stringStart - m_blockStart;
00428                 m_matchAvailable = false;
00429                 return;
00430         }
00431 
00432         while (m_lookahead > m_minLookahead)
00433         {
00434                 while (m_dictionaryEnd < m_stringStart && m_dictionaryEnd+3 <= m_stringStart+m_lookahead)
00435                         InsertString(m_dictionaryEnd++);
00436 
00437                 if (m_matchAvailable)
00438                 {
00439                         unsigned int matchPosition, matchLength;
00440                         bool usePreviousMatch;
00441                         if (m_previousLength >= MAX_LAZYLENGTH)
00442                                 usePreviousMatch = true;
00443                         else
00444                         {
00445                                 matchLength = LongestMatch(matchPosition);
00446                                 usePreviousMatch = (matchLength == 0);
00447                         }
00448                         if (usePreviousMatch)
00449                         {
00450                                 MatchFound(m_stringStart-1-m_previousMatch, m_previousLength);
00451                                 m_stringStart += m_previousLength-1;
00452                                 m_lookahead -= m_previousLength-1;
00453                                 m_matchAvailable = false;
00454                         }
00455                         else
00456                         {
00457                                 m_previousLength = matchLength;
00458                                 m_previousMatch = matchPosition;
00459                                 LiteralByte(m_byteBuffer[m_stringStart-1]);
00460                                 m_stringStart++;
00461                                 m_lookahead--;
00462                         }
00463                 }
00464                 else
00465                 {
00466                         m_previousLength = 0;
00467                         m_previousLength = LongestMatch(m_previousMatch);
00468                         if (m_previousLength)
00469                                 m_matchAvailable = true;
00470                         else
00471                                 LiteralByte(m_byteBuffer[m_stringStart]);
00472                         m_stringStart++;
00473                         m_lookahead--;
00474                 }
00475 
00476                 assert(m_stringStart - (m_blockStart+m_blockLength) == (unsigned int)m_matchAvailable);
00477         }
00478 
00479         if (m_minLookahead == 0 && m_matchAvailable)
00480         {
00481                 LiteralByte(m_byteBuffer[m_stringStart-1]);
00482                 m_matchAvailable = false;
00483         }
00484 }
00485 
00486 size_t Deflator::Put2(const byte *str, size_t length, int messageEnd, bool blocking)
00487 {
00488         if (!blocking)
00489                 throw BlockingInputOnly("Deflator");
00490 
00491         size_t accepted = 0;
00492         while (accepted < length)
00493         {
00494                 unsigned int newAccepted = FillWindow(str+accepted, length-accepted);
00495                 ProcessBuffer();
00496                 // call ProcessUncompressedData() after WritePrestreamHeader()
00497                 ProcessUncompressedData(str+accepted, newAccepted);
00498                 accepted += newAccepted;
00499         }
00500         assert(accepted == length);
00501 
00502         if (messageEnd)
00503         {
00504                 m_minLookahead = 0;
00505                 ProcessBuffer();
00506                 EndBlock(true);
00507                 FlushBitBuffer();
00508                 WritePoststreamTail();
00509                 Reset();
00510         }
00511 
00512         Output(0, NULL, 0, messageEnd, blocking);
00513         return 0;
00514 }
00515 
00516 bool Deflator::IsolatedFlush(bool hardFlush, bool blocking)
00517 {
00518         if (!blocking)
00519                 throw BlockingInputOnly("Deflator");
00520 
00521         m_minLookahead = 0;
00522         ProcessBuffer();
00523         m_minLookahead = MAX_MATCH;
00524         EndBlock(false);
00525         if (hardFlush)
00526                 EncodeBlock(false, STORED);
00527         return false;
00528 }
00529 
00530 void Deflator::LiteralByte(byte b)
00531 {
00532         if (m_matchBufferEnd == m_matchBuffer.size())
00533                 EndBlock(false);
00534 
00535         m_matchBuffer[m_matchBufferEnd++].literalCode = b;
00536         m_literalCounts[b]++;
00537         m_blockLength++;
00538 }
00539 
00540 void Deflator::MatchFound(unsigned int distance, unsigned int length)
00541 {
00542         if (m_matchBufferEnd == m_matchBuffer.size())
00543                 EndBlock(false);
00544 
00545         static const unsigned int lengthCodes[] = {
00546                 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268,
00547                 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272,
00548                 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274,
00549                 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276,
00550                 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
00551                 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
00552                 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279,
00553                 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280,
00554                 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
00555                 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
00556                 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
00557                 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
00558                 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
00559                 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
00560                 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
00561                 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285};
00562         static const unsigned int lengthBases[] = {3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258};
00563         static const unsigned int distanceBases[30] = 
00564                 {1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577};
00565 
00566         EncodedMatch &m = m_matchBuffer[m_matchBufferEnd++];
00567         assert(length >= 3);
00568         unsigned int lengthCode = lengthCodes[length-3];
00569         m.literalCode = lengthCode;
00570         m.literalExtra = length - lengthBases[lengthCode-257];
00571         unsigned int distanceCode = (unsigned int)(upper_bound(distanceBases, distanceBases+30, distance) - distanceBases - 1);
00572         m.distanceCode = distanceCode;
00573         m.distanceExtra = distance - distanceBases[distanceCode];
00574 
00575         m_literalCounts[lengthCode]++;
00576         m_distanceCounts[distanceCode]++;
00577         m_blockLength += length;
00578 }
00579 
00580 inline unsigned int CodeLengthEncode(const unsigned int *begin, 
00581                                                                          const unsigned int *end, 
00582                                                                          const unsigned int *& p, 
00583                                                                          unsigned int &extraBits, 
00584                                                                          unsigned int &extraBitsLength)
00585 {
00586         unsigned int v = *p;
00587         if ((end-p) >= 3)
00588         {
00589                 const unsigned int *oldp = p;
00590                 if (v==0 && p[1]==0 && p[2]==0)
00591                 {
00592                         for (p=p+3; p!=end && *p==0 && p!=oldp+138; p++) {}
00593                         unsigned int repeat = (unsigned int)(p - oldp);
00594                         if (repeat <= 10)
00595                         {
00596                                 extraBits = repeat-3;
00597                                 extraBitsLength = 3;
00598                                 return 17;
00599                         }
00600                         else
00601                         {
00602                                 extraBits = repeat-11;
00603                                 extraBitsLength = 7;
00604                                 return 18;
00605                         }
00606                 }
00607                 else if (p!=begin && v==p[-1] && v==p[1] && v==p[2])
00608                 {
00609                         for (p=p+3; p!=end && *p==v && p!=oldp+6; p++) {}
00610                         unsigned int repeat = (unsigned int)(p - oldp);
00611                         extraBits = repeat-3;
00612                         extraBitsLength = 2;
00613                         return 16;
00614                 }
00615         }
00616         p++;
00617         extraBits = 0;
00618         extraBitsLength = 0;
00619         return v;
00620 }
00621 
00622 void Deflator::EncodeBlock(bool eof, unsigned int blockType)
00623 {
00624         PutBits(eof, 1);
00625         PutBits(blockType, 2);
00626 
00627         if (blockType == STORED)
00628         {
00629                 assert(m_blockStart + m_blockLength <= m_byteBuffer.size());
00630                 assert(m_blockLength <= 0xffff);
00631                 FlushBitBuffer();
00632                 AttachedTransformation()->PutWord16(m_blockLength, LITTLE_ENDIAN_ORDER);
00633                 AttachedTransformation()->PutWord16(~m_blockLength, LITTLE_ENDIAN_ORDER);
00634                 AttachedTransformation()->Put(m_byteBuffer + m_blockStart, m_blockLength);
00635         }
00636         else
00637         {
00638                 if (blockType == DYNAMIC)
00639                 {
00640 #if defined(_MSC_VER) && !defined(__MWERKS__) && (_MSC_VER <= 1300)
00641                         // VC60 and VC7 workaround: built-in reverse_iterator has two template parameters, Dinkumware only has one
00642                         typedef reverse_bidirectional_iterator<unsigned int *, unsigned int> RevIt;
00643 #elif defined(_RWSTD_NO_CLASS_PARTIAL_SPEC)
00644         typedef reverse_iterator<unsigned int *, random_access_iterator_tag, unsigned int> RevIt;
00645 #else
00646                         typedef reverse_iterator<unsigned int *> RevIt;
00647 #endif
00648 
00649                         FixedSizeSecBlock<unsigned int, 286> literalCodeLengths;
00650                         FixedSizeSecBlock<unsigned int, 30> distanceCodeLengths;
00651 
00652                         m_literalCounts[256] = 1;
00653                         HuffmanEncoder::GenerateCodeLengths(literalCodeLengths, 15, m_literalCounts, 286);
00654                         m_dynamicLiteralEncoder.Initialize(literalCodeLengths, 286);
00655                         unsigned int hlit = (unsigned int)(find_if(RevIt(literalCodeLengths.end()), RevIt(literalCodeLengths.begin()+257), bind2nd(not_equal_to<unsigned int>(), 0)).base() - (literalCodeLengths.begin()+257));
00656 
00657                         HuffmanEncoder::GenerateCodeLengths(distanceCodeLengths, 15, m_distanceCounts, 30);
00658                         m_dynamicDistanceEncoder.Initialize(distanceCodeLengths, 30);
00659                         unsigned int hdist = (unsigned int)(find_if(RevIt(distanceCodeLengths.end()), RevIt(distanceCodeLengths.begin()+1), bind2nd(not_equal_to<unsigned int>(), 0)).base() - (distanceCodeLengths.begin()+1));
00660 
00661                         SecBlockWithHint<unsigned int, 286+30> combinedLengths(hlit+257+hdist+1);
00662                         memcpy(combinedLengths, literalCodeLengths, (hlit+257)*sizeof(unsigned int));
00663                         memcpy(combinedLengths+hlit+257, distanceCodeLengths, (hdist+1)*sizeof(unsigned int));
00664 
00665                         FixedSizeSecBlock<unsigned int, 19> codeLengthCodeCounts, codeLengthCodeLengths;
00666                         fill(codeLengthCodeCounts.begin(), codeLengthCodeCounts.end(), 0);
00667                         const unsigned int *p = combinedLengths.begin(), *begin = combinedLengths.begin(), *end = combinedLengths.end();
00668                         while (p != end)
00669                         {
00670                                 unsigned int code, extraBits, extraBitsLength;
00671                                 code = CodeLengthEncode(begin, end, p, extraBits, extraBitsLength);
00672                                 codeLengthCodeCounts[code]++;
00673                         }
00674                         HuffmanEncoder::GenerateCodeLengths(codeLengthCodeLengths, 7, codeLengthCodeCounts, 19);
00675                         HuffmanEncoder codeLengthEncoder(codeLengthCodeLengths, 19);
00676                         static const unsigned int border[] = {    // Order of the bit length code lengths
00677                                 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
00678                         unsigned int hclen = 19;
00679                         while (hclen > 4 && codeLengthCodeLengths[border[hclen-1]] == 0)
00680                                 hclen--;
00681                         hclen -= 4;
00682 
00683                         PutBits(hlit, 5);
00684                         PutBits(hdist, 5);
00685                         PutBits(hclen, 4);
00686 
00687                         for (unsigned int i=0; i<hclen+4; i++)
00688                                 PutBits(codeLengthCodeLengths[border[i]], 3);
00689 
00690                         p = combinedLengths.begin();
00691                         while (p != end)
00692                         {
00693                                 unsigned int code, extraBits, extraBitsLength;
00694                                 code = CodeLengthEncode(begin, end, p, extraBits, extraBitsLength);
00695                                 codeLengthEncoder.Encode(*this, code);
00696                                 PutBits(extraBits, extraBitsLength);
00697                         }
00698                 }
00699 
00700                 static const unsigned int lengthExtraBits[] = {
00701                         0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
00702                         3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
00703                 static const unsigned int distanceExtraBits[] = {
00704                         0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
00705                         7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
00706                         12, 12, 13, 13};
00707 
00708                 const HuffmanEncoder &literalEncoder = (blockType == STATIC) ? m_staticLiteralEncoder : m_dynamicLiteralEncoder;
00709                 const HuffmanEncoder &distanceEncoder = (blockType == STATIC) ? m_staticDistanceEncoder : m_dynamicDistanceEncoder;
00710 
00711                 for (unsigned int i=0; i<m_matchBufferEnd; i++)
00712                 {
00713                         unsigned int literalCode = m_matchBuffer[i].literalCode;
00714                         literalEncoder.Encode(*this, literalCode);
00715                         if (literalCode >= 257)
00716                         {
00717                                 assert(literalCode <= 285);
00718                                 PutBits(m_matchBuffer[i].literalExtra, lengthExtraBits[literalCode-257]);
00719                                 unsigned int distanceCode = m_matchBuffer[i].distanceCode;
00720                                 distanceEncoder.Encode(*this, distanceCode);
00721                                 PutBits(m_matchBuffer[i].distanceExtra, distanceExtraBits[distanceCode]);
00722                         }
00723                 }
00724                 literalEncoder.Encode(*this, 256);      // end of block
00725         }
00726 }
00727 
00728 void Deflator::EndBlock(bool eof)
00729 {
00730         if (m_blockLength == 0 && !eof)
00731                 return;
00732 
00733         if (m_deflateLevel == 0)
00734         {
00735                 EncodeBlock(eof, STORED);
00736 
00737                 if (m_compressibleDeflateLevel > 0 && ++m_detectCount == m_detectSkip)
00738                 {
00739                         m_deflateLevel = m_compressibleDeflateLevel;
00740                         m_detectCount = 1;
00741                 }
00742         }
00743         else
00744         {
00745                 unsigned long storedLen = 8*((unsigned long)m_blockLength+4) + RoundUpToMultipleOf(m_bitsBuffered+3, 8U)-m_bitsBuffered;
00746 
00747                 StartCounting();
00748                 EncodeBlock(eof, STATIC);
00749                 unsigned long staticLen = FinishCounting();
00750 
00751                 unsigned long dynamicLen;
00752                 if (m_blockLength < 128 && m_deflateLevel < 8)
00753                         dynamicLen = ULONG_MAX;
00754                 else
00755                 {
00756                         StartCounting();
00757                         EncodeBlock(eof, DYNAMIC);
00758                         dynamicLen = FinishCounting();
00759                 }
00760 
00761                 if (storedLen <= staticLen && storedLen <= dynamicLen)
00762                 {
00763                         EncodeBlock(eof, STORED);
00764 
00765                         if (m_compressibleDeflateLevel > 0)
00766                         {
00767                                 if (m_detectSkip)
00768                                         m_deflateLevel = 0;
00769                                 m_detectSkip = m_detectSkip ? STDMIN(2*m_detectSkip, 128U) : 1;
00770                         }
00771                 }
00772                 else
00773                 {
00774                         if (staticLen <= dynamicLen)
00775                                 EncodeBlock(eof, STATIC);
00776                         else
00777                                 EncodeBlock(eof, DYNAMIC);
00778 
00779                         if (m_compressibleDeflateLevel > 0)
00780                                 m_detectSkip = 0;
00781                 }
00782         }
00783 
00784         m_matchBufferEnd = 0;
00785         m_blockStart += m_blockLength;
00786         m_blockLength = 0;
00787         fill(m_literalCounts.begin(), m_literalCounts.end(), 0);
00788         fill(m_distanceCounts.begin(), m_distanceCounts.end(), 0);
00789 }
00790 
00791 NAMESPACE_END

Generated on Fri Feb 6 00:56:26 2009 for Crypto++ by  doxygen 1.4.7