/////////////////////////////////////////////////////////////////////////// // // Copyright (c) 2009-2014 DreamWorks Animation LLC. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of DreamWorks Animation nor the names of // its contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // /////////////////////////////////////////////////////////////////////////// #include "ImfFastHuf.h" #include #include #include #include #include OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER // // Adapted from hufUnpackEncTable - // We don't need to reconstruct the code book, just the encoded // lengths for each symbol. From the lengths, we can build the // base + offset tables. This should be a bit more efficient // for sparse code books. // // table - ptr to the start of the code length data. Will be // updated as we decode data // // numBytes - size of the encoded table (I think)? // // minSymbol - smallest symbol in the code book // // maxSymbol - largest symbol in the code book. // // rleSymbol - the symbol to trigger RLE in the encoded bitstream // FastHufDecoder::FastHufDecoder (const char *&table, int numBytes, int minSymbol, int maxSymbol, int rleSymbol) : _rleSymbol (rleSymbol), _numSymbols (0), _minCodeLength (255), _maxCodeLength (0), _idToSymbol (0) { // // List of symbols that we find with non-zero code lengths // (listed in the order we find them). Store these in the // same format as the code book stores codes + lengths - // low 6 bits are the length, everything above that is // the symbol. // std::vector symbols; // // The 'base' table is the minimum code at each code length. base[i] // is the smallest code (numerically) of length i. // Int64 base[MAX_CODE_LEN + 1]; // // The 'offset' table is the position (in sorted order) of the first id // of a given code lenght. Array is indexed by code length, like base. // Int64 offset[MAX_CODE_LEN + 1]; // // Count of how many codes at each length there are. Array is // indexed by code length, like base and offset. // size_t codeCount[MAX_CODE_LEN + 1]; for (int i = 0; i <= MAX_CODE_LEN; ++i) { codeCount[i] = 0; base[i] = 0xffffffffffffffffULL; offset[i] = 0; } // // Count the number of codes, the min/max code lengths, the number of // codes with each length, and record symbols with non-zero code // length as we find them. // const char *currByte = table; Int64 currBits = 0; int currBitCount = 0; const int SHORT_ZEROCODE_RUN = 59; const int LONG_ZEROCODE_RUN = 63; const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN; for (Int64 symbol = minSymbol; symbol <= maxSymbol; symbol++) { if (currByte - table > numBytes) { throw Iex::InputExc ("Error decoding Huffman table " "(Truncated table data)."); } // // Next code length - either: // 0-58 (literal code length) // 59-62 (various lengths runs of 0) // 63 (run of n 0's, with n is the next 8 bits) // Int64 codeLen = readBits (6, currBits, currBitCount, currByte); if (codeLen == (Int64) LONG_ZEROCODE_RUN) { if (currByte - table > numBytes) { throw Iex::InputExc ("Error decoding Huffman table " "(Truncated table data)."); } int runLen = readBits (8, currBits, currBitCount, currByte) + SHORTEST_LONG_RUN; if (symbol + runLen > maxSymbol + 1) { throw Iex::InputExc ("Error decoding Huffman table " "(Run beyond end of table)."); } symbol += runLen - 1; } else if (codeLen >= (Int64) SHORT_ZEROCODE_RUN) { int runLen = codeLen - SHORT_ZEROCODE_RUN + 2; if (symbol + runLen > maxSymbol + 1) { throw Iex::InputExc ("Error decoding Huffman table " "(Run beyond end of table)."); } symbol += runLen - 1; } else if (codeLen != 0) { symbols.push_back ((symbol << 6) | (codeLen & 63)); if (codeLen < _minCodeLength) _minCodeLength = codeLen; if (codeLen > _maxCodeLength) _maxCodeLength = codeLen; codeCount[codeLen]++; } } for (int i = 0; i < MAX_CODE_LEN; ++i) _numSymbols += codeCount[i]; table = currByte; // // Compute base - once we have the code length counts, there // is a closed form solution for this // { double* countTmp = new double[_maxCodeLength+1]; for (int l = _minCodeLength; l <= _maxCodeLength; ++l) { countTmp[l] = (double)codeCount[l] * (double)(2 << (_maxCodeLength-l)); } for (int l = _minCodeLength; l <= _maxCodeLength; ++l) { double tmp = 0; for (int k =l + 1; k <= _maxCodeLength; ++k) tmp += countTmp[k]; tmp /= (double)(2 << (_maxCodeLength - l)); base[l] = (Int64)ceil (tmp); } delete [] countTmp; } // // Compute offset - these are the positions of the first // id (not symbol) that has length [i] // offset[_maxCodeLength] = 0; for (int i= _maxCodeLength - 1; i >= _minCodeLength; i--) offset[i] = offset[i + 1] + codeCount[i + 1]; // // Allocate and fill the symbol-to-id mapping. Smaller Ids should be // mapped to less-frequent symbols (which have longer codes). Use // the offset table to tell us where the id's for a given code // length start off. // _idToSymbol = new int[_numSymbols]; Int64 mapping[MAX_CODE_LEN + 1]; for (int i = 0; i < MAX_CODE_LEN + 1; ++i) mapping[i] = -1; for (int i = _minCodeLength; i <= _maxCodeLength; ++i) mapping[i] = offset[i]; for (std::vector::const_iterator i = symbols.begin(); i != symbols.end(); ++i) { int codeLen = *i & 63; int symbol = *i >> 6; if (mapping[codeLen] >= _numSymbols) throw Iex::InputExc ("Huffman decode error " "(Invalid symbol in header)."); _idToSymbol[mapping[codeLen]] = symbol; mapping[codeLen]++; } buildTables(base, offset); } FastHufDecoder::~FastHufDecoder() { delete[] _idToSymbol; } // // Static check if the decoder is enabled. // // ATM, I only have access to little endian hardware for testing, // so I'm not entirely sure that we are reading fom the bit stream // properly on BE. // // If you happen to have more obscure hardware, check that the // byte swapping in refill() is happening sensable, add an endian // check if needed, and fix the preprocessor magic here. // #define READ64(c) \ ((Int64)(c)[0] << 56) | ((Int64)(c)[1] << 48) | ((Int64)(c)[2] << 40) | \ ((Int64)(c)[3] << 32) | ((Int64)(c)[4] << 24) | ((Int64)(c)[5] << 16) | \ ((Int64)(c)[6] << 8) | ((Int64)(c)[7] ) #ifdef __INTEL_COMPILER // ICC built-in swap for LE hosts #if defined (__i386__) || defined(__x86_64__) #undef READ64 #define READ64(c) _bswap64 (*(const Int64*)(c)) #endif #endif bool FastHufDecoder::enabled() { #if defined(__INTEL_COMPILER) || defined(__GNUC__) // // Enabled for ICC, GCC: // __i386__ -> x86 // __x86_64__ -> 64-bit x86 // #if defined (__i386__) || defined(__x86_64__) return true; #else return false; #endif #elif defined (_MSC_VER) // // Enabled for Visual Studio: // _M_IX86 -> x86 // _M_X64 -> 64bit x86 #if defined (_M_IX86) || defined(_M_X64) return true; #else return false; #endif #else // // Unknown compiler - Be safe and disable. // return false; #endif } // // // Built the acceleration tables for lookups on the upper bits // as well as the 'LJ' tables. // void FastHufDecoder::buildTables (Int64 *base, Int64 *offset) { // // Build the 'left justified' base table, by shifting base left.. // for (int i = 0; i <= MAX_CODE_LEN; ++i) { if (base[i] != 0xffffffffffffffffULL) { _ljBase[i] = base[i] << (64 - i); } else { // // Unused code length - insert dummy values // _ljBase[i] = 0xffffffffffffffffULL; } } // // Combine some terms into a big fat constant, which for // lack of a better term we'll call the 'left justified' // offset table (because it serves the same function // as 'offset', when using the left justified base table. // for (int i = 0; i <= MAX_CODE_LEN; ++i) _ljOffset[i] = offset[i] - (_ljBase[i] >> (64 - i)); // // Build the acceleration tables for the lookups of // short codes ( <= TABLE_LOOKUP_BITS long) // for (Int64 i = 0; i < 1 << TABLE_LOOKUP_BITS; ++i) { Int64 value = i << (64 - TABLE_LOOKUP_BITS); _tableSymbol[i] = 0xffff; _tableCodeLen[i] = 0; for (int codeLen = _minCodeLength; codeLen <= _maxCodeLength; ++codeLen) { if (_ljBase[codeLen] <= value) { _tableCodeLen[i] = codeLen; Int64 id = _ljOffset[codeLen] + (value >> (64 - codeLen)); if (id < _numSymbols) { _tableSymbol[i] = _idToSymbol[id]; } else { throw Iex::InputExc ("Huffman decode error " "(Overrun)."); } break; } } } // // Store the smallest value in the table that points to real data. // This should be the entry for the largest length that has // valid data (in our case, non-dummy _ljBase) // int minIdx = TABLE_LOOKUP_BITS; while (minIdx > 0 && _ljBase[minIdx] == 0xffffffffffffffffULL) minIdx--; if (minIdx < 0) { // // Error, no codes with lengths 0-TABLE_LOOKUP_BITS used. // Set the min value such that the table is never tested. // _tableMin = 0xffffffffffffffffULL; } else { _tableMin = _ljBase[minIdx]; } } // // For decoding, we're holding onto 2 Int64's. // // The first (buffer), holds the next bits from the bitstream to be // decoded. For certain paths in the decoder, we only need TABLE_LOOKUP_BITS // valid bits to decode the next symbol. For other paths, we need a full // 64-bits to decode a symbol. // // When we need to refill 'buffer', we could pull bits straight from // the bitstream. But this is very slow and requires lots of book keeping // (what's the next bit in the next byte?). Instead, we keep another Int64 // around that we use to refill from. While this doesn't cut down on the // book keeping (still need to know how many valid bits), it does cut // down on some of the bit shifting crazy and byte access. // // The refill Int64 (bufferBack) gets left-shifted after we've pulled // off bits. If we run out of bits in the input bit stream, we just // shift in 0's to bufferBack. // // The refill act takes numBits from the top of bufferBack and sticks // them in the bottom of buffer. If there arn't enough bits in bufferBack, // it gets refilled (to 64-bits) from the input bitstream. // inline void FastHufDecoder::refill (Int64 &buffer, int numBits, // number of bits to refill Int64 &bufferBack, // the next 64-bits, to refill from int &bufferBackNumBits, // number of bits left in bufferBack const unsigned char *&currByte, // current byte in the bitstream int &currBitsLeft) // number of bits left in the bitsream { // // Refill bits into the bottom of buffer, from the top of bufferBack. // Always top up buffer to be completely full. // buffer |= bufferBack >> (64 - numBits); if (bufferBackNumBits < numBits) { numBits -= bufferBackNumBits; // // Refill all of bufferBack from the bitstream. Either grab // a full 64-bit chunk, or whatever bytes are left. If we // don't have 64-bits left, pad with 0's. // if (currBitsLeft >= 64) { bufferBack = READ64 (currByte); bufferBackNumBits = 64; currByte += sizeof (Int64); currBitsLeft -= 8 * sizeof (Int64); } else { bufferBack = 0; bufferBackNumBits = 64; Int64 shift = 56; while (currBitsLeft > 0) { bufferBack |= ((Int64)(*currByte)) << shift; currByte++; shift -= 8; currBitsLeft -= 8; } // // At this point, currBitsLeft might be negative, just because // we're subtracting whole bytes. To keep anyone from freaking // out, zero the counter. // if (currBitsLeft < 0) currBitsLeft = 0; } buffer |= bufferBack >> (64 - numBits); } bufferBack = bufferBack << numBits; bufferBackNumBits -= numBits; // // We can have cases where the previous shift of bufferBack is << 64 - // in which case no shift occurs. The bit count math still works though, // so if we don't have any bits left, zero out bufferBack. // if (bufferBackNumBits == 0) bufferBack = 0; } // // Read the next few bits out of a bitstream. Will be given a backing buffer // (buffer) that may still have data left over from previous reads // (bufferNumBits). Bitstream pointer (currByte) will be advanced when needed. // inline Int64 FastHufDecoder::readBits (int numBits, Int64 &buffer, // c int &bufferNumBits, // lc const char *&currByte) // in { while (bufferNumBits < numBits) { buffer = (buffer << 8) | *(unsigned char*)(currByte++); bufferNumBits += 8; } bufferNumBits -= numBits; return (buffer >> bufferNumBits) & ((1 << numBits) - 1); } // // Decode using a the 'One-Shift' strategy for decoding, with a // small-ish table to accelerate decoding of short codes. // // If possible, try looking up codes into the acceleration table. // This has a few benifits - there's no search involved; We don't // need an additional lookup to map id to symbol; we don't need // a full 64-bits (so less refilling). // void FastHufDecoder::decode (const unsigned char *src, int numSrcBits, unsigned short *dst, int numDstElems) { if (numSrcBits < 128) throw Iex::InputExc ("Error choosing Huffman decoder implementation " "(insufficient number of bits)."); // // Current position (byte/bit) in the src data stream // (after the first buffer fill) // const unsigned char *currByte = src + 2 * sizeof (Int64); numSrcBits -= 8 * 2 * sizeof (Int64); // // 64-bit buffer holding the current bits in the stream // Int64 buffer = READ64 (src); int bufferNumBits = 64; // // 64-bit buffer holding the next bits in the stream // Int64 bufferBack = READ64 ((src + sizeof (Int64))); int bufferBackNumBits = 64; int dstIdx = 0; while (dstIdx < numDstElems) { int codeLen; int symbol; // // Test if we can be table accelerated. If so, directly // lookup the output symbol. Otherwise, we need to fall // back to searching for the code. // // If we're doing table lookups, we don't really need // a re-filled buffer, so long as we have TABLE_LOOKUP_BITS // left. But for a search, we do need a refilled table. // if (_tableMin <= buffer) { int tableIdx = buffer >> (64 - TABLE_LOOKUP_BITS); // // For invalid codes, _tableCodeLen[] should return 0. This // will cause the decoder to get stuck in the current spot // until we run out of elements, then barf that the codestream // is bad. So we don't need to stick a condition like // if (codeLen > _maxCodeLength) in this inner. // codeLen = _tableCodeLen[tableIdx]; symbol = _tableSymbol[tableIdx]; } else { if (bufferNumBits < 64) { refill (buffer, 64 - bufferNumBits, bufferBack, bufferBackNumBits, currByte, numSrcBits); bufferNumBits = 64; } // // Brute force search: // Find the smallest length where _ljBase[length] <= buffer // codeLen = TABLE_LOOKUP_BITS + 1; while (_ljBase[codeLen] > buffer && codeLen <= _maxCodeLength) codeLen++; if (codeLen > _maxCodeLength) { throw Iex::InputExc ("Huffman decode error " "(Decoded an invalid symbol)."); } Int64 id = _ljOffset[codeLen] + (buffer >> (64 - codeLen)); if (id < _numSymbols) { symbol = _idToSymbol[id]; } else { throw Iex::InputExc ("Huffman decode error " "(Decoded an invalid symbol)."); } } // // Shift over bit stream, and update the bit count in the buffer // buffer = buffer << codeLen; bufferNumBits -= codeLen; // // If we recieved a RLE symbol (_rleSymbol), then we need // to read ahead 8 bits to know how many times to repeat // the previous symbol. Need to ensure we at least have // 8 bits of data in the buffer // if (symbol == _rleSymbol) { if (bufferNumBits < 8) { refill (buffer, 64 - bufferNumBits, bufferBack, bufferBackNumBits, currByte, numSrcBits); bufferNumBits = 64; } int rleCount = buffer >> 56; if (dstIdx < 1) { throw Iex::InputExc ("Huffman decode error (RLE code " "with no previous symbol)."); } if (dstIdx + rleCount > numDstElems) { throw Iex::InputExc ("Huffman decode error (Symbol run " "beyond expected output buffer length)."); } if (rleCount <= 0) { throw Iex::InputExc("Huffman decode error" " (Invalid RLE length)"); } for (int i = 0; i < rleCount; ++i) dst[dstIdx + i] = dst[dstIdx - 1]; dstIdx += rleCount; buffer = buffer << 8; bufferNumBits -= 8; } else { dst[dstIdx] = symbol; dstIdx++; } // // refill bit stream buffer if we're below the number of // bits needed for a table lookup // if (bufferNumBits < TABLE_LOOKUP_BITS) { refill (buffer, 64 - bufferNumBits, bufferBack, bufferBackNumBits, currByte, numSrcBits); bufferNumBits = 64; } } if (numSrcBits != 0) { throw Iex::InputExc ("Huffman decode error (Compressed data remains " "after filling expected output buffer)."); } } OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT