EVOLUTION-MANAGER
Edit File: Huffman.cpp
/* Copyright 2015 Esri Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. A local copy of the license and additional notices are located with the source distribution at: http://github.com/Esri/lerc/ Contributors: Thomas Maurer */ #include "Huffman.h" #include "BitStuffer2.h" #include <queue> #include <cstring> #include <algorithm> using namespace std; NAMESPACE_LERC_START // -------------------------------------------------------------------------- ; bool Huffman::ComputeCodes(const vector<int>& histo) { if (histo.empty() || histo.size() >= m_maxHistoSize) return false; priority_queue<Node, vector<Node>, less<Node> > pq; int numNodes = 0; int size = (int)histo.size(); for (int i = 0; i < size; i++) // add all leaf nodes if (histo[i] > 0) pq.push(Node((short)i, histo[i])); if (pq.size() < 2) // histo has only 0 or 1 bin that is not empty; quit Huffman and give it to Lerc return false; while (pq.size() > 1) // build the Huffman tree { Node* child0 = new Node(pq.top()); numNodes++; pq.pop(); Node* child1 = new Node(pq.top()); numNodes++; pq.pop(); pq.push(Node(child0, child1)); } m_codeTable.resize(size); memset(&m_codeTable[0], 0, size * sizeof(m_codeTable[0])); if (!pq.top().TreeToLUT(0, 0, m_codeTable)) // fill the LUT return false; Node nodeNonConst = pq.top(); nodeNonConst.FreeTree(numNodes); if (numNodes != 0) // check the ref count return false; if (!ConvertCodesToCanonical()) return false; return true; } // -------------------------------------------------------------------------- ; bool Huffman::ComputeCompressedSize(const std::vector<int>& histo, int& numBytes, double& avgBpp) const { if (histo.empty() || histo.size() >= m_maxHistoSize) return false; numBytes = 0; if (!ComputeNumBytesCodeTable(numBytes)) // header and code table return false; int numBits = 0, numElem = 0; int size = (int)histo.size(); for (int i = 0; i < size; i++) if (histo[i] > 0) { numBits += histo[i] * m_codeTable[i].first; numElem += histo[i]; } /* to please Coverity about potential divide by zero below */ if( numElem == 0 ) return false; int numUInts = ((((numBits + 7) >> 3) + 3) >> 2) + 1; // add one more as the decode LUT can read ahead numBytes += 4 * numUInts; // data huffman coded avgBpp = 8 * numBytes / (double)numElem; return true; } // -------------------------------------------------------------------------- ; bool Huffman::SetCodes(const vector<pair<short, unsigned int> >& codeTable) { if (codeTable.empty() || codeTable.size() >= m_maxHistoSize) return false; m_codeTable = codeTable; return true; } // -------------------------------------------------------------------------- ; bool Huffman::WriteCodeTable(Byte** ppByte) const { if (!ppByte) return false; int i0, i1, maxLen; if (!GetRange(i0, i1, maxLen)) return false; int size = (int)m_codeTable.size(); vector<unsigned int> dataVec(i1 - i0, 0); for (int i = i0; i < i1; i++) { int k = GetIndexWrapAround(i, size); dataVec[i - i0] = (unsigned int)m_codeTable[k].first; } // header vector<int> intVec; intVec.push_back(3); // huffman version, 3 uses canonical huffman codes intVec.push_back(size); intVec.push_back(i0); // code range intVec.push_back(i1); Byte* ptr = *ppByte; for (size_t i = 0; i < intVec.size(); i++) { *((int*)ptr) = intVec[i]; ptr += sizeof(int); } BitStuffer2 bitStuffer2; if (!bitStuffer2.EncodeSimple(&ptr, dataVec)) // code lengths, bit stuffed return false; if (!BitStuffCodes(&ptr, i0, i1)) // variable length codes, bit stuffed return false; *ppByte = ptr; return true; } // -------------------------------------------------------------------------- ; bool Huffman::ReadCodeTable(const Byte** ppByte) { if (!ppByte || !(*ppByte)) return false; const Byte* ptr = *ppByte; int version = *((int*)ptr); // version ptr += sizeof(int); if (version < 2) // allow forward compatibility return false; vector<int> intVec(4, 0); for (size_t i = 1; i < intVec.size(); i++) { intVec[i] = *((int*)ptr); ptr += sizeof(int); } int size = intVec[1]; int i0 = intVec[2]; int i1 = intVec[3]; if (i0 >= i1 || size > (int)m_maxHistoSize) return false; vector<unsigned int> dataVec(i1 - i0, 0); BitStuffer2 bitStuffer2; if (!bitStuffer2.Decode(&ptr, dataVec)) // unstuff the code lengths return false; m_codeTable.resize(size); memset(&m_codeTable[0], 0, size * sizeof(m_codeTable[0])); for (int i = i0; i < i1; i++) { int k = GetIndexWrapAround(i, size); m_codeTable[k].first = (short)dataVec[i - i0]; } if (!BitUnStuffCodes(&ptr, i0, i1)) // unstuff the codes return false; *ppByte = ptr; return true; } // -------------------------------------------------------------------------- ; bool Huffman::BuildTreeFromCodes(int& numBitsLUT) { int i0, i1, maxLen; if (!GetRange(i0, i1, maxLen)) return false; // build decode LUT using max of 12 bits int size = (int)m_codeTable.size(); bool bNeedTree = maxLen > m_maxNumBitsLUT; numBitsLUT = min(maxLen, m_maxNumBitsLUT); m_decodeLUT.clear(); const int lutMaxSize = 1 << numBitsLUT; m_decodeLUT.assign(lutMaxSize, pair<short, short>((short)-1, (short)-1)); for (int i = i0; i < i1; i++) { int k = GetIndexWrapAround(i, size); int len = m_codeTable[k].first; if (len > 0 && len <= numBitsLUT) { int code = m_codeTable[k].second << (numBitsLUT - len); pair<short, short> entry((short)len, (short)k); int numEntries = 1 << (numBitsLUT - len); for (int j = 0; j < numEntries; j++) m_decodeLUT[code | j] = entry; // add the duplicates } } if (!bNeedTree) // decode LUT covers it all, no tree needed return true; // go over the codes too long for the LUT and count how many leading bits are 0 for all of them m_numBitsToSkipInTree = 32; for (int i = i0; i < i1; i++) { int k = GetIndexWrapAround(i, size); int len = m_codeTable[k].first; if (len > 0 && len > numBitsLUT) // only codes not in the decode LUT { unsigned int code = m_codeTable[k].second; int shift = 1; while (code >> shift) shift++; m_numBitsToSkipInTree = min(m_numBitsToSkipInTree, len - shift); } } /* int numNodesCreated = 1; */ Node emptyNode((short)-1, 0); if (!m_root) m_root = new Node(emptyNode); for (int i = i0; i < i1; i++) { int k = GetIndexWrapAround(i, size); int len = m_codeTable[k].first; if (len > 0 && len > numBitsLUT) // add only codes not in the decode LUT { unsigned int code = m_codeTable[k].second; Node* node = m_root; int j = len - m_numBitsToSkipInTree; // reduce len by number of leading bits from above while (--j >= 0) // go over the bits { if (code & (1 << j)) { if (!node->child1) { node->child1 = new Node(emptyNode); /* numNodesCreated++; */ } node = node->child1; } else { if (!node->child0) { node->child0 = new Node(emptyNode); /* numNodesCreated++; */ } node = node->child0; } if (j == 0) // last bit, leaf node { node->value = (short)k; // set the value } } } } return true; } // -------------------------------------------------------------------------- ; void Huffman::Clear() { m_codeTable.clear(); m_decodeLUT.clear(); if (m_root) { int n = 0; m_root->FreeTree(n); delete m_root; m_root = NULL; } } // -------------------------------------------------------------------------- ; // -------------------------------------------------------------------------- ; bool Huffman::ComputeNumBytesCodeTable(int& numBytes) const { int i0, i1, maxLen; if (!GetRange(i0, i1, maxLen)) return false; int size = (int)m_codeTable.size(); int sum = 0; for (int i = i0; i < i1; i++) { int k = GetIndexWrapAround(i, size); sum += m_codeTable[k].first; } numBytes = 4 * sizeof(int); // version, size, first bin, (last + 1) bin BitStuffer2 bitStuffer2; numBytes += bitStuffer2.ComputeNumBytesNeededSimple((unsigned int)(i1 - i0), (unsigned int)maxLen); // code lengths int numUInts = (((sum + 7) >> 3) + 3) >> 2; numBytes += 4 * numUInts; // byte array with the codes bit stuffed return true; } // -------------------------------------------------------------------------- ; bool Huffman::GetRange(int& i0, int& i1, int& maxCodeLength) const { if (m_codeTable.empty() || m_codeTable.size() >= m_maxHistoSize) return false; // first, check for peak somewhere in the middle with 0 stretches left and right int size = (int)m_codeTable.size(); int i = 0; while (i < size && m_codeTable[i].first == 0) i++; i0 = i; i = size - 1; while (i >= 0 && m_codeTable[i].first == 0) i--; i1 = i + 1; // exclusive if (i1 <= i0) return false; // second, cover the common case that the peak is close to 0 pair<int, int> segm(0, 0); int j = 0; while (j < size) // find the largest stretch of 0's, if any { while (j < size && m_codeTable[j].first > 0) j++; int k0 = j; while (j < size && m_codeTable[j].first == 0) j++; int k1 = j; if (k1 - k0 > segm.second) segm = pair<int, int>(k0, k1 - k0); } if (size - segm.second < i1 - i0) { i0 = segm.first + segm.second; i1 = segm.first + size; // do wrap around } if (i1 <= i0) return false; int maxLen = 0; for (i = i0; i < i1; i++) { int k = GetIndexWrapAround(i, size); int len = m_codeTable[k].first; maxLen = max(maxLen, len); } if (maxLen <= 0 || maxLen > 32) return false; maxCodeLength = maxLen; return true; } // -------------------------------------------------------------------------- ; bool Huffman::BitStuffCodes(Byte** ppByte, int i0, int i1) const { if (!ppByte) return false; unsigned int* arr = (unsigned int*)(*ppByte); unsigned int* dstPtr = arr; int size = (int)m_codeTable.size(); int bitPos = 0; for (int i = i0; i < i1; i++) { int k = GetIndexWrapAround(i, size); int len = m_codeTable[k].first; if (len > 0) { unsigned int val = m_codeTable[k].second; if (32 - bitPos >= len) { if (bitPos == 0) *dstPtr = 0; *dstPtr |= val << (32 - bitPos - len); bitPos += len; if (bitPos == 32) { bitPos = 0; dstPtr++; } } else { bitPos += len - 32; *dstPtr++ |= val >> bitPos; *dstPtr = val << (32 - bitPos); } } } size_t numUInts = dstPtr - arr + (bitPos > 0 ? 1 : 0); *ppByte += numUInts * sizeof(unsigned int); return true; } // -------------------------------------------------------------------------- ; bool Huffman::BitUnStuffCodes(const Byte** ppByte, int i0, int i1) { if (!ppByte || !(*ppByte)) return false; const unsigned int* arr = (const unsigned int*)(*ppByte); const unsigned int* srcPtr = arr; int size = (int)m_codeTable.size(); int bitPos = 0; for (int i = i0; i < i1; i++) { int k = GetIndexWrapAround(i, size); int len = m_codeTable[k].first; if (len > 0) { m_codeTable[k].second = ((*srcPtr) << bitPos) >> (32 - len); if (32 - bitPos >= len) { bitPos += len; // cppcheck-suppress shiftTooManyBits if (bitPos == 32) { bitPos = 0; srcPtr++; } } else { bitPos += len - 32; srcPtr++; m_codeTable[k].second |= (*srcPtr) >> (32 - bitPos); } } } size_t numUInts = srcPtr - arr + (bitPos > 0 ? 1 : 0); *ppByte += numUInts * sizeof(unsigned int); return true; } // -------------------------------------------------------------------------- ; struct MyLargerThanOp { inline bool operator() (const pair<int, int>& p0, const pair<int, int>& p1) { return p0.first > p1.first; } }; // -------------------------------------------------------------------------- ; bool Huffman::ConvertCodesToCanonical() { // from the non canonical code book, create an array to be sorted in descending order: // codeLength * tableSize - index int tableSize = (int)m_codeTable.size(); vector<pair<int, int> > sortVec(tableSize); memset(&sortVec[0], 0, tableSize * sizeof(pair<int, int>)); for (int i = 0; i < tableSize; i++) if (m_codeTable[i].first > 0) sortVec[i] = pair<int, int>(m_codeTable[i].first * tableSize - i, i); // sort descending std::sort(sortVec.begin(), sortVec.end(), MyLargerThanOp()); // create canonical codes and assign to orig code table unsigned int codeCanonical = 0; int index = sortVec[0].second; short codeLen = m_codeTable[index].first; int i = 0; while (i < tableSize && sortVec[i].first > 0) { index = sortVec[i++].second; short delta = codeLen - m_codeTable[index].first; codeCanonical >>= delta; codeLen -= delta; m_codeTable[index].second = codeCanonical++; } return true; } // -------------------------------------------------------------------------- ; NAMESPACE_LERC_END