Disabled external gits
This commit is contained in:
573
cs440-acg/ext/openexr/OpenEXR/IlmImf/dwaLookups.cpp
Normal file
573
cs440-acg/ext/openexr/OpenEXR/IlmImf/dwaLookups.cpp
Normal file
@@ -0,0 +1,573 @@
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2009-2014 DreamWorks Animation LLC.
|
||||
//
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following disclaimer
|
||||
// in the documentation and/or other materials provided with the
|
||||
// distribution.
|
||||
// * Neither the name of DreamWorks Animation nor the names of
|
||||
// its contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//
|
||||
// A program to generate various acceleration lookup tables
|
||||
// for Imf::DwaCompressor
|
||||
//
|
||||
|
||||
#include <cstddef>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <vector>
|
||||
|
||||
#include <OpenEXRConfig.h>
|
||||
|
||||
#ifdef OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <half.h>
|
||||
#include <IlmThread.h>
|
||||
#include <IlmThreadSemaphore.h>
|
||||
#include <ImfIO.h>
|
||||
#include <ImfXdr.h>
|
||||
#include "ImfNamespace.h"
|
||||
|
||||
using namespace OPENEXR_IMF_NAMESPACE;
|
||||
|
||||
namespace {
|
||||
|
||||
class LutHeaderWorker
|
||||
{
|
||||
public:
|
||||
class Runner : public IlmThread::Thread
|
||||
{
|
||||
public:
|
||||
Runner(LutHeaderWorker &worker, bool output):
|
||||
IlmThread::Thread(),
|
||||
_worker(worker),
|
||||
_output(output)
|
||||
{
|
||||
start();
|
||||
}
|
||||
|
||||
virtual ~Runner()
|
||||
{
|
||||
_semaphore.wait();
|
||||
}
|
||||
|
||||
virtual void run()
|
||||
{
|
||||
_semaphore.post();
|
||||
_worker.run(_output);
|
||||
}
|
||||
|
||||
private:
|
||||
LutHeaderWorker &_worker;
|
||||
bool _output;
|
||||
IlmThread::Semaphore _semaphore;
|
||||
|
||||
}; // class LutHeaderWorker::Runner
|
||||
|
||||
|
||||
LutHeaderWorker(size_t startValue,
|
||||
size_t endValue):
|
||||
_lastCandidateCount(0),
|
||||
_startValue(startValue),
|
||||
_endValue(endValue),
|
||||
_numElements(0),
|
||||
_offset(new size_t[numValues()]),
|
||||
_elements(new unsigned short[1024*1024*2])
|
||||
{
|
||||
}
|
||||
|
||||
~LutHeaderWorker()
|
||||
{
|
||||
delete[] _offset;
|
||||
delete[] _elements;
|
||||
}
|
||||
|
||||
size_t lastCandidateCount() const
|
||||
{
|
||||
return _lastCandidateCount;
|
||||
}
|
||||
|
||||
size_t numValues() const
|
||||
{
|
||||
return _endValue - _startValue;
|
||||
}
|
||||
|
||||
size_t numElements() const
|
||||
{
|
||||
return _numElements;
|
||||
}
|
||||
|
||||
const size_t* offset() const
|
||||
{
|
||||
return _offset;
|
||||
}
|
||||
|
||||
const unsigned short* elements() const
|
||||
{
|
||||
return _elements;
|
||||
}
|
||||
|
||||
void run(bool outputProgress)
|
||||
{
|
||||
half candidate[16];
|
||||
int candidateCount = 0;
|
||||
|
||||
for (size_t input=_startValue; input<_endValue; ++input) {
|
||||
|
||||
if (outputProgress) {
|
||||
#ifdef __GNUC__
|
||||
if (input % 100 == 0) {
|
||||
fprintf(stderr,
|
||||
" Building acceleration for DwaCompressor, %.2f %% %c",
|
||||
100.*(float)input/(float)numValues(), 13);
|
||||
}
|
||||
#else
|
||||
if (input % 1000 == 0) {
|
||||
fprintf(stderr,
|
||||
" Building acceleration for DwaCompressor, %.2f %%\n",
|
||||
100.*(float)input/(float)numValues());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
int numSetBits = countSetBits(input);
|
||||
half inputHalf, closestHalf;
|
||||
|
||||
inputHalf.setBits(input);
|
||||
|
||||
_offset[input - _startValue] = _numElements;
|
||||
|
||||
// Gather candidates
|
||||
candidateCount = 0;
|
||||
for (int targetNumSetBits=numSetBits-1; targetNumSetBits>=0;
|
||||
--targetNumSetBits) {
|
||||
bool valueFound = false;
|
||||
|
||||
for (int i=0; i<65536; ++i) {
|
||||
if (countSetBits(i) != targetNumSetBits) continue;
|
||||
|
||||
if (!valueFound) {
|
||||
closestHalf.setBits(i);
|
||||
valueFound = true;
|
||||
} else {
|
||||
half tmpHalf;
|
||||
|
||||
tmpHalf.setBits(i);
|
||||
|
||||
if (fabs((float)inputHalf - (float)tmpHalf) <
|
||||
fabs((float)inputHalf - (float)closestHalf)) {
|
||||
closestHalf = tmpHalf;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (valueFound == false) {
|
||||
fprintf(stderr, "bork bork bork!\n");
|
||||
}
|
||||
|
||||
candidate[candidateCount] = closestHalf;
|
||||
candidateCount++;
|
||||
}
|
||||
|
||||
// Sort candidates by increasing number of bits set
|
||||
for (int i=0; i<candidateCount; ++i) {
|
||||
for (int j=i+1; j<candidateCount; ++j) {
|
||||
|
||||
int iCnt = countSetBits(candidate[i].bits());
|
||||
int jCnt = countSetBits(candidate[j].bits());
|
||||
|
||||
if (jCnt < iCnt) {
|
||||
half tmp = candidate[i];
|
||||
candidate[i] = candidate[j];
|
||||
candidate[j] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Copy candidates to the data buffer;
|
||||
for (int i=0; i<candidateCount; ++i) {
|
||||
_elements[_numElements] = candidate[i].bits();
|
||||
_numElements++;
|
||||
}
|
||||
|
||||
if (input == _endValue-1) {
|
||||
_lastCandidateCount = candidateCount;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
size_t _lastCandidateCount;
|
||||
size_t _startValue;
|
||||
size_t _endValue;
|
||||
size_t _numElements;
|
||||
size_t *_offset;
|
||||
unsigned short *_elements;
|
||||
|
||||
//
|
||||
// Precomputing the bit count runs faster than using
|
||||
// the builtin instruction, at least in one case..
|
||||
//
|
||||
// Precomputing 8-bits is no slower than 16-bits,
|
||||
// and saves a fair bit of overhead..
|
||||
//
|
||||
int countSetBits(unsigned short src)
|
||||
{
|
||||
static const unsigned short numBitsSet[256] =
|
||||
{
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
||||
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
|
||||
};
|
||||
|
||||
return numBitsSet[src & 0xff] + numBitsSet[src >> 8];
|
||||
}
|
||||
|
||||
}; // class LutHeaderWorker
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
//
|
||||
// Generate a no-op LUT, to cut down in conditional branches
|
||||
//
|
||||
void
|
||||
generateNoop()
|
||||
{
|
||||
printf("const unsigned short dwaCompressorNoOp[] = \n");
|
||||
printf("{");
|
||||
for (int i=0; i<65536; ++i) {
|
||||
|
||||
if (i % 8 == 0) {
|
||||
printf("\n ");
|
||||
}
|
||||
|
||||
unsigned short dst;
|
||||
char *tmp = (char *)(&dst);
|
||||
|
||||
unsigned short src = (unsigned short)i;
|
||||
Xdr::write <CharPtrIO> (tmp, src);
|
||||
|
||||
printf("0x%04x, ", dst);
|
||||
}
|
||||
printf("\n};\n");
|
||||
}
|
||||
|
||||
//
|
||||
// Nonlinearly encode luminance. For values below 1.0, we want
|
||||
// to use a gamma 2.2 function to match what is fairly common
|
||||
// for storing output referred. However, > 1, gamma functions blow up,
|
||||
// and log functions are much better behaved. We could use a log
|
||||
// function everywhere, but it tends to over-sample dark
|
||||
// regions and undersample the brighter regions, when
|
||||
// compared to the way real devices reproduce values.
|
||||
//
|
||||
// So, above 1, use a log function which is a smooth blend
|
||||
// into the gamma function.
|
||||
//
|
||||
// Nonlinear(linear) =
|
||||
//
|
||||
// linear^(1./2.2) / linear <= 1.0
|
||||
// |
|
||||
// ln(linear)/ln(e^2.2) + 1 \ otherwise
|
||||
//
|
||||
//
|
||||
// toNonlinear[] needs to take in XDR format half float values,
|
||||
// and output NATIVE format float.
|
||||
//
|
||||
// toLinear[] does the opposite - takes in NATIVE half and
|
||||
// outputs XDR half values.
|
||||
//
|
||||
|
||||
void
|
||||
generateToLinear()
|
||||
{
|
||||
unsigned short toLinear[65536];
|
||||
|
||||
toLinear[0] = 0;
|
||||
|
||||
for (int i=1; i<65536; ++i) {
|
||||
half h;
|
||||
float sign = 1;
|
||||
float logBase = pow(2.7182818, 2.2);
|
||||
|
||||
// map NaN and inf to 0
|
||||
if ((i & 0x7c00) == 0x7c00) {
|
||||
toLinear[i] = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
//
|
||||
// _toLinear - assume i is NATIVE, but our output needs
|
||||
// to get flipped to XDR
|
||||
//
|
||||
h.setBits(i);
|
||||
sign = 1;
|
||||
if ((float)h < 0) {
|
||||
sign = -1;
|
||||
}
|
||||
|
||||
if ( fabs( (float)h) <= 1.0 ) {
|
||||
h = (half)(sign * pow((float)fabs((float)h), 2.2f));
|
||||
} else {
|
||||
h = (half)(sign * pow(logBase, (float)(fabs((float)h) - 1.0)));
|
||||
}
|
||||
|
||||
{
|
||||
char *tmp = (char *)(&toLinear[i]);
|
||||
|
||||
Xdr::write <CharPtrIO> ( tmp, h.bits());
|
||||
}
|
||||
}
|
||||
|
||||
printf("const unsigned short dwaCompressorToLinear[] = \n");
|
||||
printf("{");
|
||||
for (int i=0; i<65536; ++i) {
|
||||
if (i % 8 == 0) {
|
||||
printf("\n ");
|
||||
}
|
||||
printf("0x%04x, ", toLinear[i]);
|
||||
}
|
||||
printf("\n};\n");
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
generateToNonlinear()
|
||||
{
|
||||
unsigned short toNonlinear[65536];
|
||||
|
||||
toNonlinear[0] = 0;
|
||||
|
||||
for (int i=1; i<65536; ++i) {
|
||||
unsigned short usNative, usXdr;
|
||||
half h;
|
||||
float sign = 1;
|
||||
float logBase = pow(2.7182818, 2.2);
|
||||
|
||||
usXdr = i;
|
||||
|
||||
{
|
||||
const char *tmp = (char *)(&usXdr);
|
||||
|
||||
Xdr::read<CharPtrIO>(tmp, usNative);
|
||||
}
|
||||
|
||||
// map NaN and inf to 0
|
||||
if ((usNative & 0x7c00) == 0x7c00) {
|
||||
toNonlinear[i] = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
//
|
||||
// toNonlinear - assume i is XDR
|
||||
//
|
||||
h.setBits(usNative);
|
||||
sign = 1;
|
||||
if ((float)h < 0) {
|
||||
sign = -1;
|
||||
}
|
||||
|
||||
if ( fabs( (float)h ) <= 1.0) {
|
||||
h = (half)(sign * pow(fabs((float)h), 1.f/2.2f));
|
||||
} else {
|
||||
h = (half)(sign * ( log(fabs((float)h)) / log(logBase) + 1.0) );
|
||||
}
|
||||
toNonlinear[i] = h.bits();
|
||||
}
|
||||
|
||||
printf("const unsigned short dwaCompressorToNonlinear[] = \n");
|
||||
printf("{");
|
||||
for (int i=0; i<65536; ++i) {
|
||||
if (i % 8 == 0) {
|
||||
printf("\n ");
|
||||
}
|
||||
printf("0x%04x, ", toNonlinear[i]);
|
||||
}
|
||||
printf("\n};\n");
|
||||
}
|
||||
|
||||
//
|
||||
// Attempt to get available CPUs in a somewhat portable way.
|
||||
//
|
||||
|
||||
int
|
||||
cpuCount()
|
||||
{
|
||||
if (!IlmThread::supportsThreads()) return 1;
|
||||
|
||||
int cpuCount = 1;
|
||||
|
||||
#if defined (OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN)
|
||||
|
||||
cpuCount = sysconf(_SC_NPROCESSORS_ONLN);
|
||||
|
||||
#elif defined (_WIN32)
|
||||
|
||||
SYSTEM_INFO sysinfo;
|
||||
GetSystemInfo( &sysinfo );
|
||||
cpuCount = sysinfo.dwNumberOfProcessors;
|
||||
|
||||
#endif
|
||||
|
||||
if (cpuCount < 1) cpuCount = 1;
|
||||
return cpuCount;
|
||||
}
|
||||
|
||||
//
|
||||
// Generate acceleration luts for the quantization.
|
||||
//
|
||||
// For each possible input value, we want to find the closest numbers
|
||||
// which have one fewer bits set than before.
|
||||
//
|
||||
// This gives us num_bits(input)-1 values per input. If we alloc
|
||||
// space for everything, that's like a 2MB table. We can do better
|
||||
// by compressing all the values to be contigious and using offset
|
||||
// pointers.
|
||||
//
|
||||
// After we've found the candidates with fewer bits set, sort them
|
||||
// based on increasing numbers of bits set. This way, on quantize(),
|
||||
// we can scan through the list and halt once we find the first
|
||||
// candidate within the error range. For small values that can
|
||||
// be quantized to 0, 0 is the first value tested and the search
|
||||
// can exit fairly quickly.
|
||||
//
|
||||
|
||||
void
|
||||
generateLutHeader()
|
||||
{
|
||||
std::vector<LutHeaderWorker*> workers;
|
||||
|
||||
size_t numWorkers = cpuCount();
|
||||
size_t workerInterval = 65536 / numWorkers;
|
||||
|
||||
for (size_t i=0; i<numWorkers; ++i) {
|
||||
if (i != numWorkers-1) {
|
||||
workers.push_back( new LutHeaderWorker( i *workerInterval,
|
||||
(i+1)*workerInterval) );
|
||||
} else {
|
||||
workers.push_back( new LutHeaderWorker(i*workerInterval, 65536) );
|
||||
}
|
||||
}
|
||||
|
||||
if (IlmThread::supportsThreads()) {
|
||||
std::vector<LutHeaderWorker::Runner*> runners;
|
||||
for (size_t i=0; i<workers.size(); ++i) {
|
||||
runners.push_back( new LutHeaderWorker::Runner(*workers[i], (i==0)) );
|
||||
}
|
||||
|
||||
for (size_t i=0; i<workers.size(); ++i) {
|
||||
delete runners[i];
|
||||
}
|
||||
} else {
|
||||
for (size_t i=0; i<workers.size(); ++i) {
|
||||
workers[i]->run(i == 0);
|
||||
}
|
||||
}
|
||||
|
||||
printf("static unsigned int closestDataOffset[] = {\n");
|
||||
int offsetIdx = 0;
|
||||
int offsetPrev = 0;
|
||||
for (size_t i=0; i<workers.size(); ++i) {
|
||||
for (size_t value=0; value<workers[i]->numValues(); ++value) {
|
||||
if (offsetIdx % 8 == 0) {
|
||||
printf(" ");
|
||||
}
|
||||
printf("%6lu, ", workers[i]->offset()[value] + offsetPrev);
|
||||
if (offsetIdx % 8 == 7) {
|
||||
printf("\n");
|
||||
}
|
||||
offsetIdx++;
|
||||
}
|
||||
offsetPrev += workers[i]->offset()[workers[i]->numValues()-1] +
|
||||
workers[i]->lastCandidateCount();
|
||||
}
|
||||
printf("};\n\n\n");
|
||||
|
||||
|
||||
printf("static unsigned short closestData[] = {\n");
|
||||
int elementIdx = 0;
|
||||
for (size_t i=0; i<workers.size(); ++i) {
|
||||
for (size_t element=0; element<workers[i]->numElements(); ++element) {
|
||||
if (elementIdx % 8 == 0) {
|
||||
printf(" ");
|
||||
}
|
||||
printf("%5d, ", workers[i]->elements()[element]);
|
||||
if (elementIdx % 8 == 7) {
|
||||
printf("\n");
|
||||
}
|
||||
elementIdx++;
|
||||
}
|
||||
}
|
||||
printf("};\n\n\n");
|
||||
|
||||
for (size_t i=0; i<workers.size(); ++i) {
|
||||
delete workers[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
printf("#include <cstddef>\n");
|
||||
printf("\n\n\n");
|
||||
|
||||
generateNoop();
|
||||
|
||||
printf("\n\n\n");
|
||||
|
||||
generateToLinear();
|
||||
|
||||
printf("\n\n\n");
|
||||
|
||||
generateToNonlinear();
|
||||
|
||||
printf("\n\n\n");
|
||||
|
||||
generateLutHeader();
|
||||
|
||||
return 0;
|
||||
}
|
Reference in New Issue
Block a user