544 lines
17 KiB
C++
544 lines
17 KiB
C++
///////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Copyright (c) 2009-2014 DreamWorks Animation LLC.
|
|
//
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following disclaimer
|
|
// in the documentation and/or other materials provided with the
|
|
// distribution.
|
|
// * Neither the name of DreamWorks Animation nor the names of
|
|
// its contributors may be used to endorse or promote products derived
|
|
// from this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
#include <half.h>
|
|
#include <math.h>
|
|
#include <string.h>
|
|
#include <ImfDwaCompressorSimd.h>
|
|
#include <ImfSystemSpecific.h>
|
|
#include <ImathRandom.h>
|
|
#include <iostream>
|
|
#include <assert.h>
|
|
|
|
using namespace OPENEXR_IMF_NAMESPACE;
|
|
using namespace IMATH_NAMESPACE;
|
|
using namespace std;
|
|
|
|
|
|
namespace
|
|
{
|
|
|
|
void
|
|
dumpBuffer (const SimdAlignedBuffer64f &buffer)
|
|
{
|
|
for (int i=0; i<8; ++i)
|
|
{
|
|
for (int j=0; j<8; ++j)
|
|
{
|
|
cout << buffer._buffer[i*8+j] << " ";
|
|
}
|
|
cout << endl;
|
|
}
|
|
}
|
|
|
|
void
|
|
compareBuffer (const SimdAlignedBuffer64f &src,
|
|
const SimdAlignedBuffer64f &dst,
|
|
const float errThresh)
|
|
{
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
double diff = fabs(src._buffer[i] - dst._buffer[i]);
|
|
|
|
if (diff > errThresh)
|
|
{
|
|
cout << scientific;
|
|
cout << "Error exceeded threshold on element " << i << endl;
|
|
cout << " diff: " << diff << endl;
|
|
cout << "Goal (src): " << scientific << endl;
|
|
dumpBuffer(src);
|
|
cout << "Test (dst): " << endl;
|
|
dumpBuffer(dst);
|
|
|
|
assert(false);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
compareBufferRelative (const SimdAlignedBuffer64f &src,
|
|
const SimdAlignedBuffer64f &dst,
|
|
const float relErrThresh,
|
|
const float absErrThresh)
|
|
{
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
double diff = fabs(src._buffer[i] - dst._buffer[i]);
|
|
double relDiff = diff / fabs(src._buffer[i]);
|
|
|
|
if (relDiff > relErrThresh && diff > absErrThresh)
|
|
{
|
|
cout << scientific;
|
|
cout << "Error exceeded threshold on element " << i << endl;
|
|
cout << " diff: " << diff << " relErr: " << fixed << 100.0*relDiff << " %" << endl;
|
|
cout << "Goal (src): " << scientific << endl;
|
|
dumpBuffer(src);
|
|
cout << "Test (dst): " << endl;
|
|
dumpBuffer(dst);
|
|
|
|
assert(false);
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Test that we can round trip CSC data with reasonable precision
|
|
//
|
|
void
|
|
testCsc()
|
|
{
|
|
const int numIter = 1000000;
|
|
Rand48 rand48(0);
|
|
SimdAlignedBuffer64f orig[3];
|
|
SimdAlignedBuffer64f test[3];
|
|
|
|
cout << " Color Space Conversion Round Trip " << endl;
|
|
cout << " csc709Forward64() - 64 x csc709Inverse()" << endl;
|
|
for (int iter=0; iter<numIter; ++iter)
|
|
{
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
test[0]._buffer[i] = orig[0]._buffer[i] = rand48.nextf();
|
|
test[1]._buffer[i] = orig[1]._buffer[i] = rand48.nextf();
|
|
test[2]._buffer[i] = orig[2]._buffer[i] = rand48.nextf();
|
|
}
|
|
|
|
csc709Forward64(test[0]._buffer, test[1]._buffer, test[2]._buffer);
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
csc709Inverse(test[0]._buffer[i], test[1]._buffer[i], test[2]._buffer[i]);
|
|
}
|
|
|
|
compareBuffer(orig[0], test[0], 1e-3);
|
|
compareBuffer(orig[1], test[1], 1e-3);
|
|
compareBuffer(orig[2], test[2], 1e-3);
|
|
|
|
} // iter
|
|
|
|
cout << " csc709Forward64() - csc709Inverse64()" << endl;
|
|
for (int iter=0; iter<numIter; ++iter)
|
|
{
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
test[0]._buffer[i] = orig[0]._buffer[i] = rand48.nextf();
|
|
test[1]._buffer[i] = orig[1]._buffer[i] = rand48.nextf();
|
|
test[2]._buffer[i] = orig[2]._buffer[i] = rand48.nextf();
|
|
}
|
|
|
|
csc709Forward64(test[0]._buffer, test[1]._buffer, test[2]._buffer);
|
|
csc709Inverse64(test[0]._buffer, test[1]._buffer, test[2]._buffer);
|
|
|
|
compareBuffer(orig[0], test[0], 1e-3);
|
|
compareBuffer(orig[1], test[1], 1e-3);
|
|
compareBuffer(orig[2], test[2], 1e-3);
|
|
|
|
} // iter
|
|
}
|
|
|
|
//
|
|
// Test interleaving two byte arrays
|
|
//
|
|
void
|
|
testInterleave()
|
|
{
|
|
const int bufferLen = 100000;
|
|
const int randomNumIter = 10000;
|
|
const int lengthNumIter = 128;
|
|
Rand48 rand48(0);
|
|
char *srcA = new char[bufferLen];
|
|
char *srcB = new char[bufferLen];
|
|
char *dst = new char[2*bufferLen];
|
|
char *test = new char[2*bufferLen];
|
|
|
|
cout << " Byte Interleaving " << endl;
|
|
|
|
for (int i=0; i<bufferLen; ++i)
|
|
{
|
|
srcA[i] = (char)rand48.nextf(0.0, 255.0);
|
|
srcB[i] = (char)rand48.nextf(0.0, 255.0);
|
|
dst[2*i] = srcA[i];
|
|
dst[2*i+1] = srcB[i];
|
|
}
|
|
|
|
for (int iter=0; iter<randomNumIter; ++iter)
|
|
{
|
|
memset(test, 0, 2*bufferLen);
|
|
|
|
int offset = (int)rand48.nextf(0.0, bufferLen/2);
|
|
int len = (int)rand48.nextf(1.0, bufferLen - 1 - offset);
|
|
|
|
interleaveByte2( test+2*offset, srcA+offset, srcB+offset, len);
|
|
for (int i=0; i<len; ++i) {
|
|
assert( test[2*offset + 2*i] == dst[2*offset + 2*i]);
|
|
assert( test[2*offset + 2*i + 1] == dst[2*offset + 2*i + 1]);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Test increasing length buffers, with varying alignment
|
|
// on all the buffers.
|
|
//
|
|
for (int len=1; len<lengthNumIter; ++len)
|
|
{
|
|
for (int offset=0; offset<16*16*16; ++offset)
|
|
{
|
|
int offsetA = offset % 16;
|
|
int offsetB = (offset / 16) % 16;
|
|
int offsetTest = (offset / 256) % 16;
|
|
|
|
memset(srcA, 255, bufferLen);
|
|
memset(srcB, 255, bufferLen);
|
|
memset(dst, 0, 2*bufferLen);
|
|
memset(test, 0, 2*bufferLen);
|
|
|
|
char *a = srcA + offsetA;
|
|
char *b = srcB + offsetB;
|
|
char *out = test + offsetTest;
|
|
|
|
for (int i=0; i<len; ++i)
|
|
{
|
|
a[i] = (char)rand48.nextf(0.0, 255.0);
|
|
b[i] = (char)rand48.nextf(0.0, 255.0);
|
|
|
|
dst[2*i] = a[i];
|
|
dst[2*i+1] = b[i];
|
|
}
|
|
|
|
interleaveByte2(out, a, b, len);
|
|
|
|
for (int i=0; i<2*len+8; ++i)
|
|
{
|
|
assert( dst[2*i] == out[2*i] );
|
|
assert( dst[2*i+1] == out[2*i+1] );
|
|
}
|
|
}
|
|
}
|
|
|
|
delete[] srcA;
|
|
delete[] srcB;
|
|
delete[] dst;
|
|
delete[] test;
|
|
}
|
|
|
|
//
|
|
// Test that we can route trip DCT data with reasonable precision
|
|
//
|
|
void
|
|
testDct()
|
|
{
|
|
const int numIter = 1000000;
|
|
Rand48 rand48(0);
|
|
SimdAlignedBuffer64f orig;
|
|
SimdAlignedBuffer64f test;
|
|
|
|
cout << " DCT Round Trip " << endl;
|
|
for (int iter=0; iter<numIter; ++iter)
|
|
{
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
orig._buffer[i] = test._buffer[i] = rand48.nextf();
|
|
}
|
|
|
|
dctForward8x8(test._buffer);
|
|
dctInverse8x8_scalar<0>(test._buffer);
|
|
|
|
compareBufferRelative(orig, test, .02, 1e-3);
|
|
}
|
|
|
|
cout << " Inverse, DC Only" << endl;
|
|
for (int iter=0; iter<numIter; ++iter)
|
|
{
|
|
orig._buffer[0] = test._buffer[0] = rand48.nextf();
|
|
for (int i=1; i<64; ++i)
|
|
{
|
|
orig._buffer[i] = test._buffer[i] = 0;
|
|
}
|
|
|
|
dctInverse8x8_scalar<0>(orig._buffer);
|
|
dctInverse8x8DcOnly(test._buffer);
|
|
|
|
compareBufferRelative(orig, test, .01, 1e-6);
|
|
}
|
|
|
|
|
|
#define INVERSE_DCT_SCALAR_TEST_N(_func, _n, _desc) \
|
|
cout << " " << _desc << endl; \
|
|
for (int iter=0; iter<numIter; ++iter) \
|
|
{ \
|
|
for (int i=0; i<64; ++i) \
|
|
{ \
|
|
if (i < 8*(8-_n)) \
|
|
{ \
|
|
orig._buffer[i] = test._buffer[i] = rand48.nextf(); \
|
|
} else { \
|
|
orig._buffer[i] = test._buffer[i] = 0; \
|
|
} \
|
|
} \
|
|
dctInverse8x8_scalar<0>(orig._buffer); \
|
|
_func<_n>(test._buffer); \
|
|
compareBufferRelative(orig, test, .01, 1e-6); \
|
|
}
|
|
|
|
cout << " Inverse, Scalar: " << endl;
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 0, "8x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 1, "7x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 2, "6x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 3, "5x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 4, "4x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 5, "3x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 6, "2x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 7, "1x8")
|
|
|
|
CpuId cpuid;
|
|
if (cpuid.sse2)
|
|
{
|
|
cout << " Inverse, SSE2: " << endl;
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 0, "8x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 1, "7x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 2, "6x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 3, "5x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 4, "4x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 5, "3x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 6, "2x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 7, "1x8")
|
|
}
|
|
|
|
if (cpuid.avx)
|
|
{
|
|
cout << " Inverse, AVX: " << endl;
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 0, "8x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 1, "7x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 2, "6x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 3, "5x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 4, "4x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 5, "3x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 6, "2x8")
|
|
INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 7, "1x8")
|
|
}
|
|
}
|
|
|
|
//
|
|
// Test FLOAT -> HALF conversion, mostly for F16C enabled processors
|
|
//
|
|
void
|
|
testFloatToHalf()
|
|
{
|
|
cout << " FLOAT -> HALF conversion" << endl;
|
|
|
|
const int numIter = 1000000;
|
|
Rand48 rand48(0);
|
|
SimdAlignedBuffer64f src;
|
|
SimdAlignedBuffer64us dst;
|
|
|
|
cout << " convertFloatToHalf64_scalar()" << endl;
|
|
for (int iter=0; iter<numIter; ++iter)
|
|
{
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
if (i < 32)
|
|
{
|
|
src._buffer[i] = (float)140000*(rand48.nextf()-.5);
|
|
} else
|
|
{
|
|
src._buffer[i] = (float)(rand48.nextf()-.5);
|
|
}
|
|
dst._buffer[i] = 0;
|
|
}
|
|
|
|
convertFloatToHalf64_scalar(dst._buffer, src._buffer);
|
|
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
half value = (half)src._buffer[i];
|
|
if (value.bits() != dst._buffer[i])
|
|
{
|
|
cout << src._buffer[i] << " -> " << dst._buffer[i]
|
|
<< " expected " << value.bits() << endl;
|
|
assert(false);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
CpuId cpuid;
|
|
if (cpuid.avx && cpuid.f16c)
|
|
{
|
|
cout << " convertFloatToHalf64_f16c()" << endl;
|
|
for (int iter=0; iter<numIter; ++iter)
|
|
{
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
if (i < 32)
|
|
{
|
|
src._buffer[i] = (float)140000*(rand48.nextf()-.5);
|
|
}
|
|
else
|
|
{
|
|
src._buffer[i] = (float)(rand48.nextf()-.5);
|
|
}
|
|
dst._buffer[i] = 0;
|
|
}
|
|
|
|
convertFloatToHalf64_f16c(dst._buffer, src._buffer);
|
|
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
half value = (half)src._buffer[i];
|
|
if (value.bits() != dst._buffer[i])
|
|
{
|
|
cout << src._buffer[i] << " -> " << dst._buffer[i]
|
|
<< " expected " << value.bits() << endl;
|
|
assert(false);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Test ZigZag reordering + HALF -> FLOAT conversion
|
|
//
|
|
void
|
|
testFromHalfZigZag()
|
|
{
|
|
SimdAlignedBuffer64us src;
|
|
SimdAlignedBuffer64f dst;
|
|
|
|
cout << " ZigZag re-ordering with HALF -> FLOAT conversion" << endl;
|
|
|
|
// First off, simple check to see that the reordering is working
|
|
// This pattern, when converted, should give 0.0 - 63.0 as floats
|
|
// in order.
|
|
unsigned short pattern[] = {
|
|
0x0000, 0x3c00, 0x4800, 0x4c00, 0x4880, 0x4000, 0x4200, 0x4900,
|
|
0x4c40, 0x4e00, 0x5000, 0x4e40, 0x4c80, 0x4980, 0x4400, 0x4500,
|
|
0x4a00, 0x4cc0, 0x4e80, 0x5020, 0x5100, 0x5200, 0x5120, 0x5040,
|
|
0x4ec0, 0x4d00, 0x4a80, 0x4600, 0x4700, 0x4b00, 0x4d40, 0x4f00,
|
|
0x5060, 0x5140, 0x5220, 0x5300, 0x5320, 0x5240, 0x5160, 0x5080,
|
|
0x4f40, 0x4d80, 0x4b80, 0x4dc0, 0x4f80, 0x50a0, 0x5180, 0x5260,
|
|
0x5340, 0x5360, 0x5280, 0x51a0, 0x50c0, 0x4fc0, 0x50e0, 0x51c0,
|
|
0x52a0, 0x5380, 0x53a0, 0x52c0, 0x51e0, 0x52e0, 0x53c0, 0x53e0
|
|
};
|
|
|
|
cout << " fromHalfZigZag_scaler()" << endl;
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
src._buffer[i] = pattern[i];
|
|
}
|
|
fromHalfZigZag_scalar(src._buffer, dst._buffer);
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
if ( fabsf(dst._buffer[i] - (float)i) > 1e-5 )
|
|
{
|
|
cout << "At index " << i << ": ";
|
|
cout << "expecting " << (float)i << "; got " << dst._buffer[i] << endl;
|
|
assert(false);
|
|
}
|
|
}
|
|
|
|
// Then compare the two implementations, if supported
|
|
CpuId cpuid;
|
|
if (cpuid.avx && cpuid.f16c)
|
|
{
|
|
const int numIter = 1000000;
|
|
Rand48 rand48(0);
|
|
half h;
|
|
SimdAlignedBuffer64f dstF16c;
|
|
|
|
cout << " fromHalfZigZag_f16c()" << endl;
|
|
|
|
for (int iter=0; iter<numIter; ++iter)
|
|
{
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
if (i < 32)
|
|
{
|
|
h = (half)(140000.*(rand48.nextf() - .5));
|
|
}
|
|
else
|
|
{
|
|
h = (half)(rand48.nextf() - .5);
|
|
}
|
|
src._buffer[i] = h.bits();
|
|
}
|
|
|
|
fromHalfZigZag_scalar(src._buffer, dst._buffer);
|
|
fromHalfZigZag_f16c(src._buffer, dstF16c._buffer);
|
|
|
|
for (int i=0; i<64; ++i)
|
|
{
|
|
if ( fabsf(dst._buffer[i] - dstF16c._buffer[i]) > 1e-5 )
|
|
{
|
|
cout << "At index " << i << ": ";
|
|
cout << "expecting " << dst._buffer[i] << "; got "
|
|
<< dstF16c._buffer[i] << endl;
|
|
assert(false);
|
|
}
|
|
}
|
|
} // iter
|
|
} // f16c
|
|
}
|
|
|
|
|
|
} // namespace
|
|
|
|
void
|
|
testDwaCompressorSimd (const string&)
|
|
{
|
|
cout << "SIMD helper functions for DwaCompressor:" << endl;
|
|
|
|
try
|
|
{
|
|
|
|
testCsc();
|
|
testInterleave();
|
|
testFloatToHalf();
|
|
testFromHalfZigZag();
|
|
|
|
testDct();
|
|
|
|
}
|
|
catch (const exception &e)
|
|
{
|
|
cout << "unexpected exception: " << e.what() << endl;
|
|
assert (false);
|
|
}
|
|
catch (...)
|
|
{
|
|
cout << "unexpected exception" << endl;
|
|
assert (false);
|
|
}
|
|
|
|
cout << "ok\n" << endl;
|
|
}
|