Disabled external gits

This commit is contained in:
2022-04-07 18:46:57 +02:00
parent 88cb3426ad
commit 15e7120d6d
5316 changed files with 4563444 additions and 6 deletions

View File

@@ -0,0 +1,149 @@
#include <Eigen/Sparse>
#include <bench/BenchTimer.h>
#include <set>
using namespace std;
using namespace Eigen;
using namespace Eigen;
#ifndef SIZE
#define SIZE 1024
#endif
#ifndef DENSITY
#define DENSITY 0.01
#endif
#ifndef SCALAR
#define SCALAR double
#endif
typedef SCALAR Scalar;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector;
typedef SparseMatrix<Scalar> EigenSparseMatrix;
void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst)
{
dst.reserve(double(rows)*cols*density);
for(int j = 0; j < cols; j++)
{
for(int i = 0; i < rows; i++)
{
Scalar v = (internal::random<float>(0,1) < density) ? internal::random<Scalar>() : 0;
if (v!=0)
dst.insert(i,j) = v;
}
}
dst.finalize();
}
/** Fills \a dst with random entries placed at distinct random rows in every column.
  *
  * \param nnzPerCol requested number of entries per column; it is clamped to
  *                  \a rows because a column cannot hold more distinct rows
  *                  than that (without the clamp the row search below would
  *                  never terminate).
  * \param rows      number of rows of \a dst
  * \param cols      number of columns of \a dst
  * \param dst       matrix receiving the entries
  */
void fillMatrix2(int nnzPerCol, int rows, int cols, EigenSparseMatrix& dst)
{
  const int entriesPerCol = nnzPerCol < rows ? nnzPerCol : rows;
  // std::cout << "alloc " << entriesPerCol*cols << "\n";
  dst.reserve(entriesPerCol*cols);
  for(int j = 0; j < cols; j++)
  {
    // Rows already used in the current column.
    std::set<int> usedRows;
    for(int i = 0; i < entriesPerCol; i++)
    {
      int k = internal::random<int>(0,rows-1);
      // insert() reports whether the row was new, so one call both checks and records it.
      while (!usedRows.insert(k).second)
        k = internal::random<int>(0,rows-1);
      dst.insert(k,j) = internal::random<Scalar>();
    }
  }
  dst.finalize();
}
void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst)
{
dst.setZero();
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
dst(it.index(),j) = it.value();
}
#ifndef NOGMM
#include "gmm/gmm.h"
typedef gmm::csc_matrix<Scalar> GmmSparse;
typedef gmm::col_matrix< gmm::wsvector<Scalar> > GmmDynSparse;
void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst)
{
GmmDynSparse tmp(src.rows(), src.cols());
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
tmp(it.index(),j) = it.value();
gmm::copy(tmp, dst);
}
#endif
#ifndef NOMTL
#include <boost/numeric/mtl/mtl.hpp>
typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::col_major> > MtlSparse;
typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::row_major> > MtlSparseRowMajor;
void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst)
{
mtl::matrix::inserter<MtlSparse> ins(dst);
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
ins[it.index()][j] = it.value();
}
#endif
#ifdef CSPARSE
extern "C" {
#include "cs.h"
}
void eiToCSparse(const EigenSparseMatrix& src, cs* &dst)
{
cs* aux = cs_spalloc (0, 0, 1, 1, 1);
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
if (!cs_entry(aux, it.index(), j, it.value()))
{
std::cout << "cs_entry error\n";
exit(2);
}
dst = cs_compress(aux);
// cs_spfree(aux);
}
#endif // CSPARSE
#ifndef NOUBLAS
#include <boost/numeric/ublas/vector.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/io.hpp>
#include <boost/numeric/ublas/triangular.hpp>
#include <boost/numeric/ublas/vector_sparse.hpp>
#include <boost/numeric/ublas/matrix_sparse.hpp>
#include <boost/numeric/ublas/vector_of_vector.hpp>
#include <boost/numeric/ublas/operation.hpp>
typedef boost::numeric::ublas::compressed_matrix<Scalar,boost::numeric::ublas::column_major> UBlasSparse;
void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst)
{
dst.resize(src.rows(), src.cols(), false);
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
dst(it.index(),j) = it.value();
}
/** Copies the coefficients of \a src into \a dst element by element.
  *
  * \a dst is resized to src.size() first; \a src must provide size() and
  * coeff(index), \a dst must provide resize(n) and operator[].
  */
template <typename EigenType, typename UblasType>
void eiToUblasVec(const EigenType& src, UblasType& dst)
{
  dst.resize(src.size());
  int idx = 0;
  while (idx < src.size())
  {
    dst[idx] = src.coeff(idx);
    ++idx;
  }
}
#endif
#ifdef OSKI
extern "C" {
#include <oski/oski.h>
}
#endif

View File

@@ -0,0 +1,195 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_BENCH_TIMERR_H
#define EIGEN_BENCH_TIMERR_H
#if defined(_WIN32) || defined(__CYGWIN__)
# ifndef NOMINMAX
# define NOMINMAX
# define EIGEN_BT_UNDEF_NOMINMAX
# endif
# ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
# define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
# endif
# include <windows.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#else
# include <unistd.h>
#endif
// Compiler-level barrier for benchmarks: an empty asm statement that takes the
// pointer p as an input operand and declares a "memory" clobber. No machine
// instruction is emitted by the asm template itself (it is the empty string).
// Presumably intended to keep the object behind p from being optimized away —
// confirm against the benchmarks that call it.
static void escape(void *p) {
asm volatile("" : : "g"(p) : "memory");
}
// Compiler-level barrier for benchmarks: an empty asm statement with a
// "memory" clobber and no operands. Used by the BENCH macro after each try.
static void clobber() {
asm volatile("" : : : "memory");
}
#include <Eigen/Core>
namespace Eigen
{
// Selects which of the two clocks recorded by BenchTimer is queried; the
// values are used as indices into the timer's two-element vectors.
enum {
CPU_TIMER = 0,  // index of the time returned by BenchTimer::getCpuTime()
REAL_TIMER = 1  // index of the time returned by BenchTimer::getRealTime()
};
/** Elapsed time timer keeping the best try.
  *
  * Every start()/stop() pair records two durations, indexed by CPU_TIMER and
  * REAL_TIMER, and updates the best (minimum), worst (maximum) and total
  * durations seen since the last reset().
  *
  * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID for
  * the CPU timer and CLOCK_REALTIME for the real timer.
  * On Windows both timers use QueryPerformanceCounter.
  * On Apple platforms both timers use mach_absolute_time.
  *
  * Important: on linux, you must link with -lrt
  */
class BenchTimer
{
public:

  /** Reads the performance-counter frequency (Windows/Cygwin only) and resets the statistics. */
  BenchTimer()
  {
#if defined(_WIN32) || defined(__CYGWIN__)
    LARGE_INTEGER freq;
    QueryPerformanceFrequency(&freq);
    m_frequency = (double)freq.QuadPart;
#endif
    reset();
  }

  ~BenchTimer() {}

  /** Forgets all recorded tries: bests are set to 1e9, worsts and totals to 0. */
  inline void reset()
  {
    m_bests.fill(1e9);
    m_worsts.fill(0);
    m_totals.setZero();
  }

  /** Records the current CPU and real times as the beginning of a try. */
  inline void start()
  {
    m_starts[CPU_TIMER]  = getCpuTime();
    m_starts[REAL_TIMER] = getRealTime();
  }

  /** Ends the current try and folds its durations into best/worst/total. */
  inline void stop()
  {
    m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER];
    m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER];
#if EIGEN_VERSION_AT_LEAST(2,90,0)
    m_bests = m_bests.cwiseMin(m_times);
    m_worsts = m_worsts.cwiseMax(m_times);
#else
    m_bests(0) = std::min(m_bests(0),m_times(0));
    m_bests(1) = std::min(m_bests(1),m_times(1));
    m_worsts(0) = std::max(m_worsts(0),m_times(0));
    m_worsts(1) = std::max(m_worsts(1),m_times(1));
#endif
    m_totals += m_times;
  }

  /** Return the elapsed time in seconds between the last start/stop pair
    */
  inline double value(int TIMER = CPU_TIMER) const
  {
    return m_times[TIMER];
  }

  /** Return the best elapsed time in seconds
    */
  inline double best(int TIMER = CPU_TIMER) const
  {
    return m_bests[TIMER];
  }

  /** Return the worst elapsed time in seconds
    */
  inline double worst(int TIMER = CPU_TIMER) const
  {
    return m_worsts[TIMER];
  }

  /** Return the total elapsed time in seconds.
    */
  inline double total(int TIMER = CPU_TIMER) const
  {
    return m_totals[TIMER];
  }

  /** Return the current reading, in seconds, of the clock used for CPU_TIMER. */
  inline double getCpuTime() const
  {
#ifdef _WIN32
    LARGE_INTEGER query_ticks;
    QueryPerformanceCounter(&query_ticks);
    return query_ticks.QuadPart/m_frequency;
#elif __APPLE__
    // NOTE(review): this treats one mach_absolute_time tick as one nanosecond;
    // confirm whether mach_timebase_info scaling is needed on the targeted hardware.
    return double(mach_absolute_time())*1e-9;
#else
    timespec ts;
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
    return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
#endif
  }

  /** Return the current reading, in seconds, of the clock used for REAL_TIMER. */
  inline double getRealTime() const
  {
#ifdef _WIN32
    // The seconds and milliseconds fields of SYSTEMTIME wrap around every
    // minute, so differences taken across a minute boundary were wrong (even
    // negative). The performance counter does not have that problem.
    LARGE_INTEGER query_ticks;
    QueryPerformanceCounter(&query_ticks);
    return query_ticks.QuadPart/m_frequency;
#elif __APPLE__
    // NOTE(review): same tick-to-nanosecond assumption as in getCpuTime().
    return double(mach_absolute_time())*1e-9;
#else
    timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
#endif
  }

protected:
#if defined(_WIN32) || defined(__CYGWIN__)
  double m_frequency;  // performance-counter ticks per second
#endif
  Vector2d m_starts;   // clock readings taken by the last start()
  Vector2d m_times;    // durations of the last start()/stop() pair
  Vector2d m_bests;    // smallest duration since the last reset()
  Vector2d m_worsts;   // largest duration since the last reset()
  Vector2d m_totals;   // sum of all durations since the last reset()

public:
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
};
// BENCH(TIMER,TRIES,REP,CODE): resets TIMER, then performs TRIES timed tries.
// Each try brackets REP consecutive executions of CODE with TIMER.start() and
// TIMER.stop(), and is followed by a call to clobber(). The loop counters use
// deliberately unusual names (uglyvarname1/2) that CODE must not reuse.
#define BENCH(TIMER,TRIES,REP,CODE) { \
TIMER.reset(); \
for(int uglyvarname1=0; uglyvarname1<TRIES; ++uglyvarname1){ \
TIMER.start(); \
for(int uglyvarname2=0; uglyvarname2<REP; ++uglyvarname2){ \
CODE; \
} \
TIMER.stop(); \
clobber(); \
} \
}
}
// clean #defined tokens
#ifdef EIGEN_BT_UNDEF_NOMINMAX
# undef EIGEN_BT_UNDEF_NOMINMAX
# undef NOMINMAX
#endif
#ifdef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
# undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
# undef WIN32_LEAN_AND_MEAN
#endif
#endif // EIGEN_BENCH_TIMERR_H

View File

@@ -0,0 +1,92 @@
#ifndef EIGEN_BENCH_UTIL_H
#define EIGEN_BENCH_UTIL_H
#include <Eigen/Core>
#include "BenchTimer.h"
using namespace std;
using namespace Eigen;
#include <boost/preprocessor/repetition/enum_params.hpp>
#include <boost/preprocessor/repetition.hpp>
#include <boost/preprocessor/seq.hpp>
#include <boost/preprocessor/array.hpp>
#include <boost/preprocessor/arithmetic.hpp>
#include <boost/preprocessor/comparison.hpp>
#include <boost/preprocessor/punctuation.hpp>
#include <boost/preprocessor/punctuation/comma.hpp>
#include <boost/preprocessor/stringize.hpp>
/** Fills \a mat with random coefficients by calling its setRandom() member.
  * Marked noinline so the call is not inlined into the benchmark.
  */
template<typename MatrixType>
__attribute__((noinline)) void initMatrix_random(MatrixType& mat)
{
  mat.setRandom();
}
/** Turns \a mat into the identity by calling its setIdentity() member.
  * Marked noinline so the call is not inlined into the benchmark.
  */
template<typename MatrixType>
__attribute__((noinline)) void initMatrix_identity(MatrixType& mat)
{
  mat.setIdentity();
}
// DISABLE_SSE_EXCEPTIONS(): reads the SSE control/status register (MXCSR) into
// a local int with stmxcsr, ORs 32832 (0x8040, i.e. bits 15 and 6) into it and
// writes it back with ldmxcsr. Expands to nothing under the Intel compiler.
// NOTE(review): per the Intel SDM's MXCSR layout bits 15 and 6 are the
// flush-to-zero and denormals-are-zero flags rather than exception masks —
// confirm that the macro name matches the intent.
// NOTE(review): aux is declared as an input operand although stmxcsr writes to
// it — confirm the constraint is what the compiler needs.
#ifndef __INTEL_COMPILER
#define DISABLE_SSE_EXCEPTIONS() { \
int aux; \
asm( \
"stmxcsr %[aux] \n\t" \
"orl $32832, %[aux] \n\t" \
"ldmxcsr %[aux] \n\t" \
: : [aux] "m" (aux)); \
}
#else
#define DISABLE_SSE_EXCEPTIONS()
#endif
#ifdef BENCH_GMM
#include <gmm/gmm.h>
/** Copies every coefficient of \a src into \a dst, column by column.
  *
  * \a dst is resized to src.rows() x src.cols() first; \a src must provide
  * rows(), cols() and coeff(row, col), \a dst must provide resize(rows, cols)
  * and a writable operator()(row, col).
  */
template <typename EigenMatrixType, typename GmmMatrixType>
void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst)
{
  dst.resize(src.rows(),src.cols());
  for (int col = 0; col < src.cols(); ++col)
  {
    for (int row = 0; row < src.rows(); ++row)
    {
      dst(row, col) = src.coeff(row, col);
    }
  }
}
#endif
#ifdef BENCH_GSL
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_eigen.h>
/** Copies every coefficient of \a src into the GSL matrix pointed to by *dst
  * using gsl_matrix_set, column by column.
  *
  * This function neither allocates nor resizes *dst.
  */
template <typename EigenMatrixType>
void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
{
  for (int col = 0; col < src.cols(); ++col)
  {
    for (int row = 0; row < src.rows(); ++row)
    {
      gsl_matrix_set(*dst, row, col, src.coeff(row, col));
    }
  }
}
#endif
#ifdef BENCH_UBLAS
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/vector.hpp>
/** Copies every coefficient of \a src into \a dst, column by column.
  *
  * \a dst is resized to src.rows() x src.cols() first; \a src must provide
  * rows(), cols() and coeff(row, col), \a dst must provide resize(rows, cols)
  * and a writable operator()(row, col).
  */
template <typename EigenMatrixType, typename UblasMatrixType>
void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst)
{
  dst.resize(src.rows(),src.cols());
  for (int col = 0; col < src.cols(); ++col)
  {
    for (int row = 0; row < src.rows(); ++row)
    {
      dst(row, col) = src.coeff(row, col);
    }
  }
}
/** Copies the coefficients of \a src into \a dst element by element.
  *
  * \a dst is resized to src.size() first; \a src must provide size() and
  * coeff(index), \a dst must provide resize(n) and operator[].
  */
template <typename EigenType, typename UblasType>
void eiToUblasVec(const EigenType& src, UblasType& dst)
{
  dst.resize(src.size());
  int idx = 0;
  while (idx < src.size())
  {
    dst[idx] = src.coeff(idx);
    ++idx;
  }
}
#endif
#endif // EIGEN_BENCH_UTIL_H

View File

@@ -0,0 +1,55 @@
This folder contains a couple of benchmark utilities and Eigen benchmarks.
****************************
* bench_multi_compilers.sh *
****************************
This script runs a benchmark with a set of different compilers/compiler options.
It takes two arguments:
- a file defining the list of the compilers with their options
- the .cpp file of the benchmark
Examples:
$ ./bench_multi_compilers.sh basicbench.cxxlist basicbenchmark.cpp
g++-4.1 -O3 -DNDEBUG -finline-limit=10000
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
0.271102 0.131416 0.422322 0.198633
0.201658 0.102436 0.397566 0.207282
g++-4.2 -O3 -DNDEBUG -finline-limit=10000
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
0.107805 0.0890579 0.30265 0.161843
0.127157 0.0712581 0.278341 0.191029
g++-4.3 -O3 -DNDEBUG -finline-limit=10000
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
0.134318 0.105291 0.3704 0.180966
0.137703 0.0732472 0.31225 0.202204
icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size
3d-3x3 / 4d-4x4 / Xd-4x4 / Xd-20x20 /
0.226145 0.0941319 0.371873 0.159433
0.109302 0.0837538 0.328102 0.173891
$ ./bench_multi_compilers.sh ompbench.cxxlist ompbenchmark.cpp
g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp
double, fixed-size 4x4: 0.00165105s 0.0778739s
double, 32x32: 0.0654769s 0.075289s => x0.869674 (2)
double, 128x128: 0.054148s 0.0419669s => x1.29025 (2)
double, 512x512: 0.913799s 0.428533s => x2.13239 (2)
double, 1024x1024: 14.5972s 9.3542s => x1.5605 (2)
icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp
double, fixed-size 4x4: 0.000589848s 0.019949s
double, 32x32: 0.0682781s 0.0449722s => x1.51823 (2)
double, 128x128: 0.0547509s 0.0435519s => x1.25714 (2)
double, 512x512: 0.829436s 0.424438s => x1.9542 (2)
double, 1024x1024: 14.5243s 10.7735s => x1.34815 (2)

View File

@@ -0,0 +1,876 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include <iostream>
#include <cstdint>
#include <cstdlib>
#include <vector>
#include <algorithm>
#include <fstream>
#include <string>
#include <cmath>
#include <cassert>
#include <cstring>
#include <memory>
#include <Eigen/Core>
using namespace std;
const int default_precision = 4;
// see --only-cubic-sizes
bool only_cubic_sizes = false;
// see --dump-tables
bool dump_tables = false;
// Returns floor(log2(x)) for x >= 1, i.e. the exact exponent when x is a power
// of two ("pot"). Returns 0 for x == 0.
uint8_t log2_pot(size_t x) {
  size_t exponent = 0;
  // Count how many times x can be halved before it reaches zero.
  for (x >>= 1; x != 0; x >>= 1) {
    ++exponent;
  }
  return exponent;
}
// Packs the base-2 logarithms of k, m and n into one 16-bit value:
// log2(k) is shifted left by 8 bits, log2(m) by 4 bits and log2(n) is OR-ed in
// unshifted.
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
{
  const int k_field = log2_pot(k) << 8;
  const int m_field = log2_pot(m) << 4;
  const int n_field = log2_pot(n);
  return k_field | m_field | n_field;
}
// just a helper to store a triple of K,M,N sizes for matrix product
struct size_triple_t
{
  uint16_t k, m, n;

  // All three sizes zero.
  size_triple_t() : k(0), m(0), n(0) {}

  // Explicit sizes; each value is stored in 16 bits.
  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}

  size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}

  // Expands a compact value: bits 8-11, 4-7 and 0-3 hold the base-2 logarithms
  // of k, m and n respectively.
  size_triple_t(uint16_t compact)
    : k(1 << ((compact >> 8) & 0xf))
    , m(1 << ((compact >> 4) & 0xf))
    , n(1 << (compact & 0xf))
  {}

  // True when the three sizes are equal.
  bool is_cubic() const { return k == m && m == n; }
};
// Prints a size triple as "(k, m, n)".
ostream& operator<<(ostream& s, const size_triple_t& t)
{
  s << "(" << t.k;
  s << ", " << t.m;
  s << ", " << t.n << ")";
  return s;
}
// One measurement line read from an input file.
struct inputfile_entry_t
{
// product size in compact form (see compact_size_triple / size_triple_t)
uint16_t product_size;
// block size in compact form; set to 0 for entries of "default sizes" files
uint16_t pot_block_size;
// explicit block size; only assigned for entries of "default sizes" files
size_triple_t nonpot_block_size;
// measured performance as read from the file
float gflops;
};
// A measurements file loaded into memory.
//
// The constructor reads the file line by line. Lines before the first
// "BEGIN MEASUREMENTS ..." marker are ignored; the marker determines the file
// type, and every following non-empty line is parsed as one measurement of
// that type. Any problem (unreadable file, second marker, malformed line,
// missing marker, no measurements) is reported on cerr and terminates the
// process with exit code 1.
struct inputfile_t
{
  enum class type_t {
    unknown,
    all_pot_sizes,
    default_sizes
  };

  string filename;
  vector<inputfile_entry_t> entries;
  type_t type;

  inputfile_t(const string& fname)
    : filename(fname)
    , type(type_t::unknown)
  {
    ifstream stream(filename);
    if (!stream.is_open()) {
      cerr << "couldn't open input file: " << filename << endl;
      exit(1);
    }
    string line;
    while (getline(stream, line)) {
      if (line.empty()) continue;
      if (line.find("BEGIN MEASUREMENTS ALL POT SIZES") == 0) {
        begin_measurements(type_t::all_pot_sizes);
        continue;
      }
      if (line.find("BEGIN MEASUREMENTS DEFAULT SIZES") == 0) {
        begin_measurements(type_t::default_sizes);
        continue;
      }
      switch (type) {
        case type_t::all_pot_sizes:
          parse_all_pot_sizes_line(line);
          break;
        case type_t::default_sizes:
          parse_default_sizes_line(line);
          break;
        default:
          // No marker seen yet: this line belongs to the preamble, skip it.
          break;
      }
    }
    stream.close();
    if (type == type_t::unknown) {
      cerr << "Unrecognized input file " << filename << endl;
      exit(1);
    }
    if (entries.empty()) {
      cerr << "didn't find any measurements in input file: " << filename << endl;
      exit(1);
    }
  }

private:
  // Records the file type announced by a BEGIN MEASUREMENTS marker; a second
  // marker in the same file is an error.
  void begin_measurements(type_t new_type)
  {
    if (type != type_t::unknown) {
      // The message is terminated with endl like every other diagnostic here,
      // so it is flushed and does not run into whatever is printed next.
      cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines" << endl;
      exit(1);
    }
    type = new_type;
  }

  // Reports a line that could not be parsed and terminates the process.
  void fail_ill_formed(const string& line) const
  {
    cerr << "ill-formed input file: " << filename << endl;
    cerr << "offending line:" << endl << line << endl;
    exit(1);
  }

  // Parses "<product size hex> <block size hex> <gflops>" and appends the entry.
  void parse_all_pot_sizes_line(const string& line)
  {
    unsigned int product_size, block_size;
    float gflops;
    int sscanf_result =
      sscanf(line.c_str(), "%x %x %f",
             &product_size,
             &block_size,
             &gflops);
    if (3 != sscanf_result ||
        !product_size ||
        product_size > 0xfff ||
        !block_size ||
        block_size > 0xfff ||
        !isfinite(gflops))
    {
      fail_ill_formed(line);
    }
    if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
      return;
    }
    inputfile_entry_t entry;
    entry.product_size = uint16_t(product_size);
    entry.pot_block_size = uint16_t(block_size);
    entry.gflops = gflops;
    entries.push_back(entry);
  }

  // Parses "<product size hex> default(<k>, <m>, <n>) <gflops>" and appends the entry.
  void parse_default_sizes_line(const string& line)
  {
    unsigned int product_size;
    float gflops;
    int bk, bm, bn;
    int sscanf_result =
      sscanf(line.c_str(), "%x default(%d, %d, %d) %f",
             &product_size,
             &bk, &bm, &bn,
             &gflops);
    if (5 != sscanf_result ||
        !product_size ||
        product_size > 0xfff ||
        !isfinite(gflops))
    {
      fail_ill_formed(line);
    }
    if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
      return;
    }
    inputfile_entry_t entry;
    entry.product_size = uint16_t(product_size);
    entry.pot_block_size = 0;
    entry.nonpot_block_size = size_triple_t(bk, bm, bn);
    entry.gflops = gflops;
    entries.push_back(entry);
  }
};
// One measurement after preprocessing: the raw GFlop/s figure is replaced by
// an efficiency relative to the best figure measured for the same product size.
struct preprocessed_inputfile_entry_t
{
// product size in compact form
uint16_t product_size;
// block size in compact form
uint16_t block_size;
// gflops of this entry divided by the maximum gflops for this product size
float efficiency;
};
// Strict ordering of preprocessed entries by efficiency: true when e1 is
// strictly less efficient than e2.
bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2)
{
  const bool e2_is_better = e2.efficiency > e1.efficiency;
  return e2_is_better;
}
struct preprocessed_inputfile_t
{
string filename;
vector<preprocessed_inputfile_entry_t> entries;
preprocessed_inputfile_t(const inputfile_t& inputfile)
: filename(inputfile.filename)
{
if (inputfile.type != inputfile_t::type_t::all_pot_sizes) {
abort();
}
auto it = inputfile.entries.begin();
auto it_first_with_given_product_size = it;
while (it != inputfile.entries.end()) {
++it;
if (it == inputfile.entries.end() ||
it->product_size != it_first_with_given_product_size->product_size)
{
import_input_file_range_one_product_size(it_first_with_given_product_size, it);
it_first_with_given_product_size = it;
}
}
}
private:
void import_input_file_range_one_product_size(
const vector<inputfile_entry_t>::const_iterator& begin,
const vector<inputfile_entry_t>::const_iterator& end)
{
uint16_t product_size = begin->product_size;
float max_gflops = 0.0f;
for (auto it = begin; it != end; ++it) {
if (it->product_size != product_size) {
cerr << "Unexpected ordering of entries in " << filename << endl;
cerr << "(Expected all entries for product size " << hex << product_size << dec << " to be grouped)" << endl;
exit(1);
}
max_gflops = max(max_gflops, it->gflops);
}
for (auto it = begin; it != end; ++it) {
preprocessed_inputfile_entry_t entry;
entry.product_size = it->product_size;
entry.block_size = it->pot_block_size;
entry.efficiency = it->gflops / max_gflops;
entries.push_back(entry);
}
}
};
void check_all_files_in_same_exact_order(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles)
{
if (preprocessed_inputfiles.empty()) {
return;
}
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[0];
const size_t num_entries = first_file.entries.size();
for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) {
if (preprocessed_inputfiles[i].entries.size() != num_entries) {
cerr << "these files have different number of entries: "
<< preprocessed_inputfiles[i].filename
<< " and "
<< first_file.filename
<< endl;
exit(1);
}
}
for (size_t entry_index = 0; entry_index < num_entries; entry_index++) {
const uint16_t entry_product_size = first_file.entries[entry_index].product_size;
const uint16_t entry_block_size = first_file.entries[entry_index].block_size;
for (size_t file_index = 0; file_index < preprocessed_inputfiles.size(); file_index++) {
const preprocessed_inputfile_t& cur_file = preprocessed_inputfiles[file_index];
if (cur_file.entries[entry_index].product_size != entry_product_size ||
cur_file.entries[entry_index].block_size != entry_block_size)
{
cerr << "entries not in same order between these files: "
<< first_file.filename
<< " and "
<< cur_file.filename
<< endl;
exit(1);
}
}
}
}
float efficiency_of_subset(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<size_t>& subset)
{
if (subset.size() <= 1) {
return 1.0f;
}
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
const size_t num_entries = first_file.entries.size();
float efficiency = 1.0f;
size_t entry_index = 0;
size_t first_entry_index_with_this_product_size = 0;
uint16_t product_size = first_file.entries[0].product_size;
while (entry_index < num_entries) {
++entry_index;
if (entry_index == num_entries ||
first_file.entries[entry_index].product_size != product_size)
{
float efficiency_this_product_size = 0.0f;
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
float efficiency_this_entry = 1.0f;
for (auto i = subset.begin(); i != subset.end(); ++i) {
efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
}
efficiency_this_product_size = max(efficiency_this_product_size, efficiency_this_entry);
}
efficiency = min(efficiency, efficiency_this_product_size);
if (entry_index < num_entries) {
first_entry_index_with_this_product_size = entry_index;
product_size = first_file.entries[entry_index].product_size;
}
}
}
return efficiency;
}
void dump_table_for_subset(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<size_t>& subset)
{
const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
const size_t num_entries = first_file.entries.size();
size_t entry_index = 0;
size_t first_entry_index_with_this_product_size = 0;
uint16_t product_size = first_file.entries[0].product_size;
size_t i = 0;
size_triple_t min_product_size(first_file.entries.front().product_size);
size_triple_t max_product_size(first_file.entries.back().product_size);
if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) {
abort();
}
if (only_cubic_sizes) {
cerr << "Can't generate tables with --only-cubic-sizes." << endl;
abort();
}
cout << "struct LookupTable {" << endl;
cout << " static const size_t BaseSize = " << min_product_size.k << ";" << endl;
const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1;
const size_t TableSize = NumSizes * NumSizes * NumSizes;
cout << " static const size_t NumSizes = " << NumSizes << ";" << endl;
cout << " static const unsigned short* Data() {" << endl;
cout << " static const unsigned short data[" << TableSize << "] = {";
while (entry_index < num_entries) {
++entry_index;
if (entry_index == num_entries ||
first_file.entries[entry_index].product_size != product_size)
{
float best_efficiency_this_product_size = 0.0f;
uint16_t best_block_size_this_product_size = 0;
for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
float efficiency_this_entry = 1.0f;
for (auto i = subset.begin(); i != subset.end(); ++i) {
efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
}
if (efficiency_this_entry > best_efficiency_this_product_size) {
best_efficiency_this_product_size = efficiency_this_entry;
best_block_size_this_product_size = first_file.entries[e].block_size;
}
}
if ((i++) % NumSizes) {
cout << " ";
} else {
cout << endl << " ";
}
cout << "0x" << hex << best_block_size_this_product_size << dec;
if (entry_index < num_entries) {
cout << ",";
first_entry_index_with_this_product_size = entry_index;
product_size = first_file.entries[entry_index].product_size;
}
}
}
if (i != TableSize) {
cerr << endl << "Wrote " << i << " table entries, expected " << TableSize << endl;
abort();
}
cout << endl << " };" << endl;
cout << " return data;" << endl;
cout << " }" << endl;
cout << "};" << endl;
}
// Efficiency of a partition: the lowest efficiency_of_subset over its
// subsets, capped at 1 (which is also the result for an empty partition).
float efficiency_of_partition(
  const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
  const vector<vector<size_t>>& partition)
{
  float lowest = 1.0f;
  for (const vector<size_t>& subset : partition) {
    const float subset_efficiency = efficiency_of_subset(preprocessed_inputfiles, subset);
    lowest = min(lowest, subset_efficiency);
  }
  return lowest;
}
// Sets out_subset to {0, 1, ..., subset_size - 1}, the first subset of that
// size in the enumeration order used by next_subset.
// Requires 1 <= subset_size <= set_size (checked by assert).
void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t set_size)
{
  assert(subset_size >= 1 && subset_size <= set_size);
  out_subset.clear();
  out_subset.reserve(subset_size);
  for (size_t value = 0; value < subset_size; ++value) {
    out_subset.push_back(value);
  }
}
// True when subset is the last one in enumeration order, which is the case
// exactly when its first element equals set_size - subset.size().
// The subset must not be empty.
bool is_last_subset(const vector<size_t>& subset, size_t set_size)
{
  const size_t first_element = subset.front();
  return first_element + subset.size() == set_size;
}
// Advances inout_subset (a strictly increasing list of indices below
// set_size) to the next subset of the same size in enumeration order.
// Aborts with a message when called on the last subset.
void next_subset(vector<size_t>& inout_subset, size_t set_size)
{
  if (is_last_subset(inout_subset, set_size)) {
    cerr << "iterating past the last subset" << endl;
    abort();
  }
  const size_t count = inout_subset.size();

  // Skip, from the back, the elements that already hold their largest
  // possible value.
  size_t offset_from_back = 1;
  while (inout_subset[count - offset_from_back] == set_size - offset_from_back) {
    ++offset_from_back;
    assert(offset_from_back <= count);
  }

  // Bump the first element that can still grow and let the following ones
  // continue consecutively from it.
  size_t position = count - offset_from_back;
  size_t value = ++inout_subset[position];
  for (++position; position < count; ++position) {
    inout_subset[position] = ++value;
  }
}
// Upper bound on the number of subsets that is considered affordable to enumerate.
const size_t number_of_subsets_limit = 100;
// Subset sizes up to this value are always searched (see max_feasible_subset_size).
const size_t always_search_subsets_of_size_at_least = 2;

// Returns false as soon as the running quotient
// n*(n-1)*...*(n-i) / (i+1)!  for i = 0 .. p-1
// exceeds number_of_subsets_limit, and true when it never does.
// Requires 0 < p <= n (checked by assert).
bool is_number_of_subsets_feasible(size_t n, size_t p)
{
  assert(n>0 && p>0 && p<=n);
  uint64_t numerator = 1;
  uint64_t denominator = 1;
  size_t step = 0;
  while (step < p) {
    numerator *= n - step;
    ++step;
    denominator *= step;
    // Compare by cross-multiplication to stay in integer arithmetic.
    if (numerator > denominator * number_of_subsets_limit) {
      return false;
    }
  }
  return true;
}
// Largest subset size worth enumerating for a set of n elements (n > 0).
//
// Returns the first p in [1, n-1] for which size p+1 is no longer feasible,
// but never less than min(n-1, always_search_subsets_of_size_at_least);
// returns n-1 when every size is feasible.
size_t max_feasible_subset_size(size_t n)
{
  assert(n > 0);
  const size_t largest = n - 1;
  const size_t lower_bound = min<size_t>(largest, always_search_subsets_of_size_at_least);
  size_t p = 1;
  while (p <= largest) {
    if (!is_number_of_subsets_feasible(n, p+1)) {
      return max(p, lower_bound);
    }
    ++p;
  }
  return largest;
}
// Greedily builds a subset of files whose efficiency_of_subset stays strictly
// above required_efficiency_to_beat.
//
// Files are taken from inout_remainder (a list of file indices) one at a time
// and appended to out_subset; every file that is taken is erased from
// inout_remainder. The function returns when the remainder is empty or when
// adding the selected candidate would not beat the required efficiency.
// Aborts if required_efficiency_to_beat >= 1.
void find_subset_with_efficiency_higher_than(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
float required_efficiency_to_beat,
vector<size_t>& inout_remainder,
vector<size_t>& out_subset)
{
out_subset.resize(0);
if (required_efficiency_to_beat >= 1.0f) {
cerr << "can't beat efficiency 1." << endl;
abort();
}
while (!inout_remainder.empty()) {
// candidate_indices holds positions within inout_remainder; it starts with
// all of them and is narrowed below.
vector<size_t> candidate_indices(inout_remainder.size());
for (size_t i = 0; i < candidate_indices.size(); i++) {
candidate_indices[i] = i;
}
// Look ahead with groups of candidates, from the largest affordable group
// size down to 1.
size_t candidate_indices_subset_size = max_feasible_subset_size(candidate_indices.size());
while (candidate_indices_subset_size >= 1) {
vector<size_t> candidate_indices_subset;
make_first_subset(candidate_indices_subset_size,
candidate_indices_subset,
candidate_indices.size());
vector<size_t> best_candidate_indices_subset;
float best_efficiency = 0.0f;
vector<size_t> trial_subset = out_subset;
trial_subset.resize(out_subset.size() + candidate_indices_subset_size);
// Enumerate every group of the current size and remember the one that
// gives the best efficiency when added to out_subset.
while (true)
{
for (size_t i = 0; i < candidate_indices_subset_size; i++) {
// NOTE(review): candidate_indices_subset enumerates positions within
// candidate_indices, yet it indexes inout_remainder directly here; once
// candidate_indices has been narrowed the two differ. Confirm whether
// inout_remainder[candidate_indices[candidate_indices_subset[i]]] was intended.
trial_subset[out_subset.size() + i] = inout_remainder[candidate_indices_subset[i]];
}
float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset);
if (trial_efficiency > best_efficiency) {
best_efficiency = trial_efficiency;
best_candidate_indices_subset = candidate_indices_subset;
}
if (is_last_subset(candidate_indices_subset, candidate_indices.size())) {
break;
}
next_subset(candidate_indices_subset, candidate_indices.size());
}
// Narrow the candidates to the best group, but only if that group beats
// the required efficiency.
if (best_efficiency > required_efficiency_to_beat) {
for (size_t i = 0; i < best_candidate_indices_subset.size(); i++) {
candidate_indices[i] = candidate_indices[best_candidate_indices_subset[i]];
}
candidate_indices.resize(best_candidate_indices_subset.size());
}
candidate_indices_subset_size--;
}
// Take the first remaining candidate if adding it alone still beats the
// required efficiency; otherwise the subset is complete.
size_t candidate_index = candidate_indices[0];
auto candidate_iterator = inout_remainder.begin() + candidate_index;
vector<size_t> trial_subset = out_subset;
trial_subset.push_back(*candidate_iterator);
float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset);
if (trial_efficiency > required_efficiency_to_beat) {
out_subset.push_back(*candidate_iterator);
inout_remainder.erase(candidate_iterator);
} else {
break;
}
}
}
// Splits all files into subsets by repeatedly calling
// find_subset_with_efficiency_higher_than on the files that have not been
// assigned yet, until none are left. The subsets are stored in out_partition
// in the order in which they were found.
void find_partition_with_efficiency_higher_than(
  const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
  float required_efficiency_to_beat,
  vector<vector<size_t>>& out_partition)
{
  out_partition.clear();

  // Indices of the files that do not belong to any subset yet.
  vector<size_t> unassigned(preprocessed_inputfiles.size());
  for (size_t file_index = 0; file_index < unassigned.size(); ++file_index) {
    unassigned[file_index] = file_index;
  }

  while (!unassigned.empty()) {
    vector<size_t> found_subset;
    find_subset_with_efficiency_higher_than(
      preprocessed_inputfiles,
      required_efficiency_to_beat,
      unassigned,
      found_subset);
    out_partition.push_back(found_subset);
  }
}
void print_partition(
const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
const vector<vector<size_t>>& partition)
{
float efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl;
for (auto subset = partition.begin(); subset != partition.end(); ++subset) {
cout << " Subset " << (subset - partition.begin())
<< ", efficiency " << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:"
<< endl;
for (auto file = subset->begin(); file != subset->end(); ++file) {
cout << " " << preprocessed_inputfiles[*file].filename << endl;
}
if (dump_tables) {
cout << " Table:" << endl;
dump_table_for_subset(preprocessed_inputfiles, *subset);
}
}
cout << endl;
}
// Base class of the actions this tool can perform. Both virtual functions
// abort in the base class; derived actions override them.
struct action_t
{
  // Name of the action.
  virtual const char* invokation_name() const
  {
    abort();
    return nullptr;
  }

  // Executes the action on the given list of input file names.
  virtual void run(const vector<string>&) const
  {
    abort();
  }

  virtual ~action_t() {}
};
// The "partition" action: loads "all POT sizes" measurement files and
// searches for partitions of the files into subsets, raising the efficiency
// to beat after every partition found, then prints the partitions that are
// kept.
struct partition_action_t : action_t
{
  virtual const char* invokation_name() const override { return "partition"; }

  virtual void run(const vector<string>& input_filenames) const override
  {
    vector<preprocessed_inputfile_t> preprocessed_inputfiles;

    if (input_filenames.empty()) {
      cerr << "The " << invokation_name() << " action needs a list of input files." << endl;
      exit(1);
    }

    // Load every file; only files with measurements for all POT sizes are accepted.
    for (const string& name : input_filenames) {
      inputfile_t inputfile(name);
      if (inputfile.type == inputfile_t::type_t::all_pot_sizes) {
        preprocessed_inputfiles.emplace_back(inputfile);
      } else if (inputfile.type == inputfile_t::type_t::default_sizes) {
        cerr << "The " << invokation_name() << " action only uses measurements for all pot sizes, and "
             << "has no use for " << name << " which contains measurements for default sizes." << endl;
        exit(1);
      } else {
        cerr << "Unrecognized input file: " << name << endl;
        exit(1);
      }
    }

    check_all_files_in_same_exact_order(preprocessed_inputfiles);

    // Each partition found sets the efficiency the next one has to beat. Stop
    // once every file is in its own subset or the efficiency reaches 1.
    float efficiency_to_beat = 0.0f;
    vector<vector<vector<size_t>>> partitions;
    cerr << "searching for partitions...\r" << flush;
    for (;;)
    {
      vector<vector<size_t>> partition;
      find_partition_with_efficiency_higher_than(
        preprocessed_inputfiles,
        efficiency_to_beat,
        partition);
      const float achieved = efficiency_of_partition(preprocessed_inputfiles, partition);
      cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size()
           << " subsets for " << 100.0f * achieved
           << " % efficiency"
           << " \r" << flush;
      partitions.push_back(partition);
      if (partition.size() == preprocessed_inputfiles.size() || achieved == 1.0f) {
        break;
      }
      efficiency_to_beat = achieved;
    }
    cerr << " " << endl;

    // Drop every partition that has at least as many subsets as its
    // successor; the scan restarts from the beginning after each removal.
    size_t pos = 0;
    while (pos + 1 < partitions.size()) {
      if (partitions[pos].size() >= partitions[pos+1].size()) {
        partitions.erase(partitions.begin() + pos);
        pos = 0;
      } else {
        ++pos;
      }
    }

    for (const vector<vector<size_t>>& kept : partitions) {
      print_partition(preprocessed_inputfiles, kept);
    }
  }
};
// Action "evaluate-defaults": for every product size present in both input
// files, compares the GFlop/s measured with the default block size against the
// best GFlop/s measured over all power-of-two (POT) block sizes.
struct evaluate_defaults_action_t : action_t
{
  // One report row, i.e. one product size.
  struct results_entry_t {
    uint16_t product_size;
    size_triple_t default_block_size;
    uint16_t best_pot_block_size;
    float default_gflops;
    float best_pot_gflops;
    float default_efficiency;  // default_gflops / best_pot_gflops
  };

  // Prints one report row in human-readable form.
  friend ostream& operator<<(ostream& s, const results_entry_t& entry)
  {
    return s
      << "Product size " << size_triple_t(entry.product_size)
      << ": default block size " << entry.default_block_size
      << " -> " << entry.default_gflops
      << " GFlop/s = " << entry.default_efficiency * 100.0f << " %"
      << " of best POT block size " << size_triple_t(entry.best_pot_block_size)
      << " -> " << entry.best_pot_gflops
      << " GFlop/s" << dec;
  }

  // Strict weak ordering used to sort rows from worst to best efficiency.
  static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
    return e1.default_efficiency < e2.default_efficiency;
  }

  virtual const char* invokation_name() const override { return "evaluate-defaults"; }

  // Prints the usage text of this action to stderr and exits with status 1.
  void show_usage_and_exit() const
  {
    cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
    cerr << "checks how well the performance with default sizes compares to the best "
         << "performance measured over all POT sizes." << endl;
    exit(1);
  }

  // Expects exactly two files: input_filenames[0] must be a default-sizes
  // measurement and input_filenames[1] an all-POT-sizes measurement; anything
  // else prints the usage text and exits.
  virtual void run(const vector<string>& input_filenames) const override
  {
    if (input_filenames.size() != 2) {
      show_usage_and_exit();
    }
    inputfile_t inputfile_default_sizes(input_filenames[0]);
    inputfile_t inputfile_all_pot_sizes(input_filenames[1]);
    if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) {
      cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." << endl;
      show_usage_and_exit();
    }
    if (inputfile_all_pot_sizes.type != inputfile_t::type_t::all_pot_sizes) {
      cerr << inputfile_all_pot_sizes.filename << " is not an input file with all POT sizes." << endl;
      show_usage_and_exit();
    }

    vector<results_entry_t> results;
    vector<results_entry_t> cubic_results;
    uint16_t product_size = 0;
    // The POT cursor only ever moves forward while the default-sizes entries
    // are walked in order.
    auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin();
    for (auto it_default_sizes = inputfile_default_sizes.entries.begin();
         it_default_sizes != inputfile_default_sizes.entries.end();
         ++it_default_sizes)
    {
      // Only the first entry of a run of equal product sizes is evaluated.
      if (it_default_sizes->product_size == product_size) {
        continue;
      }
      product_size = it_default_sizes->product_size;
      while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() &&
             it_all_pot_sizes->product_size != product_size)
      {
        ++it_all_pot_sizes;
      }
      if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) {
        break;
      }
      // Scan the run of POT entries with this product size for the fastest one.
      uint16_t best_pot_block_size = 0;
      float best_pot_gflops = 0;
      for (auto it = it_all_pot_sizes;
           it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
           ++it)
      {
        if (it->gflops > best_pot_gflops) {
          best_pot_gflops = it->gflops;
          best_pot_block_size = it->pot_block_size;
        }
      }
      results_entry_t entry;
      entry.product_size = product_size;
      entry.default_block_size = it_default_sizes->nonpot_block_size;
      entry.best_pot_block_size = best_pot_block_size;
      entry.default_gflops = it_default_sizes->gflops;
      entry.best_pot_gflops = best_pot_gflops;
      entry.default_efficiency = entry.default_gflops / entry.best_pot_gflops;
      results.push_back(entry);
      size_triple_t t(product_size);
      if (t.k == t.m && t.m == t.n) {
        cubic_results.push_back(entry);
      }
    }

    cout << "All results:" << endl;
    for (auto it = results.begin(); it != results.end(); ++it) {
      cout << *it << endl;
    }
    cout << endl;

    sort(results.begin(), results.end(), lower_efficiency);

    const size_t n = min<size_t>(20, results.size());
    cout << n << " worst results:" << endl;
    for (size_t i = 0; i < n; i++) {
      cout << results[i] << endl;
    }
    cout << endl;

    cout << "cubic results:" << endl;
    for (auto it = cubic_results.begin(); it != cubic_results.end(); ++it) {
      cout << *it << endl;
    }
    cout << endl;

    // NOTE(review): cubic_results is sorted here but not read afterwards; the
    // percentile summary below is computed on `results`.
    sort(cubic_results.begin(), cubic_results.end(), lower_efficiency);

    cout.precision(2);
    // With no results there is nothing to summarise; without this guard
    // results.size() - 1 wraps around and results[...] is read out of bounds.
    if (!results.empty()) {
      const size_t last = results.size() - 1;
      const vector<float> fractions = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f};
      for (auto it = fractions.begin(); it != fractions.end(); ++it) {
        const size_t idx = min(last, size_t(*it * results.size()));
        // A single result gives last == 0: report 0 % instead of 0/0.
        const float percent = last ? 100.0f * idx / last : 0.0f;
        cout << percent
             << " % of product sizes have default efficiency <= "
             << 100.0f * results[idx].default_efficiency << " %" << endl;
      }
    }
    cout.precision(default_precision);
  }
};
// Writes the program-level usage text to stderr, listing the invocation name
// of every available action, and then terminates with exit status 1.
void show_usage_and_exit(int argc, char* argv[],
                         const vector<unique_ptr<action_t>>& available_actions)
{
  cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
  cerr << "available actions:" << endl;
  for (const auto& candidate : available_actions) {
    cerr << " " << candidate->invokation_name() << endl;
  }
  cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl;
  exit(1);
}
// Command-line driver: selects exactly one action by its invocation name,
// applies the recognised "--" options, treats every remaining argument as an
// input file name, and finally runs the action on those files.
int main(int argc, char* argv[])
{
  cout.precision(default_precision);
  cerr.precision(default_precision);

  vector<unique_ptr<action_t>> available_actions;
  available_actions.emplace_back(new partition_action_t);
  available_actions.emplace_back(new evaluate_defaults_action_t);

  vector<string> input_filenames;
  action_t* action = nullptr;

  if (argc < 2) {
    show_usage_and_exit(argc, argv, available_actions);
  }

  for (int i = 1; i < argc; i++) {
    const char* const arg = argv[i];

    // Step 1. Try to match action invocation names.
    bool is_action_name = false;
    for (const auto& candidate : available_actions) {
      if (strcmp(arg, candidate->invokation_name()) != 0) {
        continue;
      }
      if (action) {
        cerr << "can't specify more than one action!" << endl;
        show_usage_and_exit(argc, argv, available_actions);
      } else {
        action = candidate.get();
        is_action_name = true;
        break;
      }
    }
    if (is_action_name) {
      continue;
    }

    // Step 2. Try to match option names.
    if (arg[0] == '-') {
      if (strcmp(arg, "--only-cubic-sizes") == 0) {
        only_cubic_sizes = true;
      } else if (strcmp(arg, "--dump-tables") == 0) {
        dump_tables = true;
      } else {
        cerr << "Unrecognized option: " << arg << endl;
        show_usage_and_exit(argc, argv, available_actions);
      }
      continue;
    }

    // Step 3. Default to interpreting args as input filenames.
    input_filenames.emplace_back(arg);
  }

  if (dump_tables && only_cubic_sizes) {
    cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl;
    show_usage_and_exit(argc, argv, available_actions);
  }
  if (!action) {
    show_usage_and_exit(argc, argv, available_actions);
  }
  action->run(input_filenames);
}

View File

@@ -0,0 +1,28 @@
#!/bin/bash
# Compiler command lines to benchmark. Every active line stores one command
# (compiler plus flags) in the CLIST array. The subscript ((g++)) is an
# arithmetic expression that post-increments the shell variable g, so each
# assignment lands in the next slot. Lines starting with '#' are disabled
# configurations kept for reference.
# NOTE(review): presumably sourced by a driver script that iterates over CLIST
# and relies on g starting at 0 (or unset) - confirm against the caller.
# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG"
# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG -finline-limit=20000"
# CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG"
#CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000"
# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG"
#CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000"
# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate"
# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use"
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG"
#CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000"
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate"
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use"
# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-genx"
# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-use"
#CLIST[((g++))]="/opt/intel/Compiler/11.1/072/bin/intel64/icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -lrt"
# Active configurations: each compiler appears twice, once with Eigen's
# vectorization disabled (-DEIGEN_DONT_VECTORIZE) and once without that flag.
CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -lrt"
CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -lrt"
CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -lrt"

View File

@@ -0,0 +1,35 @@
#include <iostream>
#include "BenchUtil.h"
#include "basicbenchmark.h"
// Entry point of the basic benchmark: prints a header row describing every
// entry of MODES, then one row of benchBasic<LazyEval> results and one row of
// benchBasic<EarlyEval> results, and returns 0.
int main(int argc, char *argv[])
{
DISABLE_SSE_EXCEPTIONS();
// this is the list of matrix type and size we want to bench:
// ((suffix) (matrix size) (number of iterations))
#define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000))
// #define MODES ((Xd)(20)(10000))
// _GENERATE_HEADER expands, for one MODES element, to the stream insertions
// producing "<suffix>-<size>x<size> / ".
#define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \
<< BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << " / "
std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl;
// Number of timed tries handed to every benchmark call below.
const int tries = 10;
// _RUN_BENCH expands, for one MODES element, to a statement that calls ARG
// (the benchmark function) with a Matrix<suffix> of size x size, the
// element's iteration count and `tries`, and prints the result followed by
// a space.
#define _RUN_BENCH(R,ARG,EL) \
std::cout << ARG( \
BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\
BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \
<< " ";
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES );
std::cout << endl;
BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES );
std::cout << endl;
return 0;
}

View File

@@ -0,0 +1,63 @@
#ifndef EIGEN_BENCH_BASICBENCH_H
#define EIGEN_BENCH_BASICBENCH_H
// Evaluation strategies selectable through the Mode template argument below.
enum {LazyEval, EarlyEval, OmpEval};
// Declared separately so the GCC noinline attribute can be attached to the
// function template.
template<int Mode, typename MatrixType>
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline));
// Applies the update m = I + 0.00005 * (m + m*m) `iterations` times, in place.
// Mode selects how the expression is evaluated:
//   LazyEval  - product marked with lazy(), whole expression forced by eval()
//   OmpEval   - same expression, forced by evalOMP()
//   otherwise - plain assignment of the expression (EarlyEval)
// The asm statements contain only '#'-prefixed text and serve as markers
// around the benchmarked statement.
// NOTE(review): presumably they show up as comments in the generated assembly
// so the loop body can be located there - confirm for the target assembler.
template<int Mode, typename MatrixType>
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations)
{
for(int a = 0; a < iterations; a++)
{
if (Mode==LazyEval)
{
asm("#begin_bench_loop LazyEval");
// Extra marker emitted only for fixed-size matrix types.
if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
m = (I + 0.00005 * (m + m.lazy() * m)).eval();
}
else if (Mode==OmpEval)
{
asm("#begin_bench_loop OmpEval");
if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
m = (I + 0.00005 * (m + m.lazy() * m)).evalOMP();
}
else
{
asm("#begin_bench_loop EarlyEval");
if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
m = I + 0.00005 * (m + m * m);
}
asm("#end_bench_loop");
}
}
// Declared separately so the GCC noinline attribute can be attached to the
// function template.
template<int Mode, typename MatrixType>
double benchBasic(const MatrixType& mat, int iterations, int tries) __attribute__((noinline));

// Times benchBasic_loop<Mode> on matrices with the same dimensions as `mat`.
//   mat        - only its rows()/cols() are used, to size the work matrices
//   iterations - number of updates performed inside each timed run
//   tries      - number of timed runs; m is re-randomised before each one
// Returns timer.value() after the last run.
template<int Mode, typename MatrixType>
double benchBasic(const MatrixType& mat, int iterations, int tries)
{
  const int rows = mat.rows();
  const int cols = mat.cols();

  MatrixType I(rows,cols);
  MatrixType m(rows,cols);

  initMatrix_identity(I);

  Eigen::BenchTimer timer;
  // Plain int counter: `tries` is signed and `uint` is not a standard type.
  for(int t=0; t<tries; ++t)
  {
    initMatrix_random(m);
    timer.start();
    benchBasic_loop<Mode>(I, m, iterations);
    timer.stop();
    // Printing the result keeps the computation observable.
    cerr << m;
  }
  return timer.value();
}
#endif // EIGEN_BENCH_BASICBENCH_H

View File

@@ -0,0 +1,219 @@
// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
// possible options:
// -DEIGEN_DONT_VECTORIZE
// -msse2
// #define EIGEN_DEFAULT_TO_ROW_MAJOR
#define _FLOAT
#include <iostream>
#include <Eigen/Core>
#include "BenchTimer.h"
// include the BLAS headers
extern "C" {
#include <cblas.h>
}
#include <string>
#ifdef _FLOAT
typedef float Scalar;
#define CBLAS_GEMM cblas_sgemm
#else
typedef double Scalar;
#define CBLAS_GEMM cblas_dgemm
#endif
typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
void check_product(int M, int N, int K);
void check_product(void);
// Benchmarks C += A*B with CBLAS and with Eigen.
// Accepted command lines (see the usage text below):
//   <size> | auto <size> | <size> <nbloops> <nbtries> | <M> <N> <K> <nbloops> <nbtries> | check
// "check" only runs the randomized correctness test and then exits.
int main(int argc, char *argv[])
{
  // disable SSE exceptions
  // NOTE(review): the asm writes `aux` although it is declared as an input
  // operand; left untouched here, confirm against the GCC extended-asm rules.
  #ifdef __GNUC__
  {
    int aux;
    asm(
    "stmxcsr %[aux] \n\t"
    "orl $32832, %[aux] \n\t"
    "ldmxcsr %[aux] \n\t"
    : : [aux] "m" (aux));
  }
  #endif

  int nbtries=1, nbloops=1, M, N, K;

  if (argc==2)
  {
    if (std::string(argv[1])=="check")
    {
      check_product();
      // Nothing to benchmark in check mode; falling through would use
      // M, N and K uninitialized.
      return 0;
    }
    M = N = K = atoi(argv[1]);
  }
  else if ((argc==3) && (std::string(argv[1])=="auto"))
  {
    M = N = K = atoi(argv[2]);
    // Aim for about 1e9 multiply-adds per try. The cube is computed in 64 bits
    // so large sizes do not overflow, and a non-positive size does not divide
    // by zero.
    const long long cube = static_cast<long long>(M) * M * M;
    nbloops = cube > 0 ? static_cast<int>(1000000000LL / cube) : 1;
    if (nbloops<1)
      nbloops = 1;
    nbtries = 6;
  }
  else if (argc==4)
  {
    M = N = K = atoi(argv[1]);
    nbloops = atoi(argv[2]);
    nbtries = atoi(argv[3]);
  }
  else if (argc==6)
  {
    M = atoi(argv[1]);
    N = atoi(argv[2]);
    K = atoi(argv[3]);
    nbloops = atoi(argv[4]);
    nbtries = atoi(argv[5]);
  }
  else
  {
    std::cout << "Usage: " << argv[0] << " size \n";
    std::cout << "Usage: " << argv[0] << " auto size\n";
    std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
    std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
    std::cout << "Usage: " << argv[0] << " check\n";
    std::cout << "Options:\n";
    std::cout << " size unique size of the 2 matrices (integer)\n";
    std::cout << " auto automatically set the number of repetitions and tries\n";
    std::cout << " nbloops number of times the GEMM routines is executed\n";
    std::cout << " nbtries number of times the loop is benched (return the best try)\n";
    std::cout << " M N K sizes of the matrices: MxN = MxK * KxN (integers)\n";
    std::cout << " check check eigen product using cblas as a reference\n";
    exit(1);
  }

  // Every branch that reaches this point had argc >= 2, so argv[1] exists.
  const bool auto_mode = std::string(argv[1])=="auto";

  double nbmad = double(M) * double(N) * double(K) * double(nbloops);

  if (!auto_mode)
    std::cout << M << " x " << N << " x " << K << "\n";

  Scalar alpha, beta;
  MyMatrix ma(M,K), mb(K,N), mc(M,N);
  ma = MyMatrix::Random(M,K);
  mb = MyMatrix::Random(K,N);
  mc = MyMatrix::Random(M,N);

  Eigen::BenchTimer timer;

  // we simply compute c += a*b, so:
  alpha = 1;
  beta = 1;

  // bench cblas
  // ROWS_A, COLS_B, COLS_A, 1.0, A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
  // The CBLAS reference run is skipped in auto mode.
  if (!auto_mode)
  {
    timer.reset();
    // Signed counters: nbtries/nbloops are ints and `uint` is not standard.
    for (int k=0 ; k<nbtries ; ++k)
    {
      timer.start();
      for (int j=0 ; j<nbloops ; ++j)
        #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
        CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
        #else
        CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
        #endif
      timer.stop();
    }
    if (!auto_mode)
      std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
    else
      std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
  }

  // clear
  ma = MyMatrix::Random(M,K);
  mb = MyMatrix::Random(K,N);
  mc = MyMatrix::Random(M,N);

  // eigen
  // if (!(std::string(argv[1])=="auto"))
  {
    timer.reset();
    for (int k=0 ; k<nbtries ; ++k)
    {
      timer.start();
      bench_eigengemm(mc, ma, mb, nbloops);
      timer.stop();
    }
    if (!auto_mode)
      std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
    else
      std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
  }

  std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
  std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;

  return 0;
}
using namespace Eigen;
// Benchmarked Eigen kernel: accumulates ma * mb into mc, nbloops times.
// noalias() is used so the product is written straight into mc.
// A non-positive nbloops performs no work; the counter is a signed int so a
// negative count is not reinterpreted as a huge unsigned one.
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
{
  for (int j=0 ; j<nbloops ; ++j)
    mc.noalias() += ma * mb;
}
// Prints "FAIL: <M>" to std::cout when condition A is false.
// Wrapped in do { } while (0) so that `MYVERIFY(...);` is a single statement
// and can be used safely as the body of an if/else.
#define MYVERIFY(A,M) do { if (!(A)) { \
    std::cout << "FAIL: " << M << "\n"; \
  } } while (0)
void check_product(int M, int N, int K)
{
MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
ma = MyMatrix::Random(M,K);
mb = MyMatrix::Random(K,N);
maT = ma.transpose();
mbT = mb.transpose();
mc = MyMatrix::Random(M,N);
MyMatrix::Scalar eps = 1e-4;
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
meigen += ma * mb;
MYVERIFY(meigen.isApprox(mref, eps),". * .");
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
meigen += maT.transpose() * mb;
MYVERIFY(meigen.isApprox(mref, eps),"T * .");
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
meigen += (maT.transpose()) * (mbT.transpose());
MYVERIFY(meigen.isApprox(mref, eps),"T * T");
meigen = mref = mc;
CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
meigen += ma * mbT.transpose();
MYVERIFY(meigen.isApprox(mref, eps),". * T");
}
// Runs the product check on 1000 random shapes, with M drawn from [1,64] and
// N, K drawn from [1,768], printing each shape before it is checked.
void check_product(void)
{
  for (int round = 0; round < 1000; ++round)
  {
    // Draw order M, N, K is kept so the random sequence is unchanged.
    const int M = internal::random<int>(1,64);
    const int N = internal::random<int>(1,768);
    const int K = internal::random<int>(1,768);
    std::cout << M << " x " << N << " x " << K << "\n";
    check_product(M, N, K);
  }
}

View File

@@ -0,0 +1,142 @@
// g++ -DNDEBUG -O3 -I.. benchLLT.cpp -o benchLLT && ./benchLLT
// options:
// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
// -DEIGEN_DONT_VECTORIZE
// -msse2
// -DREPEAT=100
// -DTRIES=10
// -DSCALAR=double
#include <iostream>
#include <Eigen/Core>
#include <Eigen/Cholesky>
#include <bench/BenchUtil.h>
using namespace Eigen;
#ifndef REPEAT
#define REPEAT 10000
#endif
#ifndef TRIES
#define TRIES 10
#endif
typedef float Scalar;
// Times the LDLT and LLT decompositions of a matrix a * a.adjoint(), where a
// is a random matrix with the dimensions of `m`, and prints one result row.
// With BENCH_GSL defined and a dynamic-size MatrixType, GSL's Cholesky
// decomposition is timed as well.
//   m - only its rows()/cols() are used
template <typename MatrixType>
__attribute__ ((noinline)) void benchLLT(const MatrixType& m)
{
  int rows = m.rows();
  int cols = m.cols();

  // Operation-count estimate used for the GFLOPS figures below.
  double cost = 0;
  for (int j=0; j<rows; ++j)
  {
    int r = std::max(rows - j -1,0);
    cost += 2*(r*j+r+j);
  }

  // Fewer repetitions for larger matrices, but always at least one so the
  // per-repetition figures below never divide by zero.
  int repeats = std::max(1, (REPEAT*1000)/(rows*rows));

  typedef typename MatrixType::Scalar Scalar;
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;

  MatrixType a = MatrixType::Random(rows,cols);
  SquareMatrixType covMat = a * a.adjoint();

  BenchTimer timerNoSqrt, timerSqrt;

  // One coefficient of every factorization is accumulated so the work cannot
  // be optimized away.
  Scalar acc = 0;
  int r = internal::random<int>(0,covMat.rows()-1);
  int c = internal::random<int>(0,covMat.cols()-1);
  for (int t=0; t<TRIES; ++t)
  {
    timerNoSqrt.start();
    for (int k=0; k<repeats; ++k)
    {
      LDLT<SquareMatrixType> cholnosqrt(covMat);
      acc += cholnosqrt.matrixL().coeff(r,c);
    }
    timerNoSqrt.stop();
  }

  for (int t=0; t<TRIES; ++t)
  {
    timerSqrt.start();
    for (int k=0; k<repeats; ++k)
    {
      LLT<SquareMatrixType> chol(covMat);
      acc += chol.matrixL().coeff(r,c);
    }
    timerSqrt.stop();
  }

  if (MatrixType::RowsAtCompileTime==Dynamic)
    std::cout << "dyn ";
  else
    std::cout << "fixed ";
  std::cout << covMat.rows() << " \t"
            << (timerNoSqrt.best()) / repeats << "s "
            << "(" << 1e-9 * cost*repeats/timerNoSqrt.best() << " GFLOPS)\t"
            << (timerSqrt.best()) / repeats << "s "
            << "(" << 1e-9 * cost*repeats/timerSqrt.best() << " GFLOPS)\n";

  #ifdef BENCH_GSL
  if (MatrixType::RowsAtCompileTime==Dynamic)
  {
    timerSqrt.reset();

    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());

    eiToGsl(covMat, &gslCovMat);
    for (int t=0; t<TRIES; ++t)
    {
      timerSqrt.start();
      for (int k=0; k<repeats; ++k)
      {
        gsl_matrix_memcpy(gslCopy,gslCovMat);
        gsl_linalg_cholesky_decomp(gslCopy);
        acc += gsl_matrix_get(gslCopy,r,c);
      }
      timerSqrt.stop();
    }

    // NOTE(review): this figure is value() * REPEAT / repeats while the Eigen
    // columns print best() / repeats - confirm whether the scaling differs on
    // purpose.
    std::cout << " | \t"
              << timerSqrt.value() * REPEAT / repeats << "s";

    // Release both GSL matrices (gslCopy used to be leaked).
    gsl_matrix_free(gslCovMat);
    gsl_matrix_free(gslCopy);
  }
  #endif
  std::cout << "\n";
  // make sure the compiler does not optimize too much
  if (acc==123)
    std::cout << acc;
}
// Prints the table header, then benchmarks every dynamic size of the
// zero-terminated list followed by a fixed set of compile-time sizes.
int main(int argc, char* argv[])
{
  const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,1500,0};
  std::cout << "size LDLT LLT";
  // #ifdef BENCH_GSL
  // std::cout << " GSL (standard + double + ATLAS) ";
  // #endif
  std::cout << "\n";

  // Walk the list until the 0 sentinel.
  for (const int* dim = dynsizes; *dim > 0; ++dim)
    benchLLT(Matrix<Scalar,Dynamic,Dynamic>(*dim,*dim));

  benchLLT(Matrix<Scalar,2,2>());
  benchLLT(Matrix<Scalar,3,3>());
  benchLLT(Matrix<Scalar,4,4>());
  benchLLT(Matrix<Scalar,5,5>());
  benchLLT(Matrix<Scalar,6,6>());
  benchLLT(Matrix<Scalar,7,7>());
  benchLLT(Matrix<Scalar,8,8>());
  benchLLT(Matrix<Scalar,12,12>());
  benchLLT(Matrix<Scalar,16,16>());
  return 0;
}

View File

@@ -0,0 +1,212 @@
// g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp -o benchEigenSolver && ./benchEigenSolver
// options:
// -DBENCH_GMM
// -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
// -DEIGEN_DONT_VECTORIZE
// -msse2
// -DREPEAT=100
// -DTRIES=10
// -DSCALAR=double
#include <iostream>
#include <Eigen/Core>
#include <Eigen/QR>
#include <bench/BenchUtil.h>
using namespace Eigen;
#ifndef REPEAT
#define REPEAT 1000
#endif
#ifndef TRIES
#define TRIES 4
#endif
#ifndef SCALAR
#define SCALAR float
#endif
typedef SCALAR Scalar;
// Times SelfAdjointEigenSolver and EigenSolver on a matrix a * a.adjoint(),
// where a is a random matrix with the dimensions of `m`, and prints one
// result row. With BENCH_GMM / BENCH_GSL defined and a dynamic-size
// MatrixType, the corresponding GMM++ / GSL solvers are timed as well.
//   m - only its rows()/cols() are used
template <typename MatrixType>
__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
{
  int rows = m.rows();
  int cols = m.cols();

  // Fewer repetitions for larger matrices, never fewer than one; the
  // self-adjoint solver gets four times as many as the generic one.
  int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(rows))));
  int saRepeats = stdRepeats * 4;

  typedef typename MatrixType::Scalar Scalar;
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;

  MatrixType a = MatrixType::Random(rows,cols);
  SquareMatrixType covMat = a * a.adjoint();

  BenchTimer timerSa, timerStd;

  // One coefficient of every result is accumulated so the work cannot be
  // optimized away.
  Scalar acc = 0;
  int r = internal::random<int>(0,covMat.rows()-1);
  int c = internal::random<int>(0,covMat.cols()-1);
  {
    SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
    for (int t=0; t<TRIES; ++t)
    {
      timerSa.start();
      for (int k=0; k<saRepeats; ++k)
      {
        ei.compute(covMat);
        acc += ei.eigenvectors().coeff(r,c);
      }
      timerSa.stop();
    }
  }

  {
    EigenSolver<SquareMatrixType> ei(covMat);
    for (int t=0; t<TRIES; ++t)
    {
      timerStd.start();
      for (int k=0; k<stdRepeats; ++k)
      {
        ei.compute(covMat);
        acc += ei.eigenvectors().coeff(r,c);
      }
      timerStd.stop();
    }
  }

  if (MatrixType::RowsAtCompileTime==Dynamic)
    std::cout << "dyn ";
  else
    std::cout << "fixed ";
  std::cout << covMat.rows() << " \t"
            << timerSa.value() * REPEAT / saRepeats << "s \t"
            << timerStd.value() * REPEAT / stdRepeats << "s";

  #ifdef BENCH_GMM
  if (MatrixType::RowsAtCompileTime==Dynamic)
  {
    timerSa.reset();
    timerStd.reset();

    gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(),covMat.cols());
    gmm::dense_matrix<Scalar> eigvect(covMat.rows(),covMat.cols());
    std::vector<Scalar> eigval(covMat.rows());
    eiToGmm(covMat, gmmCovMat);
    for (int t=0; t<TRIES; ++t)
    {
      timerSa.start();
      for (int k=0; k<saRepeats; ++k)
      {
        gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
        acc += eigvect(r,c);
      }
      timerSa.stop();
    }
    // the non-selfadjoint solver does not compute the eigen vectors
    // for (int t=0; t<TRIES; ++t)
    // {
    // timerStd.start();
    // for (int k=0; k<stdRepeats; ++k)
    // {
    // gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
    // acc += eigvect(r,c);
    // }
    // timerStd.stop();
    // }

    std::cout << " | \t"
              << timerSa.value() * REPEAT / saRepeats << "s"
              << /*timerStd.value() * REPEAT / stdRepeats << "s"*/ " na ";
  }
  #endif

  #ifdef BENCH_GSL
  if (MatrixType::RowsAtCompileTime==Dynamic)
  {
    timerSa.reset();
    timerStd.reset();

    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
    gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(),covMat.cols());
    gsl_vector* eigval = gsl_vector_alloc(covMat.rows());
    gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows());

    gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(),covMat.cols());
    gsl_vector_complex* eigvalz = gsl_vector_complex_alloc(covMat.rows());
    gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());

    eiToGsl(covMat, &gslCovMat);
    for (int t=0; t<TRIES; ++t)
    {
      timerSa.start();
      for (int k=0; k<saRepeats; ++k)
      {
        // The input is restored before every solve because it is passed
        // through a scratch copy.
        gsl_matrix_memcpy(gslCopy,gslCovMat);
        gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
        acc += gsl_matrix_get(eigvect,r,c);
      }
      timerSa.stop();
    }
    for (int t=0; t<TRIES; ++t)
    {
      timerStd.start();
      for (int k=0; k<stdRepeats; ++k)
      {
        gsl_matrix_memcpy(gslCopy,gslCovMat);
        gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
        acc += GSL_REAL(gsl_matrix_complex_get(eigvectz,r,c));
      }
      timerStd.stop();
    }

    std::cout << " | \t"
              << timerSa.value() * REPEAT / saRepeats << "s \t"
              << timerStd.value() * REPEAT / stdRepeats << "s";

    gsl_matrix_free(gslCovMat);
    // gslCopy is a gsl_matrix, so it must be released with gsl_matrix_free
    // (it used to be passed to gsl_vector_free).
    gsl_matrix_free(gslCopy);
    gsl_matrix_free(eigvect);
    gsl_vector_free(eigval);
    gsl_matrix_complex_free(eigvectz);
    gsl_vector_complex_free(eigvalz);
    gsl_eigen_symmv_free(eisymm);
    gsl_eigen_nonsymmv_free(einonsymm);
  }
  #endif

  std::cout << "\n";

  // make sure the compiler does not optimize too much
  if (acc==123)
    std::cout << acc;
}
// Prints the table header (with extra column titles when BENCH_GMM or
// BENCH_GSL is defined), then benchmarks every dynamic size of the
// zero-terminated list followed by a fixed set of compile-time sizes.
int main(int argc, char* argv[])
{
  const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};
  std::cout << "size selfadjoint generic";
  #ifdef BENCH_GMM
  std::cout << " GMM++ ";
  #endif
  #ifdef BENCH_GSL
  std::cout << " GSL (double + ATLAS) ";
  #endif
  std::cout << "\n";

  // Walk the list until the 0 sentinel.
  for (const int* dim = dynsizes; *dim > 0; ++dim)
    benchEigenSolver(Matrix<Scalar,Dynamic,Dynamic>(*dim,*dim));

  benchEigenSolver(Matrix<Scalar,2,2>());
  benchEigenSolver(Matrix<Scalar,3,3>());
  benchEigenSolver(Matrix<Scalar,4,4>());
  benchEigenSolver(Matrix<Scalar,6,6>());
  benchEigenSolver(Matrix<Scalar,8,8>());
  benchEigenSolver(Matrix<Scalar,12,12>());
  benchEigenSolver(Matrix<Scalar,16,16>());
  return 0;
}

View File

@@ -0,0 +1,115 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include <iostream>
#include <bench/BenchUtil.h>
#include <complex>
#include <vector>
#include <Eigen/Core>
#include <unsupported/Eigen/FFT>
using namespace Eigen;
using namespace std;
// Maps a floating-point type to the name printed in the benchmark report.
// Only the specializations below are defined; any other T fails at link time.
template <typename T>
std::string nameof();

template <>
std::string nameof<float>()
{
  return "float";
}

template <>
std::string nameof<double>()
{
  return "double";
}

template <>
std::string nameof<long double>()
{
  return "long double";
}
#ifndef TYPE
#define TYPE float
#endif
#ifndef NFFT
#define NFFT 1024
#endif
#ifndef NDATA
#define NDATA 1000000
#endif
using namespace Eigen;
template <typename T>
void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false)
{
typedef typename NumTraits<T>::Real Scalar;
typedef typename std::complex<Scalar> Complex;
int nits = NDATA/nfft;
vector<T> inbuf(nfft);
vector<Complex > outbuf(nfft);
FFT< Scalar > fft;
if (unscaled) {
fft.SetFlag(fft.Unscaled);
cout << "unscaled ";
}
if (halfspec) {
fft.SetFlag(fft.HalfSpectrum);
cout << "halfspec ";
}
std::fill(inbuf.begin(),inbuf.end(),0);
fft.fwd( outbuf , inbuf);
BenchTimer timer;
timer.reset();
for (int k=0;k<8;++k) {
timer.start();
if (fwd)
for(int i = 0; i < nits; i++)
fft.fwd( outbuf , inbuf);
else
for(int i = 0; i < nits; i++)
fft.inv(inbuf,outbuf);
timer.stop();
}
cout << nameof<Scalar>() << " ";
double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits );
if ( NumTraits<T>::IsComplex ) {
cout << "complex";
}else{
cout << "real ";
mflops /= 2;
}
if (fwd)
cout << " fwd";
else
cout << " inv";
cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n";
}
int main(int argc,char ** argv)
{
bench<complex<float> >(NFFT,true);
bench<complex<float> >(NFFT,false);
bench<float>(NFFT,true);
bench<float>(NFFT,false);
bench<float>(NFFT,false,true);
bench<float>(NFFT,false,true,true);
bench<complex<double> >(NFFT,true);
bench<complex<double> >(NFFT,false);
bench<double>(NFFT,true);
bench<double>(NFFT,false);
bench<complex<long double> >(NFFT,true);
bench<complex<long double> >(NFFT,false);
bench<long double>(NFFT,true);
bench<long double>(NFFT,false);
return 0;
}

View File

@@ -0,0 +1,134 @@
#include <iostream>
#include <iomanip>
#include <Eigen/Core>
#include <Eigen/Geometry>
#include <bench/BenchTimer.h>
using namespace Eigen;
using namespace std;
#ifndef REPEAT
#define REPEAT 1000000
#endif
// Selects which product expression a func<> specialization evaluates.
enum func_opt
{
TV,
TMATV,
TMATVMAT,
};
// Primary template, declared only; one specialization exists per func_opt
// value. Every run() is marked EIGEN_DONT_INLINE and starts with an empty asm
// statement.
// NOTE(review): presumably both keep the measured call from being folded into
// the timing loop - confirm.
template <class res, class arg1, class arg2, int opt>
struct func;
// TV: multiplies the two arguments directly (a1 * a2).
template <class res, class arg1, class arg2>
struct func<res, arg1, arg2, TV>
{
static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
{
asm ("");
return a1 * a2;
}
};
// TMATV: multiplies a1.matrix() by a2.
template <class res, class arg1, class arg2>
struct func<res, arg1, arg2, TMATV>
{
static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
{
asm ("");
return a1.matrix() * a2;
}
};
// TMATVMAT: multiplies a1.matrix() by a2.matrix() and constructs res from
// the product.
template <class res, class arg1, class arg2>
struct func<res, arg1, arg2, TMATVMAT>
{
static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
{
asm ("");
return res(a1.matrix() * a2.matrix());
}
};
// Timing harness: starting from identity operands, applies func::run REPEAT
// times per timed pass, for 10 passes, and prints timer.value() in seconds
// with four decimals.
//   func - provides static run(arg1&, arg2&), whose result is assigned to
//          the second operand
template <class func, class arg1, class arg2>
struct test_transform
{
  static void run()
  {
    arg1 a1;
    a1.setIdentity();
    arg2 a2;
    a2.setIdentity();

    BenchTimer timer;
    timer.reset();
    for (int pass = 0; pass < 10; ++pass)
    {
      timer.start();
      // The result is fed back into a2, so every call depends on the last.
      for (int rep = 0; rep < REPEAT; ++rep)
        a2 = func::run( a1, a2 );
      timer.stop();
    }
    cout << setprecision(4) << fixed << timer.value() << "s " << endl;
  }
};
// run_vec: prints a label built from the stringified scalar, mode, option and
// vsize arguments, then times func<Vec,Trans,Vec,op> where Trans is
// Transform<scalar,3,mode,option> and Vec is Matrix<scalar,vsize,1,option>.
#define run_vec( op, scalar, mode, option, vsize ) \
std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize " "; \
{\
typedef Transform<scalar, 3, mode, option> Trans;\
typedef Matrix<scalar, vsize, 1, option> Vec;\
typedef func<Vec,Trans,Vec,op> Func;\
test_transform< Func, Trans, Vec >::run();\
}
// run_trans: same label without the vector size, then times
// func<Trans,Trans,Trans,op>, i.e. a transform-by-transform product.
#define run_trans( op, scalar, mode, option ) \
std::cout << #scalar << "\t " << #mode << "\t " << #option << " "; \
{\
typedef Transform<scalar, 3, mode, option> Trans;\
typedef func<Trans,Trans,Trans,op> Func;\
test_transform< Func, Trans, Trans >::run();\
}
// Runs four groups of timings, each introduced by a heading that names the
// measured expression: transform * vector, transform.matrix() * vector,
// transform * transform, and transform.matrix() * transform.matrix().
int main(int argc, char* argv[])
{
// Group 1: func_opt TV applied to a transform and a vector of size 3 or 4.
cout << "vec = trans * vec" << endl;
run_vec(TV, float, Isometry, AutoAlign, 3);
run_vec(TV, float, Isometry, DontAlign, 3);
run_vec(TV, float, Isometry, AutoAlign, 4);
run_vec(TV, float, Isometry, DontAlign, 4);
run_vec(TV, float, Projective, AutoAlign, 4);
run_vec(TV, float, Projective, DontAlign, 4);
run_vec(TV, double, Isometry, AutoAlign, 3);
run_vec(TV, double, Isometry, DontAlign, 3);
run_vec(TV, double, Isometry, AutoAlign, 4);
run_vec(TV, double, Isometry, DontAlign, 4);
run_vec(TV, double, Projective, AutoAlign, 4);
run_vec(TV, double, Projective, DontAlign, 4);
// Group 2: func_opt TMATV, Isometry transforms and size-4 vectors only.
cout << "vec = trans.matrix() * vec" << endl;
run_vec(TMATV, float, Isometry, AutoAlign, 4);
run_vec(TMATV, float, Isometry, DontAlign, 4);
run_vec(TMATV, double, Isometry, AutoAlign, 4);
run_vec(TMATV, double, Isometry, DontAlign, 4);
// Group 3: func_opt TV with a transform on both sides.
cout << "trans = trans1 * trans" << endl;
run_trans(TV, float, Isometry, AutoAlign);
run_trans(TV, float, Isometry, DontAlign);
run_trans(TV, double, Isometry, AutoAlign);
run_trans(TV, double, Isometry, DontAlign);
run_trans(TV, float, Projective, AutoAlign);
run_trans(TV, float, Projective, DontAlign);
run_trans(TV, double, Projective, AutoAlign);
run_trans(TV, double, Projective, DontAlign);
// Group 4: func_opt TMATVMAT, Isometry transforms only.
cout << "trans = trans1.matrix() * trans.matrix()" << endl;
run_trans(TMATVMAT, float, Isometry, AutoAlign);
run_trans(TMATVMAT, float, Isometry, DontAlign);
run_trans(TMATVMAT, double, Isometry, AutoAlign);
run_trans(TMATVMAT, double, Isometry, DontAlign);
}

View File

@@ -0,0 +1,135 @@
#include <iostream>
#include <Eigen/Core>
#include <bench/BenchTimer.h>
using namespace Eigen;
#ifndef SIZE
#define SIZE 50
#endif
#ifndef REPEAT
#define REPEAT 10000
#endif
typedef float Scalar;
__attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
__attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
__attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
// Benchmarks the hand-written packet loop benchVec(a, b, c, size2) on three
// zero-initialized buffers of size2 = (SIZE*8)^2 scalars and prints the time
// and throughput.
// NOTE(review): the function returns right after that first report, so the
// Eigen matrix/vector benchmarks below it never run. This looks like a
// deliberate shortcut by the author and is kept as is - confirm.
int main(int argc, char* argv[])
{
  int size = SIZE * 8;
  int size2 = size * size;
  Scalar* a = internal::aligned_new<Scalar>(size2);
  // b is offset by one scalar from the aligned allocation.
  Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
  Scalar* c = internal::aligned_new<Scalar>(size2);

  // Initialize all size2 elements: the benchmark below reads the whole
  // buffers, not just the first `size` entries.
  for (int i=0; i<size2; ++i)
  {
    a[i] = b[i] = c[i] = 0;
  }

  BenchTimer timer;

  timer.reset();
  for (int k=0; k<10; ++k)
  {
    timer.start();
    benchVec(a, b, c, size2);
    timer.stop();
  }
  // The operation count is formed in double so size2*REPEAT cannot overflow int.
  std::cout << timer.value() << "s " << (double(size2)*REPEAT/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
  return 0;

  for (int innersize = size; innersize>2 ; --innersize)
  {
    if (size2%innersize==0)
    {
      int outersize = size2/innersize;
      MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
      MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
      MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
      timer.reset();
      for (int k=0; k<3; ++k)
      {
        timer.start();
        benchVec(ma, mb, mc);
        timer.stop();
      }
      std::cout << innersize << " x " << outersize << " " << timer.value() << "s " << (double(size2)*REPEAT/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
    }
  }

  VectorXf va = Map<VectorXf>(a, size2);
  VectorXf vb = Map<VectorXf>(b, size2);
  VectorXf vc = Map<VectorXf>(c, size2);
  timer.reset();
  for (int k=0; k<3; ++k)
  {
    timer.start();
    benchVec(va, vb, vc);
    timer.stop();
  }
  std::cout << timer.value() << "s " << (double(size2)*REPEAT/timer.value())/(1024.*1024.*1024.) << " GFlops\n";

  return 0;
}
// Matrix variant of the benchmark kernel: performs a = a + b REPEAT times.
// The parameter c is not used. The expression is the thing being measured,
// so it is kept exactly as written.
void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
{
for (int k=0; k<REPEAT; ++k)
a = a + b;
}
// Vector variant of the benchmark kernel: performs a = a + b REPEAT times.
// The parameter c is not used. The expression is the thing being measured,
// so it is kept exactly as written.
void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
{
for (int k=0; k<REPEAT; ++k)
a = a + b;
}
// Hand-unrolled packet variant of the benchmark kernel: REPEAT times, adds b
// into a over `size` scalars, eight packets per loop iteration.
//   a    - destination, written with aligned stores (pstore)
//   b    - source, read with unaligned loads (ploadu)
//   c    - not used
//   size - number of scalars
// NOTE(review): assumes size is a multiple of 8*PacketSize and that a is
// packet-aligned - confirm against the caller.
void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
{
  const int PacketSize = internal::packet_traits<Scalar>::size;
  for (int k=0; k<REPEAT; ++k)
    for (int i=0; i<size; i+=PacketSize*8)
    {
      // Alternative software-pipelined version, kept for reference:
      // typedef internal::packet_traits<Scalar>::type PacketScalar;
      // PacketScalar a0, a1, a2, a3, b0, b1, b2, b3;
      // a0 = internal::pload(&a[i]);
      // b0 = internal::pload(&b[i]);
      // a1 = internal::pload(&a[i+1*PacketSize]);
      // b1 = internal::pload(&b[i+1*PacketSize]);
      // a2 = internal::pload(&a[i+2*PacketSize]);
      // b2 = internal::pload(&b[i+2*PacketSize]);
      // a3 = internal::pload(&a[i+3*PacketSize]);
      // b3 = internal::pload(&b[i+3*PacketSize]);
      // internal::pstore(&a[i], internal::padd(a0, b0));
      // a0 = internal::pload(&a[i+4*PacketSize]);
      // b0 = internal::pload(&b[i+4*PacketSize]);
      //
      // internal::pstore(&a[i+1*PacketSize], internal::padd(a1, b1));
      // a1 = internal::pload(&a[i+5*PacketSize]);
      // b1 = internal::pload(&b[i+5*PacketSize]);
      //
      // internal::pstore(&a[i+2*PacketSize], internal::padd(a2, b2));
      // a2 = internal::pload(&a[i+6*PacketSize]);
      // b2 = internal::pload(&b[i+6*PacketSize]);
      //
      // internal::pstore(&a[i+3*PacketSize], internal::padd(a3, b3));
      // a3 = internal::pload(&a[i+7*PacketSize]);
      // b3 = internal::pload(&b[i+7*PacketSize]);
      //
      // internal::pstore(&a[i+4*PacketSize], internal::padd(a0, b0));
      // internal::pstore(&a[i+5*PacketSize], internal::padd(a1, b1));
      // internal::pstore(&a[i+6*PacketSize], internal::padd(a2, b2));
      // internal::pstore(&a[i+7*PacketSize], internal::padd(a3, b3));

      // All eight packets of the stride are updated. Packets 0 and 1 used to
      // be skipped, so a quarter of the counted additions never happened.
      internal::pstore(&a[i+0*PacketSize], internal::padd(internal::ploadu(&a[i+0*PacketSize]), internal::ploadu(&b[i+0*PacketSize])));
      internal::pstore(&a[i+1*PacketSize], internal::padd(internal::ploadu(&a[i+1*PacketSize]), internal::ploadu(&b[i+1*PacketSize])));
      internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu(&a[i+2*PacketSize]), internal::ploadu(&b[i+2*PacketSize])));
      internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu(&a[i+3*PacketSize]), internal::ploadu(&b[i+3*PacketSize])));
      internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu(&a[i+4*PacketSize]), internal::ploadu(&b[i+4*PacketSize])));
      internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu(&a[i+5*PacketSize]), internal::ploadu(&b[i+5*PacketSize])));
      internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu(&a[i+6*PacketSize]), internal::ploadu(&b[i+6*PacketSize])));
      internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu(&a[i+7*PacketSize]), internal::ploadu(&b[i+7*PacketSize])));
    }
}

View File

@@ -0,0 +1,341 @@
// g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2 ./a.out
// icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp && OMP_NUM_THREADS=2 ./a.out
// Compilation options:
//
// -DSCALAR=std::complex<double>
// -DSCALARA=double or -DSCALARB=double
// -DHAVE_BLAS
// -DDECOUPLED
//
#include <iostream>
#include <Eigen/Core>
#include <bench/BenchTimer.h>
using namespace std;
using namespace Eigen;
#ifndef SCALAR
// #define SCALAR std::complex<float>
#define SCALAR float
#endif
#ifndef SCALARA
#define SCALARA SCALAR
#endif
#ifndef SCALARB
#define SCALARB SCALAR
#endif
// Scalar type of the result matrix; SCALAR defaults to float above and can be
// overridden with -DSCALAR=...
typedef SCALAR Scalar;
// Real counterpart of Scalar as reported by NumTraits; used for the
// split real/imaginary ("matlab"-style) products below.
typedef NumTraits<Scalar>::Real RealScalar;
// Left-hand operand type (scalar selectable with -DSCALARA=...).
typedef Matrix<SCALARA,Dynamic,Dynamic> A;
// Right-hand operand type (scalar selectable with -DSCALARB=...).
typedef Matrix<SCALARB,Dynamic,Dynamic> B;
// Result type of the product.
typedef Matrix<Scalar,Dynamic,Dynamic> C;
// Real-valued matrix type used by the matlab_* helpers.
typedef Matrix<RealScalar,Dynamic,Dynamic> M;
#ifdef HAVE_BLAS
extern "C" {
#include <Eigen/src/misc/blas.h>
}
// Constants handed by address to the Fortran-style BLAS entry points used in
// the blas_gemm overloads below. Only the "one" values, `notrans` and the
// matrices themselves are referenced in the code visible here; the remaining
// constants are presumably kept for other BLAS calls - TODO confirm.
// NOTE(review): `szero` is the double-precision zero; the name does not follow
// the f/d/cf/cd prefix pattern of its neighbours.
static float fone = 1;
static float fzero = 0;
static double done = 1;
static double szero = 0;
static std::complex<float> cfone = 1;
static std::complex<float> cfzero = 0;
static std::complex<double> cdone = 1;
static std::complex<double> cdzero = 0;
static char notrans = 'N';
static char trans = 'T';
static char nonunit = 'N';
static char lower = 'L';
static char right = 'R';
static int intone = 1;
// Single-precision real product through the external BLAS (sgemm_).
// Both scaling factors passed to sgemm_ are `fone`, and neither operand is
// transposed; the result is accumulated in c.
void blas_gemm(const MatrixXf& a, const MatrixXf& b, MatrixXf& c)
{
  // Problem dimensions are taken from the destination and from a's columns.
  int rows  = c.rows();
  int cols  = c.cols();
  int depth = a.cols();

  // Leading dimensions of the three operands.
  int lda = a.rows();
  int ldb = b.rows();
  int ldc = c.rows();

  // The BLAS prototype takes non-const pointers even for its inputs.
  float* lhs = const_cast<float*>(a.data());
  float* rhs = const_cast<float*>(b.data());

  sgemm_(&notrans, &notrans, &rows, &cols, &depth, &fone,
         lhs, &lda,
         rhs, &ldb, &fone,
         c.data(), &ldc);
}
// Double-precision real product through the external BLAS (dgemm_).
// Both scaling factors passed to dgemm_ are `done`, and neither operand is
// transposed; the result is accumulated in c.
EIGEN_DONT_INLINE void blas_gemm(const MatrixXd& a, const MatrixXd& b, MatrixXd& c)
{
  // Problem dimensions are taken from the destination and from a's columns.
  int rows  = c.rows();
  int cols  = c.cols();
  int depth = a.cols();

  // Leading dimensions of the three operands.
  int lda = a.rows();
  int ldb = b.rows();
  int ldc = c.rows();

  // The BLAS prototype takes non-const pointers even for its inputs.
  double* lhs = const_cast<double*>(a.data());
  double* rhs = const_cast<double*>(b.data());

  dgemm_(&notrans, &notrans, &rows, &cols, &depth, &done,
         lhs, &lda,
         rhs, &ldb, &done,
         c.data(), &ldc);
}
void blas_gemm(const MatrixXcf& a, const MatrixXcf& b, MatrixXcf& c)
{
int M = c.rows(); int N = c.cols(); int K = a.cols();
int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows();
cgemm_(&notrans,&notrans,&M,&N,&K,(float*)&cfone,
const_cast<float*>((const float*)a.data()),&lda,
const_cast<float*>((const float*)b.data()),&ldb,(float*)&cfone,
(float*)c.data(),&ldc);
}
void blas_gemm(const MatrixXcd& a, const MatrixXcd& b, MatrixXcd& c)
{
int M = c.rows(); int N = c.cols(); int K = a.cols();
int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows();
zgemm_(&notrans,&notrans,&M,&N,&K,(double*)&cdone,
const_cast<double*>((const double*)a.data()),&lda,
const_cast<double*>((const double*)b.data()),&ldb,(double*)&cdone,
(double*)c.data(),&ldc);
}
#endif
// Complex * complex product with real and imaginary parts stored in separate
// real matrices ("decoupled" layout). Accumulates into the outputs:
//   cr += ar*br - ai*bi
//   ci += ar*bi + ai*br
// Four real matrix products are evaluated, each with noalias().
void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci)
{
cr.noalias() += ar * br;
cr.noalias() -= ai * bi;
ci.noalias() += ar * bi;
ci.noalias() += ai * br;
// [cr ci] += [ar ai] * br + [-ai ar] * bi
}
// Real * complex product in the decoupled layout: the real matrix a multiplies
// the real and imaginary parts of b separately, accumulating into cr and ci.
void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci)
{
cr.noalias() += a * br;
ci.noalias() += a * bi;
}
// Complex * real product in the decoupled layout: the real and imaginary
// parts of a each multiply the real matrix b, accumulating into cr and ci.
void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci)
{
cr.noalias() += ar * b;
ci.noalias() += ai * b;
}
// The product being benchmarked: c += a * b, evaluated with noalias().
// Marked EIGEN_DONT_INLINE so that it stays a separate function.
// Note: the template parameters A, B, C shadow the file-level typedefs of the
// same names inside this function.
template<typename A, typename B, typename C>
EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c)
{
c.noalias() += a * b;
}
// Returns true when argv[i] exists and does not start a new "-x" option,
// i.e. when it can be consumed as an optional extra value.
static bool has_option_value(int argc, char ** argv, int i)
{
  return i<argc && argv[i][0]!='-';
}

// Benchmark driver for the matrix product.
//
// Command line: -s <size | rows columns depth> -c <cache sizes> -t <tries> -p <repeats>
// Prints cache/blocking information, optionally checks the product against a
// reference, then times the Eigen product (and BLAS / single-threaded /
// lazy / decoupled variants depending on the compile-time options).
// Returns 1 after printing the usage text when the arguments cannot be parsed,
// 0 otherwise.
int main(int argc, char ** argv)
{
  std::ptrdiff_t l1 = internal::queryL1CacheSize();
  std::ptrdiff_t l2 = internal::queryTopLevelCacheSize();
  std::cout << "L1 cache size = " << (l1>0 ? l1/1024 : -1) << " KB\n";
  std::cout << "L2/L3 cache size = " << (l2>0 ? l2/1024 : -1) << " KB\n";
  typedef internal::gebp_traits<Scalar,Scalar> Traits;
  std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n";

  int rep = 1;    // number of repetitions per try
  int tries = 2;  // number of tries, we keep the best

  int s = 2048;
  int m = s;
  int n = s;
  int p = s;
  int cache_size1=-1, cache_size2=l2, cache_size3 = 0;

  bool need_help = false;
  for (int i=1; i<argc && !need_help;)
  {
    // Every argument group is "-<letter>" followed by at least one value.
    // Checking i+1 here keeps all argv accesses below argc: the previous code
    // read argv[argc] (a null pointer) whenever an option came last.
    if(argv[i][0]!='-' || i+1>=argc)
    {
      need_help = true;
      break;
    }
    const char opt = argv[i][1];
    ++i; // argv[i] is now the first (mandatory) value of the option

    if(opt=='s')
    {
      s = atoi(argv[i++]);
      m = n = p = s;
      if(has_option_value(argc,argv,i))
      {
        // three sizes given: rows columns depth
        n = atoi(argv[i++]);
        if(!has_option_value(argc,argv,i))
        {
          need_help = true;
          break;
        }
        p = atoi(argv[i++]);
      }
    }
    else if(opt=='c')
    {
      cache_size1 = atoi(argv[i++]);
      if(has_option_value(argc,argv,i))
      {
        cache_size2 = atoi(argv[i++]);
        if(has_option_value(argc,argv,i))
          cache_size3 = atoi(argv[i++]);
      }
    }
    else if(opt=='t')
    {
      tries = atoi(argv[i++]);
    }
    else if(opt=='p')
    {
      rep = atoi(argv[i++]);
    }
    else
    {
      // Unknown option: the index was never advanced for these before, which
      // made the loop spin forever.
      need_help = true;
    }
  }

  if(need_help)
  {
    std::cout << argv[0] << " -s <matrix sizes> -c <cache sizes> -t <nb tries> -p <nb repeats>\n";
    std::cout << " <matrix sizes> : size\n";
    std::cout << " <matrix sizes> : rows columns depth\n";
    return 1;
  }

#if EIGEN_VERSION_AT_LEAST(3,2,90)
  if(cache_size1>0)
    setCpuCacheSizes(cache_size1,cache_size2,cache_size3);
#endif

  A a(m,p); a.setRandom();
  B b(p,n); b.setRandom();
  C c(m,n); c.setOnes();
  C rc = c; // pristine copy used to reset c before each timed section

  std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n";
  std::ptrdiff_t mc(m), nc(n), kc(p);
  internal::computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
  std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << "\n";

  C r = c;

  // check the parallel product is correct
#if defined EIGEN_HAS_OPENMP
  Eigen::initParallel();
  int procs = omp_get_max_threads();
  if(procs>1)
  {
#ifdef HAVE_BLAS
    blas_gemm(a,b,r);
#else
    omp_set_num_threads(1);
    r.noalias() += a * b;
    omp_set_num_threads(procs);
#endif
    c.noalias() += a * b;
    if(!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n";
  }
#elif defined HAVE_BLAS
  blas_gemm(a,b,r);
  c.noalias() += a * b;
  if(!r.isApprox(c)) {
    std::cout << (r - c).norm() << "\n";
    std::cerr << "Warning, your product is crap!\n\n";
  }
#else
  // the lazy (coefficient-based) reference is only computed for moderate sizes
  if(1.*m*n*p<2000.*2000*2000)
  {
    gemm(a,b,c);
    r.noalias() += a.cast<Scalar>() .lazyProduct( b.cast<Scalar>() );
    if(!r.isApprox(c)) {
      std::cout << (r - c).norm() << "\n";
      std::cerr << "Warning, your product is crap!\n\n";
    }
  }
#endif

#ifdef HAVE_BLAS
  BenchTimer tblas;
  c = rc;
  BENCH(tblas, tries, rep, blas_gemm(a,b,c));
  std::cout << "blas cpu " << tblas.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(CPU_TIMER) << "s)\n";
  std::cout << "blas real " << tblas.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n";
#endif

  BenchTimer tmt;
  c = rc;
  BENCH(tmt, tries, rep, gemm(a,b,c));
  std::cout << "eigen cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n";
  std::cout << "eigen real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";

#ifdef EIGEN_HAS_OPENMP
  if(procs>1)
  {
    // same product restricted to one thread, to report the multi-threading speed up
    BenchTimer tmono;
    omp_set_num_threads(1);
    Eigen::setNbThreads(1);
    c = rc;
    BENCH(tmono, tries, rep, gemm(a,b,c));
    std::cout << "eigen mono cpu " << tmono.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(CPU_TIMER) << "s)\n";
    std::cout << "eigen mono real " << tmono.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n";
    std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER) << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n";
  }
#endif

  // the lazy product is only timed for very small problems
  if(1.*m*n*p<30*30*30)
  {
    BenchTimer tmt;
    c = rc;
    BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b));
    std::cout << "lazy cpu " << tmt.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(CPU_TIMER) << "s)\n";
    std::cout << "lazy real " << tmt.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
  }

#ifdef DECOUPLED
  // products with real and imaginary parts held in separate real matrices
  if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
  {
    M ar(m,p); ar.setRandom();
    M ai(m,p); ai.setRandom();
    M br(p,n); br.setRandom();
    M bi(p,n); bi.setRandom();
    M cr(m,n); cr.setRandom();
    M ci(m,n); ci.setRandom();
    BenchTimer t;
    BENCH(t, tries, rep, matlab_cplx_cplx(ar,ai,br,bi,cr,ci));
    std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n";
    std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
  }
  if((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
  {
    M a(m,p); a.setRandom();
    M br(p,n); br.setRandom();
    M bi(p,n); bi.setRandom();
    M cr(m,n); cr.setRandom();
    M ci(m,n); ci.setRandom();
    BenchTimer t;
    BENCH(t, tries, rep, matlab_real_cplx(a,br,bi,cr,ci));
    std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n";
    std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
  }
  if((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex))
  {
    M ar(m,p); ar.setRandom();
    M ai(m,p); ai.setRandom();
    M b(p,n); b.setRandom();
    M cr(m,n); cr.setRandom();
    M ci(m,n); ci.setRandom();
    BenchTimer t;
    BENCH(t, tries, rep, matlab_cplx_real(ar,ai,b,cr,ci));
    std::cout << "\"matlab\" cpu " << t.best(CPU_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9 << " GFLOPS \t(" << t.total(CPU_TIMER) << "s)\n";
    std::cout << "\"matlab\" real " << t.best(REAL_TIMER)/rep << "s \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 << " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
  }
#endif

  return 0;
}

View File

@@ -0,0 +1,28 @@
#!/bin/bash
# Compile one benchmark source with every compiler command line listed in a
# "compiler list" file, and run each resulting binary.
#
# Usage: <this script> compilerlist.txt benchfile.cpp
#
# The compiler list is sourced as shell code. From the way it is used below it
# is expected to fill the CLIST array with one full compiler command line per
# entry and to set g to the number of entries (g stays 0 otherwise).
if (($# < 2)); then
    echo "Usage: $0 compilerlist.txt benchfile.cpp"
else
    compilerlist=$1
    benchfile=$2
    g=0
    source "$compilerlist"
    # for each compiler, compile benchfile and run the benchmark
    for (( i=0 ; i<g ; ++i )) ; do
        # the compiler executable is the first word of the command line
        # (left unquoted on purpose so that leading blanks are dropped)
        compiler=$(echo ${CLIST[$i]} | cut -d " " -f 1)
        # check the compiler exists
        # The former `[ -e \`which $compiler\` ]` was always true for a missing
        # compiler: with empty `which` output the test degenerates to `[ -e ]`,
        # a one-argument test that succeeds for any non-empty string.
        if command -v "$compiler" > /dev/null 2>&1; then
            echo "${CLIST[$i]}"
#           echo "${CLIST[$i]} $benchfile -I.. -o bench~"
#           if [ -e ./.bench ] ; then rm .bench; fi
            # CLIST entry intentionally unquoted: it holds the compiler and its flags
            ${CLIST[$i]} "$benchfile" -I.. -o .bench && ./.bench 2> /dev/null
            echo ""
        else
            echo "compiler not found: $compiler"
        fi
    done
fi

View File

@@ -0,0 +1,360 @@
#include <typeinfo>
#include <iostream>
#include <Eigen/Core>
#include "BenchTimer.h"
using namespace Eigen;
using namespace std;
// Benchmark wrapper around v.norm(); kept out of line (EIGEN_DONT_INLINE) so
// each norm variant is measured as a separate function call.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v)
{
return v.norm();
}
// Benchmark wrapper around v.stableNorm(); kept out of line
// (EIGEN_DONT_INLINE) like the other norm variants.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v)
{
return v.stableNorm();
}
// Benchmark wrapper around v.hypotNorm(); kept out of line
// (EIGEN_DONT_INLINE) like the other norm variants.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v)
{
return v.hypotNorm();
}
// Benchmark wrapper around v.blueNorm(); kept out of line
// (EIGEN_DONT_INLINE) like the other norm variants.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v)
{
return v.blueNorm();
}
// Euclidean norm computed with a running (scale, ssq) pair such that the sum
// of squares seen so far equals scale^2 * ssq. Only ratios not larger than 1
// are squared, which avoids overflow/underflow of intermediate values.
//
// v : vector-like object providing size() and coeff(i).
// Returns scale * sqrt(ssq); 0 for an empty or all-zero input.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v)
{
  typedef typename T::Scalar Scalar;
  int n = v.size();
  Scalar scale = 0;
  Scalar ssq = 1;
  for (int i=0;i<n;++i)
  {
    Scalar ax = std::abs(v.coeff(i));
    // Zero entries contribute nothing. Skipping them also avoids evaluating
    // 0/0 (NaN) when a zero is met while scale is still 0.
    if (ax == Scalar(0))
      continue;
    if (scale >= ax)
    {
      ssq += numext::abs2(ax/scale);
    }
    else
    {
      // new largest magnitude: rescale the accumulated sum to it
      ssq = Scalar(1) + ssq * numext::abs2(scale/ax);
      scale = ax;
    }
  }
  return scale * std::sqrt(ssq);
}
// Two-pass norm: the first pass finds the largest absolute coefficient s, the
// second computes s * norm(v / s) so that the squared values stay bounded.
//
// Returns 0 when the largest absolute coefficient is 0 (all-zero input);
// dividing by s in that case would produce NaN.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v)
{
  typedef typename T::Scalar Scalar;
  Scalar s = v.array().abs().maxCoeff();
  if (s == Scalar(0))
    return s;
  return s*(v/s).norm();
}
// Despite its name, this wrapper simply forwards to v.stableNorm(), exactly
// like stableNorm() above.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v)
{
return v.stableNorm();
}
// Divide-and-conquer norm: squares are summed pairwise, level by level, in
// place in the leading coefficients of v.
//
// WARNING: v is used as scratch space and is overwritten.
//
// Works for any size: when a level holds an odd number of partial sums, the
// last one is carried over unchanged to the next level (previously such
// elements were silently dropped, so only power-of-two sizes gave the right
// result). Returns 0 for an empty vector.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
{
  typedef typename T::Scalar Scalar;
  const int size = v.size();
  if (size == 0)
    return Scalar(0);

  // Level 0: squares of the coefficients, summed two by two.
  int n = size / 2;
  for (int i=0;i<n;++i)
    v(i) = v(2*i)*v(2*i) + v(2*i+1)*v(2*i+1);
  if (size % 2 != 0)
  {
    // odd size: the last coefficient has no partner
    v(n) = v(size-1)*v(size-1);
    ++n;
  }

  // Following levels: pairwise sums of the n partial results in v(0..n-1).
  while (n>1)
  {
    const int half = n/2;
    for (int i=0;i<half;++i)
      v(i) = v(2*i) + v(2*i+1);
    if (n % 2 != 0)
    {
      // carry the unpaired partial sum; index half is below n-1 here, so the
      // source has not been overwritten yet
      v(half) = v(n-1);
      n = half + 1;
    }
    else
    {
      n = half;
    }
  }
  return std::sqrt(v(0));
}
// Extra SSE packet helpers injected into Eigen::internal for pblueNorm below.
// They are only compiled when Eigen vectorization is enabled.
namespace Eigen {
namespace internal {
#ifdef EIGEN_VECTORIZE
// Lane-wise "a < b" comparison returning a bit mask packet.
Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); }
Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }
// Forwards to the SSE and-not intrinsic, i.e. computes (~a) & b.
// NOTE(review): the second parameter is a non-const reference, so these
// overloads can only be selected when the second argument is an lvalue; calls
// passing temporaries resolve to whatever pandnot Eigen itself provides, whose
// operand convention may differ - confirm before changing these signatures.
Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); }
Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); }
#endif
}
}
// Packet (SIMD) variant of the three-accumulator "blue" norm.
//
// Coefficients are split by magnitude into small (< b1), big (> b2) and
// mid-range values. Small and big values are scaled (by s1m / s2m) before
// being squared; each class is summed in its own accumulator and the three
// partial sums are combined at the end.
//
// Without EIGEN_VECTORIZE this simply forwards to v.blueNorm().
// NOTE(review): the packet loop reads whole aligned packets, so it assumes
// v.size() is a multiple of the packet size and v is suitably aligned -
// confirm against the callers.
template<typename T>
EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
{
#ifndef EIGEN_VECTORIZE
  return v.blueNorm();
#else
  typedef typename T::Scalar Scalar;

  // Machine-dependent constants, computed once per instantiation on first call.
  static int nmax = 0;
  static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;

  if(nmax <= 0)
  {
    int nbig, ibeta, it, iemin, iemax, iexp;
    Scalar abig, eps;

    nbig  = std::numeric_limits<int>::max();            // largest integer
    ibeta = std::numeric_limits<Scalar>::radix; //NumTraits<Scalar>::Base;          // base for floating-point numbers
    it    = std::numeric_limits<Scalar>::digits; //NumTraits<Scalar>::Mantissa;     // number of base-beta digits in mantissa
    iemin = std::numeric_limits<Scalar>::min_exponent;  // minimum exponent
    iemax = std::numeric_limits<Scalar>::max_exponent;  // maximum exponent
    rbig  = std::numeric_limits<Scalar>::max();         // largest floating-point number

    // Check the basic machine-dependent constants.
    if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5)
      || (it<=4 && ibeta <= 3 ) || it<2)
    {
      eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
    }
    iexp  = -((1-iemin)/2);
    b1    = std::pow(ibeta, iexp);  // lower boundary of midrange
    iexp  = (iemax + 1 - it)/2;
    b2    = std::pow(ibeta,iexp);   // upper boundary of midrange

    iexp  = (2-iemin)/2;
    s1m   = std::pow(ibeta,iexp);   // scaling factor for lower range
    iexp  = - ((iemax+it)/2);
    s2m   = std::pow(ibeta,iexp);   // scaling factor for upper range

    overfl  = rbig*s2m;             // overflow boundary for abig
    eps     = std::pow(ibeta, 1-it);
    relerr  = std::sqrt(eps);       // tolerance for neglecting asml
    abig    = 1.0/eps - 1.0;
    if (Scalar(nbig)>abig)  nmax = abig;  // largest safe n
    else                    nmax = nbig;
  }

  typedef typename internal::packet_traits<Scalar>::type Packet;
  const int ps = internal::packet_traits<Scalar>::size;
  Packet pasml = internal::pset1<Packet>(Scalar(0));
  Packet pamed = internal::pset1<Packet>(Scalar(0));
  Packet pabig = internal::pset1<Packet>(Scalar(0));
  Packet ps2m = internal::pset1<Packet>(s2m);
  Packet ps1m = internal::pset1<Packet>(s1m);
  Packet pb2  = internal::pset1<Packet>(b2);
  Packet pb1  = internal::pset1<Packet>(b1);
  for(int j=0; j<v.size(); j+=ps)
  {
    Packet ax = internal::pabs(v.template packet<Aligned>(j));
    Packet ax_s2m = internal::pmul(ax,ps2m);
    Packet ax_s1m = internal::pmul(ax,ps1m);
    Packet maskBig = internal::plt(pb2,ax);   // lanes with ax > b2
    Packet maskSml = internal::plt(ax,pb1);   // lanes with ax < b1

//     Packet maskMed = internal::pand(maskSml,maskBig);
//     Packet scale = internal::pset1(Scalar(0));
//     scale = internal::por(scale, internal::pand(maskBig,ps2m));
//     scale = internal::por(scale, internal::pand(maskSml,ps1m));
//     scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
//     ax = internal::pmul(ax,scale);
//     ax = internal::pmul(ax,ax);
//     pabig = internal::padd(pabig, internal::pand(maskBig, ax));
//     pasml = internal::padd(pasml, internal::pand(maskSml, ax));
//     pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));

    pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m)));
    pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m)));
    // Mid-range lanes are those that are neither small nor big, so the lanes
    // to exclude are the union (por) of the two masks. The previous pand of
    // the two masks is always empty (a value cannot be below b1 and above b2
    // at once), so nothing was ever excluded from the mid-range sum.
    // Both pandnot arguments are deliberately left as temporaries so that the
    // same pandnot overload as before is selected.
    // NOTE(review): this relies on that overload computing (first & ~second) -
    // confirm against Eigen's packet math.
    pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::por(maskSml,maskBig)));
  }
  Scalar abig = internal::predux(pabig);
  Scalar asml = internal::predux(pasml);
  Scalar amed = internal::predux(pamed);
  if(abig > Scalar(0))
  {
    abig = std::sqrt(abig);
    if(abig > overfl)
    {
      eigen_assert(false && "overflow");
      return rbig;
    }
    if(amed > Scalar(0))
    {
      abig = abig/s2m;
      amed = std::sqrt(amed);
    }
    else
    {
      return abig/s2m;
    }
  }
  else if(asml > Scalar(0))
  {
    if (amed > Scalar(0))
    {
      abig = std::sqrt(amed);
      amed = std::sqrt(asml) / s1m;
    }
    else
    {
      return std::sqrt(asml)/s1m;
    }
  }
  else
  {
    return std::sqrt(amed);
  }
  // combine the two remaining contributions: abig holds the larger, asml the smaller
  asml = std::min(abig, amed);
  abig = std::max(abig, amed);
  if(asml <= abig*relerr)
    return abig;
  else
    return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig));
#endif
}
// Times the norm function NRM on the float and double test vectors and prints
// one result line: "<name>\t<float time> <double time> <complex time>".
//
// The macro relies on these names being in scope at the expansion site:
//   tries, iters : number of timed runs and calls per run
//   vf, vd       : float and double input vectors
// The complex<float> section (using vcf) is commented out, so the tcf timer is
// never started and its printed value is whatever a freshly reset timer
// reports. The accumulators af/ad/ac only exist to consume the results.
#define BENCH_PERF(NRM) { \
float af = 0; double ad = 0; std::complex<float> ac = 0; \
Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\
for (int k=0; k<tries; ++k) { \
tf.start(); \
for (int i=0; i<iters; ++i) { af += NRM(vf); } \
tf.stop(); \
} \
for (int k=0; k<tries; ++k) { \
td.start(); \
for (int i=0; i<iters; ++i) { ad += NRM(vd); } \
td.stop(); \
} \
/*for (int k=0; k<std::max(1,tries/3); ++k) { \
tcf.start(); \
for (int i=0; i<iters; ++i) { ac += NRM(vcf); } \
tcf.stop(); \
} */\
std::cout << #NRM << "\t" << tf.value() << " " << td.value() << " " << tcf.value() << "\n"; \
}
void check_accuracy(double basef, double based, int s)
{
double yf = basef * std::abs(internal::random<double>());
double yd = based * std::abs(internal::random<double>());
VectorXf vf = VectorXf::Ones(s) * yf;
VectorXd vd = VectorXd::Ones(s) * yd;
std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n";
std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n";
std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n";
std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n";
std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
}
void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
{
VectorXf vf(s);
VectorXd vd(s);
for (int i=0; i<s; ++i)
{
vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
}
//std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>()) << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>()) << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>()) << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
// std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
}
// Driver of the norm benchmark: first prints accuracy tables for several
// magnitude regimes, then timing tables for a large and a small vector.
// Section titles go to std::cerr, the tables themselves to std::cout.
int main(int argc, char** argv)
{
  // `tries` and the per-section `iters` are picked up by BENCH_PERF.
  int tries = 10;
  int iters = 100000;
  double y = 1.1345743233455785456788e12 * internal::random<double>();
  VectorXf v = VectorXf::Ones(1024) * y;

  // return 0;

  // ---- accuracy -----------------------------------------------------------
  int s = 10000;
  double basef_ok = 1.1345743233455785456788e15;
  double based_ok = 1.1345743233455785456788e95;
  double basef_under = 1.1345743233455785456788e-27;
  double based_under = 1.1345743233455785456788e-303;
  double basef_over = 1.1345743233455785456788e+27;
  double based_over = 1.1345743233455785456788e+302;

  std::cout.precision(20);

  std::cerr << "\nNo under/overflow:\n";
  check_accuracy(basef_ok, based_ok, s);

  std::cerr << "\nUnderflow:\n";
  check_accuracy(basef_under, based_under, s);

  std::cerr << "\nOverflow:\n";
  check_accuracy(basef_over, based_over, s);

  // the two "varying" tables are each produced once
  std::cerr << "\nVarying (over):\n";
  check_accuracy_var(20,27,190,302,s);
  std::cout << "\n";

  std::cerr << "\nVarying (under):\n";
  check_accuracy_var(-27,20,-302,-190,s);
  std::cout << "\n";

  // ---- performance --------------------------------------------------------
  y = 1;
  std::cout.precision(4);

  int s1 = 1024*1024*32;
  std::cerr << "Performance (out of cache, " << s1 << "):\n";
  {
    int iters = 1;
    VectorXf vf = VectorXf::Random(s1) * y;
    VectorXd vd = VectorXd::Random(s1) * y;
    VectorXcf vcf = VectorXcf::Random(s1) * y;
    BENCH_PERF(sqsumNorm);
    BENCH_PERF(stableNorm);
    BENCH_PERF(blueNorm);
    BENCH_PERF(pblueNorm);
    BENCH_PERF(lapackNorm);
    BENCH_PERF(hypotNorm);
    BENCH_PERF(twopassNorm);
    BENCH_PERF(bl2passNorm);
  }

  const int s2 = 512;
  std::cerr << "\nPerformance (in cache, " << s2 << "):\n";
  {
    int iters = 100000;
    VectorXf vf = VectorXf::Random(s2) * y;
    VectorXd vd = VectorXd::Random(s2) * y;
    VectorXcf vcf = VectorXcf::Random(s2) * y;
    BENCH_PERF(sqsumNorm);
    BENCH_PERF(stableNorm);
    BENCH_PERF(blueNorm);
    BENCH_PERF(pblueNorm);
    BENCH_PERF(lapackNorm);
    BENCH_PERF(hypotNorm);
    BENCH_PERF(twopassNorm);
    BENCH_PERF(bl2passNorm);
  }
  return 0;
}

View File

@@ -0,0 +1,84 @@
#include <iostream>
#include <Eigen/Core>
#include <bench/BenchUtil.h>
using namespace Eigen;
// Compile-time knobs of the benchmark, both overridable with -D on the
// command line.
// REPEAT: scales the number of reverse operations per timed run
// (bench_reverse derives its repeat count from REPEAT and the matrix size).
#ifndef REPEAT
#define REPEAT 100000
#endif
// TRIES: number of timed runs per matrix size.
#ifndef TRIES
#define TRIES 20
#endif
// Scalar type of all benchmarked matrices.
typedef double Scalar;
// Times `b = a.reverse()` for matrices shaped like m and prints one result
// line for that shape.
//
// m : only its dimensions are used; the benchmarked matrices are random
//     matrices of the same size.
//
// The repeat count is scaled inversely to the matrix size and is clamped to
// at least 1, since a zero count would lead to a division by zero in the
// report for very large matrices.
template <typename MatrixType>
__attribute__ ((noinline)) void bench_reverse(const MatrixType& m)
{
  int rows = m.rows();
  int cols = m.cols();
  int size = m.size();

  int repeats = (REPEAT*1000)/size;
  if (repeats < 1)
    repeats = 1;

  MatrixType a = MatrixType::Random(rows,cols);
  MatrixType b = MatrixType::Random(rows,cols);

  BenchTimer timerB;

  // One coefficient of the result is accumulated after every operation so the
  // compiler cannot discard the computation.
  Scalar acc = 0;
  int r = internal::random<int>(0,rows-1);
  int c = internal::random<int>(0,cols-1);
  for (int t=0; t<TRIES; ++t)
  {
    timerB.start();
    for (int k=0; k<repeats; ++k)
    {
      // assembly markers to locate the kernel in the generated code
      asm("#begin foo");
      b = a.reverse();
      asm("#end foo");
      acc += b.coeff(r,c);
    }
    timerB.stop();
  }

  if (MatrixType::RowsAtCompileTime==Dynamic)
    std::cout << "dyn ";
  else
    std::cout << "fixed ";
  std::cout << rows << " x " << cols << " \t"
            << (timerB.value() * REPEAT) / repeats << "s "
            << "(" << 1e-6 * size*repeats/timerB.value() << " MFLOPS)\t";

  std::cout << "\n";
  // make sure the compiler does not optimize too much
  if (acc==123)
    std::cout << acc;
}
// Runs bench_reverse on a list of dynamic sizes, once as a square matrix and
// once as a column vector with the same number of coefficients.
// NOTE(review): the header line printed below ("size no sqrt standard") does
// not describe the columns bench_reverse prints; it looks inherited from
// another benchmark - left unchanged in case something parses the output.
int main(int argc, char* argv[])
{
  // list of sizes, terminated by 0
  const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,0};
  std::cout << "size no sqrt standard";
//   #ifdef BENCH_GSL
//   std::cout << " GSL (standard + double + ATLAS) ";
//   #endif
  std::cout << "\n";
  // plain int index: the previously used `uint` is not a standard C++ type and
  // is only available where the platform headers happen to typedef it
  for (int i=0; dynsizes[i]>0; ++i)
  {
    bench_reverse(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
    bench_reverse(Matrix<Scalar,Dynamic,1>(dynsizes[i]*dynsizes[i]));
  }
//   bench_reverse(Matrix<Scalar,2,2>());
//   bench_reverse(Matrix<Scalar,3,3>());
//   bench_reverse(Matrix<Scalar,4,4>());
//   bench_reverse(Matrix<Scalar,5,5>());
//   bench_reverse(Matrix<Scalar,6,6>());
//   bench_reverse(Matrix<Scalar,7,7>());
//   bench_reverse(Matrix<Scalar,8,8>());
//   bench_reverse(Matrix<Scalar,12,12>());
//   bench_reverse(Matrix<Scalar,16,16>());
  return 0;
}

View File

@@ -0,0 +1,18 @@
#include <iostream>
#include <Eigen/Core>
using namespace Eigen;
using namespace std;
int main()
{
typedef Matrix<SCALAR,Eigen::Dynamic,1> Vec;
Vec v(SIZE);
v.setZero();
v[0] = 1;
v[1] = 2;
for(int i = 0; i < 1000000; i++)
{
v.coeffRef(0) += v.sum() * SCALAR(1e-20);
}
cout << v.sum() << endl;
}

View File

@@ -0,0 +1,12 @@
#!/bin/bash
# Builds and times benchmark.cpp for square matrix sizes 1 to 15, once with a
# raised unrolling limit and once with unrolled loops disabled.
#
# The compiler command line is taken from the CXX environment variable when it
# is set. Suggested values:
# gcc : CXX="g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000"
# icc : CXX="icpc -fast -no-inline-max-size -fno-exceptions"

if [ -z "${CXX+set}" ]; then
    # default value, used only when CXX is not set at all
    CXX="g++ -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000"
fi

# flags shared by both builds; expanded unquoted so they split into words
common_flags="-O3 -I.. -DNDEBUG"

for i in {1..15}; do
    echo "Matrix size: $i x $i :"
    $CXX $common_flags benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null
    $CXX $common_flags -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null
    echo " "
done

View File

@@ -0,0 +1,677 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include <iostream>
#include <cstdint>
#include <cstdlib>
#include <vector>
#include <fstream>
#include <memory>
#include <cstdio>
// Run-time switches for the blocking sizes. They are exposed to Eigen through
// the EIGEN_TEST_SPECIFIC_BLOCKING_SIZE* macros below, which must therefore be
// defined before <Eigen/Core> is included. benchmark_t::run() sets these
// variables before each measurement.
bool eigen_use_specific_block_size;
int eigen_block_size_k, eigen_block_size_m, eigen_block_size_n;
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES eigen_use_specific_block_size
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n
#include <Eigen/Core>
#include <bench/BenchTimer.h>
using namespace Eigen;
using namespace std;
// Shared timer; benchmark_t::run() reads the CPU time from it.
static BenchTimer timer;
// how many times we repeat each measurement.
// measurements are randomly shuffled - we're not doing
// all N identical measurements in a row.
const int measurement_repetitions = 3;
// Timings below this value are too short to be accurate,
// we'll repeat measurements with more iterations until
// we get a timing above that threshold.
// (compared against differences of timer.getCpuTime(); presumably seconds)
const float min_accurate_time = 1e-2f;
// See --min-working-set-size command line parameter.
// 0 means "use a default that is larger than any likely cache".
size_t min_working_set_size = 0;
// NOTE(review): not written in the code visible here; presumably filled in by
// the clock speed measurement - confirm.
float max_clock_speed = 0.0f;
// range of sizes that we will benchmark (in all 3 K,M,N dimensions)
const size_t maxsize = 2048;
const size_t minsize = 16;
// Matrix, scalar and packet types used throughout the benchmark.
typedef MatrixXf MatrixType;
typedef MatrixType::Scalar Scalar;
typedef internal::packet_traits<Scalar>::type Packet;
// Sizes are stored as their log2 (see compact_size_triple), hence the
// power-of-two requirements.
static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two");
static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two");
static_assert(maxsize > minsize, "maxsize must be larger than minsize");
static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)");
// Just a helper to store a triple of K,M,N sizes for a matrix product.
//
// Copying uses the compiler-generated operations: the former hand-written
// copy constructor did exactly a member-wise copy, and declaring it made the
// implicit copy assignment operator deprecated.
struct size_triple_t
{
  size_t k, m, n;

  // All sizes zero.
  size_triple_t() : k(0), m(0), n(0) {}

  // Explicit sizes.
  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}

  // Decodes the compact 12-bit form (see compact_size_triple): each size is
  // stored as its log2 in one nibble, K in bits 8-11, M in bits 4-7 and N in
  // bits 0-3. Bits 12-15 are ignored.
  // Intentionally not explicit, to stay compatible with existing callers.
  size_triple_t(uint16_t compact)
  {
    // shift in size_t so the arithmetic has the type of the destination
    k = size_t(1) << ((compact & 0xf00) >> 8);
    m = size_t(1) << ((compact & 0x0f0) >> 4);
    n = size_t(1) << ((compact & 0x00f) >> 0);
  }
};
// Returns floor(log2(x)), i.e. the exact base-2 logarithm when x is a power
// of two. Both 0 and 1 give 0.
uint8_t log2_pot(size_t x) {
  size_t shifts = 0;
  // count how many times x can be halved before reaching zero
  for (x >>= 1; x != 0; x >>= 1) {
    ++shifts;
  }
  return static_cast<uint8_t>(shifts);
}
// Convert between size triples and a compact form fitting in 12 bits
// where each size, which must be a POT, is encoded as its log2, on 4 bits
// so the largest representable size is 2^15 == 32k ... big enough.
// Layout: K in bits 8-11, M in bits 4-7, N in bits 0-3.
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
{
  const int k_bits = log2_pot(k) << 8;
  const int m_bits = log2_pot(m) << 4;
  const int n_bits = log2_pot(n);
  return k_bits | m_bits | n_bits;
}
// Convenience overload: encodes the three sizes held by a size_triple_t.
uint16_t compact_size_triple(const size_triple_t& t)
{
  const uint16_t encoded = compact_size_triple(t.k, t.m, t.n);
  return encoded;
}
// A single benchmark. Initially only contains benchmark params.
// Then call run(), which stores the result in the gflops field.
struct benchmark_t
{
  // Product and block sizes in the compact encoding of compact_size_triple().
  uint16_t compact_product_size = 0;
  uint16_t compact_block_size = 0;
  // When true, Eigen's own blocking sizes are used and compact_block_size
  // is left at 0.
  bool use_default_block_size = false;
  // Measured performance, written by run().
  float gflops = 0;

  // Empty benchmark: every field keeps its default above.
  benchmark_t() {}

  // Benchmark of product size (pk,pm,pn) with explicit block sizes (bk,bm,bn).
  benchmark_t(size_t pk, size_t pm, size_t pn,
              size_t bk, size_t bm, size_t bn)
    : compact_product_size(compact_size_triple(pk, pm, pn))
    , compact_block_size(compact_size_triple(bk, bm, bn))
  {}

  // Benchmark of product size (pk,pm,pn) with Eigen's default block sizes.
  benchmark_t(size_t pk, size_t pm, size_t pn)
    : compact_product_size(compact_size_triple(pk, pm, pn))
    , use_default_block_size(true)
  {}

  void run();
};
// Prints one benchmark as
//   <product size, hex> <block size, hex | default(k, m, n)> <gflops>
// For default blocking, the sizes shown are the ones returned by
// computeProductBlockingSizes for the product size.
ostream& operator<<(ostream& s, const benchmark_t& b)
{
  s << hex << b.compact_product_size << dec;
  if (!b.use_default_block_size) {
    s << " " << hex << b.compact_block_size << dec;
  } else {
    const size_triple_t product(b.compact_product_size);
    Index k = product.k;
    Index m = product.m;
    Index n = product.n;
    internal::computeProductBlockingSizes<Scalar, Scalar>(k, m, n);
    s << " default(" << k << ", " << m << ", " << n << ")";
  }
  s << " " << b.gflops;
  return s;
}
// We sort first by increasing benchmark parameters,
// then by decreasing performance.
bool operator<(const benchmark_t& b1, const benchmark_t& b2)
{
  // primary key: product size, ascending
  if (b1.compact_product_size != b2.compact_product_size) {
    return b1.compact_product_size < b2.compact_product_size;
  }
  // secondary key: block size, ascending
  if (b1.compact_block_size != b2.compact_block_size) {
    return b1.compact_block_size < b2.compact_block_size;
  }
  // same parameters: best performance first
  return b1.gflops > b2.gflops;
}
void benchmark_t::run()
{
size_triple_t productsizes(compact_product_size);
if (use_default_block_size) {
eigen_use_specific_block_size = false;
} else {
// feed eigen with our custom blocking params
eigen_use_specific_block_size = true;
size_triple_t blocksizes(compact_block_size);
eigen_block_size_k = blocksizes.k;
eigen_block_size_m = blocksizes.m;
eigen_block_size_n = blocksizes.n;
}
// set up the matrix pool
const size_t combined_three_matrices_sizes =
sizeof(Scalar) *
(productsizes.k * productsizes.m +
productsizes.k * productsizes.n +
productsizes.m * productsizes.n);
// 64 M is large enough that nobody has a cache bigger than that,
// while still being small enough that everybody has this much RAM,
// so conveniently we don't need to special-case platforms here.
const size_t unlikely_large_cache_size = 64 << 20;
const size_t working_set_size =
min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
const size_t matrix_pool_size =
1 + working_set_size / combined_three_matrices_sizes;
MatrixType *lhs = new MatrixType[matrix_pool_size];
MatrixType *rhs = new MatrixType[matrix_pool_size];
MatrixType *dst = new MatrixType[matrix_pool_size];
for (size_t i = 0; i < matrix_pool_size; i++) {
lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k);
rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n);
dst[i] = MatrixType::Zero(productsizes.m, productsizes.n);
}
// main benchmark loop
int iters_at_a_time = 1;
float time_per_iter = 0.0f;
size_t matrix_index = 0;
while (true) {
double starttime = timer.getCpuTime();
for (int i = 0; i < iters_at_a_time; i++) {
dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
matrix_index++;
if (matrix_index == matrix_pool_size) {
matrix_index = 0;
}
}
double endtime = timer.getCpuTime();
const float timing = float(endtime - starttime);
if (timing >= min_accurate_time) {
time_per_iter = timing / iters_at_a_time;
break;
}
iters_at_a_time *= 2;
}
delete[] lhs;
delete[] rhs;
delete[] dst;
gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter;
}
// Prints a description of the host CPU to stdout: the contents of
// /proc/cpuinfo on Linux, the output of `sysctl hw` on Apple platforms,
// and nothing on any other platform.
void print_cpuinfo()
{
#ifdef __linux__
  cout << "contents of /proc/cpuinfo:" << endl;
  ifstream cpuinfo_stream("/proc/cpuinfo");
  if (cpuinfo_stream.is_open()) {
    // Echo the file line by line.
    for (string text; getline(cpuinfo_stream, text); ) {
      cout << text << endl;
    }
    cpuinfo_stream.close();
  }
  cout << endl;
#elif defined __APPLE__
  cout << "output of sysctl hw:" << endl;
  system("sysctl hw");
  cout << endl;
#endif
}
// Returns a printable name for the scalar type T.
// Generic fallback: any type without a dedicated specialization below.
template <typename T>
string type_name()
{
  return string("unknown");
}

// Specialization for single precision.
template<>
string type_name<float>()
{
  return string("float");
}

// Specialization for double precision.
template<>
string type_name<double>()
{
  return string("double");
}
// Base class of the actions selectable on the command line.
// Both virtual functions abort by default, so a derived action is expected
// to override each of them.
struct action_t
{
  virtual ~action_t() {}

  // Name under which the action is selected on the command line.
  virtual const char* invokation_name() const
  {
    abort();
    return nullptr;
  }

  // Executes the action.
  virtual void run() const
  {
    abort();
  }
};
void show_usage_and_exit(int /*argc*/, char* argv[],
const vector<unique_ptr<action_t>>& available_actions)
{
cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl;
cerr << "available actions:" << endl << endl;
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
cerr << " " << (*it)->invokation_name() << endl;
}
cerr << endl;
cerr << "options:" << endl << endl;
cerr << " --min-working-set-size=N:" << endl;
cerr << " Set the minimum working set size to N bytes." << endl;
cerr << " This is rounded up as needed to a multiple of matrix size." << endl;
cerr << " A larger working set lowers the chance of a warm cache." << endl;
cerr << " The default value 0 means use a large enough working" << endl;
cerr << " set to likely outsize caches." << endl;
cerr << " A value of 1 (that is, 1 byte) would mean don't do anything to" << endl;
cerr << " avoid warm caches." << endl;
exit(1);
}
float measure_clock_speed()
{
cerr << "Measuring clock speed... \r" << flush;
vector<float> all_gflops;
for (int i = 0; i < 8; i++) {
benchmark_t b(1024, 1024, 1024);
b.run();
all_gflops.push_back(b.gflops);
}
sort(all_gflops.begin(), all_gflops.end());
float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5];
// multiply by an arbitrary constant to discourage trying doing anything with the
// returned values besides just comparing them with each other.
float result = stable_estimate * 123.456f;
return result;
}
// Wraps a duration in whole seconds so that it can be streamed in a
// human-readable "H h M min S s" form.
struct human_duration_t
{
  int seconds;
  human_duration_t(int s) : seconds(s) {}
};

// Writes d to s as hours, minutes and seconds.
// A unit is only printed once the duration reaches it, and the seconds are
// dropped for durations of ten minutes or more, where they are just noise.
ostream& operator<<(ostream& s, const human_duration_t& d)
{
  int remainder = d.seconds;
  // ">=" (not ">") so that exactly one hour prints as "1 h ", not "60 min ".
  if (remainder >= 3600) {
    const int hours = remainder / 3600;
    s << hours << " h ";
    remainder -= hours * 3600;
  }
  // ">=" (not ">") so that a remainder of exactly one minute is not lost
  // for long durations (e.g. 7260 s is "2 h 1 min ", not "2 h ").
  if (remainder >= 60) {
    const int minutes = remainder / 60;
    s << minutes << " min ";
    remainder -= minutes * 60;
  }
  if (d.seconds < 600) {
    s << remainder << " s";
  }
  return s;
}
// Path of the file used to persist an interrupted benchmark session: it is
// passed to serialize_benchmarks() when the run gives up waiting for the
// clock speed to recover, and to deserialize_benchmarks() on the next start.
// NOTE(review): /data/local/tmp looks like an Android device path; other
// platforms presumably need a different location - confirm.
const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data";
// Saves the current session to `filename` so that a later run can resume it.
//
// The file contains, in order and as raw bytes: max_clock_speed, the number
// of benchmarks, first_benchmark_to_run, and the benchmark_t array itself.
//
// Exits the process with status 1 if the file cannot be opened or if any
// part of it cannot be written; in the latter case the partial file is
// removed so that it cannot be mistaken for a valid session.
void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run)
{
  // "b": the content is raw binary data, not text.
  FILE* file = fopen(filename, "wb");
  if (!file) {
    cerr << "Could not open file " << filename << " for writing." << endl;
    cerr << "Do you have write permissions on the current working directory?" << endl;
    exit(1);
  }
  size_t benchmarks_vector_size = benchmarks.size();
  bool ok =
    1 == fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file) &&
    1 == fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file) &&
    1 == fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file) &&
    benchmarks.size() == fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file);
  // Always close the file; buffered data is flushed here, so a failing
  // fclose also means the session was not fully written.
  if (0 != fclose(file)) {
    ok = false;
  }
  if (!ok) {
    cerr << "Could not write session data to " << filename << "." << endl;
    remove(filename);
    exit(1);
  }
}
// Restores a session previously written by serialize_benchmarks().
//
// Reads, in order and as raw bytes: max_clock_speed, the number of
// benchmarks, first_benchmark_to_run, and the benchmark_t array, which is
// stored into `benchmarks` (resized to the stored count).
//
// Returns true if the whole file was read; the file is then deleted so the
// session is not resumed twice. Returns false if the file cannot be opened
// or is too short; in that case the outputs (and max_clock_speed) may have
// been partially overwritten and the file is left in place.
bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run)
{
  // "b": the content is raw binary data, not text.
  FILE* file = fopen(filename, "rb");
  if (!file) {
    return false;
  }
  size_t benchmarks_vector_size = 0;
  bool ok =
    1 == fread(&max_clock_speed, sizeof(max_clock_speed), 1, file) &&
    1 == fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file) &&
    1 == fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file);
  if (ok) {
    benchmarks.resize(benchmarks_vector_size);
    ok = benchmarks.size() ==
         fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file);
  }
  // Close the stream on every path; it used to be leaked.
  fclose(file);
  if (!ok) {
    return false;
  }
  unlink(filename);
  return true;
}
// Runs benchmarks starting at index first_benchmark_to_run, checking the
// clock speed every minute and once more at the end.
//
// benchmarks:             the benchmarks to run; results are stored in place.
// time_start:             time at which the current pass started, used for
//                         the ETA shown on stderr.
// first_benchmark_to_run: in: index to start from. out: index of the first
//                         benchmark whose result is not yet validated by a
//                         clock speed check; equal to benchmarks.size() when
//                         everything is done, reset to 0 when the whole run
//                         must be restarted.
//
// Returns early (so that the caller calls again) when the clock speed was
// found higher than max_clock_speed (restart from scratch) or lower than it
// (redo the benchmarks run since the last successful check). Exits the
// process with status 2, after serializing the session, if the clock speed
// stays low despite sleeping.
void try_run_some_benchmarks(
  vector<benchmark_t>& benchmarks,
  double time_start,
  size_t& first_benchmark_to_run)
{
  if (first_benchmark_to_run == benchmarks.size()) {
    return;
  }
  double time_last_progress_update = 0;
  double time_last_clock_speed_measurement = 0;
  double time_now = 0;
  size_t benchmark_index = first_benchmark_to_run;
  while (true) {
    float ratio_done = float(benchmark_index) / benchmarks.size();
    time_now = timer.getRealTime();
    // We check clock speed every minute and at the end.
    if (benchmark_index == benchmarks.size() ||
        time_now > time_last_clock_speed_measurement + 60.0f)
    {
      time_last_clock_speed_measurement = time_now;
      // Ensure that clock speed is as expected
      float current_clock_speed = measure_clock_speed();
      // The tolerance needs to be smaller than the relative difference between
      // clock speeds that a device could operate under.
      // It seems unlikely that a device would be throttling clock speeds by
      // amounts smaller than 2%.
      // With a value of 1%, I was getting within noise on a Sandy Bridge.
      const float clock_speed_tolerance = 0.02f;
      if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) {
        // Clock speed is now higher than we previously measured.
        // Either our initial measurement was inaccurate, which won't happen
        // too many times as we are keeping the best clock speed value and
        // allowing some tolerance; or something really weird happened,
        // which invalidates all benchmark results collected so far.
        // Either way, we better restart all over again now.
        if (benchmark_index) {
          cerr << "Restarting at " << 100.0f * ratio_done
               << " % because clock speed increased. " << endl;
        }
        max_clock_speed = current_clock_speed;
        first_benchmark_to_run = 0;
        return;
      }
      bool rerun_last_tests = false;
      if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
        cerr << "Measurements completed so far: "
             << 100.0f * ratio_done
             << " % " << endl;
        cerr << "Clock speed seems to be only "
             << current_clock_speed/max_clock_speed
             << " times what it used to be." << endl;
        // Sleep for 1, 2, 4, ... 32 seconds, re-measuring after each sleep,
        // until the clock speed is back within tolerance.
        unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
        while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
          if (seconds_to_sleep_if_lower_clock_speed > 32) {
            cerr << "Sleeping longer probably won't make a difference." << endl;
            cerr << "Serializing benchmarks to " << session_filename << endl;
            serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run);
            cerr << "Now restart this benchmark, and it should pick up where we left." << endl;
            exit(2);
          }
          rerun_last_tests = true;
          cerr << "Sleeping "
               << seconds_to_sleep_if_lower_clock_speed
               << " s... \r" << endl;
          sleep(seconds_to_sleep_if_lower_clock_speed);
          current_clock_speed = measure_clock_speed();
          seconds_to_sleep_if_lower_clock_speed *= 2;
        }
      }
      if (rerun_last_tests) {
        cerr << "Redoing the last "
             << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
             << " % because clock speed had been low. " << endl;
        return;
      }
      // nothing wrong with the clock speed so far, so there won't be a need to rerun
      // benchmarks run so far in case we later encounter a lower clock speed.
      first_benchmark_to_run = benchmark_index;
    }
    if (benchmark_index == benchmarks.size()) {
      // We're done!
      first_benchmark_to_run = benchmarks.size();
      // Erase progress info
      cerr << " " << endl;
      return;
    }
    // Display progress info on stderr
    if (time_now > time_last_progress_update + 1.0f) {
      time_last_progress_update = time_now;
      cerr << "Measurements... " << 100.0f * ratio_done << " %";
      // No ETA can be extrapolated before the first benchmark has completed:
      // dividing by a zero ratio_done would yield inf/NaN, and converting
      // that to the int taken by human_duration_t is undefined behavior.
      if (ratio_done > 0.0f) {
        cerr << ", ETA "
             << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done);
      }
      cerr << " \r" << flush;
    }
    // This is where we actually run a benchmark!
    benchmarks[benchmark_index].run();
    benchmark_index++;
  }
}
void run_benchmarks(vector<benchmark_t>& benchmarks)
{
size_t first_benchmark_to_run;
vector<benchmark_t> deserialized_benchmarks;
bool use_deserialized_benchmarks = false;
if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) {
cerr << "Found serialized session with "
<< 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
<< " % already done" << endl;
if (deserialized_benchmarks.size() == benchmarks.size() &&
first_benchmark_to_run > 0 &&
first_benchmark_to_run < benchmarks.size())
{
use_deserialized_benchmarks = true;
}
}
if (use_deserialized_benchmarks) {
benchmarks = deserialized_benchmarks;
} else {
// not using deserialized benchmarks, starting from scratch
first_benchmark_to_run = 0;
// Randomly shuffling benchmarks allows us to get accurate enough progress info,
// as now the cheap/expensive benchmarks are randomly mixed so they average out.
// It also means that if data is corrupted for some time span, the odds are that
// not all repetitions of a given benchmark will be corrupted.
random_shuffle(benchmarks.begin(), benchmarks.end());
}
for (int i = 0; i < 4; i++) {
max_clock_speed = max(max_clock_speed, measure_clock_speed());
}
double time_start = 0.0;
while (first_benchmark_to_run < benchmarks.size()) {
if (first_benchmark_to_run == 0) {
time_start = timer.getRealTime();
}
try_run_some_benchmarks(benchmarks,
time_start,
first_benchmark_to_run);
}
// Sort timings by increasing benchmark parameters, and decreasing gflops.
// The latter is very important. It means that we can ignore all but the first
// benchmark with given parameters.
sort(benchmarks.begin(), benchmarks.end());
// Collect best (i.e. now first) results for each parameter values.
vector<benchmark_t> best_benchmarks;
for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
if (best_benchmarks.empty() ||
best_benchmarks.back().compact_product_size != it->compact_product_size ||
best_benchmarks.back().compact_block_size != it->compact_block_size)
{
best_benchmarks.push_back(*it);
}
}
// keep and return only the best benchmarks
benchmarks = best_benchmarks;
}
struct measure_all_pot_sizes_action_t : action_t
{
virtual const char* invokation_name() const { return "all-pot-sizes"; }
virtual void run() const
{
vector<benchmark_t> benchmarks;
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) {
for (size_t mblock = minsize; mblock <= msize; mblock *= 2) {
for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) {
benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock);
}
}
}
}
}
}
}
run_benchmarks(benchmarks);
cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl;
for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
cout << *it << endl;
}
}
};
struct measure_default_sizes_action_t : action_t
{
virtual const char* invokation_name() const { return "default-sizes"; }
virtual void run() const
{
vector<benchmark_t> benchmarks;
for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
benchmarks.emplace_back(ksize, msize, nsize);
}
}
}
}
run_benchmarks(benchmarks);
cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl;
for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
cout << *it << endl;
}
}
};
int main(int argc, char* argv[])
{
double time_start = timer.getRealTime();
cout.precision(4);
cerr.precision(4);
vector<unique_ptr<action_t>> available_actions;
available_actions.emplace_back(new measure_all_pot_sizes_action_t);
available_actions.emplace_back(new measure_default_sizes_action_t);
auto action = available_actions.end();
if (argc <= 1) {
show_usage_and_exit(argc, argv, available_actions);
}
for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
if (!strcmp(argv[1], (*it)->invokation_name())) {
action = it;
break;
}
}
if (action == available_actions.end()) {
show_usage_and_exit(argc, argv, available_actions);
}
for (int i = 2; i < argc; i++) {
if (argv[i] == strstr(argv[i], "--min-working-set-size=")) {
const char* equals_sign = strchr(argv[i], '=');
min_working_set_size = strtoul(equals_sign+1, nullptr, 10);
} else {
cerr << "unrecognized option: " << argv[i] << endl << endl;
show_usage_and_exit(argc, argv, available_actions);
}
}
print_cpuinfo();
cout << "benchmark parameters:" << endl;
cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl;
cout << "scalar type: " << type_name<Scalar>() << endl;
cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl;
cout << "minsize = " << minsize << endl;
cout << "maxsize = " << maxsize << endl;
cout << "measurement_repetitions = " << measurement_repetitions << endl;
cout << "min_accurate_time = " << min_accurate_time << endl;
cout << "min_working_set_size = " << min_working_set_size;
if (min_working_set_size == 0) {
cout << " (try to outsize caches)";
}
cout << endl << endl;
(*action)->run();
double time_end = timer.getRealTime();
cerr << "Finished in " << human_duration_t(time_end - time_start) << endl;
}

View File

@@ -0,0 +1,39 @@
// g++ -O3 -DNDEBUG -DMATSIZE=<x> benchmark.cpp -o benchmark && time ./benchmark
#include <iostream>
#include <Eigen/Core>
// Compile-time parameters of this benchmark. Each one can be overridden on
// the compiler command line (e.g. -DMATSIZE=<x>).

// Number of rows and columns of the fixed-size square matrices used in main().
#ifndef MATSIZE
#define MATSIZE 3
#endif
using namespace std;
using namespace Eigen;
// Number of iterations of the timed loop in main().
#ifndef REPEAT
#define REPEAT 40000000
#endif
// Scalar type of the matrix coefficients.
#ifndef SCALAR
#define SCALAR double
#endif
int main(int argc, char *argv[])
{
Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones();
Matrix<SCALAR,MATSIZE,MATSIZE> m;
for(int i = 0; i < MATSIZE; i++)
for(int j = 0; j < MATSIZE; j++)
{
m(i,j) = (i+MATSIZE*j);
}
asm("#begin");
for(int a = 0; a < REPEAT; a++)
{
m = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones() + 0.00005 * (m + (m*m));
}
asm("#end");
cout << m << endl;
return 0;
}

View File

@@ -0,0 +1,38 @@
// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX
#include <iostream>
#include <Eigen/Core>
using namespace std;
using namespace Eigen;
// Compile-time parameters of this benchmark; each can be overridden with -D.

// Number of iterations of the benchmark loop in main().
#ifndef REPEAT
#define REPEAT 10000
#endif
// Scalar type of the matrix coefficients.
#ifndef SCALAR
#define SCALAR float
#endif
// Benchmarks in-place arithmetic on random sub-blocks of a 100x100 dynamic
// matrix: each iteration picks a block whose top-left corner is in [0,10]
// and whose extents are in [50,80] (so it always fits), and applies
// +=, *=, -= and /= to it. The first coefficient is printed at the end so
// the computation has an observable effect.
int main(int argc, char *argv[])
{
  typedef Matrix<SCALAR, Eigen::Dynamic, Eigen::Dynamic> Mat;
  Mat m(100, 100);
  m.setRandom();
  for(int a = 0; a < REPEAT; a++)
  {
    int r, c, nr, nc;
    r = Eigen::internal::random<int>(0,10);
    c = Eigen::internal::random<int>(0,10);
    nr = Eigen::internal::random<int>(50,80);
    nc = Eigen::internal::random<int>(50,80);
    m.block(r,c,nr,nc) += Mat::Ones(nr,nc);
    m.block(r,c,nr,nc) *= SCALAR(10);
    // The constant-matrix factory is spelled Constant (capital C).
    m.block(r,c,nr,nc) -= Mat::Constant(nr,nc,SCALAR(10));
    m.block(r,c,nr,nc) /= SCALAR(10);
  }
  // operator[] is reserved for vectors; a matrix coefficient is accessed
  // with operator()(row, col).
  cout << m(0,0) << endl;
  return 0;
}

View File

@@ -0,0 +1,36 @@
// g++ -fopenmp -I .. -O3 -DNDEBUG -finline-limit=1000 benchmarkX.cpp -o b && time ./b
#include <iostream>
#include <Eigen/Core>
using namespace std;
using namespace Eigen;
// Compile-time parameters of this benchmark; each can be overridden with -D.

// Dynamic-size matrix type under test.
// NOTE(review): MatrixXLd is not defined in this file; presumably it is
// provided by the included Eigen headers or expected to be overridden with
// -DMATTYPE=... - confirm.
#ifndef MATTYPE
#define MATTYPE MatrixXLd
#endif
// Number of rows and columns of the square matrices used in main().
#ifndef MATSIZE
#define MATSIZE 400
#endif
// Number of iterations of the benchmark loop in main().
#ifndef REPEAT
#define REPEAT 100
#endif
// Benchmarks dynamic-size matrix arithmetic: fills a MATSIZE x MATSIZE
// matrix with small fractional values, applies m = I + 0.0001 * (m + m*m)
// REPEAT times (I being the all-ones matrix) and prints one coefficient so
// the computation has an observable effect.
int main(int argc, char *argv[])
{
  MATTYPE I = MATTYPE::Ones(MATSIZE,MATSIZE);
  MATTYPE m(MATSIZE,MATSIZE);
  for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
  {
    // Divide in floating point: with integer operands the quotient was
    // always 0, so the whole matrix was initialized to zero.
    m(i,j) = (i+j+1)/double(MATSIZE*MATSIZE);
  }
  for(int a = 0; a < REPEAT; a++)
  {
    m = I + 0.0001 * (m + m*m);
  }
  cout << m(0,0) << endl;
  return 0;
}

View File

@@ -0,0 +1,35 @@
// g++ -O3 -DNDEBUG benchmarkX.cpp -o benchmarkX && time ./benchmarkX
#include <iostream>
#include <Eigen/Core>
using namespace std;
using namespace Eigen;
// Compile-time parameters of this benchmark; each can be overridden with -D.

// Dynamic-size vector type under test.
// NOTE(review): VectorXLd is not defined in this file; presumably it is
// provided by the included Eigen headers or expected to be overridden with
// -DVECTYPE=... - confirm.
#ifndef VECTYPE
#define VECTYPE VectorXLd
#endif
// Number of coefficients of the vectors used in main().
#ifndef VECSIZE
#define VECSIZE 1000000
#endif
// Number of iterations of the benchmark loop in main().
#ifndef REPEAT
#define REPEAT 1000
#endif
// Benchmarks coefficient-wise vector arithmetic: fills a vector of VECSIZE
// coefficients, applies m = Ones + 0.00005 * (m^2 + m/4) REPEAT times
// (the square being coefficient-wise) and prints the first coefficient so
// the computation has an observable effect.
int main(int argc, char *argv[])
{
  VECTYPE I = VECTYPE::Ones(VECSIZE);
  VECTYPE m(VECSIZE,1);
  for(int index = 0; index < VECSIZE; ++index)
  {
    m[index] = 0.1 * index/VECSIZE;
  }

  for(int iteration = 0; iteration < REPEAT; ++iteration)
  {
    // NOTE(review): cwise() looks like the Eigen 2 coefficient-wise API;
    // confirm it is available in the Eigen version this is built against.
    m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4);
  }

  cout << m[0] << endl;
  return 0;
}

View File

@@ -0,0 +1,18 @@
#!/bin/bash
# Builds and times benchmark.cpp and benchmarkX.cpp in column-major and
# row-major storage order, each with and without -DNDEBUG.
CXX=${CXX-g++} # default value unless caller has defined CXX

# build_and_time LABEL TARGET [EXTRA_FLAGS...]
# Prints LABEL, compiles TARGET.cpp into TARGET with -O3 -I .. plus the extra
# flags and, if the build succeeded, times the program with its output discarded.
build_and_time() {
  local label=$1
  local target=$2
  shift 2
  echo "$label"
  $CXX -O3 -I .. "$@" "$target.cpp" -o "$target" && time "./$target" >/dev/null
}

build_and_time "Fixed size 3x3, column-major, -DNDEBUG" benchmark -DNDEBUG
build_and_time "Fixed size 3x3, column-major, with asserts" benchmark
build_and_time "Fixed size 3x3, row-major, -DNDEBUG" benchmark -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG
build_and_time "Fixed size 3x3, row-major, with asserts" benchmark -DEIGEN_DEFAULT_TO_ROW_MAJOR
build_and_time "Dynamic size 20x20, column-major, -DNDEBUG" benchmarkX -DNDEBUG
build_and_time "Dynamic size 20x20, column-major, with asserts" benchmarkX
build_and_time "Dynamic size 20x20, row-major, -DNDEBUG" benchmarkX -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG
build_and_time "Dynamic size 20x20, row-major, with asserts" benchmarkX -DEIGEN_DEFAULT_TO_ROW_MAJOR

View File

@@ -0,0 +1,107 @@
# Build configuration of the BTL project: compiler flags, include
# directories and the optional rt library used when linking the benchmarks.
PROJECT(BTL)
CMAKE_MINIMUM_REQUIRED(VERSION 2.6.2)
# Look for CMake modules in this project's cmake/ directory, then in Eigen's.
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${Eigen_SOURCE_DIR}/cmake)
include(MacroOptionalAddSubdirectory)
OPTION(BTL_NOVEC "Disable SSE/Altivec optimizations when possible" OFF)
SET(CMAKE_INCLUDE_CURRENT_DIR ON)
# IS_ICPC is non-empty when the C++ compiler path contains "icpc".
string(REGEX MATCH icpc IS_ICPC ${CMAKE_CXX_COMPILER})
# GCC / icpc: optimized build without debug info and with asserts disabled;
# these flags are prepended to any flags that were already set.
IF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
SET(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_CXX_FLAGS}")
SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_Fortran_FLAGS}")
IF(BTL_NOVEC)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
ENDIF(BTL_NOVEC)
ENDIF(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
# MSVC: note that this replaces CMAKE_CXX_FLAGS instead of extending it.
IF(MSVC)
SET(CMAKE_CXX_FLAGS " /O2 /Ot /GL /fp:fast -DNDEBUG")
# SET(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG")
IF(BTL_NOVEC)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
ENDIF(BTL_NOVEC)
ENDIF(MSVC)
# icpc additionally gets -fast for both C++ and Fortran.
if(IS_ICPC)
set(CMAKE_CXX_FLAGS "-fast ${CMAKE_CXX_FLAGS}")
set(CMAKE_Fortran_FLAGS "-fast ${CMAKE_Fortran_FLAGS}")
endif(IS_ICPC)
include_directories(
${PROJECT_SOURCE_DIR}/actions
${PROJECT_SOURCE_DIR}/generic_bench
${PROJECT_SOURCE_DIR}/generic_bench/utils
${PROJECT_SOURCE_DIR}/libs/STL)
# find_package(MKL)
# if (MKL_FOUND)
# add_definitions(-DHAVE_MKL)
# set(DEFAULT_LIBRARIES ${MKL_LIBRARIES})
# endif (MKL_FOUND)
# Link against librt when it is available.
find_library(EIGEN_BTL_RT_LIBRARY rt)
# if we cannot find it easily, then we don't need it!
if(NOT EIGEN_BTL_RT_LIBRARY)
set(EIGEN_BTL_RT_LIBRARY "")
endif()
# BTL_ADD_BENCH(targetname source... [ON|OFF|TRUE|FALSE])
#
# Declares the option BUILD_<targetname> and, when it is enabled, an
# executable named <targetname> built from the given sources, registered as
# a test and linked against DEFAULT_LIBRARIES and EIGEN_BTL_RT_LIBRARY.
#
# The last argument is used as the default value of the option. If the
# argument list contains none of ON/OFF/TRUE/FALSE, the default is ON.
MACRO(BTL_ADD_BENCH targetname)
# Remember the last extra argument: it is the candidate default value.
foreach(_current_var ${ARGN})
set(_last_var ${_current_var})
endforeach(_current_var)
# The sources are the extra arguments with any boolean keyword removed.
set(_sources ${ARGN})
list(LENGTH _sources _argn_length)
list(REMOVE_ITEM _sources ON OFF TRUE FALSE)
list(LENGTH _sources _src_length)
# Nothing was removed, so no default was given: build by default.
if (${_argn_length} EQUAL ${_src_length})
set(_last_var ON)
endif (${_argn_length} EQUAL ${_src_length})
OPTION(BUILD_${targetname} "Build benchmark ${targetname}" ${_last_var})
IF(BUILD_${targetname})
ADD_EXECUTABLE(${targetname} ${_sources})
ADD_TEST(${targetname} "${targetname}")
target_link_libraries(${targetname} ${DEFAULT_LIBRARIES} ${EIGEN_BTL_RT_LIBRARY})
ENDIF(BUILD_${targetname})
ENDMACRO(BTL_ADD_BENCH)
# btl_add_target_property(target prop value)
#
# Appends <value> (separated by a space) to property <prop> of <target>.
# Does nothing when BUILD_<target> is off.
macro(btl_add_target_property target prop value)
if(BUILD_${target})
get_target_property(previous ${target} ${prop})
# Treat a property that is not set yet as an empty string.
if(NOT previous)
set(previous "")
endif()
set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}")
endif()
endmacro(btl_add_target_property)
# Enable testing (BTL_ADD_BENCH registers each benchmark with ADD_TEST), then
# descend into every directory under libs/ and into data/.
ENABLE_TESTING()
add_subdirectory(libs/eigen3)
add_subdirectory(libs/eigen2)
add_subdirectory(libs/tensors)
add_subdirectory(libs/BLAS)
add_subdirectory(libs/ublas)
add_subdirectory(libs/gmm)
add_subdirectory(libs/mtl4)
add_subdirectory(libs/blitz)
add_subdirectory(libs/tvmet)
add_subdirectory(libs/STL)
add_subdirectory(libs/blaze)
add_subdirectory(data)

View File

@@ -0,0 +1,340 @@
GNU GENERAL PUBLIC LICENSE
Version 2, June 1991
Copyright (C) 1989, 1991 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users. This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it. (Some other Free Software Foundation software is covered by
the GNU Library General Public License instead.) You can apply it to
your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.
To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have. You must make sure that they, too, receive or can get the
source code. And you must show them these terms so they know their
rights.
We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.
Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software. If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.
Finally, any free program is threatened constantly by software
patents. We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary. To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.
The precise terms and conditions for copying, distribution and
modification follow.
GNU GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License. The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language. (Hereinafter, translation is included without limitation in
the term "modification".) Each licensee is addressed as "you".
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.
1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.
You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.
2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) You must cause the modified files to carry prominent notices
stating that you changed the files and the date of any change.
b) You must cause any work that you distribute or publish, that in
whole or in part contains or is derived from the Program or any
part thereof, to be licensed as a whole at no charge to all third
parties under the terms of this License.
c) If the modified program normally reads commands interactively
when run, you must cause it, when started running for such
interactive use in the most ordinary way, to print or display an
announcement including an appropriate copyright notice and a
notice that there is no warranty (or else, saying that you provide
a warranty) and that users may redistribute the program under
these conditions, and telling the user how to view a copy of this
License. (Exception: if the Program itself is interactive but
does not normally print such an announcement, your work based on
the Program is not required to print an announcement.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.
In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:
a) Accompany it with the complete corresponding machine-readable
source code, which must be distributed under the terms of Sections
1 and 2 above on a medium customarily used for software interchange; or,
b) Accompany it with a written offer, valid for at least three
years, to give any third party, for a charge no more than your
cost of physically performing source distribution, a complete
machine-readable copy of the corresponding source code, to be
distributed under the terms of Sections 1 and 2 above on a medium
customarily used for software interchange; or,
c) Accompany it with the information you received as to the offer
to distribute corresponding source code. (This alternative is
allowed only for noncommercial distribution and only if you
received the program in object code or executable form with such
an offer, in accord with Subsection b above.)
The source code for a work means the preferred form of the work for
making modifications to it. For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable. However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.
If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.
4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License. Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.
5. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Program or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.
6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.
7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all. For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.
If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded. In such case, this License incorporates
the limitation as if written in the body of this License.
9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time. Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation. If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.
10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission. For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this. Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.
NO WARRANTY
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Also add information on how to contact you by electronic and paper mail.
If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:
Gnomovision version 69, Copyright (C) year name of author
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
This is free software, and you are welcome to redistribute it
under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License. Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
`Gnomovision' (which makes passes at compilers) written by James Hacker.
<signature of Ty Coon>, 1 April 1989
Ty Coon, President of Vice
This General Public License does not permit incorporating your program into
proprietary programs. If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library. If this is what you want to do, use the GNU Library General
Public License instead of this License.

View File

@@ -0,0 +1,154 @@
Bench Template Library
****************************************
Introduction :
The aim of this project is to compare the performance
of available numerical libraries. The code is designed
as generic and modular as possible. Thus, adding new
numerical libraries or new numerical tests should
require minimal effort.
*****************************************
Installation :
BTL uses cmake / ctest:
1 - create a build directory:
$ mkdir build
$ cd build
2 - configure:
$ ccmake ..
3 - run the bench using ctest:
$ ctest -V
You can run the benchmarks only on libraries matching a given regular expression:
ctest -V -R <regexp>
For instance:
ctest -V -R eigen2
You can also select a given set of actions defining the environment variable BTL_CONFIG this way:
BTL_CONFIG="-a action1{:action2}*" ctest -V
An example:
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2
Finally, if bench results already exist (the bench*.dat files), the new results are merged with them by keeping the best value for each matrix size. If you want to overwrite the previous ones instead, simply add the "--overwrite" option:
BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
4 - analyze the results: different data files (.dat) are produced in each library's directory under libs/.
If gnuplot is available, choose a directory name in the data directory to store the results and type:
$ cd data
$ mkdir my_directory
$ cp ../libs/*/*.dat my_directory
Build the data utilities in this (data) directory
make
Then you can look at the raw data,
go_mean my_directory
or smooth the data first :
smooth_all.sh my_directory
go_mean my_directory_smooth
*************************************************
Files and directories :
generic_bench : all the bench sources common to all libraries
actions : sources for different action wrappers (axpy, matrix-matrix product) to be tested.
libs/* : bench sources specific to each tested libraries.
machine_dep : directory used to store machine specific Makefile.in
data : directory used to store gnuplot scripts and data analysis utilities
**************************************************
Principles : the code modularity is achieved by defining two concepts :
****** Action concept : This is a class defining which kind
of test must be performed (e.g. a matrix_vector_product).
An Action should define the following methods :
*** Ctor using the size of the problem (matrix or vector size) as an argument
Action action(size);
*** initialize : this method initializes the calculation (e.g. initializes the matrix and vector arguments)
action.initialize();
*** calculate : this method actually launches the calculation to be benchmarked
action.calculate();
*** nb_op_base() : this method returns the complexity of the calculate method (allowing the mflops evaluation)
*** name() : this method returns the name of the action (std::string)
****** Interface concept : This is a class or namespace defining how to use a given library and
its specific containers (matrix and vector). Up to now an interface should define the following types
*** real_type : kind of float to be used (float or double)
*** stl_vector : must correspond to std::vector<real_type>
*** stl_matrix : must correspond to std::vector<stl_vector>
*** gene_vector : the vector type for this interface --> e.g. (real_type *) for the C_interface
*** gene_matrix : the matrix type for this interface --> e.g. (gene_vector *) for the C_interface
+ the following common methods
*** free_matrix(gene_matrix & A, int N) deallocation of an N-sized gene_matrix A
*** free_vector(gene_vector & B) deallocation of an N-sized gene_vector B
*** matrix_from_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of an stl_matrix A_stl into a gene_matrix A.
The allocation of A is done in this function.
*** vector_from_stl(gene_vector & B, stl_vector & B_stl) copy the content of an stl_vector B_stl into a gene_vector B.
The allocation of B is done in this function.
*** matrix_to_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of a gene_matrix A into an stl_matrix A_stl.
The size of A_stl must correspond to the size of A.
*** vector_to_stl(gene_vector & A, stl_vector & A_stl) copy the content of a gene_vector A into an stl_vector A_stl.
The size of A_stl must correspond to the size of A.
*** copy_matrix(gene_matrix & source, gene_matrix & cible, int N) : copy the content of source in cible. Both source
and cible must be sized NxN.
*** copy_vector(gene_vector & source, gene_vector & cible, int N) : copy the content of source in cible. Both source
and cible must be sized N.
and the following methods, one for each action to be benchmarked :
*** matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
*** matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
*** ata_product(const gene_matrix & A, gene_matrix & X, int N)
*** aat_product(const gene_matrix & A, gene_matrix & X, int N)
*** axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
The bench algorithm (generic_bench/bench.hh) is templated with an action itself templated with
an interface. A typical main.cpp source stored in a given library directory libs/A_LIB
looks like :
bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
this function will produce an XY data file containing the measured mflops as a function of the size for 50
sizes between 10 and 1000.
This algorithm can be adapted by providing a given Perf_Analyzer object which determines how the time
measurements must be done. For example, the X86_Perf_Analyzer uses the asm rdtsc function and provides
a very fast and accurate (but less portable) timing method. The default is the Portable_Perf_Analyzer
so
bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
is equivalent to
bench< Portable_Perf_Analyzer,AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
If your system supports it, we suggest using a mixed implementation (X86_Perf_Analyzer + Portable_Perf_Analyzer).
replace
bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point);
with
bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
in generic_bench/bench.hh
.

View File

@@ -0,0 +1,145 @@
//=====================================================
// File : action_aat_product.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_AAT_PRODUCT
#define ACTION_AAT_PRODUCT
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::aat_product.
//
// The class keeps three views of the same problem:
//   - STL containers (A_stl, X_stl, resu_stl) used to build the data and to
//     verify the result through STL_interface,
//   - pristine Interface containers (A_ref, X_ref),
//   - working Interface containers (A, X) that calculate() operates on and
//     that initialize() restores from the pristine copies.
template<class Interface>
class Action_aat_product {

public :

  // Ctor: builds the STL data for a problem of the given size and converts it
  // into the Interface containers.
  Action_aat_product( int size ):_size(size)
  {
    MESSAGE("Action_aat_product Ctor");

    // STL matrix initialization (A through pseudo_random, X and the result
    // buffer through null_function)
    init_matrix<pseudo_random>(A_stl,_size);
    init_matrix<null_function>(X_stl,_size);
    init_matrix<null_function>(resu_stl,_size);

    // generic matrix initialization: pristine copies first ...
    Interface::matrix_from_stl(A_ref,A_stl);
    Interface::matrix_from_stl(X_ref,X_stl);

    // ... then the working copies used by calculate()
    Interface::matrix_from_stl(A,A_stl);
    Interface::matrix_from_stl(X,X_stl);
  }

  // Copying an action is not supported: any attempt is reported and aborts
  // the program. The exit status is non-zero so that the misuse is seen as a
  // failure by the caller (e.g. ctest) instead of a successful run.
  Action_aat_product( const Action_aat_product & )
  {
    INFOS("illegal call to Action_aat_product Copy Ctor");
    exit(1);
  }

  // Dtor: releases every Interface container allocated by the ctor.
  ~Action_aat_product( void ){
    MESSAGE("Action_aat_product Dtor");

    // deallocation
    Interface::free_matrix(A,_size);
    Interface::free_matrix(X,_size);

    Interface::free_matrix(A_ref,_size);
    Interface::free_matrix(X_ref,_size);
  }

  // action name: "aat_" followed by the name of the tested interface
  static inline std::string name( void )
  {
    return "aat_"+Interface::name();
  }

  // Operation count used to convert the measured time into mflops.
  // NOTE(review): this reports _size^3 whereas Action_ata_product reports
  // 2*_size^3 for the analogous product -- confirm whether the difference is
  // intentional before comparing aat and ata figures.
  double nb_op_base( void ){
    return double(_size)*double(_size)*double(_size);
  }

  // Restores the working containers from the pristine copies.
  inline void initialize( void ){
    Interface::copy_matrix(A_ref,A,_size);
    Interface::copy_matrix(X_ref,X,_size);
  }

  // Runs the benchmarked kernel on the working containers.
  inline void calculate( void ) {
    Interface::aat_product(A,X,_size);
  }

  // Compares the Interface result with the one computed by STL_interface.
  // Problems larger than 128 are not verified. Exits with status 1 when the
  // norm of the difference exceeds 1e-6.
  void check_result( void ){
    if (_size>128) return;

    // fetch the result produced by the tested interface
    Interface::matrix_to_stl(X,resu_stl);

    // recompute it with the STL implementation
    STL_interface<typename Interface::real_type>::aat_product(A_stl,X_stl,_size);

    typename Interface::real_type error=
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);

    if (error>1.e-6){
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(1);
    }
  }

private :

  // STL-side data: input, reference result, result fetched from the interface
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix resu_stl;

  // pristine Interface-side copies
  typename Interface::gene_matrix A_ref;
  typename Interface::gene_matrix X_ref;

  // working Interface-side copies
  typename Interface::gene_matrix A;
  typename Interface::gene_matrix X;

  // problem size given to the ctor
  int _size;
};
#endif

View File

@@ -0,0 +1,145 @@
//=====================================================
// File : action_ata_product.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_ATA_PRODUCT
#define ACTION_ATA_PRODUCT
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::ata_product.
//
// The class keeps three views of the same problem:
//   - STL containers (A_stl, X_stl, resu_stl) used to build the data and to
//     verify the result through STL_interface,
//   - pristine Interface containers (A_ref, X_ref),
//   - working Interface containers (A, X) that calculate() operates on and
//     that initialize() restores from the pristine copies.
template<class Interface>
class Action_ata_product {

public :

  // Ctor: builds the STL data for a problem of the given size and converts it
  // into the Interface containers.
  Action_ata_product( int size ):_size(size)
  {
    MESSAGE("Action_ata_product Ctor");

    // STL matrix initialization (A through pseudo_random, X and the result
    // buffer through null_function)
    init_matrix<pseudo_random>(A_stl,_size);
    init_matrix<null_function>(X_stl,_size);
    init_matrix<null_function>(resu_stl,_size);

    // generic matrix initialization: pristine copies first ...
    Interface::matrix_from_stl(A_ref,A_stl);
    Interface::matrix_from_stl(X_ref,X_stl);

    // ... then the working copies used by calculate()
    Interface::matrix_from_stl(A,A_stl);
    Interface::matrix_from_stl(X,X_stl);
  }

  // Copying an action is not supported: any attempt is reported and aborts
  // the program. The exit status is non-zero so that the misuse is seen as a
  // failure by the caller (e.g. ctest) instead of a successful run.
  Action_ata_product( const Action_ata_product & )
  {
    INFOS("illegal call to Action_ata_product Copy Ctor");
    exit(1);
  }

  // Dtor: releases every Interface container allocated by the ctor.
  ~Action_ata_product( void ){
    MESSAGE("Action_ata_product Dtor");

    // deallocation
    Interface::free_matrix(A,_size);
    Interface::free_matrix(X,_size);

    Interface::free_matrix(A_ref,_size);
    Interface::free_matrix(X_ref,_size);
  }

  // action name: "ata_" followed by the name of the tested interface
  static inline std::string name( void )
  {
    return "ata_"+Interface::name();
  }

  // Operation count used to convert the measured time into mflops:
  // 2 * _size^3, evaluated in double precision.
  double nb_op_base( void ){
    return 2.0*_size*_size*_size;
  }

  // Restores the working containers from the pristine copies.
  inline void initialize( void ){
    Interface::copy_matrix(A_ref,A,_size);
    Interface::copy_matrix(X_ref,X,_size);
  }

  // Runs the benchmarked kernel on the working containers.
  inline void calculate( void ) {
    Interface::ata_product(A,X,_size);
  }

  // Compares the Interface result with the one computed by STL_interface.
  // Problems larger than 128 are not verified. Exits with status 1 when the
  // norm of the difference exceeds 1e-6.
  void check_result( void ){
    if (_size>128) return;

    // fetch the result produced by the tested interface
    Interface::matrix_to_stl(X,resu_stl);

    // recompute it with the STL implementation
    STL_interface<typename Interface::real_type>::ata_product(A_stl,X_stl,_size);

    typename Interface::real_type error=
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);

    if (error>1.e-6){
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(1);
    }
  }

private :

  // STL-side data: input, reference result, result fetched from the interface
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix resu_stl;

  // pristine Interface-side copies
  typename Interface::gene_matrix A_ref;
  typename Interface::gene_matrix X_ref;

  // working Interface-side copies
  typename Interface::gene_matrix A;
  typename Interface::gene_matrix X;

  // problem size given to the ctor
  int _size;
};
#endif

View File

@@ -0,0 +1,134 @@
//=====================================================
// File : action_atv_product.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_ATV_PRODUCT
#define ACTION_ATV_PRODUCT
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
template<class Interface>
class Action_atv_product {
public :
Action_atv_product( int size ) : _size(size)
{
MESSAGE("Action_atv_product Ctor");
// STL matrix and vector initialization
init_matrix<pseudo_random>(A_stl,_size);
init_vector<pseudo_random>(B_stl,_size);
init_vector<null_function>(X_stl,_size);
init_vector<null_function>(resu_stl,_size);
// generic matrix and vector initialization
Interface::matrix_from_stl(A_ref,A_stl);
Interface::vector_from_stl(B_ref,B_stl);
Interface::vector_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::vector_from_stl(B,B_stl);
Interface::vector_from_stl(X,X_stl);
}
// invalidate copy ctor
Action_atv_product( const Action_atv_product & )
{
INFOS("illegal call to Action_atv_product Copy Ctor");
exit(1);
}
~Action_atv_product( void )
{
MESSAGE("Action_atv_product Dtor");
Interface::free_matrix(A,_size);
Interface::free_vector(B);
Interface::free_vector(X);
Interface::free_matrix(A_ref,_size);
Interface::free_vector(B_ref);
Interface::free_vector(X_ref);
}
static inline std::string name() { return "atv_" + Interface::name(); }
double nb_op_base( void ) { return 2.0*_size*_size; }
inline void initialize( void ){
Interface::copy_matrix(A_ref,A,_size);
Interface::copy_vector(B_ref,B,_size);
Interface::copy_vector(X_ref,X,_size);
}
BTL_DONT_INLINE void calculate( void ) {
BTL_ASM_COMMENT("begin atv");
Interface::atv_product(A,B,X,_size);
BTL_ASM_COMMENT("end atv");
}
void check_result( void )
{
if (_size>128) return;
Interface::vector_to_stl(X,resu_stl);
STL_interface<typename Interface::real_type>::atv_product(A_stl,B_stl,X_stl,_size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
if (error>1.e-6){
INFOS("WRONG CALCULATION...residual=" << error);
exit(1);
}
}
private :
typename Interface::stl_matrix A_stl;
typename Interface::stl_vector B_stl;
typename Interface::stl_vector X_stl;
typename Interface::stl_vector resu_stl;
typename Interface::gene_matrix A_ref;
typename Interface::gene_vector B_ref;
typename Interface::gene_vector X_ref;
typename Interface::gene_matrix A;
typename Interface::gene_vector B;
typename Interface::gene_vector X;
int _size;
};
#endif

View File

@@ -0,0 +1,127 @@
//=====================================================
// File : action_axpby.hh
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_AXPBY
#define ACTION_AXPBY
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
template<class Interface>
class Action_axpby {
public :
// Ctor
Action_axpby( int size ):_alpha(0.5),_beta(0.95),_size(size)
{
MESSAGE("Action_axpby Ctor");
// STL vector initialization
init_vector<pseudo_random>(X_stl,_size);
init_vector<pseudo_random>(Y_stl,_size);
init_vector<null_function>(resu_stl,_size);
// generic matrix and vector initialization
Interface::vector_from_stl(X_ref,X_stl);
Interface::vector_from_stl(Y_ref,Y_stl);
Interface::vector_from_stl(X,X_stl);
Interface::vector_from_stl(Y,Y_stl);
}
// invalidate copy ctor
Action_axpby( const Action_axpby & )
{
INFOS("illegal call to Action_axpby Copy Ctor");
exit(1);
}
// Dtor
~Action_axpby( void ){
MESSAGE("Action_axpby Dtor");
// deallocation
Interface::free_vector(X_ref);
Interface::free_vector(Y_ref);
Interface::free_vector(X);
Interface::free_vector(Y);
}
// action name
static inline std::string name( void )
{
return "axpby_"+Interface::name();
}
double nb_op_base( void ){
return 3.0*_size;
}
inline void initialize( void ){
Interface::copy_vector(X_ref,X,_size);
Interface::copy_vector(Y_ref,Y,_size);
}
inline void calculate( void ) {
BTL_ASM_COMMENT("mybegin axpby");
Interface::axpby(_alpha,X,_beta,Y,_size);
BTL_ASM_COMMENT("myend axpby");
}
void check_result( void ){
if (_size>128) return;
// calculation check
Interface::vector_to_stl(Y,resu_stl);
STL_interface<typename Interface::real_type>::axpby(_alpha,X_stl,_beta,Y_stl,_size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
if (error>1.e-6){
INFOS("WRONG CALCULATION...residual=" << error);
exit(2);
}
}
private :
typename Interface::stl_vector X_stl;
typename Interface::stl_vector Y_stl;
typename Interface::stl_vector resu_stl;
typename Interface::gene_vector X_ref;
typename Interface::gene_vector Y_ref;
typename Interface::gene_vector X;
typename Interface::gene_vector Y;
typename Interface::real_type _alpha;
typename Interface::real_type _beta;
int _size;
};
#endif

View File

@@ -0,0 +1,139 @@
//=====================================================
// File : action_axpy.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_AXPY
#define ACTION_AXPY
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::axpy, called with the scalar _coef and
// the two vectors X and Y (presumably Y <- _coef*X + Y, following the BLAS
// axpy convention -- confirm against the interface implementations).
//
// The class keeps three views of the same problem:
//   - STL containers (X_stl, Y_stl, resu_stl) used to build the data and to
//     verify the result through STL_interface,
//   - pristine Interface containers (X_ref, Y_ref),
//   - working Interface containers (X, Y) that calculate() operates on and
//     that initialize() restores from the pristine copies.
template<class Interface>
class Action_axpy {

public :

  // Ctor: sets the coefficient to 1.0, builds the STL data for a problem of
  // the given size and converts it into the Interface containers.
  Action_axpy( int size ):_coef(1.0),_size(size)
  {
    MESSAGE("Action_axpy Ctor");

    // STL vector initialization (X and Y through pseudo_random, the result
    // buffer through null_function)
    init_vector<pseudo_random>(X_stl,_size);
    init_vector<pseudo_random>(Y_stl,_size);
    init_vector<null_function>(resu_stl,_size);

    // generic vector initialization: pristine copies first ...
    Interface::vector_from_stl(X_ref,X_stl);
    Interface::vector_from_stl(Y_ref,Y_stl);

    // ... then the working copies used by calculate()
    Interface::vector_from_stl(X,X_stl);
    Interface::vector_from_stl(Y,Y_stl);
  }

  // Copying an action is not supported: any attempt is reported and aborts
  // the program with status 1.
  Action_axpy( const Action_axpy & )
  {
    INFOS("illegal call to Action_axpy Copy Ctor");
    exit(1);
  }

  // Dtor: releases every Interface container allocated by the ctor.
  ~Action_axpy( void ){
    MESSAGE("Action_axpy Dtor");

    // deallocation
    Interface::free_vector(X_ref);
    Interface::free_vector(Y_ref);

    Interface::free_vector(X);
    Interface::free_vector(Y);
  }

  // action name: "axpy_" followed by the name of the tested interface
  static inline std::string name( void )
  {
    return "axpy_"+Interface::name();
  }

  // Operation count used to convert the measured time into mflops: 2 * _size.
  double nb_op_base( void ){
    return 2.0*_size;
  }

  // Restores the working vectors from the pristine copies.
  inline void initialize( void ){
    Interface::copy_vector(X_ref,X,_size);
    Interface::copy_vector(Y_ref,Y,_size);
  }

  // Runs the benchmarked kernel; the two BTL_ASM_COMMENT markers bracket the
  // call.
  inline void calculate( void ) {
    BTL_ASM_COMMENT("mybegin axpy");
    Interface::axpy(_coef,X,Y,_size);
    BTL_ASM_COMMENT("myend axpy");
  }

  // Compares the Interface result with the one computed by STL_interface.
  // Problems larger than 128 are not verified. When the norm of the
  // difference exceeds 1e-6 the program exits with a non-zero status, so a
  // wrong result is reported as a failure rather than as a successful run.
  void check_result( void ){
    if (_size>128) return;

    // fetch the result produced by the tested interface
    Interface::vector_to_stl(Y,resu_stl);

    // recompute it with the STL implementation
    STL_interface<typename Interface::real_type>::axpy(_coef,X_stl,Y_stl,_size);

    typename Interface::real_type error=
      STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);

    if (error>1.e-6){
      INFOS("WRONG CALCULATION...residual=" << error);
      exit(1);
    }
  }

private :

  // STL-side data: inputs and result fetched from the interface
  typename Interface::stl_vector X_stl;
  typename Interface::stl_vector Y_stl;
  typename Interface::stl_vector resu_stl;

  // pristine Interface-side copies
  typename Interface::gene_vector X_ref;
  typename Interface::gene_vector Y_ref;

  // working Interface-side copies
  typename Interface::gene_vector X;
  typename Interface::gene_vector Y;

  // scalar coefficient passed to axpy
  typename Interface::real_type _coef;

  // problem size given to the ctor
  int _size;
};
#endif

View File

@@ -0,0 +1,128 @@
//=====================================================
// File : action_cholesky.hh
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_CHOLESKY
#define ACTION_CHOLESKY
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::cholesky, called with an input matrix
// X and an output matrix C.
//
// X exists as an STL container, as a pristine Interface container (X_ref)
// and as a working Interface container that initialize() refreshes from the
// pristine one. The operation count is accumulated once in the ctor.
template<class Interface>
class Action_cholesky {

public :

  // Ctor: creates the STL data for the requested size, mirrors it into
  // Interface containers and precomputes the operation count.
  Action_cholesky( int size )
    : _size(size),
      _cost(0)
  {
    MESSAGE("Action_cholesky Ctor");

    // STL side: X through init_matrix_symm/pseudo_random, C through
    // null_function
    init_matrix_symm<pseudo_random>(X_stl,_size);
    init_matrix<null_function>(C_stl,_size);

    // enlarge every diagonal entry (|x| * 100 + 100) so that X is invertible
    for (int d=0; d<_size; ++d)
    {
      X_stl[d][d] = std::abs(X_stl[d][d]) * 1e2 + 100;
    }

    // Interface side: pristine X, working X, output C
    Interface::matrix_from_stl(X_ref,X_stl);
    Interface::matrix_from_stl(X,X_stl);
    Interface::matrix_from_stl(C,C_stl);

    // operation count, summed column by column; "below" is the number of
    // rows under the diagonal in the current column
    for (int col=0; col<_size; ++col)
    {
      double below = std::max(_size - col -1,0);
      _cost += 2*(below*col+below+col);
    }
  }

  // Copy construction is forbidden: it is reported and terminates the program
  // with status 1.
  Action_cholesky( const Action_cholesky & )
  {
    INFOS("illegal call to Action_cholesky Copy Ctor");
    exit(1);
  }

  // Dtor: releases the three Interface containers.
  ~Action_cholesky( void )
  {
    MESSAGE("Action_cholesky Dtor");

    Interface::free_matrix(X_ref,_size);
    Interface::free_matrix(X,_size);
    Interface::free_matrix(C,_size);
  }

  // Name of the action: "cholesky_" followed by the interface name.
  static inline std::string name( void )
  {
    const std::string label = "cholesky_"+Interface::name();
    return label;
  }

  // Operation count accumulated by the ctor (used for the mflops evaluation).
  double nb_op_base( void )
  {
    return _cost;
  }

  // Refreshes the working matrix from the pristine copy.
  inline void initialize( void )
  {
    Interface::copy_matrix(X_ref,X,_size);
  }

  // Runs the benchmarked kernel.
  inline void calculate( void )
  {
    Interface::cholesky(X,C,_size);
  }

  // Result verification is currently disabled: the body is empty. The former
  // check is kept below for reference; note that it refers to resu_stl, which
  // this class does not declare.
  void check_result( void )
  {
    // calculation check
    //     STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
    //
    //     typename Interface::real_type error=
    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
    //
    //     if (error>1.e-6){
    //       INFOS("WRONG CALCULATION...residual=" << error);
    //       exit(0);
    //     }
  }

private :

  // STL-side data
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix C_stl;

  // pristine input, working input and output on the Interface side
  typename Interface::gene_matrix X_ref;
  typename Interface::gene_matrix X;
  typename Interface::gene_matrix C;

  // problem size given to the ctor
  int _size;

  // operation count returned by nb_op_base()
  double _cost;
};
#endif

View File

@@ -0,0 +1,128 @@
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_GER
#define ACTION_GER
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::ger on a _size x _size matrix A and
// two vectors B and X of length _size.
template<class Interface>
class Action_ger {

public :

  // Ctor: creates the STL-side data and mirrors it into the Interface's
  // own matrix/vector types (working copies plus *_ref copies).
  BTL_DONT_INLINE Action_ger( int size ):_size(size)
  {
    MESSAGE("Action_ger Ctor");

    // STL matrix and vector initialization
    // (the unused local stl_matrix that used to be declared here is gone)
    init_matrix<pseudo_random>(A_stl,_size);
    init_vector<pseudo_random>(B_stl,_size);
    init_vector<pseudo_random>(X_stl,_size);
    init_vector<null_function>(resu_stl,_size);

    // generic matrix and vector initialization
    Interface::matrix_from_stl(A_ref,A_stl);
    Interface::matrix_from_stl(A,A_stl);
    Interface::vector_from_stl(B_ref,B_stl);
    Interface::vector_from_stl(B,B_stl);
    Interface::vector_from_stl(X_ref,X_stl);
    Interface::vector_from_stl(X,X_stl);
  }

  // invalidate copy ctor: report and terminate
  Action_ger( const Action_ger & )
  {
    INFOS("illegal call to Action_ger Copy Ctor");
    exit(1);
  }

  // Dtor: releases every Interface-side matrix and vector.
  BTL_DONT_INLINE ~Action_ger( void ){
    MESSAGE("Action_ger Dtor");
    Interface::free_matrix(A,_size);
    Interface::free_vector(B);
    Interface::free_vector(X);
    Interface::free_matrix(A_ref,_size);
    Interface::free_vector(B_ref);
    Interface::free_vector(X_ref);
  }

  // action name: "ger_" followed by the Interface's name
  static inline std::string name( void )
  {
    return "ger_" + Interface::name();
  }

  // Operation count reported for one calculate() call: 2*_size^2.
  double nb_op_base( void ){
    return 2.0*_size*_size;
  }

  // Passes each *_ref object and its working counterpart to the Interface's
  // copy routines (presumably restoring A, B and X - confirm argument order).
  BTL_DONT_INLINE void initialize( void ){
    Interface::copy_matrix(A_ref,A,_size);
    Interface::copy_vector(B_ref,B,_size);
    Interface::copy_vector(X_ref,X,_size);
  }

  // The benchmarked operation, bracketed by assembly markers.
  BTL_DONT_INLINE void calculate( void ) {
    BTL_ASM_COMMENT("#begin ger");
    Interface::ger(A,B,X,_size);
    BTL_ASM_COMMENT("end ger");
  }

  // Compares X (converted to STL) with X_stl after running the STL
  // reference ger. A mismatch above 1.e-3 is only reported, not fatal.
  // NOTE(review): the comparison is on the vector X, while ger receives the
  // matrix A as first argument - if ger updates A rather than X this check
  // does not validate the result; confirm against STL_interface::ger.
  BTL_DONT_INLINE void check_result( void ){
    // calculation check
    Interface::vector_to_stl(X,resu_stl);

    STL_interface<typename Interface::real_type>::ger(A_stl,B_stl,X_stl,_size);

    typename Interface::real_type error=
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);

    if (error>1.e-3){
      INFOS("WRONG CALCULATION...residual=" << error);
      // exit(0);
    }
  }

private :

  // STL-side data
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_vector B_stl;
  typename Interface::stl_vector X_stl;
  typename Interface::stl_vector resu_stl;

  // Interface-side reference copies
  typename Interface::gene_matrix A_ref;
  typename Interface::gene_vector B_ref;
  typename Interface::gene_vector X_ref;

  // Interface-side working data used by calculate()
  typename Interface::gene_matrix A;
  typename Interface::gene_vector B;
  typename Interface::gene_vector X;

  int _size;  // problem dimension given to the constructor
};
#endif

View File

@@ -0,0 +1,233 @@
//=====================================================
// File : action_hessenberg.hh
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_HESSENBERG
#define ACTION_HESSENBERG
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::hessenberg on a square matrix of
// dimension _size.
template<class Interface>
class Action_hessenberg {

public :

  // Ctor: creates the STL-side data, mirrors it into the Interface's own
  // matrix type and precomputes the value returned by nb_op_base().
  Action_hessenberg( int size ):_size(size)
  {
    MESSAGE("Action_hessenberg Ctor");

    // STL-side matrices: input via init_matrix<pseudo_random>, the two
    // result holders via init_matrix<null_function>.
    init_matrix<pseudo_random>(X_stl,_size);
    init_matrix<null_function>(C_stl,_size);
    init_matrix<null_function>(resu_stl,_size);

    // Interface-side matrices built from the STL data.
    Interface::matrix_from_stl(X_ref,X_stl);
    Interface::matrix_from_stl(X,X_stl);
    Interface::matrix_from_stl(C,C_stl);

    // Operation count: for each step < _size-2 add
    // 6 + 3*below + 4*trailing^2 + 4*trailing*_size, with
    // trailing = max(0,_size-step-1) and below = max(0,_size-step-2).
    _cost = 0;
    for (int step=0; step<_size-2; ++step)
    {
      double trailing = std::max(0,_size-step-1);
      double below = std::max(0,_size-step-2);
      _cost += 6 + 3*below + trailing*trailing*4 + trailing*_size*4;
    }
  }

  // Copying an action is not supported: report and terminate.
  Action_hessenberg( const Action_hessenberg & )
  {
    INFOS("illegal call to Action_hessenberg Copy Ctor");
    exit(1);
  }

  // Dtor: releases the three Interface-side matrices.
  ~Action_hessenberg()
  {
    MESSAGE("Action_hessenberg Dtor");

    Interface::free_matrix(X_ref,_size);
    Interface::free_matrix(X,_size);
    Interface::free_matrix(C,_size);
  }

  // Action name: "hessenberg_" followed by the Interface's name.
  static inline std::string name()
  {
    return "hessenberg_"+Interface::name();
  }

  // Operation count computed in the constructor.
  double nb_op_base()
  {
    return _cost;
  }

  // Passes X_ref and X to Interface::copy_matrix (presumably restoring X
  // from X_ref before a run - confirm argument order against the Interface).
  inline void initialize()
  {
    Interface::copy_matrix(X_ref,X,_size);
  }

  // The benchmarked operation.
  inline void calculate()
  {
    Interface::hessenberg(X,C,_size);
  }

  // Converts C to its STL form; the comparison against an STL reference
  // is currently disabled and kept below for reference.
  void check_result()
  {
    Interface::matrix_to_stl(C,resu_stl);

    //     STL_interface<typename Interface::real_type>::hessenberg(X_stl,C_stl,_size);
    //
    //     typename Interface::real_type error=
    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
    //
    //     if (error>1.e-6){
    //       INFOS("WRONG CALCULATION...residual=" << error);
    //       exit(0);
    //     }
  }

private :

  // STL-side data
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix C_stl;
  typename Interface::stl_matrix resu_stl;

  // Interface-side data
  typename Interface::gene_matrix X_ref;
  typename Interface::gene_matrix X;
  typename Interface::gene_matrix C;

  int _size;     // matrix dimension given to the constructor
  double _cost;  // operation count returned by nb_op_base()
};
// Benchmark action wrapping Interface::tridiagonalization on a square
// matrix of dimension _size whose lower triangle mirrors its upper one.
template<class Interface>
class Action_tridiagonalization {

public :

  // Ctor: creates the STL-side data, mirrors it into the Interface's own
  // matrix type and precomputes the value returned by nb_op_base().
  Action_tridiagonalization( int size ):_size(size)
  {
    MESSAGE("Action_tridiagonalization Ctor");

    // Input matrix via init_matrix<pseudo_random>, then overwrite every
    // entry [row][col] with col < row by its mirror [col][row].
    init_matrix<pseudo_random>(X_stl,_size);
    for(int row=0; row<_size; ++row)
    {
      for(int col=0; col<row; ++col)
        X_stl[row][col] = X_stl[col][row];
    }

    // Result holders via init_matrix<null_function>.
    init_matrix<null_function>(C_stl,_size);
    init_matrix<null_function>(resu_stl,_size);

    // Interface-side matrices built from the STL data.
    Interface::matrix_from_stl(X_ref,X_stl);
    Interface::matrix_from_stl(X,X_stl);
    Interface::matrix_from_stl(C,C_stl);

    // Operation count: for each step < _size-2 add
    // 6 + 3*below + 8*trailing^2, with trailing = max(0,_size-step-1)
    // and below = max(0,_size-step-2).
    _cost = 0;
    for (int step=0; step<_size-2; ++step)
    {
      double trailing = std::max(0,_size-step-1);
      double below = std::max(0,_size-step-2);
      _cost += 6. + 3.*below + trailing*trailing*8.;
    }
  }

  // Copying an action is not supported: report and terminate.
  Action_tridiagonalization( const Action_tridiagonalization & )
  {
    INFOS("illegal call to Action_tridiagonalization Copy Ctor");
    exit(1);
  }

  // Dtor: releases the three Interface-side matrices.
  ~Action_tridiagonalization()
  {
    MESSAGE("Action_tridiagonalization Dtor");

    Interface::free_matrix(X_ref,_size);
    Interface::free_matrix(X,_size);
    Interface::free_matrix(C,_size);
  }

  // Action name: "tridiagonalization_" followed by the Interface's name.
  static inline std::string name()
  {
    return "tridiagonalization_"+Interface::name();
  }

  // Operation count computed in the constructor.
  double nb_op_base()
  {
    return _cost;
  }

  // Passes X_ref and X to Interface::copy_matrix (presumably restoring X
  // from X_ref before a run - confirm argument order against the Interface).
  inline void initialize()
  {
    Interface::copy_matrix(X_ref,X,_size);
  }

  // The benchmarked operation.
  inline void calculate()
  {
    Interface::tridiagonalization(X,C,_size);
  }

  // Converts C to its STL form; the comparison against an STL reference
  // is currently disabled and kept below for reference.
  void check_result()
  {
    Interface::matrix_to_stl(C,resu_stl);

    //     STL_interface<typename Interface::real_type>::tridiagonalization(X_stl,C_stl,_size);
    //
    //     typename Interface::real_type error=
    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
    //
    //     if (error>1.e-6){
    //       INFOS("WRONG CALCULATION...residual=" << error);
    //       exit(0);
    //     }
  }

private :

  // STL-side data
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix C_stl;
  typename Interface::stl_matrix resu_stl;

  // Interface-side data
  typename Interface::gene_matrix X_ref;
  typename Interface::gene_matrix X;
  typename Interface::gene_matrix C;

  int _size;     // matrix dimension given to the constructor
  double _cost;  // operation count returned by nb_op_base()
};
#endif

View File

@@ -0,0 +1,124 @@
//=====================================================
// File : action_lu_decomp.hh
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_LU_DECOMP
#define ACTION_LU_DECOMP
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::lu_decomp on a square matrix of
// dimension _size (reported under the name "complete_lu_decomp_...").
template<class Interface>
class Action_lu_decomp {

public :

  // Ctor: creates the STL-side data, mirrors it into the Interface's own
  // matrix type and precomputes the value returned by nb_op_base().
  Action_lu_decomp( int size ):_size(size)
  {
    MESSAGE("Action_lu_decomp Ctor");

    // STL vector initialization
    init_matrix<pseudo_random>(X_stl,_size);
    init_matrix<null_function>(C_stl,_size);
    init_matrix<null_function>(resu_stl,_size);

    // generic matrix and vector initialization
    Interface::matrix_from_stl(X_ref,X_stl);
    Interface::matrix_from_stl(X,X_stl);
    Interface::matrix_from_stl(C,C_stl);

    // Operation count 2/3*size^3 + size^2. The square is evaluated in
    // double so that it cannot overflow int for large sizes.
    _cost = 2.0*size*size*size/3.0 + double(size)*size;
  }

  // invalidate copy ctor: report and terminate
  Action_lu_decomp( const Action_lu_decomp & )
  {
    INFOS("illegal call to Action_lu_decomp Copy Ctor");
    exit(1);
  }

  // Dtor: releases the three Interface-side matrices.
  ~Action_lu_decomp( void ){
    MESSAGE("Action_lu_decomp Dtor");

    // deallocation
    Interface::free_matrix(X_ref,_size);
    Interface::free_matrix(X,_size);
    Interface::free_matrix(C,_size);
  }

  // action name: "complete_lu_decomp_" followed by the Interface's name
  static inline std::string name( void )
  {
    return "complete_lu_decomp_"+Interface::name();
  }

  // Operation count computed in the constructor.
  double nb_op_base( void ){
    return _cost;
  }

  // Passes X_ref and X to Interface::copy_matrix (presumably restoring X
  // from X_ref before a run - confirm argument order against the Interface).
  inline void initialize( void ){
    Interface::copy_matrix(X_ref,X,_size);
  }

  // The benchmarked operation.
  inline void calculate( void ) {
    Interface::lu_decomp(X,C,_size);
  }

  // Converts C to its STL form; the comparison against an STL reference
  // is currently disabled and kept below for reference.
  void check_result( void ){
    // calculation check
    Interface::matrix_to_stl(C,resu_stl);

    //     STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
    //
    //     typename Interface::real_type error=
    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
    //
    //     if (error>1.e-6){
    //       INFOS("WRONG CALCULATION...residual=" << error);
    //       exit(0);
    //     }
  }

private :

  // STL-side data
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix C_stl;
  typename Interface::stl_matrix resu_stl;

  // Interface-side data
  typename Interface::gene_matrix X_ref;
  typename Interface::gene_matrix X;
  typename Interface::gene_matrix C;

  int _size;     // matrix dimension given to the constructor
  double _cost;  // operation count returned by nb_op_base()
};
#endif

View File

@@ -0,0 +1,136 @@
//=====================================================
// File : action_lu_solve.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_LU_SOLVE
#define ACTION_LU_SOLVE
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Self-timed benchmark action: factorizes a matrix with Interface::LU_factor
// and solves a linear system with Interface::LU_solve, nb_calc times.
// Unlike the object-style actions, everything here is static.
template<class Interface>
class Action_lu_solve
{

public :

  // action name: "lu_solve_" followed by the Interface's name
  static inline std::string name( void )
  {
    return "lu_solve_"+Interface::name();
  }

  // Operation count reported for one factor+solve: 2/3*size^3.
  static double nb_op_base(int size){
    return 2.0*size*size*size/3.0; // questionable but not really important
  }

  // Runs nb_calc factor+solve iterations on a size x size system, verifies
  // the last solution and returns the time reported by
  // Portable_Timer::user_time().
  // Terminates the process with a non-zero status when the residual between
  // B and A*X exceeds 1.e-5.
  static double calculate( int nb_calc, int size ) {

    // STL matrix and vector initialization
    typename Interface::stl_matrix A_stl;
    typename Interface::stl_vector B_stl;
    typename Interface::stl_vector X_stl;

    init_matrix<pseudo_random>(A_stl,size);
    init_vector<pseudo_random>(B_stl,size);
    init_vector<null_function>(X_stl,size);

    // generic matrix and vector initialization
    typename Interface::gene_matrix A;
    typename Interface::gene_vector B;
    typename Interface::gene_vector X;
    typename Interface::gene_matrix LU;

    Interface::matrix_from_stl(A,A_stl);
    Interface::vector_from_stl(B,B_stl);
    Interface::vector_from_stl(X,X_stl);
    Interface::matrix_from_stl(LU,A_stl);

    // local variable :
    typename Interface::Pivot_Vector pivot; // pivot vector
    Interface::new_Pivot_Vector(pivot,size);

    // timer utilities
    Portable_Timer chronos;

    // time measurement
    chronos.start();

    for (int ii=0;ii<nb_calc;ii++){

      // LU factorization: copy_matrix is given A and LU before each
      // factorization (presumably refreshing LU from A - confirm order)
      Interface::copy_matrix(A,LU,size);
      Interface::LU_factor(LU,pivot,size);

      // LU solve
      Interface::LU_solve(LU,pivot,B,X,size);
    }

    // Time stop
    chronos.stop();

    double time=chronos.user_time();

    // check result : compare B with A*X computed on the STL side
    typename Interface::stl_vector B_new_stl(size);
    Interface::vector_to_stl(X,X_stl);

    STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl,X_stl,B_new_stl,size);

    typename Interface::real_type error=
      STL_interface<typename Interface::real_type>::norm_diff(B_stl,B_new_stl);

    if (error>1.e-5){
      INFOS("WRONG CALCULATION...residual=" << error);
      STL_interface<typename Interface::real_type>::display_vector(B_stl);
      STL_interface<typename Interface::real_type>::display_vector(B_new_stl);
      // a failed verification must not look like a successful run
      exit(1);
    }

    // deallocation and return time
    Interface::free_matrix(A,size);
    // LU was created with matrix_from_stl just like A and has to be
    // released the same way
    Interface::free_matrix(LU,size);
    Interface::free_vector(B);
    Interface::free_vector(X);
    Interface::free_Pivot_Vector(pivot);

    return time;
  }

};
#endif

View File

@@ -0,0 +1,150 @@
//=====================================================
// File : action_matrix_matrix_product.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_MATRIX_MATRIX_PRODUCT
#define ACTION_MATRIX_MATRIX_PRODUCT
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::matrix_matrix_product on three
// square matrices A, B and X of dimension _size.
template<class Interface>
class Action_matrix_matrix_product {

public :

  // Ctor: creates the STL-side data and mirrors it into the Interface's
  // own matrix type (working copies plus *_ref copies).
  Action_matrix_matrix_product( int size ):_size(size)
  {
    MESSAGE("Action_matrix_matrix_product Ctor");

    // STL matrix and vector initialization
    init_matrix<pseudo_random>(A_stl,_size);
    init_matrix<pseudo_random>(B_stl,_size);
    init_matrix<null_function>(X_stl,_size);
    init_matrix<null_function>(resu_stl,_size);

    // generic matrix and vector initialization
    Interface::matrix_from_stl(A_ref,A_stl);
    Interface::matrix_from_stl(B_ref,B_stl);
    Interface::matrix_from_stl(X_ref,X_stl);

    Interface::matrix_from_stl(A,A_stl);
    Interface::matrix_from_stl(B,B_stl);
    Interface::matrix_from_stl(X,X_stl);
  }

  // invalidate copy ctor: report and terminate with a failure status
  // (the other actions in this family also use exit(1) here)
  Action_matrix_matrix_product( const Action_matrix_matrix_product & )
  {
    INFOS("illegal call to Action_matrix_matrix_product Copy Ctor");
    exit(1);
  }

  // Dtor: releases every Interface-side matrix.
  ~Action_matrix_matrix_product( void ){
    MESSAGE("Action_matrix_matrix_product Dtor");

    // deallocation
    Interface::free_matrix(A,_size);
    Interface::free_matrix(B,_size);
    Interface::free_matrix(X,_size);

    Interface::free_matrix(A_ref,_size);
    Interface::free_matrix(B_ref,_size);
    Interface::free_matrix(X_ref,_size);
  }

  // action name: "matrix_matrix_" followed by the Interface's name
  static inline std::string name( void )
  {
    return "matrix_matrix_"+Interface::name();
  }

  // Operation count reported for one calculate() call: 2*_size^3.
  double nb_op_base( void ){
    return 2.0*_size*_size*_size;
  }

  // Passes each *_ref matrix and its working counterpart to
  // Interface::copy_matrix (presumably restoring A, B and X - confirm
  // argument order against the Interface).
  inline void initialize( void ){
    Interface::copy_matrix(A_ref,A,_size);
    Interface::copy_matrix(B_ref,B,_size);
    Interface::copy_matrix(X_ref,X,_size);
  }

  // The benchmarked operation.
  inline void calculate( void ) {
    Interface::matrix_matrix_product(A,B,X,_size);
  }

  // Compares X with the STL reference product. The check only runs for
  // _size < 200; a residual above 1.e-6 terminates the process.
  void check_result( void ){

    // calculation check
    if (_size<200)
    {
      Interface::matrix_to_stl(X,resu_stl);
      STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
      typename Interface::real_type error=
        STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
      if (error>1.e-6){
        INFOS("WRONG CALCULATION...residual=" << error);
        exit(1);
      }
    }
  }

private :

  // STL-side data
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_matrix B_stl;
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix resu_stl;

  // Interface-side reference copies
  typename Interface::gene_matrix A_ref;
  typename Interface::gene_matrix B_ref;
  typename Interface::gene_matrix X_ref;

  // Interface-side working data used by calculate()
  typename Interface::gene_matrix A;
  typename Interface::gene_matrix B;
  typename Interface::gene_matrix X;

  int _size;  // matrix dimension given to the constructor
};
#endif

View File

@@ -0,0 +1,152 @@
//=====================================================
// File : action_matrix_matrix_product_bis.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_MATRIX_MATRIX_PRODUCT_BIS
#define ACTION_MATRIX_MATRIX_PRODUCT_BIS
#include "utilities.h"
#include "STL_interface.hh"
#include "STL_timer.hh"
#include <string>
#include "init_function.hh"
#include "init_vector.hh"
#include "init_matrix.hh"
using namespace std;
template<class Interface>
class Action_matrix_matrix_product_bis {
public :
static inline std::string name( void )
{
return "matrix_matrix_"+Interface::name();
}
static double nb_op_base(int size){
return 2.0*size*size*size;
}
static double calculate( int nb_calc, int size ) {
// STL matrix and vector initialization
typename Interface::stl_matrix A_stl;
typename Interface::stl_matrix B_stl;
typename Interface::stl_matrix X_stl;
init_matrix<pseudo_random>(A_stl,size);
init_matrix<pseudo_random>(B_stl,size);
init_matrix<null_function>(X_stl,size);
// generic matrix and vector initialization
typename Interface::gene_matrix A_ref;
typename Interface::gene_matrix B_ref;
typename Interface::gene_matrix X_ref;
typename Interface::gene_matrix A;
typename Interface::gene_matrix B;
typename Interface::gene_matrix X;
Interface::matrix_from_stl(A_ref,A_stl);
Interface::matrix_from_stl(B_ref,B_stl);
Interface::matrix_from_stl(X_ref,X_stl);
Interface::matrix_from_stl(A,A_stl);
Interface::matrix_from_stl(B,B_stl);
Interface::matrix_from_stl(X,X_stl);
// STL_timer utilities
STL_timer chronos;
// Baseline evaluation
chronos.start_baseline(nb_calc);
do {
Interface::copy_matrix(A_ref,A,size);
Interface::copy_matrix(B_ref,B,size);
Interface::copy_matrix(X_ref,X,size);
// Interface::matrix_matrix_product(A,B,X,size); This line must be commented !!!!
}
while(chronos.check());
chronos.report(true);
// Time measurement
chronos.start(nb_calc);
do {
Interface::copy_matrix(A_ref,A,size);
Interface::copy_matrix(B_ref,B,size);
Interface::copy_matrix(X_ref,X,size);
Interface::matrix_matrix_product(A,B,X,size); // here it is not commented !!!!
}
while(chronos.check());
chronos.report(true);
double time=chronos.calculated_time/2000.0;
// calculation check
typename Interface::stl_matrix resu_stl(size);
Interface::matrix_to_stl(X,resu_stl);
STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
if (error>1.e-6){
INFOS("WRONG CALCULATION...residual=" << error);
exit(1);
}
// deallocation and return time
Interface::free_matrix(A,size);
Interface::free_matrix(B,size);
Interface::free_matrix(X,size);
Interface::free_matrix(A_ref,size);
Interface::free_matrix(B_ref,size);
Interface::free_matrix(X_ref,size);
return time;
}
};
#endif

View File

@@ -0,0 +1,153 @@
//=====================================================
// File : action_matrix_vector_product.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_MATRIX_VECTOR_PRODUCT
#define ACTION_MATRIX_VECTOR_PRODUCT
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::matrix_vector_product on a
// _size x _size matrix A and two vectors B and X of length _size.
template<class Interface>
class Action_matrix_vector_product {

public :

  // Ctor: creates the STL-side data and mirrors it into the Interface's
  // own matrix/vector types (working copies plus *_ref copies).
  BTL_DONT_INLINE Action_matrix_vector_product( int size ):_size(size)
  {
    MESSAGE("Action_matrix_vector_product Ctor");

    // STL matrix and vector initialization
    init_matrix<pseudo_random>(A_stl,_size);
    init_vector<pseudo_random>(B_stl,_size);
    init_vector<null_function>(X_stl,_size);
    init_vector<null_function>(resu_stl,_size);

    // generic matrix and vector initialization
    Interface::matrix_from_stl(A_ref,A_stl);
    Interface::matrix_from_stl(A,A_stl);
    Interface::vector_from_stl(B_ref,B_stl);
    Interface::vector_from_stl(B,B_stl);
    Interface::vector_from_stl(X_ref,X_stl);
    Interface::vector_from_stl(X,X_stl);
  }

  // invalidate copy ctor: report and terminate
  Action_matrix_vector_product( const Action_matrix_vector_product & )
  {
    INFOS("illegal call to Action_matrix_vector_product Copy Ctor");
    exit(1);
  }

  // Dtor: releases every Interface-side matrix and vector.
  BTL_DONT_INLINE ~Action_matrix_vector_product( void ){
    MESSAGE("Action_matrix_vector_product Dtor");

    // deallocation
    Interface::free_matrix(A,_size);
    Interface::free_vector(B);
    Interface::free_vector(X);

    Interface::free_matrix(A_ref,_size);
    Interface::free_vector(B_ref);
    Interface::free_vector(X_ref);
  }

  // action name: "matrix_vector_" followed by the Interface's name
  static inline std::string name( void )
  {
    return "matrix_vector_" + Interface::name();
  }

  // Operation count reported for one calculate() call: 2*_size^2.
  double nb_op_base( void ){
    return 2.0*_size*_size;
  }

  // Passes each *_ref object and its working counterpart to the Interface's
  // copy routines (presumably restoring A, B and X - confirm argument order).
  BTL_DONT_INLINE void initialize( void ){
    Interface::copy_matrix(A_ref,A,_size);
    Interface::copy_vector(B_ref,B,_size);
    Interface::copy_vector(X_ref,X,_size);
  }

  // The benchmarked operation, bracketed by assembly markers.
  BTL_DONT_INLINE void calculate( void ) {
    BTL_ASM_COMMENT("#begin matrix_vector_product");
    Interface::matrix_vector_product(A,B,X,_size);
    BTL_ASM_COMMENT("end matrix_vector_product");
  }

  // Compares X with the STL reference product; a residual above 1.e-5
  // terminates the process with a failure status.
  BTL_DONT_INLINE void check_result( void ){

    // calculation check
    Interface::vector_to_stl(X,resu_stl);

    STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl,B_stl,X_stl,_size);

    typename Interface::real_type error=
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);

    if (error>1.e-5){
      INFOS("WRONG CALCULATION...residual=" << error);
      // a failed verification must not look like a successful run
      exit(1);
    }
  }

private :

  // STL-side data
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_vector B_stl;
  typename Interface::stl_vector X_stl;
  typename Interface::stl_vector resu_stl;

  // Interface-side reference copies
  typename Interface::gene_matrix A_ref;
  typename Interface::gene_vector B_ref;
  typename Interface::gene_vector X_ref;

  // Interface-side working data used by calculate()
  typename Interface::gene_matrix A;
  typename Interface::gene_vector B;
  typename Interface::gene_vector X;

  int _size;  // problem dimension given to the constructor
};
#endif

View File

@@ -0,0 +1,125 @@
//=====================================================
// File : action_partial_lu.hh
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_PARTIAL_LU
#define ACTION_PARTIAL_LU
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::partial_lu_decomp on a square matrix
// of dimension _size.
template<class Interface>
class Action_partial_lu {

public :

  // Ctor: creates the STL-side data, mirrors it into the Interface's own
  // matrix type and precomputes the value returned by nb_op_base().
  Action_partial_lu( int size ):_size(size)
  {
    MESSAGE("Action_partial_lu Ctor");

    // STL vector initialization
    init_matrix<pseudo_random>(X_stl,_size);
    init_matrix<null_function>(C_stl,_size);

    // make sure X is invertible: each diagonal entry becomes x*1e2 + 1
    for (int i=0; i<_size; ++i)
      X_stl[i][i] = X_stl[i][i] * 1e2 + 1;

    // generic matrix and vector initialization
    Interface::matrix_from_stl(X_ref,X_stl);
    Interface::matrix_from_stl(X,X_stl);
    Interface::matrix_from_stl(C,C_stl);

    // Operation count 2/3*size^3 + size^2. The square is evaluated in
    // double so that it cannot overflow int for large sizes.
    _cost = 2.0*size*size*size/3.0 + double(size)*size;
  }

  // invalidate copy ctor: report and terminate
  Action_partial_lu( const Action_partial_lu & )
  {
    INFOS("illegal call to Action_partial_lu Copy Ctor");
    exit(1);
  }

  // Dtor: releases the three Interface-side matrices.
  ~Action_partial_lu( void ){
    MESSAGE("Action_partial_lu Dtor");

    // deallocation
    Interface::free_matrix(X_ref,_size);
    Interface::free_matrix(X,_size);
    Interface::free_matrix(C,_size);
  }

  // action name: "partial_lu_decomp_" followed by the Interface's name
  static inline std::string name( void )
  {
    return "partial_lu_decomp_"+Interface::name();
  }

  // Operation count computed in the constructor.
  double nb_op_base( void ){
    return _cost;
  }

  // Passes X_ref and X to Interface::copy_matrix (presumably restoring X
  // from X_ref before a run - confirm argument order against the Interface).
  inline void initialize( void ){
    Interface::copy_matrix(X_ref,X,_size);
  }

  // The benchmarked operation.
  inline void calculate( void ) {
    Interface::partial_lu_decomp(X,C,_size);
  }

  // Result verification is currently disabled; the former check is kept
  // below for reference.
  void check_result( void ){
    // calculation check
    //     Interface::matrix_to_stl(C,resu_stl);

    //     STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
    //
    //     typename Interface::real_type error=
    //       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
    //
    //     if (error>1.e-6){
    //       INFOS("WRONG CALCULATION...residual=" << error);
    //       exit(0);
    //     }
  }

private :

  // STL-side data
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix C_stl;

  // Interface-side data
  typename Interface::gene_matrix X_ref;
  typename Interface::gene_matrix X;
  typename Interface::gene_matrix C;

  int _size;     // matrix dimension given to the constructor
  double _cost;  // operation count returned by nb_op_base()
};
#endif

View File

@@ -0,0 +1,116 @@
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_ROT
#define ACTION_ROT
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
// Benchmark action wrapping Interface::rot on two vectors A and B of
// length _size, called with the fixed coefficients 0.5 and 0.6.
template<class Interface>
class Action_rot {

public :

  // Ctor: creates the STL-side vectors and mirrors them into the
  // Interface's own vector type (working copies plus *_ref copies).
  BTL_DONT_INLINE Action_rot( int size ):_size(size)
  {
    MESSAGE("Action_rot Ctor");

    // STL matrix and vector initialization
    // (the unused local stl_matrix that used to be declared here is gone)
    init_vector<pseudo_random>(A_stl,_size);
    init_vector<pseudo_random>(B_stl,_size);

    // generic matrix and vector initialization
    Interface::vector_from_stl(A_ref,A_stl);
    Interface::vector_from_stl(A,A_stl);
    Interface::vector_from_stl(B_ref,B_stl);
    Interface::vector_from_stl(B,B_stl);
  }

  // invalidate copy ctor: report and terminate
  Action_rot( const Action_rot & )
  {
    INFOS("illegal call to Action_rot Copy Ctor");
    exit(1);
  }

  // Dtor: releases every Interface-side vector.
  BTL_DONT_INLINE ~Action_rot( void ){
    MESSAGE("Action_rot Dtor");
    Interface::free_vector(A);
    Interface::free_vector(B);
    Interface::free_vector(A_ref);
    Interface::free_vector(B_ref);
  }

  // action name: "rot_" followed by the Interface's name
  static inline std::string name( void )
  {
    return "rot_" + Interface::name();
  }

  // Operation count reported for one calculate() call: 6*_size.
  double nb_op_base( void ){
    return 6.0*_size;
  }

  // Passes each *_ref vector and its working counterpart to
  // Interface::copy_vector (presumably restoring A and B - confirm
  // argument order against the Interface).
  BTL_DONT_INLINE void initialize( void ){
    Interface::copy_vector(A_ref,A,_size);
    Interface::copy_vector(B_ref,B,_size);
  }

  // The benchmarked operation, bracketed by assembly markers.
  BTL_DONT_INLINE void calculate( void ) {
    BTL_ASM_COMMENT("#begin rot");
    Interface::rot(A,B,0.5,0.6,_size);
    BTL_ASM_COMMENT("end rot");
  }

  // Result verification is currently disabled; the former check is kept
  // below for reference.
  BTL_DONT_INLINE void check_result( void ){
    // calculation check
    //     Interface::vector_to_stl(X,resu_stl);

    //     STL_interface<typename Interface::real_type>::rot(A_stl,B_stl,X_stl,_size);

    //     typename Interface::real_type error=
    //       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);

    //     if (error>1.e-3){
    //       INFOS("WRONG CALCULATION...residual=" << error);
    //       exit(0);
    //     }
  }

private :

  // STL-side data
  typename Interface::stl_vector A_stl;
  typename Interface::stl_vector B_stl;

  // Interface-side reference copies
  typename Interface::gene_vector A_ref;
  typename Interface::gene_vector B_ref;

  // Interface-side working data used by calculate()
  typename Interface::gene_vector A;
  typename Interface::gene_vector B;

  int _size;  // vector length given to the constructor
};
#endif

View File

@@ -0,0 +1,139 @@
//=====================================================
// File : action_symv.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_SYMV
#define ACTION_SYMV
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
/// Benchmark action timing Interface::symv on a matrix built with
/// init_matrix_symm and a pseudo-random vector.
template<class Interface>
class Action_symv {

public :

  /// Prepares STL data of dimension `size` and mirrors it into reference and
  /// working objects of the interface's own types.
  BTL_DONT_INLINE Action_symv( int size )
    : _size(size)
  {
    MESSAGE("Action_symv Ctor");

    // STL side: matrix, right-hand vector, and two zero-initialised vectors
    init_matrix_symm<pseudo_random>(A_stl,_size);
    init_vector<pseudo_random>(B_stl,_size);
    init_vector<null_function>(X_stl,_size);
    init_vector<null_function>(resu_stl,_size);

    // interface side: one reference copy and one working copy of each operand
    Interface::matrix_from_stl(A_ref,A_stl);
    Interface::matrix_from_stl(A,A_stl);

    Interface::vector_from_stl(B_ref,B_stl);
    Interface::vector_from_stl(B,B_stl);

    Interface::vector_from_stl(X_ref,X_stl);
    Interface::vector_from_stl(X,X_stl);
  }

  /// Copy construction is forbidden; it reports the call and exits.
  Action_symv( const Action_symv & )
  {
    INFOS("illegal call to Action_symv Copy Ctor");
    exit(1);
  }

  /// Frees every interface object allocated by the constructor.
  BTL_DONT_INLINE ~Action_symv( void )
  {
    Interface::free_matrix(A,_size);
    Interface::free_vector(B);
    Interface::free_vector(X);
    Interface::free_matrix(A_ref,_size);
    Interface::free_vector(B_ref);
    Interface::free_vector(X_ref);
  }

  /// "symv_" prefixed to the interface name.
  static inline std::string name( void )
  {
    std::string label("symv_");
    label += Interface::name();
    return label;
  }

  /// Operation count reported for this action: 2 * size * size.
  double nb_op_base( void )
  {
    const double twice_size = 2.0*_size;
    return twice_size*_size;
  }

  /// Runs copy_matrix / copy_vector between the reference and working objects
  /// before a measurement.
  BTL_DONT_INLINE void initialize( void )
  {
    Interface::copy_matrix(A_ref,A,_size);
    Interface::copy_vector(B_ref,B,_size);
    Interface::copy_vector(X_ref,X,_size);
  }

  /// The timed call.
  BTL_DONT_INLINE void calculate( void )
  {
    BTL_ASM_COMMENT("#begin symv");
    Interface::symv(A,B,X,_size);
    BTL_ASM_COMMENT("end symv");
  }

  /// For sizes up to 128, compares the interface result with the STL
  /// reference implementation and exits when the residual exceeds 1e-5.
  BTL_DONT_INLINE void check_result( void )
  {
    if (_size<=128){
      typedef typename Interface::real_type Real;

      Interface::vector_to_stl(X,resu_stl);
      STL_interface<Real>::symv(A_stl,B_stl,X_stl,_size);

      const Real residual = STL_interface<Real>::norm_diff(X_stl,resu_stl);
      if (residual>1.e-5){
        INFOS("WRONG CALCULATION...residual=" << residual);
        exit(0);
      }
    }
  }

private :

  // STL-side data
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_vector B_stl;
  typename Interface::stl_vector X_stl;
  typename Interface::stl_vector resu_stl;

  // interface-side reference copies
  typename Interface::gene_matrix A_ref;
  typename Interface::gene_vector B_ref;
  typename Interface::gene_vector X_ref;

  // interface-side working objects
  typename Interface::gene_matrix A;
  typename Interface::gene_vector B;
  typename Interface::gene_vector X;

  int _size;
};
#endif

View File

@@ -0,0 +1,133 @@
//=====================================================
// File : action_syr2.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_SYR2
#define ACTION_SYR2
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
/// Benchmark action that times Interface::syr2 on a pseudo-random matrix and
/// two pseudo-random vectors.
template<class Interface>
class Action_syr2 {

public :

  /// Builds STL data of dimension `size` and converts it into reference and
  /// working objects of the interface's types.
  BTL_DONT_INLINE Action_syr2( int size ):_size(size)
  {
    // STL matrix and vector initialization
    init_matrix<pseudo_random>(A_stl,_size);
    init_vector<pseudo_random>(B_stl,_size);
    init_vector<pseudo_random>(X_stl,_size);
    init_vector<null_function>(resu_stl,_size);

    // generic matrix and vector initialization
    Interface::matrix_from_stl(A_ref,A_stl);
    Interface::matrix_from_stl(A,A_stl);
    Interface::vector_from_stl(B_ref,B_stl);
    Interface::vector_from_stl(B,B_stl);
    Interface::vector_from_stl(X_ref,X_stl);
    Interface::vector_from_stl(X,X_stl);
  }

  /// Copying an action is not supported: reports the call and terminates.
  Action_syr2( const Action_syr2 & )
  {
    INFOS("illegal call to Action_syr2 Copy Ctor");
    exit(1);
  }

  /// Releases the interface objects created by the constructor.
  BTL_DONT_INLINE ~Action_syr2( void ){
    Interface::free_matrix(A,_size);
    Interface::free_vector(B);
    Interface::free_vector(X);
    Interface::free_matrix(A_ref,_size);
    Interface::free_vector(B_ref);
    Interface::free_vector(X_ref);
  }

  /// Action name: "syr2_" followed by the interface name.
  static inline std::string name( void )
  {
    return "syr2_" + Interface::name();
  }

  /// Operation count used for the MFLOPS figure: 2 * size * size.
  double nb_op_base( void ){
    return 2.0*_size*_size;
  }

  /// Called before a measurement; runs copy_matrix / copy_vector between the
  /// reference and working objects.
  BTL_DONT_INLINE void initialize( void ){
    Interface::copy_matrix(A_ref,A,_size);
    Interface::copy_vector(B_ref,B,_size);
    Interface::copy_vector(X_ref,X,_size);
  }

  /// The timed operation.
  BTL_DONT_INLINE void calculate( void ) {
    BTL_ASM_COMMENT("#begin syr2");
    Interface::syr2(A,B,X,_size);
    BTL_ASM_COMMENT("end syr2");
  }

  /// Compares the vector X with the STL reference and logs a message when
  /// the residual exceeds 1e-3; the exit is disabled, so a mismatch is only
  /// reported.
  /// NOTE(review): the comparison is made on the vector X, not on the matrix
  /// A -- confirm that this is the operand syr2 is expected to update.
  BTL_DONT_INLINE void check_result( void ){
    // calculation check
    Interface::vector_to_stl(X,resu_stl);

    STL_interface<typename Interface::real_type>::syr2(A_stl,B_stl,X_stl,_size);

    typename Interface::real_type error=
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);

    if (error>1.e-3){
      INFOS("WRONG CALCULATION...residual=" << error);
      // exit(0);
    }
  }

private :

  // STL-side data
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_vector B_stl;
  typename Interface::stl_vector X_stl;
  typename Interface::stl_vector resu_stl;

  // interface-side reference copies
  typename Interface::gene_matrix A_ref;
  typename Interface::gene_vector B_ref;
  typename Interface::gene_vector X_ref;

  // interface-side working objects
  typename Interface::gene_matrix A;
  typename Interface::gene_vector B;
  typename Interface::gene_vector X;

  int _size;
};
#endif

View File

@@ -0,0 +1,137 @@
//=====================================================
// File : action_trisolve.hh
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_TRISOLVE
#define ACTION_TRISOLVE
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
template<class Interface>
class Action_trisolve {
public :
// Ctor
Action_trisolve( int size ):_size(size)
{
MESSAGE("Action_trisolve Ctor");
// STL vector initialization
init_matrix<pseudo_random>(L_stl,_size);
init_vector<pseudo_random>(B_stl,_size);
init_vector<null_function>(X_stl,_size);
for (int j=0; j<_size; ++j)
{
for (int i=0; i<j; ++i)
L_stl[j][i] = 0;
L_stl[j][j] += 3;
}
init_vector<null_function>(resu_stl,_size);
// generic matrix and vector initialization
Interface::matrix_from_stl(L,L_stl);
Interface::vector_from_stl(X,X_stl);
Interface::vector_from_stl(B,B_stl);
_cost = 0;
for (int j=0; j<_size; ++j)
{
_cost += 2*j + 1;
}
}
// invalidate copy ctor
Action_trisolve( const Action_trisolve & )
{
INFOS("illegal call to Action_trisolve Copy Ctor");
exit(1);
}
// Dtor
~Action_trisolve( void ){
MESSAGE("Action_trisolve Dtor");
// deallocation
Interface::free_matrix(L,_size);
Interface::free_vector(B);
Interface::free_vector(X);
}
// action name
static inline std::string name( void )
{
return "trisolve_vector_"+Interface::name();
}
double nb_op_base( void ){
return _cost;
}
inline void initialize( void ){
//Interface::copy_vector(X_ref,X,_size);
}
inline void calculate( void ) {
Interface::trisolve_lower(L,B,X,_size);
}
void check_result(){
if (_size>128) return;
// calculation check
Interface::vector_to_stl(X,resu_stl);
STL_interface<typename Interface::real_type>::trisolve_lower(L_stl,B_stl,X_stl,_size);
typename Interface::real_type error=
STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
if (error>1.e-4){
INFOS("WRONG CALCULATION...residual=" << error);
exit(2);
} //else INFOS("CALCULATION OK...residual=" << error);
}
private :
typename Interface::stl_matrix L_stl;
typename Interface::stl_vector X_stl;
typename Interface::stl_vector B_stl;
typename Interface::stl_vector resu_stl;
typename Interface::gene_matrix L;
typename Interface::gene_vector X;
typename Interface::gene_vector B;
int _size;
double _cost;
};
#endif

View File

@@ -0,0 +1,165 @@
//=====================================================
// File : action_trisolve_matrix.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_TRISOLVE_MATRIX_PRODUCT
#define ACTION_TRISOLVE_MATRIX_PRODUCT
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
/// Benchmark action that times Interface::trisolve_lower_matrix, i.e. a
/// triangular solve with a matrix right-hand side.
template<class Interface>
class Action_trisolve_matrix {

public :

  /// Builds the STL operands of dimension `size`, converts them into
  /// reference and working objects of the interface's types and precomputes
  /// the operation count.
  Action_trisolve_matrix( int size ):_size(size)
  {
    MESSAGE("Action_trisolve_matrix Ctor");

    // STL matrix initialization
    init_matrix<pseudo_random>(A_stl,_size);
    init_matrix<pseudo_random>(B_stl,_size);
    init_matrix<null_function>(X_stl,_size);
    init_matrix<null_function>(resu_stl,_size);

    // zero the entries A_stl[j][i] with i < j and add 3 to every diagonal entry
    for (int j=0; j<_size; ++j)
    {
      for (int i=0; i<j; ++i)
        A_stl[j][i] = 0;
      A_stl[j][j] += 3;
    }

    // generic matrix initialization
    Interface::matrix_from_stl(A_ref,A_stl);
    Interface::matrix_from_stl(B_ref,B_stl);
    Interface::matrix_from_stl(X_ref,X_stl);
    Interface::matrix_from_stl(A,A_stl);
    Interface::matrix_from_stl(B,B_stl);
    Interface::matrix_from_stl(X,X_stl);

    // operation count: size * sum of (2*j + 1) for j in [0, size)
    _cost = 0;
    for (int j=0; j<_size; ++j)
    {
      _cost += 2*j + 1;
    }
    _cost *= _size;
  }

  /// Copying an action is not supported: reports the call and terminates
  /// with a failure status, like the other action classes.
  Action_trisolve_matrix( const Action_trisolve_matrix & )
  {
    INFOS("illegal call to Action_trisolve_matrix Copy Ctor");
    exit(1);
  }

  /// Releases the interface objects created by the constructor.
  ~Action_trisolve_matrix( void ){
    MESSAGE("Action_trisolve_matrix Dtor");

    // deallocation
    Interface::free_matrix(A,_size);
    Interface::free_matrix(B,_size);
    Interface::free_matrix(X,_size);

    Interface::free_matrix(A_ref,_size);
    Interface::free_matrix(B_ref,_size);
    Interface::free_matrix(X_ref,_size);
  }

  /// Action name: "trisolve_matrix_" followed by the interface name.
  static inline std::string name( void )
  {
    return "trisolve_matrix_"+Interface::name();
  }

  /// Operation count computed by the constructor.
  double nb_op_base( void ){
    return _cost;
  }

  /// Called before a measurement; runs copy_matrix between the reference and
  /// working matrices.
  inline void initialize( void ){
    Interface::copy_matrix(A_ref,A,_size);
    Interface::copy_matrix(B_ref,B,_size);
    Interface::copy_matrix(X_ref,X,_size);
  }

  /// The timed operation.
  inline void calculate( void ) {
    Interface::trisolve_lower_matrix(A,B,X,_size);
  }

  /// Result verification is currently disabled; the previous attempt is kept
  /// below for reference.
  void check_result( void ){
    // calculation check
    // Interface::matrix_to_stl(X,resu_stl);
    //
    // STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
    //
    // typename Interface::real_type error=
    // STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
    //
    // if (error>1.e-6){
    // INFOS("WRONG CALCULATION...residual=" << error);
    // // exit(1);
    // }
  }

private :

  // STL-side data
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_matrix B_stl;
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix resu_stl;

  // interface-side reference copies
  typename Interface::gene_matrix A_ref;
  typename Interface::gene_matrix B_ref;
  typename Interface::gene_matrix X_ref;

  // interface-side working matrices
  typename Interface::gene_matrix A;
  typename Interface::gene_matrix B;
  typename Interface::gene_matrix X;

  int _size;
  double _cost;
};
#endif

View File

@@ -0,0 +1,165 @@
//=====================================================
// File : action_trmm.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ACTION_TRMM
#define ACTION_TRMM
#include "utilities.h"
#include "STL_interface.hh"
#include <string>
#include "init/init_function.hh"
#include "init/init_vector.hh"
#include "init/init_matrix.hh"
using namespace std;
/// Benchmark action that times Interface::trmm (triangular matrix times
/// matrix product).
template<class Interface>
class Action_trmm {

public :

  /// Builds the STL operands of dimension `size`, converts them into
  /// reference and working objects of the interface's types and precomputes
  /// the operation count.
  Action_trmm( int size ):_size(size)
  {
    MESSAGE("Action_trmm Ctor");

    // STL matrix initialization
    init_matrix<pseudo_random>(A_stl,_size);
    init_matrix<pseudo_random>(B_stl,_size);
    init_matrix<null_function>(X_stl,_size);
    init_matrix<null_function>(resu_stl,_size);

    // zero the entries A_stl[j][i] with i < j and add 3 to every diagonal entry
    for (int j=0; j<_size; ++j)
    {
      for (int i=0; i<j; ++i)
        A_stl[j][i] = 0;
      A_stl[j][j] += 3;
    }

    // generic matrix initialization
    Interface::matrix_from_stl(A_ref,A_stl);
    Interface::matrix_from_stl(B_ref,B_stl);
    Interface::matrix_from_stl(X_ref,X_stl);
    Interface::matrix_from_stl(A,A_stl);
    Interface::matrix_from_stl(B,B_stl);
    Interface::matrix_from_stl(X,X_stl);

    // operation count: size * sum of (2*j + 1) for j in [0, size)
    _cost = 0;
    for (int j=0; j<_size; ++j)
    {
      _cost += 2*j + 1;
    }
    _cost *= _size;
  }

  /// Copying an action is not supported: reports the call and terminates
  /// with a failure status, like the other action classes.
  Action_trmm( const Action_trmm & )
  {
    INFOS("illegal call to Action_trmm Copy Ctor");
    exit(1);
  }

  /// Releases the interface objects created by the constructor.
  ~Action_trmm( void ){
    MESSAGE("Action_trmm Dtor");

    // deallocation
    Interface::free_matrix(A,_size);
    Interface::free_matrix(B,_size);
    Interface::free_matrix(X,_size);

    Interface::free_matrix(A_ref,_size);
    Interface::free_matrix(B_ref,_size);
    Interface::free_matrix(X_ref,_size);
  }

  /// Action name: "trmm_" followed by the interface name.
  static inline std::string name( void )
  {
    return "trmm_"+Interface::name();
  }

  /// Operation count computed by the constructor.
  double nb_op_base( void ){
    return _cost;
  }

  /// Called before a measurement; runs copy_matrix between the reference and
  /// working matrices.
  inline void initialize( void ){
    Interface::copy_matrix(A_ref,A,_size);
    Interface::copy_matrix(B_ref,B,_size);
    Interface::copy_matrix(X_ref,X,_size);
  }

  /// The timed operation.
  inline void calculate( void ) {
    Interface::trmm(A,B,X,_size);
  }

  /// Result verification is currently disabled; the previous attempt is kept
  /// below for reference.
  void check_result( void ){
    // calculation check
    // Interface::matrix_to_stl(X,resu_stl);
    //
    // STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
    //
    // typename Interface::real_type error=
    // STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
    //
    // if (error>1.e-6){
    // INFOS("WRONG CALCULATION...residual=" << error);
    // // exit(1);
    // }
  }

private :

  // STL-side data
  typename Interface::stl_matrix A_stl;
  typename Interface::stl_matrix B_stl;
  typename Interface::stl_matrix X_stl;
  typename Interface::stl_matrix resu_stl;

  // interface-side reference copies
  typename Interface::gene_matrix A_ref;
  typename Interface::gene_matrix B_ref;
  typename Interface::gene_matrix X_ref;

  // interface-side working matrices
  typename Interface::gene_matrix A;
  typename Interface::gene_matrix B;
  typename Interface::gene_matrix X;

  int _size;
  double _cost;
};
#endif

View File

@@ -0,0 +1,21 @@
#include "action_axpy.hh"
#include "action_axpby.hh"
#include "action_matrix_vector_product.hh"
#include "action_atv_product.hh"
#include "action_matrix_matrix_product.hh"
// #include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_trisolve.hh"
#include "action_trmm.hh"
#include "action_symv.hh"
// #include "action_symm.hh"
#include "action_syr2.hh"
#include "action_ger.hh"
#include "action_rot.hh"
// #include "action_lu_solve.hh"

View File

@@ -0,0 +1,32 @@
# Helper target that stages the plotting scripts and some build metadata in
# the binary directory. Every ADD_CUSTOM_COMMAND below is attached to it as a
# POST_BUILD step.
ADD_CUSTOM_TARGET(copy_scripts)
# Script and settings files copied unchanged from the source directory.
SET(script_files go_mean mk_mean_script.sh mk_new_gnuplot.sh
perlib_plot_settings.txt action_settings.txt gnuplot_common_settings.hh )
# One copy command per file: source directory -> binary directory.
FOREACH(script_file ${script_files})
ADD_CUSTOM_COMMAND(
TARGET copy_scripts
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${script_file} ${CMAKE_CURRENT_BINARY_DIR}/
ARGS
)
ENDFOREACH(script_file)
# Record the first line of the compiler's --version output in
# compiler_version.txt (go_mean reads this file). The command relies on
# `head` and on shell pipe/redirection syntax.
ADD_CUSTOM_COMMAND(
TARGET copy_scripts
POST_BUILD
COMMAND ${CMAKE_CXX_COMPILER} --version | head -n 1 > ${CMAKE_CURRENT_BINARY_DIR}/compiler_version.txt
ARGS
)
# Record the Eigen source directory in eigen_root_dir.txt (go_mean reads this
# file as EIGENDIR).
ADD_CUSTOM_COMMAND(
TARGET copy_scripts
POST_BUILD
COMMAND echo "${Eigen_SOURCE_DIR}" > ${CMAKE_CURRENT_BINARY_DIR}/eigen_root_dir.txt
ARGS
)
# Data post-processing executables. The executable built from mean.cxx is
# named "main", which is the name mk_mean_script.sh invokes (../main).
add_executable(smooth smooth.cxx)
add_executable(regularize regularize.cxx)
add_executable(main mean.cxx)
# Building "main" also triggers the copy_scripts target.
add_dependencies(main copy_scripts)

View File

@@ -0,0 +1,19 @@
aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:5000
ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:5000
atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:5000
axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000
axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000
matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:5000
matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:5000
trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:5000
trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:5000
trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:5000
cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:5000
complete_lu_decomp ; "{/*1.5 Complete LU decomposition}" ; "matrix size" ; 4:5000
partial_lu_decomp ; "{/*1.5 Partial LU decomposition}" ; "matrix size" ; 4:5000
tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:5000
hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:5000
symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:5000
syr2 ; "{/*1.5 symmetric rank-2 update (A += u v^T + v u^T)}" ; "matrix size" ; 4:5000
ger ; "{/*1.5 general rank-1 update (A += u v^T)}" ; "matrix size" ; 4:5000
rot ; "{/*1.5 apply rotation in the plane}" ; "vector size" ; 4:1000000

View File

@@ -0,0 +1,87 @@
# Common gnuplot settings shared by the benchmark plots. mk_new_gnuplot.sh
# copies this file to the top of each generated <action>.gnuplot script and
# then appends per-action title, xlabel and xrange settings after it.
set noclip points
set clip one
set noclip two
set bar 1.000000
set border 31 lt -1 lw 1.000
set xdata
set ydata
set zdata
set x2data
set y2data
set boxwidth
set dummy x,y
set format x "%g"
set format y "%g"
set format x2 "%g"
set format y2 "%g"
set format z "%g"
set angles radians
set nogrid
set key title ""
set key left top Right noreverse box linetype -2 linewidth 1.000 samplen 4 spacing 1 width 0
set nolabel
set noarrow
# set nolinestyle # deprecated
# Clear every log scale, then enable a base-10 log scale on the x axis only.
set nologscale
set logscale x 10
set offsets 0, 0, 0, 0
set pointsize 1
set encoding default
set nopolar
set noparametric
set view 60, 30, 1, 1
set samples 100, 100
set isosamples 10, 10
set surface
set nocontour
set clabel '%8.3g'
set mapping cartesian
set nohidden3d
set cntrparam order 4
set cntrparam linear
set cntrparam levels auto 5
set cntrparam points 5
set size ratio 0 1,1
set origin 0,0
# set data style lines
# set function style lines
set xzeroaxis lt -2 lw 1.000
set x2zeroaxis lt -2 lw 1.000
set yzeroaxis lt -2 lw 1.000
set y2zeroaxis lt -2 lw 1.000
set tics in
set ticslevel 0.5
set tics scale 1, 0.5
set mxtics default
set mytics default
set mx2tics default
set my2tics default
set xtics border mirror norotate autofreq
set ytics border mirror norotate autofreq
set ztics border nomirror norotate autofreq
set nox2tics
set noy2tics
set timestamp "" bottom norotate offset 0,0
set rrange [ * : * ] noreverse nowriteback # (currently [-0:10] )
set trange [ * : * ] noreverse nowriteback # (currently [-5:5] )
set urange [ * : * ] noreverse nowriteback # (currently [-5:5] )
set vrange [ * : * ] noreverse nowriteback # (currently [-5:5] )
# Default axis labels: x is the problem size, y the measured MFLOPS.
set xlabel "matrix size" offset 0,0
set x2label "" offset 0,0
set timefmt "%d/%m/%y\n%H:%M"
# NOTE: this x range is set again at the end of this file.
set xrange [ 10 : 1000 ] noreverse nowriteback
set x2range [ * : * ] noreverse nowriteback # (currently [-10:10] )
set ylabel "MFLOPS" offset 0,0
set y2label "" offset 0,0
set yrange [ * : * ] noreverse nowriteback # (currently [-10:10] )
set y2range [ * : * ] noreverse nowriteback # (currently [-10:10] )
set zlabel "" offset 0,0
set zrange [ * : * ] noreverse nowriteback # (currently [-10:10] )
set zero 1e-08
set lmargin -1
set bmargin -1
set rmargin -1
set tmargin -1
set locale "C"
# Second x-range setting in this file; it comes after the [10:1000] one above.
set xrange [4:1024]

View File

@@ -0,0 +1,58 @@
#!/bin/bash
# Builds the benchmark report for a result directory.
#
# Usage: go_mean working_directory [tiny|large [prefix]]
#
#   working_directory  directory searched for *.dat result files and in which
#                      index.html and mean.html are written
#   tiny|large         plotting mode forwarded to mk_mean_script.sh
#                      (defaults to "large")
#   prefix             prefix forwarded to mk_mean_script.sh for the image
#                      links written to index.html
#
# Must be run from the directory containing eigen_root_dir.txt,
# compiler_version.txt and mk_mean_script.sh.

# Numeric comparisons need -lt / -ge: inside [ ], "<" and ">" are shell
# redirections, not comparison operators.
if [ $# -lt 1 ]; then
  echo "Usage: $0 working_directory [tiny|large [prefix]]"
else

mkdir -p $1
##cp ../libs/*/*.dat $1

# Optional arguments: keep the defaults unless they were actually supplied.
mode=large
if [ $# -ge 2 ]; then
  mode=$2
fi
if [ $# -ge 3 ]; then
  prefix=$3
fi

EIGENDIR=`cat eigen_root_dir.txt`

webpagefilename=$1/index.html
meanstatsfilename=$1/mean.html

# Start both pages from scratch.
echo '' > $meanstatsfilename
echo '' > $webpagefilename

# Configuration summary: CPU model, architecture, compiler and Eigen revision.
echo '<p><strong>Configuration</strong>' >> $webpagefilename
echo '<ul>'\
  '<li>' `cat /proc/cpuinfo | grep "model name" | head -n 1`\
  ' (' `uname -m` ')</li>'\
  '<li> compiler: ' `cat compiler_version.txt` '</li>'\
  '<li> eigen3: ' `hg identify -i $EIGENDIR` '</li>'\
  '</ul>' \
  '</p>' >> $webpagefilename

# One call per action: name, directory, then five numeric arguments (the last
# four are handed to the "main" program by mk_mean_script.sh), then mode and
# prefix.
source mk_mean_script.sh axpy $1 11 2500 100000 250000 $mode $prefix
source mk_mean_script.sh axpby $1 11 2500 100000 250000 $mode $prefix
source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix
# source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trmm $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trisolve_vector $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trisolve_matrix $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh partial_lu_decomp $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh hessenberg $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh symv $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh syr2 $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh ger $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh rot $1 11 2500 100000 250000 $mode $prefix
source mk_mean_script.sh complete_lu_decomp $1 11 100 300 1000 $mode $prefix

fi

## compile the web page ##
#echo `cat footer.html` >> $webpagefilename

View File

@@ -0,0 +1,182 @@
//=====================================================
// File : mean.cxx
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include "bench_parameter.hh"
#include "utils/xy_file.hh"
#include <set>
using namespace std;
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max);
/// Mean performance of one library: its name plus the mean computed over the
/// in-cache and out-of-cache size ranges.
///
/// operator< orders instances by decreasing in-cache mean, so an ordered
/// container of Lib_Mean starts with the library that has the highest
/// in-cache mean.
class Lib_Mean{

public:

  /// Not meant to be used: logs a message and terminates the program.
  // Initializers are listed in member declaration order, which is the order
  // in which they actually run.
  Lib_Mean( void ):_mean_in_cache(),_mean_out_of_cache(),_lib_name(){
    MESSAGE("Lib_mean Default Ctor");
    MESSAGE("!!! should not be used");
    exit(0);
  }

  /// @param name library name
  /// @param mic  mean over the in-cache size range
  /// @param moc  mean over the out-of-cache size range
  Lib_Mean(const string & name, const double & mic, const double & moc):_mean_in_cache(mic),_mean_out_of_cache(moc),_lib_name(name){
    MESSAGE("Lib_mean Ctor");
  }

  Lib_Mean(const Lib_Mean & lm):_mean_in_cache(lm._mean_in_cache),_mean_out_of_cache(lm._mean_out_of_cache),_lib_name(lm._lib_name){
    MESSAGE("Lib_mean Copy Ctor");
  }

  ~Lib_Mean( void ){
    MESSAGE("Lib_mean Dtor");
  }

  double _mean_in_cache;
  double _mean_out_of_cache;
  string _lib_name;

  /// "Less than" means "has a larger in-cache mean" (descending order).
  bool operator < ( const Lib_Mean &right) const
  {
    //return ( this->_mean_out_of_cache > right._mean_out_of_cache) ;
    return ( this->_mean_in_cache > right._mean_in_cache) ;
  }

};
int main( int argc , char *argv[] )
{
if (argc<6){
INFOS("!!! Error ... usage : main what mic Mic moc Moc filename1 finename2...");
exit(0);
}
INFOS(argc);
int min_in_cache=atoi(argv[2]);
int max_in_cache=atoi(argv[3]);
int min_out_of_cache=atoi(argv[4]);
int max_out_of_cache=atoi(argv[5]);
multiset<Lib_Mean> s_lib_mean ;
for (int i=6;i<argc;i++){
string filename=argv[i];
INFOS(filename);
double mic=0;
double moc=0;
{
vector<int> tab_sizes;
vector<double> tab_mflops;
read_xy_file(filename,tab_sizes,tab_mflops);
mic=mean_calc(tab_sizes,tab_mflops,min_in_cache,max_in_cache);
moc=mean_calc(tab_sizes,tab_mflops,min_out_of_cache,max_out_of_cache);
Lib_Mean cur_lib_mean(filename,mic,moc);
s_lib_mean.insert(cur_lib_mean);
}
}
cout << "<TABLE BORDER CELLPADDING=2>" << endl ;
cout << " <TR>" << endl ;
cout << " <TH ALIGN=CENTER> " << argv[1] << " </TH>" << endl ;
cout << " <TH ALIGN=CENTER> <a href=""#mean_marker""> in cache <BR> mean perf <BR> Mflops </a></TH>" << endl ;
cout << " <TH ALIGN=CENTER> in cache <BR> % best </TH>" << endl ;
cout << " <TH ALIGN=CENTER> <a href=""#mean_marker""> out of cache <BR> mean perf <BR> Mflops </a></TH>" << endl ;
cout << " <TH ALIGN=CENTER> out of cache <BR> % best </TH>" << endl ;
cout << " <TH ALIGN=CENTER> details </TH>" << endl ;
cout << " <TH ALIGN=CENTER> comments </TH>" << endl ;
cout << " </TR>" << endl ;
multiset<Lib_Mean>::iterator is = s_lib_mean.begin();
Lib_Mean best(*is);
for (is=s_lib_mean.begin(); is!=s_lib_mean.end() ; is++){
cout << " <TR>" << endl ;
cout << " <TD> " << is->_lib_name << " </TD>" << endl ;
cout << " <TD> " << is->_mean_in_cache << " </TD>" << endl ;
cout << " <TD> " << 100*(is->_mean_in_cache/best._mean_in_cache) << " </TD>" << endl ;
cout << " <TD> " << is->_mean_out_of_cache << " </TD>" << endl ;
cout << " <TD> " << 100*(is->_mean_out_of_cache/best._mean_out_of_cache) << " </TD>" << endl ;
cout << " <TD> " <<
"<a href=\"#"<<is->_lib_name<<"_"<<argv[1]<<"\">snippet</a>/"
"<a href=\"#"<<is->_lib_name<<"_flags\">flags</a> </TD>" << endl ;
cout << " <TD> " <<
"<a href=\"#"<<is->_lib_name<<"_comments\">click here</a> </TD>" << endl ;
cout << " </TR>" << endl ;
}
cout << "</TABLE>" << endl ;
ofstream output_file ("../order_lib",ios::out) ;
for (is=s_lib_mean.begin(); is!=s_lib_mean.end() ; is++){
output_file << is->_lib_name << endl ;
}
output_file.close();
}
/// Averages the entries of tab_mflops whose matching entry in tab_sizes lies
/// in the closed interval [size_min, size_max].
///
/// @return the mean of the selected samples, or 0.0 (after logging a message)
///         when no entry falls inside the interval.
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){

  const int nb_entries=tab_sizes.size();

  double accumulated=0.0;
  int selected=0;

  for (int idx=0; idx<nb_entries; ++idx){
    const int current_size=tab_sizes[idx];

    // skip samples outside the requested size window
    if (current_size<size_min || current_size>size_max) continue;

    ++selected;
    accumulated+=tab_mflops[idx];
  }

  if (selected!=0){
    return accumulated/selected;
  }

  INFOS("no data for mean calculation");
  return 0.0;
}

View File

@@ -0,0 +1,68 @@
#! /bin/bash
# Generates a gnuplot script for one action and runs it.
#
#   $1 (WHAT)  action name; $WHAT.hh supplies the initial gnuplot settings
#   $2 (DIR)   directory searched recursively for *.dat result files
#
# The same plot command is written three times into $WHAT.gnuplot: once
# before any terminal is selected, once for the pbm terminal ($WHAT.ppm) and
# once for the jpeg terminal ($WHAT.jpg).
WHAT=$1
DIR=$2
echo $WHAT script generation
# Start the script from the per-action settings file.
cat $WHAT.hh > $WHAT.gnuplot
# Result files under DIR whose path contains the action name.
DATA_FILE=`find $DIR -name "*.dat" | grep $WHAT`
echo plot \\ >> $WHAT.gnuplot
# Find the last file of the list: it is written without the trailing ",\"
# line continuation.
for FILE in $DATA_FILE
do
LAST=$FILE
done
echo LAST=$LAST
# One plot entry per file except the last. The curve title is the file name
# without its directory, the "bench_${WHAT}_" prefix and the ".dat" suffix.
for FILE in $DATA_FILE
do
if [ $FILE != $LAST ]
then
BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >> $WHAT.gnuplot
fi
done
# Entry for the last file. NOTE: BASE is assigned twice, so the value derived
# from FILE (still holding the loop's final value) is the one that is used.
BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
echo "'"$LAST"'" title "'"$TITLE"'" >> $WHAT.gnuplot
#echo set term postscript color >> $WHAT.gnuplot
#echo set output "'"$WHAT.ps"'" >> $WHAT.gnuplot
# Second pass: same plot, pbm terminal, output in $WHAT.ppm.
echo set term pbm small color >> $WHAT.gnuplot
echo set output "'"$WHAT.ppm"'" >> $WHAT.gnuplot
echo plot \\ >> $WHAT.gnuplot
for FILE in $DATA_FILE
do
if [ $FILE != $LAST ]
then
BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >> $WHAT.gnuplot
fi
done
BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
echo "'"$LAST"'" title "'"$TITLE"'" >> $WHAT.gnuplot
# Third pass: same plot, jpeg terminal, output in $WHAT.jpg.
echo set term jpeg large >> $WHAT.gnuplot
echo set output "'"$WHAT.jpg"'" >> $WHAT.gnuplot
echo plot \\ >> $WHAT.gnuplot
for FILE in $DATA_FILE
do
if [ $FILE != $LAST ]
then
BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
echo "'"$FILE"'" title "'"$TITLE"'" ",\\" >> $WHAT.gnuplot
fi
done
BASE=${LAST##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
echo "'"$LAST"'" title "'"$TITLE"'" >> $WHAT.gnuplot
# Run the generated script, then delete it.
gnuplot -persist < $WHAT.gnuplot
rm $WHAT.gnuplot

View File

@@ -0,0 +1,52 @@
#! /bin/bash
# Processes the result files of one action: copies the matching "*.dat" files
# into a scratch directory, appends the output of ../main to DIR/mean.html,
# runs mk_new_gnuplot.sh and appends an image link to DIR/index.html.
#
# Positional arguments, as read below:
#   $1      action name (WHAT)
#   $2      result directory (DIR); used as "../$2/..." after the cd below,
#           so it is presumably a path relative to the starting directory
#   $3..$6  forwarded unchanged to ../main
#   $7      forwarded as third argument to ../mk_new_gnuplot.sh
#   $8      prefix prepended to the .pdf/.png names in the generated link
WHAT=$1
DIR=$2
# NOTE(review): MINIC/MAXIC/MINOC/MAXOC are assigned but not referenced again
# in this script; ../main receives $3..$6 directly.
MINIC=$3
MAXIC=$4
MINOC=$5
MAXOC=$6
prefix=$8
meanstatsfilename=$2/mean.html
# Scratch directory, created here and removed on the last line.
WORK_DIR=tmp
mkdir $WORK_DIR
DATA_FILE=`find $DIR -name "*.dat" | grep _${WHAT}`
# Nothing is generated when no data file matches the action.
if [ -n "$DATA_FILE" ]; then
echo ""
echo "$1..."
for FILE in $DATA_FILE
do
##echo hello world
##echo "mk_mean_script1" ${FILE}
# TITLE = file name without directory, "bench_${WHAT}_" prefix and ".dat" suffix.
BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
##echo "mk_mean_script1" ${TITLE}
cp $FILE ${WORK_DIR}/${TITLE}
done
# From here on, paths are relative to the scratch directory (hence the "../").
cd $WORK_DIR
# "*" expands to the files copied above, i.e. one argument per TITLE.
../main $1 $3 $4 $5 $6 * >> ../$meanstatsfilename
../mk_new_gnuplot.sh $1 $2 $7
rm -f *.gnuplot
cd ..
echo '<br/>' >> $meanstatsfilename
webpagefilename=$2/index.html
# echo '<h3>'${WHAT}'</h3>' >> $webpagefilename
echo '<hr/><a href="'$prefix$1'.pdf"><img src="'$prefix$1'.png" alt="'${WHAT}'" /></a><br/>' >> $webpagefilename
fi
rm -R $WORK_DIR

View File

@@ -0,0 +1,54 @@
#!/bin/bash
# Generates and runs a gnuplot script for one action, then converts the
# resulting PostScript file to PDF and PNG.
#
# Usage: <this script> WHAT DIR [tiny]
#   WHAT  action name; selects the line of ../action_settings.txt to use
#   DIR   output directory, addressed as ../DIR from the current directory
#   tiny  optional third argument: when it is exactly "tiny", the x range is
#         forced to [2:16] and log scale is switched off
WHAT=$1
DIR=$2
cat ../gnuplot_common_settings.hh > ${WHAT}.gnuplot
# Title, x label and x range come from fields 2, 3 and 4 (";"-separated) of the
# first line of ../action_settings.txt that matches WHAT.
echo "set title " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 2` >> $WHAT.gnuplot
echo "set xlabel " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 3` " offset 0,0" >> $WHAT.gnuplot
echo "set xrange [" `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 4` "]" >> $WHAT.gnuplot
# Numeric comparison: inside [ ], ">" is an output redirection, which would
# create a file named "3" and leave a test that is always true. The third
# argument is only inspected when it was actually passed.
if [ $# -ge 3 ]; then
if [ "$3" == "tiny" ]; then
echo "set xrange [2:16]" >> $WHAT.gnuplot
echo "set nologscale" >> $WHAT.gnuplot
fi
fi
# The curves to plot, in plotting order, are listed in ../order_lib.
DATA_FILE=`cat ../order_lib`
echo set term postscript color rounded enhanced >> $WHAT.gnuplot
echo set output "'"../${DIR}/$WHAT.ps"'" >> $WHAT.gnuplot
# echo set term svg color rounded enhanced >> $WHAT.gnuplot
# echo "set terminal svg enhanced size 1000 1000 fname \"Times\" fsize 36" >> $WHAT.gnuplot
# echo set output "'"../${DIR}/$WHAT.svg"'" >> $WHAT.gnuplot
echo plot \\ >> $WHAT.gnuplot
# Remember the last entry: it must not be followed by a "," separator.
for FILE in $DATA_FILE
do
LAST=$FILE
done
for FILE in $DATA_FILE
do
# TITLE = entry without directory, "bench_${WHAT}_" prefix and ".dat" suffix;
# it selects the per-library line style from ../perlib_plot_settings.txt.
BASE=${FILE##*/} ; BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}
echo "'"$FILE"'" `grep $TITLE ../perlib_plot_settings.txt | head -n 1 | cut -d ";" -f 2` "\\" >> $WHAT.gnuplot
if [ $FILE != $LAST ]
then
echo ", \\" >> $WHAT.gnuplot
fi
done
# The last entry ends with a continuation backslash; this line terminates it.
echo " " >> $WHAT.gnuplot
gnuplot -persist < $WHAT.gnuplot
rm $WHAT.gnuplot
ps2pdf ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.pdf
convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 ../${DIR}/$WHAT.ps -background white -flatten ../${DIR}/$WHAT.png
# pstoedit -rotate -90 -xscale 0.8 -yscale 0.8 -centered -yshift -50 -xshift -100 -f plot-svg aat.ps aat2.svg

View File

@@ -0,0 +1,16 @@
eigen3 ; with lines lw 4 lt 1 lc rgbcolor "black"
eigen2 ; with lines lw 3 lt 1 lc rgbcolor "#999999"
EigenBLAS ; with lines lw 3 lt 3 lc rgbcolor "#999999"
eigen3_novec ; with lines lw 2 lt 1 lc rgbcolor "#999999"
eigen3_nogccvec ; with lines lw 2 lt 2 lc rgbcolor "#991010"
INTEL_MKL ; with lines lw 3 lt 1 lc rgbcolor "#ff0000"
ATLAS ; with lines lw 3 lt 1 lc rgbcolor "#008000"
gmm ; with lines lw 3 lt 1 lc rgbcolor "#0000ff"
ublas ; with lines lw 3 lt 1 lc rgbcolor "#00b7ff"
mtl4 ; with lines lw 3 lt 1 lc rgbcolor "#d18847"
blitz ; with lines lw 3 lt 1 lc rgbcolor "#ff00ff"
F77 ; with lines lw 3 lt 3 lc rgbcolor "#e6e64c"
OPENBLAS ; with lines lw 3 lt 1 lc rgbcolor "#C05600"
C ; with lines lw 3 lt 3 lc rgbcolor "#e6bd96"
ACML ; with lines lw 2 lt 3 lc rgbcolor "#e6e64c"
blaze ; with lines lw 3 lt 1 lc rgbcolor "#ff00ff"

View File

@@ -0,0 +1,131 @@
//=====================================================
// File : regularize.cxx
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include <vector>
#include <string>
#include <iostream>
#include <fstream>
#include "bench_parameter.hh"
#include <set>
using namespace std;
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
void regularize_curve(const string & filename,
const vector<double> & tab_mflops,
const vector<int> & tab_sizes,
int start_cut_size, int stop_cut_size);
/////////////////////////////////////////////////////////////////////////////////////////////////
int main( int argc , char *argv[] )
{
// input data
if (argc<4){
INFOS("!!! Error ... usage : main filename start_cut_size stop_cut_size regularize_filename");
exit(0);
}
INFOS(argc);
int start_cut_size=atoi(argv[2]);
int stop_cut_size=atoi(argv[3]);
string filename=argv[1];
string regularize_filename=argv[4];
INFOS(filename);
INFOS("start_cut_size="<<start_cut_size);
vector<int> tab_sizes;
vector<double> tab_mflops;
read_xy_file(filename,tab_sizes,tab_mflops);
// regularizeing
regularize_curve(regularize_filename,tab_mflops,tab_sizes,start_cut_size,stop_cut_size);
}
//////////////////////////////////////////////////////////////////////////////////////
/// Writes the curve (tab_sizes[i], tab_mflops[i]) to `filename`, leaving out
/// the points whose size lies in [start_cut_size, stop_cut_size).
///
/// Output layout: the points with size < start_cut_size, one "size mflops"
/// pair per line, then one empty line, then the points from the first size
/// >= stop_cut_size to the end. Sizes are scanned in storage order.
///
/// @param filename        output file (created or truncated)
/// @param tab_mflops      y values
/// @param tab_sizes       x values, same indexing as tab_mflops
/// @param start_cut_size  first size that is dropped
/// @param stop_cut_size   first size that is kept again
void regularize_curve(const std::string & filename,
                      const std::vector<double> & tab_mflops,
                      const std::vector<int> & tab_sizes,
                      int start_cut_size, int stop_cut_size)
{
  typedef std::vector<int>::size_type index_type;

  // Only indices valid in both vectors are ever visited.
  const index_type size =
    tab_mflops.size() < tab_sizes.size() ? tab_mflops.size() : tab_sizes.size();

  std::ofstream output_file (filename.c_str(),std::ios::out) ;

  index_type i=0;

  // Leading part of the curve. The bound check matters when every size is
  // below start_cut_size (or the input is empty): without it the scan would
  // run past the end of the vectors.
  for ( ; i<size && tab_sizes[i]<start_cut_size ; ++i){
    output_file << tab_sizes[i] << " " << tab_mflops[i] << "\n" ;
  }

  // Empty line separating the two kept parts.
  output_file << "\n" ;

  // Skip the points inside the cut interval.
  while (i<size && tab_sizes[i]<stop_cut_size){
    ++i;
  }

  // Trailing part of the curve.
  for ( ; i<size ; ++i){
    output_file << tab_sizes[i] << " " << tab_mflops[i] << "\n" ;
  }

  output_file.close();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
ifstream input_file (filename.c_str(),ios::in) ;
if (!input_file){
INFOS("!!! Error opening "<<filename);
exit(0);
}
int nb_point=0;
int size=0;
double mflops=0;
while (input_file >> size >> mflops ){
nb_point++;
tab_sizes.push_back(size);
tab_mflops.push_back(mflops);
}
SCRUTE(nb_point);
input_file.close();
}

View File

@@ -0,0 +1,198 @@
//=====================================================
// File : smooth.cxx
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:15 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include <vector>
#include <deque>
#include <string>
#include <iostream>
#include <fstream>
#include "bench_parameter.hh"
#include <set>
using namespace std;
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
void write_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
void smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab_mflops,int window_half_width);
void centered_smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab_mflops,int window_half_width);
/////////////////////////////////////////////////////////////////////////////////////////////////
/// Entry point of the curve smoothing tool.
///
/// Usage: main filename window_half_width smooth_filename
///
/// Reads the (size, mflops) pairs of `filename`, smooths the mflops values
/// with centered_smooth_curve and writes the result to `smooth_filename`.
int main( int argc , char *argv[] )
{
  // input data
  // Three arguments are read below (argv[1] .. argv[3]), so argc must be >= 4;
  // with fewer, argv[3] would be a null pointer handed to std::string.
  if (argc<4){
    INFOS("!!! Error ... usage : main filename window_half_width smooth_filename");
    exit(0);
  }
  INFOS(argc);

  int window_half_width=atoi(argv[2]);
  std::string filename=argv[1];
  std::string smooth_filename=argv[3];

  INFOS(filename);
  INFOS("window_half_width="<<window_half_width);

  std::vector<int> tab_sizes;
  std::vector<double> tab_mflops;
  read_xy_file(filename,tab_sizes,tab_mflops);

  // smoothing
  std::vector<double> smooth_tab_mflops;
  //smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width);
  centered_smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width);

  // output result
  write_xy_file(smooth_filename,tab_sizes,smooth_tab_mflops);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Arithmetic mean of the elements of `data` (all weights are equal, despite
// the name). VECTOR must provide size() and operator[]. Elements are summed
// in index order, starting from 0.0.
template<class VECTOR>
double weighted_mean(const VECTOR & data)
{
  const int count=data.size();
  double total=0.0;

  int pos=0;
  while (pos<count){
    total+=data[pos];
    ++pos;
  }

  return total/double(data.size()) ;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Moving average with a fixed window of 2*window_half_width+1 samples.
// For every input index one value is appended to smooth_tab_mflops: the mean
// of the window centred on that index, where window positions falling outside
// the input reuse the first or last input value.
void smooth_curve(const std::vector<double> & tab_mflops, std::vector<double> & smooth_tab_mflops,int window_half_width){

  const int window_width=2*window_half_width+1;
  const int nb_values=tab_mflops.size();
  const int last_index=nb_values-1;

  std::vector<double> window(window_width);

  for (int center=0 ; center < nb_values ; ++center){

    for ( int slot=0 ; slot < window_width ; ++slot ){

      // Source index for this slot, clamped to the valid range.
      int source=center+slot-window_half_width;
      source = (source<0) ? 0 : source;
      source = (source>last_index) ? last_index : source;

      window[slot]=tab_mflops[source];
    }

    smooth_tab_mflops.push_back(weighted_mean(window));
  }
}
// Moving average with a symmetric window that shrinks near the borders.
// For every input index i one value is appended to smooth_tab_mflops: the mean
// of tab_mflops[i-k .. i+k], where k is the largest value not exceeding
// window_half_width for which both i-k and i+k are valid indices.
void centered_smooth_curve(const std::vector<double> & tab_mflops, std::vector<double> & smooth_tab_mflops,int window_half_width){

  const int size=tab_mflops.size();

  for (int i=0 ; i < size ; i++){

    std::deque<double> sample;
    sample.push_back(tab_mflops[i]);

    for ( int j=1 ; j <= window_half_width ; j++ ){

      const int before=i-j;
      const int after=i+j;

      // Once one side leaves the vector, every larger j does too: stop here
      // instead of testing the remaining offsets.
      if ((before<0)||(after>=size)) break;

      sample.push_front(tab_mflops[before]);
      sample.push_back(tab_mflops[after]);
    }

    smooth_tab_mflops.push_back(weighted_mean(sample));
  }
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Writes one "size mflops" line per point to `filename` (created or
// truncated). Only the points present in both vectors are written, so a
// tab_mflops shorter than tab_sizes can no longer be read out of bounds.
void write_xy_file(const std::string & filename, std::vector<int> & tab_sizes, std::vector<double> & tab_mflops){

  typedef std::vector<int>::size_type index_type;

  const index_type nb_point =
    tab_mflops.size() < tab_sizes.size() ? tab_mflops.size() : tab_sizes.size();

  std::ofstream output_file (filename.c_str(),std::ios::out) ;

  for (index_type i=0 ; i < nb_point ; ++i)
  {
    output_file << tab_sizes[i] << " " << tab_mflops[i] << "\n" ;
  }

  output_file.close();
}
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
ifstream input_file (filename.c_str(),ios::in) ;
if (!input_file){
INFOS("!!! Error opening "<<filename);
exit(0);
}
int nb_point=0;
int size=0;
double mflops=0;
while (input_file >> size >> mflops ){
nb_point++;
tab_sizes.push_back(size);
tab_mflops.push_back(mflops);
}
SCRUTE(nb_point);
input_file.close();
}

View File

@@ -0,0 +1,68 @@
#! /bin/bash
# Builds a smoothed copy of a benchmark result directory.
#
# Usage: <this script> ORIG_DIR
#
# Results are written to ${ORIG_DIR}_smooth. Depending on which pattern the
# path of a "*.dat" file matches, the file is smoothed and regularized, only
# smoothed, or copied unchanged. A file matching several patterns is processed
# once per pattern, in the order of the calls at the bottom of this script.
ORIG_DIR=$1
SMOOTH_DIR=${ORIG_DIR}_smooth
# -p: do not fail when the script is run again on the same directory.
mkdir -p ${SMOOTH_DIR}

# list_dat PATTERN: prints the "*.dat" files under ORIG_DIR whose path matches PATTERN.
list_dat()
{
    find ${ORIG_DIR} -name "*.dat" | grep $1
}

# smooth_and_regularize PATTERN START_CUT STOP_CUT:
# smooths each matching file (half-width 4), then cuts [START_CUT, STOP_CUT) out of it.
smooth_and_regularize()
{
    for FILE in `list_dat $1`
    do
        echo $FILE
        BASE=${FILE##*/}
        ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}_tmp
        ./regularize ${SMOOTH_DIR}/${BASE}_tmp $2 $3 ${SMOOTH_DIR}/${BASE}
        rm -f ${SMOOTH_DIR}/${BASE}_tmp
    done
}

# smooth_only PATTERN: smooths each matching file (half-width 4).
smooth_only()
{
    for FILE in `list_dat $1`
    do
        echo $FILE
        BASE=${FILE##*/}
        ./smooth ${ORIG_DIR}/${BASE} 4 ${SMOOTH_DIR}/${BASE}
    done
}

# copy_only PATTERN: copies each matching file unchanged.
copy_only()
{
    for FILE in `list_dat $1`
    do
        echo $FILE
        BASE=${FILE##*/}
        cp ${ORIG_DIR}/${BASE} ${SMOOTH_DIR}/${BASE}
    done
}

smooth_and_regularize axpy 2500 15000
smooth_and_regularize matrix_vector 50 180

smooth_only matrix_matrix
smooth_only _aat
smooth_only _ata

### no smoothing for tinyvector and matrices libs
copy_only tiny_blitz
copy_only tvmet

View File

@@ -0,0 +1,168 @@
//=====================================================
// File : bench.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef BENCH_HH
#define BENCH_HH
#include "btl.hh"
#include "bench_parameter.hh"
#include <iostream>
#include "utilities.h"
#include "size_lin_log.hh"
#include "xy_file.hh"
#include <vector>
#include <string>
#include "timers/portable_perf_analyzer.hh"
// #include "timers/mixed_perf_analyzer.hh"
// #include "timers/x86_perf_analyzer.hh"
// #include "timers/STL_perf_analyzer.hh"
#ifdef HAVE_MKL
extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int);
#endif
using namespace std;
// Runs the benchmark `Action` for nb_point problem sizes between size_min and
// size_max, prints each measurement next to the previously stored one (when
// available) and dumps the result to "bench_<Action::name()>.dat".
//
// Perf_Analyzer : timer policy; Perf_Analyzer<Action>::eval_mflops(size) is
//                 called once per size.
// size_min, size_max, nb_point : forwarded to size_lin_log, which fills
//                 tab_sizes (helper defined elsewhere).
//
// Does nothing when BtlConfig::skipAction rejects the action name.
template <template<class> class Perf_Analyzer, class Action>
BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
{
if (BtlConfig::skipAction(Action::name()))
return;
string filename="bench_"+Action::name()+".dat";
INFOS("starting " <<filename);
// utilities
std::vector<double> tab_mflops(nb_point);
std::vector<int> tab_sizes(nb_point);
// matrices and vector size calculations
size_lin_log(nb_point,size_min,size_max,tab_sizes);
// Results of a previous run, if any.
// NOTE(review): the meaning of the trailing `true` argument is defined by
// read_xy_file in xy_file.hh, which is not visible here.
std::vector<int> oldSizes;
std::vector<double> oldFlops;
bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true);
// Cursor into the old results; it only moves towards index 0, following the
// main loop below, which visits tab_sizes from the last index to the first.
int oldi = oldSizes.size() - 1;
// loop on matrix size
Perf_Analyzer<Action> perf_action;
for (int i=nb_point-1;i>=0;i--)
{
//INFOS("size=" <<tab_sizes[i]<<" ("<<nb_point-i<<"/"<<nb_point<<")");
std::cout << " " << "size = " << tab_sizes[i] << " " << std::flush;
BTL_DISABLE_SSE_EXCEPTIONS();
#ifdef HAVE_MKL
{
// NOTE(review): dummy single-element saxpy issued before every measurement;
// presumably works around an MKL state/initialisation issue - TODO confirm.
float dummy;
cblas_saxpy(1,0,&dummy,1,&dummy,1);
}
#endif
tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
std::cout << tab_mflops[i];
if (hasOldResults)
{
// Skip old entries larger than the current size, then print the old value
// only on an exact size match (">" when the new measurement is higher).
while (oldi>=0 && oldSizes[oldi]>tab_sizes[i])
--oldi;
if (oldi>=0 && oldSizes[oldi]==tab_sizes[i])
{
if (oldFlops[oldi]<tab_mflops[i])
std::cout << "\t > ";
else
std::cout << "\t < ";
std::cout << oldFlops[oldi];
}
// NOTE(review): the cursor is decremented even when there was no exact
// match, so an old entry smaller than the current size is skipped without
// being compared against later (smaller) sizes.
--oldi;
}
std::cout << " MFlops (" << nb_point-i << "/" << nb_point << ")" << std::endl;
}
if (!BtlConfig::Instance.overwriteResults)
{
if (hasOldResults)
{
// merge the two data
// Two-cursor merge of (tab_sizes, tab_mflops) with (oldSizes, oldFlops):
// for equal sizes the larger mflops value is kept, otherwise the entry with
// the smaller size is emitted first.
// Assumes both size lists are sorted in increasing order - TODO confirm.
std::vector<int> newSizes;
std::vector<double> newFlops;
unsigned int i=0;
unsigned int j=0;
while (i<tab_sizes.size() && j<oldSizes.size())
{
if (tab_sizes[i] == oldSizes[j])
{
newSizes.push_back(tab_sizes[i]);
newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
++i;
++j;
}
else if (tab_sizes[i] < oldSizes[j])
{
newSizes.push_back(tab_sizes[i]);
newFlops.push_back(tab_mflops[i]);
++i;
}
else
{
newSizes.push_back(oldSizes[j]);
newFlops.push_back(oldFlops[j]);
++j;
}
}
// Append whatever remains of either list.
while (i<tab_sizes.size())
{
newSizes.push_back(tab_sizes[i]);
newFlops.push_back(tab_mflops[i]);
++i;
}
while (j<oldSizes.size())
{
newSizes.push_back(oldSizes[j]);
newFlops.push_back(oldFlops[j]);
++j;
}
tab_mflops = newFlops;
tab_sizes = newSizes;
}
}
// dump the result in a file :
dump_xy_file(tab_sizes,tab_mflops,filename);
}
// default Perf Analyzer
// Convenience overload: runs bench with the default timer policy
// (Portable_Perf_Analyzer). The alternatives are kept below as commented-out
// calls.
template <class Action>
BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ){
// if the rdtsc is not available :
bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point);
// if the rdtsc is available :
// bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
// Only for small problem size. Otherwise it will be too long
// bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
// bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
}
#endif

View File

@@ -0,0 +1,53 @@
//=====================================================
// File : bench_parameter.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef BENCH_PARAMETER_HH
#define BENCH_PARAMETER_HH
// floating-point type used to instantiate the benchmarked interfaces
#define REAL_TYPE float
// minimal time for each measurement
#define MIN_TIME 0.2
// nb of point on bench curves
#define NB_POINT 100
// min vector size for axpy bench
#define MIN_AXPY 5
// max vector size for axpy bench
#define MAX_AXPY 3000000
// min matrix size for matrix vector product bench
#define MIN_MV 5
// max matrix size for matrix vector product bench
#define MAX_MV 5000
// min matrix size for matrix matrix product bench
#define MIN_MM 5
// max matrix size for matrix matrix product bench
#define MAX_MM MAX_MV
// min matrix size for LU bench
#define MIN_LU 5
// max matrix size for LU bench
#define MAX_LU 3000
// max size for tiny vector and matrix
#define TINY_MV_MAX_SIZE 16
// default nb_sample for x86 timer
#define DEFAULT_NB_SAMPLE 1000
// how many times we run a single bench (keep the best perf)
#define DEFAULT_NB_TRIES 3
#endif

View File

@@ -0,0 +1,242 @@
//=====================================================
// File : btl.hh
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef BTL_HH
#define BTL_HH
#include "bench_parameter.hh"
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include "utilities.h"
#if (defined __GNUC__)
#define BTL_ALWAYS_INLINE __attribute__((always_inline)) inline
#else
#define BTL_ALWAYS_INLINE inline
#endif
#if (defined __GNUC__)
#define BTL_DONT_INLINE __attribute__((noinline))
#else
#define BTL_DONT_INLINE
#endif
#if (defined __GNUC__)
#define BTL_ASM_COMMENT(X) asm("#" X)
#else
#define BTL_ASM_COMMENT(X)
#endif
#ifdef __SSE__
#include "xmmintrin.h"
// This enables flush to zero (FTZ) and denormals are zero (DAZ) modes:
#define BTL_DISABLE_SSE_EXCEPTIONS() { _mm_setcsr(_mm_getcsr() | 0x8040); }
#else
#define BTL_DISABLE_SSE_EXCEPTIONS()
#endif
/** Enhanced std::string
  *
  * A std::string with a few convenience helpers (trimming, splitting,
  * prefix/suffix tests, case conversion, path decomposition) and an implicit
  * conversion to const char*.
  */
class BtlString : public std::string
{
public:
  BtlString() : std::string() {}
  BtlString(const BtlString& str) : std::string(static_cast<const std::string&>(str)) {}
  BtlString(const std::string& str) : std::string(str) {}
  BtlString(const char* str) : std::string(str) {}

  /** Implicit access to the underlying C string. */
  operator const char* () const { return c_str(); }

  /** Removes leading (if \a left) and/or trailing (if \a right) spaces,
    * tabs, carriage returns and line feeds, in place.
    */
  void trim( bool left = true, bool right = true )
  {
    int lspaces, rspaces, len = length(), i;
    lspaces = rspaces = 0;

    if ( left )
      for (i=0; i<len && (at(i)==' '||at(i)=='\t'||at(i)=='\r'||at(i)=='\n'); ++lspaces,++i);

    // Skipped when the left pass already consumed the whole string, so that
    // the same characters are not counted twice.
    if ( right && lspaces < len )
      for(i=len-1; i>=0 && (at(i)==' '||at(i)=='\t'||at(i)=='\r'||at(i)=='\n'); rspaces++,i--);

    *this = substr(lspaces, len-lspaces-rspaces);
  }

  /** Splits the string at every character contained in \a delims.
    * Adjacent delimiters produce empty elements, and the result always
    * contains at least one element (the whole string when no delimiter
    * is found).
    */
  std::vector<BtlString> split( const BtlString& delims = "\t\n ") const
  {
    std::vector<BtlString> ret;
    size_t start, pos;
    start = 0;
    do
    {
      pos = find_first_of(delims, start);
      if (pos == start)
      {
        // delimiter right at the current position: empty element
        ret.push_back("");
        start = pos + 1;
      }
      else if (pos == npos)
        ret.push_back( substr(start) );
      else
      {
        ret.push_back( substr(start, pos - start) );
        start = pos + 1;
      }
      //start = find_first_not_of(delims, start);
    } while (pos != npos);
    return ret;
  }

  /** \returns true if the string ends with \a str. */
  bool endsWith(const BtlString& str) const
  {
    if(str.size()>this->size())
      return false;
    return this->substr(this->size()-str.size(),str.size()) == str;
  }

  /** \returns true if \a str occurs at a position located inside the string. */
  bool contains(const BtlString& str) const
  {
    return this->find(str)<this->size();
  }

  /** \returns true if the string starts with \a str. */
  bool beginsWith(const BtlString& str) const
  {
    if(str.size()>this->size())
      return false;
    return this->substr(0,str.size()) == str;
  }

  /** Converts the string to lower case in place and returns a copy of it. */
  BtlString toLowerCase( void )
  {
    std::transform(begin(), end(), begin(), static_cast<int(*)(int)>(::tolower) );
    return *this;
  }

  /** Converts the string to upper case in place and returns a copy of it. */
  BtlString toUpperCase( void )
  {
    std::transform(begin(), end(), begin(), static_cast<int(*)(int)>(::toupper) );
    return *this;
  }

  /** Case insensitive comparison.
    */
  bool isEquiv(const BtlString& str) const
  {
    BtlString str0 = *this;
    str0.toLowerCase();
    BtlString str1 = str;
    str1.toLowerCase();
    return str0 == str1;
  }

  /** Decompose the current string as a path and a file.
      For instance: "dir1/dir2/file.ext" leads to path="dir1/dir2/" and filename="file.ext"
      Both '/' and '\\' are accepted as separators; the returned path always
      uses '/'. An absolute path keeps a single leading '/', and an empty
      string yields an empty path and an empty filename.
    */
  void decomposePathAndFile(BtlString& path, BtlString& filename) const
  {
    // split() never returns an empty vector, so back() is always valid.
    std::vector<BtlString> elements = this->split("/\\");
    path = "";
    filename = elements.back();
    elements.pop_back();
    // A leading separator produces an empty first element, which the loop
    // turns into the root "/". No separate root handling is needed: adding
    // one would double the slash and would have to inspect the first
    // character, which does not exist for an empty string.
    for (unsigned int i=0 ; i<elements.size() ; ++i)
      path += elements[i] + "/";
  }
};
// Global benchmark configuration, read from the BTL_CONFIG environment
// variable. The single shared object is the static member `Instance`, which
// must be defined in exactly one translation unit (see the BTL_MAIN macro).
//
// Options recognised in BTL_CONFIG (tokens separated by space, tab or newline;
// each option is matched by prefix):
//   -a name1:name2:...  only run actions whose name contains one of the names
//   -t N                number of tries (parsed with atoi)
//   --overwrite         sets overwriteResults
//   --nocheck           clears checkResults
//   --real              sets realclock
class BtlConfig
{
public:
// NOTE(review): the parsed options are written to `Instance`, not to `*this`.
// This is equivalent only while the object being constructed is `Instance`
// itself; constructing any other BtlConfig would modify the global settings.
BtlConfig()
: overwriteResults(false), checkResults(true), realclock(false), tries(DEFAULT_NB_TRIES)
{
char * _config;
_config = getenv ("BTL_CONFIG");
if (_config!=NULL)
{
std::vector<BtlString> config = BtlString(_config).split(" \t\n");
for (unsigned int i = 0; i<config.size(); i++)
{
if (config[i].beginsWith("-a"))
{
// "-a" needs a following token holding the ':'-separated action names.
if (i+1==config.size())
{
std::cerr << "error processing option: " << config[i] << "\n";
exit(2);
}
Instance.m_selectedActionNames = config[i+1].split(":");
// skip the value token
i += 1;
}
else if (config[i].beginsWith("-t"))
{
// "-t" needs a following token holding the number of tries.
if (i+1==config.size())
{
std::cerr << "error processing option: " << config[i] << "\n";
exit(2);
}
Instance.tries = atoi(config[i+1].c_str());
// skip the value token
i += 1;
}
else if (config[i].beginsWith("--overwrite"))
{
Instance.overwriteResults = true;
}
else if (config[i].beginsWith("--nocheck"))
{
Instance.checkResults = false;
}
else if (config[i].beginsWith("--real"))
{
Instance.realclock = true;
}
}
}
BTL_DISABLE_SSE_EXCEPTIONS();
}
// Returns true when the action `_name` must be skipped: a selection was given
// with "-a" and none of the selected names is contained in `_name`.
// Without a selection, no action is skipped.
BTL_DONT_INLINE static bool skipAction(const std::string& _name)
{
if (Instance.m_selectedActionNames.empty())
return false;
BtlString name(_name);
for (unsigned int i=0; i<Instance.m_selectedActionNames.size(); ++i)
if (name.contains(Instance.m_selectedActionNames[i]))
return false;
return true;
}
// The shared configuration object.
static BtlConfig Instance;
// Set by "--overwrite".
bool overwriteResults;
// Cleared by "--nocheck".
bool checkResults;
// Set by "--real".
bool realclock;
// Set by "-t"; defaults to DEFAULT_NB_TRIES.
int tries;
protected:
// Names given with "-a"; empty means "run every action".
std::vector<BtlString> m_selectedActionNames;
};
#define BTL_MAIN \
BtlConfig BtlConfig::Instance
#endif // BTL_HH

View File

@@ -0,0 +1,54 @@
//=====================================================
// File : init_function.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:18 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef INIT_FUNCTION_HH
#define INIT_FUNCTION_HH
// Initialisation function for vectors: the value of an element is its index.
// Declared inline because it is defined in a header: a non-inline definition
// is duplicated in every translation unit that includes the header and breaks
// the link as soon as there are two of them.
inline double simple_function(int index)
{
  return index;
}
// Initialisation function for matrices: the value of an element is the sum of
// its two indices. The sum is computed in double so that large indices cannot
// overflow int.
// Declared inline because it is defined in a header: a non-inline definition
// is duplicated in every translation unit that includes the header and breaks
// the link as soon as there are two of them.
inline double simple_function(int index_i, int index_j)
{
  return double(index_i)+double(index_j);
}
// Initialisation function for vectors: returns std::rand() scaled to [0,1].
// The index is ignored.
// Declared inline because it is defined in a header: a non-inline definition
// is duplicated in every translation unit that includes the header and breaks
// the link as soon as there are two of them.
inline double pseudo_random(int /*index*/)
{
  return std::rand()/double(RAND_MAX);
}
// Initialisation function for matrices: returns std::rand() scaled to [0,1].
// Both indices are ignored.
// Declared inline because it is defined in a header: a non-inline definition
// is duplicated in every translation unit that includes the header and breaks
// the link as soon as there are two of them.
inline double pseudo_random(int /*index_i*/, int /*index_j*/)
{
  return std::rand()/double(RAND_MAX);
}
// Initialisation function for vectors: every element is 0. The index is ignored.
// Declared inline because it is defined in a header: a non-inline definition
// is duplicated in every translation unit that includes the header and breaks
// the link as soon as there are two of them.
inline double null_function(int /*index*/)
{
  return 0.0;
}
// Initialisation function for matrices: every element is 0. Both indices are ignored.
// Declared inline because it is defined in a header: a non-inline definition
// is duplicated in every translation unit that includes the header and breaks
// the link as soon as there are two of them.
inline double null_function(int /*index_i*/, int /*index_j*/)
{
  return 0.0;
}
#endif

View File

@@ -0,0 +1,64 @@
//=====================================================
// File : init_matrix.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:19 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef INIT_MATRIX_HH
#define INIT_MATRIX_HH
// The Vector class must satisfy the following part of STL vector concept :
// resize() method
// [] operator for setting element
// value_type defined
// Resizes X to `size` elements and sets element `col` to
// init_function(row, col), converted to Vector::value_type.
template<double init_function(int,int), class Vector>
BTL_DONT_INLINE void init_row(Vector & X, int size, int row){

  typedef typename Vector::value_type element_type;

  X.resize(size);

  unsigned int col=0;
  while (col<X.size()){
    X[col]=element_type(init_function(row,col));
    ++col;
  }
}
// Matrix is a Vector of Vector
// The Matrix class must satisfy the following part of STL vector concept :
// resize() method
// [] operator for setting rows
// Resizes A to `size` rows and fills every row, in increasing row order, with
// init_row<init_function>, giving a size x size matrix.
template<double init_function(int,int),class Vector>
BTL_DONT_INLINE void init_matrix(Vector & A, int size){

  A.resize(size);

  unsigned int current_row=0;
  while (current_row<A.size()){
    init_row<init_function>(A[current_row],size,current_row);
    ++current_row;
  }
}
// Builds a symmetric size x size matrix. All rows are resized first; then,
// row by row, the diagonal element is generated, followed by the elements
// left of the diagonal, each of which is mirrored into the upper triangle.
template<double init_function(int,int),class Matrix>
BTL_DONT_INLINE void init_matrix_symm(Matrix& A, int size){

  A.resize(size);

  unsigned int r;

  // Every row must have its final length before mirrored writes start.
  for (r=0; r<A.size() ; ++r){
    A[r].resize(size);
  }

  for (r=0; r<A.size() ; ++r){

    A[r][r] = init_function(r,r);

    unsigned int c=0;
    while (c<r){
      double mirrored = init_function(r,c);
      A[r][c] = A[c][r] = mirrored;
      ++c;
    }
  }
}
#endif

View File

@@ -0,0 +1,37 @@
//=====================================================
// File : init_vector.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:18 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef INIT_VECTOR_HH
#define INIT_VECTOR_HH
// The Vector class must satisfy the following part of STL vector concept :
// resize() method
// [] operator for setting element
// value_type defined
template<double init_function(int), class Vector>
void init_vector(Vector & X, int size){
X.resize(size);
for (unsigned int i=0;i<X.size();i++){
X[i]=typename Vector::value_type(init_function(i));
}
}
#endif

View File

@@ -0,0 +1,80 @@
//=====================================================
// File : bench_static.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:16 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef BENCH_STATIC_HH
#define BENCH_STATIC_HH
#include "btl.hh"
#include "bench_parameter.hh"
#include <iostream>
#include "utilities.h"
#include "xy_file.hh"
#include "static/static_size_generator.hh"
#include "timers/portable_perf_analyzer.hh"
// #include "timers/mixed_perf_analyzer.hh"
// #include "timers/x86_perf_analyzer.hh"
using namespace std;
// Benchmarks a fixed-size action for every size handled by
// static_size_generator, up to TINY_MV_MAX_SIZE, and dumps the result to
// "bench_<action name>.dat". The action name is taken from the instantiation
// on Interface<REAL_TYPE,10>. Does nothing when BtlConfig::skipAction rejects
// that name.
template <template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface>
BTL_DONT_INLINE void bench_static(void)
{
  // Instantiation used only to obtain the action name.
  typedef Action<Interface<REAL_TYPE,10> > named_action;

  if (!BtlConfig::skipAction(named_action::name()))
  {
    std::string filename = "bench_" + named_action::name() + ".dat";

    INFOS("starting " << filename);

    std::vector<double> tab_sizes;
    std::vector<double> tab_mflops;

    static_size_generator<TINY_MV_MAX_SIZE,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops);

    dump_xy_file(tab_sizes,tab_mflops,filename);
  }
}
// default Perf Analyzer
// Convenience overload: runs bench_static with the default timer policy
// (Portable_Perf_Analyzer). The alternatives are kept below as commented-out
// calls.
template <template<class> class Action, template<class,int> class Interface>
BTL_DONT_INLINE void bench_static(void)
{
bench_static<Portable_Perf_Analyzer,Action,Interface>();
//bench_static<Mixed_Perf_Analyzer,Action,Interface>();
//bench_static<X86_Perf_Analyzer,Action,Interface>();
}
#endif

View File

@@ -0,0 +1,66 @@
//=====================================================
// File : intel_bench_fixed_size.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, mar déc 3 18:59:37 CET 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef _BENCH_FIXED_SIZE_HH_
#define _BENCH_FIXED_SIZE_HH_
#include "utilities.h"
#include "function_time.hh"
// Measures the MFlops rate of Action for a problem of the given size.
//
// nb_init and nb_calc are in/out repetition counts: each one is doubled until
// the corresponding timed batch (time_init / time_calculate) lasts at least
// MIN_TIME, and the final values are left in the caller's variables.
// The per-call initialisation time is subtracted from the per-call calculation
// time before converting action.nb_op_base() into MFlops.
template <class Action>
double bench_fixed_size(int size, unsigned long long & nb_calc,unsigned long long & nb_init)
{
  Action action(size);

  // Baseline: grow the batch until it is long enough to be measured.
  double init_elapsed = time_init(nb_init,action);
  for ( ; init_elapsed < MIN_TIME; init_elapsed = time_init(nb_init,action))
    nb_init *= 2;
  const double per_init = init_elapsed/(double(nb_init));

  // Same procedure for the calculation batch.
  double calc_elapsed = time_calculate(nb_calc,action);
  for ( ; calc_elapsed < MIN_TIME; calc_elapsed = time_calculate(nb_calc,action))
    nb_calc *= 2;

  INFOS("nb_init="<<nb_init);
  INFOS("nb_calc="<<nb_calc);

  const double per_calc = calc_elapsed/(double(nb_calc));

  action.check_result();

  const double corrected = per_calc-per_init;
  return action.nb_op_base()/(corrected*1000000.0);
}
#endif

View File

@@ -0,0 +1,57 @@
//=====================================================
// File : static_size_generator.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, mar déc 3 18:59:36 CET 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef _STATIC_SIZE_GENERATOR_HH
#define _STATIC_SIZE_GENERATOR_HH
#include <vector>
using namespace std;
//recursive generation of statically defined matrix and vector sizes
template <int SIZE,template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface>
struct static_size_generator{
static void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
{
tab_sizes.push_back(SIZE);
std::cout << tab_sizes.back() << " \t" << std::flush;
Perf_Analyzer<Action<Interface<REAL_TYPE,SIZE> > > perf_action;
tab_mflops.push_back(perf_action.eval_mflops(SIZE));
std::cout << tab_mflops.back() << " MFlops" << std::endl;
static_size_generator<SIZE-1,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops);
};
};
//recursion end
template <template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface>
struct static_size_generator<1,Perf_Analyzer,Action,Interface>{
static void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
{
tab_sizes.push_back(1);
Perf_Analyzer<Action<Interface<REAL_TYPE,1> > > perf_action;
tab_mflops.push_back(perf_action.eval_mflops(1));
};
};
#endif

View File

@@ -0,0 +1,82 @@
//=====================================================
// File : STL_perf_analyzer.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef _STL_PERF_ANALYSER_HH
#define _STL_PERF_ANALYSER_HH
#include "STL_timer.hh"
#include "bench_parameter.hh"
// Performance analyzer built on STL_Timer.
// eval_mflops() times "initialize" alone and "initialize + calculate", and
// converts the difference into MFlops using ACTION::nb_op_base().
template<class ACTION>
class STL_Perf_Analyzer{
public:
  // nb_sample is the number of timer trials requested for each measurement.
  // Members are listed in declaration order, which is the order in which
  // they are actually constructed.
  STL_Perf_Analyzer(unsigned long long nb_sample=DEFAULT_NB_SAMPLE)
    : _chronos(), _nb_sample(nb_sample)
  {
    MESSAGE("STL_Perf_Analyzer Ctor");
  }

  // Copying is not supported: reports the problem and terminates the program.
  STL_Perf_Analyzer( const STL_Perf_Analyzer & ){
    INFOS("Copy Ctor not implemented");
    exit(0);
  }

  ~STL_Perf_Analyzer( void ){
    MESSAGE("STL_Perf_Analyzer Dtor");
  }

  // Returns the MFlops rate measured for a problem of the given size.
  inline double eval_mflops(int size)
  {
    ACTION action(size);

    // Baseline: cost of initialize() on its own.
    _chronos.start_baseline(_nb_sample);
    do {
      action.initialize();
    } while (_chronos.check());
    const double init_only=_chronos.get_time();

    // Full run: initialize() followed by calculate().
    _chronos.start(_nb_sample);
    do {
      action.initialize();
      action.calculate();
    } while (_chronos.check());
    const double init_and_calc=_chronos.get_time();

    const double corrected_time=init_and_calc-init_only;

    return action.nb_op_base()/(corrected_time*1000000.0);
  }

private:
  STL_Timer _chronos;
  unsigned long long _nb_sample;
};
#endif

View File

@@ -0,0 +1,78 @@
//=====================================================
// File : STL_Timer.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// STL Timer class. Adapted (L.P.) from the timer class by Musser et al.
// described in the book "STL Tutorial and Reference Guide".
// Defines a timer class for analyzing algorithm performance.
#include <iostream>
#include <iomanip>
#include <vector>
#include <map>
#include <algorithm>
using namespace std;
// Coarse timer that measures a loop by counting how many iterations fit
// between two changes of the value returned by time(0).
//
// Usage: call start(r), then run the loop body while check() returns true,
// then read get_time().
class STL_Timer {
public:
  // Default constructor: every member gets a defined value so that calling
  // check() or get_time() before start() is harmless.
  STL_Timer()
    : reps(0), iterations(), initial(0), final(0), count(0),
      baseline(false), baseline_time(0.0)
  {}

  // Start a series of r trials:
  void start(unsigned int r){
    reps = r;
    count = 0;
    iterations.clear();
    iterations.reserve(reps);
    initial = time(0);
  }

  // Start a series of r trials to determine baseline time:
  void start_baseline(unsigned int r)
  {
    baseline = true;
    start(r);
  }

  // Counts one loop iteration. Each time time(0) has advanced, the number of
  // iterations counted since the previous advance is stored as one trial.
  // Returns true while fewer than reps trials have been recorded, i.e. while
  // the caller has to keep looping.
  bool check()
  {
    ++count;
    final = time(0);
    if (initial < final) {
      iterations.push_back(count);
      initial = final;
      count = 0;
    }
    return (iterations.size() < reps);
  }

  // Returns 1 / (iteration count of the median trial), i.e. the time of one
  // iteration expressed in ticks of time(0) (normally seconds).
  // Returns 0.0 when no trial has been recorded, instead of reading past the
  // end of the vector.
  double get_time( void )
  {
    if (iterations.empty())
      return 0.0;
    std::sort(iterations.begin(), iterations.end());
    // size()/2 equals reps/2 after a completed series and stays in bounds
    // if the series was cut short.
    return 1.0/iterations[iterations.size()/2];
  }

private:
  unsigned int reps; // Number of trials
  // For storing loop iterations of a trial
  std::vector<long> iterations;
  // For saving initial and final times of a trial
  time_t initial, final;
  // For counting loop iterations of a trial
  unsigned long count;
  // true if this is a baseline computation, false otherwise
  bool baseline;
  // For recording the baseline time
  double baseline_time;
};

View File

@@ -0,0 +1,73 @@
//=====================================================
// File : mixed_perf_analyzer.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, mar déc 3 18:59:36 CET 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef _MIXED_PERF_ANALYSER_HH
#define _MIXED_PERF_ANALYSER_HH
#include "x86_perf_analyzer.hh"
#include "portable_perf_analyzer.hh"
// Uses the portable analyzer for long calculations and the x86 analyzer for
// short ones. The portable analyzer is used first; once it has needed more
// than DEFAULT_NB_SAMPLE repetitions, all later calls go to the x86 analyzer.
template<class Action>
class Mixed_Perf_Analyzer{
public:
  // Members are listed in declaration order, which is the order in which
  // they are actually constructed.
  Mixed_Perf_Analyzer( void ) : _ppa(), _x86pa(), _use_ppa(true)
  {
    MESSAGE("Mixed_Perf_Analyzer Ctor");
  }

  // Copying is not supported: reports the problem and terminates the program.
  Mixed_Perf_Analyzer( const Mixed_Perf_Analyzer & ){
    INFOS("Copy Ctor not implemented");
    exit(0);
  }

  ~Mixed_Perf_Analyzer( void ){
    MESSAGE("Mixed_Perf_Analyzer Dtor");
  }

  // Returns the MFlops rate for the given size, measured by whichever
  // analyzer is currently selected.
  inline double eval_mflops(int size)
  {
    if (!_use_ppa)
      return _x86pa.eval_mflops(size);

    const double result=_ppa.eval_mflops(size);
    // Too many repetitions were needed: switch analyzers for later calls.
    if (_ppa.get_nb_calc()>DEFAULT_NB_SAMPLE)
      _use_ppa=false;
    return result;
  }

private:
  Portable_Perf_Analyzer<Action> _ppa;
  X86_Perf_Analyzer<Action> _x86pa;
  bool _use_ppa;
};
#endif

View File

@@ -0,0 +1,103 @@
//=====================================================
// File : portable_perf_analyzer.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef _PORTABLE_PERF_ANALYZER_HH
#define _PORTABLE_PERF_ANALYZER_HH
#include "utilities.h"
#include "timers/portable_timer.hh"
// Performance analyzer built on Portable_Timer.
// eval_mflops() finds a repetition count for which a batch of calculate()
// calls lasts at least MIN_TIME, keeps the fastest of several batches and
// converts it into MFlops using Action::nb_op_base().
template <class Action>
class Portable_Perf_Analyzer{
public:
  Portable_Perf_Analyzer( ):_nb_calc(0), m_time_action(0), _chronos(){
    MESSAGE("Portable_Perf_Analyzer Ctor");
  }

  // Copying is not supported: reports the problem and terminates the program.
  Portable_Perf_Analyzer( const Portable_Perf_Analyzer & ){
    INFOS("Copy Ctor not implemented");
    exit(0);
  }

  ~Portable_Perf_Analyzer(){
    MESSAGE("Portable_Perf_Analyzer Dtor");
  }

  // Returns the MFlops rate measured for a problem of the given size.
  // NOTE(review): _nb_calc and m_time_action are members and are not reset
  // here, so a second call on the same object skips the calibration loop
  // whenever the previous timing already reached MIN_TIME. Confirm that
  // analyzers are meant to be used for a single measurement.
  BTL_DONT_INLINE double eval_mflops(int size)
  {
    Action action(size);

    // Calibration: 1, 2, 4, ... repetitions until the batch lasts MIN_TIME.
    while (m_time_action < MIN_TIME)
    {
      if(_nb_calc==0) _nb_calc = 1;
      else _nb_calc *= 2;
      action.initialize();
      m_time_action = time_calculate(action);
    }

    // optimize: repeat the batch on fresh actions and keep the fastest time,
    // printing the current best rate before each extra try
    for (int i=1; i<BtlConfig::Instance.tries; ++i)
    {
      Action _action(size);
      std::cout << " " << _action.nb_op_base()*_nb_calc/(m_time_action*1e6) << " ";
      _action.initialize();
      m_time_action = std::min(m_time_action, time_calculate(_action));
    }

    double time_action = m_time_action / (double(_nb_calc));

    // check: only small problems are verified
    if (BtlConfig::Instance.checkResults && size<128)
    {
      action.initialize();
      action.calculate();
      action.check_result();
    }
    return action.nb_op_base()/(time_action*1e6);
  }

  // Times _nb_calc consecutive calls of action.calculate(), after one untimed
  // call, and returns the elapsed time reported by the timer.
  BTL_DONT_INLINE double time_calculate(Action & action)
  {
    // untimed first call
    action.calculate();
    _chronos.start();
    // The counter has the same type as _nb_calc: a narrower counter would
    // wrap around and never reach a repetition count above its own range.
    for (unsigned long long ii=0;ii<_nb_calc;ii++)
    {
      action.calculate();
    }
    _chronos.stop();
    return _chronos.user_time();
  }

  // Number of calculate() calls per timed batch chosen by the calibration.
  unsigned long long get_nb_calc()
  {
    return _nb_calc;
  }

private:
  unsigned long long _nb_calc;
  double m_time_action;
  Portable_Timer _chronos;
};
#endif //_PORTABLE_PERF_ANALYZER_HH

View File

@@ -0,0 +1,134 @@
//=====================================================
// File : portable_perf_analyzer.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef _PORTABLE_PERF_ANALYZER_HH
#define _PORTABLE_PERF_ANALYZER_HH
#include "utilities.h"
#include "timers/portable_timer.hh"
// Performance analyzer built on Portable_Timer (older variant).
// eval_mflops() times batches of "initialize + calculate", subtracts the time
// of "initialize" alone and converts the result into MFlops using
// Action::nb_op_base().
template <class Action>
class Portable_Perf_Analyzer{
public:
  Portable_Perf_Analyzer( void ):_nb_calc(1),_nb_init(1),_chronos(){
    MESSAGE("Portable_Perf_Analyzer Ctor");
  }

  // Copying is not supported: reports the problem and terminates the program.
  Portable_Perf_Analyzer( const Portable_Perf_Analyzer & ){
    INFOS("Copy Ctor not implemented");
    exit(0);
  }

  ~Portable_Perf_Analyzer( void ){
    MESSAGE("Portable_Perf_Analyzer Dtor");
  }

  // Returns the MFlops rate measured for a problem of the given size.
  inline double eval_mflops(int size)
  {
    Action action(size);

    // Calibration: double the repetition count until one batch lasts at
    // least MIN_TIME. _nb_calc is a member, so it carries over between calls.
    double time_action = time_calculate(action);
    while (time_action < MIN_TIME)
    {
      _nb_calc *= 2;
      time_action = time_calculate(action);
    }

    // optimize: keep the fastest of NB_TRIES batches
    for (int i=1; i<NB_TRIES; ++i)
      time_action = std::min(time_action, time_calculate(action));

    time_action = time_action / (double(_nb_calc));

    action.check_result();

    // Baseline: cost of initialize() alone. No calibration is done for it
    // in this version, so _nb_init keeps the value set by the constructor.
    double time_baseline = time_init(action);
    for (int i=1; i<NB_TRIES; ++i)
      time_baseline = std::min(time_baseline, time_init(action));

    time_baseline = time_baseline/(double(_nb_init));

    time_action = time_action - time_baseline;

    return action.nb_op_base()/(time_action*1000000.0);
  }

  // Times _nb_init consecutive calls of action.initialize().
  inline double time_init(Action & action)
  {
    _chronos.start();
    // Counter typed like _nb_init: a signed int counter would overflow before
    // reaching repetition counts above INT_MAX.
    for (unsigned long long ii=0; ii<_nb_init; ii++)
      action.initialize();
    _chronos.stop();
    return _chronos.user_time();
  }

  // Times _nb_calc consecutive "initialize + calculate" pairs.
  inline double time_calculate(Action & action)
  {
    _chronos.start();
    // Counter typed like _nb_calc, for the same reason as in time_init().
    for (unsigned long long ii=0;ii<_nb_calc;ii++)
    {
      action.initialize();
      action.calculate();
    }
    _chronos.stop();
    return _chronos.user_time();
  }

  // Number of "initialize + calculate" pairs per timed batch.
  unsigned long long get_nb_calc( void )
  {
    return _nb_calc;
  }

private:
  unsigned long long _nb_calc;
  unsigned long long _nb_init;
  Portable_Timer _chronos;
};
#endif //_PORTABLE_PERF_ANALYZER_HH

View File

@@ -0,0 +1,187 @@
//=====================================================
// File : portable_timer.hh
// Author : L. Plagne <laurent.plagne@edf.fr> from boost lib
// Copyright (C) EDF R&D, lun sep 30 14:23:17 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
// simple_time extracted from the boost library
//
#ifndef _PORTABLE_TIMER_HH
#define _PORTABLE_TIMER_HH
#include <ctime>
#include <cstdlib>
#include <time.h>
#define USEC_IN_SEC 1000000
// timer -------------------------------------------------------------------//
// A timer object measures CPU time.
#if defined(_MSC_VER)
#define NOMINMAX
#include <windows.h>
/*#ifndef hr_timer
#include "hr_time.h"
#define hr_timer
#endif*/
class Portable_Timer
{
public:
typedef struct {
LARGE_INTEGER start;
LARGE_INTEGER stop;
} stopWatch;
Portable_Timer()
{
startVal.QuadPart = 0;
stopVal.QuadPart = 0;
QueryPerformanceFrequency(&frequency);
}
void start() { QueryPerformanceCounter(&startVal); }
void stop() { QueryPerformanceCounter(&stopVal); }
double elapsed() {
LARGE_INTEGER time;
time.QuadPart = stopVal.QuadPart - startVal.QuadPart;
return LIToSecs(time);
}
double user_time() { return elapsed(); }
private:
double LIToSecs(LARGE_INTEGER& L) {
return ((double)L.QuadPart /(double)frequency.QuadPart) ;
}
LARGE_INTEGER startVal;
LARGE_INTEGER stopVal;
LARGE_INTEGER frequency;
}; // Portable_Timer
#elif defined(__APPLE__)
#include <CoreServices/CoreServices.h>
#include <mach/mach_time.h>
class Portable_Timer
{
public:
Portable_Timer()
{
}
void start()
{
m_start_time = double(mach_absolute_time())*1e-9;;
}
void stop()
{
m_stop_time = double(mach_absolute_time())*1e-9;;
}
double elapsed()
{
return user_time();
}
double user_time()
{
return m_stop_time - m_start_time;
}
private:
double m_stop_time, m_start_time;
}; // Portable_Timer (Apple)
#else
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include <sys/times.h>
// Implementation for the remaining platforms, based on clock_gettime().
class Portable_Timer
{
public:

  // Uses CLOCK_REALTIME when BtlConfig::Instance.realclock is set and
  // CLOCK_PROCESS_CPUTIME_ID otherwise.
  Portable_Timer()
    : m_clkid(BtlConfig::Instance.realclock ? CLOCK_REALTIME : CLOCK_PROCESS_CPUTIME_ID)
  {}

  // Uses the given clock id.
  Portable_Timer(int clkid) : m_clkid(clkid)
  {}

  // Records the current clock value as the beginning of the measured section.
  void start()
  {
    m_start_time = now();
  }

  // Records the current clock value as the end of the measured section.
  void stop()
  {
    m_stop_time = now();
  }

  // Same value as user_time().
  double elapsed()
  {
    return user_time();
  }

  // Seconds between the last start() and the last stop().
  double user_time()
  {
    return m_stop_time - m_start_time;
  }

private:

  // Current value of the selected clock, in seconds.
  double now() const
  {
    timespec ts;
    clock_gettime(m_clkid, &ts);
    return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
  }

  int m_clkid;
  double m_stop_time, m_start_time;

}; // Portable_Timer (Linux)
#endif
#endif // PORTABLE_TIMER_HPP

View File

@@ -0,0 +1,108 @@
//=====================================================
// File : x86_perf_analyzer.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef _X86_PERF_ANALYSER_HH
#define _X86_PERF_ANALYSER_HH
#include "x86_timer.hh"
#include "bench_parameter.hh"
// Performance analyzer built on X86_Timer.
// eval_mflops() samples "initialize + calculate" and "initialize" alone,
// takes the shortest click count of each series, and converts the averaged
// difference into MFlops using ACTION::nb_op_base().
template<class ACTION>
class X86_Perf_Analyzer{
public:
  // nb_sample is the number of timed samples per series. The timer frequency
  // is calibrated on construction. Members are listed in declaration order,
  // which is the order in which they are actually constructed.
  X86_Perf_Analyzer( unsigned long long nb_sample=DEFAULT_NB_SAMPLE)
    : _chronos(), _nb_sample(nb_sample)
  {
    MESSAGE("X86_Perf_Analyzer Ctor");
    _chronos.find_frequency();
  }

  // Copying is not supported: reports the problem and terminates the program.
  X86_Perf_Analyzer( const X86_Perf_Analyzer & ){
    INFOS("Copy Ctor not implemented");
    exit(0);
  }

  ~X86_Perf_Analyzer( void ){
    MESSAGE("X86_Perf_Analyzer Dtor");
  }

  // Returns the MFlops rate measured for a problem of the given size.
  inline double eval_mflops(int size)
  {
    ACTION action(size);

    int nb_loop=5;
    double calculate_time=0.0;
    double baseline_time=0.0;

    for (int j=0 ; j < nb_loop ; j++){

      // Series 1: initialize + calculate.
      _chronos.clear();
      // The sample counters have the same type as _nb_sample, which avoids
      // the signed/unsigned comparison and int overflow of an int counter.
      for(unsigned long long i=0 ; i < _nb_sample ; i++)
      {
        _chronos.start();
        action.initialize();
        action.calculate();
        _chronos.stop();
        _chronos.add_get_click();
      }
      calculate_time += double(_chronos.get_shortest_clicks())/_chronos.frequency();

      // The result is only verified once, after the first series.
      if (j==0) action.check_result();

      // Series 2: initialize alone (baseline).
      _chronos.clear();
      for(unsigned long long i=0 ; i < _nb_sample ; i++)
      {
        _chronos.start();
        action.initialize();
        _chronos.stop();
        _chronos.add_get_click();
      }
      baseline_time+=double(_chronos.get_shortest_clicks())/_chronos.frequency();
    }

    double corrected_time = (calculate_time-baseline_time)/double(nb_loop);

    return action.nb_op_base()/(corrected_time*1000000.0);
  }

private:
  X86_Timer _chronos;
  unsigned long long _nb_sample;
};
#endif

View File

@@ -0,0 +1,246 @@
//=====================================================
// File : x86_timer.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, mar déc 3 18:59:35 CET 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef _X86_TIMER_HH
#define _X86_TIMER_HH
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include <sys/times.h>
//#include "system_time.h"
#define u32 unsigned int
#include <asm/msr.h>
#include "utilities.h"
#include <map>
#include <fstream>
#include <string>
#include <iostream>
// CPU frequency of the machine, in Hz
//#define FREQUENCY 648000000
//#define FREQUENCY 1400000000
#define FREQUENCY 1695000000
using namespace std;
// Timer based on the x86 time stamp counter, read through the rdtsc() macro.
// Besides single start/stop measurements it accumulates a histogram of the
// measured click counts (add_get_click) together with the history of its
// mean, minimum and most frequent value.
class X86_Timer {
public :
  X86_Timer( void ):_frequency(FREQUENCY),_nb_sample(0)
  {
    _click_start.n64=0;
    _click_stop.n64=0;
    MESSAGE("X86_Timer Default Ctor");
  }

  // Stores the counter value at the beginning of the measured section.
  inline void start( void ){
    rdtsc(_click_start.n32[0],_click_start.n32[1]);
  }

  // Stores the counter value at the end of the measured section.
  inline void stop( void ){
    rdtsc(_click_stop.n32[0],_click_stop.n32[1]);
  }

  // Current frequency estimate in Hz (FREQUENCY until find_frequency() ran).
  inline double frequency( void ){
    return _frequency;
  }

  // Clicks between start() and stop(), divided by the compile-time FREQUENCY.
  // NOTE(review): this deliberately keeps using FREQUENCY and not the
  // calibrated _frequency, because find_frequency() relies on it.
  double get_elapsed_time_in_second( void ){
    return (_click_stop.n64-_click_start.n64)/double(FREQUENCY);
  }

  // Clicks between the last start() and the last stop().
  unsigned long long get_click( void ){
    return (_click_stop.n64-_click_start.n64);
  }

  // Calibrates _frequency by counting the clicks elapsed during one tick of
  // time(0).
  // NOTE(review): the formula yields clicks per tick only while _frequency
  // still equals FREQUENCY, i.e. on the first call. Confirm that a single
  // call per timer is intended.
  inline void find_frequency( void ){
    time_t initial, final;
    int dummy=2;

    // Busy-wait until time(0) changes.
    initial = time(0);
    start();
    do {
      dummy+=2;
    }
    while(time(0)==initial);

    // We are now at the very beginning of a one-second tick.
    initial = time(0);
    start();
    do {
      dummy+=2;
    }
    while(time(0)==initial);
    final=time(0);
    stop();

    _frequency=_frequency*get_elapsed_time_in_second()/double(final-initial);
  }

  // Adds the last start/stop measurement to the histogram and appends the
  // updated statistics to the history vectors.
  void add_get_click( void ){
    _nb_sample++;
    _counted_clicks[get_click()]++;
    fill_history_clicks();
  }

  // Writes the histogram as "<clicks> <occurrences>" lines.
  void dump_statistics(std::string filemane){
    std::ofstream outfile (filemane.c_str(),std::ios::out) ;

    std::map<unsigned long long , unsigned long long>::iterator itr;
    for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end() ; itr++)
    {
      outfile << (*itr).first << " " << (*itr).second << std::endl ;
    }

    outfile.close();
  }

  // Writes one "<sample index> <mean> <shortest> <most occurred>" line per
  // recorded sample.
  void dump_history(std::string filemane){
    std::ofstream outfile (filemane.c_str(),std::ios::out) ;

    // Unsigned index, matching the type returned by size().
    for(std::vector<double>::size_type i=0 ; i<_history_mean_clicks.size() ; i++)
    {
      outfile << i << " "
              << _history_mean_clicks[i] << " "
              << _history_shortest_clicks[i] << " "
              << _history_most_occured_clicks[i] << std::endl ;
    }

    outfile.close();
  }

  // Mean click count over all samples of the histogram.
  double get_mean_clicks( void ){
    std::map<unsigned long long,unsigned long long>::iterator itr;

    unsigned long long mean_clicks=0;
    for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end() ; itr++)
    {
      mean_clicks+=(*itr).second*(*itr).first;
    }

    return mean_clicks/double(_nb_sample);
  }

  // Smallest click count of the histogram (the map is ordered by click
  // count). Returns 0 when no sample has been recorded, instead of
  // dereferencing the end iterator.
  double get_shortest_clicks( void ){
    if (_counted_clicks.empty())
      return 0.0;
    return double((*_counted_clicks.begin()).first);
  }

  // Appends the current mean / shortest / most occurred values to the history.
  void fill_history_clicks( void ){
    _history_mean_clicks.push_back(get_mean_clicks());
    _history_shortest_clicks.push_back(get_shortest_clicks());
    _history_most_occured_clicks.push_back(get_most_occured_clicks());
  }

  // Click count with the highest number of occurrences; on ties the largest
  // click count wins because of the "<=" comparison. Returns 0 when empty.
  double get_most_occured_clicks( void ){
    unsigned long long moc=0;
    unsigned long long max_occurence=0;

    std::map<unsigned long long,unsigned long long>::iterator itr;
    for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end() ; itr++)
    {
      if (max_occurence<=(*itr).second){
        max_occurence=(*itr).second;
        moc=(*itr).first;
      }
    }

    return double(moc);
  }

  // Drops the histogram, the history and the sample count.
  void clear( void )
  {
    _counted_clicks.clear();

    _history_mean_clicks.clear();
    _history_shortest_clicks.clear();
    _history_most_occured_clicks.clear();

    _nb_sample=0;
  }

private :
  // 64-bit counter value, also accessible as the two 32-bit halves written
  // by rdtsc(low, high). The halves are unsigned int so that together they
  // overlay n64 exactly; with unsigned long they are 64 bits wide on LP64
  // platforms and n64 would only see the low word.
  union Click
  {
    unsigned int n32[2] ;
    unsigned long long n64 ;
  };

  Click _click_start;
  Click _click_stop;

  double _frequency ;

  // click count -> number of samples with that click count
  std::map<unsigned long long,unsigned long long> _counted_clicks;

  std::vector<double> _history_mean_clicks;
  std::vector<double> _history_shortest_clicks;
  std::vector<double> _history_most_occured_clicks;

  unsigned long long _nb_sample;
};
#endif

View File

@@ -0,0 +1,70 @@
//=====================================================
// File : size_lin_log.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, mar déc 3 18:59:37 CET 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef SIZE_LIN_LOG
#define SIZE_LIN_LOG
#include "size_log.hh"
// Fills X with nb_point problem sizes: 1, 2, 3, ... while at most ten points
// are requested; otherwise the nine sizes 1..9 followed by nb_point-9 sizes
// produced by size_log() between 10 and size_max.
// The size_min argument is ignored.
template<class Vector>
void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max, Vector & X)
{
  int log_start=10;    // first size of the logarithmic part
  int linear_count=9;  // number of leading linear sizes (1..9)

  X.resize(nb_point);

  // Few points: purely linear sampling.
  if (nb_point<=log_start){
    for (int k=0;k<nb_point;k++){
      X[k]=k+1;
    }
    return;
  }

  for (int k=0;k<linear_count;k++){
    X[k]=k+1;
  }

  const int log_count=nb_point-linear_count;
  Vector log_size;
  size_log(log_count,log_start,size_max,log_size);

  for (int k=0;k<log_count;k++){
    X[linear_count+k]=log_size[k];
  }
}
#endif

View File

@@ -0,0 +1,54 @@
//=====================================================
// File : size_log.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:17 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef SIZE_LOG
#define SIZE_LOG
#include "math.h"
// The Vector class must satisfy the following part of the STL vector concept:
//  - a resize() method,
//  - operator[] for setting elements,
//  - elements that are int compatible.
//
// Fills X with nb_point sizes spaced evenly on a logarithmic scale between
// size_min and size_max. Each size is truncated to int after the float
// computation.
// With a single point the result is the size derived from size_min; with
// nb_point <= 0 no element is written.
template<class Vector>
void size_log(const int nb_point, const int size_min, const int size_max, Vector & X)
{
  X.resize(nb_point);

  const float ls_min=log(float(size_min));
  const float ls_max=log(float(size_max));

  // A single point leaves no interval to divide: use a zero step instead of
  // dividing by nb_point-1 == 0, which gave NaN and an undefined conversion
  // to int.
  const float delta_ls=(nb_point>1) ? (ls_max-ls_min)/(float(nb_point-1)) : 0.0f;

  for (int i=0;i<nb_point;i++){
    const float ls = ls_min + float(i)*delta_ls ;
    X[i]=int(exp(ls));
  }
}
#endif

View File

@@ -0,0 +1,90 @@
//=============================================================================
// File : utilities.h
// Created : mar jun 19 13:18:14 CEST 2001
// Author : Antoine YESSAYAN, Paul RASCLE, EDF
// Project : SALOME
// Copyright : EDF 2001
// $Header$
//=============================================================================
/* --- Definition macros file to print information if _DEBUG_ is defined --- */
# ifndef UTILITIES_H
# define UTILITIES_H
# include <stdlib.h>
//# include <iostream> ok for gcc3.01
# include <iostream>
/* --- INFOS is always defined (without _DEBUG_): to be used for warnings, with release version --- */
/* HEREWEARE flushes cout, then writes "<file> [<line>] : " to cerr.
   It expands to two statements that are not wrapped in braces.
   cout, cerr, flush and endl are used unqualified here, so they must be
   visible unqualified wherever these macros are expanded. */
# define HEREWEARE cout<<flush ; cerr << __FILE__ << " [" << __LINE__ << "] : " << flush ;
/* INFOS(chain) writes the location prefix followed by `chain` and a newline
   to cerr; `chain` may itself be a sequence of << insertions. */
# define INFOS(chain) {HEREWEARE ; cerr << chain << endl ;}
/* PYSCRIPT(chain) flushes cout, then writes `chain` to cerr behind a
   "---PYSCRIPT--- " tag. */
# define PYSCRIPT(chain) {cout<<flush ; cerr << "---PYSCRIPT--- " << chain << endl ;}
/* --- To print date and time of compilation of current source on stdout --- */
/* COMPILER names the compiler detected from its predefined macros.
   Each definition ends with a ';' which is part of the macro expansion. */
# if defined ( __GNUC__ )
# define COMPILER "g++" ;
# elif defined ( __sun )
# define COMPILER "CC" ;
# elif defined ( __KCC )
# define COMPILER "KCC" ;
# elif defined ( __PGI )
# define COMPILER "pgCC" ;
# else
# define COMPILER "undefined" ;
# endif
/* Refuse to silently redefine INFOS_COMPILATION. */
# ifdef INFOS_COMPILATION
# error INFOS_COMPILATION already defined
# endif
/* INFOS_COMPILATION flushes cerr, then prints on cout the file and line,
   the compiler name and the compilation date and time, followed by blank
   lines. */
# define INFOS_COMPILATION {\
cerr << flush;\
cout << __FILE__ ;\
cout << " [" << __LINE__ << "] : " ;\
cout << "COMPILED with " << COMPILER ;\
cout << ", " << __DATE__ ; \
cout << " at " << __TIME__ << endl ;\
cout << "\n\n" ;\
cout << flush ;\
}
# ifdef _DEBUG_
/* --- the following MACROS are useful at debug time --- */
/* Stream names are std::-qualified so the macros do not require `using namespace std`.
   Every statement-like macro taking an argument is wrapped in braces so that it behaves
   as ONE statement: `if (c) INTERRUPTION(1);` must not call exit() when `c` is false. */
/* HERE: flushes stdout, then writes "- Trace <file> [<line>] : " to stderr and flushes it. */
# define HERE std::cout << std::flush ; std::cerr << "- Trace " << __FILE__ << " [" << __LINE__ << "] : " << std::flush ;
/* SCRUTE(var): traces "<expression text>=<value>" on stderr. */
# define SCRUTE(var) {HERE ; std::cerr << #var << "=" << var << std::endl ;}
/* MESSAGE(chain): traces `chain` (a <<-separated sequence) on stderr. */
# define MESSAGE(chain) {HERE ; std::cerr << chain << std::endl ;}
/* INTERRUPTION(code): traces the return code on stderr, then terminates the program with exit(code). */
# define INTERRUPTION(code) {HERE ; std::cerr << "INTERRUPTION return code= " << code << std::endl ; exit(code) ;}
# ifndef ASSERT
/* ASSERT(condition): if `condition` is false, reports it on stderr and exits with code 1.
   The outer braces keep a following `else` from binding to the macro's internal `if`. */
# define ASSERT(condition) {if (!(condition)){ HERE ; std::cerr << "CONDITION " << #condition << " NOT VERIFIED"<< std::endl ; INTERRUPTION(1) ;}}
# endif /* ASSERT */
/* REPERE: flushes stdout, then writes a separator line to stderr. */
#define REPERE std::cout << std::flush ; std::cerr << " --------------" << std::endl << std::flush ;
/* BEGIN_OF / END_OF: separator-framed "Begin of: ..." / "Normal end of: ..." trace messages. */
#define BEGIN_OF(chain) {REPERE ; HERE ; std::cerr << "Begin of: " << chain << std::endl ; REPERE ; }
#define END_OF(chain) {REPERE ; HERE ; std::cerr << "Normal end of: " << chain << std::endl ; REPERE ; }
# else /* ifdef _DEBUG_*/
/* Without _DEBUG_ every debug macro expands to nothing, so its arguments are not evaluated. */
# define HERE
# define SCRUTE(var)
# define MESSAGE(chain)
# define INTERRUPTION(code)
# ifndef ASSERT
# define ASSERT(condition)
# endif /* ASSERT */
#define REPERE
#define BEGIN_OF(chain)
#define END_OF(chain)
# endif /* ifdef _DEBUG_*/
# endif /* ifndef UTILITIES_H */

View File

@@ -0,0 +1,75 @@
//=====================================================
// File : dump_file_x_y.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:20 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef XY_FILE_HH
#define XY_FILE_HH
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// Reads whitespace-separated "size mflops" pairs from the text file `filename`.
//
// Each pair that is read successfully is APPENDED to `tab_sizes` / `tab_mflops`
// (the vectors are not cleared first). Reading stops at the first extraction
// that fails, i.e. at end of file or at the first malformed token.
//
// Returns false if the file cannot be opened (an error is reported through
// INFOS unless `quiet` is true); returns true otherwise, even when no pair
// was read.
//
// Declared `inline` because this function is defined in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition errors at link time.
inline bool read_xy_file(const std::string & filename, std::vector<int> & tab_sizes,
                         std::vector<double> & tab_mflops, bool quiet = false)
{
  std::ifstream input_file(filename.c_str(), std::ios::in);

  if (!input_file) {
    if (!quiet) {
      INFOS("!!! Error opening "<<filename);
    }
    return false;
  }

  int nb_point = 0; // number of pairs read; only reported through the SCRUTE trace
  int size = 0;
  double mflops = 0;

  while (input_file >> size >> mflops) {
    ++nb_point;
    tab_sizes.push_back(size);
    tab_mflops.push_back(mflops);
  }

  SCRUTE(nb_point);

  // No explicit close(): the std::ifstream destructor closes the file.
  return true;
}
// The Vector_A and Vector_B classes must satisfy the following part of the STL vector concept:
// a size() method (called on X only),
// a const [] operator for reading elements,
// and the element types must define the << stream-output operator
using namespace std;
// Writes the pairs (X[i], Y[i]) to the text file `filename`, one "x y" pair per
// line, for i in [0, X.size()). The file is opened with std::ios::out.
//
// Only X.size() is consulted: Y is indexed over the same range, so the caller
// must make sure Y holds at least X.size() elements.
//
// Standard-library names are std::-qualified so the template does not depend
// on a `using namespace std` directive, and lines end with '\n' rather than
// std::endl to avoid flushing the stream after every pair (the resulting file
// content is identical).
template<class Vector_A, class Vector_B>
void dump_xy_file(const Vector_A & X, const Vector_B & Y, const std::string & filename){
  std::ofstream outfile(filename.c_str(), std::ios::out);

  const int size = static_cast<int>(X.size());
  for (int i = 0; i < size; ++i)
    outfile << X[i] << " " << Y[i] << '\n';

  outfile.close();
}
#endif

View File

@@ -0,0 +1,47 @@
# One benchmark executable is declared per BLAS implementation that is found.
# All of them compile the same main.cpp; the backend is selected through the
# CBLASNAME compile definition, and HAS_LAPACK=1 is added for the backends
# listed with it below.
# NOTE(review): BUILD_btl_<name> is presumably set by btl_add_bench() - confirm
# against its definition.

# --- ATLAS ---
find_package(ATLAS)
if(ATLAS_FOUND)
  btl_add_bench(btl_atlas main.cpp)
  if(BUILD_btl_atlas)
    target_link_libraries(btl_atlas ${ATLAS_LIBRARIES})
    set_target_properties(btl_atlas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1")
  endif()
endif()

# --- Intel MKL ---
find_package(MKL)
if(MKL_FOUND)
  btl_add_bench(btl_mkl main.cpp)
  if(BUILD_btl_mkl)
    target_link_libraries(btl_mkl ${MKL_LIBRARIES})
    set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1")
  endif()
endif()

# --- OpenBLAS (HAS_LAPACK is not defined for this backend) ---
find_package(OPENBLAS)
if(OPENBLAS_FOUND)
  btl_add_bench(btl_openblas main.cpp)
  if(BUILD_btl_openblas)
    target_link_libraries(btl_openblas ${OPENBLAS_LIBRARIES})
    set_target_properties(btl_openblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=OPENBLAS")
  endif()
endif()

# --- ACML ---
find_package(ACML)
if(ACML_FOUND)
  btl_add_bench(btl_acml main.cpp)
  if(BUILD_btl_acml)
    target_link_libraries(btl_acml ${ACML_LIBRARIES})
    set_target_properties(btl_acml PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ACML -DHAS_LAPACK=1")
  endif()
endif()

# --- Eigen's own BLAS/LAPACK implementation ---
if(Eigen_SOURCE_DIR AND CMAKE_Fortran_COMPILER_WORKS)
  # we are inside Eigen and blas/lapack interface is compilable
  include_directories(${Eigen_SOURCE_DIR})
  btl_add_bench(btl_eigenblas main.cpp)
  if(BUILD_btl_eigenblas)
    target_link_libraries(btl_eigenblas eigen_blas eigen_lapack)
    set_target_properties(btl_eigenblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=EigenBLAS")
  endif()
endif()

View File

@@ -0,0 +1,675 @@
#ifndef BLAS_H
#define BLAS_H
/* BLASFUNC(name): builds the linker symbol of a BLAS routine by token-pasting a
   trailing underscore onto its name, e.g. BLASFUNC(ddot) expands to ddot_. */
#define BLASFUNC(FUNC) FUNC##_
/* BLASLONG / BLASULONG: signed / unsigned long integer types. `long long` is used
   when __WIN64__ is defined and plain `long` otherwise.
   NOTE(review): presumably because `long` is narrower than a pointer on 64-bit
   Windows - confirm. */
#ifdef __WIN64__
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
/* Error-handler entry point.
   NOTE(review): the trailing int is presumably the hidden Fortran string length
   of the first argument - confirm against the BLAS implementation in use. */
int BLASFUNC(xerbla)(const char *, int *info, int);
/* Real dot products. All arguments are passed by pointer. */
float BLASFUNC(sdot) (int *, float *, int *, float *, int *);
float BLASFUNC(sdsdot)(int *, float *, float *, int *, float *, int *);
double BLASFUNC(dsdot) (int *, float *, int *, float *, int *);
double BLASFUNC(ddot) (int *, double *, int *, double *, int *);
double BLASFUNC(qdot) (int *, double *, int *, double *, int *);
/* Complex dot products: the prototype shape depends on the Fortran interface
   selected through the F_INTERFACE_* macros. */
#if defined(F_INTERFACE_GFORT) && !defined(__64BIT__)
/* gfortran without __64BIT__: cdotu/cdotc are declared as returning int, while the
   z and x variants return void and take an extra leading double* argument.
   NOTE(review): that leading pointer presumably receives the result - confirm. */
int BLASFUNC(cdotu) (int *, float * , int *, float *, int *);
int BLASFUNC(cdotc) (int *, float *, int *, float *, int *);
void BLASFUNC(zdotu) (double *, int *, double *, int *, double *, int *);
void BLASFUNC(zdotc) (double *, int *, double *, int *, double *, int *);
void BLASFUNC(xdotu) (double *, int *, double *, int *, double *, int *);
void BLASFUNC(xdotc) (double *, int *, double *, int *, double *, int *);
#elif defined(F_INTERFACE_F2C) || \
defined(F_INTERFACE_PGI) || \
defined(F_INTERFACE_GFORT) || \
(defined(F_INTERFACE_PATHSCALE) && defined(__64BIT__))
/* f2c, PGI, remaining gfortran cases and 64-bit PathScale: every variant returns
   void and takes an extra leading pointer argument.
   NOTE(review): that leading pointer presumably receives the result - confirm. */
void BLASFUNC(cdotu) (float *, int *, float * , int *, float *, int *);
void BLASFUNC(cdotc) (float *, int *, float *, int *, float *, int *);
void BLASFUNC(zdotu) (double *, int *, double *, int *, double *, int *);
void BLASFUNC(zdotc) (double *, int *, double *, int *, double *, int *);
void BLASFUNC(xdotu) (double *, int *, double *, int *, double *, int *);
void BLASFUNC(xdotc) (double *, int *, double *, int *, double *, int *);
#else
/* Default: the c and z variants return std::complex by value.
   NOTE(review): no #include <complex> is visible in this header, so the including
   file must be C++ and must have included <complex> beforehand - confirm. */
std::complex<float> BLASFUNC(cdotu) (int *, float *, int *, float *, int *);
std::complex<float> BLASFUNC(cdotc) (int *, float *, int *, float *, int *);
std::complex<double> BLASFUNC(zdotu) (int *, double *, int *, double *, int *);
std::complex<double> BLASFUNC(zdotc) (int *, double *, int *, double *, int *);
double BLASFUNC(xdotu) (int *, double *, int *, double *, int *);
double BLASFUNC(xdotc) (int *, double *, int *, double *, int *);
#endif
/* "w" variants of the complex dot products: same leading arguments plus one
   trailing pointer argument.
   NOTE(review): the trailing pointer presumably receives the result - confirm. */
int BLASFUNC(cdotuw) (int *, float *, int *, float *, int *, float*);
int BLASFUNC(cdotcw) (int *, float *, int *, float *, int *, float*);
int BLASFUNC(zdotuw) (int *, double *, int *, double *, int *, double*);
int BLASFUNC(zdotcw) (int *, double *, int *, double *, int *, double*);
int BLASFUNC(saxpy) (int *, float *, float *, int *, float *, int *);
int BLASFUNC(daxpy) (int *, double *, double *, int *, double *, int *);
int BLASFUNC(qaxpy) (int *, double *, double *, int *, double *, int *);
int BLASFUNC(caxpy) (int *, float *, float *, int *, float *, int *);
int BLASFUNC(zaxpy) (int *, double *, double *, int *, double *, int *);
int BLASFUNC(xaxpy) (int *, double *, double *, int *, double *, int *);
int BLASFUNC(caxpyc)(int *, float *, float *, int *, float *, int *);
int BLASFUNC(zaxpyc)(int *, double *, double *, int *, double *, int *);
int BLASFUNC(xaxpyc)(int *, double *, double *, int *, double *, int *);
int BLASFUNC(scopy) (int *, float *, int *, float *, int *);
int BLASFUNC(dcopy) (int *, double *, int *, double *, int *);
int BLASFUNC(qcopy) (int *, double *, int *, double *, int *);
int BLASFUNC(ccopy) (int *, float *, int *, float *, int *);
int BLASFUNC(zcopy) (int *, double *, int *, double *, int *);
int BLASFUNC(xcopy) (int *, double *, int *, double *, int *);
int BLASFUNC(sswap) (int *, float *, int *, float *, int *);
int BLASFUNC(dswap) (int *, double *, int *, double *, int *);
int BLASFUNC(qswap) (int *, double *, int *, double *, int *);
int BLASFUNC(cswap) (int *, float *, int *, float *, int *);
int BLASFUNC(zswap) (int *, double *, int *, double *, int *);
int BLASFUNC(xswap) (int *, double *, int *, double *, int *);
float BLASFUNC(sasum) (int *, float *, int *);
float BLASFUNC(scasum)(int *, float *, int *);
double BLASFUNC(dasum) (int *, double *, int *);
double BLASFUNC(qasum) (int *, double *, int *);
double BLASFUNC(dzasum)(int *, double *, int *);
double BLASFUNC(qxasum)(int *, double *, int *);
int BLASFUNC(isamax)(int *, float *, int *);
int BLASFUNC(idamax)(int *, double *, int *);
int BLASFUNC(iqamax)(int *, double *, int *);
int BLASFUNC(icamax)(int *, float *, int *);
int BLASFUNC(izamax)(int *, double *, int *);
int BLASFUNC(ixamax)(int *, double *, int *);
int BLASFUNC(ismax) (int *, float *, int *);
int BLASFUNC(idmax) (int *, double *, int *);
int BLASFUNC(iqmax) (int *, double *, int *);
int BLASFUNC(icmax) (int *, float *, int *);
int BLASFUNC(izmax) (int *, double *, int *);
int BLASFUNC(ixmax) (int *, double *, int *);
int BLASFUNC(isamin)(int *, float *, int *);
int BLASFUNC(idamin)(int *, double *, int *);
int BLASFUNC(iqamin)(int *, double *, int *);
int BLASFUNC(icamin)(int *, float *, int *);
int BLASFUNC(izamin)(int *, double *, int *);
int BLASFUNC(ixamin)(int *, double *, int *);
int BLASFUNC(ismin)(int *, float *, int *);
int BLASFUNC(idmin)(int *, double *, int *);
int BLASFUNC(iqmin)(int *, double *, int *);
int BLASFUNC(icmin)(int *, float *, int *);
int BLASFUNC(izmin)(int *, double *, int *);
int BLASFUNC(ixmin)(int *, double *, int *);
float BLASFUNC(samax) (int *, float *, int *);
double BLASFUNC(damax) (int *, double *, int *);
double BLASFUNC(qamax) (int *, double *, int *);
float BLASFUNC(scamax)(int *, float *, int *);
double BLASFUNC(dzamax)(int *, double *, int *);
double BLASFUNC(qxamax)(int *, double *, int *);
float BLASFUNC(samin) (int *, float *, int *);
double BLASFUNC(damin) (int *, double *, int *);
double BLASFUNC(qamin) (int *, double *, int *);
float BLASFUNC(scamin)(int *, float *, int *);
double BLASFUNC(dzamin)(int *, double *, int *);
double BLASFUNC(qxamin)(int *, double *, int *);
float BLASFUNC(smax) (int *, float *, int *);
double BLASFUNC(dmax) (int *, double *, int *);
double BLASFUNC(qmax) (int *, double *, int *);
float BLASFUNC(scmax) (int *, float *, int *);
double BLASFUNC(dzmax) (int *, double *, int *);
double BLASFUNC(qxmax) (int *, double *, int *);
float BLASFUNC(smin) (int *, float *, int *);
double BLASFUNC(dmin) (int *, double *, int *);
double BLASFUNC(qmin) (int *, double *, int *);
float BLASFUNC(scmin) (int *, float *, int *);
double BLASFUNC(dzmin) (int *, double *, int *);
double BLASFUNC(qxmin) (int *, double *, int *);
int BLASFUNC(sscal) (int *, float *, float *, int *);
int BLASFUNC(dscal) (int *, double *, double *, int *);
int BLASFUNC(qscal) (int *, double *, double *, int *);
int BLASFUNC(cscal) (int *, float *, float *, int *);
int BLASFUNC(zscal) (int *, double *, double *, int *);
int BLASFUNC(xscal) (int *, double *, double *, int *);
int BLASFUNC(csscal)(int *, float *, float *, int *);
int BLASFUNC(zdscal)(int *, double *, double *, int *);
int BLASFUNC(xqscal)(int *, double *, double *, int *);
float BLASFUNC(snrm2) (int *, float *, int *);
float BLASFUNC(scnrm2)(int *, float *, int *);
double BLASFUNC(dnrm2) (int *, double *, int *);
double BLASFUNC(qnrm2) (int *, double *, int *);
double BLASFUNC(dznrm2)(int *, double *, int *);
double BLASFUNC(qxnrm2)(int *, double *, int *);
int BLASFUNC(srot) (int *, float *, int *, float *, int *, float *, float *);
int BLASFUNC(drot) (int *, double *, int *, double *, int *, double *, double *);
int BLASFUNC(qrot) (int *, double *, int *, double *, int *, double *, double *);
int BLASFUNC(csrot) (int *, float *, int *, float *, int *, float *, float *);
int BLASFUNC(zdrot) (int *, double *, int *, double *, int *, double *, double *);
int BLASFUNC(xqrot) (int *, double *, int *, double *, int *, double *, double *);
int BLASFUNC(srotg) (float *, float *, float *, float *);
int BLASFUNC(drotg) (double *, double *, double *, double *);
int BLASFUNC(qrotg) (double *, double *, double *, double *);
int BLASFUNC(crotg) (float *, float *, float *, float *);
int BLASFUNC(zrotg) (double *, double *, double *, double *);
int BLASFUNC(xrotg) (double *, double *, double *, double *);
int BLASFUNC(srotmg)(float *, float *, float *, float *, float *);
int BLASFUNC(drotmg)(double *, double *, double *, double *, double *);
int BLASFUNC(srotm) (int *, float *, int *, float *, int *, float *);
int BLASFUNC(drotm) (int *, double *, int *, double *, int *, double *);
int BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *);
/* Level 2 routines */
int BLASFUNC(sger)(int *, int *, float *, float *, int *,
float *, int *, float *, int *);
int BLASFUNC(dger)(int *, int *, double *, double *, int *,
double *, int *, double *, int *);
int BLASFUNC(qger)(int *, int *, double *, double *, int *,
double *, int *, double *, int *);
int BLASFUNC(cgeru)(int *, int *, float *, float *, int *,
float *, int *, float *, int *);
int BLASFUNC(cgerc)(int *, int *, float *, float *, int *,
float *, int *, float *, int *);
int BLASFUNC(zgeru)(int *, int *, double *, double *, int *,
double *, int *, double *, int *);
int BLASFUNC(zgerc)(int *, int *, double *, double *, int *,
double *, int *, double *, int *);
int BLASFUNC(xgeru)(int *, int *, double *, double *, int *,
double *, int *, double *, int *);
int BLASFUNC(xgerc)(int *, int *, double *, double *, int *,
double *, int *, double *, int *);
int BLASFUNC(sgemv)(char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(cgemv)(char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(strsv) (char *, char *, char *, int *, float *, int *,
float *, int *);
int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *,
double *, int *);
int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *,
double *, int *);
int BLASFUNC(ctrsv) (char *, char *, char *, int *, float *, int *,
float *, int *);
int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *,
double *, int *);
int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *,
double *, int *);
int BLASFUNC(stpsv) (char *, char *, char *, int *, float *, float *, int *);
int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *);
int BLASFUNC(qtpsv) (char *, char *, char *, int *, double *, double *, int *);
int BLASFUNC(ctpsv) (char *, char *, char *, int *, float *, float *, int *);
int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *);
int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *);
int BLASFUNC(strmv) (char *, char *, char *, int *, float *, int *,
float *, int *);
int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *,
double *, int *);
int BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *,
double *, int *);
int BLASFUNC(ctrmv) (char *, char *, char *, int *, float *, int *,
float *, int *);
int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *,
double *, int *);
int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *,
double *, int *);
int BLASFUNC(stpmv) (char *, char *, char *, int *, float *, float *, int *);
int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *);
int BLASFUNC(qtpmv) (char *, char *, char *, int *, double *, double *, int *);
int BLASFUNC(ctpmv) (char *, char *, char *, int *, float *, float *, int *);
int BLASFUNC(ztpmv) (char *, char *, char *, int *, double *, double *, int *);
int BLASFUNC(xtpmv) (char *, char *, char *, int *, double *, double *, int *);
int BLASFUNC(stbmv) (char *, char *, char *, int *, int *, float *, int *, float *, int *);
int BLASFUNC(dtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
int BLASFUNC(qtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
int BLASFUNC(ctbmv) (char *, char *, char *, int *, int *, float *, int *, float *, int *);
int BLASFUNC(ztbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
int BLASFUNC(xtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
int BLASFUNC(stbsv) (char *, char *, char *, int *, int *, float *, int *, float *, int *);
int BLASFUNC(dtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
int BLASFUNC(qtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float *, int *, float *, int *);
int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
int BLASFUNC(ssymv) (char *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(dsymv) (char *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(qsymv) (char *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(csymv) (char *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zsymv) (char *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(xsymv) (char *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(sspmv) (char *, int *, float *, float *,
float *, int *, float *, float *, int *);
int BLASFUNC(dspmv) (char *, int *, double *, double *,
double *, int *, double *, double *, int *);
int BLASFUNC(qspmv) (char *, int *, double *, double *,
double *, int *, double *, double *, int *);
int BLASFUNC(cspmv) (char *, int *, float *, float *,
float *, int *, float *, float *, int *);
int BLASFUNC(zspmv) (char *, int *, double *, double *,
double *, int *, double *, double *, int *);
int BLASFUNC(xspmv) (char *, int *, double *, double *,
double *, int *, double *, double *, int *);
int BLASFUNC(ssyr) (char *, int *, float *, float *, int *,
float *, int *);
int BLASFUNC(dsyr) (char *, int *, double *, double *, int *,
double *, int *);
int BLASFUNC(qsyr) (char *, int *, double *, double *, int *,
double *, int *);
int BLASFUNC(csyr) (char *, int *, float *, float *, int *,
float *, int *);
int BLASFUNC(zsyr) (char *, int *, double *, double *, int *,
double *, int *);
int BLASFUNC(xsyr) (char *, int *, double *, double *, int *,
double *, int *);
int BLASFUNC(ssyr2) (char *, int *, float *,
float *, int *, float *, int *, float *, int *);
int BLASFUNC(dsyr2) (char *, int *, double *,
double *, int *, double *, int *, double *, int *);
int BLASFUNC(qsyr2) (char *, int *, double *,
double *, int *, double *, int *, double *, int *);
int BLASFUNC(csyr2) (char *, int *, float *,
float *, int *, float *, int *, float *, int *);
int BLASFUNC(zsyr2) (char *, int *, double *,
double *, int *, double *, int *, double *, int *);
int BLASFUNC(xsyr2) (char *, int *, double *,
double *, int *, double *, int *, double *, int *);
int BLASFUNC(sspr) (char *, int *, float *, float *, int *,
float *);
int BLASFUNC(dspr) (char *, int *, double *, double *, int *,
double *);
int BLASFUNC(qspr) (char *, int *, double *, double *, int *,
double *);
int BLASFUNC(cspr) (char *, int *, float *, float *, int *,
float *);
int BLASFUNC(zspr) (char *, int *, double *, double *, int *,
double *);
int BLASFUNC(xspr) (char *, int *, double *, double *, int *,
double *);
int BLASFUNC(sspr2) (char *, int *, float *,
float *, int *, float *, int *, float *);
int BLASFUNC(dspr2) (char *, int *, double *,
double *, int *, double *, int *, double *);
int BLASFUNC(qspr2) (char *, int *, double *,
double *, int *, double *, int *, double *);
int BLASFUNC(cspr2) (char *, int *, float *,
float *, int *, float *, int *, float *);
int BLASFUNC(zspr2) (char *, int *, double *,
double *, int *, double *, int *, double *);
int BLASFUNC(xspr2) (char *, int *, double *,
double *, int *, double *, int *, double *);
int BLASFUNC(cher) (char *, int *, float *, float *, int *,
float *, int *);
int BLASFUNC(zher) (char *, int *, double *, double *, int *,
double *, int *);
int BLASFUNC(xher) (char *, int *, double *, double *, int *,
double *, int *);
int BLASFUNC(chpr) (char *, int *, float *, float *, int *, float *);
int BLASFUNC(zhpr) (char *, int *, double *, double *, int *, double *);
int BLASFUNC(xhpr) (char *, int *, double *, double *, int *, double *);
int BLASFUNC(cher2) (char *, int *, float *,
float *, int *, float *, int *, float *, int *);
int BLASFUNC(zher2) (char *, int *, double *,
double *, int *, double *, int *, double *, int *);
int BLASFUNC(xher2) (char *, int *, double *,
double *, int *, double *, int *, double *, int *);
int BLASFUNC(chpr2) (char *, int *, float *,
float *, int *, float *, int *, float *);
int BLASFUNC(zhpr2) (char *, int *, double *,
double *, int *, double *, int *, double *);
int BLASFUNC(xhpr2) (char *, int *, double *,
double *, int *, double *, int *, double *);
int BLASFUNC(chemv) (char *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zhemv) (char *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(xhemv) (char *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(chpmv) (char *, int *, float *, float *,
float *, int *, float *, float *, int *);
int BLASFUNC(zhpmv) (char *, int *, double *, double *,
double *, int *, double *, double *, int *);
int BLASFUNC(xhpmv) (char *, int *, double *, double *,
double *, int *, double *, double *, int *);
int BLASFUNC(snorm)(char *, int *, int *, float *, int *);
int BLASFUNC(dnorm)(char *, int *, int *, double *, int *);
int BLASFUNC(cnorm)(char *, int *, int *, float *, int *);
int BLASFUNC(znorm)(char *, int *, int *, double *, int *);
int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(ssbmv)(char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(csbmv)(char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(chbmv)(char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
/* Level 3 routines */
int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *,
float *, int *, float *, int *, float *, float *, int *);
int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *,
double *, int *, double *, int *, double *, double *, int *);
int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *,
double *, int *, double *, int *, double *, double *, int *);
int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *,
float *, int *, float *, int *, float *, float *, int *);
int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *,
double *, int *, double *, int *, double *, double *, int *);
int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *,
double *, int *, double *, int *, double *, double *, int *);
int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *,
float *, int *, float *, int *, float *, float *, int *);
int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *,
double *, int *, double *, int *, double *, double *, int *);
int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *,
double *, int *, double *, int *, double *, double *, int *);
int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *,
float *, float *, int *, float *, int *,
float *, float *, int *);
int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *,
double *, double *, int *);
int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *,
float *, float *, int *, float *, int *,
float *, float *, int *);
int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *,
double *, double *, int *);
int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *,
float *, float *, int *, float *, int *);
int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *);
int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *);
int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *,
float *, float *, int *, float *, int *);
int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *);
int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *);
int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *,
float *, float *, int *, float *, int *);
int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *);
int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *);
int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *,
float *, float *, int *, float *, int *);
int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *);
int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *,
double *, double *, int *, double *, int *);
int BLASFUNC(ssymm)(char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(csymm)(char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(csymm3m)(char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(ssyrk)(char *, char *, int *, int *, float *, float *, int *,
float *, float *, int *);
int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *,
double *, double *, int *);
int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *,
double *, double *, int *);
int BLASFUNC(csyrk)(char *, char *, int *, int *, float *, float *, int *,
float *, float *, int *);
int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *,
double *, double *, int *);
int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *,
double *, double *, int *);
int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *,
double*, int *, double *, double *, int *);
int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *,
double*, int *, double *, double *, int *);
int BLASFUNC(csyr2k)(char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *,
double*, int *, double *, double *, int *);
int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *,
double*, int *, double *, double *, int *);
int BLASFUNC(chemm)(char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(chemm3m)(char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
// Prototypes for Fortran-style BLAS entry points: every argument is passed by
// pointer. In the declarations below the s- and c-prefixed routines take
// float* data, while the d-, z- and x-prefixed routines take double* data.

// z/x members of the *hemm3m family (by BLAS naming: Hermitian matrix-matrix
// product, "3m" variant - semantics are not visible here).
int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *,
double *, int *, double *, double *, int *);
// *herk: by BLAS naming, Hermitian rank-k update (two char flags, one input matrix).
int BLASFUNC(cherk)(char *, char *, int *, int *, float *, float *, int *,
float *, float *, int *);
int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *,
double *, double *, int *);
int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *,
double *, double *, int *);
// *her2k: by BLAS naming, Hermitian rank-2k update (two input matrices).
int BLASFUNC(cher2k)(char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *,
double*, int *, double *, double *, int *);
int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *,
double*, int *, double *, double *, int *);
// *her2m: same shape as *her2k plus a third char flag. Not a reference BLAS
// name - TODO confirm semantics against the BLAS library actually linked.
int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float *, float *, int *,
float *, int *, float *, float *, int *);
int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
double*, int *, double *, double *, int *);
int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
double*, int *, double *, double *, int *);
// *gemt / *gema / *gems: not reference BLAS names (presumably general-matrix
// transpose / add / subtract extensions - TODO confirm with the library docs).
int BLASFUNC(sgemt)(char *, int *, int *, float *, float *, int *,
float *, int *);
int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *,
double *, int *);
int BLASFUNC(cgemt)(char *, int *, int *, float *, float *, int *,
float *, int *);
int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *,
double *, int *);
int BLASFUNC(sgema)(char *, char *, int *, int *, float *,
float *, int *, float *, float *, int *, float *, int *);
int BLASFUNC(dgema)(char *, char *, int *, int *, double *,
double *, int *, double*, double *, int *, double*, int *);
int BLASFUNC(cgema)(char *, char *, int *, int *, float *,
float *, int *, float *, float *, int *, float *, int *);
int BLASFUNC(zgema)(char *, char *, int *, int *, double *,
double *, int *, double*, double *, int *, double*, int *);
int BLASFUNC(sgems)(char *, char *, int *, int *, float *,
float *, int *, float *, float *, int *, float *, int *);
int BLASFUNC(dgems)(char *, char *, int *, int *, double *,
double *, int *, double*, double *, int *, double*, int *);
int BLASFUNC(cgems)(char *, char *, int *, int *, float *,
float *, int *, float *, float *, int *, float *, int *);
int BLASFUNC(zgems)(char *, char *, int *, int *, double *,
double *, int *, double*, double *, int *, double*, int *);
// LAPACK-style factorisation / solve / inversion prototypes. Each family is
// declared for the s, d, q, c, z and x prefixes; the last int* argument of every
// routine here is the status output (blas_interface passes &info for potrf/getrf).

// *getf2 / *getrf: by LAPACK naming, LU factorisation with row pivoting
// (unblocked and blocked variants).
int BLASFUNC(sgetf2)(int *, int *, float *, int *, int *, int *);
int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);
int BLASFUNC(cgetf2)(int *, int *, float *, int *, int *, int *);
int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);
int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);
int BLASFUNC(sgetrf)(int *, int *, float *, int *, int *, int *);
int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);
int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);
int BLASFUNC(cgetrf)(int *, int *, float *, int *, int *, int *);
int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);
int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);
// *laswp: by LAPACK naming, applies a sequence of row interchanges.
int BLASFUNC(slaswp)(int *, float *, int *, int *, int *, int *, int *);
int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);
int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);
int BLASFUNC(claswp)(int *, float *, int *, int *, int *, int *, int *);
int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);
int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);
// *getrs: by LAPACK naming, solves a system using an existing LU factorisation.
int BLASFUNC(sgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
int BLASFUNC(cgetrs)(char *, int *, int *, float *, int *, int *, float *, int *, int *);
int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
// *gesv: by LAPACK naming, factorise-and-solve driver for general systems.
int BLASFUNC(sgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
int BLASFUNC(cgesv)(int *, int *, float *, int *, int *, float *, int *, int *);
int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
// *potf2 / *potrf: by LAPACK naming, Cholesky factorisation (unblocked / blocked).
// The leading char* is the triangle selector (blas_interface passes 'L').
int BLASFUNC(spotf2)(char *, int *, float *, int *, int *);
int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);
int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);
int BLASFUNC(cpotf2)(char *, int *, float *, int *, int *);
int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);
int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);
int BLASFUNC(spotrf)(char *, int *, float *, int *, int *);
int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);
int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);
int BLASFUNC(cpotrf)(char *, int *, float *, int *, int *);
int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);
int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);
// *lauu2 / *lauum: by LAPACK naming, product of a triangular factor with its
// (conjugate) transpose (unblocked / blocked).
int BLASFUNC(slauu2)(char *, int *, float *, int *, int *);
int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);
int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);
int BLASFUNC(clauu2)(char *, int *, float *, int *, int *);
int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);
int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);
int BLASFUNC(slauum)(char *, int *, float *, int *, int *);
int BLASFUNC(dlauum)(char *, int *, double *, int *, int *);
int BLASFUNC(qlauum)(char *, int *, double *, int *, int *);
int BLASFUNC(clauum)(char *, int *, float *, int *, int *);
int BLASFUNC(zlauum)(char *, int *, double *, int *, int *);
int BLASFUNC(xlauum)(char *, int *, double *, int *, int *);
// *trti2 / *trtri: by LAPACK naming, inverse of a triangular matrix
// (unblocked / blocked); takes two char flags.
int BLASFUNC(strti2)(char *, char *, int *, float *, int *, int *);
int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);
int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);
int BLASFUNC(ctrti2)(char *, char *, int *, float *, int *, int *);
int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *);
int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);
int BLASFUNC(strtri)(char *, char *, int *, float *, int *, int *);
int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);
int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);
int BLASFUNC(ctrtri)(char *, char *, int *, float *, int *, int *);
int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);
int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);
// *potri: by LAPACK naming, inverse of a matrix from its Cholesky factor.
int BLASFUNC(spotri)(char *, int *, float *, int *, int *);
int BLASFUNC(dpotri)(char *, int *, double *, int *, int *);
int BLASFUNC(qpotri)(char *, int *, double *, int *, int *);
int BLASFUNC(cpotri)(char *, int *, float *, int *, int *);
int BLASFUNC(zpotri)(char *, int *, double *, int *, int *);
int BLASFUNC(xpotri)(char *, int *, double *, int *, int *);
#endif

View File

@@ -0,0 +1,83 @@
//=====================================================
// File : blas_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef blas_PRODUIT_MATRICE_VECTEUR_HH
#define blas_PRODUIT_MATRICE_VECTEUR_HH
#include <c_interface_base.h>
#include <complex>
// C-linkage declarations for the BLAS/LAPACK routines used by blas_interface.
// blas.h supplies the bulk of the prototypes; the routines declared by hand
// below are the extra ones called through BLAS_FUNC(sytrd), BLAS_FUNC(gehrd)
// and BLAS_FUNC(getc2) in blas_interface_impl.hh.
extern "C"
{
#include "blas.h"
// Cholesky Factorization
// void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info);
// void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
// Used by blas_interface::tridiagonalization (d, e, tau and work are outputs/scratch).
void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work, int *lwork, int *info );
void dsytrd_(char *uplo, const int *n, double *a, const int *lda, double *d, double *e, double *tau, double *work, int *lwork, int *info );
// Used by blas_interface::hessenberg.
void sgehrd_( const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork, int *info );
void dgehrd_( const int *n, int *ilo, int *ihi, double *a, const int *lda, double *tau, double *work, int *lwork, int *info );
// LU row pivoting
// void dgetrf_( int *m, int *n, double *a, int *lda, int *ipiv, int *info );
// void sgetrf_(const int* m, const int* n, float *a, const int* ld, int* ipivot, int* info);
// LU full pivoting
// Used by blas_interface::lu_decomp (ipiv/jpiv receive the row/column pivots).
void sgetc2_(const int* n, float *a, const int *lda, int *ipiv, int *jpiv, int*info );
void dgetc2_(const int* n, double *a, const int *lda, int *ipiv, int *jpiv, int*info );
// NOTE(review): this HAS_LAPACK section is empty; the LAPACK prototypes above
// are declared unconditionally.
#ifdef HAS_LAPACK
#endif
}
// Two-level helpers so that macro arguments are expanded before being
// stringified (MAKE_STRING) or token-pasted (CAT).
#define MAKE_STRING2(S) #S
#define MAKE_STRING(S) MAKE_STRING2(S)
#define CAT2(A,B) A##B
#define CAT(A,B) CAT2(A,B)
// Primary template is only declared; the specialisations for float and double
// are generated by the two inclusions of blas_interface_impl.hh below.
template<class real> class blas_interface;
// Flag characters and the unit stride, kept in variables because the
// Fortran-style routines take every argument by address.
static char notrans = 'N';
static char trans = 'T';
static char nonunit = 'N';
static char lower = 'L';
static char right = 'R';
static char left = 'L';
static int intone = 1;
// Single precision: SCALAR_PREFIX s selects the s-prefixed routines.
#define SCALAR float
#define SCALAR_PREFIX s
#include "blas_interface_impl.hh"
#undef SCALAR
#undef SCALAR_PREFIX
// Double precision: SCALAR_PREFIX d selects the d-prefixed routines.
#define SCALAR double
#define SCALAR_PREFIX d
#include "blas_interface_impl.hh"
#undef SCALAR
#undef SCALAR_PREFIX
#endif

View File

@@ -0,0 +1,147 @@
// Maps a generic routine name onto the symbol for the scalar type currently
// being instantiated: SCALAR_PREFIX + NAME + '_' (e.g. gemv -> sgemv_ for float).
#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_)

// BTL back-end that forwards each benchmarked action to the matching
// BLAS/LAPACK routine for SCALAR.
//
// Matrices and vectors are the raw arrays of c_interface_base: an N x N matrix
// is stored column by column with leading dimension N, vectors have stride 1.
// Every argument is handed to the Fortran-style routines by address, which is
// why the constants fone/fzero and the file-level flag variables exist.
template<> class blas_interface<SCALAR> : public c_interface_base<SCALAR>
{

public :

  // Scalar constants 1 and 0, passed by address as the alpha/beta factors.
  static SCALAR fone;
  static SCALAR fzero;

  // Label of this back-end: the stringified value of the CBLASNAME macro.
  static inline std::string name()
  {
    return MAKE_STRING(CBLASNAME);
  }

  // X = A * B (gemv, no transpose).
  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
    BLAS_FUNC(gemv)(&notrans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
  }

  // X = A * B with A symmetric, reading the lower triangle (symv).
  static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
    BLAS_FUNC(symv)(&lower, &N,&fone,A,&N,B,&intone,&fzero,X,&intone);
  }

  // Symmetric rank-2 update of the lower triangle of A from B and X (syr2).
  static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
    BLAS_FUNC(syr2)(&lower,&N,&fone,B,&intone,X,&intone,A,&N);
  }

  // Rank-1 update A += X * Y^T (ger).
  static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){
    BLAS_FUNC(ger)(&N,&N,&fone,X,&intone,Y,&intone,A,&N);
  }

  // Applies the plane rotation (c, s) to the vector pair A, B (rot).
  static inline void rot(gene_vector & A,  gene_vector & B, SCALAR c, SCALAR s, int N){
    BLAS_FUNC(rot)(&N,A,&intone,B,&intone,&c,&s);
  }

  // X = A^T * B (gemv, transposed).
  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
    BLAS_FUNC(gemv)(&trans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
  }

  // X = A * B (gemm).
  static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
    BLAS_FUNC(gemm)(&notrans,&notrans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
  }

  // X = A^T * B^T (gemm). Both operands must be flagged as transposed;
  // passing notrans for both would simply repeat matrix_matrix_product.
  static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
    BLAS_FUNC(gemm)(&trans,&trans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
  }

//   static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
//     ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N);
//   }

  // Lower triangle of X = A * A^T (syrk).
  static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
    BLAS_FUNC(syrk)(&lower,&notrans,&N,&N,&fone,A,&N,&fzero,X,&N);
  }

  // Y += coef * X (axpy).
  static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){
    BLAS_FUNC(axpy)(&N,&coef,X,&intone,Y,&intone);
  }

  // Y = a*X + b*Y, done as a scal of Y by b followed by an axpy.
  static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){
    BLAS_FUNC(scal)(&N,&b,Y,&intone);
    BLAS_FUNC(axpy)(&N,&a,X,&intone,Y,&intone);
  }

  // Copies X into C and factorises C in place with potrf (lower triangle).
  // A non-zero info is reported on std::cerr.
  static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
    int N2 = N*N;
    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
    char uplo = 'L';
    int info = 0;
    BLAS_FUNC(potrf)(&uplo, &N, C, &N, &info);
    if(info!=0) std::cerr << "potrf_ error " << info << "\n";
  }

  // Copies X into C and factorises C in place with getrf (row pivoting).
  // The pivot indices live in stack scratch space and are discarded.
  static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
    int N2 = N*N;
    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
    int info = 0;
    int * ipiv = (int*)alloca(sizeof(int)*N);
    BLAS_FUNC(getrf)(&N, &N, C, &N, ipiv, &info);
    if(info!=0) std::cerr << "getrf_ error " << info << "\n";
  }

  // Solves L * X = B for a vector B: X starts as a copy of B and is
  // overwritten by trsv.
  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
    BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
    BLAS_FUNC(trsv)(&lower, &notrans, &nonunit, &N, L, &N, X, &intone);
  }

  // Triangular solve with an N x N right-hand side: X starts as a copy of B and
  // is overwritten by trsm. The copy must cover all N*N entries of B; copying
  // only N entries would leave every column but the first uninitialised.
  static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){
    int N2 = N*N;
    BLAS_FUNC(copy)(&N2, B, &intone, X, &intone);
    BLAS_FUNC(trsm)(&right, &lower, &notrans, &nonunit, &N, &N, &fone, L, &N, X, &N);
  }

  // In-place triangular product on B using the lower triangle of A (trmm);
  // the third matrix argument is unused.
  static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & /*X*/, int N){
    BLAS_FUNC(trmm)(&left, &lower, &notrans,&nonunit, &N,&N,&fone,A,&N,B,&N);
  }

  #ifdef HAS_LAPACK

  // Copies X into C and factorises C in place with getc2 (full pivoting).
  // Pivot vectors are stack scratch; info is not inspected.
  static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
    int N2 = N*N;
    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
    int info = 0;
    int * ipiv = (int*)alloca(sizeof(int)*N);
    int * jpiv = (int*)alloca(sizeof(int)*N);
    BLAS_FUNC(getc2)(&N, C, &N, ipiv, jpiv, &info);
  }

  // Copies X into C and reduces C in place with gehrd over the full index
  // range [1, N]. One heap buffer holds tau (first N entries) followed by the
  // workspace of N*64 entries.
  static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
    {
      int N2 = N*N;
      int inc = 1;
      BLAS_FUNC(copy)(&N2, X, &inc, C, &inc);
    }
    int info = 0;
    int ilo = 1;
    int ihi = N;
    int bsize = 64;
    int worksize = N*bsize;
    SCALAR* d = new SCALAR[N+worksize];
    BLAS_FUNC(gehrd)(&N, &ilo, &ihi, C, &N, d, d+N, &worksize, &info);
    delete[] d;
  }

  // Copies X into C and reduces C in place with sytrd (upper triangle).
  // One heap buffer holds d, e and tau (N entries each) followed by the
  // workspace of N*64 entries.
  static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
    {
      int N2 = N*N;
      int inc = 1;
      BLAS_FUNC(copy)(&N2, X, &inc, C, &inc);
    }
    char uplo = 'U';
    int info = 0;
    int bsize = 64;
    int worksize = N*bsize;
    SCALAR* d = new SCALAR[3*N+worksize];
    BLAS_FUNC(sytrd)(&uplo, &N, C, &N, d, d+N, d+2*N, d+3*N, &worksize, &info);
    delete[] d;
  }

  #endif // HAS_LAPACK

};

SCALAR blas_interface<SCALAR>::fone = SCALAR(1);
SCALAR blas_interface<SCALAR>::fzero = SCALAR(0);

View File

@@ -0,0 +1,73 @@
#ifndef BTL_C_INTERFACE_BASE_H
#define BTL_C_INTERFACE_BASE_H
#include "utilities.h"
#include <vector>
// Common base for BTL back-ends whose matrices and vectors are plain heap
// arrays. A matrix is a single real[N*N] buffer in which entry (row i,
// column j) lives at index i + N*j; the STL-side matrix is a vector of
// columns, so A_stl[j][i] is the same entry.
template<class real> class c_interface_base
{

public:

  typedef real                      real_type;
  typedef std::vector<real>         stl_vector;
  typedef std::vector<stl_vector >  stl_matrix;

  typedef real* gene_matrix;
  typedef real* gene_vector;

  // Releases a buffer obtained from matrix_from_stl.
  static void free_matrix(gene_matrix & A, int /*N*/){ delete[] A; }

  // Releases a buffer obtained from vector_from_stl.
  static void free_vector(gene_vector & B){ delete[] B; }

  // Allocates A (N*N entries, N = number of columns of A_stl) and fills it
  // column after column. The caller owns the buffer.
  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
    const int dim = A_stl.size();
    A = new real[dim*dim];
    real* out = A;
    for (int col=0; col<dim; ++col){
      for (int row=0; row<dim; ++row){
        *out = A_stl[col][row];
        ++out;
      }
    }
  }

  // Allocates B with one entry per element of B_stl and copies the values.
  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
    const int len = B_stl.size();
    B = new real[len];
    for (int k=0; k<len; ++k){
      B[k] = B_stl[k];
    }
  }

  // Copies B back into B_stl; the length is taken from B_stl.
  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
    const int len = B_stl.size();
    for (int k=0; k<len; ++k){
      B_stl[k] = B[k];
    }
  }

  // Copies A back into A_stl. The dimension is the number of columns already
  // present in A_stl; each column is resized to that dimension first.
  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
    const int dim = A_stl.size();
    for (int col=0; col<dim; ++col){
      stl_vector & column = A_stl[col];
      column.resize(dim);
      const real* in = A + dim*col;
      for (int row=0; row<dim; ++row){
        column[row] = in[row];
      }
    }
  }

  // cible[0..N) = source[0..N)
  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
    for (int k=0; k<N; ++k){
      cible[k] = source[k];
    }
  }

  // Copies an N x N matrix, one column at a time.
  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
    for (int col=0; col<N; ++col){
      const real* from = source + N*col;
      real* to = cible + N*col;
      for (int row=0; row<N; ++row){
        to[row] = from[row];
      }
    }
  }

};
#endif

View File

@@ -0,0 +1,73 @@
//=====================================================
// File : main.cpp
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:28 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include "blas_interface.hh"
#include "bench.hh"
#include "basic_actions.hh"
#include "action_cholesky.hh"
#include "action_lu_decomp.hh"
#include "action_partial_lu.hh"
#include "action_trisolve_matrix.hh"
#ifdef HAS_LAPACK
#include "action_hessenberg.hh"
#endif
BTL_MAIN;
int main()
{
bench<Action_axpy<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_symv<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_syr2<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_ger<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trisolve_matrix<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_trmm<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_cholesky<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
bench<Action_partial_lu<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
#ifdef HAS_LAPACK
// bench<Action_lu_decomp<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
bench<Action_hessenberg<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
bench<Action_tridiagonalization<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
#endif
//bench<Action_lu_solve<blas_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
return 0;
}

View File

@@ -0,0 +1,2 @@
# Registers the STL reference benchmark built from main.cpp.
# NOTE(review): the trailing OFF is presumably the default build state
# consumed by btl_add_bench (defined elsewhere) - confirm against that macro.
btl_add_bench(btl_STL main.cpp OFF)

View File

@@ -0,0 +1,244 @@
//=====================================================
// File : STL_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:24 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef STL_INTERFACE_HH
#define STL_INTERFACE_HH
#include <string>
#include <vector>
#include "utilities.h"
using namespace std;
// Reference BTL back-end written with nothing but std::vector.
//
// A matrix is a vector of columns: A[j][i] is the entry at row i, column j.
// All operations work on N x N matrices and length-N vectors; N is supplied by
// the caller and is not checked against the container sizes.
template<class real>
class STL_interface{

public :

  typedef real real_type ;

  typedef std::vector<real>  stl_vector;
  typedef std::vector<stl_vector > stl_matrix;

  typedef stl_matrix gene_matrix;

  typedef stl_vector gene_vector;

  // Label of this back-end.
  static inline std::string name( void )
  {
    return "STL";
  }

  // Nothing to release: the containers own their storage.
  static void free_matrix(gene_matrix & /*A*/, int /*N*/){}

  static void free_vector(gene_vector & /*B*/){}

  // The generic and STL representations are the same type, so the four
  // conversion helpers are plain assignments.
  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
    A = A_stl;
  }

  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
    B = B_stl;
  }

  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
    B_stl = B ;
  }

  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
    A_stl = A ;
  }

  // cible[0..N) = source[0..N); cible must already hold at least N entries.
  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
    for (int i=0;i<N;i++){
      cible[i]=source[i];
    }
  }

  // Element-wise copy of the leading N x N part; cible must already be sized.
  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
    for (int i=0;i<N;i++)
      for (int j=0;j<N;j++)
        cible[i][j]=source[i][j];
  }

//   static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
//   {
//     real somme;
//     for (int j=0;j<N;j++){
//       for (int i=0;i<N;i++){
//         somme=0.0;
//         for (int k=0;k<N;k++)
//           somme += A[i][k]*A[j][k];
//         X[j][i]=somme;
//       }
//     }
//   }

  // Lower triangle (row i >= column j) of X = A * A^T; the strictly upper
  // part of X is left untouched.
  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
  {
    for (int j=0;j<N;j++){
      for (int i=j;i<N;i++){
        real somme=0.0;
        for (int k=0;k<N;k++){
          somme+=A[k][i]*A[k][j];
        }
        X[j][i]=somme;
      }
    }
  }

  // X = A * B.
  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
  {
    for (int j=0;j<N;j++){
      for (int i=0;i<N;i++){
        real somme=0.0;
        for (int k=0;k<N;k++)
          somme+=A[k][i]*B[j][k];
        X[j][i]=somme;
      }
    }
  }

  // X = A * B for a vector B.
  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
  {
    for (int i=0;i<N;i++){
      real somme=0.0;
      for (int j=0;j<N;j++)
        somme+=A[j][i]*B[j];
      X[i]=somme;
    }
  }

  // X = A * B with A symmetric; only entries on or below the diagonal of A
  // are read, each off-diagonal one contributing to two components of X.
  static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
  {
    for (int j=0; j<N; ++j)
      X[j] = 0;
    for (int j=0; j<N; ++j)
    {
      real t1 = B[j];
      real t2 = 0;
      X[j] += t1 * A[j][j];
      for (int i=j+1; i<N; ++i) {
        X[i] += t1 * A[j][i];
        t2 += A[j][i] * B[i];
      }
      X[j] += t2;
    }
  }

  // Symmetric rank-2 update restricted to the lower triangle:
  // A(i,j) += B(i)*X(j) + B(j)*X(i) for i >= j.
  static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
  {
    for (int j=0; j<N; ++j)
    {
      for (int i=j; i<N; ++i)
        A[j][i] += B[i]*X[j] + B[j]*X[i];
    }
  }

  // Rank-1 update A(i,j) += X(i)*Y(j) over the whole matrix. Unlike syr2 there
  // is no triangular restriction, so the row loop starts at 0 for every column.
  static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N)
  {
    for (int j=0; j<N; ++j)
    {
      for (int i=0; i<N; ++i)
        A[j][i] += X[i]*Y[j];
    }
  }

  // X = A^T * B.
  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
  {
    for (int i=0;i<N;i++){
      real somme = 0.0;
      for (int j=0;j<N;j++)
        somme += A[i][j]*B[j];
      X[i] = somme;
    }
  }

  // Y += coef * X
  static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
    for (int i=0;i<N;i++)
      Y[i]+=coef*X[i];
  }

  // Y = a*X + b*Y
  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
    for (int i=0;i<N;i++)
      Y[i] = a*X[i] + b*Y[i];
  }

  // Solves L * X = B by forward substitution, eliminating one column of L per
  // outer iteration. Only the lower triangle of L is read.
  static inline void trisolve_lower(const gene_matrix & L, const gene_vector & B, gene_vector & X, int N){
    copy_vector(B,X,N);
    for(int i=0; i<N; ++i)
    {
      X[i] /= L[i][i];
      real tmp = X[i];
      for (int j=i+1; j<N; ++j)
        X[j] -= tmp * L[i][j];
    }
  }

  // Returns sum((A-B)^2) / sum(A^2), i.e. the squared relative difference.
  // The ratio is 0/0 when A is empty or all zero.
  static inline real norm_diff(const stl_vector & A, const stl_vector & B)
  {
    int N=A.size();
    real somme=0.0;
    real somme2=0.0;

    for (int i=0;i<N;i++){
      real diff=A[i]-B[i];
      somme+=diff*diff;
      somme2+=A[i]*A[i];
    }
    return somme/somme2;
  }

  // Matrix counterpart of norm_diff over the leading N x N entries, where N is
  // the length of the first column. An empty matrix is treated as N = 0
  // instead of dereferencing A[0].
  static inline real norm_diff(const stl_matrix & A, const stl_matrix & B)
  {
    int N = A.empty() ? 0 : A[0].size();
    real somme=0.0;
    real somme2=0.0;

    for (int i=0;i<N;i++){
      for (int j=0;j<N;j++){
        real diff=A[i][j] - B[i][j];
        somme += diff*diff;
        somme2 += A[i][j]*A[i][j];
      }
    }

    return somme/somme2;
  }

  // Prints every entry of A through the INFOS macro. std::endl is qualified so
  // the class does not depend on a using-directive.
  static inline void display_vector(const stl_vector & A)
  {
    int N=A.size();
    for (int i=0;i<N;i++){
      INFOS("A["<<i<<"]="<<A[i]<<std::endl);
    }
  }

};
#endif

View File

@@ -0,0 +1,42 @@
//=====================================================
// File : main.cpp
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:23 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include "STL_interface.hh"
#include "bench.hh"
#include "basic_actions.hh"
BTL_MAIN;
int main()
{
bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_symv<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_syr2<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
return 0;
}

View File

@@ -0,0 +1,13 @@
# Blaze back-end: configured only when both Blaze and Boost.System are found.
find_package(BLAZE)
find_package(Boost COMPONENTS system)
if (BLAZE_FOUND AND Boost_FOUND)
  include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS})
  btl_add_bench(btl_blaze main.cpp)
  # Every command that names the btl_blaze target sits behind the same
  # BUILD_btl_blaze guard: setting a property on a target that was not created
  # is a configure-time error.
  # NOTE(review): assumes btl_add_bench (defined elsewhere) only creates the
  # target when BUILD_btl_blaze is ON - confirm against that macro.
  if(BUILD_btl_blaze)
    # Note: The newest blaze version requires C++14.
    # Ideally, we should set this depending on the version of Blaze we found
    set_property(TARGET btl_blaze PROPERTY CXX_STANDARD 14)
    target_link_libraries(btl_blaze ${Boost_LIBRARIES})
  endif()
endif ()

View File

@@ -0,0 +1,140 @@
//=====================================================
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef BLAZE_INTERFACE_HH
#define BLAZE_INTERFACE_HH
#include <blaze/Math.h>
#include <blaze/Blaze.h>
// using namespace blaze;
#include <vector>
template<class real>
class blaze_interface {
public :
typedef real real_type ;
typedef std::vector<real> stl_vector;
typedef std::vector<stl_vector > stl_matrix;
typedef blaze::DynamicMatrix<real,blaze::columnMajor> gene_matrix;
typedef blaze::DynamicVector<real> gene_vector;
static inline std::string name() { return "blaze"; }
static void free_matrix(gene_matrix & A, int N){
return ;
}
static void free_vector(gene_vector & B){
return ;
}
static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
A.resize(A_stl[0].size(), A_stl.size());
for (int j=0; j<A_stl.size() ; j++){
for (int i=0; i<A_stl[j].size() ; i++){
A(i,j) = A_stl[j][i];
}
}
}
static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
B.resize(B_stl.size());
for (int i=0; i<B_stl.size() ; i++){
B[i] = B_stl[i];
}
}
static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
for (int i=0; i<B_stl.size() ; i++){
B_stl[i] = B[i];
}
}
static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
int N=A_stl.size();
for (int j=0;j<N;j++){
A_stl[j].resize(N);
for (int i=0;i<N;i++){
A_stl[j][i] = A(i,j);
}
}
}
static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
X = (A*B);
}
static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
X = (trans(A)*trans(B));
}
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
X = (trans(A)*A);
}
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
X = (A*trans(A));
}
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
X = (A*B);
}
static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
X = (trans(A)*B);
}
static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
Y += coef * X;
}
static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
Y = a*X + b*Y;
}
// static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
// C = X;
// recursive_cholesky(C);
// }
// static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
// R = X;
// std::vector<int> ipvt(N);
// lu_factor(R, ipvt);
// }
// static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
// X = lower_trisolve(L, B);
// }
static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
cible = source;
}
static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
cible = source;
}
};
#endif

View File

@@ -0,0 +1,40 @@
//=====================================================
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include "blaze_interface.hh"
#include "bench.hh"
#include "basic_actions.hh"
BTL_MAIN;
int main()
{
bench<Action_axpy<blaze_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpby<blaze_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
// bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
return 0;
}

View File

@@ -0,0 +1,17 @@
# Blitz++ back-ends: configured only when the Blitz package is found.
find_package(Blitz)

if(BLITZ_FOUND)
  include_directories(${BLITZ_INCLUDES})

  # Dynamic-size benchmark.
  btl_add_bench(btl_blitz btl_blitz.cpp)
  if(BUILD_btl_blitz)
    target_link_libraries(btl_blitz ${BLITZ_LIBRARIES})
  endif()

  # Tiny fixed-size benchmark, registered with OFF as its third argument.
  btl_add_bench(btl_tiny_blitz btl_tiny_blitz.cpp OFF)
  if(BUILD_btl_tiny_blitz)
    target_link_libraries(btl_tiny_blitz ${BLITZ_LIBRARIES})
  endif()
endif()

View File

@@ -0,0 +1,192 @@
//=====================================================
// File : blitz_LU_solve_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:31 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef BLITZ_LU_SOLVE_INTERFACE_HH
#define BLITZ_LU_SOLVE_INTERFACE_HH
#include "blitz/array.h"
#include <vector>
BZ_USING_NAMESPACE(blitz)
template<class real>
class blitz_LU_solve_interface : public blitz_interface<real>
{
public :
typedef typename blitz_interface<real>::gene_matrix gene_matrix;
typedef typename blitz_interface<real>::gene_vector gene_vector;
typedef blitz::Array<int,1> Pivot_Vector;
// Sizes the pivot array to hold N entries.
inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N)
{
  const int length = N;
  pivot.resize(length);
}
// Intentionally a no-op: this function performs no explicit release.
inline static void free_Pivot_Vector(Pivot_Vector & pivot)
{
}
static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end)
{
real somme=0.;
for (int j=col_start ; j<col_end+1 ; j++){
somme+=A(row,j)*B(j);
}
return somme;
}
static inline real matrix_matrix_product_sliced(gene_matrix & A, int row, int col_start, int col_end, gene_matrix & B, int row_shift, int col )
{
real somme=0.;
for (int j=col_start ; j<col_end+1 ; j++){
somme+=A(row,j)*B(j+row_shift,col);
}
return somme;
}
inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
{
ASSERT( LU.rows()==LU.cols() ) ;
int index_max = 0 ;
real big = 0. ;
real theSum = 0. ;
real dum = 0. ;
// Get the implicit scaling information :
gene_vector ImplicitScaling( N ) ;
for( int i=0; i<N; i++ ) {
big = 0. ;
for( int j=0; j<N; j++ ) {
if( abs( LU( i, j ) )>=big ) big = abs( LU( i, j ) ) ;
}
if( big==0. ) {
INFOS( "blitz_LU_factor::Singular matrix" ) ;
exit( 0 ) ;
}
ImplicitScaling( i ) = 1./big ;
}
// Loop over columns of Crout's method :
for( int j=0; j<N; j++ ) {
for( int i=0; i<j; i++ ) {
theSum = LU( i, j ) ;
theSum -= matrix_matrix_product_sliced(LU, i, 0, i-1, LU, 0, j) ;
// theSum -= sum( LU( i, Range( fromStart, i-1 ) )*LU( Range( fromStart, i-1 ), j ) ) ;
LU( i, j ) = theSum ;
}
// Search for the largest pivot element :
big = 0. ;
for( int i=j; i<N; i++ ) {
theSum = LU( i, j ) ;
theSum -= matrix_matrix_product_sliced(LU, i, 0, j-1, LU, 0, j) ;
// theSum -= sum( LU( i, Range( fromStart, j-1 ) )*LU( Range( fromStart, j-1 ), j ) ) ;
LU( i, j ) = theSum ;
if( (ImplicitScaling( i )*abs( theSum ))>=big ) {
dum = ImplicitScaling( i )*abs( theSum ) ;
big = dum ;
index_max = i ;
}
}
// Interchanging rows and the scale factor :
if( j!=index_max ) {
for( int k=0; k<N; k++ ) {
dum = LU( index_max, k ) ;
LU( index_max, k ) = LU( j, k ) ;
LU( j, k ) = dum ;
}
ImplicitScaling( index_max ) = ImplicitScaling( j ) ;
}
pivot( j ) = index_max ;
if ( LU( j, j )==0. ) LU( j, j ) = 1.e-20 ;
// Divide by the pivot element :
if( j<N ) {
dum = 1./LU( j, j ) ;
for( int i=j+1; i<N; i++ ) LU( i, j ) *= dum ;
}
}
}
inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N)
{
// Pour conserver le meme header, on travaille sur X, copie du second-membre B
X = B.copy() ;
ASSERT( LU.rows()==LU.cols() ) ;
firstIndex indI ;
// Forward substitution :
int ii = 0 ;
real theSum = 0. ;
for( int i=0; i<N; i++ ) {
int ip = pivot( i ) ;
theSum = X( ip ) ;
// theSum = B( ip ) ;
X( ip ) = X( i ) ;
// B( ip ) = B( i ) ;
if( ii ) {
theSum -= matrix_vector_product_sliced(LU, X, i, ii-1, i-1) ;
// theSum -= sum( LU( i, Range( ii-1, i-1 ) )*X( Range( ii-1, i-1 ) ) ) ;
// theSum -= sum( LU( i, Range( ii-1, i-1 ) )*B( Range( ii-1, i-1 ) ) ) ;
} else if( theSum ) {
ii = i+1 ;
}
X( i ) = theSum ;
// B( i ) = theSum ;
}
// Backsubstitution :
for( int i=N-1; i>=0; i-- ) {
theSum = X( i ) ;
// theSum = B( i ) ;
theSum -= matrix_vector_product_sliced(LU, X, i, i+1, N) ;
// theSum -= sum( LU( i, Range( i+1, toEnd ) )*X( Range( i+1, toEnd ) ) ) ;
// theSum -= sum( LU( i, Range( i+1, toEnd ) )*B( Range( i+1, toEnd ) ) ) ;
// Store a component of the solution vector :
X( i ) = theSum/LU( i, i ) ;
// B( i ) = theSum/LU( i, i ) ;
}
}
};
#endif

View File

@@ -0,0 +1,147 @@
//=====================================================
// File : blitz_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef BLITZ_INTERFACE_HH
#define BLITZ_INTERFACE_HH
#include <blitz/blitz.h>
#include <blitz/array.h>
#include <blitz/vector-et.h>
#include <blitz/vecwhere.h>
#include <blitz/matrix.h>
#include <vector>
BZ_USING_NAMESPACE(blitz)
/**
 * Benchmark adapter exposing Blitz++ arrays through a set of static
 * operations (conversions from/to STL containers, products, axpy, copies).
 *
 * Matrices are blitz::Array<real,2> and vectors blitz::Array<real,1>.
 * STL matrices are indexed as A_stl[column][row].
 * Several functions take a size argument N that they do not use.
 */
template<class real>
class blitz_interface{
public :
  typedef real real_type ;
  typedef std::vector<real> stl_vector;
  typedef std::vector<stl_vector > stl_matrix;
  typedef blitz::Array<real, 2> gene_matrix;
  typedef blitz::Array<real, 1> gene_vector;
  // Alternative representations, currently disabled:
  //  typedef blitz::Matrix<real, blitz::ColumnMajor> gene_matrix;
  //  typedef blitz::Vector<real> gene_vector;

  /// Label identifying this back-end.
  static inline std::string name() { return "blitz"; }

  /// No-op: nothing is released here.
  static void free_matrix(gene_matrix & A, int N){}

  /// No-op: nothing is released here.
  static void free_vector(gene_vector & B){}

  /**
   * Resizes A to (A_stl[0].size() x A_stl.size()) and fills it so that
   * A(i,j) == A_stl[j][i].  An empty A_stl yields a 0 x 0 array instead of
   * dereferencing the non-existent first column.
   */
  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
    if (A_stl.empty()){
      A.resize(0,0);
      return;
    }
    const int nb_cols = static_cast<int>(A_stl.size());
    A.resize(static_cast<int>(A_stl[0].size()), nb_cols);
    for (int j=0; j<nb_cols ; j++){
      const int nb_rows = static_cast<int>(A_stl[j].size());
      for (int i=0; i<nb_rows ; i++){
        A(i,j)=A_stl[j][i];
      }
    }
  }

  /// Resizes B to B_stl.size() and copies every element of B_stl into it.
  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
    const int size = static_cast<int>(B_stl.size());
    B.resize(size);
    for (int i=0; i<size ; i++){
      B(i)=B_stl[i];
    }
  }

  /// Copies B into B_stl; B_stl is not resized, its current size drives the loop.
  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
    const int size = static_cast<int>(B_stl.size());
    for (int i=0; i<size ; i++){
      B_stl[i]=B(i);
    }
  }

  /**
   * Copies A into A_stl with A_stl[j][i] = A(i,j).  The number of columns is
   * taken from A_stl.size() and every column is resized to that same length.
   */
  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
    int N=A_stl.size();
    for (int j=0;j<N;j++){
      A_stl[j].resize(N);
      for (int i=0;i<N;i++)
        A_stl[j][i] = A(i,j);
    }
  }

  /// X = A * B, written as a reduction over the third index placeholder.
  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
  {
    firstIndex i;
    secondIndex j;
    thirdIndex k;
    X = sum(A(i,k) * B(k,j), k);
  }

  /// X = transpose(A) * A.
  static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
  {
    firstIndex i;
    secondIndex j;
    thirdIndex k;
    X = sum(A(k,i) * A(k,j), k);
  }

  /// X = A * transpose(A).
  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
  {
    firstIndex i;
    secondIndex j;
    thirdIndex k;
    X = sum(A(i,k) * A(j,k), k);
  }

  /// X = A * B for a matrix A and a vector B.
  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
  {
    firstIndex i;
    secondIndex j;
    X = sum(A(i,j)*B(j),j);
  }

  /// X = transpose(A) * B for a matrix A and a vector B.
  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
  {
    firstIndex i;
    secondIndex j;
    X = sum(A(j,i) * B(j),j);
  }

  /// Y = Y + coef * X.
  static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N)
  {
    firstIndex i;
    Y = Y(i) + coef * X(i);
    //Y += coef * X;
  }

  /// Assigns source to cible ("cible" is French for "target").
  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
    cible = source;
  }

  /// Assigns source to cible ("cible" is French for "target").
  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
    cible = source;
  }
};
#endif

View File

@@ -0,0 +1,51 @@
//=====================================================
// File : main.cpp
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include "blitz_interface.hh"
#include "blitz_LU_solve_interface.hh"
#include "bench.hh"
#include "action_matrix_vector_product.hh"
#include "action_matrix_matrix_product.hh"
#include "action_axpy.hh"
#include "action_lu_solve.hh"
#include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_atv_product.hh"
BTL_MAIN;
int main()
{
bench<Action_matrix_vector_product<blitz_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<blitz_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<blitz_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<blitz_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<blitz_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_axpy<blitz_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
//bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
return 0;
}

View File

@@ -0,0 +1,38 @@
//=====================================================
// File : main.cpp
// Author : L. Plagne <laurent.plagne@edf.fr>
// Copyright (C) EDF R&D, lun sep 30 14:23:30 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#include "utilities.h"
#include "tiny_blitz_interface.hh"
#include "static/bench_static.hh"
#include "action_matrix_vector_product.hh"
#include "action_matrix_matrix_product.hh"
#include "action_axpy.hh"
// BTL_MAIN is a macro provided by the benchmark headers included above;
// its expansion is not visible in this file.
BTL_MAIN;
// Benchmark driver for the fixed-size ("tiny") Blitz++ back-end.
// Runs the static benchmark harness once per action, in the order listed,
// and always returns 0.
int main()
{
// Vector update action.
bench_static<Action_axpy,tiny_blitz_interface>();
// Matrix-matrix product action.
bench_static<Action_matrix_matrix_product,tiny_blitz_interface>();
// Matrix-vector product action.
bench_static<Action_matrix_vector_product,tiny_blitz_interface>();
return 0;
}

Some files were not shown because too many files have changed in this diff Show More