Disabled external gits
This commit is contained in:
		
							
								
								
									
										149
									
								
								cs440-acg/ext/eigen/bench/BenchSparseUtil.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										149
									
								
								cs440-acg/ext/eigen/bench/BenchSparseUtil.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,149 @@
 | 
			
		||||
 | 
			
		||||
#include <Eigen/Sparse>
 | 
			
		||||
#include <bench/BenchTimer.h>
 | 
			
		||||
#include <set>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#ifndef SIZE
 | 
			
		||||
#define SIZE 1024
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef DENSITY
 | 
			
		||||
#define DENSITY 0.01
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SCALAR
 | 
			
		||||
#define SCALAR double
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
typedef SCALAR Scalar;
 | 
			
		||||
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
 | 
			
		||||
typedef Matrix<Scalar,Dynamic,1> DenseVector;
 | 
			
		||||
typedef SparseMatrix<Scalar> EigenSparseMatrix;
 | 
			
		||||
 | 
			
		||||
void fillMatrix(float density, int rows, int cols,  EigenSparseMatrix& dst)
 | 
			
		||||
{
 | 
			
		||||
  dst.reserve(double(rows)*cols*density);
 | 
			
		||||
  for(int j = 0; j < cols; j++)
 | 
			
		||||
  {
 | 
			
		||||
    for(int i = 0; i < rows; i++)
 | 
			
		||||
    {
 | 
			
		||||
      Scalar v = (internal::random<float>(0,1) < density) ? internal::random<Scalar>() : 0;
 | 
			
		||||
      if (v!=0)
 | 
			
		||||
        dst.insert(i,j) = v;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  dst.finalize();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void fillMatrix2(int nnzPerCol, int rows, int cols,  EigenSparseMatrix& dst)
 | 
			
		||||
{
 | 
			
		||||
//   std::cout << "alloc " << nnzPerCol*cols << "\n";
 | 
			
		||||
  dst.reserve(nnzPerCol*cols);
 | 
			
		||||
  for(int j = 0; j < cols; j++)
 | 
			
		||||
  {
 | 
			
		||||
    std::set<int> aux;
 | 
			
		||||
    for(int i = 0; i < nnzPerCol; i++)
 | 
			
		||||
    {
 | 
			
		||||
      int k = internal::random<int>(0,rows-1);
 | 
			
		||||
      while (aux.find(k)!=aux.end())
 | 
			
		||||
        k = internal::random<int>(0,rows-1);
 | 
			
		||||
      aux.insert(k);
 | 
			
		||||
 | 
			
		||||
      dst.insert(k,j) = internal::random<Scalar>();
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  dst.finalize();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst)
 | 
			
		||||
{
 | 
			
		||||
  dst.setZero();
 | 
			
		||||
  for (int j=0; j<src.cols(); ++j)
 | 
			
		||||
    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
 | 
			
		||||
      dst(it.index(),j) = it.value();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#ifndef NOGMM
 | 
			
		||||
#include "gmm/gmm.h"
 | 
			
		||||
typedef gmm::csc_matrix<Scalar> GmmSparse;
 | 
			
		||||
typedef gmm::col_matrix< gmm::wsvector<Scalar> > GmmDynSparse;
 | 
			
		||||
void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst)
 | 
			
		||||
{
 | 
			
		||||
  GmmDynSparse tmp(src.rows(), src.cols());
 | 
			
		||||
  for (int j=0; j<src.cols(); ++j)
 | 
			
		||||
    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
 | 
			
		||||
      tmp(it.index(),j) = it.value();
 | 
			
		||||
  gmm::copy(tmp, dst);
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef NOMTL
 | 
			
		||||
#include <boost/numeric/mtl/mtl.hpp>
 | 
			
		||||
typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::col_major> > MtlSparse;
 | 
			
		||||
typedef mtl::compressed2D<Scalar, mtl::matrix::parameters<mtl::tag::row_major> > MtlSparseRowMajor;
 | 
			
		||||
void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst)
 | 
			
		||||
{
 | 
			
		||||
  mtl::matrix::inserter<MtlSparse> ins(dst);
 | 
			
		||||
  for (int j=0; j<src.cols(); ++j)
 | 
			
		||||
    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
 | 
			
		||||
      ins[it.index()][j] = it.value();
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef CSPARSE
 | 
			
		||||
extern "C" {
 | 
			
		||||
#include "cs.h"
 | 
			
		||||
}
 | 
			
		||||
void eiToCSparse(const EigenSparseMatrix& src, cs* &dst)
 | 
			
		||||
{
 | 
			
		||||
  cs* aux = cs_spalloc (0, 0, 1, 1, 1);
 | 
			
		||||
  for (int j=0; j<src.cols(); ++j)
 | 
			
		||||
    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
 | 
			
		||||
      if (!cs_entry(aux, it.index(), j, it.value()))
 | 
			
		||||
      {
 | 
			
		||||
        std::cout << "cs_entry error\n";
 | 
			
		||||
        exit(2);
 | 
			
		||||
      }
 | 
			
		||||
   dst = cs_compress(aux);
 | 
			
		||||
//    cs_spfree(aux);
 | 
			
		||||
}
 | 
			
		||||
#endif // CSPARSE
 | 
			
		||||
 | 
			
		||||
#ifndef NOUBLAS
 | 
			
		||||
#include <boost/numeric/ublas/vector.hpp>
 | 
			
		||||
#include <boost/numeric/ublas/matrix.hpp>
 | 
			
		||||
#include <boost/numeric/ublas/io.hpp>
 | 
			
		||||
#include <boost/numeric/ublas/triangular.hpp>
 | 
			
		||||
#include <boost/numeric/ublas/vector_sparse.hpp>
 | 
			
		||||
#include <boost/numeric/ublas/matrix_sparse.hpp>
 | 
			
		||||
#include <boost/numeric/ublas/vector_of_vector.hpp>
 | 
			
		||||
#include <boost/numeric/ublas/operation.hpp>
 | 
			
		||||
 | 
			
		||||
typedef boost::numeric::ublas::compressed_matrix<Scalar,boost::numeric::ublas::column_major> UBlasSparse;
 | 
			
		||||
 | 
			
		||||
void eiToUblas(const EigenSparseMatrix& src, UBlasSparse& dst)
 | 
			
		||||
{
 | 
			
		||||
  dst.resize(src.rows(), src.cols(), false);
 | 
			
		||||
  for (int j=0; j<src.cols(); ++j)
 | 
			
		||||
    for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
 | 
			
		||||
      dst(it.index(),j) = it.value();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Copies the vector-like object `src` into `dst` element by element.
//
// `dst` is resized to src.size() first; element j is read with src.coeff(j)
// and written through dst[j].
template <typename EigenType, typename UblasType>
void eiToUblasVec(const EigenType& src, UblasType& dst)
{
  dst.resize(src.size());
  int idx = 0;
  while (idx < src.size())
  {
    dst[idx] = src.coeff(idx);
    ++idx;
  }
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef OSKI
 | 
			
		||||
extern "C" {
 | 
			
		||||
#include <oski/oski.h>
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										195
									
								
								cs440-acg/ext/eigen/bench/BenchTimer.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										195
									
								
								cs440-acg/ext/eigen/bench/BenchTimer.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,195 @@
 | 
			
		||||
// This file is part of Eigen, a lightweight C++ template library
 | 
			
		||||
// for linear algebra.
 | 
			
		||||
//
 | 
			
		||||
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
 | 
			
		||||
//
 | 
			
		||||
// This Source Code Form is subject to the terms of the Mozilla
 | 
			
		||||
// Public License v. 2.0. If a copy of the MPL was not distributed
 | 
			
		||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 | 
			
		||||
 | 
			
		||||
#ifndef EIGEN_BENCH_TIMERR_H
 | 
			
		||||
#define EIGEN_BENCH_TIMERR_H
 | 
			
		||||
 | 
			
		||||
#if defined(_WIN32) || defined(__CYGWIN__)
 | 
			
		||||
# ifndef NOMINMAX
 | 
			
		||||
#   define NOMINMAX
 | 
			
		||||
#   define EIGEN_BT_UNDEF_NOMINMAX
 | 
			
		||||
# endif
 | 
			
		||||
# ifndef WIN32_LEAN_AND_MEAN
 | 
			
		||||
#   define WIN32_LEAN_AND_MEAN
 | 
			
		||||
#   define EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
 | 
			
		||||
# endif
 | 
			
		||||
# include <windows.h>
 | 
			
		||||
#elif defined(__APPLE__)
 | 
			
		||||
#include <mach/mach_time.h>
 | 
			
		||||
#else
 | 
			
		||||
# include <unistd.h>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
// Optimization barrier: hands `p` to an empty inline-asm statement that
// declares a "memory" clobber, so the compiler has to assume the pointed-to
// data may be read or written at this point.
//
// The asm statement uses GNU syntax and is therefore only emitted for
// GCC-compatible compilers; elsewhere the function is a no-op. It is declared
// `inline` so that translation units which include this header without
// calling it do not get unused-function warnings.
static inline void escape(void *p) {
#if defined(__GNUC__) || defined(__clang__)
  asm volatile("" : : "g"(p) : "memory");
#else
  (void)p;
#endif
}
 | 
			
		||||
 | 
			
		||||
// Optimization barrier: an empty inline-asm statement with a "memory"
// clobber, so the compiler has to assume memory may have changed here.
//
// The asm statement uses GNU syntax and is therefore only emitted for
// GCC-compatible compilers; elsewhere the function is a no-op. It is declared
// `inline` so that translation units which include this header without
// calling it do not get unused-function warnings.
static inline void clobber() {
#if defined(__GNUC__) || defined(__clang__)
  asm volatile("" : : : "memory");
#endif
}
 | 
			
		||||
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
 | 
			
		||||
namespace Eigen
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
// Indices selecting one of the two clocks tracked by BenchTimer; they are
// used directly as positions in its two-element vectors.
enum {
  CPU_TIMER  = 0,  // value returned by getCpuTime()
  REAL_TIMER = 1   // value returned by getRealTime()
};
 | 
			
		||||
 | 
			
		||||
/** Elapsed time timer keeping the best try.
 | 
			
		||||
  *
 | 
			
		||||
  * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID.
 | 
			
		||||
  * On Windows we use QueryPerformanceCounter
 | 
			
		||||
  *
 | 
			
		||||
  * Important: on linux, you must link with -lrt
 | 
			
		||||
  */
 | 
			
		||||
class BenchTimer
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
 | 
			
		||||
  BenchTimer()
 | 
			
		||||
  {
 | 
			
		||||
#if defined(_WIN32) || defined(__CYGWIN__)
 | 
			
		||||
    LARGE_INTEGER freq;
 | 
			
		||||
    QueryPerformanceFrequency(&freq);
 | 
			
		||||
    m_frequency = (double)freq.QuadPart;
 | 
			
		||||
#endif
 | 
			
		||||
    reset();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ~BenchTimer() {}
 | 
			
		||||
 | 
			
		||||
  inline void reset()
 | 
			
		||||
  {
 | 
			
		||||
    m_bests.fill(1e9);
 | 
			
		||||
    m_worsts.fill(0);
 | 
			
		||||
    m_totals.setZero();
 | 
			
		||||
  }
 | 
			
		||||
  inline void start()
 | 
			
		||||
  {
 | 
			
		||||
    m_starts[CPU_TIMER]  = getCpuTime();
 | 
			
		||||
    m_starts[REAL_TIMER] = getRealTime();
 | 
			
		||||
  }
 | 
			
		||||
  inline void stop()
 | 
			
		||||
  {
 | 
			
		||||
    m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER];
 | 
			
		||||
    m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER];
 | 
			
		||||
    #if EIGEN_VERSION_AT_LEAST(2,90,0)
 | 
			
		||||
    m_bests = m_bests.cwiseMin(m_times);
 | 
			
		||||
    m_worsts = m_worsts.cwiseMax(m_times);
 | 
			
		||||
    #else
 | 
			
		||||
    m_bests(0) = std::min(m_bests(0),m_times(0));
 | 
			
		||||
    m_bests(1) = std::min(m_bests(1),m_times(1));
 | 
			
		||||
    m_worsts(0) = std::max(m_worsts(0),m_times(0));
 | 
			
		||||
    m_worsts(1) = std::max(m_worsts(1),m_times(1));
 | 
			
		||||
    #endif
 | 
			
		||||
    m_totals += m_times;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /** Return the elapsed time in seconds between the last start/stop pair
 | 
			
		||||
    */
 | 
			
		||||
  inline double value(int TIMER = CPU_TIMER) const
 | 
			
		||||
  {
 | 
			
		||||
    return m_times[TIMER];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /** Return the best elapsed time in seconds
 | 
			
		||||
    */
 | 
			
		||||
  inline double best(int TIMER = CPU_TIMER) const
 | 
			
		||||
  {
 | 
			
		||||
    return m_bests[TIMER];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /** Return the worst elapsed time in seconds
 | 
			
		||||
    */
 | 
			
		||||
  inline double worst(int TIMER = CPU_TIMER) const
 | 
			
		||||
  {
 | 
			
		||||
    return m_worsts[TIMER];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /** Return the total elapsed time in seconds.
 | 
			
		||||
    */
 | 
			
		||||
  inline double total(int TIMER = CPU_TIMER) const
 | 
			
		||||
  {
 | 
			
		||||
    return m_totals[TIMER];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline double getCpuTime() const
 | 
			
		||||
  {
 | 
			
		||||
#ifdef _WIN32
 | 
			
		||||
    LARGE_INTEGER query_ticks;
 | 
			
		||||
    QueryPerformanceCounter(&query_ticks);
 | 
			
		||||
    return query_ticks.QuadPart/m_frequency;
 | 
			
		||||
#elif __APPLE__
 | 
			
		||||
    return double(mach_absolute_time())*1e-9;
 | 
			
		||||
#else
 | 
			
		||||
    timespec ts;
 | 
			
		||||
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
 | 
			
		||||
    return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline double getRealTime() const
 | 
			
		||||
  {
 | 
			
		||||
#ifdef _WIN32
 | 
			
		||||
    SYSTEMTIME st;
 | 
			
		||||
    GetSystemTime(&st);
 | 
			
		||||
    return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds;
 | 
			
		||||
#elif __APPLE__
 | 
			
		||||
    return double(mach_absolute_time())*1e-9;
 | 
			
		||||
#else
 | 
			
		||||
    timespec ts;
 | 
			
		||||
    clock_gettime(CLOCK_REALTIME, &ts);
 | 
			
		||||
    return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
 | 
			
		||||
#endif
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
#if defined(_WIN32) || defined(__CYGWIN__)
 | 
			
		||||
  double m_frequency;
 | 
			
		||||
#endif
 | 
			
		||||
  Vector2d m_starts;
 | 
			
		||||
  Vector2d m_times;
 | 
			
		||||
  Vector2d m_bests;
 | 
			
		||||
  Vector2d m_worsts;
 | 
			
		||||
  Vector2d m_totals;
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/** Benchmarks CODE with TIMER.
  *
  * TIMER is reset, then TRIES measurements are taken; each measurement runs
  * CODE REP times between TIMER.start() and TIMER.stop(), followed by a call
  * to clobber().
  *
  * TIMER, TRIES and REP are parenthesized in the expansion so that arbitrary
  * expressions (e.g. `quick ? 2 : 10`) behave as written; TRIES and REP are
  * re-evaluated on every loop iteration.
  */
#define BENCH(TIMER,TRIES,REP,CODE) { \
    (TIMER).reset(); \
    for(int uglyvarname1=0; uglyvarname1<(TRIES); ++uglyvarname1){ \
      (TIMER).start(); \
      for(int uglyvarname2=0; uglyvarname2<(REP); ++uglyvarname2){ \
        CODE; \
      } \
      (TIMER).stop(); \
      clobber(); \
    } \
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// clean #defined tokens
 | 
			
		||||
#ifdef EIGEN_BT_UNDEF_NOMINMAX
 | 
			
		||||
# undef EIGEN_BT_UNDEF_NOMINMAX
 | 
			
		||||
# undef NOMINMAX
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
 | 
			
		||||
# undef EIGEN_BT_UNDEF_WIN32_LEAN_AND_MEAN
 | 
			
		||||
# undef WIN32_LEAN_AND_MEAN
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif // EIGEN_BENCH_TIMERR_H
 | 
			
		||||
							
								
								
									
										92
									
								
								cs440-acg/ext/eigen/bench/BenchUtil.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								cs440-acg/ext/eigen/bench/BenchUtil.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,92 @@
 | 
			
		||||
 | 
			
		||||
#ifndef EIGEN_BENCH_UTIL_H
 | 
			
		||||
#define EIGEN_BENCH_UTIL_H
 | 
			
		||||
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
#include "BenchTimer.h"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#include <boost/preprocessor/repetition/enum_params.hpp>
 | 
			
		||||
#include <boost/preprocessor/repetition.hpp>
 | 
			
		||||
#include <boost/preprocessor/seq.hpp>
 | 
			
		||||
#include <boost/preprocessor/array.hpp>
 | 
			
		||||
#include <boost/preprocessor/arithmetic.hpp>
 | 
			
		||||
#include <boost/preprocessor/comparison.hpp>
 | 
			
		||||
#include <boost/preprocessor/punctuation.hpp>
 | 
			
		||||
#include <boost/preprocessor/punctuation/comma.hpp>
 | 
			
		||||
#include <boost/preprocessor/stringize.hpp>
 | 
			
		||||
 | 
			
		||||
// Overwrites `mat` in place via its setRandom() member.
// Marked noinline so the call is not folded into its caller.
template<typename MatrixType>
__attribute__((noinline)) void initMatrix_random(MatrixType& mat)
{
  mat.setRandom();
}
 | 
			
		||||
 | 
			
		||||
// Overwrites `mat` in place via its setIdentity() member.
// Marked noinline so the call is not folded into its caller.
template<typename MatrixType>
__attribute__((noinline)) void initMatrix_identity(MatrixType& mat)
{
  mat.setIdentity();
}
 | 
			
		||||
 | 
			
		||||
#ifndef __INTEL_COMPILER
 | 
			
		||||
/* Reads the SSE control/status register (MXCSR), ORs 32832 (0x8040) into it
 * and writes it back. 0x8040 is bit 15 plus bit 6, which the Intel manual
 * names flush-to-zero and denormals-are-zero; despite the macro name, the
 * exception mask bits are left untouched.
 *
 * The scratch variable is declared as a read-write ("+m") operand because
 * stmxcsr stores into it: an asm statement must not modify an input-only
 * operand. `volatile` keeps the statement from being dropped, since its only
 * purpose is the register side effect.
 */
#define DISABLE_SSE_EXCEPTIONS()  { \
  int aux = 0; \
  asm volatile( \
  "stmxcsr   %[aux]           \n\t" \
  "orl       $32832, %[aux]   \n\t" \
  "ldmxcsr   %[aux]           \n\t" \
  : [aux] "+m" (aux)); \
}
 | 
			
		||||
#else
 | 
			
		||||
#define DISABLE_SSE_EXCEPTIONS()  
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef BENCH_GMM
 | 
			
		||||
#include <gmm/gmm.h>
 | 
			
		||||
// Copies the matrix-like object `src` into `dst` coefficient by coefficient.
//
// `dst` is resized to src.rows() x src.cols(); each coefficient is read with
// src.coeff(row, col) and written through dst(row, col), one column after
// the other.
template <typename EigenMatrixType, typename GmmMatrixType>
void eiToGmm(const EigenMatrixType& src, GmmMatrixType& dst)
{
  dst.resize(src.rows(),src.cols());
  for (int col = 0; col < src.cols(); ++col)
  {
    for (int row = 0; row < src.rows(); ++row)
    {
      dst(row, col) = src.coeff(row, col);
    }
  }
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef BENCH_GSL
 | 
			
		||||
#include <gsl/gsl_matrix.h>
 | 
			
		||||
#include <gsl/gsl_linalg.h>
 | 
			
		||||
#include <gsl/gsl_eigen.h>
 | 
			
		||||
template <typename EigenMatrixType>
 | 
			
		||||
void eiToGsl(const EigenMatrixType& src, gsl_matrix** dst)
 | 
			
		||||
{
 | 
			
		||||
  for (int j=0; j<src.cols(); ++j)
 | 
			
		||||
    for (int i=0; i<src.rows(); ++i)
 | 
			
		||||
      gsl_matrix_set(*dst, i, j, src.coeff(i,j));
 | 
			
		||||
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef BENCH_UBLAS
 | 
			
		||||
#include <boost/numeric/ublas/matrix.hpp>
 | 
			
		||||
#include <boost/numeric/ublas/vector.hpp>
 | 
			
		||||
// Copies the matrix-like object `src` into `dst` coefficient by coefficient.
//
// `dst` is resized to src.rows() x src.cols(); each coefficient is read with
// src.coeff(row, col) and written through dst(row, col), one column after
// the other.
template <typename EigenMatrixType, typename UblasMatrixType>
void eiToUblas(const EigenMatrixType& src, UblasMatrixType& dst)
{
  dst.resize(src.rows(),src.cols());
  for (int col = 0; col < src.cols(); ++col)
  {
    for (int row = 0; row < src.rows(); ++row)
    {
      dst(row, col) = src.coeff(row, col);
    }
  }
}
 | 
			
		||||
// Copies the vector-like object `src` into `dst` element by element.
//
// `dst` is resized to src.size() first; element j is read with src.coeff(j)
// and written through dst[j].
template <typename EigenType, typename UblasType>
void eiToUblasVec(const EigenType& src, UblasType& dst)
{
  dst.resize(src.size());
  int idx = 0;
  while (idx < src.size())
  {
    dst[idx] = src.coeff(idx);
    ++idx;
  }
}
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif // EIGEN_BENCH_UTIL_H
 | 
			
		||||
							
								
								
									
										55
									
								
								cs440-acg/ext/eigen/bench/README.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								cs440-acg/ext/eigen/bench/README.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,55 @@
 | 
			
		||||
 | 
			
		||||
This folder contains a couple of benchmark utilities and Eigen benchmarks.
 | 
			
		||||
 | 
			
		||||
****************************
 | 
			
		||||
* bench_multi_compilers.sh *
 | 
			
		||||
****************************
 | 
			
		||||
 | 
			
		||||
This script allows running a benchmark on a set of different compilers/compiler options.
 | 
			
		||||
It takes two arguments:
 | 
			
		||||
 - a file defining the list of the compilers with their options
 | 
			
		||||
 - the .cpp file of the benchmark
 | 
			
		||||
 | 
			
		||||
Examples:
 | 
			
		||||
 | 
			
		||||
$ ./bench_multi_compilers.sh basicbench.cxxlist basicbenchmark.cpp
 | 
			
		||||
 | 
			
		||||
    g++-4.1 -O3 -DNDEBUG -finline-limit=10000
 | 
			
		||||
    3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   /
 | 
			
		||||
    0.271102   0.131416   0.422322   0.198633
 | 
			
		||||
    0.201658   0.102436   0.397566   0.207282
 | 
			
		||||
 | 
			
		||||
    g++-4.2 -O3 -DNDEBUG -finline-limit=10000
 | 
			
		||||
    3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   /
 | 
			
		||||
    0.107805   0.0890579   0.30265   0.161843
 | 
			
		||||
    0.127157   0.0712581   0.278341   0.191029
 | 
			
		||||
 | 
			
		||||
    g++-4.3 -O3 -DNDEBUG -finline-limit=10000
 | 
			
		||||
    3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   /
 | 
			
		||||
    0.134318   0.105291   0.3704   0.180966
 | 
			
		||||
    0.137703   0.0732472   0.31225   0.202204
 | 
			
		||||
 | 
			
		||||
    icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size
 | 
			
		||||
    3d-3x3   /   4d-4x4   /   Xd-4x4   /   Xd-20x20   /
 | 
			
		||||
    0.226145   0.0941319   0.371873   0.159433
 | 
			
		||||
    0.109302   0.0837538   0.328102   0.173891
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
$ ./bench_multi_compilers.sh ompbench.cxxlist ompbenchmark.cpp
 | 
			
		||||
 | 
			
		||||
    g++-4.2 -O3 -DNDEBUG -finline-limit=10000 -fopenmp
 | 
			
		||||
    double, fixed-size 4x4: 0.00165105s  0.0778739s
 | 
			
		||||
    double, 32x32: 0.0654769s 0.075289s  => x0.869674 (2)
 | 
			
		||||
    double, 128x128: 0.054148s 0.0419669s  => x1.29025 (2)
 | 
			
		||||
    double, 512x512: 0.913799s 0.428533s  => x2.13239 (2)
 | 
			
		||||
    double, 1024x1024: 14.5972s 9.3542s  => x1.5605 (2)
 | 
			
		||||
 | 
			
		||||
    icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -openmp
 | 
			
		||||
    double, fixed-size 4x4: 0.000589848s  0.019949s
 | 
			
		||||
    double, 32x32: 0.0682781s 0.0449722s  => x1.51823 (2)
 | 
			
		||||
    double, 128x128: 0.0547509s 0.0435519s  => x1.25714 (2)
 | 
			
		||||
    double, 512x512: 0.829436s 0.424438s  => x1.9542 (2)
 | 
			
		||||
    double, 1024x1024: 14.5243s 10.7735s  => x1.34815 (2)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										876
									
								
								cs440-acg/ext/eigen/bench/analyze-blocking-sizes.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										876
									
								
								cs440-acg/ext/eigen/bench/analyze-blocking-sizes.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,876 @@
 | 
			
		||||
// This file is part of Eigen, a lightweight C++ template library
 | 
			
		||||
// for linear algebra.
 | 
			
		||||
//
 | 
			
		||||
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
 | 
			
		||||
//
 | 
			
		||||
// This Source Code Form is subject to the terms of the Mozilla
 | 
			
		||||
// Public License v. 2.0. If a copy of the MPL was not distributed
 | 
			
		||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <cstdlib>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <cmath>
 | 
			
		||||
#include <cassert>
 | 
			
		||||
#include <cstring>
 | 
			
		||||
#include <memory>
 | 
			
		||||
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
const int default_precision = 4;
 | 
			
		||||
 | 
			
		||||
// see --only-cubic-sizes
 | 
			
		||||
bool only_cubic_sizes = false;
 | 
			
		||||
 | 
			
		||||
// see --dump-tables
 | 
			
		||||
bool dump_tables = false;
 | 
			
		||||
 | 
			
		||||
// Returns the number of times `x` can be halved before reaching zero, minus
// one: i.e. the base-2 logarithm when `x` is a power of two, and the position
// of the highest set bit otherwise. Returns 0 for both 0 and 1.
uint8_t log2_pot(size_t x) {
  size_t exponent = 0;
  for (x >>= 1; x != 0; x >>= 1) {
    ++exponent;
  }
  return exponent;
}
 | 
			
		||||
 | 
			
		||||
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
 | 
			
		||||
{
 | 
			
		||||
  return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// just a helper to store a triple of K,M,N sizes for matrix product
//
// Copy construction and copy assignment are the compiler-generated ones: the
// struct only holds three integers, and declaring a copy constructor by hand
// would make the implicit copy assignment operator a deprecated feature.
struct size_triple_t
{
  uint16_t k, m, n;

  // All sizes zero.
  size_triple_t() : k(0), m(0), n(0) {}

  // Explicit sizes; each one is narrowed to 16 bits.
  size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}

  // Decodes a compact triple made of three 4-bit base-2 logarithms:
  // bits 8-11 give k, bits 4-7 give m and bits 0-3 give n, each size being
  // 2 raised to the stored value.
  size_triple_t(uint16_t compact)
    : k(1 << ((compact & 0xf00) >> 8))
    , m(1 << ((compact & 0x0f0) >> 4))
    , n(1 << ((compact & 0x00f) >> 0))
  {}

  // True when the three sizes are equal.
  bool is_cubic() const { return k == m && m == n; }
};
 | 
			
		||||
 | 
			
		||||
// Streams a size triple as "(k, m, n)".
ostream& operator<<(ostream& s, const size_triple_t& t)
{
  s << "(" << t.k;
  s << ", " << t.m;
  s << ", " << t.n << ")";
  return s;
}
 | 
			
		||||
 | 
			
		||||
struct inputfile_entry_t
 | 
			
		||||
{
 | 
			
		||||
  uint16_t product_size;
 | 
			
		||||
  uint16_t pot_block_size;
 | 
			
		||||
  size_triple_t nonpot_block_size;
 | 
			
		||||
  float gflops;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct inputfile_t
 | 
			
		||||
{
 | 
			
		||||
  enum class type_t {
 | 
			
		||||
    unknown,
 | 
			
		||||
    all_pot_sizes,
 | 
			
		||||
    default_sizes
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  string filename;
 | 
			
		||||
  vector<inputfile_entry_t> entries;
 | 
			
		||||
  type_t type;
 | 
			
		||||
 | 
			
		||||
  inputfile_t(const string& fname)
 | 
			
		||||
    : filename(fname)
 | 
			
		||||
    , type(type_t::unknown)
 | 
			
		||||
  {
 | 
			
		||||
    ifstream stream(filename);
 | 
			
		||||
    if (!stream.is_open()) {
 | 
			
		||||
      cerr << "couldn't open input file: " << filename << endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
    string line;
 | 
			
		||||
    while (getline(stream, line)) {
 | 
			
		||||
      if (line.empty()) continue;
 | 
			
		||||
      if (line.find("BEGIN MEASUREMENTS ALL POT SIZES") == 0) {
 | 
			
		||||
        if (type != type_t::unknown) {
 | 
			
		||||
          cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines";
 | 
			
		||||
          exit(1);
 | 
			
		||||
        }
 | 
			
		||||
        type = type_t::all_pot_sizes;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      if (line.find("BEGIN MEASUREMENTS DEFAULT SIZES") == 0) {
 | 
			
		||||
        if (type != type_t::unknown) {
 | 
			
		||||
          cerr << "Input file " << filename << " contains redundant BEGIN MEASUREMENTS lines";
 | 
			
		||||
          exit(1);
 | 
			
		||||
        }
 | 
			
		||||
        type = type_t::default_sizes;
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
 | 
			
		||||
      if (type == type_t::unknown) {
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      switch(type) {
 | 
			
		||||
        case type_t::all_pot_sizes: {
 | 
			
		||||
          unsigned int product_size, block_size;
 | 
			
		||||
          float gflops;
 | 
			
		||||
          int sscanf_result =
 | 
			
		||||
            sscanf(line.c_str(), "%x %x %f",
 | 
			
		||||
                   &product_size,
 | 
			
		||||
                   &block_size,
 | 
			
		||||
                   &gflops);
 | 
			
		||||
          if (3 != sscanf_result ||
 | 
			
		||||
              !product_size ||
 | 
			
		||||
              product_size > 0xfff ||
 | 
			
		||||
              !block_size ||
 | 
			
		||||
              block_size > 0xfff ||
 | 
			
		||||
              !isfinite(gflops))
 | 
			
		||||
          {
 | 
			
		||||
            cerr << "ill-formed input file: " << filename << endl;
 | 
			
		||||
            cerr << "offending line:" << endl << line << endl;
 | 
			
		||||
            exit(1);
 | 
			
		||||
          }
 | 
			
		||||
          if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
 | 
			
		||||
            continue;
 | 
			
		||||
          }
 | 
			
		||||
          inputfile_entry_t entry;
 | 
			
		||||
          entry.product_size = uint16_t(product_size);
 | 
			
		||||
          entry.pot_block_size = uint16_t(block_size);
 | 
			
		||||
          entry.gflops = gflops;
 | 
			
		||||
          entries.push_back(entry);
 | 
			
		||||
          break;
 | 
			
		||||
        }
 | 
			
		||||
        case type_t::default_sizes: {
 | 
			
		||||
          unsigned int product_size;
 | 
			
		||||
          float gflops;
 | 
			
		||||
          int bk, bm, bn;
 | 
			
		||||
          int sscanf_result =
 | 
			
		||||
            sscanf(line.c_str(), "%x default(%d, %d, %d) %f",
 | 
			
		||||
                   &product_size,
 | 
			
		||||
                   &bk, &bm, &bn,
 | 
			
		||||
                   &gflops);
 | 
			
		||||
          if (5 != sscanf_result ||
 | 
			
		||||
              !product_size ||
 | 
			
		||||
              product_size > 0xfff ||
 | 
			
		||||
              !isfinite(gflops))
 | 
			
		||||
          {
 | 
			
		||||
            cerr << "ill-formed input file: " << filename << endl;
 | 
			
		||||
            cerr << "offending line:" << endl << line << endl;
 | 
			
		||||
            exit(1);
 | 
			
		||||
          }
 | 
			
		||||
          if (only_cubic_sizes && !size_triple_t(product_size).is_cubic()) {
 | 
			
		||||
            continue;
 | 
			
		||||
          }
 | 
			
		||||
          inputfile_entry_t entry;
 | 
			
		||||
          entry.product_size = uint16_t(product_size);
 | 
			
		||||
          entry.pot_block_size = 0;
 | 
			
		||||
          entry.nonpot_block_size = size_triple_t(bk, bm, bn);
 | 
			
		||||
          entry.gflops = gflops;
 | 
			
		||||
          entries.push_back(entry);
 | 
			
		||||
          break;
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        default:
 | 
			
		||||
          break;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    stream.close();
 | 
			
		||||
    if (type == type_t::unknown) {
 | 
			
		||||
      cerr << "Unrecognized input file " << filename << endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
    if (entries.empty()) {
 | 
			
		||||
      cerr << "didn't find any measurements in input file: " << filename << endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// One entry of a preprocessed input file: a product size, a block size and
// the efficiency associated with that pair.
// NOTE(review): both sizes presumably use the compact 16-bit triple encoding
// -- confirm against the code that fills these entries.
struct preprocessed_inputfile_entry_t
{
  uint16_t product_size;
  uint16_t block_size;
  float efficiency;
};
 | 
			
		||||
 | 
			
		||||
bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2)
 | 
			
		||||
{
 | 
			
		||||
  return e1.efficiency < e2.efficiency;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct preprocessed_inputfile_t
 | 
			
		||||
{
 | 
			
		||||
  string filename;
 | 
			
		||||
  vector<preprocessed_inputfile_entry_t> entries;
 | 
			
		||||
 | 
			
		||||
  preprocessed_inputfile_t(const inputfile_t& inputfile)
 | 
			
		||||
    : filename(inputfile.filename)
 | 
			
		||||
  {
 | 
			
		||||
    if (inputfile.type != inputfile_t::type_t::all_pot_sizes) {
 | 
			
		||||
      abort();
 | 
			
		||||
    }
 | 
			
		||||
    auto it = inputfile.entries.begin();
 | 
			
		||||
    auto it_first_with_given_product_size = it;
 | 
			
		||||
    while (it != inputfile.entries.end()) {
 | 
			
		||||
      ++it;
 | 
			
		||||
      if (it == inputfile.entries.end() ||
 | 
			
		||||
        it->product_size != it_first_with_given_product_size->product_size)
 | 
			
		||||
      {
 | 
			
		||||
        import_input_file_range_one_product_size(it_first_with_given_product_size, it);
 | 
			
		||||
        it_first_with_given_product_size = it;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  void import_input_file_range_one_product_size(
 | 
			
		||||
    const vector<inputfile_entry_t>::const_iterator& begin,
 | 
			
		||||
    const vector<inputfile_entry_t>::const_iterator& end)
 | 
			
		||||
  {
 | 
			
		||||
    uint16_t product_size = begin->product_size;
 | 
			
		||||
    float max_gflops = 0.0f;
 | 
			
		||||
    for (auto it = begin; it != end; ++it) {
 | 
			
		||||
      if (it->product_size != product_size) {
 | 
			
		||||
        cerr << "Unexpected ordering of entries in " << filename << endl;
 | 
			
		||||
        cerr << "(Expected all entries for product size " << hex << product_size << dec << " to be grouped)" << endl;
 | 
			
		||||
        exit(1);
 | 
			
		||||
      }
 | 
			
		||||
      max_gflops = max(max_gflops, it->gflops);
 | 
			
		||||
    }
 | 
			
		||||
    for (auto it = begin; it != end; ++it) {
 | 
			
		||||
      preprocessed_inputfile_entry_t entry;
 | 
			
		||||
      entry.product_size = it->product_size;
 | 
			
		||||
      entry.block_size = it->pot_block_size;
 | 
			
		||||
      entry.efficiency = it->gflops / max_gflops;
 | 
			
		||||
      entries.push_back(entry);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
void check_all_files_in_same_exact_order(
 | 
			
		||||
       const vector<preprocessed_inputfile_t>& preprocessed_inputfiles)
 | 
			
		||||
{
 | 
			
		||||
  if (preprocessed_inputfiles.empty()) {
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[0];
 | 
			
		||||
  const size_t num_entries = first_file.entries.size();
 | 
			
		||||
 | 
			
		||||
  for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) {
 | 
			
		||||
    if (preprocessed_inputfiles[i].entries.size() != num_entries) {
 | 
			
		||||
      cerr << "these files have different number of entries: "
 | 
			
		||||
           << preprocessed_inputfiles[i].filename
 | 
			
		||||
           << " and "
 | 
			
		||||
           << first_file.filename
 | 
			
		||||
           << endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for (size_t entry_index = 0; entry_index < num_entries; entry_index++) {
 | 
			
		||||
    const uint16_t entry_product_size = first_file.entries[entry_index].product_size;
 | 
			
		||||
    const uint16_t entry_block_size = first_file.entries[entry_index].block_size;
 | 
			
		||||
    for (size_t file_index = 0; file_index < preprocessed_inputfiles.size(); file_index++) {
 | 
			
		||||
      const preprocessed_inputfile_t& cur_file = preprocessed_inputfiles[file_index];
 | 
			
		||||
      if (cur_file.entries[entry_index].product_size != entry_product_size ||
 | 
			
		||||
          cur_file.entries[entry_index].block_size != entry_block_size)
 | 
			
		||||
      {
 | 
			
		||||
        cerr << "entries not in same order between these files: "
 | 
			
		||||
             << first_file.filename
 | 
			
		||||
             << " and "
 | 
			
		||||
             << cur_file.filename
 | 
			
		||||
             << endl;
 | 
			
		||||
        exit(1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float efficiency_of_subset(
 | 
			
		||||
        const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
 | 
			
		||||
        const vector<size_t>& subset)
 | 
			
		||||
{
 | 
			
		||||
  if (subset.size() <= 1) {
 | 
			
		||||
    return 1.0f;
 | 
			
		||||
  }
 | 
			
		||||
  const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
 | 
			
		||||
  const size_t num_entries = first_file.entries.size();
 | 
			
		||||
  float efficiency = 1.0f;
 | 
			
		||||
  size_t entry_index = 0;
 | 
			
		||||
  size_t first_entry_index_with_this_product_size = 0;
 | 
			
		||||
  uint16_t product_size = first_file.entries[0].product_size;
 | 
			
		||||
  while (entry_index < num_entries) {
 | 
			
		||||
    ++entry_index;
 | 
			
		||||
    if (entry_index == num_entries ||
 | 
			
		||||
        first_file.entries[entry_index].product_size != product_size)
 | 
			
		||||
    {
 | 
			
		||||
      float efficiency_this_product_size = 0.0f;
 | 
			
		||||
      for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
 | 
			
		||||
        float efficiency_this_entry = 1.0f;
 | 
			
		||||
        for (auto i = subset.begin(); i != subset.end(); ++i) {
 | 
			
		||||
          efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
 | 
			
		||||
        }
 | 
			
		||||
        efficiency_this_product_size = max(efficiency_this_product_size, efficiency_this_entry);
 | 
			
		||||
      }
 | 
			
		||||
      efficiency = min(efficiency, efficiency_this_product_size);
 | 
			
		||||
      if (entry_index < num_entries) {
 | 
			
		||||
        first_entry_index_with_this_product_size = entry_index;
 | 
			
		||||
        product_size = first_file.entries[entry_index].product_size;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return efficiency;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void dump_table_for_subset(
 | 
			
		||||
        const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
 | 
			
		||||
        const vector<size_t>& subset)
 | 
			
		||||
{
 | 
			
		||||
  const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
 | 
			
		||||
  const size_t num_entries = first_file.entries.size();
 | 
			
		||||
  size_t entry_index = 0;
 | 
			
		||||
  size_t first_entry_index_with_this_product_size = 0;
 | 
			
		||||
  uint16_t product_size = first_file.entries[0].product_size;
 | 
			
		||||
  size_t i = 0;
 | 
			
		||||
  size_triple_t min_product_size(first_file.entries.front().product_size);
 | 
			
		||||
  size_triple_t max_product_size(first_file.entries.back().product_size);
 | 
			
		||||
  if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) {
 | 
			
		||||
    abort();
 | 
			
		||||
  }
 | 
			
		||||
  if (only_cubic_sizes) {
 | 
			
		||||
    cerr << "Can't generate tables with --only-cubic-sizes." << endl;
 | 
			
		||||
    abort();
 | 
			
		||||
  }
 | 
			
		||||
  cout << "struct LookupTable {" << endl;
 | 
			
		||||
  cout << "  static const size_t BaseSize = " << min_product_size.k << ";" << endl;
 | 
			
		||||
  const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1;
 | 
			
		||||
  const size_t TableSize = NumSizes * NumSizes * NumSizes;
 | 
			
		||||
  cout << "  static const size_t NumSizes = " << NumSizes << ";" << endl;
 | 
			
		||||
  cout << "  static const unsigned short* Data() {" << endl;
 | 
			
		||||
  cout << "    static const unsigned short data[" << TableSize << "] = {";
 | 
			
		||||
  while (entry_index < num_entries) {
 | 
			
		||||
    ++entry_index;
 | 
			
		||||
    if (entry_index == num_entries ||
 | 
			
		||||
        first_file.entries[entry_index].product_size != product_size)
 | 
			
		||||
    {
 | 
			
		||||
      float best_efficiency_this_product_size = 0.0f;
 | 
			
		||||
      uint16_t best_block_size_this_product_size = 0;
 | 
			
		||||
      for (size_t e = first_entry_index_with_this_product_size; e < entry_index; e++) {
 | 
			
		||||
        float efficiency_this_entry = 1.0f;
 | 
			
		||||
        for (auto i = subset.begin(); i != subset.end(); ++i) {
 | 
			
		||||
          efficiency_this_entry = min(efficiency_this_entry, preprocessed_inputfiles[*i].entries[e].efficiency);
 | 
			
		||||
        }
 | 
			
		||||
        if (efficiency_this_entry > best_efficiency_this_product_size) {
 | 
			
		||||
          best_efficiency_this_product_size = efficiency_this_entry;
 | 
			
		||||
          best_block_size_this_product_size = first_file.entries[e].block_size;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      if ((i++) % NumSizes) {
 | 
			
		||||
        cout << " ";
 | 
			
		||||
      } else {
 | 
			
		||||
        cout << endl << "      ";
 | 
			
		||||
      }
 | 
			
		||||
      cout << "0x" << hex << best_block_size_this_product_size << dec;
 | 
			
		||||
      if (entry_index < num_entries) {
 | 
			
		||||
        cout << ",";
 | 
			
		||||
        first_entry_index_with_this_product_size = entry_index;
 | 
			
		||||
        product_size = first_file.entries[entry_index].product_size;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  if (i != TableSize) {
 | 
			
		||||
    cerr << endl << "Wrote " << i << " table entries, expected " << TableSize << endl;
 | 
			
		||||
    abort();
 | 
			
		||||
  }
 | 
			
		||||
  cout << endl << "    };" << endl;
 | 
			
		||||
  cout << "    return data;" << endl;
 | 
			
		||||
  cout << "  }" << endl;
 | 
			
		||||
  cout << "};" << endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float efficiency_of_partition(
 | 
			
		||||
        const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
 | 
			
		||||
        const vector<vector<size_t>>& partition)
 | 
			
		||||
{
 | 
			
		||||
  float efficiency = 1.0f;
 | 
			
		||||
  for (auto s = partition.begin(); s != partition.end(); ++s) {
 | 
			
		||||
    efficiency = min(efficiency, efficiency_of_subset(preprocessed_inputfiles, *s));
 | 
			
		||||
  }
 | 
			
		||||
  return efficiency;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Sets out_subset to {0, 1, ..., subset_size - 1}, the starting point for
// enumeration with next_subset().
// Asserts 1 <= subset_size <= set_size.
void make_first_subset(size_t subset_size, vector<size_t>& out_subset, size_t set_size)
{
  assert(subset_size >= 1 && subset_size <= set_size);
  out_subset.resize(subset_size);
  size_t next_value = 0;
  for (size_t& slot : out_subset) {
    slot = next_value++;
  }
}
 | 
			
		||||
 | 
			
		||||
// True when the first element of `subset` equals set_size - subset.size().
// NOTE(review): presumably `subset` holds strictly increasing indices below
// set_size, in which case this identifies the final subset of the
// enumeration; the subset must be non-empty.
bool is_last_subset(const vector<size_t>& subset, size_t set_size)
{
  const size_t first_element = subset[0];
  const size_t last_possible_start = set_size - subset.size();
  return first_element == last_possible_start;
}
 | 
			
		||||
 | 
			
		||||
void next_subset(vector<size_t>& inout_subset, size_t set_size)
 | 
			
		||||
{
 | 
			
		||||
  if (is_last_subset(inout_subset, set_size)) {
 | 
			
		||||
    cerr << "iterating past the last subset" << endl;
 | 
			
		||||
    abort();
 | 
			
		||||
  }
 | 
			
		||||
  size_t i = 1;
 | 
			
		||||
  while (inout_subset[inout_subset.size() - i] == set_size - i) {
 | 
			
		||||
    i++;
 | 
			
		||||
    assert(i <= inout_subset.size());
 | 
			
		||||
  }
 | 
			
		||||
  size_t first_index_to_change = inout_subset.size() - i;
 | 
			
		||||
  inout_subset[first_index_to_change]++;
 | 
			
		||||
  size_t p = inout_subset[first_index_to_change];
 | 
			
		||||
  for (size_t j = first_index_to_change + 1; j < inout_subset.size(); j++) {
 | 
			
		||||
    inout_subset[j] = ++p;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Upper bound on the number of subsets of a given size that the subset search
// accepts as feasible (see is_number_of_subsets_feasible).
const size_t number_of_subsets_limit = 100;

// Lower bound applied to the result of max_feasible_subset_size(), itself
// capped at n - 1 there.
const size_t always_search_subsets_of_size_at_least = 2;
 | 
			
		||||
 | 
			
		||||
bool is_number_of_subsets_feasible(size_t n, size_t p)
 | 
			
		||||
{ 
 | 
			
		||||
  assert(n>0 && p>0 && p<=n);
 | 
			
		||||
  uint64_t numerator = 1, denominator = 1;
 | 
			
		||||
  for (size_t i = 0; i < p; i++) {
 | 
			
		||||
    numerator *= n - i;
 | 
			
		||||
    denominator *= i + 1;
 | 
			
		||||
    if (numerator > denominator * number_of_subsets_limit) {
 | 
			
		||||
      return false;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
size_t max_feasible_subset_size(size_t n)
 | 
			
		||||
{
 | 
			
		||||
  assert(n > 0);
 | 
			
		||||
  const size_t minresult = min<size_t>(n-1, always_search_subsets_of_size_at_least);
 | 
			
		||||
  for (size_t p = 1; p <= n - 1; p++) {
 | 
			
		||||
    if (!is_number_of_subsets_feasible(n, p+1)) {
 | 
			
		||||
      return max(p, minresult);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return n - 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void find_subset_with_efficiency_higher_than(
 | 
			
		||||
       const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
 | 
			
		||||
       float required_efficiency_to_beat,
 | 
			
		||||
       vector<size_t>& inout_remainder,
 | 
			
		||||
       vector<size_t>& out_subset)
 | 
			
		||||
{
 | 
			
		||||
  out_subset.resize(0);
 | 
			
		||||
 | 
			
		||||
  if (required_efficiency_to_beat >= 1.0f) {
 | 
			
		||||
    cerr << "can't beat efficiency 1." << endl;
 | 
			
		||||
    abort();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  while (!inout_remainder.empty()) {
 | 
			
		||||
 | 
			
		||||
    vector<size_t> candidate_indices(inout_remainder.size());
 | 
			
		||||
    for (size_t i = 0; i < candidate_indices.size(); i++) {
 | 
			
		||||
      candidate_indices[i] = i;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    size_t candidate_indices_subset_size = max_feasible_subset_size(candidate_indices.size());
 | 
			
		||||
    while (candidate_indices_subset_size >= 1) {
 | 
			
		||||
      vector<size_t> candidate_indices_subset;
 | 
			
		||||
      make_first_subset(candidate_indices_subset_size,
 | 
			
		||||
                        candidate_indices_subset,
 | 
			
		||||
                        candidate_indices.size());
 | 
			
		||||
 | 
			
		||||
      vector<size_t> best_candidate_indices_subset;
 | 
			
		||||
      float best_efficiency = 0.0f;
 | 
			
		||||
      vector<size_t> trial_subset = out_subset;
 | 
			
		||||
      trial_subset.resize(out_subset.size() + candidate_indices_subset_size);
 | 
			
		||||
      while (true)
 | 
			
		||||
      {
 | 
			
		||||
        for (size_t i = 0; i < candidate_indices_subset_size; i++) {
 | 
			
		||||
          trial_subset[out_subset.size() + i] = inout_remainder[candidate_indices_subset[i]];
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset);
 | 
			
		||||
        if (trial_efficiency > best_efficiency) {
 | 
			
		||||
          best_efficiency = trial_efficiency;
 | 
			
		||||
          best_candidate_indices_subset = candidate_indices_subset;
 | 
			
		||||
        }
 | 
			
		||||
        if (is_last_subset(candidate_indices_subset, candidate_indices.size())) {
 | 
			
		||||
          break;
 | 
			
		||||
        }
 | 
			
		||||
        next_subset(candidate_indices_subset, candidate_indices.size());
 | 
			
		||||
      }
 | 
			
		||||
       
 | 
			
		||||
      if (best_efficiency > required_efficiency_to_beat) {
 | 
			
		||||
        for (size_t i = 0; i < best_candidate_indices_subset.size(); i++) {
 | 
			
		||||
          candidate_indices[i] = candidate_indices[best_candidate_indices_subset[i]];
 | 
			
		||||
        }
 | 
			
		||||
        candidate_indices.resize(best_candidate_indices_subset.size());
 | 
			
		||||
      }
 | 
			
		||||
      candidate_indices_subset_size--;
 | 
			
		||||
    }
 | 
			
		||||
      
 | 
			
		||||
    size_t candidate_index = candidate_indices[0];
 | 
			
		||||
    auto candidate_iterator = inout_remainder.begin() + candidate_index;
 | 
			
		||||
    vector<size_t> trial_subset = out_subset;
 | 
			
		||||
 | 
			
		||||
    trial_subset.push_back(*candidate_iterator);
 | 
			
		||||
    float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset);
 | 
			
		||||
    if (trial_efficiency > required_efficiency_to_beat) {
 | 
			
		||||
      out_subset.push_back(*candidate_iterator);
 | 
			
		||||
      inout_remainder.erase(candidate_iterator);
 | 
			
		||||
    } else {
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void find_partition_with_efficiency_higher_than(
 | 
			
		||||
       const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
 | 
			
		||||
       float required_efficiency_to_beat,
 | 
			
		||||
       vector<vector<size_t>>& out_partition)
 | 
			
		||||
{
 | 
			
		||||
  out_partition.resize(0);
 | 
			
		||||
 | 
			
		||||
  vector<size_t> remainder;
 | 
			
		||||
  for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) {
 | 
			
		||||
    remainder.push_back(i);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  while (!remainder.empty()) {
 | 
			
		||||
    vector<size_t> new_subset;
 | 
			
		||||
    find_subset_with_efficiency_higher_than(
 | 
			
		||||
      preprocessed_inputfiles,
 | 
			
		||||
      required_efficiency_to_beat,
 | 
			
		||||
      remainder,
 | 
			
		||||
      new_subset);
 | 
			
		||||
    out_partition.push_back(new_subset);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void print_partition(
 | 
			
		||||
       const vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
 | 
			
		||||
       const vector<vector<size_t>>& partition)
 | 
			
		||||
{
 | 
			
		||||
  float efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
 | 
			
		||||
  cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency"  << endl;
 | 
			
		||||
  for (auto subset = partition.begin(); subset != partition.end(); ++subset) {
 | 
			
		||||
    cout << "  Subset " << (subset - partition.begin())
 | 
			
		||||
         << ", efficiency " << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:"
 | 
			
		||||
         << endl;
 | 
			
		||||
    for (auto file = subset->begin(); file != subset->end(); ++file) {
 | 
			
		||||
      cout << "    " << preprocessed_inputfiles[*file].filename << endl;
 | 
			
		||||
    }
 | 
			
		||||
    if (dump_tables) {
 | 
			
		||||
      cout << "  Table:" << endl;
 | 
			
		||||
      dump_table_for_subset(preprocessed_inputfiles, *subset);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  cout << endl;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Base type of the command-line actions. The default implementations abort,
// so a derived action is expected to override both members.
struct action_t
{
  // Name that selects this action on the command line.
  virtual const char* invokation_name() const
  {
    abort();
    return nullptr;
  }

  // Runs the action on the given input file names.
  virtual void run(const vector<string>& /*input_filenames*/) const
  {
    abort();
  }

  virtual ~action_t() = default;
};
 | 
			
		||||
 | 
			
		||||
struct partition_action_t : action_t
 | 
			
		||||
{
 | 
			
		||||
  virtual const char* invokation_name() const override { return "partition"; }
 | 
			
		||||
  virtual void run(const vector<string>& input_filenames) const override
 | 
			
		||||
  {
 | 
			
		||||
    vector<preprocessed_inputfile_t> preprocessed_inputfiles;
 | 
			
		||||
 | 
			
		||||
    if (input_filenames.empty()) {
 | 
			
		||||
      cerr << "The " << invokation_name() << " action needs a list of input files." << endl;
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    for (auto it = input_filenames.begin(); it != input_filenames.end(); ++it) {
 | 
			
		||||
      inputfile_t inputfile(*it);
 | 
			
		||||
      switch (inputfile.type) {
 | 
			
		||||
        case inputfile_t::type_t::all_pot_sizes:
 | 
			
		||||
          preprocessed_inputfiles.emplace_back(inputfile);
 | 
			
		||||
          break;
 | 
			
		||||
        case inputfile_t::type_t::default_sizes:
 | 
			
		||||
          cerr << "The " << invokation_name() << " action only uses measurements for all pot sizes, and "
 | 
			
		||||
               << "has no use for " << *it << " which contains measurements for default sizes." << endl;
 | 
			
		||||
          exit(1);
 | 
			
		||||
          break;
 | 
			
		||||
        default:
 | 
			
		||||
          cerr << "Unrecognized input file: " << *it << endl;
 | 
			
		||||
          exit(1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    check_all_files_in_same_exact_order(preprocessed_inputfiles);
 | 
			
		||||
 | 
			
		||||
    float required_efficiency_to_beat = 0.0f;
 | 
			
		||||
    vector<vector<vector<size_t>>> partitions;
 | 
			
		||||
    cerr << "searching for partitions...\r" << flush;
 | 
			
		||||
    while (true)
 | 
			
		||||
    {
 | 
			
		||||
      vector<vector<size_t>> partition;
 | 
			
		||||
      find_partition_with_efficiency_higher_than(
 | 
			
		||||
        preprocessed_inputfiles,
 | 
			
		||||
        required_efficiency_to_beat,
 | 
			
		||||
        partition);
 | 
			
		||||
      float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
 | 
			
		||||
      cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size()
 | 
			
		||||
           << " subsets for " << 100.0f * actual_efficiency
 | 
			
		||||
           << " % efficiency"
 | 
			
		||||
           << "                  \r" << flush;
 | 
			
		||||
      partitions.push_back(partition);
 | 
			
		||||
      if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) {
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      required_efficiency_to_beat = actual_efficiency;
 | 
			
		||||
    }
 | 
			
		||||
    cerr << "                                                                  " << endl;
 | 
			
		||||
    while (true) {
 | 
			
		||||
      bool repeat = false;
 | 
			
		||||
      for (size_t i = 0; i < partitions.size() - 1; i++) {
 | 
			
		||||
        if (partitions[i].size() >= partitions[i+1].size()) {
 | 
			
		||||
          partitions.erase(partitions.begin() + i);
 | 
			
		||||
          repeat = true;
 | 
			
		||||
          break;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      if (!repeat) {
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    for (auto it = partitions.begin(); it != partitions.end(); ++it) {
 | 
			
		||||
      print_partition(preprocessed_inputfiles, *it);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct evaluate_defaults_action_t : action_t
 | 
			
		||||
{
 | 
			
		||||
  struct results_entry_t {
 | 
			
		||||
    uint16_t product_size;
 | 
			
		||||
    size_triple_t default_block_size;
 | 
			
		||||
    uint16_t best_pot_block_size;
 | 
			
		||||
    float default_gflops;
 | 
			
		||||
    float best_pot_gflops;
 | 
			
		||||
    float default_efficiency;
 | 
			
		||||
  };
 | 
			
		||||
  friend ostream& operator<<(ostream& s, const results_entry_t& entry)
 | 
			
		||||
  {
 | 
			
		||||
    return s
 | 
			
		||||
      << "Product size " << size_triple_t(entry.product_size)
 | 
			
		||||
      << ": default block size " << entry.default_block_size
 | 
			
		||||
      << " -> " << entry.default_gflops
 | 
			
		||||
      << " GFlop/s = " << entry.default_efficiency * 100.0f << " %"
 | 
			
		||||
      << " of best POT block size " << size_triple_t(entry.best_pot_block_size)
 | 
			
		||||
      << " -> " << entry.best_pot_gflops
 | 
			
		||||
      << " GFlop/s" << dec;
 | 
			
		||||
  }
 | 
			
		||||
  static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) {
 | 
			
		||||
    return e1.default_efficiency < e2.default_efficiency;
 | 
			
		||||
  }
 | 
			
		||||
  virtual const char* invokation_name() const override { return "evaluate-defaults"; }
 | 
			
		||||
  void show_usage_and_exit() const
 | 
			
		||||
  {
 | 
			
		||||
    cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl;
 | 
			
		||||
    cerr << "checks how well the performance with default sizes compares to the best "
 | 
			
		||||
         << "performance measured over all POT sizes." << endl;
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
  virtual void run(const vector<string>& input_filenames) const override
 | 
			
		||||
  {
 | 
			
		||||
    if (input_filenames.size() != 2) {
 | 
			
		||||
      show_usage_and_exit();
 | 
			
		||||
    }
 | 
			
		||||
    inputfile_t inputfile_default_sizes(input_filenames[0]);
 | 
			
		||||
    inputfile_t inputfile_all_pot_sizes(input_filenames[1]);
 | 
			
		||||
    if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) {
 | 
			
		||||
      cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." << endl;
 | 
			
		||||
      show_usage_and_exit();
 | 
			
		||||
    }
 | 
			
		||||
    if (inputfile_all_pot_sizes.type != inputfile_t::type_t::all_pot_sizes) {
 | 
			
		||||
      cerr << inputfile_all_pot_sizes.filename << " is not an input file with all POT sizes." << endl;
 | 
			
		||||
      show_usage_and_exit();
 | 
			
		||||
    }
 | 
			
		||||
    vector<results_entry_t> results;
 | 
			
		||||
    vector<results_entry_t> cubic_results;
 | 
			
		||||
    
 | 
			
		||||
    uint16_t product_size = 0;
 | 
			
		||||
    auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin();
 | 
			
		||||
    for (auto it_default_sizes = inputfile_default_sizes.entries.begin();
 | 
			
		||||
         it_default_sizes != inputfile_default_sizes.entries.end();
 | 
			
		||||
         ++it_default_sizes)
 | 
			
		||||
    {
 | 
			
		||||
      if (it_default_sizes->product_size == product_size) {
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
      product_size = it_default_sizes->product_size;
 | 
			
		||||
      while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() &&
 | 
			
		||||
             it_all_pot_sizes->product_size != product_size)
 | 
			
		||||
      {
 | 
			
		||||
        ++it_all_pot_sizes;
 | 
			
		||||
      }
 | 
			
		||||
      if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) {
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
      uint16_t best_pot_block_size = 0;
 | 
			
		||||
      float best_pot_gflops = 0;
 | 
			
		||||
      for (auto it = it_all_pot_sizes;
 | 
			
		||||
           it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size;
 | 
			
		||||
           ++it)
 | 
			
		||||
      {
 | 
			
		||||
        if (it->gflops > best_pot_gflops) {
 | 
			
		||||
          best_pot_gflops = it->gflops;
 | 
			
		||||
          best_pot_block_size = it->pot_block_size;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      results_entry_t entry;
 | 
			
		||||
      entry.product_size = product_size;
 | 
			
		||||
      entry.default_block_size = it_default_sizes->nonpot_block_size;
 | 
			
		||||
      entry.best_pot_block_size = best_pot_block_size;
 | 
			
		||||
      entry.default_gflops = it_default_sizes->gflops;
 | 
			
		||||
      entry.best_pot_gflops = best_pot_gflops;
 | 
			
		||||
      entry.default_efficiency = entry.default_gflops / entry.best_pot_gflops;
 | 
			
		||||
      results.push_back(entry);
 | 
			
		||||
 | 
			
		||||
      size_triple_t t(product_size);
 | 
			
		||||
      if (t.k == t.m && t.m == t.n) {
 | 
			
		||||
        cubic_results.push_back(entry);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    cout << "All results:" << endl;
 | 
			
		||||
    for (auto it = results.begin(); it != results.end(); ++it) {
 | 
			
		||||
      cout << *it << endl;
 | 
			
		||||
    }
 | 
			
		||||
    cout << endl;
 | 
			
		||||
 | 
			
		||||
    sort(results.begin(), results.end(), lower_efficiency);
 | 
			
		||||
    
 | 
			
		||||
    const size_t n = min<size_t>(20, results.size());
 | 
			
		||||
    cout << n << " worst results:" << endl;
 | 
			
		||||
    for (size_t i = 0; i < n; i++) {
 | 
			
		||||
      cout << results[i] << endl;
 | 
			
		||||
    }
 | 
			
		||||
    cout << endl;
 | 
			
		||||
 | 
			
		||||
    cout << "cubic results:" << endl;
 | 
			
		||||
    for (auto it = cubic_results.begin(); it != cubic_results.end(); ++it) {
 | 
			
		||||
      cout << *it << endl;
 | 
			
		||||
    }
 | 
			
		||||
    cout << endl;
 | 
			
		||||
 | 
			
		||||
    sort(cubic_results.begin(), cubic_results.end(), lower_efficiency);
 | 
			
		||||
    
 | 
			
		||||
    cout.precision(2);
 | 
			
		||||
    vector<float> a = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f};
 | 
			
		||||
    for (auto it = a.begin(); it != a.end(); ++it) {
 | 
			
		||||
      size_t n = min(results.size() - 1, size_t(*it * results.size()));
 | 
			
		||||
      cout << (100.0f * n / (results.size() - 1))
 | 
			
		||||
           << " % of product sizes have default efficiency <= "
 | 
			
		||||
           << 100.0f * results[n].default_efficiency << " %" << endl;
 | 
			
		||||
    }
 | 
			
		||||
    cout.precision(default_precision);
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void show_usage_and_exit(int argc, char* argv[],
 | 
			
		||||
                         const vector<unique_ptr<action_t>>& available_actions)
 | 
			
		||||
{
 | 
			
		||||
  cerr << "usage: " << argv[0] << " <action> [options...] <input files...>" << endl;
 | 
			
		||||
  cerr << "available actions:" << endl;
 | 
			
		||||
  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
 | 
			
		||||
    cerr << "  " << (*it)->invokation_name() << endl;
 | 
			
		||||
  } 
 | 
			
		||||
  cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl;
 | 
			
		||||
  exit(1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char* argv[])
 | 
			
		||||
{
 | 
			
		||||
  cout.precision(default_precision);
 | 
			
		||||
  cerr.precision(default_precision);
 | 
			
		||||
 | 
			
		||||
  vector<unique_ptr<action_t>> available_actions;
 | 
			
		||||
  available_actions.emplace_back(new partition_action_t);
 | 
			
		||||
  available_actions.emplace_back(new evaluate_defaults_action_t);
 | 
			
		||||
 | 
			
		||||
  vector<string> input_filenames;
 | 
			
		||||
 | 
			
		||||
  action_t* action = nullptr;
 | 
			
		||||
 | 
			
		||||
  if (argc < 2) {
 | 
			
		||||
    show_usage_and_exit(argc, argv, available_actions);
 | 
			
		||||
  }
 | 
			
		||||
  for (int i = 1; i < argc; i++) {
 | 
			
		||||
    bool arg_handled = false;
 | 
			
		||||
    // Step 1. Try to match action invokation names.
 | 
			
		||||
    for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
 | 
			
		||||
      if (!strcmp(argv[i], (*it)->invokation_name())) {
 | 
			
		||||
        if (!action) {
 | 
			
		||||
          action = it->get();
 | 
			
		||||
          arg_handled = true;
 | 
			
		||||
          break;
 | 
			
		||||
        } else {
 | 
			
		||||
          cerr << "can't specify more than one action!" << endl;
 | 
			
		||||
          show_usage_and_exit(argc, argv, available_actions);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    if (arg_handled) {
 | 
			
		||||
      continue;
 | 
			
		||||
    }
 | 
			
		||||
    // Step 2. Try to match option names.
 | 
			
		||||
    if (argv[i][0] == '-') {
 | 
			
		||||
      if (!strcmp(argv[i], "--only-cubic-sizes")) {
 | 
			
		||||
        only_cubic_sizes = true;
 | 
			
		||||
        arg_handled = true;
 | 
			
		||||
      }
 | 
			
		||||
      if (!strcmp(argv[i], "--dump-tables")) {
 | 
			
		||||
        dump_tables = true;
 | 
			
		||||
        arg_handled = true;
 | 
			
		||||
      }
 | 
			
		||||
      if (!arg_handled) {
 | 
			
		||||
        cerr << "Unrecognized option: " << argv[i] << endl;
 | 
			
		||||
        show_usage_and_exit(argc, argv, available_actions);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    if (arg_handled) {
 | 
			
		||||
      continue;
 | 
			
		||||
    }
 | 
			
		||||
    // Step 3. Default to interpreting args as input filenames.
 | 
			
		||||
    input_filenames.emplace_back(argv[i]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (dump_tables && only_cubic_sizes) {
 | 
			
		||||
    cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." << endl;
 | 
			
		||||
    show_usage_and_exit(argc, argv, available_actions);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!action) {
 | 
			
		||||
    show_usage_and_exit(argc, argv, available_actions);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  action->run(input_filenames);
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										28
									
								
								cs440-acg/ext/eigen/bench/basicbench.cxxlist
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								cs440-acg/ext/eigen/bench/basicbench.cxxlist
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,28 @@
 | 
			
		||||
#!/bin/bash
 | 
			
		||||
 | 
			
		||||
# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG"
 | 
			
		||||
# CLIST[((g++))]="g++-3.4 -O3 -DNDEBUG -finline-limit=20000"
 | 
			
		||||
 | 
			
		||||
# CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG"
 | 
			
		||||
#CLIST[((g++))]="g++-4.1 -O3 -DNDEBUG -finline-limit=20000"
 | 
			
		||||
 | 
			
		||||
# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG"
 | 
			
		||||
#CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000"
 | 
			
		||||
# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate"
 | 
			
		||||
# CLIST[((g++))]="g++-4.2 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use"
 | 
			
		||||
 | 
			
		||||
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG"
 | 
			
		||||
#CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000"
 | 
			
		||||
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-generate"
 | 
			
		||||
# CLIST[((g++))]="g++-4.3 -O3 -DNDEBUG -finline-limit=20000 -fprofile-use"
 | 
			
		||||
 | 
			
		||||
# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-genx"
 | 
			
		||||
# CLIST[((g++))]="icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -prof-use"
 | 
			
		||||
 | 
			
		||||
#CLIST[((g++))]="/opt/intel/Compiler/11.1/072/bin/intel64/icpc -fast -DNDEBUG -fno-exceptions -no-inline-max-size -lrt"
 | 
			
		||||
CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
 | 
			
		||||
CLIST[((g++))]="/home/orzel/svn/llvm/Release/bin/clang++ -O3 -DNDEBUG -lrt"
 | 
			
		||||
CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
 | 
			
		||||
CLIST[((g++))]="g++-4.4.4 -O3 -DNDEBUG -lrt"
 | 
			
		||||
CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -DEIGEN_DONT_VECTORIZE -lrt"
 | 
			
		||||
CLIST[((g++))]="g++-4.5.0 -O3 -DNDEBUG -lrt"
 | 
			
		||||
							
								
								
									
										35
									
								
								cs440-acg/ext/eigen/bench/basicbenchmark.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								cs440-acg/ext/eigen/bench/basicbenchmark.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,35 @@
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include "BenchUtil.h"
 | 
			
		||||
#include "basicbenchmark.h"
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[])
 | 
			
		||||
{
 | 
			
		||||
  DISABLE_SSE_EXCEPTIONS();
 | 
			
		||||
 | 
			
		||||
  // this is the list of matrix type and size we want to bench:
 | 
			
		||||
  // ((suffix) (matrix size) (number of iterations))
 | 
			
		||||
  #define MODES ((3d)(3)(4000000)) ((4d)(4)(1000000)) ((Xd)(4)(1000000)) ((Xd)(20)(10000))
 | 
			
		||||
//   #define MODES ((Xd)(20)(10000))
 | 
			
		||||
 | 
			
		||||
  #define _GENERATE_HEADER(R,ARG,EL) << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_HEAD(EL)) << "-" \
 | 
			
		||||
    << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "x" \
 | 
			
		||||
    << BOOST_PP_STRINGIZE(BOOST_PP_SEQ_ELEM(1,EL)) << "   /   "
 | 
			
		||||
 | 
			
		||||
  std::cout BOOST_PP_SEQ_FOR_EACH(_GENERATE_HEADER, ~, MODES ) << endl;
 | 
			
		||||
 | 
			
		||||
  const int tries = 10;
 | 
			
		||||
 | 
			
		||||
  #define _RUN_BENCH(R,ARG,EL) \
 | 
			
		||||
    std::cout << ARG( \
 | 
			
		||||
      BOOST_PP_CAT(Matrix, BOOST_PP_SEQ_HEAD(EL)) (\
 | 
			
		||||
         BOOST_PP_SEQ_ELEM(1,EL),BOOST_PP_SEQ_ELEM(1,EL)), BOOST_PP_SEQ_ELEM(2,EL), tries) \
 | 
			
		||||
    << "   ";
 | 
			
		||||
 | 
			
		||||
  BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<LazyEval>, MODES );
 | 
			
		||||
  std::cout << endl;
 | 
			
		||||
  BOOST_PP_SEQ_FOR_EACH(_RUN_BENCH, benchBasic<EarlyEval>, MODES );
 | 
			
		||||
  std::cout << endl;
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										63
									
								
								cs440-acg/ext/eigen/bench/basicbenchmark.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								cs440-acg/ext/eigen/bench/basicbenchmark.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,63 @@
 | 
			
		||||
 | 
			
		||||
#ifndef EIGEN_BENCH_BASICBENCH_H
 | 
			
		||||
#define EIGEN_BENCH_BASICBENCH_H
 | 
			
		||||
 | 
			
		||||
enum {LazyEval, EarlyEval, OmpEval};
 | 
			
		||||
 | 
			
		||||
template<int Mode, typename MatrixType>
 | 
			
		||||
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations) __attribute__((noinline));
 | 
			
		||||
 | 
			
		||||
template<int Mode, typename MatrixType>
 | 
			
		||||
void benchBasic_loop(const MatrixType& I, MatrixType& m, int iterations)
 | 
			
		||||
{
 | 
			
		||||
  for(int a = 0; a < iterations; a++)
 | 
			
		||||
  {
 | 
			
		||||
    if (Mode==LazyEval)
 | 
			
		||||
    {
 | 
			
		||||
      asm("#begin_bench_loop LazyEval");
 | 
			
		||||
      if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
 | 
			
		||||
      m = (I + 0.00005 * (m + m.lazy() * m)).eval();
 | 
			
		||||
    }
 | 
			
		||||
    else if (Mode==OmpEval)
 | 
			
		||||
    {
 | 
			
		||||
      asm("#begin_bench_loop OmpEval");
 | 
			
		||||
      if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
 | 
			
		||||
      m = (I + 0.00005 * (m + m.lazy() * m)).evalOMP();
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      asm("#begin_bench_loop EarlyEval");
 | 
			
		||||
      if (MatrixType::SizeAtCompileTime!=Eigen::Dynamic) asm("#fixedsize");
 | 
			
		||||
      m = I + 0.00005 * (m + m * m);
 | 
			
		||||
    }
 | 
			
		||||
    asm("#end_bench_loop");
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<int Mode, typename MatrixType>
 | 
			
		||||
double benchBasic(const MatrixType& mat, int size, int tries) __attribute__((noinline));
 | 
			
		||||
 | 
			
		||||
template<int Mode, typename MatrixType>
 | 
			
		||||
double benchBasic(const MatrixType& mat, int iterations, int tries)
 | 
			
		||||
{
 | 
			
		||||
  const int rows = mat.rows();
 | 
			
		||||
  const int cols = mat.cols();
 | 
			
		||||
 | 
			
		||||
  MatrixType I(rows,cols);
 | 
			
		||||
  MatrixType m(rows,cols);
 | 
			
		||||
 | 
			
		||||
  initMatrix_identity(I);
 | 
			
		||||
 | 
			
		||||
  Eigen::BenchTimer timer;
 | 
			
		||||
  for(uint t=0; t<tries; ++t)
 | 
			
		||||
  {
 | 
			
		||||
    initMatrix_random(m);
 | 
			
		||||
    timer.start();
 | 
			
		||||
    benchBasic_loop<Mode>(I, m, iterations);
 | 
			
		||||
    timer.stop();
 | 
			
		||||
    cerr << m;
 | 
			
		||||
  }
 | 
			
		||||
  return timer.value();
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif // EIGEN_BENCH_BASICBENCH_H
 | 
			
		||||
							
								
								
									
										219
									
								
								cs440-acg/ext/eigen/bench/benchBlasGemm.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										219
									
								
								cs440-acg/ext/eigen/bench/benchBlasGemm.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,219 @@
 | 
			
		||||
// g++ -O3 -DNDEBUG -I.. -L /usr/lib64/atlas/ benchBlasGemm.cpp -o benchBlasGemm -lrt -lcblas
 | 
			
		||||
// possible options:
 | 
			
		||||
//    -DEIGEN_DONT_VECTORIZE
 | 
			
		||||
//    -msse2
 | 
			
		||||
 | 
			
		||||
// #define EIGEN_DEFAULT_TO_ROW_MAJOR
 | 
			
		||||
#define _FLOAT
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
#include "BenchTimer.h"
 | 
			
		||||
 | 
			
		||||
// include the BLAS headers
 | 
			
		||||
extern "C" {
 | 
			
		||||
#include <cblas.h>
 | 
			
		||||
}
 | 
			
		||||
#include <string>
 | 
			
		||||
 | 
			
		||||
#ifdef _FLOAT
 | 
			
		||||
typedef float Scalar;
 | 
			
		||||
#define CBLAS_GEMM cblas_sgemm
 | 
			
		||||
#else
 | 
			
		||||
typedef double Scalar;
 | 
			
		||||
#define CBLAS_GEMM cblas_dgemm
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
typedef Eigen::Matrix<Scalar,Eigen::Dynamic,Eigen::Dynamic> MyMatrix;
 | 
			
		||||
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops);
 | 
			
		||||
void check_product(int M, int N, int K);
 | 
			
		||||
void check_product(void);
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[])
 | 
			
		||||
{
 | 
			
		||||
  // disable SSE exceptions
 | 
			
		||||
  #ifdef __GNUC__
 | 
			
		||||
  {
 | 
			
		||||
    int aux;
 | 
			
		||||
    asm(
 | 
			
		||||
    "stmxcsr   %[aux]           \n\t"
 | 
			
		||||
    "orl       $32832, %[aux]   \n\t"
 | 
			
		||||
    "ldmxcsr   %[aux]           \n\t"
 | 
			
		||||
    : : [aux] "m" (aux));
 | 
			
		||||
  }
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  int nbtries=1, nbloops=1, M, N, K;
 | 
			
		||||
 | 
			
		||||
  if (argc==2)
 | 
			
		||||
  {
 | 
			
		||||
    if (std::string(argv[1])=="check")
 | 
			
		||||
      check_product();
 | 
			
		||||
    else
 | 
			
		||||
      M = N = K = atoi(argv[1]);
 | 
			
		||||
  }
 | 
			
		||||
  else if ((argc==3) && (std::string(argv[1])=="auto"))
 | 
			
		||||
  {
 | 
			
		||||
    M = N = K = atoi(argv[2]);
 | 
			
		||||
    nbloops = 1000000000/(M*M*M);
 | 
			
		||||
    if (nbloops<1)
 | 
			
		||||
      nbloops = 1;
 | 
			
		||||
    nbtries = 6;
 | 
			
		||||
  }
 | 
			
		||||
  else if (argc==4)
 | 
			
		||||
  {
 | 
			
		||||
    M = N = K = atoi(argv[1]);
 | 
			
		||||
    nbloops = atoi(argv[2]);
 | 
			
		||||
    nbtries = atoi(argv[3]);
 | 
			
		||||
  }
 | 
			
		||||
  else if (argc==6)
 | 
			
		||||
  {
 | 
			
		||||
    M = atoi(argv[1]);
 | 
			
		||||
    N = atoi(argv[2]);
 | 
			
		||||
    K = atoi(argv[3]);
 | 
			
		||||
    nbloops = atoi(argv[4]);
 | 
			
		||||
    nbtries = atoi(argv[5]);
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
  {
 | 
			
		||||
    std::cout << "Usage: " << argv[0] << " size  \n";
 | 
			
		||||
    std::cout << "Usage: " << argv[0] << " auto size\n";
 | 
			
		||||
    std::cout << "Usage: " << argv[0] << " size nbloops nbtries\n";
 | 
			
		||||
    std::cout << "Usage: " << argv[0] << " M N K nbloops nbtries\n";
 | 
			
		||||
    std::cout << "Usage: " << argv[0] << " check\n";
 | 
			
		||||
    std::cout << "Options:\n";
 | 
			
		||||
    std::cout << "    size       unique size of the 2 matrices (integer)\n";
 | 
			
		||||
    std::cout << "    auto       automatically set the number of repetitions and tries\n";
 | 
			
		||||
    std::cout << "    nbloops    number of times the GEMM routines is executed\n";
 | 
			
		||||
    std::cout << "    nbtries    number of times the loop is benched (return the best try)\n";
 | 
			
		||||
    std::cout << "    M N K      sizes of the matrices: MxN  =  MxK * KxN (integers)\n";
 | 
			
		||||
    std::cout << "    check      check eigen product using cblas as a reference\n";
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nbmad = double(M) * double(N) * double(K) * double(nbloops);
 | 
			
		||||
 | 
			
		||||
  if (!(std::string(argv[1])=="auto"))
 | 
			
		||||
    std::cout << M << " x " << N << " x " << K << "\n";
 | 
			
		||||
 | 
			
		||||
  Scalar alpha, beta;
 | 
			
		||||
  MyMatrix ma(M,K), mb(K,N), mc(M,N);
 | 
			
		||||
  ma = MyMatrix::Random(M,K);
 | 
			
		||||
  mb = MyMatrix::Random(K,N);
 | 
			
		||||
  mc = MyMatrix::Random(M,N);
 | 
			
		||||
 | 
			
		||||
  Eigen::BenchTimer timer;
 | 
			
		||||
 | 
			
		||||
  // we simply compute c += a*b, so:
 | 
			
		||||
  alpha = 1;
 | 
			
		||||
  beta = 1;
 | 
			
		||||
 | 
			
		||||
  // bench cblas
 | 
			
		||||
  // ROWS_A, COLS_B, COLS_A, 1.0,  A, COLS_A, B, COLS_B, 0.0, C, COLS_B);
 | 
			
		||||
  if (!(std::string(argv[1])=="auto"))
 | 
			
		||||
  {
 | 
			
		||||
    timer.reset();
 | 
			
		||||
    for (uint k=0 ; k<nbtries ; ++k)
 | 
			
		||||
    {
 | 
			
		||||
        timer.start();
 | 
			
		||||
        for (uint j=0 ; j<nbloops ; ++j)
 | 
			
		||||
              #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
 | 
			
		||||
              CBLAS_GEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), K, mb.data(), N, beta, mc.data(), N);
 | 
			
		||||
              #else
 | 
			
		||||
              CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha, ma.data(), M, mb.data(), K, beta, mc.data(), M);
 | 
			
		||||
              #endif
 | 
			
		||||
        timer.stop();
 | 
			
		||||
    }
 | 
			
		||||
    if (!(std::string(argv[1])=="auto"))
 | 
			
		||||
      std::cout << "cblas: " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
 | 
			
		||||
    else
 | 
			
		||||
        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // clear
 | 
			
		||||
  ma = MyMatrix::Random(M,K);
 | 
			
		||||
  mb = MyMatrix::Random(K,N);
 | 
			
		||||
  mc = MyMatrix::Random(M,N);
 | 
			
		||||
 | 
			
		||||
  // eigen
 | 
			
		||||
//   if (!(std::string(argv[1])=="auto"))
 | 
			
		||||
  {
 | 
			
		||||
      timer.reset();
 | 
			
		||||
      for (uint k=0 ; k<nbtries ; ++k)
 | 
			
		||||
      {
 | 
			
		||||
          timer.start();
 | 
			
		||||
          bench_eigengemm(mc, ma, mb, nbloops);
 | 
			
		||||
          timer.stop();
 | 
			
		||||
      }
 | 
			
		||||
      if (!(std::string(argv[1])=="auto"))
 | 
			
		||||
        std::cout << "eigen : " << timer.value() << " (" << 1e-3*floor(1e-6*nbmad/timer.value()) << " GFlops/s)\n";
 | 
			
		||||
      else
 | 
			
		||||
        std::cout << M << " : " << timer.value() << " ; " << 1e-3*floor(1e-6*nbmad/timer.value()) << "\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cout << "l1: " << Eigen::l1CacheSize() << std::endl;
 | 
			
		||||
  std::cout << "l2: " << Eigen::l2CacheSize() << std::endl;
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
/**
 * Accumulates the product ma * mb into mc, nbloops times.
 *
 * @param mc       Destination; updated in place with noalias().
 * @param ma, mb   Left and right factors.
 * @param nbloops  Repetition count; values <= 0 perform no work.
 */
void bench_eigengemm(MyMatrix& mc, const MyMatrix& ma, const MyMatrix& mb, int nbloops)
{
  // Signed counter: comparing an unsigned counter with a negative nbloops
  // would turn the bound into a huge value.
  for (int j=0 ; j<nbloops ; ++j)
      mc.noalias() += ma * mb;
}
 | 
			
		||||
 | 
			
		||||
// Prints "FAIL: <M>" to stdout when condition A does not hold.
// Wrapped in do { } while (0) so that `MYVERIFY(...);` behaves as a single
// statement, including inside an unbraced if/else.
#define MYVERIFY(A,M) do { if (!(A)) { \
    std::cout << "FAIL: " << M << "\n"; \
  } } while (0)
 | 
			
		||||
void check_product(int M, int N, int K)
 | 
			
		||||
{
 | 
			
		||||
  MyMatrix ma(M,K), mb(K,N), mc(M,N), maT(K,M), mbT(N,K), meigen(M,N), mref(M,N);
 | 
			
		||||
  ma = MyMatrix::Random(M,K);
 | 
			
		||||
  mb = MyMatrix::Random(K,N);
 | 
			
		||||
  maT = ma.transpose();
 | 
			
		||||
  mbT = mb.transpose();
 | 
			
		||||
  mc = MyMatrix::Random(M,N);
 | 
			
		||||
 | 
			
		||||
  MyMatrix::Scalar eps = 1e-4;
 | 
			
		||||
 | 
			
		||||
  meigen = mref = mc;
 | 
			
		||||
  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1, ma.data(), M, mb.data(), K, 1, mref.data(), M);
 | 
			
		||||
  meigen += ma * mb;
 | 
			
		||||
  MYVERIFY(meigen.isApprox(mref, eps),". * .");
 | 
			
		||||
 | 
			
		||||
  meigen = mref = mc;
 | 
			
		||||
  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1, maT.data(), K, mb.data(), K, 1, mref.data(), M);
 | 
			
		||||
  meigen += maT.transpose() * mb;
 | 
			
		||||
  MYVERIFY(meigen.isApprox(mref, eps),"T * .");
 | 
			
		||||
 | 
			
		||||
  meigen = mref = mc;
 | 
			
		||||
  CBLAS_GEMM(CblasColMajor, CblasTrans, CblasTrans, M, N, K, 1, maT.data(), K, mbT.data(), N, 1, mref.data(), M);
 | 
			
		||||
  meigen += (maT.transpose()) * (mbT.transpose());
 | 
			
		||||
  MYVERIFY(meigen.isApprox(mref, eps),"T * T");
 | 
			
		||||
 | 
			
		||||
  meigen = mref = mc;
 | 
			
		||||
  CBLAS_GEMM(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1, ma.data(), M, mbT.data(), N, 1, mref.data(), M);
 | 
			
		||||
  meigen += ma * mbT.transpose();
 | 
			
		||||
  MYVERIFY(meigen.isApprox(mref, eps),". * T");
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void check_product(void)
 | 
			
		||||
{
 | 
			
		||||
  int M, N, K;
 | 
			
		||||
  for (uint i=0; i<1000; ++i)
 | 
			
		||||
  {
 | 
			
		||||
    M = internal::random<int>(1,64);
 | 
			
		||||
    N = internal::random<int>(1,768);
 | 
			
		||||
    K = internal::random<int>(1,768);
 | 
			
		||||
    M = (0 + M) * 1;
 | 
			
		||||
    std::cout << M << " x " << N << " x " << K << "\n";
 | 
			
		||||
    check_product(M, N, K);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										142
									
								
								cs440-acg/ext/eigen/bench/benchCholesky.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										142
									
								
								cs440-acg/ext/eigen/bench/benchCholesky.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,142 @@
 | 
			
		||||
 | 
			
		||||
// g++ -DNDEBUG -O3 -I.. benchCholesky.cpp  -o benchCholesky && ./benchCholesky
 | 
			
		||||
// options:
 | 
			
		||||
//  -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
 | 
			
		||||
//  -DEIGEN_DONT_VECTORIZE
 | 
			
		||||
//  -msse2
 | 
			
		||||
//  -DREPEAT=100
 | 
			
		||||
//  -DTRIES=10
 | 
			
		||||
//  -DSCALAR=double
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
#include <Eigen/Cholesky>
 | 
			
		||||
#include <bench/BenchUtil.h>
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#ifndef REPEAT
 | 
			
		||||
#define REPEAT 10000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef TRIES
 | 
			
		||||
#define TRIES 10
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
// Scalar type of the benchmarked matrices; override with -DSCALAR=<type>
// (defaults to float).
#ifndef SCALAR
#define SCALAR float
#endif

typedef SCALAR Scalar;
 | 
			
		||||
 | 
			
		||||
template <typename MatrixType>
 | 
			
		||||
__attribute__ ((noinline)) void benchLLT(const MatrixType& m)
 | 
			
		||||
{
 | 
			
		||||
  int rows = m.rows();
 | 
			
		||||
  int cols = m.cols();
 | 
			
		||||
 | 
			
		||||
  double cost = 0;
 | 
			
		||||
  for (int j=0; j<rows; ++j)
 | 
			
		||||
  {
 | 
			
		||||
    int r = std::max(rows - j -1,0);
 | 
			
		||||
    cost += 2*(r*j+r+j);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int repeats = (REPEAT*1000)/(rows*rows);
 | 
			
		||||
 | 
			
		||||
  typedef typename MatrixType::Scalar Scalar;
 | 
			
		||||
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
 | 
			
		||||
 | 
			
		||||
  MatrixType a = MatrixType::Random(rows,cols);
 | 
			
		||||
  SquareMatrixType covMat =  a * a.adjoint();
 | 
			
		||||
 | 
			
		||||
  BenchTimer timerNoSqrt, timerSqrt;
 | 
			
		||||
 | 
			
		||||
  Scalar acc = 0;
 | 
			
		||||
  int r = internal::random<int>(0,covMat.rows()-1);
 | 
			
		||||
  int c = internal::random<int>(0,covMat.cols()-1);
 | 
			
		||||
  for (int t=0; t<TRIES; ++t)
 | 
			
		||||
  {
 | 
			
		||||
    timerNoSqrt.start();
 | 
			
		||||
    for (int k=0; k<repeats; ++k)
 | 
			
		||||
    {
 | 
			
		||||
      LDLT<SquareMatrixType> cholnosqrt(covMat);
 | 
			
		||||
      acc += cholnosqrt.matrixL().coeff(r,c);
 | 
			
		||||
    }
 | 
			
		||||
    timerNoSqrt.stop();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for (int t=0; t<TRIES; ++t)
 | 
			
		||||
  {
 | 
			
		||||
    timerSqrt.start();
 | 
			
		||||
    for (int k=0; k<repeats; ++k)
 | 
			
		||||
    {
 | 
			
		||||
      LLT<SquareMatrixType> chol(covMat);
 | 
			
		||||
      acc += chol.matrixL().coeff(r,c);
 | 
			
		||||
    }
 | 
			
		||||
    timerSqrt.stop();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (MatrixType::RowsAtCompileTime==Dynamic)
 | 
			
		||||
    std::cout << "dyn   ";
 | 
			
		||||
  else
 | 
			
		||||
    std::cout << "fixed ";
 | 
			
		||||
  std::cout << covMat.rows() << " \t"
 | 
			
		||||
            << (timerNoSqrt.best()) / repeats << "s "
 | 
			
		||||
            << "(" << 1e-9 * cost*repeats/timerNoSqrt.best() << " GFLOPS)\t"
 | 
			
		||||
            << (timerSqrt.best()) / repeats << "s "
 | 
			
		||||
            << "(" << 1e-9 * cost*repeats/timerSqrt.best() << " GFLOPS)\n";
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  #ifdef BENCH_GSL
 | 
			
		||||
  if (MatrixType::RowsAtCompileTime==Dynamic)
 | 
			
		||||
  {
 | 
			
		||||
    timerSqrt.reset();
 | 
			
		||||
 | 
			
		||||
    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
 | 
			
		||||
    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
 | 
			
		||||
 | 
			
		||||
    eiToGsl(covMat, &gslCovMat);
 | 
			
		||||
    for (int t=0; t<TRIES; ++t)
 | 
			
		||||
    {
 | 
			
		||||
      timerSqrt.start();
 | 
			
		||||
      for (int k=0; k<repeats; ++k)
 | 
			
		||||
      {
 | 
			
		||||
        gsl_matrix_memcpy(gslCopy,gslCovMat);
 | 
			
		||||
        gsl_linalg_cholesky_decomp(gslCopy);
 | 
			
		||||
        acc += gsl_matrix_get(gslCopy,r,c);
 | 
			
		||||
      }
 | 
			
		||||
      timerSqrt.stop();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout << " | \t"
 | 
			
		||||
              << timerSqrt.value() * REPEAT / repeats << "s";
 | 
			
		||||
 | 
			
		||||
    gsl_matrix_free(gslCovMat);
 | 
			
		||||
  }
 | 
			
		||||
  #endif
 | 
			
		||||
  std::cout << "\n";
 | 
			
		||||
  // make sure the compiler does not optimize too much
 | 
			
		||||
  if (acc==123)
 | 
			
		||||
    std::cout << acc;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char* argv[])
 | 
			
		||||
{
 | 
			
		||||
  const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,1500,0};
 | 
			
		||||
  std::cout << "size            LDLT                            LLT";
 | 
			
		||||
//   #ifdef BENCH_GSL
 | 
			
		||||
//   std::cout << "       GSL (standard + double + ATLAS)  ";
 | 
			
		||||
//   #endif
 | 
			
		||||
  std::cout << "\n";
 | 
			
		||||
  for (int i=0; dynsizes[i]>0; ++i)
 | 
			
		||||
    benchLLT(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
 | 
			
		||||
 | 
			
		||||
  benchLLT(Matrix<Scalar,2,2>());
 | 
			
		||||
  benchLLT(Matrix<Scalar,3,3>());
 | 
			
		||||
  benchLLT(Matrix<Scalar,4,4>());
 | 
			
		||||
  benchLLT(Matrix<Scalar,5,5>());
 | 
			
		||||
  benchLLT(Matrix<Scalar,6,6>());
 | 
			
		||||
  benchLLT(Matrix<Scalar,7,7>());
 | 
			
		||||
  benchLLT(Matrix<Scalar,8,8>());
 | 
			
		||||
  benchLLT(Matrix<Scalar,12,12>());
 | 
			
		||||
  benchLLT(Matrix<Scalar,16,16>());
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										212
									
								
								cs440-acg/ext/eigen/bench/benchEigenSolver.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										212
									
								
								cs440-acg/ext/eigen/bench/benchEigenSolver.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,212 @@
 | 
			
		||||
 | 
			
		||||
// g++ -DNDEBUG -O3 -I.. benchEigenSolver.cpp  -o benchEigenSolver && ./benchEigenSolver
 | 
			
		||||
// options:
 | 
			
		||||
//  -DBENCH_GMM
 | 
			
		||||
//  -DBENCH_GSL -lgsl /usr/lib/libcblas.so.3
 | 
			
		||||
//  -DEIGEN_DONT_VECTORIZE
 | 
			
		||||
//  -msse2
 | 
			
		||||
//  -DREPEAT=100
 | 
			
		||||
//  -DTRIES=10
 | 
			
		||||
//  -DSCALAR=double
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
#include <Eigen/QR>
 | 
			
		||||
#include <bench/BenchUtil.h>
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#ifndef REPEAT
 | 
			
		||||
#define REPEAT 1000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef TRIES
 | 
			
		||||
#define TRIES 4
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SCALAR
 | 
			
		||||
#define SCALAR float
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
typedef SCALAR Scalar;
 | 
			
		||||
 | 
			
		||||
template <typename MatrixType>
 | 
			
		||||
__attribute__ ((noinline)) void benchEigenSolver(const MatrixType& m)
 | 
			
		||||
{
 | 
			
		||||
  int rows = m.rows();
 | 
			
		||||
  int cols = m.cols();
 | 
			
		||||
 | 
			
		||||
  int stdRepeats = std::max(1,int((REPEAT*1000)/(rows*rows*sqrt(rows))));
 | 
			
		||||
  int saRepeats = stdRepeats * 4;
 | 
			
		||||
 | 
			
		||||
  typedef typename MatrixType::Scalar Scalar;
 | 
			
		||||
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
 | 
			
		||||
 | 
			
		||||
  MatrixType a = MatrixType::Random(rows,cols);
 | 
			
		||||
  SquareMatrixType covMat =  a * a.adjoint();
 | 
			
		||||
 | 
			
		||||
  BenchTimer timerSa, timerStd;
 | 
			
		||||
 | 
			
		||||
  Scalar acc = 0;
 | 
			
		||||
  int r = internal::random<int>(0,covMat.rows()-1);
 | 
			
		||||
  int c = internal::random<int>(0,covMat.cols()-1);
 | 
			
		||||
  {
 | 
			
		||||
    SelfAdjointEigenSolver<SquareMatrixType> ei(covMat);
 | 
			
		||||
    for (int t=0; t<TRIES; ++t)
 | 
			
		||||
    {
 | 
			
		||||
      timerSa.start();
 | 
			
		||||
      for (int k=0; k<saRepeats; ++k)
 | 
			
		||||
      {
 | 
			
		||||
        ei.compute(covMat);
 | 
			
		||||
        acc += ei.eigenvectors().coeff(r,c);
 | 
			
		||||
      }
 | 
			
		||||
      timerSa.stop();
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  {
 | 
			
		||||
    EigenSolver<SquareMatrixType> ei(covMat);
 | 
			
		||||
    for (int t=0; t<TRIES; ++t)
 | 
			
		||||
    {
 | 
			
		||||
      timerStd.start();
 | 
			
		||||
      for (int k=0; k<stdRepeats; ++k)
 | 
			
		||||
      {
 | 
			
		||||
        ei.compute(covMat);
 | 
			
		||||
        acc += ei.eigenvectors().coeff(r,c);
 | 
			
		||||
      }
 | 
			
		||||
      timerStd.stop();
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (MatrixType::RowsAtCompileTime==Dynamic)
 | 
			
		||||
    std::cout << "dyn   ";
 | 
			
		||||
  else
 | 
			
		||||
    std::cout << "fixed ";
 | 
			
		||||
  std::cout << covMat.rows() << " \t"
 | 
			
		||||
            << timerSa.value() * REPEAT / saRepeats << "s \t"
 | 
			
		||||
            << timerStd.value() * REPEAT / stdRepeats << "s";
 | 
			
		||||
 | 
			
		||||
  #ifdef BENCH_GMM
 | 
			
		||||
  if (MatrixType::RowsAtCompileTime==Dynamic)
 | 
			
		||||
  {
 | 
			
		||||
    timerSa.reset();
 | 
			
		||||
    timerStd.reset();
 | 
			
		||||
 | 
			
		||||
    gmm::dense_matrix<Scalar> gmmCovMat(covMat.rows(),covMat.cols());
 | 
			
		||||
    gmm::dense_matrix<Scalar> eigvect(covMat.rows(),covMat.cols());
 | 
			
		||||
    std::vector<Scalar> eigval(covMat.rows());
 | 
			
		||||
    eiToGmm(covMat, gmmCovMat);
 | 
			
		||||
    for (int t=0; t<TRIES; ++t)
 | 
			
		||||
    {
 | 
			
		||||
      timerSa.start();
 | 
			
		||||
      for (int k=0; k<saRepeats; ++k)
 | 
			
		||||
      {
 | 
			
		||||
        gmm::symmetric_qr_algorithm(gmmCovMat, eigval, eigvect);
 | 
			
		||||
        acc += eigvect(r,c);
 | 
			
		||||
      }
 | 
			
		||||
      timerSa.stop();
 | 
			
		||||
    }
 | 
			
		||||
    // the non-selfadjoint solver does not compute the eigen vectors
 | 
			
		||||
//     for (int t=0; t<TRIES; ++t)
 | 
			
		||||
//     {
 | 
			
		||||
//       timerStd.start();
 | 
			
		||||
//       for (int k=0; k<stdRepeats; ++k)
 | 
			
		||||
//       {
 | 
			
		||||
//         gmm::implicit_qr_algorithm(gmmCovMat, eigval, eigvect);
 | 
			
		||||
//         acc += eigvect(r,c);
 | 
			
		||||
//       }
 | 
			
		||||
//       timerStd.stop();
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
    std::cout << " | \t"
 | 
			
		||||
              << timerSa.value() * REPEAT / saRepeats << "s"
 | 
			
		||||
              << /*timerStd.value() * REPEAT / stdRepeats << "s"*/ "   na   ";
 | 
			
		||||
  }
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  #ifdef BENCH_GSL
 | 
			
		||||
  if (MatrixType::RowsAtCompileTime==Dynamic)
 | 
			
		||||
  {
 | 
			
		||||
    timerSa.reset();
 | 
			
		||||
    timerStd.reset();
 | 
			
		||||
 | 
			
		||||
    gsl_matrix* gslCovMat = gsl_matrix_alloc(covMat.rows(),covMat.cols());
 | 
			
		||||
    gsl_matrix* gslCopy = gsl_matrix_alloc(covMat.rows(),covMat.cols());
 | 
			
		||||
    gsl_matrix* eigvect = gsl_matrix_alloc(covMat.rows(),covMat.cols());
 | 
			
		||||
    gsl_vector* eigval  = gsl_vector_alloc(covMat.rows());
 | 
			
		||||
    gsl_eigen_symmv_workspace* eisymm = gsl_eigen_symmv_alloc(covMat.rows());
 | 
			
		||||
    
 | 
			
		||||
    gsl_matrix_complex* eigvectz = gsl_matrix_complex_alloc(covMat.rows(),covMat.cols());
 | 
			
		||||
    gsl_vector_complex* eigvalz  = gsl_vector_complex_alloc(covMat.rows());
 | 
			
		||||
    gsl_eigen_nonsymmv_workspace* einonsymm = gsl_eigen_nonsymmv_alloc(covMat.rows());
 | 
			
		||||
    
 | 
			
		||||
    eiToGsl(covMat, &gslCovMat);
 | 
			
		||||
    for (int t=0; t<TRIES; ++t)
 | 
			
		||||
    {
 | 
			
		||||
      timerSa.start();
 | 
			
		||||
      for (int k=0; k<saRepeats; ++k)
 | 
			
		||||
      {
 | 
			
		||||
        gsl_matrix_memcpy(gslCopy,gslCovMat);
 | 
			
		||||
        gsl_eigen_symmv(gslCopy, eigval, eigvect, eisymm);
 | 
			
		||||
        acc += gsl_matrix_get(eigvect,r,c);
 | 
			
		||||
      }
 | 
			
		||||
      timerSa.stop();
 | 
			
		||||
    }
 | 
			
		||||
    for (int t=0; t<TRIES; ++t)
 | 
			
		||||
    {
 | 
			
		||||
      timerStd.start();
 | 
			
		||||
      for (int k=0; k<stdRepeats; ++k)
 | 
			
		||||
      {
 | 
			
		||||
        gsl_matrix_memcpy(gslCopy,gslCovMat);
 | 
			
		||||
        gsl_eigen_nonsymmv(gslCopy, eigvalz, eigvectz, einonsymm);
 | 
			
		||||
        acc += GSL_REAL(gsl_matrix_complex_get(eigvectz,r,c));
 | 
			
		||||
      }
 | 
			
		||||
      timerStd.stop();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    std::cout << " | \t"
 | 
			
		||||
              << timerSa.value() * REPEAT / saRepeats << "s \t"
 | 
			
		||||
              << timerStd.value() * REPEAT / stdRepeats << "s";
 | 
			
		||||
 | 
			
		||||
    gsl_matrix_free(gslCovMat);
 | 
			
		||||
    gsl_vector_free(gslCopy);
 | 
			
		||||
    gsl_matrix_free(eigvect);
 | 
			
		||||
    gsl_vector_free(eigval);
 | 
			
		||||
    gsl_matrix_complex_free(eigvectz);
 | 
			
		||||
    gsl_vector_complex_free(eigvalz);
 | 
			
		||||
    gsl_eigen_symmv_free(eisymm);
 | 
			
		||||
    gsl_eigen_nonsymmv_free(einonsymm);
 | 
			
		||||
  }
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  std::cout << "\n";
 | 
			
		||||
  
 | 
			
		||||
  // make sure the compiler does not optimize too much
 | 
			
		||||
  if (acc==123)
 | 
			
		||||
    std::cout << acc;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char* argv[])
 | 
			
		||||
{
 | 
			
		||||
  const int dynsizes[] = {4,6,8,12,16,24,32,64,128,256,512,0};
 | 
			
		||||
  std::cout << "size            selfadjoint       generic";
 | 
			
		||||
  #ifdef BENCH_GMM
 | 
			
		||||
  std::cout << "        GMM++          ";
 | 
			
		||||
  #endif
 | 
			
		||||
  #ifdef BENCH_GSL
 | 
			
		||||
  std::cout << "       GSL (double + ATLAS)  ";
 | 
			
		||||
  #endif
 | 
			
		||||
  std::cout << "\n";
 | 
			
		||||
  for (uint i=0; dynsizes[i]>0; ++i)
 | 
			
		||||
    benchEigenSolver(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
 | 
			
		||||
 | 
			
		||||
  benchEigenSolver(Matrix<Scalar,2,2>());
 | 
			
		||||
  benchEigenSolver(Matrix<Scalar,3,3>());
 | 
			
		||||
  benchEigenSolver(Matrix<Scalar,4,4>());
 | 
			
		||||
  benchEigenSolver(Matrix<Scalar,6,6>());
 | 
			
		||||
  benchEigenSolver(Matrix<Scalar,8,8>());
 | 
			
		||||
  benchEigenSolver(Matrix<Scalar,12,12>());
 | 
			
		||||
  benchEigenSolver(Matrix<Scalar,16,16>());
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										115
									
								
								cs440-acg/ext/eigen/bench/benchFFT.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								cs440-acg/ext/eigen/bench/benchFFT.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,115 @@
 | 
			
		||||
// This file is part of Eigen, a lightweight C++ template library
 | 
			
		||||
// for linear algebra.
 | 
			
		||||
//
 | 
			
		||||
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
 | 
			
		||||
//
 | 
			
		||||
// This Source Code Form is subject to the terms of the Mozilla
 | 
			
		||||
// Public License v. 2.0. If a copy of the MPL was not distributed
 | 
			
		||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
#include <bench/BenchUtil.h>
 | 
			
		||||
#include <complex>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
 | 
			
		||||
#include <unsupported/Eigen/FFT>
 | 
			
		||||
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
/// Returns the printable C++ spelling of the scalar type T.
/// Only the specializations below are defined; any other T fails to link.
template <typename T>
std::string nameof();

template <>
std::string nameof<float>()
{
    return "float";
}

template <>
std::string nameof<double>()
{
    return "double";
}

template <>
std::string nameof<long double>()
{
    return "long double";
}
 | 
			
		||||
 | 
			
		||||
#ifndef TYPE
 | 
			
		||||
#define TYPE float
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef NFFT
 | 
			
		||||
#define NFFT 1024
 | 
			
		||||
#endif
 | 
			
		||||
#ifndef NDATA
 | 
			
		||||
#define NDATA 1000000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
template <typename T>
 | 
			
		||||
void bench(int nfft,bool fwd,bool unscaled=false, bool halfspec=false)
 | 
			
		||||
{
 | 
			
		||||
    typedef typename NumTraits<T>::Real Scalar;
 | 
			
		||||
    typedef typename std::complex<Scalar> Complex;
 | 
			
		||||
    int nits = NDATA/nfft;
 | 
			
		||||
    vector<T> inbuf(nfft);
 | 
			
		||||
    vector<Complex > outbuf(nfft);
 | 
			
		||||
    FFT< Scalar > fft;
 | 
			
		||||
 | 
			
		||||
    if (unscaled) {
 | 
			
		||||
        fft.SetFlag(fft.Unscaled);
 | 
			
		||||
        cout << "unscaled ";
 | 
			
		||||
    }
 | 
			
		||||
    if (halfspec) {
 | 
			
		||||
        fft.SetFlag(fft.HalfSpectrum);
 | 
			
		||||
        cout << "halfspec ";
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    std::fill(inbuf.begin(),inbuf.end(),0);
 | 
			
		||||
    fft.fwd( outbuf , inbuf);
 | 
			
		||||
 | 
			
		||||
    BenchTimer timer;
 | 
			
		||||
    timer.reset();
 | 
			
		||||
    for (int k=0;k<8;++k) {
 | 
			
		||||
        timer.start();
 | 
			
		||||
        if (fwd)
 | 
			
		||||
            for(int i = 0; i < nits; i++)
 | 
			
		||||
                fft.fwd( outbuf , inbuf);
 | 
			
		||||
        else
 | 
			
		||||
            for(int i = 0; i < nits; i++)
 | 
			
		||||
                fft.inv(inbuf,outbuf);
 | 
			
		||||
        timer.stop();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    cout << nameof<Scalar>() << " ";
 | 
			
		||||
    double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits );
 | 
			
		||||
    if ( NumTraits<T>::IsComplex ) {
 | 
			
		||||
        cout << "complex";
 | 
			
		||||
    }else{
 | 
			
		||||
        cout << "real   ";
 | 
			
		||||
        mflops /= 2;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    if (fwd)
 | 
			
		||||
        cout << " fwd";
 | 
			
		||||
    else
 | 
			
		||||
        cout << " inv";
 | 
			
		||||
 | 
			
		||||
    cout << " NFFT=" << nfft << "  " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s  " << mflops << "MFLOPS\n";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc,char ** argv)
 | 
			
		||||
{
 | 
			
		||||
    bench<complex<float> >(NFFT,true);
 | 
			
		||||
    bench<complex<float> >(NFFT,false);
 | 
			
		||||
    bench<float>(NFFT,true);
 | 
			
		||||
    bench<float>(NFFT,false);
 | 
			
		||||
    bench<float>(NFFT,false,true);
 | 
			
		||||
    bench<float>(NFFT,false,true,true);
 | 
			
		||||
 | 
			
		||||
    bench<complex<double> >(NFFT,true);
 | 
			
		||||
    bench<complex<double> >(NFFT,false);
 | 
			
		||||
    bench<double>(NFFT,true);
 | 
			
		||||
    bench<double>(NFFT,false);
 | 
			
		||||
    bench<complex<long double> >(NFFT,true);
 | 
			
		||||
    bench<complex<long double> >(NFFT,false);
 | 
			
		||||
    bench<long double>(NFFT,true);
 | 
			
		||||
    bench<long double>(NFFT,false);
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										134
									
								
								cs440-acg/ext/eigen/bench/benchGeometry.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								cs440-acg/ext/eigen/bench/benchGeometry.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,134 @@
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <iomanip>
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
#include <Eigen/Geometry>
 | 
			
		||||
#include <bench/BenchTimer.h>
 | 
			
		||||
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
#ifndef REPEAT
 | 
			
		||||
#define REPEAT 1000000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/// Selects which product expression a benchmark kernel evaluates.
enum func_opt
{
    TV       = 0,  ///< a1 * a2
    TMATV    = 1,  ///< a1.matrix() * a2
    TMATVMAT = 2   ///< res(a1.matrix() * a2.matrix())
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <class res, class arg1, class arg2, int opt>
 | 
			
		||||
struct func;
 | 
			
		||||
 | 
			
		||||
template <class res, class arg1, class arg2>
 | 
			
		||||
struct func<res, arg1, arg2, TV>
 | 
			
		||||
{
 | 
			
		||||
    static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
 | 
			
		||||
    {
 | 
			
		||||
	asm ("");
 | 
			
		||||
	return a1 * a2;
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class res, class arg1, class arg2>
 | 
			
		||||
struct func<res, arg1, arg2, TMATV>
 | 
			
		||||
{
 | 
			
		||||
    static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
 | 
			
		||||
    {
 | 
			
		||||
	asm ("");
 | 
			
		||||
	return a1.matrix() * a2;
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class res, class arg1, class arg2>
 | 
			
		||||
struct func<res, arg1, arg2, TMATVMAT>
 | 
			
		||||
{
 | 
			
		||||
    static EIGEN_DONT_INLINE res run( arg1& a1, arg2& a2 )
 | 
			
		||||
    {
 | 
			
		||||
	asm ("");
 | 
			
		||||
	return res(a1.matrix() * a2.matrix());
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template <class func, class arg1, class arg2>
 | 
			
		||||
struct test_transform
 | 
			
		||||
{
 | 
			
		||||
    static void run()
 | 
			
		||||
    {
 | 
			
		||||
	arg1 a1;
 | 
			
		||||
	a1.setIdentity();
 | 
			
		||||
	arg2 a2;
 | 
			
		||||
	a2.setIdentity();
 | 
			
		||||
 | 
			
		||||
	BenchTimer timer;
 | 
			
		||||
	timer.reset();
 | 
			
		||||
	for (int k=0; k<10; ++k)
 | 
			
		||||
	{
 | 
			
		||||
	    timer.start();
 | 
			
		||||
	    for (int k=0; k<REPEAT; ++k)
 | 
			
		||||
		a2 = func::run( a1, a2 );
 | 
			
		||||
	    timer.stop();
 | 
			
		||||
	}
 | 
			
		||||
	cout << setprecision(4) << fixed << timer.value() << "s  " << endl;;
 | 
			
		||||
    }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// run_vec(op, scalar, mode, option, vsize)
// Prints a label built from the stringified arguments, then benchmarks kernel
// `op` on a Transform<scalar,3,mode,option> and a Matrix<scalar,vsize,1,option>.
// The expansion is wrapped in do { } while (0) so that it forms exactly one
// statement and stays correct under an unbraced if/else; invoke it with a
// trailing semicolon.
#define run_vec( op, scalar, mode, option, vsize ) \
    do { \
        std::cout << #scalar << "\t " << #mode << "\t " << #option << " " << #vsize " "; \
        typedef Transform<scalar, 3, mode, option> Trans; \
        typedef Matrix<scalar, vsize, 1, option> Vec; \
        typedef func<Vec,Trans,Vec,op> Func; \
        test_transform< Func, Trans, Vec >::run(); \
    } while (0)
 | 
			
		||||
 | 
			
		||||
// run_trans(op, scalar, mode, option)
// Prints a label built from the stringified arguments, then benchmarks kernel
// `op` on two Transform<scalar,3,mode,option> operands.
// The expansion is wrapped in do { } while (0) so that it forms exactly one
// statement and stays correct under an unbraced if/else; invoke it with a
// trailing semicolon.
#define run_trans( op, scalar, mode, option ) \
    do { \
        std::cout << #scalar << "\t " << #mode << "\t " << #option << "   "; \
        typedef Transform<scalar, 3, mode, option> Trans; \
        typedef func<Trans,Trans,Trans,op> Func; \
        test_transform< Func, Trans, Trans >::run(); \
    } while (0)
 | 
			
		||||
 | 
			
		||||
int main(int argc, char* argv[])
 | 
			
		||||
{
 | 
			
		||||
    cout << "vec = trans * vec" << endl;
 | 
			
		||||
    run_vec(TV, float,  Isometry, AutoAlign, 3);
 | 
			
		||||
    run_vec(TV, float,  Isometry, DontAlign, 3);
 | 
			
		||||
    run_vec(TV, float,  Isometry, AutoAlign, 4);
 | 
			
		||||
    run_vec(TV, float,  Isometry, DontAlign, 4);
 | 
			
		||||
    run_vec(TV, float,  Projective, AutoAlign, 4);
 | 
			
		||||
    run_vec(TV, float,  Projective, DontAlign, 4);
 | 
			
		||||
    run_vec(TV, double, Isometry, AutoAlign, 3);
 | 
			
		||||
    run_vec(TV, double, Isometry, DontAlign, 3);
 | 
			
		||||
    run_vec(TV, double, Isometry, AutoAlign, 4);
 | 
			
		||||
    run_vec(TV, double, Isometry, DontAlign, 4);
 | 
			
		||||
    run_vec(TV, double, Projective, AutoAlign, 4);
 | 
			
		||||
    run_vec(TV, double, Projective, DontAlign, 4);
 | 
			
		||||
 | 
			
		||||
    cout << "vec = trans.matrix() * vec" << endl;
 | 
			
		||||
    run_vec(TMATV, float,  Isometry, AutoAlign, 4);
 | 
			
		||||
    run_vec(TMATV, float,  Isometry, DontAlign, 4);
 | 
			
		||||
    run_vec(TMATV, double, Isometry, AutoAlign, 4);
 | 
			
		||||
    run_vec(TMATV, double, Isometry, DontAlign, 4);
 | 
			
		||||
 | 
			
		||||
    cout << "trans = trans1 * trans" << endl;
 | 
			
		||||
    run_trans(TV, float,  Isometry, AutoAlign);
 | 
			
		||||
    run_trans(TV, float,  Isometry, DontAlign);
 | 
			
		||||
    run_trans(TV, double, Isometry, AutoAlign);
 | 
			
		||||
    run_trans(TV, double, Isometry, DontAlign);
 | 
			
		||||
    run_trans(TV, float,  Projective, AutoAlign);
 | 
			
		||||
    run_trans(TV, float,  Projective, DontAlign);
 | 
			
		||||
    run_trans(TV, double, Projective, AutoAlign);
 | 
			
		||||
    run_trans(TV, double, Projective, DontAlign);
 | 
			
		||||
 | 
			
		||||
    cout << "trans = trans1.matrix() * trans.matrix()" << endl;
 | 
			
		||||
    run_trans(TMATVMAT, float,  Isometry, AutoAlign);
 | 
			
		||||
    run_trans(TMATVMAT, float,  Isometry, DontAlign);
 | 
			
		||||
    run_trans(TMATVMAT, double, Isometry, AutoAlign);
 | 
			
		||||
    run_trans(TMATVMAT, double, Isometry, DontAlign);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										135
									
								
								cs440-acg/ext/eigen/bench/benchVecAdd.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								cs440-acg/ext/eigen/bench/benchVecAdd.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,135 @@
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
#include <bench/BenchTimer.h>
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#ifndef SIZE
 | 
			
		||||
#define SIZE 50
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef REPEAT
 | 
			
		||||
#define REPEAT 10000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
typedef float Scalar;
 | 
			
		||||
 | 
			
		||||
__attribute__ ((noinline)) void benchVec(Scalar* a, Scalar* b, Scalar* c, int size);
 | 
			
		||||
__attribute__ ((noinline)) void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c);
 | 
			
		||||
__attribute__ ((noinline)) void benchVec(VectorXf& a, VectorXf& b, VectorXf& c);
 | 
			
		||||
 | 
			
		||||
int main(int argc, char* argv[])
 | 
			
		||||
{
 | 
			
		||||
    int size = SIZE * 8;
 | 
			
		||||
    int size2 = size * size;
 | 
			
		||||
    Scalar* a = internal::aligned_new<Scalar>(size2);
 | 
			
		||||
    Scalar* b = internal::aligned_new<Scalar>(size2+4)+1;
 | 
			
		||||
    Scalar* c = internal::aligned_new<Scalar>(size2); 
 | 
			
		||||
    
 | 
			
		||||
    for (int i=0; i<size; ++i)
 | 
			
		||||
    {
 | 
			
		||||
        a[i] = b[i] = c[i] = 0;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    BenchTimer timer;
 | 
			
		||||
    
 | 
			
		||||
    timer.reset();
 | 
			
		||||
    for (int k=0; k<10; ++k)
 | 
			
		||||
    {
 | 
			
		||||
        timer.start();
 | 
			
		||||
        benchVec(a, b, c, size2);
 | 
			
		||||
        timer.stop();
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << timer.value() << "s  " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
 | 
			
		||||
    return 0;
 | 
			
		||||
    for (int innersize = size; innersize>2 ; --innersize)
 | 
			
		||||
    {
 | 
			
		||||
        if (size2%innersize==0)
 | 
			
		||||
        {
 | 
			
		||||
            int outersize = size2/innersize;
 | 
			
		||||
            MatrixXf ma = Map<MatrixXf>(a, innersize, outersize );
 | 
			
		||||
            MatrixXf mb = Map<MatrixXf>(b, innersize, outersize );
 | 
			
		||||
            MatrixXf mc = Map<MatrixXf>(c, innersize, outersize );
 | 
			
		||||
            timer.reset();
 | 
			
		||||
            for (int k=0; k<3; ++k)
 | 
			
		||||
            {
 | 
			
		||||
                timer.start();
 | 
			
		||||
                benchVec(ma, mb, mc);
 | 
			
		||||
                timer.stop();
 | 
			
		||||
            }
 | 
			
		||||
            std::cout << innersize << " x " << outersize << "  " << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    VectorXf va = Map<VectorXf>(a, size2);
 | 
			
		||||
    VectorXf vb = Map<VectorXf>(b, size2);
 | 
			
		||||
    VectorXf vc = Map<VectorXf>(c, size2);
 | 
			
		||||
    timer.reset();
 | 
			
		||||
    for (int k=0; k<3; ++k)
 | 
			
		||||
    {
 | 
			
		||||
        timer.start();
 | 
			
		||||
        benchVec(va, vb, vc);
 | 
			
		||||
        timer.stop();
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << timer.value() << "s   " << (double(size2*REPEAT)/timer.value())/(1024.*1024.*1024.) << " GFlops\n";
 | 
			
		||||
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Matrix kernel: evaluates `a = a + b` REPEAT times. `c` is not used.
void benchVec(MatrixXf& a, MatrixXf& b, MatrixXf& c)
{
    int remaining = REPEAT;
    while (remaining-- > 0)
    {
        a = a + b;
    }
}
 | 
			
		||||
 | 
			
		||||
/// Vector kernel: evaluates `a = a + b` REPEAT times. `c` is not used.
void benchVec(VectorXf& a, VectorXf& b, VectorXf& c)
{
    int remaining = REPEAT;
    while (remaining-- > 0)
    {
        a = a + b;
    }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Raw-pointer kernel: REPEAT sweeps of a packet-wise `a += b` over `size`
 * elements, written directly with Eigen's internal packet primitives.
 *
 * \param a     destination and first operand; written with the aligned store
 *              pstore, so it must satisfy that primitive's alignment requirement
 * \param b     second operand; read with the unaligned load ploadu
 * \param c     not used
 * \param size  element count; the loop advances 8*PacketSize elements per step
 *              and addresses up to i + 8*PacketSize - 1, so size is expected to
 *              be a multiple of 8*PacketSize
 *
 * NOTE(review): each step updates only packets 2..7 of the 8-packet stride;
 * packets 0 and 1 are never touched, while the caller's GFlops figure counts
 * all `size` elements. An earlier, disabled variant of this loop used aligned
 * loads for both operands and covered all eight packets with interleaved
 * loads and stores. Confirm whether skipping two packets is intentional.
 */
void benchVec(Scalar* a, Scalar* b, Scalar* c, int size)
{
    typedef internal::packet_traits<Scalar>::type PacketScalar;
    const int PacketSize = internal::packet_traits<Scalar>::size;
    for (int k=0; k<REPEAT; ++k)
        for (int i=0; i<size; i+=PacketSize*8)
        {
            // ploadu cannot deduce its packet type from a pointer argument, so
            // it is named explicitly; pstore and padd deduce theirs.
            internal::pstore(&a[i+2*PacketSize], internal::padd(internal::ploadu<PacketScalar>(&a[i+2*PacketSize]), internal::ploadu<PacketScalar>(&b[i+2*PacketSize])));
            internal::pstore(&a[i+3*PacketSize], internal::padd(internal::ploadu<PacketScalar>(&a[i+3*PacketSize]), internal::ploadu<PacketScalar>(&b[i+3*PacketSize])));
            internal::pstore(&a[i+4*PacketSize], internal::padd(internal::ploadu<PacketScalar>(&a[i+4*PacketSize]), internal::ploadu<PacketScalar>(&b[i+4*PacketSize])));
            internal::pstore(&a[i+5*PacketSize], internal::padd(internal::ploadu<PacketScalar>(&a[i+5*PacketSize]), internal::ploadu<PacketScalar>(&b[i+5*PacketSize])));
            internal::pstore(&a[i+6*PacketSize], internal::padd(internal::ploadu<PacketScalar>(&a[i+6*PacketSize]), internal::ploadu<PacketScalar>(&b[i+6*PacketSize])));
            internal::pstore(&a[i+7*PacketSize], internal::padd(internal::ploadu<PacketScalar>(&a[i+7*PacketSize]), internal::ploadu<PacketScalar>(&b[i+7*PacketSize])));
        }
}
 | 
			
		||||
							
								
								
									
										341
									
								
								cs440-acg/ext/eigen/bench/bench_gemm.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										341
									
								
								cs440-acg/ext/eigen/bench/bench_gemm.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,341 @@
 | 
			
		||||
 | 
			
		||||
// g++-4.4 bench_gemm.cpp -I .. -O2 -DNDEBUG -lrt -fopenmp && OMP_NUM_THREADS=2  ./a.out
 | 
			
		||||
// icpc bench_gemm.cpp -I .. -O3 -DNDEBUG -lrt -openmp  && OMP_NUM_THREADS=2  ./a.out
 | 
			
		||||
 | 
			
		||||
// Compilation options:
 | 
			
		||||
// 
 | 
			
		||||
// -DSCALAR=std::complex<double>
 | 
			
		||||
// -DSCALARA=double or -DSCALARB=double
 | 
			
		||||
// -DHAVE_BLAS
 | 
			
		||||
// -DDECOUPLED
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
#include <bench/BenchTimer.h>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#ifndef SCALAR
 | 
			
		||||
// #define SCALAR std::complex<float>
 | 
			
		||||
#define SCALAR float
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SCALARA
 | 
			
		||||
#define SCALARA SCALAR
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SCALARB
 | 
			
		||||
#define SCALARB SCALAR
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
typedef SCALAR Scalar;
 | 
			
		||||
typedef NumTraits<Scalar>::Real RealScalar;
 | 
			
		||||
typedef Matrix<SCALARA,Dynamic,Dynamic> A;
 | 
			
		||||
typedef Matrix<SCALARB,Dynamic,Dynamic> B;
 | 
			
		||||
typedef Matrix<Scalar,Dynamic,Dynamic> C;
 | 
			
		||||
typedef Matrix<RealScalar,Dynamic,Dynamic> M;
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_BLAS
 | 
			
		||||
 | 
			
		||||
extern "C" {
 | 
			
		||||
  #include <Eigen/src/misc/blas.h>
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Constants with static storage so their addresses can be handed to the
// BLAS entry points, which take every argument by pointer.

// Scalar one/zero per precision (note: the double zero is named `szero`).
static float                fone   = 1;
static float                fzero  = 0;
static double               done   = 1;
static double               szero  = 0;
static std::complex<float>  cfone  = 1;
static std::complex<float>  cfzero = 0;
static std::complex<double> cdone  = 1;
static std::complex<double> cdzero = 0;

// Single-character option flags.
static char notrans = 'N';
static char trans   = 'T';
static char nonunit = 'N';
static char lower   = 'L';
static char right   = 'R';

// Integer one.
static int intone = 1;
 | 
			
		||||
 | 
			
		||||
void blas_gemm(const MatrixXf& a, const MatrixXf& b, MatrixXf& c)
 | 
			
		||||
{
 | 
			
		||||
  int M = c.rows(); int N = c.cols(); int K = a.cols();
 | 
			
		||||
  int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows();
 | 
			
		||||
 | 
			
		||||
  sgemm_(¬rans,¬rans,&M,&N,&K,&fone,
 | 
			
		||||
         const_cast<float*>(a.data()),&lda,
 | 
			
		||||
         const_cast<float*>(b.data()),&ldb,&fone,
 | 
			
		||||
         c.data(),&ldc);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Double-precision reference product through BLAS: calls dgemm_ with neither
/// operand transposed and with `done` as both scalar factors, i.e. c += a * b.
/// The leading dimensions are the row counts of a, b and c.
EIGEN_DONT_INLINE void blas_gemm(const MatrixXd& a, const MatrixXd& b, MatrixXd& c)
{
  // BLAS takes its dimensions by pointer to int, so copy them into locals.
  int rows = c.rows(); int cols = c.cols(); int depth = a.cols();
  int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows();

  dgemm_(&notrans,&notrans,&rows,&cols,&depth,&done,
         const_cast<double*>(a.data()),&lda,
         const_cast<double*>(b.data()),&ldb,&done,
         c.data(),&ldc);
}
 | 
			
		||||
 | 
			
		||||
void blas_gemm(const MatrixXcf& a, const MatrixXcf& b, MatrixXcf& c)
 | 
			
		||||
{
 | 
			
		||||
  int M = c.rows(); int N = c.cols(); int K = a.cols();
 | 
			
		||||
  int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows();
 | 
			
		||||
 | 
			
		||||
  cgemm_(¬rans,¬rans,&M,&N,&K,(float*)&cfone,
 | 
			
		||||
         const_cast<float*>((const float*)a.data()),&lda,
 | 
			
		||||
         const_cast<float*>((const float*)b.data()),&ldb,(float*)&cfone,
 | 
			
		||||
         (float*)c.data(),&ldc);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void blas_gemm(const MatrixXcd& a, const MatrixXcd& b, MatrixXcd& c)
 | 
			
		||||
{
 | 
			
		||||
  int M = c.rows(); int N = c.cols(); int K = a.cols();
 | 
			
		||||
  int lda = a.rows(); int ldb = b.rows(); int ldc = c.rows();
 | 
			
		||||
 | 
			
		||||
  zgemm_(¬rans,¬rans,&M,&N,&K,(double*)&cdone,
 | 
			
		||||
         const_cast<double*>((const double*)a.data()),&lda,
 | 
			
		||||
         const_cast<double*>((const double*)b.data()),&ldb,(double*)&cdone,
 | 
			
		||||
         (double*)c.data(),&ldc);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
void matlab_cplx_cplx(const M& ar, const M& ai, const M& br, const M& bi, M& cr, M& ci)
 | 
			
		||||
{
 | 
			
		||||
  cr.noalias() += ar * br;
 | 
			
		||||
  cr.noalias() -= ai * bi;
 | 
			
		||||
  ci.noalias() += ar * bi;
 | 
			
		||||
  ci.noalias() += ai * br;
 | 
			
		||||
  // [cr ci] += [ar ai] * br + [-ai ar] * bi
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void matlab_real_cplx(const M& a, const M& br, const M& bi, M& cr, M& ci)
 | 
			
		||||
{
 | 
			
		||||
  cr.noalias() += a * br;
 | 
			
		||||
  ci.noalias() += a * bi;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void matlab_cplx_real(const M& ar, const M& ai, const M& b, M& cr, M& ci)
 | 
			
		||||
{
 | 
			
		||||
  cr.noalias() += ar * b;
 | 
			
		||||
  ci.noalias() += ai * b;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<typename A, typename B, typename C>
 | 
			
		||||
EIGEN_DONT_INLINE void gemm(const A& a, const B& b, C& c)
 | 
			
		||||
{
 | 
			
		||||
 c.noalias() += a * b;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char ** argv)
 | 
			
		||||
{
 | 
			
		||||
  std::ptrdiff_t l1 = internal::queryL1CacheSize();
 | 
			
		||||
  std::ptrdiff_t l2 = internal::queryTopLevelCacheSize();
 | 
			
		||||
  std::cout << "L1 cache size     = " << (l1>0 ? l1/1024 : -1) << " KB\n";
 | 
			
		||||
  std::cout << "L2/L3 cache size  = " << (l2>0 ? l2/1024 : -1) << " KB\n";
 | 
			
		||||
  typedef internal::gebp_traits<Scalar,Scalar> Traits;
 | 
			
		||||
  std::cout << "Register blocking = " << Traits::mr << " x " << Traits::nr << "\n";
 | 
			
		||||
 | 
			
		||||
  int rep = 1;    // number of repetitions per try
 | 
			
		||||
  int tries = 2;  // number of tries, we keep the best
 | 
			
		||||
 | 
			
		||||
  int s = 2048;
 | 
			
		||||
  int m = s;
 | 
			
		||||
  int n = s;
 | 
			
		||||
  int p = s;
 | 
			
		||||
  int cache_size1=-1, cache_size2=l2, cache_size3 = 0;
 | 
			
		||||
 | 
			
		||||
  bool need_help = false;
 | 
			
		||||
  for (int i=1; i<argc;)
 | 
			
		||||
  {
 | 
			
		||||
    if(argv[i][0]=='-')
 | 
			
		||||
    {
 | 
			
		||||
      if(argv[i][1]=='s')
 | 
			
		||||
      {
 | 
			
		||||
        ++i;
 | 
			
		||||
        s = atoi(argv[i++]);
 | 
			
		||||
        m = n = p = s;
 | 
			
		||||
        if(argv[i][0]!='-')
 | 
			
		||||
        {
 | 
			
		||||
          n = atoi(argv[i++]);
 | 
			
		||||
          p = atoi(argv[i++]);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      else if(argv[i][1]=='c')
 | 
			
		||||
      {
 | 
			
		||||
        ++i;
 | 
			
		||||
        cache_size1 = atoi(argv[i++]);
 | 
			
		||||
        if(argv[i][0]!='-')
 | 
			
		||||
        {
 | 
			
		||||
          cache_size2 = atoi(argv[i++]);
 | 
			
		||||
          if(argv[i][0]!='-')
 | 
			
		||||
            cache_size3 = atoi(argv[i++]);
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      else if(argv[i][1]=='t')
 | 
			
		||||
      {
 | 
			
		||||
        ++i;
 | 
			
		||||
        tries = atoi(argv[i++]);
 | 
			
		||||
      }
 | 
			
		||||
      else if(argv[i][1]=='p')
 | 
			
		||||
      {
 | 
			
		||||
        ++i;
 | 
			
		||||
        rep = atoi(argv[i++]);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      need_help = true;
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if(need_help)
 | 
			
		||||
  {
 | 
			
		||||
    std::cout << argv[0] << " -s <matrix sizes> -c <cache sizes> -t <nb tries> -p <nb repeats>\n";
 | 
			
		||||
    std::cout << "   <matrix sizes> : size\n";
 | 
			
		||||
    std::cout << "   <matrix sizes> : rows columns depth\n";
 | 
			
		||||
    return 1;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
#if EIGEN_VERSION_AT_LEAST(3,2,90)
 | 
			
		||||
  if(cache_size1>0)
 | 
			
		||||
    setCpuCacheSizes(cache_size1,cache_size2,cache_size3);
 | 
			
		||||
#endif
 | 
			
		||||
  
 | 
			
		||||
  A a(m,p); a.setRandom();
 | 
			
		||||
  B b(p,n); b.setRandom();
 | 
			
		||||
  C c(m,n); c.setOnes();
 | 
			
		||||
  C rc = c;
 | 
			
		||||
 | 
			
		||||
  std::cout << "Matrix sizes = " << m << "x" << p << " * " << p << "x" << n << "\n";
 | 
			
		||||
  std::ptrdiff_t mc(m), nc(n), kc(p);
 | 
			
		||||
  internal::computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
 | 
			
		||||
  std::cout << "blocking size (mc x kc) = " << mc << " x " << kc << "\n";
 | 
			
		||||
 | 
			
		||||
  C r = c;
 | 
			
		||||
 | 
			
		||||
  // check the parallel product is correct
 | 
			
		||||
  #if defined EIGEN_HAS_OPENMP
 | 
			
		||||
  Eigen::initParallel();
 | 
			
		||||
  int procs = omp_get_max_threads();
 | 
			
		||||
  if(procs>1)
 | 
			
		||||
  {
 | 
			
		||||
    #ifdef HAVE_BLAS
 | 
			
		||||
    blas_gemm(a,b,r);
 | 
			
		||||
    #else
 | 
			
		||||
    omp_set_num_threads(1);
 | 
			
		||||
    r.noalias() += a * b;
 | 
			
		||||
    omp_set_num_threads(procs);
 | 
			
		||||
    #endif
 | 
			
		||||
    c.noalias() += a * b;
 | 
			
		||||
    if(!r.isApprox(c)) std::cerr << "Warning, your parallel product is crap!\n\n";
 | 
			
		||||
  }
 | 
			
		||||
  #elif defined HAVE_BLAS
 | 
			
		||||
    blas_gemm(a,b,r);
 | 
			
		||||
    c.noalias() += a * b;
 | 
			
		||||
    if(!r.isApprox(c)) {
 | 
			
		||||
      std::cout << (r  - c).norm() << "\n";
 | 
			
		||||
      std::cerr << "Warning, your product is crap!\n\n";
 | 
			
		||||
    }
 | 
			
		||||
  #else
 | 
			
		||||
    if(1.*m*n*p<2000.*2000*2000)
 | 
			
		||||
    {
 | 
			
		||||
      gemm(a,b,c);
 | 
			
		||||
      r.noalias() += a.cast<Scalar>() .lazyProduct( b.cast<Scalar>() );
 | 
			
		||||
      if(!r.isApprox(c)) {
 | 
			
		||||
        std::cout << (r  - c).norm() << "\n";
 | 
			
		||||
        std::cerr << "Warning, your product is crap!\n\n";
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  #ifdef HAVE_BLAS
 | 
			
		||||
  BenchTimer tblas;
 | 
			
		||||
  c = rc;
 | 
			
		||||
  BENCH(tblas, tries, rep, blas_gemm(a,b,c));
 | 
			
		||||
  std::cout << "blas  cpu         " << tblas.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tblas.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tblas.total(CPU_TIMER)  << "s)\n";
 | 
			
		||||
  std::cout << "blas  real        " << tblas.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tblas.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tblas.total(REAL_TIMER) << "s)\n";
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  BenchTimer tmt;
 | 
			
		||||
  c = rc;
 | 
			
		||||
  BENCH(tmt, tries, rep, gemm(a,b,c));
 | 
			
		||||
  std::cout << "eigen cpu         " << tmt.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmt.total(CPU_TIMER)  << "s)\n";
 | 
			
		||||
  std::cout << "eigen real        " << tmt.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
 | 
			
		||||
 | 
			
		||||
  #ifdef EIGEN_HAS_OPENMP
 | 
			
		||||
  if(procs>1)
 | 
			
		||||
  {
 | 
			
		||||
    BenchTimer tmono;
 | 
			
		||||
    omp_set_num_threads(1);
 | 
			
		||||
    Eigen::setNbThreads(1);
 | 
			
		||||
    c = rc;
 | 
			
		||||
    BENCH(tmono, tries, rep, gemm(a,b,c));
 | 
			
		||||
    std::cout << "eigen mono cpu    " << tmono.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmono.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmono.total(CPU_TIMER)  << "s)\n";
 | 
			
		||||
    std::cout << "eigen mono real   " << tmono.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmono.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmono.total(REAL_TIMER) << "s)\n";
 | 
			
		||||
    std::cout << "mt speed up x" << tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER)  << " => " << (100.0*tmono.best(CPU_TIMER) / tmt.best(REAL_TIMER))/procs << "%\n";
 | 
			
		||||
  }
 | 
			
		||||
  #endif
 | 
			
		||||
  
 | 
			
		||||
  if(1.*m*n*p<30*30*30)
 | 
			
		||||
  {
 | 
			
		||||
      BenchTimer tmt;
 | 
			
		||||
      c = rc;
 | 
			
		||||
      BENCH(tmt, tries, rep, c.noalias()+=a.lazyProduct(b));
 | 
			
		||||
      std::cout << "lazy cpu         " << tmt.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/tmt.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << tmt.total(CPU_TIMER)  << "s)\n";
 | 
			
		||||
      std::cout << "lazy real        " << tmt.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/tmt.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << tmt.total(REAL_TIMER) << "s)\n";
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  #ifdef DECOUPLED
 | 
			
		||||
  if((NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
 | 
			
		||||
  {
 | 
			
		||||
    M ar(m,p); ar.setRandom();
 | 
			
		||||
    M ai(m,p); ai.setRandom();
 | 
			
		||||
    M br(p,n); br.setRandom();
 | 
			
		||||
    M bi(p,n); bi.setRandom();
 | 
			
		||||
    M cr(m,n); cr.setRandom();
 | 
			
		||||
    M ci(m,n); ci.setRandom();
 | 
			
		||||
    
 | 
			
		||||
    BenchTimer t;
 | 
			
		||||
    BENCH(t, tries, rep, matlab_cplx_cplx(ar,ai,br,bi,cr,ci));
 | 
			
		||||
    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
 | 
			
		||||
    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
 | 
			
		||||
  }
 | 
			
		||||
  if((!NumTraits<A::Scalar>::IsComplex) && (NumTraits<B::Scalar>::IsComplex))
 | 
			
		||||
  {
 | 
			
		||||
    M a(m,p);  a.setRandom();
 | 
			
		||||
    M br(p,n); br.setRandom();
 | 
			
		||||
    M bi(p,n); bi.setRandom();
 | 
			
		||||
    M cr(m,n); cr.setRandom();
 | 
			
		||||
    M ci(m,n); ci.setRandom();
 | 
			
		||||
    
 | 
			
		||||
    BenchTimer t;
 | 
			
		||||
    BENCH(t, tries, rep, matlab_real_cplx(a,br,bi,cr,ci));
 | 
			
		||||
    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
 | 
			
		||||
    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
 | 
			
		||||
  }
 | 
			
		||||
  if((NumTraits<A::Scalar>::IsComplex) && (!NumTraits<B::Scalar>::IsComplex))
 | 
			
		||||
  {
 | 
			
		||||
    M ar(m,p); ar.setRandom();
 | 
			
		||||
    M ai(m,p); ai.setRandom();
 | 
			
		||||
    M b(p,n);  b.setRandom();
 | 
			
		||||
    M cr(m,n); cr.setRandom();
 | 
			
		||||
    M ci(m,n); ci.setRandom();
 | 
			
		||||
    
 | 
			
		||||
    BenchTimer t;
 | 
			
		||||
    BENCH(t, tries, rep, matlab_cplx_real(ar,ai,b,cr,ci));
 | 
			
		||||
    std::cout << "\"matlab\" cpu    " << t.best(CPU_TIMER)/rep  << "s  \t" << (double(m)*n*p*rep*2/t.best(CPU_TIMER))*1e-9  <<  " GFLOPS \t(" << t.total(CPU_TIMER)  << "s)\n";
 | 
			
		||||
    std::cout << "\"matlab\" real   " << t.best(REAL_TIMER)/rep << "s  \t" << (double(m)*n*p*rep*2/t.best(REAL_TIMER))*1e-9 <<  " GFLOPS \t(" << t.total(REAL_TIMER) << "s)\n";
 | 
			
		||||
  }
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										28
									
								
								cs440-acg/ext/eigen/bench/bench_multi_compilers.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										28
									
								
								cs440-acg/ext/eigen/bench/bench_multi_compilers.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,28 @@
 | 
			
		||||
#!/bin/bash
#
# Compiles one benchmark source file with every compiler command listed in a
# compiler-list file and runs each resulting binary.
#
# Usage: bench_multi_compilers.sh compilerlist.txt benchfile.cpp
#
# The compiler list is sourced as shell code. After sourcing, this script reads
# the variable g (number of entries, defaults to 0) and the array CLIST (one
# complete compiler command line, i.e. executable plus flags, per entry).

if (($# < 2)); then
    echo "Usage: $0 compilerlist.txt benchfile.cpp"
else

compilerlist=$1
benchfile=$2

g=0
source "$compilerlist"

# for each compiler, compile benchfile and run the benchmark
for (( i=0 ; i<g ; ++i )) ; do
  # the first word of the entry is the compiler executable itself
  compiler=$(echo ${CLIST[$i]} | cut -d " " -f 1)
  # check the compiler exists; `command -v` fails cleanly for a missing
  # executable, whereas `[ -e $(which ...) ]` degenerates to `[ -e ]`, which is
  # always true, when the lookup prints nothing
  if command -v "$compiler" > /dev/null 2>&1; then
    echo "${CLIST[$i]}"
    # the CLIST entry is left unquoted on purpose so that it splits into the
    # compiler executable and its flags
    ${CLIST[$i]} "$benchfile" -I.. -o .bench && ./.bench 2> /dev/null
    echo ""
  else
    echo "compiler not found: $compiler"
  fi
done

fi
 | 
			
		||||
							
								
								
									
										360
									
								
								cs440-acg/ext/eigen/bench/bench_norm.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										360
									
								
								cs440-acg/ext/eigen/bench/bench_norm.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,360 @@
 | 
			
		||||
#include <typeinfo>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
#include "BenchTimer.h"
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
EIGEN_DONT_INLINE typename T::Scalar sqsumNorm(T& v)
 | 
			
		||||
{
 | 
			
		||||
  return v.norm();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
EIGEN_DONT_INLINE typename T::Scalar stableNorm(T& v)
 | 
			
		||||
{
 | 
			
		||||
  return v.stableNorm();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
EIGEN_DONT_INLINE typename T::Scalar hypotNorm(T& v)
 | 
			
		||||
{
 | 
			
		||||
  return v.hypotNorm();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
EIGEN_DONT_INLINE typename T::Scalar blueNorm(T& v)
 | 
			
		||||
{
 | 
			
		||||
  return v.blueNorm();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename T::Scalar Scalar;
 | 
			
		||||
  int n = v.size();
 | 
			
		||||
  Scalar scale = 0;
 | 
			
		||||
  Scalar ssq = 1;
 | 
			
		||||
  for (int i=0;i<n;++i)
 | 
			
		||||
  {
 | 
			
		||||
    Scalar ax = std::abs(v.coeff(i));
 | 
			
		||||
    if (scale >= ax)
 | 
			
		||||
    {
 | 
			
		||||
      ssq += numext::abs2(ax/scale);
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      ssq = Scalar(1) + ssq * numext::abs2(scale/ax);
 | 
			
		||||
      scale = ax;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return scale * std::sqrt(ssq);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v)
 | 
			
		||||
{
 | 
			
		||||
  typedef typename T::Scalar Scalar;
 | 
			
		||||
  Scalar s = v.array().abs().maxCoeff();
 | 
			
		||||
  return s*(v/s).norm();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
EIGEN_DONT_INLINE typename T::Scalar bl2passNorm(T& v)
 | 
			
		||||
{
 | 
			
		||||
  return v.stableNorm();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
 | 
			
		||||
{
 | 
			
		||||
  int n =v.size() / 2;
 | 
			
		||||
  for (int i=0;i<n;++i)
 | 
			
		||||
    v(i) = v(2*i)*v(2*i) + v(2*i+1)*v(2*i+1);
 | 
			
		||||
  n = n/2;
 | 
			
		||||
  while (n>0)
 | 
			
		||||
  {
 | 
			
		||||
    for (int i=0;i<n;++i)
 | 
			
		||||
      v(i) = v(2*i) + v(2*i+1);
 | 
			
		||||
    n = n/2;
 | 
			
		||||
  }
 | 
			
		||||
  return std::sqrt(v(0));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
namespace Eigen {
namespace internal {
#ifdef EIGEN_VECTORIZE
// SSE helpers used by pblueNorm. The second parameter of every helper is a
// non-const reference, so these overloads can only be selected when that
// argument is a non-const lvalue.

// Per-lane "lhs < rhs" comparison mask.
Packet4f plt(const Packet4f& lhs, Packet4f& rhs)
{
  return _mm_cmplt_ps(lhs,rhs);
}

Packet2d plt(const Packet2d& lhs, Packet2d& rhs)
{
  return _mm_cmplt_pd(lhs,rhs);
}

// Forwards to _mm_andnot_*, i.e. (~lhs) & rhs.
Packet4f pandnot(const Packet4f& lhs, Packet4f& rhs)
{
  return _mm_andnot_ps(lhs,rhs);
}

Packet2d pandnot(const Packet2d& lhs, Packet2d& rhs)
{
  return _mm_andnot_pd(lhs,rhs);
}
#endif
} // namespace internal
} // namespace Eigen
 | 
			
		||||
 | 
			
		||||
template<typename T>
 | 
			
		||||
EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
 | 
			
		||||
{
 | 
			
		||||
  #ifndef EIGEN_VECTORIZE
 | 
			
		||||
  return v.blueNorm();
 | 
			
		||||
  #else
 | 
			
		||||
  typedef typename T::Scalar Scalar;
 | 
			
		||||
 | 
			
		||||
  static int nmax = 0;
 | 
			
		||||
  static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;
 | 
			
		||||
  int n;
 | 
			
		||||
 | 
			
		||||
  if(nmax <= 0)
 | 
			
		||||
  {
 | 
			
		||||
    int nbig, ibeta, it, iemin, iemax, iexp;
 | 
			
		||||
    Scalar abig, eps;
 | 
			
		||||
 | 
			
		||||
    nbig  = std::numeric_limits<int>::max();            // largest integer
 | 
			
		||||
    ibeta = std::numeric_limits<Scalar>::radix; //NumTraits<Scalar>::Base;                    // base for floating-point numbers
 | 
			
		||||
    it    = std::numeric_limits<Scalar>::digits; //NumTraits<Scalar>::Mantissa;                // number of base-beta digits in mantissa
 | 
			
		||||
    iemin = std::numeric_limits<Scalar>::min_exponent;  // minimum exponent
 | 
			
		||||
    iemax = std::numeric_limits<Scalar>::max_exponent;  // maximum exponent
 | 
			
		||||
    rbig  = std::numeric_limits<Scalar>::max();         // largest floating-point number
 | 
			
		||||
 | 
			
		||||
    // Check the basic machine-dependent constants.
 | 
			
		||||
    if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5)
 | 
			
		||||
      || (it<=4 && ibeta <= 3 ) || it<2)
 | 
			
		||||
    {
 | 
			
		||||
      eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
 | 
			
		||||
    }
 | 
			
		||||
    iexp  = -((1-iemin)/2);
 | 
			
		||||
    b1    = std::pow(ibeta, iexp);  // lower boundary of midrange
 | 
			
		||||
    iexp  = (iemax + 1 - it)/2;
 | 
			
		||||
    b2    = std::pow(ibeta,iexp);   // upper boundary of midrange
 | 
			
		||||
 | 
			
		||||
    iexp  = (2-iemin)/2;
 | 
			
		||||
    s1m   = std::pow(ibeta,iexp);   // scaling factor for lower range
 | 
			
		||||
    iexp  = - ((iemax+it)/2);
 | 
			
		||||
    s2m   = std::pow(ibeta,iexp);   // scaling factor for upper range
 | 
			
		||||
 | 
			
		||||
    overfl  = rbig*s2m;          // overfow boundary for abig
 | 
			
		||||
    eps     = std::pow(ibeta, 1-it);
 | 
			
		||||
    relerr  = std::sqrt(eps);      // tolerance for neglecting asml
 | 
			
		||||
    abig    = 1.0/eps - 1.0;
 | 
			
		||||
    if (Scalar(nbig)>abig)  nmax = abig;  // largest safe n
 | 
			
		||||
    else                    nmax = nbig;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  typedef typename internal::packet_traits<Scalar>::type Packet;
 | 
			
		||||
  const int ps = internal::packet_traits<Scalar>::size;
 | 
			
		||||
  Packet pasml = internal::pset1<Packet>(Scalar(0));
 | 
			
		||||
  Packet pamed = internal::pset1<Packet>(Scalar(0));
 | 
			
		||||
  Packet pabig = internal::pset1<Packet>(Scalar(0));
 | 
			
		||||
  Packet ps2m = internal::pset1<Packet>(s2m);
 | 
			
		||||
  Packet ps1m = internal::pset1<Packet>(s1m);
 | 
			
		||||
  Packet pb2  = internal::pset1<Packet>(b2);
 | 
			
		||||
  Packet pb1  = internal::pset1<Packet>(b1);
 | 
			
		||||
  for(int j=0; j<v.size(); j+=ps)
 | 
			
		||||
  {
 | 
			
		||||
    Packet ax = internal::pabs(v.template packet<Aligned>(j));
 | 
			
		||||
    Packet ax_s2m = internal::pmul(ax,ps2m);
 | 
			
		||||
    Packet ax_s1m = internal::pmul(ax,ps1m);
 | 
			
		||||
    Packet maskBig = internal::plt(pb2,ax);
 | 
			
		||||
    Packet maskSml = internal::plt(ax,pb1);
 | 
			
		||||
 | 
			
		||||
//     Packet maskMed = internal::pand(maskSml,maskBig);
 | 
			
		||||
//     Packet scale = internal::pset1(Scalar(0));
 | 
			
		||||
//     scale = internal::por(scale, internal::pand(maskBig,ps2m));
 | 
			
		||||
//     scale = internal::por(scale, internal::pand(maskSml,ps1m));
 | 
			
		||||
//     scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
 | 
			
		||||
//     ax = internal::pmul(ax,scale);
 | 
			
		||||
//     ax = internal::pmul(ax,ax);
 | 
			
		||||
//     pabig = internal::padd(pabig, internal::pand(maskBig, ax));
 | 
			
		||||
//     pasml = internal::padd(pasml, internal::pand(maskSml, ax));
 | 
			
		||||
//     pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m)));
 | 
			
		||||
    pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m)));
 | 
			
		||||
    pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::pand(maskSml,maskBig)));
 | 
			
		||||
  }
 | 
			
		||||
  Scalar abig = internal::predux(pabig);
 | 
			
		||||
  Scalar asml = internal::predux(pasml);
 | 
			
		||||
  Scalar amed = internal::predux(pamed);
 | 
			
		||||
  if(abig > Scalar(0))
 | 
			
		||||
  {
 | 
			
		||||
    abig = std::sqrt(abig);
 | 
			
		||||
    if(abig > overfl)
 | 
			
		||||
    {
 | 
			
		||||
      eigen_assert(false && "overflow");
 | 
			
		||||
      return rbig;
 | 
			
		||||
    }
 | 
			
		||||
    if(amed > Scalar(0))
 | 
			
		||||
    {
 | 
			
		||||
      abig = abig/s2m;
 | 
			
		||||
      amed = std::sqrt(amed);
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      return abig/s2m;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  else if(asml > Scalar(0))
 | 
			
		||||
  {
 | 
			
		||||
    if (amed > Scalar(0))
 | 
			
		||||
    {
 | 
			
		||||
      abig = std::sqrt(amed);
 | 
			
		||||
      amed = std::sqrt(asml) / s1m;
 | 
			
		||||
    }
 | 
			
		||||
    else
 | 
			
		||||
    {
 | 
			
		||||
      return std::sqrt(asml)/s1m;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  else
 | 
			
		||||
  {
 | 
			
		||||
    return std::sqrt(amed);
 | 
			
		||||
  }
 | 
			
		||||
  asml = std::min(abig, amed);
 | 
			
		||||
  abig = std::max(abig, amed);
 | 
			
		||||
  if(asml <= abig*relerr)
 | 
			
		||||
    return abig;
 | 
			
		||||
  else
 | 
			
		||||
    return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig));
 | 
			
		||||
  #endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Times the norm function NRM on the vectors `vf` (float) and `vd` (double)
// found in the enclosing scope: `tries` timed runs of `iters` calls each, both
// also taken from the enclosing scope. Prints one row with the function name
// and the three timer values; the third (complex) timer is never started.
#define BENCH_PERF(NRM) { \
  float sinkF = 0; double sinkD = 0; std::complex<float> sinkC = 0; \
  Eigen::BenchTimer timerF, timerD, timerC; \
  timerF.reset(); timerD.reset(); timerC.reset(); \
  for (int attempt=0; attempt<tries; ++attempt) { \
    timerF.start(); \
    for (int call=0; call<iters; ++call) { sinkF += NRM(vf); } \
    timerF.stop(); \
  } \
  for (int attempt=0; attempt<tries; ++attempt) { \
    timerD.start(); \
    for (int call=0; call<iters; ++call) { sinkD += NRM(vd); } \
    timerD.stop(); \
  } \
  /* disabled: the same loop over vcf, std::max(1,tries/3) times, accumulating into sinkC under timerC */ \
  std::cout << #NRM << "\t" << timerF.value() << "   " << timerD.value() <<  "    " << timerC.value() << "\n"; \
}
 | 
			
		||||
 | 
			
		||||
void check_accuracy(double basef, double based, int s)
 | 
			
		||||
{
 | 
			
		||||
  double yf = basef * std::abs(internal::random<double>());
 | 
			
		||||
  double yd = based * std::abs(internal::random<double>());
 | 
			
		||||
  VectorXf vf = VectorXf::Ones(s) * yf;
 | 
			
		||||
  VectorXd vd = VectorXd::Ones(s) * yd;
 | 
			
		||||
 | 
			
		||||
  std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n";
 | 
			
		||||
  std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
 | 
			
		||||
  std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
 | 
			
		||||
  std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
 | 
			
		||||
  std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n";
 | 
			
		||||
  std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n";
 | 
			
		||||
  std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n";
 | 
			
		||||
  std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
 | 
			
		||||
{
 | 
			
		||||
  VectorXf vf(s);
 | 
			
		||||
  VectorXd vd(s);
 | 
			
		||||
  for (int i=0; i<s; ++i)
 | 
			
		||||
  {
 | 
			
		||||
    vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
 | 
			
		||||
    vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
 | 
			
		||||
  std::cout << "sqsumNorm\t"  << sqsumNorm(vf)  << "\t" << sqsumNorm(vd)  << "\t" << sqsumNorm(vf.cast<long double>()) << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
 | 
			
		||||
  std::cout << "hypotNorm\t"  << hypotNorm(vf)  << "\t" << hypotNorm(vd)  << "\t" << hypotNorm(vf.cast<long double>()) << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
 | 
			
		||||
  std::cout << "blueNorm\t"   << blueNorm(vf)   << "\t" << blueNorm(vd)   << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
 | 
			
		||||
  std::cout << "pblueNorm\t"  << pblueNorm(vf)  << "\t" << pblueNorm(vd)  << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
 | 
			
		||||
  std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>()) << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
 | 
			
		||||
  std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
 | 
			
		||||
//   std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char** argv)
 | 
			
		||||
{
 | 
			
		||||
  int tries = 10;
 | 
			
		||||
  int iters = 100000;
 | 
			
		||||
  double y = 1.1345743233455785456788e12 * internal::random<double>();
 | 
			
		||||
  VectorXf v = VectorXf::Ones(1024) * y;
 | 
			
		||||
 | 
			
		||||
// return 0;
 | 
			
		||||
  int s = 10000;
 | 
			
		||||
  double basef_ok = 1.1345743233455785456788e15;
 | 
			
		||||
  double based_ok = 1.1345743233455785456788e95;
 | 
			
		||||
 | 
			
		||||
  double basef_under = 1.1345743233455785456788e-27;
 | 
			
		||||
  double based_under = 1.1345743233455785456788e-303;
 | 
			
		||||
 | 
			
		||||
  double basef_over = 1.1345743233455785456788e+27;
 | 
			
		||||
  double based_over = 1.1345743233455785456788e+302;
 | 
			
		||||
 | 
			
		||||
  std::cout.precision(20);
 | 
			
		||||
 | 
			
		||||
  std::cerr << "\nNo under/overflow:\n";
 | 
			
		||||
  check_accuracy(basef_ok, based_ok, s);
 | 
			
		||||
 | 
			
		||||
  std::cerr << "\nUnderflow:\n";
 | 
			
		||||
  check_accuracy(basef_under, based_under, s);
 | 
			
		||||
 | 
			
		||||
  std::cerr << "\nOverflow:\n";
 | 
			
		||||
  check_accuracy(basef_over, based_over, s);
 | 
			
		||||
 | 
			
		||||
  std::cerr << "\nVarying (over):\n";
 | 
			
		||||
  for (int k=0; k<1; ++k)
 | 
			
		||||
  {
 | 
			
		||||
    check_accuracy_var(20,27,190,302,s);
 | 
			
		||||
    std::cout << "\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cerr << "\nVarying (under):\n";
 | 
			
		||||
  for (int k=0; k<1; ++k)
 | 
			
		||||
  {
 | 
			
		||||
    check_accuracy_var(-27,20,-302,-190,s);
 | 
			
		||||
    std::cout << "\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  y = 1;
 | 
			
		||||
  std::cout.precision(4);
 | 
			
		||||
  int s1 = 1024*1024*32;
 | 
			
		||||
  std::cerr << "Performance (out of cache, " << s1 << "):\n";
 | 
			
		||||
  {
 | 
			
		||||
    int iters = 1;
 | 
			
		||||
    VectorXf vf = VectorXf::Random(s1) * y;
 | 
			
		||||
    VectorXd vd = VectorXd::Random(s1) * y;
 | 
			
		||||
    VectorXcf vcf = VectorXcf::Random(s1) * y;
 | 
			
		||||
    BENCH_PERF(sqsumNorm);
 | 
			
		||||
    BENCH_PERF(stableNorm);
 | 
			
		||||
    BENCH_PERF(blueNorm);
 | 
			
		||||
    BENCH_PERF(pblueNorm);
 | 
			
		||||
    BENCH_PERF(lapackNorm);
 | 
			
		||||
    BENCH_PERF(hypotNorm);
 | 
			
		||||
    BENCH_PERF(twopassNorm);
 | 
			
		||||
    BENCH_PERF(bl2passNorm);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  std::cerr << "\nPerformance (in cache, " << 512 << "):\n";
 | 
			
		||||
  {
 | 
			
		||||
    int iters = 100000;
 | 
			
		||||
    VectorXf vf = VectorXf::Random(512) * y;
 | 
			
		||||
    VectorXd vd = VectorXd::Random(512) * y;
 | 
			
		||||
    VectorXcf vcf = VectorXcf::Random(512) * y;
 | 
			
		||||
    BENCH_PERF(sqsumNorm);
 | 
			
		||||
    BENCH_PERF(stableNorm);
 | 
			
		||||
    BENCH_PERF(blueNorm);
 | 
			
		||||
    BENCH_PERF(pblueNorm);
 | 
			
		||||
    BENCH_PERF(lapackNorm);
 | 
			
		||||
    BENCH_PERF(hypotNorm);
 | 
			
		||||
    BENCH_PERF(twopassNorm);
 | 
			
		||||
    BENCH_PERF(bl2passNorm);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										84
									
								
								cs440-acg/ext/eigen/bench/bench_reverse.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								cs440-acg/ext/eigen/bench/bench_reverse.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,84 @@
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
#include <bench/BenchUtil.h>
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#ifndef REPEAT
 | 
			
		||||
#define REPEAT 100000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef TRIES
 | 
			
		||||
#define TRIES 20
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
typedef double Scalar;
 | 
			
		||||
 | 
			
		||||
template <typename MatrixType>
 | 
			
		||||
__attribute__ ((noinline)) void bench_reverse(const MatrixType& m)
 | 
			
		||||
{
 | 
			
		||||
  int rows = m.rows();
 | 
			
		||||
  int cols = m.cols();
 | 
			
		||||
  int size = m.size();
 | 
			
		||||
 | 
			
		||||
  int repeats = (REPEAT*1000)/size;
 | 
			
		||||
  MatrixType a = MatrixType::Random(rows,cols);
 | 
			
		||||
  MatrixType b = MatrixType::Random(rows,cols);
 | 
			
		||||
 | 
			
		||||
  BenchTimer timerB, timerH, timerV;
 | 
			
		||||
 | 
			
		||||
  Scalar acc = 0;
 | 
			
		||||
  int r = internal::random<int>(0,rows-1);
 | 
			
		||||
  int c = internal::random<int>(0,cols-1);
 | 
			
		||||
  for (int t=0; t<TRIES; ++t)
 | 
			
		||||
  {
 | 
			
		||||
    timerB.start();
 | 
			
		||||
    for (int k=0; k<repeats; ++k)
 | 
			
		||||
    {
 | 
			
		||||
      asm("#begin foo");
 | 
			
		||||
      b = a.reverse();
 | 
			
		||||
      asm("#end foo");
 | 
			
		||||
      acc += b.coeff(r,c);
 | 
			
		||||
    }
 | 
			
		||||
    timerB.stop();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (MatrixType::RowsAtCompileTime==Dynamic)
 | 
			
		||||
    std::cout << "dyn   ";
 | 
			
		||||
  else
 | 
			
		||||
    std::cout << "fixed ";
 | 
			
		||||
  std::cout << rows << " x " << cols << " \t"
 | 
			
		||||
            << (timerB.value() * REPEAT) / repeats << "s "
 | 
			
		||||
            << "(" << 1e-6 * size*repeats/timerB.value() << " MFLOPS)\t";
 | 
			
		||||
 | 
			
		||||
  std::cout << "\n";
 | 
			
		||||
  // make sure the compiler does not optimize too much
 | 
			
		||||
  if (acc==123)
 | 
			
		||||
    std::cout << acc;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char* argv[])
 | 
			
		||||
{
 | 
			
		||||
  const int dynsizes[] = {4,6,8,16,24,32,49,64,128,256,512,900,0};
 | 
			
		||||
  std::cout << "size            no sqrt                           standard";
 | 
			
		||||
//   #ifdef BENCH_GSL
 | 
			
		||||
//   std::cout << "       GSL (standard + double + ATLAS)  ";
 | 
			
		||||
//   #endif
 | 
			
		||||
  std::cout << "\n";
 | 
			
		||||
  for (uint i=0; dynsizes[i]>0; ++i)
 | 
			
		||||
  {
 | 
			
		||||
    bench_reverse(Matrix<Scalar,Dynamic,Dynamic>(dynsizes[i],dynsizes[i]));
 | 
			
		||||
    bench_reverse(Matrix<Scalar,Dynamic,1>(dynsizes[i]*dynsizes[i]));
 | 
			
		||||
  }
 | 
			
		||||
//   bench_reverse(Matrix<Scalar,2,2>());
 | 
			
		||||
//   bench_reverse(Matrix<Scalar,3,3>());
 | 
			
		||||
//   bench_reverse(Matrix<Scalar,4,4>());
 | 
			
		||||
//   bench_reverse(Matrix<Scalar,5,5>());
 | 
			
		||||
//   bench_reverse(Matrix<Scalar,6,6>());
 | 
			
		||||
//   bench_reverse(Matrix<Scalar,7,7>());
 | 
			
		||||
//   bench_reverse(Matrix<Scalar,8,8>());
 | 
			
		||||
//   bench_reverse(Matrix<Scalar,12,12>());
 | 
			
		||||
//   bench_reverse(Matrix<Scalar,16,16>());
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										18
									
								
								cs440-acg/ext/eigen/bench/bench_sum.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								cs440-acg/ext/eigen/bench/bench_sum.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,18 @@
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
int main() 
 | 
			
		||||
{
 | 
			
		||||
  typedef Matrix<SCALAR,Eigen::Dynamic,1> Vec;
 | 
			
		||||
  Vec v(SIZE);
 | 
			
		||||
  v.setZero();
 | 
			
		||||
  v[0] = 1;
 | 
			
		||||
  v[1] = 2;
 | 
			
		||||
  for(int i = 0; i < 1000000; i++)
 | 
			
		||||
  {
 | 
			
		||||
    v.coeffRef(0) += v.sum() * SCALAR(1e-20);
 | 
			
		||||
  }
 | 
			
		||||
  cout << v.sum() << endl;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										12
									
								
								cs440-acg/ext/eigen/bench/bench_unrolling
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										12
									
								
								cs440-acg/ext/eigen/bench/bench_unrolling
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,12 @@
 | 
			
		||||
#!/bin/bash

# Builds and times benchmark.cpp for matrix sizes 1..15, once with
# EIGEN_UNROLLING_LIMIT=400 and once with EIGEN_DONT_USE_UNROLLED_LOOPS=1.
#
# The compiler command is taken from CXX when that variable is set, e.g.
# gcc : CXX="g++  -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000"
# icc : CXX="icpc -fast -no-inline-max-size -fno-exceptions"
default_cxx="g++  -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000"
CXX=${CXX-$default_cxx}

for i in {1..15}; do
    echo "Matrix size: $i x $i :"
    # unrolled build; the binary is only run when compilation succeeded
    if $CXX -O3 -I.. -DNDEBUG  benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=400 -o benchmark; then
        time ./benchmark >/dev/null
    fi
    # build with unrolled loops disabled
    if $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark; then
        time ./benchmark >/dev/null
    fi
    echo " "
done
 | 
			
		||||
							
								
								
									
										677
									
								
								cs440-acg/ext/eigen/bench/benchmark-blocking-sizes.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										677
									
								
								cs440-acg/ext/eigen/bench/benchmark-blocking-sizes.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,677 @@
 | 
			
		||||
// This file is part of Eigen, a lightweight C++ template library
 | 
			
		||||
// for linear algebra.
 | 
			
		||||
//
 | 
			
		||||
// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com>
 | 
			
		||||
//
 | 
			
		||||
// This Source Code Form is subject to the terms of the Mozilla
 | 
			
		||||
// Public License v. 2.0. If a copy of the MPL was not distributed
 | 
			
		||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 | 
			
		||||
 | 
			
		||||
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <random>
#include <vector>
 | 
			
		||||
 | 
			
		||||
// Runtime switches for the blocking sizes. They are exposed through the
// EIGEN_TEST_SPECIFIC_BLOCKING_SIZE* macros below, which are defined before
// <Eigen/Core> is included, and are written by benchmark_t::run().
bool eigen_use_specific_block_size;
int eigen_block_size_k;
int eigen_block_size_m;
int eigen_block_size_n;

#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZES  eigen_use_specific_block_size
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K eigen_block_size_k
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M eigen_block_size_m
#define EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N eigen_block_size_n
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
 | 
			
		||||
#include <bench/BenchTimer.h>
 | 
			
		||||
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
static BenchTimer timer;
 | 
			
		||||
 | 
			
		||||
// Number of times each measurement is repeated. The repetitions are shuffled
// in with all other measurements instead of being run back to back.
constexpr int measurement_repetitions = 3;

// Shortest timing (as reported by the timer) that we trust. A measurement
// below this threshold is redone with more iterations until it exceeds it.
constexpr float min_accurate_time = 1e-2f;

// Set from the --min-working-set-size command line parameter; 0 selects the
// built-in default working set.
size_t min_working_set_size = 0;

// Highest "clock speed" figure observed so far (see measure_clock_speed()).
float max_clock_speed = 0.0f;

// Inclusive range of sizes benchmarked in each of the K, M and N dimensions.
constexpr size_t maxsize = 2048;
constexpr size_t minsize = 16;
 | 
			
		||||
 | 
			
		||||
typedef MatrixXf MatrixType;
 | 
			
		||||
typedef MatrixType::Scalar Scalar;
 | 
			
		||||
typedef internal::packet_traits<Scalar>::type Packet;
 | 
			
		||||
 | 
			
		||||
static_assert((maxsize & (maxsize - 1)) == 0, "maxsize must be a power of two");
 | 
			
		||||
static_assert((minsize & (minsize - 1)) == 0, "minsize must be a power of two");
 | 
			
		||||
static_assert(maxsize > minsize, "maxsize must be larger than minsize");
 | 
			
		||||
static_assert(maxsize < (minsize << 16), "maxsize must be less than (minsize<<16)");
 | 
			
		||||
 | 
			
		||||
// Plain holder for the three dimensions (K, M, N) of a matrix product.
struct size_triple_t
{
  size_t k, m, n;

  // All three sizes zero.
  size_triple_t()
    : k(0), m(0), n(0)
  {}

  // Sizes given explicitly.
  size_triple_t(size_t _k, size_t _m, size_t _n)
    : k(_k), m(_m), n(_n)
  {}

  // Member-wise copy.
  size_triple_t(const size_triple_t& other)
    : k(other.k), m(other.m), n(other.n)
  {}

  // Decodes the compact form: three 4-bit fields, from high to low K, M, N,
  // each holding the log2 of the corresponding size.
  size_triple_t(uint16_t compact)
    : k(1 << ((compact >> 8) & 0xf))
    , m(1 << ((compact >> 4) & 0xf))
    , n(1 << (compact & 0xf))
  {}
};
 | 
			
		||||
 | 
			
		||||
// Returns the number of right shifts needed to bring x down to zero, minus
// one: the exact log2 for a power of two. Both 0 and 1 yield 0.
uint8_t log2_pot(size_t x) {
  size_t shifts = 0;
  for (x >>= 1; x != 0; x >>= 1) {
    ++shifts;
  }
  return shifts;
}
 | 
			
		||||
 | 
			
		||||
// Convert between size tripes and a compact form fitting in 12 bits
 | 
			
		||||
// where each size, which must be a POT, is encoded as its log2, on 4 bits
 | 
			
		||||
// so the largest representable size is 2^15 == 32k  ... big enough.
 | 
			
		||||
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
 | 
			
		||||
{
 | 
			
		||||
  return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
uint16_t compact_size_triple(const size_triple_t& t)
 | 
			
		||||
{
 | 
			
		||||
  return compact_size_triple(t.k, t.m, t.n);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// A single benchmark. Initially only contains benchmark params.
 | 
			
		||||
// Then call run(), which stores the result in the gflops field.
 | 
			
		||||
struct benchmark_t
 | 
			
		||||
{
 | 
			
		||||
  uint16_t compact_product_size;
 | 
			
		||||
  uint16_t compact_block_size;
 | 
			
		||||
  bool use_default_block_size;
 | 
			
		||||
  float gflops;
 | 
			
		||||
  benchmark_t()
 | 
			
		||||
    : compact_product_size(0)
 | 
			
		||||
    , compact_block_size(0)
 | 
			
		||||
    , use_default_block_size(false)
 | 
			
		||||
    , gflops(0)
 | 
			
		||||
  {
 | 
			
		||||
  }
 | 
			
		||||
  benchmark_t(size_t pk, size_t pm, size_t pn,
 | 
			
		||||
              size_t bk, size_t bm, size_t bn)
 | 
			
		||||
    : compact_product_size(compact_size_triple(pk, pm, pn))
 | 
			
		||||
    , compact_block_size(compact_size_triple(bk, bm, bn))
 | 
			
		||||
    , use_default_block_size(false)
 | 
			
		||||
    , gflops(0)
 | 
			
		||||
  {}
 | 
			
		||||
  benchmark_t(size_t pk, size_t pm, size_t pn)
 | 
			
		||||
    : compact_product_size(compact_size_triple(pk, pm, pn))
 | 
			
		||||
    , compact_block_size(0)
 | 
			
		||||
    , use_default_block_size(true)
 | 
			
		||||
    , gflops(0)
 | 
			
		||||
  {}
 | 
			
		||||
 | 
			
		||||
  void run();
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
ostream& operator<<(ostream& s, const benchmark_t& b)
 | 
			
		||||
{
 | 
			
		||||
  s << hex << b.compact_product_size << dec;
 | 
			
		||||
  if (b.use_default_block_size) {
 | 
			
		||||
    size_triple_t t(b.compact_product_size);
 | 
			
		||||
    Index k = t.k, m = t.m, n = t.n;
 | 
			
		||||
    internal::computeProductBlockingSizes<Scalar, Scalar>(k, m, n);
 | 
			
		||||
    s << " default(" << k << ", " << m << ", " << n << ")";
 | 
			
		||||
  } else {
 | 
			
		||||
    s << " " << hex << b.compact_block_size << dec;
 | 
			
		||||
  }
 | 
			
		||||
  s << " " << b.gflops;
 | 
			
		||||
  return s;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// We sort first by increasing benchmark parameters,
 | 
			
		||||
// then by decreasing performance.
 | 
			
		||||
bool operator<(const benchmark_t& b1, const benchmark_t& b2)
 | 
			
		||||
{ 
 | 
			
		||||
  return b1.compact_product_size < b2.compact_product_size ||
 | 
			
		||||
           (b1.compact_product_size == b2.compact_product_size && (
 | 
			
		||||
             (b1.compact_block_size < b2.compact_block_size || (
 | 
			
		||||
               b1.compact_block_size == b2.compact_block_size &&
 | 
			
		||||
                 b1.gflops > b2.gflops))));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void benchmark_t::run()
 | 
			
		||||
{
 | 
			
		||||
  size_triple_t productsizes(compact_product_size);
 | 
			
		||||
 | 
			
		||||
  if (use_default_block_size) {
 | 
			
		||||
    eigen_use_specific_block_size = false;
 | 
			
		||||
  } else {
 | 
			
		||||
    // feed eigen with our custom blocking params
 | 
			
		||||
    eigen_use_specific_block_size = true;
 | 
			
		||||
    size_triple_t blocksizes(compact_block_size);
 | 
			
		||||
    eigen_block_size_k = blocksizes.k;
 | 
			
		||||
    eigen_block_size_m = blocksizes.m;
 | 
			
		||||
    eigen_block_size_n = blocksizes.n;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // set up the matrix pool
 | 
			
		||||
 | 
			
		||||
  const size_t combined_three_matrices_sizes =
 | 
			
		||||
    sizeof(Scalar) *
 | 
			
		||||
      (productsizes.k * productsizes.m +
 | 
			
		||||
       productsizes.k * productsizes.n +
 | 
			
		||||
       productsizes.m * productsizes.n);
 | 
			
		||||
 | 
			
		||||
  // 64 M is large enough that nobody has a cache bigger than that,
 | 
			
		||||
  // while still being small enough that everybody has this much RAM,
 | 
			
		||||
  // so conveniently we don't need to special-case platforms here.
 | 
			
		||||
  const size_t unlikely_large_cache_size = 64 << 20;
 | 
			
		||||
 | 
			
		||||
  const size_t working_set_size =
 | 
			
		||||
    min_working_set_size ? min_working_set_size : unlikely_large_cache_size;
 | 
			
		||||
 | 
			
		||||
  const size_t matrix_pool_size =
 | 
			
		||||
    1 + working_set_size / combined_three_matrices_sizes;
 | 
			
		||||
 | 
			
		||||
  MatrixType *lhs = new MatrixType[matrix_pool_size];
 | 
			
		||||
  MatrixType *rhs = new MatrixType[matrix_pool_size];
 | 
			
		||||
  MatrixType *dst = new MatrixType[matrix_pool_size];
 | 
			
		||||
  
 | 
			
		||||
  for (size_t i = 0; i < matrix_pool_size; i++) {
 | 
			
		||||
    lhs[i] = MatrixType::Zero(productsizes.m, productsizes.k);
 | 
			
		||||
    rhs[i] = MatrixType::Zero(productsizes.k, productsizes.n);
 | 
			
		||||
    dst[i] = MatrixType::Zero(productsizes.m, productsizes.n);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // main benchmark loop
 | 
			
		||||
 | 
			
		||||
  int iters_at_a_time = 1;
 | 
			
		||||
  float time_per_iter = 0.0f;
 | 
			
		||||
  size_t matrix_index = 0;
 | 
			
		||||
  while (true) {
 | 
			
		||||
 | 
			
		||||
    double starttime = timer.getCpuTime();
 | 
			
		||||
    for (int i = 0; i < iters_at_a_time; i++) {
 | 
			
		||||
      dst[matrix_index].noalias() = lhs[matrix_index] * rhs[matrix_index];
 | 
			
		||||
      matrix_index++;
 | 
			
		||||
      if (matrix_index == matrix_pool_size) {
 | 
			
		||||
        matrix_index = 0;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    double endtime = timer.getCpuTime();
 | 
			
		||||
 | 
			
		||||
    const float timing = float(endtime - starttime);
 | 
			
		||||
 | 
			
		||||
    if (timing >= min_accurate_time) {
 | 
			
		||||
      time_per_iter = timing / iters_at_a_time;
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    iters_at_a_time *= 2;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  delete[] lhs;
 | 
			
		||||
  delete[] rhs;
 | 
			
		||||
  delete[] dst;
 | 
			
		||||
 | 
			
		||||
  gflops = 2e-9 * productsizes.k * productsizes.m * productsizes.n / time_per_iter;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Dumps a description of the CPU to stdout: the contents of /proc/cpuinfo
// on Linux, the output of "sysctl hw" on Apple platforms, nothing elsewhere.
void print_cpuinfo()
{
#ifdef __linux__
  std::cout << "contents of /proc/cpuinfo:" << std::endl;
  std::ifstream cpuinfo("/proc/cpuinfo");
  if (cpuinfo.is_open()) {
    for (std::string line; std::getline(cpuinfo, line); ) {
      std::cout << line << std::endl;
    }
    cpuinfo.close();
  }
  std::cout << std::endl;
#elif defined __APPLE__
  std::cout << "output of sysctl hw:" << std::endl;
  system("sysctl hw");
  std::cout << std::endl;
#endif
}
 | 
			
		||||
 | 
			
		||||
// Human-readable name of a scalar type. Types without a specialization
// below are reported as "unknown".
template <typename T>
std::string type_name()
{
  return std::string("unknown");
}

// Specialization for float.
template <>
std::string type_name<float>()
{
  return std::string("float");
}

// Specialization for double.
template <>
std::string type_name<double>()
{
  return std::string("double");
}
 | 
			
		||||
 | 
			
		||||
// Base class of the actions selectable on the command line. Derived classes
// override both member functions; the base implementations abort.
struct action_t
{
  // Name that selects this action on the command line.
  virtual const char* invokation_name() const
  {
    abort();
    return nullptr;
  }

  // Executes the action.
  virtual void run() const
  {
    abort();
  }

  virtual ~action_t() {}
};
 | 
			
		||||
 | 
			
		||||
void show_usage_and_exit(int /*argc*/, char* argv[],
 | 
			
		||||
                         const vector<unique_ptr<action_t>>& available_actions)
 | 
			
		||||
{
 | 
			
		||||
  cerr << "usage: " << argv[0] << " <action> [options...]" << endl << endl;
 | 
			
		||||
  cerr << "available actions:" << endl << endl;
 | 
			
		||||
  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
 | 
			
		||||
    cerr << "  " << (*it)->invokation_name() << endl;
 | 
			
		||||
  }
 | 
			
		||||
  cerr << endl;
 | 
			
		||||
  cerr << "options:" << endl << endl;
 | 
			
		||||
  cerr << "  --min-working-set-size=N:" << endl;
 | 
			
		||||
  cerr << "       Set the minimum working set size to N bytes." << endl;
 | 
			
		||||
  cerr << "       This is rounded up as needed to a multiple of matrix size." << endl;
 | 
			
		||||
  cerr << "       A larger working set lowers the chance of a warm cache." << endl;
 | 
			
		||||
  cerr << "       The default value 0 means use a large enough working" << endl;
 | 
			
		||||
  cerr << "       set to likely outsize caches." << endl;
 | 
			
		||||
  cerr << "       A value of 1 (that is, 1 byte) would mean don't do anything to" << endl;
 | 
			
		||||
  cerr << "       avoid warm caches." << endl;
 | 
			
		||||
  exit(1);
 | 
			
		||||
}
 | 
			
		||||
     
 | 
			
		||||
float measure_clock_speed()
 | 
			
		||||
{
 | 
			
		||||
  cerr << "Measuring clock speed...                              \r" << flush;
 | 
			
		||||
          
 | 
			
		||||
  vector<float> all_gflops;
 | 
			
		||||
  for (int i = 0; i < 8; i++) {
 | 
			
		||||
    benchmark_t b(1024, 1024, 1024);
 | 
			
		||||
    b.run();
 | 
			
		||||
    all_gflops.push_back(b.gflops);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  sort(all_gflops.begin(), all_gflops.end());
 | 
			
		||||
  float stable_estimate = all_gflops[2] + all_gflops[3] + all_gflops[4] + all_gflops[5];
 | 
			
		||||
 | 
			
		||||
  // multiply by an arbitrary constant to discourage trying doing anything with the
 | 
			
		||||
  // returned values besides just comparing them with each other.
 | 
			
		||||
  float result = stable_estimate * 123.456f;
 | 
			
		||||
 | 
			
		||||
  return result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// A duration in whole seconds, printable in a human-friendly form through
// the stream operator below.
struct human_duration_t
{
  int seconds;
  human_duration_t(int s) : seconds(s) {}
};

// Prints a duration as "<h> h <min> min <s> s".
// The hour and minute parts appear only when at least one full hour or
// minute is left to print. The seconds part is printed only for durations
// shorter than 10 minutes; longer ones are deliberately shown coarsely.
std::ostream& operator<<(std::ostream& s, const human_duration_t& d)
{
  int remainder = d.seconds;
  // Thresholds are inclusive so that exactly one hour prints as "1 h "
  // rather than "60 min ", and exactly one minute as "1 min 0 s".
  if (remainder >= 3600) {
    const int hours = remainder / 3600;
    s << hours << " h ";
    remainder -= hours * 3600;
  }
  if (remainder >= 60) {
    const int minutes = remainder / 60;
    s << minutes << " min ";
    remainder -= minutes * 60;
  }
  if (d.seconds < 600) {
    s << remainder << " s";
  }
  return s;
}
 | 
			
		||||
 | 
			
		||||
const char session_filename[] = "/data/local/tmp/benchmark-blocking-sizes-session.data";
 | 
			
		||||
 | 
			
		||||
void serialize_benchmarks(const char* filename, const vector<benchmark_t>& benchmarks, size_t first_benchmark_to_run)
 | 
			
		||||
{
 | 
			
		||||
  FILE* file = fopen(filename, "w");
 | 
			
		||||
  if (!file) {
 | 
			
		||||
    cerr << "Could not open file " << filename << " for writing." << endl;
 | 
			
		||||
    cerr << "Do you have write permissions on the current working directory?" << endl;
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
  size_t benchmarks_vector_size = benchmarks.size();
 | 
			
		||||
  fwrite(&max_clock_speed, sizeof(max_clock_speed), 1, file);
 | 
			
		||||
  fwrite(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file);
 | 
			
		||||
  fwrite(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file);
 | 
			
		||||
  fwrite(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file);
 | 
			
		||||
  fclose(file);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool deserialize_benchmarks(const char* filename, vector<benchmark_t>& benchmarks, size_t& first_benchmark_to_run)
 | 
			
		||||
{
 | 
			
		||||
  FILE* file = fopen(filename, "r");
 | 
			
		||||
  if (!file) {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  if (1 != fread(&max_clock_speed, sizeof(max_clock_speed), 1, file)) {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  size_t benchmarks_vector_size = 0;
 | 
			
		||||
  if (1 != fread(&benchmarks_vector_size, sizeof(benchmarks_vector_size), 1, file)) {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  if (1 != fread(&first_benchmark_to_run, sizeof(first_benchmark_to_run), 1, file)) {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  benchmarks.resize(benchmarks_vector_size);
 | 
			
		||||
  if (benchmarks.size() != fread(benchmarks.data(), sizeof(benchmark_t), benchmarks.size(), file)) {
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
  unlink(filename);
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void try_run_some_benchmarks(
 | 
			
		||||
  vector<benchmark_t>& benchmarks,
 | 
			
		||||
  double time_start,
 | 
			
		||||
  size_t& first_benchmark_to_run)
 | 
			
		||||
{
 | 
			
		||||
  if (first_benchmark_to_run == benchmarks.size()) {
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double time_last_progress_update = 0;
 | 
			
		||||
  double time_last_clock_speed_measurement = 0;
 | 
			
		||||
  double time_now = 0;
 | 
			
		||||
 | 
			
		||||
  size_t benchmark_index = first_benchmark_to_run;
 | 
			
		||||
 | 
			
		||||
  while (true) {
 | 
			
		||||
    float ratio_done = float(benchmark_index) / benchmarks.size();
 | 
			
		||||
    time_now = timer.getRealTime();
 | 
			
		||||
 | 
			
		||||
    // We check clock speed every minute and at the end.
 | 
			
		||||
    if (benchmark_index == benchmarks.size() ||
 | 
			
		||||
        time_now > time_last_clock_speed_measurement + 60.0f)
 | 
			
		||||
    {
 | 
			
		||||
      time_last_clock_speed_measurement = time_now;
 | 
			
		||||
 | 
			
		||||
      // Ensure that clock speed is as expected
 | 
			
		||||
      float current_clock_speed = measure_clock_speed();
 | 
			
		||||
 | 
			
		||||
      // The tolerance needs to be smaller than the relative difference between
 | 
			
		||||
      // clock speeds that a device could operate under.
 | 
			
		||||
      // It seems unlikely that a device would be throttling clock speeds by
 | 
			
		||||
      // amounts smaller than 2%.
 | 
			
		||||
      // With a value of 1%, I was getting within noise on a Sandy Bridge.
 | 
			
		||||
      const float clock_speed_tolerance = 0.02f;
 | 
			
		||||
 | 
			
		||||
      if (current_clock_speed > (1 + clock_speed_tolerance) * max_clock_speed) {
 | 
			
		||||
        // Clock speed is now higher than we previously measured.
 | 
			
		||||
        // Either our initial measurement was inaccurate, which won't happen
 | 
			
		||||
        // too many times as we are keeping the best clock speed value and
 | 
			
		||||
        // and allowing some tolerance; or something really weird happened,
 | 
			
		||||
        // which invalidates all benchmark results collected so far.
 | 
			
		||||
        // Either way, we better restart all over again now.
 | 
			
		||||
        if (benchmark_index) {
 | 
			
		||||
          cerr << "Restarting at " << 100.0f * ratio_done
 | 
			
		||||
               << " % because clock speed increased.          " << endl;
 | 
			
		||||
        }
 | 
			
		||||
        max_clock_speed = current_clock_speed;
 | 
			
		||||
        first_benchmark_to_run = 0;
 | 
			
		||||
        return;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      bool rerun_last_tests = false;
 | 
			
		||||
 | 
			
		||||
      if (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
 | 
			
		||||
        cerr << "Measurements completed so far: "
 | 
			
		||||
             << 100.0f * ratio_done
 | 
			
		||||
             << " %                             " << endl;
 | 
			
		||||
        cerr << "Clock speed seems to be only "
 | 
			
		||||
             << current_clock_speed/max_clock_speed
 | 
			
		||||
             << " times what it used to be." << endl;
 | 
			
		||||
 | 
			
		||||
        unsigned int seconds_to_sleep_if_lower_clock_speed = 1;
 | 
			
		||||
 | 
			
		||||
        while (current_clock_speed < (1 - clock_speed_tolerance) * max_clock_speed) {
 | 
			
		||||
          if (seconds_to_sleep_if_lower_clock_speed > 32) {
 | 
			
		||||
            cerr << "Sleeping longer probably won't make a difference." << endl;
 | 
			
		||||
            cerr << "Serializing benchmarks to " << session_filename << endl;
 | 
			
		||||
            serialize_benchmarks(session_filename, benchmarks, first_benchmark_to_run);
 | 
			
		||||
            cerr << "Now restart this benchmark, and it should pick up where we left." << endl;
 | 
			
		||||
            exit(2);
 | 
			
		||||
          }
 | 
			
		||||
          rerun_last_tests = true;
 | 
			
		||||
          cerr << "Sleeping "
 | 
			
		||||
               << seconds_to_sleep_if_lower_clock_speed
 | 
			
		||||
               << " s...                                   \r" << endl;
 | 
			
		||||
          sleep(seconds_to_sleep_if_lower_clock_speed);
 | 
			
		||||
          current_clock_speed = measure_clock_speed();
 | 
			
		||||
          seconds_to_sleep_if_lower_clock_speed *= 2;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      if (rerun_last_tests) {
 | 
			
		||||
        cerr << "Redoing the last "
 | 
			
		||||
             << 100.0f * float(benchmark_index - first_benchmark_to_run) / benchmarks.size()
 | 
			
		||||
             << " % because clock speed had been low.   " << endl;
 | 
			
		||||
        return;
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // nothing wrong with the clock speed so far, so there won't be a need to rerun
 | 
			
		||||
      // benchmarks run so far in case we later encounter a lower clock speed.
 | 
			
		||||
      first_benchmark_to_run = benchmark_index;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (benchmark_index == benchmarks.size()) {
 | 
			
		||||
      // We're done!
 | 
			
		||||
      first_benchmark_to_run = benchmarks.size();
 | 
			
		||||
      // Erase progress info
 | 
			
		||||
      cerr << "                                                            " << endl;
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Display progress info on stderr
 | 
			
		||||
    if (time_now > time_last_progress_update + 1.0f) {
 | 
			
		||||
      time_last_progress_update = time_now;
 | 
			
		||||
      cerr << "Measurements... " << 100.0f * ratio_done
 | 
			
		||||
           << " %, ETA "
 | 
			
		||||
           << human_duration_t(float(time_now - time_start) * (1.0f - ratio_done) / ratio_done)
 | 
			
		||||
           << "                          \r" << flush;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // This is where we actually run a benchmark!
 | 
			
		||||
    benchmarks[benchmark_index].run();
 | 
			
		||||
    benchmark_index++;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void run_benchmarks(vector<benchmark_t>& benchmarks)
 | 
			
		||||
{
 | 
			
		||||
  size_t first_benchmark_to_run;
 | 
			
		||||
  vector<benchmark_t> deserialized_benchmarks;
 | 
			
		||||
  bool use_deserialized_benchmarks = false;
 | 
			
		||||
  if (deserialize_benchmarks(session_filename, deserialized_benchmarks, first_benchmark_to_run)) {
 | 
			
		||||
    cerr << "Found serialized session with "
 | 
			
		||||
         << 100.0f * first_benchmark_to_run / deserialized_benchmarks.size()
 | 
			
		||||
         << " % already done" << endl;
 | 
			
		||||
    if (deserialized_benchmarks.size() == benchmarks.size() &&
 | 
			
		||||
        first_benchmark_to_run > 0 &&
 | 
			
		||||
        first_benchmark_to_run < benchmarks.size())
 | 
			
		||||
    {
 | 
			
		||||
      use_deserialized_benchmarks = true;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (use_deserialized_benchmarks) {
 | 
			
		||||
    benchmarks = deserialized_benchmarks;
 | 
			
		||||
  } else {
 | 
			
		||||
    // not using deserialized benchmarks, starting from scratch
 | 
			
		||||
    first_benchmark_to_run = 0;
 | 
			
		||||
 | 
			
		||||
    // Randomly shuffling benchmarks allows us to get accurate enough progress info,
 | 
			
		||||
    // as now the cheap/expensive benchmarks are randomly mixed so they average out.
 | 
			
		||||
    // It also means that if data is corrupted for some time span, the odds are that
 | 
			
		||||
    // not all repetitions of a given benchmark will be corrupted.
 | 
			
		||||
    random_shuffle(benchmarks.begin(), benchmarks.end());
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for (int i = 0; i < 4; i++) {
 | 
			
		||||
    max_clock_speed = max(max_clock_speed, measure_clock_speed());
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  double time_start = 0.0;
 | 
			
		||||
  while (first_benchmark_to_run < benchmarks.size()) {
 | 
			
		||||
    if (first_benchmark_to_run == 0) {
 | 
			
		||||
      time_start = timer.getRealTime();
 | 
			
		||||
    }
 | 
			
		||||
    try_run_some_benchmarks(benchmarks,
 | 
			
		||||
                            time_start,
 | 
			
		||||
                            first_benchmark_to_run);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Sort timings by increasing benchmark parameters, and decreasing gflops.
 | 
			
		||||
  // The latter is very important. It means that we can ignore all but the first
 | 
			
		||||
  // benchmark with given parameters.
 | 
			
		||||
  sort(benchmarks.begin(), benchmarks.end());
 | 
			
		||||
 | 
			
		||||
  // Collect best (i.e. now first) results for each parameter values.
 | 
			
		||||
  vector<benchmark_t> best_benchmarks;
 | 
			
		||||
  for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
 | 
			
		||||
    if (best_benchmarks.empty() ||
 | 
			
		||||
        best_benchmarks.back().compact_product_size != it->compact_product_size ||
 | 
			
		||||
        best_benchmarks.back().compact_block_size != it->compact_block_size)
 | 
			
		||||
    {
 | 
			
		||||
      best_benchmarks.push_back(*it);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // keep and return only the best benchmarks
 | 
			
		||||
  benchmarks = best_benchmarks;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct measure_all_pot_sizes_action_t : action_t
 | 
			
		||||
{
 | 
			
		||||
  virtual const char* invokation_name() const { return "all-pot-sizes"; }
 | 
			
		||||
  virtual void run() const
 | 
			
		||||
  {
 | 
			
		||||
    vector<benchmark_t> benchmarks;
 | 
			
		||||
    for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
 | 
			
		||||
      for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
 | 
			
		||||
        for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
 | 
			
		||||
          for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
 | 
			
		||||
            for (size_t kblock = minsize; kblock <= ksize; kblock *= 2) {
 | 
			
		||||
              for (size_t mblock = minsize; mblock <= msize; mblock *= 2) {
 | 
			
		||||
                for (size_t nblock = minsize; nblock <= nsize; nblock *= 2) {
 | 
			
		||||
                  benchmarks.emplace_back(ksize, msize, nsize, kblock, mblock, nblock);
 | 
			
		||||
                }
 | 
			
		||||
              }
 | 
			
		||||
            }
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    run_benchmarks(benchmarks);
 | 
			
		||||
 | 
			
		||||
    cout << "BEGIN MEASUREMENTS ALL POT SIZES" << endl;
 | 
			
		||||
    for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
 | 
			
		||||
      cout << *it << endl;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct measure_default_sizes_action_t : action_t
 | 
			
		||||
{
 | 
			
		||||
  virtual const char* invokation_name() const { return "default-sizes"; }
 | 
			
		||||
  virtual void run() const
 | 
			
		||||
  {
 | 
			
		||||
    vector<benchmark_t> benchmarks;
 | 
			
		||||
    for (int repetition = 0; repetition < measurement_repetitions; repetition++) {
 | 
			
		||||
      for (size_t ksize = minsize; ksize <= maxsize; ksize *= 2) {
 | 
			
		||||
        for (size_t msize = minsize; msize <= maxsize; msize *= 2) {
 | 
			
		||||
          for (size_t nsize = minsize; nsize <= maxsize; nsize *= 2) {
 | 
			
		||||
            benchmarks.emplace_back(ksize, msize, nsize);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    run_benchmarks(benchmarks);
 | 
			
		||||
 | 
			
		||||
    cout << "BEGIN MEASUREMENTS DEFAULT SIZES" << endl;
 | 
			
		||||
    for (auto it = benchmarks.begin(); it != benchmarks.end(); ++it) {
 | 
			
		||||
      cout << *it << endl;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
int main(int argc, char* argv[])
 | 
			
		||||
{
 | 
			
		||||
  double time_start = timer.getRealTime();
 | 
			
		||||
  cout.precision(4);
 | 
			
		||||
  cerr.precision(4);
 | 
			
		||||
 | 
			
		||||
  vector<unique_ptr<action_t>> available_actions;
 | 
			
		||||
  available_actions.emplace_back(new measure_all_pot_sizes_action_t);
 | 
			
		||||
  available_actions.emplace_back(new measure_default_sizes_action_t);
 | 
			
		||||
 | 
			
		||||
  auto action = available_actions.end();
 | 
			
		||||
 | 
			
		||||
  if (argc <= 1) {
 | 
			
		||||
    show_usage_and_exit(argc, argv, available_actions);
 | 
			
		||||
  }
 | 
			
		||||
  for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
 | 
			
		||||
    if (!strcmp(argv[1], (*it)->invokation_name())) {
 | 
			
		||||
      action = it;
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (action == available_actions.end()) {
 | 
			
		||||
    show_usage_and_exit(argc, argv, available_actions);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for (int i = 2; i < argc; i++) {
 | 
			
		||||
    if (argv[i] == strstr(argv[i], "--min-working-set-size=")) {
 | 
			
		||||
      const char* equals_sign = strchr(argv[i], '=');
 | 
			
		||||
      min_working_set_size = strtoul(equals_sign+1, nullptr, 10);
 | 
			
		||||
    } else {
 | 
			
		||||
      cerr << "unrecognized option: " << argv[i] << endl << endl;
 | 
			
		||||
      show_usage_and_exit(argc, argv, available_actions);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  print_cpuinfo();
 | 
			
		||||
 | 
			
		||||
  cout << "benchmark parameters:" << endl;
 | 
			
		||||
  cout << "pointer size: " << 8*sizeof(void*) << " bits" << endl;
 | 
			
		||||
  cout << "scalar type: " << type_name<Scalar>() << endl;
 | 
			
		||||
  cout << "packet size: " << internal::packet_traits<MatrixType::Scalar>::size << endl;
 | 
			
		||||
  cout << "minsize = " << minsize << endl;
 | 
			
		||||
  cout << "maxsize = " << maxsize << endl;
 | 
			
		||||
  cout << "measurement_repetitions = " << measurement_repetitions << endl;
 | 
			
		||||
  cout << "min_accurate_time = " << min_accurate_time << endl;
 | 
			
		||||
  cout << "min_working_set_size = " << min_working_set_size;
 | 
			
		||||
  if (min_working_set_size == 0) {
 | 
			
		||||
    cout << " (try to outsize caches)";
 | 
			
		||||
  }
 | 
			
		||||
  cout << endl << endl;
 | 
			
		||||
 | 
			
		||||
  (*action)->run();
 | 
			
		||||
 | 
			
		||||
  double time_end = timer.getRealTime();
 | 
			
		||||
  cerr << "Finished in " << human_duration_t(time_end - time_start) << endl;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										39
									
								
								cs440-acg/ext/eigen/bench/benchmark.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								cs440-acg/ext/eigen/bench/benchmark.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,39 @@
 | 
			
		||||
// g++ -O3 -DNDEBUG -DMATSIZE=<x> benchmark.cpp -o benchmark && time ./benchmark
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
 | 
			
		||||
#ifndef MATSIZE
 | 
			
		||||
#define MATSIZE 3
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#ifndef REPEAT
 | 
			
		||||
#define REPEAT 40000000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SCALAR
 | 
			
		||||
#define SCALAR double
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[])
 | 
			
		||||
{
 | 
			
		||||
    Matrix<SCALAR,MATSIZE,MATSIZE> I = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones();
 | 
			
		||||
    Matrix<SCALAR,MATSIZE,MATSIZE> m;
 | 
			
		||||
    for(int i = 0; i < MATSIZE; i++)
 | 
			
		||||
        for(int j = 0; j < MATSIZE; j++)
 | 
			
		||||
        {
 | 
			
		||||
            m(i,j) = (i+MATSIZE*j);
 | 
			
		||||
        }
 | 
			
		||||
    asm("#begin");
 | 
			
		||||
    for(int a = 0; a < REPEAT; a++)
 | 
			
		||||
    {
 | 
			
		||||
        m = Matrix<SCALAR,MATSIZE,MATSIZE>::Ones() + 0.00005 * (m + (m*m));
 | 
			
		||||
    }
 | 
			
		||||
    asm("#end");
 | 
			
		||||
    cout << m << endl;
 | 
			
		||||
    return 0;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										38
									
								
								cs440-acg/ext/eigen/bench/benchmarkSlice.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								cs440-acg/ext/eigen/bench/benchmarkSlice.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,38 @@
 | 
			
		||||
// g++ -O3 -DNDEBUG benchmarkSlice.cpp -o benchmarkSlice && time ./benchmarkSlice
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#ifndef REPEAT
 | 
			
		||||
#define REPEAT 10000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef SCALAR
 | 
			
		||||
#define SCALAR float
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[])
 | 
			
		||||
{
 | 
			
		||||
  typedef Matrix<SCALAR, Eigen::Dynamic, Eigen::Dynamic> Mat;
 | 
			
		||||
  Mat m(100, 100);
 | 
			
		||||
  m.setRandom();
 | 
			
		||||
 | 
			
		||||
  for(int a = 0; a < REPEAT; a++)
 | 
			
		||||
  {
 | 
			
		||||
    int r, c, nr, nc;
 | 
			
		||||
    r = Eigen::internal::random<int>(0,10);
 | 
			
		||||
    c = Eigen::internal::random<int>(0,10);
 | 
			
		||||
    nr = Eigen::internal::random<int>(50,80);
 | 
			
		||||
    nc = Eigen::internal::random<int>(50,80);
 | 
			
		||||
    m.block(r,c,nr,nc) += Mat::Ones(nr,nc);
 | 
			
		||||
    m.block(r,c,nr,nc) *= SCALAR(10);
 | 
			
		||||
    m.block(r,c,nr,nc) -= Mat::constant(nr,nc,10);
 | 
			
		||||
    m.block(r,c,nr,nc) /= SCALAR(10);
 | 
			
		||||
  }
 | 
			
		||||
  cout << m[0] << endl;
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										36
									
								
								cs440-acg/ext/eigen/bench/benchmarkX.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								cs440-acg/ext/eigen/bench/benchmarkX.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,36 @@
 | 
			
		||||
// g++ -fopenmp -I .. -O3 -DNDEBUG -finline-limit=1000 benchmarkX.cpp -o b && time ./b
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#ifndef MATTYPE
 | 
			
		||||
#define MATTYPE MatrixXLd
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef MATSIZE
 | 
			
		||||
#define MATSIZE 400
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef REPEAT
 | 
			
		||||
#define REPEAT 100
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[])
 | 
			
		||||
{
 | 
			
		||||
	MATTYPE I = MATTYPE::Ones(MATSIZE,MATSIZE);
 | 
			
		||||
	MATTYPE m(MATSIZE,MATSIZE);
 | 
			
		||||
	for(int i = 0; i < MATSIZE; i++) for(int j = 0; j < MATSIZE; j++)
 | 
			
		||||
	{
 | 
			
		||||
		m(i,j) = (i+j+1)/(MATSIZE*MATSIZE);
 | 
			
		||||
	}
 | 
			
		||||
	for(int a = 0; a < REPEAT; a++)
 | 
			
		||||
	{
 | 
			
		||||
		m = I + 0.0001 * (m + m*m);
 | 
			
		||||
	}
 | 
			
		||||
	cout << m(0,0) << endl;
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										35
									
								
								cs440-acg/ext/eigen/bench/benchmarkXcwise.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								cs440-acg/ext/eigen/bench/benchmarkXcwise.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,35 @@
 | 
			
		||||
// g++ -O3 -DNDEBUG benchmarkXcwise.cpp -o benchmarkXcwise && time ./benchmarkXcwise
 | 
			
		||||
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <Eigen/Core>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
using namespace Eigen;
 | 
			
		||||
 | 
			
		||||
#ifndef VECTYPE
 | 
			
		||||
#define VECTYPE VectorXLd
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef VECSIZE
 | 
			
		||||
#define VECSIZE 1000000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifndef REPEAT
 | 
			
		||||
#define REPEAT 1000
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int main(int argc, char *argv[])
 | 
			
		||||
{
 | 
			
		||||
	VECTYPE I = VECTYPE::Ones(VECSIZE);
 | 
			
		||||
	VECTYPE m(VECSIZE,1);
 | 
			
		||||
	for(int i = 0; i < VECSIZE; i++)
 | 
			
		||||
	{
 | 
			
		||||
		m[i] = 0.1 * i/VECSIZE;
 | 
			
		||||
	}
 | 
			
		||||
	for(int a = 0; a < REPEAT; a++)
 | 
			
		||||
	{
 | 
			
		||||
		m = VECTYPE::Ones(VECSIZE) + 0.00005 * (m.cwise().square() + m/4);
 | 
			
		||||
	}
 | 
			
		||||
	cout << m[0] << endl;
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										18
									
								
								cs440-acg/ext/eigen/bench/benchmark_suite
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										18
									
								
								cs440-acg/ext/eigen/bench/benchmark_suite
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,18 @@
 | 
			
		||||
#!/bin/bash
# Builds and times benchmark.cpp and benchmarkX.cpp, each in column-major and
# row-major storage order, with and without -DNDEBUG.

CXX=${CXX-g++} # default value unless caller has defined CXX

# run_bench LABEL STEM [EXTRA_FLAGS...]
# Prints LABEL, compiles STEM.cpp into ./STEM with -O3 plus EXTRA_FLAGS and,
# only if the build succeeded, times one run with stdout discarded.
run_bench() {
  local label=$1 stem=$2
  shift 2
  echo "$label"
  $CXX -O3 -I .. "$@" "$stem.cpp" -o "$stem" && time "./$stem" >/dev/null
}

run_bench "Fixed size 3x3, column-major, -DNDEBUG" benchmark -DNDEBUG
run_bench "Fixed size 3x3, column-major, with asserts" benchmark
run_bench "Fixed size 3x3, row-major, -DNDEBUG" benchmark -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG
run_bench "Fixed size 3x3, row-major, with asserts" benchmark -DEIGEN_DEFAULT_TO_ROW_MAJOR

# NOTE(review): the labels below say 20x20, but no -DMATSIZE is passed, so
# benchmarkX.cpp runs with its own default size -- confirm which is intended.
run_bench "Dynamic size 20x20, column-major, -DNDEBUG" benchmarkX -DNDEBUG
run_bench "Dynamic size 20x20, column-major, with asserts" benchmarkX
run_bench "Dynamic size 20x20, row-major, -DNDEBUG" benchmarkX -DEIGEN_DEFAULT_TO_ROW_MAJOR -DNDEBUG
run_bench "Dynamic size 20x20, row-major, with asserts" benchmarkX -DEIGEN_DEFAULT_TO_ROW_MAJOR
 | 
			
		||||
							
								
								
									
										107
									
								
								cs440-acg/ext/eigen/bench/btl/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								cs440-acg/ext/eigen/bench/btl/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,107 @@
 | 
			
		||||
# Project-wide configuration for the BTL benchmark suite: module path,
# compiler-specific optimization flags, shared include directories and the
# optional "rt" library.
project(BTL)

cmake_minimum_required(VERSION 2.6.2)

set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ${Eigen_SOURCE_DIR}/cmake)
include(MacroOptionalAddSubdirectory)

option(BTL_NOVEC "Disable SSE/Altivec optimizations when possible" OFF)

set(CMAKE_INCLUDE_CURRENT_DIR ON)

# IS_ICPC is non-empty when the C++ compiler path contains "icpc".
string(REGEX MATCH icpc IS_ICPC ${CMAKE_CXX_COMPILER})

# GCC / icpc: prepend optimization flags to whatever is already configured.
if(CMAKE_COMPILER_IS_GNUCXX OR IS_ICPC)
  set(CMAKE_CXX_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_CXX_FLAGS}")
  set(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG ${CMAKE_Fortran_FLAGS}")
  if(BTL_NOVEC)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
  endif()
endif()

# MSVC: the C++ flags are replaced outright rather than extended.
if(MSVC)
  set(CMAKE_CXX_FLAGS " /O2 /Ot /GL /fp:fast -DNDEBUG")
#   set(CMAKE_Fortran_FLAGS "-g0 -O3 -DNDEBUG")
  if(BTL_NOVEC)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEIGEN_DONT_VECTORIZE")
  endif()
endif()

# icpc additionally gets -fast in front of the flags set above.
if(IS_ICPC)
  set(CMAKE_CXX_FLAGS "-fast ${CMAKE_CXX_FLAGS}")
  set(CMAKE_Fortran_FLAGS "-fast ${CMAKE_Fortran_FLAGS}")
endif()

include_directories(
  ${PROJECT_SOURCE_DIR}/actions
  ${PROJECT_SOURCE_DIR}/generic_bench
  ${PROJECT_SOURCE_DIR}/generic_bench/utils
  ${PROJECT_SOURCE_DIR}/libs/STL)

# find_package(MKL)
# if (MKL_FOUND)
#   add_definitions(-DHAVE_MKL)
#   set(DEFAULT_LIBRARIES ${MKL_LIBRARIES})
# endif (MKL_FOUND)

find_library(EIGEN_BTL_RT_LIBRARY rt)
# if we cannot find it easily, then we don't need it!
if(NOT EIGEN_BTL_RT_LIBRARY)
  set(EIGEN_BTL_RT_LIBRARY "")
endif()
 | 
			
		||||
 | 
			
		||||
# BTL_ADD_BENCH(<targetname> <source>... [ON|OFF|TRUE|FALSE])
#
# Declares the option BUILD_<targetname> and, when it is enabled, adds an
# executable built from the given sources, registers it as a test of the same
# name and links it against DEFAULT_LIBRARIES and EIGEN_BTL_RT_LIBRARY.
#
# The option's default is ON when none of the extra arguments is one of
# ON/OFF/TRUE/FALSE; otherwise it is the last extra argument.
# Being a macro, the helper variables set here are visible to the caller.
macro(BTL_ADD_BENCH targetname)

  # Source list = extra arguments with any boolean tokens stripped; the two
  # lengths tell whether a boolean token was present at all.
  set(_sources ${ARGN})
  list(LENGTH _sources _argn_length)
  list(REMOVE_ITEM _sources ON OFF TRUE FALSE)
  list(LENGTH _sources _src_length)

  # Remember the last extra argument as the candidate default.
  foreach(_current_var ${ARGN})
    set(_last_var ${_current_var})
  endforeach()

  # Nothing was stripped: no default was supplied, so build by default.
  if(${_argn_length} EQUAL ${_src_length})
    set(_last_var ON)
  endif()

  option(BUILD_${targetname} "Build benchmark ${targetname}" ${_last_var})

  if(BUILD_${targetname})
    add_executable(${targetname} ${_sources})
    add_test(${targetname} "${targetname}")
    target_link_libraries(${targetname} ${DEFAULT_LIBRARIES} ${EIGEN_BTL_RT_LIBRARY})
  endif()

endmacro()
 | 
			
		||||
 | 
			
		||||
# btl_add_target_property(<target> <prop> <value>)
#
# Appends <value> to property <prop> of <target>, separated by a space from
# the property's current value. Does nothing unless BUILD_<target> is enabled.
macro(btl_add_target_property target prop value)

  if(BUILD_${target})
    get_target_property(previous ${target} ${prop})
    # A false result from the lookup is treated as an empty starting value.
    if(NOT previous)
      set(previous "")
    endif()
    set_target_properties(${target} PROPERTIES ${prop} "${previous} ${value}")
  endif()

endmacro()
 | 
			
		||||
 | 
			
		||||
# Enable testing for this directory and below, then descend into each
# library directory and finally the data directory.
enable_testing()

# One subdirectory per library under libs/.
add_subdirectory(libs/eigen3)
add_subdirectory(libs/eigen2)
add_subdirectory(libs/tensors)
add_subdirectory(libs/BLAS)
add_subdirectory(libs/ublas)
add_subdirectory(libs/gmm)
add_subdirectory(libs/mtl4)
add_subdirectory(libs/blitz)
add_subdirectory(libs/tvmet)
add_subdirectory(libs/STL)
add_subdirectory(libs/blaze)

add_subdirectory(data)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										340
									
								
								cs440-acg/ext/eigen/bench/btl/COPYING
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										340
									
								
								cs440-acg/ext/eigen/bench/btl/COPYING
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,340 @@
 | 
			
		||||
                    GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
                       Version 2, June 1991
 | 
			
		||||
 | 
			
		||||
 Copyright (C) 1989, 1991 Free Software Foundation, Inc.
 | 
			
		||||
                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
			
		||||
 Everyone is permitted to copy and distribute verbatim copies
 | 
			
		||||
 of this license document, but changing it is not allowed.
 | 
			
		||||
 | 
			
		||||
                            Preamble
 | 
			
		||||
 | 
			
		||||
  The licenses for most software are designed to take away your
 | 
			
		||||
freedom to share and change it.  By contrast, the GNU General Public
 | 
			
		||||
License is intended to guarantee your freedom to share and change free
 | 
			
		||||
software--to make sure the software is free for all its users.  This
 | 
			
		||||
General Public License applies to most of the Free Software
 | 
			
		||||
Foundation's software and to any other program whose authors commit to
 | 
			
		||||
using it.  (Some other Free Software Foundation software is covered by
 | 
			
		||||
the GNU Library General Public License instead.)  You can apply it to
 | 
			
		||||
your programs, too.
 | 
			
		||||
 | 
			
		||||
  When we speak of free software, we are referring to freedom, not
 | 
			
		||||
price.  Our General Public Licenses are designed to make sure that you
 | 
			
		||||
have the freedom to distribute copies of free software (and charge for
 | 
			
		||||
this service if you wish), that you receive source code or can get it
 | 
			
		||||
if you want it, that you can change the software or use pieces of it
 | 
			
		||||
in new free programs; and that you know you can do these things.
 | 
			
		||||
 | 
			
		||||
  To protect your rights, we need to make restrictions that forbid
 | 
			
		||||
anyone to deny you these rights or to ask you to surrender the rights.
 | 
			
		||||
These restrictions translate to certain responsibilities for you if you
 | 
			
		||||
distribute copies of the software, or if you modify it.
 | 
			
		||||
 | 
			
		||||
  For example, if you distribute copies of such a program, whether
 | 
			
		||||
gratis or for a fee, you must give the recipients all the rights that
 | 
			
		||||
you have.  You must make sure that they, too, receive or can get the
 | 
			
		||||
source code.  And you must show them these terms so they know their
 | 
			
		||||
rights.
 | 
			
		||||
 | 
			
		||||
  We protect your rights with two steps: (1) copyright the software, and
 | 
			
		||||
(2) offer you this license which gives you legal permission to copy,
 | 
			
		||||
distribute and/or modify the software.
 | 
			
		||||
 | 
			
		||||
  Also, for each author's protection and ours, we want to make certain
 | 
			
		||||
that everyone understands that there is no warranty for this free
 | 
			
		||||
software.  If the software is modified by someone else and passed on, we
 | 
			
		||||
want its recipients to know that what they have is not the original, so
 | 
			
		||||
that any problems introduced by others will not reflect on the original
 | 
			
		||||
authors' reputations.
 | 
			
		||||
 | 
			
		||||
  Finally, any free program is threatened constantly by software
 | 
			
		||||
patents.  We wish to avoid the danger that redistributors of a free
 | 
			
		||||
program will individually obtain patent licenses, in effect making the
 | 
			
		||||
program proprietary.  To prevent this, we have made it clear that any
 | 
			
		||||
patent must be licensed for everyone's free use or not licensed at all.
 | 
			
		||||
 | 
			
		||||
  The precise terms and conditions for copying, distribution and
 | 
			
		||||
modification follow.
 | 
			
		||||
 | 
			
		||||
                    GNU GENERAL PUBLIC LICENSE
 | 
			
		||||
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 | 
			
		||||
 | 
			
		||||
  0. This License applies to any program or other work which contains
 | 
			
		||||
a notice placed by the copyright holder saying it may be distributed
 | 
			
		||||
under the terms of this General Public License.  The "Program", below,
 | 
			
		||||
refers to any such program or work, and a "work based on the Program"
 | 
			
		||||
means either the Program or any derivative work under copyright law:
 | 
			
		||||
that is to say, a work containing the Program or a portion of it,
 | 
			
		||||
either verbatim or with modifications and/or translated into another
 | 
			
		||||
language.  (Hereinafter, translation is included without limitation in
 | 
			
		||||
the term "modification".)  Each licensee is addressed as "you".
 | 
			
		||||
 | 
			
		||||
Activities other than copying, distribution and modification are not
 | 
			
		||||
covered by this License; they are outside its scope.  The act of
 | 
			
		||||
running the Program is not restricted, and the output from the Program
 | 
			
		||||
is covered only if its contents constitute a work based on the
 | 
			
		||||
Program (independent of having been made by running the Program).
 | 
			
		||||
Whether that is true depends on what the Program does.
 | 
			
		||||
 | 
			
		||||
  1. You may copy and distribute verbatim copies of the Program's
 | 
			
		||||
source code as you receive it, in any medium, provided that you
 | 
			
		||||
conspicuously and appropriately publish on each copy an appropriate
 | 
			
		||||
copyright notice and disclaimer of warranty; keep intact all the
 | 
			
		||||
notices that refer to this License and to the absence of any warranty;
 | 
			
		||||
and give any other recipients of the Program a copy of this License
 | 
			
		||||
along with the Program.
 | 
			
		||||
 | 
			
		||||
You may charge a fee for the physical act of transferring a copy, and
 | 
			
		||||
you may at your option offer warranty protection in exchange for a fee.
 | 
			
		||||
 | 
			
		||||
  2. You may modify your copy or copies of the Program or any portion
 | 
			
		||||
of it, thus forming a work based on the Program, and copy and
 | 
			
		||||
distribute such modifications or work under the terms of Section 1
 | 
			
		||||
above, provided that you also meet all of these conditions:
 | 
			
		||||
 | 
			
		||||
    a) You must cause the modified files to carry prominent notices
 | 
			
		||||
    stating that you changed the files and the date of any change.
 | 
			
		||||
 | 
			
		||||
    b) You must cause any work that you distribute or publish, that in
 | 
			
		||||
    whole or in part contains or is derived from the Program or any
 | 
			
		||||
    part thereof, to be licensed as a whole at no charge to all third
 | 
			
		||||
    parties under the terms of this License.
 | 
			
		||||
 | 
			
		||||
    c) If the modified program normally reads commands interactively
 | 
			
		||||
    when run, you must cause it, when started running for such
 | 
			
		||||
    interactive use in the most ordinary way, to print or display an
 | 
			
		||||
    announcement including an appropriate copyright notice and a
 | 
			
		||||
    notice that there is no warranty (or else, saying that you provide
 | 
			
		||||
    a warranty) and that users may redistribute the program under
 | 
			
		||||
    these conditions, and telling the user how to view a copy of this
 | 
			
		||||
    License.  (Exception: if the Program itself is interactive but
 | 
			
		||||
    does not normally print such an announcement, your work based on
 | 
			
		||||
    the Program is not required to print an announcement.)
 | 
			
		||||
 | 
			
		||||
These requirements apply to the modified work as a whole.  If
 | 
			
		||||
identifiable sections of that work are not derived from the Program,
 | 
			
		||||
and can be reasonably considered independent and separate works in
 | 
			
		||||
themselves, then this License, and its terms, do not apply to those
 | 
			
		||||
sections when you distribute them as separate works.  But when you
 | 
			
		||||
distribute the same sections as part of a whole which is a work based
 | 
			
		||||
on the Program, the distribution of the whole must be on the terms of
 | 
			
		||||
this License, whose permissions for other licensees extend to the
 | 
			
		||||
entire whole, and thus to each and every part regardless of who wrote it.
 | 
			
		||||
 | 
			
		||||
Thus, it is not the intent of this section to claim rights or contest
 | 
			
		||||
your rights to work written entirely by you; rather, the intent is to
 | 
			
		||||
exercise the right to control the distribution of derivative or
 | 
			
		||||
collective works based on the Program.
 | 
			
		||||
 | 
			
		||||
In addition, mere aggregation of another work not based on the Program
 | 
			
		||||
with the Program (or with a work based on the Program) on a volume of
 | 
			
		||||
a storage or distribution medium does not bring the other work under
 | 
			
		||||
the scope of this License.
 | 
			
		||||
 | 
			
		||||
  3. You may copy and distribute the Program (or a work based on it,
 | 
			
		||||
under Section 2) in object code or executable form under the terms of
 | 
			
		||||
Sections 1 and 2 above provided that you also do one of the following:
 | 
			
		||||
 | 
			
		||||
    a) Accompany it with the complete corresponding machine-readable
 | 
			
		||||
    source code, which must be distributed under the terms of Sections
 | 
			
		||||
    1 and 2 above on a medium customarily used for software interchange; or,
 | 
			
		||||
 | 
			
		||||
    b) Accompany it with a written offer, valid for at least three
 | 
			
		||||
    years, to give any third party, for a charge no more than your
 | 
			
		||||
    cost of physically performing source distribution, a complete
 | 
			
		||||
    machine-readable copy of the corresponding source code, to be
 | 
			
		||||
    distributed under the terms of Sections 1 and 2 above on a medium
 | 
			
		||||
    customarily used for software interchange; or,
 | 
			
		||||
 | 
			
		||||
    c) Accompany it with the information you received as to the offer
 | 
			
		||||
    to distribute corresponding source code.  (This alternative is
 | 
			
		||||
    allowed only for noncommercial distribution and only if you
 | 
			
		||||
    received the program in object code or executable form with such
 | 
			
		||||
    an offer, in accord with Subsection b above.)
 | 
			
		||||
 | 
			
		||||
The source code for a work means the preferred form of the work for
 | 
			
		||||
making modifications to it.  For an executable work, complete source
 | 
			
		||||
code means all the source code for all modules it contains, plus any
 | 
			
		||||
associated interface definition files, plus the scripts used to
 | 
			
		||||
control compilation and installation of the executable.  However, as a
 | 
			
		||||
special exception, the source code distributed need not include
 | 
			
		||||
anything that is normally distributed (in either source or binary
 | 
			
		||||
form) with the major components (compiler, kernel, and so on) of the
 | 
			
		||||
operating system on which the executable runs, unless that component
 | 
			
		||||
itself accompanies the executable.
 | 
			
		||||
 | 
			
		||||
If distribution of executable or object code is made by offering
 | 
			
		||||
access to copy from a designated place, then offering equivalent
 | 
			
		||||
access to copy the source code from the same place counts as
 | 
			
		||||
distribution of the source code, even though third parties are not
 | 
			
		||||
compelled to copy the source along with the object code.
 | 
			
		||||
 | 
			
		||||
  4. You may not copy, modify, sublicense, or distribute the Program
 | 
			
		||||
except as expressly provided under this License.  Any attempt
 | 
			
		||||
otherwise to copy, modify, sublicense or distribute the Program is
 | 
			
		||||
void, and will automatically terminate your rights under this License.
 | 
			
		||||
However, parties who have received copies, or rights, from you under
 | 
			
		||||
this License will not have their licenses terminated so long as such
 | 
			
		||||
parties remain in full compliance.
 | 
			
		||||
 | 
			
		||||
  5. You are not required to accept this License, since you have not
 | 
			
		||||
signed it.  However, nothing else grants you permission to modify or
 | 
			
		||||
distribute the Program or its derivative works.  These actions are
 | 
			
		||||
prohibited by law if you do not accept this License.  Therefore, by
 | 
			
		||||
modifying or distributing the Program (or any work based on the
 | 
			
		||||
Program), you indicate your acceptance of this License to do so, and
 | 
			
		||||
all its terms and conditions for copying, distributing or modifying
 | 
			
		||||
the Program or works based on it.
 | 
			
		||||
 | 
			
		||||
  6. Each time you redistribute the Program (or any work based on the
 | 
			
		||||
Program), the recipient automatically receives a license from the
 | 
			
		||||
original licensor to copy, distribute or modify the Program subject to
 | 
			
		||||
these terms and conditions.  You may not impose any further
 | 
			
		||||
restrictions on the recipients' exercise of the rights granted herein.
 | 
			
		||||
You are not responsible for enforcing compliance by third parties to
 | 
			
		||||
this License.
 | 
			
		||||
 | 
			
		||||
  7. If, as a consequence of a court judgment or allegation of patent
 | 
			
		||||
infringement or for any other reason (not limited to patent issues),
 | 
			
		||||
conditions are imposed on you (whether by court order, agreement or
 | 
			
		||||
otherwise) that contradict the conditions of this License, they do not
 | 
			
		||||
excuse you from the conditions of this License.  If you cannot
 | 
			
		||||
distribute so as to satisfy simultaneously your obligations under this
 | 
			
		||||
License and any other pertinent obligations, then as a consequence you
 | 
			
		||||
may not distribute the Program at all.  For example, if a patent
 | 
			
		||||
license would not permit royalty-free redistribution of the Program by
 | 
			
		||||
all those who receive copies directly or indirectly through you, then
 | 
			
		||||
the only way you could satisfy both it and this License would be to
 | 
			
		||||
refrain entirely from distribution of the Program.
 | 
			
		||||
 | 
			
		||||
If any portion of this section is held invalid or unenforceable under
 | 
			
		||||
any particular circumstance, the balance of the section is intended to
 | 
			
		||||
apply and the section as a whole is intended to apply in other
 | 
			
		||||
circumstances.
 | 
			
		||||
 | 
			
		||||
It is not the purpose of this section to induce you to infringe any
 | 
			
		||||
patents or other property right claims or to contest validity of any
 | 
			
		||||
such claims; this section has the sole purpose of protecting the
 | 
			
		||||
integrity of the free software distribution system, which is
 | 
			
		||||
implemented by public license practices.  Many people have made
 | 
			
		||||
generous contributions to the wide range of software distributed
 | 
			
		||||
through that system in reliance on consistent application of that
 | 
			
		||||
system; it is up to the author/donor to decide if he or she is willing
 | 
			
		||||
to distribute software through any other system and a licensee cannot
 | 
			
		||||
impose that choice.
 | 
			
		||||
 | 
			
		||||
This section is intended to make thoroughly clear what is believed to
 | 
			
		||||
be a consequence of the rest of this License.
 | 
			
		||||
 | 
			
		||||
  8. If the distribution and/or use of the Program is restricted in
 | 
			
		||||
certain countries either by patents or by copyrighted interfaces, the
 | 
			
		||||
original copyright holder who places the Program under this License
 | 
			
		||||
may add an explicit geographical distribution limitation excluding
 | 
			
		||||
those countries, so that distribution is permitted only in or among
 | 
			
		||||
countries not thus excluded.  In such case, this License incorporates
 | 
			
		||||
the limitation as if written in the body of this License.
 | 
			
		||||
 | 
			
		||||
  9. The Free Software Foundation may publish revised and/or new versions
 | 
			
		||||
of the General Public License from time to time.  Such new versions will
 | 
			
		||||
be similar in spirit to the present version, but may differ in detail to
 | 
			
		||||
address new problems or concerns.
 | 
			
		||||
 | 
			
		||||
Each version is given a distinguishing version number.  If the Program
 | 
			
		||||
specifies a version number of this License which applies to it and "any
 | 
			
		||||
later version", you have the option of following the terms and conditions
 | 
			
		||||
either of that version or of any later version published by the Free
 | 
			
		||||
Software Foundation.  If the Program does not specify a version number of
 | 
			
		||||
this License, you may choose any version ever published by the Free Software
 | 
			
		||||
Foundation.
 | 
			
		||||
 | 
			
		||||
  10. If you wish to incorporate parts of the Program into other free
 | 
			
		||||
programs whose distribution conditions are different, write to the author
 | 
			
		||||
to ask for permission.  For software which is copyrighted by the Free
 | 
			
		||||
Software Foundation, write to the Free Software Foundation; we sometimes
 | 
			
		||||
make exceptions for this.  Our decision will be guided by the two goals
 | 
			
		||||
of preserving the free status of all derivatives of our free software and
 | 
			
		||||
of promoting the sharing and reuse of software generally.
 | 
			
		||||
 | 
			
		||||
                            NO WARRANTY
 | 
			
		||||
 | 
			
		||||
  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
 | 
			
		||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
 | 
			
		||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
 | 
			
		||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
 | 
			
		||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 | 
			
		||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
 | 
			
		||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
 | 
			
		||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
 | 
			
		||||
REPAIR OR CORRECTION.
 | 
			
		||||
 | 
			
		||||
  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
 | 
			
		||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
 | 
			
		||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
 | 
			
		||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
 | 
			
		||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
 | 
			
		||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
 | 
			
		||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
 | 
			
		||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
 | 
			
		||||
POSSIBILITY OF SUCH DAMAGES.
 | 
			
		||||
 | 
			
		||||
                     END OF TERMS AND CONDITIONS
 | 
			
		||||
 | 
			
		||||
            How to Apply These Terms to Your New Programs
 | 
			
		||||
 | 
			
		||||
  If you develop a new program, and you want it to be of the greatest
 | 
			
		||||
possible use to the public, the best way to achieve this is to make it
 | 
			
		||||
free software which everyone can redistribute and change under these terms.
 | 
			
		||||
 | 
			
		||||
  To do so, attach the following notices to the program.  It is safest
 | 
			
		||||
to attach them to the start of each source file to most effectively
 | 
			
		||||
convey the exclusion of warranty; and each file should have at least
 | 
			
		||||
the "copyright" line and a pointer to where the full notice is found.
 | 
			
		||||
 | 
			
		||||
    <one line to give the program's name and a brief idea of what it does.>
 | 
			
		||||
    Copyright (C) <year>  <name of author>
 | 
			
		||||
 | 
			
		||||
    This program is free software; you can redistribute it and/or modify
 | 
			
		||||
    it under the terms of the GNU General Public License as published by
 | 
			
		||||
    the Free Software Foundation; either version 2 of the License, or
 | 
			
		||||
    (at your option) any later version.
 | 
			
		||||
 | 
			
		||||
    This program is distributed in the hope that it will be useful,
 | 
			
		||||
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
    GNU General Public License for more details.
 | 
			
		||||
 | 
			
		||||
    You should have received a copy of the GNU General Public License
 | 
			
		||||
    along with this program; if not, write to the Free Software
 | 
			
		||||
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Also add information on how to contact you by electronic and paper mail.
 | 
			
		||||
 | 
			
		||||
If the program is interactive, make it output a short notice like this
 | 
			
		||||
when it starts in an interactive mode:
 | 
			
		||||
 | 
			
		||||
    Gnomovision version 69, Copyright (C) year name of author
 | 
			
		||||
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
 | 
			
		||||
    This is free software, and you are welcome to redistribute it
 | 
			
		||||
    under certain conditions; type `show c' for details.
 | 
			
		||||
 | 
			
		||||
The hypothetical commands `show w' and `show c' should show the appropriate
 | 
			
		||||
parts of the General Public License.  Of course, the commands you use may
 | 
			
		||||
be called something other than `show w' and `show c'; they could even be
 | 
			
		||||
mouse-clicks or menu items--whatever suits your program.
 | 
			
		||||
 | 
			
		||||
You should also get your employer (if you work as a programmer) or your
 | 
			
		||||
school, if any, to sign a "copyright disclaimer" for the program, if
 | 
			
		||||
necessary.  Here is a sample; alter the names:
 | 
			
		||||
 | 
			
		||||
  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
 | 
			
		||||
  `Gnomovision' (which makes passes at compilers) written by James Hacker.
 | 
			
		||||
 | 
			
		||||
  <signature of Ty Coon>, 1 April 1989
 | 
			
		||||
  Ty Coon, President of Vice
 | 
			
		||||
 | 
			
		||||
This General Public License does not permit incorporating your program into
 | 
			
		||||
proprietary programs.  If your program is a subroutine library, you may
 | 
			
		||||
consider it more useful to permit linking proprietary applications with the
 | 
			
		||||
library.  If this is what you want to do, use the GNU Library General
 | 
			
		||||
Public License instead of this License.
 | 
			
		||||
							
								
								
									
										154
									
								
								cs440-acg/ext/eigen/bench/btl/README
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										154
									
								
								cs440-acg/ext/eigen/bench/btl/README
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,154 @@
 | 
			
		||||
Bench Template Library
 | 
			
		||||
 | 
			
		||||
****************************************
 | 
			
		||||
Introduction :
 | 
			
		||||
 | 
			
		||||
The aim of this project is to compare the performance
 | 
			
		||||
of available numerical libraries. The code is designed
 | 
			
		||||
as generic and modular as possible. Thus, adding new
 | 
			
		||||
numerical libraries or new numerical tests should
 | 
			
		||||
require minimal effort.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
*****************************************
 | 
			
		||||
 | 
			
		||||
Installation :
 | 
			
		||||
 | 
			
		||||
BTL uses cmake / ctest:
 | 
			
		||||
 | 
			
		||||
1 - create a build directory:
 | 
			
		||||
 | 
			
		||||
  $ mkdir build
 | 
			
		||||
  $ cd build
 | 
			
		||||
 | 
			
		||||
2 - configure:
 | 
			
		||||
 | 
			
		||||
  $ ccmake ..
 | 
			
		||||
 | 
			
		||||
3 - run the bench using ctest:
 | 
			
		||||
 | 
			
		||||
  $ ctest -V
 | 
			
		||||
 | 
			
		||||
You can run the benchmarks only on libraries matching a given regular expression:
 | 
			
		||||
  ctest -V -R <regexp>
 | 
			
		||||
For instance:
 | 
			
		||||
  ctest -V -R eigen2
 | 
			
		||||
 | 
			
		||||
You can also select a given set of actions defining the environment variable BTL_CONFIG this way:
 | 
			
		||||
  BTL_CONFIG="-a action1{:action2}*" ctest -V
 | 
			
		||||
An example:
 | 
			
		||||
  BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2
 | 
			
		||||
 | 
			
		||||
Finally, if bench results already exist (the bench*.dat files), the new results are merged with them by keeping the best value for each matrix size. If you want to overwrite the previous results instead, simply add the "--overwrite" option:
 | 
			
		||||
  BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
 | 
			
		||||
 | 
			
		||||
4 - analyze the results. Different data files (.dat) are produced in each of the libs directories.
 | 
			
		||||
 If gnuplot is available, choose a directory name in the data directory to store the results and type:
 | 
			
		||||
        $ cd data
 | 
			
		||||
        $ mkdir my_directory
 | 
			
		||||
        $ cp ../libs/*/*.dat my_directory
 | 
			
		||||
 Build the data utilities in this (data) directory
 | 
			
		||||
        make
 | 
			
		||||
 Then you can look at the raw data,
 | 
			
		||||
        go_mean my_directory
 | 
			
		||||
 or smooth the data first :
 | 
			
		||||
	smooth_all.sh my_directory
 | 
			
		||||
	go_mean my_directory_smooth
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
*************************************************
 | 
			
		||||
 | 
			
		||||
Files and directories :
 | 
			
		||||
 | 
			
		||||
 generic_bench : all the bench sources common to all libraries
 | 
			
		||||
 | 
			
		||||
 actions : sources for different action wrappers (axpy, matrix-matrix product) to be tested.
 | 
			
		||||
 | 
			
		||||
 libs/* : bench sources specific to each tested library.
 | 
			
		||||
 | 
			
		||||
 machine_dep : directory used to store machine specific Makefile.in
 | 
			
		||||
 | 
			
		||||
 data : directory used to store gnuplot scripts and data analysis utilities
 | 
			
		||||
 | 
			
		||||
**************************************************
 | 
			
		||||
 | 
			
		||||
Principles : the code modularity is achieved by defining two concepts :
 | 
			
		||||
 | 
			
		||||
 ****** Action concept : This is a class defining which kind
 | 
			
		||||
  of test must be performed (e.g. a matrix_vector_product).
 | 
			
		||||
	An Action should define the following methods :
 | 
			
		||||
 | 
			
		||||
        *** Ctor using the size of the problem (matrix or vector size) as an argument
 | 
			
		||||
	    Action action(size);
 | 
			
		||||
        *** initialize : this method initializes the calculation (e.g. initializes the matrix and vector arguments)
 | 
			
		||||
	    action.initialize();
 | 
			
		||||
	*** calculate : this method actually launches the calculation to be benchmarked
 | 
			
		||||
	    action.calculate();
 | 
			
		||||
	*** nb_op_base() : this method returns the complexity of the calculate method (allowing the mflops evaluation)
 | 
			
		||||
        *** name() : this method returns the name of the action (std::string)
 | 
			
		||||
 | 
			
		||||
 ****** Interface concept : This is a class or namespace defining how to use a given library and
 | 
			
		||||
  its specific containers (matrix and vector). Up to now an interface should define the following types
 | 
			
		||||
 | 
			
		||||
	*** real_type : kind of float to be used (float or double)
 | 
			
		||||
	*** stl_vector : must correspond to std::vector<real_type>
 | 
			
		||||
	*** stl_matrix : must correspond to std::vector<stl_vector>
 | 
			
		||||
	*** gene_vector : the vector type for this interface        --> e.g. (real_type *) for the C_interface
 | 
			
		||||
	*** gene_matrix : the matrix type for this interface        --> e.g. (gene_vector *) for the C_interface
 | 
			
		||||
 | 
			
		||||
	+ the following common methods
 | 
			
		||||
 | 
			
		||||
        *** free_matrix(gene_matrix & A, int N)  deallocation of an N-sized gene_matrix A
 | 
			
		||||
        *** free_vector(gene_vector & B)  deallocation of an N-sized gene_vector B
 | 
			
		||||
        *** matrix_from_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of an stl_matrix A_stl into a gene_matrix A.
 | 
			
		||||
	     The allocation of A is done in this function.
 | 
			
		||||
	*** vector_from_stl(gene_vector & B, stl_vector & B_stl)  copy the content of an stl_vector B_stl into a gene_vector B.
 | 
			
		||||
	     The allocation of B is done in this function.
 | 
			
		||||
        *** matrix_to_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of an gene_matrix A into an stl_matrix A_stl.
 | 
			
		||||
             The size of A_STL must corresponds to the size of A.
 | 
			
		||||
        *** vector_to_stl(gene_vector & A, stl_vector & A_stl) copy the content of an gene_vector A into an stl_vector A_stl.
 | 
			
		||||
             The size of A_stl must correspond to the size of A.
 | 
			
		||||
	*** copy_matrix(gene_matrix & source, gene_matrix & cible, int N) : copy the content of source in cible. Both source
 | 
			
		||||
		and cible must be sized NxN.
 | 
			
		||||
	*** copy_vector(gene_vector & source, gene_vector & cible, int N) : copy the content of source in cible. Both source
 | 
			
		||||
 		and cible must be sized N.
 | 
			
		||||
 | 
			
		||||
	and the following methods, each corresponding to an action to be benchmarked :
 | 
			
		||||
 | 
			
		||||
	***  matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
 | 
			
		||||
	***  matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
 | 
			
		||||
        ***  ata_product(const gene_matrix & A, gene_matrix & X, int N)
 | 
			
		||||
	***  aat_product(const gene_matrix & A, gene_matrix & X, int N)
 | 
			
		||||
        ***  axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
 | 
			
		||||
 | 
			
		||||
 The bench algorithm (generic_bench/bench.hh) is templated with an action itself templated with
 | 
			
		||||
 an interface. A typical main.cpp source stored in a given library directory libs/A_LIB
 | 
			
		||||
 looks like :
 | 
			
		||||
 | 
			
		||||
 bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
 | 
			
		||||
 | 
			
		||||
 this function will produce XY data file containing measured  mflops as a function of the size for 50
 | 
			
		||||
 sizes between 10 and 1000.
 | 
			
		||||
 | 
			
		||||
 This algorithm can be adapted by providing a given Perf_Analyzer object which determines how the time
 | 
			
		||||
 measurements must be done. For example, the X86_Perf_Analyzer uses the asm rdtsc function and provides
 | 
			
		||||
 a very fast and accurate (but less portable) timing method. The default is the Portable_Perf_Analyzer
 | 
			
		||||
 so
 | 
			
		||||
 | 
			
		||||
 bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
 | 
			
		||||
 | 
			
		||||
 is equivalent to
 | 
			
		||||
 | 
			
		||||
 bench< Portable_Perf_Analyzer,AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
 | 
			
		||||
 | 
			
		||||
 If your system supports it, we suggest using a mixed implementation (X86_Perf_Analyzer+Portable_Perf_Analyzer).
 | 
			
		||||
 replace
 | 
			
		||||
     bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point);
 | 
			
		||||
 with
 | 
			
		||||
     bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
 | 
			
		||||
 in generic_bench/bench.hh
 | 
			
		||||
 | 
			
		||||
.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										145
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_aat_product.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										145
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_aat_product.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,145 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_aat_product.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_AAT_PRODUCT
 | 
			
		||||
#define ACTION_AAT_PRODUCT
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_aat_product {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_aat_product( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_aat_product Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(X_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_aat_product( const  Action_aat_product & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_aat_product Copy Ctor");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_aat_product( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_aat_product Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A,_size);
 | 
			
		||||
    Interface::free_matrix(X,_size);
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A_ref,_size);
 | 
			
		||||
    Interface::free_matrix(X_ref,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "aat_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return double(_size)*double(_size)*double(_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
 | 
			
		||||
    Interface::copy_matrix(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_matrix(X_ref,X,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
 | 
			
		||||
      Interface::aat_product(A,X,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
    if (_size>128) return;
 | 
			
		||||
    // calculation check
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_to_stl(X,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::aat_product(A_stl,X_stl,_size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-6){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix A_stl;
 | 
			
		||||
  typename Interface::stl_matrix X_stl;
 | 
			
		||||
  typename Interface::stl_matrix resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A_ref;
 | 
			
		||||
  typename Interface::gene_matrix X_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A;
 | 
			
		||||
  typename Interface::gene_matrix X;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										145
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_ata_product.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										145
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_ata_product.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,145 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_ata_product.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_ATA_PRODUCT
 | 
			
		||||
#define ACTION_ATA_PRODUCT
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_ata_product {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_ata_product( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_ata_product Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(X_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_ata_product( const  Action_ata_product & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_ata_product Copy Ctor");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_ata_product( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_ata_product Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A,_size);
 | 
			
		||||
    Interface::free_matrix(X,_size);
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A_ref,_size);
 | 
			
		||||
    Interface::free_matrix(X_ref,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "ata_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return 2.0*_size*_size*_size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
 | 
			
		||||
    Interface::copy_matrix(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_matrix(X_ref,X,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
 | 
			
		||||
      Interface::ata_product(A,X,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
    if (_size>128) return;
 | 
			
		||||
    // calculation check
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_to_stl(X,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::ata_product(A_stl,X_stl,_size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-6){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix A_stl;
 | 
			
		||||
  typename Interface::stl_matrix X_stl;
 | 
			
		||||
  typename Interface::stl_matrix resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A_ref;
 | 
			
		||||
  typename Interface::gene_matrix X_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A;
 | 
			
		||||
  typename Interface::gene_matrix X;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										134
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_atv_product.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_atv_product.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,134 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_atv_product.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_ATV_PRODUCT
 | 
			
		||||
#define ACTION_ATV_PRODUCT
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_atv_product {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  Action_atv_product( int size ) : _size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_atv_product Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(B_stl,_size);
 | 
			
		||||
    init_vector<null_function>(X_stl,_size);
 | 
			
		||||
    init_vector<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::vector_from_stl(B_ref,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(X_ref,X_stl);
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::vector_from_stl(B,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(X,X_stl);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
  Action_atv_product( const  Action_atv_product & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_atv_product Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ~Action_atv_product( void )
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_atv_product Dtor");
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A,_size);
 | 
			
		||||
    Interface::free_vector(B);
 | 
			
		||||
    Interface::free_vector(X);
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A_ref,_size);
 | 
			
		||||
    Interface::free_vector(B_ref);
 | 
			
		||||
    Interface::free_vector(X_ref);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline std::string name() { return "atv_" + Interface::name(); }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ) { return 2.0*_size*_size; }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
    Interface::copy_matrix(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_vector(B_ref,B,_size);
 | 
			
		||||
    Interface::copy_vector(X_ref,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void calculate( void ) {
 | 
			
		||||
    BTL_ASM_COMMENT("begin atv");
 | 
			
		||||
    Interface::atv_product(A,B,X,_size);
 | 
			
		||||
    BTL_ASM_COMMENT("end atv");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void )
 | 
			
		||||
  {
 | 
			
		||||
    if (_size>128) return;
 | 
			
		||||
    Interface::vector_to_stl(X,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::atv_product(A_stl,B_stl,X_stl,_size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-6){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix A_stl;
 | 
			
		||||
  typename Interface::stl_vector B_stl;
 | 
			
		||||
  typename Interface::stl_vector X_stl;
 | 
			
		||||
  typename Interface::stl_vector resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A_ref;
 | 
			
		||||
  typename Interface::gene_vector B_ref;
 | 
			
		||||
  typename Interface::gene_vector X_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A;
 | 
			
		||||
  typename Interface::gene_vector B;
 | 
			
		||||
  typename Interface::gene_vector X;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										127
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_axpby.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										127
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_axpby.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,127 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_axpby.hh
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_AXPBY
 | 
			
		||||
#define ACTION_AXPBY
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_axpby {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
  Action_axpby( int size ):_alpha(0.5),_beta(0.95),_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_axpby Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL vector initialization
 | 
			
		||||
    init_vector<pseudo_random>(X_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(Y_stl,_size);
 | 
			
		||||
    init_vector<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::vector_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::vector_from_stl(Y_ref,Y_stl);
 | 
			
		||||
 | 
			
		||||
    Interface::vector_from_stl(X,X_stl);
 | 
			
		||||
    Interface::vector_from_stl(Y,Y_stl);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
  Action_axpby( const  Action_axpby & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_axpby Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
  ~Action_axpby( void ){
 | 
			
		||||
    MESSAGE("Action_axpby Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
    Interface::free_vector(X_ref);
 | 
			
		||||
    Interface::free_vector(Y_ref);
 | 
			
		||||
 | 
			
		||||
    Interface::free_vector(X);
 | 
			
		||||
    Interface::free_vector(Y);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "axpby_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return 3.0*_size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
    Interface::copy_vector(X_ref,X,_size);
 | 
			
		||||
    Interface::copy_vector(Y_ref,Y,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
    BTL_ASM_COMMENT("mybegin axpby");
 | 
			
		||||
    Interface::axpby(_alpha,X,_beta,Y,_size);
 | 
			
		||||
    BTL_ASM_COMMENT("myend axpby");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
    if (_size>128) return;
 | 
			
		||||
    // calculation check
 | 
			
		||||
    Interface::vector_to_stl(Y,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::axpby(_alpha,X_stl,_beta,Y_stl,_size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-6){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
      exit(2);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_vector X_stl;
 | 
			
		||||
  typename Interface::stl_vector Y_stl;
 | 
			
		||||
  typename Interface::stl_vector resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_vector X_ref;
 | 
			
		||||
  typename Interface::gene_vector Y_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_vector X;
 | 
			
		||||
  typename Interface::gene_vector Y;
 | 
			
		||||
 | 
			
		||||
  typename Interface::real_type _alpha;
 | 
			
		||||
  typename Interface::real_type _beta;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										139
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_axpy.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										139
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_axpy.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,139 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_axpy.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_AXPY
 | 
			
		||||
#define ACTION_AXPY
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_axpy {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_axpy( int size ):_coef(1.0),_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_axpy Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL vector initialization
 | 
			
		||||
 | 
			
		||||
    init_vector<pseudo_random>(X_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(Y_stl,_size);
 | 
			
		||||
    init_vector<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    Interface::vector_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::vector_from_stl(Y_ref,Y_stl);
 | 
			
		||||
 | 
			
		||||
    Interface::vector_from_stl(X,X_stl);
 | 
			
		||||
    Interface::vector_from_stl(Y,Y_stl);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_axpy( const  Action_axpy & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_axpy Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_axpy( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_axpy Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
 | 
			
		||||
    Interface::free_vector(X_ref);
 | 
			
		||||
    Interface::free_vector(Y_ref);
 | 
			
		||||
 | 
			
		||||
    Interface::free_vector(X);
 | 
			
		||||
    Interface::free_vector(Y);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "axpy_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return 2.0*_size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
    Interface::copy_vector(X_ref,X,_size);
 | 
			
		||||
    Interface::copy_vector(Y_ref,Y,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
    BTL_ASM_COMMENT("mybegin axpy");
 | 
			
		||||
    Interface::axpy(_coef,X,Y,_size);
 | 
			
		||||
    BTL_ASM_COMMENT("myend axpy");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
    if (_size>128) return;
 | 
			
		||||
    // calculation check
 | 
			
		||||
 | 
			
		||||
    Interface::vector_to_stl(Y,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::axpy(_coef,X_stl,Y_stl,_size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(Y_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-6){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
      exit(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_vector X_stl;
 | 
			
		||||
  typename Interface::stl_vector Y_stl;
 | 
			
		||||
  typename Interface::stl_vector resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_vector X_ref;
 | 
			
		||||
  typename Interface::gene_vector Y_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_vector X;
 | 
			
		||||
  typename Interface::gene_vector Y;
 | 
			
		||||
 | 
			
		||||
  typename Interface::real_type _coef;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										128
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_cholesky.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										128
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_cholesky.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,128 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_cholesky.hh
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_CHOLESKY
 | 
			
		||||
#define ACTION_CHOLESKY
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_cholesky {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_cholesky( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_cholesky Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL mat/vec initialization
 | 
			
		||||
    init_matrix_symm<pseudo_random>(X_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(C_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // make sure X is invertible
 | 
			
		||||
    for (int i=0; i<_size; ++i)
 | 
			
		||||
      X_stl[i][i] = std::abs(X_stl[i][i]) * 1e2 + 100;
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(C,C_stl);
 | 
			
		||||
 | 
			
		||||
    _cost = 0;
 | 
			
		||||
    for (int j=0; j<_size; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      double r = std::max(_size - j -1,0);
 | 
			
		||||
      _cost += 2*(r*j+r+j);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_cholesky( const  Action_cholesky & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_cholesky Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_cholesky( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_cholesky Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
    Interface::free_matrix(X_ref,_size);
 | 
			
		||||
    Interface::free_matrix(X,_size);
 | 
			
		||||
    Interface::free_matrix(C,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "cholesky_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return _cost;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
    Interface::copy_matrix(X_ref,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
      Interface::cholesky(X,C,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
    // calculation check
 | 
			
		||||
//     STL_interface<typename Interface::real_type>::cholesky(X_stl,C_stl,_size);
 | 
			
		||||
//
 | 
			
		||||
//     typename Interface::real_type error=
 | 
			
		||||
//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
 | 
			
		||||
//
 | 
			
		||||
//     if (error>1.e-6){
 | 
			
		||||
//       INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
//       exit(0);
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix X_stl;
 | 
			
		||||
  typename Interface::stl_matrix C_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix X_ref;
 | 
			
		||||
  typename Interface::gene_matrix X;
 | 
			
		||||
  typename Interface::gene_matrix C;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
  double _cost;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										128
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_ger.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										128
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_ger.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,128 @@
 | 
			
		||||
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_GER
 | 
			
		||||
#define ACTION_GER
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_ger {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
  BTL_DONT_INLINE Action_ger( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_ger Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
    typename Interface::stl_matrix tmp;
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(B_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(X_stl,_size);
 | 
			
		||||
    init_vector<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::vector_from_stl(B_ref,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(B,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::vector_from_stl(X,X_stl);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
  Action_ger( const  Action_ger & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_ger Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
  BTL_DONT_INLINE ~Action_ger( void ){
 | 
			
		||||
    MESSAGE("Action_ger Dtor");
 | 
			
		||||
    Interface::free_matrix(A,_size);
 | 
			
		||||
    Interface::free_vector(B);
 | 
			
		||||
    Interface::free_vector(X);
 | 
			
		||||
    Interface::free_matrix(A_ref,_size);
 | 
			
		||||
    Interface::free_vector(B_ref);
 | 
			
		||||
    Interface::free_vector(X_ref);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "ger_" + Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return 2.0*_size*_size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE  void initialize( void ){
 | 
			
		||||
    Interface::copy_matrix(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_vector(B_ref,B,_size);
 | 
			
		||||
    Interface::copy_vector(X_ref,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void calculate( void ) {
 | 
			
		||||
    BTL_ASM_COMMENT("#begin ger");
 | 
			
		||||
    Interface::ger(A,B,X,_size);
 | 
			
		||||
    BTL_ASM_COMMENT("end ger");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void check_result( void ){
 | 
			
		||||
    // calculation check
 | 
			
		||||
    Interface::vector_to_stl(X,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::ger(A_stl,B_stl,X_stl,_size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-3){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
//       exit(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix A_stl;
 | 
			
		||||
  typename Interface::stl_vector B_stl;
 | 
			
		||||
  typename Interface::stl_vector X_stl;
 | 
			
		||||
  typename Interface::stl_vector resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A_ref;
 | 
			
		||||
  typename Interface::gene_vector B_ref;
 | 
			
		||||
  typename Interface::gene_vector X_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A;
 | 
			
		||||
  typename Interface::gene_vector B;
 | 
			
		||||
  typename Interface::gene_vector X;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										233
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_hessenberg.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										233
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_hessenberg.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,233 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_hessenberg.hh
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_HESSENBERG
 | 
			
		||||
#define ACTION_HESSENBERG
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_hessenberg {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_hessenberg( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_hessenberg Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL vector initialization
 | 
			
		||||
    init_matrix<pseudo_random>(X_stl,_size);
 | 
			
		||||
 | 
			
		||||
    init_matrix<null_function>(C_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(C,C_stl);
 | 
			
		||||
 | 
			
		||||
    _cost = 0;
 | 
			
		||||
    for (int j=0; j<_size-2; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      double r = std::max(0,_size-j-1);
 | 
			
		||||
      double b = std::max(0,_size-j-2);
 | 
			
		||||
      _cost += 6 + 3*b + r*r*4 + r*_size*4;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_hessenberg( const  Action_hessenberg & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_hessenberg Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_hessenberg( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_hessenberg Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
    Interface::free_matrix(X_ref,_size);
 | 
			
		||||
    Interface::free_matrix(X,_size);
 | 
			
		||||
    Interface::free_matrix(C,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "hessenberg_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return _cost;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
    Interface::copy_matrix(X_ref,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
      Interface::hessenberg(X,C,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
    // calculation check
 | 
			
		||||
    Interface::matrix_to_stl(C,resu_stl);
 | 
			
		||||
 | 
			
		||||
//     STL_interface<typename Interface::real_type>::hessenberg(X_stl,C_stl,_size);
 | 
			
		||||
//
 | 
			
		||||
//     typename Interface::real_type error=
 | 
			
		||||
//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
 | 
			
		||||
//
 | 
			
		||||
//     if (error>1.e-6){
 | 
			
		||||
//       INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
//       exit(0);
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix X_stl;
 | 
			
		||||
  typename Interface::stl_matrix C_stl;
 | 
			
		||||
  typename Interface::stl_matrix resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix X_ref;
 | 
			
		||||
  typename Interface::gene_matrix X;
 | 
			
		||||
  typename Interface::gene_matrix C;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
  double _cost;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_tridiagonalization {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_tridiagonalization( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_tridiagonalization Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL vector initialization
 | 
			
		||||
    init_matrix<pseudo_random>(X_stl,_size);
 | 
			
		||||
    
 | 
			
		||||
    for(int i=0; i<_size; ++i)
 | 
			
		||||
    {
 | 
			
		||||
      for(int j=0; j<i; ++j)
 | 
			
		||||
        X_stl[i][j] = X_stl[j][i];
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    init_matrix<null_function>(C_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(C,C_stl);
 | 
			
		||||
 | 
			
		||||
    _cost = 0;
 | 
			
		||||
    for (int j=0; j<_size-2; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      double r = std::max(0,_size-j-1);
 | 
			
		||||
      double b = std::max(0,_size-j-2);
 | 
			
		||||
      _cost += 6. + 3.*b + r*r*8.;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_tridiagonalization( const  Action_tridiagonalization & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_tridiagonalization Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_tridiagonalization( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_tridiagonalization Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
    Interface::free_matrix(X_ref,_size);
 | 
			
		||||
    Interface::free_matrix(X,_size);
 | 
			
		||||
    Interface::free_matrix(C,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void ) { return "tridiagonalization_"+Interface::name(); }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return _cost;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
    Interface::copy_matrix(X_ref,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
      Interface::tridiagonalization(X,C,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
    // calculation check
 | 
			
		||||
    Interface::matrix_to_stl(C,resu_stl);
 | 
			
		||||
 | 
			
		||||
//     STL_interface<typename Interface::real_type>::tridiagonalization(X_stl,C_stl,_size);
 | 
			
		||||
//
 | 
			
		||||
//     typename Interface::real_type error=
 | 
			
		||||
//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
 | 
			
		||||
//
 | 
			
		||||
//     if (error>1.e-6){
 | 
			
		||||
//       INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
//       exit(0);
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix X_stl;
 | 
			
		||||
  typename Interface::stl_matrix C_stl;
 | 
			
		||||
  typename Interface::stl_matrix resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix X_ref;
 | 
			
		||||
  typename Interface::gene_matrix X;
 | 
			
		||||
  typename Interface::gene_matrix C;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
  double _cost;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										124
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_lu_decomp.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										124
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_lu_decomp.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,124 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_lu_decomp.hh
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_LU_DECOMP
 | 
			
		||||
#define ACTION_LU_DECOMP
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_lu_decomp {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_lu_decomp( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_lu_decomp Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL vector initialization
 | 
			
		||||
    init_matrix<pseudo_random>(X_stl,_size);
 | 
			
		||||
 | 
			
		||||
    init_matrix<null_function>(C_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(C,C_stl);
 | 
			
		||||
 | 
			
		||||
    _cost = 2.0*size*size*size/3.0 + size*size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_lu_decomp( const  Action_lu_decomp & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_lu_decomp Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_lu_decomp( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_lu_decomp Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
    Interface::free_matrix(X_ref,_size);
 | 
			
		||||
    Interface::free_matrix(X,_size);
 | 
			
		||||
    Interface::free_matrix(C,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "complete_lu_decomp_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return _cost;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
    Interface::copy_matrix(X_ref,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
      Interface::lu_decomp(X,C,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
    // calculation check
 | 
			
		||||
    Interface::matrix_to_stl(C,resu_stl);
 | 
			
		||||
 | 
			
		||||
//     STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
 | 
			
		||||
//
 | 
			
		||||
//     typename Interface::real_type error=
 | 
			
		||||
//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
 | 
			
		||||
//
 | 
			
		||||
//     if (error>1.e-6){
 | 
			
		||||
//       INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
//       exit(0);
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix X_stl;
 | 
			
		||||
  typename Interface::stl_matrix C_stl;
 | 
			
		||||
  typename Interface::stl_matrix resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix X_ref;
 | 
			
		||||
  typename Interface::gene_matrix X;
 | 
			
		||||
  typename Interface::gene_matrix C;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
  double _cost;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										136
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_lu_solve.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_lu_solve.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,136 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_lu_solve.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef ACTION_LU_SOLVE
 | 
			
		||||
#define ACTION_LU_SOLVE
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_lu_solve 
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "lu_solve_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  static double nb_op_base(int size){
 | 
			
		||||
    return 2.0*size*size*size/3.0;  // questionable but not really important
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  static double calculate( int nb_calc, int size ) {
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
    
 | 
			
		||||
    typename Interface::stl_matrix A_stl;
 | 
			
		||||
    typename Interface::stl_vector B_stl;
 | 
			
		||||
    typename Interface::stl_vector X_stl;
 | 
			
		||||
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,size);
 | 
			
		||||
    init_vector<pseudo_random>(B_stl,size);
 | 
			
		||||
    init_vector<null_function>(X_stl,size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    typename Interface::gene_matrix A;
 | 
			
		||||
    typename Interface::gene_vector B;
 | 
			
		||||
    typename Interface::gene_vector X;
 | 
			
		||||
 | 
			
		||||
    typename Interface::gene_matrix LU; 
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::vector_from_stl(B,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(X,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(LU,A_stl);
 | 
			
		||||
  
 | 
			
		||||
    // local variable :
 | 
			
		||||
 | 
			
		||||
    typename Interface::Pivot_Vector pivot; // pivot vector
 | 
			
		||||
    Interface::new_Pivot_Vector(pivot,size);
 | 
			
		||||
    
 | 
			
		||||
    // timer utilities
 | 
			
		||||
 | 
			
		||||
    Portable_Timer chronos;
 | 
			
		||||
 | 
			
		||||
    // time measurement
 | 
			
		||||
 | 
			
		||||
    chronos.start();
 | 
			
		||||
    
 | 
			
		||||
    for (int ii=0;ii<nb_calc;ii++){
 | 
			
		||||
 | 
			
		||||
      // LU factorization
 | 
			
		||||
      Interface::copy_matrix(A,LU,size);
 | 
			
		||||
      Interface::LU_factor(LU,pivot,size);
 | 
			
		||||
      
 | 
			
		||||
      // LU solve
 | 
			
		||||
 | 
			
		||||
      Interface::LU_solve(LU,pivot,B,X,size);
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Time stop
 | 
			
		||||
 | 
			
		||||
    chronos.stop();
 | 
			
		||||
 | 
			
		||||
    double time=chronos.user_time();
 | 
			
		||||
  
 | 
			
		||||
    // check result :
 | 
			
		||||
 | 
			
		||||
    typename Interface::stl_vector B_new_stl(size);
 | 
			
		||||
    Interface::vector_to_stl(X,X_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl,X_stl,B_new_stl,size); 
 | 
			
		||||
  
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(B_stl,B_new_stl);
 | 
			
		||||
    
 | 
			
		||||
    if (error>1.e-5){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
      STL_interface<typename Interface::real_type>::display_vector(B_stl);
 | 
			
		||||
      STL_interface<typename Interface::real_type>::display_vector(B_new_stl);
 | 
			
		||||
      exit(0);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // deallocation and return time
 | 
			
		||||
    
 | 
			
		||||
    Interface::free_matrix(A,size);
 | 
			
		||||
    Interface::free_vector(B);
 | 
			
		||||
    Interface::free_vector(X);
 | 
			
		||||
    Interface::free_Pivot_Vector(pivot);
 | 
			
		||||
 | 
			
		||||
    return time;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,150 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_matrix_matrix_product.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_MATRIX_MATRIX_PRODUCT
 | 
			
		||||
#define ACTION_MATRIX_MATRIX_PRODUCT
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_matrix_matrix_product {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_matrix_matrix_product( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_matrix_matrix_product Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_matrix<pseudo_random>(B_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(X_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(B_ref,B_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(B,B_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_matrix_matrix_product( const  Action_matrix_matrix_product & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_matrix_matrix_product Copy Ctor");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_matrix_matrix_product( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_matrix_matrix_product Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A,_size);
 | 
			
		||||
    Interface::free_matrix(B,_size);
 | 
			
		||||
    Interface::free_matrix(X,_size);
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A_ref,_size);
 | 
			
		||||
    Interface::free_matrix(B_ref,_size);
 | 
			
		||||
    Interface::free_matrix(X_ref,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "matrix_matrix_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return 2.0*_size*_size*_size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
 | 
			
		||||
    Interface::copy_matrix(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_matrix(B_ref,B,_size);
 | 
			
		||||
    Interface::copy_matrix(X_ref,X,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
      Interface::matrix_matrix_product(A,B,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
 | 
			
		||||
    // calculation check
 | 
			
		||||
    if (_size<200)
 | 
			
		||||
    {
 | 
			
		||||
      Interface::matrix_to_stl(X,resu_stl);
 | 
			
		||||
      STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
 | 
			
		||||
      typename Interface::real_type error=
 | 
			
		||||
        STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
      if (error>1.e-6){
 | 
			
		||||
        INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
        exit(1);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix A_stl;
 | 
			
		||||
  typename Interface::stl_matrix B_stl;
 | 
			
		||||
  typename Interface::stl_matrix X_stl;
 | 
			
		||||
  typename Interface::stl_matrix resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A_ref;
 | 
			
		||||
  typename Interface::gene_matrix B_ref;
 | 
			
		||||
  typename Interface::gene_matrix X_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A;
 | 
			
		||||
  typename Interface::gene_matrix B;
 | 
			
		||||
  typename Interface::gene_matrix X;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,152 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_matrix_matrix_product_bis.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_MATRIX_MATRIX_PRODUCT_BIS
 | 
			
		||||
#define ACTION_MATRIX_MATRIX_PRODUCT_BIS
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include "STL_timer.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init_function.hh"
 | 
			
		||||
#include "init_vector.hh"
 | 
			
		||||
#include "init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_matrix_matrix_product_bis {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "matrix_matrix_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static double nb_op_base(int size){
 | 
			
		||||
    return 2.0*size*size*size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static double calculate( int nb_calc, int size ) {
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    typename Interface::stl_matrix A_stl;
 | 
			
		||||
    typename Interface::stl_matrix B_stl;
 | 
			
		||||
    typename Interface::stl_matrix X_stl;
 | 
			
		||||
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,size);
 | 
			
		||||
    init_matrix<pseudo_random>(B_stl,size);
 | 
			
		||||
    init_matrix<null_function>(X_stl,size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    typename Interface::gene_matrix A_ref;
 | 
			
		||||
    typename Interface::gene_matrix B_ref;
 | 
			
		||||
    typename Interface::gene_matrix X_ref;
 | 
			
		||||
 | 
			
		||||
    typename Interface::gene_matrix A;
 | 
			
		||||
    typename Interface::gene_matrix B;
 | 
			
		||||
    typename Interface::gene_matrix X;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(B_ref,B_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(B,B_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // STL_timer utilities
 | 
			
		||||
 | 
			
		||||
    STL_timer chronos;
 | 
			
		||||
 | 
			
		||||
    // Baseline evaluation
 | 
			
		||||
 | 
			
		||||
    chronos.start_baseline(nb_calc);
 | 
			
		||||
 | 
			
		||||
    do {
 | 
			
		||||
 | 
			
		||||
      Interface::copy_matrix(A_ref,A,size);
 | 
			
		||||
      Interface::copy_matrix(B_ref,B,size);
 | 
			
		||||
      Interface::copy_matrix(X_ref,X,size);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      //      Interface::matrix_matrix_product(A,B,X,size); This line must be commented !!!!
 | 
			
		||||
    }
 | 
			
		||||
    while(chronos.check());
 | 
			
		||||
 | 
			
		||||
    chronos.report(true);
 | 
			
		||||
 | 
			
		||||
    // Time measurement
 | 
			
		||||
 | 
			
		||||
    chronos.start(nb_calc);
 | 
			
		||||
 | 
			
		||||
    do {
 | 
			
		||||
 | 
			
		||||
      Interface::copy_matrix(A_ref,A,size);
 | 
			
		||||
      Interface::copy_matrix(B_ref,B,size);
 | 
			
		||||
      Interface::copy_matrix(X_ref,X,size);
 | 
			
		||||
 | 
			
		||||
      Interface::matrix_matrix_product(A,B,X,size); // here it is not commented !!!!
 | 
			
		||||
    }
 | 
			
		||||
    while(chronos.check());
 | 
			
		||||
 | 
			
		||||
    chronos.report(true);
 | 
			
		||||
 | 
			
		||||
    double time=chronos.calculated_time/2000.0;
 | 
			
		||||
 | 
			
		||||
    // calculation check
 | 
			
		||||
 | 
			
		||||
    typename Interface::stl_matrix resu_stl(size);
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_to_stl(X,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-6){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
      exit(1);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // deallocation and return time
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A,size);
 | 
			
		||||
    Interface::free_matrix(B,size);
 | 
			
		||||
    Interface::free_matrix(X,size);
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A_ref,size);
 | 
			
		||||
    Interface::free_matrix(B_ref,size);
 | 
			
		||||
    Interface::free_matrix(X_ref,size);
 | 
			
		||||
 | 
			
		||||
    return time;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,153 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_matrix_vector_product.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_MATRIX_VECTOR_PRODUCT
 | 
			
		||||
#define ACTION_MATRIX_VECTOR_PRODUCT
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_matrix_vector_product {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE Action_matrix_vector_product( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_matrix_vector_product Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(B_stl,_size);
 | 
			
		||||
    init_vector<null_function>(X_stl,_size);
 | 
			
		||||
    init_vector<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::vector_from_stl(B_ref,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(B,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::vector_from_stl(X,X_stl);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_matrix_vector_product( const  Action_matrix_vector_product & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_matrix_vector_product Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE ~Action_matrix_vector_product( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_matrix_vector_product Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A,_size);
 | 
			
		||||
    Interface::free_vector(B);
 | 
			
		||||
    Interface::free_vector(X);
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A_ref,_size);
 | 
			
		||||
    Interface::free_vector(B_ref);
 | 
			
		||||
    Interface::free_vector(X_ref);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "matrix_vector_" + Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return 2.0*_size*_size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE  void initialize( void ){
 | 
			
		||||
 | 
			
		||||
    Interface::copy_matrix(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_vector(B_ref,B,_size);
 | 
			
		||||
    Interface::copy_vector(X_ref,X,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void calculate( void ) {
 | 
			
		||||
      BTL_ASM_COMMENT("#begin matrix_vector_product");
 | 
			
		||||
      Interface::matrix_vector_product(A,B,X,_size);
 | 
			
		||||
      BTL_ASM_COMMENT("end matrix_vector_product");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void check_result( void ){
 | 
			
		||||
 | 
			
		||||
    // calculation check
 | 
			
		||||
 | 
			
		||||
    Interface::vector_to_stl(X,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::matrix_vector_product(A_stl,B_stl,X_stl,_size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-5){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
      exit(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix A_stl;
 | 
			
		||||
  typename Interface::stl_vector B_stl;
 | 
			
		||||
  typename Interface::stl_vector X_stl;
 | 
			
		||||
  typename Interface::stl_vector resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A_ref;
 | 
			
		||||
  typename Interface::gene_vector B_ref;
 | 
			
		||||
  typename Interface::gene_vector X_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A;
 | 
			
		||||
  typename Interface::gene_vector B;
 | 
			
		||||
  typename Interface::gene_vector X;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										125
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_partial_lu.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_partial_lu.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,125 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_partial_lu.hh
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_PARTIAL_LU
 | 
			
		||||
#define ACTION_PARTIAL_LU
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_partial_lu {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_partial_lu( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_partial_lu Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL vector initialization
 | 
			
		||||
    init_matrix<pseudo_random>(X_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(C_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // make sure X is invertible
 | 
			
		||||
    for (int i=0; i<_size; ++i)
 | 
			
		||||
      X_stl[i][i] = X_stl[i][i] * 1e2 + 1;
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
    Interface::matrix_from_stl(C,C_stl);
 | 
			
		||||
 | 
			
		||||
    _cost = 2.0*size*size*size/3.0 + size*size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_partial_lu( const  Action_partial_lu & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_partial_lu Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_partial_lu( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_partial_lu Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
    Interface::free_matrix(X_ref,_size);
 | 
			
		||||
    Interface::free_matrix(X,_size);
 | 
			
		||||
    Interface::free_matrix(C,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "partial_lu_decomp_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return _cost;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
    Interface::copy_matrix(X_ref,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
      Interface::partial_lu_decomp(X,C,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
    // calculation check
 | 
			
		||||
//     Interface::matrix_to_stl(C,resu_stl);
 | 
			
		||||
 | 
			
		||||
//     STL_interface<typename Interface::real_type>::lu_decomp(X_stl,C_stl,_size);
 | 
			
		||||
//
 | 
			
		||||
//     typename Interface::real_type error=
 | 
			
		||||
//       STL_interface<typename Interface::real_type>::norm_diff(C_stl,resu_stl);
 | 
			
		||||
//
 | 
			
		||||
//     if (error>1.e-6){
 | 
			
		||||
//       INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
//       exit(0);
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix X_stl;
 | 
			
		||||
  typename Interface::stl_matrix C_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix X_ref;
 | 
			
		||||
  typename Interface::gene_matrix X;
 | 
			
		||||
  typename Interface::gene_matrix C;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
  double _cost;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										116
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_rot.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_rot.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,116 @@
 | 
			
		||||
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_ROT
 | 
			
		||||
#define ACTION_ROT
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_rot {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
  BTL_DONT_INLINE Action_rot( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_rot Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
    typename Interface::stl_matrix tmp;
 | 
			
		||||
    init_vector<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(B_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::vector_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::vector_from_stl(A,A_stl);
 | 
			
		||||
    Interface::vector_from_stl(B_ref,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(B,B_stl);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
  Action_rot( const  Action_rot & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_rot Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
  BTL_DONT_INLINE ~Action_rot( void ){
 | 
			
		||||
    MESSAGE("Action_rot Dtor");
 | 
			
		||||
    Interface::free_vector(A);
 | 
			
		||||
    Interface::free_vector(B);
 | 
			
		||||
    Interface::free_vector(A_ref);
 | 
			
		||||
    Interface::free_vector(B_ref);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "rot_" + Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return 6.0*_size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE  void initialize( void ){
 | 
			
		||||
    Interface::copy_vector(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_vector(B_ref,B,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void calculate( void ) {
 | 
			
		||||
    BTL_ASM_COMMENT("#begin rot");
 | 
			
		||||
    Interface::rot(A,B,0.5,0.6,_size);
 | 
			
		||||
    BTL_ASM_COMMENT("end rot");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void check_result( void ){
 | 
			
		||||
    // calculation check
 | 
			
		||||
//     Interface::vector_to_stl(X,resu_stl);
 | 
			
		||||
 | 
			
		||||
//     STL_interface<typename Interface::real_type>::rot(A_stl,B_stl,X_stl,_size);
 | 
			
		||||
 | 
			
		||||
//     typename Interface::real_type error=
 | 
			
		||||
//       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
//     if (error>1.e-3){
 | 
			
		||||
//       INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
//       exit(0);
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_vector A_stl;
 | 
			
		||||
  typename Interface::stl_vector B_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_vector A_ref;
 | 
			
		||||
  typename Interface::gene_vector B_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_vector A;
 | 
			
		||||
  typename Interface::gene_vector B;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										139
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_symv.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										139
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_symv.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,139 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_symv.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_SYMV
 | 
			
		||||
#define ACTION_SYMV
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_symv {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE Action_symv( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_symv Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
    init_matrix_symm<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(B_stl,_size);
 | 
			
		||||
    init_vector<null_function>(X_stl,_size);
 | 
			
		||||
    init_vector<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::vector_from_stl(B_ref,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(B,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::vector_from_stl(X,X_stl);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_symv( const  Action_symv & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_symv Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
  BTL_DONT_INLINE ~Action_symv( void ){
 | 
			
		||||
    Interface::free_matrix(A,_size);
 | 
			
		||||
    Interface::free_vector(B);
 | 
			
		||||
    Interface::free_vector(X);
 | 
			
		||||
    Interface::free_matrix(A_ref,_size);
 | 
			
		||||
    Interface::free_vector(B_ref);
 | 
			
		||||
    Interface::free_vector(X_ref);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "symv_" + Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return 2.0*_size*_size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE  void initialize( void ){
 | 
			
		||||
 | 
			
		||||
    Interface::copy_matrix(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_vector(B_ref,B,_size);
 | 
			
		||||
    Interface::copy_vector(X_ref,X,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void calculate( void ) {
 | 
			
		||||
      BTL_ASM_COMMENT("#begin symv");
 | 
			
		||||
      Interface::symv(A,B,X,_size);
 | 
			
		||||
      BTL_ASM_COMMENT("end symv");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void check_result( void ){
 | 
			
		||||
    if (_size>128) return;
 | 
			
		||||
    // calculation check
 | 
			
		||||
    Interface::vector_to_stl(X,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::symv(A_stl,B_stl,X_stl,_size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-5){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
      exit(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix A_stl;
 | 
			
		||||
  typename Interface::stl_vector B_stl;
 | 
			
		||||
  typename Interface::stl_vector X_stl;
 | 
			
		||||
  typename Interface::stl_vector resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A_ref;
 | 
			
		||||
  typename Interface::gene_vector B_ref;
 | 
			
		||||
  typename Interface::gene_vector X_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A;
 | 
			
		||||
  typename Interface::gene_vector B;
 | 
			
		||||
  typename Interface::gene_vector X;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										133
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_syr2.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_syr2.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,133 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_syr2.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_SYR2
 | 
			
		||||
#define ACTION_SYR2
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_syr2 {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE Action_syr2( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
    typename Interface::stl_matrix tmp;
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(B_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(X_stl,_size);
 | 
			
		||||
    init_vector<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::vector_from_stl(B_ref,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(B,B_stl);
 | 
			
		||||
    Interface::vector_from_stl(X_ref,X_stl);
 | 
			
		||||
    Interface::vector_from_stl(X,X_stl);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
  Action_syr2( const  Action_syr2 & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_syr2 Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
  BTL_DONT_INLINE ~Action_syr2( void ){
 | 
			
		||||
    Interface::free_matrix(A,_size);
 | 
			
		||||
    Interface::free_vector(B);
 | 
			
		||||
    Interface::free_vector(X);
 | 
			
		||||
    Interface::free_matrix(A_ref,_size);
 | 
			
		||||
    Interface::free_vector(B_ref);
 | 
			
		||||
    Interface::free_vector(X_ref);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "syr2_" + Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return 2.0*_size*_size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE  void initialize( void ){
 | 
			
		||||
    Interface::copy_matrix(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_vector(B_ref,B,_size);
 | 
			
		||||
    Interface::copy_vector(X_ref,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void calculate( void ) {
 | 
			
		||||
      BTL_ASM_COMMENT("#begin syr2");
 | 
			
		||||
      Interface::syr2(A,B,X,_size);
 | 
			
		||||
      BTL_ASM_COMMENT("end syr2");
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE void check_result( void ){
 | 
			
		||||
    // calculation check
 | 
			
		||||
    Interface::vector_to_stl(X,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::syr2(A_stl,B_stl,X_stl,_size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-3){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
//       exit(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix A_stl;
 | 
			
		||||
  typename Interface::stl_vector B_stl;
 | 
			
		||||
  typename Interface::stl_vector X_stl;
 | 
			
		||||
  typename Interface::stl_vector resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A_ref;
 | 
			
		||||
  typename Interface::gene_vector B_ref;
 | 
			
		||||
  typename Interface::gene_vector X_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A;
 | 
			
		||||
  typename Interface::gene_vector B;
 | 
			
		||||
  typename Interface::gene_vector X;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										137
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_trisolve.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_trisolve.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,137 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_trisolve.hh
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_TRISOLVE
 | 
			
		||||
#define ACTION_TRISOLVE
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_trisolve {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_trisolve( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_trisolve Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL vector initialization
 | 
			
		||||
    init_matrix<pseudo_random>(L_stl,_size);
 | 
			
		||||
    init_vector<pseudo_random>(B_stl,_size);
 | 
			
		||||
    init_vector<null_function>(X_stl,_size);
 | 
			
		||||
    for (int j=0; j<_size; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      for (int i=0; i<j; ++i)
 | 
			
		||||
        L_stl[j][i] = 0;
 | 
			
		||||
      L_stl[j][j] += 3;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    init_vector<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
    Interface::matrix_from_stl(L,L_stl);
 | 
			
		||||
    Interface::vector_from_stl(X,X_stl);
 | 
			
		||||
    Interface::vector_from_stl(B,B_stl);
 | 
			
		||||
 | 
			
		||||
    _cost = 0;
 | 
			
		||||
    for (int j=0; j<_size; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      _cost += 2*j + 1;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_trisolve( const  Action_trisolve & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_trisolve Copy Ctor");
 | 
			
		||||
    exit(1);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_trisolve( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_trisolve Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
    Interface::free_matrix(L,_size);
 | 
			
		||||
    Interface::free_vector(B);
 | 
			
		||||
    Interface::free_vector(X);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "trisolve_vector_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return _cost;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
    //Interface::copy_vector(X_ref,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
      Interface::trisolve_lower(L,B,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result(){
 | 
			
		||||
    if (_size>128) return;
 | 
			
		||||
    // calculation check
 | 
			
		||||
    Interface::vector_to_stl(X,resu_stl);
 | 
			
		||||
 | 
			
		||||
    STL_interface<typename Interface::real_type>::trisolve_lower(L_stl,B_stl,X_stl,_size);
 | 
			
		||||
 | 
			
		||||
    typename Interface::real_type error=
 | 
			
		||||
      STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
 | 
			
		||||
    if (error>1.e-4){
 | 
			
		||||
      INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
      exit(2);
 | 
			
		||||
    } //else INFOS("CALCULATION OK...residual=" << error);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix L_stl;
 | 
			
		||||
  typename Interface::stl_vector X_stl;
 | 
			
		||||
  typename Interface::stl_vector B_stl;
 | 
			
		||||
  typename Interface::stl_vector resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix L;
 | 
			
		||||
  typename Interface::gene_vector X;
 | 
			
		||||
  typename Interface::gene_vector B;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
  double _cost;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										165
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_trisolve_matrix.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										165
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_trisolve_matrix.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,165 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_trisolve_matrix.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_TRISOLVE_MATRIX_PRODUCT
 | 
			
		||||
#define ACTION_TRISOLVE_MATRIX_PRODUCT
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_trisolve_matrix {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_trisolve_matrix( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_trisolve_matrix Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_matrix<pseudo_random>(B_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(X_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    for (int j=0; j<_size; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      for (int i=0; i<j; ++i)
 | 
			
		||||
        A_stl[j][i] = 0;
 | 
			
		||||
      A_stl[j][j] += 3;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(B_ref,B_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(B,B_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
 | 
			
		||||
    _cost = 0;
 | 
			
		||||
    for (int j=0; j<_size; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      _cost += 2*j + 1;
 | 
			
		||||
    }
 | 
			
		||||
    _cost *= _size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_trisolve_matrix( const  Action_trisolve_matrix & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_trisolve_matrix Copy Ctor");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_trisolve_matrix( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_trisolve_matrix Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A,_size);
 | 
			
		||||
    Interface::free_matrix(B,_size);
 | 
			
		||||
    Interface::free_matrix(X,_size);
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A_ref,_size);
 | 
			
		||||
    Interface::free_matrix(B_ref,_size);
 | 
			
		||||
    Interface::free_matrix(X_ref,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "trisolve_matrix_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return _cost;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
 | 
			
		||||
    Interface::copy_matrix(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_matrix(B_ref,B,_size);
 | 
			
		||||
    Interface::copy_matrix(X_ref,X,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
      Interface::trisolve_lower_matrix(A,B,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
 | 
			
		||||
    // calculation check
 | 
			
		||||
 | 
			
		||||
//     Interface::matrix_to_stl(X,resu_stl);
 | 
			
		||||
//
 | 
			
		||||
//     STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
 | 
			
		||||
//
 | 
			
		||||
//     typename Interface::real_type error=
 | 
			
		||||
//       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
//
 | 
			
		||||
//     if (error>1.e-6){
 | 
			
		||||
//       INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
// //       exit(1);
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix A_stl;
 | 
			
		||||
  typename Interface::stl_matrix B_stl;
 | 
			
		||||
  typename Interface::stl_matrix X_stl;
 | 
			
		||||
  typename Interface::stl_matrix resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A_ref;
 | 
			
		||||
  typename Interface::gene_matrix B_ref;
 | 
			
		||||
  typename Interface::gene_matrix X_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A;
 | 
			
		||||
  typename Interface::gene_matrix B;
 | 
			
		||||
  typename Interface::gene_matrix X;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
  double _cost;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										165
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_trmm.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										165
									
								
								cs440-acg/ext/eigen/bench/btl/actions/action_trmm.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,165 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  action_trmm.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef ACTION_TRMM
 | 
			
		||||
#define ACTION_TRMM
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "init/init_function.hh"
 | 
			
		||||
#include "init/init_vector.hh"
 | 
			
		||||
#include "init/init_matrix.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class Interface>
 | 
			
		||||
class Action_trmm {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  // Ctor
 | 
			
		||||
 | 
			
		||||
  Action_trmm( int size ):_size(size)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Action_trmm Ctor");
 | 
			
		||||
 | 
			
		||||
    // STL matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    init_matrix<pseudo_random>(A_stl,_size);
 | 
			
		||||
    init_matrix<pseudo_random>(B_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(X_stl,_size);
 | 
			
		||||
    init_matrix<null_function>(resu_stl,_size);
 | 
			
		||||
 | 
			
		||||
    for (int j=0; j<_size; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      for (int i=0; i<j; ++i)
 | 
			
		||||
        A_stl[j][i] = 0;
 | 
			
		||||
      A_stl[j][j] += 3;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // generic matrix and vector initialization
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A_ref,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(B_ref,B_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X_ref,X_stl);
 | 
			
		||||
 | 
			
		||||
    Interface::matrix_from_stl(A,A_stl);
 | 
			
		||||
    Interface::matrix_from_stl(B,B_stl);
 | 
			
		||||
    Interface::matrix_from_stl(X,X_stl);
 | 
			
		||||
 | 
			
		||||
    _cost = 0;
 | 
			
		||||
    for (int j=0; j<_size; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      _cost += 2*j + 1;
 | 
			
		||||
    }
 | 
			
		||||
    _cost *= _size;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // invalidate copy ctor
 | 
			
		||||
 | 
			
		||||
  Action_trmm( const  Action_trmm & )
 | 
			
		||||
  {
 | 
			
		||||
    INFOS("illegal call to Action_trmm Copy Ctor");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dtor
 | 
			
		||||
 | 
			
		||||
  ~Action_trmm( void ){
 | 
			
		||||
 | 
			
		||||
    MESSAGE("Action_trmm Dtor");
 | 
			
		||||
 | 
			
		||||
    // deallocation
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A,_size);
 | 
			
		||||
    Interface::free_matrix(B,_size);
 | 
			
		||||
    Interface::free_matrix(X,_size);
 | 
			
		||||
 | 
			
		||||
    Interface::free_matrix(A_ref,_size);
 | 
			
		||||
    Interface::free_matrix(B_ref,_size);
 | 
			
		||||
    Interface::free_matrix(X_ref,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // action name
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "trmm_"+Interface::name();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double nb_op_base( void ){
 | 
			
		||||
    return _cost;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void initialize( void ){
 | 
			
		||||
 | 
			
		||||
    Interface::copy_matrix(A_ref,A,_size);
 | 
			
		||||
    Interface::copy_matrix(B_ref,B,_size);
 | 
			
		||||
    Interface::copy_matrix(X_ref,X,_size);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void calculate( void ) {
 | 
			
		||||
      Interface::trmm(A,B,X,_size);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void check_result( void ){
 | 
			
		||||
 | 
			
		||||
    // calculation check
 | 
			
		||||
 | 
			
		||||
//     Interface::matrix_to_stl(X,resu_stl);
 | 
			
		||||
//
 | 
			
		||||
//     STL_interface<typename Interface::real_type>::matrix_matrix_product(A_stl,B_stl,X_stl,_size);
 | 
			
		||||
//
 | 
			
		||||
//     typename Interface::real_type error=
 | 
			
		||||
//       STL_interface<typename Interface::real_type>::norm_diff(X_stl,resu_stl);
 | 
			
		||||
//
 | 
			
		||||
//     if (error>1.e-6){
 | 
			
		||||
//       INFOS("WRONG CALCULATION...residual=" << error);
 | 
			
		||||
// //       exit(1);
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private :
 | 
			
		||||
 | 
			
		||||
  typename Interface::stl_matrix A_stl;
 | 
			
		||||
  typename Interface::stl_matrix B_stl;
 | 
			
		||||
  typename Interface::stl_matrix X_stl;
 | 
			
		||||
  typename Interface::stl_matrix resu_stl;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A_ref;
 | 
			
		||||
  typename Interface::gene_matrix B_ref;
 | 
			
		||||
  typename Interface::gene_matrix X_ref;
 | 
			
		||||
 | 
			
		||||
  typename Interface::gene_matrix A;
 | 
			
		||||
  typename Interface::gene_matrix B;
 | 
			
		||||
  typename Interface::gene_matrix X;
 | 
			
		||||
 | 
			
		||||
  int _size;
 | 
			
		||||
  double _cost;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										21
									
								
								cs440-acg/ext/eigen/bench/btl/actions/basic_actions.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								cs440-acg/ext/eigen/bench/btl/actions/basic_actions.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,21 @@
 | 
			
		||||
 | 
			
		||||
#include "action_axpy.hh"
 | 
			
		||||
#include "action_axpby.hh"
 | 
			
		||||
 | 
			
		||||
#include "action_matrix_vector_product.hh"
 | 
			
		||||
#include "action_atv_product.hh"
 | 
			
		||||
 | 
			
		||||
#include "action_matrix_matrix_product.hh"
 | 
			
		||||
// #include "action_ata_product.hh"
 | 
			
		||||
#include "action_aat_product.hh"
 | 
			
		||||
 | 
			
		||||
#include "action_trisolve.hh"
 | 
			
		||||
#include "action_trmm.hh"
 | 
			
		||||
#include "action_symv.hh"
 | 
			
		||||
// #include "action_symm.hh"
 | 
			
		||||
#include "action_syr2.hh"
 | 
			
		||||
#include "action_ger.hh"
 | 
			
		||||
#include "action_rot.hh"
 | 
			
		||||
 | 
			
		||||
// #include "action_lu_solve.hh"
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										32
									
								
								cs440-acg/ext/eigen/bench/btl/data/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								cs440-acg/ext/eigen/bench/btl/data/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,32 @@
 | 
			
		||||
 | 
			
		||||
# Target whose post-build steps populate the binary directory with the
# post-processing scripts and two small text files read by go_mean.
add_custom_target(copy_scripts)

set(script_files go_mean mk_mean_script.sh mk_new_gnuplot.sh
    perlib_plot_settings.txt action_settings.txt gnuplot_common_settings.hh )

# Copy each script/settings file from the source dir to the binary dir.
foreach(script_file ${script_files})
  add_custom_command(
    TARGET copy_scripts
    POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/${script_file} ${CMAKE_CURRENT_BINARY_DIR}/
    ARGS
  )
endforeach(script_file)

# Record the first line of the compiler's --version output.
add_custom_command(
  TARGET copy_scripts
  POST_BUILD
  COMMAND ${CMAKE_CXX_COMPILER} --version | head -n 1 > ${CMAKE_CURRENT_BINARY_DIR}/compiler_version.txt
  ARGS
)

# Record the Eigen source directory.
add_custom_command(
  TARGET copy_scripts
  POST_BUILD
  COMMAND echo "${Eigen_SOURCE_DIR}" > ${CMAKE_CURRENT_BINARY_DIR}/eigen_root_dir.txt
  ARGS
)

# Post-processing executables; 'main' depends on copy_scripts.
add_executable(smooth smooth.cxx)
add_executable(regularize regularize.cxx)
add_executable(main mean.cxx)
add_dependencies(main copy_scripts)
 | 
			
		||||
							
								
								
									
										19
									
								
								cs440-acg/ext/eigen/bench/btl/data/action_settings.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								cs440-acg/ext/eigen/bench/btl/data/action_settings.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,19 @@
 | 
			
		||||
aat ; "{/*1.5 A x A^T}" ; "matrix size" ; 4:5000
 | 
			
		||||
ata ; "{/*1.5 A^T x A}" ; "matrix size" ; 4:5000
 | 
			
		||||
atv ; "{/*1.5 matrix^T x vector}" ; "matrix size" ; 4:5000
 | 
			
		||||
axpby ; "{/*1.5 Y = alpha X + beta Y}" ; "vector size" ; 5:1000000
 | 
			
		||||
axpy ; "{/*1.5 Y += alpha X}" ; "vector size" ; 5:1000000
 | 
			
		||||
matrix_matrix ; "{/*1.5 matrix matrix product}" ; "matrix size" ; 4:5000
 | 
			
		||||
matrix_vector ; "{/*1.5 matrix vector product}" ; "matrix size" ; 4:5000
 | 
			
		||||
trmm ; "{/*1.5 triangular matrix matrix product}" ; "matrix size" ; 4:5000
 | 
			
		||||
trisolve_vector ; "{/*1.5 triangular solver - vector (X = inv(L) X)}" ; "size" ; 4:5000
 | 
			
		||||
trisolve_matrix ; "{/*1.5 triangular solver - matrix (M = inv(L) M)}" ; "size" ; 4:5000
 | 
			
		||||
cholesky ; "{/*1.5 Cholesky decomposition}" ; "matrix size" ; 4:5000
 | 
			
		||||
complete_lu_decomp ; "{/*1.5 Complete LU decomposition}" ; "matrix size" ; 4:5000
 | 
			
		||||
partial_lu_decomp ; "{/*1.5 Partial LU decomposition}" ; "matrix size" ; 4:5000
 | 
			
		||||
tridiagonalization ; "{/*1.5 Tridiagonalization}" ; "matrix size" ; 4:5000
 | 
			
		||||
hessenberg ; "{/*1.5 Hessenberg decomposition}" ; "matrix size" ; 4:5000
 | 
			
		||||
symv ; "{/*1.5 symmetric matrix vector product}" ; "matrix size" ; 4:5000
 | 
			
		||||
syr2 ; "{/*1.5 symmetric rank-2 update (A += u^T v + u v^T)}" ; "matrix size" ; 4:5000
 | 
			
		||||
ger ; "{/*1.5 general rank-1 update (A += u v^T)}" ; "matrix size" ; 4:5000
 | 
			
		||||
rot ; "{/*1.5 apply rotation in the plane}" ; "vector size" ; 4:1000000
 | 
			
		||||
@@ -0,0 +1,87 @@
 | 
			
		||||
set noclip points
 | 
			
		||||
set clip one
 | 
			
		||||
set noclip two
 | 
			
		||||
set bar 1.000000
 | 
			
		||||
set border 31 lt -1 lw 1.000
 | 
			
		||||
set xdata
 | 
			
		||||
set ydata
 | 
			
		||||
set zdata
 | 
			
		||||
set x2data
 | 
			
		||||
set y2data
 | 
			
		||||
set boxwidth
 | 
			
		||||
set dummy x,y
 | 
			
		||||
set format x "%g"
 | 
			
		||||
set format y "%g"
 | 
			
		||||
set format x2 "%g"
 | 
			
		||||
set format y2 "%g"
 | 
			
		||||
set format z "%g"
 | 
			
		||||
set angles radians
 | 
			
		||||
set nogrid
 | 
			
		||||
set key title ""
 | 
			
		||||
set key left top Right noreverse box linetype -2 linewidth 1.000 samplen 4 spacing 1 width 0
 | 
			
		||||
set nolabel
 | 
			
		||||
set noarrow
 | 
			
		||||
# set nolinestyle # deprecated
 | 
			
		||||
set nologscale
 | 
			
		||||
set logscale x 10
 | 
			
		||||
set offsets 0, 0, 0, 0
 | 
			
		||||
set pointsize 1
 | 
			
		||||
set encoding default
 | 
			
		||||
set nopolar
 | 
			
		||||
set noparametric
 | 
			
		||||
set view 60, 30, 1, 1
 | 
			
		||||
set samples 100, 100
 | 
			
		||||
set isosamples 10, 10
 | 
			
		||||
set surface
 | 
			
		||||
set nocontour
 | 
			
		||||
set clabel '%8.3g'
 | 
			
		||||
set mapping cartesian
 | 
			
		||||
set nohidden3d
 | 
			
		||||
set cntrparam order 4
 | 
			
		||||
set cntrparam linear
 | 
			
		||||
set cntrparam levels auto 5
 | 
			
		||||
set cntrparam points 5
 | 
			
		||||
set size ratio 0 1,1
 | 
			
		||||
set origin 0,0
 | 
			
		||||
# set data style lines
 | 
			
		||||
# set function style lines
 | 
			
		||||
set xzeroaxis lt -2 lw 1.000
 | 
			
		||||
set x2zeroaxis lt -2 lw 1.000
 | 
			
		||||
set yzeroaxis lt -2 lw 1.000
 | 
			
		||||
set y2zeroaxis lt -2 lw 1.000
 | 
			
		||||
set tics in
 | 
			
		||||
set ticslevel 0.5
 | 
			
		||||
set tics scale 1, 0.5
 | 
			
		||||
set mxtics default
 | 
			
		||||
set mytics default
 | 
			
		||||
set mx2tics default
 | 
			
		||||
set my2tics default
 | 
			
		||||
set xtics border mirror norotate autofreq
 | 
			
		||||
set ytics border mirror norotate autofreq
 | 
			
		||||
set ztics border nomirror norotate autofreq
 | 
			
		||||
set nox2tics
 | 
			
		||||
set noy2tics
 | 
			
		||||
set timestamp "" bottom norotate offset 0,0
 | 
			
		||||
set rrange [ * : * ] noreverse nowriteback  # (currently [-0:10] )
 | 
			
		||||
set trange [ * : * ] noreverse nowriteback  # (currently [-5:5] )
 | 
			
		||||
set urange [ * : * ] noreverse nowriteback  # (currently [-5:5] )
 | 
			
		||||
set vrange [ * : * ] noreverse nowriteback  # (currently [-5:5] )
 | 
			
		||||
set xlabel "matrix size" offset 0,0
 | 
			
		||||
set x2label "" offset 0,0
 | 
			
		||||
set timefmt "%d/%m/%y\n%H:%M"
 | 
			
		||||
set xrange [ 10 : 1000 ] noreverse nowriteback
 | 
			
		||||
set x2range [ * : * ] noreverse nowriteback  # (currently [-10:10] )
 | 
			
		||||
set ylabel "MFLOPS" offset 0,0
 | 
			
		||||
set y2label "" offset 0,0
 | 
			
		||||
set yrange [ * : * ] noreverse nowriteback  # (currently [-10:10] )
 | 
			
		||||
set y2range [ * : * ] noreverse nowriteback  # (currently [-10:10] )
 | 
			
		||||
set zlabel "" offset 0,0
 | 
			
		||||
set zrange [ * : * ] noreverse nowriteback  # (currently [-10:10] )
 | 
			
		||||
set zero 1e-08
 | 
			
		||||
set lmargin -1
 | 
			
		||||
set bmargin -1
 | 
			
		||||
set rmargin -1
 | 
			
		||||
set tmargin -1
 | 
			
		||||
set locale "C"
 | 
			
		||||
set xrange [4:1024]
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										58
									
								
								cs440-acg/ext/eigen/bench/btl/data/go_mean
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										58
									
								
								cs440-acg/ext/eigen/bench/btl/data/go_mean
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,58 @@
 | 
			
		||||
#!/bin/bash

# Usage: go_mean working_directory [tiny|large [prefix]]
#
# Writes index.html and mean.html into the working directory: first a short
# configuration summary (CPU, compiler, eigen revision), then one section per
# benchmark action produced by sourcing mk_mean_script.sh.
# Reads eigen_root_dir.txt and compiler_version.txt from the current directory.

# Numeric comparisons inside [ ] must use -lt/-ge: '<' and '>' are parsed as
# redirections there, which made the tests meaningless and created stray
# files named after the number.
if [ $# -lt 1 ]; then
  echo "Usage: $0 working_directory [tiny|large [prefix]]"
else

mkdir -p $1
##cp ../libs/*/*.dat $1

# Defaults, overridden by the optional 2nd and 3rd arguments.
# prefix is reset explicitly so a value inherited from the environment is
# never picked up.
mode=large
prefix=
if [ $# -ge 2 ]; then
  mode=$2
fi
if [ $# -ge 3 ]; then
  prefix=$3
fi

EIGENDIR=`cat eigen_root_dir.txt`

webpagefilename=$1/index.html
meanstatsfilename=$1/mean.html

# Start both pages from scratch, then emit the configuration header.
echo ''  > $meanstatsfilename
echo ''  > $webpagefilename
echo '<p><strong>Configuration</strong>'  >> $webpagefilename
echo '<ul>'\
  '<li>' `cat /proc/cpuinfo | grep "model name" | head -n 1`\
  '  (' `uname -m` ')</li>'\
  '<li> compiler: ' `cat compiler_version.txt` '</li>'\
  '<li> eigen3: ' `hg identify -i $EIGENDIR` '</li>'\
  '</ul>' \
  '</p>'  >> $webpagefilename

# One call per action: name, working dir, four size bounds (after the
# constant 11), then mode and prefix.
# $mode and $prefix are left unquoted so an empty prefix passes no argument.
source mk_mean_script.sh axpy $1 11 2500 100000 250000  $mode $prefix
source mk_mean_script.sh axpby $1 11 2500 100000 250000 $mode $prefix
source mk_mean_script.sh matrix_vector $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh atv $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh matrix_matrix $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh aat $1 11 100 300 1000 $mode $prefix
# source mk_mean_script.sh ata $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trmm $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trisolve_vector $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh trisolve_matrix $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh cholesky $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh partial_lu_decomp $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh tridiagonalization $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh hessenberg $1 11 100 300 1000 $mode $prefix
source mk_mean_script.sh symv $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh syr2 $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh ger $1 11 50 300 1000 $mode $prefix
source mk_mean_script.sh rot $1 11 2500 100000 250000 $mode $prefix
source mk_mean_script.sh complete_lu_decomp $1 11 100 300 1000 $mode $prefix

fi

## compile the web page ##

#echo `cat footer.html` >> $webpagefilename
 | 
			
		||||
							
								
								
									
										182
									
								
								cs440-acg/ext/eigen/bench/btl/data/mean.cxx
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										182
									
								
								cs440-acg/ext/eigen/bench/btl/data/mean.cxx
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,182 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  mean.cxx
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:15 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include "bench_parameter.hh"
 | 
			
		||||
#include "utils/xy_file.hh"
 | 
			
		||||
#include <set>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max);
 | 
			
		||||
 | 
			
		||||
class Lib_Mean{
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
  Lib_Mean( void ):_lib_name(),_mean_in_cache(),_mean_out_of_cache(){
 | 
			
		||||
    MESSAGE("Lib_mean Default Ctor");
 | 
			
		||||
    MESSAGE("!!! should not be used");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
  Lib_Mean(const string & name, const double & mic, const double & moc):_lib_name(name),_mean_in_cache(mic),_mean_out_of_cache(moc){
 | 
			
		||||
    MESSAGE("Lib_mean Ctor");
 | 
			
		||||
  }
 | 
			
		||||
  Lib_Mean(const Lib_Mean & lm):_lib_name(lm._lib_name),_mean_in_cache(lm._mean_in_cache),_mean_out_of_cache(lm._mean_out_of_cache){
 | 
			
		||||
    MESSAGE("Lib_mean Copy Ctor");
 | 
			
		||||
  }
 | 
			
		||||
  ~Lib_Mean( void ){
 | 
			
		||||
    MESSAGE("Lib_mean Dtor");
 | 
			
		||||
  }
 | 
			
		||||
    
 | 
			
		||||
  double _mean_in_cache;
 | 
			
		||||
  double _mean_out_of_cache;
 | 
			
		||||
  string _lib_name;
 | 
			
		||||
 | 
			
		||||
  bool operator < ( const Lib_Mean &right) const 
 | 
			
		||||
  {
 | 
			
		||||
    //return ( this->_mean_out_of_cache > right._mean_out_of_cache) ;
 | 
			
		||||
    return ( this->_mean_in_cache > right._mean_in_cache) ;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}; 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
int main( int argc , char *argv[] )
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  if (argc<6){
 | 
			
		||||
    INFOS("!!! Error ... usage : main what mic Mic moc Moc filename1 finename2...");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
  INFOS(argc);
 | 
			
		||||
 | 
			
		||||
  int min_in_cache=atoi(argv[2]);
 | 
			
		||||
  int max_in_cache=atoi(argv[3]);
 | 
			
		||||
  int min_out_of_cache=atoi(argv[4]);
 | 
			
		||||
  int max_out_of_cache=atoi(argv[5]);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  multiset<Lib_Mean> s_lib_mean ;
 | 
			
		||||
 | 
			
		||||
  for (int i=6;i<argc;i++){
 | 
			
		||||
    
 | 
			
		||||
    string filename=argv[i];
 | 
			
		||||
    
 | 
			
		||||
    INFOS(filename);
 | 
			
		||||
 | 
			
		||||
    double mic=0;
 | 
			
		||||
    double moc=0;
 | 
			
		||||
 | 
			
		||||
    {
 | 
			
		||||
      
 | 
			
		||||
      vector<int> tab_sizes;
 | 
			
		||||
      vector<double> tab_mflops;
 | 
			
		||||
 | 
			
		||||
      read_xy_file(filename,tab_sizes,tab_mflops);
 | 
			
		||||
 | 
			
		||||
      mic=mean_calc(tab_sizes,tab_mflops,min_in_cache,max_in_cache);
 | 
			
		||||
      moc=mean_calc(tab_sizes,tab_mflops,min_out_of_cache,max_out_of_cache);
 | 
			
		||||
 | 
			
		||||
      Lib_Mean cur_lib_mean(filename,mic,moc);
 | 
			
		||||
      
 | 
			
		||||
      s_lib_mean.insert(cur_lib_mean);	
 | 
			
		||||
 | 
			
		||||
    }   
 | 
			
		||||
           
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  cout << "<TABLE BORDER CELLPADDING=2>" << endl ;
 | 
			
		||||
  cout << "  <TR>" << endl ;
 | 
			
		||||
  cout << "    <TH ALIGN=CENTER> " << argv[1] << " </TH>" << endl ;
 | 
			
		||||
  cout << "    <TH ALIGN=CENTER> <a href=""#mean_marker""> in cache <BR> mean perf <BR> Mflops </a></TH>" << endl ;
 | 
			
		||||
  cout << "    <TH ALIGN=CENTER> in cache <BR> % best </TH>" << endl ;
 | 
			
		||||
  cout << "    <TH ALIGN=CENTER> <a href=""#mean_marker""> out of cache <BR> mean perf <BR> Mflops </a></TH>" << endl ;
 | 
			
		||||
  cout << "    <TH ALIGN=CENTER> out of cache <BR> % best </TH>" << endl ;
 | 
			
		||||
  cout << "    <TH ALIGN=CENTER> details </TH>" << endl ;
 | 
			
		||||
  cout << "    <TH ALIGN=CENTER> comments </TH>" << endl ;
 | 
			
		||||
  cout << "  </TR>" << endl ;
 | 
			
		||||
 | 
			
		||||
  multiset<Lib_Mean>::iterator is = s_lib_mean.begin();
 | 
			
		||||
  Lib_Mean best(*is);  
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  for (is=s_lib_mean.begin(); is!=s_lib_mean.end() ; is++){
 | 
			
		||||
 | 
			
		||||
    cout << "  <TR>" << endl ;
 | 
			
		||||
    cout << "     <TD> " << is->_lib_name << " </TD>" << endl ;
 | 
			
		||||
    cout << "     <TD> " << is->_mean_in_cache << " </TD>" << endl ;
 | 
			
		||||
    cout << "     <TD> " << 100*(is->_mean_in_cache/best._mean_in_cache) << " </TD>" << endl ;
 | 
			
		||||
    cout << "     <TD> " << is->_mean_out_of_cache << " </TD>" << endl ;
 | 
			
		||||
    cout << "     <TD> " << 100*(is->_mean_out_of_cache/best._mean_out_of_cache) << " </TD>" << endl ;
 | 
			
		||||
    cout << "     <TD> " << 
 | 
			
		||||
      "<a href=\"#"<<is->_lib_name<<"_"<<argv[1]<<"\">snippet</a>/" 
 | 
			
		||||
      "<a href=\"#"<<is->_lib_name<<"_flags\">flags</a>  </TD>" << endl ;
 | 
			
		||||
    cout << "     <TD> " << 
 | 
			
		||||
      "<a href=\"#"<<is->_lib_name<<"_comments\">click here</a>  </TD>" << endl ;
 | 
			
		||||
    cout << "  </TR>" << endl ;
 | 
			
		||||
  
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  cout << "</TABLE>" << endl ;
 | 
			
		||||
 | 
			
		||||
  ofstream output_file ("../order_lib",ios::out) ;
 | 
			
		||||
  
 | 
			
		||||
  for (is=s_lib_mean.begin(); is!=s_lib_mean.end() ; is++){
 | 
			
		||||
    output_file << is->_lib_name << endl ;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  output_file.close();
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
double mean_calc(const vector<int> & tab_sizes, const vector<double> & tab_mflops, const int size_min, const int size_max){
 | 
			
		||||
  
 | 
			
		||||
  int size=tab_sizes.size();
 | 
			
		||||
  int nb_sample=0;
 | 
			
		||||
  double mean=0.0;
 | 
			
		||||
 | 
			
		||||
  for (int i=0;i<size;i++){
 | 
			
		||||
    
 | 
			
		||||
    
 | 
			
		||||
    if ((tab_sizes[i]>=size_min)&&(tab_sizes[i]<=size_max)){
 | 
			
		||||
      
 | 
			
		||||
      nb_sample++;
 | 
			
		||||
      mean+=tab_mflops[i];
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (nb_sample==0){
 | 
			
		||||
    INFOS("no data for mean calculation");
 | 
			
		||||
    return 0.0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  return mean/nb_sample;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										68
									
								
								cs440-acg/ext/eigen/bench/btl/data/mk_gnuplot_script.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								cs440-acg/ext/eigen/bench/btl/data/mk_gnuplot_script.sh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,68 @@
 | 
			
		||||
#! /bin/bash
# Builds a gnuplot script for benchmark WHAT and runs it.
#
# Usage: mk_gnuplot_script.sh WHAT DIR
#
# The script starts from $WHAT.hh, then plots every "*.dat" file found under
# DIR whose path matches WHAT. The same curve list is emitted three times:
# once with whatever terminal is active after $WHAT.hh, once to $WHAT.ppm
# (pbm terminal) and once to $WHAT.jpg (jpeg terminal).
WHAT=$1
DIR=$2
SCRIPT=$WHAT.gnuplot

echo $WHAT script generation
cat "$WHAT.hh" > "$SCRIPT"

# $DIR is deliberately left unquoted so that an omitted DIR still lets find
# fall back to its default start point. The list is split on whitespace
# below, so paths containing whitespace are not supported.
DATA_FILE=`find $DIR -name "*.dat" | grep "$WHAT"`

# Without any data file the plot command would be empty and invalid.
if [ -z "$DATA_FILE" ]
then
    echo "no data file found for $WHAT" >&2
    rm -f "$SCRIPT"
    exit 1
fi

# Remember the last file: it is the one written without a continuation.
for FILE in $DATA_FILE
do
    LAST=$FILE
done

echo LAST=$LAST

# Sets TITLE from a data-file path: drops the directory part, everything up
# to and including "bench_${WHAT}_", and the ".dat" suffix.
set_title() {
    local base=${1##*/}
    local avant=bench_${WHAT}_
    local reduc=${base##*$avant}
    TITLE=${reduc%.dat}
}

# Appends one complete "plot" command listing every data file to the script.
append_plot() {
    echo plot \\ >> "$SCRIPT"

    for FILE in $DATA_FILE
    do
        if [ "$FILE" != "$LAST" ]
        then
            set_title "$FILE"
            echo "'$FILE' title '$TITLE' ,\\" >> "$SCRIPT"
        fi
    done

    set_title "$LAST"
    echo "'$LAST' title '$TITLE'" >> "$SCRIPT"
}

append_plot

#echo set term postscript color >> $WHAT.gnuplot
#echo set output "'"$WHAT.ps"'" >> $WHAT.gnuplot
echo set term pbm small color >> "$SCRIPT"
echo "set output '$WHAT.ppm'" >> "$SCRIPT"
append_plot

echo set term jpeg large >> "$SCRIPT"
echo "set output '$WHAT.jpg'" >> "$SCRIPT"
append_plot

gnuplot -persist < "$SCRIPT"

rm "$SCRIPT"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										52
									
								
								cs440-acg/ext/eigen/bench/btl/data/mk_mean_script.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								cs440-acg/ext/eigen/bench/btl/data/mk_mean_script.sh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,52 @@
 | 
			
		||||
#! /bin/bash
# Computes mean-performance statistics and a plot for one benchmark action.
#
# Usage: mk_mean_script.sh WHAT DIR MINIC MAXIC MINOC MAXOC [MODE] [PREFIX]
#
# Every "*.dat" file under DIR whose path matches "_WHAT" is copied into a
# scratch directory under its library name, ../main appends an HTML table to
# DIR/mean.html, ../mk_new_gnuplot.sh produces the plot, and an image link is
# appended to DIR/index.html. The scratch directory is removed at the end.
WHAT=$1
DIR=$2
MINIC=$3
MAXIC=$4
MINOC=$5
MAXOC=$6
prefix=$8

# NOTE(review): this path is later used as "../$meanstatsfilename" from inside
# the scratch directory, which only works when DIR is a relative path.
meanstatsfilename=$2/mean.html

WORK_DIR=tmp
# -p: do not fail when the scratch directory is left over from an earlier run.
mkdir -p "$WORK_DIR" || exit 1

# The list is split on whitespace below, so paths containing whitespace are
# not supported.
DATA_FILE=`find $DIR -name "*.dat" | grep _${WHAT}`

if [ -n "$DATA_FILE" ]; then

  echo ""
  echo "$1..."
  for FILE in $DATA_FILE
  do
          ##echo hello world
          ##echo "mk_mean_script1" ${FILE}
    # Library name: file name without directory, "bench_${WHAT}_" prefix and
    # ".dat" suffix.
    BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}

    ##echo "mk_mean_script1" ${TITLE}
    cp "$FILE" "${WORK_DIR}/${TITLE}"

  done

  # Stop here if the scratch directory cannot be entered: the commands below
  # use relative paths and delete files with a glob.
  cd "$WORK_DIR" || exit 1
  ../main $1 $3 $4 $5 $6 * >> ../$meanstatsfilename
  # $7 stays unquoted so that an omitted MODE is not passed as an empty argument.
  ../mk_new_gnuplot.sh $1 $2 $7
  rm -f *.gnuplot
  cd ..

  echo '<br/>' >> $meanstatsfilename

  webpagefilename=$2/index.html
  # echo '<h3>'${WHAT}'</h3>'  >> $webpagefilename
  echo '<hr/><a href="'$prefix$1'.pdf"><img src="'$prefix$1'.png" alt="'${WHAT}'" /></a><br/>'  >> $webpagefilename

fi

rm -R "$WORK_DIR"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										54
									
								
								cs440-acg/ext/eigen/bench/btl/data/mk_new_gnuplot.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										54
									
								
								cs440-acg/ext/eigen/bench/btl/data/mk_new_gnuplot.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,54 @@
 | 
			
		||||
#!/bin/bash
# Builds and runs a gnuplot script for benchmark action WHAT, then converts
# the resulting PostScript plot to PDF and PNG.
#
# Usage: mk_new_gnuplot.sh WHAT DIR [tiny]
#
# Reads ../gnuplot_common_settings.hh, ../action_settings.txt (";"-separated:
# field 2 = title, field 3 = xlabel, field 4 = xrange), ../order_lib (curve
# order) and ../perlib_plot_settings.txt (";"-separated: field 2 = per-library
# plot style). Output goes to ../DIR/WHAT.ps, .pdf and .png.
WHAT=$1
DIR=$2

cat ../gnuplot_common_settings.hh > ${WHAT}.gnuplot

echo "set title " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 2` >> $WHAT.gnuplot
echo "set xlabel " `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 3` " offset 0,0" >> $WHAT.gnuplot
echo "set xrange [" `grep ${WHAT} ../action_settings.txt | head -n 1 | cut -d ";" -f 4` "]" >> $WHAT.gnuplot

# Optional third argument "tiny": fixed small x range on a linear scale.
# The count must be compared with -ge: inside [ ], ">" is an output
# redirection, which would create a file named "3" and always succeed.
if [ $# -ge 3 ]; then
  if [ "$3" == "tiny" ]; then
    echo "set xrange [2:16]" >> $WHAT.gnuplot
    echo "set nologscale" >> $WHAT.gnuplot
  fi
fi



# Curves are drawn in the order listed in ../order_lib (split on whitespace).
DATA_FILE=`cat ../order_lib`
echo set term postscript color rounded enhanced >> $WHAT.gnuplot
echo set output "'"../${DIR}/$WHAT.ps"'" >> $WHAT.gnuplot

# echo set term svg color rounded enhanced >> $WHAT.gnuplot
# echo "set terminal svg enhanced size 1000 1000 fname \"Times\" fsize 36" >> $WHAT.gnuplot
# echo set output "'"../${DIR}/$WHAT.svg"'" >> $WHAT.gnuplot

echo plot \\ >> $WHAT.gnuplot

# Remember the last entry: it is the one not followed by a "," separator.
for FILE in $DATA_FILE
do
    LAST=$FILE
done

for FILE in $DATA_FILE
do
    # Library name: file name without directory, "bench_${WHAT}_" prefix and
    # ".dat" suffix.
    BASE=${FILE##*/} ; AVANT=bench_${WHAT}_ ; REDUC=${BASE##*$AVANT} ; TITLE=${REDUC%.dat}

    # NOTE(review): the grep is unanchored and takes the first matching line,
    # so a short name can match an unrelated line of the settings file
    # (a substring of another name or of a colour code) - confirm.
    echo "'"$FILE"'" `grep $TITLE ../perlib_plot_settings.txt | head -n 1 | cut -d ";" -f 2` "\\" >>  $WHAT.gnuplot
    if [ "$FILE" != "$LAST" ]
    then
      echo ", \\" >>  $WHAT.gnuplot
    fi
done
echo " " >>  $WHAT.gnuplot

gnuplot -persist < $WHAT.gnuplot

rm $WHAT.gnuplot

ps2pdf ../${DIR}/$WHAT.ps ../${DIR}/$WHAT.pdf
convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 ../${DIR}/$WHAT.ps -background white -flatten  ../${DIR}/$WHAT.png

# pstoedit -rotate -90 -xscale 0.8 -yscale 0.8 -centered -yshift -50 -xshift -100  -f plot-svg aat.ps  aat2.svg
 | 
			
		||||
							
								
								
									
										16
									
								
								cs440-acg/ext/eigen/bench/btl/data/perlib_plot_settings.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								cs440-acg/ext/eigen/bench/btl/data/perlib_plot_settings.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,16 @@
 | 
			
		||||
eigen3 ;          with lines lw 4 lt 1 lc rgbcolor "black"
 | 
			
		||||
eigen2 ;          with lines lw 3 lt 1 lc rgbcolor "#999999"
 | 
			
		||||
EigenBLAS ;       with lines lw 3 lt 3 lc rgbcolor "#999999"
 | 
			
		||||
eigen3_novec ;    with lines lw 2 lt 1 lc rgbcolor "#999999"
 | 
			
		||||
eigen3_nogccvec ; with lines lw 2 lt 2 lc rgbcolor "#991010"
 | 
			
		||||
INTEL_MKL ;       with lines lw 3 lt 1 lc rgbcolor "#ff0000"
 | 
			
		||||
ATLAS ;           with lines lw 3 lt 1 lc rgbcolor "#008000"
 | 
			
		||||
gmm ;             with lines lw 3 lt 1 lc rgbcolor "#0000ff"
 | 
			
		||||
ublas ;           with lines lw 3 lt 1 lc rgbcolor "#00b7ff"
 | 
			
		||||
mtl4 ;            with lines lw 3 lt 1 lc rgbcolor "#d18847"
 | 
			
		||||
blitz ;           with lines lw 3 lt 1 lc rgbcolor "#ff00ff"
 | 
			
		||||
F77 ;             with lines lw 3 lt 3 lc rgbcolor "#e6e64c"
 | 
			
		||||
OPENBLAS ;        with lines lw 3 lt 1 lc rgbcolor "#C05600"
 | 
			
		||||
C ;               with lines lw 3 lt 3 lc rgbcolor "#e6bd96"
 | 
			
		||||
ACML ;            with lines lw 2 lt 3 lc rgbcolor "#e6e64c"
 | 
			
		||||
blaze ;           with lines lw 3 lt 1 lc rgbcolor "#ff00ff"
 | 
			
		||||
							
								
								
									
										131
									
								
								cs440-acg/ext/eigen/bench/btl/data/regularize.cxx
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										131
									
								
								cs440-acg/ext/eigen/bench/btl/data/regularize.cxx
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,131 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  regularize.cxx
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:15 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include "bench_parameter.hh"
 | 
			
		||||
#include <set>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
 | 
			
		||||
void regularize_curve(const string & filename,
 | 
			
		||||
		      const vector<double> & tab_mflops, 
 | 
			
		||||
		      const vector<int> & tab_sizes, 
 | 
			
		||||
		      int start_cut_size, int stop_cut_size);
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
int main( int argc , char *argv[] )
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  // input data
 | 
			
		||||
 | 
			
		||||
  if (argc<4){
 | 
			
		||||
    INFOS("!!! Error ... usage : main filename start_cut_size stop_cut_size regularize_filename");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
  INFOS(argc);
 | 
			
		||||
 | 
			
		||||
  int start_cut_size=atoi(argv[2]);
 | 
			
		||||
  int stop_cut_size=atoi(argv[3]);
 | 
			
		||||
 | 
			
		||||
  string filename=argv[1];
 | 
			
		||||
  string regularize_filename=argv[4];
 | 
			
		||||
  
 | 
			
		||||
  INFOS(filename);
 | 
			
		||||
  INFOS("start_cut_size="<<start_cut_size);
 | 
			
		||||
 | 
			
		||||
  vector<int> tab_sizes;
 | 
			
		||||
  vector<double> tab_mflops;
 | 
			
		||||
 | 
			
		||||
  read_xy_file(filename,tab_sizes,tab_mflops);
 | 
			
		||||
 | 
			
		||||
  // regularizeing
 | 
			
		||||
 | 
			
		||||
  regularize_curve(regularize_filename,tab_mflops,tab_sizes,start_cut_size,stop_cut_size);
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
//////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
// Writes the (size, mflops) points to `filename`, one "size mflops" pair per
// line, leaving out a middle section of the curve:
//   1. leading points with size < start_cut_size are written,
//   2. an empty line is written,
//   3. following points with size < stop_cut_size are skipped,
//   4. all remaining points are written.
// Only indices present in both vectors are used. Every loop is bounded by
// the number of points, so an empty input, or one whose sizes all lie below
// a threshold, no longer reads past the end of the vectors.
void regularize_curve(const std::string & filename,
		      const std::vector<double> & tab_mflops,
		      const std::vector<int> & tab_sizes,
		      int start_cut_size, int stop_cut_size)
{
  int size=tab_mflops.size();
  if (tab_sizes.size()<tab_mflops.size()) size=tab_sizes.size();

  std::ofstream output_file (filename.c_str(),std::ios::out) ;

  int i=0;

  // Part kept before the cut.
  while(i<size && tab_sizes[i]<start_cut_size){

    output_file << tab_sizes[i] << " " <<  tab_mflops[i] << std::endl ;
    i++;

  }

  // Empty line separating the two kept parts.
  output_file << std::endl ;

  // Part removed by the cut.
  while(i<size && tab_sizes[i]<stop_cut_size){

    i++;

  }

  // Part kept after the cut.
  while(i<size){

    output_file << tab_sizes[i] << " " <<  tab_mflops[i] << std::endl ;
    i++;

  }

  output_file.close();

}
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
 | 
			
		||||
 | 
			
		||||
  ifstream input_file (filename.c_str(),ios::in) ;
 | 
			
		||||
 | 
			
		||||
  if (!input_file){
 | 
			
		||||
    INFOS("!!! Error opening "<<filename);
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  int nb_point=0;
 | 
			
		||||
  int size=0;
 | 
			
		||||
  double mflops=0;
 | 
			
		||||
 | 
			
		||||
  while (input_file >> size >> mflops ){
 | 
			
		||||
    nb_point++;
 | 
			
		||||
    tab_sizes.push_back(size);
 | 
			
		||||
    tab_mflops.push_back(mflops);
 | 
			
		||||
  }
 | 
			
		||||
  SCRUTE(nb_point);
 | 
			
		||||
 | 
			
		||||
  input_file.close();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										198
									
								
								cs440-acg/ext/eigen/bench/btl/data/smooth.cxx
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										198
									
								
								cs440-acg/ext/eigen/bench/btl/data/smooth.cxx
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,198 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  smooth.cxx
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:15 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <deque>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include "bench_parameter.hh"
 | 
			
		||||
#include <set>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
 | 
			
		||||
void write_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops);
 | 
			
		||||
void smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab_mflops,int window_half_width);
 | 
			
		||||
void centered_smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab_mflops,int window_half_width);
 | 
			
		||||
 | 
			
		||||
/////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
int main( int argc , char *argv[] )
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  // input data
 | 
			
		||||
 | 
			
		||||
  if (argc<3){
 | 
			
		||||
    INFOS("!!! Error ... usage : main filename window_half_width smooth_filename");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
  INFOS(argc);
 | 
			
		||||
 | 
			
		||||
  int window_half_width=atoi(argv[2]);
 | 
			
		||||
 | 
			
		||||
  string filename=argv[1];
 | 
			
		||||
  string smooth_filename=argv[3];
 | 
			
		||||
  
 | 
			
		||||
  INFOS(filename);
 | 
			
		||||
  INFOS("window_half_width="<<window_half_width);
 | 
			
		||||
 | 
			
		||||
  vector<int> tab_sizes;
 | 
			
		||||
  vector<double> tab_mflops;
 | 
			
		||||
 | 
			
		||||
  read_xy_file(filename,tab_sizes,tab_mflops);
 | 
			
		||||
 | 
			
		||||
  // smoothing
 | 
			
		||||
 | 
			
		||||
  vector<double> smooth_tab_mflops;
 | 
			
		||||
 | 
			
		||||
  //smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width);
 | 
			
		||||
  centered_smooth_curve(tab_mflops,smooth_tab_mflops,window_half_width);
 | 
			
		||||
 | 
			
		||||
  // output result
 | 
			
		||||
 | 
			
		||||
  write_xy_file(smooth_filename,tab_sizes,smooth_tab_mflops);
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
// Returns the arithmetic mean of the elements of `data` (every element has
// the same weight). VECTOR must provide size(), operator[] and a size_type.
// An empty container yields 0.0 instead of dividing by zero.
template<class VECTOR>
double weighted_mean(const VECTOR & data)
{

  // Index with the container's own unsigned size type to avoid a
  // signed/unsigned comparison.
  typedef typename VECTOR::size_type size_type;

  const size_type count=data.size();

  if (count==0) return 0.0;

  double mean=0.0;

  for (size_type i=0 ; i<count ; i++){

    mean+=data[i];

  }

  return mean/double(count) ;

}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
void smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab_mflops,int window_half_width){
 | 
			
		||||
  
 | 
			
		||||
  int window_width=2*window_half_width+1;
 | 
			
		||||
 | 
			
		||||
  int size=tab_mflops.size();
 | 
			
		||||
 | 
			
		||||
  vector<double> sample(window_width);
 | 
			
		||||
  
 | 
			
		||||
  for (int i=0 ; i < size ; i++){
 | 
			
		||||
    
 | 
			
		||||
    for ( int j=0 ; j < window_width ; j++ ){
 | 
			
		||||
      
 | 
			
		||||
      int shifted_index=i+j-window_half_width;
 | 
			
		||||
      if (shifted_index<0) shifted_index=0;
 | 
			
		||||
      if (shifted_index>size-1) shifted_index=size-1;
 | 
			
		||||
      sample[j]=tab_mflops[shifted_index];
 | 
			
		||||
      
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    smooth_tab_mflops.push_back(weighted_mean(sample));
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void centered_smooth_curve(const vector<double> & tab_mflops, vector<double> & smooth_tab_mflops,int window_half_width){
 | 
			
		||||
  
 | 
			
		||||
  int max_window_width=2*window_half_width+1;
 | 
			
		||||
 | 
			
		||||
  int size=tab_mflops.size();
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  for (int i=0 ; i < size ; i++){
 | 
			
		||||
 | 
			
		||||
    deque<double> sample;
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
    sample.push_back(tab_mflops[i]);
 | 
			
		||||
 | 
			
		||||
    for ( int j=1 ; j <= window_half_width ; j++ ){
 | 
			
		||||
      
 | 
			
		||||
      int before=i-j;
 | 
			
		||||
      int after=i+j;
 | 
			
		||||
      
 | 
			
		||||
      if ((before>=0)&&(after<size)) // inside of the vector
 | 
			
		||||
	{ 
 | 
			
		||||
	  sample.push_front(tab_mflops[before]);
 | 
			
		||||
	  sample.push_back(tab_mflops[after]);
 | 
			
		||||
	}
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    smooth_tab_mflops.push_back(weighted_mean(sample));
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
// Writes one "size mflops" line to `filename` for every entry of tab_sizes,
// pairing it with the tab_mflops entry at the same index.
void write_xy_file(const std::string & filename, std::vector<int> & tab_sizes, std::vector<double> & tab_mflops){

  std::ofstream output_file;
  output_file.open(filename.c_str(),std::ios::out);

  const std::vector<int>::size_type row_count=tab_sizes.size();

  for (std::vector<int>::size_type row=0 ; row != row_count ; ++row){
    output_file << tab_sizes[row] << " " <<  tab_mflops[row] << std::endl ;
  }

  output_file.close();

}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
			
		||||
 | 
			
		||||
void read_xy_file(const string & filename, vector<int> & tab_sizes, vector<double> & tab_mflops){
 | 
			
		||||
 | 
			
		||||
  ifstream input_file (filename.c_str(),ios::in) ;
 | 
			
		||||
 | 
			
		||||
  if (!input_file){
 | 
			
		||||
    INFOS("!!! Error opening "<<filename);
 | 
			
		||||
    exit(0);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  int nb_point=0;
 | 
			
		||||
  int size=0;
 | 
			
		||||
  double mflops=0;
 | 
			
		||||
 | 
			
		||||
  while (input_file >> size >> mflops ){
 | 
			
		||||
    nb_point++;
 | 
			
		||||
    tab_sizes.push_back(size);
 | 
			
		||||
    tab_mflops.push_back(mflops);
 | 
			
		||||
  }
 | 
			
		||||
  SCRUTE(nb_point);
 | 
			
		||||
 | 
			
		||||
  input_file.close();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										68
									
								
								cs440-acg/ext/eigen/bench/btl/data/smooth_all.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										68
									
								
								cs440-acg/ext/eigen/bench/btl/data/smooth_all.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,68 @@
 | 
			
		||||
#! /bin/bash
# Builds a smoothed copy of a directory of benchmark result files.
#
# Usage: smooth_all.sh ORIG_DIR
#
# Results are written to ${ORIG_DIR}_smooth. The *.dat files found under
# ORIG_DIR are selected by the pattern their path matches:
#   axpy, matrix_vector             -> ./smooth, then ./regularize
#   matrix_matrix, _aat, _ata       -> ./smooth only
#   tiny_blitz, tvmet               -> copied unchanged
# Each file is looked up as ${ORIG_DIR}/<basename>, exactly as before.

ORIG_DIR=$1
if [ -z "${ORIG_DIR}" ]; then
    echo "usage: $0 ORIG_DIR" >&2
    exit 1
fi

SMOOTH_DIR=${ORIG_DIR}_smooth
# -p: re-running the script on an existing output directory is not an error.
mkdir -p "${SMOOTH_DIR}"

# for_each_dat PATTERN HANDLER [ARGS...]
# Calls "HANDLER BASENAME ARGS..." for every *.dat file under ORIG_DIR whose
# path matches PATTERN. The list is collected before the first handler runs,
# and is read on descriptor 3 so that handlers keep their own stdin.
for_each_dat() {
    local pattern=$1 handler=$2
    shift 2
    local files
    files=$(find "${ORIG_DIR}" -name "*.dat" | grep -- "${pattern}")
    local FILE
    while IFS= read -r FILE <&3
    do
        [ -n "${FILE}" ] || continue
        echo "${FILE}"
        "${handler}" "${FILE##*/}" "$@"
    done 3<<< "${files}"
}

# smooth_then_regularize BASENAME LOW HIGH
smooth_then_regularize() {
    local BASE=$1
    ./smooth "${ORIG_DIR}/${BASE}" 4 "${SMOOTH_DIR}/${BASE}_tmp"
    ./regularize "${SMOOTH_DIR}/${BASE}_tmp" "$2" "$3" "${SMOOTH_DIR}/${BASE}"
    rm -f "${SMOOTH_DIR}/${BASE}_tmp"
}

# smooth_only BASENAME
smooth_only() {
    local BASE=$1
    ./smooth "${ORIG_DIR}/${BASE}" 4 "${SMOOTH_DIR}/${BASE}"
}

# copy_unchanged BASENAME
copy_unchanged() {
    local BASE=$1
    cp "${ORIG_DIR}/${BASE}" "${SMOOTH_DIR}/${BASE}"
}

for_each_dat axpy          smooth_then_regularize 2500 15000
for_each_dat matrix_vector smooth_then_regularize 50 180

for_each_dat matrix_matrix smooth_only
for_each_dat _aat          smooth_only
for_each_dat _ata          smooth_only

### no smoothing for tinyvector and matrices libs

for_each_dat tiny_blitz    copy_unchanged
for_each_dat tvmet         copy_unchanged
 | 
			
		||||
							
								
								
									
										168
									
								
								cs440-acg/ext/eigen/bench/btl/generic_bench/bench.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										168
									
								
								cs440-acg/ext/eigen/bench/btl/generic_bench/bench.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,168 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  bench.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:16 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef BENCH_HH
 | 
			
		||||
#define BENCH_HH
 | 
			
		||||
 | 
			
		||||
#include "btl.hh"
 | 
			
		||||
#include "bench_parameter.hh"
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "size_lin_log.hh"
 | 
			
		||||
#include "xy_file.hh"
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "timers/portable_perf_analyzer.hh"
 | 
			
		||||
// #include "timers/mixed_perf_analyzer.hh"
 | 
			
		||||
// #include "timers/x86_perf_analyzer.hh"
 | 
			
		||||
// #include "timers/STL_perf_analyzer.hh"
 | 
			
		||||
#ifdef HAVE_MKL
 | 
			
		||||
extern "C" void cblas_saxpy(const int, const float, const float*, const int, float *, const int);
 | 
			
		||||
#endif
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template <template<class> class Perf_Analyzer, class Action>
 | 
			
		||||
BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point )
 | 
			
		||||
{
 | 
			
		||||
  if (BtlConfig::skipAction(Action::name()))
 | 
			
		||||
    return;
 | 
			
		||||
 | 
			
		||||
  string filename="bench_"+Action::name()+".dat";
 | 
			
		||||
 | 
			
		||||
  INFOS("starting " <<filename);
 | 
			
		||||
 | 
			
		||||
  // utilities
 | 
			
		||||
 | 
			
		||||
  std::vector<double> tab_mflops(nb_point);
 | 
			
		||||
  std::vector<int> tab_sizes(nb_point);
 | 
			
		||||
 | 
			
		||||
  // matrices and vector size calculations
 | 
			
		||||
  size_lin_log(nb_point,size_min,size_max,tab_sizes);
 | 
			
		||||
 | 
			
		||||
  std::vector<int> oldSizes;
 | 
			
		||||
  std::vector<double> oldFlops;
 | 
			
		||||
  bool hasOldResults = read_xy_file(filename, oldSizes, oldFlops, true);
 | 
			
		||||
  int oldi = oldSizes.size() - 1;
 | 
			
		||||
 | 
			
		||||
  // loop on matrix size
 | 
			
		||||
  Perf_Analyzer<Action> perf_action;
 | 
			
		||||
  for (int i=nb_point-1;i>=0;i--)
 | 
			
		||||
  {
 | 
			
		||||
    //INFOS("size=" <<tab_sizes[i]<<"   ("<<nb_point-i<<"/"<<nb_point<<")");
 | 
			
		||||
    std::cout << " " << "size = " << tab_sizes[i] << "  " << std::flush;
 | 
			
		||||
 | 
			
		||||
    BTL_DISABLE_SSE_EXCEPTIONS();
 | 
			
		||||
    #ifdef HAVE_MKL
 | 
			
		||||
    {
 | 
			
		||||
      float dummy;
 | 
			
		||||
      cblas_saxpy(1,0,&dummy,1,&dummy,1);
 | 
			
		||||
    }
 | 
			
		||||
    #endif
 | 
			
		||||
 | 
			
		||||
    tab_mflops[i] = perf_action.eval_mflops(tab_sizes[i]);
 | 
			
		||||
    std::cout << tab_mflops[i];
 | 
			
		||||
    
 | 
			
		||||
    if (hasOldResults)
 | 
			
		||||
    {
 | 
			
		||||
      while (oldi>=0 && oldSizes[oldi]>tab_sizes[i])
 | 
			
		||||
        --oldi;
 | 
			
		||||
      if (oldi>=0 && oldSizes[oldi]==tab_sizes[i])
 | 
			
		||||
      {
 | 
			
		||||
        if (oldFlops[oldi]<tab_mflops[i])
 | 
			
		||||
          std::cout << "\t > ";
 | 
			
		||||
        else
 | 
			
		||||
          std::cout << "\t < ";
 | 
			
		||||
        std::cout << oldFlops[oldi];
 | 
			
		||||
      }
 | 
			
		||||
      --oldi;
 | 
			
		||||
    }
 | 
			
		||||
    std::cout << " MFlops    (" << nb_point-i << "/" << nb_point << ")" << std::endl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (!BtlConfig::Instance.overwriteResults)
 | 
			
		||||
  {
 | 
			
		||||
    if (hasOldResults)
 | 
			
		||||
    {
 | 
			
		||||
      // merge the two data
 | 
			
		||||
      std::vector<int> newSizes;
 | 
			
		||||
      std::vector<double> newFlops;
 | 
			
		||||
      unsigned int i=0;
 | 
			
		||||
      unsigned int j=0;
 | 
			
		||||
      while (i<tab_sizes.size() && j<oldSizes.size())
 | 
			
		||||
      {
 | 
			
		||||
        if (tab_sizes[i] == oldSizes[j])
 | 
			
		||||
        {
 | 
			
		||||
          newSizes.push_back(tab_sizes[i]);
 | 
			
		||||
          newFlops.push_back(std::max(tab_mflops[i], oldFlops[j]));
 | 
			
		||||
          ++i;
 | 
			
		||||
          ++j;
 | 
			
		||||
        }
 | 
			
		||||
        else if (tab_sizes[i] < oldSizes[j])
 | 
			
		||||
        {
 | 
			
		||||
          newSizes.push_back(tab_sizes[i]);
 | 
			
		||||
          newFlops.push_back(tab_mflops[i]);
 | 
			
		||||
          ++i;
 | 
			
		||||
        }
 | 
			
		||||
        else
 | 
			
		||||
        {
 | 
			
		||||
          newSizes.push_back(oldSizes[j]);
 | 
			
		||||
          newFlops.push_back(oldFlops[j]);
 | 
			
		||||
          ++j;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      while (i<tab_sizes.size())
 | 
			
		||||
      {
 | 
			
		||||
        newSizes.push_back(tab_sizes[i]);
 | 
			
		||||
        newFlops.push_back(tab_mflops[i]);
 | 
			
		||||
        ++i;
 | 
			
		||||
      }
 | 
			
		||||
      while (j<oldSizes.size())
 | 
			
		||||
      {
 | 
			
		||||
        newSizes.push_back(oldSizes[j]);
 | 
			
		||||
        newFlops.push_back(oldFlops[j]);
 | 
			
		||||
        ++j;
 | 
			
		||||
      }
 | 
			
		||||
      tab_mflops = newFlops;
 | 
			
		||||
      tab_sizes = newSizes;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // dump the result in a file  :
 | 
			
		||||
  dump_xy_file(tab_sizes,tab_mflops,filename);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// default Perf Analyzer
 | 
			
		||||
 | 
			
		||||
template <class Action>
 | 
			
		||||
BTL_DONT_INLINE void bench( int size_min, int size_max, int nb_point ){
 | 
			
		||||
 | 
			
		||||
  // if the rdtsc is not available :
 | 
			
		||||
  bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point);
 | 
			
		||||
  // if the rdtsc is available :
 | 
			
		||||
//    bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  // Only for small problem size. Otherwize it will be too long
 | 
			
		||||
//   bench<X86_Perf_Analyzer,Action>(size_min,size_max,nb_point);
 | 
			
		||||
//   bench<STL_Perf_Analyzer,Action>(size_min,size_max,nb_point);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -0,0 +1,53 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  bench_parameter.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:16 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef BENCH_PARAMETER_HH
 | 
			
		||||
#define BENCH_PARAMETER_HH
 | 
			
		||||
 | 
			
		||||
// floating-point type selected through REAL_TYPE (presumably the scalar type of the benchmarks)
 | 
			
		||||
#define REAL_TYPE float
 | 
			
		||||
// minimal time for each measurement
 | 
			
		||||
#define MIN_TIME 0.2
 | 
			
		||||
// nb of point on bench curves
 | 
			
		||||
#define NB_POINT 100
 | 
			
		||||
// min vector size for axpy bench
 | 
			
		||||
#define MIN_AXPY 5
 | 
			
		||||
// max vector size for axpy bench
 | 
			
		||||
#define MAX_AXPY 3000000
 | 
			
		||||
// min matrix size for matrix vector product bench
 | 
			
		||||
#define MIN_MV 5
 | 
			
		||||
// max matrix size for matrix vector product bench
 | 
			
		||||
#define MAX_MV 5000
 | 
			
		||||
// min matrix size for matrix matrix product bench
 | 
			
		||||
#define MIN_MM 5
 | 
			
		||||
// max matrix size for matrix matrix product bench
 | 
			
		||||
#define MAX_MM MAX_MV
 | 
			
		||||
// min matrix size for LU bench
 | 
			
		||||
#define MIN_LU 5
 | 
			
		||||
// max matrix size for LU bench
 | 
			
		||||
#define MAX_LU 3000
 | 
			
		||||
// max size for tiny vector and matrix
 | 
			
		||||
#define TINY_MV_MAX_SIZE 16
 | 
			
		||||
// default nb_sample for x86 timer
 | 
			
		||||
#define DEFAULT_NB_SAMPLE 1000
 | 
			
		||||
 | 
			
		||||
// how many times we run a single bench (keep the best perf)
 | 
			
		||||
#define DEFAULT_NB_TRIES 3
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										242
									
								
								cs440-acg/ext/eigen/bench/btl/generic_bench/btl.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										242
									
								
								cs440-acg/ext/eigen/bench/btl/generic_bench/btl.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,242 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  btl.hh
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef BTL_HH
 | 
			
		||||
#define BTL_HH
 | 
			
		||||
 | 
			
		||||
#include "bench_parameter.hh"
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
 | 
			
		||||
#if (defined __GNUC__)
 | 
			
		||||
#define BTL_ALWAYS_INLINE __attribute__((always_inline)) inline
 | 
			
		||||
#else
 | 
			
		||||
#define BTL_ALWAYS_INLINE inline
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if (defined __GNUC__)
 | 
			
		||||
#define BTL_DONT_INLINE __attribute__((noinline))
 | 
			
		||||
#else
 | 
			
		||||
#define BTL_DONT_INLINE
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if (defined __GNUC__)
 | 
			
		||||
#define BTL_ASM_COMMENT(X)  asm("#" X)
 | 
			
		||||
#else
 | 
			
		||||
#define BTL_ASM_COMMENT(X)
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef __SSE__
 | 
			
		||||
#include "xmmintrin.h"
 | 
			
		||||
// This enables flush to zero (FTZ) and denormals are zero (DAZ) modes:
 | 
			
		||||
#define BTL_DISABLE_SSE_EXCEPTIONS()  { _mm_setcsr(_mm_getcsr() | 0x8040); }
 | 
			
		||||
#else
 | 
			
		||||
#define BTL_DISABLE_SSE_EXCEPTIONS()
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/** Enhanced std::string: a std::string with a few convenience helpers
  * (trimming, splitting, prefix/suffix/substring tests, case conversion and
  * path decomposition) and an implicit conversion to const char*.
*/
class BtlString : public std::string
{
public:
    BtlString() : std::string() {}
    BtlString(const BtlString& str) : std::string(static_cast<const std::string&>(str)) {}
    BtlString(const std::string& str) : std::string(str) {}
    BtlString(const char* str) : std::string(str) {}

    /** Copy assignment, spelled out because the copy constructor is user-declared. */
    BtlString& operator=(const BtlString& str)
    {
        std::string::operator=(static_cast<const std::string&>(str));
        return *this;
    }

    operator const char* () const { return c_str(); }

    /** Removes leading (if \a left) and/or trailing (if \a right) blanks in place.
        Blanks are space, tab, carriage return and line feed.
    */
    void trim( bool left = true, bool right = true )
    {
        static const char* const blanks = " \t\r\n";

        // index of the first character to keep
        size_type first = 0;
        if ( left )
        {
            first = find_first_not_of(blanks);
            if (first == npos)
                first = length();
        }

        // one past the last character to keep
        size_type last = length();
        if ( right && first < length() )
        {
            const size_type lastKept = find_last_not_of(blanks);
            last = (lastKept == npos) ? 0 : lastKept + 1;
        }

        erase(last);
        erase(0, first);
    }

    /** Splits the string at every character found in \a delims.
        Empty fields are kept: two consecutive delimiters, or a delimiter at
        either end of the string, produce an empty element. The result always
        holds at least one element.
    */
    std::vector<BtlString> split( const BtlString& delims = "\t\n ") const
    {
        std::vector<BtlString> ret;
        size_t start = 0;
        for (;;)
        {
            const size_t pos = find_first_of(delims, start);
            if (pos == npos)
            {
                // last field: everything after the last delimiter
                ret.push_back( substr(start) );
                break;
            }
            ret.push_back( substr(start, pos - start) );
            start = pos + 1;
        }
        return ret;
    }

    /** Returns true if the string ends with \a str. */
    bool endsWith(const BtlString& str) const
    {
        if(str.size()>this->size())
            return false;
        return this->compare(this->size()-str.size(), str.size(),
                             static_cast<const std::string&>(str)) == 0;
    }

    /** Returns true if \a str occurs in the string.
        An empty string contains nothing, not even the empty string.
    */
    bool contains(const BtlString& str) const
    {
        return this->find(static_cast<const std::string&>(str))<this->size();
    }

    /** Returns true if the string starts with \a str. */
    bool beginsWith(const BtlString& str) const
    {
        if(str.size()>this->size())
            return false;
        return this->compare(0, str.size(), static_cast<const std::string&>(str)) == 0;
    }

    /** Converts the string to lower case in place and returns a copy of it. */
    BtlString toLowerCase( void )
    {
        std::transform(begin(), end(), begin(), &BtlString::lowerChar );
        return *this;
    }

    /** Converts the string to upper case in place and returns a copy of it. */
    BtlString toUpperCase( void )
    {
        std::transform(begin(), end(), begin(), &BtlString::upperChar );
        return *this;
    }

    /** Case insensitive comparison.
    */
    bool isEquiv(const BtlString& str) const
    {
        BtlString str0 = *this;
        str0.toLowerCase();
        BtlString str1 = str;
        str1.toLowerCase();
        return static_cast<const std::string&>(str0) == static_cast<const std::string&>(str1);
    }

    /** Decompose the current string as a path and a file.
        For instance: "dir1/dir2/file.ext" leads to path="dir1/dir2/" and filename="file.ext".
        Both '/' and '\\' are accepted as separators; the returned path uses '/'.
        An empty string leads to an empty path and an empty filename.
    */
    void decomposePathAndFile(BtlString& path, BtlString& filename) const
    {
        if (this->empty())
        {
            path = "";
            filename = "";
            return;
        }

        std::vector<BtlString> elements = this->split("/\\");
        path = "";
        filename = elements.back();
        elements.pop_back();

        unsigned int first = 0;
        if (this->at(0)=='/')
        {
            path = "/";
            // The leading '/' produced an empty first element; appending it
            // would double the leading slash.
            first = 1;
        }
        for (unsigned int i=first ; i<elements.size() ; ++i)
            path += elements[i] + "/";
    }

private:
    // The <cctype> functions require a value representable as unsigned char,
    // hence the casts around them.
    static char lowerChar(char c)
    {
        return static_cast<char>(::tolower(static_cast<unsigned char>(c)));
    }
    static char upperChar(char c)
    {
        return static_cast<char>(::toupper(static_cast<unsigned char>(c)));
    }
};
 | 
			
		||||
 | 
			
		||||
class BtlConfig
 | 
			
		||||
{
 | 
			
		||||
public:
 | 
			
		||||
  BtlConfig()
 | 
			
		||||
    : overwriteResults(false), checkResults(true), realclock(false), tries(DEFAULT_NB_TRIES)
 | 
			
		||||
  {
 | 
			
		||||
    char * _config;
 | 
			
		||||
    _config = getenv ("BTL_CONFIG");
 | 
			
		||||
    if (_config!=NULL)
 | 
			
		||||
    {
 | 
			
		||||
      std::vector<BtlString> config = BtlString(_config).split(" \t\n");
 | 
			
		||||
      for (unsigned int i = 0; i<config.size(); i++)
 | 
			
		||||
      {
 | 
			
		||||
        if (config[i].beginsWith("-a"))
 | 
			
		||||
        {
 | 
			
		||||
          if (i+1==config.size())
 | 
			
		||||
          {
 | 
			
		||||
            std::cerr << "error processing option: " << config[i] << "\n";
 | 
			
		||||
            exit(2);
 | 
			
		||||
          }
 | 
			
		||||
          Instance.m_selectedActionNames = config[i+1].split(":");
 | 
			
		||||
 | 
			
		||||
          i += 1;
 | 
			
		||||
        }
 | 
			
		||||
        else if (config[i].beginsWith("-t"))
 | 
			
		||||
        {
 | 
			
		||||
          if (i+1==config.size())
 | 
			
		||||
          {
 | 
			
		||||
            std::cerr << "error processing option: " << config[i] << "\n";
 | 
			
		||||
            exit(2);
 | 
			
		||||
          }
 | 
			
		||||
          Instance.tries = atoi(config[i+1].c_str());
 | 
			
		||||
 | 
			
		||||
          i += 1;
 | 
			
		||||
        }
 | 
			
		||||
        else if (config[i].beginsWith("--overwrite"))
 | 
			
		||||
        {
 | 
			
		||||
          Instance.overwriteResults = true;
 | 
			
		||||
        }
 | 
			
		||||
        else if (config[i].beginsWith("--nocheck"))
 | 
			
		||||
        {
 | 
			
		||||
          Instance.checkResults = false;
 | 
			
		||||
        }
 | 
			
		||||
        else if (config[i].beginsWith("--real"))
 | 
			
		||||
        {
 | 
			
		||||
          Instance.realclock = true;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    BTL_DISABLE_SSE_EXCEPTIONS();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE static bool skipAction(const std::string& _name)
 | 
			
		||||
  {
 | 
			
		||||
    if (Instance.m_selectedActionNames.empty())
 | 
			
		||||
      return false;
 | 
			
		||||
 | 
			
		||||
    BtlString name(_name);
 | 
			
		||||
    for (unsigned int i=0; i<Instance.m_selectedActionNames.size(); ++i)
 | 
			
		||||
      if (name.contains(Instance.m_selectedActionNames[i]))
 | 
			
		||||
        return false;
 | 
			
		||||
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static BtlConfig Instance;
 | 
			
		||||
  bool overwriteResults;
 | 
			
		||||
  bool checkResults;
 | 
			
		||||
  bool realclock;
 | 
			
		||||
  int tries;
 | 
			
		||||
 | 
			
		||||
protected:
 | 
			
		||||
  std::vector<BtlString> m_selectedActionNames;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#define BTL_MAIN \
 | 
			
		||||
  BtlConfig BtlConfig::Instance
 | 
			
		||||
 | 
			
		||||
#endif // BTL_HH
 | 
			
		||||
@@ -0,0 +1,54 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  init_function.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:18 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef INIT_FUNCTION_HH
 | 
			
		||||
#define INIT_FUNCTION_HH
 | 
			
		||||
 | 
			
		||||
// Returns index converted to double.
// inline: the definition lives in a header and may be seen by several
// translation units.
inline double simple_function(int index)
{
  return index;
}
 | 
			
		||||
 | 
			
		||||
// Returns index_i + index_j as a double.
// The sum is computed in double so that it cannot overflow int.
// inline: the definition lives in a header and may be seen by several
// translation units.
inline double simple_function(int index_i, int index_j)
{
  return double(index_i)+index_j;
}
 | 
			
		||||
 | 
			
		||||
// Returns std::rand() scaled by RAND_MAX, i.e. a value in [0,1]; the index is
// ignored.
// inline: the definition lives in a header and may be seen by several
// translation units.
inline double pseudo_random(int /*index*/)
{
  return std::rand()/double(RAND_MAX);
}
 | 
			
		||||
 | 
			
		||||
// Returns std::rand() scaled by RAND_MAX, i.e. a value in [0,1]; both indices
// are ignored.
// inline: the definition lives in a header and may be seen by several
// translation units.
inline double pseudo_random(int /*index_i*/, int /*index_j*/)
{
  return std::rand()/double(RAND_MAX);
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Returns 0.0 whatever the index.
// inline: the definition lives in a header and may be seen by several
// translation units.
inline double null_function(int /*index*/)
{
  return 0.0;
}
 | 
			
		||||
 | 
			
		||||
// Returns 0.0 whatever the indices.
// inline: the definition lives in a header and may be seen by several
// translation units.
inline double null_function(int /*index_i*/, int /*index_j*/)
{
  return 0.0;
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -0,0 +1,64 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  init_matrix.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:19 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef INIT_MATRIX_HH
 | 
			
		||||
#define INIT_MATRIX_HH
 | 
			
		||||
 | 
			
		||||
// The Vector class must satisfy the following part of STL vector concept :
 | 
			
		||||
//            resize() method
 | 
			
		||||
//            [] operator for setting element
 | 
			
		||||
//            value_type defined
 | 
			
		||||
template<double init_function(int,int), class Vector>
 | 
			
		||||
BTL_DONT_INLINE void init_row(Vector & X, int size, int row){
 | 
			
		||||
 | 
			
		||||
  X.resize(size);
 | 
			
		||||
 | 
			
		||||
  for (unsigned int j=0;j<X.size();j++){
 | 
			
		||||
    X[j]=typename Vector::value_type(init_function(row,j));
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Matrix is a Vector of Vector
 | 
			
		||||
// The Matrix class must satisfy the following part of STL vector concept :
 | 
			
		||||
//            resize() method
 | 
			
		||||
//            [] operator for setting rows
 | 
			
		||||
template<double init_function(int,int),class Vector>
 | 
			
		||||
BTL_DONT_INLINE void init_matrix(Vector &  A, int size){
 | 
			
		||||
  A.resize(size);
 | 
			
		||||
  for (unsigned int row=0; row<A.size() ; row++){
 | 
			
		||||
    init_row<init_function>(A[row],size,row);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template<double init_function(int,int),class Matrix>
 | 
			
		||||
BTL_DONT_INLINE void init_matrix_symm(Matrix&  A, int size){
 | 
			
		||||
  A.resize(size);
 | 
			
		||||
  for (unsigned int row=0; row<A.size() ; row++)
 | 
			
		||||
    A[row].resize(size);
 | 
			
		||||
  for (unsigned int row=0; row<A.size() ; row++){
 | 
			
		||||
    A[row][row] = init_function(row,row);
 | 
			
		||||
    for (unsigned int col=0; col<row ; col++){
 | 
			
		||||
      double x = init_function(row,col);
 | 
			
		||||
      A[row][col] = A[col][row] = x;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -0,0 +1,37 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  init_vector.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:18 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef INIT_VECTOR_HH
 | 
			
		||||
#define INIT_VECTOR_HH
 | 
			
		||||
 | 
			
		||||
// The Vector class must satisfy the following part of STL vector concept :
 | 
			
		||||
//            resize() method
 | 
			
		||||
//            [] operator for setting element
 | 
			
		||||
//            value_type defined
 | 
			
		||||
// Resizes X to `size` elements and sets X[i] to init_function(i), converted
// to the container's value_type.
template<double init_function(int), class Vector>
void init_vector(Vector & X, int size){
  typedef typename Vector::value_type Element;

  X.resize(size);

  unsigned int idx = 0;
  while (idx < X.size()){
    X[idx] = Element(init_function(idx));
    ++idx;
  }
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -0,0 +1,80 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  bench_static.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:16 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef BENCH_STATIC_HH
 | 
			
		||||
#define BENCH_STATIC_HH
 | 
			
		||||
 | 
			
		||||
#include "btl.hh"
 | 
			
		||||
#include "bench_parameter.hh"
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "xy_file.hh"
 | 
			
		||||
#include "static/static_size_generator.hh"
 | 
			
		||||
#include "timers/portable_perf_analyzer.hh"
 | 
			
		||||
// #include "timers/mixed_perf_analyzer.hh"
 | 
			
		||||
// #include "timers/x86_perf_analyzer.hh"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template <template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface>
 | 
			
		||||
BTL_DONT_INLINE  void bench_static(void)
 | 
			
		||||
{
 | 
			
		||||
  if (BtlConfig::skipAction(Action<Interface<REAL_TYPE,10> >::name()))
 | 
			
		||||
    return;
 | 
			
		||||
 | 
			
		||||
  string filename = "bench_" + Action<Interface<REAL_TYPE,10> >::name() + ".dat";
 | 
			
		||||
 | 
			
		||||
  INFOS("starting " << filename);
 | 
			
		||||
 | 
			
		||||
  const int max_size = TINY_MV_MAX_SIZE;
 | 
			
		||||
 | 
			
		||||
  std::vector<double> tab_mflops;
 | 
			
		||||
  std::vector<double> tab_sizes;
 | 
			
		||||
 | 
			
		||||
  static_size_generator<max_size,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops);
 | 
			
		||||
 | 
			
		||||
  dump_xy_file(tab_sizes,tab_mflops,filename);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// default Perf Analyzer
 | 
			
		||||
template <template<class> class Action, template<class,int> class Interface>
 | 
			
		||||
BTL_DONT_INLINE  void bench_static(void)
 | 
			
		||||
{
 | 
			
		||||
  bench_static<Portable_Perf_Analyzer,Action,Interface>();
 | 
			
		||||
  //bench_static<Mixed_Perf_Analyzer,Action,Interface>();
 | 
			
		||||
  //bench_static<X86_Perf_Analyzer,Action,Interface>();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,66 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  intel_bench_fixed_size.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  mar d<>c 3 18:59:37 CET 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef _BENCH_FIXED_SIZE_HH_
 | 
			
		||||
#define _BENCH_FIXED_SIZE_HH_
 | 
			
		||||
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "function_time.hh"
 | 
			
		||||
 | 
			
		||||
template <class Action>
 | 
			
		||||
double bench_fixed_size(int size, unsigned long long  & nb_calc,unsigned long long & nb_init)
 | 
			
		||||
{
 | 
			
		||||
  
 | 
			
		||||
  Action action(size);
 | 
			
		||||
  
 | 
			
		||||
  double time_baseline=time_init(nb_init,action);
 | 
			
		||||
 | 
			
		||||
  while (time_baseline < MIN_TIME) {
 | 
			
		||||
 | 
			
		||||
    //INFOS("nb_init="<<nb_init);
 | 
			
		||||
    //INFOS("time_baseline="<<time_baseline);
 | 
			
		||||
    nb_init*=2;
 | 
			
		||||
    time_baseline=time_init(nb_init,action);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  time_baseline=time_baseline/(double(nb_init));
 | 
			
		||||
  
 | 
			
		||||
  double time_action=time_calculate(nb_calc,action);
 | 
			
		||||
  
 | 
			
		||||
  while (time_action < MIN_TIME) {
 | 
			
		||||
    
 | 
			
		||||
    nb_calc*=2;
 | 
			
		||||
    time_action=time_calculate(nb_calc,action);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  INFOS("nb_init="<<nb_init);
 | 
			
		||||
  INFOS("nb_calc="<<nb_calc);
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
  time_action=time_action/(double(nb_calc));
 | 
			
		||||
  
 | 
			
		||||
  action.check_result();
 | 
			
		||||
  
 | 
			
		||||
  time_action=time_action-time_baseline;
 | 
			
		||||
 | 
			
		||||
  return action.nb_op_base()/(time_action*1000000.0);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -0,0 +1,57 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  static_size_generator.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  mar d<>c 3 18:59:36 CET 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef _STATIC_SIZE_GENERATOR_HH
 | 
			
		||||
#define _STATIC_SIZE_GENERATOR_HH
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
//recursive generation of statically defined matrix and vector sizes
 | 
			
		||||
 | 
			
		||||
template <int SIZE,template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface> 
 | 
			
		||||
struct static_size_generator{
 | 
			
		||||
  static void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
 | 
			
		||||
  {
 | 
			
		||||
    tab_sizes.push_back(SIZE);
 | 
			
		||||
    std::cout << tab_sizes.back() << " \t" << std::flush;
 | 
			
		||||
    Perf_Analyzer<Action<Interface<REAL_TYPE,SIZE> > > perf_action;
 | 
			
		||||
    tab_mflops.push_back(perf_action.eval_mflops(SIZE));
 | 
			
		||||
    std::cout << tab_mflops.back() << " MFlops" << std::endl;
 | 
			
		||||
    static_size_generator<SIZE-1,Perf_Analyzer,Action,Interface>::go(tab_sizes,tab_mflops);
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
//recursion end
 | 
			
		||||
 | 
			
		||||
template <template<class> class Perf_Analyzer, template<class> class Action, template<class,int> class Interface> 
 | 
			
		||||
struct static_size_generator<1,Perf_Analyzer,Action,Interface>{  
 | 
			
		||||
  static  void go(vector<double> & tab_sizes, vector<double> & tab_mflops)
 | 
			
		||||
  {
 | 
			
		||||
    tab_sizes.push_back(1);
 | 
			
		||||
    Perf_Analyzer<Action<Interface<REAL_TYPE,1> > > perf_action;
 | 
			
		||||
    tab_mflops.push_back(perf_action.eval_mflops(1));
 | 
			
		||||
  };
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
@@ -0,0 +1,82 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  STL_perf_analyzer.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  mar d<>c 3 18:59:35 CET 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef _STL_PERF_ANALYSER_HH
 | 
			
		||||
#define _STL_PERF_ANALYSER_HH
 | 
			
		||||
 | 
			
		||||
#include "STL_timer.hh"
 | 
			
		||||
#include "bench_parameter.hh"
 | 
			
		||||
 | 
			
		||||
template<class ACTION>
 | 
			
		||||
class STL_Perf_Analyzer{
 | 
			
		||||
public:  
 | 
			
		||||
  STL_Perf_Analyzer(unsigned long long nb_sample=DEFAULT_NB_SAMPLE):_nb_sample(nb_sample),_chronos()
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("STL_Perf_Analyzer Ctor");
 | 
			
		||||
  }; 
 | 
			
		||||
  STL_Perf_Analyzer( const STL_Perf_Analyzer & ){
 | 
			
		||||
    INFOS("Copy Ctor not implemented");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  };
 | 
			
		||||
  ~STL_Perf_Analyzer( void ){
 | 
			
		||||
    MESSAGE("STL_Perf_Analyzer Dtor");
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
  inline double eval_mflops(int size)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    ACTION action(size);
 | 
			
		||||
 | 
			
		||||
    _chronos.start_baseline(_nb_sample);
 | 
			
		||||
      
 | 
			
		||||
    do {
 | 
			
		||||
 | 
			
		||||
      action.initialize();
 | 
			
		||||
    } while (_chronos.check());
 | 
			
		||||
 | 
			
		||||
    double baseline_time=_chronos.get_time();
 | 
			
		||||
 | 
			
		||||
    _chronos.start(_nb_sample);
 | 
			
		||||
    do {
 | 
			
		||||
      action.initialize();
 | 
			
		||||
      action.calculate();
 | 
			
		||||
    } while (_chronos.check());
 | 
			
		||||
 | 
			
		||||
    double calculate_time=_chronos.get_time();
 | 
			
		||||
 | 
			
		||||
    double corrected_time=calculate_time-baseline_time;
 | 
			
		||||
    
 | 
			
		||||
    //    cout << size <<" "<<baseline_time<<" "<<calculate_time<<" "<<corrected_time<<" "<<action.nb_op_base() << endl;    
 | 
			
		||||
    
 | 
			
		||||
    return action.nb_op_base()/(corrected_time*1000000.0);
 | 
			
		||||
    //return action.nb_op_base()/(calculate_time*1000000.0);
 | 
			
		||||
    
 | 
			
		||||
  }
 | 
			
		||||
private:
 | 
			
		||||
 | 
			
		||||
  STL_Timer _chronos;
 | 
			
		||||
  unsigned long long _nb_sample;
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
  
 | 
			
		||||
#endif
 | 
			
		||||
@@ -0,0 +1,78 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  STL_Timer.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  mar d<>c 3 18:59:35 CET 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
// STL Timer Class. Adapted (L.P.) from the timer class by Musser et al.
 | 
			
		||||
// described in the book: STL Tutorial and Reference Guide.
 | 
			
		||||
// Define a timer class for analyzing algorithm performance.
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <iomanip>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include <map>
 | 
			
		||||
#include <algorithm>
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
// Coarse timer based on time(): it counts how many times check() is called
// between two changes of the value returned by time(0). Each such count is
// one sample; get_time() reports the reciprocal of the median sample.
class STL_Timer {
public:
  // Default constructor: every member starts in a defined state so that
  // get_time() is safe even before start() has been called.
  STL_Timer()
    : reps(0), iterations(), initial(0), final(0), count(0),
      baseline(false), baseline_time(0.0) {}

  // Start a series of r trials:
  void start(unsigned int r){
    reps = r;
    count = 0;
    iterations.clear();
    iterations.reserve(reps);
    initial = time(0);
  }

  // Start a series of r trials to determine baseline time:
  void start_baseline(unsigned int r)
  {
    baseline = true;
    start(r);
  }

  // Records one loop iteration. Returns true while more samples are needed,
  // false once `reps` samples have been collected.
  bool check()
  {
    ++count;
    final = time(0);
    if (initial < final) {
      // The clock value changed: close the current sample and open a new one.
      iterations.push_back(count);
      initial = final;
      count = 0;
    }
    return (iterations.size() < reps);
  }

  // Returns 1 / (median number of iterations per sample), or 0.0 when no
  // sample has been collected. Sorts the stored samples in place.
  double get_time( void )
  {
    if (iterations.empty())
      return 0.0;
    std::sort(iterations.begin(), iterations.end());
    return 1.0/iterations[iterations.size()/2];
  }

private:
  unsigned int reps;  // Number of trials
  // For storing loop iterations of a trial
  std::vector<long> iterations;
  // For saving initial and final times of a trial
  time_t initial, final;
  // For counting loop iterations of a trial
  unsigned long count;
  // true if this is a baseline computation, false otherwise
  bool baseline;
  // For recording the baseline time 
  double baseline_time;
};
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,73 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  mixed_perf_analyzer.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  mar d<>c 3 18:59:36 CET 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef _MIXED_PERF_ANALYSER_HH
 | 
			
		||||
#define _MIXED_PERF_ANALYSER_HH
 | 
			
		||||
 | 
			
		||||
#include "x86_perf_analyzer.hh"
 | 
			
		||||
#include "portable_perf_analyzer.hh"
 | 
			
		||||
 | 
			
		||||
// choose portable perf analyzer for long calculations and x86 analyser for short ones
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class Action>
 | 
			
		||||
class Mixed_Perf_Analyzer{
 | 
			
		||||
  
 | 
			
		||||
public:  
 | 
			
		||||
  Mixed_Perf_Analyzer( void ):_x86pa(),_ppa(),_use_ppa(true)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("Mixed_Perf_Analyzer Ctor");
 | 
			
		||||
  }; 
 | 
			
		||||
  Mixed_Perf_Analyzer( const Mixed_Perf_Analyzer & ){
 | 
			
		||||
    INFOS("Copy Ctor not implemented");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  };
 | 
			
		||||
  ~Mixed_Perf_Analyzer( void ){
 | 
			
		||||
    MESSAGE("Mixed_Perf_Analyzer Dtor");
 | 
			
		||||
  };
 | 
			
		||||
    
 | 
			
		||||
  
 | 
			
		||||
  inline double eval_mflops(int size)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    double result=0.0;
 | 
			
		||||
    if (_use_ppa){      
 | 
			
		||||
      result=_ppa.eval_mflops(size);
 | 
			
		||||
      if (_ppa.get_nb_calc()>DEFAULT_NB_SAMPLE){_use_ppa=false;}      
 | 
			
		||||
    }
 | 
			
		||||
    else{      
 | 
			
		||||
      result=_x86pa.eval_mflops(size);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return result;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
 | 
			
		||||
  Portable_Perf_Analyzer<Action> _ppa;
 | 
			
		||||
  X86_Perf_Analyzer<Action> _x86pa;
 | 
			
		||||
  bool _use_ppa;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
    
 | 
			
		||||
  
 | 
			
		||||
@@ -0,0 +1,103 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  portable_perf_analyzer.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  mar d<>c 3 18:59:35 CET 2002
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef _PORTABLE_PERF_ANALYZER_HH
 | 
			
		||||
#define _PORTABLE_PERF_ANALYZER_HH
 | 
			
		||||
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "timers/portable_timer.hh"
 | 
			
		||||
 | 
			
		||||
template <class Action>
 | 
			
		||||
class Portable_Perf_Analyzer{
 | 
			
		||||
public:
 | 
			
		||||
  Portable_Perf_Analyzer( ):_nb_calc(0), m_time_action(0), _chronos(){
 | 
			
		||||
    MESSAGE("Portable_Perf_Analyzer Ctor");
 | 
			
		||||
  };
 | 
			
		||||
  Portable_Perf_Analyzer( const Portable_Perf_Analyzer & ){
 | 
			
		||||
    INFOS("Copy Ctor not implemented");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  };
 | 
			
		||||
  ~Portable_Perf_Analyzer(){
 | 
			
		||||
    MESSAGE("Portable_Perf_Analyzer Dtor");
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE double eval_mflops(int size)
 | 
			
		||||
  {
 | 
			
		||||
    Action action(size);
 | 
			
		||||
 | 
			
		||||
//     action.initialize();
 | 
			
		||||
//     time_action = time_calculate(action);
 | 
			
		||||
    while (m_time_action < MIN_TIME)
 | 
			
		||||
    {
 | 
			
		||||
      if(_nb_calc==0) _nb_calc = 1;
 | 
			
		||||
      else            _nb_calc *= 2;
 | 
			
		||||
      action.initialize();
 | 
			
		||||
      m_time_action = time_calculate(action);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // optimize
 | 
			
		||||
    for (int i=1; i<BtlConfig::Instance.tries; ++i)
 | 
			
		||||
    {
 | 
			
		||||
      Action _action(size);
 | 
			
		||||
      std::cout << " " << _action.nb_op_base()*_nb_calc/(m_time_action*1e6) << " ";
 | 
			
		||||
      _action.initialize();
 | 
			
		||||
      m_time_action = std::min(m_time_action, time_calculate(_action));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    double time_action = m_time_action / (double(_nb_calc));
 | 
			
		||||
 | 
			
		||||
    // check
 | 
			
		||||
    if (BtlConfig::Instance.checkResults && size<128)
 | 
			
		||||
    {
 | 
			
		||||
      action.initialize();
 | 
			
		||||
      action.calculate();
 | 
			
		||||
      action.check_result();
 | 
			
		||||
    }
 | 
			
		||||
    return action.nb_op_base()/(time_action*1e6);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  BTL_DONT_INLINE double time_calculate(Action & action)
 | 
			
		||||
  {
 | 
			
		||||
    // time measurement
 | 
			
		||||
    action.calculate();
 | 
			
		||||
    _chronos.start();
 | 
			
		||||
    for (unsigned int ii=0;ii<_nb_calc;ii++)
 | 
			
		||||
    {
 | 
			
		||||
      action.calculate();
 | 
			
		||||
    }
 | 
			
		||||
    _chronos.stop();
 | 
			
		||||
    return _chronos.user_time();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  unsigned long long get_nb_calc()
 | 
			
		||||
  {
 | 
			
		||||
    return _nb_calc;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  unsigned long long _nb_calc;
 | 
			
		||||
  double m_time_action;
 | 
			
		||||
  Portable_Timer _chronos;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif //_PORTABLE_PERF_ANALYZER_HH
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,134 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  portable_perf_analyzer.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  mar d<>c 3 18:59:35 CET 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef _PORTABLE_PERF_ANALYZER_HH
 | 
			
		||||
#define _PORTABLE_PERF_ANALYZER_HH
 | 
			
		||||
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "timers/portable_timer.hh"
 | 
			
		||||
 | 
			
		||||
template <class Action>
 | 
			
		||||
class Portable_Perf_Analyzer{
 | 
			
		||||
public:
 | 
			
		||||
  Portable_Perf_Analyzer( void ):_nb_calc(1),_nb_init(1),_chronos(){
 | 
			
		||||
    MESSAGE("Portable_Perf_Analyzer Ctor");
 | 
			
		||||
  };
 | 
			
		||||
  Portable_Perf_Analyzer( const Portable_Perf_Analyzer & ){
 | 
			
		||||
    INFOS("Copy Ctor not implemented");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  };
 | 
			
		||||
  ~Portable_Perf_Analyzer( void ){
 | 
			
		||||
    MESSAGE("Portable_Perf_Analyzer Dtor");
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  inline double eval_mflops(int size)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    Action action(size);
 | 
			
		||||
 | 
			
		||||
//     double time_baseline = time_init(action);
 | 
			
		||||
//     while (time_baseline < MIN_TIME_INIT)
 | 
			
		||||
//     {
 | 
			
		||||
//       _nb_init *= 2;
 | 
			
		||||
//       time_baseline = time_init(action);
 | 
			
		||||
//     }
 | 
			
		||||
//
 | 
			
		||||
//     // optimize
 | 
			
		||||
//     for (int i=1; i<NB_TRIES; ++i)
 | 
			
		||||
//       time_baseline = std::min(time_baseline, time_init(action));
 | 
			
		||||
//
 | 
			
		||||
//     time_baseline = time_baseline/(double(_nb_init));
 | 
			
		||||
 | 
			
		||||
    double time_action = time_calculate(action);
 | 
			
		||||
    while (time_action < MIN_TIME)
 | 
			
		||||
    {
 | 
			
		||||
      _nb_calc *= 2;
 | 
			
		||||
      time_action = time_calculate(action);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // optimize
 | 
			
		||||
    for (int i=1; i<NB_TRIES; ++i)
 | 
			
		||||
      time_action = std::min(time_action, time_calculate(action));
 | 
			
		||||
 | 
			
		||||
//     INFOS("size="<<size);
 | 
			
		||||
//     INFOS("_nb_init="<<_nb_init);
 | 
			
		||||
//     INFOS("_nb_calc="<<_nb_calc);
 | 
			
		||||
 | 
			
		||||
    time_action = time_action / (double(_nb_calc));
 | 
			
		||||
 | 
			
		||||
    action.check_result();
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    double time_baseline = time_init(action);
 | 
			
		||||
    for (int i=1; i<NB_TRIES; ++i)
 | 
			
		||||
      time_baseline = std::min(time_baseline, time_init(action));
 | 
			
		||||
    time_baseline = time_baseline/(double(_nb_init));
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//     INFOS("time_baseline="<<time_baseline);
 | 
			
		||||
//     INFOS("time_action="<<time_action);
 | 
			
		||||
 | 
			
		||||
    time_action = time_action - time_baseline;
 | 
			
		||||
 | 
			
		||||
//     INFOS("time_corrected="<<time_action);
 | 
			
		||||
 | 
			
		||||
    return action.nb_op_base()/(time_action*1000000.0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline double time_init(Action & action)
 | 
			
		||||
  {
 | 
			
		||||
    // time measurement
 | 
			
		||||
    _chronos.start();
 | 
			
		||||
    for (int ii=0; ii<_nb_init; ii++)
 | 
			
		||||
      action.initialize();
 | 
			
		||||
    _chronos.stop();
 | 
			
		||||
    return _chronos.user_time();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  inline double time_calculate(Action & action)
 | 
			
		||||
  {
 | 
			
		||||
    // time measurement
 | 
			
		||||
    _chronos.start();
 | 
			
		||||
    for (int ii=0;ii<_nb_calc;ii++)
 | 
			
		||||
    {
 | 
			
		||||
      action.initialize();
 | 
			
		||||
      action.calculate();
 | 
			
		||||
    }
 | 
			
		||||
    _chronos.stop();
 | 
			
		||||
    return _chronos.user_time();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  unsigned long long get_nb_calc( void )
 | 
			
		||||
  {
 | 
			
		||||
    return _nb_calc;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  unsigned long long _nb_calc;
 | 
			
		||||
  unsigned long long _nb_init;
 | 
			
		||||
  Portable_Timer _chronos;
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif //_PORTABLE_PERF_ANALYZER_HH
 | 
			
		||||
							
								
								
									
										187
									
								
								cs440-acg/ext/eigen/bench/btl/generic_bench/timers/portable_timer.hh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										187
									
								
								cs440-acg/ext/eigen/bench/btl/generic_bench/timers/portable_timer.hh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,187 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  portable_timer.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)> from boost lib
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:17 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
//  simple_time extracted from the boost library
 | 
			
		||||
//
 | 
			
		||||
#ifndef _PORTABLE_TIMER_HH
 | 
			
		||||
#define _PORTABLE_TIMER_HH
 | 
			
		||||
 | 
			
		||||
#include <ctime>
 | 
			
		||||
#include <cstdlib>
 | 
			
		||||
 | 
			
		||||
#include <time.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define USEC_IN_SEC 1000000
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//  timer  -------------------------------------------------------------------//
 | 
			
		||||
 | 
			
		||||
//  A timer object measures CPU time.
 | 
			
		||||
#if defined(_MSC_VER)
 | 
			
		||||
 | 
			
		||||
#define NOMINMAX
 | 
			
		||||
#include <windows.h>
 | 
			
		||||
 | 
			
		||||
/*#ifndef hr_timer
 | 
			
		||||
#include "hr_time.h"
 | 
			
		||||
#define hr_timer
 | 
			
		||||
#endif*/
 | 
			
		||||
 | 
			
		||||
 class Portable_Timer
 | 
			
		||||
 {
 | 
			
		||||
  public:
 | 
			
		||||
 | 
			
		||||
   typedef struct {
 | 
			
		||||
    LARGE_INTEGER start;
 | 
			
		||||
    LARGE_INTEGER stop;
 | 
			
		||||
   } stopWatch;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
   Portable_Timer()
 | 
			
		||||
   {
 | 
			
		||||
	 startVal.QuadPart = 0;
 | 
			
		||||
	 stopVal.QuadPart = 0;
 | 
			
		||||
	 QueryPerformanceFrequency(&frequency);
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
   void start() { QueryPerformanceCounter(&startVal); }
 | 
			
		||||
 | 
			
		||||
   void stop() { QueryPerformanceCounter(&stopVal); }
 | 
			
		||||
 | 
			
		||||
   double elapsed() {
 | 
			
		||||
	 LARGE_INTEGER time;
 | 
			
		||||
     time.QuadPart = stopVal.QuadPart - startVal.QuadPart;
 | 
			
		||||
     return LIToSecs(time);
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
   double user_time() { return elapsed(); }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 private:
 | 
			
		||||
 | 
			
		||||
   double LIToSecs(LARGE_INTEGER& L) {
 | 
			
		||||
     return ((double)L.QuadPart /(double)frequency.QuadPart) ;
 | 
			
		||||
   }
 | 
			
		||||
 | 
			
		||||
   LARGE_INTEGER startVal;
 | 
			
		||||
   LARGE_INTEGER stopVal;
 | 
			
		||||
   LARGE_INTEGER frequency;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 }; // Portable_Timer
 | 
			
		||||
 | 
			
		||||
#elif defined(__APPLE__)
 | 
			
		||||
#include <CoreServices/CoreServices.h>
 | 
			
		||||
#include <mach/mach_time.h>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Portable_Timer
 | 
			
		||||
{
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  Portable_Timer()
 | 
			
		||||
  {
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void start()
 | 
			
		||||
  {
 | 
			
		||||
    m_start_time = double(mach_absolute_time())*1e-9;;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void stop()
 | 
			
		||||
  {
 | 
			
		||||
    m_stop_time = double(mach_absolute_time())*1e-9;;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double elapsed()
 | 
			
		||||
  {
 | 
			
		||||
    return  user_time();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double user_time()
 | 
			
		||||
  {
 | 
			
		||||
    return m_stop_time - m_start_time;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
 | 
			
		||||
  double m_stop_time, m_start_time;
 | 
			
		||||
 | 
			
		||||
}; // Portable_Timer (Apple)
 | 
			
		||||
 | 
			
		||||
#else
 | 
			
		||||
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#include <sys/resource.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <sys/times.h>
 | 
			
		||||
 | 
			
		||||
class Portable_Timer
 | 
			
		||||
{
 | 
			
		||||
 public:
 | 
			
		||||
 | 
			
		||||
  Portable_Timer()
 | 
			
		||||
  {
 | 
			
		||||
    m_clkid = BtlConfig::Instance.realclock ? CLOCK_REALTIME : CLOCK_PROCESS_CPUTIME_ID;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  Portable_Timer(int clkid) : m_clkid(clkid)
 | 
			
		||||
  {}
 | 
			
		||||
 | 
			
		||||
  void start()
 | 
			
		||||
  {
 | 
			
		||||
    timespec ts;
 | 
			
		||||
    clock_gettime(m_clkid, &ts);
 | 
			
		||||
    m_start_time = double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void stop()
 | 
			
		||||
  {
 | 
			
		||||
    timespec ts;
 | 
			
		||||
    clock_gettime(m_clkid, &ts);
 | 
			
		||||
    m_stop_time = double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double elapsed()
 | 
			
		||||
  {
 | 
			
		||||
    return  user_time();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double user_time()
 | 
			
		||||
  {
 | 
			
		||||
    return m_stop_time - m_start_time;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
 | 
			
		||||
  int m_clkid;
 | 
			
		||||
  double m_stop_time, m_start_time;
 | 
			
		||||
 | 
			
		||||
}; // Portable_Timer (Linux)
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#endif  // PORTABLE_TIMER_HPP
 | 
			
		||||
@@ -0,0 +1,108 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  x86_perf_analyzer.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>
 | 
			
		||||
// Copyright (C) EDF R&D,  mar déc 3 18:59:35 CET 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef _X86_PERF_ANALYSER_HH
 | 
			
		||||
#define _X86_PERF_ANALYSER_HH
 | 
			
		||||
 | 
			
		||||
#include "x86_timer.hh"
 | 
			
		||||
#include "bench_parameter.hh"
 | 
			
		||||
 | 
			
		||||
template<class ACTION>
 | 
			
		||||
class X86_Perf_Analyzer{
 | 
			
		||||
public:
 | 
			
		||||
  X86_Perf_Analyzer( unsigned long long nb_sample=DEFAULT_NB_SAMPLE):_nb_sample(nb_sample),_chronos()
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("X86_Perf_Analyzer Ctor");
 | 
			
		||||
    _chronos.find_frequency();
 | 
			
		||||
  };
 | 
			
		||||
  X86_Perf_Analyzer( const X86_Perf_Analyzer & ){
 | 
			
		||||
    INFOS("Copy Ctor not implemented");
 | 
			
		||||
    exit(0);
 | 
			
		||||
  };
 | 
			
		||||
  ~X86_Perf_Analyzer( void ){
 | 
			
		||||
    MESSAGE("X86_Perf_Analyzer Dtor");
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  inline double eval_mflops(int size)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    ACTION action(size);
 | 
			
		||||
 | 
			
		||||
    int nb_loop=5;
 | 
			
		||||
    double calculate_time=0.0;
 | 
			
		||||
    double baseline_time=0.0;
 | 
			
		||||
 | 
			
		||||
    for (int j=0 ; j < nb_loop ; j++){
 | 
			
		||||
 | 
			
		||||
      _chronos.clear();
 | 
			
		||||
 | 
			
		||||
      for(int i=0 ; i < _nb_sample  ; i++)
 | 
			
		||||
      {
 | 
			
		||||
        _chronos.start();
 | 
			
		||||
        action.initialize();
 | 
			
		||||
        action.calculate();
 | 
			
		||||
        _chronos.stop();
 | 
			
		||||
        _chronos.add_get_click();
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      calculate_time += double(_chronos.get_shortest_clicks())/_chronos.frequency();
 | 
			
		||||
 | 
			
		||||
      if (j==0) action.check_result();
 | 
			
		||||
 | 
			
		||||
      _chronos.clear();
 | 
			
		||||
 | 
			
		||||
      for(int i=0 ; i < _nb_sample  ; i++)
 | 
			
		||||
      {
 | 
			
		||||
        _chronos.start();
 | 
			
		||||
        action.initialize();
 | 
			
		||||
        _chronos.stop();
 | 
			
		||||
        _chronos.add_get_click();
 | 
			
		||||
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      baseline_time+=double(_chronos.get_shortest_clicks())/_chronos.frequency();
 | 
			
		||||
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    double corrected_time = (calculate_time-baseline_time)/double(nb_loop);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
//     INFOS("_nb_sample="<<_nb_sample);
 | 
			
		||||
//     INFOS("baseline_time="<<baseline_time);
 | 
			
		||||
//     INFOS("calculate_time="<<calculate_time);
 | 
			
		||||
//     INFOS("corrected_time="<<corrected_time);
 | 
			
		||||
 | 
			
		||||
//    cout << size <<" "<<baseline_time<<" "<<calculate_time<<" "<<corrected_time<<" "<<action.nb_op_base() << endl;
 | 
			
		||||
 | 
			
		||||
    return action.nb_op_base()/(corrected_time*1000000.0);
 | 
			
		||||
    //return action.nb_op_base()/(calculate_time*1000000.0);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
 | 
			
		||||
  X86_Timer _chronos;
 | 
			
		||||
  unsigned long long _nb_sample;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										246
									
								
								cs440-acg/ext/eigen/bench/btl/generic_bench/timers/x86_timer.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										246
									
								
								cs440-acg/ext/eigen/bench/btl/generic_bench/timers/x86_timer.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,246 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  x86_timer.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  mar déc 3 18:59:35 CET 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef _X86_TIMER_HH
 | 
			
		||||
#define _X86_TIMER_HH
 | 
			
		||||
 | 
			
		||||
#include <sys/time.h>
 | 
			
		||||
#include <sys/resource.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <sys/times.h>
 | 
			
		||||
//#include "system_time.h"
 | 
			
		||||
#define u32 unsigned int
 | 
			
		||||
#include <asm/msr.h>
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include <map>
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
 | 
			
		||||
// clock frequency of the machine, in Hz
 | 
			
		||||
//#define FREQUENCY 648000000
 | 
			
		||||
//#define FREQUENCY 1400000000
 | 
			
		||||
#define FREQUENCY 1695000000
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class X86_Timer {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  X86_Timer( void ):_frequency(FREQUENCY),_nb_sample(0)
 | 
			
		||||
  {
 | 
			
		||||
    MESSAGE("X86_Timer Default Ctor");    
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline void start( void ){
 | 
			
		||||
 | 
			
		||||
    rdtsc(_click_start.n32[0],_click_start.n32[1]);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  inline void stop( void ){
 | 
			
		||||
 | 
			
		||||
    rdtsc(_click_stop.n32[0],_click_stop.n32[1]);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
  inline double frequency( void ){
 | 
			
		||||
    return _frequency;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double get_elapsed_time_in_second( void ){
 | 
			
		||||
 | 
			
		||||
    return (_click_stop.n64-_click_start.n64)/double(FREQUENCY);
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  }    
 | 
			
		||||
 | 
			
		||||
  unsigned long long  get_click( void ){
 | 
			
		||||
    
 | 
			
		||||
    return (_click_stop.n64-_click_start.n64);
 | 
			
		||||
 | 
			
		||||
  }    
 | 
			
		||||
 | 
			
		||||
  inline void find_frequency( void ){
 | 
			
		||||
 | 
			
		||||
    time_t initial, final;
 | 
			
		||||
    int dummy=2;
 | 
			
		||||
 | 
			
		||||
    initial = time(0);
 | 
			
		||||
    start();
 | 
			
		||||
    do {
 | 
			
		||||
      dummy+=2;
 | 
			
		||||
    }
 | 
			
		||||
    while(time(0)==initial);
 | 
			
		||||
    // On est au debut d'un cycle d'une seconde !!!
 | 
			
		||||
    initial = time(0);
 | 
			
		||||
    start();
 | 
			
		||||
    do {
 | 
			
		||||
      dummy+=2;
 | 
			
		||||
    }
 | 
			
		||||
    while(time(0)==initial);
 | 
			
		||||
    final=time(0);
 | 
			
		||||
    stop();
 | 
			
		||||
    //    INFOS("fine grained time : "<<  get_elapsed_time_in_second());
 | 
			
		||||
    //  INFOS("coarse grained time : "<<  final-initial);
 | 
			
		||||
    _frequency=_frequency*get_elapsed_time_in_second()/double(final-initial);
 | 
			
		||||
    ///  INFOS("CPU frequency : "<<  _frequency);        
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void  add_get_click( void ){
 | 
			
		||||
       
 | 
			
		||||
    _nb_sample++;
 | 
			
		||||
    _counted_clicks[get_click()]++;
 | 
			
		||||
    fill_history_clicks();
 | 
			
		||||
 | 
			
		||||
  }    
 | 
			
		||||
 | 
			
		||||
  void dump_statistics(string filemane){
 | 
			
		||||
    
 | 
			
		||||
    ofstream outfile (filemane.c_str(),ios::out) ;
 | 
			
		||||
 | 
			
		||||
    std::map<unsigned long long , unsigned long long>::iterator itr;
 | 
			
		||||
    for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end()  ; itr++)
 | 
			
		||||
      {      
 | 
			
		||||
      outfile  << (*itr).first << "  " << (*itr).second << endl ;       
 | 
			
		||||
      }      
 | 
			
		||||
    
 | 
			
		||||
    outfile.close();
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void dump_history(string filemane){
 | 
			
		||||
    
 | 
			
		||||
    ofstream outfile (filemane.c_str(),ios::out) ;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    for(int i=0 ; i<_history_mean_clicks.size() ; i++)
 | 
			
		||||
      {      
 | 
			
		||||
	outfile  << i << " " 
 | 
			
		||||
		 << _history_mean_clicks[i] << " " 
 | 
			
		||||
		 << _history_shortest_clicks[i] << " " 
 | 
			
		||||
		 << _history_most_occured_clicks[i] << endl ;
 | 
			
		||||
      }      
 | 
			
		||||
    
 | 
			
		||||
    outfile.close();
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
     
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  double get_mean_clicks( void ){
 | 
			
		||||
    
 | 
			
		||||
    std::map<unsigned long long,unsigned long long>::iterator itr;
 | 
			
		||||
    
 | 
			
		||||
    unsigned long long mean_clicks=0;
 | 
			
		||||
 | 
			
		||||
    for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end()  ; itr++)
 | 
			
		||||
      {      
 | 
			
		||||
	
 | 
			
		||||
	mean_clicks+=(*itr).second*(*itr).first;
 | 
			
		||||
      }      
 | 
			
		||||
 | 
			
		||||
    return mean_clicks/double(_nb_sample);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  double get_shortest_clicks( void ){
 | 
			
		||||
    
 | 
			
		||||
    return double((*_counted_clicks.begin()).first);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  void fill_history_clicks( void ){
 | 
			
		||||
 | 
			
		||||
    _history_mean_clicks.push_back(get_mean_clicks());
 | 
			
		||||
    _history_shortest_clicks.push_back(get_shortest_clicks());
 | 
			
		||||
    _history_most_occured_clicks.push_back(get_most_occured_clicks());
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  double get_most_occured_clicks( void ){
 | 
			
		||||
 | 
			
		||||
    unsigned long long moc=0;
 | 
			
		||||
    unsigned long long max_occurence=0;
 | 
			
		||||
 | 
			
		||||
    std::map<unsigned long long,unsigned long long>::iterator itr;
 | 
			
		||||
 | 
			
		||||
    for(itr=_counted_clicks.begin() ; itr!=_counted_clicks.end()  ; itr++)
 | 
			
		||||
      {      
 | 
			
		||||
	
 | 
			
		||||
	if (max_occurence<=(*itr).second){
 | 
			
		||||
	  max_occurence=(*itr).second;
 | 
			
		||||
	  moc=(*itr).first;
 | 
			
		||||
	}
 | 
			
		||||
      }      
 | 
			
		||||
    
 | 
			
		||||
    return double(moc);    
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  void clear( void )
 | 
			
		||||
  {
 | 
			
		||||
    _counted_clicks.clear();
 | 
			
		||||
 | 
			
		||||
    _history_mean_clicks.clear();
 | 
			
		||||
    _history_shortest_clicks.clear();
 | 
			
		||||
    _history_most_occured_clicks.clear();
 | 
			
		||||
 | 
			
		||||
    _nb_sample=0;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    
 | 
			
		||||
private :
 | 
			
		||||
  
 | 
			
		||||
  union
 | 
			
		||||
  {
 | 
			
		||||
    unsigned long int n32[2] ;
 | 
			
		||||
    unsigned long long n64 ;
 | 
			
		||||
  } _click_start;
 | 
			
		||||
 | 
			
		||||
  union
 | 
			
		||||
  {
 | 
			
		||||
    unsigned long int n32[2] ;
 | 
			
		||||
    unsigned long long n64 ;
 | 
			
		||||
  } _click_stop;
 | 
			
		||||
 | 
			
		||||
  double _frequency ;
 | 
			
		||||
 | 
			
		||||
  map<unsigned long long,unsigned long long> _counted_clicks;
 | 
			
		||||
 | 
			
		||||
  vector<double> _history_mean_clicks;
 | 
			
		||||
  vector<double> _history_shortest_clicks;
 | 
			
		||||
  vector<double> _history_most_occured_clicks;
 | 
			
		||||
 | 
			
		||||
  unsigned long long _nb_sample;
 | 
			
		||||
 | 
			
		||||
  
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -0,0 +1,70 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  size_lin_log.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  mar déc 3 18:59:37 CET 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef SIZE_LIN_LOG
 | 
			
		||||
#define SIZE_LIN_LOG
 | 
			
		||||
 | 
			
		||||
#include "size_log.hh"
 | 
			
		||||
 | 
			
		||||
// Fills X with nb_point sizes. Up to 10 points: 1, 2, ..., nb_point.
// More than 10 points: 1..9 followed by the nb_point-9 values produced by
// size_log() between 10 and size_max. The second argument is ignored.
template<class Vector>
void size_lin_log(const int nb_point, const int /*size_min*/, const int size_max, Vector & X)
{
  const int linear_limit=10;
  const int linear_count=9;

  X.resize(nb_point);

  // Small requests are purely linear.
  if (nb_point<=linear_limit){
    for (int k=0;k<nb_point;++k)
      X[k]=k+1;
    return;
  }

  // Linear head: 1..9.
  for (int k=0;k<linear_count;++k)
    X[k]=k+1;

  // Tail computed by size_log(), copied behind the linear head.
  Vector log_part;
  size_log(nb_point-linear_count,linear_limit,size_max,log_part);

  for (int k=linear_count;k<nb_point;++k)
    X[k]=log_part[k-linear_count];
}
 | 
			
		||||
  
 | 
			
		||||
#endif
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -0,0 +1,54 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  size_log.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:17 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef SIZE_LOG
 | 
			
		||||
#define SIZE_LOG
 | 
			
		||||
 | 
			
		||||
#include "math.h"
 | 
			
		||||
// The Vector class must satisfy the following part of STL vector concept :
 | 
			
		||||
//            resize() method
 | 
			
		||||
//            [] operator for setting elements
 | 
			
		||||
// the vector elements must be int-compatible.
 | 
			
		||||
// Fills X with nb_point sizes spread evenly in log space between size_min and
// size_max: X[i] = int(exp(log(size_min) + i * step)), the step being
// (log(size_max) - log(size_min)) / (nb_point - 1).
// With a single point the step is taken as 0, so X[0] is derived from
// size_min alone instead of dividing by zero.
template<class Vector>
void size_log(const int nb_point, const int size_min, const int size_max, Vector & X)
{
  X.resize(nb_point);

  float ls_min=log(float(size_min));
  float ls_max=log(float(size_max));

  // nb_point-1 is 0 for a single point: keep the step at 0 in that case rather
  // than producing inf/NaN and an undefined float-to-int conversion below.
  float delta_ls=0.0;
  if (nb_point>1)
    delta_ls=(ls_max-ls_min)/(float(nb_point-1));

  for (int i=0;i<nb_point;i++){

    float ls = ls_min + float(i)*delta_ls ;

    // Truncation toward zero, as in the original sampling.
    X[i]=int(exp(ls));
  }

}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -0,0 +1,90 @@
 | 
			
		||||
//=============================================================================
 | 
			
		||||
// File      : utilities.h
 | 
			
		||||
// Created   : mar jun 19 13:18:14 CEST 2001
 | 
			
		||||
// Author    : Antoine YESSAYAN, Paul RASCLE, EDF
 | 
			
		||||
// Project   : SALOME
 | 
			
		||||
// Copyright : EDF 2001
 | 
			
		||||
// $Header$
 | 
			
		||||
//=============================================================================
 | 
			
		||||
 | 
			
		||||
/* ---  Definition macros file to print information if _DEBUG_ is defined --- */
 | 
			
		||||
 | 
			
		||||
# ifndef UTILITIES_H
 | 
			
		||||
# define UTILITIES_H
 | 
			
		||||
 | 
			
		||||
# include <stdlib.h>
 | 
			
		||||
//# include <iostream> ok for gcc3.01
 | 
			
		||||
# include <iostream>
 | 
			
		||||
 | 
			
		||||
/* ---  INFOS is always defined (without _DEBUG_): to be used for warnings, with release version --- */
 | 
			
		||||
 | 
			
		||||
# define HEREWEARE cout<<flush ; cerr << __FILE__ << " [" << __LINE__ << "] : " << flush ;
 | 
			
		||||
# define INFOS(chain) {HEREWEARE ; cerr << chain << endl ;}
 | 
			
		||||
# define PYSCRIPT(chain) {cout<<flush ; cerr << "---PYSCRIPT--- " << chain << endl ;}
 | 
			
		||||
 | 
			
		||||
/* --- To print date and time of compilation of current source on stdout --- */
 | 
			
		||||
 | 
			
		||||
# if defined ( __GNUC__ )
 | 
			
		||||
# define COMPILER		"g++" ;
 | 
			
		||||
# elif defined ( __sun )
 | 
			
		||||
# define COMPILER		"CC" ;
 | 
			
		||||
# elif defined ( __KCC )
 | 
			
		||||
# define COMPILER		"KCC" ;
 | 
			
		||||
# elif defined ( __PGI )
 | 
			
		||||
# define COMPILER		"pgCC" ;
 | 
			
		||||
# else
 | 
			
		||||
# define COMPILER		"undefined" ;
 | 
			
		||||
# endif
 | 
			
		||||
 | 
			
		||||
# ifdef INFOS_COMPILATION
 | 
			
		||||
# error INFOS_COMPILATION already defined
 | 
			
		||||
# endif
 | 
			
		||||
# define INFOS_COMPILATION	{\
 | 
			
		||||
					cerr << flush;\
 | 
			
		||||
					cout << __FILE__ ;\
 | 
			
		||||
					cout << " [" << __LINE__ << "] : " ;\
 | 
			
		||||
					cout << "COMPILED with " << COMPILER ;\
 | 
			
		||||
					cout << ", " << __DATE__ ; \
 | 
			
		||||
					cout << " at " << __TIME__ << endl ;\
 | 
			
		||||
					cout << "\n\n" ;\
 | 
			
		||||
					cout << flush ;\
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
# ifdef _DEBUG_

/* --- the following MACROS are useful at debug time --- */

/* HERE prints the trace prefix (file and line) on cerr after flushing cout. */
# define HERE cout<<flush ; cerr << "- Trace " << __FILE__ << " [" << __LINE__ << "] : " << flush ;
/* SCRUTE, INTERRUPTION and ASSERT are wrapped in braces, like MESSAGE, so that
   each expands to ONE compound statement: `if (c) SCRUTE(x);` then guards the
   whole expansion, and ASSERT cannot capture a following `else`. */
# define SCRUTE(var) {HERE ; cerr << #var << "=" << var << endl ;}
# define MESSAGE(chain) {HERE ; cerr << chain << endl ;}
# define INTERRUPTION(code) {HERE ; cerr << "INTERRUPTION return code= " << code << endl ; exit(code) ;}

# ifndef ASSERT
# define ASSERT(condition) {if (!(condition)){ HERE ; cerr << "CONDITION " << #condition << " NOT VERIFIED"<< endl ; INTERRUPTION(1) ;}}
# endif /* ASSERT */

/* REPERE prints a separator line on cerr; BEGIN_OF / END_OF frame a message with it. */
#define REPERE cout<<flush ; cerr << "   --------------" << endl << flush ;
#define BEGIN_OF(chain) {REPERE ; HERE ; cerr << "Begin of: " << chain << endl ; REPERE ; }
#define END_OF(chain) {REPERE ; HERE ; cerr << "Normal end of: " << chain << endl ; REPERE ; }



# else /* ifdef _DEBUG_*/

/* Without _DEBUG_ every debug macro expands to nothing. */
# define HERE
# define SCRUTE(var)
# define MESSAGE(chain)
# define INTERRUPTION(code)

# ifndef ASSERT
# define ASSERT(condition)
# endif /* ASSERT */

#define REPERE
#define BEGIN_OF(chain)
#define END_OF(chain)


# endif /* ifdef _DEBUG_*/
 | 
			
		||||
 | 
			
		||||
# endif /* ifndef UTILITIES_H */
 | 
			
		||||
							
								
								
									
										75
									
								
								cs440-acg/ext/eigen/bench/btl/generic_bench/utils/xy_file.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								cs440-acg/ext/eigen/bench/btl/generic_bench/utils/xy_file.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,75 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  xy_file.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr)>        
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:20 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef XY_FILE_HH
 | 
			
		||||
#define XY_FILE_HH
 | 
			
		||||
#include <fstream>
 | 
			
		||||
#include <iostream>
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <vector>
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
bool read_xy_file(const std::string & filename, std::vector<int> & tab_sizes,
 | 
			
		||||
                  std::vector<double> & tab_mflops, bool quiet = false)
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  std::ifstream input_file (filename.c_str(),std::ios::in);
 | 
			
		||||
 | 
			
		||||
  if (!input_file){
 | 
			
		||||
    if (!quiet) {
 | 
			
		||||
      INFOS("!!! Error opening "<<filename);
 | 
			
		||||
    }
 | 
			
		||||
    return false;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int nb_point=0;
 | 
			
		||||
  int size=0;
 | 
			
		||||
  double mflops=0;
 | 
			
		||||
 | 
			
		||||
  while (input_file >> size >> mflops ){
 | 
			
		||||
    nb_point++;
 | 
			
		||||
    tab_sizes.push_back(size);
 | 
			
		||||
    tab_mflops.push_back(mflops);
 | 
			
		||||
  }
 | 
			
		||||
  SCRUTE(nb_point);
 | 
			
		||||
 | 
			
		||||
  input_file.close();
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// The Vector class must satisfy the following part of STL vector concept :
 | 
			
		||||
//            resize() method
 | 
			
		||||
//            [] operator for setting an element
 | 
			
		||||
// the vector element must have the << operator defined
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
// Writes the pairs (X[i], Y[i]) to `filename` as text, one "x y" pair per
// line, for i in [0, X.size()).
//
// Requirements on the template arguments (as used below):
//   - Vector_A provides size() and operator[];
//   - Vector_B provides operator[] and must hold at least X.size() elements
//     (the length of Y is not checked);
//   - both element types can be written with operator<<.
//
// No error is reported if the file cannot be opened.
template<class Vector_A, class Vector_B>
void dump_xy_file(const Vector_A & X, const Vector_B & Y, const std::string & filename){

  // Names are std::-qualified so the function does not rely on a
  // `using namespace std;` being in effect where the header is included.
  std::ofstream outfile (filename.c_str(),std::ios::out) ;
  const int size=static_cast<int>(X.size());

  // '\n' rather than std::endl: the bytes written are the same, but the
  // stream is not flushed after every line.
  for (int i=0;i<size;i++)
    outfile << X[i] << " " << Y[i] << '\n';

  outfile.close();
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										47
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,47 @@
 | 
			
		||||
 | 
			
		||||
# ATLAS backend: configured only when find_package() reports ATLAS as found.
find_package(ATLAS)
if(ATLAS_FOUND)
  btl_add_bench(btl_atlas main.cpp)
  # The target is only touched when BUILD_btl_atlas is set
  # (presumably by btl_add_bench -- confirm).
  if(BUILD_btl_atlas)
    target_link_libraries(btl_atlas ${ATLAS_LIBRARIES})
    set_property(TARGET btl_atlas PROPERTY
                 COMPILE_FLAGS "-DCBLASNAME=ATLAS -DHAS_LAPACK=1")
  endif()
endif()
 | 
			
		||||
 | 
			
		||||
# Intel MKL backend: configured only when find_package() reports MKL as found.
find_package(MKL)
if(MKL_FOUND)
  btl_add_bench(btl_mkl main.cpp)
  # The target is only touched when BUILD_btl_mkl is set
  # (presumably by btl_add_bench -- confirm).
  if(BUILD_btl_mkl)
    target_link_libraries(btl_mkl ${MKL_LIBRARIES})
    set_property(TARGET btl_mkl PROPERTY
                 COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL -DHAS_LAPACK=1")
  endif()
endif()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# OpenBLAS backend: configured only when find_package() reports OPENBLAS as
# found. Unlike the other external backends in this file, HAS_LAPACK is not
# defined for this target.
find_package(OPENBLAS)
if(OPENBLAS_FOUND)
  btl_add_bench(btl_openblas main.cpp)
  # The target is only touched when BUILD_btl_openblas is set
  # (presumably by btl_add_bench -- confirm).
  if(BUILD_btl_openblas)
    target_link_libraries(btl_openblas ${OPENBLAS_LIBRARIES})
    set_property(TARGET btl_openblas PROPERTY
                 COMPILE_FLAGS "-DCBLASNAME=OPENBLAS")
  endif()
endif()
 | 
			
		||||
 | 
			
		||||
# ACML backend: configured only when find_package() reports ACML as found.
find_package(ACML)
if(ACML_FOUND)
  btl_add_bench(btl_acml main.cpp)
  # The target is only touched when BUILD_btl_acml is set
  # (presumably by btl_add_bench -- confirm).
  if(BUILD_btl_acml)
    target_link_libraries(btl_acml ${ACML_LIBRARIES})
    set_property(TARGET btl_acml PROPERTY
                 COMPILE_FLAGS "-DCBLASNAME=ACML -DHAS_LAPACK=1")
  endif()
endif()
 | 
			
		||||
 | 
			
		||||
# Eigen's own BLAS/LAPACK implementation: only when building from inside the
# Eigen source tree and a working Fortran compiler is available, i.e. when
# the blas/lapack interface is compilable.
if(Eigen_SOURCE_DIR AND CMAKE_Fortran_COMPILER_WORKS)
  include_directories(${Eigen_SOURCE_DIR})
  btl_add_bench(btl_eigenblas main.cpp)
  # The target is only touched when BUILD_btl_eigenblas is set
  # (presumably by btl_add_bench -- confirm).
  if(BUILD_btl_eigenblas)
    target_link_libraries(btl_eigenblas eigen_blas eigen_lapack)
    set_property(TARGET btl_eigenblas PROPERTY
                 COMPILE_FLAGS "-DCBLASNAME=EigenBLAS")
  endif()
endif()
 | 
			
		||||
							
								
								
									
										675
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/blas.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										675
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/blas.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,675 @@
 | 
			
		||||
#ifndef BLAS_H
 | 
			
		||||
#define BLAS_H
 | 
			
		||||
 | 
			
		||||
#define BLASFUNC(FUNC) FUNC##_
 | 
			
		||||
 | 
			
		||||
#ifdef __WIN64__
 | 
			
		||||
typedef long long BLASLONG;
 | 
			
		||||
typedef unsigned long long BLASULONG;
 | 
			
		||||
#else
 | 
			
		||||
typedef long BLASLONG;
 | 
			
		||||
typedef unsigned long BLASULONG;
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(xerbla)(const char *, int *info, int);
 | 
			
		||||
 | 
			
		||||
float  BLASFUNC(sdot)  (int *, float  *, int *, float  *, int *);
 | 
			
		||||
float  BLASFUNC(sdsdot)(int *, float  *,        float  *, int *, float  *, int *);
 | 
			
		||||
 | 
			
		||||
double BLASFUNC(dsdot) (int *, float  *, int *, float  *, int *);
 | 
			
		||||
double BLASFUNC(ddot)  (int *, double *, int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qdot)  (int *, double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
#if defined(F_INTERFACE_GFORT) && !defined(__64BIT__)
 | 
			
		||||
int   BLASFUNC(cdotu)  (int *, float  * , int *, float  *,  int *);
 | 
			
		||||
int   BLASFUNC(cdotc)  (int *, float  *,  int *, float  *,  int *);
 | 
			
		||||
void  BLASFUNC(zdotu)  (double *, int *, double  *, int *, double  *, int *);
 | 
			
		||||
void  BLASFUNC(zdotc)  (double *, int *, double  *, int *, double  *, int *);
 | 
			
		||||
void  BLASFUNC(xdotu)  (double *, int *, double  *, int *, double  *, int *);
 | 
			
		||||
void  BLASFUNC(xdotc)  (double *, int *, double  *, int *, double  *, int *);
 | 
			
		||||
#elif  defined(F_INTERFACE_F2C) || \
 | 
			
		||||
     defined(F_INTERFACE_PGI) || \
 | 
			
		||||
     defined(F_INTERFACE_GFORT) || \
 | 
			
		||||
    (defined(F_INTERFACE_PATHSCALE) && defined(__64BIT__))
 | 
			
		||||
void  BLASFUNC(cdotu)  (float *,  int *, float  * , int *, float  *,  int *);
 | 
			
		||||
void  BLASFUNC(cdotc)  (float *,  int *, float  *,  int *, float  *,  int *);
 | 
			
		||||
void  BLASFUNC(zdotu)  (double *, int *, double  *, int *, double  *, int *);
 | 
			
		||||
void  BLASFUNC(zdotc)  (double *, int *, double  *, int *, double  *, int *);
 | 
			
		||||
void  BLASFUNC(xdotu)  (double *, int *, double  *, int *, double  *, int *);
 | 
			
		||||
void  BLASFUNC(xdotc)  (double *, int *, double  *, int *, double  *, int *);
 | 
			
		||||
#else
 | 
			
		||||
std::complex<float>   BLASFUNC(cdotu)  (int *, float  *, int *, float  *, int *);
 | 
			
		||||
std::complex<float>   BLASFUNC(cdotc)  (int *, float  *, int *, float  *, int *);
 | 
			
		||||
std::complex<double>  BLASFUNC(zdotu)  (int *, double  *, int *, double  *, int *);
 | 
			
		||||
std::complex<double>  BLASFUNC(zdotc)  (int *, double  *, int *, double  *, int *);
 | 
			
		||||
double  BLASFUNC(xdotu)  (int *, double  *, int *, double  *, int *);
 | 
			
		||||
double  BLASFUNC(xdotc)  (int *, double  *, int *, double  *, int *);
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
int  BLASFUNC(cdotuw)  (int *, float  *, int *, float  *, int *, float*);
 | 
			
		||||
int  BLASFUNC(cdotcw)  (int *, float  *, int *, float  *, int *, float*);
 | 
			
		||||
int  BLASFUNC(zdotuw)  (int *, double  *, int *, double  *, int *, double*);
 | 
			
		||||
int  BLASFUNC(zdotcw)  (int *, double  *, int *, double  *, int *, double*);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(saxpy) (int *, float  *, float  *, int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(daxpy) (int *, double *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(qaxpy) (int *, double *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(caxpy) (int *, float  *, float  *, int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(zaxpy) (int *, double *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(xaxpy) (int *, double *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(caxpyc)(int *, float  *, float  *, int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(zaxpyc)(int *, double *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(xaxpyc)(int *, double *, double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(scopy) (int *, float  *, int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(dcopy) (int *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(qcopy) (int *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(ccopy) (int *, float  *, int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(zcopy) (int *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(xcopy) (int *, double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(sswap) (int *, float  *, int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(dswap) (int *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(qswap) (int *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(cswap) (int *, float  *, int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(zswap) (int *, double *, int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(xswap) (int *, double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
float  BLASFUNC(sasum) (int *, float  *, int *);
 | 
			
		||||
float  BLASFUNC(scasum)(int *, float  *, int *);
 | 
			
		||||
double BLASFUNC(dasum) (int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qasum) (int *, double *, int *);
 | 
			
		||||
double BLASFUNC(dzasum)(int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qxasum)(int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(isamax)(int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(idamax)(int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(iqamax)(int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(icamax)(int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(izamax)(int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(ixamax)(int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(ismax) (int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(idmax) (int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(iqmax) (int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(icmax) (int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(izmax) (int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(ixmax) (int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(isamin)(int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(idamin)(int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(iqamin)(int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(icamin)(int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(izamin)(int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(ixamin)(int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(ismin)(int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(idmin)(int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(iqmin)(int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(icmin)(int *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(izmin)(int *, double *, int *);
 | 
			
		||||
int    BLASFUNC(ixmin)(int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
float  BLASFUNC(samax) (int *, float  *, int *);
 | 
			
		||||
double BLASFUNC(damax) (int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qamax) (int *, double *, int *);
 | 
			
		||||
float  BLASFUNC(scamax)(int *, float  *, int *);
 | 
			
		||||
double BLASFUNC(dzamax)(int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qxamax)(int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
float  BLASFUNC(samin) (int *, float  *, int *);
 | 
			
		||||
double BLASFUNC(damin) (int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qamin) (int *, double *, int *);
 | 
			
		||||
float  BLASFUNC(scamin)(int *, float  *, int *);
 | 
			
		||||
double BLASFUNC(dzamin)(int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qxamin)(int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
float  BLASFUNC(smax)  (int *, float  *, int *);
 | 
			
		||||
double BLASFUNC(dmax)  (int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qmax)  (int *, double *, int *);
 | 
			
		||||
float  BLASFUNC(scmax) (int *, float  *, int *);
 | 
			
		||||
double BLASFUNC(dzmax) (int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qxmax) (int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
float  BLASFUNC(smin)  (int *, float  *, int *);
 | 
			
		||||
double BLASFUNC(dmin)  (int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qmin)  (int *, double *, int *);
 | 
			
		||||
float  BLASFUNC(scmin) (int *, float  *, int *);
 | 
			
		||||
double BLASFUNC(dzmin) (int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qxmin) (int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(sscal) (int *,  float  *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(dscal) (int *,  double *, double *, int *);
 | 
			
		||||
int    BLASFUNC(qscal) (int *,  double *, double *, int *);
 | 
			
		||||
int    BLASFUNC(cscal) (int *,  float  *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(zscal) (int *,  double *, double *, int *);
 | 
			
		||||
int    BLASFUNC(xscal) (int *,  double *, double *, int *);
 | 
			
		||||
int    BLASFUNC(csscal)(int *,  float  *, float  *, int *);
 | 
			
		||||
int    BLASFUNC(zdscal)(int *,  double *, double *, int *);
 | 
			
		||||
int    BLASFUNC(xqscal)(int *,  double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
float  BLASFUNC(snrm2) (int *, float  *, int *);
 | 
			
		||||
float  BLASFUNC(scnrm2)(int *, float  *, int *);
 | 
			
		||||
 | 
			
		||||
double BLASFUNC(dnrm2) (int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qnrm2) (int *, double *, int *);
 | 
			
		||||
double BLASFUNC(dznrm2)(int *, double *, int *);
 | 
			
		||||
double BLASFUNC(qxnrm2)(int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(srot)  (int *, float  *, int *, float  *, int *, float  *, float  *);
 | 
			
		||||
int    BLASFUNC(drot)  (int *, double *, int *, double *, int *, double *, double *);
 | 
			
		||||
int    BLASFUNC(qrot)  (int *, double *, int *, double *, int *, double *, double *);
 | 
			
		||||
int    BLASFUNC(csrot) (int *, float  *, int *, float  *, int *, float  *, float  *);
 | 
			
		||||
int    BLASFUNC(zdrot) (int *, double *, int *, double *, int *, double *, double *);
 | 
			
		||||
int    BLASFUNC(xqrot) (int *, double *, int *, double *, int *, double *, double *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(srotg) (float  *, float  *, float  *, float  *);
 | 
			
		||||
int    BLASFUNC(drotg) (double *, double *, double *, double *);
 | 
			
		||||
int    BLASFUNC(qrotg) (double *, double *, double *, double *);
 | 
			
		||||
int    BLASFUNC(crotg) (float  *, float  *, float  *, float  *);
 | 
			
		||||
int    BLASFUNC(zrotg) (double *, double *, double *, double *);
 | 
			
		||||
int    BLASFUNC(xrotg) (double *, double *, double *, double *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(srotmg)(float  *, float  *, float  *, float  *, float  *);
 | 
			
		||||
int    BLASFUNC(drotmg)(double *, double *, double *, double *, double *);
 | 
			
		||||
 | 
			
		||||
int    BLASFUNC(srotm) (int *, float  *, int *, float  *, int *, float  *);
 | 
			
		||||
int    BLASFUNC(drotm) (int *, double *, int *, double *, int *, double *);
 | 
			
		||||
int    BLASFUNC(qrotm) (int *, double *, int *, double *, int *, double *);
 | 
			
		||||
 | 
			
		||||
/* Level 2 routines */
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sger)(int *,    int *, float *,  float *, int *,
 | 
			
		||||
		   float *,  int *, float *,  int *);
 | 
			
		||||
int BLASFUNC(dger)(int *,    int *, double *, double *, int *,
 | 
			
		||||
		   double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(qger)(int *,    int *, double *, double *, int *,
 | 
			
		||||
		   double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(cgeru)(int *,    int *, float *,  float *, int *,
 | 
			
		||||
		    float *,  int *, float *,  int *);
 | 
			
		||||
int BLASFUNC(cgerc)(int *,    int *, float *,  float *, int *,
 | 
			
		||||
		    float *,  int *, float *,  int *);
 | 
			
		||||
int BLASFUNC(zgeru)(int *,    int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(zgerc)(int *,    int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(xgeru)(int *,    int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(xgerc)(int *,    int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sgemv)(char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
		    float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dgemv)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qgemv)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(cgemv)(char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
		    float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zgemv)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xgemv)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(strsv) (char *, char *, char *, int *, float  *, int *,
 | 
			
		||||
		     float  *, int *);
 | 
			
		||||
int BLASFUNC(dtrsv) (char *, char *, char *, int *, double *, int *,
 | 
			
		||||
		     double *, int *);
 | 
			
		||||
int BLASFUNC(qtrsv) (char *, char *, char *, int *, double *, int *,
 | 
			
		||||
		     double *, int *);
 | 
			
		||||
int BLASFUNC(ctrsv) (char *, char *, char *, int *, float  *, int *,
 | 
			
		||||
		     float  *, int *);
 | 
			
		||||
int BLASFUNC(ztrsv) (char *, char *, char *, int *, double *, int *,
 | 
			
		||||
		     double *, int *);
 | 
			
		||||
int BLASFUNC(xtrsv) (char *, char *, char *, int *, double *, int *,
 | 
			
		||||
		     double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(stpsv) (char *, char *, char *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dtpsv) (char *, char *, char *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qtpsv) (char *, char *, char *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(ctpsv) (char *, char *, char *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(ztpsv) (char *, char *, char *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xtpsv) (char *, char *, char *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(strmv) (char *, char *, char *, int *, float  *, int *,
 | 
			
		||||
		     float  *, int *);
 | 
			
		||||
int BLASFUNC(dtrmv) (char *, char *, char *, int *, double *, int *,
 | 
			
		||||
		     double *, int *);
 | 
			
		||||
int BLASFUNC(qtrmv) (char *, char *, char *, int *, double *, int *,
 | 
			
		||||
		     double *, int *);
 | 
			
		||||
int BLASFUNC(ctrmv) (char *, char *, char *, int *, float  *, int *,
 | 
			
		||||
		     float  *, int *);
 | 
			
		||||
int BLASFUNC(ztrmv) (char *, char *, char *, int *, double *, int *,
 | 
			
		||||
		     double *, int *);
 | 
			
		||||
int BLASFUNC(xtrmv) (char *, char *, char *, int *, double *, int *,
 | 
			
		||||
		     double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(stpmv) (char *, char *, char *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dtpmv) (char *, char *, char *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qtpmv) (char *, char *, char *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(ctpmv) (char *, char *, char *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(ztpmv) (char *, char *, char *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xtpmv) (char *, char *, char *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(stbmv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(qtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(ctbmv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
 | 
			
		||||
int BLASFUNC(ztbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(xtbmv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(stbsv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(qtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(ctbsv) (char *, char *, char *, int *, int *, float  *, int *, float  *, int *);
 | 
			
		||||
int BLASFUNC(ztbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(xtbsv) (char *, char *, char *, int *, int *, double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(ssymv) (char *, int *, float  *, float *, int *,
 | 
			
		||||
		     float  *, int *, float *, float *, int *);
 | 
			
		||||
int BLASFUNC(dsymv) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qsymv) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(csymv) (char *, int *, float  *, float *, int *,
 | 
			
		||||
		     float  *, int *, float *, float *, int *);
 | 
			
		||||
int BLASFUNC(zsymv) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xsymv) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sspmv) (char *, int *, float  *, float *,
 | 
			
		||||
		     float  *, int *, float *, float *, int *);
 | 
			
		||||
int BLASFUNC(dspmv) (char *, int *, double  *, double *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qspmv) (char *, int *, double  *, double *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(cspmv) (char *, int *, float  *, float *,
 | 
			
		||||
		     float  *, int *, float *, float *, int *);
 | 
			
		||||
int BLASFUNC(zspmv) (char *, int *, double  *, double *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xspmv) (char *, int *, double  *, double *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(ssyr) (char *, int *, float   *, float  *, int *,
 | 
			
		||||
		    float  *, int *);
 | 
			
		||||
int BLASFUNC(dsyr) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		    double *, int *);
 | 
			
		||||
int BLASFUNC(qsyr) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		    double *, int *);
 | 
			
		||||
int BLASFUNC(csyr) (char *, int *, float   *, float  *, int *,
 | 
			
		||||
		    float  *, int *);
 | 
			
		||||
int BLASFUNC(zsyr) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		    double *, int *);
 | 
			
		||||
int BLASFUNC(xsyr) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		    double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(ssyr2) (char *, int *, float   *,
 | 
			
		||||
		     float  *, int *, float  *, int *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dsyr2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(qsyr2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(csyr2) (char *, int *, float   *,
 | 
			
		||||
		     float  *, int *, float  *, int *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zsyr2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(xsyr2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sspr) (char *, int *, float   *, float  *, int *,
 | 
			
		||||
		    float  *);
 | 
			
		||||
int BLASFUNC(dspr) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		    double *);
 | 
			
		||||
int BLASFUNC(qspr) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		    double *);
 | 
			
		||||
int BLASFUNC(cspr) (char *, int *, float   *, float  *, int *,
 | 
			
		||||
		    float  *);
 | 
			
		||||
int BLASFUNC(zspr) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		    double *);
 | 
			
		||||
int BLASFUNC(xspr) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		    double *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sspr2) (char *, int *, float   *,
 | 
			
		||||
		     float  *, int *, float  *, int *, float  *);
 | 
			
		||||
int BLASFUNC(dspr2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *);
 | 
			
		||||
int BLASFUNC(qspr2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *);
 | 
			
		||||
int BLASFUNC(cspr2) (char *, int *, float   *,
 | 
			
		||||
		     float  *, int *, float  *, int *, float  *);
 | 
			
		||||
int BLASFUNC(zspr2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *);
 | 
			
		||||
int BLASFUNC(xspr2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(cher) (char *, int *, float   *, float  *, int *,
 | 
			
		||||
		    float  *, int *);
 | 
			
		||||
int BLASFUNC(zher) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		    double *, int *);
 | 
			
		||||
int BLASFUNC(xher) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		    double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(chpr) (char *, int *, float   *, float  *, int *, float  *);
 | 
			
		||||
int BLASFUNC(zhpr) (char *, int *, double  *, double *, int *, double *);
 | 
			
		||||
int BLASFUNC(xhpr) (char *, int *, double  *, double *, int *, double *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(cher2) (char *, int *, float   *,
 | 
			
		||||
		     float  *, int *, float  *, int *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zher2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(xher2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(chpr2) (char *, int *, float   *,
 | 
			
		||||
		     float  *, int *, float  *, int *, float  *);
 | 
			
		||||
int BLASFUNC(zhpr2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *);
 | 
			
		||||
int BLASFUNC(xhpr2) (char *, int *, double  *,
 | 
			
		||||
		     double *, int *, double *, int *, double *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(chemv) (char *, int *, float  *, float *, int *,
 | 
			
		||||
		     float  *, int *, float *, float *, int *);
 | 
			
		||||
int BLASFUNC(zhemv) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xhemv) (char *, int *, double  *, double *, int *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(chpmv) (char *, int *, float  *, float *,
 | 
			
		||||
		     float  *, int *, float *, float *, int *);
 | 
			
		||||
int BLASFUNC(zhpmv) (char *, int *, double  *, double *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xhpmv) (char *, int *, double  *, double *,
 | 
			
		||||
		     double  *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(snorm)(char *, int *, int *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dnorm)(char *, int *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(cnorm)(char *, int *, int *, float  *, int *);
 | 
			
		||||
int BLASFUNC(znorm)(char *, int *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sgbmv)(char *, int *, int *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
		    float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(cgbmv)(char *, int *, int *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
		    float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xgbmv)(char *, int *, int *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(ssbmv)(char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
		    float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dsbmv)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qsbmv)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(csbmv)(char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
		    float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zsbmv)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xsbmv)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(chbmv)(char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
		    float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zhbmv)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xhbmv)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
/* Level 3 routines */
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sgemm)(char *, char *, int *, int *, int *, float *,
 | 
			
		||||
	   float  *, int *, float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dgemm)(char *, char *, int *, int *, int *, double *,
 | 
			
		||||
	   double *, int *, double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qgemm)(char *, char *, int *, int *, int *, double *,
 | 
			
		||||
	   double *, int *, double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(cgemm)(char *, char *, int *, int *, int *, float *,
 | 
			
		||||
	   float  *, int *, float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zgemm)(char *, char *, int *, int *, int *, double *,
 | 
			
		||||
	   double *, int *, double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xgemm)(char *, char *, int *, int *, int *, double *,
 | 
			
		||||
	   double *, int *, double *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(cgemm3m)(char *, char *, int *, int *, int *, float *,
 | 
			
		||||
	   float  *, int *, float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zgemm3m)(char *, char *, int *, int *, int *, double *,
 | 
			
		||||
	   double *, int *, double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xgemm3m)(char *, char *, int *, int *, int *, double *,
 | 
			
		||||
	   double *, int *, double *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sge2mm)(char *, char *, char *, int *, int *,
 | 
			
		||||
		     float *, float  *, int *, float  *, int *,
 | 
			
		||||
		     float *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dge2mm)(char *, char *, char *, int *, int *,
 | 
			
		||||
		     double *, double  *, int *, double  *, int *,
 | 
			
		||||
		     double *, double  *, int *);
 | 
			
		||||
int BLASFUNC(cge2mm)(char *, char *, char *, int *, int *,
 | 
			
		||||
		     float *, float  *, int *, float  *, int *,
 | 
			
		||||
		     float *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zge2mm)(char *, char *, char *, int *, int *,
 | 
			
		||||
		     double *, double  *, int *, double  *, int *,
 | 
			
		||||
		     double *, double  *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(strsm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   float *,  float *, int *, float *, int *);
 | 
			
		||||
int BLASFUNC(dtrsm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   double *,  double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(qtrsm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   double *,  double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(ctrsm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   float *,  float *, int *, float *, int *);
 | 
			
		||||
int BLASFUNC(ztrsm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   double *,  double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(xtrsm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   double *,  double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(strmm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   float *,  float *, int *, float *, int *);
 | 
			
		||||
int BLASFUNC(dtrmm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   double *,  double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(qtrmm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   double *,  double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(ctrmm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   float *,  float *, int *, float *, int *);
 | 
			
		||||
int BLASFUNC(ztrmm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   double *,  double *, int *, double *, int *);
 | 
			
		||||
int BLASFUNC(xtrmm)(char *, char *, char *, char *, int *, int *,
 | 
			
		||||
	   double *,  double *, int *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(ssymm)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dsymm)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qsymm)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(csymm)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zsymm)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xsymm)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(csymm3m)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zsymm3m)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xsymm3m)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(ssyrk)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dsyrk)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qsyrk)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, double *, int *);
 | 
			
		||||
int BLASFUNC(csyrk)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zsyrk)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xsyrk)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(ssyr2k)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(dsyr2k)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double*, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(qsyr2k)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double*, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(csyr2k)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zsyr2k)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double*, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xsyr2k)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double*, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(chemm)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zhemm)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xhemm)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(chemm3m)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float  *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zhemm3m)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xhemm3m)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(cherk)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zherk)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xherk)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(cher2k)(char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zher2k)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double*, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xher2k)(char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double*, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(cher2m)(char *, char *, char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
	   float *, int *, float  *, float  *, int *);
 | 
			
		||||
int BLASFUNC(zher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double*, int *, double *, double *, int *);
 | 
			
		||||
int BLASFUNC(xher2m)(char *, char *, char *, int *, int *, double *, double *, int *,
 | 
			
		||||
	   double*, int *, double *, double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sgemt)(char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
		    float  *, int *);
 | 
			
		||||
int BLASFUNC(dgemt)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *);
 | 
			
		||||
int BLASFUNC(cgemt)(char *, int *, int *, float  *, float  *, int *,
 | 
			
		||||
		    float  *, int *);
 | 
			
		||||
int BLASFUNC(zgemt)(char *, int *, int *, double *, double *, int *,
 | 
			
		||||
		    double *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sgema)(char *, char *, int *, int *, float  *,
 | 
			
		||||
		    float  *, int *, float *, float  *, int *, float *, int *);
 | 
			
		||||
int BLASFUNC(dgema)(char *, char *, int *, int *, double *,
 | 
			
		||||
		    double *, int *, double*, double *, int *, double*, int *);
 | 
			
		||||
int BLASFUNC(cgema)(char *, char *, int *, int *, float  *,
 | 
			
		||||
		    float  *, int *, float *, float  *, int *, float *, int *);
 | 
			
		||||
int BLASFUNC(zgema)(char *, char *, int *, int *, double *,
 | 
			
		||||
		    double *, int *, double*, double *, int *, double*, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sgems)(char *, char *, int *, int *, float  *,
 | 
			
		||||
		    float  *, int *, float *, float  *, int *, float *, int *);
 | 
			
		||||
int BLASFUNC(dgems)(char *, char *, int *, int *, double *,
 | 
			
		||||
		    double *, int *, double*, double *, int *, double*, int *);
 | 
			
		||||
int BLASFUNC(cgems)(char *, char *, int *, int *, float  *,
 | 
			
		||||
		    float  *, int *, float *, float  *, int *, float *, int *);
 | 
			
		||||
int BLASFUNC(zgems)(char *, char *, int *, int *, double *,
 | 
			
		||||
		    double *, int *, double*, double *, int *, double*, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sgetf2)(int *, int *, float  *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(dgetf2)(int *, int *, double *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(qgetf2)(int *, int *, double *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(cgetf2)(int *, int *, float  *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(zgetf2)(int *, int *, double *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(xgetf2)(int *, int *, double *, int *, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sgetrf)(int *, int *, float  *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(dgetrf)(int *, int *, double *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(qgetrf)(int *, int *, double *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(cgetrf)(int *, int *, float  *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(zgetrf)(int *, int *, double *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(xgetrf)(int *, int *, double *, int *, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(slaswp)(int *, float  *, int *, int *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(dlaswp)(int *, double *, int *, int *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(qlaswp)(int *, double *, int *, int *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(claswp)(int *, float  *, int *, int *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(zlaswp)(int *, double *, int *, int *, int *, int *, int *);
 | 
			
		||||
int BLASFUNC(xlaswp)(int *, double *, int *, int *, int *, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sgetrs)(char *, int *, int *, float  *, int *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(dgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(qgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(cgetrs)(char *, int *, int *, float  *, int *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(zgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(xgetrs)(char *, int *, int *, double *, int *, int *, double *, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(sgesv)(int *, int *, float  *, int *, int *, float *, int *, int *);
 | 
			
		||||
int BLASFUNC(dgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
 | 
			
		||||
int BLASFUNC(qgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
 | 
			
		||||
int BLASFUNC(cgesv)(int *, int *, float  *, int *, int *, float *, int *, int *);
 | 
			
		||||
int BLASFUNC(zgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
 | 
			
		||||
int BLASFUNC(xgesv)(int *, int *, double *, int *, int *, double*, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(spotf2)(char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(dpotf2)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(qpotf2)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(cpotf2)(char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(zpotf2)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(xpotf2)(char *, int *, double *, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(spotrf)(char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(dpotrf)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(qpotrf)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(cpotrf)(char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(zpotrf)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(xpotrf)(char *, int *, double *, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(slauu2)(char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(dlauu2)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(qlauu2)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(clauu2)(char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(zlauu2)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(xlauu2)(char *, int *, double *, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(slauum)(char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(dlauum)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(qlauum)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(clauum)(char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(zlauum)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(xlauum)(char *, int *, double *, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(strti2)(char *, char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(dtrti2)(char *, char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(qtrti2)(char *, char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(ctrti2)(char *, char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(ztrti2)(char *, char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(xtrti2)(char *, char *, int *, double *, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(strtri)(char *, char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(dtrtri)(char *, char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(qtrtri)(char *, char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(ctrtri)(char *, char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(ztrtri)(char *, char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(xtrtri)(char *, char *, int *, double *, int *, int *);
 | 
			
		||||
 | 
			
		||||
int BLASFUNC(spotri)(char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(dpotri)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(qpotri)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(cpotri)(char *, int *, float  *, int *, int *);
 | 
			
		||||
int BLASFUNC(zpotri)(char *, int *, double *, int *, int *);
 | 
			
		||||
int BLASFUNC(xpotri)(char *, int *, double *, int *, int *);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										83
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/blas_interface.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/blas_interface.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,83 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  blas_interface.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:28 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef blas_PRODUIT_MATRICE_VECTEUR_HH
 | 
			
		||||
#define blas_PRODUIT_MATRICE_VECTEUR_HH
 | 
			
		||||
 | 
			
		||||
#include <c_interface_base.h>
 | 
			
		||||
#include <complex>
 | 
			
		||||
extern "C"
 | 
			
		||||
{
 | 
			
		||||
#include "blas.h"
 | 
			
		||||
 | 
			
		||||
  // Cholesky Factorization
 | 
			
		||||
//   void spotrf_(const char* uplo, const int* n, float *a, const int* ld, int* info);
 | 
			
		||||
//   void dpotrf_(const char* uplo, const int* n, double *a, const int* ld, int* info);
 | 
			
		||||
  void ssytrd_(char *uplo, const int *n, float *a, const int *lda, float *d, float *e, float *tau, float *work, int *lwork, int *info );
 | 
			
		||||
  void dsytrd_(char *uplo, const int *n, double *a, const int *lda, double *d, double *e, double *tau, double *work, int *lwork, int *info );
 | 
			
		||||
  void sgehrd_( const int *n, int *ilo, int *ihi, float *a, const int *lda, float *tau, float *work, int *lwork, int *info );
 | 
			
		||||
  void dgehrd_( const int *n, int *ilo, int *ihi, double *a, const int *lda, double *tau, double *work, int *lwork, int *info );
 | 
			
		||||
 | 
			
		||||
  // LU row pivoting
 | 
			
		||||
//   void dgetrf_( int *m, int *n, double *a, int *lda, int *ipiv, int *info );
 | 
			
		||||
//   void sgetrf_(const int* m, const int* n, float *a, const int* ld, int* ipivot, int* info);
 | 
			
		||||
  // LU full pivoting
 | 
			
		||||
  void sgetc2_(const int* n, float *a, const int *lda, int *ipiv, int *jpiv, int*info );
 | 
			
		||||
  void dgetc2_(const int* n, double *a, const int *lda, int *ipiv, int *jpiv, int*info );
 | 
			
		||||
#ifdef HAS_LAPACK
 | 
			
		||||
#endif
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#define MAKE_STRING2(S) #S
 | 
			
		||||
#define MAKE_STRING(S) MAKE_STRING2(S)
 | 
			
		||||
 | 
			
		||||
#define CAT2(A,B) A##B
 | 
			
		||||
#define CAT(A,B) CAT2(A,B)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
template<class real> class blas_interface;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Flag and increment values handed to the Fortran-style BLAS/LAPACK routines.
// Those routines take every argument by pointer and their prototypes use
// non-const `char *` / `int *`, so these must be addressable, mutable objects
// rather than literals or constants.
static char notrans = 'N';   // operate on the matrix as stored
static char trans = 'T';     // operate on the transposed matrix
static char nonunit = 'N';   // triangular matrix has an explicit (non-unit) diagonal
static char lower = 'L';     // reference the lower triangle
static char right = 'R';     // triangular operand is applied from the right
static char left = 'L';      // triangular operand is applied from the left
static int intone = 1;       // unit stride / increment
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define SCALAR        float
 | 
			
		||||
#define SCALAR_PREFIX s
 | 
			
		||||
#include "blas_interface_impl.hh"
 | 
			
		||||
#undef SCALAR
 | 
			
		||||
#undef SCALAR_PREFIX
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define SCALAR        double
 | 
			
		||||
#define SCALAR_PREFIX d
 | 
			
		||||
#include "blas_interface_impl.hh"
 | 
			
		||||
#undef SCALAR
 | 
			
		||||
#undef SCALAR_PREFIX
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										147
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/blas_interface_impl.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/blas_interface_impl.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,147 @@
 | 
			
		||||
 | 
			
		||||
#define BLAS_FUNC(NAME) CAT(CAT(SCALAR_PREFIX,NAME),_)
 | 
			
		||||
 | 
			
		||||
template<> class blas_interface<SCALAR> : public c_interface_base<SCALAR>
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
  
 | 
			
		||||
  static SCALAR fone;
 | 
			
		||||
  static SCALAR fzero;
 | 
			
		||||
 | 
			
		||||
  static inline std::string name()
 | 
			
		||||
  {
 | 
			
		||||
    return MAKE_STRING(CBLASNAME);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
 | 
			
		||||
    BLAS_FUNC(gemv)(¬rans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
 | 
			
		||||
    BLAS_FUNC(symv)(&lower, &N,&fone,A,&N,B,&intone,&fzero,X,&intone);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
 | 
			
		||||
    BLAS_FUNC(syr2)(&lower,&N,&fone,B,&intone,X,&intone,A,&N);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N){
 | 
			
		||||
    BLAS_FUNC(ger)(&N,&N,&fone,X,&intone,Y,&intone,A,&N);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void rot(gene_vector & A,  gene_vector & B, SCALAR c, SCALAR s, int N){
 | 
			
		||||
    BLAS_FUNC(rot)(&N,A,&intone,B,&intone,&c,&s);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
 | 
			
		||||
    BLAS_FUNC(gemv)(&trans,&N,&N,&fone,A,&N,B,&intone,&fzero,X,&intone);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
 | 
			
		||||
    BLAS_FUNC(gemm)(¬rans,¬rans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void transposed_matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N){
 | 
			
		||||
    BLAS_FUNC(gemm)(¬rans,¬rans,&N,&N,&N,&fone,A,&N,B,&N,&fzero,X,&N);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
//   static inline void ata_product(gene_matrix & A, gene_matrix & X, int N){
 | 
			
		||||
//     ssyrk_(&lower,&trans,&N,&N,&fone,A,&N,&fzero,X,&N);
 | 
			
		||||
//   }
 | 
			
		||||
 | 
			
		||||
  static inline void aat_product(gene_matrix & A, gene_matrix & X, int N){
 | 
			
		||||
    BLAS_FUNC(syrk)(&lower,¬rans,&N,&N,&fone,A,&N,&fzero,X,&N);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void axpy(SCALAR coef, const gene_vector & X, gene_vector & Y, int N){
 | 
			
		||||
    BLAS_FUNC(axpy)(&N,&coef,X,&intone,Y,&intone);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void axpby(SCALAR a, const gene_vector & X, SCALAR b, gene_vector & Y, int N){
 | 
			
		||||
    BLAS_FUNC(scal)(&N,&b,Y,&intone);
 | 
			
		||||
    BLAS_FUNC(axpy)(&N,&a,X,&intone,Y,&intone);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
 | 
			
		||||
    int N2 = N*N;
 | 
			
		||||
    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
 | 
			
		||||
    char uplo = 'L';
 | 
			
		||||
    int info = 0;
 | 
			
		||||
    BLAS_FUNC(potrf)(&uplo, &N, C, &N, &info);
 | 
			
		||||
    if(info!=0) std::cerr << "potrf_ error " << info << "\n";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void partial_lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
 | 
			
		||||
    int N2 = N*N;
 | 
			
		||||
    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
 | 
			
		||||
    int info = 0;
 | 
			
		||||
    int * ipiv = (int*)alloca(sizeof(int)*N);
 | 
			
		||||
    BLAS_FUNC(getrf)(&N, &N, C, &N, ipiv, &info);
 | 
			
		||||
    if(info!=0) std::cerr << "getrf_ error " << info << "\n";
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
 | 
			
		||||
    BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
 | 
			
		||||
    BLAS_FUNC(trsv)(&lower, ¬rans, &nonunit, &N, L, &N, X, &intone);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix & X, int N){
 | 
			
		||||
    BLAS_FUNC(copy)(&N, B, &intone, X, &intone);
 | 
			
		||||
    BLAS_FUNC(trsm)(&right, &lower, ¬rans, &nonunit, &N, &N, &fone, L, &N, X, &N);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void trmm(gene_matrix & A, gene_matrix & B, gene_matrix & /*X*/, int N){
 | 
			
		||||
    BLAS_FUNC(trmm)(&left, &lower, ¬rans,&nonunit, &N,&N,&fone,A,&N,B,&N);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  #ifdef HAS_LAPACK
 | 
			
		||||
 | 
			
		||||
  static inline void lu_decomp(const gene_matrix & X, gene_matrix & C, int N){
 | 
			
		||||
    int N2 = N*N;
 | 
			
		||||
    BLAS_FUNC(copy)(&N2, X, &intone, C, &intone);
 | 
			
		||||
    int info = 0;
 | 
			
		||||
    int * ipiv = (int*)alloca(sizeof(int)*N);
 | 
			
		||||
    int * jpiv = (int*)alloca(sizeof(int)*N);
 | 
			
		||||
    BLAS_FUNC(getc2)(&N, C, &N, ipiv, jpiv, &info);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  static inline void hessenberg(const gene_matrix & X, gene_matrix & C, int N){
 | 
			
		||||
    {
 | 
			
		||||
      int N2 = N*N;
 | 
			
		||||
      int inc = 1;
 | 
			
		||||
      BLAS_FUNC(copy)(&N2, X, &inc, C, &inc);
 | 
			
		||||
    }
 | 
			
		||||
    int info = 0;
 | 
			
		||||
    int ilo = 1;
 | 
			
		||||
    int ihi = N;
 | 
			
		||||
    int bsize = 64;
 | 
			
		||||
    int worksize = N*bsize;
 | 
			
		||||
    SCALAR* d = new SCALAR[N+worksize];
 | 
			
		||||
    BLAS_FUNC(gehrd)(&N, &ilo, &ihi, C, &N, d, d+N, &worksize, &info);
 | 
			
		||||
    delete[] d;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void tridiagonalization(const gene_matrix & X, gene_matrix & C, int N){
 | 
			
		||||
    {
 | 
			
		||||
      int N2 = N*N;
 | 
			
		||||
      int inc = 1;
 | 
			
		||||
      BLAS_FUNC(copy)(&N2, X, &inc, C, &inc);
 | 
			
		||||
    }
 | 
			
		||||
    char uplo = 'U';
 | 
			
		||||
    int info = 0;
 | 
			
		||||
    int bsize = 64;
 | 
			
		||||
    int worksize = N*bsize;
 | 
			
		||||
    SCALAR* d = new SCALAR[3*N+worksize];
 | 
			
		||||
    BLAS_FUNC(sytrd)(&uplo, &N, C, &N, d, d+N, d+2*N, d+3*N, &worksize, &info);
 | 
			
		||||
    delete[] d;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  #endif // HAS_LAPACK
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
SCALAR blas_interface<SCALAR>::fone = SCALAR(1);
 | 
			
		||||
SCALAR blas_interface<SCALAR>::fzero = SCALAR(0);
 | 
			
		||||
							
								
								
									
										73
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/c_interface_base.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/c_interface_base.h
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,73 @@
 | 
			
		||||
 | 
			
		||||
#ifndef BTL_C_INTERFACE_BASE_H
 | 
			
		||||
#define BTL_C_INTERFACE_BASE_H
 | 
			
		||||
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
// Storage helpers shared by the benchmark back-ends that work on raw C arrays.
//
// A gene_matrix is a flat array of N*N values in which element (i,j) lives at
// index i + N*j; the matching stl_matrix is indexed as [j][i]. A gene_vector
// is a flat array of N values. Both are allocated with new[] by the
// *_from_stl functions and must be released with free_matrix / free_vector.
template<class real> class c_interface_base
{

public:

  typedef real                      real_type;
  typedef std::vector<real>         stl_vector;
  typedef std::vector<stl_vector >  stl_matrix;

  typedef real* gene_matrix;
  typedef real* gene_vector;

  // Releases a matrix obtained from matrix_from_stl; the size is not needed.
  static void free_matrix(gene_matrix & A, int /*N*/){
    delete[] A;
  }

  // Releases a vector obtained from vector_from_stl.
  static void free_vector(gene_vector & B){
    delete[] B;
  }

  // Allocates A as an N x N array (N = A_stl.size()) and fills it so that
  // A[i + N*j] == A_stl[j][i]. Every row of A_stl must hold at least N values.
  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
    const int N = static_cast<int>(A_stl.size());
    A = new real[N*N];
    for (int j=0;j<N;j++)
      for (int i=0;i<N;i++)
        A[i+N*j] = A_stl[j][i];
  }

  // Allocates B with B_stl.size() entries and copies B_stl into it.
  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
    const int N = static_cast<int>(B_stl.size());
    B = new real[N];
    for (int i=0;i<N;i++)
      B[i] = B_stl[i];
  }

  // Copies the first B_stl.size() entries of B into B_stl; B_stl is not resized.
  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
    const int N = static_cast<int>(B_stl.size());
    for (int i=0;i<N;i++)
      B_stl[i] = B[i];
  }

  // Copies the N x N array A (N = A_stl.size()) into A_stl, resizing each
  // inner vector to N so that A_stl[j][i] == A[i + N*j].
  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
    const int N = static_cast<int>(A_stl.size());
    for (int j=0;j<N;j++){
      A_stl[j].resize(N);
      for (int i=0;i<N;i++)
        A_stl[j][i] = A[i+N*j];
    }
  }

  // Copies N values from source into cible ("target"); cible must already be allocated.
  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
    for (int i=0;i<N;i++)
      cible[i]=source[i];
  }

  // Copies the N x N values of source into cible ("target"); cible must already be allocated.
  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
    for (int j=0;j<N;j++){
      for (int i=0;i<N;i++){
        cible[i+N*j] = source[i+N*j];
      }
    }
  }

};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										73
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/main.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								cs440-acg/ext/eigen/bench/btl/libs/BLAS/main.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,73 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  main.cpp
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:28 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "blas_interface.hh"
 | 
			
		||||
#include "bench.hh"
 | 
			
		||||
#include "basic_actions.hh"
 | 
			
		||||
 | 
			
		||||
#include "action_cholesky.hh"
 | 
			
		||||
#include "action_lu_decomp.hh"
 | 
			
		||||
#include "action_partial_lu.hh"
 | 
			
		||||
#include "action_trisolve_matrix.hh"
 | 
			
		||||
 | 
			
		||||
#ifdef HAS_LAPACK
 | 
			
		||||
#include "action_hessenberg.hh"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
BTL_MAIN;
 | 
			
		||||
 | 
			
		||||
int main()
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  bench<Action_axpy<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
 | 
			
		||||
  bench<Action_axpby<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  bench<Action_matrix_vector_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
  bench<Action_atv_product<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
  bench<Action_symv<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
  bench<Action_syr2<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  bench<Action_ger<blas_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
  bench<Action_rot<blas_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  bench<Action_matrix_matrix_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
//   bench<Action_ata_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
  bench<Action_aat_product<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  bench<Action_trisolve<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
  bench<Action_trisolve_matrix<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  bench<Action_trmm<blas_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  bench<Action_cholesky<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
 | 
			
		||||
  bench<Action_partial_lu<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  #ifdef HAS_LAPACK
 | 
			
		||||
//   bench<Action_lu_decomp<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
 | 
			
		||||
  bench<Action_hessenberg<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
 | 
			
		||||
  bench<Action_tridiagonalization<blas_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
 | 
			
		||||
  #endif
 | 
			
		||||
 | 
			
		||||
  //bench<Action_lu_solve<blas_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										2
									
								
								cs440-acg/ext/eigen/bench/btl/libs/STL/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								cs440-acg/ext/eigen/bench/btl/libs/STL/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,2 @@
 | 
			
		||||
 | 
			
		||||
# Registers the STL benchmark built from main.cpp.
# NOTE(review): the trailing OFF presumably makes BUILD_btl_STL default to OFF - confirm against the btl_add_bench macro.
btl_add_bench(btl_STL main.cpp OFF)
 | 
			
		||||
							
								
								
									
										244
									
								
								cs440-acg/ext/eigen/bench/btl/libs/STL/STL_interface.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										244
									
								
								cs440-acg/ext/eigen/bench/btl/libs/STL/STL_interface.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,244 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  STL_interface.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:24 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef STL_INTERFACE_HH
 | 
			
		||||
#define STL_INTERFACE_HH
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <vector>
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
 | 
			
		||||
using namespace std;
 | 
			
		||||
 | 
			
		||||
template<class real>
 | 
			
		||||
class STL_interface{
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  typedef real real_type ;
 | 
			
		||||
 | 
			
		||||
  typedef std::vector<real>  stl_vector;
 | 
			
		||||
  typedef std::vector<stl_vector > stl_matrix;
 | 
			
		||||
 | 
			
		||||
  typedef stl_matrix gene_matrix;
 | 
			
		||||
 | 
			
		||||
  typedef stl_vector gene_vector;
 | 
			
		||||
 | 
			
		||||
  static inline std::string name( void )
 | 
			
		||||
  {
 | 
			
		||||
    return "STL";
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void free_matrix(gene_matrix & /*A*/, int /*N*/){}
 | 
			
		||||
 | 
			
		||||
  static void free_vector(gene_vector & /*B*/){}
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
 | 
			
		||||
    A = A_stl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
 | 
			
		||||
    B = B_stl;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
 | 
			
		||||
    B_stl = B ;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
 | 
			
		||||
    A_stl = A ;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
 | 
			
		||||
    for (int i=0;i<N;i++){
 | 
			
		||||
      cible[i]=source[i];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
 | 
			
		||||
    for (int i=0;i<N;i++)
 | 
			
		||||
      for (int j=0;j<N;j++)
 | 
			
		||||
        cible[i][j]=source[i][j];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
//   static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
 | 
			
		||||
//   {
 | 
			
		||||
//     real somme;
 | 
			
		||||
//     for (int j=0;j<N;j++){
 | 
			
		||||
//       for (int i=0;i<N;i++){
 | 
			
		||||
//         somme=0.0;
 | 
			
		||||
//         for (int k=0;k<N;k++)
 | 
			
		||||
//           somme += A[i][k]*A[j][k];
 | 
			
		||||
//         X[j][i]=somme;
 | 
			
		||||
//       }
 | 
			
		||||
//     }
 | 
			
		||||
//   }
 | 
			
		||||
 | 
			
		||||
  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    real somme;
 | 
			
		||||
    for (int j=0;j<N;j++){
 | 
			
		||||
      for (int i=0;i<N;i++){
 | 
			
		||||
        somme=0.0;
 | 
			
		||||
        if(i>=j)
 | 
			
		||||
        {
 | 
			
		||||
          for (int k=0;k<N;k++){
 | 
			
		||||
            somme+=A[k][i]*A[k][j];
 | 
			
		||||
          }
 | 
			
		||||
          X[j][i]=somme;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    real somme;
 | 
			
		||||
    for (int j=0;j<N;j++){
 | 
			
		||||
      for (int i=0;i<N;i++){
 | 
			
		||||
        somme=0.0;
 | 
			
		||||
        for (int k=0;k<N;k++)
 | 
			
		||||
          somme+=A[k][i]*B[j][k];
 | 
			
		||||
        X[j][i]=somme;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    real somme;
 | 
			
		||||
    for (int i=0;i<N;i++){
 | 
			
		||||
      somme=0.0;
 | 
			
		||||
      for (int j=0;j<N;j++)
 | 
			
		||||
        somme+=A[j][i]*B[j];
 | 
			
		||||
      X[i]=somme;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void symv(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    for (int j=0; j<N; ++j)
 | 
			
		||||
      X[j] = 0;
 | 
			
		||||
    for (int j=0; j<N; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      real t1 = B[j];
 | 
			
		||||
      real t2 = 0;
 | 
			
		||||
      X[j] += t1 * A[j][j];
 | 
			
		||||
      for (int i=j+1; i<N; ++i) {
 | 
			
		||||
        X[i] += t1 * A[j][i];
 | 
			
		||||
        t2 += A[j][i] * B[i];
 | 
			
		||||
      }
 | 
			
		||||
      X[j] += t2;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  static inline void syr2(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    for (int j=0; j<N; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      for (int i=j; i<N; ++i)
 | 
			
		||||
        A[j][i] += B[i]*X[j] + B[j]*X[i];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void ger(gene_matrix & A, gene_vector & X, gene_vector & Y, int N)
 | 
			
		||||
  {
 | 
			
		||||
    for (int j=0; j<N; ++j)
 | 
			
		||||
    {
 | 
			
		||||
      for (int i=j; i<N; ++i)
 | 
			
		||||
        A[j][i] += X[i]*Y[j];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    real somme;
 | 
			
		||||
    for (int i=0;i<N;i++){
 | 
			
		||||
      somme = 0.0;
 | 
			
		||||
      for (int j=0;j<N;j++)
 | 
			
		||||
        somme += A[i][j]*B[j];
 | 
			
		||||
      X[i] = somme;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
 | 
			
		||||
    for (int i=0;i<N;i++)
 | 
			
		||||
      Y[i]+=coef*X[i];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
 | 
			
		||||
    for (int i=0;i<N;i++)
 | 
			
		||||
      Y[i] = a*X[i] + b*Y[i];
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void trisolve_lower(const gene_matrix & L, const gene_vector & B, gene_vector & X, int N){
 | 
			
		||||
    copy_vector(B,X,N);
 | 
			
		||||
    for(int i=0; i<N; ++i)
 | 
			
		||||
    {
 | 
			
		||||
      X[i] /= L[i][i];
 | 
			
		||||
      real tmp = X[i];
 | 
			
		||||
      for (int j=i+1; j<N; ++j)
 | 
			
		||||
        X[j] -= tmp * L[i][j];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline real norm_diff(const stl_vector & A, const stl_vector & B)
 | 
			
		||||
  {
 | 
			
		||||
    int N=A.size();
 | 
			
		||||
    real somme=0.0;
 | 
			
		||||
    real somme2=0.0;
 | 
			
		||||
 | 
			
		||||
    for (int i=0;i<N;i++){
 | 
			
		||||
      real diff=A[i]-B[i];
 | 
			
		||||
      somme+=diff*diff;
 | 
			
		||||
      somme2+=A[i]*A[i];
 | 
			
		||||
    }
 | 
			
		||||
    return somme/somme2;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline real norm_diff(const stl_matrix & A, const stl_matrix & B)
 | 
			
		||||
  {
 | 
			
		||||
    int N=A[0].size();
 | 
			
		||||
    real somme=0.0;
 | 
			
		||||
    real somme2=0.0;
 | 
			
		||||
 | 
			
		||||
    for (int i=0;i<N;i++){
 | 
			
		||||
      for (int j=0;j<N;j++){
 | 
			
		||||
        real diff=A[i][j] - B[i][j];
 | 
			
		||||
        somme += diff*diff;
 | 
			
		||||
        somme2 += A[i][j]*A[i][j];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return somme/somme2;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void display_vector(const stl_vector & A)
 | 
			
		||||
  {
 | 
			
		||||
    int N=A.size();
 | 
			
		||||
    for (int i=0;i<N;i++){
 | 
			
		||||
      INFOS("A["<<i<<"]="<<A[i]<<endl);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										42
									
								
								cs440-acg/ext/eigen/bench/btl/libs/STL/main.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								cs440-acg/ext/eigen/bench/btl/libs/STL/main.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,42 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  main.cpp
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:23 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "STL_interface.hh"
 | 
			
		||||
#include "bench.hh"
 | 
			
		||||
#include "basic_actions.hh"
 | 
			
		||||
 | 
			
		||||
BTL_MAIN;
 | 
			
		||||
 | 
			
		||||
int main()
 | 
			
		||||
{
 | 
			
		||||
  bench<Action_axpy<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
 | 
			
		||||
  bench<Action_axpby<STL_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
 | 
			
		||||
  bench<Action_matrix_vector_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
  bench<Action_atv_product<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
  bench<Action_symv<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
  bench<Action_syr2<STL_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
  bench<Action_matrix_matrix_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
  bench<Action_ata_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
  bench<Action_aat_product<STL_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										13
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blaze/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blaze/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,13 @@
 | 
			
		||||
 | 
			
		||||
# The blaze benchmark needs both Blaze and Boost.System.
find_package(BLAZE)
find_package(Boost COMPONENTS system)
if (BLAZE_FOUND AND Boost_FOUND)
  include_directories(${BLAZE_INCLUDE_DIR} ${Boost_INCLUDE_DIRS})
  btl_add_bench(btl_blaze main.cpp)
  # Every command that names the btl_blaze target stays inside this guard,
  # because set_property(TARGET ...) fails at configure time if the target
  # does not exist.
  if(BUILD_btl_blaze)
    # Note: The newest blaze version requires C++14.
    # Ideally, we should set this depending on the version of Blaze we found
    set_property(TARGET btl_blaze PROPERTY CXX_STANDARD 14)
    target_link_libraries(btl_blaze ${Boost_LIBRARIES})
  endif()
endif ()
 | 
			
		||||
							
								
								
									
										140
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blaze/blaze_interface.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										140
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blaze/blaze_interface.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,140 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef BLAZE_INTERFACE_HH
 | 
			
		||||
#define BLAZE_INTERFACE_HH
 | 
			
		||||
 | 
			
		||||
#include <blaze/Math.h>
 | 
			
		||||
#include <blaze/Blaze.h>
 | 
			
		||||
// using namespace blaze;
 | 
			
		||||
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
template<class real>
 | 
			
		||||
class blaze_interface {
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  typedef real real_type ;
 | 
			
		||||
 | 
			
		||||
  typedef std::vector<real>        stl_vector;
 | 
			
		||||
  typedef std::vector<stl_vector > stl_matrix;
 | 
			
		||||
 | 
			
		||||
  typedef blaze::DynamicMatrix<real,blaze::columnMajor>  gene_matrix;
 | 
			
		||||
  typedef blaze::DynamicVector<real>  gene_vector;
 | 
			
		||||
 | 
			
		||||
  static inline std::string name() { return "blaze"; }
 | 
			
		||||
 | 
			
		||||
  static void free_matrix(gene_matrix & A, int N){
 | 
			
		||||
    return ;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static void free_vector(gene_vector & B){
 | 
			
		||||
    return ;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
 | 
			
		||||
    A.resize(A_stl[0].size(), A_stl.size());
 | 
			
		||||
 | 
			
		||||
    for (int j=0; j<A_stl.size() ; j++){
 | 
			
		||||
      for (int i=0; i<A_stl[j].size() ; i++){
 | 
			
		||||
        A(i,j) = A_stl[j][i];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
 | 
			
		||||
    B.resize(B_stl.size());
 | 
			
		||||
    for (int i=0; i<B_stl.size() ; i++){
 | 
			
		||||
      B[i] = B_stl[i];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
 | 
			
		||||
    for (int i=0; i<B_stl.size() ; i++){
 | 
			
		||||
      B_stl[i] = B[i];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
 | 
			
		||||
    int N=A_stl.size();
 | 
			
		||||
    for (int j=0;j<N;j++){
 | 
			
		||||
      A_stl[j].resize(N);
 | 
			
		||||
      for (int i=0;i<N;i++){
 | 
			
		||||
        A_stl[j][i] = A(i,j);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
 | 
			
		||||
    X = (A*B);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
 | 
			
		||||
    X = (trans(A)*trans(B));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
 | 
			
		||||
    X = (trans(A)*A);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
 | 
			
		||||
    X = (A*trans(A));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
 | 
			
		||||
    X = (A*B);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
 | 
			
		||||
    X = (trans(A)*B);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N){
 | 
			
		||||
    Y += coef * X;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void axpby(real a, const gene_vector & X, real b, gene_vector & Y, int N){
 | 
			
		||||
    Y = a*X + b*Y;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
//   static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
 | 
			
		||||
//     C = X;
 | 
			
		||||
//     recursive_cholesky(C);
 | 
			
		||||
//   }
 | 
			
		||||
 | 
			
		||||
//   static inline void lu_decomp(const gene_matrix & X, gene_matrix & R, int N){
 | 
			
		||||
//     R = X;
 | 
			
		||||
//     std::vector<int> ipvt(N);
 | 
			
		||||
//     lu_factor(R, ipvt);
 | 
			
		||||
//   }
 | 
			
		||||
 | 
			
		||||
//   static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector & X, int N){
 | 
			
		||||
//     X = lower_trisolve(L, B);
 | 
			
		||||
//   }
 | 
			
		||||
 | 
			
		||||
  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
 | 
			
		||||
    cible = source;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
 | 
			
		||||
    cible = source;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										40
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blaze/main.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blaze/main.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,40 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "blaze_interface.hh"
 | 
			
		||||
#include "bench.hh"
 | 
			
		||||
#include "basic_actions.hh"
 | 
			
		||||
 | 
			
		||||
BTL_MAIN;
 | 
			
		||||
 | 
			
		||||
int main()
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  bench<Action_axpy<blaze_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
 | 
			
		||||
  bench<Action_axpby<blaze_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  bench<Action_matrix_vector_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
  bench<Action_atv_product<blaze_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
//   bench<Action_matrix_matrix_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
//   bench<Action_ata_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
//   bench<Action_aat_product<blaze_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										17
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blitz/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blitz/CMakeLists.txt
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,17 @@
 | 
			
		||||
 | 
			
		||||
# Blitz benchmarks: only configured when find_package locates the library.
find_package(Blitz)

if(BLITZ_FOUND)
  include_directories(${BLITZ_INCLUDES})

  # Regular Blitz benchmark.
  btl_add_bench(btl_blitz btl_blitz.cpp)
  if(BUILD_btl_blitz)
    target_link_libraries(btl_blitz ${BLITZ_LIBRARIES})
  endif()

  # Tiny-Blitz benchmark, registered with OFF.
  btl_add_bench(btl_tiny_blitz btl_tiny_blitz.cpp OFF)
  if(BUILD_btl_tiny_blitz)
    target_link_libraries(btl_tiny_blitz ${BLITZ_LIBRARIES})
  endif()
endif()
 | 
			
		||||
@@ -0,0 +1,192 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  blitz_LU_solve_interface.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:31 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
// 
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
// 
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
// 
 | 
			
		||||
#ifndef BLITZ_LU_SOLVE_INTERFACE_HH
 | 
			
		||||
#define BLITZ_LU_SOLVE_INTERFACE_HH
 | 
			
		||||
 | 
			
		||||
#include "blitz/array.h"
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
BZ_USING_NAMESPACE(blitz)
 | 
			
		||||
 | 
			
		||||
template<class real>
 | 
			
		||||
class blitz_LU_solve_interface : public blitz_interface<real>
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  typedef typename blitz_interface<real>::gene_matrix gene_matrix;
 | 
			
		||||
  typedef typename blitz_interface<real>::gene_vector gene_vector;
 | 
			
		||||
 | 
			
		||||
  typedef blitz::Array<int,1> Pivot_Vector;
 | 
			
		||||
 | 
			
		||||
  inline static void new_Pivot_Vector(Pivot_Vector & pivot,int N)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    pivot.resize(N);
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline static void free_Pivot_Vector(Pivot_Vector & pivot)
 | 
			
		||||
  {
 | 
			
		||||
    
 | 
			
		||||
    return;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  static inline real matrix_vector_product_sliced(const gene_matrix & A, gene_vector B, int row, int col_start, int col_end)
 | 
			
		||||
  {
 | 
			
		||||
    
 | 
			
		||||
    real somme=0.;
 | 
			
		||||
    
 | 
			
		||||
    for (int j=col_start ; j<col_end+1 ; j++){
 | 
			
		||||
	
 | 
			
		||||
	somme+=A(row,j)*B(j);
 | 
			
		||||
	
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return somme;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  static inline real matrix_matrix_product_sliced(gene_matrix & A, int row, int col_start, int col_end, gene_matrix & B, int row_shift, int col )
 | 
			
		||||
  {
 | 
			
		||||
    
 | 
			
		||||
    real somme=0.;
 | 
			
		||||
    
 | 
			
		||||
    for (int j=col_start ; j<col_end+1 ; j++){
 | 
			
		||||
	
 | 
			
		||||
	somme+=A(row,j)*B(j+row_shift,col);
 | 
			
		||||
	
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return somme;
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    ASSERT( LU.rows()==LU.cols() ) ;
 | 
			
		||||
    int index_max = 0 ;
 | 
			
		||||
    real big = 0. ;
 | 
			
		||||
    real theSum = 0. ;
 | 
			
		||||
    real dum = 0. ;
 | 
			
		||||
    // Get the implicit scaling information :
 | 
			
		||||
    gene_vector ImplicitScaling( N ) ;
 | 
			
		||||
    for( int i=0; i<N; i++ ) {
 | 
			
		||||
      big = 0. ;
 | 
			
		||||
      for( int j=0; j<N; j++ ) {
 | 
			
		||||
	if( abs( LU( i, j ) )>=big ) big = abs( LU( i, j ) ) ;
 | 
			
		||||
      }
 | 
			
		||||
      if( big==0. ) {
 | 
			
		||||
	INFOS( "blitz_LU_factor::Singular matrix" ) ;
 | 
			
		||||
	exit( 0 ) ;
 | 
			
		||||
      }
 | 
			
		||||
      ImplicitScaling( i ) = 1./big ;
 | 
			
		||||
    }
 | 
			
		||||
    // Loop over columns of Crout's method :
 | 
			
		||||
    for( int j=0; j<N; j++ ) {
 | 
			
		||||
      for( int i=0; i<j; i++ ) {
 | 
			
		||||
	theSum = LU( i, j ) ;
 | 
			
		||||
	theSum -= matrix_matrix_product_sliced(LU, i, 0, i-1, LU, 0, j) ;
 | 
			
		||||
	//	theSum -= sum( LU( i, Range( fromStart, i-1 ) )*LU( Range( fromStart, i-1 ), j ) ) ;
 | 
			
		||||
	LU( i, j ) = theSum ;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      // Search for the largest pivot element :
 | 
			
		||||
      big = 0. ;
 | 
			
		||||
      for( int i=j; i<N; i++ ) {
 | 
			
		||||
	theSum = LU( i, j ) ;
 | 
			
		||||
	theSum -= matrix_matrix_product_sliced(LU, i, 0, j-1, LU, 0, j) ;
 | 
			
		||||
	//	theSum -= sum( LU( i, Range( fromStart, j-1 ) )*LU( Range( fromStart, j-1 ), j ) ) ;
 | 
			
		||||
	LU( i, j ) = theSum ;
 | 
			
		||||
	if( (ImplicitScaling( i )*abs( theSum ))>=big ) {
 | 
			
		||||
	  dum = ImplicitScaling( i )*abs( theSum ) ;
 | 
			
		||||
	  big = dum ;
 | 
			
		||||
	  index_max = i ;
 | 
			
		||||
	}
 | 
			
		||||
      }
 | 
			
		||||
      // Interchanging rows and the scale factor :
 | 
			
		||||
      if( j!=index_max ) {
 | 
			
		||||
	for( int k=0; k<N; k++ ) {
 | 
			
		||||
	  dum = LU( index_max, k ) ;
 | 
			
		||||
	  LU( index_max, k ) = LU( j, k ) ;
 | 
			
		||||
	  LU( j, k ) = dum ;
 | 
			
		||||
	}
 | 
			
		||||
	ImplicitScaling( index_max ) = ImplicitScaling( j ) ;
 | 
			
		||||
      }
 | 
			
		||||
      pivot( j ) = index_max ;
 | 
			
		||||
      if ( LU( j, j )==0. ) LU( j, j ) = 1.e-20 ;
 | 
			
		||||
      // Divide by the pivot element :
 | 
			
		||||
      if( j<N ) {
 | 
			
		||||
	dum = 1./LU( j, j ) ;
 | 
			
		||||
	for( int i=j+1; i<N; i++ ) LU( i, j ) *= dum ;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, gene_vector &B, gene_vector X, int N)
 | 
			
		||||
  {
 | 
			
		||||
 | 
			
		||||
    // Pour conserver le meme header, on travaille sur X, copie du second-membre B
 | 
			
		||||
    X = B.copy() ;
 | 
			
		||||
    ASSERT( LU.rows()==LU.cols() ) ;
 | 
			
		||||
    firstIndex indI ;
 | 
			
		||||
    // Forward substitution :
 | 
			
		||||
    int ii = 0 ;
 | 
			
		||||
    real theSum = 0. ;
 | 
			
		||||
    for( int i=0; i<N; i++ ) {
 | 
			
		||||
      int ip = pivot( i ) ;
 | 
			
		||||
      theSum = X( ip ) ;
 | 
			
		||||
      //      theSum = B( ip ) ;
 | 
			
		||||
      X( ip ) = X( i ) ;
 | 
			
		||||
      //      B( ip ) = B( i ) ;
 | 
			
		||||
      if( ii ) {
 | 
			
		||||
	theSum -= matrix_vector_product_sliced(LU, X, i, ii-1, i-1) ;
 | 
			
		||||
	//	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*X( Range( ii-1, i-1 ) ) ) ;
 | 
			
		||||
	//	theSum -= sum( LU( i, Range( ii-1, i-1 ) )*B( Range( ii-1, i-1 ) ) ) ;
 | 
			
		||||
      } else if( theSum ) {
 | 
			
		||||
	ii = i+1 ;
 | 
			
		||||
      }
 | 
			
		||||
      X( i ) = theSum ;
 | 
			
		||||
      //      B( i ) = theSum ;
 | 
			
		||||
    }
 | 
			
		||||
    // Backsubstitution :
 | 
			
		||||
    for( int i=N-1; i>=0; i-- ) {
 | 
			
		||||
      theSum = X( i ) ;
 | 
			
		||||
      //      theSum = B( i ) ;
 | 
			
		||||
      theSum -= matrix_vector_product_sliced(LU, X, i, i+1, N) ;
 | 
			
		||||
      //      theSum -= sum( LU( i, Range( i+1, toEnd ) )*X( Range( i+1, toEnd ) ) ) ;
 | 
			
		||||
      //      theSum -= sum( LU( i, Range( i+1, toEnd ) )*B( Range( i+1, toEnd ) ) ) ;
 | 
			
		||||
      // Store a component of the solution vector :
 | 
			
		||||
      X( i ) = theSum/LU( i, i ) ;
 | 
			
		||||
      //      B( i ) = theSum/LU( i, i ) ;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										147
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blitz/blitz_interface.hh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										147
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blitz/blitz_interface.hh
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,147 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  blitz_interface.hh
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:30 CEST 2002
 | 
			
		||||
// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#ifndef BLITZ_INTERFACE_HH
 | 
			
		||||
#define BLITZ_INTERFACE_HH
 | 
			
		||||
 | 
			
		||||
#include <blitz/blitz.h>
 | 
			
		||||
#include <blitz/array.h>
 | 
			
		||||
#include <blitz/vector-et.h>
 | 
			
		||||
#include <blitz/vecwhere.h>
 | 
			
		||||
#include <blitz/matrix.h>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
BZ_USING_NAMESPACE(blitz)
 | 
			
		||||
 | 
			
		||||
template<class real>
 | 
			
		||||
class blitz_interface{
 | 
			
		||||
 | 
			
		||||
public :
 | 
			
		||||
 | 
			
		||||
  typedef real real_type ;
 | 
			
		||||
 | 
			
		||||
  typedef std::vector<real>  stl_vector;
 | 
			
		||||
  typedef std::vector<stl_vector > stl_matrix;
 | 
			
		||||
 | 
			
		||||
  typedef blitz::Array<real, 2>  gene_matrix;
 | 
			
		||||
  typedef blitz::Array<real, 1>  gene_vector;
 | 
			
		||||
//   typedef blitz::Matrix<real, blitz::ColumnMajor>  gene_matrix;
 | 
			
		||||
//   typedef blitz::Vector<real> gene_vector;
 | 
			
		||||
 | 
			
		||||
  static inline std::string name() { return "blitz"; }
 | 
			
		||||
 | 
			
		||||
  static void free_matrix(gene_matrix & A, int N){}
 | 
			
		||||
 | 
			
		||||
  static void free_vector(gene_vector & B){}
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
 | 
			
		||||
    A.resize(A_stl[0].size(),A_stl.size());
 | 
			
		||||
    for (int j=0; j<A_stl.size() ; j++){
 | 
			
		||||
      for (int i=0; i<A_stl[j].size() ; i++){
 | 
			
		||||
        A(i,j)=A_stl[j][i];
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
 | 
			
		||||
    B.resize(B_stl.size());
 | 
			
		||||
    for (int i=0; i<B_stl.size() ; i++){
 | 
			
		||||
      B(i)=B_stl[i];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void vector_to_stl(gene_vector & B, stl_vector & B_stl){
 | 
			
		||||
    for (int i=0; i<B_stl.size() ; i++){
 | 
			
		||||
      B_stl[i]=B(i);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_to_stl(gene_matrix & A, stl_matrix & A_stl){
 | 
			
		||||
    int N=A_stl.size();
 | 
			
		||||
    for (int j=0;j<N;j++){
 | 
			
		||||
      A_stl[j].resize(N);
 | 
			
		||||
      for (int i=0;i<N;i++)
 | 
			
		||||
        A_stl[j][i] = A(i,j);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    firstIndex i;
 | 
			
		||||
    secondIndex j;
 | 
			
		||||
    thirdIndex k;
 | 
			
		||||
    X = sum(A(i,k) * B(k,j), k);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    firstIndex i;
 | 
			
		||||
    secondIndex j;
 | 
			
		||||
    thirdIndex k;
 | 
			
		||||
    X = sum(A(k,i) * A(k,j), k);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    firstIndex i;
 | 
			
		||||
    secondIndex j;
 | 
			
		||||
    thirdIndex k;
 | 
			
		||||
    X = sum(A(i,k) * A(j,k), k);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    firstIndex i;
 | 
			
		||||
    secondIndex j;
 | 
			
		||||
    X = sum(A(i,j)*B(j),j);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
 | 
			
		||||
  {
 | 
			
		||||
    firstIndex i;
 | 
			
		||||
    secondIndex j;
 | 
			
		||||
    X = sum(A(j,i) * B(j),j);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void axpy(const real coef, const gene_vector & X, gene_vector & Y, int N)
 | 
			
		||||
  {
 | 
			
		||||
    firstIndex i;
 | 
			
		||||
    Y = Y(i) + coef * X(i);
 | 
			
		||||
    //Y += coef * X;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void copy_matrix(const gene_matrix & source, gene_matrix & cible, int N){
 | 
			
		||||
    cible = source;
 | 
			
		||||
    //cible.template operator=<gene_matrix>(source);
 | 
			
		||||
//     for (int i=0;i<N;i++){
 | 
			
		||||
//       for (int j=0;j<N;j++){
 | 
			
		||||
//         cible(i,j)=source(i,j);
 | 
			
		||||
//       }
 | 
			
		||||
//     }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  static inline void copy_vector(const gene_vector & source, gene_vector & cible, int N){
 | 
			
		||||
    //cible.template operator=<gene_vector>(source);
 | 
			
		||||
    cible = source;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
							
								
								
									
										51
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blitz/btl_blitz.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blitz/btl_blitz.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,51 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  btl_blitz.cpp
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:30 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "blitz_interface.hh"
 | 
			
		||||
#include "blitz_LU_solve_interface.hh"
 | 
			
		||||
#include "bench.hh"
 | 
			
		||||
#include "action_matrix_vector_product.hh"
 | 
			
		||||
#include "action_matrix_matrix_product.hh"
 | 
			
		||||
#include "action_axpy.hh"
 | 
			
		||||
#include "action_lu_solve.hh"
 | 
			
		||||
#include "action_ata_product.hh"
 | 
			
		||||
#include "action_aat_product.hh"
 | 
			
		||||
#include "action_atv_product.hh"
 | 
			
		||||
 | 
			
		||||
BTL_MAIN;
 | 
			
		||||
 | 
			
		||||
int main()
 | 
			
		||||
{
 | 
			
		||||
 | 
			
		||||
  bench<Action_matrix_vector_product<blitz_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
  bench<Action_atv_product<blitz_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  bench<Action_matrix_matrix_product<blitz_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
  bench<Action_ata_product<blitz_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
  bench<Action_aat_product<blitz_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  bench<Action_axpy<blitz_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  //bench<Action_lu_solve<blitz_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										38
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blitz/btl_tiny_blitz.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								cs440-acg/ext/eigen/bench/btl/libs/blitz/btl_tiny_blitz.cpp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,38 @@
 | 
			
		||||
//=====================================================
 | 
			
		||||
// File   :  btl_tiny_blitz.cpp
 | 
			
		||||
// Author :  L. Plagne <laurent.plagne@edf.fr>
 | 
			
		||||
// Copyright (C) EDF R&D,  lun sep 30 14:23:30 CEST 2002
 | 
			
		||||
//=====================================================
 | 
			
		||||
//
 | 
			
		||||
// This program is free software; you can redistribute it and/or
 | 
			
		||||
// modify it under the terms of the GNU General Public License
 | 
			
		||||
// as published by the Free Software Foundation; either version 2
 | 
			
		||||
// of the License, or (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program; if not, write to the Free Software
 | 
			
		||||
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 | 
			
		||||
//
 | 
			
		||||
#include "utilities.h"
 | 
			
		||||
#include "tiny_blitz_interface.hh"
 | 
			
		||||
#include "static/bench_static.hh"
 | 
			
		||||
#include "action_matrix_vector_product.hh"
 | 
			
		||||
#include "action_matrix_matrix_product.hh"
 | 
			
		||||
#include "action_axpy.hh"
 | 
			
		||||
 | 
			
		||||
BTL_MAIN;
 | 
			
		||||
 | 
			
		||||
int main()
 | 
			
		||||
{
 | 
			
		||||
  bench_static<Action_axpy,tiny_blitz_interface>();
 | 
			
		||||
  bench_static<Action_matrix_matrix_product,tiny_blitz_interface>();
 | 
			
		||||
  bench_static<Action_matrix_vector_product,tiny_blitz_interface>();
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user