155 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			155 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
Bench Template Library
 | 
						|
 | 
						|
****************************************
 | 
						|
Introduction :
 | 
						|
 | 
						|
The aim of this project is to compare the performance
 | 
						|
of available numerical libraries. The code is designed
 | 
						|
to be as generic and modular as possible. Thus, adding new
 | 
						|
numerical libraries or new numerical tests should
 | 
						|
require minimal effort.
 | 
						|
 | 
						|
 | 
						|
*****************************************
 | 
						|
 | 
						|
Installation :
 | 
						|
 | 
						|
BTL uses cmake / ctest:
 | 
						|
 | 
						|
1 - create a build directory:
 | 
						|
 | 
						|
  $ mkdir build
 | 
						|
  $ cd build
 | 
						|
 | 
						|
2 - configure:
 | 
						|
 | 
						|
  $ ccmake ..
 | 
						|
 | 
						|
3 - run the bench using ctest:
 | 
						|
 | 
						|
  $ ctest -V
 | 
						|
 | 
						|
You can run the benchmarks only on libraries matching a given regular expression:
 | 
						|
  ctest -V -R <regexp>
 | 
						|
For instance:
 | 
						|
  ctest -V -R eigen2
 | 
						|
 | 
						|
You can also select a given set of actions by defining the environment variable BTL_CONFIG this way:
 | 
						|
  BTL_CONFIG="-a action1{:action2}*" ctest -V
 | 
						|
An example:
 | 
						|
  BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata" ctest -V -R eigen2
 | 
						|
 | 
						|
Finally, if bench results already exist (the bench*.dat files), they are merged by keeping the best result for each matrix size. If you want to overwrite the previous ones, you can simply add the "--overwrite" option:
 | 
						|
  BTL_CONFIG="-a axpy:vector_matrix:trisolve:ata --overwrite" ctest -V -R eigen2
 | 
						|
 | 
						|
4 - analyze the results: different data files (.dat) are produced in each of the libs directories.
 | 
						|
 If gnuplot is available, choose a directory name in the data directory to store the results and type:
 | 
						|
        $ cd data
 | 
						|
        $ mkdir my_directory
 | 
						|
        $ cp ../libs/*/*.dat my_directory
 | 
						|
 Build the data utilities in this (data) directory
 | 
						|
        make
 | 
						|
 Then you can look at the raw data,
 | 
						|
        go_mean my_directory
 | 
						|
 or smooth the data first :
 | 
						|
	smooth_all.sh my_directory
 | 
						|
	go_mean my_directory_smooth
 | 
						|
 | 
						|
 | 
						|
*************************************************
 | 
						|
 | 
						|
Files and directories :
 | 
						|
 | 
						|
 generic_bench : all the bench sources common to all libraries
 | 
						|
 | 
						|
 actions : sources for different action wrappers (axpy, matrix-matrix product) to be tested.
 | 
						|
 | 
						|
 libs/* : bench sources specific to each tested library.
 | 
						|
 | 
						|
 machine_dep : directory used to store machine specific Makefile.in
 | 
						|
 | 
						|
 data : directory used to store gnuplot scripts and data analysis utilities
 | 
						|
 | 
						|
**************************************************
 | 
						|
 | 
						|
Principles : the code modularity is achieved by defining two concepts :
 | 
						|
 | 
						|
 ****** Action concept : This is a class defining which kind
 | 
						|
  of test must be performed (e.g. a matrix_vector_product).
 | 
						|
	An Action should define the following methods :
 | 
						|
 | 
						|
        *** Ctor using the size of the problem (matrix or vector size) as an argument
 | 
						|
	    Action action(size);
 | 
						|
        *** initialize : this method initializes the calculation (e.g. initializes the matrix and vector arguments)
 | 
						|
	    action.initialize();
 | 
						|
	*** calculate : this method actually launches the calculation to be benchmarked
 | 
						|
	    action.calculate();
 | 
						|
	*** nb_op_base() : this method returns the complexity of the calculate method (allowing the mflops evaluation)
 | 
						|
        *** name() : this method returns the name of the action (std::string)
 | 
						|
 | 
						|
 ****** Interface concept : This is a class or namespace defining how to use a given library and
 | 
						|
  its specific containers (matrix and vector). Up to now an interface should define the following types
 | 
						|
 | 
						|
	*** real_type : kind of float to be used (float or double)
 | 
						|
	*** stl_vector : must correspond to std::vector<real_type>
 | 
						|
	*** stl_matrix : must correspond to std::vector<stl_vector>
 | 
						|
	*** gene_vector : the vector type for this interface        --> e.g. (real_type *) for the C_interface
 | 
						|
	*** gene_matrix : the matrix type for this interface        --> e.g. (gene_vector *) for the C_interface
 | 
						|
 | 
						|
	+ the following common methods
 | 
						|
 | 
						|
        *** free_matrix(gene_matrix & A, int N)  deallocation of an N sized gene_matrix A
 | 
						|
        *** free_vector(gene_vector & B)  deallocation of an N sized gene_vector B
 | 
						|
        *** matrix_from_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of an stl_matrix A_stl into a gene_matrix A.
 | 
						|
	     The allocation of A is done in this function.
 | 
						|
	*** vector_from_stl(gene_vector & B, stl_vector & B_stl)  copy the content of an stl_vector B_stl into a gene_vector B.
 | 
						|
	     The allocation of B is done in this function.
 | 
						|
        *** matrix_to_stl(gene_matrix & A, stl_matrix & A_stl) copy the content of a gene_matrix A into an stl_matrix A_stl.
 | 
						|
             The size of A_stl must correspond to the size of A.
 | 
						|
        *** vector_to_stl(gene_vector & A, stl_vector & A_stl) copy the content of a gene_vector A into an stl_vector A_stl.
 | 
						|
             The size of A_stl must correspond to the size of A.
 | 
						|
	*** copy_matrix(gene_matrix & source, gene_matrix & cible, int N) : copy the content of source in cible. Both source
 | 
						|
		and cible must be sized NxN.
 | 
						|
	*** copy_vector(gene_vector & source, gene_vector & cible, int N) : copy the content of source in cible. Both source
 | 
						|
 		and cible must be sized N.
 | 
						|
 | 
						|
	and the following methods corresponding to the actions one wants to benchmark :
 | 
						|
 | 
						|
	***  matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
 | 
						|
	***  matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N)
 | 
						|
        ***  ata_product(const gene_matrix & A, gene_matrix & X, int N)
 | 
						|
	***  aat_product(const gene_matrix & A, gene_matrix & X, int N)
 | 
						|
        ***  axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
 | 
						|
 | 
						|
 The bench algorithm (generic_bench/bench.hh) is templated with an action itself templated with
 | 
						|
 an interface. A typical main.cpp source stored in a given library directory libs/A_LIB
 | 
						|
 looks like :
 | 
						|
 | 
						|
 bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
 | 
						|
 | 
						|
 this function will produce an XY data file containing the measured mflops as a function of the size for 50
 | 
						|
 sizes between 10 and 1000.
 | 
						|
 | 
						|
 This algorithm can be adapted by providing a given Perf_Analyzer object which determines how the time
 | 
						|
 measurements must be done. For example, the X86_Perf_Analyzer uses the asm rdtsc function and provides
 | 
						|
 a very fast and accurate (but less portable) timing method. The default is the Portable_Perf_Analyzer
 | 
						|
 so
 | 
						|
 | 
						|
 bench< AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
 | 
						|
 | 
						|
 is equivalent to
 | 
						|
 | 
						|
 bench< Portable_Perf_Analyzer,AN_ACTION < AN_INTERFACE > >( 10 , 1000 , 50 ) ;
 | 
						|
 | 
						|
 If your system supports it, we suggest using a mixed implementation (X86_Perf_Analyzer+Portable_Perf_Analyzer).
 | 
						|
 replace
 | 
						|
     bench<Portable_Perf_Analyzer,Action>(size_min,size_max,nb_point);
 | 
						|
 with
 | 
						|
     bench<Mixed_Perf_Analyzer,Action>(size_min,size_max,nb_point);
 | 
						|
 in generic_bench/bench.hh
 | 
						|
 | 
						|
.
 | 
						|
 | 
						|
 | 
						|
 |