gblas.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027 
00043 #ifndef GBLAS
00044 #define GBLAS
00045 #include <ctime>
00046 #include "Failure.h"
00047 
00048 // We need to include config.h to get USE_LINALG_TEMPLATES flag
00049 #include "config.h"
00050 
00051 #include "template_lapack_common.h"
00052 
00053 /* LEVEL 3 */
00054 extern "C" void dgemm_(const char *ta,const char *tb,
00055                        const int *n, const int *k, const int *l,
00056                        const double *alpha,const double *A,const int *lda,
00057                        const double *B, const int *ldb,
00058                        const double *beta, double *C, const int *ldc);
00059 extern "C" void dpptrf_(const char *uplo,const int *n, double* ap, int *info);
00060 extern "C" void dspgst_(const int *itype, const char *uplo,const int *n,
00061                         double* ap,const double *bp,int *info);
00062 extern "C" void dtptri_(const char *uplo,const char *diag,const int *n,
00063                         double* ap,int *info);
00064 /* unit triangular means that a value of 1.0 is assumed  */
00065 /* for the diagonal elements (hence diagonal not stored in packed format) */
00066 extern "C" void dtrmm_(const char *side,const char *uplo,const char *transa,
00067                        const char *diag,const int *m,const int *n,
00068                        const double *alpha,const double *A,const int *lda,
00069                        double *B,const int *ldb);
00070 extern "C" void dsygv_(const int *itype,const char *jobz,
00071                        const char *uplo,const int *n,
00072                        double *A,const int *lda,double *B,const int *ldb,
00073                        double* w,double* work,const int *lwork,int *info);
00074 extern "C" void dggev_(const char *jobbl, const char *jobvr, const int *n,
00075                        double *A, const int *lda, double *B, const int *ldb,
00076                        double *alphar, double *alphai, double *beta,
00077                        double *vl, const int *ldvl,
00078                        double *vr, const int *ldvr,
00079                        double *work, const int *lwork, int *info);
00080 extern "C" void dpotrf_(const char *uplo, const int *n, double *A, 
00081                         const int *lda, int *info);
00082 extern "C" void dtrtri_(const char *uplo,const char *diag,const int *n,
00083                         double *A, const int *lda, int *info);
00084 extern "C" void dsyrk_(const char *uplo, const char *trans, const int *n, 
00085                        const int *k, const double *alpha, const double *A, 
00086                        const int *lda, const double *beta, 
00087                        double *C, const int *ldc);
00088 extern "C" void dsymm_(const char *side,const char *uplo,
00089                        const int *m,const int *n,
00090                        const double *alpha,const double *A,const int *lda,
00091                        const double *B,const int *ldb, const double* beta,
00092                        double *C,const int *ldc);
00093 extern "C" void dpocon_(const char *uplo, const int *n, const double *A,
00094                         const int *lda, const double *anorm, double *rcond,
00095                         double *work, int *iwork, int *info);
00096 extern "C" void dstevx_(const char *jobz, const char *range, const int *n, 
00097                         double *d, double *e, const double *vl, 
00098                         const double *vu, const int *il, const int *iu, 
00099                         const double *abstol, int *m, double *w, double *z, 
00100                         const int *ldz, double *work, int *iwork, int *ifail, 
00101                         int *info);
00102 extern "C" void dstevr_(const char *jobz, const char *range, const int *n, 
00103                         double *d, double *e, const double *vl, 
00104                         const double *vu, const int *il, const int *iu, 
00105                         const double *abstol, int *m, double *w, double *z, 
00106                         const int *ldz, int* isuppz, double *work, int* lwork, 
00107                         int *iwork, int* liwork, int *info);
00108 extern "C" void dsyev_(const char *jobz, const char *uplo, const int *n, 
00109                        double *a, const int *lda, double *w, double *work, 
00110                        const int *lwork, int *info);
00111 
00112 /* LEVEL 2 */
00113 extern "C" void dgemv_(const char *ta, const int *m, const int *n, 
00114                        const double *alpha, const double *A, const int *lda, 
00115                        const double *x, const int *incx, const double *beta, 
00116                        double *y, const int *incy);
00117 extern "C" void dsymv_(const char *uplo, const int *n, 
00118                        const double *alpha, const double *A, const int *lda, 
00119                        const double *x, const int *incx, const double *beta, 
00120                        double *y, const int *incy);
00121 extern "C" void dtrmv_(const char *uplo, const char *trans, const char *diag,
00122                        const int *n, const double *A, const int *lda, 
00123                        double *x, const int *incx);
00124 /* LEVEL 1 */
00125 extern "C" void dscal_(const int* n,const double* da, double* dx,
00126                        const int* incx);
00127 extern "C" double ddot_(const int* n, const double* dx, const int* incx, 
00128                         const double* dy, const int* incy);
00129 extern "C" void daxpy_(const int* n, const double* da, const double* dx,
00130                        const int* incx, double* dy,const int* incy);
00131 
00132 /* Single precision */
00133 /* LEVEL 3 */
00134 extern "C" void sgemm_(const char *ta,const char *tb,
00135                        const int *n, const int *k, const int *l,
00136                        const float *alpha,const float *A,const int *lda,
00137                        const float *B, const int *ldb,
00138                        const float *beta, float *C, const int *ldc);
00139 extern "C" void spptrf_(const char *uplo,const int *n, float* ap, int *info);
00140 extern "C" void sspgst_(const int *itype, const char *uplo,const int *n,
00141                         float* ap,const float *bp,int *info);
00142 extern "C" void stptri_(const char *uplo,const char *diag,const int *n,
00143                         float* ap,int *info);
00144 /* unit triangular means that a value of 1.0 is assumed  */
00145 /* for the diagonal elements (hence diagonal not stored in packed format) */
00146 extern "C" void strmm_(const char *side,const char *uplo,const char *transa,
00147                        const char *diag,const int *m,const int *n,
00148                        const float *alpha,const float *A,const int *lda,
00149                        float *B,const int *ldb);
00150 extern "C" void ssygv_(const int *itype,const char *jobz,
00151                        const char *uplo,const int *n,
00152                        float *A,const int *lda,float *B,const int *ldb,
00153                        float* w,float* work,const int *lwork,int *info);
00154 extern "C" void sggev_(const char *jobbl, const char *jobvr, const int *n,
00155                        float *A, const int *lda, float *B, const int *ldb,
00156                        float *alphar, float *alphai, float *beta,
00157                        float *vl, const int *ldvl,
00158                        float *vr, const int *ldvr,
00159                        float *work, const int *lwork, int *info);
00160 extern "C" void spotrf_(const char *uplo, const int *n, float *A, 
00161                         const int *lda, int *info);
00162 extern "C" void strtri_(const char *uplo,const char *diag,const int *n,
00163                         float *A, const int *lda, int *info);
00164 extern "C" void ssyrk_(const char *uplo, const char *trans, const int *n, 
00165                        const int *k, const float *alpha, const float *A, 
00166                        const int *lda, const float *beta, 
00167                        float *C, const int *ldc);
00168 extern "C" void ssymm_(const char *side,const char *uplo,
00169                        const int *m,const int *n,
00170                        const float *alpha,const float *A,const int *lda,
00171                        const float *B,const int *ldb, const float* beta,
00172                        float *C,const int *ldc);
00173 extern "C" void spocon_(const char *uplo, const int *n, const float *A,
00174                         const int *lda, const float *anorm, float *rcond,
00175                         float *work, int *iwork, int *info);
00176 extern "C" void sstevx_(const char *jobz, const char *range, const int *n, 
00177                         float *d, float *e, const float *vl, 
00178                         const float *vu, const int *il, const int *iu, 
00179                         const float *abstol, int *m, float *w, float *z, 
00180                         const int *ldz, float *work, int *iwork, int *ifail, 
00181                         int *info);
00182 extern "C" void sstevr_(const char *jobz, const char *range, const int *n, 
00183                         float *d, float *e, const float *vl, 
00184                         const float *vu, const int *il, const int *iu, 
00185                         const float *abstol, int *m, float *w, float *z, 
00186                         const int *ldz, int* isuppz, float *work, int* lwork, 
00187                         int *iwork, int* liwork, int *info);
00188 extern "C" void ssyev_(const char *jobz, const char *uplo, const int *n, 
00189                        float *a, const int *lda, float *w, float *work, 
00190                        const int *lwork, int *info);
00191 
00192 /* LEVEL 2 */
00193 extern "C" void sgemv_(const char *ta, const int *m, const int *n, 
00194                        const float *alpha, const float *A, const int *lda, 
00195                        const float *x, const int *incx, const float *beta, 
00196                        float *y, const int *incy);
00197 extern "C" void ssymv_(const char *uplo, const int *n, 
00198                        const float *alpha, const float *A, const int *lda, 
00199                        const float *x, const int *incx, const float *beta, 
00200                        float *y, const int *incy);
00201 extern "C" void strmv_(const char *uplo, const char *trans, const char *diag,
00202                        const int *n, const float *A, const int *lda, 
00203                        float *x, const int *incx);
00204 /* LEVEL 1 */
00205 extern "C" void sscal_(const int* n,const float* da, float* dx,
00206                        const int* incx);
00207 #if 0
00208 // sdot_ is unreliable because of varying return type in different 
00209 // implementations. We therefore always use template dot for single precision 
00210 extern "C" double sdot_(const int* n, const float* dx, const int* incx, 
00211                        const float* dy, const int* incy);
00212 #endif
00213 extern "C" void saxpy_(const int* n, const float* da, const float* dx,
00214                        const int* incx, float* dy,const int* incy);
00215 
00216 namespace mat
00217 {
00218   struct Gblas {
00219     static float time;
00220     static bool timekeeping;
00221   };
00222   
00223   /*************** Default version throws exception */
00224   template<class T>
00225     inline static void gemm(const char *ta,const char *tb,
00226                             const int *n, const int *k, const int *l,
00227                             const T *alpha,const T *A,const int *lda,
00228                             const T *B, const int *ldb,
00229                             const T *beta,T *C, const int *ldc) {
00230     template_blas_gemm(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
00231   }
00232 
00233   
00234   /*  Computes the Cholesky factorization of a symmetric   *
00235    *  positive definite matrix in packed storage.          */
00236   template<class T>
00237     inline static void pptrf(const char *uplo,const int *n, T* ap, int *info) {
00238     template_lapack_pptrf(uplo,n,ap,info);
00239   }
00240   
00241   template<class T>
00242     inline static void spgst(const int *itype, const char *uplo,const int *n,
00243                              T* ap,const T *bp,int *info) {
00244     template_lapack_spgst(itype,uplo,n,ap,bp,info);
00245   }
00246 
00247   /* Computes the inverse of a triangular matrix in packed storage. */
00248   template<class T>
00249     inline static void tptri(const char *uplo,const char *diag,const int *n,
00250                              T* ap,int *info) {
00251     template_lapack_tptri(uplo,diag,n,ap,info);
00252   }
00253   
00254   template<class T>
00255     inline static void trmm(const char *side,const char *uplo,
00256                             const char *transa, const char *diag,
00257                             const int *m,const int *n,
00258                             const T *alpha,const T *A,const int *lda,
00259                             T *B,const int *ldb) {
00260     template_blas_trmm(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
00261   }
00262   
00263   /* Computes all eigenvalues and the eigenvectors of a generalized  *
00264    * symmetric-definite generalized eigenproblem,                    *
00265    * Ax= lambda Bx,  ABx= lambda x,  or BAx= lambda x.               */
00266   template<class T>
00267     inline static void sygv(const int *itype,const char *jobz,
00268                             const char *uplo,const int *n,
00269                             T *A,const int *lda,T *B,const int *ldb,
00270                             T* w,T* work,const int *lwork,int *info) {
00271     template_lapack_sygv(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
00272   }
00273 
00274   template<class T>
00275     inline static void ggev(const char *jobbl, const char *jobvr, 
00276                             const int *n, T *A, const int *lda, 
00277                             T *B, const int *ldb, T *alphar, 
00278                             T *alphai, T *beta, T *vl, 
00279                             const int *ldvl, T *vr, const int *ldvr,
00280                             T *work, const int *lwork, int *info) {
00281     template_lapack_ggev(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, 
00282                          ldvl, vr, ldvr, work, lwork, info);
00283   }
00284 
00285   /* Computes the Cholesky factorization of a symmetric *
00286    * positive definite matrix in packed storage.        */
00287   template<class T>
00288     inline static void potrf(const char *uplo, const int *n, T *A, 
00289                              const int *lda, int *info) {
00290     template_lapack_potrf(uplo, n, A, lda, info);
00291   }
00292 
00293   /* Computes the inverse of a triangular matrix.                   */
00294   template<class T>
00295     inline static void trtri(const char *uplo,const char *diag,const int *n,
00296                              T *A, const int *lda, int *info) {
00297     // Create copies of strings because they cannot be const inside trtri.
00298     char uploCopy[2];
00299     char diagCopy[2];
00300     uploCopy[0] = uplo[0];
00301     uploCopy[1] = '\0';
00302     diagCopy[0] = diag[0];
00303     diagCopy[1] = '\0';
00304     template_lapack_trtri(uploCopy, diagCopy, n, A, lda, info);
00305   }
00306 
00307   template<class T>
00308     inline static void syrk(const char *uplo, const char *trans, const int *n, 
00309                             const int *k, const T *alpha, const T *A, 
00310                             const int *lda, const T *beta, 
00311                             T *C, const int *ldc) {
00312     template_blas_syrk(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
00313   }
00314   
00315   template<class T>
00316     inline static void symm(const char *side,const char *uplo,
00317                             const int *m,const int *n,
00318                             const T *alpha,const T *A,const int *lda,
00319                             const T *B,const int *ldb, const T* beta,
00320                             T *C,const int *ldc) {
00321     template_blas_symm(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
00322   }
00323   
00324   template<class T>
00325     inline static void pocon(const char *uplo, const int *n, const T *A,
00326                              const int *lda, const T *anorm, T *rcond,
00327                              T *work, int *iwork, int *info) {
00328     template_lapack_pocon(uplo, n, A, lda, anorm, rcond, work, iwork, info);
00329   }
00330 
00331   template<class T>
00332     inline static void stevx(const char *jobz, const char *range, 
00333                              const int *n, T *d, T *e, const T *vl, 
00334                              const T *vu, const int *il, const int *iu, 
00335                              const T *abstol, int *m, T *w, T *z, 
00336                              const int *ldz, T *work, int *iwork, int *ifail, 
00337                              int *info) {
00338     template_lapack_stevx(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz, 
00339                           work, iwork, ifail, info);
00340   }
00341   
00342   template<class T>
00343     inline static void stevr(const char *jobz, const char *range, const int *n, 
00344                              T *d, T *e, const T *vl, 
00345                              const T *vu, const int *il, const int *iu, 
00346                              const T *abstol, int *m, T *w, T *z, 
00347                              const int *ldz, int* isuppz, T *work, int* lwork, 
00348                              int *iwork, int* liwork, int *info) {
00349     template_lapack_stevr(jobz, range, n, d, e, vl, vu, il, iu, abstol, 
00350                           m, w, z, ldz, isuppz, 
00351                           work, lwork, iwork, liwork, info);
00352   }
00353 
00354 
00355   template<class T>
00356     inline static void syev(const char *jobz, const char *uplo, const int *n, 
00357                             T *a, const int *lda, T *w, T *work, 
00358                             const int *lwork, int *info) {
00359     template_lapack_syev(jobz, uplo, n, a, lda, w, work, lwork, info);
00360   }
00361 
00362 
00363   /* LEVEL 2 */
00364   template<class T>
00365     inline static void gemv(const char *ta, const int *m, const int *n, 
00366                             const T *alpha, const T *A, 
00367                             const int *lda, 
00368                             const T *x, const int *incx, 
00369                             const T *beta, T *y, const int *incy) {
00370     template_blas_gemv(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
00371   }
00372 
00373   template<class T>
00374     inline static void symv(const char *uplo, const int *n, 
00375                             const T *alpha, const T *A, 
00376                             const int *lda, const T *x, 
00377                             const int *incx, const T *beta, 
00378                             T *y, const int *incy) {
00379     template_blas_symv(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
00380   }
00381 
00382   template<class T>
00383     inline static void trmv(const char *uplo, const char *trans, 
00384                             const char *diag, const int *n, 
00385                             const T *A, const int *lda, 
00386                             T *x, const int *incx) {
00387     template_blas_trmv(uplo, trans, diag, n, A, lda, x, incx);    
00388   }
00389 
00390 
00391   /* LEVEL 1 */
00392   template<class T>
00393     inline static void scal(const int* n,const T* da, T* dx,
00394                             const int* incx) {
00395     template_blas_scal(n, da, dx, incx);
00396   }
00397 
00398   template<class T>
00399     inline static T dot(const int* n, const T* dx, const int* incx, 
00400                         const T* dy, const int* incy) {
00401     return template_blas_dot(n, dx, incx, dy, incy);
00402   }
00403 
00404   template<class T>
00405     inline static void axpy(const int* n, const T* da, const T* dx,
00406                             const int* incx, T* dy,const int* incy) {
00407     template_blas_axpy(n, da, dx, incx, dy, incy);
00408   }
00409 
00410 
00411 
00412   
00413   /* Below follows specializations for double, single, etc. 
00414      These specializations are not needed if template_blas and template_lapack are used,
00415      so in that case we skip this entire section. */
00416 #ifndef USE_LINALG_TEMPLATES
00417 
00418 
00419   /*************** Double specialization */
00420   template<>
00421     inline void gemm<double>(const char *ta,const char *tb,
00422                              const int *n, const int *k, const int *l,
00423                              const double *alpha,
00424                              const double *A,const int *lda,
00425                              const double *B, const int *ldb,
00426                              const double *beta,
00427                              double *C, const int *ldc) {
00428     if (Gblas::timekeeping) {
00429       clock_t start = clock();
00430       dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
00431       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00432     }
00433     else {
00434       dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
00435     }
00436   }
00437   
00438   template<>
00439     inline  void pptrf<double>(const char *uplo,const int *n, 
00440                                double* ap, int *info) {
00441     if (Gblas::timekeeping) {
00442       clock_t start = clock();
00443       dpptrf_(uplo,n,ap,info);
00444       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00445     }
00446     else {
00447       dpptrf_(uplo,n,ap,info);
00448     }
00449   }  
00450 
00451   template<>
00452     inline  void spgst<double>(const int *itype, const char *uplo,
00453                                const int *n,
00454                                double* ap,const double *bp,int *info) {
00455     if (Gblas::timekeeping) {
00456       clock_t start = clock();
00457       dspgst_(itype,uplo,n,ap,bp,info);
00458       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00459     }
00460     else {
00461       dspgst_(itype,uplo,n,ap,bp,info); 
00462     }
00463   } 
00464   
00465   template<>
00466     inline  void tptri<double>(const char *uplo,const char *diag,const int *n,
00467                                double* ap,int *info) {
00468     if (Gblas::timekeeping) {
00469       clock_t start = clock();
00470       dtptri_(uplo,diag,n,ap,info);
00471       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00472     }
00473     else {
00474       dtptri_(uplo,diag,n,ap,info);
00475     }
00476   }
00477   
00478   template<>
00479     inline  void trmm<double>(const char *side,const char *uplo,
00480                               const char *transa,
00481                               const char *diag,const int *m,const int *n,
00482                               const double *alpha,
00483                               const double *A,const int *lda,
00484                               double *B,const int *ldb) {
00485     if (Gblas::timekeeping) {
00486       clock_t start = clock();
00487       dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
00488       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00489     }
00490     else {
00491       dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
00492     }
00493   }
00494 
00495   template<>
00496     inline  void sygv<double>(const int *itype,const char *jobz,
00497                               const char *uplo,const int *n,
00498                               double *A,const int *lda,
00499                               double *B,const int *ldb,
00500                               double* w,double* work,
00501                               const int *lwork,int *info) {
00502     if (Gblas::timekeeping) {
00503       clock_t start = clock();
00504       dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
00505       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00506     }
00507     else {
00508       dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);  
00509     }
00510   }
00511 
00512   template<>
00513     inline void ggev<double>(const char *jobbl, const char *jobvr, 
00514                              const int *n, double *A, const int *lda, 
00515                              double *B, const int *ldb, double *alphar, 
00516                              double *alphai, double *beta, double *vl, 
00517                              const int *ldvl, double *vr, const int *ldvr,
00518                              double *work, const int *lwork, int *info) {
00519     if (Gblas::timekeeping) {
00520       clock_t start = clock();
00521       dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, 
00522              ldvl, vr, ldvr, work, lwork, info);
00523       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00524     }
00525     else {
00526       dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, 
00527              ldvl, vr, ldvr, work, lwork, info);
00528     }
00529   }
00530 
00531 
00532   template<>
00533     inline void potrf<double>(const char *uplo, const int *n, double *A, 
00534                               const int *lda, int *info) {
00535     if (Gblas::timekeeping) {
00536       clock_t start = clock();
00537       dpotrf_(uplo, n, A, lda, info);
00538       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00539     }
00540     else {
00541       dpotrf_(uplo, n, A, lda, info);      
00542     }
00543   }
00544 
00545   template<>
00546     inline void trtri<double>(const char *uplo,const char *diag,const int *n,
00547                               double *A, const int *lda, int *info) {
00548     if (Gblas::timekeeping) {
00549       clock_t start = clock();
00550       dtrtri_(uplo, diag, n, A, lda, info);
00551       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00552     }
00553     else {
00554       dtrtri_(uplo, diag, n, A, lda, info);     
00555     }
00556   }
00557 
00558   template<>
00559     inline void syrk<double>(const char *uplo, const char *trans, 
00560                              const int *n, const int *k, const double *alpha, 
00561                              const double *A, const int *lda, 
00562                              const double *beta, double *C, const int *ldc) {
00563     if (Gblas::timekeeping) {
00564       clock_t start = clock();
00565       dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
00566       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00567     }
00568     else {
00569       dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
00570     }
00571   }
00572 
00573   template<>
00574     inline void symm<double>(const char *side,const char *uplo,
00575                              const int *m,const int *n, const double *alpha,
00576                              const double *A,const int *lda,
00577                              const double *B,const int *ldb, 
00578                              const double* beta,
00579                              double *C,const int *ldc) {
00580     if (Gblas::timekeeping) {
00581       clock_t start = clock();
00582       dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
00583       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00584     }
00585     else {
00586       dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
00587     } 
00588   }
00589 
00590   template<>
00591     inline void pocon<double>(const char *uplo, const int *n, 
00592                               const double *A, const int *lda, 
00593                               const double *anorm, double *rcond, 
00594                               double *work, int *iwork, int *info) {
00595     if (Gblas::timekeeping) {
00596       clock_t start = clock();
00597       dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
00598       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00599     }
00600     else {
00601       dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
00602     }     
00603   }
00604 
00605   template<>
00606     inline void stevx<double>(const char *jobz, const char *range, 
00607                               const int *n, double *d, double *e, 
00608                               const double *vl, 
00609                               const double *vu, const int *il, const int *iu, 
00610                               const double *abstol, int *m, double *w, 
00611                               double *z, 
00612                               const int *ldz, double *work, int *iwork, 
00613                               int *ifail, int *info) {
00614     if (Gblas::timekeeping) {
00615       clock_t start = clock();
00616       dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz, 
00617               work, iwork, ifail, info);
00618       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00619     }
00620     else {
00621       dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz, 
00622               work, iwork, ifail, info);
00623     }     
00624   }
00625 
00626   template<>
00627     inline void stevr<double>(const char *jobz, const char *range, 
00628                               const int *n, double *d, double *e, 
00629                               const double *vl, const double *vu, 
00630                               const int *il, const int *iu, 
00631                               const double *abstol, 
00632                               int *m, double *w, 
00633                               double *z, const int *ldz, int* isuppz, 
00634                               double *work, int* lwork, 
00635                               int *iwork, int* liwork, int *info) {
00636     if (Gblas::timekeeping) {
00637       clock_t start = clock();
00638       dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol, 
00639               m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
00640       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00641     }
00642     else {
00643       dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol, 
00644               m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
00645     }     
00646   }
00647 
00648 
00649   
00650   template<>
00651     inline void syev<double>(const char *jobz, const char *uplo, const int *n, 
00652                              double *a, const int *lda, double *w, 
00653                              double *work, const int *lwork, int *info) {
00654     if (Gblas::timekeeping) {
00655       clock_t start = clock();
00656       dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
00657       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00658     }
00659     else {
00660       dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
00661     }     
00662   }
00663 
00664 
00665   /* LEVEL 2 */
00666   template<>
00667     inline void gemv<double>(const char *ta, const int *m, const int *n, 
00668                              const double *alpha, const double *A, 
00669                              const int *lda, 
00670                              const double *x, const int *incx, 
00671                              const double *beta, double *y, const int *incy) {
00672     if (Gblas::timekeeping) {
00673       clock_t start = clock();
00674       dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
00675       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00676     }
00677     else {
00678       dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
00679     }     
00680   }
00681   
00682   template<>
00683     inline void symv<double>(const char *uplo, const int *n, 
00684                              const double *alpha, const double *A, 
00685                              const int *lda, const double *x, 
00686                              const int *incx, const double *beta, 
00687                              double *y, const int *incy) {
00688     if (Gblas::timekeeping) {
00689       clock_t start = clock();
00690       dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
00691       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00692     }
00693     else {
00694       dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
00695     }     
00696   }
00697 
00698   template<>
00699     inline void trmv<double>(const char *uplo, const char *trans, 
00700                              const char *diag, const int *n, 
00701                              const double *A, const int *lda, 
00702                              double *x, const int *incx) {
00703     if (Gblas::timekeeping) {
00704       clock_t start = clock();
00705       dtrmv_(uplo, trans, diag, n, A, lda, x, incx);
00706       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00707     }
00708     else {
00709       dtrmv_(uplo, trans, diag, n, A, lda, x, incx);
00710     }     
00711   }
00712 
00713 
00714   /* LEVEL 1 */
00715   template<>
00716     inline void scal<double>(const int* n,const double* da, double* dx,
00717                              const int* incx) {
00718     if (Gblas::timekeeping) {
00719       clock_t start = clock();
00720       dscal_(n, da, dx, incx);
00721       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00722     }
00723     else {
00724       dscal_(n, da, dx, incx);
00725     }     
00726   }
00727   
00728   template<>
00729     inline double dot<double>(const int* n, const double* dx, const int* incx, 
00730                               const double* dy, const int* incy) {
00731     double tmp = 0;
00732     if (Gblas::timekeeping) {
00733       clock_t start = clock();
00734       tmp = ddot_(n, dx, incx, dy, incy);
00735       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00736     }
00737     else {
00738       tmp = ddot_(n, dx, incx, dy, incy);
00739     }     
00740     return tmp;
00741   }
00742   
00743   template<>
00744     inline void axpy<double>(const int* n, const double* da, const double* dx,
00745                              const int* incx, double* dy,const int* incy) {
00746     if (Gblas::timekeeping) {
00747       clock_t start = clock();
00748       daxpy_(n, da, dx, incx, dy, incy);
00749       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00750     }
00751     else {
00752       daxpy_(n, da, dx, incx, dy, incy);
00753     }     
00754   }
00755 
00756   
00757   /*************** Single specialization */
00758   template<>
00759     inline void gemm<float>(const char *ta,const char *tb,
00760                             const int *n, const int *k, const int *l,
00761                             const float *alpha,
00762                             const float *A,const int *lda,
00763                             const float *B, const int *ldb,
00764                             const float *beta,
00765                             float *C, const int *ldc) {
00766     if (Gblas::timekeeping) {
00767       clock_t start = clock();
00768       sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
00769       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00770     }
00771     else {
00772       sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
00773     }
00774   }
00775   
00776   template<>
00777     inline void pptrf<float>(const char *uplo,const int *n, 
00778                              float* ap, int *info) {
00779     if (Gblas::timekeeping) {
00780       clock_t start = clock();
00781       spptrf_(uplo,n,ap,info);
00782       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00783     }
00784     else {
00785       spptrf_(uplo,n,ap,info);
00786     }
00787   }
00788   
00789   template<>
00790     inline  void spgst<float>(const int *itype, const char *uplo,
00791                               const int *n,
00792                               float* ap,const float *bp,int *info) {
00793     if (Gblas::timekeeping) {
00794       clock_t start = clock();
00795       sspgst_(itype,uplo,n,ap,bp,info);
00796       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00797     }
00798     else {
00799       sspgst_(itype,uplo,n,ap,bp,info);
00800     }
00801   } 
00802 
00803   template<>
00804     inline  void tptri<float>(const char *uplo,const char *diag,
00805                               const int *n,
00806                               float* ap,int *info) {
00807     if (Gblas::timekeeping) {
00808       clock_t start = clock();
00809       stptri_(uplo,diag,n,ap,info);
00810       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00811     }
00812     else {
00813       stptri_(uplo,diag,n,ap,info);
00814     }
00815   }  
00816 
00817   template<>
00818     inline  void trmm<float>(const char *side,const char *uplo,
00819                              const char *transa,
00820                              const char *diag,const int *m,const int *n,
00821                              const float *alpha,
00822                              const float *A,const int *lda,
00823                              float *B,const int *ldb) {
00824     if (Gblas::timekeeping) {
00825       clock_t start = clock();
00826       strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
00827       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00828     }
00829     else {
00830       strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
00831     }
00832   }
00833   
00834   template<>
00835     inline  void sygv<float>(const int *itype,const char *jobz,
00836                              const char *uplo,const int *n,
00837                              float *A,const int *lda,
00838                              float *B,const int *ldb,
00839                              float* w,float* work,
00840                              const int *lwork,int *info) {
00841     if (Gblas::timekeeping) {
00842       clock_t start = clock();
00843       ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
00844       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00845     }
00846     else {
00847       ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
00848     }
00849   }
00850 
00851   template<>
00852     inline void ggev<float>(const char *jobbl, const char *jobvr, 
00853                             const int *n, float *A, const int *lda, 
00854                             float *B, const int *ldb, float *alphar, 
00855                             float *alphai, float *beta, float *vl, 
00856                             const int *ldvl, float *vr, const int *ldvr,
00857                             float *work, const int *lwork, int *info) {
00858     if (Gblas::timekeeping) {
00859       clock_t start = clock();
00860       sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, 
00861              ldvl, vr, ldvr, work, lwork, info);
00862       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00863     }
00864     else {
00865       sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl, 
00866              ldvl, vr, ldvr, work, lwork, info);
00867     }
00868   }
00869 
00870 
00871   template<>
00872     inline void potrf<float>(const char *uplo, const int *n, float *A, 
00873                              const int *lda, int *info) {
00874     if (Gblas::timekeeping) {
00875       clock_t start = clock();
00876       spotrf_(uplo, n, A, lda, info);
00877       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00878     }
00879     else {
00880       spotrf_(uplo, n, A, lda, info);      
00881     }
00882   }
00883 
00884   template<>
00885     inline void trtri<float>(const char *uplo,const char *diag,const int *n,
00886                              float *A, const int *lda, int *info) {
00887     if (Gblas::timekeeping) {
00888       clock_t start = clock();
00889       strtri_(uplo, diag, n, A, lda, info);
00890       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00891     }
00892     else {
00893       strtri_(uplo, diag, n, A, lda, info);     
00894     }
00895   }
00896 
00897   template<>
00898     inline void syrk<float>(const char *uplo, const char *trans, 
00899                             const int *n, const int *k, const float *alpha, 
00900                             const float *A, const int *lda, 
00901                             const float *beta, float *C, const int *ldc) {
00902     if (Gblas::timekeeping) {
00903       clock_t start = clock();
00904       ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
00905       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00906     }
00907     else {
00908       ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
00909     }
00910   }
00911 
00912   template<>
00913     inline void symm<float>(const char *side,const char *uplo,
00914                             const int *m,const int *n, const float *alpha,
00915                             const float *A,const int *lda,
00916                             const float *B,const int *ldb, 
00917                             const float* beta,
00918                             float *C,const int *ldc) {
00919     if (Gblas::timekeeping) {
00920       clock_t start = clock();
00921       ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
00922       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00923     }
00924     else {
00925       ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
00926     } 
00927   }
00928 
00929   template<>
00930     inline void pocon<float>(const char *uplo, const int *n, 
00931                              const float *A, const int *lda, 
00932                              const float *anorm, float *rcond, 
00933                              float *work, int *iwork, int *info) {
00934     if (Gblas::timekeeping) {
00935       clock_t start = clock();
00936       spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
00937       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00938     }
00939     else {
00940       spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
00941     }     
00942   }
00943 
00944   template<>
00945     inline void stevx<float>(const char *jobz, const char *range, 
00946                              const int *n, float *d, float *e, 
00947                               const float *vl, 
00948                              const float *vu, const int *il, const int *iu, 
00949                              const float *abstol, int *m, float *w, 
00950                              float *z, 
00951                              const int *ldz, float *work, int *iwork, 
00952                              int *ifail, int *info) {
00953     if (Gblas::timekeeping) {
00954       clock_t start = clock();
00955       sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz, 
00956               work, iwork, ifail, info);
00957       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00958     }
00959     else {
00960       sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz, 
00961               work, iwork, ifail, info);
00962     }     
00963   }
00964 
00965   template<>
00966     inline void stevr<float>(const char *jobz, const char *range, 
00967                              const int *n, float *d, float *e, 
00968                              const float *vl, const float *vu, 
00969                              const int *il, const int *iu, 
00970                              const float *abstol, 
00971                              int *m, float *w, 
00972                              float *z, const int *ldz, int* isuppz, 
00973                              float *work, int* lwork, 
00974                              int *iwork, int* liwork, int *info) {
00975     if (Gblas::timekeeping) {
00976       clock_t start = clock();
00977       sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol, 
00978               m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
00979       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00980     }
00981     else {
00982       sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol, 
00983               m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
00984     }     
00985   }
00986 
00987   template<>
00988     inline void syev<float>(const char *jobz, const char *uplo, const int *n, 
00989                             float *a, const int *lda, float *w, 
00990                             float *work, const int *lwork, int *info) {
00991     if (Gblas::timekeeping) {
00992       clock_t start = clock();
00993       ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
00994       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
00995     }
00996     else {
00997       ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
00998     }     
00999   }
01000 
01001 
01002   /* LEVEL 2 */
01003   template<>
01004     inline void gemv<float>(const char *ta, const int *m, const int *n, 
01005                             const float *alpha, const float *A, 
01006                             const int *lda, 
01007                             const float *x, const int *incx, 
01008                             const float *beta, float *y, const int *incy) {
01009     if (Gblas::timekeeping) {
01010       clock_t start = clock();
01011       sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
01012       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01013     }
01014     else {
01015       sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
01016     }     
01017   }
01018 
01019   template<>
01020     inline void symv<float>(const char *uplo, const int *n, 
01021                             const float *alpha, const float *A, 
01022                             const int *lda, const float *x, 
01023                             const int *incx, const float *beta, 
01024                             float *y, const int *incy) {
01025     if (Gblas::timekeeping) {
01026       clock_t start = clock();
01027       ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
01028       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01029     }
01030     else {
01031       ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
01032     }     
01033   }
01034 
01035   template<>
01036     inline void trmv<float>(const char *uplo, const char *trans, 
01037                              const char *diag, const int *n, 
01038                              const float *A, const int *lda, 
01039                              float *x, const int *incx) {
01040     if (Gblas::timekeeping) {
01041       clock_t start = clock();
01042       strmv_(uplo, trans, diag, n, A, lda, x, incx);
01043       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01044     }
01045     else {
01046       strmv_(uplo, trans, diag, n, A, lda, x, incx);
01047     }     
01048   }
01049 
01050   /* LEVEL 1 */
01051   template<>
01052     inline void scal<float>(const int* n,const float* da, float* dx,
01053                             const int* incx) {
01054     if (Gblas::timekeeping) {
01055       clock_t start = clock();
01056       sscal_(n, da, dx, incx);
01057       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01058     }
01059     else {
01060       sscal_(n, da, dx, incx);
01061     }     
01062   }
01063 #if 0
01064   // Sdot has different return type in different BLAS implementations 
01065   // Therefore the specialization for single precision is removed
01066   template<>
01067     inline float dot<float>(const int* n, const float* dx, const int* incx, 
01068                             const float* dy, const int* incy) {
01069     float tmp;
01070     if (Gblas::timekeeping) {
01071       clock_t start = clock();
01072       sdot_(n, dx, incx, dy, incy);
01073       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01074     }
01075     else {
01076       sdot_(n, dx, incx, dy, incy);
01077     }     
01078     return tmp;
01079   }
01080 #endif
01081 
01082   template<>
01083     inline void axpy<float>(const int* n, const float* da, const float* dx,
01084                              const int* incx, float* dy,const int* incy) {
01085     if (Gblas::timekeeping) {
01086       clock_t start = clock();
01087       saxpy_(n, da, dx, incx, dy, incy);
01088       Gblas::time +=    ((float)(clock() - start)) / (CLOCKS_PER_SEC);
01089     }
01090     else {
01091       saxpy_(n, da, dx, incx, dy, incy);
01092     }     
01093   }
01094 
01095   /* END OF SPECIALIZATIONS */
01096 #endif
01097 
01098 
01099 
01100 
01101 
01102 
01103 
01104 
01105 
01106   /* Other */
01107   
01108   template<class Treal>
01109     static void fulltopacked(const Treal* full, Treal* packed, const int size){
01110     int pind=0;
01111     for (int col=0;col<size;col++)
01112       {
01113         for(int row=0;row<=col;row++)
01114           {
01115             packed[pind]=full[col*size+row];
01116             pind++;
01117           }
01118       }
01119   }
01120 
01121   template<class Treal>
01122     static void packedtofull(const Treal* packed, Treal* full, const int size){
01123     int psize=(size+1)*size/2;
01124     int col=0;
01125     int row=0;
01126     for(int pind=0;pind<psize;pind++)
01127       {
01128         if (col==row)
01129           {
01130             full[col*size+row]=packed[pind];
01131             col++;
01132             row=0;
01133           }
01134         else
01135           {
01136             full[col*size+row]=packed[pind];
01137             full[row*size+col]=packed[pind];
01138             row++;
01139           }
01140       }
01141   }
01142 
01143   template<class Treal>
01144     static void tripackedtofull(const Treal* packed,Treal* full,
01145                                 const int size) {
01146     int psize=(size+1)*size/2;
01147     int col=0;
01148     int row=0;
01149     for(int pind=0;pind<psize;pind++)
01150       {
01151         if (col==row)
01152           {
01153             full[col*size+row]=packed[pind];
01154             col++;
01155             row=0;
01156           }
01157         else
01158           {
01159             full[col*size+row]=packed[pind];
01160             full[row*size+col]=0;
01161             row++;
01162           }
01163       }
01164   }
01165 
01166   template<class Treal>
01167     static void trifulltofull(Treal* trifull, const int size) {
01168     for(int col = 0; col < size - 1; col++) 
01169       for(int row = col + 1; row < size; row++) 
01170         trifull[col * size +  row] = 0;
01171   }
01172   
01173 
01174 } /* namespace mat */
01175 
01176 #endif /* GBLAS */

Generated on 21 Nov 2012 for ergo by  doxygen 1.6.1