template_blas_gemm.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_BLAS_GEMM_HEADER
00036 #define TEMPLATE_BLAS_GEMM_HEADER
00037 
00038 #include "template_blas_common.h"
00039 
00040 template<class Treal>
00041 int template_blas_gemm(const char *transa, const char *transb, const integer *m, const integer *
00042         n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, 
00043         const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, 
00044         const integer *ldc)
00045 {
00046     /* System generated locals */
00047     integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
00048             i__3;
00049     /* Local variables */
00050      integer info;
00051      logical nota, notb;
00052      Treal temp;
00053      integer i__, j, l;
00054      integer nrowa, nrowb;
00055 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00056 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00057 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
00058 /*  Purpose   
00059     =======   
00060     DGEMM  performs one of the matrix-matrix operations   
00061        C := alpha*op( A )*op( B ) + beta*C,   
00062     where  op( X ) is one of   
00063        op( X ) = X   or   op( X ) = X',   
00064     alpha and beta are scalars, and A, B and C are matrices, with op( A )   
00065     an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.   
00066     Parameters   
00067     ==========   
00068     TRANSA - CHARACTER*1.   
00069              On entry, TRANSA specifies the form of op( A ) to be used in   
00070              the matrix multiplication as follows:   
00071                 TRANSA = 'N' or 'n',  op( A ) = A.   
00072                 TRANSA = 'T' or 't',  op( A ) = A'.   
00073                 TRANSA = 'C' or 'c',  op( A ) = A'.   
00074              Unchanged on exit.   
00075     TRANSB - CHARACTER*1.   
00076              On entry, TRANSB specifies the form of op( B ) to be used in   
00077              the matrix multiplication as follows:   
00078                 TRANSB = 'N' or 'n',  op( B ) = B.   
00079                 TRANSB = 'T' or 't',  op( B ) = B'.   
00080                 TRANSB = 'C' or 'c',  op( B ) = B'.   
00081              Unchanged on exit.   
00082     M      - INTEGER.   
00083              On entry,  M  specifies  the number  of rows  of the  matrix   
00084              op( A )  and of the  matrix  C.  M  must  be at least  zero.   
00085              Unchanged on exit.   
00086     N      - INTEGER.   
00087              On entry,  N  specifies the number  of columns of the matrix   
00088              op( B ) and the number of columns of the matrix C. N must be   
00089              at least zero.   
00090              Unchanged on exit.   
00091     K      - INTEGER.   
00092              On entry,  K  specifies  the number of columns of the matrix   
00093              op( A ) and the number of rows of the matrix op( B ). K must   
00094              be at least  zero.   
00095              Unchanged on exit.   
00096     ALPHA  - DOUBLE PRECISION.   
00097              On entry, ALPHA specifies the scalar alpha.   
00098              Unchanged on exit.   
00099     A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is   
00100              k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.   
00101              Before entry with  TRANSA = 'N' or 'n',  the leading  m by k   
00102              part of the array  A  must contain the matrix  A,  otherwise   
00103              the leading  k by m  part of the array  A  must contain  the   
00104              matrix A.   
00105              Unchanged on exit.   
00106     LDA    - INTEGER.   
00107              On entry, LDA specifies the first dimension of A as declared   
00108              in the calling (sub) program. When  TRANSA = 'N' or 'n' then   
00109              LDA must be at least  max( 1, m ), otherwise  LDA must be at   
00110              least  max( 1, k ).   
00111              Unchanged on exit.   
00112     B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is   
00113              n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.   
00114              Before entry with  TRANSB = 'N' or 'n',  the leading  k by n   
00115              part of the array  B  must contain the matrix  B,  otherwise   
00116              the leading  n by k  part of the array  B  must contain  the   
00117              matrix B.   
00118              Unchanged on exit.   
00119     LDB    - INTEGER.   
00120              On entry, LDB specifies the first dimension of B as declared   
00121              in the calling (sub) program. When  TRANSB = 'N' or 'n' then   
00122              LDB must be at least  max( 1, k ), otherwise  LDB must be at   
00123              least  max( 1, n ).   
00124              Unchanged on exit.   
00125     BETA   - DOUBLE PRECISION.   
00126              On entry,  BETA  specifies the scalar  beta.  When  BETA  is   
00127              supplied as zero then C need not be set on input.   
00128              Unchanged on exit.   
00129     C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).   
00130              Before entry, the leading  m by n  part of the array  C must   
00131              contain the matrix  C,  except when  beta  is zero, in which   
00132              case C need not be set on entry.   
00133              On exit, the array  C  is overwritten by the  m by n  matrix   
00134              ( alpha*op( A )*op( B ) + beta*C ).   
00135     LDC    - INTEGER.   
00136              On entry, LDC specifies the first dimension of C as declared   
00137              in  the  calling  (sub)  program.   LDC  must  be  at  least   
00138              max( 1, m ).   
00139              Unchanged on exit.   
00140     Level 3 Blas routine.   
00141     -- Written on 8-February-1989.   
00142        Jack Dongarra, Argonne National Laboratory.   
00143        Iain Duff, AERE Harwell.   
00144        Jeremy Du Croz, Numerical Algorithms Group Ltd.   
00145        Sven Hammarling, Numerical Algorithms Group Ltd.   
00146        Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not   
00147        transposed and set  NROWA, NCOLA and  NROWB  as the number of rows   
00148        and  columns of  A  and the  number of  rows  of  B  respectively.   
00149        Parameter adjustments */
00150     a_dim1 = *lda;
00151     a_offset = 1 + a_dim1 * 1;
00152     a -= a_offset;
00153     b_dim1 = *ldb;
00154     b_offset = 1 + b_dim1 * 1;
00155     b -= b_offset;
00156     c_dim1 = *ldc;
00157     c_offset = 1 + c_dim1 * 1;
00158     c__ -= c_offset;
00159     /* Function Body */
00160     nota = template_blas_lsame(transa, "N");
00161     notb = template_blas_lsame(transb, "N");
00162     if (nota) {
00163         nrowa = *m;
00164     } else {
00165         nrowa = *k;
00166     }
00167     if (notb) {
00168         nrowb = *k;
00169     } else {
00170         nrowb = *n;
00171     }
00172 /*     Test the input parameters. */
00173     info = 0;
00174     if (! nota && ! template_blas_lsame(transa, "C") && ! template_blas_lsame(
00175             transa, "T")) {
00176         info = 1;
00177     } else if (! notb && ! template_blas_lsame(transb, "C") && ! 
00178             template_blas_lsame(transb, "T")) {
00179         info = 2;
00180     } else if (*m < 0) {
00181         info = 3;
00182     } else if (*n < 0) {
00183         info = 4;
00184     } else if (*k < 0) {
00185         info = 5;
00186     } else if (*lda < maxMACRO(1,nrowa)) {
00187         info = 8;
00188     } else if (*ldb < maxMACRO(1,nrowb)) {
00189         info = 10;
00190     } else if (*ldc < maxMACRO(1,*m)) {
00191         info = 13;
00192     }
00193     if (info != 0) {
00194         template_blas_erbla("DGEMM ", &info);
00195         return 0;
00196     }
00197 /*     Quick return if possible. */
00198     if (*m == 0 || *n == 0 || ( (*alpha == 0. || *k == 0) && *beta == 1.) ) {
00199         return 0;
00200     }
00201 /*     And if  alpha.eq.zero. */
00202     if (*alpha == 0.) {
00203         if (*beta == 0.) {
00204             i__1 = *n;
00205             for (j = 1; j <= i__1; ++j) {
00206                 i__2 = *m;
00207                 for (i__ = 1; i__ <= i__2; ++i__) {
00208                     c___ref(i__, j) = 0.;
00209 /* L10: */
00210                 }
00211 /* L20: */
00212             }
00213         } else {
00214             i__1 = *n;
00215             for (j = 1; j <= i__1; ++j) {
00216                 i__2 = *m;
00217                 for (i__ = 1; i__ <= i__2; ++i__) {
00218                     c___ref(i__, j) = *beta * c___ref(i__, j);
00219 /* L30: */
00220                 }
00221 /* L40: */
00222             }
00223         }
00224         return 0;
00225     }
00226 /*     Start the operations. */
00227     if (notb) {
00228         if (nota) {
00229 /*           Form  C := alpha*A*B + beta*C. */
00230             i__1 = *n;
00231             for (j = 1; j <= i__1; ++j) {
00232                 if (*beta == 0.) {
00233                     i__2 = *m;
00234                     for (i__ = 1; i__ <= i__2; ++i__) {
00235                         c___ref(i__, j) = 0.;
00236 /* L50: */
00237                     }
00238                 } else if (*beta != 1.) {
00239                     i__2 = *m;
00240                     for (i__ = 1; i__ <= i__2; ++i__) {
00241                         c___ref(i__, j) = *beta * c___ref(i__, j);
00242 /* L60: */
00243                     }
00244                 }
00245                 i__2 = *k;
00246                 for (l = 1; l <= i__2; ++l) {
00247                     if (b_ref(l, j) != 0.) {
00248                         temp = *alpha * b_ref(l, j);
00249                         i__3 = *m;
00250                         for (i__ = 1; i__ <= i__3; ++i__) {
00251                             c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
00252                                     i__, l);
00253 /* L70: */
00254                         }
00255                     }
00256 /* L80: */
00257                 }
00258 /* L90: */
00259             }
00260         } else {
00261 /*           Form  C := alpha*A'*B + beta*C */
00262             i__1 = *n;
00263             for (j = 1; j <= i__1; ++j) {
00264                 i__2 = *m;
00265                 for (i__ = 1; i__ <= i__2; ++i__) {
00266                     temp = 0.;
00267                     i__3 = *k;
00268                     for (l = 1; l <= i__3; ++l) {
00269                         temp += a_ref(l, i__) * b_ref(l, j);
00270 /* L100: */
00271                     }
00272                     if (*beta == 0.) {
00273                         c___ref(i__, j) = *alpha * temp;
00274                     } else {
00275                         c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
00276                                  j);
00277                     }
00278 /* L110: */
00279                 }
00280 /* L120: */
00281             }
00282         }
00283     } else {
00284         if (nota) {
00285 /*           Form  C := alpha*A*B' + beta*C */
00286             i__1 = *n;
00287             for (j = 1; j <= i__1; ++j) {
00288                 if (*beta == 0.) {
00289                     i__2 = *m;
00290                     for (i__ = 1; i__ <= i__2; ++i__) {
00291                         c___ref(i__, j) = 0.;
00292 /* L130: */
00293                     }
00294                 } else if (*beta != 1.) {
00295                     i__2 = *m;
00296                     for (i__ = 1; i__ <= i__2; ++i__) {
00297                         c___ref(i__, j) = *beta * c___ref(i__, j);
00298 /* L140: */
00299                     }
00300                 }
00301                 i__2 = *k;
00302                 for (l = 1; l <= i__2; ++l) {
00303                     if (b_ref(j, l) != 0.) {
00304                         temp = *alpha * b_ref(j, l);
00305                         i__3 = *m;
00306                         for (i__ = 1; i__ <= i__3; ++i__) {
00307                             c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
00308                                     i__, l);
00309 /* L150: */
00310                         }
00311                     }
00312 /* L160: */
00313                 }
00314 /* L170: */
00315             }
00316         } else {
00317 /*           Form  C := alpha*A'*B' + beta*C */
00318             i__1 = *n;
00319             for (j = 1; j <= i__1; ++j) {
00320                 i__2 = *m;
00321                 for (i__ = 1; i__ <= i__2; ++i__) {
00322                     temp = 0.;
00323                     i__3 = *k;
00324                     for (l = 1; l <= i__3; ++l) {
00325                         temp += a_ref(l, i__) * b_ref(j, l);
00326 /* L180: */
00327                     }
00328                     if (*beta == 0.) {
00329                         c___ref(i__, j) = *alpha * temp;
00330                     } else {
00331                         c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
00332                                  j);
00333                     }
00334 /* L190: */
00335                 }
00336 /* L200: */
00337             }
00338         }
00339     }
00340     return 0;
00341 /*     End of DGEMM . */
00342 } /* dgemm_ */
00343 #undef c___ref
00344 #undef b_ref
00345 #undef a_ref
00346 
00347 #endif

Generated on 21 Nov 2012 for ergo by  doxygen 1.6.1