template_blas_syr2k.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_BLAS_SYR2K_HEADER
00036 #define TEMPLATE_BLAS_SYR2K_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_blas_syr2k(const char *uplo, const char *trans, const integer *n, 
00041                         const integer *k, const Treal *alpha, const Treal *a, 
00042                         const integer *lda, const Treal *b, const integer *ldb, 
00043                         const Treal *beta, Treal *c__, const integer *ldc)
00044 {
00045     /* System generated locals */
00046     integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
00047             i__3;
00048     /* Local variables */
00049      integer info;
00050      Treal temp1, temp2;
00051      integer i__, j, l;
00052      integer nrowa;
00053      logical upper;
00054 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00055 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
00056 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
00057 /*  Purpose   
00058     =======   
00059     DSYR2K  performs one of the symmetric rank 2k operations   
00060        C := alpha*A*B' + alpha*B*A' + beta*C,   
00061     or   
00062        C := alpha*A'*B + alpha*B'*A + beta*C,   
00063     where  alpha and beta  are scalars, C is an  n by n  symmetric matrix   
00064     and  A and B  are  n by k  matrices  in the  first  case  and  k by n   
00065     matrices in the second case.   
00066     Parameters   
00067     ==========   
00068     UPLO   - CHARACTER*1.   
00069              On  entry,   UPLO  specifies  whether  the  upper  or  lower   
00070              triangular  part  of the  array  C  is to be  referenced  as   
00071              follows:   
00072                 UPLO = 'U' or 'u'   Only the  upper triangular part of  C   
00073                                     is to be referenced.   
00074                 UPLO = 'L' or 'l'   Only the  lower triangular part of  C   
00075                                     is to be referenced.   
00076              Unchanged on exit.   
00077     TRANS  - CHARACTER*1.   
00078              On entry,  TRANS  specifies the operation to be performed as   
00079              follows:   
00080                 TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' +   
00081                                           beta*C.   
00082                 TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A +   
00083                                           beta*C.   
00084                 TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A +   
00085                                           beta*C.   
00086              Unchanged on exit.   
00087     N      - INTEGER.   
00088              On entry,  N specifies the order of the matrix C.  N must be   
00089              at least zero.   
00090              Unchanged on exit.   
00091     K      - INTEGER.   
00092              On entry with  TRANS = 'N' or 'n',  K  specifies  the number   
00093              of  columns  of the  matrices  A and B,  and on  entry  with   
00094              TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number   
00095              of rows of the matrices  A and B.  K must be at least  zero.   
00096              Unchanged on exit.   
00097     ALPHA  - DOUBLE PRECISION.   
00098              On entry, ALPHA specifies the scalar alpha.   
00099              Unchanged on exit.   
00100     A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is   
00101              k  when  TRANS = 'N' or 'n',  and is  n  otherwise.   
00102              Before entry with  TRANS = 'N' or 'n',  the  leading  n by k   
00103              part of the array  A  must contain the matrix  A,  otherwise   
00104              the leading  k by n  part of the array  A  must contain  the   
00105              matrix A.   
00106              Unchanged on exit.   
00107     LDA    - INTEGER.   
00108              On entry, LDA specifies the first dimension of A as declared   
00109              in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'   
00110              then  LDA must be at least  max( 1, n ), otherwise  LDA must   
00111              be at least  max( 1, k ).   
00112              Unchanged on exit.   
00113     B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is   
00114              k  when  TRANS = 'N' or 'n',  and is  n  otherwise.   
00115              Before entry with  TRANS = 'N' or 'n',  the  leading  n by k   
00116              part of the array  B  must contain the matrix  B,  otherwise   
00117              the leading  k by n  part of the array  B  must contain  the   
00118              matrix B.   
00119              Unchanged on exit.   
00120     LDB    - INTEGER.   
00121              On entry, LDB specifies the first dimension of B as declared   
00122              in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'   
00123              then  LDB must be at least  max( 1, n ), otherwise  LDB must   
00124              be at least  max( 1, k ).   
00125              Unchanged on exit.   
00126     BETA   - DOUBLE PRECISION.   
00127              On entry, BETA specifies the scalar beta.   
00128              Unchanged on exit.   
00129     C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).   
00130              Before entry  with  UPLO = 'U' or 'u',  the leading  n by n   
00131              upper triangular part of the array C must contain the upper   
00132              triangular part  of the  symmetric matrix  and the strictly   
00133              lower triangular part of C is not referenced.  On exit, the   
00134              upper triangular part of the array  C is overwritten by the   
00135              upper triangular part of the updated matrix.   
00136              Before entry  with  UPLO = 'L' or 'l',  the leading  n by n   
00137              lower triangular part of the array C must contain the lower   
00138              triangular part  of the  symmetric matrix  and the strictly   
00139              upper triangular part of C is not referenced.  On exit, the   
00140              lower triangular part of the array  C is overwritten by the   
00141              lower triangular part of the updated matrix.   
00142     LDC    - INTEGER.   
00143              On entry, LDC specifies the first dimension of C as declared   
00144              in  the  calling  (sub)  program.   LDC  must  be  at  least   
00145              max( 1, n ).   
00146              Unchanged on exit.   
00147     Level 3 Blas routine.   
00148     -- Written on 8-February-1989.   
00149        Jack Dongarra, Argonne National Laboratory.   
00150        Iain Duff, AERE Harwell.   
00151        Jeremy Du Croz, Numerical Algorithms Group Ltd.   
00152        Sven Hammarling, Numerical Algorithms Group Ltd.   
00153        Test the input parameters.   
00154        Parameter adjustments */
00155     a_dim1 = *lda;
00156     a_offset = 1 + a_dim1 * 1;
00157     a -= a_offset;
00158     b_dim1 = *ldb;
00159     b_offset = 1 + b_dim1 * 1;
00160     b -= b_offset;
00161     c_dim1 = *ldc;
00162     c_offset = 1 + c_dim1 * 1;
00163     c__ -= c_offset;
00164     /* Function Body */
00165     if (template_blas_lsame(trans, "N")) {
00166         nrowa = *n;
00167     } else {
00168         nrowa = *k;
00169     }
00170     upper = template_blas_lsame(uplo, "U");
00171     info = 0;
00172     if (! upper && ! template_blas_lsame(uplo, "L")) {
00173         info = 1;
00174     } else if (! template_blas_lsame(trans, "N") && ! template_blas_lsame(trans, 
00175             "T") && ! template_blas_lsame(trans, "C")) {
00176         info = 2;
00177     } else if (*n < 0) {
00178         info = 3;
00179     } else if (*k < 0) {
00180         info = 4;
00181     } else if (*lda < maxMACRO(1,nrowa)) {
00182         info = 7;
00183     } else if (*ldb < maxMACRO(1,nrowa)) {
00184         info = 9;
00185     } else if (*ldc < maxMACRO(1,*n)) {
00186         info = 12;
00187     }
00188     if (info != 0) {
00189         template_blas_erbla("SYR2K ", &info);
00190         return 0;
00191     }
00192 /*     Quick return if possible. */
00193     if (*n == 0 || ( (*alpha == 0. || *k == 0) && *beta == 1. ) ) {
00194         return 0;
00195     }
00196 /*     And when  alpha.eq.zero. */
00197     if (*alpha == 0.) {
00198         if (upper) {
00199             if (*beta == 0.) {
00200                 i__1 = *n;
00201                 for (j = 1; j <= i__1; ++j) {
00202                     i__2 = j;
00203                     for (i__ = 1; i__ <= i__2; ++i__) {
00204                         c___ref(i__, j) = 0.;
00205 /* L10: */
00206                     }
00207 /* L20: */
00208                 }
00209             } else {
00210                 i__1 = *n;
00211                 for (j = 1; j <= i__1; ++j) {
00212                     i__2 = j;
00213                     for (i__ = 1; i__ <= i__2; ++i__) {
00214                         c___ref(i__, j) = *beta * c___ref(i__, j);
00215 /* L30: */
00216                     }
00217 /* L40: */
00218                 }
00219             }
00220         } else {
00221             if (*beta == 0.) {
00222                 i__1 = *n;
00223                 for (j = 1; j <= i__1; ++j) {
00224                     i__2 = *n;
00225                     for (i__ = j; i__ <= i__2; ++i__) {
00226                         c___ref(i__, j) = 0.;
00227 /* L50: */
00228                     }
00229 /* L60: */
00230                 }
00231             } else {
00232                 i__1 = *n;
00233                 for (j = 1; j <= i__1; ++j) {
00234                     i__2 = *n;
00235                     for (i__ = j; i__ <= i__2; ++i__) {
00236                         c___ref(i__, j) = *beta * c___ref(i__, j);
00237 /* L70: */
00238                     }
00239 /* L80: */
00240                 }
00241             }
00242         }
00243         return 0;
00244     }
00245 /*     Start the operations. */
00246     if (template_blas_lsame(trans, "N")) {
00247 /*        Form  C := alpha*A*B' + alpha*B*A' + C. */
00248         if (upper) {
00249             i__1 = *n;
00250             for (j = 1; j <= i__1; ++j) {
00251                 if (*beta == 0.) {
00252                     i__2 = j;
00253                     for (i__ = 1; i__ <= i__2; ++i__) {
00254                         c___ref(i__, j) = 0.;
00255 /* L90: */
00256                     }
00257                 } else if (*beta != 1.) {
00258                     i__2 = j;
00259                     for (i__ = 1; i__ <= i__2; ++i__) {
00260                         c___ref(i__, j) = *beta * c___ref(i__, j);
00261 /* L100: */
00262                     }
00263                 }
00264                 i__2 = *k;
00265                 for (l = 1; l <= i__2; ++l) {
00266                     if (a_ref(j, l) != 0. || b_ref(j, l) != 0.) {
00267                         temp1 = *alpha * b_ref(j, l);
00268                         temp2 = *alpha * a_ref(j, l);
00269                         i__3 = j;
00270                         for (i__ = 1; i__ <= i__3; ++i__) {
00271                             c___ref(i__, j) = c___ref(i__, j) + a_ref(i__, l) 
00272                                     * temp1 + b_ref(i__, l) * temp2;
00273 /* L110: */
00274                         }
00275                     }
00276 /* L120: */
00277                 }
00278 /* L130: */
00279             }
00280         } else {
00281             i__1 = *n;
00282             for (j = 1; j <= i__1; ++j) {
00283                 if (*beta == 0.) {
00284                     i__2 = *n;
00285                     for (i__ = j; i__ <= i__2; ++i__) {
00286                         c___ref(i__, j) = 0.;
00287 /* L140: */
00288                     }
00289                 } else if (*beta != 1.) {
00290                     i__2 = *n;
00291                     for (i__ = j; i__ <= i__2; ++i__) {
00292                         c___ref(i__, j) = *beta * c___ref(i__, j);
00293 /* L150: */
00294                     }
00295                 }
00296                 i__2 = *k;
00297                 for (l = 1; l <= i__2; ++l) {
00298                     if (a_ref(j, l) != 0. || b_ref(j, l) != 0.) {
00299                         temp1 = *alpha * b_ref(j, l);
00300                         temp2 = *alpha * a_ref(j, l);
00301                         i__3 = *n;
00302                         for (i__ = j; i__ <= i__3; ++i__) {
00303                             c___ref(i__, j) = c___ref(i__, j) + a_ref(i__, l) 
00304                                     * temp1 + b_ref(i__, l) * temp2;
00305 /* L160: */
00306                         }
00307                     }
00308 /* L170: */
00309                 }
00310 /* L180: */
00311             }
00312         }
00313     } else {
00314 /*        Form  C := alpha*A'*B + alpha*B'*A + C. */
00315         if (upper) {
00316             i__1 = *n;
00317             for (j = 1; j <= i__1; ++j) {
00318                 i__2 = j;
00319                 for (i__ = 1; i__ <= i__2; ++i__) {
00320                     temp1 = 0.;
00321                     temp2 = 0.;
00322                     i__3 = *k;
00323                     for (l = 1; l <= i__3; ++l) {
00324                         temp1 += a_ref(l, i__) * b_ref(l, j);
00325                         temp2 += b_ref(l, i__) * a_ref(l, j);
00326 /* L190: */
00327                     }
00328                     if (*beta == 0.) {
00329                         c___ref(i__, j) = *alpha * temp1 + *alpha * temp2;
00330                     } else {
00331                         c___ref(i__, j) = *beta * c___ref(i__, j) + *alpha * 
00332                                 temp1 + *alpha * temp2;
00333                     }
00334 /* L200: */
00335                 }
00336 /* L210: */
00337             }
00338         } else {
00339             i__1 = *n;
00340             for (j = 1; j <= i__1; ++j) {
00341                 i__2 = *n;
00342                 for (i__ = j; i__ <= i__2; ++i__) {
00343                     temp1 = 0.;
00344                     temp2 = 0.;
00345                     i__3 = *k;
00346                     for (l = 1; l <= i__3; ++l) {
00347                         temp1 += a_ref(l, i__) * b_ref(l, j);
00348                         temp2 += b_ref(l, i__) * a_ref(l, j);
00349 /* L220: */
00350                     }
00351                     if (*beta == 0.) {
00352                         c___ref(i__, j) = *alpha * temp1 + *alpha * temp2;
00353                     } else {
00354                         c___ref(i__, j) = *beta * c___ref(i__, j) + *alpha * 
00355                                 temp1 + *alpha * temp2;
00356                     }
00357 /* L230: */
00358                 }
00359 /* L240: */
00360             }
00361         }
00362     }
00363     return 0;
00364 /*     End of DSYR2K. */
00365 } /* dsyr2k_ */
00366 #undef c___ref
00367 #undef b_ref
00368 #undef a_ref
00369 
00370 #endif

Generated on 21 Nov 2012 for ergo by  doxygen 1.6.1