00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_BLAS_SYMM_HEADER 00036 #define TEMPLATE_BLAS_SYMM_HEADER 00037 00038 00039 template<class Treal> 00040 int template_blas_symm(const char *side, const char *uplo, const integer *m, const integer *n, 00041 const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, 00042 const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc) 00043 { 00044 /* System generated locals */ 00045 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 00046 i__3; 00047 /* Local variables */ 00048 integer info; 00049 Treal temp1, temp2; 00050 integer i__, j, k; 00051 integer nrowa; 00052 logical upper; 00053 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00054 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 00055 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1] 00056 /* Purpose 00057 ======= 00058 DSYMM performs one of the matrix-matrix operations 00059 C := alpha*A*B + beta*C, 00060 or 00061 C := alpha*B*A + beta*C, 00062 where alpha and beta are scalars, A is a symmetric matrix and B and 00063 C are m by n matrices. 00064 Parameters 00065 ========== 00066 SIDE - CHARACTER*1. 00067 On entry, SIDE specifies whether the symmetric matrix A 00068 appears on the left or right in the operation as follows: 00069 SIDE = 'L' or 'l' C := alpha*A*B + beta*C, 00070 SIDE = 'R' or 'r' C := alpha*B*A + beta*C, 00071 Unchanged on exit. 00072 UPLO - CHARACTER*1. 00073 On entry, UPLO specifies whether the upper or lower 00074 triangular part of the symmetric matrix A is to be 00075 referenced as follows: 00076 UPLO = 'U' or 'u' Only the upper triangular part of the 00077 symmetric matrix is to be referenced. 00078 UPLO = 'L' or 'l' Only the lower triangular part of the 00079 symmetric matrix is to be referenced. 00080 Unchanged on exit. 00081 M - INTEGER. 00082 On entry, M specifies the number of rows of the matrix C. 00083 M must be at least zero. 00084 Unchanged on exit. 00085 N - INTEGER. 00086 On entry, N specifies the number of columns of the matrix C. 00087 N must be at least zero. 00088 Unchanged on exit. 00089 ALPHA - DOUBLE PRECISION. 00090 On entry, ALPHA specifies the scalar alpha. 00091 Unchanged on exit. 00092 A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is 00093 m when SIDE = 'L' or 'l' and is n otherwise. 00094 Before entry with SIDE = 'L' or 'l', the m by m part of 00095 the array A must contain the symmetric matrix, such that 00096 when UPLO = 'U' or 'u', the leading m by m upper triangular 00097 part of the array A must contain the upper triangular part 00098 of the symmetric matrix and the strictly lower triangular 00099 part of A is not referenced, and when UPLO = 'L' or 'l', 00100 the leading m by m lower triangular part of the array A 00101 must contain the lower triangular part of the symmetric 00102 matrix and the strictly upper triangular part of A is not 00103 referenced. 00104 Before entry with SIDE = 'R' or 'r', the n by n part of 00105 the array A must contain the symmetric matrix, such that 00106 when UPLO = 'U' or 'u', the leading n by n upper triangular 00107 part of the array A must contain the upper triangular part 00108 of the symmetric matrix and the strictly lower triangular 00109 part of A is not referenced, and when UPLO = 'L' or 'l', 00110 the leading n by n lower triangular part of the array A 00111 must contain the lower triangular part of the symmetric 00112 matrix and the strictly upper triangular part of A is not 00113 referenced. 00114 Unchanged on exit. 00115 LDA - INTEGER. 00116 On entry, LDA specifies the first dimension of A as declared 00117 in the calling (sub) program. When SIDE = 'L' or 'l' then 00118 LDA must be at least max( 1, m ), otherwise LDA must be at 00119 least max( 1, n ). 00120 Unchanged on exit. 00121 B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). 00122 Before entry, the leading m by n part of the array B must 00123 contain the matrix B. 00124 Unchanged on exit. 00125 LDB - INTEGER. 00126 On entry, LDB specifies the first dimension of B as declared 00127 in the calling (sub) program. LDB must be at least 00128 max( 1, m ). 00129 Unchanged on exit. 00130 BETA - DOUBLE PRECISION. 00131 On entry, BETA specifies the scalar beta. When BETA is 00132 supplied as zero then C need not be set on input. 00133 Unchanged on exit. 00134 C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). 00135 Before entry, the leading m by n part of the array C must 00136 contain the matrix C, except when beta is zero, in which 00137 case C need not be set on entry. 00138 On exit, the array C is overwritten by the m by n updated 00139 matrix. 00140 LDC - INTEGER. 00141 On entry, LDC specifies the first dimension of C as declared 00142 in the calling (sub) program. LDC must be at least 00143 max( 1, m ). 00144 Unchanged on exit. 00145 Level 3 Blas routine. 00146 -- Written on 8-February-1989. 00147 Jack Dongarra, Argonne National Laboratory. 00148 Iain Duff, AERE Harwell. 00149 Jeremy Du Croz, Numerical Algorithms Group Ltd. 00150 Sven Hammarling, Numerical Algorithms Group Ltd. 00151 Set NROWA as the number of rows of A. 00152 Parameter adjustments */ 00153 a_dim1 = *lda; 00154 a_offset = 1 + a_dim1 * 1; 00155 a -= a_offset; 00156 b_dim1 = *ldb; 00157 b_offset = 1 + b_dim1 * 1; 00158 b -= b_offset; 00159 c_dim1 = *ldc; 00160 c_offset = 1 + c_dim1 * 1; 00161 c__ -= c_offset; 00162 /* Function Body */ 00163 if (template_blas_lsame(side, "L")) { 00164 nrowa = *m; 00165 } else { 00166 nrowa = *n; 00167 } 00168 upper = template_blas_lsame(uplo, "U"); 00169 /* Test the input parameters. */ 00170 info = 0; 00171 if (! template_blas_lsame(side, "L") && ! template_blas_lsame(side, "R")) { 00172 info = 1; 00173 } else if (! upper && ! template_blas_lsame(uplo, "L")) { 00174 info = 2; 00175 } else if (*m < 0) { 00176 info = 3; 00177 } else if (*n < 0) { 00178 info = 4; 00179 } else if (*lda < maxMACRO(1,nrowa)) { 00180 info = 7; 00181 } else if (*ldb < maxMACRO(1,*m)) { 00182 info = 9; 00183 } else if (*ldc < maxMACRO(1,*m)) { 00184 info = 12; 00185 } 00186 if (info != 0) { 00187 template_blas_erbla("SYMM ", &info); 00188 return 0; 00189 } 00190 /* Quick return if possible. */ 00191 if (*m == 0 || *n == 0 || ( *alpha == 0. && *beta == 1. ) ) { 00192 return 0; 00193 } 00194 /* And when alpha.eq.zero. */ 00195 if (*alpha == 0.) { 00196 if (*beta == 0.) { 00197 i__1 = *n; 00198 for (j = 1; j <= i__1; ++j) { 00199 i__2 = *m; 00200 for (i__ = 1; i__ <= i__2; ++i__) { 00201 c___ref(i__, j) = 0.; 00202 /* L10: */ 00203 } 00204 /* L20: */ 00205 } 00206 } else { 00207 i__1 = *n; 00208 for (j = 1; j <= i__1; ++j) { 00209 i__2 = *m; 00210 for (i__ = 1; i__ <= i__2; ++i__) { 00211 c___ref(i__, j) = *beta * c___ref(i__, j); 00212 /* L30: */ 00213 } 00214 /* L40: */ 00215 } 00216 } 00217 return 0; 00218 } 00219 /* Start the operations. */ 00220 if (template_blas_lsame(side, "L")) { 00221 /* Form C := alpha*A*B + beta*C. */ 00222 if (upper) { 00223 i__1 = *n; 00224 for (j = 1; j <= i__1; ++j) { 00225 i__2 = *m; 00226 for (i__ = 1; i__ <= i__2; ++i__) { 00227 temp1 = *alpha * b_ref(i__, j); 00228 temp2 = 0.; 00229 i__3 = i__ - 1; 00230 for (k = 1; k <= i__3; ++k) { 00231 c___ref(k, j) = c___ref(k, j) + temp1 * a_ref(k, i__); 00232 temp2 += b_ref(k, j) * a_ref(k, i__); 00233 /* L50: */ 00234 } 00235 if (*beta == 0.) { 00236 c___ref(i__, j) = temp1 * a_ref(i__, i__) + *alpha * 00237 temp2; 00238 } else { 00239 c___ref(i__, j) = *beta * c___ref(i__, j) + temp1 * 00240 a_ref(i__, i__) + *alpha * temp2; 00241 } 00242 /* L60: */ 00243 } 00244 /* L70: */ 00245 } 00246 } else { 00247 i__1 = *n; 00248 for (j = 1; j <= i__1; ++j) { 00249 for (i__ = *m; i__ >= 1; --i__) { 00250 temp1 = *alpha * b_ref(i__, j); 00251 temp2 = 0.; 00252 i__2 = *m; 00253 for (k = i__ + 1; k <= i__2; ++k) { 00254 c___ref(k, j) = c___ref(k, j) + temp1 * a_ref(k, i__); 00255 temp2 += b_ref(k, j) * a_ref(k, i__); 00256 /* L80: */ 00257 } 00258 if (*beta == 0.) { 00259 c___ref(i__, j) = temp1 * a_ref(i__, i__) + *alpha * 00260 temp2; 00261 } else { 00262 c___ref(i__, j) = *beta * c___ref(i__, j) + temp1 * 00263 a_ref(i__, i__) + *alpha * temp2; 00264 } 00265 /* L90: */ 00266 } 00267 /* L100: */ 00268 } 00269 } 00270 } else { 00271 /* Form C := alpha*B*A + beta*C. */ 00272 i__1 = *n; 00273 for (j = 1; j <= i__1; ++j) { 00274 temp1 = *alpha * a_ref(j, j); 00275 if (*beta == 0.) { 00276 i__2 = *m; 00277 for (i__ = 1; i__ <= i__2; ++i__) { 00278 c___ref(i__, j) = temp1 * b_ref(i__, j); 00279 /* L110: */ 00280 } 00281 } else { 00282 i__2 = *m; 00283 for (i__ = 1; i__ <= i__2; ++i__) { 00284 c___ref(i__, j) = *beta * c___ref(i__, j) + temp1 * b_ref( 00285 i__, j); 00286 /* L120: */ 00287 } 00288 } 00289 i__2 = j - 1; 00290 for (k = 1; k <= i__2; ++k) { 00291 if (upper) { 00292 temp1 = *alpha * a_ref(k, j); 00293 } else { 00294 temp1 = *alpha * a_ref(j, k); 00295 } 00296 i__3 = *m; 00297 for (i__ = 1; i__ <= i__3; ++i__) { 00298 c___ref(i__, j) = c___ref(i__, j) + temp1 * b_ref(i__, k); 00299 /* L130: */ 00300 } 00301 /* L140: */ 00302 } 00303 i__2 = *n; 00304 for (k = j + 1; k <= i__2; ++k) { 00305 if (upper) { 00306 temp1 = *alpha * a_ref(j, k); 00307 } else { 00308 temp1 = *alpha * a_ref(k, j); 00309 } 00310 i__3 = *m; 00311 for (i__ = 1; i__ <= i__3; ++i__) { 00312 c___ref(i__, j) = c___ref(i__, j) + temp1 * b_ref(i__, k); 00313 /* L150: */ 00314 } 00315 /* L160: */ 00316 } 00317 /* L170: */ 00318 } 00319 } 00320 return 0; 00321 /* End of DSYMM . */ 00322 } /* dsymm_ */ 00323 #undef c___ref 00324 #undef b_ref 00325 #undef a_ref 00326 00327 #endif