template_lapack_geqrf.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_GEQRF_HEADER
00036 #define TEMPLATE_LAPACK_GEQRF_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_geqrf(const integer *m, const integer *n, Treal *a, const integer *
00041         lda, Treal *tau, Treal *work, const integer *lwork, integer *info)
00042 {
00043 /*  -- LAPACK routine (version 3.0) --   
00044        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00045        Courant Institute, Argonne National Lab, and Rice University   
00046        June 30, 1999   
00047 
00048 
00049     Purpose   
00050     =======   
00051 
00052     DGEQRF computes a QR factorization of a real M-by-N matrix A:   
00053     A = Q * R.   
00054 
00055     Arguments   
00056     =========   
00057 
00058     M       (input) INTEGER   
00059             The number of rows of the matrix A.  M >= 0.   
00060 
00061     N       (input) INTEGER   
00062             The number of columns of the matrix A.  N >= 0.   
00063 
00064     A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
00065             On entry, the M-by-N matrix A.   
00066             On exit, the elements on and above the diagonal of the array   
00067             contain the min(M,N)-by-N upper trapezoidal matrix R (R is   
00068             upper triangular if m >= n); the elements below the diagonal,   
00069             with the array TAU, represent the orthogonal matrix Q as a   
00070             product of min(m,n) elementary reflectors (see Further   
00071             Details).   
00072 
00073     LDA     (input) INTEGER   
00074             The leading dimension of the array A.  LDA >= max(1,M).   
00075 
00076     TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))   
00077             The scalar factors of the elementary reflectors (see Further   
00078             Details).   
00079 
00080     WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)   
00081             On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   
00082 
00083     LWORK   (input) INTEGER   
00084             The dimension of the array WORK.  LWORK >= max(1,N).   
00085             For optimum performance LWORK >= N*NB, where NB is   
00086             the optimal blocksize.   
00087 
00088             If LWORK = -1, then a workspace query is assumed; the routine   
00089             only calculates the optimal size of the WORK array, returns   
00090             this value as the first entry of the WORK array, and no error   
00091             message related to LWORK is issued by XERBLA.   
00092 
00093     INFO    (output) INTEGER   
00094             = 0:  successful exit   
00095             < 0:  if INFO = -i, the i-th argument had an illegal value   
00096 
00097     Further Details   
00098     ===============   
00099 
00100     The matrix Q is represented as a product of elementary reflectors   
00101 
00102        Q = H(1) H(2) . . . H(k), where k = min(m,n).   
00103 
00104     Each H(i) has the form   
00105 
00106        H(i) = I - tau * v * v'   
00107 
00108     where tau is a real scalar, and v is a real vector with   
00109     v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),   
00110     and tau in TAU(i).   
00111 
00112     =====================================================================   
00113 
00114 
00115        Test the input arguments   
00116 
00117        Parameter adjustments */
00118     /* Table of constant values */
00119      integer c__1 = 1;
00120      integer c_n1 = -1;
00121      integer c__3 = 3;
00122      integer c__2 = 2;
00123     
00124     /* System generated locals */
00125     integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
00126     /* Local variables */
00127      integer i__, k, nbmin, iinfo;
00128      integer ib, nb;
00129      integer nx;
00130      integer ldwork, lwkopt;
00131      logical lquery;
00132      integer iws;
00133 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00134 
00135 
00136     a_dim1 = *lda;
00137     a_offset = 1 + a_dim1 * 1;
00138     a -= a_offset;
00139     --tau;
00140     --work;
00141 
00142     /* Function Body */
00143     *info = 0;
00144     nb = template_lapack_ilaenv(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
00145             1);
00146     lwkopt = *n * nb;
00147     work[1] = (Treal) lwkopt;
00148     lquery = *lwork == -1;
00149     if (*m < 0) {
00150         *info = -1;
00151     } else if (*n < 0) {
00152         *info = -2;
00153     } else if (*lda < maxMACRO(1,*m)) {
00154         *info = -4;
00155     } else if (*lwork < maxMACRO(1,*n) && ! lquery) {
00156         *info = -7;
00157     }
00158     if (*info != 0) {
00159         i__1 = -(*info);
00160         template_blas_erbla("GEQRF ", &i__1);
00161         return 0;
00162     } else if (lquery) {
00163         return 0;
00164     }
00165 
00166 /*     Quick return if possible */
00167 
00168     k = minMACRO(*m,*n);
00169     if (k == 0) {
00170         work[1] = 1.;
00171         return 0;
00172     }
00173 
00174     nbmin = 2;
00175     nx = 0;
00176     iws = *n;
00177     if (nb > 1 && nb < k) {
00178 
00179 /*        Determine when to cross over from blocked to unblocked code.   
00180 
00181    Computing MAX */
00182         i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, (
00183                 ftnlen)6, (ftnlen)1);
00184         nx = maxMACRO(i__1,i__2);
00185         if (nx < k) {
00186 
00187 /*           Determine if workspace is large enough for blocked code. */
00188 
00189             ldwork = *n;
00190             iws = ldwork * nb;
00191             if (*lwork < iws) {
00192 
00193 /*              Not enough workspace to use optimal NB:  reduce NB and   
00194                 determine the minimum value of NB. */
00195 
00196                 nb = *lwork / ldwork;
00197 /* Computing MAX */
00198                 i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DGEQRF", " ", m, n, &c_n1, &
00199                         c_n1, (ftnlen)6, (ftnlen)1);
00200                 nbmin = maxMACRO(i__1,i__2);
00201             }
00202         }
00203     }
00204 
00205     if (nb >= nbmin && nb < k && nx < k) {
00206 
00207 /*        Use blocked code initially */
00208 
00209         i__1 = k - nx;
00210         i__2 = nb;
00211         for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
00212 /* Computing MIN */
00213             i__3 = k - i__ + 1;
00214             ib = minMACRO(i__3,nb);
00215 
00216 /*           Compute the QR factorization of the current block   
00217              A(i:m,i:i+ib-1) */
00218 
00219             i__3 = *m - i__ + 1;
00220             template_lapack_geqr2(&i__3, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
00221                     iinfo);
00222             if (i__ + ib <= *n) {
00223 
00224 /*              Form the triangular factor of the block reflector   
00225                 H = H(i) H(i+1) . . . H(i+ib-1) */
00226 
00227                 i__3 = *m - i__ + 1;
00228                 template_lapack_larft("Forward", "Columnwise", &i__3, &ib, &a_ref(i__, i__),
00229                          lda, &tau[i__], &work[1], &ldwork);
00230 
00231 /*              Apply H' to A(i:m,i+ib:n) from the left */
00232 
00233                 i__3 = *m - i__ + 1;
00234                 i__4 = *n - i__ - ib + 1;
00235                 template_lapack_larfb("Left", "Transpose", "Forward", "Columnwise", &i__3, &
00236                         i__4, &ib, &a_ref(i__, i__), lda, &work[1], &ldwork, &
00237                         a_ref(i__, i__ + ib), lda, &work[ib + 1], &ldwork);
00238             }
00239 /* L10: */
00240         }
00241     } else {
00242         i__ = 1;
00243     }
00244 
00245 /*     Use unblocked code to factor the last or only block. */
00246 
00247     if (i__ <= k) {
00248         i__2 = *m - i__ + 1;
00249         i__1 = *n - i__ + 1;
00250         template_lapack_geqr2(&i__2, &i__1, &a_ref(i__, i__), lda, &tau[i__], &work[1], &
00251                 iinfo);
00252     }
00253 
00254     work[1] = (Treal) iws;
00255     return 0;
00256 
00257 /*     End of DGEQRF */
00258 
00259 } /* dgeqrf_ */
00260 
00261 #undef a_ref
00262 
00263 
00264 #endif

Generated on 21 Nov 2012 for ergo by  doxygen 1.6.1