template_lapack_larre.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_LARRE_HEADER
00036 #define TEMPLATE_LAPACK_LARRE_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_larre(const char *range, const integer *n, Treal *vl, 
00041         Treal *vu, integer *il, integer *iu, Treal *d__, Treal 
00042         *e, Treal *e2, Treal *rtol1, Treal *rtol2, Treal *
00043         spltol, integer *nsplit, integer *isplit, integer *m, Treal *w, 
00044         Treal *werr, Treal *wgap, integer *iblock, integer *indexw, 
00045         Treal *gers, Treal *pivmin, Treal *work, integer *
00046         iwork, integer *info)
00047 {
00048     /* System generated locals */
00049     integer i__1, i__2;
00050     Treal d__1, d__2, d__3;
00051 
00052 
00053     /* Local variables */
00054     integer i__, j;
00055     Treal s1, s2;
00056     integer mb = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
00057     Treal gl;
00058     integer in, mm;
00059     Treal gu;
00060     integer cnt;
00061     Treal eps, tau, tmp, rtl;
00062     integer cnt1, cnt2;
00063     Treal tmp1, eabs;
00064     integer iend, jblk;
00065     Treal eold;
00066     integer indl;
00067     Treal dmax__, emax;
00068     integer wend = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
00069     integer idum, indu;
00070     Treal rtol;
00071     integer iseed[4];
00072     Treal avgap, sigma;
00073     integer iinfo;
00074     logical norep;
00075     integer ibegin;
00076     logical forceb;
00077     integer irange = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
00078     Treal sgndef;
00079     integer wbegin;
00080     Treal safmin, spdiam;
00081     logical usedqd;
00082     Treal clwdth, isleft;
00083     Treal isrght, bsrtol, dpivot;
00084 
00085 
00086 /*  -- LAPACK auxiliary routine (version 3.2) -- */
00087 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00088 /*     November 2006 */
00089 
00090 /*     .. Scalar Arguments .. */
00091 /*     .. */
00092 /*     .. Array Arguments .. */
00093 /*     .. */
00094 
00095 /*  Purpose */
00096 /*  ======= */
00097 
00098 /*  To find the desired eigenvalues of a given real symmetric */
00099 /*  tridiagonal matrix T, DLARRE sets any "small" off-diagonal */
00100 /*  elements to zero, and for each unreduced block T_i, it finds */
00101 /*  (a) a suitable shift at one end of the block's spectrum, */
00102 /*  (b) the base representation, T_i - sigma_i I = L_i D_i L_i^T, and */
00103 /*  (c) eigenvalues of each L_i D_i L_i^T. */
00104 /*  The representations and eigenvalues found are then used by */
00105 /*  DSTEMR to compute the eigenvectors of T. */
00106 /*  The accuracy varies depending on whether bisection is used to */
00107 /*  find a few eigenvalues or the dqds algorithm (subroutine DLASQ2) to */
00108 /*  conpute all and then discard any unwanted one. */
00109 /*  As an added benefit, DLARRE also outputs the n */
00110 /*  Gerschgorin intervals for the matrices L_i D_i L_i^T. */
00111 
00112 /*  Arguments */
00113 /*  ========= */
00114 
00115 /*  RANGE   (input) CHARACTER */
00116 /*          = 'A': ("All")   all eigenvalues will be found. */
00117 /*          = 'V': ("Value") all eigenvalues in the half-open interval */
00118 /*                           (VL, VU] will be found. */
00119 /*          = 'I': ("Index") the IL-th through IU-th eigenvalues (of the */
00120 /*                           entire matrix) will be found. */
00121 
00122 /*  N       (input) INTEGER */
00123 /*          The order of the matrix. N > 0. */
00124 
00125 /*  VL      (input/output) DOUBLE PRECISION */
00126 /*  VU      (input/output) DOUBLE PRECISION */
00127 /*          If RANGE='V', the lower and upper bounds for the eigenvalues. */
00128 /*          Eigenvalues less than or equal to VL, or greater than VU, */
00129 /*          will not be returned.  VL < VU. */
00130 /*          If RANGE='I' or ='A', DLARRE computes bounds on the desired */
00131 /*          part of the spectrum. */
00132 
00133 /*  IL      (input) INTEGER */
00134 /*  IU      (input) INTEGER */
00135 /*          If RANGE='I', the indices (in ascending order) of the */
00136 /*          smallest and largest eigenvalues to be returned. */
00137 /*          1 <= IL <= IU <= N. */
00138 
00139 /*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
00140 /*          On entry, the N diagonal elements of the tridiagonal */
00141 /*          matrix T. */
00142 /*          On exit, the N diagonal elements of the diagonal */
00143 /*          matrices D_i. */
00144 
00145 /*  E       (input/output) DOUBLE PRECISION array, dimension (N) */
00146 /*          On entry, the first (N-1) entries contain the subdiagonal */
00147 /*          elements of the tridiagonal matrix T; E(N) need not be set. */
00148 /*          On exit, E contains the subdiagonal elements of the unit */
00149 /*          bidiagonal matrices L_i. The entries E( ISPLIT( I ) ), */
00150 /*          1 <= I <= NSPLIT, contain the base points sigma_i on output. */
00151 
00152 /*  E2      (input/output) DOUBLE PRECISION array, dimension (N) */
00153 /*          On entry, the first (N-1) entries contain the SQUARES of the */
00154 /*          subdiagonal elements of the tridiagonal matrix T; */
00155 /*          E2(N) need not be set. */
00156 /*          On exit, the entries E2( ISPLIT( I ) ), */
00157 /*          1 <= I <= NSPLIT, have been set to zero */
00158 
00159 /*  RTOL1   (input) DOUBLE PRECISION */
00160 /*  RTOL2   (input) DOUBLE PRECISION */
00161 /*           Parameters for bisection. */
00162 /*           An interval [LEFT,RIGHT] has converged if */
00163 /*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
00164 
00165 /*  SPLTOL (input) DOUBLE PRECISION */
00166 /*          The threshold for splitting. */
00167 
00168 /*  NSPLIT  (output) INTEGER */
00169 /*          The number of blocks T splits into. 1 <= NSPLIT <= N. */
00170 
00171 /*  ISPLIT  (output) INTEGER array, dimension (N) */
00172 /*          The splitting points, at which T breaks up into blocks. */
00173 /*          The first block consists of rows/columns 1 to ISPLIT(1), */
00174 /*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2), */
00175 /*          etc., and the NSPLIT-th consists of rows/columns */
00176 /*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N. */
00177 
00178 /*  M       (output) INTEGER */
00179 /*          The total number of eigenvalues (of all L_i D_i L_i^T) */
00180 /*          found. */
00181 
00182 /*  W       (output) DOUBLE PRECISION array, dimension (N) */
00183 /*          The first M elements contain the eigenvalues. The */
00184 /*          eigenvalues of each of the blocks, L_i D_i L_i^T, are */
00185 /*          sorted in ascending order ( DLARRE may use the */
00186 /*          remaining N-M elements as workspace). */
00187 
00188 /*  WERR    (output) DOUBLE PRECISION array, dimension (N) */
00189 /*          The error bound on the corresponding eigenvalue in W. */
00190 
00191 /*  WGAP    (output) DOUBLE PRECISION array, dimension (N) */
00192 /*          The separation from the right neighbor eigenvalue in W. */
00193 /*          The gap is only with respect to the eigenvalues of the same block */
00194 /*          as each block has its own representation tree. */
00195 /*          Exception: at the right end of a block we store the left gap */
00196 
00197 /*  IBLOCK  (output) INTEGER array, dimension (N) */
00198 /*          The indices of the blocks (submatrices) associated with the */
00199 /*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
00200 /*          W(i) belongs to the first block from the top, =2 if W(i) */
00201 /*          belongs to the second block, etc. */
00202 
00203 /*  INDEXW  (output) INTEGER array, dimension (N) */
00204 /*          The indices of the eigenvalues within each block (submatrix); */
00205 /*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
00206 /*          i-th eigenvalue W(i) is the 10-th eigenvalue in block 2 */
00207 
00208 /*  GERS    (output) DOUBLE PRECISION array, dimension (2*N) */
00209 /*          The N Gerschgorin intervals (the i-th Gerschgorin interval */
00210 /*          is (GERS(2*i-1), GERS(2*i)). */
00211 
00212 /*  PIVMIN  (output) DOUBLE PRECISION */
00213 /*          The minimum pivot in the Sturm sequence for T. */
00214 
00215 /*  WORK    (workspace) DOUBLE PRECISION array, dimension (6*N) */
00216 /*          Workspace. */
00217 
00218 /*  IWORK   (workspace) INTEGER array, dimension (5*N) */
00219 /*          Workspace. */
00220 
00221 /*  INFO    (output) INTEGER */
00222 /*          = 0:  successful exit */
00223 /*          > 0:  A problem occured in DLARRE. */
00224 /*          < 0:  One of the called subroutines signaled an internal problem. */
00225 /*                Needs inspection of the corresponding parameter IINFO */
00226 /*                for further information. */
00227 
00228 /*          =-1:  Problem in DLARRD. */
00229 /*          = 2:  No base representation could be found in MAXTRY iterations. */
00230 /*                Increasing MAXTRY and recompilation might be a remedy. */
00231 /*          =-3:  Problem in DLARRB when computing the refined root */
00232 /*                representation for DLASQ2. */
00233 /*          =-4:  Problem in DLARRB when preforming bisection on the */
00234 /*                desired part of the spectrum. */
00235 /*          =-5:  Problem in DLASQ2. */
00236 /*          =-6:  Problem in DLASQ2. */
00237 
00238 /*  Further Details */
00239 /*  The base representations are required to suffer very little */
00240 /*  element growth and consequently define all their eigenvalues to */
00241 /*  high relative accuracy. */
00242 /*  =============== */
00243 
00244 /*  Based on contributions by */
00245 /*     Beresford Parlett, University of California, Berkeley, USA */
00246 /*     Jim Demmel, University of California, Berkeley, USA */
00247 /*     Inderjit Dhillon, University of Texas, Austin, USA */
00248 /*     Osni Marques, LBNL/NERSC, USA */
00249 /*     Christof Voemel, University of California, Berkeley, USA */
00250 
00251 /*  ===================================================================== */
00252 
00253 /*     .. Parameters .. */
00254 /*     .. */
00255 /*     .. Local Scalars .. */
00256 /*     .. */
00257 /*     .. Local Arrays .. */
00258 /*     .. */
00259 /*     .. External Functions .. */
00260 /*     .. */
00261 /*     .. External Subroutines .. */
00262 /*     .. */
00263 /*     .. Intrinsic Functions .. */
00264 /*     .. */
00265 /*     .. Executable Statements .. */
00266 
00267     /* Parameter adjustments */
00268 
00269 
00270     /* Table of constant values */
00271     
00272     integer c__1 = 1;
00273     integer c__2 = 2;
00274     
00275 
00276     --iwork;
00277     --work;
00278     --gers;
00279     --indexw;
00280     --iblock;
00281     --wgap;
00282     --werr;
00283     --w;
00284     --isplit;
00285     --e2;
00286     --e;
00287     --d__;
00288 
00289     /* Initialization added by Elias to get rid of compiler warnings. */
00290     mm = 0;
00291     /* Function Body */
00292     *info = 0;
00293 
00294 /*     Decode RANGE */
00295 
00296     if (template_blas_lsame(range, "A")) {
00297         irange = 1;
00298     } else if (template_blas_lsame(range, "V")) {
00299         irange = 3;
00300     } else if (template_blas_lsame(range, "I")) {
00301         irange = 2;
00302     }
00303     *m = 0;
00304 /*     Get machine constants */
00305     safmin = template_lapack_lamch("S",(Treal)0);
00306     eps = template_lapack_lamch("P",(Treal)0);
00307 /*     Set parameters */
00308     rtl = template_blas_sqrt(eps);
00309     bsrtol = template_blas_sqrt(eps);
00310 /*     Treat case of 1x1 matrix for quick return */
00311     if (*n == 1) {
00312       if (irange == 1 || ( irange == 3 && d__[1] > *vl && d__[1] <= *vu ) || 
00313           ( irange == 2 && *il == 1 && *iu == 1 ) ) {
00314             *m = 1;
00315             w[1] = d__[1];
00316 /*           The computation error of the eigenvalue is zero */
00317             werr[1] = 0.;
00318             wgap[1] = 0.;
00319             iblock[1] = 1;
00320             indexw[1] = 1;
00321             gers[1] = d__[1];
00322             gers[2] = d__[1];
00323         }
00324 /*        store the shift for the initial RRR, which is zero in this case */
00325         e[1] = 0.;
00326         return 0;
00327     }
00328 /*     General case: tridiagonal matrix of order > 1 */
00329 
00330 /*     Init WERR, WGAP. Compute Gerschgorin intervals and spectral diameter. */
00331 /*     Compute maximum off-diagonal entry and pivmin. */
00332     gl = d__[1];
00333     gu = d__[1];
00334     eold = 0.;
00335     emax = 0.;
00336     e[*n] = 0.;
00337     i__1 = *n;
00338     for (i__ = 1; i__ <= i__1; ++i__) {
00339         werr[i__] = 0.;
00340         wgap[i__] = 0.;
00341         eabs = (d__1 = e[i__], absMACRO(d__1));
00342         if (eabs >= emax) {
00343             emax = eabs;
00344         }
00345         tmp1 = eabs + eold;
00346         gers[(i__ << 1) - 1] = d__[i__] - tmp1;
00347 /* Computing MIN */
00348         d__1 = gl, d__2 = gers[(i__ << 1) - 1];
00349         gl = minMACRO(d__1,d__2);
00350         gers[i__ * 2] = d__[i__] + tmp1;
00351 /* Computing MAX */
00352         d__1 = gu, d__2 = gers[i__ * 2];
00353         gu = maxMACRO(d__1,d__2);
00354         eold = eabs;
00355 /* L5: */
00356     }
00357 /*     The minimum pivot allowed in the Sturm sequence for T */
00358 /* Computing MAX */
00359 /* Computing 2nd power */
00360     d__3 = emax;
00361     d__1 = 1., d__2 = d__3 * d__3;
00362     *pivmin = safmin * maxMACRO(d__1,d__2);
00363 /*     Compute spectral diameter. The Gerschgorin bounds give an */
00364 /*     estimate that is wrong by at most a factor of SQRT(2) */
00365     spdiam = gu - gl;
00366 /*     Compute splitting points */
00367     template_lapack_larra(n, &d__[1], &e[1], &e2[1], spltol, &spdiam, nsplit, &isplit[1], &
00368             iinfo);
00369 /*     Can force use of bisection instead of faster DQDS. */
00370 /*     Option left in the code for future multisection work. */
00371     forceb = FALSE_;
00372 /*     Initialize USEDQD, DQDS should be used for ALLRNG unless someone */
00373 /*     explicitly wants bisection. */
00374     usedqd = irange == 1 && ! forceb;
00375     if (irange == 1 && ! forceb) {
00376 /*        Set interval [VL,VU] that contains all eigenvalues */
00377         *vl = gl;
00378         *vu = gu;
00379     } else {
00380 /*        We call DLARRD to find crude approximations to the eigenvalues */
00381 /*        in the desired range. In case IRANGE = INDRNG, we also obtain the */
00382 /*        interval (VL,VU] that contains all the wanted eigenvalues. */
00383 /*        An interval [LEFT,RIGHT] has converged if */
00384 /*        RIGHT-LEFT.LT.RTOL*MAX(ABS(LEFT),ABS(RIGHT)) */
00385 /*        DLARRD needs a WORK of size 4*N, IWORK of size 3*N */
00386         template_lapack_larrd(range, "B", n, vl, vu, il, iu, &gers[1], &bsrtol, &d__[1], &e[
00387                 1], &e2[1], pivmin, nsplit, &isplit[1], &mm, &w[1], &werr[1], 
00388                 vl, vu, &iblock[1], &indexw[1], &work[1], &iwork[1], &iinfo);
00389         if (iinfo != 0) {
00390             *info = -1;
00391             return 0;
00392         }
00393 /*        Make sure that the entries M+1 to N in W, WERR, IBLOCK, INDEXW are 0 */
00394         i__1 = *n;
00395         for (i__ = mm + 1; i__ <= i__1; ++i__) {
00396             w[i__] = 0.;
00397             werr[i__] = 0.;
00398             iblock[i__] = 0;
00399             indexw[i__] = 0;
00400 /* L14: */
00401         }
00402     }
00403 /* ** */
00404 /*     Loop over unreduced blocks */
00405     ibegin = 1;
00406     wbegin = 1;
00407     i__1 = *nsplit;
00408     for (jblk = 1; jblk <= i__1; ++jblk) {
00409         iend = isplit[jblk];
00410         in = iend - ibegin + 1;
00411 /*        1 X 1 block */
00412         if (in == 1) {
00413           if (irange == 1 || ( irange == 3 && d__[ibegin] > *vl && d__[ibegin]
00414                                <= *vu ) || ( irange == 2 && iblock[wbegin] == jblk ) ) {
00415                 ++(*m);
00416                 w[*m] = d__[ibegin];
00417                 werr[*m] = 0.;
00418 /*              The gap for a single block doesn't matter for the later */
00419 /*              algorithm and is assigned an arbitrary large value */
00420                 wgap[*m] = 0.;
00421                 iblock[*m] = jblk;
00422                 indexw[*m] = 1;
00423                 ++wbegin;
00424             }
00425 /*           E( IEND ) holds the shift for the initial RRR */
00426             e[iend] = 0.;
00427             ibegin = iend + 1;
00428             goto L170;
00429         }
00430 
00431 /*        Blocks of size larger than 1x1 */
00432 
00433 /*        E( IEND ) will hold the shift for the initial RRR, for now set it =0 */
00434         e[iend] = 0.;
00435 
00436 /*        Find local outer bounds GL,GU for the block */
00437         gl = d__[ibegin];
00438         gu = d__[ibegin];
00439         i__2 = iend;
00440         for (i__ = ibegin; i__ <= i__2; ++i__) {
00441 /* Computing MIN */
00442             d__1 = gers[(i__ << 1) - 1];
00443             gl = minMACRO(d__1,gl);
00444 /* Computing MAX */
00445             d__1 = gers[i__ * 2];
00446             gu = maxMACRO(d__1,gu);
00447 /* L15: */
00448         }
00449         spdiam = gu - gl;
00450         if (! (irange == 1 && ! forceb)) {
00451 /*           Count the number of eigenvalues in the current block. */
00452             mb = 0;
00453             i__2 = mm;
00454             for (i__ = wbegin; i__ <= i__2; ++i__) {
00455                 if (iblock[i__] == jblk) {
00456                     ++mb;
00457                 } else {
00458                     goto L21;
00459                 }
00460 /* L20: */
00461             }
00462 L21:
00463             if (mb == 0) {
00464 /*              No eigenvalue in the current block lies in the desired range */
00465 /*              E( IEND ) holds the shift for the initial RRR */
00466                 e[iend] = 0.;
00467                 ibegin = iend + 1;
00468                 goto L170;
00469             } else {
00470 /*              Decide whether dqds or bisection is more efficient */
00471                 usedqd = (Treal) mb > in * .5 && ! forceb;
00472                 wend = wbegin + mb - 1;
00473 /*              Calculate gaps for the current block */
00474 /*              In later stages, when representations for individual */
00475 /*              eigenvalues are different, we use SIGMA = E( IEND ). */
00476                 sigma = 0.;
00477                 i__2 = wend - 1;
00478                 for (i__ = wbegin; i__ <= i__2; ++i__) {
00479 /* Computing MAX */
00480                     d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + 
00481                             werr[i__]);
00482                     wgap[i__] = maxMACRO(d__1,d__2);
00483 /* L30: */
00484                 }
00485 /* Computing MAX */
00486                 d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
00487                 wgap[wend] = maxMACRO(d__1,d__2);
00488 /*              Find local index of the first and last desired evalue. */
00489                 indl = indexw[wbegin];
00490                 indu = indexw[wend];
00491             }
00492         }
00493         if ( ( irange == 1 && ! forceb ) || usedqd) {
00494 /*           Case of DQDS */
00495 /*           Find approximations to the extremal eigenvalues of the block */
00496             template_lapack_larrk(&in, &c__1, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
00497                     rtl, &tmp, &tmp1, &iinfo);
00498             if (iinfo != 0) {
00499                 *info = -1;
00500                 return 0;
00501             }
00502 /* Computing MAX */
00503             d__2 = gl, d__3 = tmp - tmp1 - eps * 100. * (d__1 = tmp - tmp1, 
00504                     absMACRO(d__1));
00505             isleft = maxMACRO(d__2,d__3);
00506             template_lapack_larrk(&in, &in, &gl, &gu, &d__[ibegin], &e2[ibegin], pivmin, &
00507                     rtl, &tmp, &tmp1, &iinfo);
00508             if (iinfo != 0) {
00509                 *info = -1;
00510                 return 0;
00511             }
00512 /* Computing MIN */
00513             d__2 = gu, d__3 = tmp + tmp1 + eps * 100. * (d__1 = tmp + tmp1, 
00514                     absMACRO(d__1));
00515             isrght = minMACRO(d__2,d__3);
00516 /*           Improve the estimate of the spectral diameter */
00517             spdiam = isrght - isleft;
00518         } else {
00519 /*           Case of bisection */
00520 /*           Find approximations to the wanted extremal eigenvalues */
00521 /* Computing MAX */
00522             d__2 = gl, d__3 = w[wbegin] - werr[wbegin] - eps * 100. * (d__1 = 
00523                     w[wbegin] - werr[wbegin], absMACRO(d__1));
00524             isleft = maxMACRO(d__2,d__3);
00525 /* Computing MIN */
00526             d__2 = gu, d__3 = w[wend] + werr[wend] + eps * 100. * (d__1 = w[
00527                     wend] + werr[wend], absMACRO(d__1));
00528             isrght = minMACRO(d__2,d__3);
00529         }
00530 /*        Decide whether the base representation for the current block */
00531 /*        L_JBLK D_JBLK L_JBLK^T = T_JBLK - sigma_JBLK I */
00532 /*        should be on the left or the right end of the current block. */
00533 /*        The strategy is to shift to the end which is "more populated" */
00534 /*        Furthermore, decide whether to use DQDS for the computation of */
00535 /*        the eigenvalue approximations at the end of DLARRE or bisection. */
00536 /*        dqds is chosen if all eigenvalues are desired or the number of */
00537 /*        eigenvalues to be computed is large compared to the blocksize. */
00538         if (irange == 1 && ! forceb) {
00539 /*           If all the eigenvalues have to be computed, we use dqd */
00540             usedqd = TRUE_;
00541 /*           INDL is the local index of the first eigenvalue to compute */
00542             indl = 1;
00543             indu = in;
00544 /*           MB =  number of eigenvalues to compute */
00545             mb = in;
00546             wend = wbegin + mb - 1;
00547 /*           Define 1/4 and 3/4 points of the spectrum */
00548             s1 = isleft + spdiam * .25;
00549             s2 = isrght - spdiam * .25;
00550         } else {
00551 /*           DLARRD has computed IBLOCK and INDEXW for each eigenvalue */
00552 /*           approximation. */
00553 /*           choose sigma */
00554             if (usedqd) {
00555                 s1 = isleft + spdiam * .25;
00556                 s2 = isrght - spdiam * .25;
00557             } else {
00558                 tmp = minMACRO(isrght,*vu) - maxMACRO(isleft,*vl);
00559                 s1 = maxMACRO(isleft,*vl) + tmp * .25;
00560                 s2 = minMACRO(isrght,*vu) - tmp * .25;
00561             }
00562         }
00563 /*        Compute the negcount at the 1/4 and 3/4 points */
00564         if (mb > 1) {
00565             template_lapack_larrc("T", &in, &s1, &s2, &d__[ibegin], &e[ibegin], pivmin, &
00566                     cnt, &cnt1, &cnt2, &iinfo);
00567         }
00568         if (mb == 1) {
00569             sigma = gl;
00570             sgndef = 1.;
00571         } else if (cnt1 - indl >= indu - cnt2) {
00572             if (irange == 1 && ! forceb) {
00573                 sigma = maxMACRO(isleft,gl);
00574             } else if (usedqd) {
00575 /*              use Gerschgorin bound as shift to get pos def matrix */
00576 /*              for dqds */
00577                 sigma = isleft;
00578             } else {
00579 /*              use approximation of the first desired eigenvalue of the */
00580 /*              block as shift */
00581                 sigma = maxMACRO(isleft,*vl);
00582             }
00583             sgndef = 1.;
00584         } else {
00585             if (irange == 1 && ! forceb) {
00586                 sigma = minMACRO(isrght,gu);
00587             } else if (usedqd) {
00588 /*              use Gerschgorin bound as shift to get neg def matrix */
00589 /*              for dqds */
00590                 sigma = isrght;
00591             } else {
00592 /*              use approximation of the first desired eigenvalue of the */
00593 /*              block as shift */
00594                 sigma = minMACRO(isrght,*vu);
00595             }
00596             sgndef = -1.;
00597         }
00598 /*        An initial SIGMA has been chosen that will be used for computing */
00599 /*        T - SIGMA I = L D L^T */
00600 /*        Define the increment TAU of the shift in case the initial shift */
00601 /*        needs to be refined to obtain a factorization with not too much */
00602 /*        element growth. */
00603         if (usedqd) {
00604 /*           The initial SIGMA was to the outer end of the spectrum */
00605 /*           the matrix is definite and we need not retreat. */
00606             tau = spdiam * eps * *n + *pivmin * 2.;
00607         } else {
00608             if (mb > 1) {
00609                 clwdth = w[wend] + werr[wend] - w[wbegin] - werr[wbegin];
00610                 avgap = (d__1 = clwdth / (Treal) (wend - wbegin), absMACRO(
00611                         d__1));
00612                 if (sgndef == 1.) {
00613 /* Computing MAX */
00614                     d__1 = wgap[wbegin];
00615                     tau = maxMACRO(d__1,avgap) * .5;
00616 /* Computing MAX */
00617                     d__1 = tau, d__2 = werr[wbegin];
00618                     tau = maxMACRO(d__1,d__2);
00619                 } else {
00620 /* Computing MAX */
00621                     d__1 = wgap[wend - 1];
00622                     tau = maxMACRO(d__1,avgap) * .5;
00623 /* Computing MAX */
00624                     d__1 = tau, d__2 = werr[wend];
00625                     tau = maxMACRO(d__1,d__2);
00626                 }
00627             } else {
00628                 tau = werr[wbegin];
00629             }
00630         }
00631 
00632         for (idum = 1; idum <= 6; ++idum) {
00633 /*           Compute L D L^T factorization of tridiagonal matrix T - sigma I. */
00634 /*           Store D in WORK(1:IN), L in WORK(IN+1:2*IN), and reciprocals of */
00635 /*           pivots in WORK(2*IN+1:3*IN) */
00636             dpivot = d__[ibegin] - sigma;
00637             work[1] = dpivot;
00638             dmax__ = absMACRO(work[1]);
00639             j = ibegin;
00640             i__2 = in - 1;
00641             for (i__ = 1; i__ <= i__2; ++i__) {
00642                 work[(in << 1) + i__] = 1. / work[i__];
00643                 tmp = e[j] * work[(in << 1) + i__];
00644                 work[in + i__] = tmp;
00645                 dpivot = d__[j + 1] - sigma - tmp * e[j];
00646                 work[i__ + 1] = dpivot;
00647 /* Computing MAX */
00648                 d__1 = dmax__, d__2 = absMACRO(dpivot);
00649                 dmax__ = maxMACRO(d__1,d__2);
00650                 ++j;
00651 /* L70: */
00652             }
00653 /*           check for element growth */
00654             if (dmax__ > spdiam * 64.) {
00655                 norep = TRUE_;
00656             } else {
00657                 norep = FALSE_;
00658             }
00659             if (usedqd && ! norep) {
00660 /*              Ensure the definiteness of the representation */
00661 /*              All entries of D (of L D L^T) must have the same sign */
00662                 i__2 = in;
00663                 for (i__ = 1; i__ <= i__2; ++i__) {
00664                     tmp = sgndef * work[i__];
00665                     if (tmp < 0.) {
00666                         norep = TRUE_;
00667                     }
00668 /* L71: */
00669                 }
00670             }
00671             if (norep) {
00672 /*              Note that in the case of IRANGE=ALLRNG, we use the Gerschgorin */
00673 /*              shift which makes the matrix definite. So we should end up */
00674 /*              here really only in the case of IRANGE = VALRNG or INDRNG. */
00675                 if (idum == 5) {
00676                     if (sgndef == 1.) {
00677 /*                    The fudged Gerschgorin shift should succeed */
00678                         sigma = gl - spdiam * 2. * eps * *n - *pivmin * 4.;
00679                     } else {
00680                         sigma = gu + spdiam * 2. * eps * *n + *pivmin * 4.;
00681                     }
00682                 } else {
00683                     sigma -= sgndef * tau;
00684                     tau *= 2.;
00685                 }
00686             } else {
00687 /*              an initial RRR is found */
00688                 goto L83;
00689             }
00690 /* L80: */
00691         }
00692 /*        if the program reaches this point, no base representation could be */
00693 /*        found in MAXTRY iterations. */
00694         *info = 2;
00695         return 0;
00696 L83:
00697 /*        At this point, we have found an initial base representation */
00698 /*        T - SIGMA I = L D L^T with not too much element growth. */
00699 /*        Store the shift. */
00700         e[iend] = sigma;
00701 /*        Store D and L. */
00702         template_blas_copy(&in, &work[1], &c__1, &d__[ibegin], &c__1);
00703         i__2 = in - 1;
00704         template_blas_copy(&i__2, &work[in + 1], &c__1, &e[ibegin], &c__1);
00705         if (mb > 1) {
00706 
00707 /*           Perturb each entry of the base representation by a small */
00708 /*           (but random) relative amount to overcome difficulties with */
00709 /*           glued matrices. */
00710 
00711             for (i__ = 1; i__ <= 4; ++i__) {
00712                 iseed[i__ - 1] = 1;
00713 /* L122: */
00714             }
00715             i__2 = (in << 1) - 1;
00716             template_lapack_larnv(&c__2, iseed, &i__2, &work[1]);
00717             i__2 = in - 1;
00718             for (i__ = 1; i__ <= i__2; ++i__) {
00719                 d__[ibegin + i__ - 1] *= eps * 8. * work[i__] + 1.;
00720                 e[ibegin + i__ - 1] *= eps * 8. * work[in + i__] + 1.;
00721 /* L125: */
00722             }
00723             d__[iend] *= eps * 4. * work[in] + 1.;
00724 
00725         }
00726 
00727 /*        Don't update the Gerschgorin intervals because keeping track */
00728 /*        of the updates would be too much work in DLARRV. */
00729 /*        We update W instead and use it to locate the proper Gerschgorin */
00730 /*        intervals. */
00731 /*        Compute the required eigenvalues of L D L' by bisection or dqds */
00732         if (! usedqd) {
00733 /*           If DLARRD has been used, shift the eigenvalue approximations */
00734 /*           according to their representation. This is necessary for */
00735 /*           a uniform DLARRV since dqds computes eigenvalues of the */
00736 /*           shifted representation. In DLARRV, W will always hold the */
00737 /*           UNshifted eigenvalue approximation. */
00738             i__2 = wend;
00739             for (j = wbegin; j <= i__2; ++j) {
00740                 w[j] -= sigma;
00741                 werr[j] += (d__1 = w[j], absMACRO(d__1)) * eps;
00742 /* L134: */
00743             }
00744 /*           call DLARRB to reduce eigenvalue error of the approximations */
00745 /*           from DLARRD */
00746             i__2 = iend - 1;
00747             for (i__ = ibegin; i__ <= i__2; ++i__) {
00748 /* Computing 2nd power */
00749                 d__1 = e[i__];
00750                 work[i__] = d__[i__] * (d__1 * d__1);
00751 /* L135: */
00752             }
00753 /*           use bisection to find EV from INDL to INDU */
00754             i__2 = indl - 1;
00755             template_lapack_larrb(&in, &d__[ibegin], &work[ibegin], &indl, &indu, rtol1, 
00756                     rtol2, &i__2, &w[wbegin], &wgap[wbegin], &werr[wbegin], &
00757                     work[(*n << 1) + 1], &iwork[1], pivmin, &spdiam, &in, &
00758                     iinfo);
00759             if (iinfo != 0) {
00760                 *info = -4;
00761                 return 0;
00762             }
00763 /*           DLARRB computes all gaps correctly except for the last one */
00764 /*           Record distance to VU/GU */
00765 /* Computing MAX */
00766             d__1 = 0., d__2 = *vu - sigma - (w[wend] + werr[wend]);
00767             wgap[wend] = maxMACRO(d__1,d__2);
00768             i__2 = indu;
00769             for (i__ = indl; i__ <= i__2; ++i__) {
00770                 ++(*m);
00771                 iblock[*m] = jblk;
00772                 indexw[*m] = i__;
00773 /* L138: */
00774             }
00775         } else {
00776 /*           Call dqds to get all eigs (and then possibly delete unwanted */
00777 /*           eigenvalues). */
00778 /*           Note that dqds finds the eigenvalues of the L D L^T representation */
00779 /*           of T to high relative accuracy. High relative accuracy */
00780 /*           might be lost when the shift of the RRR is subtracted to obtain */
00781 /*           the eigenvalues of T. However, T is not guaranteed to define its */
00782 /*           eigenvalues to high relative accuracy anyway. */
00783 /*           Set RTOL to the order of the tolerance used in DLASQ2 */
00784 /*           This is an ESTIMATED error, the worst case bound is 4*N*EPS */
00785 /*           which is usually too large and requires unnecessary work to be */
00786 /*           done by bisection when computing the eigenvectors */
00787             rtol = template_blas_log((Treal) in) * 4. * eps;
00788             j = ibegin;
00789             i__2 = in - 1;
00790             for (i__ = 1; i__ <= i__2; ++i__) {
00791                 work[(i__ << 1) - 1] = (d__1 = d__[j], absMACRO(d__1));
00792                 work[i__ * 2] = e[j] * e[j] * work[(i__ << 1) - 1];
00793                 ++j;
00794 /* L140: */
00795             }
00796             work[(in << 1) - 1] = (d__1 = d__[iend], absMACRO(d__1));
00797             work[in * 2] = 0.;
00798             template_lapack_lasq2(&in, &work[1], &iinfo);
00799             if (iinfo != 0) {
00800 /*              If IINFO = -5 then an index is part of a tight cluster */
00801 /*              and should be changed. The index is in IWORK(1) and the */
00802 /*              gap is in WORK(N+1) */
00803                 *info = -5;
00804                 return 0;
00805             } else {
00806 /*              Test that all eigenvalues are positive as expected */
00807                 i__2 = in;
00808                 for (i__ = 1; i__ <= i__2; ++i__) {
00809                     if (work[i__] < 0.) {
00810                         *info = -6;
00811                         return 0;
00812                     }
00813 /* L149: */
00814                 }
00815             }
00816             if (sgndef > 0.) {
00817                 i__2 = indu;
00818                 for (i__ = indl; i__ <= i__2; ++i__) {
00819                     ++(*m);
00820                     w[*m] = work[in - i__ + 1];
00821                     iblock[*m] = jblk;
00822                     indexw[*m] = i__;
00823 /* L150: */
00824                 }
00825             } else {
00826                 i__2 = indu;
00827                 for (i__ = indl; i__ <= i__2; ++i__) {
00828                     ++(*m);
00829                     w[*m] = -work[i__];
00830                     iblock[*m] = jblk;
00831                     indexw[*m] = i__;
00832 /* L160: */
00833                 }
00834             }
00835             i__2 = *m;
00836             for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
00837 /*              the value of RTOL below should be the tolerance in DLASQ2 */
00838                 werr[i__] = rtol * (d__1 = w[i__], absMACRO(d__1));
00839 /* L165: */
00840             }
00841             i__2 = *m - 1;
00842             for (i__ = *m - mb + 1; i__ <= i__2; ++i__) {
00843 /*              compute the right gap between the intervals */
00844 /* Computing MAX */
00845                 d__1 = 0., d__2 = w[i__ + 1] - werr[i__ + 1] - (w[i__] + werr[
00846                         i__]);
00847                 wgap[i__] = maxMACRO(d__1,d__2);
00848 /* L166: */
00849             }
00850 /* Computing MAX */
00851             d__1 = 0., d__2 = *vu - sigma - (w[*m] + werr[*m]);
00852             wgap[*m] = maxMACRO(d__1,d__2);
00853         }
00854 /*        proceed with next block */
00855         ibegin = iend + 1;
00856         wbegin = wend + 1;
00857 L170:
00858         ;
00859     }
00860 
00861     return 0;
00862 
00863 /*     end of DLARRE */
00864 
00865 } /* dlarre_ */
00866 
00867 #endif

Generated on 21 Nov 2012 for ergo by  doxygen 1.6.1