template_lapack_lasq3.h

Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_LASQ3_HEADER
00036 #define TEMPLATE_LAPACK_LASQ3_HEADER
00037 
00038 template<class Treal>
00039 int template_lapack_lasq3(integer *i0, integer *n0, Treal *z__, 
00040         integer *pp, Treal *dmin__, Treal *sigma, Treal *desig, 
00041          Treal *qmax, integer *nfail, integer *iter, integer *ndiv, 
00042         logical *ieee, integer *ttype, Treal *dmin1, Treal *dmin2, 
00043         Treal *dn, Treal *dn1, Treal *dn2, Treal *g, 
00044         Treal *tau)
00045 {
00046     /* System generated locals */
00047     integer i__1;
00048     Treal d__1, d__2;
00049 
00050 
00051     /* Local variables */
00052     Treal s, t;
00053     integer j4, nn;
00054     Treal eps, tol;
00055     integer n0in, ipn4;
00056     Treal tol2, temp;
00057 
00058 
00059 /*  -- LAPACK routine (version 3.2)                                    -- */
00060 
00061 /*  -- Contributed by Osni Marques of the Lawrence Berkeley National   -- */
00062 /*  -- Laboratory and Beresford Parlett of the Univ. of California at  -- */
00063 /*  -- Berkeley                                                        -- */
00064 /*  -- November 2008                                                   -- */
00065 
00066 /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
00067 /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
00068 
00069 /*     .. Scalar Arguments .. */
00070 /*     .. */
00071 /*     .. Array Arguments .. */
00072 /*     .. */
00073 
00074 /*  Purpose */
00075 /*  ======= */
00076 
00077 /*  DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. */
00078 /*  In case of failure it changes shifts, and tries again until output */
00079 /*  is positive. */
00080 
00081 /*  Arguments */
00082 /*  ========= */
00083 
00084 /*  I0     (input) INTEGER */
00085 /*         First index. */
00086 
00087 /*  N0     (input) INTEGER */
00088 /*         Last index. */
00089 
00090 /*  Z      (input) DOUBLE PRECISION array, dimension ( 4*N ) */
00091 /*         Z holds the qd array. */
00092 
00093 /*  PP     (input/output) INTEGER */
00094 /*         PP=0 for ping, PP=1 for pong. */
00095 /*         PP=2 indicates that flipping was applied to the Z array */
00096 /*         and that the initial tests for deflation should not be */
00097 /*         performed. */
00098 
00099 /*  DMIN   (output) DOUBLE PRECISION */
00100 /*         Minimum value of d. */
00101 
00102 /*  SIGMA  (output) DOUBLE PRECISION */
00103 /*         Sum of shifts used in current segment. */
00104 
00105 /*  DESIG  (input/output) DOUBLE PRECISION */
00106 /*         Lower order part of SIGMA */
00107 
00108 /*  QMAX   (input) DOUBLE PRECISION */
00109 /*         Maximum value of q. */
00110 
00111 /*  NFAIL  (output) INTEGER */
00112 /*         Number of times shift was too big. */
00113 
00114 /*  ITER   (output) INTEGER */
00115 /*         Number of iterations. */
00116 
00117 /*  NDIV   (output) INTEGER */
00118 /*         Number of divisions. */
00119 
00120 /*  IEEE   (input) LOGICAL */
00121 /*         Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */
00122 
00123 /*  TTYPE  (input/output) INTEGER */
00124 /*         Shift type. */
00125 
00126 /*  DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */
00127 /*         These are passed as arguments in order to save their values */
00128 /*         between calls to DLASQ3. */
00129 
00130 /*  ===================================================================== */
00131 
00132 /*     .. Parameters .. */
00133 /*     .. */
00134 /*     .. Local Scalars .. */
00135 /*     .. */
00136 /*     .. External Subroutines .. */
00137 /*     .. */
00138 /*     .. External Function .. */
00139 /*     .. */
00140 /*     .. Intrinsic Functions .. */
00141 /*     .. */
00142 /*     .. Executable Statements .. */
00143 
00144     /* Parameter adjustments */
00145     --z__;
00146 
00147     /* Function Body */
00148     n0in = *n0;
00149     eps = template_lapack_lamch("Precision", (Treal)0);
00150     tol = eps * 100.;
00151 /* Computing 2nd power */
00152     d__1 = tol;
00153     tol2 = d__1 * d__1;
00154 
00155 /*     Check for deflation. */
00156 
00157 L10:
00158 
00159     if (*n0 < *i0) {
00160         return 0;
00161     }
00162     if (*n0 == *i0) {
00163         goto L20;
00164     }
00165     nn = (*n0 << 2) + *pp;
00166     if (*n0 == *i0 + 1) {
00167         goto L40;
00168     }
00169 
00170 /*     Check whether E(N0-1) is negligible, 1 eigenvalue. */
00171 
00172     if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) - 
00173             4] > tol2 * z__[nn - 7]) {
00174         goto L30;
00175     }
00176 
00177 L20:
00178 
00179     z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma;
00180     --(*n0);
00181     goto L10;
00182 
00183 /*     Check  whether E(N0-2) is negligible, 2 eigenvalues. */
00184 
00185 L30:
00186 
00187     if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[
00188             nn - 11]) {
00189         goto L50;
00190     }
00191 
00192 L40:
00193 
00194     if (z__[nn - 3] > z__[nn - 7]) {
00195         s = z__[nn - 3];
00196         z__[nn - 3] = z__[nn - 7];
00197         z__[nn - 7] = s;
00198     }
00199     if (z__[nn - 5] > z__[nn - 3] * tol2) {
00200         t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5;
00201         s = z__[nn - 3] * (z__[nn - 5] / t);
00202         if (s <= t) {
00203             s = z__[nn - 3] * (z__[nn - 5] / (t * (template_blas_sqrt(s / t + 1.) + 1.)));
00204         } else {
00205             s = z__[nn - 3] * (z__[nn - 5] / (t + template_blas_sqrt(t) * template_blas_sqrt(t + s)));
00206         }
00207         t = z__[nn - 7] + (s + z__[nn - 5]);
00208         z__[nn - 3] *= z__[nn - 7] / t;
00209         z__[nn - 7] = t;
00210     }
00211     z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma;
00212     z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma;
00213     *n0 += -2;
00214     goto L10;
00215 
00216 L50:
00217     if (*pp == 2) {
00218         *pp = 0;
00219     }
00220 
00221 /*     Reverse the qd-array, if warranted. */
00222 
00223     if (*dmin__ <= 0. || *n0 < n0in) {
00224         if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) {
00225           ipn4 = ( *i0 + *n0 ) << 2;
00226           i__1 = ( *i0 + *n0 - 1 ) << 1;
00227             for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
00228                 temp = z__[j4 - 3];
00229                 z__[j4 - 3] = z__[ipn4 - j4 - 3];
00230                 z__[ipn4 - j4 - 3] = temp;
00231                 temp = z__[j4 - 2];
00232                 z__[j4 - 2] = z__[ipn4 - j4 - 2];
00233                 z__[ipn4 - j4 - 2] = temp;
00234                 temp = z__[j4 - 1];
00235                 z__[j4 - 1] = z__[ipn4 - j4 - 5];
00236                 z__[ipn4 - j4 - 5] = temp;
00237                 temp = z__[j4];
00238                 z__[j4] = z__[ipn4 - j4 - 4];
00239                 z__[ipn4 - j4 - 4] = temp;
00240 /* L60: */
00241             }
00242             if (*n0 - *i0 <= 4) {
00243                 z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1];
00244                 z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp];
00245             }
00246 /* Computing MIN */
00247             d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1];
00248             *dmin2 = minMACRO(d__1,d__2);
00249 /* Computing MIN */
00250             d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1]
00251                     , d__1 = minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3];
00252             z__[(*n0 << 2) + *pp - 1] = minMACRO(d__1,d__2);
00253 /* Computing MIN */
00254             d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 =
00255                      minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4];
00256             z__[(*n0 << 2) - *pp] = minMACRO(d__1,d__2);
00257 /* Computing MAX */
00258             d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = maxMACRO(d__1,
00259                     d__2), d__2 = z__[(*i0 << 2) + *pp + 1];
00260             *qmax = maxMACRO(d__1,d__2);
00261             *dmin__ = -0.;
00262         }
00263     }
00264 
00265 /*     Choose a shift. */
00266 
00267     template_lapack_lasq4(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, 
00268             tau, ttype, g);
00269 
00270 /*     Call dqds until DMIN > 0. */
00271 
00272 L70:
00273 
00274     template_lapack_lasq5(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, 
00275             ieee);
00276 
00277     *ndiv += *n0 - *i0 + 2;
00278     ++(*iter);
00279 
00280 /*     Check status. */
00281 
00282     if (*dmin__ >= 0. && *dmin1 > 0.) {
00283 
00284 /*        Success. */
00285 
00286         goto L90;
00287 
00288     } else if (*dmin__ < 0. && *dmin1 > 0. && z__[( ( *n0 - 1 ) << 2) - *pp] < tol 
00289             * (*sigma + *dn1) && absMACRO(*dn) < tol * *sigma) {
00290 
00291 /*        Convergence hidden by negative DN. */
00292 
00293       z__[( ( *n0 - 1 ) << 2) - *pp + 2] = 0.;
00294         *dmin__ = 0.;
00295         goto L90;
00296     } else if (*dmin__ < 0.) {
00297 
00298 /*        TAU too big. Select new TAU and try again. */
00299 
00300         ++(*nfail);
00301         if (*ttype < -22) {
00302 
00303 /*           Failed twice. Play it safe. */
00304 
00305             *tau = 0.;
00306         } else if (*dmin1 > 0.) {
00307 
00308 /*           Late failure. Gives excellent shift. */
00309 
00310             *tau = (*tau + *dmin__) * (1. - eps * 2.);
00311             *ttype += -11;
00312         } else {
00313 
00314 /*           Early failure. Divide by 4. */
00315 
00316             *tau *= .25;
00317             *ttype += -12;
00318         }
00319         goto L70;
00320     } else if (template_lapack_isnan(dmin__)) {
00321 
00322 /*        NaN. */
00323 
00324         if (*tau == 0.) {
00325             goto L80;
00326         } else {
00327             *tau = 0.;
00328             goto L70;
00329         }
00330     } else {
00331 
00332 /*        Possible underflow. Play it safe. */
00333 
00334         goto L80;
00335     }
00336 
00337 /*     Risk of underflow. */
00338 
00339 L80:
00340     template_lapack_lasq6(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2);
00341     *ndiv += *n0 - *i0 + 2;
00342     ++(*iter);
00343     *tau = 0.;
00344 
00345 L90:
00346     if (*tau < *sigma) {
00347         *desig += *tau;
00348         t = *sigma + *desig;
00349         *desig -= t - *sigma;
00350     } else {
00351         t = *sigma + *tau;
00352         *desig = *sigma - (t - *tau) + *desig;
00353     }
00354     *sigma = t;
00355 
00356     return 0;
00357 
00358 /*     End of DLASQ3 */
00359 
00360 } /* dlasq3_ */
00361 
00362 #endif

Generated on 21 Nov 2012 for ergo by  doxygen 1.6.1