From 84d26e66ea695f0ebad4a4b611ff05df8f2adf96 Mon Sep 17 00:00:00 2001
From: Tim Daly
Date: Tue, 5 Apr 2016 20:03:30 -0400
Subject: [PATCH] books/bookvolbib add reference material for LAPACK
Goal: Axiom Literate Programming
\section{Numerical Algorithms} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@article{Anda94,
author = "Anda, A.A. and Park, H.",
title = "Fast plane rotations with dynamic scaling",
journal = "SIAM J. matrix Anal. Appl.",
volume = "15",
year = "1994",
pages = "162-174"
}
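
As background for this and the following thesis: a textbook ("slow")
Givens rotation in Python. The fast rotations studied here avoid the
square root by carrying the scaling in a separate diagonal factor,
which is exactly what makes rescaling an issue. A minimal sketch, not
code from the paper:

  import math

  def givens(a, b):
      # Return (c, s, r) with [[c, s], [-s, c]] @ [a, b] = [r, 0].
      if b == 0.0:
          return 1.0, 0.0, a
      r = math.hypot(a, b)  # sqrt(a*a + b*b) without overflow
      return a / r, b / r, r
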
@phdthesis{Anda95,
author = "Anda, Andrew Allen",
title = "Self-Scaling Fast Plane Rotation Algorithms",
school = "University of Minnesota",
year = "1995",
month = "February",
paper = "Anda95.pdf",
abstract =
"A suite of {\sl self-scaling} fast circular plane rotation algorithms
is developed which obviates the monitoring and periodic rescaling
necessitated by the standard set of fast plane rotation
algorithms. Self-Scaling fast rotations dynamically preserve the
normed scaling of the diagonal factor matrix whereas standard fast
rotations exhibit divergent scaling. Variations on standard fast
rotation matrices are developed and algorithms which implement them
are offered. Self-Scaling fast rotations are shown to be essentially
as accurate as slow rotations and at least as efficient as standard
fast rotations. Computational experimental results utilizing the
Cray-2 illustrate the effectively stable scaling exhibited by
self-scaling fast rotations. Jacobi-class algorithms with one-sided
alterations are developed for the algebraic eigenvalue decomposition
using self-scaling fast plane rotations and one-sided
modifications. The new algorithms are shown to be both accurate and
efficient on both vector and parallel architectures. The utility is
described of applying fast plane rotations towards the rank-one update
and downdate of least squares factorizations. The equivalence is
illuminated of LINPACK, hyperbolic rotation, and fast negatively
weighted plane rotation downdating. Algorithms are presented which
apply self-scaling fast plane rotations to the QR factorization for
stiff least squares problems. Both fast and standard Givens
rotation-based algorithms are shown to produce accurate results
regardless of row sorting even with extremely heavily row weighted
matrices. Such matrices emanate, e.g., from equality constrained least
squares problems solved via the weighting method. The necessity of
column sorting is emphasized. Numerical tests expose the Householder
QR factorization algorithm to be sensitive to row sorting and it
yields less accurate results for greater weights. Additionally, the
modified Gram-Schmidt algorithm is shown to be sensitive to row
sorting to a notably significant but lesser degree. Self-Scaling fast
plane rotation algorithms, having competitive computational
complexities, must therefore be the method of choice for the QR
factorization of stiff matrices. Timing results on the Cray 2, [XY]M/P,
and C90, of rotations both in and out of a matrix factorization
context are presented. Architectural features that can best exploit
the advantageous features of the new fast rotations are subsequently
discussed."
}
@misc{Baud12,
author = "Baudin, Michael and Smith, Robert L.",
title = "A Robust Complex Division in Scilab",
url = "http://arxiv.org/pdf/1210.4539.pdf",
year = "2012",
month = "October",
paper = "Baud12.pdf",
abstract =
"The most widely used algorithm for floating point complex division,
known as Smith's method, may fail more often than expected. This
document presents two improved complex division algorithms. We present
a proof of robustness of the first improved algorithm. Numerical
simulations show that this algorithm performs well in practice and is
significantly more robust than other known implementations. By
combining additional scaling methods with this first algorithm, we
were able to create a second algorithm, which rarely fails."
}
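
For reference, Smith's original 1962 method, the baseline that the
paper improves on; scaling by the larger of |c| and |d| keeps the
intermediate products from overflowing. A sketch, not the Scilab code:

  def smith_div(a, b, c, d):
      # Compute (a + i*b) / (c + i*d) without forming c*c + d*d.
      if abs(c) >= abs(d):
          r = d / c
          den = c + d * r
          return (a + b * r) / den, (b - a * r) / den
      else:
          r = c / d
          den = c * r + d
          return (a * r + b) / den, (b * r - a) / den

Even this formulation fails for components near the overflow and
underflow thresholds, which is what the paper's extra scaling repairs.
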
@article{Demm90,
author = "Demmel, James and Kahan, W.",
title = "Accurate Singular Values of Bidiagonal Matrices",
journal = "SIAM J. Sci. Stat. Comput.",
volume = "11",
number = "5",
pages = "873-912",
year = "1990",
url = "http://www.netlib.org/lapack/lawnspdf/lawn03.pdf",
paper = "Demm90.pdf",
abstract =
"Computing the singular values of a bidiagonal matrix is the final
phase of the standard algorithm for the singular value decomposition
of a general matrix. We present a new algorithm which computes all the
singular values of a bidiagonal matrix to high relative accuracy
independent of their magnitudes. In contrast, the standard algorithm
for bidiagonal matrices may compute small singular values with no
relative accuracy at all. Numerical experiments show that the new
algorithm is comparable in speed to the standard algorithm, and
frequently faster."
}
@article{Demm92,
author = "Demmel, James and Veselic, Kresimir",
title = "Jacobi's Method is More Accurate than QR",
journal = "SIAM J. Matrix Anal. and Appl",
volume = "13",
number = "4",
pages = "1204-1245",
url = "http://www.netlib.org/lapack/lawnspdf/lawn15.pdf",
paper = "Demm92.pdf",
abstract =
"We show that Jacobi's method (with a proper stopping criterion)
computes small eigenvalues of symmetric positive definite matrices
with a uniformly better relative accuracy bound than QR, divide and
conquer, traditional bisection, or any algorithm which first involves
tridiagonalizing the matrix. In fact, modulo an assumption based on
extensive numerical tests, we show that Jacobi's method is optimally
accurate in the following sense: if the matrix is such that small
relative errors in its entries cause small relative errors in its
eigenvalues, Jacobi will compute them with nearly this accuracy. In
other words, as long as the initial matrix has small relative errors
in each component, even using infinite precision will not improve on
Jacobi (modulo factors of dimensionality). We also show the
eigenvectors are computed more accurately by Jacobi than previously
thought possible. We prove similar results for using one-sided Jacobi
for the singular value decomposition of a general matrix."
}
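
The method itself is classical; a cyclic two-sided Jacobi sketch for a
dense symmetric matrix (illustration only -- the paper's accuracy claim
depends on the stopping criterion, not on this bare loop):

  import numpy as np

  def jacobi_eig(A, tol=1e-12, max_sweeps=30):
      # Repeatedly zero A[p,q] with the rotation given by
      # tan(2*theta) = 2*A[p,q] / (A[q,q] - A[p,p]).
      A = np.array(A, dtype=float)
      n = A.shape[0]
      V = np.eye(n)
      for _ in range(max_sweeps):
          off = np.sum(np.tril(A, -1) ** 2)
          if off <= tol * np.linalg.norm(A) ** 2:
              break
          for p in range(n - 1):
              for q in range(p + 1, n):
                  if A[p, q] == 0.0:
                      continue
                  tau = (A[q, q] - A[p, p]) / (2.0 * A[p, q])
                  t = 1.0 / (abs(tau) + np.sqrt(1.0 + tau * tau))
                  if tau < 0.0:
                      t = -t
                  c = 1.0 / np.sqrt(1.0 + t * t)
                  s = t * c
                  J = np.eye(n)
                  J[p, p] = J[q, q] = c
                  J[p, q], J[q, p] = s, -s
                  A = J.T @ A @ J
                  V = V @ J
      return np.diag(A), V  # eigenvalues and eigenvectors
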
@article{Rijk89,
author = "de Rijk, P.P.",
title = "A One-sided Jacobi Algorithm for Computing the Singular Value
Decomposition on a vector computer",
journal = "SIAM J. Sci. Stat. Comput.",
volume = "10",
number = "2",
month = "March",
year = "1989",
pages = "359-371",
abstract =
"An old algorithm for computing the singular value decomposition,
which was first mentioned by Hestenes, has gained renewed interest
because of its properties of parallelism and vectorizability. Some
computational modifications are given and a comparison with the
well-known Golub-Reinsch algorithm is made. Comparative experiments on
a CYBER 205 are reported."
}
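
A compact sketch of the Hestenes one-sided iteration discussed here:
rotate pairs of columns until they are mutually orthogonal, then read
the singular values off as column norms (full column rank assumed; no
pivoting or convergence refinements):

  import numpy as np

  def hestenes_svd(A, max_sweeps=30, tol=1e-12):
      U = np.array(A, dtype=float)
      n = U.shape[1]
      V = np.eye(n)
      for _ in range(max_sweeps):
          done = True
          for p in range(n - 1):
              for q in range(p + 1, n):
                  apq = U[:, p] @ U[:, q]
                  app = U[:, p] @ U[:, p]
                  aqq = U[:, q] @ U[:, q]
                  if abs(apq) <= tol * np.sqrt(app * aqq):
                      continue
                  done = False
                  zeta = (aqq - app) / (2.0 * apq)
                  t = 1.0 / (abs(zeta) + np.sqrt(1.0 + zeta * zeta))
                  if zeta < 0.0:
                      t = -t
                  c = 1.0 / np.sqrt(1.0 + t * t)
                  s = t * c
                  rot = np.array([[c, s], [-s, c]])
                  U[:, [p, q]] = U[:, [p, q]] @ rot
                  V[:, [p, q]] = V[:, [p, q]] @ rot
          if done:
              break
      sigma = np.linalg.norm(U, axis=0)
      return U / sigma, sigma, V  # A is approximately U @ diag(sigma) @ V.T
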
@article{Drma97,
author = "Drmac, Zlatko",
title = "Implementation of Jacobi Rotations for Accurate Singular Value
Computation in Floating Point Arithmetic",
journal = "SIAM Journal on Scientific Computing",
volume = "18",
number = "4",
year = "1997",
month = "July",
pages = "1200-1222",
abstract =
"In this paper we consider how to compute the singular value
decomposition (SVD) $A = U\Sigma V^{\tau}$ of
$A=[a_1,a_2] \in R^{m\times 2}$
accurately in floating point arithmetic. It is shown
how to compute the Jacobi rotation $V$ (the right singular vector
matrix) and how to compute $AV=U\Sigma$ even if the floating point
representation of $V$ is the identity matrix. In the case
$\|a_1\|\gg\|a_2\|$, underflow can produce the identity matrix as
the floating point value of $V$ even for $a_1$, $a_2$ that are far from
being mutually orthogonal. This can cause loss of accuracy and failure
of convergence of the floating point implementation of the Jacobi
method for computing the SVD. The modified Jacobi method recommended
in this paper can be implemented as a reliable and highly accurate
procedure for computing the SVD of general real matrices whenever the
exact singular values do not exceed the underflow or overflow limits."
}
@article{Drma08a,
author = "Drmac, Zlatko and Veselic, Kresimir",
title = "New fast and accurate Jacobi SVD algorithm I",
journal = "SIAM J. Matrix Anal. Appl.",
volume = "35",
number = "2",
year = "2008",
pages = "1322-1342",
comment = "LAPACK Working note 169",
url = "http://www.netlib.org/lapack/lawnspdf/lawn169.pdf",
paper = "Drma08a.pdf",
abstract =
"This paper is the result of contrived efforts to break the barrier
between numerical accuracy and run time efficiency in computing the
fundamental decomposition of numerical linear algebra - the singular
value decomposition (SVD) of a general dense matrix. It is an
unfortunate fact that the numerically most accurate one-sided Jacobi
SVD algorithm is several times slower than generally less accurate
bidiagonalization based methods such as the QR or the divide and
conquer algorithm. Despite its sound numerical qualities, the Jacobi
SVD is not included in the state of the art matrix computation
libraries and it is even considered obsolete by some leading
researchers. Our quest for a highly accurate and efficient SVD
algorithm has led us to a new, superior variant of the Jacobi
algorithm. The new algorithm has inherited all good high accuracy
properties, and it outperforms not only the best implementations of
the one-sided Jacobi algorithm but also the QR algorithm. Moreover,
it seems that the potential of the new approach is yet to be fully
exploited."
}
@article{Drma08b,
author = "Drmac, Zlatko and Veselic, Kresimir",
title = "New fast and accurate Jacobi SVD algorithm II",
journal = "SIAM J. Matrix Anal. Appl.",
volume = "35",
number = "2",
year = "2008",
pages = "1343-1362",
comment = "LAPACK Working note 170",
url = "http://www.netlib.org/lapack/lawnspdf/lawn170.pdf",
paper = "Drma08b.pdf",
abstract =
"This paper presents new implementation of one-sided Jacobi SVD for
triangular matrices and its use as the core routine in a new
preconditioned Jacobi SVD algorithm, recently proposed by the
authors. The new pivot strategy exploits the triangular form and uses the
fact that the input triangular matrix is the result of rank revealing
QR factorization. If used in the preconditioned Jacobi SVD algorithm,
described in the first part of this report, it delivers superior
performance leading to the currently fastest method for computing SVD
decomposition with high relative accuracy. Furthermore, the efficiency
of the new algorithm is comparable to the less accurate
bidiagonalization based methods. The paper also discusses underflow
issues in floating point implementation, and shows how to use
perturbation theory to fix the imperfectness of machine arithmetic on
some systems."
}
@article{Drma08c,
author = "Drmac, Zlatko and Bujanovic, Zvonimir",
title = "On the failure of rank-revealing QR factorization software -
a case study.",
journal = "ACM Trans. math. Softw.",
volume = "35",
number = "2",
year = "2008",
pages = "1-28",
comment = "LAPACK Working note 176",
url = "http://www.netlib.org/lapack/lawnspdf/lawn176.pdf",
paper = "Drma08c.pdf",
abstract =
"This note reports an unexpected and rather erratic behavior of the
LAPACK software implementation of the QR factorization with
Businger-Golub column pivoting. It is shown that, due to finite
precision arithmetic, a software implementation of the factorization can
catastrophically fail to produce a triangular factor with the structure
characteristic of the Businger-Golub pivot strategy. The failure of
current {\sl state of the art} software and a proposed alternative
implementation are analyzed in detail."
}
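
For comparison, a straightforward Householder QR with Businger-Golub
pivoting that recomputes the remaining column norms at every step.
LAPACK instead downdates the norms for speed, which is the source of
the failure analyzed here. A sketch only:

  import numpy as np

  def qr_column_pivot(A):
      R = np.array(A, dtype=float)
      m, n = R.shape
      piv = np.arange(n)
      Q = np.eye(m)
      for k in range(min(m, n)):
          # Businger-Golub: bring the largest remaining column forward.
          j = k + int(np.argmax(np.linalg.norm(R[k:, k:], axis=0)))
          R[:, [k, j]] = R[:, [j, k]]
          piv[[k, j]] = piv[[j, k]]
          x = R[k:, k].copy()
          if np.linalg.norm(x) == 0.0:
              break
          v = x.copy()
          v[0] += np.copysign(np.linalg.norm(x), x[0])
          v /= np.linalg.norm(v)
          H = np.eye(m)
          H[k:, k:] -= 2.0 * np.outer(v, v)  # Householder reflector
          R = H @ R
          Q = Q @ H
      return Q, R, piv  # A[:, piv] is approximately Q @ R
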
@phdthesis{Dhil97,
author = "Dhillon, Inderjit Singh",
title = "A New $O(n^2)$ Algorithm for the Symmetric Tridiagonal
Eigenvalue/Eigenvector Problem",
school = "University of California, Berkeley",
year = "1997",
url = "http://www.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-971.pdf",
paper = "Dhil97.pdf",
abstract =
"Computing the eigenvalues and orthogonal eigenvectors of an $n\times n$
symmetric tridiagonal matrix is an important task that arises while
solving any symmetric eigenproblem. All practical software requires
$O(n^3)$ time to compute all the eigenvectors and ensure their
orthogonality when eigenvalues are close. In the first part of this
thesis we review earlier work and show how some existing
implementations of inverse iteration can fail in surprising ways.

The main contribution of this thesis is a new $O(n^2)$, easily
parallelizable algorithm for solving the tridiagonal
eigenproblem. Three main advances lead to our new algorithm. A
tridiagonal matrix is traditionally represented by its diagonal and
off-diagonal elements. Our most important advance is in recognizing
that its bidiagonal factors are ``better'' for computational
purposes. The use of bidiagonals enables us to invoke a relative
criterion to judge when eigenvalues are ``close''. The second advance
comes with using multiple bidiagonal factorizations in order to
compute different eigenvectors independently of each other. Thirdly,
we use carefully chosen dqds-like transformations as inner loops to
compute eigenpairs that are highly accurate and ``faithful'' to the
various bidiagonal representations. Orthogonality of the eigenvectors
is a consequence of this accuracy. Only $O(n)$ work per eigenpair is
needed by our new algorithm.

Conventional wisdom is that there is usually a trade-off between speed
and accuracy in numerical procedures, i.e., higher accuracy can be
achieved only at the expense of greater computing time. An interesting
aspect of our work is that increased accuracy in the eigenvalues and
eigenvectors obviates the need for explicit orthogonalization and
leads to greater speed.

We present timing and accuracy results comparing a computer
implementation of our new algorithm with four existing EISPACK and
LAPACK software routines. Our test-bed contains a variety of
tridiagonal matrices, some coming from quantum chemistry
applications. The numerical results demonstrate the superiority of
our new algorithm. For example, on a matrix of order 966 that occurs in
the modeling of a biphenyl molecule our method is about 10 times
faster than LAPACK's inverse iteration on a serial IBM RS/6000
processor and nearly 100 times faster on a 128 processor IBM SP2
parallel machine."
}
@article{Dhil04a,
author = "Dhillon, Inderjit S. and Parlett, Beresford N.",
title = "Multiple representations to compute orthogonal eigenvectors
of symmetric tridiagonal matrices",
journal = "Linear Algebra and its Applications",
volume = "387",
number ="1",
pages = "1-28",
year = "2004",
month = "August",
paper = "Dhil04a.pdf",
abstract =
"In this paper we present an $O(nk)$ procedure, Algorithm $MR^3$, for
computing $k$ eigenvectors of an $n\times n$ symmetric tridiagonal
matrix $T$. A salient feature of the algorithm is that a number of
different $LDL^t$ products ($L$ unit lower triangular, $D$ diagonal)
are computed. In exact arithmetic each $LDL^t$ is a factorization of a
translate of $T$. We call the various $LDL^t$ products
{\sl representations} (of $T$) and, roughly speaking, there is a
representation for each cluster of close eigenvalues. The unfolding of
the algorithm, for each matrix, is well described by a
{\sl representation tree}. We present the tree and use it to show that if
each representation satisfies three prescribed conditions then the
computed eigenvectors are orthogonal to working accuracy and have
small residual norms with respect to the original matrix $T$."
}
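
Computing one such representation is a short recurrence; a sketch with
no element-growth monitoring (which the real algorithm needs when it
chooses shifts):

  import numpy as np

  def ldl_tridiag(d, e, sigma):
      # Factor T - sigma*I = L D L^t for symmetric tridiagonal T with
      # diagonal d and off-diagonal e; L is unit lower bidiagonal.
      # Assumes no zero pivot is encountered.
      n = len(d)
      D = np.empty(n)
      L = np.empty(n - 1)
      D[0] = d[0] - sigma
      for i in range(1, n):
          L[i - 1] = e[i - 1] / D[i - 1]
          D[i] = (d[i] - sigma) - L[i - 1] * e[i - 1]
      return L, D
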
@article{Dhil04,
author = "Dhillon, Inderjit S. and Parlett, Beresford N.",
title = "Orthogonal Eigenvectors and Relative Gaps",
journal = "SIAM Journal on Matrix Analysis and Applications",
volume = "25",
year = "2004",
paper = "Dhil04.pdf",
abstract =
"Let $LDL^t$ be the triangular factorization of a real symmetric
$n\times n$ tridiagonal matrix so that $L$ is a unit lower bidiagonal
matrix, $D$ is diagonal. Let $(\lambda,\nu)$ be an eigenpair,
$\lambda \ne 0$, with the property that both $\lambda$ and $\nu$ are
determined to high relative accuracy by the parameters in $L$ and $D$.
Suppose also that the relative gap between $\lambda$ and its nearest
neighbor $\mu$ in the spectrum exceeds $1/n$; that is, $n|\lambda-\mu| > |\lambda|$.

This paper presents a new $O(n)$ algorithm and a proof that, in the
presence of round-off errors, the algorithm computes an approximate
eigenvector $\hat{\nu}$ that is accurate to working precision
$|\sin \angle(\nu,\hat{\nu})| = O(n\epsilon)$, where $\epsilon$ is the
round-off unit. It follows that $\hat{\nu}$ is numerically orthogonal to
all the other eigenvectors. This result forms part of a program to
compute numerically orthogonal eigenvectors without resorting to the
Gram-Schmidt process.

The contents of this paper provide a high-level description and
theoretical justification for LAPACK (version 3.0) subroutine DLAR1V."
}
@article{Gent74,
author = "Gentlman, W. Morven and Marovich, Scott B.",
title = "More on algorithms that reveal properties of floating point
arithmetic units.",
journal = "Comm. of the ACM",
year = "1974",
month = "April",
volume = "17",
number = "5",
pages = "276-277",
abstract =
"In the interests of producing portable mathematical software, it is
highly desirable for a program to be able directly to obtain
fundamental properties of the environment in which it is to run. The
installer would then not be obliged to change appropriate magic
constants in the source code, and the user would not have to provide
information he may very well not understand. Until the standard
definitions of programming languages are changed to require builtin
functions that provide this information, we will have to resort to
writing routines that discover it."
}
@article{High86,
author = "Higham, Nicholas J.",
title = "Efficient Algorithms for Computing the Condition Number of a
Tridiagonal Matrix",
journal = "SIAM J. Sci. Stat. Comput.",
volume = "7",
number = "1",
year = "1986",
month = "January",
paper = "High86.pdf",
abstract =
"Let $A$ be a tridiagonal matrix of order $n$. We show that it is
possible to compute $||A^{-1}||_{\infty}$ and hence
${\rm cond}_{\infty}(A)$ in $O(n)$ operations. Several algorithms
which perform this task are given and their numerical properties are
investigated.

If $A$ is also positive definite then $||A^{-1}||_{\infty}$ can be
computed as the norm of the solution to a positive definite
tridiagonal linear system whose coefficient matrix is closely related
to $A$. We show how this computation can be carried out in parallel
with the solution of a linear system $Ax=b$. In particular we describe
some simple modifications to the LINPACK routine SPTSL which enable
this routine to compute ${\rm cond}_1(A)$, efficiently, in addition to
solving $Ax=b$."
}
@article{Kags89,
author = "Kagstrom, Bo and Westin, L.",
title = "Generalized Schur Methods with Condition Estimators for Solving
the Generalized Sylvester Equation",
journal = "IEEE Transactions on Automatic Control",
volume = "34",
number = "7",
year = "1989",
month = "July",
pages = "745-751",
abstract =
"Stable algorithms are presented for solving the generalized Sylvester
equation. They are based on orthogonal equivalence transformations of
the original problem. Perturbation theory and rounding error analysis
are included. Condition estimators (${\rm Dif}^{-1}$-estimators) are
developed which when substituted into derived error bounds give
accuracy estimates of a computed solution. Results from numerical
experiments on well-conditioned and ill-conditioned problems are
reported."
}
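
On toy problems the equation can be solved directly by vectorization,
which also exposes the conditioning quantity: Dif[(A,D),(B,E)] is the
smallest singular value of the matrix Z below. A dense sketch,
O((mn)^3) work, so small m and n only:

  import numpy as np

  def gen_sylvester(A, B, D, E, C, F):
      # Solve (A R - L B, D R - L E) = (C, F) for R and L by stacking
      # vec(R) and vec(L) into one linear system.
      m, n = C.shape
      Im, In = np.eye(m), np.eye(n)
      Z = np.block([
          [np.kron(In, A), -np.kron(B.T, Im)],
          [np.kron(In, D), -np.kron(E.T, Im)],
      ])
      rhs = np.concatenate([C.reshape(-1, order="F"),
                            F.reshape(-1, order="F")])
      sol = np.linalg.solve(Z, rhs)
      R = sol[:m * n].reshape(m, n, order="F")
      L = sol[m * n:].reshape(m, n, order="F")
      return R, L
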
@incollection{Kags93a,
author = "Kagstrom, B.",
title = "A Direct Method for Reordering Eigenvalues in the
Generalized Real Schur Form of a Regular Matrix Pair (A, B)",
year = "1993",
pages = "195-218",
booktitle = "Linear Algebra for Large Scale and Real-Time Applications",
volume = "232",
publisher = "NATO ASI Series",
isbn = "978-90-481-4246-0",
abstract =
"A direct orthogonal equivalence transformation method for reordering
the eigenvalues along the diagonal in the generalized real Schur form
of a regular matrix pair $(A,B)$ is presented. Each swap of two
adjacent eigenvalues (real, or complex conjugate pairs) involves
solving a generalized Sylvester equation and the construction of two
orthogonal transformation matrices from certain eigenspaces associated
with the corresponding diagonal blocks. An error analysis of the
direct reordering method is presented. Results from numerical
experiments on well-conditioned as well as ill-conditioned problems
illustrate the stability and the accuracy of the method. Finally, a
direct reordering algorithm with controlled backward error is described."
}
@article{Kags93,
author = "Kagstrom, Bo and Poromaa, Peter",
title = "LAPACK-Style Algorithms and Software for Solving the Generalized
Sylvester Equation and Estimating the Separation between
Regular Matrix Pairs",
year = "1993",
month = "December",
url = "http://www.netlib.org/lapack/lawnspdf/lawn75.pdf",
paper = "Kags93.pdf",
comment = "LAPACK Working Note 75",
abstract =
"Level 3 algorithms for solving the generalized Sylvester equation
$(AR-LB,DR-LE)=(C,F)$ and the transposed analogue
$(A^TU+D^TV,-UB^T-VE^T)=(C,F)$ are presented. These blocked algorithms
permit reuse of data in complex memory hierarchies of current advanced
computer architectures. The separation of two regular matrix pairs
$(A,D)$ and $(B,E)$, Dif[$(A,D)$,$(B,E)$], is defined in terms of the
generalized Sylvester operator $(AR-LB,DR-LE)$. Robust, efficient and
reliable Dif-estimators are presented. The basic problem is to find a
lower bound on Dif$^{-1}$, which can be done by solving generalized
Sylvester equations in triangular form. Frobenius-norm-based and
one-norm-based Dif estimators are described and evaluated. These estimates
lead to computable error bounds for the generalized Sylvester
equation. The one-norm-based estimator makes the condition estimation
uniform with LAPACK. Fortran 77 software that implements our
algorithms for solving generalized Sylvester equations, and for
computing error bounds and Dif-estimators are presented. Computational
experiments that illustrate the accuracy, efficiency and reliability
of our software are also described."
}
@article{Kags94,
author = "Kagstrom, Bo and Poromaa, Peter",
title = "Computing Eigenspaces with Specified Eigenvalues of a Regular
Matrix Pair (A, B) and Condition Estimation: Theory,
Algorithms and Software",
year = "1994",
month = "April",
url = "http://www.netlib.org/lapack/lawns/lawn87.ps",
paper = "Kags94.pdf",
abstract =
"Theory, algorithms and LAPACK style software for computing a pair of
deflating subspaces with specified eigenvalues of a regular matrix
pair $(A,B)$ and error bounds for computed quantities (eigenvalues and
eigenspaces) are presented. The {\sl reordering} of specified
eigenvalues is performed with a direct orthogonal transformation
method with guaranteed numerical stability. Each swap of two adjacent
diagonal blocks in the real generalized Schur form, where at least one
of them corresponds to a complex conjugate pair of eigenvalues,
involves solving a generalized Sylvester equation and the construction
of two orthogonal transformation matrices from certain eigenspaces
associated with the diagonal blocks. The swapping of two $1\times 1$
blocks is performed using orthogonal (unitary) Givens rotations. The
{\sl error bounds} are based on estimates of condition numbers for
eigenvalues and eigenspaces. The software computes reciprocal values
of a condition number for an individual eigenvalue (or a cluster of
eigenvalues), a condition number for an eigenvector (or eigenspace),
and spectral projectors onto a selected cluster. By computing
reciprocal values we avoid overflow. Changes in eigenvectors and
eigenspaces are measured by their change in angle. The condition
numbers yield both {\sl asymptotic} and {\sl global} error bounds. The
asymptotic bounds are only accurate for small perturbations $(E,F)$ of
$(A,B)$, while the global bounds work for all $||(E,F)||$ up to a
certain bound, whose size is determined by the conditioning of the
problem. It is also shown how these upper bounds can be
estimated. Fortran 77 {\sl software} that implements our algorithms
for reordering eigenvalues, computing (left and right) deflating
subspaces with specified eigenvalues and condition number estimation
are presented. Computational experiments that illustrate the accuracy,
efficiency and reliability of our software are also described."
}
@article{Kags94b,
author = "Kagstrom, Bo",
title = "A Perturbation Analysis of the Generalized Sylvester Equation
$(AR-LB,DR-LE)=(C,F)$",
journal = "SIAM J. Matrix Anal. and Appl.",
volume = "15",
number = "4",
pages = "1045-1060",
year = "1994",
abstract =
"Perturbation and error bounds for the generalized Sylvester equation
$(AR-LB,DR-LE)=(C,F)$ are presented. An explicit expression for the
normwise relative backward error associated with an approximate
solution of the generalized Sylvester equation is derived and
conditions when it can be much greater than the relative residual are
given. This analysis is applicable to any method that solves the
generalized Sylvester equation. A condition number that reflects the
structure of the problem and a normwise forward error bound based on
${\rm Dif}^{-1}[(A,D),(B,E)]$ and the residual are derived. The
structure-preserving condition number can be arbitrarily smaller than
a ${\rm Dif}^{-1}$-based condition number. The normwise error bound
can be evaluated robustly and at moderate cost by using a reliable
${\rm Dif}^{-1}$ estimator. A componentwise LAPACK-style forward error
bound that can be stronger than the normwise error bound is
presented. A componentwise approximate error bound that can be
evaluated to a much lower cost is also proposed. Finally, some
computational experiments that validate and evaluate the perturbation
and error bounds are presented."
}
@techreport{Kaha66,
author = "Kahan, W.",
title = "Accurate Eigenvalues of a Symmetric Tri-Diagonal Matrix",
year = "1966",
month = "July",
institution = "Stanford University",
type = "Technical Report",
number = "CS41",
paper = "Kaha66.pdf",
abstract =
"Having established tight bounds for the quotient of two different
lub-norms of the same tri-diagonal matrix J, the author observes that
these bounds could be of use in an error-analysis provided a suitable
algorithm were found. Such an algorithm is exhibited, and its errors
are thoroughly accounted for, including the effects of scaling,
over/underflow and roundoff. A typical result is that, on a computer
using rounded floating point binary arithmetic, the biggest eigenvalue
of J can be computed easily to within 2.5 units in its last place, and
the smaller eigenvalues will suffer absolute errors which are no
larger. These results are somewhat stronger than had been known before."
}
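
The count-based bisection underneath is easy to state; a plain Python
sketch (a Gershgorin disk bound supplies the starting interval, and
Kahan's contribution is the error analysis, not the recurrence):

  def count_below(d, e, sigma):
      # Number of eigenvalues of the symmetric tridiagonal matrix
      # (diagonal d, off-diagonal e) below sigma: count the negative
      # pivots of T - sigma*I.
      count = 0
      piv = 1.0
      for i in range(len(d)):
          bb = e[i - 1] * e[i - 1] if i > 0 else 0.0
          if piv == 0.0:
              piv = 1e-300  # nudge an exact zero pivot
          piv = (d[i] - sigma) - bb / piv
          if piv < 0.0:
              count += 1
      return count

  def kth_eigenvalue(d, e, k, lo, hi, steps=100):
      # Bisection for the k-th smallest eigenvalue (0-based) in [lo, hi].
      for _ in range(steps):
          mid = 0.5 * (lo + hi)
          if count_below(d, e, mid) <= k:
              lo = mid
          else:
              hi = mid
      return 0.5 * (lo + hi)
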
@article{Livn04,
author = "Livne, Oren E. and Golub, Gene H.",
title = "Scaling by Binormalization",
journal = "Numerical Algorithms",
volume = "35",
number = "1",
pages = "97-120",
year = "2004",
month = "January",
paper = "Livn04.pdf",
abstract =
"We present an interative algorithm (BIN) for scaling all the rows and
columns of a real symmetric matrix to unit 2-norm. We study the
theoretical convergence properties and its relation to optimal
conditioning. Numerical experiments show that BIN requires 2-4
matrix-vector multiplications to obtain an adequate scaling, and in
many cases significantly reduces the condition number, more than other
scaling algorithms. We present generalizations to complex,
non-symmetric and rectangular matrices."
}
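
The target condition is simple to state: with D = diag(d), every row
of DAD should have unit 2-norm, i.e. x_i (B x)_i = 1 where B is A
squared elementwise and x_i = d_i^2. A naive symmetric fixed-point
sketch (not the paper's BIN sweep, whose convergence is actually
analyzed; assumes A has no zero rows):

  import numpy as np

  def binormalize(A, iters=50):
      B = A * A
      x = np.ones(A.shape[0])
      for _ in range(iters):
          x = np.sqrt(x / (B @ x))  # fixed point: x_i * (B x)_i = 1
      D = np.diag(np.sqrt(x))
      return D @ A @ D
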
@article{Malc72,
author = "Malcolm, Michael A.",
title = "Algorithms to reveal properties of floating-point arithmetic",
journal = "Comms of the ACM",
volume = "15",
year = "1972",
pages = "949-951",
url = "http://www.dtic.mil/dtic/tr/fulltext/u2/727104.pdf",
paper = "Malc72.pdf",
abstract =
"Two algorithms are presented in the form of Fortran subroutines. Each
subroutine computes the radix and number of digits of the floating
point numbers and whether rounding or chopping is done by the machine
on which it is run. The methods are shown to work on any ``reasonable''
floating-point computer."
}
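
The radix-finding half of Malcolm's algorithm is a few lines; a Python
transcription (Python floats are IEEE-754 doubles, so it returns 2):

  def radix():
      # Grow a until the float spacing at a exceeds 1; then the first
      # integer b with fl((a + b) - a) == b is the radix.
      a = 1.0
      while ((a + 1.0) - a) == 1.0:
          a *= 2.0
      b = 1.0
      while ((a + b) - a) != b:
          b += 1.0
      return int(b)
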
@article{Marq06,
author = "Marques, Osni A. and Reidy, Jason and Vomel, Christof",
title = "Benefits of IEEE-754 Features in Modern Symmetric Tridiagonal
Eigensolvers",
journal = "SIAM Journal on Scientific Computing",
volume = "28",
number = "5",
year = "2006",
url = "http://www.netlib.org/lapack/lawnspdf/lawn172.pdf",
paper = "Marq06.pdf",
abstract =
"Bisection is one of the most common methods used to compute the
eigenvalues of symmetric tridiagonal matrices. Bisection relies on the
{\sl Sturm count}: for a given shift $\sigma$, the number of negative
pivots in the factorization $T-\sigma{}I=LDL^T$ equals the number of
eigenvalues of $T$ that are smaller than $\sigma$. In IEEE-754
arithmetic, the value $\infty$ permits the computation to continue
past a zero pivot, producing a correct Sturm count when $T$ is
unreduced. Demmel and Li showed in the 90s that using $\infty$ rather
than testing for zero pivots within the loop could improve performance
significantly on certain architectures.

When eigenvalues are to be computed to high relative accuracy, it is
often preferable to work with $LDL^T$ factorizations instead of the
original tridiagonal $T$, see for example the MRRR algorithm. In these
cases, the Sturm count has to be computed from $LDL^T$. The
differential stationary and progressive qds algorithms are the method
of choice.

While it seems trivial to replace $T$ by $LDL^T$, in reality these
algorithms are more complicated: in IEEE-754 arithmetic, a zero pivot
produces an overflow, followed by an invalid exception (NaN), that
renders the Sturm count incorrect.

We present alternative, safe formulations that are guaranteed to
produce the correct result.

Benchmarking these algorithms on a variety of platforms shows that the
original formulation without tests is always faster provided no
exception occurs. The transforms see speed-ups of up to 2.6x over the
careful formulation.

Tests on industrial matrices show that encountering exceptions in
practice is rare. This leads to the following design: First, compute
the Sturm count by the fast but unsafe algorithm. Then, if an
exception occurred, recompute the count by a safe, slower alternative.
The new Sturm count algorithms improve the speed of bisection by up to
2x on our test matrices. Furthermore, unlike the traditional
tiny-pivot substitutions, proper use of IEEE-754 features provides a
careful formulation that imposes no input range restrictions."
}
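
The safe variant of the LDL^T Sturm count reads as follows; the fast
variant the paper benchmarks deletes the zero test and lets IEEE-754
infinities flow through, recomputing only if a NaN appears. A sketch:

  def ldl_sturm_count(d, l, sigma):
      # Eigenvalue count below sigma for T = L D L^T (d = diagonal of
      # D, l = subdiagonal of unit bidiagonal L), via the differential
      # stationary qds transform; by Sylvester's law of inertia the
      # negative pivots count the eigenvalues below the shift.
      count = 0
      s = -sigma
      for i in range(len(d)):
          dplus = d[i] + s
          if dplus < 0.0:
              count += 1
          if i < len(d) - 1:
              if dplus == 0.0:
                  dplus = -1e-300  # safe substitute for a zero pivot
              s = (l[i] * l[i] * d[i]) * (s / dplus) - sigma
      return count
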
@book{Moon93,
editor = "Moonen, Marc S. and Golub, Gene H. and De Moor, Bart L.R.",
title = "Linear Algebra for Large Scale and Real-Time Applications",
year = "1993",
series = "NATO ASI Series",
publisher = "Kluwer Academic Publishers",
isbn = "978-90-481-4246-0"
}
@article{Ward81,
author = "Ward, Robert C.",
title = "Balancing the generalized eigenvalue problem",
journal = "SIAM J. Sci. and Stat. Comput.",
volume = "2",
number = "2",
pages = "141-152",
abstract =
"An algorithm is presented for balancing the $A$ and $B$ matirces
prior to computing the eigensystem of the generalized eigenvalue
problem $Ax=\lambda Bx$. The three-step algorithm is specifically
designed to precede the $QZ$-type algorithms, but improved performance
is expected from most eigensystem solvers. Permutations and two-sided
diagonal transformations are applied to $A$ and $B$ to produce
matrices with certain desirable properties. Test cases are presented
to illustrate the improved accuracy of the computed eigenvalues."
}
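
The scaling step uses only powers of two, so it is exact and leaves
the pencil's eigenvalues untouched. A crude sketch of that step alone
(Ward's algorithm also permutes and picks the scalings by a more
careful criterion):

  import numpy as np

  def balance_pair(A, B, iters=5):
      # Equalize row and column weight of the pencil (A, B) with
      # two-sided diagonal scalings restricted to powers of two.
      A, B = np.array(A, dtype=float), np.array(B, dtype=float)
      n = A.shape[0]
      for _ in range(iters):
          for i in range(n):
              r = np.abs(A[i, :]).sum() + np.abs(B[i, :]).sum()
              c = np.abs(A[:, i]).sum() + np.abs(B[:, i]).sum()
              if r == 0.0 or c == 0.0:
                  continue
              f = 2.0 ** round(float(np.log2(c / r)) / 2.0)
              A[i, :] *= f; B[i, :] *= f  # row scaling (D1)
              A[:, i] /= f; B[:, i] /= f  # column scaling (D2)
      return A, B
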
---
books/bookvolbib.pamphlet | 833 +++++++++++++++++++++++++++++++++++++++-
changelog | 2 +
patch | 702 +++++++++++++++++++++++++++++++++-
src/axiom-website/patches.html | 2 +
4 files changed, 1531 insertions(+), 8 deletions(-)
diff --git a/books/bookvolbib.pamphlet b/books/bookvolbib.pamphlet
index 9e861a2..b982f3e 100644
--- a/books/bookvolbib.pamphlet
+++ b/books/bookvolbib.pamphlet
@@ -1880,6 +1880,96 @@ when shown in factored form.
\section{Numerical Algorithms} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\index{Anda, A.A.}
+\index{Park,H.}
+\begin{chunk}{axiom.bib}
+@article{Anda94,
+ author = "Anda, A.A. and Park, H.",
+ title = "Fast plane rotations with dynamic scaling",
+ journal = "SIAM J. matrix Anal. Appl.",
+ volume = "15",
+ year = "1994",
+ pages = "162-174"
+}
+
+\end{chunk}
+
+\index{Anda, Andrew Allen}
+\begin{chunk}{axiom.bib}
+@phdthesis{Anda95,
+ author = "Anda, Andrew Allen",
+ title = "Self-Scaling Fast Plane Rotation Algorithms",
+ school = "University of Minnesota",
+ year = "1995",
+ month = "February",
+ paper = "Anda95.pdf",
+ abstract =
+ "A suite of {\sl self-scaling} fast circular plane rotation algorithms
+ is developed which obviates the monitoring and periodic rescaling
+ necessitated by the standard set of fast plane rotation
+ algorithms. Self-Scaling fast rotations dynamically preserve the
+ normed scaling of the diagonal factor matrix whereas standard fast
+ rotations exhibit divergent scaling. Variations on standard fast
+ rotation matrices are developed and algorithms which implement them
+ are offered. Self-Scaling fast rotations are shown to be essentially
+ as accurate as slow rotations and at least as efficient as standard
+ fast rotations. Computational experimental results utilizing the
+ Cray-2 illustrate the effectively stable scaling exhibited by
+ self-scaling fast rotations. Jacobi-class algorithms with one-sided
+ alterations are developed for the algebraic eigenvalue decomposition
+ using self-scaling fast plane rotations and one-sided
+ modifications. The new algorithms are shown to be both accurate and
+ efficient on both vector and parallel architectures. The utility is
+ described of applying fast plane rotations towards the rank-one update
+ and downdate of least squares factorizations. The equivalence is
+ illuminated of LINPACK, hyperbolic rotation, and fast negatively
+ weighted plane rotation downdating. Algorithms are presented which
+ apply self-scaling fast plane rotations to the QR factorization for
+ stiff least squares problems. Both fast and standard Givens
+ rotation-based algorithms are shown to produce accurate results
+ regardless of row sorting even with extremely heavily row weighted
+ matrices. Such matrices emanate, e.g., from equality constrained least
+ squares problems solved via the weighting method. The necessity of
+ column sorting is emphasized. Numerical tests expose the Householder
+ QR factorization algorithm to be sensitive to row sorting and it
+ yields less accurate results for greater weights. Additionally, the
+ modified Gram-Schmidt algorithm is shown to be sensitive to row
+ sorting to a notably significant but lesser degree. Self-Scaling fast
+ plane rotation algorithms, having competitive computational
+ complexities, must therefore be the method of choice for the QR
+ factorization of stiff matrices. Timing results on the Cray 2, [XY]M/P,
+ and C90, of rotations both in and out of a matrix factorization
+ context are presented. Architectural features that can best exploit
+ the advantageous features of the new fast rotations are subsequently
+ discussed."
+
+}
+
+\end{chunk}
+
+\index{Baudin, Michael}
+\index{Smith, Robert L.}
+\begin{chunk}{axiom.bib}
+@misc{Baud12,
+ author = "Baudin, Michael and Smith, Robert L.",
+ title = "A Robust Complex Division in Scilab",
+ url = "http://arxiv.org/pdf/1210.4539.pdf",
+ year = "2012",
+ month = "October",
+ paper = "Baud12.pdf",
+ abstract =
+ "The most widely used algorithm for floating point complex division,
+ known as Smith's method, may fail more often than expected. This
+ document presents two improved complex division algorithms. We present
+ a proof of robustness of the first improved algorithm. Numerical
+ simulations show that this algorithm performs well in practice and is
+ significantly more robust than other known implementations. By
+ combining additional scaling methods with this first algorithm, we
+ were able to create a second algorithm, which rarely fails."
+}
+
+\end{chunk}
+
\index{Bronstein, Manuel}
\begin{chunk}{ignore}
{Bro99,
@@ -2012,6 +2102,344 @@ when shown in factored form.
\end{chunk}
+\index{Demmel, James}
+\index{Kahan, W.}
+\begin{chunk}{axiom.bib}
+@article{Demm90,
+ author = "Demmel, James and Kahan, W.",
+ title = "Accurate Singular Values of Bidiagonal Matrices",
+ journal = "SIAM J. Sci. Stat. Comput.",
+ volume = "11",
+ number = "5",
+ pages = "873-912",
+ year = "1990",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn03.pdf",
+ paper = "Demm90.pdf",
+ abstract =
+ "Computing the singular values of a bidiagonal matrix is the final
+ phase of the standard algorithm for the singular value decomposition
+ of a general matrix. We present a new algorithm which computes all the
+ singular values of a bidiagonal matrix to high relative accuracy
+ independent of their magnitudes. In contrast, the standard algorithm
+ for bidiagonal matrices may compute small singular values with no
+ relative accuracy at all. Numerical experiments show that the new
+ algorithm is comparable in speed to the standard algorithm, and
+ frequently faster."
+
+}
+
+\end{chunk}
+
+\index{Demmel, James}
+\index{Veselic, Kresimir}
+\begin{chunk}{axiom.bib}
+@article{Demm92,
+ author = "Demmel, James and Veselic, Kresimir",
+ title = "Jacobi's Method is More Accurate than QR",
+ journal = "SIAM J. Matrix Anal. and Appl",
+ volume = "13",
+ number = "4",
+ pages = "1204-1245",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn15.pdf",
+ paper = "Demm92.pdf",
+ abstract =
+ "We show that Jacobi's method (with a proper stopping criterion)
+ computes small eigenvalues of symmetric positive definite matrices
+ with a uniformly better relative accuracy bound than QR, divide and
+ conquer, traditional bisection, or any algorithm which first involves
+ tridiagonalizing the matrix. In fact, modulo an assumption based on
+ extensive numerical tests, we show that Jacobi's method is optimally
+ accurate in the following sense: if the matrix is such that small
+ relative errors in its entries cause small relative errors in its
+ eigenvalues, Jacobi will compute them with nearly this accuracy. In
+ other words, as long as the initial matrix has small relative errors
+ in each component, even using infinite precision will not improve on
+ Jacobi (modulo factors of dimensionality). We also show the
+ eigenvectors are computed more accurately by Jacobi than previously
+ thought possible. We prove similar results for using one-sided Jacobi
+ for the singular value decomposition of a general matrix."
+}
+
+\end{chunk}
+
+\index{de Rijk, P.P.}
+\begin{chunk}{axiom.bib}
+@article{Rijk89,
+ author = "de Rijk, P.P.",
+ title = "A One-sided Jacobi Algorithm for Computing the Singular Value
+ Decomposition on a vector computer",
+ journal = "SIAM J. Sci. Stat. Comput.",
+ volume = "10",
+ number = "2",
+ month = "March",
+ year = "1989",
+ pages = "359-371",
+ abstract =
+ "An old algorithm for computing the singular value decomposition,
+ which was first mentioned by Hestenes, has gained renewed interest
+ because of its properties of parallelism and vectorizability. Some
+ computational modifications are given and a comparison with the
+ well-known Golub-Reinsch algorithm is made. Comparative experiments on
+ a CYBER 205 are reported."
+
+}
+
+\end{chunk}
+
+\index{Drmac, Zlatko}
+\begin{chunk}{axiom.bib}
+@article{Drma97,
+ author = "Drmac, Zlatko",
+ title = "Implementation of Jacobi Rotations for Accurate Singular Value
+ Computation in Floating Point Arithmetic",
+ journal = "SIAM Journal on Scientific Computing",
+ volume = "18",
+ number = "4",
+ year = "1997",
+ month = "July",
+ pages = "1200-1222",
+ abstract =
+ "In this paper we consider how to compute the singular value
+ decomposition (SVD) $A = U\Sigma V^{\tau}$ of
+ $A=[a_1,a_2] \in R^{m\times 2}$
+ accurately in floating point arithmetic. It is shown
+ how to compute the Jacobi rotation $V$ (the right singular vector
+ matrix) and how to compute $AV=U\Sigma$ even if the floating point
+ representation of $V$ is the identity matrix. In the case
+ $\|a_1\|\gg\|a_2\|$, underflow can produce the identity matrix as
+ the floating point value of $V$ even for $a_1$, $a_2$ that are far from
+ being mutually orthogonal. This can cause loss of accuracy and failure
+ of convergence of the floating point implementation of the Jacobi
+ method for computing the SVD. The modified Jacobi method recommended
+ in this paper can be implemented as a reliable and highly accurate
+ procedure for computing the SVD of general real matrices whenever the
+ exact singular values do not exceed the underflow or overflow limits."
+
+}
+\end{chunk}
+
+\index{Drmac, Zlatko}
+\index{Veselic, Kresimir}
+\begin{chunk}{axiom.bib}
+@article{Drma08a,
+ author = "Drmac, Zlatko and Veselic, Kresimir",
+ title = "New fast and accurate Jacobi SVD algorithm I",
+ journal = "SIAM J. Matrix Anal. Appl.",
+ volume = "35",
+ number = "2",
+ year = "2008",
+ pages = "1322-1342",
+ comment = "LAPACK Working note 169",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn169.pdf",
+ paper = "Drma08a.pdf",
+ abstract =
+ "This paper is the result of contrived efforts to break the barrier
+ between numerical accuracy and run time efficiency in computing the
+ fundamental decomposition of numerical linear algebra - the singular
+ value decomposition (SVD) of a general dense matrix. It is an
+ unfortunate fact that the numerically most accurate one-sided Jacobi
+ SVD algorithm is several times slower than generally less accurate
+ bidiagonalization based methods such as the QR or the divide and
+ conquer algorithm. Despite its sound numerical qualities, the Jacobi
+ SVD is not included in the state of the art matrix computation
+ libraries and it is even considered obsolete by some leading
+ researchers. Our quest for a highly accurate and efficient SVD
+ algorithm has led us to a new, superior variant of the Jacobi
+ algorithm. The new algorithm has inherited all good high accuracy
+ properties, and it outperforms not only the best implementations of
+ the one-sided Jacobi algorithm but also the QR algorithm. Moreover,
+ it seems that the potential of the new approach is yet to be fully
+ exploited."
+}
+
+\end{chunk}
+
+
+\index{Drmac, Zlatko}
+\index{Veselic, Kresimir}
+\begin{chunk}{axiom.bib}
+@article{Drma08b,
+ author = "Drmac, Zlatko and Veselic, Kresimir",
+ title = "New fast and accurate Jacobi SVD algorithm II",
+ journal = "SIAM J. Matrix Anal. Appl.",
+ volume = "35",
+ number = "2",
+ year = "2008",
+ pages = "1343-1362",
+ comment = "LAPACK Working note 170",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn170.pdf",
+ paper = "Drma08b.pdf",
+ abstract =
+ "This paper presents new implementation of one-sided Jacobi SVD for
+ triangular matrices and its use as the core routine in a new
+ preconditioned Jacobi SVD algorithm, recently proposed by the
+ authors. The new pivot strategy exploits the triangular form and uses the
+ fact that the input triangular matrix is the result of rank revealing
+ QR factorization. If used in the preconditioned Jacobi SVD algorithm,
+ described in the first part of this report, it delivers superior
+ performance leading to the currently fastest method for computing SVD
+ decomposition with high relative accuracy. Furthermore, the efficiency
+ of the new algorithm is comparable to the less accurate
+ bidiagonalization based methods. The paper also discusses underflow
+ issues in floating point implementation, and shows how to use
+ perturbation theory to fix the imperfectness of machine arithmetic on
+ some systems."
+}
+
+\end{chunk}
+
+\index{Drmac, Zlatko}
+\index{Bujanovic, Zvonimir}
+\begin{chunk}{axiom.bib}
+@article{Drma08c,
+ author = "Drmac, Zlatko and Bujanovic, Zvonimir",
+ title = "On the failure of rank-revealing QR factorization software -
+ a case study.",
+ journal = "ACM Trans. math. Softw.",
+ volume = "35",
+ number = "2",
+ year = "2008",
+ pages = "1-28",
+ comment = "LAPACK Working note 176",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn176.pdf",
+ paper = "Drma08c.pdf",
+ abstract =
+ "This note reports an unexpected and rather erratic behavior of the
+ LAPACK software implementation of the QR factorization with
+ Businger-Golub column pivoting. It is shown that, due to finite
+ precision arithmetic, a software implementation of the factorization can
+ catastrophically fail to produce a triangular factor with the structure
+ characteristic of the Businger-Golub pivot strategy. The failure of
+ current {\sl state of the art} software and a proposed alternative
+ implementation are analyzed in detail."
+}
+
+\end{chunk}
+
+
+
+\index{Dhillon, Inderjit Singh}
+\begin{chunk}{axiom.bib}
+@phdthesis{Dhil97,
+ author = "Dhillon, Inderjit Singh",
+ title = "A New $O(n^2)$ Algorithm for the Symmetric Tridiagonal
+ Eigenvalue/Eigenvector Problem",
+ school = "University of California, Berkeley",
+ year = "1997",
+ url = "http://www.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-971.pdf",
+ paper = "Dhil97.pdf",
+ abstract =
+ "Computing the eigenvalues and orthogonal eigenvectors of an $n\times n$
+ symmetric tridiagonal matrix is an important task that arises while
+ solving any symmetric eigenproblem. All practical software requires
+ $O(n^3)$ time to compute all the eigenvectors and ensure their
+ orthogonality when eigenvalues are close. In the first part of this
+ thesis we review earlier work and show how some existing
+ implementations of inverse iteration can fail in surprising ways.
+
+ The main contribution of this thesis is a new $O(n^2)$, easily
+ parallelizable algorithm for solving the tridiagonal
+ eigenproblem. Three main advances lead to our new algorithm. A
+ tridiagonal matrix is traditionally represented by its diagonal and
+ off-diagonal elements. Our most important advance is in recognizing
+ that its bidiagonal factors are ``better'' for computational
+ purposes. The use of bidiagonals enables us to invoke a relative
+ criterion to judge when eigenvalues are ``close''. The second advance
+ comes with using multiple bidiagonal factorizations in order to
+ compute different eigenvectors independently of each other. Thirdly,
+ we use carefully chosen dqds-like transformations as inner loops to
+ compute eigenpairs that are highly accurate and ``faithful'' to the
+ various bidiagonal representations. Orthogonality of the eigenvectors
+ is a consequence of this accuracy. Only $O(n)$ work per eigenpair is
+ needed by our new algorithm.
+
+ Conventional wisdom is that there is usually a trade-off between speed
+ and accuracy in numerical procedures, i.e., higher accuracy can be
+ achieved only at the expense of greater computing time. An interesting
+ aspect of our work is that increased accuracy in the eigenvalues and
+ eigenvectors obviates the need for explicit orthogonalization and
+ leads to greater speed.
+
+ We present timing and accuracy results comparing a computer
+ implementation of our new algorithm with four existing EISPACK and
+ LAPACK software routines. Our test-bed contains a variety of
+ tridiagonal matrices, some coming from quantum chemistry
+ applications. The numerical results demonstrate the superiority of
+ our new algorithm. For example, on a matrix of order 966 that occurs in
+ the modeling of a biphenyl molecule our method is about 10 times
+ faster than LAPACK's inverse iteration on a serial IBM RS/6000
+ processor and nearly 100 times faster on a 128 processor IBM SP2
+ parallel machine."
+}
+
+\end{chunk}
+
+\index{Dhillon, Inderjit S.}
+\index{Parlett, Beresford N.}
+\begin{chunk}{axiom.bib}
+@article{Dhil04a,
+ author = "Dhillon, Inderjit S. and Parlett, Beresford N.",
+ title = "Multiple representations to compute orthogonal eigenvectors
+ of symmetric tridiagonal matrices",
+ journal = "Linear Algebra and its Applications",
+ volume = "387",
+ number ="1",
+ pages = "1-28",
+ year = "2004",
+ month = "August",
+ paper = "Dhil04a.pdf",
+ abstract =
+ "In this paper we present an $O(nk)$ procedure, Algorithm $MR^3$, for
+ computing $k$ eigenvectors of an $n\times n$ symmetric tridiagonal
+ matrix $T$. A salient feature of the algorithm is that a number of
+ different $LDL^t$ products ($L$ unit lower triangular, $D$ diagonal)
+ are computed. In exact arithmetic each $LDL^t$ is a factorization of a
+ translate of $T$. We call the various $LDL^t$ products
+ {\sl representations} (of $T$) and, roughly speaking, there is a
+ representation for each cluster of close eigenvalues. The unfolding of
+ the algorithm, for each matrix, is well described by a
+ {\sl representation tree}. We present the tree and use it to show that if
+ each representation satisfies three prescribed conditions then the
+ computed eigenvectors are orthogonal to working accuracy and have
+ small residual norms with respect to the original matrix $T$."
+}
+
+\end{chunk}
+
+\index{Dhillon, Inderjit S.}
+\index{Parlett, Beresford N.}
+\begin{chunk}{axiom.bib}
+@article{Dhil04,
+ author = "Dhillon, Inderjit S. and Parlett, Beresford N.",
+ title = "Orthogonal Eigenvectors and Relative Gaps",
+ journal = "SIAM Journal on Matrix Analysis and Applications",
+ volume = "25",
+ year = "2004",
+ paper = "Dhil04.pdf",
+ abstract =
+ "Let $LDL^t$ be the triangular factorization of a real symmetric
+ $n\times n$ tridiagonal matrix so that $L$ is a unit lower bidiagonal
+ matrix, $D$ is diagonal. Let $(\lambda,\nu)$ be an eigenpair,
+ $\lambda \ne 0$, with the property that both $\lambda$ and $\nu$ are
+ determined to high relative accuracy by the parameters in $L$ and $D$.
+ Suppose also that the relative gap between $\lambda$ and its nearest
+ neighbor $\mu$ in the spectrum exceeds $1/n$; that is, $n|\lambda-\mu| > |\lambda|$.
+
+ This paper presents a new $O(n)$ algorithm and a proof that, in the
+ presence of round-off errors, the algorithm computes an approximate
+ eigenvector $\hat{\nu}$ that is accurate to working precision
+ $|\sin \angle(\nu,\hat{\nu})| = O(n\epsilon)$, where $\epsilon$ is the
+ round-off unit. It follows that $\hat{\nu}$ is numerically orthogonal to
+ all the other eigenvectors. This result forms part of a program to
+ compute numerically orthogonal eigenvectors without resorting to the
+ Gram-Schmidt process.
+
+ The contents of this paper provide a high-level description and
+ theoretical justification for LAPACK (version 3.0) subroutine DLAR1V."
+}
+
+\end{chunk}
+
\index{Elmroth, E.}
\index{Gustavson, F. G.}
\begin{chunk}{axiom.bib}
@@ -2123,9 +2551,264 @@ when shown in factored form.
\end{chunk}
+\index{Gentleman, W. Morven}
+\index{Marovich, Scott B.}
+\begin{chunk}{axiom.bib}
+@article{Gent74,
+ author = "Gentlman, W. Morven and Marovich, Scott B.",
+ title = "More on algorithms that reveal properties of floating point
+ arithmetic units.",
+ journal = "Comm. of the ACM",
+ year = "1974",
+ month = "April",
+ volume = "17",
+ number = "5",
+ pages = "276-277",
+ abstract =
+ "In the interests of producing portable mathematical software, it is
+ highly desirable for a program to be able directly to obtain
+ fundamental properties of the environment in which it is to run. The
+ installer would then not be obliged to change appropriate magic
+ constants in the source code, and the user would not have to provide
+ information he may very well not understand. Until the standard
+ definitions of programming languages are changed to require builtin
+ functions that provide this information, we will have to resort to
+ writing routines that discover it."
+}
+
+\end{chunk}
+
+\index{Higham, Nicholas J.}
+\begin{chunk}{axiom.bib}
+@article{High86,
+ author = "Higham, Nicholas J.",
+ title = "Efficient Algorithms for Computing the Condition Number of a
+ Tridiagonal Matrix",
+ journal = "SIAM J. Sci. Stat. Comput.",
+ volume = "7",
+ number = "1",
+ year = "1986",
+ month = "January",
+ paper = "High86.pdf",
+ abstract =
+ "Let $A$ be a tridiagonal matrix of order $n$. We show that it is
+ possible to compute $||A^{-1}||_{\infty}$ and hence
+ ${\rm cond}_{\infty}(A)$ in $O(n)$ operations. Several algorithms
+ which perform this task are given and their numerical properties are
+ investigated.
+
+ If $A$ is also positive definite then $||A^{-1}||_{\infty}$ can be
+ computed as the norm of the solution to a positive definite
+ tridiagonal linear system whose coefficient matrix is closely related
+ to $A$. We show how this computation can be carried out in parallel
+ with the solution of a linear system $Ax=b$. In particular we describe
+ some simple modifications to the LINPACK routine SPTSL which enable
+ this routine to compute ${\rm cond}_1(A)$, efficiently, in addition to
+ solving $Ax=b$."
+}
+
+\end{chunk}
+
+\index{Kagstrom, Bo}
+\index{Westin, L.}
+\begin{chunk}{axiom.bib}
+@article{Kags89,
+ author = "Kagstrom, Bo and Westin, L.",
+ title = "Generalized Schur Methods with Condition Estimators for Solving
+ the Generalized Sylvester Equation",
+ journal = "IEEE Transactions on Automatic Control",
+ volume = "34",
+ number = "7",
+ year = "1989",
+ month = "July",
+ pages = "745-751",
+ abstract =
+ "Stable algorithms are presented for solving the generalized Sylvester
+ equation. They are based on orthogonal equivalence transformations of
+ the original problem. Perturbation theory and rounding error analysis
+ are included. Condition estimators (${\rm Dif}^{-1}$-estimators) are
+ developed which when substituted into derived error bounds give
+ accuracy estimates of a computed solution. Results from numerical
+ experiments on well-conditioned and ill-conditioned problems are
+ reported."
+}
+
+\end{chunk}
+
+\index{Kagstrom, B.}
+\begin{chunk}{axiom.bib}
+@incollection{Kags93a,
+ author = "Kagstrom, B.",
+ title = "A Direct Method for Reordering Eigenvalues in the
+ Generalized Real Schur Form of a Regular Matrix Pair (A, B)",
+ year = "1993",
+ pages = "195-218",
+ booktitle = "Linear Algebra for Large Scale and Real-Time Applications",
+ volume = "232",
+ publisher = "NATO ASI Series",
+ isbn = "978-90-481-4246-0",
+ abstract =
+ "A direct orthogonal equivalence transformation method for reordering
+ the eigenvalues along the diagonal in the generalized real Schur form
+ of a regular matrix pair $(A,B)$ is presented. Each swap of two
+ adjacent eigenvalues (real, or complex conjugate pairs) involves
+ solving a generalized Sylvester equation and the construction of two
+ orthogonal transformation matrices from certain eigenspaces associated
+ with the corresponding diagonal blocks. An error analysis of the
+ direct reordering method is presented. Results from numerical
+ experiments on well-conditioned as well as ill-conditioned problems
+ illustrate the stability and the accuracy of the method. Finally, a
+ direct reordering algorithm with controlled backward error is described."
+}
+
+\end{chunk}
+
+\index{Kagstrom, Bo}
+\index{Poromaa, Peter}
+\begin{chunk}{axiom.bib}
+@article{Kags93,
+ author = "Kagstrom, Bo and Poromaa, Peter",
+ title = "LAPACK-Style Algorithms and Software for Solving the Generalized
+ Sylvester Equation and Estimating the Separation between
+ Regular Matrix Pairs",
+ year = "1993",
+ month = "December",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn75.pdf",
+ paper = "Kags93.pdf",
+ comment = "LAPACK Working Note 75",
+ abstract =
+ "Level 3 algorithms for solving the generalized Sylvester equation
+ $(AR-LB,DR-LE)=(C,F)$ and the transposed analogue
+ $(A^TU+D^TV,-UB^T-VE^T)=(C,F)$ are presented. These blocked algorithms
+ permit reuse of data in complex memory hierarchies of current advanced
+ computer architectures. The separation of two regular matrix pairs
+ $(A,D)$ and $(B,E)$, Dif[$(A,D)$,$(B,E)$], is defined in terms of the
+ generalized Sylvester operator $(AR-LB,DR-LE)$. Robust, efficient and
+ reliable Dif-estimators are presented. The basic problem is to find a
+ lower bound on Dif$^{-1}$, which can be done by solving generalized
+ Sylvester equations in triangular form. Frobenius-norm-based and
+ one-norm-based Dif estimators are described and evaluated. These estimates
+ lead to computable error bounds for the generalized Sylvester
+ equation. The one-norm-based estimator makes the condition estimation
+ uniform with LAPACK. Fortran 77 software that implements our
+ algorithms for solving generalized Sylvester equations, and for
+ computing error bounds and Dif-estimators are presented. Computational
+ experiments that illustrate the accuracy, efficiency and reliability
+ of our software are also described."
+
+}
+
+\end{chunk}
+
+\index{Kagstrom, Bo}
+\index{Poromaa, Peter}
+\begin{chunk}{axiom.bib}
+@article{Kags94,
+ author = "Kagstrom, Bo and Poromaa, Peter",
+ title = "Computing Eigenspaces with Specified Eigenvalues of a Regular
+ Matrix Pair (A, B) and Condition Estimation: Theory,
+ Algorithms and Software",
+ year = "1994",
+ month = "April",
+ url = "http://www.netlib.org/lapack/lawns/lawn87.ps",
+ paper = "Kags94.pdf",
+ abstract =
+ "Theory, algorithms and LAPACK style software for computing a pair of
+ deflating subspaces with specified eigenvalues of a regular matrix
+ pair $(A,B)$ and error bounds for computed quantities (eigenvalues and
+ eigenspaces) are presented. The {\sl reordering} of specified
+ eigenvalues is performed with a direct orthogonal transformation
+ method with guaranteed numerical stability. Each swap of two adjacent
+ diagonal blocks in the real generalized Schur form, where at least one
+ of them corresponds to a complex conjugate pair of eigenvalues,
+ involves solving a generalized Sylvester equation and the construction
+ of two orthogonal transformation matrices from certain eigenspaces
+ associated with the diagonal blocks. The swapping of two $1\times 1$
+ blocks is performed using orthogonal (unitary) Givens rotations. The
+ {\sl error bounds} are based on estimates of condition numbers for
+ eigenvalues and eigenspaces. The software computes reciprocal values
+ of a condition number for an individual eigenvalue (or a cluster of
+ eigenvalues), a condition number for an eigenvector (or eigenspace),
+ and spectral projectors onto a selected cluster. By computing
+ reciprocal values we avoid overflow. Changes in eigenvectors and
+ eigenspaces are measured by their change in angle. The condition
+ numbers yield both {\sl asymptotic} and {\sl global} error bounds. The
+ asymptotic bounds are only accurate for small perturbations $(E,F)$ of
+ $(A,B)$, while the global bounds work for all $||(E,F)||$ up to a
+ certain bound, whose size is determined by the conditioning of the
+ problem. It is also shown how these upper bounds can be
+ estimated. Fortran 77 {\sl software} that implements our algorithms
+ for reordering eigenvalues, computing (left and right) deflating
+ subspaces with specified eigenvalues and condition number estimation
+ is presented. Computational experiments that illustrate the accuracy,
+ efficiency and reliability of our software are also described."
+}
+
+\end{chunk}
+
+\index{Kagstrom, Bo}
+\begin{chunk}{axiom.bib}
+@article{Kags94b,
+ author = "Kagstrom, Bo",
+ title = "A Perturbation Analysis of the Generalized Sylvester Equation
+ $(AR-LB,DR-LE)=(C,F)$",
+ journal = "SIAM J. Matrix Anal. and Appl.",
+ volume = "15",
+ number = "4",
+ pages = "1045-1060",
+ year = "1994",
+ abstract =
+ "Perturbation and error bounds for the generalized Sylvester equation
+ $(AR-LB,DR-LE)=(C,F)$ are presented. An explicit expression for the
+ normwise relative backward error associated with an approximate
+ solution of the generalized Sylvester equation is derived and
+ conditions when it can be much greater than the relative residual are
+ given. This analysis is applicable to any method that solves the
+ generalized Sylvester equation. A condition number that reflects the
+ structure of the problem and a normwise forward error bound based on
+ ${\rm Dif}^{-1}[(A,D),(B,E)]$ and the residual are derived. The
+ structure-preserving condition number can be arbitrarily smaller than
+ a ${\rm Dif}^{-1}$-based condition number. The normwise error bound
+ can be evaluated robustly and at moderate cost by using a reliable
+ ${\rm Dif}^{-1}$ estimator. A componentwise LAPACK-style forward error
+ bound that can be stronger than the normwise error bound is
+ presented. A componentwise approximate error bound that can be
+ evaluated to a much lower cost is also proposed. Finally, some
+ computational experiments that validate and evaluate the perturbation
+ and error bounds are presented."
+}
+
+\end{chunk}
+
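+The shape of such bounds can be recovered from the definition of Dif
+alone (standard perturbation reasoning, not the paper's sharper
+structured analysis): if $(\hat{R},\hat{L})$ is an approximate
+solution with residual
+$(S,T) = (C-A\hat{R}+\hat{L}B,\; F-D\hat{R}+\hat{L}E)$, then
+\[
+||(\hat{R}-R,\;\hat{L}-L)||_F \le {\rm Dif}^{-1}\,||(S,T)||_F,
+\]
+so a reliable ${\rm Dif}^{-1}$ estimate converts a computed residual
+directly into a normwise forward error bound.
+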
+\index{Kahan, W.}
+\begin{chunk}{axiom.bib}
+@techreport{Kaha66,
+ author = "Kahan, W.",
+ title = "Accurate Eigenvalues of a Symmetric Tri-Diagonal Matrix",
+ year = "1966",
+ month = "July",
+ institution = "Stanford University",
+ type = "Technical Report",
+ number = "CS41",
+ paper = "Kaha66.pdf",
+ abstract =
+ "Having established tight bounds for the quotient of two different
+ lub-norms of the same tri-diagonal matrix J, the author observes that
+ these bounds could be of use in an error-analysis provided a suitable
+ algorithm were found. Such an algorithm is exhibited, and its errors
+ are thoroughly accounted for, including the effects of scaling,
+ over/underflow and roundoff. A typical result is that, on a computer
+ using rounded floating point binary arithmetic, the biggest eigenvalue
+ of J can be computed easily to within 2.5 units in its last place, and
+ the smaller eigenvalues will suffer absolute errors which are no
+ larger. These results are somewhat stronger than had been known before."
+}
+
+\end{chunk}
+
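+For orientation, a textbook Sturm-count bisection in Python (our
+sketch; Kahan's contribution is precisely the careful rounding-error
+analysis that this naive version lacks):
+\begin{verbatim}
+import numpy as np
+
+def sturm_count(a, b, sigma):
+    # Number of eigenvalues below sigma of the symmetric tridiagonal
+    # matrix with diagonal a and off-diagonal b: count the negative
+    # pivots in the factorization of T - sigma*I.
+    count, d = 0, 1.0
+    for i in range(len(a)):
+        off = b[i-1] ** 2 if i > 0 else 0.0
+        d = (a[i] - sigma) - off / d
+        if d == 0.0:
+            d = -1e-300        # crude guard against a zero pivot
+        if d < 0.0:
+            count += 1
+    return count
+
+def kth_eigenvalue(a, b, k, tol=1e-13):
+    # Bisection between Gershgorin bounds for the k-th smallest
+    # eigenvalue, k = 0, 1, ...
+    a = np.asarray(a, dtype=float)
+    b = np.asarray(b, dtype=float)
+    rad = np.abs(np.r_[0.0, b]) + np.abs(np.r_[b, 0.0])
+    lo, hi = float(np.min(a - rad)), float(np.max(a + rad))
+    while hi - lo > tol * max(1.0, abs(lo), abs(hi)):
+        mid = 0.5 * (lo + hi)
+        if sturm_count(a, b, mid) > k:
+            hi = mid
+        else:
+            lo = mid
+    return 0.5 * (lo + hi)
+\end{verbatim}
+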
\index{Kelsey, Tom}
-\begin{chunk}{ignore}
-{Kel00,
+\begin{chunk}{axiom.bib}
+@misc{Kel00,
author = "Kelsey, Tom",
title = "Exact Numerical Computation via Symbolic Computation",
url = "http://tom.host.cs.st-andrews.ac.uk/pub/ccapaper.pdf",
@@ -2244,6 +2927,112 @@ when shown in factored form.
\end{chunk}
+\index{Livne, Oren E.}
+\index{Golub, Gene H.}
+\begin{chunk}{axiom.bib}
+@article{Livn04,
+ author = "Livne, Oren E. and Golub, Gene H.",
+ title = "Scaling by Binormalization",
+ journal = "Numerical Algorithms",
+ volume = "35",
+ number = "1",
+ pages = "97-120",
+ year = "2004",
+ month = "January",
+ paper = "Livn04.pdf",
+ abstract =
+ "We present an interative algorithm (BIN) for scaling all the rows and
+ columns of a real symmetric matrix to unit 2-norm. We study the
+ theoretical convergence properties and its relation to optimal
+ conditioning. Numerical experiments show that BIN requires 2-4
+ matrix-vector multiplications to obtain an adequate scaling, and in
+ many cases significantly reduces the condition number, more than other
+ scaling algorithms. We present generalizations to complex,
+ non-symmetric and rectangular matrices."
+}
+
+\end{chunk}
+
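+A minimal fixed-point sketch of symmetric equilibration (our
+illustration of the goal; the exact BIN iteration of the paper is
+more refined):
+\begin{verbatim}
+import numpy as np
+
+def equilibrate(A, sweeps=10):
+    # Iteratively rescale symmetric A (no zero rows assumed) so that
+    # every row, and by symmetry every column, of D A D approaches
+    # unit 2-norm.
+    d = np.ones(A.shape[0])
+    for _ in range(sweeps):
+        B = A * np.outer(d, d)           # current D A D
+        r = np.linalg.norm(B, axis=1)    # row 2-norms
+        d /= np.sqrt(r)                  # push the norms toward 1
+    return d                             # scaled matrix: D A D
+\end{verbatim}
+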
+\index{Malcolm, Michael A.}
+\begin{chunk}{axiom.bib}
+@article{Malc72,
+ author = "Malcolm, Michael A.",
+ title = "Algorithms to reveal properties of floating-point arithmetic",
+ journal = "Comms of the ACM",
+ volume = "15",
+ year = "1972",
+ pages = "949-951",
+ url = "http://www.dtic.mil/dtic/tr/fulltext/u2/727104.pdf",
+ paper = "Malc72.pdf",
+ abstract =
+ "Two algorithms are presented in the form of Fortran subroutines. Each
+ subroutine computes the radix and number of digits of the floating
+ point numbers and whether rounding or chopping is done by the machine
+ on which it is run. The methods are shown to work on any ``reasonable''
+ floating-point computer."
+}
+
+\end{chunk}
+
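+The flavor of the method translates directly into any language with
+IEEE-style floating point (a Python rendering of the well-known
+radix/digit probe; Malcolm's original is Fortran and also detects
+rounding versus chopping):
+\begin{verbatim}
+def radix_and_digits():
+    # Find a power of 2 so large that adding 1 no longer changes it.
+    a = 1.0
+    while (a + 1.0) - a == 1.0:
+        a *= 2.0
+    # The smallest b with fl(a + b) > a reveals the radix.
+    b = 1.0
+    while (a + b) - a == 0.0:
+        b += 1.0
+    radix = int((a + b) - a)
+    # Count base-radix digits in the significand the same way.
+    digits, a = 0, 1.0
+    while (a + 1.0) - a == 1.0:
+        a *= radix
+        digits += 1
+    return radix, digits   # (2, 53) for IEEE-754 binary64
+\end{verbatim}
+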
+\index{Marques, Osni A.}
+\index{Riedy, Jason}
+\index{Vomel, Christof}
+\begin{chunk}{axiom.bib}
+@article{Marq06,
+ author = "Marques, Osni A. and Reidy, Jason and Vomel, Christof",
+ title = "Benefits of IEEE-754 Features in Modern Symmetric Tridiagonal
+ Eigensolvers",
+ journal = "SIAM Journal on Scientific Computing",
+ volume = "28",
+ number = "5",
+ year = "2006",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn172.pdf",
+ paper = "Marq06.pdf",
+ abstract =
+ "Bisection is one of the most common methods used to compute the
+ eigenvalues of symmetric tridiagonal matrices. Bisection relies on the
+ {\sl Sturm count}: for a given shift $\sigma$, the number of negative
+ pivots in the factorization $T-\sigma{}I=LDL^T$ equals the number of
+ eigenvalues of $T$ that are smaller than $\sigma$. In IEEE-754
+ arithmetic, the value $\infty$ permits the computation to continue
+ past a zero pivot, producing a correct Sturm count when $T$ is
+ unreduced. Demmel and Li showed in the 90s that using $\infty$ rather
+ than testing for zero pivots within the loop could improve performance
+ significantly on certain architectures.
+
+ When eigenvalues are to be computed to high relative accuracy, it is
+ often preferable to work with $LDL^T$ factorizations instead of the
+ original tridiagonal $T$; see, for example, the MRRR algorithm. In these
+ cases, the Sturm count has to be computed from $LDL^T$. The
+ differential stationary and progressive qds algorithms are the method
+ of choice.
+
+ While it seems trivial to replace $T$ by $LDL^T$, in reality these
+ algorithms are more complicated: in IEEE-754 arithmetic, a zero pivot
+ produces an overflow followed by an invalid exception (NaN), which
+ renders the Sturm count incorrect.
+
+ We present alternative, safe formulations that are guaranteed to
+ produce the correct result.
+
+ Benchmarking these algorithms on a variety of platforms shows that the
+ original formulation without tests is always faster provided no
+ exception occurs. The transforms see speed-ups of up to 2.6x over the
+ careful formulation.
+
+ Tests on industrial matrices show that encountering exceptions in
+ practice is rare. This leads to the following design: First, compute
+ the Sturm count by the fast but unsafe algorithm. Then, if an
+ exception occurred, recompute the count by a safe, slower alternative.
+
+ The new Sturm count algorithms improve the speed of bisection by up to
+ 2x on our test matrices. Furthermore, unlike the traditional
+ tiny-pivot substitutions, proper use of IEEE-754 features provides a
+ careful formulation that imposes no input range restrictions."
+}
+
+\end{chunk}
+
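+A condensed Python sketch of the fast count (our rendering of the
+stationary qds-type recurrence the abstract refers to; production
+codes add the safe fallback described above):
+\begin{verbatim}
+import numpy as np
+
+def sturm_count_ldlt(d, l, sigma):
+    # Eigenvalues of L D L^T below sigma: count the negative pivots
+    # d+_i of L+ D+ L+^T = L D L^T - sigma*I.  With IEEE-754
+    # semantics x/0 gives inf and the loop keeps going; 0/0 gives
+    # NaN, the hazard discussed in the paper.
+    d = np.asarray(d, dtype=np.float64)
+    l = np.asarray(l, dtype=np.float64)
+    count, s = 0, np.float64(-sigma)
+    with np.errstate(divide="ignore", invalid="ignore"):
+        for i in range(len(d) - 1):
+            dplus = d[i] + s
+            if dplus < 0.0:
+                count += 1
+            s = (d[i] / dplus) * l[i] * l[i] * s - sigma
+    if d[-1] + s < 0.0:
+        count += 1
+    return None if np.isnan(s) else count  # None: rerun a safe variant
+\end{verbatim}
+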
\index{Moler, C.B.}
\index{Stewart, G.W.}
\begin{chunk}{axiom.bib}
@@ -2266,6 +3055,20 @@ when shown in factored form.
\end{chunk}
+\index{Moonen, Marc S.}
+\index{Golub, Gene H.}
+\index{De Moor, Bart L.R.}
+\begin{chunk}{axiom.bib}
+@book{Moon93,
+ author = "Moonen, Marc S. and Golub, Gene H. and De Moor, Bart L.R.",
+ title = "Linear Algebra and Large Scale and Real-Time Applications",
+ year = "1993",
+ publisher = "NATO ASI Series",
+ isbn = "978-904814246-0"
+}
+
+\end{chunk}
+
\index{Stoutemyer, David R.}
\begin{chunk}{axiom.bib}
@article{Stou07,
@@ -2334,6 +3137,28 @@ when shown in factored form.
\end{chunk}
+\index{Ward, Robert C.}
+\begin{chunk}{axiom.bib}
+@article{Ward81,
+ author = "Ward, Robert C.",
+ title = "Balancing the generalized eigenvalue problem",
+ journal = "SIAM J. Sci. and Stat. Comput.",
+ volume = "2",
+ number = "2",
+ pages = "141-152",
+ abstract =
+ "An algorithm is presented for balancing the $A$ and $B$ matirces
+ prior to computing the eigensystem of the generalized eigenvalue
+ problem $Ax=\lambda Bx$. The three-step algorithm is specifically
+ designed to preceed the $QZ$-type algorithms, but improved performance
+ is expected from most eigensystem solvers. Permutations and two-sided
+ diagonal transformations are applied to $A$ and $B$ to produce
+ matrices with certain desirable properties. Test cases are presented
+ to illustrate the improved accuracy of the computed eigenvalues."
+}
+
+\end{chunk}
+
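+A crude sketch of two-sided scaling for a pair (our illustration, in
+the spirit of, but not identical to, Ward's three-step algorithm; the
+real method also applies permutations first):
+\begin{verbatim}
+import numpy as np
+
+def balance_pair(A, B, sweeps=5):
+    # Scale row i and column i of both matrices by powers of 2 so
+    # that row and column norms, measured across A and B jointly,
+    # come closer together.  Powers of 2 introduce no rounding error.
+    A, B = A.astype(float).copy(), B.astype(float).copy()
+    n = A.shape[0]
+    for _ in range(sweeps):
+        for i in range(n):
+            r = np.hypot(np.linalg.norm(A[i, :]), np.linalg.norm(B[i, :]))
+            c = np.hypot(np.linalg.norm(A[:, i]), np.linalg.norm(B[:, i]))
+            if r == 0.0 or c == 0.0:
+                continue   # zero rows/columns: left to the permutation step
+            f = 2.0 ** np.round(0.5 * np.log2(c / r))
+            A[i, :] *= f; B[i, :] *= f     # row scaling (left diagonal)
+            A[:, i] /= f; B[:, i] /= f     # column scaling (right diagonal)
+    return A, B
+\end{verbatim}
+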
\section{Special Functions} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\index{Corless, Robert M.}
@@ -2901,7 +3726,7 @@ Kelsey, Tom; Martin, Ursula; Owre, Sam
school = "Cornell University",
year = "1995",
month = "1",
- file = "Jack95.pdf",
+ paper = "Jack95.pdf",
keyword = "axiomref",
note = {
This thesis describes substantial enhancements that were made to the
@@ -17370,7 +18195,7 @@ Math. Tables Aids Comput. 10 91--96. (1956)
\eject
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\chapter{Index}
+\addcontentsline{toc}{chapter}{Index}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\printindex
\end{document}
diff --git a/changelog b/changelog
index ed4685c..40a5bb1 100644
--- a/changelog
+++ b/changelog
@@ -1,3 +1,5 @@
+20160405 tpd src/axiom-website/patches.html 20160405.02.tpd.patch
+20160405 tpd books/bookvolbib add reference material for LAPACK
20160405 tpd src/axiom-website/patches.html 20160405.01.tpd.patch
20160405 tpd books/bookvol* fix table of contents data
20160404 tpd src/axiom-website/patches.html 20160404.04.tpd.patch
diff --git a/patch b/patch
index e8638af..61d47af 100644
--- a/patch
+++ b/patch
@@ -1,7 +1,701 @@
-books/bookvol* fix table of contents data
+books/bookvolbib add reference material for LAPACK
Goal: Axiom Literate Programming
+
+ \section{Numerical Algorithms} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+@article{Anda94,
+ author = "Anda, A.A. and Park, H.",
+ title = "Fast plane rotations with dynamic scaling",
+ journal = "SIAM J. matrix Anal. Appl.",
+ volume = "15",
+ year = "1994",
+ pages = "162-174"
+}
-Instead of using \chapter, use \addcontentsline{toc}{chapter}{}
-for Bibliography and Index chapter headings. This saves pages
-in the PDFs.
+@phdthesis{Anda95,
+ author = "Anda, Andrew Allen",
+ title = "Self-Scaling Fast Plane Rotation Algorithms",
+ school = "University of Minnesota",
+ year = "1995",
+ month = "February",
+ paper = "Anda95.pdf",
+ abstract =
+ "A suite of {\sl self-scaling} fast circular plane rotation algorithms
+ is developed which obviates the monitoring and periodic rescaling
+ necessitated by the standard set of fast plane rotation
+ algorithms. Self-Scaling fast rotations dynamically preserve the
+ normed scaling of the diagonal factor matrix whereas standard fast
+ rotations exhibit divergent scaling. Variations on standard fast
+ rotation matrices are developed and algorithms which implement them
+ are offered. Self-Scaling fast rotations are shown to be essentially
+ as accurate as slow rotations and at least as efficient as standard
+ fast rotations. Computational experimental results utilizing the
+ Cray-2 illustrate the effectively stable scaling exhibited by
+ self-scaling fast rotations. Jacobi-class algorithms with one-sided
+ alterations are developed for the algebraic eigenvalue decomposition
+ using self-scaling fast plane rotations and one-sided
+ modifications. The new algorithms are shown to be both accurate and
+ efficient on both vector and parallel architectures. The utility is
+ described of applying fast plane rotations towards the rank-one update
+ and downdate of least squares factorizations. The equivalence is
+ illuminated of LINPACK, hyperbolic rotation, and fast negatively
+ weighted plane rotation downdating. Algorithms are presented which
+ apply self-scaling fast plane rotations to the QR factorization for
+ stiff least squares problems. Both fast and standard Givens
+ rotation-based algorithms are shown to produce accurate results
+ regardless of row sorting even with extremely heavily row weighted
+ matrices. Such matrices emanate, e.g., from equality constrained least
+ squares problems solved via the weighting method. The necessity of
+ column sorting is emphasized. Numerical tests expose the Householder
+ QR factorization algorithm to be sensitive to row sorting and it
+ yields less accurate results for greater weights. Additionally, the
+ modified Gram-Schmidt algorithm is shown to be sensitive to row
+ sorting to a notably significant but lesser degree. Self-Scaling fast
+ plane rotation algorithms, having competitive computational
+ complexities, must therefore be the method of choice for the QR
+ factorization of stiff matrices. Timing results on the Cray 2 [XY]M/P,
+ and C90, of rotations both in and out of a matrix factorization
+ context are presented. Architectural features that can best exploit
+ the advantageous features of the new fast rotations are subsequently
+ discussed."
+}
+
+@misc{Baud12,
+ author = "Baudin, Michael and Smith, Robert L.",
+ title = "A Robust Complex Division in Scilab",
+ url = "http://arxiv.org/pdf/1210.4539.pdf",
+ year = "2012",
+ month = "October",
+ paper = "Baud12.pdf",
+ abstract =
+ "The most widely used algorithm for floating point complex division,
+ known as Smith's method, may fail more often than expected. This
+ document presents two improved complex division algorithms. We present
+ a proof of robustness of the first improved algorithm. Numerical
+ simulations show that this algorithm performs well in practice and is
+ significantly more robust than other known implementations. By
+ combining additional scaling methods with this first algorithm, we
+ were able to create a second algorithm, which rarely fails."
+}
+
+@article{Demm90,
+ author = "Demmel, James and Kahan, W.",
+ title = "Accurate Singular Values of Bidiagonal Matrices",
+ journal = "SIAM J. Sci. Stat. Comput.",
+ volume = "11",
+ number = "5",
+ pages = "873-912",
+ year = "1990",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn03.pdf",
+ paper = "Demm90.pdf",
+ abstract =
+ "Computing the singular values of a bidiagonal matrix is the final
+ phase of the standard algorithm for the singular value decomposition
+ of a general matrix. We present a new algorithm which computes all the
+ singular values of a bidiagonal matrix to high relative accuracy
+ independent of their magnitudes. In contrast, the standard algorithm
+ for bidiagonal matrices may compute small singular values with no
+ relative accuracy at all. Numerical experiments show that the new
+ algorithm is comparable in speed to the standard algorithm, and
+ frequently faster."
+}
+
+@article{Demm92,
+ author = "Demmel, James and Veselic, Kresimir",
+ title = "Jacobi's Method is More Accurate than QR",
+ journal = "SIAM J. Matrix Anal. and Appl",
+ volume = "13",
+ number = "4",
+ pages = "1204-1245",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn15.pdf",
+ paper = "Demm92.pdf",
+ abstract =
+ "We show that Jacobi's method (with a proper stopping criterion)
+ computes small eigenvalues of symmetric positive definite matrices
+ with a uniformly better relative accuracy bound than QR, divide and
+ conquer, traditional bisection, or any algorithm which first involves
+ tridiagonalizing the matrix. In fact, modulo an assumption based on
+ extensive numerical tests, we show that Jacobi's method is optimally
+ accurate in the following sense: if the matrix is such that small
+ relative errors in its entries cause small relative errors in its
+ eigenvalues, Jacobi will compute them with nearly this accuracy. In
+ other words, as long as the initial matrix has small relative errors
+ in each component, even using infinite precision will not improve on
+ Jacobi (modulo factors of dimensionality). We also show the
+ eigenvectors are computed more accurately by Jacobi than previously
+ thought possible. We prove similar results for using one-sided Jacobi
+ for the singular value decomposition of a general matrix."
+}
+
+@article{Rijk98,
+ author = "de Rijk, P.P.",
+ title = "A One-sided Jacobi Algorithm for Computing the Singular Value
+ Decomposition on a vector computer",
+ journal = "SIAM J. Sci. Stat. Comput.",
+ volume = "10",
+ number = "2",
+ month = "March",
+ year = "1989",
+ pages = "359-371",
+ abstract =
+ "An old algorithm for computing the singular value decomposition,
+ which was first mentioned by Hestenes, has gained renewed interest
+ because of its properties of parallelism and vectorizability. Some
+ computational modifications are given and a comparison with the
+ well-known Golub-Reinsch algorithm is made. Comparative experiments on
+ a CYBER 205 are reported."
+}
+
+@article{Drma97,
+ author = "Drmac, Zlatko",
+ title = "Implementation of Jacobi Rotations for Accurate Singular Value
+ Computation in Floating Point Arithmetic",
+ journal = "SIAM Journal on Scientific Computing",
+ volume = "18",
+ number = "4",
+ year = "1997",
+ month = "July",
+ pages = "1200-1222",
+ abstract =
+ "In this paper we consider how to compute the singular value
+ decomposition (SVD) $A = U\Sigma{}T^{\tau}$ of
+ $A=[a_1,a_2] \in R^{m\times 2}$
+ accurately in floating point arithmetic. It is shown
+ how to compute the Jacobi rotation $V$ (the right singular vector
+ matrix) and how to compute $AV=U\Sigma$ even if the floating point
+ representation of $V$ is the identity matrix. In the case
+ $||a_1||\gg||a_2||$, underflow can produce the identity matrix as
+ the floating point value of $V$ even for $a_1$, $a_2$ that are far from
+ being mutually orthogonal. This can cause loss of accuracy and failure
+ of convergence of the floating point implementation of the Jacobi
+ method for computing the SVD. The modified Jacobi method recommended
+ in this paper can be implemented as a reliable and highly accurate
+ procedure for computing the SVD of general real matrices whenever the
+ exact singular values do not exceed the underflow or overflow limits."
+}
+
+@article{Drma08a,
+ author = "Drmac, Zlatko and Veselic, Kresimir",
+ title = "New fast and accurate Jacobi SVD algorithm I",
+ journal = "SIAM J. Matrix Anal. Appl.",
+ volume = "35",
+ number = "2",
+ year = "2008",
+ pages = "1322-1342",
+ comment = "LAPACK Working note 169",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn169.pdf",
+ paper = "Drma08a.pdf",
+ abstract =
+ "This paper is the result of contrived efforts to break the barrier
+ between numerical accuracy and run time efficiency in computing the
+ fundamental decomposition of numerical linear algebra - the singular
+ value decomposition (SVD) of a general dense matrix. It is an
+ unfortunate fact that the numerically most accurate one-sided Jacobi
+ SVD algorithm is several times slower than generally less accurate
+ bidiagonalization based methods such as the QR or the divide and
+ conquer algorithm. Despite its sound numerical qualities, the Jacobi
+ SVD is not included in the state of the art matrix computation
+ libraries and it is even considered obsolete by some leading
+ researchers. Our quest for a highly accurate and efficient SVD
+ algorithm has led us to a new, superior variant of the Jacobi
+ algorithm. The new algorithm has inherited all good high accuracy
+ properties, and it outperforms not only the best implementations of
+ the one-sided Jacobi algorithm but also the QR algorithm. Moreover,
+ it seems that the potential of the new approach is yet to be fully
+ exploited."
+}
+
+@article{Drma08b,
+ author = "Drmac, Zlatko and Veselic, Kresimir",
+ title = "New fast and accurate Jacobi SVD algorithm II",
+ journal = "SIAM J. Matrix Anal. Appl.",
+ volume = "35",
+ number = "2",
+ year = "2008",
+ pages = "1343-1362",
+ comment = "LAPACK Working note 170",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn170.pdf",
+ paper = "Drma08b.pdf",
+ abstract =
+ "This paper presents new implementation of one-sided Jacobi SVD for
+ triangular matrices and its use as the core routine in a new
+ preconditioned Jacobi SVD algorithm, recently proposed by the
+ authors. New pivot strategy exploits the triangular form and uses the
+ fact that the input triangular matrix is the result of rank revealing
+ QR factorization. If used in the preconditioned Jacobi SVD algorithm,
+ described in the first part of this report, it delivers superior
+ performance leading to the currently fastest method for computing SVD
+ decomposition with high relative accuracy. Furthermore, the efficiency
+ of the new algorithm is comparable to the less accurate
+ bidiagonalization based methods. The paper also discusses underflow
+ issues in floating point implementation, and shows how to use
+ perturbation theory to fix the imperfectness of machine arithmetic on
+ some systems."
+}
+
+@article{Drma08c,
+ author = "Drmac, Zlatko and Bujanovic, Zvonimir",
+ title = "On the failure of rank-revealing QR factorization software -
+ a case study.",
+ journal = "ACM Trans. math. Softw.",
+ volume = "35",
+ number = "2",
+ year = "2008",
+ pages = "1-28",
+ comment = "LAPACK Working note 176",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn176.pdf",
+ paper = "Drma08c.pdf",
+ abstract =
+ "This note reports an unexpected and rather erratic behavior of the
+ LAPACK software implementation of the QR factorization with
+ Businger-Golub column pivoting. It is shown that, due to finite
+ precision arithmetic, software implementation of the factorization can
+ catastrophically fail to produce triangular factor with the structure
+ characteristic to the Businger-Golub pivot strategy. The failure of
+ current {\sl state of the art} software, and proposed alternative
+ implementations are analyzed in detail."
+}
+
+@phdthesis{Dhil97,
+ author = "Dhillon, Inderjit Singh",
+ title = "A New $O(n^2)$ Algorithm for the Symmetric Tridiagonal
+ Eigenvalue/Eigenvector Problem",
+ school = "University of California, Berkeley",
+ year = "1997",
+ url = "http://www.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-971.pdf",
+ paper = "Dhil97.pdf",
+ abstract =
+ "Computing the eigenvalues and orthogonal eigenvectors of an $n\times n$
+ symmetric tridiagonal matrix is an important task that arises while
+ solving any symmetric eigenproblem. All practical software requires
+ $O(n^3)$ time to compute all the eigenvectors and ensure their
+ orthogonality when eigenvalues are close. In the first part of this
+ thesis we review earlier work and show how some existing
+ implementations of inverse iteration can fail in surprising ways.
+
+ The main contribution of this thesis is a new $O(n^2)$, easily
+ parallelizable algorithm for solving the tridiagonal
+ eigenproblem. Three main advances lead to our new algorithm. A
+ tridiagonal matrix is traditionally represented by its diagonal and
+ off-diagonal elements. Our most important advance is in recognizing
+ that its bidiagonal factors are ``better'' for computational
+ purposes. The use of bidiagonals enables us to invoke a relative
+ criterion to judge when eigenvalues are ``close''. The second advance
+ comes with using multiple bidiagonal factorizations in order to
+ compute different eigenvectors independently of each other. Thirdly,
+ we use carefully chosen dqds-like transformations as inner loops to
+ compute eigenpairs that are highly accurate and ``faithful'' to the
+ various bidiagonal representations. Orthogonality of the eigenvectors
+ is a consequence of this accuracy. Only $O(n)$ work per eigenpair is
+ needed by our new algorithm.
+
+ Conventional wisdom is that there is usually a trade-off between speed
+ and accuracy in numerical procedures, i.e., higher accuracy can be
+ achieved only at the expense of greater computing time. An interesting
+ aspect of our work is that increased accuracy in the eigenvalues and
+ eigenvectors obviates the need for explicit orthogonalization and
+ leads to greater speed.
+
+ We present timing and accuracy results comparing a computer
+ implementation of our new algorithm with four existing EISPACK and
+ LAPACK software routines. Our test-bed contains a variety of
+ tridiagonal matrices, some coming from quantum chemistry
+ applications. The numerical results demonstrate the superiority of
+ our new algorithm. For example, on a matrix of order 966 that occurs in
+ the modeling of a biphenyl molecule our method is about 10 times
+ faster than LAPACK's inverse iteration on a serial IBM RS/6000
+ processor and nearly 100 times faster on a 128 processor IBM SP2
+ parallel machine."
+}
+
+@article{Dhil04a,
+ author = "Dhillon, Inderjit S. and Parlett, Beresford N.",
+ title = "Multiple representations to compute orthogonal eigenvectors
+ of symmetric tridiagonal matrices",
+ journal = "Linear Algebra and its Applications",
+ volume = "387",
+ number ="1",
+ pages = "1-28",
+ year = "2004",
+ month = "August",
+ paper = "Dhil04a.pdf",
+ abstract =
+ "In this paper we present an $O(nk)$ procedure, Algorithm $MR^3$, for
+ computing $k$ eigenvectors of an $n\times n$ symmetric tridiagonal
+ matrix $T$. A salient feature of the algorithm is that a number of
+ different $LDL^t$ products ($L$ unit lower triangular, $D$ diagonal)
+ are computed. In exact arithmetic each $LDL^t$ is a factorization of a
+ translate of $T$. We call the various $LDL^t$ products
+ {\sl representations} (of $T$) and, roughly speaking, there is a
+ representation for each cluster of close eigenvalues. The unfolding of
+ the algorithm, for each matrix, is well described by a
+ {\sl representation tree}. We present the tree and use it to show that if
+ each representation satisfies three prescribed conditions then the
+ computed eigenvectors are orthogonal to working accuracy and have
+ small residual norms with respect to the original matrix $T$."
+}
+
+@article{Dhil04,
+ author = "Dhillon, Inderjit S. and Parlett, Beresford N.",
+ title = "Orthogonal Eigenvectors and Relative Gaps",
+ journal = "SIAM Journal on Matrix Analysis and Applications",
+ volume = "25",
+ year = "2004",
+ paper = "Dhil04.pdf",
+ abstract =
+ "Let $LDL^t$ be the triangular factorization of a real symmetric
+ $n\times n$ tridiagonal matrix so that $L$ is a unit lower bidiagonal
+ matrix, $D$ is diagonal. Let $(\lambda,\nu)$ be an eigenpair,
+ $\lambda \ne 0$, with the property that both $\lambda$ and $\nu$ are
+ determined to high relative accuracy by the parameters in $L$ and $D$.
+ Suppose also that the relative gap between $\lambda$ and its nearest
+ neighbor $\mu$ in the spectrum exceeds $1/n$: $n|\lambda-\mu| > |\lambda|$.
+
+ This paper presents a new $O(n)$ algorithm and a proof that, in the
+ presence of round-off errors, the algorithm computes an approximate
+ eigenvector $\hat{\nu}$ that is accurate to working precision
+ $|\sin\angle(\nu,\hat{\nu})| = O(n\epsilon)$, where $\epsilon$ is the
+ round-off unit. It follows that $\hat{\nu}$ is numerically orthogonal to
+ all the other eigenvectors. This result forms part of a program to
+ compute numerically orthogonal eigenvectors without resorting to the
+ Gram-Schmidt process.
+
+ The contents of this paper provide a high-level description and
+ theoretical justification for LAPACK (version 3.0) subroutine DLAR1V."
+}
+
+@article{Gent74,
+ author = "Gentlman, W. Morven and Marovich, Scott B.",
+ title = "More on algorithms that reveal properties of floating point
+ arithmetic units.",
+ journal = "Comm. of the ACM",
+ year = "1974",
+ month = "April",
+ volume = "17",
+ number = "5",
+ pages = "276-277",
+ abstract =
+ "In the interests of producing portable mathematical software, it is
+ highly desirable for a program to be able directly to obtain
+ fundamental properties of the environment in which it is to run. The
+ installer would then not be obliged to change appropriate magic
+ constants in the source code, and the user would not have to provide
+ information he may very well not understand. Until the standard
+ definitions of programming languages are changed to require builtin
+ functions that provide this information, we will have to resort to
+ writing routines that discover it."
+}
+
+@article{High86,
+ author = "Higham, Nicholas J.",
+ title = "Efficient Algorithms for Computing the Condition Number of a
+ Tridiagonal Matrix",
+ journal = "SIAM J. Sci. Stat. Comput.",
+ volume = "7",
+ number = "1",
+ year = "1986",
+ month = "January",
+ paper = "High86.pdf",
+ abstract =
+ "Let $A$ be a tridiagonal matrix of order $n$. We show that it is
+ possible to compute $||A^{-1}||_{\infty}$ and hence
+ ${\rm cond}_{\infty}(A)$ in $O(n)$ operations. Several algorithms
+ which perform this task are given and their numerical properties are
+ investigated.
+
+ If $A$ is also positive definite then $||A^{-1}||_{\infty}$ can be
+ computed as the norm of the solution to a positive definite
+ tridiagonal linear system whose coefficient matrix is closely related
+ to $A$. We show how this computation can be carried out in parallel
+ with the solution of a linear system $Ax=b$. In particular we describe
+ some simple modifications to the LINPACK routine SPTSL which enable
+ this routine to compute ${\rm cond}_1(A)$, efficiently, in addition to
+ solving $Ax=b$."
+}
+
+@article{Kags89,
+ author = "Kagstrom, Bo and Westin, L.",
+ title = "Generalized Schur Methods with Condition Estimators for Solving
+ the Generalized Sylvester Equation",
+ journal = "IEEE Transactions on Automatic Control",
+ volume = "34",
+ number = "7",
+ year = "1989",
+ month = "July",
+ pages = "745-751",
+ abstract =
+ "Stable algorithms are presented for solving the generalized Sylvester
+ equation. They are based on orthogonal equivalence transformations of
+ the original problem. Perturbation theory and rounding error analysis
+ are included. Condition estimators (${\rm Dif}^{-1}$-estimators) are
+ developed which, when substituted into derived error bounds, give
+ accuracy estimates of a computed solution. Results from numerical
+ experiments on well-conditioned and ill-conditioned problems are
+ reported."
+}
+
+@incollection{Kags93,
+ author = "Kagstrom, B.",
+ title = "A Direct Method for Reordering Eigenvalues in the
+ Generalized Real Schur Form of a Regular Matrix Pair (A, B)",
+ year = "1993",
+ pages = "195-218",
+ booktitle = "Linear Algebra for Large Scale and Real-Time Applications",
+ volume = "232",
+ publisher = "NATO ASI Series",
+ isbn = "978-90-481-4246-0",
+ abstract =
+ "A direct orthogonal equivalence transformation method for reordering
+ the eigenvalues along the diagonal in the generalized real Schur form
+ of a regular matrix pair $(A,B)$ is presented. Each swap of two
+ adjacent eigenvalues (real, or complex conjugate pairs) involves
+ solving a generalized Sylvester equation and the construction of two
+ orthogonal transformation matrices from certain eigenspaces associated
+ with the corresponding diagonal blocks. An error analysis of the
+ direct reordering method is presented. Results from numerical
+ experiments on well-conditioned as well as ill-conditioned problems
+ illustrate the stability and the accuracy of the method. Finally, a
+ direct reordering algorithm with controlled backward error is described."
+}
+
+@article{Kags93b,
+ author = "Kagstrom, Bo and Poromaa, Peter",
+ title = "LAPACK-Style Algorithms and Software for Solving the Generalized
+ Sylvester Equation and Estimating the Separation between
+ Regular Matrix Pairs",
+ year = "1993",
+ month = "December",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn75.pdf",
+ paper = "Kags93.pdf",
+ comment = "LAPACK Working Note 75",
+ abstract =
+ "Level 3 algorithms for solving the generalized Sylvester equation
+ $(AR-LB,DR-LE)=(C,F)$ and the transposed analogue
+ $(A^TU+D^TV,-UB^T-VE^T)=(C,F)$ are presented. These blocked algorithms
+ permit reuse of data in complex memory hierarchies of current advanced
+ computer architectures. The separation of two regular matrix pairs
+ $(A,D)$ and $(B,E)$, Dif[$(A,D)$,$(B,E)$], is defined in terms of the
+ generalized Sylvester operator $(AR-LB,DR-LE)$. Robust, efficient and
+ reliable Dif-estimators are presented. The basic problem is to find a
+ lower bound on Dif$^{-1}$, which can be done by solving generalized
+ Sylvester equations in triangular form. Frobenius-norm-based and
+ one-norm-based Dif estimators are described and evaluated. These estimates
+ lead to computable error bounds for the generalized Sylvester
+ equation. The one-norm-based estimator makes the condition estimation
+ uniform with LAPACK. Fortran 77 software that implements our
+ algorithms for solving generalized Sylvester equations, and for
+ computing error bounds and Dif-estimators is presented. Computational
+ experiments that illustrate the accuracy, efficiency and reliability
+ of our software are also described."
+}
+
+@article{Kags94,
+ author = "Kagstrom, Bo and Poromaa, Peter",
+ title = "Computing Eigenspaces with Specified Eigenvalues of a Regular
+ Matrix Pair (A, B) and Condition Estimation: Theory,
+ Algorithms and Software",
+ year = "1994",
+ month = "April",
+ url = "http://www.netlib.org/lapack/lawns/lawn87.ps",
+ paper = "Kags94.pdf",
+ abstract =
+ "Theory, algorithms and LAPACK style software for computing a pair of
+ deflating subspaces with specified eigenvalues of a regular matrix
+ pair $(A,B)$ and error bounds for computed quantities (eigenvalues and
+ eigenspaces) are presented. The {\sl reordering} of specified
+ eigenvalues is performed with a direct orthogonal transformation
+ method with guaranteed numerical stability. Each swap of two adjacent
+ diagonal blocks in the real generalized Schur form, where at least one
+ of them corresponds to a complex conjugate pair of eigenvalues,
+ involves solving a generalized Sylvester equation and the construction
+ of two orthogonal transformation matrices from certain eigenspaces
+ associated with the diagonal blocks. The swapping of two $1\times 1$
+ blocks is performed using orthogonal (unitary) Givens rotations. The
+ {\sl error bounds} are based on estimates of condition numbers for
+ eigenvalues and eigenspaces. The software computes reciprocal values
+ of a condition number for an individual eigenvalue (or a cluster of
+ eigenvalues), a condition number for an eigenvector (or eigenspace),
+ and spectral projectors onto a selected cluster. By computing
+ reciprocal values we avoid overflow. Changes in eigenvectors and
+ eigenspaces are measured by their change in angle. The condition
+ numbers yield both {\sl asymptotic} and {\sl global} error bounds. The
+ asymptotic bounds are only accurate for small perturbations $(E,F)$ of
+ $(A,B)$, while the global bounds work for all $||(E,F)||$ up to a
+ certain bound, whose size is determined by the conditioning of the
+ problem. It is also shown how these upper bounds can be
+ estimated. Fortran 77 {\sl software} that implements our algorithms
+ for reordering eigenvalues, computing (left and right) deflating
+ subspaces with specified eigenvalues and condition number estimation
+ is presented. Computational experiments that illustrate the accuracy,
+ efficiency and reliability of our software are also described."
+}
+
+@article{Kags94b,
+ author = "Kagstrom, Bo",
+ title = "A Perturbation Analysis of the Generalized Sylvester Equation
+ $(AR-LB,DR-LE)=(C,F)$",
+ journal = "SIAM J. Matrix Anal. and Appl.",
+ volume = "15",
+ number = "4",
+ pages = "1045-1060",
+ year = "1994",
+ abstract =
+ "Perturbation and error bounds for the generalized Sylvester equation
+ $(AR-LB,DR-LE)=(C,F)$ are presented. An explicit expression for the
+ normwise relative backward error associated with an approximate
+ solution of the generalized Sylvester equation is derived and
+ conditions when it can be much greater than the relative residual are
+ given. This analysis is applicable to any method that solves the
+ generalized Sylvester equation. A condition number that reflects the
+ structure of the problem and a normwise forward error bound based on
+ ${\rm Dif}^{-1}[(A,D),(B,E)]$ and the residual are derived. The
+ structure-preserving condition number can be arbitrarily smaller than
+ a ${\rm Dif}^{-1}$-based condition number. The normwise error bound
+ can be evaluated robustly and at moderate cost by using a reliable
+ ${\rm Dif}^{-1}$ estimator. A componentwise LAPACK-style forward error
+ bound that can be stronger than the normwise error bound is
+ presented. A componentwise approximate error bound that can be
+ evaluated to a much lower cost is also proposed. Finally, some
+ computational experiments that validate and evaluate the perturbation
+ and error bounds are presented."
+}
+
+@techreport{Kaha66,
+ author = "Kahan, W.",
+ title = "Accurate Eigenvalues of a Symmetric Tri-Diagonal Matrix",
+ year = "1966",
+ month = "July",
+ institution = "Stanford University",
+ type = "Technical Report",
+ number = "CS41",
+ paper = "Kaha66.pdf",
+ abstract =
+ "Having established tight bounds for the quotient of two different
+ lub-norms of the same tri-diagonal matrix J, the author observes that
+ these bounds could be of use in an error-analysis provided a suitable
+ algorithm were found. Such an algorithm is exhibited, and its errors
+ are thoroughly accounted for, including the effects of scaling,
+ over/underflow and roundoff. A typical result is that, on a computer
+ using rounded floating point binary arithmetic, the biggest eigenvalue
+ of J can be computed easily to within 2.5 units in its last place, and
+ the smaller eigenvalues will suffer absolute errors which are no
+ larger. These results are somewhat stronger than had been known before."
+}
+
+@article{Livn04,
+ author = "Livne, Oren E. and Golub, Gene H.",
+ title = "Scaling by Binormalization",
+ journal = "Numerical Algorithms",
+ volume = "35",
+ number = "1",
+ pages = "97-120",
+ year = "2004",
+ month = "January",
+ paper = "Livn04.pdf",
+ abstract =
+ "We present an interative algorithm (BIN) for scaling all the rows and
+ columns of a real symmetric matrix to unit 2-norm. We study the
+ theoretical convergence properties and its relation to optimal
+ conditioning. Numerical experiments show that BIN requires 2-4
+ matrix-vector multiplications to obtain an adequate scaling, and in
+ many cases significantly reduces the condition number, more than other
+ scaling algorithms. We present generalizations to complex,
+ non-symmetric and rectangular matrices."
+}
+
+@article{Malc72,
+ author = "Malcolm, Michael A.",
+ title = "Algorithms to reveal properties of floating-point arithmetic",
+ journal = "Comms of the ACM",
+ volume = "15",
+ year = "1972",
+ pages = "949-951",
+ url = "http://www.dtic.mil/dtic/tr/fulltext/u2/727104.pdf",
+ paper = "Malc72.pdf",
+ abstract =
+ "Two algorithms are presented in the form of Fortran subroutines. Each
+ subroutine computes the radix and number of digits of the floating
+ point numbers and whether rounding or chopping is done by the machine
+ on which it is run. The methods are shown to work on any ``reasonable''
+ floating-point computer."
+}
+
+@article{Marq06,
+ author = "Marques, Osni A. and Reidy, Jason and Vomel, Christof",
+ title = "Benefits of IEEE-754 Features in Modern Symmetric Tridiagonal
+ Eigensolvers",
+ journal = "SIAM Journal on Scientific Computing",
+ volume = "28",
+ number = "5",
+ year = "2006",
+ url = "http://www.netlib.org/lapack/lawnspdf/lawn172.pdf",
+ paper = "Marq06.pdf",
+ abstract =
+ "Bisection is one of the most common methods used to compute the
+ eigenvalues of symmetric tridiagonal matrices. Bisection relies on the
+ {\sl Sturm count}: for a given shift $\sigma$, the number of negative
+ pivots in the factorization $T-\sigma{}I=LDL^T$ equals the number of
+ eigenvalues of $T$ that are smaller than $\sigma$. In IEEE-754
+ arithmetic, the value $\infty$ permits the computation to continue
+ past a zero pivot, producing a correct Sturm count when $T$ is
+ unreduced. Demmel and Li showed in the 90s that using $\infty$ rather
+ than testing for zero pivots within the loop could improve performance
+ significantly on certain architectures.
+
+ When eigenvalues are to be computed to high relative accuracy, it is
+ often preferable to work with $LDL^T$ factorizations instead of the
+ original tridiagonal $T$; see, for example, the MRRR algorithm. In these
+ cases, the Sturm count has to be computed from $LDL^T$. The
+ differential stationary and progressive qds algorithms are the method
+ of choice.
+
+ While it seems trivial to replace $T$ by $LDL^T$, in reality these
+ algorithms are more complicated: in IEEE-754 arithmetic, a zero pivot
+ produces an overflow followed by an invalid exception (NaN), which
+ renders the Sturm count incorrect.
+
+ We present alternative, safe formulations that are guaranteed to
+ produce the correct result.
+
+ Benchmarking these algorithms on a variety of platforms shows that the
+ original formulation without tests is always faster provided no
+ exception occurs. The transforms see speed-ups of up to 2.6x over the
+ careful formulation.
+
+ Tests on industrial matrices show that encountering exceptions in
+ practice is rare. This leads to the following design: First, compute
+ the Sturm count by the fast but unsafe algorithm. Then, if an
+ exception occurred, recompute the count by a safe, slower alternative.
+
+ The new Sturm count algorithms improve the speed of bisection by up to
+ 2x on our test matrices. Furthermore, unlike the traditional
+ tiny-pivot substitutions, proper use of IEEE-754 features provides a
+ careful formulation that imposes no input range restrictions."
+}
+
+@book{Moon93,
+ author = "Moonen, Marc S. and Golub, Gene H. and De Moor, Bart L.R.",
+ title = "Linear Algebra and Large Scale and Real-Time Applications",
+ year = "1993",
+ publisher = "NATO ASI Series",
+ isbn = "978-904814246-0"
+}
+
+@article{Ward81,
+ author = "Ward, Robert C.",
+ title = "Balancing the generalized eigenvalue problem",
+ journal = "SIAM J. Sci. and Stat. Comput.",
+ volume = "2",
+ number = "2",
+ pages = "141-152",
+ abstract =
+ "An algorithm is presented for balancing the $A$ and $B$ matirces
+ prior to computing the eigensystem of the generalized eigenvalue
+ problem $Ax=\lambda Bx$. The three-step algorithm is specifically
+ designed to preceed the $QZ$-type algorithms, but improved performance
+ is expected from most eigensystem solvers. Permutations and two-sided
+ diagonal transformations are applied to $A$ and $B$ to produce
+ matrices with certain desirable properties. Test cases are presented
+ to illustrate the improved accuracy of the computed eigenvalues."
+}
diff --git a/src/axiom-website/patches.html b/src/axiom-website/patches.html
index ecb00cf..086185c 100644
--- a/src/axiom-website/patches.html
+++ b/src/axiom-website/patches.html
@@ -5274,6 +5274,8 @@ books/bookvol10.5 add BLAS and LAPACK code

books/bookvol4 use bookheader.tex

20160405.01.tpd.patch
books/bookvol* fix table of contents data

+20160405.02.tpd.patch
+books/bookvolbib add reference material for LAPACK

--
1.7.5.4