From c2607211879a2670d8f82d650f4d90cbf65f9a37 Mon Sep 17 00:00:00 2001
From: Tim Daly
Date: Tue, 12 Apr 2016 19:45:08 -0400
Subject: [PATCH] books/bookvolbib add Pete12, The Matrix Cookbook
Goal: Axiom Numerics
@misc{Pete12,
author = "Petersen, Kaare Brandt and Pedersen, Michael Syskind",
title = "The Matrix Cookbook",
url =
"http://www2.imm.dtu.dk/pubdb/views/edoc\_download.php/3274/pdf/imm3274.pdf",
year = "2012",
month = "November"
}

 books/bookvolbib.pamphlet     |  127 ++++++++++++++++++++++++++--------------
 changelog                     |    2 +
 patch                         |   37 +++---------
 src/axiomwebsite/patches.html |    2 +
 4 files changed, 95 insertions(+), 73 deletions(-)
diff --git a/books/bookvolbib.pamphlet b/books/bookvolbib.pamphlet
index 5c26564..1317856 100644
--- a/books/bookvolbib.pamphlet
+++ b/books/bookvolbib.pamphlet
@@ -1880,6 +1880,50 @@ when shown in factored form.
 
 \section{Numerical Algorithms} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
+\index{Ahrens, Peter}
+\index{Nguyen, Hong Diep}
+\index{Demmel, James}
+\begin{chunk}{axiom.bib}
+@techreport{Ahre15,
+ author = "Ahrens, Peter and Nguyen, Hong Diep and Demmel, James",
+ title = "Efficient Reproducible Floating Point Summation and BLAS",
+ institution = "University of California, Berkeley",
+ year = "2015",
+ month = "December",
+ type = "technical report",
+ number = "229",
+ paper = "Ahre15.pdf",
+ url = "http://www.eecs.berkeley.edu/Pubs/TechRpts/2015/EECS-2015-229.pdf",
+ abstract =
+ "We define reproducibility to mean getting bitwise identical results
+ from multiple runs of the same program, perhaps with different
+ hardware resources or other changes that should ideally not change the
+ answer. Many users depend on reproducibility for debugging or
+ correctness. However, dynamic scheduling of parallel computing
+ resources, combined with nonassociativity of floating point addition,
+ makes attaining reproducibility a challenge even for simple operations
+ like summing a vector of numbers, or more complicated operations like
+ Basic Linear Algebra Subprograms (BLAS). We describe an algorithm that
+ computes a reproducible sum of floating point numbers independent of
+ the order of summation. The algorithm depends only on a subset of the
+ IEEE Floating Point Standard 754-2008. It is communication-optimal, in
+ the sense that it does just one pass over the data in the sequential
+ case, or one reduction operation in the parallel case, requiring an
+ ``accumulator'' represented by just 6 floating point words (more can
+ be used if higher precision is desired). The arithmetic code with a
+ 6-word accumulator is $7n$ floating point additions to sum $n$ words,
+ and (in IEEE double precision) the final error bound can be up to
+ $10^8$ times smaller than the error bound for conventional
+ summation. We describe the basic summation algorithm, the software
+ infrastructure used to build reproducible BLAS (ReproBLAS), and
+ performance results. For example, when computing the dot product of
+ 4096 double precision floating point numbers, we get a $4x$ slowdown
+ compared to Intel Math Kernel Library (MKL) running on an Intel Core
+ i7-2600 CPU operating at 3.4 GHz and 256 KB L2 Cache."
+}
+
+\end{chunk}
+
 \index{Anda, A.A.}
 \index{Park,H.}
 \begin{chunk}{axiom.bib}
@@ -1970,6 +2014,31 @@ when shown in factored form.
 
 \end{chunk}
 
+\index{Boisvert, Ronald F.}
+\index{Pozo, Roldan}
+\index{Remington, Karin A.}
+\begin{chunk}{axiom.bib}
+@techreport{MMEF96,
+ author = "Boisvert, Ronald F. and Pozo, Roldan and Remington, Karin A.",
+ title = "The Matrix Market Exchange Formats: Initial Design",
+ year = "1996",
+ month = "December",
+ institution = "National Institute of Standards and Technology",
+ type = "Technical Report",
+ url = "http://math.nist.gov/MatrixMarket/reports/MMformat.ps",
+ paper = "MMEF96.pdf",
+ abstract =
+ "We propose elementary ASCII exchange formats for matrices. Specific
+ instances of the format are defined for dense and sparse matrices with
+ real, complex, integer and pattern entries, with special cases for
+ symmetric, skew-symmetric and Hermitian matrices. Sparse matrices are
+ represented in a coordinate storage format. The overall file structure
+ is designed to allow future definition of other specialized matrix
+ formats, as well as for objects other than matrices."
+}
+
+\end{chunk}
+
 \index{Bronstein, Manuel}
 \begin{chunk}{ignore}
 {Bro99,
@@ -2102,50 +2171,6 @@ when shown in factored form.
 
 \end{chunk}
 
-\index{Ahrens, Peter}
-\index{Nguyen, Hong Diep}
-\index{Demmel, James}
-\begin{chunk}{axiom.bib}
-@techreport{Ahre15,
- author = "Ahrens, Peter and Nguyen, Hong Diep and Demmel, James",
- title = "Efficient Reproducible Floating Point Summation and BLAS",
- institution = "University of California, Berkeley",
- year = "2015",
- month = "December",
- type = "technical report",
- number = "229",
- paper = "Ahre15.pdf",
- url = "http://www.eecs.berkeley.edu/Pubs/TechRpts/2015/EECS-2015-229.pdf",
- abstract =
- "We define reproducibility to mean getting bitwise identical results
- from multiple runs of the same program, perhaps with different
- hardware resources or other changes that should ideally not change the
- answer. Many users depend on reproducibility for debugging or
- correctness. However, dynamic scheduling of parallel computing
- resources, combined with nonassociativity of floating point addition,
- makes attaining reproducibility a challenge even for simple operations
- like summing a vector of numbers, or more complicated operations like
- Basic Linear Algebra Subprograms (BLAS). We describe an algorithm that
- computes a reproducible sum of floating point numbers independent of
- the order of summation. The algorithm depends only on a subset of the
- IEEE Floating Point Standard 754-2008. It is communication-optimal, in
- the sense that it does just one pass over the data in the sequential
- case, or one reduction operation in the parallel case, requiring an
- ``accumulator'' represented by just 6 floating point words (more can
- be used if higher precision is desired). Th arithmetic code with a
- 6-word accumulator is $7n$ floating point additions to sum $n$ words,
- and (in IEEEE double precision) the final error bound can be up to
- $10^8$ times smaller than the error bound for conventional
- summation. We describe the basic summation algorithm, the software
- infrastructure used to build reproducible BLAS (ReproBLAS), and
- performance results. For example, when computing the dot product of
- 4096 double precision floating point numbers, we get a $4x$ slowdown
- compared to Intel Math Kernel Library (MKL) running on an Intel Core
- i7-2600 CPU operating at 3.4 GHz and 256 KB L2 Cache."
-}
-
-\end{chunk}
-
 \index{Davis, Timothy A.}
 \index{Hu, Yifan}
 \begin{chunk}{axiom.bib}
@@ -3214,6 +3239,20 @@ when shown in factored form.
 
 \end{chunk}
 
+\index{Petersen, Kaare Brandt}
+\index{Pedersen, Michael Syskind}
+\begin{chunk}{axiom.bib}
+@misc{Pete12,
+ author = "Petersen, Kaare Brandt and Pedersen, Michael Syskind",
+ title = "The Matrix Cookbook",
+ url =
+ "http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/3274/pdf/imm3274.pdf",
+ year = "2012",
+ month = "November"
+}
+
+\end{chunk}
+
 \index{Sutton, Brian D.}
 \begin{chunk}{axiom.bib}
 @article{Sutt13,
diff --git a/changelog b/changelog
index eebc64e..6210522 100644
--- a/changelog
+++ b/changelog
@@ -1,3 +1,5 @@
+20160412 tpd src/axiomwebsite/patches.html 20160412.01.tpd.patch
+20160412 tpd books/bookvolbib add Pete12, The Matrix Cookbook
 20160406 tpd src/axiomwebsite/patches.html 20160406.04.tpd.patch
 20160406 tpd books/bookvolbib add Davi16, Survey of Direct Sparse Methods
 20160406 tpd src/axiomwebsite/patches.html 20160406.03.tpd.patch
diff --git a/patch b/patch
index 3e0b2eb..6c585a2 100644
--- a/patch
+++ b/patch
@@ -1,33 +1,12 @@
-books/bookvolbib add Davi16, Survey of Direct Sparse Methods
+books/bookvolbib add Pete12, The Matrix Cookbook
 
 Goal: Axiom Numerics
 
-@techreport{Davi16,
- author = "Davis, Timothy and Rajamanickam, Sivasankaran and
- Sid-Lakhdar, Wissam M.",
- title = "A survey of direct methods for sparse linear systems",
- year = "2016",
- month = "April",
- institution = "Texas A and M",
- type = "Technical Report",
+@misc{Pete12,
+ author = "Petersen, Kaare Brandt and Pedersen, Michael Syskind",
+ title = "The Matrix Cookbook",
  url =
-"http://faculty.cse.tamu.edu/davis/publications_files/survey_tech_report.pdf",
- paper = "Davi16.pdf",
- abstract =
- "Wilkinson defined a sparse matrix as one with enough zeros that it
- pays to take advantage of them. This informal yet practical definition
- captures the essence of the goal of direct methods for solving sparse
- matrix problems. They exploit the sparsity of a matrix to solve
- problems economically: much faster and using far less memory than if
- all the entries of a matrix were stored and took part in explicit
- computations. These methods form the backbone of a wide range of
- problems in computational science. A glimpse of the breadth of
- applications relying on sparse solvers can be seen in the origins of
- matrices in published matrix benchmark collections. The goal of this
- survey article is to impart a working knowledge of the underlying
- theory and practice of sparse direct methods for solving linear
- systems and least-squares problems, and to provide an overview of the
- algorithms, data structures, and software available to solve these
- problems, so that the reader can both understand the methods and know
- how best to use them."
-}
+ "http://www2.imm.dtu.dk/pubdb/views/edoc\_download.php/3274/pdf/imm3274.pdf",
+ year = "2012",
+ month = "November"
+}
diff --git a/src/axiomwebsite/patches.html b/src/axiomwebsite/patches.html
index 8db7322..0912c50 100644
--- a/src/axiomwebsite/patches.html
+++ b/src/axiomwebsite/patches.html
@@ -5292,6 +5292,8 @@ books/bookvolbib add Ahre15
 books/bookvolbib add Davi11, The UFlorida Sparse Matrix Coll.
 20160406.04.tpd.patch
 books/bookvolbib add Davi16, Survey of Direct Sparse Methods
+20160412.01.tpd.patch
+books/bookvolbib add Pete12, The Matrix Cookbook
-- 
1.7.5.4