summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorAndreas Fried <andi@0xaf.de>2013-08-23 14:00:39 +0200
committerAndreas Fried <andi@0xaf.de>2013-08-23 14:00:39 +0200
commit092a2c12701ae3a767af8c2ca384a87ba199f50f (patch)
tree519458790103774b98bb5689e2bf977e138880a3
parent2e0eb3bfa78c8a85221dd3505399e36c626371f6 (diff)
Use QR decomposition instead of Gaussian elimination in execfreq.execfreq-new
The matrices generated by execfreq tend to be not well conditioned. Therefore, it is advisable to use a numerically stable algorithm, such as QR decomposition. Besides, QR decomposition can be used to directly get a matrix' null space, which is what execfreq actually needs.
-rw-r--r--ir/ana/execfreq.c115
1 files changed, 97 insertions, 18 deletions
diff --git a/ir/ana/execfreq.c b/ir/ana/execfreq.c
index 6ee9323..c898e73 100644
--- a/ir/ana/execfreq.c
+++ b/ir/ana/execfreq.c
@@ -63,6 +63,102 @@
static hook_entry_t hook;
+/*
+ * Algorithm for QR decomposition taken from JAMA/C++ Linear Algebra
+ * Library.
+ * See http://math.nist.gov/tnt/jama_doxygen/jama_qr_h-source.html.
+ * License: Public domain (U.S. government work).
+ */
+
+/**
+ * Use QR decomposition to find the nullspace of A (size n * n). This
+ * function assumes that rank(a) = n-1.
+ */
+static int nullspace(double *A, double *nullspace, int n)
+{
+ // The nullspace of A is the n-th column of Q, where Q * R is
+ // the QR decomposition of transpose(A).
+
+ stat_ev_dbl("execfreq_matrix_size", n);
+ stat_ev_tim_push();
+
+#define ENTRY(m,r,c) (m[n * (r) + (c)])
+ double *QR = malloc(n * n * sizeof(double));
+
+ // Transpose A
+ for (int x = 0; x < n; x++) {
+ for (int y = 0; y < n; y++) {
+ ENTRY(QR, x, y) = ENTRY(A, y, x);
+ }
+ }
+
+ // In-place computation of QR.
+ for (int k = 0; k < n; k++) {
+ // Compute 2-norm of k-th column without under/overflow.
+ double nrm = 0;
+ for (int i = k; i < n; i++) {
+ nrm = hypot(nrm, ENTRY(QR, i, k));
+ }
+
+ if (nrm != 0.0) {
+ // Form k-th Householder vector.
+ if (ENTRY(QR, k, k) < 0) {
+ nrm = -nrm;
+ }
+ for (int i = k; i < n; i++) {
+ ENTRY(QR, i, k) /= nrm;
+ }
+ ENTRY(QR, k, k) += 1.0;
+
+ // Apply transformation to remaining columns.
+ for (int j = k+1; j < n; j++) {
+ double s = 0.0;
+ for (int i = k; i < n; i++) {
+ s += ENTRY(QR, i, k) * ENTRY(QR, i, j);
+ }
+ s = -s / ENTRY(QR, k, k);
+ for (int i = k; i < n; i++) {
+ ENTRY(QR, i, j) += s * ENTRY(QR, i, k);
+ }
+ }
+ }
+ }
+
+ double *Q = malloc(n * n * sizeof(double));
+
+ // Computation of Q from QR.
+ for (int k = n-1; k >= 0; k--) {
+ for (int i = 0; i < n; i++) {
+ ENTRY(Q, i, k) = 0.0;
+ }
+ ENTRY(Q, k, k) = 1.0;
+ for (int j = k; j < n; j++) {
+ if (ENTRY(QR, k, k) != 0) {
+ double s = 0.0;
+ for (int i = k; i < n; i++) {
+ s += ENTRY(QR, i, k) * ENTRY(Q, i, j);
+ }
+ s = -s / ENTRY(QR, k, k);
+ for (int i = k; i < n; i++) {
+ ENTRY(Q, i, j) += s * ENTRY(QR, i, k);
+ }
+ }
+ }
+ }
+
+ // Fill nullspace with required information
+ for (int i = 0; i < n; i++) {
+ nullspace[i] = ENTRY(Q, i, n-1);
+ }
+#undef ENTRY
+ /* This has nothing to do with Seidel iteration, but that's
+ * what the timer is called... */
+ stat_ev_tim_pop("execfreq_seidel_time");
+ free(QR);
+ free(Q);
+ return 0;
+}
+
double get_block_execfreq(const ir_node *block)
{
return block->attr.block.execfreq;
@@ -96,22 +192,6 @@ void exit_execfreq(void)
unregister_hook(hook_node_info, &hook);
}
-
-static int solve_lgs(double *mat, double *x, int size)
-{
- /* better convergence. */
- double init = 1.0 / size;
- for (int i = 0; i < size; ++i)
- x[i] = init;
-
- stat_ev_dbl("execfreq_matrix_size", size);
- stat_ev_tim_push();
- int result = firm_gaussjordansolve(mat, x, size);
- stat_ev_tim_pop("execfreq_seidel_time");
-
- return result;
-}
-
static bool has_path_to_end(const ir_node *block)
{
return Block_block_visited(block);
@@ -444,11 +524,10 @@ void ir_estimate_execfreq(ir_graph *irg)
lgs_x[0] = 1.0;
valid_freq = true;
} else {
- int lgs_result = solve_lgs(lgs_matrix, lgs_x, lgs_size);
+ int lgs_result = nullspace(lgs_matrix, lgs_x, lgs_size);
valid_freq = !lgs_result; /* solve_lgs returns -1 on error. */
}
-
/* compute the normalization factor.
* 1.0 / exec freq of end block.
*/