SparsityPatternRef: bulk-copy fast path for compressed sources

libeigen/eigen!2495
Co-authored-by: Rasmus Munk Larsen <rmlarsen@gmail.com>
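
This MR adds a fast path to the sparsity-pattern copy: when the source pattern is compressed, the total nonzero count is read directly from the last outer index and the outer/inner index arrays are bulk-copied instead of being rebuilt column by column. The sketch below is illustrative only; it uses a plain Eigen::SparseMatrix rather than the internal pattern-ref type (whose pat.outer/pat.inner members are not public API) and shows the invariant the fast path relies on: for a compressed column-major matrix, outerIndexPtr() is a zero-based CSC prefix sum, so outerIndexPtr()[outerSize()] equals nonZeros() and both index arrays can be copied verbatim.

```cpp
#include <Eigen/Sparse>
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  using SpMat = Eigen::SparseMatrix<double>;  // column-major by default
  std::vector<Eigen::Triplet<double>> t = {
      {0, 0, 1.0}, {2, 0, 1.0}, {1, 1, 1.0}, {3, 2, 1.0}};
  SpMat A(4, 3);
  A.setFromTriplets(t.begin(), t.end());  // setFromTriplets yields a compressed matrix
  assert(A.isCompressed());

  const Eigen::Index n_outer = A.outerSize();
  // Fast path: the total nonzero count is the last entry of the outer array...
  const Eigen::Index total = A.outerIndexPtr()[n_outer];
  assert(total == A.nonZeros());

  // ...and, with no row permutation, both index arrays are verbatim bulk copies.
  std::vector<int> o_outer(n_outer + 1), o_inner(total);
  std::copy(A.outerIndexPtr(), A.outerIndexPtr() + n_outer + 1, o_outer.begin());
  std::copy(A.innerIndexPtr(), A.innerIndexPtr() + total, o_inner.begin());
  return 0;
}
```
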
diff --git a/Eigen/src/SparseCore/SparsityPatternRef.h b/Eigen/src/SparseCore/SparsityPatternRef.h
index e8ee1ce..110f557 100644
--- a/Eigen/src/SparseCore/SparsityPatternRef.h
+++ b/Eigen/src/SparseCore/SparsityPatternRef.h
@@ -143,22 +143,49 @@
   out.resize(n_inner, n_outer);
   Index total = 0;
-  for (Index j = 0; j < n_outer; ++j) total += pat.nonZeros(j);
+  if (pat.isCompressed()) {
+    total = pat.outer[n_outer];
+  } else {
+    for (Index j = 0; j < n_outer; ++j) total += pat.nonZeros(j);
+  }
   out.resizeNonZeros(total);
   StorageIndex* o_outer = out.outerIndexPtr();
   StorageIndex* o_inner = out.innerIndexPtr();
-  o_outer[0] = 0;
-  for (Index j = 0; j < n_outer; ++j) {
-    const Index nz = pat.nonZeros(j);
-    o_outer[j + 1] = convert_index<StorageIndex>(o_outer[j] + nz);
-    const StorageIndex* src = pat.inner + pat.outer[j];
-    StorageIndex* dst = o_inner + o_outer[j];
+  if (pat.isCompressed()) {
+    // The source's outer array is already in CSC form, and SparseMatrix's
+    // outerIndexPtr() always starts at zero (as does the buffer-built fallback
+    // in make_col_major_pattern_ref), so the result's outer indices are a
+    // verbatim copy. For the no-permutation case the inner indices are also a
+    // straight bulk copy. Both reduce to single std::copy / memcpy calls.
+    std::copy(pat.outer, pat.outer + n_outer + 1, o_outer);
     if (row_perm == nullptr) {
-      std::copy(src, src + nz, dst);
+      std::copy(pat.inner, pat.inner + total, o_inner);
     } else {
-      for (Index k = 0; k < nz; ++k) dst[k] = row_perm[src[k]];
-      std::sort(dst, dst + nz);
+      // Remap-and-sort per column, reading from the source's inner array
+      // directly to avoid the write-amplification of bulk-copying then
+      // overwriting in place.
+      for (Index j = 0; j < n_outer; ++j) {
+        const StorageIndex* src = pat.inner + o_outer[j];
+        StorageIndex* dst = o_inner + o_outer[j];
+        const Index nz = o_outer[j + 1] - o_outer[j];
+        for (Index k = 0; k < nz; ++k) dst[k] = row_perm[src[k]];
+        std::sort(dst, dst + nz);
+      }
+    }
+  } else {
+    o_outer[0] = 0;
+    for (Index j = 0; j < n_outer; ++j) {
+      const Index nz = pat.nonZeros(j);
+      o_outer[j + 1] = convert_index<StorageIndex>(o_outer[j] + nz);
+      const StorageIndex* src = pat.inner + pat.outer[j];
+      StorageIndex* dst = o_inner + o_outer[j];
+      if (row_perm == nullptr) {
+        std::copy(src, src + nz, dst);
+      } else {
+        for (Index k = 0; k < nz; ++k) dst[k] = row_perm[src[k]];
+        std::sort(dst, dst + nz);
+      }
     }
   }
   // Fill values with a fixed nonzero sentinel so downstream consumers that
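
For the compressed-source-with-permutation branch added above, each column's inner (row) indices are mapped through the row permutation and then re-sorted so the destination column stays in ascending row order. A minimal standalone sketch of that remap-and-sort step, with made-up data and with the names row_perm, src, and dst borrowed from the diff purely for illustration:

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

int main() {
  std::vector<int> row_perm = {2, 0, 3, 1};  // old row index -> new row index
  std::vector<int> src = {0, 2, 3};          // one column's row indices, sorted
  std::vector<int> dst(src.size());
  // Remap through the permutation, then restore ascending row order.
  for (std::size_t k = 0; k < src.size(); ++k) dst[k] = row_perm[src[k]];
  std::sort(dst.begin(), dst.end());         // dst == {1, 2, 3}
  return 0;
}
```
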