|  | // This file is part of Eigen, a lightweight C++ template library | 
|  | // for linear algebra. | 
|  | // | 
|  | // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> | 
|  | // Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> | 
|  | // | 
|  | // This Source Code Form is subject to the terms of the Mozilla | 
|  | // Public License v. 2.0. If a copy of the MPL was not distributed | 
|  | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | 
|  |  | 
|  | #ifndef SPARSELU_KERNEL_BMOD_H | 
|  | #define SPARSELU_KERNEL_BMOD_H | 
|  |  | 
|  | // IWYU pragma: private | 
|  | #include "./InternalHeaderCheck.h" | 
|  |  | 
|  | namespace Eigen { | 
|  | namespace internal { | 
|  |  | 
|  | template <int SegSizeAtCompileTime> | 
|  | struct LU_kernel_bmod { | 
|  | /** \internal | 
|  | * \brief Performs numeric block updates from a given supernode to a single column | 
|  | * | 
|  | * \param segsize Size of the segment (and blocks ) to use for updates | 
|  | * \param[in,out] dense Packed values of the original matrix | 
|  | * \param tempv temporary vector to use for updates | 
|  | * \param lusup array containing the supernodes | 
|  | * \param lda Leading dimension in the supernode | 
|  | * \param nrow Number of rows in the rectangular part of the supernode | 
|  | * \param lsub compressed row subscripts of supernodes | 
|  | * \param lptr pointer to the first column of the current supernode in lsub | 
|  | * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode | 
|  | */ | 
|  | template <typename BlockScalarVector, typename ScalarVector, typename IndexVector> | 
|  | static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, | 
|  | ScalarVector& lusup, Index& luptr, const Index lda, const Index nrow, | 
|  | IndexVector& lsub, const Index lptr, const Index no_zeros); | 
|  | }; | 
|  |  | 
|  | template <int SegSizeAtCompileTime> | 
|  | template <typename BlockScalarVector, typename ScalarVector, typename IndexVector> | 
|  | EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const Index segsize, BlockScalarVector& dense, | 
|  | ScalarVector& tempv, ScalarVector& lusup, Index& luptr, | 
|  | const Index lda, const Index nrow, IndexVector& lsub, | 
|  | const Index lptr, const Index no_zeros) { | 
|  | typedef typename ScalarVector::Scalar Scalar; | 
|  | // First, copy U[*,j] segment from dense(*) to tempv(*) | 
|  | // The result of triangular solve is in tempv[*]; | 
|  | // The result of matric-vector update is in dense[*] | 
|  | Index isub = lptr + no_zeros; | 
|  | Index i; | 
|  | Index irow; | 
|  | for (i = 0; i < ((SegSizeAtCompileTime == Dynamic) ? segsize : SegSizeAtCompileTime); i++) { | 
|  | irow = lsub(isub); | 
|  | tempv(i) = dense(irow); | 
|  | ++isub; | 
|  | } | 
|  | // Dense triangular solve -- start effective triangle | 
|  | luptr += lda * no_zeros + no_zeros; | 
|  | // Form Eigen matrix and vector | 
|  | Map<Matrix<Scalar, SegSizeAtCompileTime, SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > A( | 
|  | &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda)); | 
|  | Map<Matrix<Scalar, SegSizeAtCompileTime, 1> > u(tempv.data(), segsize); | 
|  |  | 
|  | u = A.template triangularView<UnitLower>().solve(u); | 
|  |  | 
|  | // Dense matrix-vector product y <-- B*x | 
|  | luptr += segsize; | 
|  | const Index PacketSize = internal::packet_traits<Scalar>::size; | 
|  | Index ldl = internal::first_multiple(nrow, PacketSize); | 
|  | Map<Matrix<Scalar, Dynamic, SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > B(&(lusup.data()[luptr]), nrow, | 
|  | segsize, OuterStride<>(lda)); | 
|  | Index aligned_offset = internal::first_default_aligned(tempv.data() + segsize, PacketSize); | 
|  | Index aligned_with_B_offset = (PacketSize - internal::first_default_aligned(B.data(), PacketSize)) % PacketSize; | 
|  | Map<Matrix<Scalar, Dynamic, 1>, 0, OuterStride<> > l(tempv.data() + segsize + aligned_offset + aligned_with_B_offset, | 
|  | nrow, OuterStride<>(ldl)); | 
|  |  | 
|  | l.noalias() = B * u; | 
|  |  | 
|  | // Scatter tempv[] into SPA dense[] as a temporary storage | 
|  | isub = lptr + no_zeros; | 
|  | for (i = 0; i < ((SegSizeAtCompileTime == Dynamic) ? segsize : SegSizeAtCompileTime); i++) { | 
|  | irow = lsub(isub++); | 
|  | dense(irow) = tempv(i); | 
|  | } | 
|  |  | 
|  | // Scatter l into SPA dense[] | 
|  | for (i = 0; i < nrow; i++) { | 
|  | irow = lsub(isub++); | 
|  | dense(irow) -= l(i); | 
|  | } | 
|  | } | 
|  |  | 
|  | template <> | 
|  | struct LU_kernel_bmod<1> { | 
|  | template <typename BlockScalarVector, typename ScalarVector, typename IndexVector> | 
|  | static EIGEN_DONT_INLINE void run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, | 
|  | ScalarVector& lusup, Index& luptr, const Index lda, const Index nrow, | 
|  | IndexVector& lsub, const Index lptr, const Index no_zeros); | 
|  | }; | 
|  |  | 
|  | template <typename BlockScalarVector, typename ScalarVector, typename IndexVector> | 
|  | EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const Index /*segsize*/, BlockScalarVector& dense, | 
|  | ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr, | 
|  | const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, | 
|  | const Index no_zeros) { | 
|  | typedef typename ScalarVector::Scalar Scalar; | 
|  | typedef typename IndexVector::Scalar StorageIndex; | 
|  | Scalar f = dense(lsub(lptr + no_zeros)); | 
|  | luptr += lda * no_zeros + no_zeros + 1; | 
|  | const Scalar* a(lusup.data() + luptr); | 
|  | const StorageIndex* irow(lsub.data() + lptr + no_zeros + 1); | 
|  | Index i = 0; | 
|  | for (; i + 1 < nrow; i += 2) { | 
|  | Index i0 = *(irow++); | 
|  | Index i1 = *(irow++); | 
|  | Scalar a0 = *(a++); | 
|  | Scalar a1 = *(a++); | 
|  | Scalar d0 = dense.coeff(i0); | 
|  | Scalar d1 = dense.coeff(i1); | 
|  | d0 -= f * a0; | 
|  | d1 -= f * a1; | 
|  | dense.coeffRef(i0) = d0; | 
|  | dense.coeffRef(i1) = d1; | 
|  | } | 
|  | if (i < nrow) dense.coeffRef(*(irow++)) -= f * *(a++); | 
|  | } | 
|  |  | 
|  | }  // end namespace internal | 
|  |  | 
|  | }  // end namespace Eigen | 
|  | #endif  // SPARSELU_KERNEL_BMOD_H |