blob: 17f8cc1657439f1f517b1cdd83b03122eef4a432 [file]
// This file is part of Eigen, a lightweight C template library
// for linear algebra.
//
// Copyright (C) 2024 Kseniya Zaytseva <kseniya.zaytseva@syntacore.com>
// Copyright (C) 2025 Chip Kerchner <ckerchner@tenstorrent.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// SPDX-License-Identifier: MPL-2.0
#ifndef EIGEN_RVV10_GENERAL_BLOCK_KERNEL_H
#define EIGEN_RVV10_GENERAL_BLOCK_KERNEL_H
// IWYU pragma: private
#include "../../InternalHeaderCheck.h"
namespace Eigen {
namespace internal {
/********************************* real ************************************/
template <>
struct gebp_traits<float, float, false, false, Architecture::RVV10, GEBPPacketFull>
: gebp_traits<float, float, false, false, Architecture::Generic, GEBPPacketFull> {
typedef float RhsPacket;
typedef QuadPacket<float> RhsPacketx4;
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1<RhsPacket>(*b); }
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const { loadRhs(b, dest); }
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = ploadquad<RhsPacket>(b); }
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
c = __riscv_vfmadd_vf_f32m1(a, b, c, unpacket_traits<AccPacket>::size);
#elif EIGEN_RISCV64_DEFAULT_LMUL == 2
c = __riscv_vfmadd_vf_f32m2(a, b, c, unpacket_traits<AccPacket>::size);
#elif EIGEN_RISCV64_DEFAULT_LMUL == 4
c = __riscv_vfmadd_vf_f32m4(a, b, c, unpacket_traits<AccPacket>::size);
#endif
}
#if EIGEN_RISCV64_DEFAULT_LMUL >= 2
EIGEN_STRONG_INLINE void madd(const Packet1Xf& a, const RhsPacket& b, Packet1Xf& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f32m1(a, b, c, unpacket_traits<Packet1Xf>::size);
}
#endif
#if EIGEN_RISCV64_DEFAULT_LMUL == 4
EIGEN_STRONG_INLINE void madd(const Packet2Xf& a, const RhsPacket& b, Packet2Xf& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f32m2(a, b, c, unpacket_traits<Packet2Xf>::size);
}
#endif
template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/,
const LaneIdType& lane) const {
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
c = __riscv_vfmadd_vf_f32m1(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
#elif EIGEN_RISCV64_DEFAULT_LMUL == 2
c = __riscv_vfmadd_vf_f32m2(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
#elif EIGEN_RISCV64_DEFAULT_LMUL == 4
c = __riscv_vfmadd_vf_f32m4(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
#endif
}
};
template <>
struct gebp_traits<double, double, false, false, Architecture::RVV10, GEBPPacketFull>
: gebp_traits<double, double, false, false, Architecture::Generic, GEBPPacketFull> {
typedef double RhsPacket;
typedef QuadPacket<double> RhsPacketx4;
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1<RhsPacket>(*b); }
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const { loadRhs(b, dest); }
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = ploadquad<RhsPacket>(b); }
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
c = __riscv_vfmadd_vf_f64m1(a, b, c, unpacket_traits<AccPacket>::size);
#elif EIGEN_RISCV64_DEFAULT_LMUL == 2
c = __riscv_vfmadd_vf_f64m2(a, b, c, unpacket_traits<AccPacket>::size);
#elif EIGEN_RISCV64_DEFAULT_LMUL == 4
c = __riscv_vfmadd_vf_f64m4(a, b, c, unpacket_traits<AccPacket>::size);
#endif
}
#if EIGEN_RISCV64_DEFAULT_LMUL >= 2
EIGEN_STRONG_INLINE void madd(const Packet1Xd& a, const RhsPacket& b, Packet1Xd& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f64m1(a, b, c, unpacket_traits<Packet1Xd>::size);
}
#endif
#if EIGEN_RISCV64_DEFAULT_LMUL == 4
EIGEN_STRONG_INLINE void madd(const Packet2Xd& a, const RhsPacket& b, Packet2Xd& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f64m2(a, b, c, unpacket_traits<Packet2Xd>::size);
}
#endif
template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/,
const LaneIdType& lane) const {
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
c = __riscv_vfmadd_vf_f64m1(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
#elif EIGEN_RISCV64_DEFAULT_LMUL == 2
c = __riscv_vfmadd_vf_f64m2(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
#elif EIGEN_RISCV64_DEFAULT_LMUL == 4
c = __riscv_vfmadd_vf_f64m4(a, b.get(lane), c, unpacket_traits<AccPacket>::size);
#endif
}
};
#if defined(EIGEN_VECTORIZE_RVV10FP16)
template <>
struct gebp_traits<half, half, false, false, Architecture::RVV10>
: gebp_traits<half, half, false, false, Architecture::Generic> {
typedef half RhsPacket;
typedef PacketXh LhsPacket;
typedef PacketXh AccPacket;
typedef QuadPacket<half> RhsPacketx4;
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1<RhsPacket>(*b); }
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const { loadRhs(b, dest); }
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = pload<RhsPacket>(b); }
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
c = __riscv_vfmadd_vf_f16m1(a, numext::bit_cast<_Float16>(b), c, unpacket_traits<AccPacket>::size);
#else
c = __riscv_vfmadd_vf_f16m2(a, numext::bit_cast<_Float16>(b), c, unpacket_traits<AccPacket>::size);
#endif
}
#if EIGEN_RISCV64_DEFAULT_LMUL >= 2
EIGEN_STRONG_INLINE void madd(const Packet1Xh& a, const RhsPacket& b, Packet1Xh& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = __riscv_vfmadd_vf_f16m1(a, numext::bit_cast<_Float16>(b), c, unpacket_traits<Packet1Xh>::size);
}
#endif
template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/,
const LaneIdType& lane) const {
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
c = __riscv_vfmadd_vf_f16m1(a, numext::bit_cast<_Float16>(b.get(lane)), c, unpacket_traits<AccPacket>::size);
#else
c = __riscv_vfmadd_vf_f16m2(a, numext::bit_cast<_Float16>(b.get(lane)), c, unpacket_traits<AccPacket>::size);
#endif
}
};
#endif
#if defined(EIGEN_VECTORIZE_RVV10BF16)
template <>
struct gebp_traits<bfloat16, bfloat16, false, false, Architecture::RVV10>
: gebp_traits<bfloat16, bfloat16, false, false, Architecture::Generic> {
typedef bfloat16 RhsPacket;
typedef PacketXbf LhsPacket;
typedef PacketXbf AccPacket;
typedef QuadPacket<bfloat16> RhsPacketx4;
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const { dest = pset1<RhsPacket>(*b); }
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacket& dest) const { loadRhs(b, dest); }
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = pload<RhsPacket>(b); }
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
c = F32ToBf16(
__riscv_vfwmaccbf16_vf_f32m2(Bf16ToF32(c), numext::bit_cast<__bf16>(b), a, unpacket_traits<AccPacket>::size));
#else
c = F32ToBf16(
__riscv_vfwmaccbf16_vf_f32m4(Bf16ToF32(c), numext::bit_cast<__bf16>(b), a, unpacket_traits<AccPacket>::size));
#endif
}
#if EIGEN_RISCV64_DEFAULT_LMUL >= 2
EIGEN_STRONG_INLINE void madd(const Packet1Xbf& a, const RhsPacket& b, Packet1Xbf& c, RhsPacket& /*tmp*/,
const FixedInt<0>&) const {
c = F32ToBf16(
__riscv_vfwmaccbf16_vf_f32m2(Bf16ToF32(c), numext::bit_cast<__bf16>(b), a, unpacket_traits<Packet1Xbf>::size));
}
#endif
template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c, RhsPacket& /*tmp*/,
const LaneIdType& lane) const {
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
c = F32ToBf16(__riscv_vfwmaccbf16_vf_f32m2(Bf16ToF32(c), numext::bit_cast<__bf16>(b.get(lane)), a,
unpacket_traits<AccPacket>::size));
#else
c = F32ToBf16(__riscv_vfwmaccbf16_vf_f32m4(Bf16ToF32(c), numext::bit_cast<__bf16>(b.get(lane)), a,
unpacket_traits<AccPacket>::size));
#endif
}
};
#endif
#if EIGEN_RISCV64_DEFAULT_LMUL == 1
/********************************* complex ************************************/
#define PACKET_DECL_COND_POSTFIX(postfix, name, packet_size) \
typedef typename packet_conditional< \
packet_size, typename packet_traits<name##Scalar>::type, typename packet_traits<name##Scalar>::half, \
typename unpacket_traits<typename packet_traits<name##Scalar>::half>::half>::type name##Packet##postfix
#define RISCV_COMPLEX_PACKET_DECL_COND_SCALAR(packet_size) \
typedef typename packet_conditional< \
packet_size, typename packet_traits<Scalar>::type, typename packet_traits<Scalar>::half, \
typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type ScalarPacket
template <typename RealScalar, bool ConjLhs_, bool ConjRhs_, int PacketSize_>
struct gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, ConjLhs_, ConjRhs_, Architecture::RVV10,
PacketSize_> : gebp_traits<std::complex<RealScalar>, std::complex<RealScalar>, ConjLhs_, ConjRhs_,
Architecture::Generic, PacketSize_> {
typedef std::complex<RealScalar> Scalar;
typedef std::complex<RealScalar> LhsScalar;
typedef std::complex<RealScalar> RhsScalar;
typedef std::complex<RealScalar> ResScalar;
typedef typename packet_traits<std::complex<RealScalar>>::type RealPacket;
PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
RISCV_COMPLEX_PACKET_DECL_COND_SCALAR(PacketSize_);
#undef RISCV_COMPLEX_PACKET_DECL_COND_SCALAR
enum {
ConjLhs = ConjLhs_,
ConjRhs = ConjRhs_,
Vectorizable = unpacket_traits<RealPacket>::vectorizable && unpacket_traits<ScalarPacket>::vectorizable,
ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
RhsPacketSize = Vectorizable ? unpacket_traits<RhsScalar>::size : 1,
RealPacketSize = Vectorizable ? unpacket_traits<RealPacket>::size : 1,
nr = 4,
mr = ResPacketSize,
LhsProgress = ResPacketSize,
RhsProgress = 1
};
typedef DoublePacket<RealPacket> DoublePacketType;
typedef std::conditional_t<Vectorizable, ScalarPacket, Scalar> LhsPacket4Packing;
typedef std::conditional_t<Vectorizable, RealPacket, Scalar> LhsPacket;
typedef std::conditional_t<Vectorizable, DoublePacket<RealScalar>, Scalar> RhsPacket;
typedef std::conditional_t<Vectorizable, ScalarPacket, Scalar> ResPacket;
typedef std::conditional_t<Vectorizable, DoublePacketType, Scalar> AccPacket;
typedef QuadPacket<RhsPacket> RhsPacketx4;
EIGEN_STRONG_INLINE void initAcc(Scalar& p) { p = Scalar(0); }
EIGEN_STRONG_INLINE void initAcc(DoublePacketType& p) {
p.first = pset1<RealPacket>(RealScalar(0));
p.second = pset1<RealPacket>(RealScalar(0));
}
// Scalar path
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, ScalarPacket& dest) const { dest = pset1<ScalarPacket>(*b); }
// Vectorized path
template <typename RealPacketType>
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, DoublePacket<RealPacketType>& dest) const {
dest.first = pset1<RealPacketType>(numext::real(*b));
dest.second = pset1<RealPacketType>(numext::imag(*b));
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
loadRhs(b, dest.B_0);
loadRhs(b + 1, dest.B1);
loadRhs(b + 2, dest.B2);
loadRhs(b + 3, dest.B3);
}
// Scalar path
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, ScalarPacket& dest) const { loadRhs(b, dest); }
// Vectorized path
template <typename RealPacketType>
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, DoublePacket<RealPacketType>& dest) const {
loadRhs(b, dest);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, ResPacket& dest) const { loadRhs(b, dest); }
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, DoublePacket<RealScalar>& dest) const {
loadQuadToDoublePacket(b, dest);
}
// nothing special here
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const {
dest = pload<LhsPacket>((const typename unpacket_traits<LhsPacket>::type*)(a));
}
template <typename LhsPacketType>
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const {
dest = ploadu<LhsPacketType>((const typename unpacket_traits<LhsPacketType>::type*)(a));
}
EIGEN_STRONG_INLINE Packet1Xcf pmadd_scalar(const Packet1Xcf& a, float b, const Packet1Xcf& c) const {
return Packet1Xcf(__riscv_vfmadd_vf_f32m1(a.v, b, c.v, unpacket_traits<Packet1Xf>::size));
}
EIGEN_STRONG_INLINE Packet1Xcd pmadd_scalar(const Packet1Xcd& a, double b, const Packet1Xcd& c) const {
return Packet1Xcd(__riscv_vfmadd_vf_f64m1(a.v, b, c.v, unpacket_traits<Packet1Xd>::size));
}
#if EIGEN_RISCV64_DEFAULT_LMUL >= 2
EIGEN_STRONG_INLINE Packet2Xcf pmadd_scalar(const Packet2Xcf& a, float b, const Packet2Xcf& c) const {
return Packet2Xcf(__riscv_vfmadd_vf_f32m2(a.v, b, c.v, unpacket_traits<Packet2Xf>::size));
}
EIGEN_STRONG_INLINE Packet2Xcd pmadd_scalar(const Packet2Xcd& a, double b, const Packet2Xcd& c) const {
return Packet2Xcd(__riscv_vfmadd_vf_f64m2(a.v, b, c.v, unpacket_traits<Packet2Xd>::size));
}
#endif
#if EIGEN_RISCV64_DEFAULT_LMUL == 4
EIGEN_STRONG_INLINE Packet4Xcf pmadd_scalar(const Packet4Xcf& a, float b, const Packet4Xcf& c) const {
return Packet4Xcf(__riscv_vfmadd_vf_f32m4(a.v, b, c.v, unpacket_traits<Packet4Xf>::size));
}
EIGEN_STRONG_INLINE Packet4Xcd pmadd_scalar(const Packet4Xcd& a, double b, const Packet4Xcd& c) const {
return Packet4Xcd(__riscv_vfmadd_vf_f64m4(a.v, b, c.v, unpacket_traits<Packet4Xd>::size));
}
#endif
template <typename LhsPacketType, typename RhsPacketType, typename ResPacketType, typename TmpType,
typename LaneIdType>
EIGEN_STRONG_INLINE std::enable_if_t<!is_same<RhsPacketType, RhsPacketx4>::value> madd(const LhsPacketType& a,
const RhsPacketType& b,
DoublePacket<ResPacketType>& c,
TmpType& /*tmp*/,
const LaneIdType&) const {
c.first = pmadd_scalar(a, b.first, c.first);
c.second = pmadd_scalar(a, b.second, c.second);
}
template <typename LhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp,
const LaneIdType& lane) const {
madd(a, b.get(lane), c, tmp, lane);
}
template <typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, ResPacket& c, RhsPacket& /*tmp*/,
const LaneIdType&) const {
c = cj.pmadd(a, b, c);
}
template <typename RealPacketType, typename ResPacketType>
EIGEN_STRONG_INLINE void acc(const DoublePacket<RealPacketType>& c, const ResPacketType& alpha,
ResPacketType& r) const {
// assemble c
ResPacketType tmp;
if ((!ConjLhs) && (!ConjRhs)) {
tmp = pcplxflip(pconj(ResPacketType(c.second)));
tmp = padd(ResPacketType(c.first), tmp);
} else if ((!ConjLhs) && (ConjRhs)) {
tmp = pconj(pcplxflip(ResPacketType(c.second)));
tmp = padd(ResPacketType(c.first), tmp);
} else if ((ConjLhs) && (!ConjRhs)) {
tmp = pcplxflip(ResPacketType(c.second));
tmp = padd(pconj(ResPacketType(c.first)), tmp);
} else if ((ConjLhs) && (ConjRhs)) {
tmp = pcplxflip(ResPacketType(c.second));
tmp = psub(pconj(ResPacketType(c.first)), tmp);
}
r = pmadd(tmp, alpha, r);
}
template <typename ResPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void acc(const AccPacketType& c, const ResPacketType& alpha, ResPacketType& r) const {
r = cj.pmadd(c, alpha, r);
}
EIGEN_STRONG_INLINE void acc(const Packet1Xf& c, const Packet1Xcf& alpha, Packet1Xcf& r) const {
r = cj.pmadd(c, alpha, r);
}
EIGEN_STRONG_INLINE void acc(const Packet1Xd& c, const Packet1Xcd& alpha, Packet1Xcd& r) const {
r = cj.pmadd(c, alpha, r);
}
#if EIGEN_RISCV64_DEFAULT_LMUL >= 2
EIGEN_STRONG_INLINE void acc(const Packet2Xf& c, const Packet2Xcf& alpha, Packet2Xcf& r) const {
r = cj.pmadd(c, alpha, r);
}
EIGEN_STRONG_INLINE void acc(const Packet2Xd& c, const Packet2Xcd& alpha, Packet2Xcd& r) const {
r = cj.pmadd(c, alpha, r);
}
#endif
#if EIGEN_RISCV64_DEFAULT_LMUL == 4
EIGEN_STRONG_INLINE void acc(const Packet4Xf& c, const Packet4Xcf& alpha, Packet4Xcf& r) const {
r = cj.pmadd(c, alpha, r);
}
EIGEN_STRONG_INLINE void acc(const Packet4Xd& c, const Packet4Xcd& alpha, Packet4Xcd& r) const {
r = cj.pmadd(c, alpha, r);
}
#endif
protected:
conj_helper<LhsScalar, RhsScalar, ConjLhs, ConjRhs> cj;
};
#define PACKET_DECL_COND_SCALAR_POSTFIX(postfix, packet_size) \
typedef typename packet_conditional< \
packet_size, typename packet_traits<Scalar>::type, typename packet_traits<Scalar>::half, \
typename unpacket_traits<typename packet_traits<Scalar>::half>::half>::type ScalarPacket##postfix
template <typename RealScalar, bool ConjRhs_, int PacketSize_>
class gebp_traits<RealScalar, std::complex<RealScalar>, false, ConjRhs_, Architecture::RVV10, PacketSize_>
: public gebp_traits<RealScalar, std::complex<RealScalar>, false, ConjRhs_, Architecture::Generic, PacketSize_> {
public:
typedef std::complex<RealScalar> Scalar;
typedef RealScalar LhsScalar;
typedef Scalar RhsScalar;
typedef Scalar ResScalar;
PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Real, PacketSize_);
PACKET_DECL_COND_SCALAR_POSTFIX(_, PacketSize_);
#undef PACKET_DECL_COND_SCALAR_POSTFIX
enum {
ConjLhs = false,
ConjRhs = ConjRhs_,
Vectorizable = unpacket_traits<RealPacket_>::vectorizable && unpacket_traits<ScalarPacket_>::vectorizable,
LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
RhsPacketSize = Vectorizable ? unpacket_traits<RhsPacket_>::size : 1,
ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
// FIXME: should depend on NumberOfRegisters
nr = 4,
mr = (plain_enum_min(16, NumberOfRegisters) / 2 / nr) * ResPacketSize,
LhsProgress = ResPacketSize,
RhsProgress = 1
};
typedef std::conditional_t<Vectorizable, LhsPacket_, LhsScalar> LhsPacket;
typedef RhsScalar RhsPacket;
typedef std::conditional_t<Vectorizable, ResPacket_, ResScalar> ResPacket;
typedef LhsPacket LhsPacket4Packing;
typedef QuadPacket<RhsPacket> RhsPacketx4;
typedef ResPacket AccPacket;
EIGEN_STRONG_INLINE void initAcc(AccPacket& p) const { p = pset1<ResPacket>(ResScalar(0)); }
template <typename RhsPacketType>
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const {
dest = pset1<RhsPacketType>(*b);
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
template <typename RhsPacketType>
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketType& dest) const {
loadRhs(b, dest);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { dest = ploaddup<LhsPacket>(a); }
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const { dest = ploadquad<RhsPacket>(b); }
template <typename LhsPacketType>
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const {
dest = ploaddup<LhsPacketType>(a);
}
template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp,
const LaneIdType&) const {
madd_impl(a, b, c, tmp, std::conditional_t<Vectorizable, true_type, false_type>());
}
EIGEN_STRONG_INLINE Packet1Xcf pmadd_scalar(const Packet1Xf& a, std::complex<float> b, const Packet1Xcf& c) const {
Packet1Xcf bp = pset1<Packet1Xcf>(b);
return Packet1Xcf(__riscv_vfmadd_vv_f32m1(a, bp.v, c.v, unpacket_traits<Packet1Xf>::size));
}
EIGEN_STRONG_INLINE Packet1Xcd pmadd_scalar(const Packet1Xd& a, std::complex<double> b, const Packet1Xcd& c) const {
Packet1Xcd bp = pset1<Packet1Xcd>(b);
return Packet1Xcd(__riscv_vfmadd_vv_f64m1(a, bp.v, c.v, unpacket_traits<Packet1Xd>::size));
}
#if EIGEN_RISCV64_DEFAULT_LMUL >= 2
EIGEN_STRONG_INLINE Packet2Xcf pmadd_scalar(const Packet2Xf& a, std::complex<float> b, const Packet2Xcf& c) const {
Packet2Xcf bp = pset1<Packet2Xcf>(b);
return Packet2Xcf(__riscv_vfmadd_vv_f32m2(a, bp.v, c.v, unpacket_traits<Packet2Xf>::size));
}
EIGEN_STRONG_INLINE Packet2Xcd pmadd_scalar(const Packet2Xd& a, std::complex<double> b, const Packet2Xcd& c) const {
Packet2Xcd bp = pset1<Packet2Xcd>(b);
return Packet2Xcd(__riscv_vfmadd_vv_f64m2(a, bp.v, c.v, unpacket_traits<Packet2Xd>::size));
}
#endif
#if EIGEN_RISCV64_DEFAULT_LMUL == 4
EIGEN_STRONG_INLINE Packet4Xcf pmadd_scalar(const Packet4Xf& a, std::complex<float> b, const Packet4Xcf& c) const {
Packet4Xcf bp = pset1<Packet4Xcf>(b);
return Packet4Xcf(__riscv_vfmadd_vv_f32m4(a, bp.v, c.v, unpacket_traits<Packet4Xf>::size));
}
EIGEN_STRONG_INLINE Packet4Xcd pmadd_scalar(const Packet4Xd& a, std::complex<double> b, const Packet4Xcd& c) const {
Packet4Xcd bp = pset1<Packet4Xcd>(b);
return Packet4Xcd(__riscv_vfmadd_vv_f64m4(a, bp.v, c.v, unpacket_traits<Packet4Xd>::size));
}
#endif
template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void madd_impl(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c,
RhsPacketType& tmp, const true_type&) const {
EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd_scalar(a, b, c);
}
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/,
const false_type&) const {
c += a * b;
}
template <typename LhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp,
const LaneIdType& lane) const {
madd(a, b.get(lane), c, tmp, lane);
}
template <typename ResPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void acc(const AccPacketType& c, const ResPacketType& alpha, ResPacketType& r) const {
conj_helper<ResPacketType, ResPacketType, false, ConjRhs> cj;
r = cj.pmadd(alpha, c, r);
}
};
template <typename RealScalar, bool ConjLhs_, int PacketSize_>
class gebp_traits<std::complex<RealScalar>, RealScalar, ConjLhs_, false, Architecture::RVV10, PacketSize_>
: public gebp_traits<RealScalar, std::complex<RealScalar>, ConjLhs_, false, Architecture::Generic, PacketSize_> {
public:
typedef std::complex<RealScalar> LhsScalar;
typedef RealScalar RhsScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
PACKET_DECL_COND_POSTFIX(_, Lhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Rhs, PacketSize_);
PACKET_DECL_COND_POSTFIX(_, Res, PacketSize_);
#undef PACKET_DECL_COND_POSTFIX
enum {
ConjLhs = ConjLhs_,
ConjRhs = false,
Vectorizable = unpacket_traits<LhsPacket_>::vectorizable && unpacket_traits<RhsPacket_>::vectorizable,
LhsPacketSize = Vectorizable ? unpacket_traits<LhsPacket_>::size : 1,
RhsPacketSize = Vectorizable ? unpacket_traits<RhsPacket_>::size : 1,
ResPacketSize = Vectorizable ? unpacket_traits<ResPacket_>::size : 1,
nr = 4,
mr = 3 * LhsPacketSize,
LhsProgress = LhsPacketSize,
RhsProgress = 1
};
typedef std::conditional_t<Vectorizable, LhsPacket_, LhsScalar> LhsPacket;
typedef RhsScalar RhsPacket;
typedef std::conditional_t<Vectorizable, ResPacket_, ResScalar> ResPacket;
typedef LhsPacket LhsPacket4Packing;
typedef QuadPacket<RhsPacket> RhsPacketx4;
typedef ResPacket AccPacket;
EIGEN_STRONG_INLINE void initAcc(AccPacket& p) { p = pset1<ResPacket>(ResScalar(0)); }
template <typename RhsPacketType>
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketType& dest) const {
dest = pset1<RhsPacketType>(*b);
}
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacketx4& dest) const {
pbroadcast4(b, dest.B_0, dest.B1, dest.B2, dest.B3);
}
template <typename RhsPacketType>
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar* b, RhsPacketType& dest) const {
loadRhs(b, dest);
}
EIGEN_STRONG_INLINE void updateRhs(const RhsScalar*, RhsPacketx4&) const {}
EIGEN_STRONG_INLINE void loadRhsQuad(const RhsScalar* b, RhsPacket& dest) const {
loadRhsQuad_impl(b, dest, std::conditional_t<RhsPacketSize == 16, true_type, false_type>());
}
EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar* b, RhsPacket& dest, const true_type&) const {
// FIXME we can do better!
// what we want here is a ploadheight
RhsScalar tmp[4] = {b[0], b[0], b[1], b[1]};
dest = ploadquad<RhsPacket>(tmp);
}
EIGEN_STRONG_INLINE void loadRhsQuad_impl(const RhsScalar* b, RhsPacket& dest, const false_type&) const {
eigen_internal_assert(RhsPacketSize <= 8);
dest = pset1<RhsPacket>(*b);
}
EIGEN_STRONG_INLINE void loadLhs(const LhsScalar* a, LhsPacket& dest) const { dest = pload<LhsPacket>(a); }
template <typename LhsPacketType>
EIGEN_STRONG_INLINE void loadLhsUnaligned(const LhsScalar* a, LhsPacketType& dest) const {
dest = ploadu<LhsPacketType>(a);
}
template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c, RhsPacketType& tmp,
const LaneIdType&) const {
madd_impl(a, b, c, tmp, std::conditional_t<Vectorizable, true_type, false_type>());
}
EIGEN_STRONG_INLINE Packet1Xcf pmadd_scalar(const Packet1Xcf& a, float b, const Packet1Xcf& c) const {
return Packet1Xcf(__riscv_vfmadd_vf_f32m1(a.v, b, c.v, unpacket_traits<Packet1Xf>::size));
}
EIGEN_STRONG_INLINE Packet1Xcd pmadd_scalar(const Packet1Xcd& a, double b, const Packet1Xcd& c) const {
return Packet1Xcd(__riscv_vfmadd_vf_f64m1(a.v, b, c.v, unpacket_traits<Packet1Xd>::size));
}
#if EIGEN_RISCV64_DEFAULT_LMUL >= 2
EIGEN_STRONG_INLINE Packet2Xcf pmadd_scalar(const Packet2Xcf& a, float b, const Packet2Xcf& c) const {
return Packet2Xcf(__riscv_vfmadd_vf_f32m2(a.v, b, c.v, unpacket_traits<Packet2Xf>::size));
}
EIGEN_STRONG_INLINE Packet2Xcd pmadd_scalar(const Packet2Xcd& a, double b, const Packet2Xcd& c) const {
return Packet2Xcd(__riscv_vfmadd_vf_f64m2(a.v, b, c.v, unpacket_traits<Packet2Xd>::size));
}
#endif
#if EIGEN_RISCV64_DEFAULT_LMUL == 4
EIGEN_STRONG_INLINE Packet4Xcf pmadd_scalar(const Packet4Xcf& a, float b, const Packet4Xcf& c) const {
return Packet4Xcf(__riscv_vfmadd_vf_f32m4(a.v, b, c.v, unpacket_traits<Packet4Xf>::size));
}
EIGEN_STRONG_INLINE Packet4Xcd pmadd_scalar(const Packet4Xcd& a, double b, const Packet4Xcd& c) const {
return Packet4Xcd(__riscv_vfmadd_vf_f64m4(a.v, b, c.v, unpacket_traits<Packet4Xd>::size));
}
#endif
template <typename LhsPacketType, typename RhsPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void madd_impl(const LhsPacketType& a, const RhsPacketType& b, AccPacketType& c,
RhsPacketType& tmp, const true_type&) const {
EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd_scalar(a, b, c);
}
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/,
const false_type&) const {
c += a * b;
}
template <typename LhsPacketType, typename AccPacketType, typename LaneIdType>
EIGEN_STRONG_INLINE void madd(const LhsPacketType& a, const RhsPacketx4& b, AccPacketType& c, RhsPacket& tmp,
const LaneIdType& lane) const {
madd(a, b.get(lane), c, tmp, lane);
}
template <typename ResPacketType, typename AccPacketType>
EIGEN_STRONG_INLINE void acc(const AccPacketType& c, const ResPacketType& alpha, ResPacketType& r) const {
conj_helper<ResPacketType, ResPacketType, ConjLhs, false> cj;
r = cj.pmadd(c, alpha, r);
}
};
#endif
} // namespace internal
} // namespace Eigen
#endif // EIGEN_RVV10_GENERAL_BLOCK_KERNEL_H