Avoid promotion of Arm __fp16 to float in Neon PacketMath
Using the arithmetic operators on Arm __fp16 values always
causes a promotion to float. We replace operator* with the vmulh_f16
intrinsic, which multiplies directly in half precision, to avoid this.
diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h
index a51fc88..30edd70 100644
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -4355,7 +4355,7 @@
prod = vmul_f16(prod, vrev64_f16(prod));
Eigen::half h;
- h.x = vget_lane_f16(prod, 0) * vget_lane_f16(prod, 1);
+ h.x = vmulh_f16(vget_lane_f16(prod, 0), vget_lane_f16(prod, 1));
return h;
}
@@ -4364,7 +4364,7 @@
float16x4_t prod;
prod = vmul_f16(a, vrev64_f16(a));
Eigen::half h;
- h.x = vget_lane_f16(prod, 0) * vget_lane_f16(prod, 1);
+ h.x = vmulh_f16(vget_lane_f16(prod, 0), vget_lane_f16(prod, 1));
return h;
}