Skip to content

Implement SVE2 BitwiseSelect, BitwiseSelectLeftInverted, BitwiseSelectRightInverted #115775

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2661,13 +2661,27 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_Sve2_BitwiseClearXor:
if (targetReg != op1Reg)
{
assert(targetReg != op2Reg);
assert(targetReg != op2Reg && targetReg != op3Reg);
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
}
// Always use the lane size D. It's a bitwise operation so this is fine for all integer vector types.
GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, INS_OPTS_SCALABLE_D);
break;

case NI_Sve2_BitwiseSelect:
case NI_Sve2_BitwiseSelectLeftInverted:
case NI_Sve2_BitwiseSelectRightInverted:
// op1: select, op2: left, op3: right
// Operation is destructive on the 'left' operand.
if (targetReg != op2Reg)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be op1Reg, because the first operand has the RMW semantics, not the second.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kunalspathak this didn't need addressing because the logic was already correct, right?

The instruction is BSL <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D and the operation is:

CheckSVEEnabled();
constant integer VL = CurrentVL;
bits(VL) operand1 = Z[dn, VL];
bits(VL) operand2 = Z[m, VL];
bits(VL) operand3 = Z[k, VL];

Z[dn, VL] = (operand1 AND operand3) OR (operand2 AND NOT(operand3));

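So, when tgt != op2, we end up emitting movprfx tgt, op2; BSL tgt, tgt, op3, op1. With op1 = select, op2 = left and op3 = right, that computes (left AND select) OR (right AND NOT(select)).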

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, it was already resolved in #115775 (comment)

{
assert(targetReg != op3Reg && targetReg != op1Reg);
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op2Reg);
}
// Always use the lane size D. It's a bitwise operation so this is fine for all integer vector types.
GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op3Reg, op1Reg, INS_OPTS_SCALABLE_D);
break;

default:
unreached();
}
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,9 @@ HARDWARE_INTRINSIC(Sve, ZipLow,
// SVE2 Intrinsics
#define FIRST_NI_Sve2 NI_Sve2_BitwiseClearXor
HARDWARE_INTRINSIC(Sve2, BitwiseClearXor, -1, 3, {INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, BitwiseSelect, -1, 3, {INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted, -1, 3, {INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, BitwiseSelectRightInverted, -1, 3, {INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_sve_bsl2n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, ShiftLeftAndInsert, -1, 3, {INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_sve_sli, INS_invalid, INS_invalid}, HW_Category_ShiftLeftByImmediate, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics)
#define LAST_NI_Sve2 NI_Sve2_ShiftLeftAndInsert

Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2290,6 +2290,9 @@ GenTree* LinearScan::getDelayFreeOperand(GenTreeHWIntrinsic* intrinsicTree, bool
break;

case NI_Sve_CreateBreakPropagateMask:
case NI_Sve2_BitwiseSelect:
case NI_Sve2_BitwiseSelectLeftInverted:
case NI_Sve2_BitwiseSelectRightInverted:
// RMW operates on the second op.
assert(isRMW);
delayFreeOp = intrinsicTree->Op(2);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,159 @@ internal Arm64() { }
/// </summary>
public static unsafe Vector<ulong> BitwiseClearXor(Vector<ulong> xor, Vector<ulong> value, Vector<ulong> mask) { throw new PlatformNotSupportedException(); }


// Bitwise select
//
// Every overload in this group has a body that unconditionally throws
// PlatformNotSupportedException. The overloads differ only in the Vector<T> element type
// (byte, short, int, long, sbyte, ushort, uint, ulong); each takes (select, left, right).
// Each summary records the corresponding native intrinsic signature and the BSL instruction form.
// NOTE(review): op1/op2/op3 in the summaries are the native intrinsic's operand names and do not
// appear to map positionally onto (select, left, right) -- confirm against the JIT codegen.

/// <summary>
/// svuint8_t svbsl[_u8](svuint8_t op1, svuint8_t op2, svuint8_t op3)
/// BSL Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<byte> BitwiseSelect(Vector<byte> select, Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svbsl[_s16](svint16_t op1, svint16_t op2, svint16_t op3)
/// BSL Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<short> BitwiseSelect(Vector<short> select, Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svbsl[_s32](svint32_t op1, svint32_t op2, svint32_t op3)
/// BSL Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<int> BitwiseSelect(Vector<int> select, Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svbsl[_s64](svint64_t op1, svint64_t op2, svint64_t op3)
/// BSL Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<long> BitwiseSelect(Vector<long> select, Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svbsl[_s8](svint8_t op1, svint8_t op2, svint8_t op3)
/// BSL Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<sbyte> BitwiseSelect(Vector<sbyte> select, Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svbsl[_u16](svuint16_t op1, svuint16_t op2, svuint16_t op3)
/// BSL Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<ushort> BitwiseSelect(Vector<ushort> select, Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svbsl[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
/// BSL Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<uint> BitwiseSelect(Vector<uint> select, Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svbsl[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
/// BSL Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<ulong> BitwiseSelect(Vector<ulong> select, Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }


// Bitwise select with first input inverted
//
// Every overload in this group has a body that unconditionally throws
// PlatformNotSupportedException. The overloads differ only in the Vector<T> element type
// (byte, short, int, long, sbyte, ushort, uint, ulong); each takes (select, left, right).
// Each summary records the corresponding native intrinsic signature and the BSL1N instruction form.
// NOTE(review): presumably the "first input" is the 'left' argument, as the method name
// suggests -- confirm against the BSL1N instruction definition.

/// <summary>
/// svuint8_t svbsl1n[_u8](svuint8_t op1, svuint8_t op2, svuint8_t op3)
/// BSL1N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<byte> BitwiseSelectLeftInverted(Vector<byte> select, Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svbsl1n[_s16](svint16_t op1, svint16_t op2, svint16_t op3)
/// BSL1N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<short> BitwiseSelectLeftInverted(Vector<short> select, Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svbsl1n[_s32](svint32_t op1, svint32_t op2, svint32_t op3)
/// BSL1N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<int> BitwiseSelectLeftInverted(Vector<int> select, Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svbsl1n[_s64](svint64_t op1, svint64_t op2, svint64_t op3)
/// BSL1N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<long> BitwiseSelectLeftInverted(Vector<long> select, Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svbsl1n[_s8](svint8_t op1, svint8_t op2, svint8_t op3)
/// BSL1N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<sbyte> BitwiseSelectLeftInverted(Vector<sbyte> select, Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svbsl1n[_u16](svuint16_t op1, svuint16_t op2, svuint16_t op3)
/// BSL1N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<ushort> BitwiseSelectLeftInverted(Vector<ushort> select, Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svbsl1n[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
/// BSL1N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<uint> BitwiseSelectLeftInverted(Vector<uint> select, Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svbsl1n[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
/// BSL1N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<ulong> BitwiseSelectLeftInverted(Vector<ulong> select, Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }


// Bitwise select with second input inverted
//
// Every overload in this group has a body that unconditionally throws
// PlatformNotSupportedException. The overloads differ only in the Vector<T> element type
// (byte, short, int, long, sbyte, ushort, uint, ulong); each takes (select, left, right).
// Each summary records the corresponding native intrinsic signature and the BSL2N instruction form.
// NOTE(review): presumably the "second input" is the 'right' argument, as the method name
// suggests -- confirm against the BSL2N instruction definition.

/// <summary>
/// svuint8_t svbsl2n[_u8](svuint8_t op1, svuint8_t op2, svuint8_t op3)
/// BSL2N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<byte> BitwiseSelectRightInverted(Vector<byte> select, Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svbsl2n[_s16](svint16_t op1, svint16_t op2, svint16_t op3)
/// BSL2N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<short> BitwiseSelectRightInverted(Vector<short> select, Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svbsl2n[_s32](svint32_t op1, svint32_t op2, svint32_t op3)
/// BSL2N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<int> BitwiseSelectRightInverted(Vector<int> select, Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svbsl2n[_s64](svint64_t op1, svint64_t op2, svint64_t op3)
/// BSL2N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<long> BitwiseSelectRightInverted(Vector<long> select, Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svbsl2n[_s8](svint8_t op1, svint8_t op2, svint8_t op3)
/// BSL2N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<sbyte> BitwiseSelectRightInverted(Vector<sbyte> select, Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svbsl2n[_u16](svuint16_t op1, svuint16_t op2, svuint16_t op3)
/// BSL2N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<ushort> BitwiseSelectRightInverted(Vector<ushort> select, Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svbsl2n[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
/// BSL2N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<uint> BitwiseSelectRightInverted(Vector<uint> select, Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svbsl2n[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
/// BSL2N Ztied1.D, Ztied1.D, Zop2.D, Zop3.D
/// </summary>
public static unsafe Vector<ulong> BitwiseSelectRightInverted(Vector<ulong> select, Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }

// Shift left and insert

/// <summary>
Expand Down
Loading
Loading