Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
53bb222
Add Wasm packed SIMD instruction encodings to JIT emitter
May 8, 2026
21f74bc
Add debug emitter unit tests for Wasm packed SIMD encodings
May 11, 2026
5e5de0f
Merge branch 'main' of github.com:dotnet/runtime into adamperlin/wasm…
May 12, 2026
93d4d06
Add temp emitter unit test block to wasm control flow stack and set o…
May 12, 2026
f4c1844
Use the same instrDesc for v128.const and v128.shuffle
May 13, 2026
917d763
Properly emit branch to skip over emitter unit test instructions, and…
May 14, 2026
1111af1
Remove unneeded include
May 14, 2026
98f0ffe
Potential fix for pull request finding
adamperlin May 14, 2026
bb42076
jit-format
May 20, 2026
b223400
Merge branch 'adamperlin/wasm-simd-encodings' of github.com:adamperli…
May 20, 2026
f1dd491
Merge branch 'main' of github.com:dotnet/runtime into adamperlin/wasm…
May 20, 2026
0f6956a
Fix some feedback
May 20, 2026
c505813
Add comment to skip over emitter unit tests for Wasm
May 20, 2026
12eecab
Make wasm emitter simd unit tests type check valid wasm
May 20, 2026
b5f7f72
Address PR review comments for Wasm SIMD encodings
adamperlin May 20, 2026
58727a4
jit-format
adamperlin May 20, 2026
bab42c8
More feedback
adamperlin May 21, 2026
e8c3a23
Add Wasm emitter bounds checks for SIMD lane indices
adamperlin May 21, 2026
3dbc9a8
Additional review feedback
adamperlin May 21, 2026
35b8b7c
Potential fix for pull request finding
adamperlin May 21, 2026
fbaf6a6
Additional feedback
adamperlin May 21, 2026
4b6201e
More feedback
adamperlin May 21, 2026
6bdc881
Change genDefineTempLabel for Wasm emitter unit tests
adamperlin May 21, 2026
6b6fdf1
Remove static
adamperlin May 22, 2026
017a70f
Additional feedback
adamperlin May 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,10 @@ class CodeGen final : public CodeGenInterface
void genAmd64EmitterUnitTestsCTEST();
#endif

#if defined(TARGET_WASM)
void genWasmEmitterUnitTestsSimd();
#endif

#endif // defined(DEBUG)

#ifdef TARGET_ARM64
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/codegeninterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,12 @@ class CodeGenInterface

bool IsEmbeddedBroadcastEnabled(instruction ins, GenTree* op);
#endif // TARGET_XARCH
#if defined(TARGET_WASM)
// On Wasm, the SIMD element size is stored in the upper 7 bits of the instruction info.
// The lowest bit is reserved as an FP flag, so the element-size field starts at bit
// InstInfoElemSizeShift (i.e. just above that flag bit).
static constexpr unsigned InstInfoElemSizeShift = 1;
// NOTE(review): presumably returns the SIMD element size recorded in the instruction info
// for 'ins'; the definition is not visible here -- confirm against the implementation.
static uint8_t instSimdElemSize(instruction ins);
#endif
Comment thread
adamperlin marked this conversation as resolved.
//-------------------------------------------------------------------------
// Liveness-related fields & methods
public:
Expand Down
15 changes: 15 additions & 0 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2703,6 +2703,7 @@ void CodeGen::genCodeForSetcc(GenTreeCC* setcc)
* Possible values for JitEmitUnitTestsSections:
* Amd64: all, sse2
* Arm64: all, general, advsimd, sve
* Wasm: all, simd
*/

#if defined(DEBUG)
Expand All @@ -2727,7 +2728,14 @@ void CodeGen::genEmitterUnitTests()

// Jump over the generated tests as they are not intended to be run.
BasicBlock* skipLabel = genCreateTempLabel();
#ifndef TARGET_WASM
inst_JMP(EJ_jmp, skipLabel);
#else
// On Wasm, we skip over the generated emitter test code by nesting it in a block where the
// first instruction branches to the end of the block.
GetEmitter()->emitIns_BlockTy(INS_block);
GetEmitter()->emitIns_J(INS_br, EA_4BYTE, 0, nullptr);
#endif
Comment thread
adamperlin marked this conversation as resolved.

// Add NOPs at the start and end for easier script parsing.
instGen(INS_nop);
Expand Down Expand Up @@ -2777,6 +2785,13 @@ void CodeGen::genEmitterUnitTests()
{
genArm64EmitterUnitTestsPac();
}

#elif defined(TARGET_WASM)
if (unitTestSectionAll || (strstr(unitTestSection, "simd") != nullptr))
{
genWasmEmitterUnitTestsSimd();
}
instGen(INS_end);
#endif

genDefineTempLabel(skipLabel);
Expand Down
232 changes: 232 additions & 0 deletions src/coreclr/jit/codegenwasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3445,6 +3445,238 @@ void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock)
GetEmitter()->emitIns_J(instr, EA_4BYTE, depth, tgtBlock);
}

#if defined(DEBUG)

//------------------------------------------------------------------------
// genWasmEmitterUnitTestsSimd: Exercise the packed SIMD instruction emit
//    functions added for Wasm (v128.const, extract/replace lane, shuffle,
//    load/store lane, and plain-opcode SIMD instructions).
//
// Notes:
//    This is a temporary debug-only test that verifies the encoding paths
//    do not assert or crash. Each instruction is emitted with valid stack
//    operands (pushed as constants immediately before it) and any result
//    is dropped afterwards, so the resulting bytecode is semantically
//    valid Wasm.
//
//    The helper macros below are local to this function and are #undef'd
//    before it returns. Every multi-statement helper is wrapped in
//    do { ... } while (0) so that it behaves as a single statement at the
//    use site.
//
void CodeGen::genWasmEmitterUnitTestsSimd()
{
    emitter* emit = GetEmitter();

    // Helper macros to push typed constants, ensuring valid stack state.
    // clang-format off
#define PUSH_V128(bytes) emit->emitIns_V128Imm(INS_v128_const, (bytes))
#define PUSH_I32(val)    emit->emitIns_I(INS_i32_const, EA_4BYTE, (val))
#define PUSH_I64(val)    emit->emitIns_I(INS_i64_const, EA_8BYTE, (val))
#define PUSH_F32(val)    emit->emitIns_I(INS_f32_const, EA_4BYTE, (val))
#define PUSH_F64(val)    emit->emitIns_I(INS_f64_const, EA_8BYTE, (val))
#define DROP             emit->emitIns(INS_drop)

    // Unary v128 -> result: push operand, emit instruction, drop result
#define TEST_UNARY_V128(bytes, ins)                                                                                    \
    do                                                                                                                 \
    {                                                                                                                  \
        PUSH_V128(bytes);                                                                                              \
        emit->emitIns((ins));                                                                                          \
        DROP;                                                                                                          \
    } while (0)

    // Binary v128 x v128 -> v128: push two operands, emit instruction, drop result
#define TEST_BINARY_V128(bytes, ins)                                                                                   \
    do                                                                                                                 \
    {                                                                                                                  \
        PUSH_V128(bytes);                                                                                              \
        PUSH_V128(bytes);                                                                                              \
        emit->emitIns((ins));                                                                                          \
        DROP;                                                                                                          \
    } while (0)

    // Extract lane: v128 -> scalar (i32/i64/f32/f64), then drop
#define TEST_EXTRACT_LANE(bytes, ins, attr, lane)                                                                      \
    do                                                                                                                 \
    {                                                                                                                  \
        PUSH_V128(bytes);                                                                                              \
        emit->emitIns_Lane((ins), (attr), (lane));                                                                     \
        DROP;                                                                                                          \
    } while (0)

    // Replace lane: [v128, scalar] -> v128, then drop
#define TEST_REPLACE_LANE_I32(bytes, ins, attr, lane)                                                                  \
    do                                                                                                                 \
    {                                                                                                                  \
        PUSH_V128(bytes);                                                                                              \
        PUSH_I32(42);                                                                                                  \
        emit->emitIns_Lane((ins), (attr), (lane));                                                                     \
        DROP;                                                                                                          \
    } while (0)

#define TEST_REPLACE_LANE_I64(bytes, ins, attr, lane)                                                                  \
    do                                                                                                                 \
    {                                                                                                                  \
        PUSH_V128(bytes);                                                                                              \
        PUSH_I64(42);                                                                                                  \
        emit->emitIns_Lane((ins), (attr), (lane));                                                                     \
        DROP;                                                                                                          \
    } while (0)

#define TEST_REPLACE_LANE_F32(bytes, ins, attr, lane)                                                                  \
    do                                                                                                                 \
    {                                                                                                                  \
        PUSH_V128(bytes);                                                                                              \
        PUSH_F32(0);                                                                                                   \
        emit->emitIns_Lane((ins), (attr), (lane));                                                                     \
        DROP;                                                                                                          \
    } while (0)

#define TEST_REPLACE_LANE_F64(bytes, ins, attr, lane)                                                                  \
    do                                                                                                                 \
    {                                                                                                                  \
        PUSH_V128(bytes);                                                                                              \
        PUSH_F64(0);                                                                                                   \
        emit->emitIns_Lane((ins), (attr), (lane));                                                                     \
        DROP;                                                                                                          \
    } while (0)

    // Load lane: [i32_addr, v128] -> v128, then drop
#define TEST_LOAD_LANE(bytes, ins, attr, offset, lane)                                                                 \
    do                                                                                                                 \
    {                                                                                                                  \
        PUSH_I32(0);                                                                                                   \
        PUSH_V128(bytes);                                                                                              \
        emit->emitIns_MemargLane((ins), (attr), (offset), (lane));                                                     \
        DROP;                                                                                                          \
    } while (0)

    // Store lane: [i32_addr, v128] -> void (nothing is left on the stack, so no drop)
#define TEST_STORE_LANE(bytes, ins, attr, offset, lane)                                                                \
    do                                                                                                                 \
    {                                                                                                                  \
        PUSH_I32(0);                                                                                                   \
        PUSH_V128(bytes);                                                                                              \
        emit->emitIns_MemargLane((ins), (attr), (offset), (lane));                                                     \
    } while (0)

    // Shuffle: [v128, v128] -> v128, then drop
#define TEST_SHUFFLE(bytes, shuffleBytes)                                                                              \
    do                                                                                                                 \
    {                                                                                                                  \
        PUSH_V128(bytes);                                                                                              \
        PUSH_V128(bytes);                                                                                              \
        emit->emitIns_V128Imm(INS_i8x16_shuffle, (shuffleBytes));                                                      \
        DROP;                                                                                                          \
    } while (0)
    // clang-format on

    // --- IF_V128: v128.const with 16 raw bytes ---
    const uint8_t v128Bytes[16] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
                                   0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F};
    PUSH_V128(v128Bytes);
    DROP;

    // All-zeros and all-ones constants
    const uint8_t v128Zeros[16] = {0};
    PUSH_V128(v128Zeros);
    DROP;

    const uint8_t v128Ones[16] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
                                  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
    PUSH_V128(v128Ones);
    DROP;

    // --- IF_LANE: extract/replace lane instructions ---
    // i8x16 lanes (0..15)
    TEST_EXTRACT_LANE(v128Ones, INS_i8x16_extract_lane_s, EA_1BYTE, 0);
    TEST_EXTRACT_LANE(v128Ones, INS_i8x16_extract_lane_u, EA_1BYTE, 15);
    TEST_REPLACE_LANE_I32(v128Ones, INS_i8x16_replace_lane, EA_1BYTE, 7);

    // i16x8 lanes (0..7)
    TEST_EXTRACT_LANE(v128Ones, INS_i16x8_extract_lane_s, EA_2BYTE, 0);
    TEST_EXTRACT_LANE(v128Ones, INS_i16x8_extract_lane_u, EA_2BYTE, 7);
    TEST_REPLACE_LANE_I32(v128Ones, INS_i16x8_replace_lane, EA_2BYTE, 3);

    // i32x4 lanes (0..3)
    TEST_EXTRACT_LANE(v128Ones, INS_i32x4_extract_lane, EA_4BYTE, 0);
    TEST_REPLACE_LANE_I32(v128Ones, INS_i32x4_replace_lane, EA_4BYTE, 3);

    // i64x2 lanes (0..1)
    TEST_EXTRACT_LANE(v128Ones, INS_i64x2_extract_lane, EA_8BYTE, 0);
    TEST_REPLACE_LANE_I64(v128Ones, INS_i64x2_replace_lane, EA_8BYTE, 1);

    // f32x4 lanes (0..3)
    TEST_EXTRACT_LANE(v128Ones, INS_f32x4_extract_lane, EA_4BYTE, 3);
    TEST_REPLACE_LANE_F32(v128Ones, INS_f32x4_replace_lane, EA_4BYTE, 0);

    // f64x2 lanes (0..1)
    TEST_EXTRACT_LANE(v128Ones, INS_f64x2_extract_lane, EA_8BYTE, 0);
    TEST_REPLACE_LANE_F64(v128Ones, INS_f64x2_replace_lane, EA_8BYTE, 1);

    // --- IF_MEMARG_LANE: load/store lane with memarg ---
    TEST_LOAD_LANE(v128Ones, INS_v128_load8_lane, EA_1BYTE, 0, 5);
    TEST_LOAD_LANE(v128Ones, INS_v128_load16_lane, EA_2BYTE, 16, 3);
    TEST_LOAD_LANE(v128Ones, INS_v128_load32_lane, EA_4BYTE, 64, 2);
    TEST_LOAD_LANE(v128Ones, INS_v128_load64_lane, EA_8BYTE, 128, 1);
    TEST_STORE_LANE(v128Ones, INS_v128_store8_lane, EA_1BYTE, 0, 0);
    TEST_STORE_LANE(v128Ones, INS_v128_store16_lane, EA_2BYTE, 8, 7);
    TEST_STORE_LANE(v128Ones, INS_v128_store32_lane, EA_4BYTE, 32, 1);
    TEST_STORE_LANE(v128Ones, INS_v128_store64_lane, EA_8BYTE, 256, 0);

    // --- IF_V128: i8x16.shuffle with 16 lane-index bytes ---
    // Identity shuffle
    const uint8_t identityShuffle[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
    TEST_SHUFFLE(v128Bytes, identityShuffle);

    // Reverse bytes
    const uint8_t reverseShuffle[16] = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
    TEST_SHUFFLE(v128Bytes, reverseShuffle);

    // Cross-operand shuffle (indices 16..31 refer to the second operand)
    const uint8_t crossShuffle[16] = {0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31};
    TEST_SHUFFLE(v128Bytes, crossShuffle);

    // --- IF_OPCODE: plain opcode SIMD instructions (representative sample) ---
    // Splat operations: push scalar, splat to v128, drop
    PUSH_I32(1);
    emit->emitIns(INS_i8x16_splat);
    DROP;

    PUSH_I32(2);
    emit->emitIns(INS_i16x8_splat);
    DROP;

    PUSH_I32(3);
    emit->emitIns(INS_i32x4_splat);
    DROP;

    PUSH_I64(4);
    emit->emitIns(INS_i64x2_splat);
    DROP;

    PUSH_F32(0);
    emit->emitIns(INS_f32x4_splat);
    DROP;

    PUSH_F64(0);
    emit->emitIns(INS_f64x2_splat);
    DROP;

    // Swizzle: [v128, v128] -> v128
    TEST_BINARY_V128(v128Ones, INS_i8x16_swizzle);

    // A few comparisons: [v128, v128] -> v128
    TEST_BINARY_V128(v128Ones, INS_i8x16_eq);
    TEST_BINARY_V128(v128Ones, INS_i32x4_ne);
    TEST_BINARY_V128(v128Ones, INS_f64x2_lt);

    // A few arithmetic ops
    TEST_BINARY_V128(v128Ones, INS_i8x16_add);
    TEST_BINARY_V128(v128Ones, INS_i32x4_mul);
    TEST_UNARY_V128(v128Ones, INS_f32x4_sqrt);
    TEST_UNARY_V128(v128Ones, INS_f64x2_neg);

    // Bitwise ops
    TEST_UNARY_V128(v128Ones, INS_v128_not);
    TEST_BINARY_V128(v128Ones, INS_v128_and);
    TEST_BINARY_V128(v128Ones, INS_v128_or);
    TEST_BINARY_V128(v128Ones, INS_v128_xor);
    TEST_BINARY_V128(v128Ones, INS_v128_andnot);

    // Bitmask / any_true / all_true: v128 -> i32
    TEST_UNARY_V128(v128Ones, INS_v128_any_true);
    TEST_UNARY_V128(v128Ones, INS_i8x16_all_true);
    TEST_UNARY_V128(v128Ones, INS_i32x4_bitmask);

    // Conversion operations: v128 -> v128
    TEST_UNARY_V128(v128Ones, INS_f32x4_convert_s_i32x4);
    TEST_UNARY_V128(v128Ones, INS_f64x2_convert_low_u_i32x4);
    TEST_UNARY_V128(v128Ones, INS_i32x4_trunc_sat_s_f32x4);

    // Keep the helper macros scoped to this function.
#undef PUSH_V128
#undef PUSH_I32
#undef PUSH_I64
#undef PUSH_F32
#undef PUSH_F64
#undef DROP
#undef TEST_UNARY_V128
#undef TEST_BINARY_V128
#undef TEST_EXTRACT_LANE
#undef TEST_REPLACE_LANE_I32
#undef TEST_REPLACE_LANE_I64
#undef TEST_REPLACE_LANE_F32
#undef TEST_REPLACE_LANE_F64
#undef TEST_LOAD_LANE
#undef TEST_STORE_LANE
#undef TEST_SHUFFLE
}

#endif // defined(DEBUG)

void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* code))
{
IAllocator* allowZeroAlloc = new (m_compiler, CMK_GC) CompIAllocator(m_compiler->getAllocatorGC());
Expand Down
49 changes: 49 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,9 @@ class emitter
// TODO-LoongArch64: not include SIMD-vector.
static_assert(INS_count <= 512);
instruction _idIns : 9;
#elif defined(TARGET_WASM)
static_assert(INS_count <= 512);
instruction _idIns : 9;
#else
static_assert(INS_count <= 256);
instruction _idIns : 8;
Expand Down Expand Up @@ -1321,6 +1324,17 @@ class emitter
{
return _idInsFmt == IF_TRY_TABLE;
}

// Reports whether this descriptor uses the IF_V128 instruction format.
bool idIsV128Imm() const
{
    const bool usesV128Format = (_idInsFmt == IF_V128);
    return usesV128Format;
}

// Reports whether this descriptor uses the IF_MEMARG_LANE instruction format.
bool idIsMemargLaneImm() const
{
    const bool usesMemargLaneFormat = (_idInsFmt == IF_MEMARG_LANE);
    return usesMemargLaneFormat;
}

#endif

#ifdef TARGET_ARM64
Expand Down Expand Up @@ -2414,6 +2428,41 @@ class emitter
imm = i;
}
};

// Instruction descriptor that carries a 16-byte immediate payload in addition
// to the base instrDesc fields.
// NOTE(review): presumably used for IF_V128-format instructions; confirm against the emitter.
struct instrDescV128Imm : instrDesc
{
    // Not default-constructible.
    instrDescV128Imm() = delete;

    // The raw 16-byte immediate.
    uint8_t v128Bytes[16];

    // Returns a pointer to the stored 16-byte immediate.
    const uint8_t* idV128Const() const
    {
        return &v128Bytes[0];
    }

    // Copies 16 bytes from 'bytes' into the descriptor; 'bytes' must not be null.
    void idV128Const(const uint8_t bytes[16])
    {
        assert(bytes != nullptr);
        memcpy(v128Bytes, bytes, sizeof(v128Bytes));
    }
};

// Instruction descriptor that extends instrDescCns with a one-byte lane index.
// NOTE(review): presumably used for IF_MEMARG_LANE-format instructions; confirm against the emitter.
struct instrDescMemargLane : instrDescCns
{
    // Not default-constructible.
    instrDescMemargLane() = delete;

    // The lane index stored for this instruction.
    uint8_t lane;

    // Returns the stored lane index.
    uint8_t idLaneIdx() const
    {
        return lane;
    }

    // Stores 'laneIdx' as the lane index.
    void idLaneIdx(uint8_t laneIdx)
    {
        lane = laneIdx;
    }
};
#endif // TARGET_WASM

#ifdef TARGET_RISCV64
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/emitfmtswasm.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ IF_DEF(CALL_INDIRECT, IS_NONE, NONE) // <opcode> <ULEB128 immediate> <ULEB128 im
IF_DEF(MEMIDX_MEMIDX, IS_NONE, NONE) // <memory index> <memory index>
IF_DEF(TRY_TABLE, IS_NONE, NONE) // <opcode> <sig = 0x40> <len = 0x01>
IF_DEF(CATCH_DECL, IS_NONE, NONE) // <catch-type> <ULEB128 immediate (type reloc)> <ULEB128 immediate>
IF_DEF(V128, IS_NONE, NONE) // <opcode> <16 raw bytes>
IF_DEF(LANE, IS_NONE, NONE) // <opcode> <u8 lane index>
IF_DEF(MEMARG_LANE, IS_NONE, NONE) // <opcode> <memarg> <u8 lane index>

#undef IF_DEF
#endif // !DEFINE_ID_OPS
Expand Down
Loading
Loading