diff --git a/cranelift/codegen/meta/src/isa/riscv64.rs b/cranelift/codegen/meta/src/isa/riscv64.rs index 624fa16da52a..d163bb6241c7 100644 --- a/cranelift/codegen/meta/src/isa/riscv64.rs +++ b/cranelift/codegen/meta/src/isa/riscv64.rs @@ -154,6 +154,13 @@ pub(crate) fn define() -> TargetIsa { false, ); + let _has_zvbb = setting.add_bool( + "has_zvbb", + "has extension Zvbb?", + "Zvbb: Vector Basic Bit-manipulation", + false, + ); + let _has_zicsr = setting.add_bool( "has_zicsr", "has extension zicsr?", diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 034485209d3c..d9b274a17c39 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -1039,6 +1039,9 @@ (decl pure has_zicond () bool) (extern constructor has_zicond has_zicond) +(decl pure has_zvbb () bool) +(extern constructor has_zvbb has_zvbb) + ;;;; Type Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/riscv64/inst/vector.rs b/cranelift/codegen/src/isa/riscv64/inst/vector.rs index bc803939f172..00489b32be5c 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/vector.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/vector.rs @@ -451,6 +451,10 @@ impl VecAluOpRRR { | VecAluOpRRR::VmnandMM => 0b011101, VecAluOpRRR::VmsgtuVX | VecAluOpRRR::VmnorMM => 0b011110, VecAluOpRRR::VmsgtVX | VecAluOpRRR::VmfgeVF => 0b011111, + // Zvbb + VecAluOpRRR::VandnVV | VecAluOpRRR::VandnVX => 0b000001, + VecAluOpRRR::VrorVV | VecAluOpRRR::VrorVX => 0b010100, + VecAluOpRRR::VrolVV | VecAluOpRRR::VrolVX => 0b010101, } } @@ -480,7 +484,10 @@ impl VecAluOpRRR { | VecAluOpRRR::VmsltuVV | VecAluOpRRR::VmsltVV | VecAluOpRRR::VmsleuVV - | VecAluOpRRR::VmsleVV => VecOpCategory::OPIVV, + | VecAluOpRRR::VmsleVV + | VecAluOpRRR::VandnVV + | VecAluOpRRR::VrorVV + | VecAluOpRRR::VrolVV => VecOpCategory::OPIVV, VecAluOpRRR::VwaddVV | VecAluOpRRR::VwaddWV | VecAluOpRRR::VwadduVV @@ 
-538,7 +545,10 @@ impl VecAluOpRRR { | VecAluOpRRR::VmsleuVX | VecAluOpRRR::VmsleVX | VecAluOpRRR::VmsgtuVX - | VecAluOpRRR::VmsgtVX => VecOpCategory::OPIVX, + | VecAluOpRRR::VmsgtVX + | VecAluOpRRR::VandnVX + | VecAluOpRRR::VrorVX + | VecAluOpRRR::VrolVX => VecOpCategory::OPIVX, VecAluOpRRR::VfaddVV | VecAluOpRRR::VfsubVV | VecAluOpRRR::VfmulVV @@ -687,6 +697,8 @@ impl VecAluOpRRImm5 { VecAluOpRRImm5::VmsleVI => 0b011101, VecAluOpRRImm5::VmsgtuVI => 0b011110, VecAluOpRRImm5::VmsgtVI => 0b011111, + // Zvbb + VecAluOpRRImm5::VrorVI => 0b010100, } } @@ -714,7 +726,8 @@ impl VecAluOpRRImm5 { | VecAluOpRRImm5::VmsleuVI | VecAluOpRRImm5::VmsleVI | VecAluOpRRImm5::VmsgtuVI - | VecAluOpRRImm5::VmsgtVI => VecOpCategory::OPIVI, + | VecAluOpRRImm5::VmsgtVI + | VecAluOpRRImm5::VrorVI => VecOpCategory::OPIVI, } } @@ -728,7 +741,8 @@ impl VecAluOpRRImm5 { | VecAluOpRRImm5::VrgatherVI | VecAluOpRRImm5::VmvrV | VecAluOpRRImm5::VnclipWI - | VecAluOpRRImm5::VnclipuWI => true, + | VecAluOpRRImm5::VnclipuWI + | VecAluOpRRImm5::VrorVI => true, VecAluOpRRImm5::VaddVI | VecAluOpRRImm5::VrsubVI | VecAluOpRRImm5::VandVI @@ -815,6 +829,13 @@ impl VecAluOpRR { | VecAluOpRR::VfcvtfxV | VecAluOpRR::VfwcvtffV | VecAluOpRR::VfncvtffW => 0b010010, + // Zvbb + VecAluOpRR::VbrevV + | VecAluOpRR::Vbrev8V + | VecAluOpRR::Vrev8V + | VecAluOpRR::VclzV + | VecAluOpRR::VctzV + | VecAluOpRR::VcpopV => 0b010010, } } @@ -827,7 +848,13 @@ impl VecAluOpRR { | VecAluOpRR::VzextVF8 | VecAluOpRR::VsextVF2 | VecAluOpRR::VsextVF4 - | VecAluOpRR::VsextVF8 => VecOpCategory::OPMVV, + | VecAluOpRR::VsextVF8 + | VecAluOpRR::VbrevV + | VecAluOpRR::Vbrev8V + | VecAluOpRR::Vrev8V + | VecAluOpRR::VclzV + | VecAluOpRR::VctzV + | VecAluOpRR::VcpopV => VecOpCategory::OPMVV, VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF, VecAluOpRR::VfmvFS | VecAluOpRR::VfsqrtV @@ -879,6 +906,13 @@ impl VecAluOpRR { // These don't have a explicit encoding table, but Section 11.16 Vector Integer Move Instruction states: // > 
The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved. VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0, + // Zvbb VXUNARY0 + VecAluOpRR::Vbrev8V => 0b01000, + VecAluOpRR::Vrev8V => 0b01001, + VecAluOpRR::VbrevV => 0b01010, + VecAluOpRR::VclzV => 0b01100, + VecAluOpRR::VctzV => 0b01101, + VecAluOpRR::VcpopV => 0b01110, } } @@ -903,7 +937,13 @@ impl VecAluOpRR { | VecAluOpRR::VfcvtfxuV | VecAluOpRR::VfcvtfxV | VecAluOpRR::VfwcvtffV - | VecAluOpRR::VfncvtffW => true, + | VecAluOpRR::VfncvtffW + | VecAluOpRR::VbrevV + | VecAluOpRR::Vbrev8V + | VecAluOpRR::Vrev8V + | VecAluOpRR::VclzV + | VecAluOpRR::VctzV + | VecAluOpRR::VcpopV => true, VecAluOpRR::VmvSX | VecAluOpRR::VfmvSF | VecAluOpRR::VmvVV @@ -933,7 +973,13 @@ impl VecAluOpRR { | VecAluOpRR::VfcvtfxuV | VecAluOpRR::VfcvtfxV | VecAluOpRR::VfwcvtffV - | VecAluOpRR::VfncvtffW => RegClass::Vector, + | VecAluOpRR::VfncvtffW + | VecAluOpRR::VbrevV + | VecAluOpRR::Vbrev8V + | VecAluOpRR::Vrev8V + | VecAluOpRR::VclzV + | VecAluOpRR::VctzV + | VecAluOpRR::VcpopV => RegClass::Vector, VecAluOpRR::VmvXS => RegClass::Int, VecAluOpRR::VfmvFS => RegClass::Float, } @@ -958,7 +1004,13 @@ impl VecAluOpRR { | VecAluOpRR::VfcvtfxuV | VecAluOpRR::VfcvtfxV | VecAluOpRR::VfwcvtffV - | VecAluOpRR::VfncvtffW => RegClass::Vector, + | VecAluOpRR::VfncvtffW + | VecAluOpRR::VbrevV + | VecAluOpRR::Vbrev8V + | VecAluOpRR::Vrev8V + | VecAluOpRR::VclzV + | VecAluOpRR::VctzV + | VecAluOpRR::VcpopV => RegClass::Vector, VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float, VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int, } @@ -1006,6 +1058,12 @@ impl fmt::Display for VecAluOpRR { VecAluOpRR::VfcvtfxV => "vfcvt.f.x.v", VecAluOpRR::VfwcvtffV => "vfwcvt.f.f.v", VecAluOpRR::VfncvtffW => "vfncvt.f.f.w", + VecAluOpRR::VbrevV => "vbrev.v", + VecAluOpRR::Vbrev8V => "vbrev8.v", + VecAluOpRR::Vrev8V => "vrev8.v", + VecAluOpRR::VclzV => "vclz.v", + VecAluOpRR::VctzV => "vctz.v", 
+ VecAluOpRR::VcpopV => "vcpop.v", }) } } diff --git a/cranelift/codegen/src/isa/riscv64/inst_vector.isle b/cranelift/codegen/src/isa/riscv64/inst_vector.isle index da13fe08eac2..4ed04a81bf83 100644 --- a/cranelift/codegen/src/isa/riscv64/inst_vector.isle +++ b/cranelift/codegen/src/isa/riscv64/inst_vector.isle @@ -203,6 +203,14 @@ (VmfleVF) (VmfgtVF) (VmfgeVF) + + ;; Zvbb: Vector Basic Bit-manipulation + (VandnVV) + (VandnVX) + (VrolVV) + (VrolVX) + (VrorVV) + (VrorVX) )) @@ -260,6 +268,9 @@ (VmsleVI) (VmsgtuVI) (VmsgtVI) + + ;; Zvbb: Vector Basic Bit-manipulation + (VrorVI) )) ;; Imm only ALU Ops @@ -294,6 +305,14 @@ (VfcvtfxV) (VfwcvtffV) (VfncvtffW) + + ;; Zvbb: Vector Basic Bit-manipulation + (VbrevV) + (Vbrev8V) + (Vrev8V) + (VclzV) + (VctzV) + (VcpopV) )) ;; Returns the canonical destination type for a VecAluOpRRImm5. @@ -803,6 +822,60 @@ (rule (rv_vxor_vi vs2 imm mask vstate) (vec_alu_rr_imm5 (VecAluOpRRImm5.VxorVI) vs2 imm mask vstate)) +;;;; Zvbb: Vector Basic Bit-manipulation ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl rv_vandn_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vandn_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VandnVV) vs2 vs1 mask vstate)) + +(decl rv_vandn_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vandn_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VandnVX) vs2 vs1 mask vstate)) + +(decl rv_vrol_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vrol_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrolVV) vs2 vs1 mask vstate)) + +(decl rv_vrol_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vrol_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrolVX) vs2 vs1 mask vstate)) + +(decl rv_vror_vv (VReg VReg VecOpMasking VState) VReg) +(rule (rv_vror_vv vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrorVV) vs2 vs1 mask vstate)) + +(decl rv_vror_vx (VReg XReg VecOpMasking VState) VReg) +(rule (rv_vror_vx vs2 vs1 mask vstate) + (vec_alu_rrr (VecAluOpRRR.VrorVX) vs2 vs1 mask vstate)) + +(decl 
rv_vror_vi (VReg Imm5 VecOpMasking VState) VReg) +(rule (rv_vror_vi vs2 imm mask vstate) + (vec_alu_rr_imm5 (VecAluOpRRImm5.VrorVI) vs2 imm mask vstate)) + +(decl rv_vbrev_v (VReg VecOpMasking VState) VReg) +(rule (rv_vbrev_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VbrevV) vs mask vstate)) + +(decl rv_vbrev8_v (VReg VecOpMasking VState) VReg) +(rule (rv_vbrev8_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.Vbrev8V) vs mask vstate)) + +(decl rv_vrev8_v (VReg VecOpMasking VState) VReg) +(rule (rv_vrev8_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.Vrev8V) vs mask vstate)) + +(decl rv_vclz_v (VReg VecOpMasking VState) VReg) +(rule (rv_vclz_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VclzV) vs mask vstate)) + +(decl rv_vctz_v (VReg VecOpMasking VState) VReg) +(rule (rv_vctz_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VctzV) vs mask vstate)) + +(decl rv_vcpop_v (VReg VecOpMasking VState) VReg) +(rule (rv_vcpop_v vs mask vstate) + (vec_alu_rr (VecAluOpRR.VcpopV) vs mask vstate)) + ;; Helper for emitting the `vssrl.vi` instruction. 
;; ;; vd[i] = (unsigned(vs2[i]) >> imm) + r diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 259c13216298..0edfb93f4b58 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -765,6 +765,25 @@ (high XReg (rv_andn (value_regs_get x 1) (value_regs_get y 1)))) (value_regs low high))) +;; Zvbb: band(x, bnot(y)) -> vandn.vv +(rule 18 (lower (band (ty_supported_vec ty) x (bnot _ y))) + (if-let true (has_zvbb)) + (rv_vandn_vv x y (unmasked) ty)) + +(rule 19 (lower (band (ty_supported_vec ty) (bnot _ y) x)) + (if-let true (has_zvbb)) + (rv_vandn_vv x y (unmasked) ty)) + +(rule 20 (lower (band (ty_supported_vec ty) x (bnot _ (splat _ y)))) + (if-let true (has_zvbb)) + (if (ty_vector_not_float ty)) + (rv_vandn_vx x y (unmasked) ty)) + +(rule 21 (lower (band (ty_supported_vec ty) (bnot _ (splat _ y)) x)) + (if-let true (has_zvbb)) + (if (ty_vector_not_float ty)) + (rv_vandn_vx x y (unmasked) ty)) + (rule 9 (lower (band (ty_supported_vec ty) x y)) (rv_vand_vv x y (unmasked) ty)) @@ -1107,6 +1126,7 @@ (rv_rev8 x)) ;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (rule (lower (ctz (fits_in_64 ty) x)) (lower_ctz ty x)) @@ -1122,6 +1142,7 @@ (value_regs result (imm $I64 0)))) ;;;; Rules for `clz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (rule 0 (lower (clz (fits_in_64 ty) x)) (gen_cltz true x ty)) @@ -1249,6 +1270,11 @@ (result XReg (rv_add low high))) (value_regs result (imm $I64 0)))) +;; Zvbb: vcpop.v +(rule 5 (lower (popcnt (ty_supported_vec ty) x)) + (if-let true (has_zvbb)) + (rv_vcpop_v x (unmasked) ty)) + ;; Popcount using multiply. 
;; This is popcount64c() from ;; http://en.wikipedia.org/wiki/Hamming_weight @@ -1470,6 +1496,12 @@ ;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Zvbb: vrol.vx (vector rotl by a scalar amount; only the low register of the amount is read, so i128 amounts lower too) +(rule 5 (lower (rotl (ty_supported_vec ty) x y)) + (if-let true (has_zvbb)) + (if (ty_vector_not_float ty)) + (rv_vrol_vx x (value_regs_get y 0) (unmasked) ty)) + (rule 0 (lower (rotl (fits_in_64 ty) rs amount)) (let ((rs XReg (zext rs)) @@ -1526,6 +1558,12 @@ ;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Zvbb: vror.vx (vector rotr by a scalar amount; only the low register of the amount is read, so i128 amounts lower too) +(rule 5 (lower (rotr (ty_supported_vec ty) x y)) + (if-let true (has_zvbb)) + (if (ty_vector_not_float ty)) + (rv_vror_vx x (value_regs_get y 0) (unmasked) ty)) + (rule (lower (rotr (fits_in_64 ty) rs amount)) (let ((rs XReg (zext rs)) diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index a34f555bcf76..4154c9c82dcc 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -525,6 +525,10 @@ impl generated_code::Context for RV64IsleContext<'_, '_, MInst, Riscv64Backend> self.backend.isa_flags.has_zicond() } + fn has_zvbb(&mut self) -> bool { + self.backend.isa_flags.has_zvbb() + } + fn gen_reg_offset_amode(&mut self, base: Reg, offset: i64) -> AMode { AMode::RegOffset(base, offset) } diff --git a/cranelift/filetests/filetests/isa/riscv64/zvbb.clif b/cranelift/filetests/filetests/isa/riscv64/zvbb.clif new file mode 100644 index 000000000000..de5c8a8396c5 --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/zvbb.clif @@ -0,0 +1,364 @@ +test compile precise-output +set enable_multi_ret_implicit_sret +set unwind_info=false +target riscv64 has_v has_zvbb + +function %vandn_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: i8x16, v1: i8x16): + v2 = bnot v1 + v3 = band v0, v2 + return v3 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; 
vle8.v v8,-32(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v9,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vandn.vv v8,v8,v9 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, sp, 0x10 +; .byte 0x07, 0x84, 0x0f, 0x02 +; addi t6, sp, 0x20 +; .byte 0x87, 0x84, 0x0f, 0x02 +; .byte 0x57, 0x84, 0x84, 0x06 +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %vandn_i32x4(i32x4, i32x4) -> i32x4 { +block0(v0: i32x4, v1: i32x4): + v2 = bnot v1 + v3 = band v0, v2 + return v3 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v8,-32(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v9,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vandn.vv v8,v8,v9 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, sp, 0x10 +; .byte 0x07, 0x84, 0x0f, 0x02 +; addi t6, sp, 0x20 +; .byte 0x87, 0x84, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0x84, 0x84, 0x06 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %vandn_i64x2(i64x2, i64x2) -> i64x2 { +block0(v0: i64x2, v1: i64x2): + v2 = bnot v1 + v3 = band v0, v2 + return v3 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v8,-32(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vle8.v v9,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vandn.vv 
v8,v8,v9 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, sp, 0x10 +; .byte 0x07, 0x84, 0x0f, 0x02 +; addi t6, sp, 0x20 +; .byte 0x87, 0x84, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0x84, 0x84, 0x06 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %vcpop_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = popcnt v0 + return v1 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v8,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vcpop.v v8,v8 #avl=16, #vtype=(e8, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, sp, 0x10 +; .byte 0x07, 0x84, 0x0f, 0x02 +; .byte 0x57, 0x24, 0x87, 0x4a +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %vcpop_i32x4(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = popcnt v0 + return v1 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v8,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vcpop.v v8,v8 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, sp, 0x10 +; .byte 0x07, 0x84, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 
0xcd +; .byte 0x57, 0x24, 0x87, 0x4a +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %vrol_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = rotl v0, v1 + return v2 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v8,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vrol.vx v8,v8,a1 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, sp, 0x10 +; .byte 0x07, 0x84, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0xc4, 0x85, 0x56 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %vrol_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = rotl v0, v1 + return v2 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v8,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vrol.vx v8,v8,a1 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, sp, 0x10 +; .byte 0x07, 0x84, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0xc4, 0x85, 0x56 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %vror_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = rotr v0, v1 + return v2 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; 
block0: +; vle8.v v8,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vror.vx v8,v8,a1 #avl=4, #vtype=(e32, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, sp, 0x10 +; .byte 0x07, 0x84, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x02, 0xcd +; .byte 0x57, 0xc4, 0x85, 0x52 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + +function %vror_i64x2(i64x2, i64) -> i64x2 { +block0(v0: i64x2, v1: i64): + v2 = rotr v0, v1 + return v2 +} + +; VCode: +; addi sp,sp,-16 +; sd ra,8(sp) +; sd fp,0(sp) +; mv fp,sp +; block0: +; vle8.v v8,-16(incoming_arg) #avl=16, #vtype=(e8, m1, ta, ma) +; vror.vx v8,v8,a1 #avl=2, #vtype=(e64, m1, ta, ma) +; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma) +; ld ra,8(sp) +; ld fp,0(sp) +; addi sp,sp,16 +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; addi sp, sp, -0x10 +; sd ra, 8(sp) +; sd s0, 0(sp) +; mv s0, sp +; block1: ; offset 0x10 +; .byte 0x57, 0x70, 0x08, 0xcc +; addi t6, sp, 0x10 +; .byte 0x07, 0x84, 0x0f, 0x02 +; .byte 0x57, 0x70, 0x81, 0xcd +; .byte 0x57, 0xc4, 0x85, 0x52 +; .byte 0x57, 0x70, 0x08, 0xcc +; .byte 0x27, 0x04, 0x05, 0x02 +; ld ra, 8(sp) +; ld s0, 0(sp) +; addi sp, sp, 0x10 +; ret + diff --git a/cranelift/filetests/filetests/runtests/simd-bitops-zvbb.clif b/cranelift/filetests/filetests/runtests/simd-bitops-zvbb.clif new file mode 100644 index 000000000000..3533161bde54 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/simd-bitops-zvbb.clif @@ -0,0 +1,40 @@ +test interpret +test run +set enable_multi_ret_implicit_sret +target riscv64 has_v has_zvbb + +; vandn: band(x, bnot(y)) = x AND (NOT y) +; 0xFF AND (NOT 0x0F) = 0xFF AND 0xF0 = 0xF0 +function %vandn_i8x16(i8x16, i8x16) -> i8x16 { +block0(v0: 
i8x16, v1: i8x16): + v2 = bnot v1 + v3 = band v0, v2 + return v3 +} +; run: %vandn_i8x16([0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF 0xFF], [0x0F 0x0F 0x0F 0x0F 0x0F 0x0F 0x0F 0x0F 0x0F 0x0F 0x0F 0x0F 0x0F 0x0F 0x0F 0x0F]) == [0xF0 0xF0 0xF0 0xF0 0xF0 0xF0 0xF0 0xF0 0xF0 0xF0 0xF0 0xF0 0xF0 0xF0 0xF0 0xF0] + +; vcpop: popcnt of each element +function %vcpop_i8x16(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = popcnt v0 + return v1 +} +; run: %vcpop_i8x16([0x00 0x01 0x03 0x07 0x0F 0x1F 0x3F 0x7F 0xFF 0x80 0xC0 0xE0 0xF0 0xAA 0x55 0x12]) == [0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x01 0x02 0x03 0x04 0x04 0x04 0x02] + +; vrol: rotate left each 32-bit element by scalar amount +function %vrol_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = rotl v0, v1 + return v2 +} +; rotl(0x80000001, 1) = 0x00000003 +; run: %vrol_i32x4([0x80000001 0x80000001 0x80000001 0x80000001], 1) == [0x00000003 0x00000003 0x00000003 0x00000003] + +; vror: rotate right each 32-bit element by scalar amount +function %vror_i32x4(i32x4, i32) -> i32x4 { +block0(v0: i32x4, v1: i32): + v2 = rotr v0, v1 + return v2 +} +; rotr(0x80000001, 1) = 0xC0000000 +; run: %vror_i32x4([0x80000001 0x80000001 0x80000001 0x80000001], 1) == [0xC0000000 0xC0000000 0xC0000000 0xC0000000]