mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 13:48:06 +02:00
aco: Implement signed idot instructions on GFX11.
Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19114>
This commit is contained in:
parent
cea6723243
commit
361b47b1f0
3 changed files with 27 additions and 5 deletions
|
|
@@ -991,7 +991,8 @@ emit_vop3p_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, T
|
|||
}
|
||||
|
||||
void
|
||||
emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, bool clamp)
|
||||
emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, bool clamp,
|
||||
unsigned neg_lo = 0)
|
||||
{
|
||||
Temp src[3] = {Temp(0, v1), Temp(0, v1), Temp(0, v1)};
|
||||
bool has_sgpr = false;
|
||||
|
|
@@ -1005,7 +1006,11 @@ emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Te
|
|||
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
bld.is_precise = instr->exact;
|
||||
bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7).instr->vop3p().clamp = clamp;
|
||||
VOP3P_instruction& vop3p =
|
||||
bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7).instr->vop3p();
|
||||
vop3p.clamp = clamp;
|
||||
u_foreach_bit (i, neg_lo)
|
||||
vop3p.neg_lo[i] = true;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@@ -2459,11 +2464,25 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
break;
|
||||
}
|
||||
case nir_op_sdot_4x8_iadd: {
|
||||
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, false);
|
||||
if (ctx->options->gfx_level >= GFX11)
|
||||
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_iu8, dst, false, 0x3);
|
||||
else
|
||||
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, false);
|
||||
break;
|
||||
}
|
||||
case nir_op_sdot_4x8_iadd_sat: {
|
||||
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, true);
|
||||
if (ctx->options->gfx_level >= GFX11)
|
||||
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_iu8, dst, true, 0x3);
|
||||
else
|
||||
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, true);
|
||||
break;
|
||||
}
|
||||
case nir_op_sudot_4x8_iadd: {
|
||||
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_iu8, dst, false, 0x1);
|
||||
break;
|
||||
}
|
||||
case nir_op_sudot_4x8_iadd_sat: {
|
||||
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_iu8, dst, true, 0x1);
|
||||
break;
|
||||
}
|
||||
case nir_op_udot_4x8_uadd: {
|
||||
|
|
|
|||
|
|
@@ -523,8 +523,10 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
case nir_op_sad_u8x4:
|
||||
case nir_op_udot_4x8_uadd:
|
||||
case nir_op_sdot_4x8_iadd:
|
||||
case nir_op_sudot_4x8_iadd:
|
||||
case nir_op_udot_4x8_uadd_sat:
|
||||
case nir_op_sdot_4x8_iadd_sat:
|
||||
case nir_op_sudot_4x8_iadd_sat:
|
||||
case nir_op_udot_2x16_uadd:
|
||||
case nir_op_sdot_2x16_iadd:
|
||||
case nir_op_udot_2x16_uadd_sat:
|
||||
|
|
|
|||
|
|
@@ -931,7 +931,8 @@ propagate_constants_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& i
|
|||
/* The accumulation operand of dot product instructions ignores opsel. */
|
||||
bool cannot_use_opsel =
|
||||
(instr->opcode == aco_opcode::v_dot4_i32_i8 || instr->opcode == aco_opcode::v_dot2_i32_i16 ||
|
||||
instr->opcode == aco_opcode::v_dot4_u32_u8 || instr->opcode == aco_opcode::v_dot2_u32_u16) &&
|
||||
instr->opcode == aco_opcode::v_dot4_i32_iu8 || instr->opcode == aco_opcode::v_dot4_u32_u8 ||
|
||||
instr->opcode == aco_opcode::v_dot2_u32_u16) &&
|
||||
i == 2;
|
||||
if (cannot_use_opsel)
|
||||
return;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue