aco: Implement signed idot instructions on GFX11.

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19114>
This commit is contained in:
Georg Lehmann 2022-10-17 11:12:59 +02:00 committed by Marge Bot
parent cea6723243
commit 361b47b1f0
3 changed files with 27 additions and 5 deletions

View file

@@ -991,7 +991,8 @@ emit_vop3p_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, T
}
void
emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, bool clamp)
emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Temp dst, bool clamp,
unsigned neg_lo = 0)
{
Temp src[3] = {Temp(0, v1), Temp(0, v1), Temp(0, v1)};
bool has_sgpr = false;
@@ -1005,7 +1006,11 @@ emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Te
Builder bld(ctx->program, ctx->block);
bld.is_precise = instr->exact;
bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7).instr->vop3p().clamp = clamp;
VOP3P_instruction& vop3p =
bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7).instr->vop3p();
vop3p.clamp = clamp;
u_foreach_bit (i, neg_lo)
vop3p.neg_lo[i] = true;
}
void
@@ -2459,11 +2464,25 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
break;
}
case nir_op_sdot_4x8_iadd: {
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, false);
if (ctx->options->gfx_level >= GFX11)
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_iu8, dst, false, 0x3);
else
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, false);
break;
}
case nir_op_sdot_4x8_iadd_sat: {
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, true);
if (ctx->options->gfx_level >= GFX11)
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_iu8, dst, true, 0x3);
else
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_i8, dst, true);
break;
}
case nir_op_sudot_4x8_iadd: {
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_iu8, dst, false, 0x1);
break;
}
case nir_op_sudot_4x8_iadd_sat: {
emit_idot_instruction(ctx, instr, aco_opcode::v_dot4_i32_iu8, dst, true, 0x1);
break;
}
case nir_op_udot_4x8_uadd: {

View file

@@ -523,8 +523,10 @@ init_context(isel_context* ctx, nir_shader* shader)
case nir_op_sad_u8x4:
case nir_op_udot_4x8_uadd:
case nir_op_sdot_4x8_iadd:
case nir_op_sudot_4x8_iadd:
case nir_op_udot_4x8_uadd_sat:
case nir_op_sdot_4x8_iadd_sat:
case nir_op_sudot_4x8_iadd_sat:
case nir_op_udot_2x16_uadd:
case nir_op_sdot_2x16_iadd:
case nir_op_udot_2x16_uadd_sat:

View file

@@ -931,7 +931,8 @@ propagate_constants_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& i
/* The accumulation operand of dot product instructions ignores opsel. */
bool cannot_use_opsel =
(instr->opcode == aco_opcode::v_dot4_i32_i8 || instr->opcode == aco_opcode::v_dot2_i32_i16 ||
instr->opcode == aco_opcode::v_dot4_u32_u8 || instr->opcode == aco_opcode::v_dot2_u32_u16) &&
instr->opcode == aco_opcode::v_dot4_i32_iu8 || instr->opcode == aco_opcode::v_dot4_u32_u8 ||
instr->opcode == aco_opcode::v_dot2_u32_u16) &&
i == 2;
if (cannot_use_opsel)
return;