mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
aco/optimizer: use new helpers to apply packed fsat
No Foz-DB changes.
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38658>
This commit is contained in:
parent
0a82c8cb13
commit
d60ce9ceef
2 changed files with 58 additions and 53 deletions
|
|
@ -3768,52 +3768,6 @@ apply_load_extract(opt_ctx& ctx, aco_ptr<Instruction>& extract, Instruction* loa
|
|||
return load;
|
||||
}
|
||||
|
||||
void
|
||||
propagate_swizzles(VALU_instruction* instr, bool opsel_lo, bool opsel_hi)
|
||||
{
|
||||
/* propagate swizzles which apply to a result down to the instruction's operands:
|
||||
* result = a.xy + b.xx -> result.yx = a.yx + b.xx */
|
||||
uint8_t tmp_lo = instr->opsel_lo;
|
||||
uint8_t tmp_hi = instr->opsel_hi;
|
||||
uint8_t neg_lo = instr->neg_lo;
|
||||
uint8_t neg_hi = instr->neg_hi;
|
||||
if (opsel_lo == 1) {
|
||||
instr->opsel_lo = tmp_hi;
|
||||
instr->neg_lo = neg_hi;
|
||||
}
|
||||
if (opsel_hi == 0) {
|
||||
instr->opsel_hi = tmp_lo;
|
||||
instr->neg_hi = neg_lo;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
VALU_instruction* vop3p = &instr->valu();
|
||||
|
||||
/* apply clamp */
|
||||
if (instr->opcode == aco_opcode::v_pk_mul_f16 && instr->operands[1].constantEquals(0x3C00) &&
|
||||
vop3p->clamp && instr->operands[0].isTemp() && ctx.uses[instr->operands[0].tempId()] == 1 &&
|
||||
!vop3p->opsel_lo[1] && !vop3p->opsel_hi[1]) {
|
||||
|
||||
Instruction* op_instr = ctx.info[instr->operands[0].tempId()].parent_instr;
|
||||
const aco_alu_opcode_info& opcode_info = instr_info.alu_opcode_infos[(int)op_instr->opcode];
|
||||
aco_type op_type = opcode_info.def_types[0];
|
||||
if (op_instr->isVOP3P() && op_type.num_components == 2 &&
|
||||
op_type.base_type == aco_base_type_float && op_type.bit_size == 16 &&
|
||||
opcode_info.output_modifiers) {
|
||||
op_instr->valu().clamp = true;
|
||||
propagate_swizzles(&op_instr->valu(), vop3p->opsel_lo[0], vop3p->opsel_hi[0]);
|
||||
instr->definitions[0].swapTemp(op_instr->definitions[0]);
|
||||
ctx.info[op_instr->definitions[0].tempId()].parent_instr = op_instr;
|
||||
ctx.info[instr->definitions[0].tempId()].parent_instr = instr.get();
|
||||
ctx.uses[instr->definitions[0].tempId()]--;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
|
|
@ -4001,7 +3955,8 @@ apply_output_impl(opt_ctx& ctx, aco_ptr<Instruction>& instr, Instruction* parent
|
|||
else if (instr->opcode == aco_opcode::s_abs_i32)
|
||||
return apply_s_abs(ctx, instr, parent);
|
||||
else if (instr->opcode == aco_opcode::v_mul_f64 || instr->opcode == aco_opcode::v_mul_f64_e64 ||
|
||||
instr->opcode == aco_opcode::v_mul_f32 || instr->opcode == aco_opcode::v_mul_f16)
|
||||
instr->opcode == aco_opcode::v_mul_f32 || instr->opcode == aco_opcode::v_mul_f16 ||
|
||||
instr->opcode == aco_opcode::v_pk_mul_f16)
|
||||
return apply_output_mul(ctx, instr, parent);
|
||||
else
|
||||
UNREACHABLE("unhandled opcode");
|
||||
|
|
@ -4021,7 +3976,8 @@ apply_output(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
case aco_opcode::v_mul_f64:
|
||||
case aco_opcode::v_mul_f64_e64:
|
||||
case aco_opcode::v_mul_f32:
|
||||
case aco_opcode::v_mul_f16: break;
|
||||
case aco_opcode::v_mul_f16:
|
||||
case aco_opcode::v_pk_mul_f16: break;
|
||||
default: return false;
|
||||
}
|
||||
|
||||
|
|
@ -4291,11 +4247,6 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
apply_insert(ctx, instr);
|
||||
}
|
||||
|
||||
if (instr->isVOP3P() && instr->opcode != aco_opcode::v_fma_mix_f32 &&
|
||||
instr->opcode != aco_opcode::v_fma_mixlo_f16) {
|
||||
combine_vop3p(ctx, instr);
|
||||
}
|
||||
|
||||
if (instr->isDPP())
|
||||
return;
|
||||
|
||||
|
|
|
|||
|
|
@ -2317,3 +2317,57 @@ BEGIN_TEST(optimizer.pk_fma)
|
|||
finish_opt_test();
|
||||
}
|
||||
END_TEST
|
||||
|
||||
/* Build a v_cvt_pkrtz_f16_f32 of op1/op2 into def, choosing the encoding by gfx level:
 * the _e64 opcode via vop3 for GFX8 up to (but excluding) GFX10, the plain opcode via vop2
 * otherwise. */
static Builder::Result
cvt_pk_rtz(Definition def, Builder::Op op1, Builder::Op op2)
{
   const auto gfx = bld.program->gfx_level;
   const bool use_e64 = gfx >= GFX8 && gfx < GFX10;

   if (!use_e64)
      return bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, def, op1, op2);

   return bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, def, op1, op2);
}
|
||||
|
||||
BEGIN_TEST(optimizer.pk_mul_pk_cvt)
|
||||
for (unsigned i = GFX9; i <= GFX10; i++) {
|
||||
//>> v1: %a:v[0], v1: %b:v[1] = p_startpgm
|
||||
if (!setup_cs("v1 v1", (amd_gfx_level)i))
|
||||
continue;
|
||||
|
||||
Temp a = inputs[0];
|
||||
Temp b = inputs[1];
|
||||
|
||||
//~gfx9! v1: %res0 = v_cvt_pkrtz_f16_f32_e64 %a, %b
|
||||
//~gfx10! v1: %res0 = v_cvt_pkrtz_f16_f32 %a, %b
|
||||
//! p_unit_test 0, %res0
|
||||
Builder::Result cvt = cvt_pk_rtz(bld.def(v1), a, b);
|
||||
Builder::Result mul =
|
||||
bld.vop3p(aco_opcode::v_pk_mul_f16, bld.def(v1), cvt, Operand::c16(0x3c00), 0x0, 0x1);
|
||||
writeout(0, mul);
|
||||
|
||||
//~gfx9! v1: %res1 = v_cvt_pkrtz_f16_f32_e64 -%b, %b
|
||||
//~gfx10! v1: %res1 = v_cvt_pkrtz_f16_f32 -%b, %b
|
||||
//! p_unit_test 1, %res1
|
||||
cvt = cvt_pk_rtz(bld.def(v1), a, b);
|
||||
mul = bld.vop3p(aco_opcode::v_pk_mul_f16, bld.def(v1), cvt, Operand::c16(0x3c00), 0x1, 0x1);
|
||||
mul->valu().neg_lo[1] = true;
|
||||
writeout(1, mul);
|
||||
|
||||
//~gfx9! v1: %tmp = v_cvt_pkrtz_f16_f32_e64 %a, %b
|
||||
//~gfx10! v1: %tmp = v_cvt_pkrtz_f16_f32 %a, %b
|
||||
//! v1: %res2 = v_pk_mul_f16 %tmp, 1.0.xx clamp
|
||||
//! p_unit_test 2, %res2
|
||||
cvt = cvt_pk_rtz(bld.def(v1), a, b);
|
||||
mul = bld.vop3p(aco_opcode::v_pk_mul_f16, bld.def(v1), cvt, Operand::c16(0x3c00), 0x0, 0x1);
|
||||
mul->valu().clamp = true;
|
||||
writeout(2, mul);
|
||||
|
||||
//~gfx9! v1: %res3 = v_cvt_pkrtz_f16_f32_e64 %b, %a
|
||||
//~gfx10! v1: %res3 = v_cvt_pkrtz_f16_f32 %b, %a
|
||||
//! p_unit_test 3, %res3
|
||||
cvt = cvt_pk_rtz(bld.def(v1), a, b);
|
||||
mul = bld.vop3p(aco_opcode::v_pk_mul_f16, bld.def(v1), cvt, Operand::c16(0x3c00), 0x1, 0x0);
|
||||
writeout(3, mul);
|
||||
|
||||
finish_opt_test();
|
||||
}
|
||||
END_TEST
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue