mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 05:18:08 +02:00
aco/ra: convert bitwise instruction to gfx11+ 16bit on demand
The 32bit versions are smaller, allow more optimizations and VOPD, so only use the 16bit opcodes if necessary. Foz-DB Navi31: Totals from 84 (0.10% of 80237) affected shaders: Instrs: 176673 -> 176347 (-0.18%); split: -0.20%, +0.01% CodeSize: 970148 -> 969716 (-0.04%); split: -0.08%, +0.03% VGPRs: 5876 -> 5864 (-0.20%) Latency: 2805974 -> 2805674 (-0.01%); split: -0.02%, +0.01% InvThroughput: 769007 -> 768738 (-0.03%); split: -0.04%, +0.01% VClause: 2593 -> 2597 (+0.15%) Copies: 23749 -> 23487 (-1.10%); split: -1.11%, +0.00% VALU: 107124 -> 106862 (-0.24%); split: -0.25%, +0.00% Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35919>
This commit is contained in:
parent
404e1f13e8
commit
a6a6c2f691
2 changed files with 60 additions and 0 deletions
|
|
@ -271,6 +271,12 @@ withoutVOP3(Format format)
|
|||
return (Format)((uint32_t)format & ~((uint32_t)Format::VOP3));
|
||||
}
|
||||
|
||||
/* Returns `format` with every bit that is set in Format::VOP2 cleared;
 * all other bits are passed through unchanged.
 */
constexpr Format
withoutVOP2(Format format)
{
   return static_cast<Format>(static_cast<uint32_t>(format) &
                              ~static_cast<uint32_t>(Format::VOP2));
}
|
||||
|
||||
enum class RegType {
|
||||
sgpr,
|
||||
vgpr,
|
||||
|
|
|
|||
|
|
@ -568,6 +568,36 @@ is_sgpr_writable_without_side_effects(amd_gfx_level gfx_level, PhysReg reg)
|
|||
(!has_flat_scr_lo_gfx7_or_xnack_mask || (reg != 104 || reg != 105));
|
||||
}
|
||||
|
||||
static bool
|
||||
convert_bitwise_to_16bit(Instruction* instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::v_cndmask_b32) {
|
||||
instr->opcode = aco_opcode::v_cndmask_b16;
|
||||
instr->format = withoutVOP2(asVOP3(instr->format));
|
||||
instr->valu().abs = 0;
|
||||
instr->valu().neg = 0;
|
||||
} else if (instr->opcode == aco_opcode::v_mov_b32) {
|
||||
instr->opcode = aco_opcode::v_mov_b16;
|
||||
instr->valu().abs = 0;
|
||||
instr->valu().neg = 0;
|
||||
} else if (instr->opcode == aco_opcode::v_not_b32) {
|
||||
instr->opcode = aco_opcode::v_not_b16;
|
||||
} else if (instr->opcode == aco_opcode::v_and_b32) {
|
||||
instr->opcode = aco_opcode::v_and_b16;
|
||||
instr->format = withoutVOP2(asVOP3(instr->format));
|
||||
} else if (instr->opcode == aco_opcode::v_or_b32) {
|
||||
instr->opcode = aco_opcode::v_or_b16;
|
||||
instr->format = withoutVOP2(asVOP3(instr->format));
|
||||
} else if (instr->opcode == aco_opcode::v_xor_b32) {
|
||||
instr->opcode = aco_opcode::v_xor_b16;
|
||||
instr->format = withoutVOP2(asVOP3(instr->format));
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
unsigned
|
||||
get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
|
||||
unsigned idx, RegClass rc)
|
||||
|
|
@ -593,6 +623,13 @@ get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>&
|
|||
}
|
||||
|
||||
switch (instr->opcode) {
|
||||
case aco_opcode::v_mov_b32:
|
||||
case aco_opcode::v_not_b32:
|
||||
case aco_opcode::v_and_b32:
|
||||
case aco_opcode::v_or_b32:
|
||||
case aco_opcode::v_xor_b32:
|
||||
case aco_opcode::v_cndmask_b32:
|
||||
return gfx_level >= GFX11 && instr->definitions[0].bytes() <= 2 ? 2 : 4;
|
||||
case aco_opcode::v_cvt_f32_ubyte0: return 1;
|
||||
case aco_opcode::ds_write_b8:
|
||||
case aco_opcode::ds_write_b16: return gfx_level >= GFX9 ? 2 : 4;
|
||||
|
|
@ -643,6 +680,8 @@ add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, uns
|
|||
return;
|
||||
}
|
||||
|
||||
convert_bitwise_to_16bit(instr.get());
|
||||
|
||||
assert(can_use_opsel(gfx_level, instr->opcode, idx));
|
||||
instr->valu().opsel[idx] = true;
|
||||
return;
|
||||
|
|
@ -706,6 +745,16 @@ DefInfo::get_subdword_definition_info(Program* program, const aco_ptr<Instructio
|
|||
can_use_opsel(gfx_level, instr->opcode, -1)) {
|
||||
data_stride = 2;
|
||||
stride = rc == v2b ? 2 : stride;
|
||||
} else if ((instr->opcode == aco_opcode::v_cndmask_b32 ||
|
||||
instr->opcode == aco_opcode::v_mov_b32 ||
|
||||
instr->opcode == aco_opcode::v_not_b32 ||
|
||||
instr->opcode == aco_opcode::v_and_b32 || instr->opcode == aco_opcode::v_or_b32 ||
|
||||
instr->opcode == aco_opcode::v_xor_b32) &&
|
||||
program->gfx_level >= GFX11) {
|
||||
/* Convert to 16bit opcode on demand. */
|
||||
rc = v2b;
|
||||
data_stride = 2;
|
||||
stride = 2;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
@ -785,6 +834,11 @@ add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg r
|
|||
return;
|
||||
}
|
||||
|
||||
if (convert_bitwise_to_16bit(instr.get())) {
|
||||
if (reg.byte() == 0)
|
||||
return;
|
||||
}
|
||||
|
||||
/* use opsel */
|
||||
assert(reg.byte() == 2);
|
||||
assert(can_use_opsel(gfx_level, instr->opcode, -1));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue