mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 06:30:10 +01:00
aco: implement load buffer with ACCESS_USES_FORMAT_AMD
This is used by radeonsi for VS input loads and for CDNA image load emulation.
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23094>
This commit is contained in:
parent
c5c98d2f20
commit
460b528c9e
1 changed file with 102 additions and 7 deletions
|
|
@ -4517,6 +4517,93 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
|
|||
|
||||
/* Parameter set passed to emit_load() for plain (non-format) MUBUF buffer loads; it names
 * mubuf_load_callback as the per-load emitter.
 * NOTE(review): the meaning of the positional values (true, true, 4096) is defined by
 * EmitLoadParameters, which is not visible here - confirm against its declaration. */
const EmitLoadParameters mubuf_load_params{mubuf_load_callback, true, true, 4096};
|
||||
|
||||
/* Emit a single MUBUF buffer_load_format_* instruction and return the VGPR temporary it defines.
 *
 * The d16 opcodes are chosen when info.component_size is 2, the 32-bit ones otherwise;
 * bytes_needed selects the component count (x, xy, xyz or xyzw). `offset` is routed to the
 * VGPR address operand or to the scalar offset operand depending on its register type,
 * info.idx (when set) turns on idxen, and const_offset is stored in the instruction's offset
 * field. dst_hint is reused as the destination when its register class matches the load size.
 * align_ is not used by this callback.
 */
Temp
mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
                           unsigned bytes_needed, unsigned align_, unsigned const_offset,
                           Temp dst_hint)
{
   /* Operand(v1) is the placeholder that the isUndefined() check below treats as
    * "no VGPR offset"; the scalar offset defaults to the constant 0. */
   Operand vaddr = Operand(v1);
   Operand soffset = Operand::c32(0);
   if (offset.type() == RegType::vgpr)
      vaddr = Operand(offset);
   else if (offset.type() == RegType::sgpr)
      soffset = Operand(offset);

   /* An explicit info.soffset takes over the scalar slot. If `offset` was already occupying
    * it as a temporary, copy that value into a VGPR so it is still applied. */
   if (info.soffset.id()) {
      if (soffset.isTemp())
         vaddr = bld.copy(bld.def(v1), soffset);
      soffset = Operand(info.soffset);
   }

   if (soffset.isUndefined())
      soffset = Operand::zero();

   const bool offen = !vaddr.isUndefined();
   const bool idxen = info.idx.id() != 0;

   if (idxen) {
      /* With both index and offset present they are packed as a v2 vector {idx, offset}. */
      if (offen)
         vaddr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), info.idx, vaddr);
      else
         vaddr = Operand(info.idx);
   }

   /* Row 0: 32-bit components, row 1: d16 components; column = component count - 1. */
   static const aco_opcode format_ops[2][4] = {
      {aco_opcode::buffer_load_format_x, aco_opcode::buffer_load_format_xy,
       aco_opcode::buffer_load_format_xyz, aco_opcode::buffer_load_format_xyzw},
      {aco_opcode::buffer_load_format_d16_x, aco_opcode::buffer_load_format_d16_xy,
       aco_opcode::buffer_load_format_d16_xyz, aco_opcode::buffer_load_format_d16_xyzw},
   };

   const bool is_d16 = info.component_size == 2;
   assert(is_d16 || info.component_size == 4);
   const unsigned component_bytes = is_d16 ? 2 : 4;
   const unsigned num_components = bytes_needed / component_bytes;

   /* Only exact multiples covering 1..4 components are valid load sizes. */
   aco_opcode op = aco_opcode::num_opcodes;
   if (bytes_needed % component_bytes == 0 && num_components >= 1 && num_components <= 4)
      op = format_ops[is_d16 ? 1 : 0][num_components - 1];
   else
      unreachable("invalid buffer load format size");

   const bool is_gfx10_family =
      bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3;

   aco_ptr<MUBUF_instruction> load{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
   load->operands[0] = Operand(info.resource);
   load->operands[1] = vaddr;
   load->operands[2] = soffset;
   load->offset = const_offset;
   load->offen = offen;
   load->idxen = idxen;
   load->glc = info.glc;
   /* dlc mirrors glc, but only on GFX10 / GFX10.3. */
   load->dlc = info.glc && is_gfx10_family;
   load->slc = info.slc;
   load->sync = info.sync;

   /* Write straight into the caller's hint when its register class fits the loaded size. */
   const RegClass dst_rc = RegClass::get(RegType::vgpr, bytes_needed);
   const bool reuse_hint = dst_hint.id() && dst_rc == dst_hint.regClass();
   Temp dst = reuse_hint ? dst_hint : bld.tmp(dst_rc);
   load->definitions[0] = Definition(dst);
   bld.insert(std::move(load));

   return dst;
}
|
||||
|
||||
/* Parameter set passed to emit_load() by visit_load_buffer for loads whose access flags
 * include ACCESS_USES_FORMAT_AMD; it names mubuf_load_format_callback as the per-load emitter.
 * NOTE(review): the meaning of the positional values (false, true, 4096) is defined by
 * EmitLoadParameters, which is not visible here - confirm against its declaration. */
const EmitLoadParameters mubuf_load_format_params{mubuf_load_format_callback, false, true, 4096};
|
||||
|
||||
Temp
|
||||
scratch_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned bytes_needed,
|
||||
unsigned align_, unsigned const_offset, Temp dst_hint)
|
||||
|
|
@ -7031,15 +7118,23 @@ visit_load_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
|
|||
|
||||
emit_load(ctx, bld, info, mtbuf_load_params);
|
||||
} else {
|
||||
const unsigned swizzle_element_size =
|
||||
swizzled ? (ctx->program->gfx_level <= GFX8 ? 4 : 16) : 0;
|
||||
assert(intrin->intrinsic == nir_intrinsic_load_buffer_amd);
|
||||
|
||||
info.component_stride = swizzle_element_size;
|
||||
info.swizzle_component_size = swizzle_element_size ? 4 : 0;
|
||||
info.align_mul = MIN2(elem_size_bytes, 4);
|
||||
info.align_offset = 0;
|
||||
if (nir_intrinsic_access(intrin) & ACCESS_USES_FORMAT_AMD) {
|
||||
assert(!swizzled);
|
||||
|
||||
emit_load(ctx, bld, info, mubuf_load_params);
|
||||
emit_load(ctx, bld, info, mubuf_load_format_params);
|
||||
} else {
|
||||
const unsigned swizzle_element_size =
|
||||
swizzled ? (ctx->program->gfx_level <= GFX8 ? 4 : 16) : 0;
|
||||
|
||||
info.component_stride = swizzle_element_size;
|
||||
info.swizzle_component_size = swizzle_element_size ? 4 : 0;
|
||||
info.align_mul = MIN2(elem_size_bytes, 4);
|
||||
info.align_offset = 0;
|
||||
|
||||
emit_load(ctx, bld, info, mubuf_load_params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue