mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 00:30:13 +01:00
aco: mostly implement FS input loads on GFX11
Quad-divergent control flow and vertex selection don't work yet, but this should at least prevent crashes. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17333>
This commit is contained in:
parent
826ed52174
commit
3730be9873
1 changed files with 47 additions and 5 deletions
|
|
@ -5262,10 +5262,38 @@ visit_store_output(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
emit_interp_instr_gfx11(isel_context* ctx, unsigned idx, unsigned component, Temp src, Temp dst,
|
||||||
|
Temp prim_mask)
|
||||||
|
{
|
||||||
|
Temp coord1 = emit_extract_vector(ctx, src, 0, v1);
|
||||||
|
Temp coord2 = emit_extract_vector(ctx, src, 1, v1);
|
||||||
|
|
||||||
|
Builder bld(ctx->program, ctx->block);
|
||||||
|
|
||||||
|
//TODO: this doesn't work in quad-divergent control flow
|
||||||
|
|
||||||
|
Temp p = bld.ldsdir(aco_opcode::lds_param_load, bld.def(v1), bld.m0(prim_mask), idx, component);
|
||||||
|
|
||||||
|
if (dst.regClass() == v2b) {
|
||||||
|
Temp p10 =
|
||||||
|
bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), p, coord1, p);
|
||||||
|
bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, Definition(dst), p, coord2, p10);
|
||||||
|
} else {
|
||||||
|
Temp p10 = bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, bld.def(v1), p, coord1, p);
|
||||||
|
bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, Definition(dst), p, coord2, p10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src, Temp dst,
|
emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src, Temp dst,
|
||||||
Temp prim_mask)
|
Temp prim_mask)
|
||||||
{
|
{
|
||||||
|
if (ctx->options->gfx_level >= GFX11) {
|
||||||
|
emit_interp_instr_gfx11(ctx, idx, component, src, dst, prim_mask);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
Temp coord1 = emit_extract_vector(ctx, src, 0, v1);
|
Temp coord1 = emit_extract_vector(ctx, src, 0, v1);
|
||||||
Temp coord2 = emit_extract_vector(ctx, src, 1, v1);
|
Temp coord2 = emit_extract_vector(ctx, src, 1, v1);
|
||||||
|
|
||||||
|
|
@ -5304,6 +5332,22 @@ emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
emit_interp_mov_instr(isel_context* ctx, unsigned idx, unsigned component, unsigned vertex_id,
|
||||||
|
Temp dst, Temp prim_mask)
|
||||||
|
{
|
||||||
|
Builder bld(ctx->program, ctx->block);
|
||||||
|
if (ctx->options->gfx_level >= GFX11) {
|
||||||
|
//TODO: this doesn't work in quad-divergent control flow and ignores vertex_id
|
||||||
|
Temp p = bld.ldsdir(aco_opcode::lds_param_load, bld.def(v1), bld.m0(prim_mask), idx, component);
|
||||||
|
uint16_t dpp_ctrl = dpp_quad_perm(0, 0, 0, 0);
|
||||||
|
bld.vop1_dpp(aco_opcode::v_mov_b32, Definition(dst), p, dpp_ctrl);
|
||||||
|
} else {
|
||||||
|
bld.vintrp(aco_opcode::v_interp_mov_f32, Definition(dst), Operand::c32(vertex_id),
|
||||||
|
bld.m0(prim_mask), idx, component);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
|
emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components)
|
||||||
{
|
{
|
||||||
|
|
@ -5720,8 +5764,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||||
|
|
||||||
if (instr->dest.ssa.num_components == 1 &&
|
if (instr->dest.ssa.num_components == 1 &&
|
||||||
instr->dest.ssa.bit_size != 64) {
|
instr->dest.ssa.bit_size != 64) {
|
||||||
bld.vintrp(aco_opcode::v_interp_mov_f32, Definition(dst), Operand::c32(vertex_id),
|
emit_interp_mov_instr(ctx, idx, component, vertex_id, dst, prim_mask);
|
||||||
bld.m0(prim_mask), idx, component);
|
|
||||||
} else {
|
} else {
|
||||||
unsigned num_components = instr->dest.ssa.num_components;
|
unsigned num_components = instr->dest.ssa.num_components;
|
||||||
if (instr->dest.ssa.bit_size == 64)
|
if (instr->dest.ssa.bit_size == 64)
|
||||||
|
|
@ -5731,9 +5774,8 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||||
for (unsigned i = 0; i < num_components; i++) {
|
for (unsigned i = 0; i < num_components; i++) {
|
||||||
unsigned chan_component = (component + i) % 4;
|
unsigned chan_component = (component + i) % 4;
|
||||||
unsigned chan_idx = idx + (component + i) / 4;
|
unsigned chan_idx = idx + (component + i) / 4;
|
||||||
vec->operands[i] = bld.vintrp(
|
vec->operands[i] = Operand(bld.tmp(instr->dest.ssa.bit_size == 16 ? v2b : v1));
|
||||||
aco_opcode::v_interp_mov_f32, bld.def(instr->dest.ssa.bit_size == 16 ? v2b : v1),
|
emit_interp_mov_instr(ctx, chan_idx, chan_component, vertex_id, vec->operands[i].getTemp(), prim_mask);
|
||||||
Operand::c32(vertex_id), bld.m0(prim_mask), chan_idx, chan_component);
|
|
||||||
}
|
}
|
||||||
vec->definitions[0] = Definition(dst);
|
vec->definitions[0] = Definition(dst);
|
||||||
bld.insert(std::move(vec));
|
bld.insert(std::move(vec));
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue