mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 11:18:08 +02:00
aco: don't reuse misaligned attribute destination VGPRs in VS prologs
Since we split misaligned attributes, we could overwrite one of these VGPRs in the middle of loading the attribute. For example:

    v_add_u32_e32 v4, vcc, s7, v1
    s_waitcnt lgkmcnt(0)
    buffer_load_dword v4, v4, s[32:35], 0 idxen
    buffer_load_dword v5, v4, s[32:35], 0 idxen offset:4

can overwrite the vertex index in the load of the first component.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27920>
This commit is contained in:
parent
df7024bcdd
commit
ec892c4d2b
1 changed files with 39 additions and 19 deletions
|
|
@ -12767,6 +12767,20 @@ select_rt_prolog(Program* program, ac_shader_config* config,
|
||||||
program->config->num_sgprs = get_sgpr_alloc(program, num_sgprs);
|
program->config->num_sgprs = get_sgpr_alloc(program, num_sgprs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Hand out a range of `size` consecutive registers and return the first one.
 *
 * `num` is a running register count owned by the caller. Without `offset`,
 * the range starts at `*num` and `*num` is advanced past it.
 *
 * With `offset`, the range starts at `*num + *offset` (the offset may be
 * negative, which places the range below the current count):
 *  - if the range ends strictly below `*num`, the count is left alone and
 *    `*offset` is advanced by `size` so the next call continues after it;
 *  - otherwise `*num` is set to the end of the range and `*offset` is reset
 *    to 0, so later calls allocate at the running count.
 *
 * The result is PhysReg(256 + index); presumably 256 is the base of the VGPR
 * range in PhysReg numbering, as the function name suggests.
 */
PhysReg
get_next_vgpr(unsigned size, unsigned* num, int* offset = NULL)
{
   const unsigned first = *num + (offset ? *offset : 0);
   const unsigned end = first + size;

   if (end < *num) {
      /* Range fits entirely below the current count: only move the offset. */
      if (offset)
         *offset += size;
   } else {
      /* Range reaches (or passes) the current count: grow it and stop
       * applying an offset from now on. */
      *num = end;
      if (offset)
         *offset = 0;
   }

   return PhysReg(256 + first);
}
|
||||||
|
|
||||||
void
|
void
|
||||||
select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_shader_config* config,
|
select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_shader_config* config,
|
||||||
const struct aco_compiler_options* options, const struct aco_shader_info* info,
|
const struct aco_compiler_options* options, const struct aco_shader_info* info,
|
||||||
|
|
@ -12808,13 +12822,30 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
||||||
Operand start_instance = get_arg_fixed(args, args->start_instance);
|
Operand start_instance = get_arg_fixed(args, args->start_instance);
|
||||||
Operand instance_id = get_arg_fixed(args, args->instance_id);
|
Operand instance_id = get_arg_fixed(args, args->instance_id);
|
||||||
|
|
||||||
PhysReg attributes_start(256 + args->num_vgprs_used);
|
bool needs_instance_index =
|
||||||
/* choose vgprs that won't be used for anything else until the last attribute load */
|
pinfo->instance_rate_inputs &
|
||||||
PhysReg vertex_index(attributes_start.reg() + pinfo->num_attributes * 4 - 1);
|
~(pinfo->zero_divisors | pinfo->nontrivial_divisors); /* divisor is 1 */
|
||||||
PhysReg instance_index(attributes_start.reg() + pinfo->num_attributes * 4 - 2);
|
bool needs_start_instance = pinfo->instance_rate_inputs & pinfo->zero_divisors;
|
||||||
PhysReg start_instance_vgpr(attributes_start.reg() + pinfo->num_attributes * 4 - 3);
|
bool needs_vertex_index = ~pinfo->instance_rate_inputs & attrib_mask;
|
||||||
PhysReg nontrivial_tmp_vgpr0(attributes_start.reg() + pinfo->num_attributes * 4 - 4);
|
bool needs_tmp_vgpr0 = has_nontrivial_divisors;
|
||||||
PhysReg nontrivial_tmp_vgpr1(attributes_start.reg() + pinfo->num_attributes * 4);
|
bool needs_tmp_vgpr1 = has_nontrivial_divisors &&
|
||||||
|
(program->gfx_level <= GFX8 || program->gfx_level >= GFX11);
|
||||||
|
|
||||||
|
int vgpr_offset = pinfo->misaligned_mask & (1u << (pinfo->num_attributes - 1)) ? 0 : -4;
|
||||||
|
|
||||||
|
unsigned num_vgprs = args->num_vgprs_used;
|
||||||
|
PhysReg attributes_start = get_next_vgpr(pinfo->num_attributes * 4, &num_vgprs);
|
||||||
|
PhysReg vertex_index, instance_index, start_instance_vgpr, nontrivial_tmp_vgpr0, nontrivial_tmp_vgpr1;
|
||||||
|
if (needs_vertex_index)
|
||||||
|
vertex_index = get_next_vgpr(1, &num_vgprs, &vgpr_offset);
|
||||||
|
if (needs_instance_index)
|
||||||
|
instance_index = get_next_vgpr(1, &num_vgprs, &vgpr_offset);
|
||||||
|
if (needs_start_instance)
|
||||||
|
start_instance_vgpr = get_next_vgpr(1, &num_vgprs, &vgpr_offset);
|
||||||
|
if (needs_tmp_vgpr0)
|
||||||
|
nontrivial_tmp_vgpr0 = get_next_vgpr(1, &num_vgprs, &vgpr_offset);
|
||||||
|
if (needs_tmp_vgpr1)
|
||||||
|
nontrivial_tmp_vgpr1 = get_next_vgpr(1, &num_vgprs, &vgpr_offset);
|
||||||
|
|
||||||
bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers, s1),
|
bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers, s1),
|
||||||
get_arg_fixed(args, args->vertex_buffers));
|
get_arg_fixed(args, args->vertex_buffers));
|
||||||
|
|
@ -12826,16 +12857,10 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
||||||
Operand::c32((unsigned)options->address32_hi));
|
Operand::c32((unsigned)options->address32_hi));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* calculate vgpr requirements */
|
|
||||||
unsigned num_vgprs = attributes_start.reg() - 256;
|
|
||||||
num_vgprs += pinfo->num_attributes * 4;
|
|
||||||
if (has_nontrivial_divisors && program->gfx_level <= GFX8)
|
|
||||||
num_vgprs++; /* make space for nontrivial_tmp_vgpr1 */
|
|
||||||
unsigned num_sgprs = 0;
|
|
||||||
|
|
||||||
const struct ac_vtx_format_info* vtx_info_table =
|
const struct ac_vtx_format_info* vtx_info_table =
|
||||||
ac_get_vtx_format_info_table(GFX8, CHIP_POLARIS10);
|
ac_get_vtx_format_info_table(GFX8, CHIP_POLARIS10);
|
||||||
|
|
||||||
|
unsigned num_sgprs = 0;
|
||||||
for (unsigned loc = 0; loc < pinfo->num_attributes;) {
|
for (unsigned loc = 0; loc < pinfo->num_attributes;) {
|
||||||
unsigned num_descs =
|
unsigned num_descs =
|
||||||
load_vb_descs(bld, desc, Operand(vertex_buffers, s2), loc, pinfo->num_attributes - loc);
|
load_vb_descs(bld, desc, Operand(vertex_buffers, s2), loc, pinfo->num_attributes - loc);
|
||||||
|
|
@ -12875,11 +12900,6 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_info* pinfo, ac_sh
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool needs_instance_index =
|
|
||||||
pinfo->instance_rate_inputs &
|
|
||||||
~(pinfo->zero_divisors | pinfo->nontrivial_divisors); /* divisor is 1 */
|
|
||||||
bool needs_start_instance = pinfo->instance_rate_inputs & pinfo->zero_divisors;
|
|
||||||
bool needs_vertex_index = ~pinfo->instance_rate_inputs & attrib_mask;
|
|
||||||
if (needs_vertex_index)
|
if (needs_vertex_index)
|
||||||
bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->base_vertex),
|
bld.vadd32(Definition(vertex_index, v1), get_arg_fixed(args, args->base_vertex),
|
||||||
get_arg_fixed(args, args->vertex_id), false, Operand(s2), true);
|
get_arg_fixed(args, args->vertex_id), false, Operand(s2), true);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue