nir_to_tgsi: Extract const components of atomic counter offsets into Index.

virglrenderer maps atomic accesses to atomic counter declarations using
the .Index field.  We were previously emitting a .Index of 0 for array
accesses, so virglrenderer would emit
atomicIncrement(first_counter[counter_offset+array_index]).  This would
mostly work because hardware doesn't care about the bounds of counter
declarations, but if the first counter was a non-array, then the [] GLSL
emit gets dropped (can't array access a scalar!) and you'd access the
non-array first_counter instead.

Acked-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15824>
This commit is contained in:
Emma Anholt 2022-04-05 09:45:36 -07:00 committed by Marge Bot
parent 8dcf7646ce
commit 664f69a4d5

View file

@ -523,6 +523,59 @@ ntt_allocate_regs(struct ntt_compile *c, nir_function_impl *impl)
}
}
/**
 * Try to find an iadd of a constant value with a non-constant value in the
 * nir_src's first component, returning the constant offset and replacing *src
 * with the non-constant component.
 *
 * Copies (mov/vec) feeding the first component are looked through to find the
 * iadd.  If no suitable iadd is found, 0 is returned and *src is left
 * untouched, so the caller can always use "*src + returned offset" as the
 * original value.
 *
 * A nir_src carries neither a swizzle nor source modifiers, and *src is only
 * meaningful for its first component.  The extraction is therefore only done
 * when the non-constant iadd operand is unmodified (no negate/abs) and reads
 * component 0 of its SSA def; otherwise replacing *src would silently change
 * the value being addressed.
 */
static uint32_t
ntt_extract_const_src_offset(nir_src *src)
{
   if (!src->is_ssa)
      return 0;

   nir_ssa_scalar s = nir_get_ssa_scalar(src->ssa, 0);

   while (nir_ssa_scalar_is_alu(s)) {
      nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);

      /* Everything below dereferences .src.ssa, so bail on register sources. */
      for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (!alu->src[i].src.is_ssa)
            return 0;
      }

      if (alu->op == nir_op_iadd) {
         for (int i = 0; i < 2; i++) {
            nir_const_value *v = nir_src_as_const_value(alu->src[i].src);

            /* The constant operand must be a plain, unmodified constant. */
            if (!v || alu->src[i].negate || alu->src[i].abs)
               continue;

            /* The remaining operand becomes the new *src, which can only
             * express "component 0 of this def, unmodified".
             */
            if (alu->src[1 - i].negate || alu->src[1 - i].abs ||
                alu->src[1 - i].swizzle[s.comp] != 0)
               continue;

            uint32_t offset = v[alu->src[i].swizzle[s.comp]].u32;
            *src = alu->src[1 - i].src;
            return offset;
         }

         return 0;
      }

      /* We'd like to reuse nir_ssa_scalar_chase_movs(), but it assumes SSA and that
       * seems reasonable for something used in inner loops of the compiler.
       */
      if (!nir_alu_instr_is_copy(alu))
         return 0;

      if (alu->op == nir_op_mov) {
         /* Follow the mov to the component of its source that it selects. */
         s.def = alu->src[0].src.ssa;
         s.comp = alu->src[0].swizzle[s.comp];
      } else if (nir_op_is_vec(alu->op)) {
         /* Each vecN source supplies one output channel via swizzle[0]. */
         s.def = alu->src[s.comp].src.ssa;
         s.comp = alu->src[s.comp].swizzle[0];
      } else {
         return 0;
      }
   }

   return 0;
}
static const struct glsl_type *
ntt_shader_input_type(struct ntt_compile *c,
struct nir_variable *var)
@ -1839,7 +1892,7 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr,
unsigned opcode;
struct ureg_src src[4];
int num_src = 0;
int nir_src;
int next_src;
struct ureg_dst addr_temp = ureg_dst_undef();
struct ureg_src memory;
@ -1847,24 +1900,26 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr,
case nir_var_mem_ssbo:
memory = ntt_ureg_src_indirect(c, ureg_src_register(TGSI_FILE_BUFFER, 0),
instr->src[is_store ? 1 : 0]);
nir_src = 1;
next_src = 1;
break;
case nir_var_mem_shared:
memory = ureg_src_register(TGSI_FILE_MEMORY, 0);
nir_src = 0;
next_src = 0;
break;
case nir_var_uniform: { /* HW atomic buffers */
memory = ureg_src_register(TGSI_FILE_HW_ATOMIC, 0);
nir_src src = instr->src[0];
uint32_t offset = ntt_extract_const_src_offset(&src) / 4;
memory = ureg_src_register(TGSI_FILE_HW_ATOMIC, offset);
/* ntt_ureg_src_indirect, except dividing by 4 */
if (nir_src_is_const(instr->src[0])) {
memory.Index += nir_src_as_uint(instr->src[0]) / 4;
if (nir_src_is_const(src)) {
memory.Index += nir_src_as_uint(src) / 4;
} else {
addr_temp = ntt_temp(c);
ntt_USHR(c, addr_temp, ntt_get_src(c, instr->src[0]), ureg_imm1i(c->ureg, 2));
ntt_USHR(c, addr_temp, ntt_get_src(c, src), ureg_imm1i(c->ureg, 2));
memory = ureg_src_indirect(memory, ntt_reladdr(c, ureg_src(addr_temp), 2));
}
memory = ureg_src_dimension(memory, nir_intrinsic_base(instr));
nir_src = 0;
next_src = 0;
break;
}
@ -1873,12 +1928,12 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr,
}
if (is_store) {
src[num_src++] = ntt_get_src(c, instr->src[nir_src + 1]); /* offset */
src[num_src++] = ntt_get_src(c, instr->src[next_src + 1]); /* offset */
src[num_src++] = ntt_get_src(c, instr->src[0]); /* value */
} else {
src[num_src++] = memory;
if (instr->intrinsic != nir_intrinsic_get_ssbo_size) {
src[num_src++] = ntt_get_src(c, instr->src[nir_src++]); /* offset */
src[num_src++] = ntt_get_src(c, instr->src[next_src++]); /* offset */
switch (instr->intrinsic) {
case nir_intrinsic_atomic_counter_inc:
src[num_src++] = ureg_imm1i(c->ureg, 1);
@ -1888,7 +1943,7 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr,
break;
default:
if (!is_load)
src[num_src++] = ntt_get_src(c, instr->src[nir_src++]); /* value */
src[num_src++] = ntt_get_src(c, instr->src[next_src++]); /* value */
break;
}
}
@ -1949,7 +2004,7 @@ ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr,
case nir_intrinsic_ssbo_atomic_comp_swap:
case nir_intrinsic_shared_atomic_comp_swap:
opcode = TGSI_OPCODE_ATOMCAS;
src[num_src++] = ntt_get_src(c, instr->src[nir_src++]);
src[num_src++] = ntt_get_src(c, instr->src[next_src++]);
break;
case nir_intrinsic_atomic_counter_read:
case nir_intrinsic_load_ssbo: