mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 14:40:10 +01:00
broadcom/compiler: Convert to new-style NIR registers
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24153>
This commit is contained in:
parent
355afc92d1
commit
d89ca14e71
6 changed files with 83 additions and 30 deletions
|
|
@ -120,7 +120,7 @@ traces:
|
|||
|
||||
minetest/minetest-v2.trace:
|
||||
broadcom-rpi4:
|
||||
checksum: 0e00019366845e1fae9727df8b24c149
|
||||
checksum: 09e693eae151f992c621ad8d0da3151d
|
||||
|
||||
neverball/neverball-v2.trace:
|
||||
broadcom-rpi4:
|
||||
|
|
|
|||
|
|
@ -307,8 +307,13 @@ ntq_add_pending_tmu_flush(struct v3d_compile *c,
|
|||
|
||||
if (num_components > 0) {
|
||||
c->tmu.output_fifo_size += num_components;
|
||||
if (!dest->is_ssa)
|
||||
_mesa_set_add(c->tmu.outstanding_regs, dest->reg.reg);
|
||||
|
||||
assert(dest->is_ssa);
|
||||
nir_intrinsic_instr *store = nir_store_reg_for_def(&dest->ssa);
|
||||
if (store != NULL) {
|
||||
nir_ssa_def *reg = store->src[1].ssa;
|
||||
_mesa_set_add(c->tmu.outstanding_regs, reg);
|
||||
}
|
||||
}
|
||||
|
||||
c->tmu.flush[c->tmu.flush_count].dest = dest;
|
||||
|
|
@ -772,7 +777,9 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
|
|||
assert(result.file == QFILE_TEMP && last_inst &&
|
||||
(last_inst == c->defs[result.index] || is_reused_uniform));
|
||||
|
||||
if (dest->is_ssa) {
|
||||
assert(dest->is_ssa);
|
||||
nir_intrinsic_instr *store = nir_store_reg_for_def(&dest->ssa);
|
||||
if (store == NULL) {
|
||||
assert(chan < dest->ssa.num_components);
|
||||
|
||||
struct qreg *qregs;
|
||||
|
|
@ -786,8 +793,10 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
|
|||
|
||||
qregs[chan] = result;
|
||||
} else {
|
||||
nir_register *reg = dest->reg.reg;
|
||||
assert(reg->num_array_elems == 0);
|
||||
nir_ssa_def *reg = store->src[1].ssa;
|
||||
ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
|
||||
assert(nir_intrinsic_base(store) == 0);
|
||||
assert(nir_intrinsic_num_array_elems(decl) == 0);
|
||||
struct hash_entry *entry =
|
||||
_mesa_hash_table_search(c->def_ht, reg);
|
||||
struct qreg *qregs = entry->data;
|
||||
|
|
@ -842,7 +851,10 @@ struct qreg
|
|||
ntq_get_src(struct v3d_compile *c, nir_src src, int i)
|
||||
{
|
||||
struct hash_entry *entry;
|
||||
if (src.is_ssa) {
|
||||
|
||||
assert(src.is_ssa);
|
||||
nir_intrinsic_instr *load = nir_load_reg_for_def(src.ssa);
|
||||
if (load == NULL) {
|
||||
assert(i < src.ssa->num_components);
|
||||
|
||||
entry = _mesa_hash_table_search(c->def_ht, src.ssa);
|
||||
|
|
@ -851,9 +863,11 @@ ntq_get_src(struct v3d_compile *c, nir_src src, int i)
|
|||
entry = _mesa_hash_table_search(c->def_ht, src.ssa);
|
||||
}
|
||||
} else {
|
||||
nir_register *reg = src.reg.reg;
|
||||
assert(reg->num_array_elems == 0);
|
||||
assert(i < reg->num_components);
|
||||
nir_ssa_def *reg = load->src[0].ssa;
|
||||
ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
|
||||
assert(nir_intrinsic_base(load) == 0);
|
||||
assert(nir_intrinsic_num_array_elems(decl) == 0);
|
||||
assert(i < nir_intrinsic_num_components(decl));
|
||||
|
||||
if (_mesa_set_search(c->tmu.outstanding_regs, reg))
|
||||
ntq_flush_tmu(c);
|
||||
|
|
@ -1218,7 +1232,8 @@ ntq_emit_comparison(struct v3d_compile *c,
|
|||
static struct nir_alu_instr *
|
||||
ntq_get_alu_parent(nir_src src)
|
||||
{
|
||||
if (!src.is_ssa || src.ssa->parent_instr->type != nir_instr_type_alu)
|
||||
assert(src.is_ssa);
|
||||
if (src.ssa->parent_instr->type != nir_instr_type_alu)
|
||||
return NULL;
|
||||
nir_alu_instr *instr = nir_instr_as_alu(src.ssa->parent_instr);
|
||||
if (!instr)
|
||||
|
|
@ -1229,7 +1244,8 @@ ntq_get_alu_parent(nir_src src)
|
|||
* src.
|
||||
*/
|
||||
for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
|
||||
if (!instr->src[i].src.is_ssa)
|
||||
assert(instr->src[i].src.is_ssa);
|
||||
if (nir_load_reg_for_def(instr->src[i].src.ssa))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
@ -2466,17 +2482,19 @@ ntq_setup_outputs(struct v3d_compile *c)
|
|||
* Each nir_register gets a struct qreg per 32-bit component being stored.
|
||||
*/
|
||||
static void
|
||||
ntq_setup_registers(struct v3d_compile *c, struct exec_list *list)
|
||||
ntq_setup_registers(struct v3d_compile *c, nir_function_impl *impl)
|
||||
{
|
||||
foreach_list_typed(nir_register, nir_reg, node, list) {
|
||||
unsigned array_len = MAX2(nir_reg->num_array_elems, 1);
|
||||
nir_foreach_reg_decl(decl, impl) {
|
||||
unsigned num_components = nir_intrinsic_num_components(decl);
|
||||
unsigned array_len = nir_intrinsic_num_array_elems(decl);
|
||||
array_len = MAX2(array_len, 1);
|
||||
struct qreg *qregs = ralloc_array(c->def_ht, struct qreg,
|
||||
array_len *
|
||||
nir_reg->num_components);
|
||||
array_len * num_components);
|
||||
|
||||
nir_ssa_def *nir_reg = &decl->dest.ssa;
|
||||
_mesa_hash_table_insert(c->def_ht, nir_reg, qregs);
|
||||
|
||||
for (int i = 0; i < array_len * nir_reg->num_components; i++)
|
||||
for (int i = 0; i < array_len * num_components; i++)
|
||||
qregs[i] = vir_get_temp(c);
|
||||
}
|
||||
}
|
||||
|
|
@ -3299,6 +3317,11 @@ static void
|
|||
ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
{
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_decl_reg:
|
||||
case nir_intrinsic_load_reg:
|
||||
case nir_intrinsic_store_reg:
|
||||
break; /* Ignore these */
|
||||
|
||||
case nir_intrinsic_load_uniform:
|
||||
ntq_emit_load_uniform(c, instr);
|
||||
break;
|
||||
|
|
@ -3827,6 +3850,17 @@ is_cheap_block(nir_block *block)
|
|||
if (--cost <= 0)
|
||||
return false;
|
||||
break;
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_decl_reg:
|
||||
case nir_intrinsic_load_reg:
|
||||
case nir_intrinsic_store_reg:
|
||||
continue;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
@ -4310,7 +4344,7 @@ ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list)
|
|||
static void
|
||||
ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl)
|
||||
{
|
||||
ntq_setup_registers(c, &impl->registers);
|
||||
ntq_setup_registers(c, impl);
|
||||
ntq_emit_cf_list(c, &impl->body);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -135,9 +135,7 @@ v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
* instruction writes and how many the instruction could produce.
|
||||
*/
|
||||
p1_unpacked.return_words_of_texture_data =
|
||||
instr->dest.is_ssa ?
|
||||
nir_ssa_def_components_read(&instr->dest.ssa) :
|
||||
(1 << instr->dest.reg.reg->num_components) - 1;
|
||||
nir_ssa_def_components_read(&instr->dest.ssa);
|
||||
|
||||
uint32_t p0_packed;
|
||||
V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL,
|
||||
|
|
|
|||
|
|
@ -244,10 +244,17 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
/* Limit the number of channels returned to both how many the NIR
|
||||
* instruction writes and how many the instruction could produce.
|
||||
*/
|
||||
if (instr->dest.is_ssa) {
|
||||
assert(instr->dest.is_ssa);
|
||||
nir_intrinsic_instr *store = nir_store_reg_for_def(&instr->dest.ssa);
|
||||
if (store == NULL) {
|
||||
p0_unpacked.return_words_of_texture_data =
|
||||
nir_ssa_def_components_read(&instr->dest.ssa);
|
||||
} else {
|
||||
nir_ssa_def *reg = store->src[1].ssa;
|
||||
nir_intrinsic_instr *decl = nir_reg_get_decl(reg);
|
||||
unsigned reg_num_components =
|
||||
nir_intrinsic_num_components(decl);
|
||||
|
||||
/* For the non-ssa case we don't have a full equivalent to
|
||||
* nir_ssa_def_components_read. This is a problem for the 16
|
||||
* bit case. nir_lower_tex will not change the destination as
|
||||
|
|
@ -256,8 +263,8 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
|||
* manually ensure that here.
|
||||
*/
|
||||
uint32_t num_components = output_type_32_bit ?
|
||||
MIN2(instr->dest.reg.reg->num_components, 4) :
|
||||
MIN2(instr->dest.reg.reg->num_components, 2);
|
||||
MIN2(reg_num_components, 4) :
|
||||
MIN2(reg_num_components, 2);
|
||||
|
||||
p0_unpacked.return_words_of_texture_data = (1 << num_components) - 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,10 +72,11 @@ lower_line_smooth_func(struct lower_line_smooth_state *state,
|
|||
|
||||
if (intr->intrinsic != nir_intrinsic_store_output ||
|
||||
nir_intrinsic_base(intr) != 0 ||
|
||||
intr->num_components != 4 ||
|
||||
!intr->src[0].is_ssa)
|
||||
intr->num_components != 4)
|
||||
continue;
|
||||
|
||||
assert(intr->src[0].is_ssa);
|
||||
|
||||
lower_line_smooth_intrinsic(state, &b, intr);
|
||||
progress = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1167,9 +1167,13 @@ v3d_instr_delay_cb(nir_instr *instr, void *data)
|
|||
* we are trying to strike a balance based on empirical testing.
|
||||
*/
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
if (!c->disable_general_tmu_sched) {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_decl_reg:
|
||||
case nir_intrinsic_load_reg:
|
||||
case nir_intrinsic_store_reg:
|
||||
return 0;
|
||||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_scratch:
|
||||
case nir_intrinsic_load_shared:
|
||||
|
|
@ -1183,7 +1187,14 @@ v3d_instr_delay_cb(nir_instr *instr, void *data)
|
|||
return 1;
|
||||
}
|
||||
} else {
|
||||
return 1;
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_decl_reg:
|
||||
case nir_intrinsic_load_reg:
|
||||
case nir_intrinsic_store_reg:
|
||||
return 0;
|
||||
default:
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -1624,7 +1635,7 @@ v3d_attempt_compile(struct v3d_compile *c)
|
|||
NIR_PASS(_, c->s, nir_lower_bool_to_int32);
|
||||
NIR_PASS(_, c->s, nir_convert_to_lcssa, true, true);
|
||||
NIR_PASS_V(c->s, nir_divergence_analysis);
|
||||
NIR_PASS(_, c->s, nir_convert_from_ssa, true, false);
|
||||
NIR_PASS(_, c->s, nir_convert_from_ssa, true, true);
|
||||
|
||||
struct nir_schedule_options schedule_options = {
|
||||
/* Schedule for about half our register space, to enable more
|
||||
|
|
@ -1659,6 +1670,8 @@ v3d_attempt_compile(struct v3d_compile *c)
|
|||
nir_move_const_undef |
|
||||
buffer_opts);
|
||||
|
||||
NIR_PASS_V(c->s, nir_trivialize_registers);
|
||||
|
||||
v3d_nir_to_vir(c);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue