diff --git a/src/broadcom/ci/traces-broadcom.yml b/src/broadcom/ci/traces-broadcom.yml index a763a027079..d575656a386 100644 --- a/src/broadcom/ci/traces-broadcom.yml +++ b/src/broadcom/ci/traces-broadcom.yml @@ -120,7 +120,7 @@ traces: minetest/minetest-v2.trace: broadcom-rpi4: - checksum: 0e00019366845e1fae9727df8b24c149 + checksum: 09e693eae151f992c621ad8d0da3151d neverball/neverball-v2.trace: broadcom-rpi4: diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 9935d95ddb7..1bc11509575 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -307,8 +307,13 @@ ntq_add_pending_tmu_flush(struct v3d_compile *c, if (num_components > 0) { c->tmu.output_fifo_size += num_components; - if (!dest->is_ssa) - _mesa_set_add(c->tmu.outstanding_regs, dest->reg.reg); + + assert(dest->is_ssa); + nir_intrinsic_instr *store = nir_store_reg_for_def(&dest->ssa); + if (store != NULL) { + nir_ssa_def *reg = store->src[1].ssa; + _mesa_set_add(c->tmu.outstanding_regs, reg); + } } c->tmu.flush[c->tmu.flush_count].dest = dest; @@ -772,7 +777,9 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan, assert(result.file == QFILE_TEMP && last_inst && (last_inst == c->defs[result.index] || is_reused_uniform)); - if (dest->is_ssa) { + assert(dest->is_ssa); + nir_intrinsic_instr *store = nir_store_reg_for_def(&dest->ssa); + if (store == NULL) { assert(chan < dest->ssa.num_components); struct qreg *qregs; @@ -786,8 +793,10 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan, qregs[chan] = result; } else { - nir_register *reg = dest->reg.reg; - assert(reg->num_array_elems == 0); + nir_ssa_def *reg = store->src[1].ssa; + ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg); + assert(nir_intrinsic_base(store) == 0); + assert(nir_intrinsic_num_array_elems(decl) == 0); struct hash_entry *entry = _mesa_hash_table_search(c->def_ht, reg); struct qreg *qregs = entry->data; @@ -842,7 +851,10 @@ struct qreg ntq_get_src(struct v3d_compile *c, nir_src src, int i) { struct hash_entry *entry; - if (src.is_ssa) { + + assert(src.is_ssa); + nir_intrinsic_instr *load = nir_load_reg_for_def(src.ssa); + if (load == NULL) { assert(i < src.ssa->num_components); entry = _mesa_hash_table_search(c->def_ht, src.ssa); @@ -851,9 +863,11 @@ ntq_get_src(struct v3d_compile *c, nir_src src, int i) entry = _mesa_hash_table_search(c->def_ht, src.ssa); } } else { - nir_register *reg = src.reg.reg; - assert(reg->num_array_elems == 0); - assert(i < reg->num_components); + nir_ssa_def *reg = load->src[0].ssa; + ASSERTED nir_intrinsic_instr *decl = nir_reg_get_decl(reg); + assert(nir_intrinsic_base(load) == 0); + assert(nir_intrinsic_num_array_elems(decl) == 0); + assert(i < nir_intrinsic_num_components(decl)); if (_mesa_set_search(c->tmu.outstanding_regs, reg)) ntq_flush_tmu(c); @@ -1218,7 +1232,8 @@ ntq_emit_comparison(struct v3d_compile *c, static struct nir_alu_instr * ntq_get_alu_parent(nir_src src) { - if (!src.is_ssa || src.ssa->parent_instr->type != nir_instr_type_alu) + assert(src.is_ssa); + if (src.ssa->parent_instr->type != nir_instr_type_alu) return NULL; nir_alu_instr *instr = nir_instr_as_alu(src.ssa->parent_instr); if (!instr) @@ -1229,7 +1244,8 @@ ntq_get_alu_parent(nir_src src) * src. */ for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { - if (!instr->src[i].src.is_ssa) + assert(instr->src[i].src.is_ssa); + if (nir_load_reg_for_def(instr->src[i].src.ssa)) return NULL; } @@ -2466,17 +2482,19 @@ ntq_setup_outputs(struct v3d_compile *c) * Each nir_register gets a struct qreg per 32-bit component being stored. */ static void -ntq_setup_registers(struct v3d_compile *c, struct exec_list *list) +ntq_setup_registers(struct v3d_compile *c, nir_function_impl *impl) { - foreach_list_typed(nir_register, nir_reg, node, list) { - unsigned array_len = MAX2(nir_reg->num_array_elems, 1); + nir_foreach_reg_decl(decl, impl) { + unsigned num_components = nir_intrinsic_num_components(decl); + unsigned array_len = nir_intrinsic_num_array_elems(decl); + array_len = MAX2(array_len, 1); struct qreg *qregs = ralloc_array(c->def_ht, struct qreg, - array_len * - nir_reg->num_components); + array_len * num_components); + nir_ssa_def *nir_reg = &decl->dest.ssa; _mesa_hash_table_insert(c->def_ht, nir_reg, qregs); - for (int i = 0; i < array_len * nir_reg->num_components; i++) + for (int i = 0; i < array_len * num_components; i++) qregs[i] = vir_get_temp(c); } } @@ -3299,6 +3317,11 @@ static void ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) { switch (instr->intrinsic) { + case nir_intrinsic_decl_reg: + case nir_intrinsic_load_reg: + case nir_intrinsic_store_reg: + break; /* Ignore these */ + case nir_intrinsic_load_uniform: ntq_emit_load_uniform(c, instr); break; @@ -3827,6 +3850,17 @@ is_cheap_block(nir_block *block) if (--cost <= 0) return false; break; + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + switch (intr->intrinsic) { + case nir_intrinsic_decl_reg: + case nir_intrinsic_load_reg: + case nir_intrinsic_store_reg: + continue; + default: + return false; + } + } default: return false; } @@ -4310,7 +4344,7 @@ ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list) static void ntq_emit_impl(struct v3d_compile *c, nir_function_impl *impl) { - ntq_setup_registers(c, &impl->registers); + ntq_setup_registers(c, impl); ntq_emit_cf_list(c, &impl->body); } diff --git a/src/broadcom/compiler/v3d33_tex.c b/src/broadcom/compiler/v3d33_tex.c index 47bf2458d27..b4b10f77eb6 100644 --- a/src/broadcom/compiler/v3d33_tex.c +++ b/src/broadcom/compiler/v3d33_tex.c @@ -135,9 +135,7 @@ v3d33_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) * instruction writes and how many the instruction could produce. */ p1_unpacked.return_words_of_texture_data = - instr->dest.is_ssa ? - nir_ssa_def_components_read(&instr->dest.ssa) : - (1 << instr->dest.reg.reg->num_components) - 1; + nir_ssa_def_components_read(&instr->dest.ssa); uint32_t p0_packed; V3D33_TEXTURE_UNIFORM_PARAMETER_0_CFG_MODE1_pack(NULL, diff --git a/src/broadcom/compiler/v3d40_tex.c b/src/broadcom/compiler/v3d40_tex.c index a20ee110a23..8f8983b15ff 100644 --- a/src/broadcom/compiler/v3d40_tex.c +++ b/src/broadcom/compiler/v3d40_tex.c @@ -244,10 +244,17 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) /* Limit the number of channels returned to both how many the NIR * instruction writes and how many the instruction could produce. */ - if (instr->dest.is_ssa) { + assert(instr->dest.is_ssa); + nir_intrinsic_instr *store = nir_store_reg_for_def(&instr->dest.ssa); + if (store == NULL) { p0_unpacked.return_words_of_texture_data = nir_ssa_def_components_read(&instr->dest.ssa); } else { + nir_ssa_def *reg = store->src[1].ssa; + nir_intrinsic_instr *decl = nir_reg_get_decl(reg); + unsigned reg_num_components = + nir_intrinsic_num_components(decl); + /* For the non-ssa case we don't have a full equivalent to * nir_ssa_def_components_read. This is a problem for the 16 * bit case. nir_lower_tex will not change the destination as @@ -256,8 +263,8 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr) * manually ensure that here. */ uint32_t num_components = output_type_32_bit ? - MIN2(instr->dest.reg.reg->num_components, 4) : - MIN2(instr->dest.reg.reg->num_components, 2); + MIN2(reg_num_components, 4) : + MIN2(reg_num_components, 2); p0_unpacked.return_words_of_texture_data = (1 << num_components) - 1; } diff --git a/src/broadcom/compiler/v3d_nir_lower_line_smooth.c b/src/broadcom/compiler/v3d_nir_lower_line_smooth.c index 32ccad6a0a5..750ee4d32cd 100644 --- a/src/broadcom/compiler/v3d_nir_lower_line_smooth.c +++ b/src/broadcom/compiler/v3d_nir_lower_line_smooth.c @@ -72,10 +72,11 @@ lower_line_smooth_func(struct lower_line_smooth_state *state, if (intr->intrinsic != nir_intrinsic_store_output || nir_intrinsic_base(intr) != 0 || - intr->num_components != 4 || - !intr->src[0].is_ssa) + intr->num_components != 4) continue; + assert(intr->src[0].is_ssa); + lower_line_smooth_intrinsic(state, &b, intr); progress = true; } diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 660b11b0577..019c86e51c8 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -1167,9 +1167,13 @@ v3d_instr_delay_cb(nir_instr *instr, void *data) * we are trying to strike a balance based on empirical testing. */ case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); if (!c->disable_general_tmu_sched) { - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); switch (intr->intrinsic) { + case nir_intrinsic_decl_reg: + case nir_intrinsic_load_reg: + case nir_intrinsic_store_reg: + return 0; case nir_intrinsic_load_ssbo: case nir_intrinsic_load_scratch: case nir_intrinsic_load_shared: @@ -1183,7 +1187,14 @@ v3d_instr_delay_cb(nir_instr *instr, void *data) return 1; } } else { - return 1; + switch (intr->intrinsic) { + case nir_intrinsic_decl_reg: + case nir_intrinsic_load_reg: + case nir_intrinsic_store_reg: + return 0; + default: + return 1; + } } break; } @@ -1624,7 +1635,7 @@ v3d_attempt_compile(struct v3d_compile *c) NIR_PASS(_, c->s, nir_lower_bool_to_int32); NIR_PASS(_, c->s, nir_convert_to_lcssa, true, true); NIR_PASS_V(c->s, nir_divergence_analysis); - NIR_PASS(_, c->s, nir_convert_from_ssa, true, false); + NIR_PASS(_, c->s, nir_convert_from_ssa, true, true); struct nir_schedule_options schedule_options = { /* Schedule for about half our register space, to enable more @@ -1659,6 +1670,8 @@ v3d_attempt_compile(struct v3d_compile *c) nir_move_const_undef | buffer_opts); + NIR_PASS_V(c->s, nir_trivialize_registers); + v3d_nir_to_vir(c); }