From c4b45f1ec86b0dffb5780052760b1b444c475ad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 13 Dec 2024 07:01:42 +0100 Subject: [PATCH] ac/nir: Pass ac_nir_prerast_out to ac_nir_export_position. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a subsequent commit, ac_nir_export_position will start using other fields from ac_nir_prerast_out. Signed-off-by: Timur Kristóf Acked-by: Marek Olšák Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_nir.c | 36 +++++++++++++++---------------- src/amd/common/ac_nir_helpers.h | 2 +- src/amd/common/ac_nir_lower_ngg.c | 14 ++++++------ 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index 32b5b3b8499..efff6643b3a 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -367,7 +367,7 @@ ac_nir_export_position(nir_builder *b, bool force_vrs, bool done, uint64_t outputs_written, - nir_def *(*outputs)[4], + ac_nir_prerast_out *out, nir_def *row) { nir_intrinsic_instr *exp[4]; @@ -379,7 +379,7 @@ ac_nir_export_position(nir_builder *b, * Setting valid_mask=1 prevents it and has no other effect. */ const unsigned pos_flags = gfx_level == GFX10 ? AC_EXP_FLAG_VALID_MASK : 0; - nir_def *pos = get_pos0_output(b, outputs[VARYING_SLOT_POS]); + nir_def *pos = get_pos0_output(b, out->outputs[VARYING_SLOT_POS]); exp[exp_num] = export(b, pos, row, V_008DFC_SQ_EXP_POS + exp_num, pos_flags, 0xf); exp_num++; @@ -395,15 +395,15 @@ ac_nir_export_position(nir_builder *b, VARYING_BIT_PRIMITIVE_SHADING_RATE; /* clear output mask if no one written */ - if (!outputs[VARYING_SLOT_PSIZ][0]) + if (!out->outputs[VARYING_SLOT_PSIZ][0]) outputs_written &= ~VARYING_BIT_PSIZ; - if (!outputs[VARYING_SLOT_EDGE][0]) + if (!out->outputs[VARYING_SLOT_EDGE][0]) outputs_written &= ~VARYING_BIT_EDGE; - if (!outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0]) + if (!out->outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0]) outputs_written &= ~VARYING_BIT_PRIMITIVE_SHADING_RATE; - if (!outputs[VARYING_SLOT_LAYER][0]) + if (!out->outputs[VARYING_SLOT_LAYER][0]) outputs_written &= ~VARYING_BIT_LAYER; - if (!outputs[VARYING_SLOT_VIEWPORT][0]) + if (!out->outputs[VARYING_SLOT_VIEWPORT][0]) outputs_written &= ~VARYING_BIT_VIEWPORT; if ((outputs_written & mask) || force_vrs) { @@ -412,21 +412,21 @@ ac_nir_export_position(nir_builder *b, unsigned write_mask = 0; if (outputs_written & VARYING_BIT_PSIZ) { - vec[0] = outputs[VARYING_SLOT_PSIZ][0]; + vec[0] = out->outputs[VARYING_SLOT_PSIZ][0]; write_mask |= BITFIELD_BIT(0); } if (outputs_written & VARYING_BIT_EDGE) { - vec[1] = nir_umin(b, outputs[VARYING_SLOT_EDGE][0], nir_imm_int(b, 1)); + vec[1] = nir_umin(b, out->outputs[VARYING_SLOT_EDGE][0], nir_imm_int(b, 1)); write_mask |= BITFIELD_BIT(1); } nir_def *rates = NULL; if (outputs_written & VARYING_BIT_PRIMITIVE_SHADING_RATE) { - rates = outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0]; + rates = out->outputs[VARYING_SLOT_PRIMITIVE_SHADING_RATE][0]; } else if (force_vrs) { /* If Pos.W != 1 (typical for non-GUI elements), use coarse shading. */ - nir_def *pos_w = outputs[VARYING_SLOT_POS][3]; + nir_def *pos_w = out->outputs[VARYING_SLOT_POS][3]; pos_w = pos_w ? nir_u2u32(b, pos_w) : nir_imm_float(b, 1.0); nir_def *cond = nir_fneu_imm(b, pos_w, 1); rates = nir_bcsel(b, cond, nir_load_force_vrs_rates_amd(b), nir_imm_int(b, 0)); @@ -438,18 +438,18 @@ ac_nir_export_position(nir_builder *b, } if (outputs_written & VARYING_BIT_LAYER) { - vec[2] = outputs[VARYING_SLOT_LAYER][0]; + vec[2] = out->outputs[VARYING_SLOT_LAYER][0]; write_mask |= BITFIELD_BIT(2); } if (outputs_written & VARYING_BIT_VIEWPORT) { if (gfx_level >= GFX9) { /* GFX9 has the layer in [10:0] and the viewport index in [19:16]. */ - nir_def *v = nir_ishl_imm(b, outputs[VARYING_SLOT_VIEWPORT][0], 16); + nir_def *v = nir_ishl_imm(b, out->outputs[VARYING_SLOT_VIEWPORT][0], 16); vec[2] = nir_ior(b, vec[2], v); write_mask |= BITFIELD_BIT(2); } else { - vec[3] = outputs[VARYING_SLOT_VIEWPORT][0]; + vec[3] = out->outputs[VARYING_SLOT_VIEWPORT][0]; write_mask |= BITFIELD_BIT(3); } } @@ -464,7 +464,7 @@ ac_nir_export_position(nir_builder *b, if ((outputs_written & (VARYING_BIT_CLIP_DIST0 << i)) && (clip_cull_mask & BITFIELD_RANGE(i * 4, 4))) { exp[exp_num] = export( - b, get_export_output(b, outputs[VARYING_SLOT_CLIP_DIST0 + i]), row, + b, get_export_output(b, out->outputs[VARYING_SLOT_CLIP_DIST0 + i]), row, V_008DFC_SQ_EXP_POS + exp_num + exp_pos_offset, 0, (clip_cull_mask >> (i * 4)) & 0xf); exp_num++; @@ -472,7 +472,7 @@ ac_nir_export_position(nir_builder *b, } if (outputs_written & VARYING_BIT_CLIP_VERTEX) { - nir_def *vtx = get_export_output(b, outputs[VARYING_SLOT_CLIP_VERTEX]); + nir_def *vtx = get_export_output(b, out->outputs[VARYING_SLOT_CLIP_VERTEX]); /* Clip distance for clip vertex to each user clip plane. */ nir_def *clip_dist[8] = {0}; @@ -879,7 +879,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir, export_outputs &= ~VARYING_BIT_LAYER; ac_nir_export_position(&b, gfx_level, clip_cull_mask, !has_param_exports, - force_vrs, true, export_outputs, out.outputs, NULL); + force_vrs, true, export_outputs, &out, NULL); if (has_param_exports) { ac_nir_export_parameters(&b, param_offsets, @@ -966,7 +966,7 @@ ac_nir_lower_legacy_vs(nir_shader *nir, export_outputs &= ~VARYING_BIT_LAYER; ac_nir_export_position(&b, gfx_level, clip_cull_mask, !has_param_exports, - force_vrs, true, export_outputs, out.outputs, NULL); + force_vrs, true, export_outputs, &out, NULL); if (has_param_exports) { ac_nir_export_parameters(&b, param_offsets, diff --git a/src/amd/common/ac_nir_helpers.h b/src/amd/common/ac_nir_helpers.h index 7b534b3070e..604e502d812 100644 --- a/src/amd/common/ac_nir_helpers.h +++ b/src/amd/common/ac_nir_helpers.h @@ -99,7 +99,7 @@ ac_nir_export_position(nir_builder *b, bool force_vrs, bool done, uint64_t outputs_written, - nir_def *(*outputs)[4], + ac_nir_prerast_out *out, nir_def *row); void diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index bb2b1e58871..66debc71355 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -2648,14 +2648,16 @@ export_pos0_wait_attr_ring(nir_builder *b, nir_if *if_es_thread, nir_def *output /* Export just the pos0 output. */ nir_if *if_export_empty_pos = nir_push_if(b, if_es_thread->condition.ssa); { - nir_def *pos_output_array[VARYING_SLOT_MAX][4] = {0}; - memcpy(pos_output_array[VARYING_SLOT_POS], pos_output.chan, sizeof(pos_output.chan)); + ac_nir_prerast_out out = { + .outputs = {{pos_output.chan[0], pos_output.chan[1], pos_output.chan[2], pos_output.chan[3]}}, + .infos = {{.components_mask = 0xf}}, + }; ac_nir_export_position(b, options->gfx_level, options->clip_cull_dist_mask, !options->has_param_exports, options->force_vrs, true, - VARYING_BIT_POS, pos_output_array, NULL); + VARYING_BIT_POS, &out, NULL); } nir_pop_if(b, if_export_empty_pos); } @@ -2916,7 +2918,7 @@ ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *option options->clip_cull_dist_mask, !options->has_param_exports, options->force_vrs, !wait_attr_ring, - export_outputs, state.out.outputs, NULL); + export_outputs, &state.out, NULL); nogs_export_vertex_params(b, impl, if_es_thread, num_es_threads, &state); @@ -3359,7 +3361,7 @@ ngg_gs_export_vertices(nir_builder *b, nir_def *max_num_out_vtx, nir_def *tid_in s->options->clip_cull_dist_mask, !s->options->has_param_exports, s->options->force_vrs, !wait_attr_ring, - export_outputs, s->out.outputs, NULL); + export_outputs, &s->out, NULL); nir_pop_if(b, if_vtx_export_thread); @@ -4628,7 +4630,7 @@ emit_ms_vertex(nir_builder *b, nir_def *index, nir_def *row, bool exports, bool if (exports) { ac_nir_export_position(b, s->gfx_level, s->clipdist_enable_mask, !s->has_param_exports, false, true, - s->per_vertex_outputs | VARYING_BIT_POS, s->out.outputs, row); + s->per_vertex_outputs | VARYING_BIT_POS, &s->out, row); } if (parameters) {