From e5ee15a42ebb4ca48d0201f03c46d67d8d02aa3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sun, 5 Jan 2025 11:37:13 -0500 Subject: [PATCH] radeonsi: gather PS inputs from shader variant NIR This further reduces dependence on si_shader_info. union si_ps_input_info is added because we don't need usage_mask in there. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_shader.c | 91 +++++++++++++++---- src/gallium/drivers/radeonsi/si_shader.h | 11 ++- .../drivers/radeonsi/si_state_shaders.cpp | 2 +- 3 files changed, 83 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 034c3875577..c1dfd74dddd 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2752,14 +2752,78 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir) /* Find out which frag coord components are used. */ uint8_t frag_coord_mask = 0; - if (BITSET_TEST(sysvals, SYSTEM_VALUE_FRAG_COORD)) { - nir_foreach_block(block, nir_shader_get_entrypoint(nir)) { - nir_foreach_instr(instr, block) { - if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if (intr->intrinsic == nir_intrinsic_load_frag_coord || - intr->intrinsic == nir_intrinsic_load_sample_pos) - frag_coord_mask |= nir_def_components_read(&intr->def); + /* Since flat+convergent and non-flat components can occur in the same vec4, start with + * all PS inputs as flat and change them to smooth when we find a component that's + * interpolated. + */ + for (unsigned i = 0; i < ARRAY_SIZE(shader->info.ps_inputs); i++) + shader->info.ps_inputs[i].interpolate = INTERP_MODE_FLAT; + + nir_foreach_block(block, nir_shader_get_entrypoint(nir)) { + nir_foreach_instr(instr, block) { + if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + + switch (intr->intrinsic) { + case nir_intrinsic_load_frag_coord: + case nir_intrinsic_load_sample_pos: + frag_coord_mask |= nir_def_components_read(&intr->def); + break; + case nir_intrinsic_load_input: + case nir_intrinsic_load_interpolated_input: { + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + unsigned index = nir_intrinsic_base(intr); + assert(sem.num_slots == 1); + + shader->info.num_ps_inputs = MAX2(shader->info.num_ps_inputs, index + 1); + shader->info.ps_inputs[index].semantic = sem.location; + /* Determine interpolation mode. This only cares about FLAT/SMOOTH/COLOR. + * COLOR is only for nir_intrinsic_load_color0/1. + */ + if (intr->intrinsic == nir_intrinsic_load_interpolated_input) { + shader->info.ps_inputs[index].interpolate = INTERP_MODE_SMOOTH; + if (intr->def.bit_size == 16) + shader->info.ps_inputs[index].fp16_lo_hi_valid |= 0x1 << sem.high_16bits; + } + break; + } + case nir_intrinsic_load_color0: + assert(!shader->is_monolithic); + shader->info.ps_colors_read |= nir_def_components_read(&intr->def); + break; + case nir_intrinsic_load_color1: + assert(!shader->is_monolithic); + shader->info.ps_colors_read |= nir_def_components_read(&intr->def) << 4; + break; + default: + break; + } + } + } + } + + /* Add both front and back color inputs. */ + if (!shader->is_monolithic) { + unsigned index = shader->info.num_ps_inputs; + + for (unsigned back = 0; back < 2; back++) { + for (unsigned i = 0; i < 2; i++) { + if ((shader->info.ps_colors_read >> (i * 4)) & 0xf) { + assert(index < ARRAY_SIZE(shader->info.ps_inputs)); + shader->info.ps_inputs[index].semantic = + (back ? VARYING_SLOT_BFC0 : VARYING_SLOT_COL0) + i; + + enum glsl_interp_mode mode = i ? nir->info.fs.color1_interp + : nir->info.fs.color0_interp; + shader->info.ps_inputs[index].interpolate = + mode == INTERP_MODE_NONE ? INTERP_MODE_COLOR : mode; + index++; + + /* Back-face colors don't increment num_ps_inputs. si_emit_spi_map will use + * back-face colors conditionally only when needed. + */ + if (!back) + shader->info.num_ps_inputs++; } } } @@ -2844,17 +2908,6 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders * NIR_PASS_V(linked->consumer.nir, nir_recompute_io_bases, nir_var_shader_in); si_get_shader_variant_info(shader, linked->consumer.nir); - - struct si_shader_info info; - si_nir_scan_shader(shader->selector->screen, linked->consumer.nir, &info, - shader->is_monolithic); - - shader->info.num_ps_inputs = info.num_inputs; - shader->info.ps_colors_read = info.colors_read; - - /* A non-monolithic PS doesn't know if back colors are enabled, so copy 2 more. */ - unsigned max_interp = MIN2(info.num_inputs + 2, SI_NUM_INTERP); - memcpy(shader->info.ps_inputs, info.input, max_interp * sizeof(info.input[0])); } for (unsigned i = 0; i < SI_NUM_LINKED_SHADERS; i++) { diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 81a8f2dda03..8a7196acdaf 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -453,6 +453,15 @@ enum si_color_output_type { SI_TYPE_UINT16, }; +union si_ps_input_info { + struct { + uint8_t semantic; + uint8_t interpolate; + uint8_t fp16_lo_hi_valid; + }; + uint32_t _unused; /* this just forces 4-byte alignment */ +}; + union si_input_info { struct { uint8_t semantic; @@ -864,7 +873,7 @@ union si_shader_key { struct si_shader_binary_info { uint8_t vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS]; uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS]; - union si_input_info ps_inputs[SI_NUM_INTERP]; + union si_ps_input_info ps_inputs[SI_NUM_INTERP]; uint8_t num_ps_inputs; uint8_t ps_colors_read; uint8_t num_input_sgprs; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index a89d5edca57..20dd6844a90 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -5049,7 +5049,7 @@ static void si_emit_spi_map(struct si_context *sctx, unsigned index) struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; for (unsigned i = 0; i < NUM_INTERP; i++) { - union si_input_info input = ps->info.ps_inputs[i]; + union si_ps_input_info input = ps->info.ps_inputs[i]; unsigned ps_input_cntl = vs->info.vs_output_ps_input_cntl[input.semantic]; bool non_default_val = G_028644_OFFSET(ps_input_cntl) != 0x20;