diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 6b0c2d401d4..1b2bb282089 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2978,6 +2978,15 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir) SI_SPI_PS_INPUT_ADDR_FOR_PROLOG; } } + + if (nir->info.stage <= MESA_SHADER_GEOMETRY && nir->xfb_info && + !shader->key.ge.as_ls && !shader->key.ge.as_es) { + unsigned num_streamout_dwords = 0; + + for (unsigned i = 0; i < 4; i++) + num_streamout_dwords += nir->info.xfb_stride[i]; + shader->info.num_streamout_vec4s = DIV_ROUND_UP(num_streamout_dwords, 4); + } } /* Late shader variant info for AMD-specific intrinsics. */ @@ -3073,6 +3082,7 @@ si_nir_generate_gs_copy_shader(struct si_screen *sscreen, shader->selector = gs_selector; shader->is_gs_copy_shader = true; shader->wave_size = si_determine_wave_size(sscreen, shader); + shader->info.num_streamout_vec4s = gs_shader->info.num_streamout_vec4s; STATIC_ASSERT(sizeof(shader->info.vs_output_param_offset[0]) == 1); memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000, diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d2546cb57da..b5f7a2f8c4d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -478,9 +478,7 @@ struct si_shader_info { uint8_t output_usagemask[PIPE_MAX_SHADER_OUTPUTS]; uint8_t output_streams[PIPE_MAX_SHADER_OUTPUTS]; uint8_t output_type[PIPE_MAX_SHADER_OUTPUTS]; /* enum nir_alu_type */ - uint8_t output_xfb_writemask[PIPE_MAX_SHADER_OUTPUTS]; - uint8_t num_streamout_components; uint8_t num_vs_inputs; uint8_t num_vbos_in_user_sgprs; uint8_t num_stream_output_components[4]; /* for GS streams, not streamout */ @@ -883,6 +881,7 @@ struct si_shader_binary_info { bool uses_discard : 1; uint8_t nr_pos_exports; uint8_t nr_param_exports; + uint8_t num_streamout_vec4s; unsigned private_mem_vgprs; unsigned max_simd_waves; }; diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index 8c02f643507..b8bc31c24f5 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -184,8 +184,6 @@ static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info, info->enabled_streamout_buffer_mask |= BITFIELD_BIT(stream * 4 + xfb.out[i % 2].buffer); } - - info->output_xfb_writemask[loc] |= nir_instr_xfb_write_mask(intr); } } @@ -506,13 +504,6 @@ void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir, scan_instruction(nir, info, instr, colors_lowered); } - if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL || - nir->info.stage == MESA_SHADER_GEOMETRY) { - info->num_streamout_components = 0; - for (unsigned i = 0; i < info->num_outputs; i++) - info->num_streamout_components += util_bitcount(info->output_xfb_writemask[i]); - } - if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) { /* Add the PrimitiveID output, but don't increment num_outputs. * The driver inserts PrimitiveID only when it's used by the pixel shader, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 8072d65d23b..205e57fe491 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1633,7 +1633,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader /* This tuning adds up to 50% streamout performance. */ if (si_shader_uses_streamout(shader)) { - unsigned num_streamout_vec4s = DIV_ROUND_UP(shader->selector->info.num_streamout_components, 4); + unsigned num_streamout_vec4s = shader->info.num_streamout_vec4s; /* TODO: Tested on a pre-production chip. Re-test on the final chip. */ if (num_streamout_vec4s <= 4)