mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 16:08:04 +02:00
radeonsi: gather pipe_stream_output_info from NIR intrinsics
This stops using pipe_stream_output_info from the create_*s_state context functions, because NIR contains everything and can do more advanced shader linking this way.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14414>
This commit is contained in:
parent
981bd8cbe2
commit
b57a163b7d
8 changed files with 41 additions and 35 deletions
|
|
@ -606,7 +606,7 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader)
|
|||
|
||||
/* The edgeflag is always stored in the last element that's also
|
||||
* used for padding to reduce LDS bank conflicts. */
|
||||
if (shader->selector->so.num_outputs)
|
||||
if (shader->selector->info.enabled_streamout_buffer_mask)
|
||||
lds_vertex_size = 4 * shader->selector->info.num_outputs + 1;
|
||||
if (gfx10_ngg_writes_user_edgeflags(shader))
|
||||
lds_vertex_size = MAX2(lds_vertex_size, 1);
|
||||
|
|
@ -2169,7 +2169,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
|
|||
{
|
||||
const struct si_shader_selector *sel = shader->selector;
|
||||
|
||||
if (sel->info.stage == MESA_SHADER_GEOMETRY && sel->so.num_outputs)
|
||||
if (sel->info.stage == MESA_SHADER_GEOMETRY && sel->info.enabled_streamout_buffer_mask)
|
||||
return 44;
|
||||
|
||||
return 8;
|
||||
|
|
|
|||
|
|
@ -814,7 +814,7 @@ struct si_streamout {
|
|||
|
||||
/* External state which comes from the vertex shader,
|
||||
* it must be set explicitly when binding a shader. */
|
||||
uint16_t *stride_in_dw;
|
||||
uint8_t *stride_in_dw;
|
||||
unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
|
||||
|
||||
/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "nir_serialize.h"
|
||||
#include "nir/nir_helpers.h"
|
||||
#include "si_pipe.h"
|
||||
#include "si_shader_internal.h"
|
||||
#include "sid.h"
|
||||
|
|
@ -1587,7 +1588,9 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
bool free_nir;
|
||||
struct nir_shader *nir = si_get_nir_shader(sel, &shader->key, &free_nir);
|
||||
|
||||
struct pipe_stream_output_info so = sel->so;
|
||||
struct pipe_stream_output_info so = {};
|
||||
if (sel->info.enabled_streamout_buffer_mask)
|
||||
nir_gather_stream_output_info(nir, &so);
|
||||
|
||||
/* Dump NIR before doing NIR->LLVM conversion in case the
|
||||
* conversion fails. */
|
||||
|
|
@ -1616,7 +1619,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
|
||||
/* The GS copy shader is compiled next. */
|
||||
if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
|
||||
shader->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
|
||||
shader->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, &so, debug);
|
||||
if (!shader->gs_copy_shader) {
|
||||
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
|
||||
return false;
|
||||
|
|
@ -2312,7 +2315,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
|
|||
shader->uses_vs_state_outprim = sscreen->use_ngg &&
|
||||
/* Only used by streamout in vertex shaders. */
|
||||
sel->info.stage == MESA_SHADER_VERTEX &&
|
||||
sel->so.num_outputs;
|
||||
sel->info.enabled_streamout_buffer_mask;
|
||||
|
||||
if (sel->info.stage == MESA_SHADER_VERTEX) {
|
||||
shader->uses_base_instance = sel->info.uses_base_instance ||
|
||||
|
|
|
|||
|
|
@ -367,6 +367,7 @@ struct si_shader_info {
|
|||
|
||||
int constbuf0_num_slots;
|
||||
ubyte num_stream_output_components[4];
|
||||
uint16_t enabled_streamout_buffer_mask;
|
||||
|
||||
uint num_memory_stores;
|
||||
|
||||
|
|
@ -459,7 +460,6 @@ struct si_shader_selector {
|
|||
void *nir_binary;
|
||||
unsigned nir_size;
|
||||
|
||||
struct pipe_stream_output_info so;
|
||||
struct si_shader_info info;
|
||||
|
||||
enum pipe_shader_type pipe_shader_type;
|
||||
|
|
@ -486,7 +486,6 @@ struct si_shader_selector {
|
|||
uint16_t gsvs_vertex_size;
|
||||
ubyte gs_input_verts_per_prim;
|
||||
unsigned max_gsvs_emit_size;
|
||||
uint16_t enabled_streamout_buffer_mask;
|
||||
bool tess_turns_off_ngg;
|
||||
|
||||
/* PS parameters. */
|
||||
|
|
@ -959,6 +958,7 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
|
|||
struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
struct si_shader_selector *gs_selector,
|
||||
const struct pipe_stream_output_info *so,
|
||||
struct util_debug_callback *debug);
|
||||
|
||||
/* si_shader_nir.c */
|
||||
|
|
|
|||
|
|
@ -325,6 +325,7 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
|
|||
(nir_intrinsic_component(intr) * 2);
|
||||
unsigned new_mask = mask & ~info->output_usagemask[loc];
|
||||
|
||||
/* Iterate over all components. */
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
unsigned stream = (gs_streams >> (i * 2)) & 0x3;
|
||||
|
||||
|
|
@ -332,6 +333,16 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
|
|||
info->output_streams[loc] |= stream << (i * 2);
|
||||
info->num_stream_output_components[stream]++;
|
||||
}
|
||||
|
||||
if (nir_intrinsic_has_io_xfb(intr)) {
|
||||
nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(intr) :
|
||||
nir_intrinsic_io_xfb2(intr);
|
||||
if (xfb.out[i % 2].num_components) {
|
||||
unsigned stream = (gs_streams >> (i * 2)) & 0x3;
|
||||
info->enabled_streamout_buffer_mask |=
|
||||
BITFIELD_BIT(stream * 4 + xfb.out[i % 2].buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nir_intrinsic_has_src_type(intr))
|
||||
|
|
|
|||
|
|
@ -422,6 +422,7 @@ void si_preload_gs_rings(struct si_shader_context *ctx)
|
|||
struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
struct si_shader_selector *gs_selector,
|
||||
const struct pipe_stream_output_info *so,
|
||||
struct util_debug_callback *debug)
|
||||
{
|
||||
struct si_shader_context ctx;
|
||||
|
|
@ -446,7 +447,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
|
||||
ctx.shader = shader;
|
||||
ctx.stage = MESA_SHADER_VERTEX;
|
||||
ctx.so = gs_selector->so;
|
||||
ctx.so = *so;
|
||||
|
||||
builder = ctx.ac.builder;
|
||||
|
||||
|
|
|
|||
|
|
@ -203,9 +203,6 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
|
|||
_mesa_sha1_init(&ctx);
|
||||
_mesa_sha1_update(&ctx, &shader_variant_flags, 4);
|
||||
_mesa_sha1_update(&ctx, ir_binary, ir_size);
|
||||
if (sel->info.stage == MESA_SHADER_VERTEX || sel->info.stage == MESA_SHADER_TESS_EVAL ||
|
||||
sel->info.stage == MESA_SHADER_GEOMETRY)
|
||||
_mesa_sha1_update(&ctx, &sel->so, sizeof(sel->so));
|
||||
_mesa_sha1_final(&ctx, ir_sha1_cache_key);
|
||||
|
||||
if (ir_binary == blob.data)
|
||||
|
|
@ -1512,7 +1509,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
|||
}
|
||||
|
||||
shader->ctx_reg.ngg.vgt_stages.u.ngg = 1;
|
||||
shader->ctx_reg.ngg.vgt_stages.u.streamout = gs_sel->so.num_outputs;
|
||||
shader->ctx_reg.ngg.vgt_stages.u.streamout = !!gs_sel->info.enabled_streamout_buffer_mask;
|
||||
shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader);
|
||||
shader->ctx_reg.ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32;
|
||||
}
|
||||
|
|
@ -1702,11 +1699,11 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
|
|||
rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
|
||||
|
||||
if (!sscreen->use_ngg_streamout) {
|
||||
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
|
||||
S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
|
||||
S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
|
||||
S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
|
||||
S_00B12C_SO_EN(!!shader->selector->so.num_outputs);
|
||||
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->info.base.xfb_stride[0]) |
|
||||
S_00B12C_SO_BASE1_EN(!!shader->selector->info.base.xfb_stride[1]) |
|
||||
S_00B12C_SO_BASE2_EN(!!shader->selector->info.base.xfb_stride[2]) |
|
||||
S_00B12C_SO_BASE3_EN(!!shader->selector->info.base.xfb_stride[3]) |
|
||||
S_00B12C_SO_EN(!!info->enabled_streamout_buffer_mask);
|
||||
}
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1);
|
||||
|
|
@ -2783,7 +2780,7 @@ int si_shader_select(struct pipe_context *ctx, struct si_shader_ctx_state *state
|
|||
}
|
||||
}
|
||||
|
||||
static void si_parse_next_shader_property(const struct si_shader_info *info, bool streamout,
|
||||
static void si_parse_next_shader_property(const struct si_shader_info *info,
|
||||
union si_shader_key *key)
|
||||
{
|
||||
gl_shader_stage next_shader = info->base.next_stage;
|
||||
|
|
@ -2804,7 +2801,7 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo
|
|||
* assume that it's a HW LS. (the next shader is TCS)
|
||||
* This heuristic is needed for separate shader objects.
|
||||
*/
|
||||
if (!info->writes_position && !streamout)
|
||||
if (!info->writes_position && !info->enabled_streamout_buffer_mask)
|
||||
key->ge.as_ls = 1;
|
||||
}
|
||||
break;
|
||||
|
|
@ -2874,10 +2871,11 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
|
|||
|
||||
shader->selector = sel;
|
||||
shader->is_monolithic = false;
|
||||
si_parse_next_shader_property(&sel->info, sel->so.num_outputs != 0, &shader->key);
|
||||
si_parse_next_shader_property(&sel->info, &shader->key);
|
||||
|
||||
if (sel->info.stage <= MESA_SHADER_GEOMETRY &&
|
||||
sscreen->use_ngg && (!sel->so.num_outputs || sscreen->use_ngg_streamout) &&
|
||||
sscreen->use_ngg && (!sel->info.enabled_streamout_buffer_mask ||
|
||||
sscreen->use_ngg_streamout) &&
|
||||
((sel->info.stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) ||
|
||||
sel->info.stage == MESA_SHADER_TESS_EVAL || sel->info.stage == MESA_SHADER_GEOMETRY))
|
||||
shader->key.ge.as_ngg = 1;
|
||||
|
|
@ -3035,8 +3033,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
sel->compiler_ctx_state.debug = sctx->debug;
|
||||
sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
|
||||
|
||||
sel->so = state->stream_output;
|
||||
|
||||
if (state->type == PIPE_SHADER_IR_TGSI) {
|
||||
sel->nir = tgsi_to_nir(state->tokens, ctx->screen, true);
|
||||
} else {
|
||||
|
|
@ -3057,12 +3053,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers,
|
||||
&sel->active_samplers_and_images);
|
||||
|
||||
/* Record which streamout buffers are enabled. */
|
||||
for (unsigned i = 0; i < sel->so.num_outputs; i++) {
|
||||
sel->enabled_streamout_buffer_mask |= (1 << sel->so.output[i].output_buffer)
|
||||
<< (sel->so.output[i].stream * 4);
|
||||
}
|
||||
|
||||
sel->num_vs_inputs =
|
||||
sel->info.stage == MESA_SHADER_VERTEX && !sel->info.base.vs.blit_sgprs_amd
|
||||
? sel->info.num_inputs
|
||||
|
|
@ -3197,7 +3187,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
!sel->info.writes_viewport_index && /* cull only against viewport 0 */
|
||||
!sel->info.base.writes_memory &&
|
||||
/* NGG GS supports culling with streamout because it culls after streamout. */
|
||||
(sel->info.stage == MESA_SHADER_GEOMETRY || !sel->so.num_outputs) &&
|
||||
(sel->info.stage == MESA_SHADER_GEOMETRY || !sel->info.enabled_streamout_buffer_mask) &&
|
||||
(sel->info.stage != MESA_SHADER_GEOMETRY || sel->info.num_stream_output_components[0]) &&
|
||||
(sel->info.stage != MESA_SHADER_VERTEX ||
|
||||
(!sel->info.base.vs.blit_sgprs_amd &&
|
||||
|
|
@ -3312,8 +3302,8 @@ static void si_update_streamout_state(struct si_context *sctx)
|
|||
if (!shader_with_so)
|
||||
return;
|
||||
|
||||
sctx->streamout.enabled_stream_buffers_mask = shader_with_so->enabled_streamout_buffer_mask;
|
||||
sctx->streamout.stride_in_dw = shader_with_so->so.stride;
|
||||
sctx->streamout.enabled_stream_buffers_mask = shader_with_so->info.enabled_streamout_buffer_mask;
|
||||
sctx->streamout.stride_in_dw = shader_with_so->info.base.xfb_stride;
|
||||
}
|
||||
|
||||
static void si_update_clip_regs(struct si_context *sctx, struct si_shader_selector *old_hw_vs,
|
||||
|
|
@ -3440,7 +3430,8 @@ bool si_update_ngg(struct si_context *sctx)
|
|||
} else if (!sctx->screen->use_ngg_streamout) {
|
||||
struct si_shader_selector *last = si_get_vs(sctx)->cso;
|
||||
|
||||
if ((last && last->so.num_outputs) || sctx->streamout.prims_gen_query_enabled)
|
||||
if ((last && last->info.enabled_streamout_buffer_mask) ||
|
||||
sctx->streamout.prims_gen_query_enabled)
|
||||
new_ngg = false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -308,7 +308,7 @@ static void si_emit_streamout_begin(struct si_context *sctx)
|
|||
{
|
||||
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
|
||||
struct si_streamout_target **t = sctx->streamout.targets;
|
||||
uint16_t *stride_in_dw = sctx->streamout.stride_in_dw;
|
||||
uint8_t *stride_in_dw = sctx->streamout.stride_in_dw;
|
||||
unsigned i;
|
||||
|
||||
si_flush_vgt_streamout(sctx);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue