diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 4db988bc56b..864732385e4 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -239,6 +239,7 @@ static void *si_create_compute_state(struct pipe_context *ctx, const struct pipe si_sampler_and_image_descriptors_idx(PIPE_SHADER_COMPUTE); sel->info.base.shared_size = cso->req_local_mem; program->shader.selector = &program->sel; + program->shader.wave_size = si_get_shader_wave_size(&program->shader); program->ir_type = cso->ir_type; program->private_size = cso->req_private_mem; program->input_size = cso->req_input_mem; diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e695b2a8d2b..793d74c7b2a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1997,6 +1997,9 @@ static inline unsigned si_get_wave_size(struct si_screen *sscreen, static inline unsigned si_get_shader_wave_size(struct si_shader *shader) { + if (shader->is_gs_copy_shader) + return shader->selector->screen->ge_wave_size; + if (shader->selector->info.stage <= MESA_SHADER_GEOMETRY) { return si_get_wave_size(shader->selector->screen, shader->selector->info.stage, shader->key.ge.as_ngg, diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 46583f06c05..80b3d6144a7 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2100,6 +2100,9 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler default:; } + assert(shader->wave_size == mainp->wave_size); + assert(!shader->previous_stage || shader->wave_size == shader->previous_stage->wave_size); + /* Update SGPR and VGPR counts. */ if (shader->prolog) { shader->config.num_sgprs = diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 31796e8152a..335c4dda83c 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -813,6 +813,7 @@ struct si_shader { bool is_optimized; bool is_binary_shared; bool is_gs_copy_shader; + uint8_t wave_size; /* The following data is all that's needed for binary shaders. */ struct si_shader_binary binary; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index 3ca42259116..27041f9125d 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -425,6 +425,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, shader->selector = gs_selector; shader->is_gs_copy_shader = true; + shader->wave_size = si_get_shader_wave_size(shader); si_llvm_context_init(&ctx, sscreen, compiler, si_get_wave_size(sscreen, MESA_SHADER_VERTEX, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 81be1e07bd6..3a6ae910327 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -1748,6 +1748,8 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader *shader) { + assert(shader->wave_size); + switch (shader->selector->info.stage) { case MESA_SHADER_VERTEX: if (shader->key.ge.as_ls) @@ -2268,6 +2270,7 @@ static bool si_check_missing_main_part(struct si_screen *sscreen, struct si_shad main_part->key.ge.as_ngg = key->ge.as_ngg; } main_part->is_monolithic = false; + main_part->wave_size = si_get_shader_wave_size(main_part); if (!si_compile_shader(sscreen, compiler_state->compiler, main_part, &compiler_state->debug)) { @@ -2444,6 +2447,7 @@ current_not_ready: shader->selector = sel; *((SHADER_KEY_TYPE*)&shader->key) = *key; + shader->wave_size = si_get_shader_wave_size(shader); shader->compiler_ctx_state.compiler = &sctx->compiler; shader->compiler_ctx_state.debug = sctx->debug; shader->compiler_ctx_state.is_debug_context = sctx->is_debug; @@ -2710,6 +2714,8 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind sel->info.stage == MESA_SHADER_TESS_EVAL || sel->info.stage == MESA_SHADER_GEOMETRY)) shader->key.ge.as_ngg = 1; + shader->wave_size = si_get_shader_wave_size(shader); + if (sel->nir) { if (sel->info.stage <= MESA_SHADER_GEOMETRY) si_get_ir_cache_key(sel, shader->key.ge.as_ngg, shader->key.ge.as_es, ir_sha1_cache_key);