mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
radeonsi/gfx9: always wrap GS and TCS in an if-block (v2)
With merged ESGS shaders, the GS part of a wave may be empty, and the hardware gets confused if any GS messages are sent from that wave. Since S_SENDMSG is executed even when EXEC = 0, we have to wrap even non-monolithic GS shaders in an if-block, so that the entire shader and hence the S_SENDMSG instructions are skipped in empty waves.

This change is not required for TCS/HS, but applying it there as well simplifies the logic a bit.

Fixes GL45-CTS.geometry_shader.rendering.rendering.*

v2: ensure that the TCS epilog doesn't run for non-existing patches

Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
parent
873789002f
commit
081ac6e5c6
2 changed files with 79 additions and 33 deletions
|
|
@ -174,6 +174,20 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function that builds an LLVM IR PHI node and immediately adds
|
||||||
|
* incoming edges.
|
||||||
|
*/
|
||||||
|
/*
 * Parameters, as used by the body below:
 *   ctx            - provides the LLVM builder (ctx->builder) used to create the PHI.
 *   type           - LLVM type given to the new PHI node.
 *   count_incoming - number of entries consumed from both values[] and blocks[].
 *   values, blocks - parallel arrays handed to LLVMAddIncoming; values[i] is the
 *                    incoming value associated with blocks[i].
 * Returns the PHI node that was built.
 */
static LLVMValueRef
|
||||||
|
build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
|
||||||
|
unsigned count_incoming, LLVMValueRef *values,
|
||||||
|
LLVMBasicBlockRef *blocks)
|
||||||
|
{
|
||||||
|
/* Create the PHI through ctx's builder; the empty string leaves it unnamed. */
LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
|
||||||
|
/* Register all count_incoming (value, block) pairs in a single call. */
LLVMAddIncoming(phi, values, blocks, count_incoming);
|
||||||
|
return phi;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the value of a shader input parameter and extract a bitfield.
|
* Get the value of a shader input parameter and extract a bitfield.
|
||||||
*/
|
*/
|
||||||
|
|
@ -2698,6 +2712,7 @@ si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef ret,
|
||||||
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||||
{
|
{
|
||||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||||
|
LLVMBuilderRef builder = ctx->gallivm.builder;
|
||||||
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
|
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
|
||||||
|
|
||||||
si_copy_tcs_inputs(bld_base);
|
si_copy_tcs_inputs(bld_base);
|
||||||
|
|
@ -2706,8 +2721,29 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||||
invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
|
invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
|
||||||
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
|
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
|
||||||
|
|
||||||
|
if (ctx->screen->b.chip_class >= GFX9) {
|
||||||
|
LLVMBasicBlockRef blocks[2] = {
|
||||||
|
LLVMGetInsertBlock(builder),
|
||||||
|
ctx->merged_wrap_if_state.entry_block
|
||||||
|
};
|
||||||
|
LLVMValueRef values[2];
|
||||||
|
|
||||||
|
lp_build_endif(&ctx->merged_wrap_if_state);
|
||||||
|
|
||||||
|
values[0] = rel_patch_id;
|
||||||
|
values[1] = LLVMGetUndef(ctx->i32);
|
||||||
|
rel_patch_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
|
||||||
|
|
||||||
|
values[0] = tf_lds_offset;
|
||||||
|
values[1] = LLVMGetUndef(ctx->i32);
|
||||||
|
tf_lds_offset = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
|
||||||
|
|
||||||
|
values[0] = invocation_id;
|
||||||
|
values[1] = ctx->i32_1; /* cause the epilog to skip threads */
|
||||||
|
invocation_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
|
||||||
|
}
|
||||||
|
|
||||||
/* Return epilog parameters from this function. */
|
/* Return epilog parameters from this function. */
|
||||||
LLVMBuilderRef builder = ctx->gallivm.builder;
|
|
||||||
LLVMValueRef ret = ctx->return_value;
|
LLVMValueRef ret = ctx->return_value;
|
||||||
unsigned vgpr;
|
unsigned vgpr;
|
||||||
|
|
||||||
|
|
@ -2935,6 +2971,9 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||||
|
|
||||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
|
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
|
||||||
si_get_gs_wave_id(ctx));
|
si_get_gs_wave_id(ctx));
|
||||||
|
|
||||||
|
if (ctx->screen->b.chip_class >= GFX9)
|
||||||
|
lp_build_endif(&ctx->merged_wrap_if_state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
|
static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||||
|
|
@ -5502,14 +5541,20 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
|
||||||
preload_ring_buffers(ctx);
|
preload_ring_buffers(ctx);
|
||||||
|
|
||||||
/* For GFX9 merged shaders:
|
/* For GFX9 merged shaders:
|
||||||
* - Set EXEC. If the prolog is present, set EXEC there instead.
|
* - Set EXEC for the first shader. If the prolog is present, set
|
||||||
|
* EXEC there instead.
|
||||||
* - Add a barrier before the second shader.
|
* - Add a barrier before the second shader.
|
||||||
|
* - In the second shader, reset EXEC to ~0 and wrap the main part in
|
||||||
|
* an if-statement. This is required for correctness in geometry
|
||||||
|
* shaders, to ensure that empty GS waves do not send GS_EMIT and
|
||||||
|
* GS_CUT messages.
|
||||||
*
|
*
|
||||||
* The same thing for monolithic shaders is done in
|
* For monolithic merged shaders, the first shader is wrapped in an
|
||||||
* si_build_wrapper_function.
|
* if-block together with its prolog in si_build_wrapper_function.
|
||||||
*/
|
*/
|
||||||
if (ctx->screen->b.chip_class >= GFX9 && !is_monolithic) {
|
if (ctx->screen->b.chip_class >= GFX9) {
|
||||||
if (sel->info.num_instructions > 1 && /* not empty shader */
|
if (!is_monolithic &&
|
||||||
|
sel->info.num_instructions > 1 && /* not empty shader */
|
||||||
(shader->key.as_es || shader->key.as_ls) &&
|
(shader->key.as_es || shader->key.as_ls) &&
|
||||||
(ctx->type == PIPE_SHADER_TESS_EVAL ||
|
(ctx->type == PIPE_SHADER_TESS_EVAL ||
|
||||||
(ctx->type == PIPE_SHADER_VERTEX &&
|
(ctx->type == PIPE_SHADER_VERTEX &&
|
||||||
|
|
@ -5518,9 +5563,19 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
|
||||||
ctx->param_merged_wave_info, 0);
|
ctx->param_merged_wave_info, 0);
|
||||||
} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
|
} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
|
||||||
ctx->type == PIPE_SHADER_GEOMETRY) {
|
ctx->type == PIPE_SHADER_GEOMETRY) {
|
||||||
si_init_exec_from_input(ctx,
|
if (!is_monolithic)
|
||||||
ctx->param_merged_wave_info, 8);
|
si_init_exec_full_mask(ctx);
|
||||||
|
|
||||||
|
/* The barrier must execute for all shaders in a
|
||||||
|
* threadgroup.
|
||||||
|
*/
|
||||||
si_llvm_emit_barrier(NULL, bld_base, NULL);
|
si_llvm_emit_barrier(NULL, bld_base, NULL);
|
||||||
|
|
||||||
|
LLVMValueRef num_threads = unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
|
||||||
|
LLVMValueRef ena =
|
||||||
|
LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
|
||||||
|
ac_get_thread_id(&ctx->ac), num_threads, "");
|
||||||
|
lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -5991,15 +6046,9 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
|
||||||
|
|
||||||
/* Merged shaders are executed conditionally depending
|
/* Merged shaders are executed conditionally depending
|
||||||
* on the number of enabled threads passed in the input SGPRs. */
|
* on the number of enabled threads passed in the input SGPRs. */
|
||||||
if (is_merged_shader(ctx->shader) &&
|
if (is_merged_shader(ctx->shader) && part == 0) {
|
||||||
(part == 0 || part == next_shader_first_part)) {
|
|
||||||
LLVMValueRef ena, count = initial[3];
|
LLVMValueRef ena, count = initial[3];
|
||||||
|
|
||||||
/* The thread count for the 2nd shader is at bit-offset 8. */
|
|
||||||
if (part == next_shader_first_part) {
|
|
||||||
count = LLVMBuildLShr(builder, count,
|
|
||||||
LLVMConstInt(ctx->i32, 8, 0), "");
|
|
||||||
}
|
|
||||||
count = LLVMBuildAnd(builder, count,
|
count = LLVMBuildAnd(builder, count,
|
||||||
LLVMConstInt(ctx->i32, 0x7f, 0), "");
|
LLVMConstInt(ctx->i32, 0x7f, 0), "");
|
||||||
ena = LLVMBuildICmp(builder, LLVMIntULT,
|
ena = LLVMBuildICmp(builder, LLVMIntULT,
|
||||||
|
|
@ -6056,26 +6105,20 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
|
||||||
ret = LLVMBuildCall(builder, parts[part], in, num_params, "");
|
ret = LLVMBuildCall(builder, parts[part], in, num_params, "");
|
||||||
|
|
||||||
if (is_merged_shader(ctx->shader) &&
|
if (is_merged_shader(ctx->shader) &&
|
||||||
(part + 1 == next_shader_first_part ||
|
part + 1 == next_shader_first_part) {
|
||||||
part + 1 == num_parts)) {
|
|
||||||
lp_build_endif(&if_state);
|
lp_build_endif(&if_state);
|
||||||
|
|
||||||
if (part + 1 == next_shader_first_part) {
|
/* The second half of the merged shader should use
|
||||||
/* A barrier is required between 2 merged shaders. */
|
* the inputs from the toplevel (wrapper) function,
|
||||||
si_llvm_emit_barrier(NULL, &ctx->bld_base, NULL);
|
* not the return value from the last call.
|
||||||
|
*
|
||||||
/* The second half of the merged shader should use
|
* That's because the last call was executed condi-
|
||||||
* the inputs from the toplevel (wrapper) function,
|
* tionally, so we can't consume it in the main
|
||||||
* not the return value from the last call.
|
* block.
|
||||||
*
|
*/
|
||||||
* That's because the last call was executed condi-
|
memcpy(out, initial, sizeof(initial));
|
||||||
* tionally, so we can't consume it in the main
|
num_out = initial_num_out;
|
||||||
* block.
|
num_out_sgpr = initial_num_out_sgpr;
|
||||||
*/
|
|
||||||
memcpy(out, initial, sizeof(initial));
|
|
||||||
num_out = initial_num_out;
|
|
||||||
num_out_sgpr = initial_num_out_sgpr;
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
#define SI_SHADER_PRIVATE_H
|
#define SI_SHADER_PRIVATE_H
|
||||||
|
|
||||||
#include "si_shader.h"
|
#include "si_shader.h"
|
||||||
|
#include "gallivm/lp_bld_flow.h"
|
||||||
#include "gallivm/lp_bld_init.h"
|
#include "gallivm/lp_bld_init.h"
|
||||||
#include "gallivm/lp_bld_tgsi.h"
|
#include "gallivm/lp_bld_tgsi.h"
|
||||||
#include "tgsi/tgsi_parse.h"
|
#include "tgsi/tgsi_parse.h"
|
||||||
|
|
@ -105,6 +106,8 @@ struct si_shader_context {
|
||||||
unsigned flow_depth;
|
unsigned flow_depth;
|
||||||
unsigned flow_depth_max;
|
unsigned flow_depth_max;
|
||||||
|
|
||||||
|
struct lp_build_if_state merged_wrap_if_state;
|
||||||
|
|
||||||
struct tgsi_array_info *temp_arrays;
|
struct tgsi_array_info *temp_arrays;
|
||||||
LLVMValueRef *temp_array_allocas;
|
LLVMValueRef *temp_array_allocas;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue