mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 13:48:06 +02:00
radeonsi/nir: always lower ballot masks as 64-bit, codegen handles it
This fixes KHR-GL45.shader_ballot_tests.ShaderBallotBitmasks. This solution is better because the IR no longer depends on the wave size (wave32 vs. wave64).
This commit is contained in:
parent
5d37194d43
commit
223b3174bd
11 changed files with 38 additions and 24 deletions
|
|
@ -60,7 +60,8 @@ void
|
|||
ac_llvm_context_init(struct ac_llvm_context *ctx,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
enum chip_class chip_class, enum radeon_family family,
|
||||
enum ac_float_mode float_mode, unsigned wave_size)
|
||||
enum ac_float_mode float_mode, unsigned wave_size,
|
||||
unsigned ballot_mask_bits)
|
||||
{
|
||||
LLVMValueRef args[1];
|
||||
|
||||
|
|
@ -69,6 +70,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
|
|||
ctx->chip_class = chip_class;
|
||||
ctx->family = family;
|
||||
ctx->wave_size = wave_size;
|
||||
ctx->ballot_mask_bits = ballot_mask_bits;
|
||||
ctx->module = ac_create_module(wave_size == 32 ? compiler->tm_wave32
|
||||
: compiler->tm,
|
||||
ctx->context);
|
||||
|
|
@ -93,6 +95,7 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
|
|||
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
|
||||
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
|
||||
ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
|
||||
ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits);
|
||||
|
||||
ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
|
||||
ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ struct ac_llvm_context {
|
|||
LLVMTypeRef v4f32;
|
||||
LLVMTypeRef v8i32;
|
||||
LLVMTypeRef iN_wavemask;
|
||||
LLVMTypeRef iN_ballotmask;
|
||||
|
||||
LLVMValueRef i8_0;
|
||||
LLVMValueRef i8_1;
|
||||
|
|
@ -114,7 +115,9 @@ struct ac_llvm_context {
|
|||
|
||||
enum chip_class chip_class;
|
||||
enum radeon_family family;
|
||||
|
||||
unsigned wave_size;
|
||||
unsigned ballot_mask_bits;
|
||||
|
||||
LLVMValueRef lds;
|
||||
};
|
||||
|
|
@ -123,7 +126,8 @@ void
|
|||
ac_llvm_context_init(struct ac_llvm_context *ctx,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
enum chip_class chip_class, enum radeon_family family,
|
||||
enum ac_float_mode float_mode, unsigned wave_size);
|
||||
enum ac_float_mode float_mode, unsigned wave_size,
|
||||
unsigned ballot_mask_bits);
|
||||
|
||||
void
|
||||
ac_llvm_context_dispose(struct ac_llvm_context *ctx);
|
||||
|
|
|
|||
|
|
@ -3205,6 +3205,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
|
|||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_ballot:
|
||||
result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
|
||||
if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
|
||||
result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
|
||||
break;
|
||||
case nir_intrinsic_read_invocation:
|
||||
result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]),
|
||||
|
|
|
|||
|
|
@ -4320,7 +4320,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
|||
AC_FLOAT_MODE_DEFAULT;
|
||||
|
||||
ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class,
|
||||
options->family, float_mode, options->wave_size);
|
||||
options->family, float_mode, options->wave_size,
|
||||
options->wave_size);
|
||||
ctx.context = ctx.ac.context;
|
||||
|
||||
radv_nir_shader_info_init(&shader_info->info);
|
||||
|
|
@ -4834,7 +4835,7 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
|
|||
AC_FLOAT_MODE_DEFAULT;
|
||||
|
||||
ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class,
|
||||
options->family, float_mode, 64);
|
||||
options->family, float_mode, 64, 64);
|
||||
ctx.context = ctx.ac.context;
|
||||
|
||||
ctx.is_gs_copy_shader = true;
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ static void si_create_compute_state_async(void *job, int thread_index)
|
|||
|
||||
si_nir_opts(sel->nir);
|
||||
si_nir_scan_shader(sel->nir, &sel->info);
|
||||
si_lower_nir(sel, sscreen->compute_wave_size);
|
||||
si_lower_nir(sel);
|
||||
}
|
||||
|
||||
/* Store the declared LDS size into tgsi_shader_info for the shader
|
||||
|
|
|
|||
|
|
@ -50,7 +50,8 @@ static const char scratch_rsrc_dword1_symbol[] =
|
|||
static void si_init_shader_ctx(struct si_shader_context *ctx,
|
||||
struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
unsigned wave_size);
|
||||
unsigned wave_size,
|
||||
bool nir);
|
||||
|
||||
static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
|
||||
struct lp_build_tgsi_context *bld_base,
|
||||
|
|
@ -5725,7 +5726,8 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
shader->is_gs_copy_shader = true;
|
||||
|
||||
si_init_shader_ctx(&ctx, sscreen, compiler,
|
||||
si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false));
|
||||
si_get_wave_size(sscreen, PIPE_SHADER_VERTEX, false, false),
|
||||
false);
|
||||
ctx.shader = shader;
|
||||
ctx.type = PIPE_SHADER_VERTEX;
|
||||
|
||||
|
|
@ -5989,11 +5991,13 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
|
|||
static void si_init_shader_ctx(struct si_shader_context *ctx,
|
||||
struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
unsigned wave_size)
|
||||
unsigned wave_size,
|
||||
bool nir)
|
||||
{
|
||||
struct lp_build_tgsi_context *bld_base;
|
||||
|
||||
si_llvm_context_init(ctx, sscreen, compiler, wave_size);
|
||||
si_llvm_context_init(ctx, sscreen, compiler, wave_size,
|
||||
nir ? 64 : wave_size);
|
||||
|
||||
bld_base = &ctx->bld_base;
|
||||
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
|
||||
|
|
@ -6939,7 +6943,8 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
|||
si_dump_streamout(&sel->so);
|
||||
}
|
||||
|
||||
si_init_shader_ctx(&ctx, sscreen, compiler, si_get_shader_wave_size(shader));
|
||||
si_init_shader_ctx(&ctx, sscreen, compiler, si_get_shader_wave_size(shader),
|
||||
sel->nir != NULL);
|
||||
si_llvm_context_set_ir(&ctx, shader);
|
||||
|
||||
memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
|
||||
|
|
@ -7319,7 +7324,8 @@ si_get_shader_part(struct si_screen *sscreen,
|
|||
struct si_shader_context ctx;
|
||||
si_init_shader_ctx(&ctx, sscreen, compiler,
|
||||
si_get_wave_size(sscreen, type, shader.key.as_ngg,
|
||||
shader.key.as_es));
|
||||
shader.key.as_es),
|
||||
false);
|
||||
ctx.shader = &shader;
|
||||
ctx.type = type;
|
||||
|
||||
|
|
|
|||
|
|
@ -757,7 +757,7 @@ void si_nir_scan_shader(const struct nir_shader *nir,
|
|||
void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
|
||||
struct tgsi_tessctrl_info *out);
|
||||
void si_nir_lower_ps_inputs(struct nir_shader *nir);
|
||||
void si_lower_nir(struct si_shader_selector *sel, unsigned wave_size);
|
||||
void si_lower_nir(struct si_shader_selector *sel);
|
||||
void si_nir_opts(struct nir_shader *nir);
|
||||
|
||||
/* si_state_shaders.c */
|
||||
|
|
|
|||
|
|
@ -279,7 +279,8 @@ LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
|
|||
void si_llvm_context_init(struct si_shader_context *ctx,
|
||||
struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
unsigned wave_size);
|
||||
unsigned wave_size,
|
||||
unsigned ballot_mask_bits);
|
||||
void si_llvm_context_set_ir(struct si_shader_context *ctx,
|
||||
struct si_shader *shader);
|
||||
|
||||
|
|
|
|||
|
|
@ -986,8 +986,7 @@ void si_nir_lower_ps_inputs(struct nir_shader *nir)
|
|||
* Perform "lowering" operations on the NIR that are run once when the shader
|
||||
* selector is created.
|
||||
*/
|
||||
void
|
||||
si_lower_nir(struct si_shader_selector* sel, unsigned wave_size)
|
||||
void si_lower_nir(struct si_shader_selector *sel)
|
||||
{
|
||||
/* Adjust the driver location of inputs and outputs. The state tracker
|
||||
* interprets them as slots, while the ac/nir backend interprets them
|
||||
|
|
@ -1023,8 +1022,8 @@ si_lower_nir(struct si_shader_selector* sel, unsigned wave_size)
|
|||
NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
|
||||
|
||||
const nir_lower_subgroups_options subgroups_options = {
|
||||
.subgroup_size = wave_size,
|
||||
.ballot_bit_size = wave_size,
|
||||
.subgroup_size = 64,
|
||||
.ballot_bit_size = 64,
|
||||
.lower_to_scalar = true,
|
||||
.lower_subgroup_masks = true,
|
||||
.lower_vote_trivial = false,
|
||||
|
|
|
|||
|
|
@ -954,7 +954,8 @@ static void emit_immediate(struct lp_build_tgsi_context *bld_base,
|
|||
void si_llvm_context_init(struct si_shader_context *ctx,
|
||||
struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
unsigned wave_size)
|
||||
unsigned wave_size,
|
||||
unsigned ballot_mask_bits)
|
||||
{
|
||||
struct lp_type type;
|
||||
|
||||
|
|
@ -970,7 +971,7 @@ void si_llvm_context_init(struct si_shader_context *ctx,
|
|||
ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
|
||||
sscreen->info.family,
|
||||
AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
|
||||
wave_size);
|
||||
wave_size, ballot_mask_bits);
|
||||
|
||||
ctx->gallivm.context = ctx->ac.context;
|
||||
ctx->gallivm.module = ctx->ac.module;
|
||||
|
|
|
|||
|
|
@ -2427,11 +2427,8 @@ static void si_init_shader_selector_async(void *job, int thread_index)
|
|||
assert(thread_index < ARRAY_SIZE(sscreen->compiler));
|
||||
compiler = &sscreen->compiler[thread_index];
|
||||
|
||||
if (sel->nir) {
|
||||
/* TODO: GS always sets wave size = default. Legacy GS will have
|
||||
* incorrect subgroup_size and ballot_bit_size. */
|
||||
si_lower_nir(sel, si_get_wave_size(sscreen, sel->type, true, false));
|
||||
}
|
||||
if (sel->nir)
|
||||
si_lower_nir(sel);
|
||||
|
||||
/* Compile the main shader part for use with a prolog and/or epilog.
|
||||
* If this fails, the driver will try to compile a monolithic shader
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue