mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 16:30:10 +01:00
ac,aco,radeonsi: replace SampleMaskIn with 1 << SampleID if full sample shading
Since the sample mask is always 1 << sample_id with full sample shading, just use that instead of loading sample_mask_in. Set it to 0 if it's a helper invocation. This removes the sample mask input VGPR.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33024>
This commit is contained in:
parent
b1fc34f290
commit
d7d4d56f5b
5 changed files with 67 additions and 24 deletions
|
|
@ -254,12 +254,19 @@ lower_ps_load_sample_mask_in(nir_builder *b, nir_intrinsic_instr *intrin, lower_
|
|||
* The samplemask loaded by hardware is always the coverage of the
|
||||
* entire pixel/fragment, so mask bits out based on the sample ID.
|
||||
*/
|
||||
uint32_t ps_iter_mask = ac_get_ps_iter_mask(s->options->ps_iter_samples);
|
||||
nir_def *sampleid = nir_load_sample_id(b);
|
||||
nir_def *submask = nir_ishl(b, nir_imm_int(b, ps_iter_mask), sampleid);
|
||||
nir_def *replacement;
|
||||
|
||||
nir_def *sample_mask = nir_load_sample_mask_in(b);
|
||||
nir_def *replacement = nir_iand(b, sample_mask, submask);
|
||||
/* Set ps_iter_samples=8 if full sample shading is enabled even for 2x and 4x MSAA
|
||||
* to get this fast path that fully replaces sample_mask_in with sample_id.
|
||||
*/
|
||||
if (s->options->ps_iter_samples == 8) {
|
||||
replacement = nir_bcsel(b, nir_load_helper_invocation(b, 1), nir_imm_int(b, 0),
|
||||
nir_ishl(b, nir_imm_int(b, 1), nir_load_sample_id(b)));
|
||||
} else {
|
||||
uint32_t ps_iter_mask = ac_get_ps_iter_mask(s->options->ps_iter_samples);
|
||||
nir_def *submask = nir_ishl(b, nir_imm_int(b, ps_iter_mask), nir_load_sample_id(b));
|
||||
replacement = nir_iand(b, nir_load_sample_mask_in(b), submask);
|
||||
}
|
||||
|
||||
nir_def_replace(&intrin->def, replacement);
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -11632,13 +11632,29 @@ overwrite_samplemask_arg(isel_context* ctx, const struct aco_ps_prolog_info* fin
|
|||
Temp ancillary = get_arg(ctx, ctx->args->ancillary);
|
||||
Temp sampleid = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), ancillary, Operand::c32(8u),
|
||||
Operand::c32(4u));
|
||||
Temp samplemask = get_arg(ctx, ctx->args->sample_coverage);
|
||||
Temp samplemask;
|
||||
|
||||
uint32_t ps_iter_mask = ac_get_ps_iter_mask(1 << finfo->samplemask_log_ps_iter);
|
||||
Temp iter_mask = bld.copy(bld.def(v1), Operand::c32(ps_iter_mask));
|
||||
if (finfo->samplemask_log_ps_iter == 3) {
|
||||
Temp is_helper_invoc =
|
||||
bld.pseudo(aco_opcode::p_is_helper, bld.def(bld.lm), Operand(exec, bld.lm));
|
||||
ctx->program->needs_exact = true;
|
||||
|
||||
Temp mask = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), sampleid, iter_mask);
|
||||
samplemask = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), samplemask, mask);
|
||||
/* samplemask = is_helper ? 0 : (1 << sample_id); */
|
||||
samplemask =
|
||||
bld.vop2_e64(aco_opcode::v_lshlrev_b32, bld.def(v1), sampleid, Operand::c32(1u));
|
||||
samplemask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), samplemask,
|
||||
Operand::c32(0u), is_helper_invoc);
|
||||
} else {
|
||||
/* samplemask &= ps_iter_mask << sample_id; */
|
||||
uint32_t ps_iter_mask = ac_get_ps_iter_mask(1 << finfo->samplemask_log_ps_iter);
|
||||
Builder::Op mask = ctx->options->gfx_level >= GFX11
|
||||
? Operand::c32(ps_iter_mask)
|
||||
: bld.copy(bld.def(v1), Operand::c32(ps_iter_mask));
|
||||
|
||||
samplemask = bld.vop2_e64(aco_opcode::v_lshlrev_b32, bld.def(v1), sampleid, mask);
|
||||
samplemask = bld.vop2(aco_opcode::v_and_b32, bld.def(v1),
|
||||
get_arg(ctx, ctx->args->sample_coverage), samplemask);
|
||||
}
|
||||
|
||||
ctx->arg_temps[ctx->args->sample_coverage.arg_index] = samplemask;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2895,9 +2895,13 @@ static void si_fixup_spi_ps_input_config(struct si_shader *shader)
|
|||
if (!(shader->config.spi_ps_input_ena & 0x7f))
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
|
||||
|
||||
/* Samplemask fixup requires the sample ID. */
|
||||
/* The sample mask fixup requires the sample ID. */
|
||||
if (key->ps.part.prolog.samplemask_log_ps_iter)
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_ANCILLARY_ENA(1);
|
||||
|
||||
/* The sample mask fixup has an optimization that replaces the sample mask with the sample ID. */
|
||||
if (key->ps.part.prolog.samplemask_log_ps_iter == 3)
|
||||
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
|||
|
|
@ -631,20 +631,29 @@ void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part
|
|||
* entire pixel/fragment, so mask bits out based on the sample ID.
|
||||
*/
|
||||
if (key->ps_prolog.states.samplemask_log_ps_iter) {
|
||||
uint32_t ps_iter_mask =
|
||||
ac_get_ps_iter_mask(1 << key->ps_prolog.states.samplemask_log_ps_iter);
|
||||
LLVMValueRef sampleid = si_unpack_param(ctx, args->ac.ancillary, 8, 4);
|
||||
LLVMValueRef samplemask = ac_get_arg(&ctx->ac, args->ac.sample_coverage);
|
||||
LLVMValueRef sample_id = si_unpack_param(ctx, args->ac.ancillary, 8, 4);
|
||||
LLVMValueRef sample_mask_in;
|
||||
|
||||
samplemask = ac_to_integer(&ctx->ac, samplemask);
|
||||
samplemask =
|
||||
LLVMBuildAnd(ctx->ac.builder, samplemask,
|
||||
LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false),
|
||||
sampleid, ""),
|
||||
"");
|
||||
samplemask = ac_to_float(&ctx->ac, samplemask);
|
||||
/* Set samplemask_log_ps_iter=3 if full sample shading is enabled even for 2x and 4x MSAA
|
||||
* to get this fast path that fully replaces sample_mask_in with sample_id.
|
||||
*/
|
||||
if (key->ps_prolog.states.samplemask_log_ps_iter == 3) {
|
||||
sample_mask_in =
|
||||
LLVMBuildSelect(ctx->ac.builder, ac_build_load_helper_invocation(&ctx->ac),
|
||||
ctx->ac.i32_0,
|
||||
LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, sample_id, ""), "");
|
||||
} else {
|
||||
uint32_t ps_iter_mask =
|
||||
ac_get_ps_iter_mask(1 << key->ps_prolog.states.samplemask_log_ps_iter);
|
||||
sample_mask_in =
|
||||
LLVMBuildAnd(ctx->ac.builder,
|
||||
ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, args->ac.sample_coverage)),
|
||||
LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false),
|
||||
sample_id, ""), "");
|
||||
}
|
||||
|
||||
ret = insert_ret_of_arg(ctx, ret, samplemask, args->ac.sample_coverage.arg_index);
|
||||
sample_mask_in = ac_to_float(&ctx->ac, sample_mask_in);
|
||||
ret = insert_ret_of_arg(ctx, ret, sample_mask_in, args->ac.sample_coverage.arg_index);
|
||||
}
|
||||
|
||||
/* Tell LLVM to insert WQM instruction sequence when needed. */
|
||||
|
|
|
|||
|
|
@ -2816,9 +2816,16 @@ void si_ps_key_update_sample_shading(struct si_context *sctx)
|
|||
|
||||
union si_shader_key *key = &sctx->shader.ps.key;
|
||||
unsigned ps_iter_samples = si_get_ps_iter_samples(sctx);
|
||||
assert(ps_iter_samples <= MAX2(1, sctx->framebuffer.nr_color_samples));
|
||||
|
||||
if (ps_iter_samples > 1 && sel->info.reads_samplemask) {
|
||||
key->ps.part.prolog.samplemask_log_ps_iter = util_logbase2(ps_iter_samples);
|
||||
/* Set samplemask_log_ps_iter=3 if full sample shading is enabled even for 2x and 4x MSAA
|
||||
* to get the fast path that fully replaces sample_mask_in with sample_id.
|
||||
*/
|
||||
if (ps_iter_samples == sctx->framebuffer.nr_color_samples)
|
||||
key->ps.part.prolog.samplemask_log_ps_iter = 3;
|
||||
else
|
||||
key->ps.part.prolog.samplemask_log_ps_iter = util_logbase2(ps_iter_samples);
|
||||
} else {
|
||||
key->ps.part.prolog.samplemask_log_ps_iter = 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue