pan/blend: Use the blend builder helpers instead of nir_lower_blend()

This is a little more manual (though it's actually less code) but it
gives us a lot more control and makes the whole flow nicer.

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39367>
This commit is contained in:
Faith Ekstrand 2026-01-17 00:01:45 -05:00 committed by Marge Bot
parent 2313bec66e
commit 669ddc5241
4 changed files with 118 additions and 155 deletions

View file

@ -71,7 +71,6 @@ spec@egl_khr_surfaceless_context@viewport,Fail
spec@egl_mesa_configless_context@basic,Fail
spec@ext_base_instance@arb_base_instance-drawarrays_gles3,Fail
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
spec@ext_framebuffer_object@fbo-blending-formats,Fail
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail

View file

@ -62,7 +62,6 @@ spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_rgba,F
spec@egl_khr_surfaceless_context@viewport,Fail
spec@egl_mesa_configless_context@basic,Fail
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
spec@ext_framebuffer_object@fbo-blending-formats,Fail
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
@ -318,7 +317,6 @@ afbcp-spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_
afbcp-spec@egl_khr_surfaceless_context@viewport,Fail
afbcp-spec@egl_mesa_configless_context@basic,Fail
afbcp-spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
afbcp-spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
afbcp-spec@ext_framebuffer_object@fbo-blending-formats,Fail
afbcp-spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
afbcp-spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail

View file

@ -143,7 +143,6 @@ spec@ext_framebuffer_multisample@draw-buffers-alpha-to-coverage 8,Fail
spec@ext_framebuffer_multisample@sample-alpha-to-coverage 16 color,Fail
spec@ext_framebuffer_multisample@sample-alpha-to-coverage 6 color,Fail
spec@ext_framebuffer_multisample@sample-alpha-to-coverage 8 color,Fail
spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
spec@ext_framebuffer_object@fbo-blending-formats,Fail
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail

View file

@ -791,70 +791,6 @@ get_equation_str(const struct pan_blend_rt_state *rt_state, char *str,
}
}
#if PAN_ARCH >= 6
/*
 * Replace a load_output on a colour render target with a load_tile_pan of
 * the same component count and bit size.
 *
 * The render target index is taken from the I/O semantics location relative
 * to FRAG_RESULT_DATA0.  Only the upper 32 bits of the internal blend
 * descriptor are handed to the tile load.
 */
static bool
lower_rt_load_output(nir_builder *b, nir_intrinsic_instr *intr,
                     const struct pan_blend_state *state)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
   assert(sem.location >= FRAG_RESULT_DATA0);

   const unsigned rt_idx = sem.location - FRAG_RESULT_DATA0;
   const nir_alu_type type = nir_intrinsic_dest_type(intr);
   const uint64_t desc = GENX(pan_blend_get_internal_desc)(
      state->rts[rt_idx].format, rt_idx, nir_alu_type_get_type_size(type),
      false);

   /* New instructions go right after the load being replaced. */
   b->cursor = nir_after_instr(&intr->instr);

   /* Single-sampled targets always read sample 0. */
   nir_def *sample;
   if (state->rts[rt_idx].nr_samples > 1)
      sample = nir_load_sample_id(b);
   else
      sample = nir_imm_int(b, 0);

   nir_def *tile = nir_load_tile_pan(
      b, intr->def.num_components, intr->def.bit_size,
      pan_nir_tile_rt_sample(b, nir_imm_int(b, rt_idx), sample),
      pan_nir_tile_default_coverage(b),
      nir_imm_int(b, desc >> 32),
      .dest_type = type,
      .io_semantics = sem);

   nir_def_replace(&intr->def, tile);
   return true;
}

/*
 * Replace a store_output on a colour render target with a blend_pan that
 * takes the cumulative coverage, the full 64-bit internal blend descriptor
 * and the stored value padded to a vec4.
 */
static bool
lower_rt_store_output(nir_builder *b, nir_intrinsic_instr *intr,
                      const struct pan_blend_state *state)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
   assert(sem.location >= FRAG_RESULT_DATA0);

   const unsigned rt_idx = sem.location - FRAG_RESULT_DATA0;
   const nir_alu_type type = nir_intrinsic_src_type(intr);
   const uint64_t desc = GENX(pan_blend_get_internal_desc)(
      state->rts[rt_idx].format, rt_idx, nir_alu_type_get_type_size(type),
      false);

   /* The store is dropped; the blend is emitted where it used to be. */
   b->cursor = nir_instr_remove(&intr->instr);
   assert(nir_intrinsic_component(intr) == 0);

   nir_blend_pan(b, nir_load_cumulative_coverage_pan(b),
                 nir_imm_int64(b, desc),
                 nir_pad_vec4(b, intr->src[0].ssa),
                 .io_semantics = sem,
                 .src_type = type);
   return true;
}

/*
 * Per-intrinsic callback: lowers load_output / store_output to the
 * tile-load and blend intrinsics.  `data` is the pan_blend_state describing
 * the render targets.  Returns true when the intrinsic was rewritten and
 * false for every other intrinsic.
 */
static bool
lower_rt_intrin(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   const struct pan_blend_state *state = data;

   if (intr->intrinsic == nir_intrinsic_load_output)
      return lower_rt_load_output(b, intr, state);

   if (intr->intrinsic == nir_intrinsic_store_output)
      return lower_rt_store_output(b, intr, state);

   return false;
}
#endif
nir_shader *
GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
nir_alu_type src0_type, nir_alu_type src1_type,
@ -865,111 +801,142 @@ GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
get_equation_str(rt_state, equation_str, sizeof(equation_str));
nir_builder b = nir_builder_init_simple_shader(
nir_builder builder = nir_builder_init_simple_shader(
MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH),
"pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt,
util_format_name(rt_state->format), rt_state->nr_samples,
state->logicop_enable ? "logicop" : "equation",
state->logicop_enable ? logicop_str(state->logicop_func) : equation_str);
nir_builder *b = &builder;
const enum pipe_format format = rt_state->format;
const struct util_format_description *format_desc =
util_format_description(rt_state->format);
nir_alu_type nir_type = pan_unpacked_type_for_format(format_desc);
util_format_description(format);
/* Bifrost/Valhall support 16-bit and 32-bit register formats for
* LD_TILE/ST_TILE/BLEND, but do not support 8-bit. Rather than making
* the fragment output 8-bit and inserting extra conversions in the
* compiler, promote the output to 16-bit. The larger size is still
* compatible with correct conversion semantics.
/* Choose a type which is not going to lead to precision loss while
* blending. If we're not dual-source blending, src1_type will be
* nir_type_invalid which has a size of zero.
*/
if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(nir_type) == 8)
nir_type = nir_alu_type_get_base_type(nir_type) | 16;
nir_alu_type dest_type = pan_unpacked_type_for_format(format_desc);
if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(dest_type) == 8)
dest_type = nir_alu_type_get_base_type(dest_type) | 16;
nir_lower_blend_options options = {
.logicop_enable = state->logicop_enable,
.logicop_func = state->logicop_func,
};
const unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type);
const nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type);
options.rt[rt].format = rt_state->format;
options.rt[rt].colormask = rt_state->equation.color_mask;
/* Midgard doesn't always provide types at all but it's always float32 */
src0_type = src0_type ?: nir_type_float32;
src1_type = src1_type ?: nir_type_float32;
if (!rt_state->equation.blend_enable) {
static const nir_lower_blend_channel replace = {
.func = PIPE_BLEND_ADD,
.src_factor = PIPE_BLENDFACTOR_ONE,
.dst_factor = PIPE_BLENDFACTOR_ZERO,
};
nir_def *src0 = nir_load_blend_input_pan(b,
4, nir_alu_type_get_type_size(src0_type),
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
.io_semantics.dual_source_blend_index = 0,
.io_semantics.num_slots = 1,
.dest_type = src0_type);
options.rt[rt].rgb = replace;
options.rt[rt].alpha = replace;
} else {
options.rt[rt].rgb.func = rt_state->equation.rgb_func;
options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
options.rt[rt].alpha.func = rt_state->equation.alpha_func;
options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
nir_def *src1 = nir_load_blend_input_pan(b,
4, nir_alu_type_get_type_size(src1_type),
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
.io_semantics.dual_source_blend_index = 1,
.io_semantics.num_slots = 1,
.dest_type = src1_type);
/* Make sure everyone is the same type. We assume the destination type
* here because TGSI sometimes gives us bogus types. When they're not
* bogus, shader types are required to match the format anyway.
*
* On Midgard, the blend shader is responsible for format conversion.
* As the OpenGL spec requires integer conversions to saturate, we must
* saturate ourselves here. On Bifrost and later, the conversion
* hardware handles this automatically.
*/
bool should_saturate = PAN_ARCH <= 5 && dest_base_type != nir_type_float;
src0 = nir_convert_with_rounding(b, src0, dest_base_type, dest_type,
nir_rounding_mode_undef, should_saturate);
src1 = nir_convert_with_rounding(b, src1, dest_base_type, dest_type,
nir_rounding_mode_undef, should_saturate);
if (state->alpha_to_one && dest_base_type == nir_type_float) {
nir_def *one = nir_imm_floatN_t(b, 1.0, dest_bit_size);
src0 = nir_vector_insert_imm(b, src0, one, 3);
src1 = nir_vector_insert_imm(b, src1, one, 3);
}
nir_def *zero = nir_imm_int(&b, 0);
for (unsigned i = 0; i < 2; ++i) {
nir_alu_type src_type =
(i == 1 ? src1_type : src0_type) ?: nir_type_float32;
/* HACK: workaround buggy TGSI shaders (u_blitter) */
src_type = nir_alu_type_get_base_type(nir_type) |
nir_alu_type_get_type_size(src_type);
nir_def *src = nir_load_blend_input_pan(
&b, 4, nir_alu_type_get_type_size(src_type),
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
.io_semantics.dual_source_blend_index = i,
.io_semantics.num_slots = 1, .dest_type = src_type);
if (state->alpha_to_one && src_type == nir_type_float32) {
/* force alpha to 1 */
src = nir_vector_insert_imm(&b, src,
nir_imm_floatN_t(&b, 1.0, src->bit_size),
3);
}
/* On Midgard, the blend shader is responsible for format conversion.
* As the OpenGL spec requires integer conversions to saturate, we must
* saturate ourselves here. On Bifrost and later, the conversion
* hardware handles this automatically.
*/
nir_alu_type T = nir_alu_type_get_base_type(nir_type);
bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float);
src = nir_convert_with_rounding(&b, src, T, nir_type,
nir_rounding_mode_undef, should_saturate);
nir_store_output(&b, src, zero, .write_mask = BITFIELD_MASK(4),
.src_type = nir_type,
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
.io_semantics.num_slots = 1,
.io_semantics.dual_source_blend_index = i);
}
b.shader->info.io_lowered = true;
NIR_PASS(_, b.shader, nir_lower_blend, &options);
#if PAN_ARCH >= 6
/* On bifrost+ we use the NIR blend/load intrinsics directly */
NIR_PASS(_, b.shader, nir_shader_intrinsics_pass,
lower_rt_intrin, nir_metadata_control_flow, (void *)state);
/* And we put a blend_return_pan at the end.
*
* We have to do this here because nir_lower_blend assumes it can stick
* stuff at the end of the shader, after the blend_return_pan.
*/
b = nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(b.shader)));
nir_blend_return_pan(&b);
const uint64_t opaque_blend_desc =
GENX(pan_blend_get_internal_desc)(format, rt, dest_bit_size, false);
#else
const uint64_t opaque_blend_desc = 0;
#endif
return b.shader;
nir_def *dest;
if (PAN_ARCH >= 6) {
nir_def *sample_id =
rt_state->nr_samples > 1 ? nir_load_sample_id(b) : nir_imm_int(b, 0);
dest = nir_load_tile_pan(b,
4, dest_bit_size,
pan_nir_tile_rt_sample(b, nir_imm_int(b, rt), sample_id),
pan_nir_tile_default_coverage(b),
nir_imm_int(b, opaque_blend_desc >> 32),
.dest_type = dest_type,
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
.io_semantics.num_slots = 1);
} else {
dest = nir_load_output(b,
4, dest_bit_size,
nir_imm_int(b, 0),
.dest_type = dest_type,
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
.io_semantics.num_slots = 1);
}
nir_def *color = src0;
if (state->logicop_enable) {
color = nir_color_logicop(b, src0, dest, state->logicop_func, format);
} else if (rt_state->equation.blend_enable) {
const nir_lower_blend_rt nir_rt = {
.format = format,
.rgb.func = rt_state->equation.rgb_func,
.rgb.src_factor = rt_state->equation.rgb_src_factor,
.rgb.dst_factor = rt_state->equation.rgb_dst_factor,
.alpha.func = rt_state->equation.alpha_func,
.alpha.src_factor = rt_state->equation.alpha_src_factor,
.alpha.dst_factor = rt_state->equation.alpha_dst_factor,
.colormask = rt_state->equation.color_mask,
};
color = nir_color_blend(b, src0, src1, dest, &nir_rt, false);
}
color = nir_color_mask(b, color, dest, rt_state->equation.color_mask);
/* Throw away any channels we don't need */
color = nir_color_mask(b, color, nir_undef(b, 4, dest_bit_size),
util_format_colormask(format_desc));
/* Only write the destination if it changed */
if (color != dest) {
if (PAN_ARCH >= 6) {
nir_blend_pan(b, nir_load_cumulative_coverage_pan(b),
nir_imm_int64(b, opaque_blend_desc),
color,
.src_type = dest_type,
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
.io_semantics.num_slots = 1);
} else {
nir_store_output(b, color, nir_imm_int(b, 0),
.src_type = dest_type,
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
.io_semantics.num_slots = 1);
}
}
if (PAN_ARCH >= 6)
nir_blend_return_pan(b);
b->shader->info.io_lowered = true;
return builder.shader;
}
#if PAN_ARCH >= 6