mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 11:38:05 +02:00
pan/blend: Use the blend builder helpers instead of nir_lower_blend()
This is a little more manual (though it's actually less code), but it gives us a lot more control and makes the whole flow nicer.

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39367>
This commit is contained in:
parent
2313bec66e
commit
669ddc5241
4 changed files with 118 additions and 155 deletions
|
|
@ -71,7 +71,6 @@ spec@egl_khr_surfaceless_context@viewport,Fail
|
||||||
spec@egl_mesa_configless_context@basic,Fail
|
spec@egl_mesa_configless_context@basic,Fail
|
||||||
spec@ext_base_instance@arb_base_instance-drawarrays_gles3,Fail
|
spec@ext_base_instance@arb_base_instance-drawarrays_gles3,Fail
|
||||||
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
|
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
|
||||||
spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
|
|
||||||
spec@ext_framebuffer_object@fbo-blending-formats,Fail
|
spec@ext_framebuffer_object@fbo-blending-formats,Fail
|
||||||
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
|
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
|
||||||
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
|
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,6 @@ spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_rgba,F
|
||||||
spec@egl_khr_surfaceless_context@viewport,Fail
|
spec@egl_khr_surfaceless_context@viewport,Fail
|
||||||
spec@egl_mesa_configless_context@basic,Fail
|
spec@egl_mesa_configless_context@basic,Fail
|
||||||
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
|
spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
|
||||||
spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
|
|
||||||
spec@ext_framebuffer_object@fbo-blending-formats,Fail
|
spec@ext_framebuffer_object@fbo-blending-formats,Fail
|
||||||
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
|
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
|
||||||
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
|
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
|
||||||
|
|
@ -318,7 +317,6 @@ afbcp-spec@egl_khr_gl_image@egl_khr_gl_renderbuffer_image-clear-shared-image gl_
|
||||||
afbcp-spec@egl_khr_surfaceless_context@viewport,Fail
|
afbcp-spec@egl_khr_surfaceless_context@viewport,Fail
|
||||||
afbcp-spec@egl_mesa_configless_context@basic,Fail
|
afbcp-spec@egl_mesa_configless_context@basic,Fail
|
||||||
afbcp-spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
|
afbcp-spec@ext_framebuffer_blit@fbo-blit-check-limits,Fail
|
||||||
afbcp-spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
|
|
||||||
afbcp-spec@ext_framebuffer_object@fbo-blending-formats,Fail
|
afbcp-spec@ext_framebuffer_object@fbo-blending-formats,Fail
|
||||||
afbcp-spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
|
afbcp-spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
|
||||||
afbcp-spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
|
afbcp-spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
|
||||||
|
|
|
||||||
|
|
@ -143,7 +143,6 @@ spec@ext_framebuffer_multisample@draw-buffers-alpha-to-coverage 8,Fail
|
||||||
spec@ext_framebuffer_multisample@sample-alpha-to-coverage 16 color,Fail
|
spec@ext_framebuffer_multisample@sample-alpha-to-coverage 16 color,Fail
|
||||||
spec@ext_framebuffer_multisample@sample-alpha-to-coverage 6 color,Fail
|
spec@ext_framebuffer_multisample@sample-alpha-to-coverage 6 color,Fail
|
||||||
spec@ext_framebuffer_multisample@sample-alpha-to-coverage 8 color,Fail
|
spec@ext_framebuffer_multisample@sample-alpha-to-coverage 8 color,Fail
|
||||||
spec@ext_framebuffer_object@fbo-blending-format-quirks,Fail
|
|
||||||
spec@ext_framebuffer_object@fbo-blending-formats,Fail
|
spec@ext_framebuffer_object@fbo-blending-formats,Fail
|
||||||
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
|
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA4,Fail
|
||||||
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
|
spec@ext_framebuffer_object@fbo-blending-formats@GL_ALPHA8,Fail
|
||||||
|
|
|
||||||
|
|
@ -791,70 +791,6 @@ get_equation_str(const struct pan_blend_rt_state *rt_state, char *str,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if PAN_ARCH >= 6
|
|
||||||
static bool
|
|
||||||
lower_rt_intrin(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|
||||||
{
|
|
||||||
const struct pan_blend_state *state = data;
|
|
||||||
|
|
||||||
switch (intr->intrinsic) {
|
|
||||||
case nir_intrinsic_load_output: {
|
|
||||||
nir_io_semantics io = nir_intrinsic_io_semantics(intr);
|
|
||||||
assert(io.location >= FRAG_RESULT_DATA0);
|
|
||||||
unsigned rt = io.location - FRAG_RESULT_DATA0;
|
|
||||||
enum pipe_format format = state->rts[rt].format;
|
|
||||||
unsigned nr_samples = state->rts[rt].nr_samples;
|
|
||||||
|
|
||||||
nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
|
|
||||||
unsigned size = nir_alu_type_get_type_size(dest_type);
|
|
||||||
uint64_t blend_desc =
|
|
||||||
GENX(pan_blend_get_internal_desc)(format, rt, size, false);
|
|
||||||
|
|
||||||
b->cursor = nir_after_instr(&intr->instr);
|
|
||||||
|
|
||||||
nir_def *sample_id =
|
|
||||||
nr_samples > 1 ? nir_load_sample_id(b) : nir_imm_int(b, 0);
|
|
||||||
|
|
||||||
nir_def *lowered = nir_load_tile_pan(
|
|
||||||
b, intr->def.num_components, intr->def.bit_size,
|
|
||||||
pan_nir_tile_rt_sample(b, nir_imm_int(b, rt), sample_id),
|
|
||||||
pan_nir_tile_default_coverage(b),
|
|
||||||
nir_imm_int(b, blend_desc >> 32),
|
|
||||||
.dest_type = dest_type,
|
|
||||||
.io_semantics = io);
|
|
||||||
|
|
||||||
nir_def_replace(&intr->def, lowered);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
case nir_intrinsic_store_output: {
|
|
||||||
nir_io_semantics io = nir_intrinsic_io_semantics(intr);
|
|
||||||
assert(io.location >= FRAG_RESULT_DATA0);
|
|
||||||
unsigned rt = io.location - FRAG_RESULT_DATA0;
|
|
||||||
enum pipe_format format = state->rts[rt].format;
|
|
||||||
|
|
||||||
nir_alu_type src_type = nir_intrinsic_src_type(intr);
|
|
||||||
unsigned size = nir_alu_type_get_type_size(src_type);
|
|
||||||
uint64_t blend_desc =
|
|
||||||
GENX(pan_blend_get_internal_desc)(format, rt, size, false);
|
|
||||||
|
|
||||||
b->cursor = nir_instr_remove(&intr->instr);
|
|
||||||
|
|
||||||
assert(nir_intrinsic_component(intr) == 0);
|
|
||||||
nir_blend_pan(b, nir_load_cumulative_coverage_pan(b),
|
|
||||||
nir_imm_int64(b, blend_desc),
|
|
||||||
nir_pad_vec4(b, intr->src[0].ssa),
|
|
||||||
.io_semantics = io,
|
|
||||||
.src_type = src_type);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
nir_shader *
|
nir_shader *
|
||||||
GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
|
GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
|
||||||
nir_alu_type src0_type, nir_alu_type src1_type,
|
nir_alu_type src0_type, nir_alu_type src1_type,
|
||||||
|
|
@ -865,111 +801,142 @@ GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
|
||||||
|
|
||||||
get_equation_str(rt_state, equation_str, sizeof(equation_str));
|
get_equation_str(rt_state, equation_str, sizeof(equation_str));
|
||||||
|
|
||||||
nir_builder b = nir_builder_init_simple_shader(
|
nir_builder builder = nir_builder_init_simple_shader(
|
||||||
MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH),
|
MESA_SHADER_FRAGMENT, pan_get_nir_shader_compiler_options(PAN_ARCH),
|
||||||
"pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt,
|
"pan_blend(rt=%d,fmt=%s,nr_samples=%d,%s=%s)", rt,
|
||||||
util_format_name(rt_state->format), rt_state->nr_samples,
|
util_format_name(rt_state->format), rt_state->nr_samples,
|
||||||
state->logicop_enable ? "logicop" : "equation",
|
state->logicop_enable ? "logicop" : "equation",
|
||||||
state->logicop_enable ? logicop_str(state->logicop_func) : equation_str);
|
state->logicop_enable ? logicop_str(state->logicop_func) : equation_str);
|
||||||
|
nir_builder *b = &builder;
|
||||||
|
|
||||||
|
const enum pipe_format format = rt_state->format;
|
||||||
const struct util_format_description *format_desc =
|
const struct util_format_description *format_desc =
|
||||||
util_format_description(rt_state->format);
|
util_format_description(format);
|
||||||
nir_alu_type nir_type = pan_unpacked_type_for_format(format_desc);
|
|
||||||
|
|
||||||
/* Bifrost/Valhall support 16-bit and 32-bit register formats for
|
/* Choose a type which is not going to lead to precision loss while
|
||||||
* LD_TILE/ST_TILE/BLEND, but do not support 8-bit. Rather than making
|
* blending. If we're not dual-source blending, src1_type will be
|
||||||
* the fragment output 8-bit and inserting extra conversions in the
|
* nir_type_invalid which has a size of zero.
|
||||||
* compiler, promote the output to 16-bit. The larger size is still
|
|
||||||
* compatible with correct conversion semantics.
|
|
||||||
*/
|
*/
|
||||||
if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(nir_type) == 8)
|
nir_alu_type dest_type = pan_unpacked_type_for_format(format_desc);
|
||||||
nir_type = nir_alu_type_get_base_type(nir_type) | 16;
|
if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(dest_type) == 8)
|
||||||
|
dest_type = nir_alu_type_get_base_type(dest_type) | 16;
|
||||||
|
|
||||||
nir_lower_blend_options options = {
|
const unsigned dest_bit_size = nir_alu_type_get_type_size(dest_type);
|
||||||
.logicop_enable = state->logicop_enable,
|
const nir_alu_type dest_base_type = nir_alu_type_get_base_type(dest_type);
|
||||||
.logicop_func = state->logicop_func,
|
|
||||||
};
|
|
||||||
|
|
||||||
options.rt[rt].format = rt_state->format;
|
/* Midgard doesn't always provide types at all but it's always float32 */
|
||||||
options.rt[rt].colormask = rt_state->equation.color_mask;
|
src0_type = src0_type ?: nir_type_float32;
|
||||||
|
src1_type = src1_type ?: nir_type_float32;
|
||||||
|
|
||||||
if (!rt_state->equation.blend_enable) {
|
nir_def *src0 = nir_load_blend_input_pan(b,
|
||||||
static const nir_lower_blend_channel replace = {
|
4, nir_alu_type_get_type_size(src0_type),
|
||||||
.func = PIPE_BLEND_ADD,
|
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
|
||||||
.src_factor = PIPE_BLENDFACTOR_ONE,
|
.io_semantics.dual_source_blend_index = 0,
|
||||||
.dst_factor = PIPE_BLENDFACTOR_ZERO,
|
.io_semantics.num_slots = 1,
|
||||||
};
|
.dest_type = src0_type);
|
||||||
|
|
||||||
options.rt[rt].rgb = replace;
|
nir_def *src1 = nir_load_blend_input_pan(b,
|
||||||
options.rt[rt].alpha = replace;
|
4, nir_alu_type_get_type_size(src1_type),
|
||||||
} else {
|
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
|
||||||
options.rt[rt].rgb.func = rt_state->equation.rgb_func;
|
.io_semantics.dual_source_blend_index = 1,
|
||||||
options.rt[rt].rgb.src_factor = rt_state->equation.rgb_src_factor;
|
.io_semantics.num_slots = 1,
|
||||||
options.rt[rt].rgb.dst_factor = rt_state->equation.rgb_dst_factor;
|
.dest_type = src1_type);
|
||||||
options.rt[rt].alpha.func = rt_state->equation.alpha_func;
|
|
||||||
options.rt[rt].alpha.src_factor = rt_state->equation.alpha_src_factor;
|
/* Make sure everyone is the same type. We assume the destination type
|
||||||
options.rt[rt].alpha.dst_factor = rt_state->equation.alpha_dst_factor;
|
* here because TGSI sometimes gives us bogus types. When they're not
|
||||||
|
* bogus, shader types are required to match the format anyway.
|
||||||
|
*
|
||||||
|
* On Midgard, the blend shader is responsible for format conversion.
|
||||||
|
* As the OpenGL spec requires integer conversions to saturate, we must
|
||||||
|
* saturate ourselves here. On Bifrost and later, the conversion
|
||||||
|
* hardware handles this automatically.
|
||||||
|
*/
|
||||||
|
bool should_saturate = PAN_ARCH <= 5 && dest_base_type != nir_type_float;
|
||||||
|
src0 = nir_convert_with_rounding(b, src0, dest_base_type, dest_type,
|
||||||
|
nir_rounding_mode_undef, should_saturate);
|
||||||
|
src1 = nir_convert_with_rounding(b, src1, dest_base_type, dest_type,
|
||||||
|
nir_rounding_mode_undef, should_saturate);
|
||||||
|
|
||||||
|
if (state->alpha_to_one && dest_base_type == nir_type_float) {
|
||||||
|
nir_def *one = nir_imm_floatN_t(b, 1.0, dest_bit_size);
|
||||||
|
src0 = nir_vector_insert_imm(b, src0, one, 3);
|
||||||
|
src1 = nir_vector_insert_imm(b, src1, one, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
nir_def *zero = nir_imm_int(&b, 0);
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 2; ++i) {
|
|
||||||
nir_alu_type src_type =
|
|
||||||
(i == 1 ? src1_type : src0_type) ?: nir_type_float32;
|
|
||||||
|
|
||||||
/* HACK: workaround buggy TGSI shaders (u_blitter) */
|
|
||||||
src_type = nir_alu_type_get_base_type(nir_type) |
|
|
||||||
nir_alu_type_get_type_size(src_type);
|
|
||||||
|
|
||||||
nir_def *src = nir_load_blend_input_pan(
|
|
||||||
&b, 4, nir_alu_type_get_type_size(src_type),
|
|
||||||
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
|
|
||||||
.io_semantics.dual_source_blend_index = i,
|
|
||||||
.io_semantics.num_slots = 1, .dest_type = src_type);
|
|
||||||
|
|
||||||
if (state->alpha_to_one && src_type == nir_type_float32) {
|
|
||||||
/* force alpha to 1 */
|
|
||||||
src = nir_vector_insert_imm(&b, src,
|
|
||||||
nir_imm_floatN_t(&b, 1.0, src->bit_size),
|
|
||||||
3);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* On Midgard, the blend shader is responsible for format conversion.
|
|
||||||
* As the OpenGL spec requires integer conversions to saturate, we must
|
|
||||||
* saturate ourselves here. On Bifrost and later, the conversion
|
|
||||||
* hardware handles this automatically.
|
|
||||||
*/
|
|
||||||
nir_alu_type T = nir_alu_type_get_base_type(nir_type);
|
|
||||||
bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float);
|
|
||||||
src = nir_convert_with_rounding(&b, src, T, nir_type,
|
|
||||||
nir_rounding_mode_undef, should_saturate);
|
|
||||||
|
|
||||||
nir_store_output(&b, src, zero, .write_mask = BITFIELD_MASK(4),
|
|
||||||
.src_type = nir_type,
|
|
||||||
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
|
|
||||||
.io_semantics.num_slots = 1,
|
|
||||||
.io_semantics.dual_source_blend_index = i);
|
|
||||||
}
|
|
||||||
|
|
||||||
b.shader->info.io_lowered = true;
|
|
||||||
|
|
||||||
NIR_PASS(_, b.shader, nir_lower_blend, &options);
|
|
||||||
|
|
||||||
#if PAN_ARCH >= 6
|
#if PAN_ARCH >= 6
|
||||||
/* On bifrost+ we use the NIR blend/load intrinsics directly */
|
const uint64_t opaque_blend_desc =
|
||||||
NIR_PASS(_, b.shader, nir_shader_intrinsics_pass,
|
GENX(pan_blend_get_internal_desc)(format, rt, dest_bit_size, false);
|
||||||
lower_rt_intrin, nir_metadata_control_flow, (void *)state);
|
#else
|
||||||
|
const uint64_t opaque_blend_desc = 0;
|
||||||
/* And we put a blend_return_pan at the end.
|
|
||||||
*
|
|
||||||
* We have to do this here because nir_lower_blend assumes it can stick
|
|
||||||
* stuff at the end of the shader, after the blend_return_pan.
|
|
||||||
*/
|
|
||||||
b = nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(b.shader)));
|
|
||||||
nir_blend_return_pan(&b);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return b.shader;
|
nir_def *dest;
|
||||||
|
if (PAN_ARCH >= 6) {
|
||||||
|
nir_def *sample_id =
|
||||||
|
rt_state->nr_samples > 1 ? nir_load_sample_id(b) : nir_imm_int(b, 0);
|
||||||
|
dest = nir_load_tile_pan(b,
|
||||||
|
4, dest_bit_size,
|
||||||
|
pan_nir_tile_rt_sample(b, nir_imm_int(b, rt), sample_id),
|
||||||
|
pan_nir_tile_default_coverage(b),
|
||||||
|
nir_imm_int(b, opaque_blend_desc >> 32),
|
||||||
|
.dest_type = dest_type,
|
||||||
|
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
|
||||||
|
.io_semantics.num_slots = 1);
|
||||||
|
} else {
|
||||||
|
dest = nir_load_output(b,
|
||||||
|
4, dest_bit_size,
|
||||||
|
nir_imm_int(b, 0),
|
||||||
|
.dest_type = dest_type,
|
||||||
|
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
|
||||||
|
.io_semantics.num_slots = 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
nir_def *color = src0;
|
||||||
|
if (state->logicop_enable) {
|
||||||
|
color = nir_color_logicop(b, src0, dest, state->logicop_func, format);
|
||||||
|
} else if (rt_state->equation.blend_enable) {
|
||||||
|
const nir_lower_blend_rt nir_rt = {
|
||||||
|
.format = format,
|
||||||
|
.rgb.func = rt_state->equation.rgb_func,
|
||||||
|
.rgb.src_factor = rt_state->equation.rgb_src_factor,
|
||||||
|
.rgb.dst_factor = rt_state->equation.rgb_dst_factor,
|
||||||
|
.alpha.func = rt_state->equation.alpha_func,
|
||||||
|
.alpha.src_factor = rt_state->equation.alpha_src_factor,
|
||||||
|
.alpha.dst_factor = rt_state->equation.alpha_dst_factor,
|
||||||
|
.colormask = rt_state->equation.color_mask,
|
||||||
|
};
|
||||||
|
color = nir_color_blend(b, src0, src1, dest, &nir_rt, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
color = nir_color_mask(b, color, dest, rt_state->equation.color_mask);
|
||||||
|
|
||||||
|
/* Throw away any channels we don't need */
|
||||||
|
color = nir_color_mask(b, color, nir_undef(b, 4, dest_bit_size),
|
||||||
|
util_format_colormask(format_desc));
|
||||||
|
|
||||||
|
/* Only write the destination if it changed */
|
||||||
|
if (color != dest) {
|
||||||
|
if (PAN_ARCH >= 6) {
|
||||||
|
nir_blend_pan(b, nir_load_cumulative_coverage_pan(b),
|
||||||
|
nir_imm_int64(b, opaque_blend_desc),
|
||||||
|
color,
|
||||||
|
.src_type = dest_type,
|
||||||
|
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
|
||||||
|
.io_semantics.num_slots = 1);
|
||||||
|
} else {
|
||||||
|
nir_store_output(b, color, nir_imm_int(b, 0),
|
||||||
|
.src_type = dest_type,
|
||||||
|
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
|
||||||
|
.io_semantics.num_slots = 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PAN_ARCH >= 6)
|
||||||
|
nir_blend_return_pan(b);
|
||||||
|
|
||||||
|
b->shader->info.io_lowered = true;
|
||||||
|
|
||||||
|
return builder.shader;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if PAN_ARCH >= 6
|
#if PAN_ARCH >= 6
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue