mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 11:30:11 +01:00
nir/lower_blend: Consume dual stores
Now that we're working on lowered I/O, passing in the dual source blend colour via a sideband doesn't make any sense. The primary source blend colours are implicitly passed in as the sources of store_output intrinsics; likewise, we should get dual source blend colours from their respective stores. And since dual colours are only needed by blending, we can delete the stores as we go. That means nir_lower_blend now provides an all-in-one software lowering of dual source blending with no driver support needed! It even works for 8 dual-src render targets, but I don't have a use case for that. The only tricky bit here is making sure we are robust against different orders of store_output within the exit block. In particular, if we naively lower x = ... primary color = x y = ... dual color = y we end up emitting uses of y before it has been defined, something like x = ... primary color = blend(x, y) y = ... Instead, we remove dual stores and sink blend stores to the bottom of the block, so we end up with the correct x = ... y = ... primary color = blend(x, y) lower_io_to_temporaries ensures that the stores will be in the same (exit) block, so we don't need to sink further than that ourselves. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21426>
This commit is contained in:
parent
44bdcb7214
commit
8313016543
3 changed files with 90 additions and 39 deletions
|
|
@ -36,6 +36,11 @@
|
||||||
#include "compiler/nir/nir_format_convert.h"
|
#include "compiler/nir/nir_format_convert.h"
|
||||||
#include "nir_lower_blend.h"
|
#include "nir_lower_blend.h"
|
||||||
|
|
||||||
|
struct ctx {
|
||||||
|
const nir_lower_blend_options *options;
|
||||||
|
nir_ssa_def *src1[8];
|
||||||
|
};
|
||||||
|
|
||||||
/* Given processed factors, combine them per a blend function */
|
/* Given processed factors, combine them per a blend function */
|
||||||
|
|
||||||
static nir_ssa_def *
|
static nir_ssa_def *
|
||||||
|
|
@ -364,8 +369,10 @@ nir_blend(
|
||||||
bconst = nir_load_blend_const_color_rgba(b);
|
bconst = nir_load_blend_const_color_rgba(b);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (src->bit_size == 16)
|
if (src->bit_size == 16) {
|
||||||
bconst = nir_f2f16(b, bconst);
|
bconst = nir_f2f16(b, bconst);
|
||||||
|
src1 = nir_f2f16(b, src1);
|
||||||
|
}
|
||||||
|
|
||||||
/* Fixed-point framebuffers require their inputs clamped. */
|
/* Fixed-point framebuffers require their inputs clamped. */
|
||||||
enum pipe_format format = options->format[rt];
|
enum pipe_format format = options->format[rt];
|
||||||
|
|
@ -464,7 +471,8 @@ nir_blend_replace_rt(const nir_lower_blend_rt *rt)
|
||||||
static bool
|
static bool
|
||||||
nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
|
nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
|
||||||
{
|
{
|
||||||
const nir_lower_blend_options *options = data;
|
struct ctx *ctx = data;
|
||||||
|
const nir_lower_blend_options *options = ctx->options;
|
||||||
if (instr->type != nir_instr_type_intrinsic)
|
if (instr->type != nir_instr_type_intrinsic)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
|
@ -479,7 +487,16 @@ nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
|
||||||
if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
|
if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
b->cursor = nir_before_instr(instr);
|
/* Only process stores once. Pass flags are cleared by consume_dual_stores */
|
||||||
|
if (instr->pass_flags)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
instr->pass_flags = 1;
|
||||||
|
|
||||||
|
/* Store are sunk to the bottom of the block to ensure that the dual
|
||||||
|
* source colour is already written.
|
||||||
|
*/
|
||||||
|
b->cursor = nir_after_block(instr->block);
|
||||||
|
|
||||||
/* Grab the input color. We always want 4 channels during blend. Dead
|
/* Grab the input color. We always want 4 channels during blend. Dead
|
||||||
* code will clean up any channels we don't need.
|
* code will clean up any channels we don't need.
|
||||||
|
|
@ -514,7 +531,7 @@ nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
|
||||||
} else if (!util_format_is_pure_integer(options->format[rt]) &&
|
} else if (!util_format_is_pure_integer(options->format[rt]) &&
|
||||||
!nir_blend_replace_rt(&options->rt[rt])) {
|
!nir_blend_replace_rt(&options->rt[rt])) {
|
||||||
assert(!util_format_is_scaled(options->format[rt]));
|
assert(!util_format_is_scaled(options->format[rt]));
|
||||||
blended = nir_blend(b, options, rt, src, options->src1, dst);
|
blended = nir_blend(b, options, rt, src, ctx->src1[rt], dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Apply a colormask if necessary */
|
/* Apply a colormask if necessary */
|
||||||
|
|
@ -534,6 +551,44 @@ nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
|
||||||
|
|
||||||
/* Write out the final color instead of the input */
|
/* Write out the final color instead of the input */
|
||||||
nir_instr_rewrite_src_ssa(instr, &store->src[0], blended);
|
nir_instr_rewrite_src_ssa(instr, &store->src[0], blended);
|
||||||
|
|
||||||
|
/* Sink to bottom */
|
||||||
|
nir_instr_remove(instr);
|
||||||
|
nir_builder_instr_insert(b, instr);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Dual-source colours are only for blending, so when nir_lower_blend is used,
|
||||||
|
* the dual source store_output is for us (only). Remove dual stores so the
|
||||||
|
* backend doesn't have to deal with them, collecting the sources for blending.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
consume_dual_stores(nir_builder *b, nir_instr *instr, void *data)
|
||||||
|
{
|
||||||
|
nir_ssa_def **outputs = data;
|
||||||
|
if (instr->type != nir_instr_type_intrinsic)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
|
||||||
|
if (store->intrinsic != nir_intrinsic_store_output)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* While we're here, clear the pass flags for store_outputs, since we'll set
|
||||||
|
* them later.
|
||||||
|
*/
|
||||||
|
instr->pass_flags = 0;
|
||||||
|
|
||||||
|
nir_io_semantics sem = nir_intrinsic_io_semantics(store);
|
||||||
|
if (sem.dual_source_blend_index == 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int rt = color_index_for_location(sem.location);
|
||||||
|
assert(rt >= 0 && rt < 8 && "bounds for dual-source blending");
|
||||||
|
assert(store->src[0].is_ssa && "must be SSA");
|
||||||
|
|
||||||
|
outputs[rt] = store->src[0].ssa;
|
||||||
|
nir_instr_remove(instr);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -547,8 +602,14 @@ nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
|
||||||
{
|
{
|
||||||
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
|
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||||
|
|
||||||
|
struct ctx ctx = {.options = options};
|
||||||
|
nir_shader_instructions_pass(shader, consume_dual_stores,
|
||||||
|
nir_metadata_block_index |
|
||||||
|
nir_metadata_dominance,
|
||||||
|
ctx.src1);
|
||||||
|
|
||||||
nir_shader_instructions_pass(shader, nir_lower_blend_instr,
|
nir_shader_instructions_pass(shader, nir_lower_blend_instr,
|
||||||
nir_metadata_block_index |
|
nir_metadata_block_index |
|
||||||
nir_metadata_dominance,
|
nir_metadata_dominance,
|
||||||
(void *)options);
|
&ctx);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -57,8 +57,6 @@ typedef struct {
|
||||||
bool logicop_enable;
|
bool logicop_enable;
|
||||||
unsigned logicop_func;
|
unsigned logicop_func;
|
||||||
|
|
||||||
nir_ssa_def *src1;
|
|
||||||
|
|
||||||
/* If set, will use load_blend_const_color_{r,g,b,a}_float instead of
|
/* If set, will use load_blend_const_color_{r,g,b,a}_float instead of
|
||||||
* load_blend_const_color_rgba */
|
* load_blend_const_color_rgba */
|
||||||
bool scalar_blend_const;
|
bool scalar_blend_const;
|
||||||
|
|
|
||||||
|
|
@ -663,49 +663,41 @@ GENX(pan_blend_create_shader)(const struct panfrost_device *dev,
|
||||||
rt_state->equation.alpha_invert_dst_factor;
|
rt_state->equation.alpha_invert_dst_factor;
|
||||||
}
|
}
|
||||||
|
|
||||||
nir_alu_type src_types[] = {src0_type ?: nir_type_float32,
|
|
||||||
src1_type ?: nir_type_float32};
|
|
||||||
|
|
||||||
/* HACK: workaround buggy TGSI shaders (u_blitter) */
|
|
||||||
for (unsigned i = 0; i < ARRAY_SIZE(src_types); ++i) {
|
|
||||||
src_types[i] = nir_alu_type_get_base_type(nir_type) |
|
|
||||||
nir_alu_type_get_type_size(src_types[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
nir_ssa_def *pixel = nir_load_barycentric_pixel(&b, 32, .interp_mode = 1);
|
nir_ssa_def *pixel = nir_load_barycentric_pixel(&b, 32, .interp_mode = 1);
|
||||||
nir_ssa_def *zero = nir_imm_int(&b, 0);
|
nir_ssa_def *zero = nir_imm_int(&b, 0);
|
||||||
|
|
||||||
nir_ssa_def *s_src[2];
|
|
||||||
for (unsigned i = 0; i < 2; ++i) {
|
for (unsigned i = 0; i < 2; ++i) {
|
||||||
s_src[i] = nir_load_interpolated_input(
|
nir_alu_type src_type =
|
||||||
&b, 4, nir_alu_type_get_type_size(src_types[i]), pixel, zero,
|
(i == 1 ? src1_type : src0_type) ?: nir_type_float32;
|
||||||
|
|
||||||
|
/* HACK: workaround buggy TGSI shaders (u_blitter) */
|
||||||
|
src_type = nir_alu_type_get_base_type(nir_type) |
|
||||||
|
nir_alu_type_get_type_size(src_type);
|
||||||
|
|
||||||
|
nir_ssa_def *src = nir_load_interpolated_input(
|
||||||
|
&b, 4, nir_alu_type_get_type_size(src_type), pixel, zero,
|
||||||
.io_semantics.location = i ? VARYING_SLOT_VAR0 : VARYING_SLOT_COL0,
|
.io_semantics.location = i ? VARYING_SLOT_VAR0 : VARYING_SLOT_COL0,
|
||||||
.io_semantics.num_slots = 1, .base = i, .dest_type = src_types[i]);
|
.io_semantics.num_slots = 1, .base = i, .dest_type = src_type);
|
||||||
}
|
|
||||||
|
|
||||||
/* On Midgard, the blend shader is responsible for format conversion.
|
/* On Midgard, the blend shader is responsible for format conversion.
|
||||||
* As the OpenGL spec requires integer conversions to saturate, we must
|
* As the OpenGL spec requires integer conversions to saturate, we must
|
||||||
* saturate ourselves here. On Bifrost and later, the conversion
|
* saturate ourselves here. On Bifrost and later, the conversion
|
||||||
* hardware handles this automatically.
|
* hardware handles this automatically.
|
||||||
*/
|
*/
|
||||||
for (int i = 0; i < ARRAY_SIZE(s_src); ++i) {
|
|
||||||
nir_alu_type T = nir_alu_type_get_base_type(nir_type);
|
nir_alu_type T = nir_alu_type_get_base_type(nir_type);
|
||||||
bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float);
|
bool should_saturate = (PAN_ARCH <= 5) && (T != nir_type_float);
|
||||||
s_src[i] =
|
src = nir_convert_with_rounding(&b, src, T, nir_type,
|
||||||
nir_convert_with_rounding(&b, s_src[i], src_types[i], nir_type,
|
|
||||||
nir_rounding_mode_undef, should_saturate);
|
nir_rounding_mode_undef, should_saturate);
|
||||||
}
|
|
||||||
|
|
||||||
/* Build a trivial blend shader */
|
nir_store_output(&b, src, zero, .write_mask = BITFIELD_MASK(4),
|
||||||
nir_store_output(&b, s_src[0], zero, .write_mask = BITFIELD_MASK(4),
|
|
||||||
.src_type = nir_type,
|
.src_type = nir_type,
|
||||||
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
|
.io_semantics.location = FRAG_RESULT_DATA0 + rt,
|
||||||
.io_semantics.num_slots = 1);
|
.io_semantics.num_slots = 1,
|
||||||
|
.io_semantics.dual_source_blend_index = i);
|
||||||
|
}
|
||||||
|
|
||||||
b.shader->info.io_lowered = true;
|
b.shader->info.io_lowered = true;
|
||||||
|
|
||||||
options.src1 = s_src[1];
|
|
||||||
|
|
||||||
NIR_PASS_V(b.shader, nir_lower_blend, &options);
|
NIR_PASS_V(b.shader, nir_lower_blend, &options);
|
||||||
nir_shader_instructions_pass(
|
nir_shader_instructions_pass(
|
||||||
b.shader, pan_inline_blend_constants,
|
b.shader, pan_inline_blend_constants,
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue