v3d/compiler: Implement software blend lowering

Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33942>
This commit is contained in:
Ella Stanforth 2025-04-14 22:54:52 +01:00 committed by Marge Bot
parent b38c4e8982
commit 42154029fc
7 changed files with 146 additions and 5 deletions

View file

@ -107,6 +107,7 @@ static const struct debug_named_value debug_control[] = {
"Disable TFU (v3dv only)" }, "Disable TFU (v3dv only)" },
{ "sync", V3D_DEBUG_SYNC, { "sync", V3D_DEBUG_SYNC,
"Sync wait for each job to complete after submission." }, "Sync wait for each job to complete after submission." },
{ "soft_blend", V3D_DEBUG_SOFT_BLEND, "Force fallback to software blending" },
DEBUG_NAMED_VALUE_END DEBUG_NAMED_VALUE_END
}; };

View file

@ -70,6 +70,7 @@ extern uint32_t v3d_mesa_debug;
#define V3D_DEBUG_OPT_COMPILE_TIME (1 << 25) #define V3D_DEBUG_OPT_COMPILE_TIME (1 << 25)
#define V3D_DEBUG_DISABLE_TFU (1 << 26) #define V3D_DEBUG_DISABLE_TFU (1 << 26)
#define V3D_DEBUG_SYNC (1 << 27) #define V3D_DEBUG_SYNC (1 << 27)
#define V3D_DEBUG_SOFT_BLEND (1 << 28)
#define V3D_DEBUG_SHADERS (V3D_DEBUG_TGSI | V3D_DEBUG_NIR | \ #define V3D_DEBUG_SHADERS (V3D_DEBUG_TGSI | V3D_DEBUG_NIR | \
V3D_DEBUG_VIR | V3D_DEBUG_QPU | \ V3D_DEBUG_VIR | V3D_DEBUG_QPU | \

View file

@ -17,6 +17,7 @@ libbroadcom_compiler_files = files(
'qpu_validate.c', 'qpu_validate.c',
'v3d_tex.c', 'v3d_tex.c',
'v3d_compiler.h', 'v3d_compiler.h',
'v3d_nir_lower_blend.c',
'v3d_nir_lower_io.c', 'v3d_nir_lower_io.c',
'v3d_nir_lower_image_load_store.c', 'v3d_nir_lower_image_load_store.c',
'v3d_nir_lower_line_smooth.c', 'v3d_nir_lower_line_smooth.c',

View file

@ -1960,7 +1960,8 @@ emit_frag_end(struct v3d_compile *c)
has_any_tlb_color_write = true; has_any_tlb_color_write = true;
} }
if (c->fs_key->sample_alpha_to_coverage && c->output_color_var[0]) { if (!c->fs_key->software_blend &&
c->fs_key->sample_alpha_to_coverage && c->output_color_var[0]) {
struct nir_variable *var = c->output_color_var[0]; struct nir_variable *var = c->output_color_var[0];
struct qreg *color = &c->outputs[var->data.driver_location * 4]; struct qreg *color = &c->outputs[var->data.driver_location * 4];
@ -2485,6 +2486,15 @@ ntq_setup_outputs(struct v3d_compile *c)
case FRAG_RESULT_DATA5: case FRAG_RESULT_DATA5:
case FRAG_RESULT_DATA6: case FRAG_RESULT_DATA6:
case FRAG_RESULT_DATA7: case FRAG_RESULT_DATA7:
/* Dual source outputs have an index that is != 0.
* If they have not been removed by now they end up
* clobbering `output_color_var` with the wrong
* variable.
*/
if (var->data.index != 0 && var->data.index != NIR_VARIABLE_NO_INDEX) {
break;
}
c->output_color_var[var->data.location - c->output_color_var[var->data.location -
FRAG_RESULT_DATA0] = var; FRAG_RESULT_DATA0] = var;
break; break;
@ -3597,6 +3607,15 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
vir_uniform(c, QUNIFORM_AA_LINE_WIDTH, 0)); vir_uniform(c, QUNIFORM_AA_LINE_WIDTH, 0));
break; break;
case nir_intrinsic_demote_samples: {
struct qreg mask =
vir_NOT(c, ntq_get_src(c, instr->src[0], 0));
vir_SETMSF_dest(c, vir_nop_reg(),
vir_AND(c, mask, vir_MSF(c)));
break;
}
case nir_intrinsic_load_sample_mask_in: case nir_intrinsic_load_sample_mask_in:
ntq_store_def(c, &instr->def, 0, vir_MSF(c)); ntq_store_def(c, &instr->def, 0, vir_MSF(c));
break; break;
@ -4122,6 +4141,34 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
vir_uniform(c, QUNIFORM_VIEW_INDEX, 0)); vir_uniform(c, QUNIFORM_VIEW_INDEX, 0));
break; break;
/* We only use these when doing software blending. */
case nir_intrinsic_load_blend_const_color_r_float:
ntq_store_def(c, &instr->def, 0,
vir_uniform(c, QUNIFORM_BLEND_CONSTANT_R, 0));
break;
case nir_intrinsic_load_blend_const_color_g_float:
ntq_store_def(c, &instr->def, 0,
vir_uniform(c, QUNIFORM_BLEND_CONSTANT_G, 0));
break;
case nir_intrinsic_load_blend_const_color_b_float:
ntq_store_def(c, &instr->def, 0,
vir_uniform(c, QUNIFORM_BLEND_CONSTANT_B, 0));
break;
case nir_intrinsic_load_blend_const_color_a_float:
ntq_store_def(c, &instr->def, 0,
vir_uniform(c, QUNIFORM_BLEND_CONSTANT_A, 0));
break;
/* We only use this if alpha to coverage is enabled when using
* software blending.
*/
case nir_intrinsic_alpha_to_coverage:
assert(c->fs_key->msaa);
ntq_store_def(c, &instr->def, 0,
vir_FTOC(c, ntq_get_src(c, instr->src[0], 0)));
break;
default: default:
fprintf(stderr, "Unknown intrinsic: "); fprintf(stderr, "Unknown intrinsic: ");
nir_print_instr(&instr->instr, stderr); nir_print_instr(&instr->instr, stderr);

View file

@ -360,6 +360,14 @@ enum quniform_contents {
* Current value of DrawIndex for Multidraw * Current value of DrawIndex for Multidraw
*/ */
QUNIFORM_DRAW_ID, QUNIFORM_DRAW_ID,
/**
* Blend constants for software blend.
*/
QUNIFORM_BLEND_CONSTANT_R,
QUNIFORM_BLEND_CONSTANT_G,
QUNIFORM_BLEND_CONSTANT_B,
QUNIFORM_BLEND_CONSTANT_A,
}; };
static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value) static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value)
@ -427,6 +435,7 @@ struct v3d_fs_key {
bool sample_alpha_to_coverage; bool sample_alpha_to_coverage;
bool sample_alpha_to_one; bool sample_alpha_to_one;
bool can_earlyz_with_discard; bool can_earlyz_with_discard;
bool software_blend;
/* Mask of which color render targets are present. */ /* Mask of which color render targets are present. */
uint8_t cbufs; uint8_t cbufs;
uint8_t swap_color_rb; uint8_t swap_color_rb;
@ -439,13 +448,26 @@ struct v3d_fs_key {
uint8_t uint_color_rb; uint8_t uint_color_rb;
/* Color format information per render target. Only set when logic /* Color format information per render target. Only set when logic
* operations are enabled or when fbfetch is in use. * operations are enabled, when fbfetch is in use or when falling back
* to software blend.
*/ */
struct { struct {
enum pipe_format format; enum pipe_format format;
uint8_t swizzle[4]; uint8_t swizzle[4];
} color_fmt[V3D_MAX_DRAW_BUFFERS]; } color_fmt[V3D_MAX_DRAW_BUFFERS];
/* Software blend state. Only set when software blend is enabled.
* (currently only for handling the dual source case)
*/
struct {
enum pipe_blend_func rgb_func;
enum pipe_blendfactor rgb_src_factor;
enum pipe_blendfactor rgb_dst_factor;
enum pipe_blend_func alpha_func;
enum pipe_blendfactor alpha_src_factor;
enum pipe_blendfactor alpha_dst_factor;
} blend[V3D_MAX_DRAW_BUFFERS];
enum pipe_logicop logicop_func; enum pipe_logicop logicop_func;
uint32_t point_sprite_mask; uint32_t point_sprite_mask;
@ -1212,6 +1234,7 @@ bool v3d_nir_lower_global_2x32(nir_shader *s);
bool v3d_nir_lower_load_store_bitsize(nir_shader *s); bool v3d_nir_lower_load_store_bitsize(nir_shader *s);
bool v3d_nir_lower_algebraic(struct nir_shader *shader, const struct v3d_compile *c); bool v3d_nir_lower_algebraic(struct nir_shader *shader, const struct v3d_compile *c);
bool v3d_nir_lower_load_output(nir_shader *s, struct v3d_compile *c); bool v3d_nir_lower_load_output(nir_shader *s, struct v3d_compile *c);
bool v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c);
nir_def *v3d_nir_get_tlb_color(nir_builder *b, struct v3d_compile *c, int rt, int sample); nir_def *v3d_nir_get_tlb_color(nir_builder *b, struct v3d_compile *c, int rt, int sample);

View file

@ -0,0 +1,54 @@
/*
* Copyright 2025 Raspberry Pi Ltd
* SPDX-License-Identifier: MIT
*/
#include "util/format/u_format.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "compiler/nir/nir_lower_blend.h"
#include "v3d_compiler.h"
/**
 * Lowers fixed-function blending into the fragment shader.
 *
 * Builds a nir_lower_blend_options description from the blend state stored
 * in the fragment shader key and hands it to nir_lower_blend().
 *
 * Returns false without touching the shader when the key does not request
 * software blending, or when none of the render targets is flagged in the
 * key's cbufs mask. Otherwise returns whatever nir_lower_blend() reports.
 */
bool
v3d_nir_lower_blend(nir_shader *nir, struct v3d_compile *c)
{
        const struct v3d_fs_key *key = c->fs_key;

        if (!key->software_blend)
                return false;

        /* Channel setup given to render targets that are not present in
         * the key's cbufs mask.
         */
        static const nir_lower_blend_channel passthrough = {
                .func = PIPE_BLEND_ADD,
                .src_factor = PIPE_BLENDFACTOR_ONE,
                .dst_factor = PIPE_BLENDFACTOR_ZERO,
        };

        nir_lower_blend_options options = {
                /* logic op is handled elsewhere in the compiler */
                .logicop_enable = false,
                .scalar_blend_const = true,
        };

        bool any_rt_present = false;
        for (unsigned rt = 0; rt < V3D_MAX_DRAW_BUFFERS; rt++) {
                const bool rt_present = (key->cbufs & (1 << rt)) != 0;

                if (rt_present) {
                        any_rt_present = true;

                        options.format[rt] = key->color_fmt[rt].format;

                        /* Colour write mask is handled by the hardware. */
                        options.rt[rt].colormask = 0xf;

                        /* Colour (RGB) equation, copied from the key. */
                        options.rt[rt].rgb.func =
                                key->blend[rt].rgb_func;
                        options.rt[rt].rgb.src_factor =
                                key->blend[rt].rgb_src_factor;
                        options.rt[rt].rgb.dst_factor =
                                key->blend[rt].rgb_dst_factor;

                        /* Alpha equation, copied from the key. */
                        options.rt[rt].alpha.func =
                                key->blend[rt].alpha_func;
                        options.rt[rt].alpha.src_factor =
                                key->blend[rt].alpha_src_factor;
                        options.rt[rt].alpha.dst_factor =
                                key->blend[rt].alpha_dst_factor;
                } else {
                        options.rt[rt].rgb = passthrough;
                        options.rt[rt].alpha = passthrough;
                }
        }

        /* Nothing to blend against: skip the generic pass entirely. */
        if (!any_rt_present)
                return false;

        return nir_lower_blend(nir, &options);
}

View file

@ -1140,15 +1140,29 @@ v3d_nir_lower_fs_early(struct v3d_compile *c)
if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb) if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb)
v3d_fixup_fs_output_types(c); v3d_fixup_fs_output_types(c);
NIR_PASS(_, c->s, v3d_nir_lower_load_output, c);
NIR_PASS(_, c->s, v3d_nir_lower_logic_ops, c);
if (c->fs_key->line_smoothing) { if (c->fs_key->line_smoothing) {
NIR_PASS(_, c->s, v3d_nir_lower_line_smooth); NIR_PASS(_, c->s, v3d_nir_lower_line_smooth);
NIR_PASS(_, c->s, nir_lower_global_vars_to_local); NIR_PASS(_, c->s, nir_lower_global_vars_to_local);
/* The lowering pass can introduce new sysval reads */ /* The lowering pass can introduce new sysval reads */
nir_shader_gather_info(c->s, nir_shader_get_entrypoint(c->s)); nir_shader_gather_info(c->s, nir_shader_get_entrypoint(c->s));
} }
if (c->fs_key->software_blend) {
if (c->fs_key->sample_alpha_to_coverage) {
assert(c->fs_key->msaa);
NIR_PASS(_, c->s, nir_lower_alpha_to_coverage,
V3D_MAX_SAMPLES, true);
}
if (c->fs_key->sample_alpha_to_one)
NIR_PASS(_, c->s, nir_lower_alpha_to_one);
NIR_PASS(_, c->s, v3d_nir_lower_blend, c);
}
NIR_PASS(_, c->s, v3d_nir_lower_load_output, c);
NIR_PASS(_, c->s, v3d_nir_lower_logic_ops, c);
} }
static void static void