/* Patch overview: adds the V3D soft_blend debug option and a software blending path for fragment shaders: a software_blend flag plus per render target blend state in v3d_fs_key, the v3d_nir_lower_blend pass that forwards that state to nir_lower_blend, the QUNIFORM_BLEND_CONSTANT_{R,G,B,A} uniforms, and nir_to_vir handling of the demote_samples, load_blend_const_color_[rgba]_float and alpha_to_coverage intrinsics. */
diff --git a/src/broadcom/common/v3d_debug.c b/src/broadcom/common/v3d_debug.c index b69e56bd984..cf77ec98393 100644 --- a/src/broadcom/common/v3d_debug.c +++ b/src/broadcom/common/v3d_debug.c @@ -107,6 +107,7 @@ static const struct debug_named_value debug_control[] = { "Disable TFU (v3dv only)" }, { "sync", V3D_DEBUG_SYNC, "Sync wait for each job to complete after submission." }, + { "soft_blend", V3D_DEBUG_SOFT_BLEND, "Force fallback to software blending" }, DEBUG_NAMED_VALUE_END }; diff --git a/src/broadcom/common/v3d_debug.h b/src/broadcom/common/v3d_debug.h index a01326ba40c..668b1be36bb 100644 --- a/src/broadcom/common/v3d_debug.h +++ b/src/broadcom/common/v3d_debug.h @@ -70,6 +70,7 @@ extern uint32_t v3d_mesa_debug; #define V3D_DEBUG_OPT_COMPILE_TIME (1 << 25) #define V3D_DEBUG_DISABLE_TFU (1 << 26) #define V3D_DEBUG_SYNC (1 << 27) +#define V3D_DEBUG_SOFT_BLEND (1 << 28) #define V3D_DEBUG_SHADERS (V3D_DEBUG_TGSI | V3D_DEBUG_NIR | \ V3D_DEBUG_VIR | V3D_DEBUG_QPU | \ diff --git a/src/broadcom/compiler/meson.build b/src/broadcom/compiler/meson.build index 2a58a6cad20..13d674796bc 100644 --- a/src/broadcom/compiler/meson.build +++ b/src/broadcom/compiler/meson.build @@ -17,6 +17,7 @@ libbroadcom_compiler_files = files( 'qpu_validate.c', 'v3d_tex.c', 'v3d_compiler.h', + 'v3d_nir_lower_blend.c', 'v3d_nir_lower_io.c', 'v3d_nir_lower_image_load_store.c', 'v3d_nir_lower_line_smooth.c', diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index d40f5d4c089..2499b3d3a4a 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -1960,7 +1960,8 @@ emit_frag_end(struct v3d_compile *c) has_any_tlb_color_write = true; } - if (c->fs_key->sample_alpha_to_coverage && c->output_color_var[0]) { + if (!c->fs_key->software_blend && + c->fs_key->sample_alpha_to_coverage && c->output_color_var[0]) { struct nir_variable *var = c->output_color_var[0]; struct qreg *color = &c->outputs[var->data.driver_location * 4]; @@ 
-2485,6 +2486,15 @@ ntq_setup_outputs(struct v3d_compile *c) case FRAG_RESULT_DATA5: case FRAG_RESULT_DATA6: case FRAG_RESULT_DATA7: + /* Dual source outputs have an index that is != 0. + * If they have not been removed by now they end up + * clobbering `output_color_var` with the wrong + * variable, so skip them here. + */ + if (var->data.index != 0 && var->data.index != NIR_VARIABLE_NO_INDEX) { + break; + } + c->output_color_var[var->data.location - FRAG_RESULT_DATA0] = var; break; @@ -3597,6 +3607,15 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) vir_uniform(c, QUNIFORM_AA_LINE_WIDTH, 0)); break; + case nir_intrinsic_demote_samples: { + struct qreg mask = + vir_NOT(c, ntq_get_src(c, instr->src[0], 0)); + + vir_SETMSF_dest(c, vir_nop_reg(), + vir_AND(c, mask, vir_MSF(c))); + break; + } + case nir_intrinsic_load_sample_mask_in: ntq_store_def(c, &instr->def, 0, vir_MSF(c)); break; @@ -4122,6 +4141,34 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) vir_uniform(c, QUNIFORM_VIEW_INDEX, 0)); break; + /* Each blend constant channel is read from its own uniform. We only use these when doing software blending. */ + case nir_intrinsic_load_blend_const_color_r_float: + ntq_store_def(c, &instr->def, 0, + vir_uniform(c, QUNIFORM_BLEND_CONSTANT_R, 0)); + break; + case nir_intrinsic_load_blend_const_color_g_float: + ntq_store_def(c, &instr->def, 0, + vir_uniform(c, QUNIFORM_BLEND_CONSTANT_G, 0)); + break; + case nir_intrinsic_load_blend_const_color_b_float: + ntq_store_def(c, &instr->def, 0, + vir_uniform(c, QUNIFORM_BLEND_CONSTANT_B, 0)); + break; + case nir_intrinsic_load_blend_const_color_a_float: + ntq_store_def(c, &instr->def, 0, + vir_uniform(c, QUNIFORM_BLEND_CONSTANT_A, 0)); + break; + + + /* We only use this if alpha to coverage is enabled when using + * software blending. 
+ */ + case nir_intrinsic_alpha_to_coverage: + assert(c->fs_key->msaa); + ntq_store_def(c, &instr->def, 0, + vir_FTOC(c, ntq_get_src(c, instr->src[0], 0))); + break; + default: fprintf(stderr, "Unknown intrinsic: "); nir_print_instr(&instr->instr, stderr); diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 753474279f3..ed4efd62055 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -360,6 +360,14 @@ enum quniform_contents { * Current value of DrawIndex for Multidraw */ QUNIFORM_DRAW_ID, + + /** + * Blend constants for software blend, one uniform per channel. + */ + QUNIFORM_BLEND_CONSTANT_R, + QUNIFORM_BLEND_CONSTANT_G, + QUNIFORM_BLEND_CONSTANT_B, + QUNIFORM_BLEND_CONSTANT_A, }; static inline uint32_t v3d_unit_data_create(uint32_t unit, uint32_t value) @@ -427,6 +435,7 @@ struct v3d_fs_key { bool sample_alpha_to_coverage; bool sample_alpha_to_one; bool can_earlyz_with_discard; + bool software_blend; /* Mask of which color render targets are present. */ uint8_t cbufs; uint8_t swap_color_rb; @@ -439,13 +448,26 @@ struct v3d_fs_key { uint8_t uint_color_rb; /* Color format information per render target. Only set when logic - * operations are enabled or when fbfetch is in use. + * operations are enabled, when fbfetch is in use or when falling back + * to software blend. */ struct { enum pipe_format format; uint8_t swizzle[4]; } color_fmt[V3D_MAX_DRAW_BUFFERS]; + /* Software blend state per render target. Only set when software blend is enabled. 
+ * (currently only for handling the dual source case) + */ + struct { + enum pipe_blend_func rgb_func; + enum pipe_blendfactor rgb_src_factor; + enum pipe_blendfactor rgb_dst_factor; + enum pipe_blend_func alpha_func; + enum pipe_blendfactor alpha_src_factor; + enum pipe_blendfactor alpha_dst_factor; + } blend[V3D_MAX_DRAW_BUFFERS]; + enum pipe_logicop logicop_func; uint32_t point_sprite_mask; @@ -1212,6 +1234,7 @@ bool v3d_nir_lower_global_2x32(nir_shader *s); bool v3d_nir_lower_load_store_bitsize(nir_shader *s); bool v3d_nir_lower_algebraic(struct nir_shader *shader, const struct v3d_compile *c); bool v3d_nir_lower_load_output(nir_shader *s, struct v3d_compile *c); +bool v3d_nir_lower_blend(nir_shader *s, struct v3d_compile *c); nir_def *v3d_nir_get_tlb_color(nir_builder *b, struct v3d_compile *c, int rt, int sample); diff --git a/src/broadcom/compiler/v3d_nir_lower_blend.c b/src/broadcom/compiler/v3d_nir_lower_blend.c new file mode 100644 index 00000000000..4cc77f6dc9f --- /dev/null +++ b/src/broadcom/compiler/v3d_nir_lower_blend.c @@ -0,0 +1,54 @@ +/* + * Copyright 2025 Raspberry Pi Ltd + * SPDX-License-Identifier: MIT + */ + +#include "util/format/u_format.h" +#include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_format_convert.h" +#include "compiler/nir/nir_lower_blend.h" +#include "v3d_compiler.h" +/* Software blend lowering: builds nir_lower_blend options from the fs key (per render target blend funcs/factors and colour format) and runs nir_lower_blend. Returns false without running it when software_blend is unset or no render target is present in cbufs. */ +bool +v3d_nir_lower_blend(nir_shader *nir, struct v3d_compile *c) +{ + if (!c->fs_key->software_blend) + return false; + + nir_lower_blend_options options = { + /* logic op is handled elsewhere in the compiler (presumably by the v3d_nir_lower_logic_ops pass) */ + .logicop_enable = false, + .scalar_blend_const = true, + }; + + bool lower_blend = false; + for (unsigned rt = 0; rt < V3D_MAX_DRAW_BUFFERS; rt++) { + if (!(c->fs_key->cbufs & (1 << rt))) { + static const nir_lower_blend_channel replace = { + .func = PIPE_BLEND_ADD, + .src_factor = PIPE_BLENDFACTOR_ONE, + .dst_factor = PIPE_BLENDFACTOR_ZERO, + }; + + options.rt[rt].rgb = replace; + options.rt[rt].alpha = replace; + continue; + } + 
lower_blend = true; + + /* Colour write mask is handled by the hardware, so the lowering is given a full 0xf mask. */ + options.rt[rt].colormask = 0xf; + + options.format[rt] = c->fs_key->color_fmt[rt].format; + + options.rt[rt].rgb.func = c->fs_key->blend[rt].rgb_func; + options.rt[rt].alpha.func = c->fs_key->blend[rt].alpha_func; + options.rt[rt].rgb.dst_factor = c->fs_key->blend[rt].rgb_dst_factor; + options.rt[rt].alpha.dst_factor = c->fs_key->blend[rt].alpha_dst_factor; + options.rt[rt].rgb.src_factor = c->fs_key->blend[rt].rgb_src_factor; + options.rt[rt].alpha.src_factor = c->fs_key->blend[rt].alpha_src_factor; + } + + return lower_blend && nir_lower_blend(nir, &options); +} diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 96bcb956ad8..435d84de7a8 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -1140,15 +1140,29 @@ v3d_nir_lower_fs_early(struct v3d_compile *c) if (c->fs_key->int_color_rb || c->fs_key->uint_color_rb) v3d_fixup_fs_output_types(c); - NIR_PASS(_, c->s, v3d_nir_lower_load_output, c); - NIR_PASS(_, c->s, v3d_nir_lower_logic_ops, c); - if (c->fs_key->line_smoothing) { NIR_PASS(_, c->s, v3d_nir_lower_line_smooth); NIR_PASS(_, c->s, nir_lower_global_vars_to_local); /* The lowering pass can introduce new sysval reads */ nir_shader_gather_info(c->s, nir_shader_get_entrypoint(c->s)); } + + if (c->fs_key->software_blend) { + if (c->fs_key->sample_alpha_to_coverage) { + assert(c->fs_key->msaa); + + NIR_PASS(_, c->s, nir_lower_alpha_to_coverage, + V3D_MAX_SAMPLES, true); + } + + if (c->fs_key->sample_alpha_to_one) + NIR_PASS(_, c->s, nir_lower_alpha_to_one); + + NIR_PASS(_, c->s, v3d_nir_lower_blend, c); + } + + NIR_PASS(_, c->s, v3d_nir_lower_load_output, c); + NIR_PASS(_, c->s, v3d_nir_lower_logic_ops, c); } static void