mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 07:28:11 +02:00
pan: Use nir_intrinsic_blend_pan for blend shaders
The one non-trivial change here is that we're now using BLEND with a constant descriptor instead of ST_TILE for MSAA blend shaders. However, this shouldn't make any practical difference. Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com> Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39244>
This commit is contained in:
parent
7d25c5c1ea
commit
f53751159a
6 changed files with 88 additions and 102 deletions
|
|
@ -100,25 +100,16 @@ GENX(pan_blend_get_shader_locked)(struct pan_blend_shader_cache *cache,
|
|||
.gpu_id = cache->gpu_id,
|
||||
.gpu_variant = cache->gpu_variant,
|
||||
.is_blend = true,
|
||||
.blend.nr_samples = key.nr_samples,
|
||||
.pushable_ubos = BITFIELD_BIT(PAN_UBO_SYSVALS),
|
||||
};
|
||||
|
||||
enum pipe_format rt_formats[8] = {0};
|
||||
rt_formats[rt] = key.format;
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
inputs.blend.bifrost_blend_desc =
|
||||
GENX(pan_blend_get_internal_desc)(key.format, key.rt, 0, false);
|
||||
#endif
|
||||
|
||||
struct pan_shader_info info;
|
||||
pan_preprocess_nir(nir, inputs.gpu_id);
|
||||
pan_postprocess_nir(nir, inputs.gpu_id);
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
NIR_PASS(_, nir, GENX(pan_inline_rt_conversion), rt_formats);
|
||||
#else
|
||||
#if PAN_ARCH < 6
|
||||
enum pipe_format rt_formats[8] = {0};
|
||||
rt_formats[rt] = key.format;
|
||||
NIR_PASS(_, nir, pan_nir_lower_framebuffer, rt_formats,
|
||||
pan_raw_format_mask_midgard(rt_formats), MAX2(key.nr_samples, 1),
|
||||
(cache->gpu_id >> 16) < 0x700);
|
||||
|
|
|
|||
|
|
@ -897,18 +897,7 @@ bi_pixel_indices(bi_builder *b, unsigned rt, unsigned sample)
|
|||
|
||||
uint32_t indices_u32 = 0;
|
||||
memcpy(&indices_u32, &pix, sizeof(indices_u32));
|
||||
bi_index indices = bi_imm_u32(indices_u32);
|
||||
|
||||
/* Implicit sample_id assignment only happens in blend shaders,
|
||||
* and we don't expect an explicit sample to be passed in that
|
||||
* case, hence the assert(sample == 0). */
|
||||
|
||||
if (b->shader->inputs->blend.nr_samples > 1) {
|
||||
assert(sample == 0);
|
||||
indices = bi_iadd_u32(b, indices, bi_load_sample_id(b), false);
|
||||
}
|
||||
|
||||
return indices;
|
||||
return bi_imm_u32(indices_u32);
|
||||
}
|
||||
|
||||
/* Source color is passed through r0-r3, or r4-r7 for the second source when
|
||||
|
|
@ -937,38 +926,20 @@ bi_emit_blend_op(bi_builder *b, bi_index rgba, nir_alu_type T, bi_index rgba2,
|
|||
unsigned size_2 = nir_alu_type_get_type_size(T2);
|
||||
unsigned sr_count = (size <= 16) ? 2 : 4;
|
||||
unsigned sr_count_2 = (size_2 <= 16) ? 2 : 4;
|
||||
const struct pan_compile_inputs *inputs = b->shader->inputs;
|
||||
uint64_t blend_desc = inputs->blend.bifrost_blend_desc;
|
||||
enum bi_register_format regfmt = bi_reg_fmt_for_nir(T);
|
||||
|
||||
/* Workaround for NIR-to-TGSI */
|
||||
if (b->shader->nir->info.fs.untyped_color_outputs)
|
||||
regfmt = BI_REGISTER_FORMAT_AUTO;
|
||||
|
||||
if (inputs->is_blend && inputs->blend.nr_samples > 1) {
|
||||
/* Conversion descriptor comes from the compile inputs, pixel
|
||||
* indices derived at run time based on sample ID */
|
||||
bi_st_tile(b, rgba, bi_pixel_indices(b, rt, 0), bi_coverage(b),
|
||||
bi_imm_u32(blend_desc >> 32), regfmt, BI_VECSIZE_V4);
|
||||
} else if (b->shader->inputs->is_blend) {
|
||||
uint64_t blend_desc = b->shader->inputs->blend.bifrost_blend_desc;
|
||||
/* Blend descriptor comes from the FAU RAM. By convention, the
|
||||
* return address on Bifrost is stored in r48 and will be used
|
||||
* by the blend shader to jump back to the fragment shader */
|
||||
|
||||
/* Blend descriptor comes from the compile inputs */
|
||||
/* Put the result in r0 */
|
||||
|
||||
bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b),
|
||||
bi_imm_u32(blend_desc), bi_imm_u32(blend_desc >> 32),
|
||||
bi_null(), regfmt, sr_count, 0);
|
||||
} else {
|
||||
/* Blend descriptor comes from the FAU RAM. By convention, the
|
||||
* return address on Bifrost is stored in r48 and will be used
|
||||
* by the blend shader to jump back to the fragment shader */
|
||||
|
||||
bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b),
|
||||
bi_fau(BIR_FAU_BLEND_0 + rt, false),
|
||||
bi_fau(BIR_FAU_BLEND_0 + rt, true), rgba2, regfmt, sr_count,
|
||||
sr_count_2);
|
||||
}
|
||||
bi_blend_to(b, bi_temp(b->shader), rgba, bi_coverage(b),
|
||||
bi_fau(BIR_FAU_BLEND_0 + rt, false),
|
||||
bi_fau(BIR_FAU_BLEND_0 + rt, true), rgba2, regfmt, sr_count,
|
||||
sr_count_2);
|
||||
|
||||
assert(rt < 8);
|
||||
b->shader->info.bifrost->blend[rt].type = T;
|
||||
|
|
@ -1039,6 +1010,9 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
unsigned loc = nir_intrinsic_io_semantics(instr).location;
|
||||
bi_index src0 = bi_src_index(&instr->src[0]);
|
||||
|
||||
/* Blend shaders should use nir_intrinsic_blend_pan */
|
||||
assert(!b->shader->inputs->is_blend);
|
||||
|
||||
/* By ISA convention, the coverage mask is stored in R60. The store
|
||||
* itself will be handled by a subsequent ATEST instruction */
|
||||
if (loc == FRAG_RESULT_SAMPLE_MASK) {
|
||||
|
|
@ -1120,19 +1094,6 @@ bi_emit_fragment_out(bi_builder *b, nir_intrinsic_instr *instr)
|
|||
|
||||
bi_emit_blend_op(b, color, nir_intrinsic_src_type(instr), color2, T2, rt);
|
||||
}
|
||||
|
||||
if (b->shader->inputs->is_blend) {
|
||||
/* Jump back to the fragment shader, return address is stored
|
||||
* in r48 (see above). On Valhall, only jump if the address is
|
||||
* nonzero. The check is free there and it implements the "jump
|
||||
* to 0 terminates the blend shader" that's automatic on
|
||||
* Bifrost.
|
||||
*/
|
||||
if (b->shader->arch >= 8)
|
||||
bi_branchzi(b, bi_preload(b, 48), bi_preload(b, 48), BI_CMPF_NE);
|
||||
else
|
||||
bi_jump(b, bi_preload(b, 48));
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
|
|
|
|||
|
|
@ -121,10 +121,6 @@ struct pan_compile_inputs {
|
|||
uint64_t (*get_conv_desc)(enum pipe_format fmt, unsigned rt,
|
||||
unsigned force_size, bool dithered);
|
||||
bool is_blend, is_blit;
|
||||
struct {
|
||||
unsigned nr_samples;
|
||||
uint64_t bifrost_blend_desc;
|
||||
} blend;
|
||||
bool no_idvs;
|
||||
uint32_t view_mask;
|
||||
|
||||
|
|
|
|||
|
|
@ -791,6 +791,66 @@ get_equation_str(const struct pan_blend_rt_state *rt_state, char *str,
|
|||
}
|
||||
}
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
/* Per-intrinsic callback that rewrites render-target accesses in a blend
 * shader into Panfrost-specific intrinsics.
 *
 * `data` is the `struct pan_blend_state` describing the render targets; the
 * format (and, for loads, the sample count) of the accessed target is read
 * from `state->rts[rt]`, where `rt` is the I/O location relative to
 * FRAG_RESULT_DATA0.
 *
 *  - load_output is replaced by load_converted_output_pan.
 *  - store_output is replaced by blend_pan.
 *
 * Returns true if the intrinsic was rewritten, false if it was left alone.
 */
static bool
|
||||
lower_rt_intrin(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
const struct pan_blend_state *state = data;
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_output: {
|
||||
nir_io_semantics io = nir_intrinsic_io_semantics(intr);
|
||||
/* Only colour render-target locations are expected here */
assert(io.location >= FRAG_RESULT_DATA0);
|
||||
unsigned rt = io.location - FRAG_RESULT_DATA0;
|
||||
enum pipe_format format = state->rts[rt].format;
|
||||
unsigned nr_samples = state->rts[rt].nr_samples;
|
||||
|
||||
nir_alu_type dest_type = nir_intrinsic_dest_type(intr);
|
||||
/* The bit size of the destination type is passed as force_size */
unsigned size = nir_alu_type_get_type_size(dest_type);
|
||||
uint64_t blend_desc =
|
||||
GENX(pan_blend_get_internal_desc)(format, rt, size, false);
|
||||
|
||||
/* Build the replacement right after the original load */
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
/* Sources: render target index, sample index (the run-time sample ID
 * when the target is multisampled, otherwise 0) and the upper 32 bits
 * of the internal descriptor. */
nir_def *lowered = nir_load_converted_output_pan(
|
||||
b, intr->def.num_components, intr->def.bit_size,
|
||||
nir_imm_int(b, rt),
|
||||
nr_samples > 1 ? nir_load_sample_id(b) : nir_imm_int(b, 0),
|
||||
nir_imm_int(b, blend_desc >> 32),
|
||||
.dest_type = dest_type,
|
||||
.io_semantics = io);
|
||||
|
||||
nir_def_replace(&intr->def, lowered);
|
||||
return true;
|
||||
}
|
||||
|
||||
case nir_intrinsic_store_output: {
|
||||
nir_io_semantics io = nir_intrinsic_io_semantics(intr);
|
||||
/* Only colour render-target locations are expected here */
assert(io.location >= FRAG_RESULT_DATA0);
|
||||
unsigned rt = io.location - FRAG_RESULT_DATA0;
|
||||
enum pipe_format format = state->rts[rt].format;
|
||||
|
||||
nir_alu_type src_type = nir_intrinsic_src_type(intr);
|
||||
/* The bit size of the source type is passed as force_size */
unsigned size = nir_alu_type_get_type_size(src_type);
|
||||
uint64_t blend_desc =
|
||||
GENX(pan_blend_get_internal_desc)(format, rt, size, false);
|
||||
|
||||
/* Drop the store; the blend_pan is emitted where it used to be */
b->cursor = nir_instr_remove(&intr->instr);
|
||||
|
||||
/* The colour is padded to a vec4 below, so the store is expected to
 * start at component 0. */
assert(nir_intrinsic_component(intr) == 0);
|
||||
/* Sources: cumulative coverage, the full 64-bit internal descriptor as
 * an immediate, and the colour padded to four components. */
nir_blend_pan(b, nir_load_cumulative_coverage_pan(b),
|
||||
nir_imm_int64(b, blend_desc),
|
||||
nir_pad_vec4(b, intr->src[0].ssa),
|
||||
.io_semantics = io,
|
||||
.src_type = src_type);
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
/* Any other intrinsic is left untouched */
return false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
nir_shader *
|
||||
GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
|
||||
|
|
@ -891,6 +951,20 @@ GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
|
|||
|
||||
NIR_PASS(_, b.shader, nir_lower_blend, &options);
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
/* On Bifrost+ we use the NIR blend/load intrinsics directly */
|
||||
NIR_PASS(_, b.shader, nir_shader_intrinsics_pass,
|
||||
lower_rt_intrin, nir_metadata_control_flow, (void *)state);
|
||||
|
||||
/* And we put a blend_return_pan at the end.
|
||||
*
|
||||
* This must happen after nir_lower_blend: that pass appends code at the end
|
||||
* of the shader, which would otherwise land after the blend_return_pan.
|
||||
*/
|
||||
b = nir_builder_at(nir_after_impl(nir_shader_get_entrypoint(b.shader)));
|
||||
nir_blend_return_pan(&b);
|
||||
#endif
|
||||
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
|
|
@ -949,30 +1023,6 @@ GENX(pan_blend_get_internal_desc)(enum pipe_format fmt, unsigned rt,
|
|||
return res.opaque[0] | ((uint64_t)res.opaque[1] << 32);
|
||||
}
|
||||
|
||||
/* Per-intrinsic callback that replaces the result of
 * load_rt_conversion_pan with a compile-time constant.
 *
 * `data` is an array of pipe formats indexed by render target. The render
 * target index comes from the intrinsic's base and the size from its source
 * type. All uses of the intrinsic are rewritten to an immediate holding the
 * upper 32 bits of the internal descriptor; the original instruction itself
 * is not removed here.
 *
 * Returns true if the intrinsic was handled, false otherwise.
 */
static bool
|
||||
inline_rt_conversion(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
||||
{
|
||||
/* Only load_rt_conversion_pan is of interest */
if (intr->intrinsic != nir_intrinsic_load_rt_conversion_pan)
|
||||
return false;
|
||||
|
||||
enum pipe_format *formats = data;
|
||||
unsigned rt = nir_intrinsic_base(intr);
|
||||
unsigned size = nir_alu_type_get_type_size(nir_intrinsic_src_type(intr));
|
||||
uint64_t conversion =
|
||||
GENX(pan_blend_get_internal_desc)(formats[rt], rt, size, false);
|
||||
|
||||
/* Materialise the constant right after the load and redirect all users */
b->cursor = nir_after_instr(&intr->instr);
|
||||
nir_def_rewrite_uses(&intr->def, nir_imm_int(b, conversion >> 32));
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Shader-level entry point: applies inline_rt_conversion to shader `s`
 * through nir_shader_intrinsics_pass.
 *
 * `formats` is forwarded to the callback, which indexes it by render target.
 * The return value is whatever nir_shader_intrinsics_pass reports.
 */
bool
|
||||
GENX(pan_inline_rt_conversion)(nir_shader *s, enum pipe_format *formats)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(s, inline_rt_conversion,
|
||||
nir_metadata_control_flow, formats);
|
||||
}
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
enum mali_register_file_format
|
||||
GENX(pan_fixup_blend_type)(nir_alu_type T_size, enum pipe_format format)
|
||||
|
|
|
|||
|
|
@ -141,8 +141,6 @@ nir_shader *GENX(pan_blend_create_shader)(const struct pan_blend_state *state,
|
|||
uint64_t GENX(pan_blend_get_internal_desc)(enum pipe_format fmt, unsigned rt,
|
||||
unsigned force_size, bool dithered);
|
||||
|
||||
bool GENX(pan_inline_rt_conversion)(nir_shader *s, enum pipe_format *formats);
|
||||
|
||||
#if PAN_ARCH < 9
|
||||
enum mali_register_file_format
|
||||
GENX(pan_fixup_blend_type)(nir_alu_type T_size, enum pipe_format format);
|
||||
|
|
|
|||
|
|
@ -90,21 +90,11 @@ get_blend_shader(struct panvk_device *dev,
|
|||
.gpu_id = pdev->kmod.dev->props.gpu_id,
|
||||
.gpu_variant = pdev->kmod.dev->props.gpu_variant,
|
||||
.is_blend = true,
|
||||
.blend = {
|
||||
.nr_samples = key.info.nr_samples,
|
||||
.bifrost_blend_desc =
|
||||
GENX(pan_blend_get_internal_desc)(key.info.format, key.info.rt, 0,
|
||||
false),
|
||||
},
|
||||
};
|
||||
|
||||
pan_preprocess_nir(nir, inputs.gpu_id);
|
||||
pan_postprocess_nir(nir, inputs.gpu_id);
|
||||
|
||||
enum pipe_format rt_formats[8] = {0};
|
||||
rt_formats[rt] = key.info.format;
|
||||
NIR_PASS(_, nir, GENX(pan_inline_rt_conversion), rt_formats);
|
||||
|
||||
VkResult result =
|
||||
panvk_per_arch(create_internal_shader)(dev, nir, &inputs, &shader);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue