mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-21 07:30:33 +01:00
Merge branch 'radv-fs-output-nsz' into 'main'
nir,radv: remove signed zero preserve based on FS output format or blending See merge request mesa/mesa!40323
This commit is contained in:
commit
b5bd9ef6aa
8 changed files with 101 additions and 17 deletions
|
|
@ -24,6 +24,7 @@ struct radv_shader_args;
|
|||
struct radv_shader_layout;
|
||||
struct radv_device;
|
||||
struct radv_graphics_state_key;
|
||||
struct radv_ps_epilog_key;
|
||||
|
||||
bool radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device,
|
||||
const struct radv_shader_stage *stage);
|
||||
|
|
@ -78,7 +79,7 @@ bool radv_nir_lower_draw_id_to_zero(nir_shader *shader);
|
|||
|
||||
bool radv_nir_remap_color_attachment(nir_shader *shader, const struct radv_graphics_state_key *gfx_state);
|
||||
|
||||
bool radv_nir_trim_fs_color_exports(nir_shader *shader, uint32_t colors_needed);
|
||||
bool radv_nir_trim_fs_color_exports(nir_shader *shader, const struct radv_ps_epilog_key *epilog_key);
|
||||
|
||||
bool radv_nir_lower_printf(nir_shader *shader);
|
||||
|
||||
|
|
|
|||
|
|
@ -8,11 +8,12 @@
|
|||
#include "nir/nir_builder.h"
|
||||
#include "radv_constants.h"
|
||||
#include "radv_nir.h"
|
||||
#include "radv_shader.h"
|
||||
|
||||
static bool
|
||||
trim_fs_color_exports(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
||||
{
|
||||
const uint32_t colors_needed = *(uint32_t *)state;
|
||||
const struct radv_ps_epilog_key *epilog_key = (const struct radv_ps_epilog_key *)state;
|
||||
|
||||
if (intrin->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
|
@ -24,14 +25,26 @@ trim_fs_color_exports(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
|||
if (index < 0)
|
||||
return false;
|
||||
|
||||
const unsigned needed = (colors_needed >> (index * 4) & 0xf) >> nir_intrinsic_component(intrin);
|
||||
bool progress = false;
|
||||
|
||||
if (epilog_key->no_signed_zero & BITFIELD_BIT(index)) {
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
|
||||
|
||||
if (!sem.no_signed_zero) {
|
||||
sem.no_signed_zero = 1;
|
||||
nir_intrinsic_set_io_semantics(intrin, sem);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned needed = (epilog_key->colors_needed >> (index * 4) & 0xf) >> nir_intrinsic_component(intrin);
|
||||
|
||||
const unsigned write_mask = nir_intrinsic_write_mask(intrin);
|
||||
|
||||
const unsigned new_write_mask = write_mask & needed;
|
||||
|
||||
if (new_write_mask == write_mask)
|
||||
return false;
|
||||
return progress;
|
||||
|
||||
if (!new_write_mask)
|
||||
nir_instr_remove(&intrin->instr);
|
||||
|
|
@ -42,7 +55,7 @@ trim_fs_color_exports(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
|||
}
|
||||
|
||||
bool
|
||||
radv_nir_trim_fs_color_exports(nir_shader *shader, uint32_t colors_needed)
|
||||
radv_nir_trim_fs_color_exports(nir_shader *shader, const struct radv_ps_epilog_key *epilog_key)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(shader, trim_fs_color_exports, nir_metadata_control_flow, &colors_needed);
|
||||
return nir_shader_intrinsics_pass(shader, trim_fs_color_exports, nir_metadata_control_flow, (void *)epilog_key);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -154,6 +154,23 @@ format_is_float32(VkFormat format)
|
|||
return channel >= 0 && desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32;
|
||||
}
|
||||
|
||||
static bool
|
||||
format_ignores_signed_zero(VkFormat format)
|
||||
{
|
||||
const struct util_format_description *desc = radv_format_description(format);
|
||||
|
||||
/* Unsigned float formats don't care about signed zeros. */
|
||||
if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT || desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT)
|
||||
return true;
|
||||
|
||||
for (unsigned i = 0; i < desc->nr_channels; i++) {
|
||||
if (desc->channel[i].pure_integer || desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
radv_pipeline_needs_ps_epilog(const struct vk_graphics_pipeline_state *state,
|
||||
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
|
||||
|
|
@ -1768,7 +1785,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
|
|||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0;
|
||||
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0, no_signed_zero = 0;
|
||||
struct radv_ps_epilog_key key;
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
|
@ -1794,6 +1811,8 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
|
|||
|
||||
key.colors_needed |= comp_used << (4 * i);
|
||||
|
||||
if (format_ignores_signed_zero(fmt) || blend_enable)
|
||||
no_signed_zero |= 1 << i;
|
||||
if (format_is_int8(fmt))
|
||||
is_int8 |= 1 << i;
|
||||
if (format_is_int10(fmt))
|
||||
|
|
@ -1822,6 +1841,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
|
|||
col_format |= (col_format & 0xf) << 4;
|
||||
key.color_map[1] = 1;
|
||||
key.colors_needed |= (key.colors_needed & 0xf) << 4;
|
||||
no_signed_zero |= 0x2;
|
||||
}
|
||||
|
||||
z_format = ac_get_spi_shader_z_format(state->export_depth, state->export_stencil, state->export_sample_mask,
|
||||
|
|
@ -1831,6 +1851,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
|
|||
key.color_is_int8 = pdev->info.compiler_info.has_cb_lt16bit_int_clamp_bug ? is_int8 : 0;
|
||||
key.color_is_int10 = pdev->info.compiler_info.has_cb_lt16bit_int_clamp_bug ? is_int10 : 0;
|
||||
key.enable_mrt_output_nan_fixup = instance->drirc.debug.enable_mrt_output_nan_fixup ? is_float32 : 0;
|
||||
key.no_signed_zero = no_signed_zero;
|
||||
key.colors_written = state->colors_written;
|
||||
key.mrt0_is_dual_src = state->mrt0_is_dual_src && key.colors_needed & 0xf;
|
||||
key.export_depth = state->export_depth;
|
||||
|
|
@ -2836,8 +2857,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
/* Lower FS outputs to scalar to allow dce. */
|
||||
NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL);
|
||||
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports,
|
||||
gfx_state->ps.epilog.colors_needed);
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, radv_nir_trim_fs_color_exports, &gfx_state->ps.epilog);
|
||||
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_copy_prop);
|
||||
NIR_PASS(update_info, stages[MESA_SHADER_FRAGMENT].nir, nir_opt_dce);
|
||||
|
|
|
|||
|
|
@ -104,6 +104,7 @@ struct radv_ps_epilog_key {
|
|||
uint8_t color_is_int8;
|
||||
uint8_t color_is_int10;
|
||||
uint8_t enable_mrt_output_nan_fixup;
|
||||
uint8_t no_signed_zero;
|
||||
|
||||
uint32_t colors_needed;
|
||||
|
||||
|
|
|
|||
|
|
@ -2054,7 +2054,10 @@ typedef struct nir_io_semantics {
|
|||
unsigned interp_explicit_strict : 1; /* preserve original vertex order */
|
||||
/* Skip nir_validate of the intrinsic. Any new code that sets it will be NAK'd. */
|
||||
unsigned no_validate : 1;
|
||||
unsigned padding;
|
||||
|
||||
/* Start of the second uint. */
|
||||
unsigned no_signed_zero : 1; /* set when it does not matter whether the input/output is -0.0 or +0.0. */
|
||||
unsigned padding : 31;
|
||||
} nir_io_semantics;
|
||||
|
||||
/* Transform feedback info for 4 outputs. */
|
||||
|
|
|
|||
|
|
@ -13,9 +13,10 @@
|
|||
* needed, which is quite common. For example, any float comparison, cosine, exp2, log2,
|
||||
* or addition with non zero value does not care about the zero sign of the inputs. Neither
|
||||
* do texture coordinates.
|
||||
* Drivers can also set no_signed_zero for fragment output stores based on state:
|
||||
* fixed point or R11G11B10 formats do not care about the sign of zero.
|
||||
*
|
||||
* Future work could also consider fragment output state, fixed point or R11G11B10 formats
|
||||
* do not care about the sign of zero.
|
||||
* Future work:
|
||||
* For pre raster stages, position doesn't care, and we could back propagate information from
|
||||
* the FS for varyings, and interpolated varyings do not care anyway.
|
||||
*/
|
||||
|
|
@ -214,8 +215,8 @@ prop_tex_fp_math_ctrl(nir_tex_instr *tex)
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
prop_intrin_fp_math_ctrl(nir_intrinsic_instr *intrin)
|
||||
static bool
|
||||
opt_intrin_fp_math_ctrl(nir_intrinsic_instr *intrin)
|
||||
{
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_ddx:
|
||||
|
|
@ -226,11 +227,37 @@ prop_intrin_fp_math_ctrl(nir_intrinsic_instr *intrin)
|
|||
case nir_intrinsic_ddy_fine:
|
||||
if (intrin->instr.pass_flags)
|
||||
src_mark_preserve_sz(&intrin->src[0], NULL);
|
||||
break;
|
||||
return false;
|
||||
default:
|
||||
nir_foreach_src(&intrin->instr, src_mark_preserve_sz, NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!nir_intrinsic_has_io_semantics(intrin)) {
|
||||
nir_foreach_src(&intrin->instr, src_mark_preserve_sz, NULL);
|
||||
return false;
|
||||
}
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
|
||||
const nir_intrinsic_info *info = &nir_intrinsic_infos[(int)intrin->intrinsic];
|
||||
|
||||
if (info->has_dest) {
|
||||
nir_foreach_src(&intrin->instr, src_mark_preserve_sz, NULL);
|
||||
|
||||
/* For loads, set no signed zero flag based on gathered info. */
|
||||
if (!intrin->instr.pass_flags && !sem.no_signed_zero) {
|
||||
sem.no_signed_zero = 1;
|
||||
nir_intrinsic_set_io_semantics(intrin, sem);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
} else {
|
||||
/* For stores, propagate the signed zero information for the data source. */
|
||||
for (unsigned i = sem.no_signed_zero; i < info->num_srcs; i++)
|
||||
src_mark_preserve_sz(&intrin->src[i], NULL);
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
|
|
@ -272,7 +299,7 @@ opt_fp_math_ctrl_impl(nir_function_impl *impl)
|
|||
prop_tex_fp_math_ctrl(nir_instr_as_tex(instr));
|
||||
break;
|
||||
case nir_instr_type_intrinsic:
|
||||
prop_intrin_fp_math_ctrl(nir_instr_as_intrinsic(instr));
|
||||
progress |= opt_intrin_fp_math_ctrl(nir_instr_as_intrinsic(instr));
|
||||
break;
|
||||
case nir_instr_type_phi:
|
||||
if (!instr->pass_flags)
|
||||
|
|
|
|||
|
|
@ -176,6 +176,18 @@ vectorize_load(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
|
|||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(new_intr);
|
||||
|
||||
for (unsigned i = start; i < start + count; i++) {
|
||||
if (chan[i]) {
|
||||
if (!nir_intrinsic_io_semantics(chan[i]).no_signed_zero)
|
||||
sem.no_signed_zero = 0;
|
||||
}
|
||||
|
||||
if (step == merge_low_high_16_to_32 && chan[4 + i]) {
|
||||
if (!nir_intrinsic_io_semantics(chan[4 + i]).no_signed_zero)
|
||||
sem.no_signed_zero = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (step == vectorize_high_16_separately) {
|
||||
assert(start >= 4);
|
||||
sem.high_16bits = 1;
|
||||
|
|
@ -298,6 +310,8 @@ vectorize_store(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
|
|||
sem.no_sysval_output = 0;
|
||||
if (!nir_intrinsic_io_semantics(chan[i]).no_varying)
|
||||
sem.no_varying = 0;
|
||||
if (!nir_intrinsic_io_semantics(chan[i]).no_signed_zero)
|
||||
sem.no_signed_zero = 0;
|
||||
}
|
||||
|
||||
if (step == merge_low_high_16_to_32) {
|
||||
|
|
@ -307,6 +321,8 @@ vectorize_store(nir_intrinsic_instr *chan[8], unsigned start, unsigned count,
|
|||
sem.no_sysval_output = 0;
|
||||
if (!nir_intrinsic_io_semantics(chan[4 + i]).no_varying)
|
||||
sem.no_varying = 0;
|
||||
if (!nir_intrinsic_io_semantics(chan[4 + i]).no_signed_zero)
|
||||
sem.no_signed_zero = 0;
|
||||
}
|
||||
|
||||
/* Update the type. */
|
||||
|
|
|
|||
|
|
@ -1601,6 +1601,9 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
if (io.no_validate)
|
||||
fprintf(fp, " no_validate");
|
||||
|
||||
if (io.no_signed_zero)
|
||||
fprintf(fp, " no_signed_zero");
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue